Projects
Essentials
x265
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 39
View file
x265.changes
Changed
@@ -1,4 +1,40 @@ ------------------------------------------------------------------- +Mon Jun 1 17:51:22 UTC 2020 - Luigi Baldoni <aloisio@gmx.com> + +- Update to version 3.4 + New features: + * Edge-aware quadtree partitioning to terminate CU depth + recursion based on edge information. --rskip level 2 enables + the feature and --rskip-edge-threshold denotes the minimum + expected edge-density percentage within the CU, below which + the recursion is skipped. Experimental feature. + * Application-level feature --abr-ladder for automating + efficient ABR ladder generation. Shows ~65% savings in the + over-all turn-around time required for the generation of a + typical Apple HLS ladder in Intel(R) Xeon(R) Platinum 8280 + CPU @ 2.70GHz over a sequential ABR-ladder generation + approach that leverages save-load architecture. + Enhancements to existing features: + * Improved efficiency in 2-pass rate-control algorithm. The + savings in the bitrate is ~1.72% with visual improvement in + quality in the initial 1-2 secs. + Encoder enhancements: + * Faster ARM64 encodes enabled by ASM contributions from + Huawei. The speed-up over no-asm version for 1080p encodes @ + medium preset is ~15% in a 16 core H/W. + * Strict VBV conformance in zone encoding. + Bug fixes: + * Multi-pass encode failures with --frame-dup. + * Corrupted bitstreams with --hist-scenecut when input depth + and internal bit-depth differ. + * Incorrect analysis propagation in multi-level save-load + architecture. + * Failure in detecting NUMA packages installed in non-standard + directories. + +- Refreshed arm.patch + +------------------------------------------------------------------- Sat Mar 28 14:28:56 UTC 2020 - Luigi Baldoni <aloisio@gmx.com> - Update to version 3.3
View file
x265.spec
Changed
@@ -17,11 +17,11 @@ # -%define sover 188 +%define sover 192 %define libname lib%{name} %define libsoname %{libname}-%{sover} Name: x265 -Version: 3.3 +Version: 3.4 Release: 0 Summary: A free h265/HEVC encoder - encoder binary License: GPL-2.0-or-later @@ -67,7 +67,6 @@ %patch0 -p1 %patch1 -p1 %patch2 -p1 - sed -i -e "s/0.0/%{sover}.0/g" source/cmake/version.cmake
View file
arm.patch
Changed
@@ -1,8 +1,8 @@ -Index: x265_2.2/source/CMakeLists.txt +Index: x265_3.4/source/CMakeLists.txt =================================================================== ---- x265_2.2.orig/source/CMakeLists.txt -+++ x265_2.2/source/CMakeLists.txt -@@ -65,15 +65,22 @@ elseif(POWERMATCH GREATER "-1") +--- x265_3.4.orig/source/CMakeLists.txt ++++ x265_3.4/source/CMakeLists.txt +@@ -64,26 +64,26 @@ elseif(POWERMATCH GREATER "-1") add_definitions(-DPPC64=1) message(STATUS "Detected POWER PPC64 target processor") endif() @@ -12,41 +12,62 @@ - else() - set(CROSS_COMPILE_ARM 0) - endif() -- message(STATUS "Detected ARM target processor") - set(ARM 1) -- add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1) +- if("${CMAKE_SIZEOF_VOID_P}" MATCHES 8) +- message(STATUS "Detected ARM64 target processor") +- set(ARM64 1) +- add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0) +- else() +- message(STATUS "Detected ARM target processor") +- add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1) +- endif() +elseif(${SYSPROC} MATCHES "armv5.*") + message(STATUS "Detected ARMV5 system processor") + set(ARMV5 1) -+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=0 -DHAVE_NEON=0) ++ add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=0 -DHAVE_NEON=0) +elseif(${SYSPROC} STREQUAL "armv6l") + message(STATUS "Detected ARMV6 system processor") + set(ARMV6 1) -+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0) ++ add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1 -DHAVE_NEON=0) +elseif(${SYSPROC} STREQUAL "armv7l") + message(STATUS "Detected ARMV7 system processor") + set(ARMV7 1) -+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0) ++ add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1 -DHAVE_NEON=0) +elseif(${SYSPROC} STREQUAL "aarch64") + message(STATUS "Detected AArch64 system processor") + set(ARMV7 1) -+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0) ++ 
add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0 -DHAVE_NEON=0) else() message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown") message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}") -@@ -208,18 +215,9 @@ if(GCC) + endif() +- + if(UNIX) + list(APPEND PLATFORM_LIBS pthread) + find_library(LIBRT rt) +@@ -238,28 +238,9 @@ if(GCC) endif() endif() endif() - if(ARM AND CROSS_COMPILE_ARM) -- set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC) +- if(ARM64) +- set(ARM_ARGS -fPIC) +- else() +- set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC) +- endif() +- message(STATUS "cross compile arm") - elseif(ARM) -- find_package(Neon) -- if(CPU_HAS_NEON) -- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC) +- if(ARM64) +- set(ARM_ARGS -fPIC) - add_definitions(-DHAVE_NEON) - else() -- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm) +- find_package(Neon) +- if(CPU_HAS_NEON) +- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC) +- add_definitions(-DHAVE_NEON) +- else() +- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm) +- endif() - endif() + if(ARMV7) + add_definitions(-fPIC) @@ -55,11 +76,11 @@ if(FPROFILE_GENERATE) if(INTEL_CXX) add_definitions(-prof-gen -prof-dir="${CMAKE_CURRENT_BINARY_DIR}") -Index: x265_2.2/source/common/cpu.cpp +Index: x265_3.4/source/common/cpu.cpp =================================================================== ---- x265_2.2.orig/source/common/cpu.cpp -+++ x265_2.2/source/common/cpu.cpp -@@ -37,7 +37,7 @@ +--- x265_3.4.orig/source/common/cpu.cpp ++++ x265_3.4/source/common/cpu.cpp +@@ -39,7 +39,7 @@ #include <machine/cpu.h> #endif @@ -68,7 +89,7 @@ #include <signal.h> #include <setjmp.h> static sigjmp_buf jmpbuf; -@@ -344,7 +344,6 @@ uint32_t cpu_detect(void) +@@ -350,7 +350,6 @@ uint32_t cpu_detect(bool benableavx512) } canjump = 1; @@ -76,7 +97,7 @@ canjump = 0; signal(SIGILL, 
oldsig); #endif // if !HAVE_NEON -@@ -360,7 +359,7 @@ uint32_t cpu_detect(void) +@@ -366,7 +365,7 @@ uint32_t cpu_detect(bool benableavx512) // which may result in incorrect detection and the counters stuck enabled. // right now Apple does not seem to support performance counters for this test #ifndef __MACH__ @@ -84,4 +105,4 @@ + //flags |= PFX(cpu_fast_neon_mrc_test)() ? X265_CPU_FAST_NEON_MRC : 0; #endif // TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc) - #endif // if HAVE_ARMV6 + #elif X265_ARCH_ARM64
View file
baselibs.conf
Changed
@@ -1,1 +1,1 @@ -libx265-179 +libx265-192
View file
x265_3.3.tar.gz/.hg_archival.txt -> x265_3.4.tar.gz/.hg_archival.txt
Changed
@@ -1,5 +1,4 @@ repo: 09fe40627f03a0f9c3e6ac78b22ac93da23f9fdf -node: f94b0d32737d40b2b9a9d74df57fee45e6be5cb0 -branch: Release_3.3 -latesttag: 3.3 -latesttagdistance: 1 +node: 2a65b720985096bcb1664f7cb05c3d04aeb576f5 +branch: Release_3.4 +tag: 3.4
View file
x265_3.3.tar.gz/.hgtags -> x265_3.4.tar.gz/.hgtags
Changed
@@ -40,3 +40,4 @@ 5ee3593ebd82b4d8957909bbc1b68b99b59ba773 3.3_RC1 96a10df63c0b778b480330bdf3be8da7db8a5fb1 3.3_RC2 057215961bc4b51b6260a584ff3d506e6d65cfd6 3.3 +ee92f36782800f145970131e01c79955a3ed5c10 3.4_RC1
View file
x265_3.4.tar.gz/build/aarch64-linux/crosscompile.cmake
Added
@@ -0,0 +1,15 @@ +# CMake toolchain file for cross compiling x265 for aarch64 +# This feature is only supported as experimental. Use with caution. +# Please report bugs on bitbucket +# Run cmake with: cmake -DCMAKE_TOOLCHAIN_FILE=crosscompile.cmake -G "Unix Makefiles" ../../source && ccmake ../../source + +set(CROSS_COMPILE_ARM 1) +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR aarch64) + +# specify the cross compiler +set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc) +set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++) + +# specify the target environment +SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu)
View file
x265_3.4.tar.gz/build/aarch64-linux/make-Makefiles.bash
Added
@@ -0,0 +1,4 @@ +#!/bin/bash +# Run this from within a bash shell + +cmake -DCMAKE_TOOLCHAIN_FILE="crosscompile.cmake" -G "Unix Makefiles" ../../source && ccmake ../../source
View file
x265_3.3.tar.gz/doc/reST/cli.rst -> x265_3.4.tar.gz/doc/reST/cli.rst
Changed
@@ -107,6 +107,9 @@ **BufferFillFinal** Buffer bits available after removing the frame out of CPB. + **UnclippedBufferFillFinal** Unclipped buffer bits available after removing the frame + out of CPB only used for csv logging purpose. + **Latency** Latency in terms of number of frames between when the frame was given in and when the frame is given out. @@ -842,15 +845,31 @@ Measure 2Nx2N merge candidates first; if no residual is found, additional modes at that depth are not analysed. Default disabled -.. option:: --rskip, --no-rskip +.. option:: --rskip <0|1|2> + + This option determines early exit from CU depth recursion in modes 1 and 2. When a skip CU is + found, additional heuristics (depending on the RD level and rskip mode) are used to decide whether + to terminate recursion. The following table summarizes the behavior. + + +----------+------------+----------------------------------------------------------------+ + | RD Level | Rskip Mode | Skip Recursion Heuristic | + +==========+============+================================================================+ + | 0 - 4 | 1 | Neighbour costs and CU homogenity. | + +----------+------------+----------------------------------------------------------------+ + | 5 - 6 | 1 | Comparison with inter2Nx2N. | + +----------+------------+----------------------------------------------------------------+ + | 0 - 6 | 2 | CU edge density. | + +----------+------------+----------------------------------------------------------------+ + + Provides minimal quality degradation at good performance gains for non-zero modes. + :option:`--rskip mode 0` means disabled. Default: 1, disabled when :option:`--tune grain` is used. + This is a integer value representing the edge-density percentage within the CU. Internally normalized to a number between 0.0 to 1.0 in x265. + Recommended low thresholds for slow encodes and high for fast encodes. - This option determines early exit from CU depth recursion. 
When a skip CU is - found, additional heuristics (depending on rd-level) are used to decide whether - to terminate recursion. In rdlevels 5 and 6, comparison with inter2Nx2N is used, - while at rdlevels 4 and neighbour costs are used to skip recursion. - Provides minimal quality degradation at good performance gains when enabled. +.. option:: --rskip-edge-threshold <0..100> - Default: enabled, disabled for :option:`--tune grain` + Denotes the minimum expected edge-density percentage within the CU, below which the recursion is skipped. + Default: 5, requires :option:`--rskip mode 2` to be enabled. .. option:: --splitrd-skip, --no-splitrd-skip @@ -2501,6 +2520,28 @@ --recon-y4m-exec "ffplay -i pipe:0 -autoexit" **CLI ONLY** + +ABR-ladder Options +================== + +.. option:: --abr-ladder <filename> + + File containing the encoder configurations to generate ABR ladder. + The format of each line is: + + **<encID:reuse-level:refID> <CLI>** + + where, encID indicates the unique name given to the encode, refID indicates + the name of the encode from which analysis info has to be re-used ( set to 'nil' + if analysis reuse isn't preferred ), and reuse-level indicates the level ( :option:`--analysis-load-reuse-level`) + at which analysis info has to be reused. + + A sample config file is available in `the downloads page <https://bitbucket.org/multicoreware/x265/downloads/Sample_ABR_ladder_config>`_ + + Default: Disabled ( Conventional single encode generation ). Experimental feature. + + **CLI ONLY** + SVT-HEVC Encoder Options ========================
View file
x265_3.3.tar.gz/doc/reST/releasenotes.rst -> x265_3.4.tar.gz/doc/reST/releasenotes.rst
Changed
@@ -2,6 +2,32 @@ Release Notes ************* +Version 3.4 +=========== + +Release date - 29th May, 2020. + +New features +------------ +1. **Edge-aware quadtree partitioning** to terminate CU depth recursion based on edge information. :option:`--rskip` level 2 enables the feature and :option:`--rskip-edge-threshold` denotes the minimum expected edge-density percentage within the CU, below which the recursion is skipped. Experimental feature. +2. Application-level feature :option:`--abr-ladder` for automating efficient ABR ladder generation. Shows ~65% savings in the over-all turn-around time required for the generation of a typical Apple HLS ladder in Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz over a sequential ABR-ladder generation approach that leverages save-load architecture. + +Enhancements to existing features +--------------------------------- +1. Improved efficiency in 2-pass rate-control algorithm. The savings in the bitrate is ~1.72% with visual improvement in quality in the initial 1-2 secs. + +Encoder enhancements +-------------------- +1. Faster ARM64 encodes enabled by ASM contributions from Huawei. The speed-up over no-asm version for 1080p encodes @ medium preset is ~15% in a 16 core H/W. +2. Strict VBV conformance in zone encoding. + +Bug fixes +--------- +1. Multi-pass encode failures with :option:`--frame-dup`. +2. Corrupted bitstreams with :option:`--hist-scenecut` when input depth and internal bit-depth differ. +3. Incorrect analysis propagation in multi-level save-load architecture. +4. Failure in detecting NUMA packages installed in non-standard directories. + Version 3.3 ===========
View file
x265_3.3.tar.gz/source/CMakeLists.txt -> x265_3.4.tar.gz/source/CMakeLists.txt
Changed
@@ -29,7 +29,7 @@ option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF) mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD) # X265_BUILD must be incremented each time the public API is changed -set(X265_BUILD 188) +set(X265_BUILD 192) configure_file("${PROJECT_SOURCE_DIR}/x265.def.in" "${PROJECT_BINARY_DIR}/x265.def") configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in" @@ -40,7 +40,7 @@ # System architecture detection string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" SYSPROC) set(X86_ALIASES x86 i386 i686 x86_64 amd64) -set(ARM_ALIASES armv6l armv7l) +set(ARM_ALIASES armv6l armv7l aarch64) list(FIND X86_ALIASES "${SYSPROC}" X86MATCH) list(FIND ARM_ALIASES "${SYSPROC}" ARMMATCH) set(POWER_ALIASES ppc64 ppc64le) @@ -70,9 +70,15 @@ else() set(CROSS_COMPILE_ARM 0) endif() - message(STATUS "Detected ARM target processor") set(ARM 1) - add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1) + if("${CMAKE_SIZEOF_VOID_P}" MATCHES 8) + message(STATUS "Detected ARM64 target processor") + set(ARM64 1) + add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0) + else() + message(STATUS "Detected ARM target processor") + add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1) + endif() else() message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown") message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}") @@ -95,6 +101,8 @@ if(NUMA_FOUND) link_directories(${NUMA_LIBRARY_DIR}) list(APPEND CMAKE_REQUIRED_LIBRARIES numa) + list(APPEND CMAKE_REQUIRED_INCLUDES ${NUMA_INCLUDE_DIR}) + list(APPEND CMAKE_REQUIRED_LINK_OPTIONS "-L${NUMA_LIBRARY_DIR}") check_symbol_exists(numa_node_of_cpu numa.h NUMA_V2) if(NUMA_V2) add_definitions(-DHAVE_LIBNUMA) @@ -231,14 +239,24 @@ endif() endif() if(ARM AND CROSS_COMPILE_ARM) - set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC) + if(ARM64) + set(ARM_ARGS -fPIC) + else() + set(ARM_ARGS -march=armv6 -mfloat-abi=soft 
-mfpu=vfp -marm -fPIC) + endif() + message(STATUS "cross compile arm") elseif(ARM) - find_package(Neon) - if(CPU_HAS_NEON) - set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC) + if(ARM64) + set(ARM_ARGS -fPIC) add_definitions(-DHAVE_NEON) else() - set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm) + find_package(Neon) + if(CPU_HAS_NEON) + set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC) + add_definitions(-DHAVE_NEON) + else() + set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm) + endif() endif() endif() add_definitions(${ARM_ARGS}) @@ -518,7 +536,11 @@ # compile ARM arch asm files here enable_language(ASM) foreach(ASM ${ARM_ASMS}) - set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM}) + if(ARM64) + set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/aarch64/${ASM}) + else() + set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM}) + endif() list(APPEND ASM_SRCS ${ASM_SRC}) list(APPEND ASM_OBJS ${ASM}.${SUFFIX}) add_custom_command( @@ -725,16 +747,16 @@ # Xcode seems unable to link the CLI with libs, so link as one targget if(ENABLE_HDR10_PLUS) add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} - x265.cpp x265.h x265cli.h + x265.cpp x265.h x265cli.cpp x265cli.h abrEncApp.cpp abrEncApp.h $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> $<TARGET_OBJECTS:dynamicHDR10> ${ASM_OBJS}) else() add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} - x265.cpp x265.h x265cli.h + x265.cpp x265.h x265cli.cpp x265cli.h abrEncApp.cpp abrEncApp.h $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${ASM_OBJS}) endif() else() add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} ${X265_RC_FILE} - ${ExportDefs} x265.cpp x265.h x265cli.h) + ${ExportDefs} x265.cpp x265.h x265cli.cpp x265cli.h abrEncApp.cpp abrEncApp.h) if(WIN32 OR NOT ENABLE_SHARED OR INTEL_CXX) # The CLI cannot link to the shared library on Windows, it # requires internal APIs not exported from the DLL
View file
x265_3.4.tar.gz/source/abrEncApp.cpp
Added
@@ -0,0 +1,1108 @@ +/***************************************************************************** +* Copyright (C) 2013-2020 MulticoreWare, Inc +* +* Authors: Pooja Venkatesan <pooja@multicorewareinc.com> +* Aruna Matheswaran <aruna@multicorewareinc.com> +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. +* +* This program is also available under a commercial proprietary license. +* For more information, contact us at license @ x265.com. +*****************************************************************************/ + +#include "abrEncApp.h" +#include "mv.h" +#include "slice.h" +#include "param.h" + +#include <signal.h> +#include <errno.h> + +#include <queue> + +using namespace X265_NS; + +/* Ctrl-C handler */ +static volatile sig_atomic_t b_ctrl_c /* = 0 */; +static void sigint_handler(int) +{ + b_ctrl_c = 1; +} + +namespace X265_NS { + // private namespace +#define X265_INPUT_QUEUE_SIZE 250 + + AbrEncoder::AbrEncoder(CLIOptions cliopt, uint8_t numEncodes, int &ret) + { + m_numEncodes = numEncodes; + m_numActiveEncodes.set(numEncodes); + m_queueSize = (numEncodes > 1) ? 
X265_INPUT_QUEUE_SIZE : 1; + m_passEnc = X265_MALLOC(PassEncoder*, m_numEncodes); + + for (uint8_t i = 0; i < m_numEncodes; i++) + { + m_passEnci = new PassEncoder(i, cliopti, this); + if (!m_passEnci) + { + x265_log(NULL, X265_LOG_ERROR, "Unable to allocate memory for passEncoder\n"); + ret = 4; + } + m_passEnci->init(ret); + } + + if (!allocBuffers()) + { + x265_log(NULL, X265_LOG_ERROR, "Unable to allocate memory for buffers\n"); + ret = 4; + } + + /* start passEncoder worker threads */ + for (uint8_t pass = 0; pass < m_numEncodes; pass++) + m_passEncpass->startThreads(); + } + + bool AbrEncoder::allocBuffers() + { + m_inputPicBuffer = X265_MALLOC(x265_picture**, m_numEncodes); + m_analysisBuffer = X265_MALLOC(x265_analysis_data*, m_numEncodes); + + m_picWriteCnt = new ThreadSafeIntegerm_numEncodes; + m_picReadCnt = new ThreadSafeIntegerm_numEncodes; + m_analysisWriteCnt = new ThreadSafeIntegerm_numEncodes; + m_analysisReadCnt = new ThreadSafeIntegerm_numEncodes; + + m_picIdxReadCnt = X265_MALLOC(ThreadSafeInteger*, m_numEncodes); + m_analysisWrite = X265_MALLOC(ThreadSafeInteger*, m_numEncodes); + m_analysisRead = X265_MALLOC(ThreadSafeInteger*, m_numEncodes); + m_readFlag = X265_MALLOC(int*, m_numEncodes); + + for (uint8_t pass = 0; pass < m_numEncodes; pass++) + { + m_inputPicBufferpass = X265_MALLOC(x265_picture*, m_queueSize); + for (uint32_t idx = 0; idx < m_queueSize; idx++) + { + m_inputPicBufferpassidx = x265_picture_alloc(); + x265_picture_init(m_passEncpass->m_param, m_inputPicBufferpassidx); + } + + m_analysisBufferpass = X265_MALLOC(x265_analysis_data, m_queueSize); + m_picIdxReadCntpass = new ThreadSafeIntegerm_queueSize; + m_analysisWritepass = new ThreadSafeIntegerm_queueSize; + m_analysisReadpass = new ThreadSafeIntegerm_queueSize; + m_readFlagpass = X265_MALLOC(int, m_queueSize); + } + return true; + } + + void AbrEncoder::destroy() + { + x265_cleanup(); /* Free library singletons */ + for (uint8_t pass = 0; pass < m_numEncodes; pass++) + { + 
for (uint32_t index = 0; index < m_queueSize; index++) + { + X265_FREE(m_inputPicBufferpassindex->planes0); + x265_picture_free(m_inputPicBufferpassindex); + } + + X265_FREE(m_inputPicBufferpass); + X265_FREE(m_analysisBufferpass); + X265_FREE(m_readFlagpass); + delete m_picIdxReadCntpass; + delete m_analysisWritepass; + delete m_analysisReadpass; + m_passEncpass->destroy(); + delete m_passEncpass; + } + X265_FREE(m_inputPicBuffer); + X265_FREE(m_analysisBuffer); + X265_FREE(m_readFlag); + + delete m_picWriteCnt; + delete m_picReadCnt; + delete m_analysisWriteCnt; + delete m_analysisReadCnt; + + X265_FREE(m_picIdxReadCnt); + X265_FREE(m_analysisWrite); + X265_FREE(m_analysisRead); + + X265_FREE(m_passEnc); + } + + PassEncoder::PassEncoder(uint32_t id, CLIOptions cliopt, AbrEncoder *parent) + { + m_id = id; + m_cliopt = cliopt; + m_parent = parent; + if(!(m_cliopt.enableScaler && m_id)) + m_input = m_cliopt.input; + m_param = cliopt.param; + m_inputOver = false; + m_lastIdx = -1; + m_encoder = NULL; + m_scaler = NULL; + m_reader = NULL; + m_ret = 0; + } + + int PassEncoder::init(int &result) + { + if (m_parent->m_numEncodes > 1) + setReuseLevel(); + + if (!(m_cliopt.enableScaler && m_id)) + m_reader = new Reader(m_id, this); + else + { + VideoDesc *src = NULL, *dst = NULL; + dst = new VideoDesc(m_param->sourceWidth, m_param->sourceHeight, m_param->internalCsp, m_param->internalBitDepth); + int dstW = m_parent->m_passEncm_id - 1->m_param->sourceWidth; + int dstH = m_parent->m_passEncm_id - 1->m_param->sourceHeight; + src = new VideoDesc(dstW, dstH, m_param->internalCsp, m_param->internalBitDepth); + if (src != NULL && dst != NULL) + { + m_scaler = new Scaler(0, 1, m_id, src, dst, this); + if (!m_scaler) + { + x265_log(m_param, X265_LOG_ERROR, "\n MALLOC failure in Scaler"); + result = 4; + } + } + } + + /* note: we could try to acquire a different libx265 API here based on + * the profile found during option parsing, but it must be done before + * opening an encoder 
*/ + + if (m_param) + m_encoder = m_cliopt.api->encoder_open(m_param); + if (!m_encoder) + { + x265_log(NULL, X265_LOG_ERROR, "x265_encoder_open() failed for Enc, \n"); + m_ret = 2; + return -1; + } + + /* get the encoder parameters post-initialization */ + m_cliopt.api->encoder_parameters(m_encoder, m_param); + + return 1; + } + + void PassEncoder::setReuseLevel() + { + uint32_t r, padh = 0, padw = 0; + + m_param->confWinBottomOffset = m_param->confWinRightOffset = 0; + + m_param->analysisLoadReuseLevel = m_cliopt.loadLevel; + m_param->analysisSaveReuseLevel = m_cliopt.saveLevel; + m_param->analysisSave = m_cliopt.saveLevel ? "save.dat" : NULL; + m_param->analysisLoad = m_cliopt.loadLevel ? "load.dat" : NULL; + m_param->bUseAnalysisFile = 0; + + if (m_cliopt.loadLevel) + { + x265_param *refParam = m_parent->m_passEncm_cliopt.refId->m_param; + + if (m_param->sourceHeight == (refParam->sourceHeight - refParam->confWinBottomOffset) && + m_param->sourceWidth == (refParam->sourceWidth - refParam->confWinRightOffset)) + { + m_parent->m_passEncm_id->m_param->confWinBottomOffset = refParam->confWinBottomOffset; + m_parent->m_passEncm_id->m_param->confWinRightOffset = refParam->confWinRightOffset; + } + else + { + int srcH = refParam->sourceHeight - refParam->confWinBottomOffset; + int srcW = refParam->sourceWidth - refParam->confWinRightOffset; + + double scaleFactorH = double(m_param->sourceHeight / srcH); + double scaleFactorW = double(m_param->sourceWidth / srcW); + + int absScaleFactorH = (int)(10 * scaleFactorH + 0.5); + int absScaleFactorW = (int)(10 * scaleFactorW + 0.5); + + if (absScaleFactorH == 20 && absScaleFactorW == 20) + { + m_param->scaleFactor = 2; + + m_parent->m_passEncm_id->m_param->confWinBottomOffset = refParam->confWinBottomOffset * 2; + m_parent->m_passEncm_id->m_param->confWinRightOffset = refParam->confWinRightOffset * 2; + + } + } + } + + int h = m_param->sourceHeight + m_param->confWinBottomOffset; + int w = m_param->sourceWidth + 
m_param->confWinRightOffset; + if (h & (m_param->minCUSize - 1)) + { + r = h & (m_param->minCUSize - 1); + padh = m_param->minCUSize - r; + m_param->confWinBottomOffset += padh; + + } + + if (w & (m_param->minCUSize - 1)) + { + r = w & (m_param->minCUSize - 1); + padw = m_param->minCUSize - r; + m_param->confWinRightOffset += padw; + } + } + + void PassEncoder::startThreads() + { + /* Start slave worker threads */ + m_threadActive = true; + start(); + /* Start reader threads*/ + if (m_reader != NULL) + { + m_reader->m_threadActive = true; + m_reader->start(); + } + /* Start scaling worker threads */ + if (m_scaler != NULL) + { + m_scaler->m_threadActive = true; + m_scaler->start(); + } + } + + void PassEncoder::copyInfo(x265_analysis_data * src) + { + + uint32_t written = m_parent->m_analysisWriteCntm_id.get(); + + int index = written % m_parent->m_queueSize; + //If all streams have read analysis data, reuse that position in Queue + + int read = m_parent->m_analysisReadm_idindex.get(); + int write = m_parent->m_analysisWritem_idindex.get(); + + int overwrite = written / m_parent->m_queueSize; + bool emptyIdxFound = 0; + while (!emptyIdxFound && overwrite) + { + for (uint32_t i = 0; i < m_parent->m_queueSize; i++) + { + read = m_parent->m_analysisReadm_idi.get(); + write = m_parent->m_analysisWritem_idi.get(); + write *= m_cliopt.numRefs; + + if (read == write) + { + index = i; + emptyIdxFound = 1; + } + } + } + + x265_analysis_data *m_analysisInfo = &m_parent->m_analysisBufferm_idindex; + + memcpy(m_analysisInfo, src, sizeof(x265_analysis_data)); + x265_alloc_analysis_data(m_param, m_analysisInfo); + + bool isVbv = m_param->rc.vbvBufferSize && m_param->rc.vbvMaxBitrate; + if (m_param->bDisableLookahead && isVbv) + { + memcpy(m_analysisInfo->lookahead.intraSatdForVbv, src->lookahead.intraSatdForVbv, src->numCuInHeight * sizeof(uint32_t)); + memcpy(m_analysisInfo->lookahead.satdForVbv, src->lookahead.satdForVbv, src->numCuInHeight * sizeof(uint32_t)); + 
memcpy(m_analysisInfo->lookahead.intraVbvCost, src->lookahead.intraVbvCost, src->numCUsInFrame * sizeof(uint32_t)); + memcpy(m_analysisInfo->lookahead.vbvCost, src->lookahead.vbvCost, src->numCUsInFrame * sizeof(uint32_t)); + } + + if (src->sliceType == X265_TYPE_IDR || src->sliceType == X265_TYPE_I) + { + if (m_param->analysisSaveReuseLevel < 2) + goto ret; + x265_analysis_intra_data *intraDst, *intraSrc; + intraDst = (x265_analysis_intra_data*)m_analysisInfo->intraData; + intraSrc = (x265_analysis_intra_data*)src->intraData; + memcpy(intraDst->depth, intraSrc->depth, sizeof(uint8_t) * src->depthBytes); + memcpy(intraDst->modes, intraSrc->modes, sizeof(uint8_t) * src->numCUsInFrame * src->numPartitions); + memcpy(intraDst->partSizes, intraSrc->partSizes, sizeof(char) * src->depthBytes); + memcpy(intraDst->chromaModes, intraSrc->chromaModes, sizeof(uint8_t) * src->depthBytes); + if (m_param->rc.cuTree) + memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, sizeof(int8_t) * src->depthBytes); + } + else + { + bool bIntraInInter = (src->sliceType == X265_TYPE_P || m_param->bIntraInBFrames); + int numDir = src->sliceType == X265_TYPE_P ? 
1 : 2; + memcpy(m_analysisInfo->wt, src->wt, sizeof(WeightParam) * 3 * numDir); + if (m_param->analysisSaveReuseLevel < 2) + goto ret; + x265_analysis_inter_data *interDst, *interSrc; + interDst = (x265_analysis_inter_data*)m_analysisInfo->interData; + interSrc = (x265_analysis_inter_data*)src->interData; + memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) * src->depthBytes); + memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) * src->depthBytes); + if (m_param->rc.cuTree) + memcpy(interDst->cuQPOff, interSrc->cuQPOff, sizeof(int8_t) * src->depthBytes); + if (m_param->analysisSaveReuseLevel > 4) + { + memcpy(interDst->partSize, interSrc->partSize, sizeof(uint8_t) * src->depthBytes); + memcpy(interDst->mergeFlag, interSrc->mergeFlag, sizeof(uint8_t) * src->depthBytes); + if (m_param->analysisSaveReuseLevel == 10) + { + memcpy(interDst->interDir, interSrc->interDir, sizeof(uint8_t) * src->depthBytes); + for (int dir = 0; dir < numDir; dir++) + { + memcpy(interDst->mvpIdxdir, interSrc->mvpIdxdir, sizeof(uint8_t) * src->depthBytes); + memcpy(interDst->refIdxdir, interSrc->refIdxdir, sizeof(int8_t) * src->depthBytes); + memcpy(interDst->mvdir, interSrc->mvdir, sizeof(MV) * src->depthBytes); + } + if (bIntraInInter) + { + x265_analysis_intra_data *intraDst = (x265_analysis_intra_data*)m_analysisInfo->intraData; + x265_analysis_intra_data *intraSrc = (x265_analysis_intra_data*)src->intraData; + memcpy(intraDst->modes, intraSrc->modes, sizeof(uint8_t) * src->numPartitions * src->numCUsInFrame); + memcpy(intraDst->chromaModes, intraSrc->chromaModes, sizeof(uint8_t) * src->depthBytes); + } + } + } + if (m_param->analysisSaveReuseLevel != 10) + memcpy(interDst->ref, interSrc->ref, sizeof(int32_t) * src->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir); + } + +ret: + //increment analysis Write counter + m_parent->m_analysisWriteCntm_id.incr(); + m_parent->m_analysisWritem_idindex.incr(); + return; + } + + + bool PassEncoder::readPicture(x265_picture *dstPic) + 
{ + /*Check and wait if there any input frames to read*/ + int ipread = m_parent->m_picReadCntm_id.get(); + int ipwrite = m_parent->m_picWriteCntm_id.get(); + + bool isAbrLoad = m_cliopt.loadLevel && (m_parent->m_numEncodes > 1); + while (!m_inputOver && (ipread == ipwrite)) + { + ipwrite = m_parent->m_picWriteCntm_id.waitForChange(ipwrite); + } + + if (m_threadActive && ipread < ipwrite) + { + /*Get input index to read from inputQueue. If doesn't need analysis info, it need not wait to fetch poc from analysisQueue*/ + int readPos = ipread % m_parent->m_queueSize; + x265_analysis_data* analysisData = 0; + + if (isAbrLoad) + { + /*If stream is master of each slave pass, then fetch analysis data from prev pass*/ + int analysisQId = m_cliopt.refId; + /*Check and wait if there any analysis Data to read*/ + int analysisWrite = m_parent->m_analysisWriteCntanalysisQId.get(); + int written = analysisWrite * m_parent->m_passEncanalysisQId->m_cliopt.numRefs; + int analysisRead = m_parent->m_analysisReadCntanalysisQId.get(); + + while (m_threadActive && written == analysisRead) + { + analysisWrite = m_parent->m_analysisWriteCntanalysisQId.waitForChange(analysisWrite); + written = analysisWrite * m_parent->m_passEncanalysisQId->m_cliopt.numRefs; + } + + if (analysisRead < written) + { + int analysisIdx = 0; + if (!m_param->bDisableLookahead) + { + bool analysisdRead = false; + while ((analysisRead < written) && !analysisdRead) + { + while (analysisWrite < ipread) + { + analysisWrite = m_parent->m_analysisWriteCntanalysisQId.waitForChange(analysisWrite); + written = analysisWrite * m_parent->m_passEncanalysisQId->m_cliopt.numRefs; + } + for (uint32_t i = 0; i < m_parent->m_queueSize; i++) + { + analysisData = &m_parent->m_analysisBufferanalysisQIdi; + int read = m_parent->m_analysisReadanalysisQIdi.get(); + int write = m_parent->m_analysisWriteanalysisQIdi.get() * m_parent->m_passEncanalysisQId->m_cliopt.numRefs; + if ((analysisData->poc == (uint32_t)(ipread)) && (read < 
write)) + { + analysisIdx = i; + analysisdRead = true; + break; + } + } + } + } + else + { + analysisIdx = analysisRead % m_parent->m_queueSize; + analysisData = &m_parent->m_analysisBufferanalysisQIdanalysisIdx; + readPos = analysisData->poc % m_parent->m_queueSize; + while ((ipwrite < readPos) || ((ipwrite - 1) < (int)analysisData->poc)) + { + ipwrite = m_parent->m_picWriteCntm_id.waitForChange(ipwrite); + } + } + + m_lastIdx = analysisIdx; + } + else + return false; + } + + + x265_picture *srcPic = (x265_picture*)(m_parent->m_inputPicBufferm_idreadPos); + + x265_picture *pic = (x265_picture*)(dstPic); + pic->colorSpace = srcPic->colorSpace; + pic->bitDepth = srcPic->bitDepth; + pic->framesize = srcPic->framesize; + pic->height = srcPic->height; + pic->pts = srcPic->pts; + pic->dts = srcPic->dts; + pic->reorderedPts = srcPic->reorderedPts; + pic->width = srcPic->width; + pic->analysisData = srcPic->analysisData; + pic->userSEI = srcPic->userSEI; + pic->stride0 = srcPic->stride0; + pic->stride1 = srcPic->stride1; + pic->stride2 = srcPic->stride2; + pic->planes0 = srcPic->planes0; + pic->planes1 = srcPic->planes1; + pic->planes2 = srcPic->planes2; + if (isAbrLoad) + pic->analysisData = *analysisData; + return true; + } + else + return false; + } + + void PassEncoder::threadMain() + { + THREAD_NAME("PassEncoder", m_id); + + while (m_threadActive) + { + +#if ENABLE_LIBVMAF + x265_vmaf_data* vmafdata = m_cliopt.vmafData; +#endif + /* This allows muxers to modify bitstream format */ + m_cliopt.output->setParam(m_param); + const x265_api* api = m_cliopt.api; + ReconPlay* reconPlay = NULL; + if (m_cliopt.reconPlayCmd) + reconPlay = new ReconPlay(m_cliopt.reconPlayCmd, *m_param); + char* profileName = m_cliopt.encName ? 
m_cliopt.encName : (char *)"x265"; + + if (m_cliopt.zoneFile) + { + if (!m_cliopt.parseZoneFile()) + { + x265_log(NULL, X265_LOG_ERROR, "Unable to parse zonefile in %s\n", profileName); + fclose(m_cliopt.zoneFile); + m_cliopt.zoneFile = NULL; + } + } + + if (signal(SIGINT, sigint_handler) == SIG_ERR) + x265_log(m_param, X265_LOG_ERROR, "Unable to register CTRL+C handler: %s in %s\n", + strerror(errno), profileName); + + x265_picture pic_orig, pic_out; + x265_picture *pic_in = &pic_orig; + /* Allocate recon picture if analysis save/load is enabled */ + std::priority_queue<int64_t>* pts_queue = m_cliopt.output->needPTS() ? new std::priority_queue<int64_t>() : NULL; + x265_picture *pic_recon = (m_cliopt.recon || m_param->analysisSave || m_param->analysisLoad || pts_queue || reconPlay || m_param->csvLogLevel) ? &pic_out : NULL; + uint32_t inFrameCount = 0; + uint32_t outFrameCount = 0; + x265_nal *p_nal; + x265_stats stats; + uint32_t nal; + int16_t *errorBuf = NULL; + bool bDolbyVisionRPU = false; + uint8_t *rpuPayload = NULL; + int inputPicNum = 1; + x265_picture picField1, picField2; + x265_analysis_data* analysisInfo = (x265_analysis_data*)(&pic_out.analysisData); + bool isAbrSave = m_cliopt.saveLevel && (m_parent->m_numEncodes > 1); + + if (!m_param->bRepeatHeaders && !m_param->bEnableSvtHevc) + { + if (api->encoder_headers(m_encoder, &p_nal, &nal) < 0) + { + x265_log(m_param, X265_LOG_ERROR, "Failure generating stream headers in %s\n", profileName); + m_ret = 3; + goto fail; + } + else + m_cliopt.totalbytes += m_cliopt.output->writeHeaders(p_nal, nal); + } + + if (m_param->bField && m_param->interlaceMode) + { + api->picture_init(m_param, &picField1); + api->picture_init(m_param, &picField2); + // return back the original height of input + m_param->sourceHeight *= 2; + api->picture_init(m_param, &pic_orig); + } + else + api->picture_init(m_param, &pic_orig); + + if (m_param->dolbyProfile && m_cliopt.dolbyVisionRpu) + { + rpuPayload = X265_MALLOC(uint8_t, 1024); + 
pic_in->rpu.payload = rpuPayload; + if (pic_in->rpu.payload) + bDolbyVisionRPU = true; + } + + if (m_cliopt.bDither) + { + errorBuf = X265_MALLOC(int16_t, m_param->sourceWidth + 1); + if (errorBuf) + memset(errorBuf, 0, (m_param->sourceWidth + 1) * sizeof(int16_t)); + else + m_cliopt.bDither = false; + } + + // main encoder loop + while (pic_in && !b_ctrl_c) + { + pic_orig.poc = (m_param->bField && m_param->interlaceMode) ? inFrameCount * 2 : inFrameCount; + if (m_cliopt.qpfile) + { + if (!m_cliopt.parseQPFile(pic_orig)) + { + x265_log(NULL, X265_LOG_ERROR, "can't parse qpfile for frame %d in %s\n", + pic_in->poc, profileName); + fclose(m_cliopt.qpfile); + m_cliopt.qpfile = NULL; + } + } + + if (m_cliopt.framesToBeEncoded && inFrameCount >= m_cliopt.framesToBeEncoded) + pic_in = NULL; + else if (readPicture(pic_in)) + inFrameCount++; + else + pic_in = NULL; + + if (pic_in) + { + if (pic_in->bitDepth > m_param->internalBitDepth && m_cliopt.bDither) + { + x265_dither_image(pic_in, m_cliopt.input->getWidth(), m_cliopt.input->getHeight(), errorBuf, m_param->internalBitDepth); + pic_in->bitDepth = m_param->internalBitDepth; + } + /* Overwrite PTS */ + pic_in->pts = pic_in->poc; + + // convert to field + if (m_param->bField && m_param->interlaceMode) + { + int height = pic_in->height >> 1; + + int static bCreated = 0; + if (bCreated == 0) + { + bCreated = 1; + inputPicNum = 2; + picField1.fieldNum = 1; + picField2.fieldNum = 2; + + picField1.bitDepth = picField2.bitDepth = pic_in->bitDepth; + picField1.colorSpace = picField2.colorSpace = pic_in->colorSpace; + picField1.height = picField2.height = pic_in->height >> 1; + picField1.framesize = picField2.framesize = pic_in->framesize >> 1; + + size_t fieldFrameSize = (size_t)pic_in->framesize >> 1; + char* field1Buf = X265_MALLOC(char, fieldFrameSize); + char* field2Buf = X265_MALLOC(char, fieldFrameSize); + + int stride = picField1.stride0 = picField2.stride0 = pic_in->stride0; + uint64_t framesize = stride * (height >> 
x265_cli_cspspic_in->colorSpace.height0); + picField1.planes0 = field1Buf; + picField2.planes0 = field2Buf; + for (int i = 1; i < x265_cli_cspspic_in->colorSpace.planes; i++) + { + picField1.planesi = field1Buf + framesize; + picField2.planesi = field2Buf + framesize; + + stride = picField1.stridei = picField2.stridei = pic_in->stridei; + framesize += (stride * (height >> x265_cli_cspspic_in->colorSpace.heighti)); + } + assert(framesize == picField1.framesize); + } + + picField1.pts = picField1.poc = pic_in->poc; + picField2.pts = picField2.poc = pic_in->poc + 1; + + picField1.userSEI = picField2.userSEI = pic_in->userSEI; + + //if (pic_in->userData) + //{ + // // Have to handle userData here + //} + + if (pic_in->framesize) + { + for (int i = 0; i < x265_cli_cspspic_in->colorSpace.planes; i++) + { + char* srcP1 = (char*)pic_in->planesi; + char* srcP2 = (char*)pic_in->planesi + pic_in->stridei; + char* p1 = (char*)picField1.planesi; + char* p2 = (char*)picField2.planesi; + + int stride = picField1.stridei; + + for (int y = 0; y < (height >> x265_cli_cspspic_in->colorSpace.heighti); y++) + { + memcpy(p1, srcP1, stride); + memcpy(p2, srcP2, stride); + srcP1 += 2 * stride; + srcP2 += 2 * stride; + p1 += stride; + p2 += stride; + } + } + } + } + + if (bDolbyVisionRPU) + { + if (m_param->bField && m_param->interlaceMode) + { + if (m_cliopt.rpuParser(&picField1) > 0) + goto fail; + if (m_cliopt.rpuParser(&picField2) > 0) + goto fail; + } + else + { + if (m_cliopt.rpuParser(pic_in) > 0) + goto fail; + } + } + } + + for (int inputNum = 0; inputNum < inputPicNum; inputNum++) + { + x265_picture *picInput = NULL; + if (inputPicNum == 2) + picInput = pic_in ? (inputNum ? 
&picField2 : &picField1) : NULL; + else + picInput = pic_in; + + int numEncoded = api->encoder_encode(m_encoder, &p_nal, &nal, picInput, pic_recon); + + int idx = (inFrameCount - 1) % m_parent->m_queueSize; + m_parent->m_picIdxReadCntm_ididx.incr(); + m_parent->m_picReadCntm_id.incr(); + if (m_cliopt.loadLevel && picInput) + { + m_parent->m_analysisReadCntm_cliopt.refId.incr(); + m_parent->m_analysisReadm_cliopt.refIdm_lastIdx.incr(); + } + + if (numEncoded < 0) + { + b_ctrl_c = 1; + m_ret = 4; + break; + } + + if (reconPlay && numEncoded) + reconPlay->writePicture(*pic_recon); + + outFrameCount += numEncoded; + + if (isAbrSave && numEncoded) + { + copyInfo(analysisInfo); + } + + if (numEncoded && pic_recon && m_cliopt.recon) + m_cliopt.recon->writePicture(pic_out); + if (nal) + { + m_cliopt.totalbytes += m_cliopt.output->writeFrame(p_nal, nal, pic_out); + if (pts_queue) + { + pts_queue->push(-pic_out.pts); + if (pts_queue->size() > 2) + pts_queue->pop(); + } + } + m_cliopt.printStatus(outFrameCount); + } + } + + /* Flush the encoder */ + while (!b_ctrl_c) + { + int numEncoded = api->encoder_encode(m_encoder, &p_nal, &nal, NULL, pic_recon); + if (numEncoded < 0) + { + m_ret = 4; + break; + } + + if (reconPlay && numEncoded) + reconPlay->writePicture(*pic_recon); + + outFrameCount += numEncoded; + if (isAbrSave && numEncoded) + { + copyInfo(analysisInfo); + } + + if (numEncoded && pic_recon && m_cliopt.recon) + m_cliopt.recon->writePicture(pic_out); + if (nal) + { + m_cliopt.totalbytes += m_cliopt.output->writeFrame(p_nal, nal, pic_out); + if (pts_queue) + { + pts_queue->push(-pic_out.pts); + if (pts_queue->size() > 2) + pts_queue->pop(); + } + } + + m_cliopt.printStatus(outFrameCount); + + if (!numEncoded) + break; + } + + if (bDolbyVisionRPU) + { + if (fgetc(m_cliopt.dolbyVisionRpu) != EOF) + x265_log(NULL, X265_LOG_WARNING, "Dolby Vision RPU count is greater than frame count in %s\n", + profileName); + x265_log(NULL, X265_LOG_INFO, "VES muxing with Dolby Vision 
RPU file successful in %s\n", + profileName); + } + + /* clear progress report */ + if (m_cliopt.bProgress) + fprintf(stderr, "%*s\r", 80, " "); + + fail: + + delete reconPlay; + + api->encoder_get_stats(m_encoder, &stats, sizeof(stats)); + if (m_param->csvfn && !b_ctrl_c) +#if ENABLE_LIBVMAF + api->vmaf_encoder_log(m_encoder, m_cliopt.argCount, m_cliopt.argString, m_cliopt.param, vmafdata); +#else + api->encoder_log(m_encoder, m_cliopt.argCnt, m_cliopt.argString); +#endif + api->encoder_close(m_encoder); + + int64_t second_largest_pts = 0; + int64_t largest_pts = 0; + if (pts_queue && pts_queue->size() >= 2) + { + second_largest_pts = -pts_queue->top(); + pts_queue->pop(); + largest_pts = -pts_queue->top(); + pts_queue->pop(); + delete pts_queue; + pts_queue = NULL; + } + m_cliopt.output->closeFile(largest_pts, second_largest_pts); + + if (b_ctrl_c) + general_log(m_param, NULL, X265_LOG_INFO, "aborted at input frame %d, output frame %d in %s\n", + m_cliopt.seek + inFrameCount, stats.encodedPictureCount, profileName); + + api->param_free(m_param); + + X265_FREE(errorBuf); + X265_FREE(rpuPayload); + + m_threadActive = false; + m_parent->m_numActiveEncodes.decr(); + } + } + + void PassEncoder::destroy() + { + stop(); + if (m_reader) + { + m_reader->stop(); + delete m_reader; + } + else + { + m_scaler->stop(); + m_scaler->destroy(); + delete m_scaler; + } + } + + Scaler::Scaler(int threadId, int threadNum, int id, VideoDesc *src, VideoDesc *dst, PassEncoder *parentEnc) + { + m_parentEnc = parentEnc; + m_id = id; + m_srcFormat = src; + m_dstFormat = dst; + m_threadActive = false; + m_scaleFrameSize = 0; + m_filterManager = NULL; + m_threadId = threadId; + m_threadTotal = threadNum; + + int csp = dst->m_csp; + uint32_t pixelbytes = dst->m_inputDepth > 8 ? 
2 : 1; + for (int i = 0; i < x265_cli_cspscsp.planes; i++) + { + int w = dst->m_width >> x265_cli_cspscsp.widthi; + int h = dst->m_height >> x265_cli_cspscsp.heighti; + m_scalePlanesi = w * h * pixelbytes; + m_scaleFrameSize += m_scalePlanesi; + } + + if (src->m_height != dst->m_height || src->m_width != dst->m_width) + { + m_filterManager = new ScalerFilterManager; + m_filterManager->init(4, m_srcFormat, m_dstFormat); + } + } + + bool Scaler::scalePic(x265_picture * destination, x265_picture * source) + { + if (!destination || !source) + return false; + x265_param* param = m_parentEnc->m_param; + int pixelBytes = m_dstFormat->m_inputDepth > 8 ? 2 : 1; + if (m_srcFormat->m_height != m_dstFormat->m_height || m_srcFormat->m_width != m_dstFormat->m_width) + { + void **srcPlane = NULL, **dstPlane = NULL; + int srcStride3, dstStride3; + destination->bitDepth = source->bitDepth; + destination->colorSpace = source->colorSpace; + destination->pts = source->pts; + destination->dts = source->dts; + destination->reorderedPts = source->reorderedPts; + destination->poc = source->poc; + destination->userSEI = source->userSEI; + srcPlane = source->planes; + dstPlane = destination->planes; + srcStride0 = source->stride0; + destination->stride0 = m_dstFormat->m_width * pixelBytes; + dstStride0 = destination->stride0; + if (param->internalCsp != X265_CSP_I400) + { + srcStride1 = source->stride1; + srcStride2 = source->stride2; + destination->stride1 = destination->stride0 >> x265_cli_cspsparam->internalCsp.width1; + destination->stride2 = destination->stride0 >> x265_cli_cspsparam->internalCsp.width2; + dstStride1 = destination->stride1; + dstStride2 = destination->stride2; + } + if (m_scaleFrameSize) + { + m_filterManager->scale_pic(srcPlane, dstPlane, srcStride, dstStride); + return true; + } + else + x265_log(param, X265_LOG_INFO, "Empty frame received\n"); + } + return false; + } + + void Scaler::threadMain() + { + THREAD_NAME("Scaler", m_id); + + /* unscaled picture is stored 
in the last index */ + uint32_t srcId = m_id - 1; + int QDepth = m_parentEnc->m_parent->m_queueSize; + while (!m_parentEnc->m_inputOver) + { + + uint32_t scaledWritten = m_parentEnc->m_parent->m_picWriteCntm_id.get(); + + if (m_parentEnc->m_cliopt.framesToBeEncoded && scaledWritten >= m_parentEnc->m_cliopt.framesToBeEncoded) + break; + + if (m_threadTotal > 1 && (m_threadId != scaledWritten % m_threadTotal)) + { + continue; + } + uint32_t written = m_parentEnc->m_parent->m_picWriteCntsrcId.get(); + + /*If all the input pictures are scaled by the current scale worker thread wait for input pictures*/ + while (m_threadActive && (scaledWritten == written)) { + written = m_parentEnc->m_parent->m_picWriteCntsrcId.waitForChange(written); + } + + if (m_threadActive && scaledWritten < written) + { + + int scaledWriteIdx = scaledWritten % QDepth; + int overWritePicBuffer = scaledWritten / QDepth; + int read = m_parentEnc->m_parent->m_picIdxReadCntm_idscaledWriteIdx.get(); + + while (overWritePicBuffer && read < overWritePicBuffer) + { + read = m_parentEnc->m_parent->m_picIdxReadCntm_idscaledWriteIdx.waitForChange(read); + } + + if (!m_parentEnc->m_parent->m_inputPicBufferm_idscaledWriteIdx) + { + int framesize = 0; + int planesize3; + int csp = m_dstFormat->m_csp; + int stride3; + stride0 = m_dstFormat->m_width; + stride1 = stride0 >> x265_cli_cspscsp.width1; + stride2 = stride0 >> x265_cli_cspscsp.width2; + for (int i = 0; i < x265_cli_cspscsp.planes; i++) + { + uint32_t h = m_dstFormat->m_height >> x265_cli_cspscsp.heighti; + planesizei = h * stridei; + framesize += planesizei; + } + + m_parentEnc->m_parent->m_inputPicBufferm_idscaledWriteIdx = x265_picture_alloc(); + x265_picture_init(m_parentEnc->m_param, m_parentEnc->m_parent->m_inputPicBufferm_idscaledWriteIdx); + + ((x265_picture*)m_parentEnc->m_parent->m_inputPicBufferm_idscaledWritten % QDepth)->framesize = framesize; + for (int32_t j = 0; j < x265_cli_cspscsp.planes; j++) + { + 
m_parentEnc->m_parent->m_inputPicBufferm_idscaledWritten % QDepth->planesj = X265_MALLOC(char, planesizej); + } + } + + x265_picture *srcPic = m_parentEnc->m_parent->m_inputPicBuffersrcIdscaledWritten % QDepth; + x265_picture* destPic = m_parentEnc->m_parent->m_inputPicBufferm_idscaledWriteIdx; + + // Enqueue this picture up with the current encoder so that it will asynchronously encode + if (!scalePic(destPic, srcPic)) + x265_log(NULL, X265_LOG_ERROR, "Unable to copy scaled input picture to input queue \n"); + else + m_parentEnc->m_parent->m_picWriteCntm_id.incr(); + m_scaledWriteCnt.incr(); + m_parentEnc->m_parent->m_picIdxReadCntsrcIdscaledWriteIdx.incr(); + } + if (m_threadTotal > 1) + { + written = m_parentEnc->m_parent->m_picWriteCntsrcId.get(); + int totalWrite = written / m_threadTotal; + if (written % m_threadTotal > m_threadId) + totalWrite++; + if (totalWrite == m_scaledWriteCnt.get()) + { + m_parentEnc->m_parent->m_picWriteCntsrcId.poke(); + m_parentEnc->m_parent->m_picWriteCntm_id.poke(); + break; + } + } + else + { + /* Once end of video is reached and all frames are scaled, release wait on picwritecount */ + scaledWritten = m_parentEnc->m_parent->m_picWriteCntm_id.get(); + written = m_parentEnc->m_parent->m_picWriteCntsrcId.get(); + if (written == scaledWritten) + { + m_parentEnc->m_parent->m_picWriteCntsrcId.poke(); + m_parentEnc->m_parent->m_picWriteCntm_id.poke(); + break; + } + } + + } + m_threadActive = false; + destroy(); + } + + Reader::Reader(int id, PassEncoder *parentEnc) + { + m_parentEnc = parentEnc; + m_id = id; + m_input = parentEnc->m_input; + } + + void Reader::threadMain() + { + THREAD_NAME("Reader", m_id); + + int QDepth = m_parentEnc->m_parent->m_queueSize; + x265_picture* src = x265_picture_alloc(); + x265_picture_init(m_parentEnc->m_param, src); + + while (m_threadActive) + { + uint32_t written = m_parentEnc->m_parent->m_picWriteCntm_id.get(); + uint32_t writeIdx = written % QDepth; + uint32_t read = 
m_parentEnc->m_parent->m_picIdxReadCntm_idwriteIdx.get(); + uint32_t overWritePicBuffer = written / QDepth; + + if (m_parentEnc->m_cliopt.framesToBeEncoded && written >= m_parentEnc->m_cliopt.framesToBeEncoded) + break; + + while (overWritePicBuffer && read < overWritePicBuffer) + { + read = m_parentEnc->m_parent->m_picIdxReadCntm_idwriteIdx.waitForChange(read); + } + + x265_picture* dest = m_parentEnc->m_parent->m_inputPicBufferm_idwriteIdx; + if (m_input->readPicture(*src)) + { + dest->poc = src->poc; + dest->pts = src->pts; + dest->userSEI = src->userSEI; + dest->bitDepth = src->bitDepth; + dest->framesize = src->framesize; + dest->height = src->height; + dest->width = src->width; + dest->colorSpace = src->colorSpace; + dest->userSEI = src->userSEI; + dest->rpu.payload = src->rpu.payload; + dest->picStruct = src->picStruct; + dest->stride0 = src->stride0; + dest->stride1 = src->stride1; + dest->stride2 = src->stride2; + + if (!dest->planes0) + dest->planes0 = X265_MALLOC(char, dest->framesize); + + memcpy(dest->planes0, src->planes0, src->framesize * sizeof(char)); + dest->planes1 = (char*)dest->planes0 + src->stride0 * src->height; + dest->planes2 = (char*)dest->planes1 + src->stride1 * (src->height >> x265_cli_cspssrc->colorSpace.height1); + m_parentEnc->m_parent->m_picWriteCntm_id.incr(); + } + else + { + m_threadActive = false; + m_parentEnc->m_inputOver = true; + m_parentEnc->m_parent->m_picWriteCntm_id.poke(); + } + } + x265_picture_free(src); + } +}
View file
x265_3.4.tar.gz/source/abrEncApp.h
Added
@@ -0,0 +1,153 @@ +/***************************************************************************** +* Copyright (C) 2013-2020 MulticoreWare, Inc +* +* Authors: Pooja Venkatesan <pooja@multicorewareinc.com> +* Aruna Matheswaran <aruna@multicorewareinc.com> +* +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. +* +* This program is also available under a commercial proprietary license. +* For more information, contact us at license @ x265.com. 
+*****************************************************************************/ + +#ifndef ABR_ENCODE_H +#define ABR_ENCODE_H + +#include "x265.h" +#include "scaler.h" +#include "threading.h" +#include "x265cli.h" + +namespace X265_NS { + // private namespace + + class PassEncoder; + class Scaler; + class Reader; + + class AbrEncoder + { + public: + uint8_t m_numEncodes; + PassEncoder **m_passEnc; + uint32_t m_queueSize; + ThreadSafeInteger m_numActiveEncodes; + + x265_picture ***m_inputPicBuffer; //numEncodesqueueSize + x265_analysis_data **m_analysisBuffer; //numEncodesqueueSize + int **m_readFlag; + + ThreadSafeInteger *m_picWriteCnt; + ThreadSafeInteger *m_picReadCnt; + ThreadSafeInteger **m_picIdxReadCnt; + ThreadSafeInteger *m_analysisWriteCnt; //numEncodesqueueSize + ThreadSafeInteger *m_analysisReadCnt; //numEncodesqueueSize + ThreadSafeInteger **m_analysisWrite; //numEncodesqueueSize + ThreadSafeInteger **m_analysisRead; //numEncodesqueueSize + + AbrEncoder(CLIOptions cliopt, uint8_t numEncodes, int& ret); + bool allocBuffers(); + void destroy(); + + }; + + class PassEncoder : public Thread + { + public: + + uint32_t m_id; + x265_param *m_param; + AbrEncoder *m_parent; + x265_encoder *m_encoder; + Reader *m_reader; + Scaler *m_scaler; + bool m_inputOver; + + int m_threadActive; + int m_lastIdx; + uint32_t m_outputNalsCount; + + x265_picture **m_inputPicBuffer; + x265_analysis_data **m_analysisBuffer; + x265_nal **m_outputNals; + x265_picture **m_outputRecon; + + CLIOptions m_cliopt; + InputFile* m_input; + const char* m_reconPlayCmd; + FILE* m_qpfile; + FILE* m_zoneFile; + FILE* m_dolbyVisionRpu;/* File containing Dolby Vision BL RPU metadata */ + + int m_ret; + + PassEncoder(uint32_t id, CLIOptions cliopt, AbrEncoder *parent); + int init(int &result); + void setReuseLevel(); + + void startThreads(); + void copyInfo(x265_analysis_data *src); + + bool readPicture(x265_picture*); + void destroy(); + + private: + void threadMain(); + }; + + class Scaler : 
public Thread + { + public: + PassEncoder *m_parentEnc; + int m_id; + int m_scalePlanes3; + int m_scaleFrameSize; + uint32_t m_threadId; + uint32_t m_threadTotal; + ThreadSafeInteger m_scaledWriteCnt; + VideoDesc* m_srcFormat; + VideoDesc* m_dstFormat; + int m_threadActive; + ScalerFilterManager* m_filterManager; + + Scaler(int threadId, int threadNum, int id, VideoDesc *src, VideoDesc * dst, PassEncoder *parentEnc); + bool scalePic(x265_picture *destination, x265_picture *source); + void threadMain(); + void destroy() + { + if (m_filterManager) + { + delete m_filterManager; + m_filterManager = NULL; + } + } + }; + + class Reader : public Thread + { + public: + PassEncoder *m_parentEnc; + int m_id; + InputFile* m_input; + int m_threadActive; + + Reader(int id, PassEncoder *parentEnc); + void threadMain(); + }; +} + +#endif // ifndef ABR_ENCODE_H +#pragma once
View file
x265_3.3.tar.gz/source/common/CMakeLists.txt -> x265_3.4.tar.gz/source/common/CMakeLists.txt
Changed
@@ -14,7 +14,7 @@ endif(EXTRA_LIB) if(ENABLE_ASSEMBLY) - set_source_files_properties(threading.cpp primitives.cpp PROPERTIES COMPILE_FLAGS -DENABLE_ASSEMBLY=1) + set_source_files_properties(threading.cpp primitives.cpp pixel.cpp PROPERTIES COMPILE_FLAGS -DENABLE_ASSEMBLY=1) list(APPEND VFLAGS "-DENABLE_ASSEMBLY=1") endif(ENABLE_ASSEMBLY) @@ -84,16 +84,33 @@ endif(ENABLE_ASSEMBLY AND X86) if(ENABLE_ASSEMBLY AND (ARM OR CROSS_COMPILE_ARM)) - set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h) + if(ARM64) + if(GCC AND (CMAKE_CXX_FLAGS_RELEASE MATCHES "-O3")) + message(STATUS "Detected CXX compiler using -O3 optimization level") + add_definitions(-DAUTO_VECTORIZE=1) + endif() + set(C_SRCS asm-primitives.cpp pixel.h ipfilter8.h) - # add ARM assembly/intrinsic files here - set(A_SRCS asm.S cpu-a.S mc-a.S sad-a.S pixel-util.S ssd-a.S blockcopy8.S ipfilter8.S dct-a.S) - set(VEC_PRIMITIVES) + # add ARM assembly/intrinsic files here + set(A_SRCS asm.S mc-a.S sad-a.S pixel-util.S ipfilter8.S) + set(VEC_PRIMITIVES) - set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources") - foreach(SRC ${C_SRCS}) - set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC}) - endforeach() + set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources") + foreach(SRC ${C_SRCS}) + set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC}) + endforeach() + else() + set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h) + + # add ARM assembly/intrinsic files here + set(A_SRCS asm.S cpu-a.S mc-a.S sad-a.S pixel-util.S ssd-a.S blockcopy8.S ipfilter8.S dct-a.S) + set(VEC_PRIMITIVES) + + set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources") + foreach(SRC ${C_SRCS}) + set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC}) + endforeach() + endif() source_group(Assembly FILES ${ASM_PRIMITIVES}) endif(ENABLE_ASSEMBLY AND (ARM OR CROSS_COMPILE_ARM)) @@ -151,4 +168,5 @@ predict.cpp predict.h scalinglist.cpp scalinglist.h quant.cpp quant.h 
contexts.h - deblock.cpp deblock.h) + deblock.cpp deblock.h + scaler.cpp scaler.h)
View file
x265_3.4.tar.gz/source/common/aarch64/asm-primitives.cpp
Added
@@ -0,0 +1,219 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Hongbin Liu <liuhongbin1@huawei.com> + * Yimeng Su <yimeng.su@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#include "common.h" +#include "primitives.h" +#include "x265.h" +#include "cpu.h" + + +#if defined(__GNUC__) +#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#endif + +#define GCC_4_9_0 40900 +#define GCC_5_1_0 50100 + +extern "C" { +#include "pixel.h" +#include "pixel-util.h" +#include "ipfilter8.h" +} + +namespace X265_NS { +// private x265 namespace + + +template<int size> +void interp_8tap_hv_pp_cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY) +{ + ALIGN_VAR_32(int16_t, immedMAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 1)); + const int halfFilterSize = NTAPS_LUMA >> 1; + const int immedStride = MAX_CU_SIZE; + + primitives.pusize.luma_hps(src, srcStride, immed, immedStride, idxX, 1); + primitives.pusize.luma_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dst, dstStride, idxY); +} + + +/* Temporary workaround because luma_vsp assembly primitive has not been completed + * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive. + * Otherwise, segment fault occurs. 
*/ +void setupAliasCPrimitives(EncoderPrimitives &cp, EncoderPrimitives &asmp, int cpuMask) +{ + if (cpuMask & X265_CPU_NEON) + { + asmp.puLUMA_8x4.luma_vsp = cp.puLUMA_8x4.luma_vsp; + asmp.puLUMA_8x8.luma_vsp = cp.puLUMA_8x8.luma_vsp; + asmp.puLUMA_8x16.luma_vsp = cp.puLUMA_8x16.luma_vsp; + asmp.puLUMA_8x32.luma_vsp = cp.puLUMA_8x32.luma_vsp; + asmp.puLUMA_12x16.luma_vsp = cp.puLUMA_12x16.luma_vsp; +#if !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0 /* gcc_version < gcc-5.1.0 */ + asmp.puLUMA_16x4.luma_vsp = cp.puLUMA_16x4.luma_vsp; + asmp.puLUMA_16x8.luma_vsp = cp.puLUMA_16x8.luma_vsp; + asmp.puLUMA_16x12.luma_vsp = cp.puLUMA_16x12.luma_vsp; + asmp.puLUMA_16x16.luma_vsp = cp.puLUMA_16x16.luma_vsp; + asmp.puLUMA_16x32.luma_vsp = cp.puLUMA_16x32.luma_vsp; + asmp.puLUMA_16x64.luma_vsp = cp.puLUMA_16x64.luma_vsp; + asmp.puLUMA_32x16.luma_vsp = cp.puLUMA_32x16.luma_vsp; + asmp.puLUMA_32x24.luma_vsp = cp.puLUMA_32x24.luma_vsp; + asmp.puLUMA_32x32.luma_vsp = cp.puLUMA_32x32.luma_vsp; + asmp.puLUMA_32x64.luma_vsp = cp.puLUMA_32x64.luma_vsp; + asmp.puLUMA_48x64.luma_vsp = cp.puLUMA_48x64.luma_vsp; + asmp.puLUMA_64x16.luma_vsp = cp.puLUMA_64x16.luma_vsp; + asmp.puLUMA_64x32.luma_vsp = cp.puLUMA_64x32.luma_vsp; + asmp.puLUMA_64x48.luma_vsp = cp.puLUMA_64x48.luma_vsp; + asmp.puLUMA_64x64.luma_vsp = cp.puLUMA_64x64.luma_vsp; +#if !AUTO_VECTORIZE || GCC_VERSION < GCC_4_9_0 /* gcc_version < gcc-4.9.0 */ + asmp.puLUMA_4x4.luma_vsp = cp.puLUMA_4x4.luma_vsp; + asmp.puLUMA_4x8.luma_vsp = cp.puLUMA_4x8.luma_vsp; + asmp.puLUMA_4x16.luma_vsp = cp.puLUMA_4x16.luma_vsp; + asmp.puLUMA_24x32.luma_vsp = cp.puLUMA_24x32.luma_vsp; + asmp.puLUMA_32x8.luma_vsp = cp.puLUMA_32x8.luma_vsp; +#endif +#endif + } +} + + +void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask) +{ + if (cpuMask & X265_CPU_NEON) + { + p.puLUMA_4x4.satd = PFX(pixel_satd_4x4_neon); + p.puLUMA_4x8.satd = PFX(pixel_satd_4x8_neon); + p.puLUMA_4x16.satd = PFX(pixel_satd_4x16_neon); + p.puLUMA_8x4.satd = 
PFX(pixel_satd_8x4_neon); + p.puLUMA_8x8.satd = PFX(pixel_satd_8x8_neon); + p.puLUMA_12x16.satd = PFX(pixel_satd_12x16_neon); + + p.chromaX265_CSP_I420.puCHROMA_420_4x4.satd = PFX(pixel_satd_4x4_neon); + p.chromaX265_CSP_I420.puCHROMA_420_4x8.satd = PFX(pixel_satd_4x8_neon); + p.chromaX265_CSP_I420.puCHROMA_420_4x16.satd = PFX(pixel_satd_4x16_neon); + p.chromaX265_CSP_I420.puCHROMA_420_8x4.satd = PFX(pixel_satd_8x4_neon); + p.chromaX265_CSP_I420.puCHROMA_420_8x8.satd = PFX(pixel_satd_8x8_neon); + p.chromaX265_CSP_I420.puCHROMA_420_12x16.satd = PFX(pixel_satd_12x16_neon); + + p.chromaX265_CSP_I422.puCHROMA_422_4x4.satd = PFX(pixel_satd_4x4_neon); + p.chromaX265_CSP_I422.puCHROMA_422_4x8.satd = PFX(pixel_satd_4x8_neon); + p.chromaX265_CSP_I422.puCHROMA_422_4x16.satd = PFX(pixel_satd_4x16_neon); + p.chromaX265_CSP_I422.puCHROMA_422_4x32.satd = PFX(pixel_satd_4x32_neon); + p.chromaX265_CSP_I422.puCHROMA_422_8x4.satd = PFX(pixel_satd_8x4_neon); + p.chromaX265_CSP_I422.puCHROMA_422_8x8.satd = PFX(pixel_satd_8x8_neon); + p.chromaX265_CSP_I422.puCHROMA_422_12x32.satd = PFX(pixel_satd_12x32_neon); + + p.puLUMA_4x4.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_4x4_neon); + p.puLUMA_4x8.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_4x8_neon); + p.puLUMA_4x16.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_4x16_neon); + p.puLUMA_8x4.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_8x4_neon); + p.puLUMA_8x8.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_8x8_neon); + p.puLUMA_8x16.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_8x16_neon); + p.puLUMA_8x32.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_8x32_neon); + + p.puLUMA_4x4.pixelavg_ppALIGNED = PFX(pixel_avg_pp_4x4_neon); + p.puLUMA_4x8.pixelavg_ppALIGNED = PFX(pixel_avg_pp_4x8_neon); + p.puLUMA_4x16.pixelavg_ppALIGNED = PFX(pixel_avg_pp_4x16_neon); + p.puLUMA_8x4.pixelavg_ppALIGNED = PFX(pixel_avg_pp_8x4_neon); + p.puLUMA_8x8.pixelavg_ppALIGNED = PFX(pixel_avg_pp_8x8_neon); + p.puLUMA_8x16.pixelavg_ppALIGNED = PFX(pixel_avg_pp_8x16_neon); + 
p.puLUMA_8x32.pixelavg_ppALIGNED = PFX(pixel_avg_pp_8x32_neon); + + p.puLUMA_8x4.sad_x3 = PFX(sad_x3_8x4_neon); + p.puLUMA_8x8.sad_x3 = PFX(sad_x3_8x8_neon); + p.puLUMA_8x16.sad_x3 = PFX(sad_x3_8x16_neon); + p.puLUMA_8x32.sad_x3 = PFX(sad_x3_8x32_neon); + + p.puLUMA_8x4.sad_x4 = PFX(sad_x4_8x4_neon); + p.puLUMA_8x8.sad_x4 = PFX(sad_x4_8x8_neon); + p.puLUMA_8x16.sad_x4 = PFX(sad_x4_8x16_neon); + p.puLUMA_8x32.sad_x4 = PFX(sad_x4_8x32_neon); + + // quant + p.quant = PFX(quant_neon); + // luma_hps + p.puLUMA_4x4.luma_hps = PFX(interp_8tap_horiz_ps_4x4_neon); + p.puLUMA_4x8.luma_hps = PFX(interp_8tap_horiz_ps_4x8_neon); + p.puLUMA_4x16.luma_hps = PFX(interp_8tap_horiz_ps_4x16_neon); + p.puLUMA_8x4.luma_hps = PFX(interp_8tap_horiz_ps_8x4_neon); + p.puLUMA_8x8.luma_hps = PFX(interp_8tap_horiz_ps_8x8_neon); + p.puLUMA_8x16.luma_hps = PFX(interp_8tap_horiz_ps_8x16_neon); + p.puLUMA_8x32.luma_hps = PFX(interp_8tap_horiz_ps_8x32_neon); + p.puLUMA_12x16.luma_hps = PFX(interp_8tap_horiz_ps_12x16_neon); + p.puLUMA_24x32.luma_hps = PFX(interp_8tap_horiz_ps_24x32_neon); +#if !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0 /* gcc_version < gcc-5.1.0 */ + p.puLUMA_16x4.luma_hps = PFX(interp_8tap_horiz_ps_16x4_neon); + p.puLUMA_16x8.luma_hps = PFX(interp_8tap_horiz_ps_16x8_neon); + p.puLUMA_16x12.luma_hps = PFX(interp_8tap_horiz_ps_16x12_neon); + p.puLUMA_16x16.luma_hps = PFX(interp_8tap_horiz_ps_16x16_neon); + p.puLUMA_16x32.luma_hps = PFX(interp_8tap_horiz_ps_16x32_neon); + p.puLUMA_16x64.luma_hps = PFX(interp_8tap_horiz_ps_16x64_neon); + p.puLUMA_32x8.luma_hps = PFX(interp_8tap_horiz_ps_32x8_neon); + p.puLUMA_32x16.luma_hps = PFX(interp_8tap_horiz_ps_32x16_neon); + p.puLUMA_32x24.luma_hps = PFX(interp_8tap_horiz_ps_32x24_neon); + p.puLUMA_32x32.luma_hps = PFX(interp_8tap_horiz_ps_32x32_neon); + p.puLUMA_32x64.luma_hps = PFX(interp_8tap_horiz_ps_32x64_neon); + p.puLUMA_48x64.luma_hps = PFX(interp_8tap_horiz_ps_48x64_neon); + p.puLUMA_64x16.luma_hps = 
PFX(interp_8tap_horiz_ps_64x16_neon); + p.puLUMA_64x32.luma_hps = PFX(interp_8tap_horiz_ps_64x32_neon); + p.puLUMA_64x48.luma_hps = PFX(interp_8tap_horiz_ps_64x48_neon); + p.puLUMA_64x64.luma_hps = PFX(interp_8tap_horiz_ps_64x64_neon); +#endif + + p.puLUMA_8x4.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x4>; + p.puLUMA_8x8.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x8>; + p.puLUMA_8x16.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x16>; + p.puLUMA_8x32.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x32>; + p.puLUMA_12x16.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_12x16>; +#if !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0 /* gcc_version < gcc-5.1.0 */ + p.puLUMA_16x4.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x4>; + p.puLUMA_16x8.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x8>; + p.puLUMA_16x12.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x12>; + p.puLUMA_16x16.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x16>; + p.puLUMA_16x32.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x32>; + p.puLUMA_16x64.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x64>; + p.puLUMA_32x16.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x16>; + p.puLUMA_32x24.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x24>; + p.puLUMA_32x32.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x32>; + p.puLUMA_32x64.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x64>; + p.puLUMA_48x64.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_48x64>; + p.puLUMA_64x16.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x16>; + p.puLUMA_64x32.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x32>; + p.puLUMA_64x48.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x48>; + p.puLUMA_64x64.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x64>; +#if !AUTO_VECTORIZE || GCC_VERSION < GCC_4_9_0 /* gcc_version < gcc-4.9.0 */ + p.puLUMA_4x4.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_4x4>; + p.puLUMA_4x8.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_4x8>; + p.puLUMA_4x16.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_4x16>; + p.puLUMA_24x32.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_24x32>; + p.puLUMA_32x8.luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x8>; +#endif +#endif + +#if 
!HIGH_BIT_DEPTH + p.cuBLOCK_4x4.psy_cost_pp = PFX(psyCost_4x4_neon); +#endif // !HIGH_BIT_DEPTH + + } +} +} // namespace X265_NS
View file
x265_3.4.tar.gz/source/common/aarch64/asm.S
Added
/*****************************************************************************
 * Copyright (C) 2020 MulticoreWare, Inc
 *
 * Authors: Hongbin Liu <liuhongbin1@huawei.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

.arch           armv8-a

// EXTERN_ASM is pasted in front of every exported symbol name: an underscore
// when PREFIX is defined, nothing otherwise.
#ifdef PREFIX
#define EXTERN_ASM _
#else
#define EXTERN_ASM
#endif

// ELF and FUNC are line prefixes. When enabled they expand to nothing, so the
// directive that follows is assembled; when disabled they expand to "@".
// NOTE(review): "@" is presumably meant as a comment leader (as on 32-bit ARM);
// confirm that the AArch64 assembler in use accepts it before relying on the
// non-ELF path.
#ifdef __ELF__
#define ELF
#else
#define ELF @
#endif

#define HAVE_AS_FUNC 1

#if HAVE_AS_FUNC
#define FUNC
#else
#define FUNC @
#endif

// function name, export=1
//   Opens a routine: aligns it (.align 2), emits the label and symbol
//   directives, and defines a one-shot "endfunc" macro that closes the routine
//   (.size / .endfunc) and then purges itself, so every "function" must be
//   matched by exactly one "endfunc".
//   export == 1: the symbol is .global (with the EXTERN_ASM prefix) and, on
//                ELF, hidden and typed as a function.
//   otherwise:   the label is emitted without .global.
.macro function name, export=1
    .macro endfunc
ELF     .size   \name, . - \name
FUNC    .endfunc
        .purgem endfunc
    .endm
        .align  2
.if \export == 1
        .global EXTERN_ASM\name
ELF     .hidden EXTERN_ASM\name
ELF     .type   EXTERN_ASM\name, %function
FUNC    .func   EXTERN_ASM\name
EXTERN_ASM\name:
.else
ELF     .hidden \name
ELF     .type   \name, %function
FUNC    .func   \name
\name:
.endif
.endm


#define FENC_STRIDE 64
#define FDEC_STRIDE 32
View file
x265_3.4.tar.gz/source/common/aarch64/ipfilter8.S
Added
/*****************************************************************************
 * Copyright (C) 2020 MulticoreWare, Inc
 *
 * Authors: Yimeng Su <yimeng.su@huawei.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

#include "asm.S"

.section .rodata

.align 4

.text

// ---------------------------------------------------------------------------
// qpel_filter_N_32b
//   Inputs : v0, v4, v1, v5, v2, v6, v3, v7 (low 8 bytes each) as produced by
//            vextin8; call them t0..t7 in that order.
//   Outputs: v17.4s (results 0..3) and v18.4s (results 4..7), 32-bit sums.
//   Scratch: v16, v19-v24; filters 1, 2 and 3 also overwrite v1 and v2.
// ---------------------------------------------------------------------------

// result = 64 * t3
.macro qpel_filter_0_32b
    movi            v24.8h, #64
    uxtl            v19.8h, v5.8b
    smull           v17.4s, v19.4h, v24.4h
    smull2          v18.4s, v19.8h, v24.8h
.endm

// result = -t0 + 4*t1 - 10*t2 + 58*t3 + 17*t4 - 5*t5 + t6
.macro qpel_filter_1_32b
    movi            v16.8h, #58
    uxtl            v19.8h, v5.8b
    smull           v17.4s, v19.4h, v16.4h
    smull2          v18.4s, v19.8h, v16.8h

    movi            v24.8h, #10
    uxtl            v21.8h, v1.8b
    smull           v19.4s, v21.4h, v24.4h
    smull2          v20.4s, v21.8h, v24.8h

    movi            v16.8h, #17
    uxtl            v23.8h, v2.8b
    smull           v21.4s, v23.4h, v16.4h
    smull2          v22.4s, v23.8h, v16.8h

    movi            v24.8h, #5
    uxtl            v1.8h, v6.8b
    smull           v23.4s, v1.4h, v24.4h
    smull2          v16.4s, v1.8h, v24.8h

    sub             v17.4s, v17.4s, v19.4s
    sub             v18.4s, v18.4s, v20.4s

    uxtl            v1.8h, v4.8b
    sshll           v19.4s, v1.4h, #2
    sshll2          v20.4s, v1.8h, #2

    add             v17.4s, v17.4s, v21.4s
    add             v18.4s, v18.4s, v22.4s

    uxtl            v1.8h, v0.8b
    uxtl            v2.8h, v3.8b
    ssubl           v21.4s, v2.4h, v1.4h
    ssubl2          v22.4s, v2.8h, v1.8h

    add             v17.4s, v17.4s, v19.4s
    add             v18.4s, v18.4s, v20.4s
    sub             v21.4s, v21.4s, v23.4s
    sub             v22.4s, v22.4s, v16.4s
    add             v17.4s, v17.4s, v21.4s
    add             v18.4s, v18.4s, v22.4s
.endm

// result = -(t0 + t7) + 4*(t1 + t6) - 11*(t2 + t5) + 40*(t3 + t4)
.macro qpel_filter_2_32b
    movi            v16.4s, #11
    uxtl            v19.8h, v5.8b
    uxtl            v20.8h, v2.8b
    saddl           v17.4s, v19.4h, v20.4h
    saddl2          v18.4s, v19.8h, v20.8h

    uxtl            v21.8h, v1.8b
    uxtl            v22.8h, v6.8b
    saddl           v19.4s, v21.4h, v22.4h
    saddl2          v20.4s, v21.8h, v22.8h

    mul             v19.4s, v19.4s, v16.4s
    mul             v20.4s, v20.4s, v16.4s

    movi            v16.4s, #40
    mul             v17.4s, v17.4s, v16.4s
    mul             v18.4s, v18.4s, v16.4s

    uxtl            v21.8h, v4.8b
    uxtl            v22.8h, v3.8b
    saddl           v23.4s, v21.4h, v22.4h
    saddl2          v16.4s, v21.8h, v22.8h

    uxtl            v1.8h, v0.8b
    uxtl            v2.8h, v7.8b
    saddl           v21.4s, v1.4h, v2.4h
    saddl2          v22.4s, v1.8h, v2.8h

    shl             v23.4s, v23.4s, #2
    shl             v16.4s, v16.4s, #2

    add             v19.4s, v19.4s, v21.4s
    add             v20.4s, v20.4s, v22.4s
    add             v17.4s, v17.4s, v23.4s
    add             v18.4s, v18.4s, v16.4s
    sub             v17.4s, v17.4s, v19.4s
    sub             v18.4s, v18.4s, v20.4s
.endm

// result = t1 - 5*t2 + 17*t3 + 58*t4 - 10*t5 + 4*t6 - t7
.macro qpel_filter_3_32b
    movi            v16.8h, #17
    movi            v24.8h, #5

    uxtl            v19.8h, v5.8b
    smull           v17.4s, v19.4h, v16.4h
    smull2          v18.4s, v19.8h, v16.8h

    uxtl            v21.8h, v1.8b
    smull           v19.4s, v21.4h, v24.4h
    smull2          v20.4s, v21.8h, v24.8h

    movi            v16.8h, #58
    uxtl            v23.8h, v2.8b
    smull           v21.4s, v23.4h, v16.4h
    smull2          v22.4s, v23.8h, v16.8h

    movi            v24.8h, #10
    uxtl            v1.8h, v6.8b
    smull           v23.4s, v1.4h, v24.4h
    smull2          v16.4s, v1.8h, v24.8h

    sub             v17.4s, v17.4s, v19.4s
    sub             v18.4s, v18.4s, v20.4s

    uxtl            v1.8h, v3.8b
    sshll           v19.4s, v1.4h, #2
    sshll2          v20.4s, v1.8h, #2

    add             v17.4s, v17.4s, v21.4s
    add             v18.4s, v18.4s, v22.4s

    uxtl            v1.8h, v4.8b
    uxtl            v2.8h, v7.8b
    ssubl           v21.4s, v1.4h, v2.4h
    ssubl2          v22.4s, v1.8h, v2.8h

    add             v17.4s, v17.4s, v19.4s
    add             v18.4s, v18.4s, v20.4s
    sub             v21.4s, v21.4s, v23.4s
    sub             v22.4s, v22.4s, v16.4s
    add             v17.4s, v17.4s, v21.4s
    add             v18.4s, v18.4s, v22.4s
.endm

// vextin8
//   Loads 16 bytes from x11 (post-incremented by 16) and builds eight 8-byte
//   windows of that load, each shifted one byte further:
//     v0 = bytes 1..8,  v4 = 2..9,  v1 = 3..10, v5 = 4..11,
//     v2 = bytes 5..12, v6 = 6..13, v3 = 7..14, v7 = 8..15.
//   Callers point x11 at (row start - 4), so v5 holds the pixels at the
//   output positions themselves.
.macro vextin8
    ld1             {v3.16b}, [x11], #16
    mov             v7.d[0], v3.d[1]
    ext             v0.8b, v3.8b, v7.8b, #1
    ext             v4.8b, v3.8b, v7.8b, #2
    ext             v1.8b, v3.8b, v7.8b, #3
    ext             v5.8b, v3.8b, v7.8b, #4
    ext             v2.8b, v3.8b, v7.8b, #5
    ext             v6.8b, v3.8b, v7.8b, #6
    ext             v3.8b, v3.8b, v7.8b, #7
.endm

// void interp_horiz_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt)
//
// HPS_FILTER a b filterhps
//   Row loop for an a x b block using filter macro "filterhps".
//   On entry: x0 = src row, x1 = srcStride, x2 = dst, x3 = dstStride,
//             w5 = isRowExt, w10 = number of rows to produce.
//   x3 is turned into the byte distance from the end of one output row to the
//   start of the next, (dstStride - a) * 2. 8192 is subtracted from every
//   filtered value before it is narrowed to 16 bits and stored.
//   Widths 4 and 12 are handed to HPS_FILTER_4 / HPS_FILTER_12; every other
//   width is processed 8 outputs at a time (a / 8 iterations per row).
//   The rowext0 and rowext1 loops are identical; the row count in w10 was
//   already adjusted by LUMA_HPS.
.macro HPS_FILTER a b filterhps
    mov             w12, #8192
    mov             w6, w10
    sub             x3, x3, #\a
    lsl             x3, x3, #1
    mov             w9, #\a
    cmp             w9, #4
    b.eq            14f
    cmp             w9, #12
    b.eq            15f
    b               7f
14:
    HPS_FILTER_4 \a \b \filterhps
    b               10f
15:
    HPS_FILTER_12 \a \b \filterhps
    b               10f
7:
    cmp             w5, #0
    b.eq            8f
    cmp             w5, #1
    b.eq            9f
8:
loop1_hps_\filterhps\()_\a\()x\b\()_rowext0:
    mov             w7, #\a
    lsr             w7, w7, #3
    mov             x11, x0
    sub             x11, x11, #4
loop2_hps_\filterhps\()_\a\()x\b\()_rowext0:
    vextin8
    \filterhps
    dup             v16.4s, w12
    sub             v17.4s, v17.4s, v16.4s
    sub             v18.4s, v18.4s, v16.4s
    xtn             v0.4h, v17.4s
    xtn2            v0.8h, v18.4s
    st1             {v0.8h}, [x2], #16
    subs            w7, w7, #1
    sub             x11, x11, #8        // vextin8 advanced by 16; net step is 8 pixels
    b.ne            loop2_hps_\filterhps\()_\a\()x\b\()_rowext0
    subs            w6, w6, #1
    add             x0, x0, x1
    add             x2, x2, x3
    b.ne            loop1_hps_\filterhps\()_\a\()x\b\()_rowext0
    b               10f
9:
loop3_hps_\filterhps\()_\a\()x\b\()_rowext1:
    mov             w7, #\a
    lsr             w7, w7, #3
    mov             x11, x0
    sub             x11, x11, #4
loop4_hps_\filterhps\()_\a\()x\b\()_rowext1:
    vextin8
    \filterhps
    dup             v16.4s, w12
    sub             v17.4s, v17.4s, v16.4s
    sub             v18.4s, v18.4s, v16.4s
    xtn             v0.4h, v17.4s
    xtn2            v0.8h, v18.4s
    st1             {v0.8h}, [x2], #16
    subs            w7, w7, #1
    sub             x11, x11, #8
    b.ne            loop4_hps_\filterhps\()_\a\()x\b\()_rowext1
    subs            w6, w6, #1
    add             x0, x0, x1
    add             x2, x2, x3
    b.ne            loop3_hps_\filterhps\()_\a\()x\b\()_rowext1
10:
.endm

// HPS_FILTER_4 w h filterhps
//   Width-4 variant of the row loop: one vextin8 per row, only the low four
//   results (v17) are narrowed and stored (8 bytes).
.macro HPS_FILTER_4 w h filterhps
    cmp             w5, #0
    b.eq            11f
    cmp             w5, #1
    b.eq            12f
11:
loop4_hps_\filterhps\()_\w\()x\h\()_rowext0:
    mov             x11, x0
    sub             x11, x11, #4
    vextin8
    \filterhps
    dup             v16.4s, w12
    sub             v17.4s, v17.4s, v16.4s
    xtn             v0.4h, v17.4s
    st1             {v0.4h}, [x2], #8
    sub             x11, x11, #8
    subs            w6, w6, #1
    add             x0, x0, x1
    add             x2, x2, x3
    b.ne            loop4_hps_\filterhps\()_\w\()x\h\()_rowext0
    b               13f
12:
loop5_hps_\filterhps\()_\w\()x\h\()_rowext1:
    mov             x11, x0
    sub             x11, x11, #4
    vextin8
    \filterhps
    dup             v16.4s, w12
    sub             v17.4s, v17.4s, v16.4s
    xtn             v0.4h, v17.4s
    st1             {v0.4h}, [x2], #8
    sub             x11, x11, #8
    subs            w6, w6, #1
    add             x0, x0, x1
    add             x2, x2, x3
    b.ne            loop5_hps_\filterhps\()_\w\()x\h\()_rowext1
13:
.endm

// HPS_FILTER_12 w h filterhps
//   Width-12 variant of the row loop: a full 8-wide step (16 bytes stored)
//   followed by a 4-wide step (8 bytes stored) per row.
.macro HPS_FILTER_12 w h filterhps
    cmp             w5, #0
    b.eq            14f
    cmp             w5, #1
    b.eq            15f
14:
loop12_hps_\filterhps\()_\w\()x\h\()_rowext0:
    mov             x11, x0
    sub             x11, x11, #4
    vextin8
    \filterhps
    dup             v16.4s, w12
    sub             v17.4s, v17.4s, v16.4s
    sub             v18.4s, v18.4s, v16.4s
    xtn             v0.4h, v17.4s
    xtn2            v0.8h, v18.4s
    st1             {v0.8h}, [x2], #16
    sub             x11, x11, #8

    vextin8
    \filterhps
    dup             v16.4s, w12
    sub             v17.4s, v17.4s, v16.4s
    xtn             v0.4h, v17.4s
    st1             {v0.4h}, [x2], #8
    add             x2, x2, x3
    subs            w6, w6, #1
    add             x0, x0, x1
    b.ne            loop12_hps_\filterhps\()_\w\()x\h\()_rowext0
    b               16f
15:
loop12_hps_\filterhps\()_\w\()x\h\()_rowext1:
    mov             x11, x0
    sub             x11, x11, #4
    vextin8
    \filterhps
    dup             v16.4s, w12
    sub             v17.4s, v17.4s, v16.4s
    sub             v18.4s, v18.4s, v16.4s
    xtn             v0.4h, v17.4s
    xtn2            v0.8h, v18.4s
    st1             {v0.8h}, [x2], #16
    sub             x11, x11, #8

    vextin8
    \filterhps
    dup             v16.4s, w12
    sub             v17.4s, v17.4s, v16.4s
    xtn             v0.4h, v17.4s
    st1             {v0.4h}, [x2], #8
    add             x2, x2, x3
    subs            w6, w6, #1
    add             x0, x0, x1
    b.ne            loop12_hps_\filterhps\()_\w\()x\h\()_rowext1
16:
.endm

// void interp_horiz_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt)
//
// LUMA_HPS w h
//   Emits x265_interp_8tap_horiz_ps_<w>x<h>_neon.
//   x0 = src, x1 = srcStride, x2 = dst, x3 = dstStride, w4 = coeffIdx,
//   w5 = isRowExt.
//   When isRowExt is non-zero the source pointer is moved up by 3 rows
//   (minus 4 strides, plus 1) and 7 extra rows are produced.
//   coeffIdx 1, 2 and 3 select qpel_filter_1/2/3; 0 and any other value fall
//   through to qpel_filter_0.
.macro LUMA_HPS w h
function x265_interp_8tap_horiz_ps_\w\()x\h\()_neon
    mov             w10, #\h
    cmp             w5, #0
    b.eq            6f
    sub             x0, x0, x1, lsl #2

    add             x0, x0, x1
    add             w10, w10, #7
6:
    cmp             w4, #0
    b.eq            0f
    cmp             w4, #1
    b.eq            1f
    cmp             w4, #2
    b.eq            2f
    cmp             w4, #3
    b.eq            3f
0:
    HPS_FILTER  \w \h qpel_filter_0_32b
    b               5f
1:
    HPS_FILTER  \w \h qpel_filter_1_32b
    b               5f
2:
    HPS_FILTER  \w \h qpel_filter_2_32b
    b               5f
3:
    HPS_FILTER  \w \h qpel_filter_3_32b
    b               5f
5:
    ret
endfunc
.endm

LUMA_HPS    4 4
LUMA_HPS    4 8
LUMA_HPS    4 16
LUMA_HPS    8 4
LUMA_HPS    8 8
LUMA_HPS    8 16
LUMA_HPS    8 32
LUMA_HPS    12 16
LUMA_HPS    16 4
LUMA_HPS    16 8
LUMA_HPS    16 12
LUMA_HPS    16 16
LUMA_HPS    16 32
LUMA_HPS    16 64
LUMA_HPS    24 32
LUMA_HPS    32 8
LUMA_HPS    32 16
LUMA_HPS    32 24
LUMA_HPS    32 32
LUMA_HPS    32 64
LUMA_HPS    48 64
LUMA_HPS    64 16
LUMA_HPS    64 32
LUMA_HPS    64 48
LUMA_HPS    64 64
View file
x265_3.4.tar.gz/source/common/aarch64/ipfilter8.h
Added
@@ -0,0 +1,55 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Yimeng Su <yimeng.su@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#ifndef X265_IPFILTER8_AARCH64_H +#define X265_IPFILTER8_AARCH64_H + + +void x265_interp_8tap_horiz_ps_4x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_4x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_4x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_8x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_8x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_8x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_8x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_12x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x12_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, 
int isRowExt); +void x265_interp_8tap_horiz_ps_16x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_24x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x24_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_48x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_64x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_64x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_64x48_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_64x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); + + +#endif // ifndef X265_IPFILTER8_AARCH64_H
View file
x265_3.4.tar.gz/source/common/aarch64/mc-a.S
Added
/*****************************************************************************
 * Copyright (C) 2020 MulticoreWare, Inc
 *
 * Authors: Hongbin Liu <liuhongbin1@huawei.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

#include "asm.S"

.section .rodata

.align 4

.text

// pixel_avg_pp_4xN_neon h
//   Emits x265_pixel_avg_pp_4x<h>_neon, fully unrolled over h rows.
//   Each row loads 4 bytes from x2 and 4 bytes from x4, takes the unsigned
//   rounding average (urhadd) and stores 4 bytes to x0. The pointers are
//   post-incremented by x3, x5 and x1 respectively.
//   NOTE(review): presumably x0/x1 = dst and its stride, x2/x3 = first source
//   and stride, x4/x5 = second source and stride; confirm against the
//   pixelavg_pp primitive prototype.
.macro pixel_avg_pp_4xN_neon h
function x265_pixel_avg_pp_4x\h\()_neon
.rept \h
    ld1             {v0.s}[0], [x2], x3
    ld1             {v1.s}[0], [x4], x5
    urhadd          v2.8b, v0.8b, v1.8b
    st1             {v2.s}[0], [x0], x1
.endr
    ret
endfunc
.endm

pixel_avg_pp_4xN_neon 4
pixel_avg_pp_4xN_neon 8
pixel_avg_pp_4xN_neon 16

// pixel_avg_pp_8xN_neon h
//   Same as the 4-wide version, but every row moves 8 bytes.
.macro pixel_avg_pp_8xN_neon h
function x265_pixel_avg_pp_8x\h\()_neon
.rept \h
    ld1             {v0.8b}, [x2], x3
    ld1             {v1.8b}, [x4], x5
    urhadd          v2.8b, v0.8b, v1.8b
    st1             {v2.8b}, [x0], x1
.endr
    ret
endfunc
.endm

pixel_avg_pp_8xN_neon 4
pixel_avg_pp_8xN_neon 8
pixel_avg_pp_8xN_neon 16
pixel_avg_pp_8xN_neon 32
View file
x265_3.4.tar.gz/source/common/aarch64/pixel-util.S
Added
/*****************************************************************************
 * Copyright (C) 2020 MulticoreWare, Inc
 *
 * Authors: Yimeng Su <yimeng.su@huawei.com>
 *          Hongbin Liu <liuhongbin1@huawei.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

#include "asm.S"

.section .rodata

.align 4

.text

// Final butterfly / transpose / reduction stage shared by the 4x8 SATD path.
// In:       v4-v7 (16-bit lanes) as produced by pixel_satd_4x8_neon.
// Out:      s0 = 32-bit sum (UADDLV across the eight 16-bit lanes of v0).
// Clobbers: v0-v7, v16-v19.
.macro x265_satd_4x8_8x4_end_neon
    add             v0.8h, v4.8h, v6.8h
    add             v1.8h, v5.8h, v7.8h
    sub             v2.8h, v4.8h, v6.8h
    sub             v3.8h, v5.8h, v7.8h

    trn1            v16.8h, v0.8h, v1.8h
    trn2            v17.8h, v0.8h, v1.8h
    add             v4.8h, v16.8h, v17.8h
    trn1            v18.8h, v2.8h, v3.8h
    trn2            v19.8h, v2.8h, v3.8h
    sub             v5.8h, v16.8h, v17.8h
    add             v6.8h, v18.8h, v19.8h
    sub             v7.8h, v18.8h, v19.8h
    trn1            v0.4s, v4.4s, v6.4s
    trn2            v2.4s, v4.4s, v6.4s
    abs             v0.8h, v0.8h
    trn1            v1.4s, v5.4s, v7.4s
    trn2            v3.4s, v5.4s, v7.4s
    abs             v2.8h, v2.8h
    abs             v1.8h, v1.8h
    abs             v3.8h, v3.8h
    umax            v0.8h, v0.8h, v2.8h
    umax            v1.8h, v1.8h, v3.8h
    add             v0.8h, v0.8h, v1.8h
    uaddlv          s0, v0.8h
.endm

// SATD of one 4-wide, 8-row strip.
// In:  x0 = pix1, x1 = stride_pix1, x2 = pix2, x3 = stride_pix2.
// Out: s0 = SATD of the strip.
// Eight 4-byte rows are read from each pointer with post-indexed loads, so on
// exit x0 and x2 point eight rows further down; back-to-back invocations walk
// down a taller block without any pointer fix-up.
.macro pixel_satd_4x8_neon
    ld1r            {v1.2s}, [x2], x3
    ld1r            {v0.2s}, [x0], x1
    ld1r            {v3.2s}, [x2], x3
    ld1r            {v2.2s}, [x0], x1
    ld1r            {v5.2s}, [x2], x3
    ld1r            {v4.2s}, [x0], x1
    ld1r            {v7.2s}, [x2], x3
    ld1r            {v6.2s}, [x0], x1

    ld1             {v1.s}[1], [x2], x3
    ld1             {v0.s}[1], [x0], x1
    usubl           v0.8h, v0.8b, v1.8b
    ld1             {v3.s}[1], [x2], x3
    ld1             {v2.s}[1], [x0], x1
    usubl           v1.8h, v2.8b, v3.8b
    ld1             {v5.s}[1], [x2], x3
    ld1             {v4.s}[1], [x0], x1
    usubl           v2.8h, v4.8b, v5.8b
    ld1             {v7.s}[1], [x2], x3
    add             v4.8h, v0.8h, v1.8h
    sub             v5.8h, v0.8h, v1.8h
    ld1             {v6.s}[1], [x0], x1
    usubl           v3.8h, v6.8b, v7.8b
    add             v6.8h, v2.8h, v3.8h
    sub             v7.8h, v2.8h, v3.8h
    x265_satd_4x8_8x4_end_neon
.endm

// template<int w, int h>
// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
function x265_pixel_satd_4x8_neon
    pixel_satd_4x8_neon
    mov             w0, v0.s[0]
    ret
endfunc

// template<int w, int h>
// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
// Two stacked 4x8 strips; w4 is the running total.
function x265_pixel_satd_4x16_neon
    eor             w4, w4, w4
    pixel_satd_4x8_neon
    mov             w5, v0.s[0]
    add             w4, w4, w5
    pixel_satd_4x8_neon
    mov             w5, v0.s[0]
    add             w0, w5, w4
    ret
endfunc

// template<int w, int h>
// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
// Four stacked 4x8 strips; w4 is the running total.
function x265_pixel_satd_4x32_neon
    eor             w4, w4, w4
.rept 4
    pixel_satd_4x8_neon
    mov             w5, v0.s[0]
    add             w4, w4, w5
.endr
    mov             w0, w4
    ret
endfunc

// template<int w, int h>
// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
// Three 4-wide columns (byte offsets 0, 4, 8), each two 4x8 strips tall.
// x4/x5 keep the original block origins so each column restarts from row 0.
function x265_pixel_satd_12x16_neon
    mov             x4, x0
    mov             x5, x2
    eor             w7, w7, w7
    pixel_satd_4x8_neon
    mov             w6, v0.s[0]
    add             w7, w7, w6
    pixel_satd_4x8_neon
    mov             w6, v0.s[0]
    add             w7, w7, w6

    add             x0, x4, #4
    add             x2, x5, #4
    pixel_satd_4x8_neon
    mov             w6, v0.s[0]
    add             w7, w7, w6
    pixel_satd_4x8_neon
    mov             w6, v0.s[0]
    add             w7, w7, w6

    add             x0, x4, #8
    add             x2, x5, #8
    pixel_satd_4x8_neon
    mov             w6, v0.s[0]
    add             w7, w7, w6
    pixel_satd_4x8_neon
    mov             w6, v0.s[0]
    add             w0, w7, w6
    ret
endfunc

// template<int w, int h>
// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
// Three 4-wide columns (byte offsets 0, 4, 8), each four 4x8 strips tall.
function x265_pixel_satd_12x32_neon
    mov             x4, x0
    mov             x5, x2
    eor             w7, w7, w7
.rept 4
    pixel_satd_4x8_neon
    mov             w6, v0.s[0]
    add             w7, w7, w6
.endr

    add             x0, x4, #4
    add             x2, x5, #4
.rept 4
    pixel_satd_4x8_neon
    mov             w6, v0.s[0]
    add             w7, w7, w6
.endr

    add             x0, x4, #8
    add             x2, x5, #8
.rept 4
    pixel_satd_4x8_neon
    mov             w6, v0.s[0]
    add             w7, w7, w6
.endr

    mov             w0, w7
    ret
endfunc

// template<int w, int h>
// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
// Two side-by-side 4x8 strips (byte offsets 0 and 4); x6/x7 keep the origins.
function x265_pixel_satd_8x8_neon
    eor             w4, w4, w4
    mov             x6, x0
    mov             x7, x2
    pixel_satd_4x8_neon
    mov             w5, v0.s[0]
    add             w4, w4, w5
    add             x0, x6, #4
    add             x2, x7, #4
    pixel_satd_4x8_neon
    mov             w5, v0.s[0]
    add             w0, w4, w5
    ret
endfunc

// int psyCost_pp(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride)
//   x0 = source, x1 = sstride, x2 = recon, x3 = rstride
// The source block (v4/v5 -> v0..v3, v20..v23) and the recon block
// (v6/v7 -> v16..v19, v24..v27) go through the same sequence in parallel.
// One value per block ends up in lanes 0 and 1 of v0.2s, and the function
// returns the absolute value of their difference (subs + cneg on "mi").
function x265_psyCost_4x4_neon
    // Four 4-byte rows of each block, packed two rows per 64-bit register.
    ld1r            {v4.2s}, [x0], x1
    ld1r            {v5.2s}, [x0], x1
    ld1             {v4.s}[1], [x0], x1
    ld1             {v5.s}[1], [x0], x1

    ld1r            {v6.2s}, [x2], x3
    ld1r            {v7.2s}, [x2], x3
    ld1             {v6.s}[1], [x2], x3
    ld1             {v7.s}[1], [x2], x3

    uaddl           v2.8h, v4.8b, v5.8b
    usubl           v3.8h, v4.8b, v5.8b
    uaddl           v18.8h, v6.8b, v7.8b
    usubl           v19.8h, v6.8b, v7.8b

    mov             v20.d[0], v2.d[1]
    add             v0.4h, v2.4h, v20.4h
    sub             v1.4h, v2.4h, v20.4h
    mov             v21.d[0], v3.d[1]
    add             v22.4h, v3.4h, v21.4h
    sub             v23.4h, v3.4h, v21.4h

    mov             v24.d[0], v18.d[1]
    add             v16.4h, v18.4h, v24.4h
    sub             v17.4h, v18.4h, v24.4h
    mov             v25.d[0], v19.d[1]
    add             v26.4h, v19.4h, v25.4h
    sub             v27.4h, v19.4h, v25.4h

    mov             v0.d[1], v22.d[0]
    mov             v1.d[1], v23.d[0]
    trn1            v22.8h, v0.8h, v1.8h
    trn2            v23.8h, v0.8h, v1.8h
    mov             v16.d[1], v26.d[0]
    mov             v17.d[1], v27.d[0]
    trn1            v26.8h, v16.8h, v17.8h
    trn2            v27.8h, v16.8h, v17.8h

    add             v2.8h, v22.8h, v23.8h
    sub             v3.8h, v22.8h, v23.8h
    add             v18.8h, v26.8h, v27.8h
    sub             v19.8h, v26.8h, v27.8h

    // Plain pixel sums of both blocks; reduced to scalars further down.
    uaddl           v20.8h, v4.8b, v5.8b
    uaddl           v21.8h, v6.8b, v7.8b

    trn1            v0.4s, v2.4s, v3.4s
    trn2            v1.4s, v2.4s, v3.4s
    trn1            v16.4s, v18.4s, v19.4s
    trn2            v17.4s, v18.4s, v19.4s
    abs             v0.8h, v0.8h
    abs             v16.8h, v16.8h
    abs             v1.8h, v1.8h
    abs             v17.8h, v17.8h

    uaddlv          s20, v20.8h
    uaddlv          s21, v21.8h
    mov             v20.s[1], v21.s[0]

    smax            v0.8h, v0.8h, v1.8h
    smax            v16.8h, v16.8h, v17.8h

    trn1            v4.2d, v0.2d, v16.2d
    trn2            v5.2d, v0.2d, v16.2d
    add             v0.8h, v4.8h, v5.8h
    mov             v4.d[0], v0.d[1]
    uaddlv          s0, v0.4h
    uaddlv          s4, v4.4h

    // v20.2s = { source pixel sum, recon pixel sum } >> 2, subtracted from the
    // per-block transform sums gathered in v0.2s.
    ushr            v20.2s, v20.2s, #2
    mov             v0.s[1], v4.s[0]
    sub             v0.2s, v0.2s, v20.2s
    mov             w0, v0.s[0]
    mov             w1, v0.s[1]
    subs            w0, w0, w1
    cneg            w0, w0, mi

    ret
endfunc

// uint32_t quant_c(const int16_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff)
//   x0 = coef, x1 = quantCoeff, x2 = deltaU, x3 = qCoef,
//   w4 = qBits, w5 = add, w6 = numCoeff
// Returns the number of non-zero quantised levels in w0.
//
// Four coefficients are handled per iteration. The loop counter is
// numCoeff >> 2 and is decremented before it is tested.
// NOTE(review): this means numCoeff must be a non-zero multiple of 4; a value
// below 4 would wrap the counter. Confirm that all callers guarantee this.
function x265_quant_neon
    mov             w9, #1
    lsl             w9, w9, w4
    dup             v0.2s, w9               // 1 << qBits
    neg             w9, w4
    dup             v1.4s, w9               // -qBits: SSHL by a negative count shifts right
    add             w9, w9, #8
    dup             v2.4s, w9               // -(qBits - 8)
    dup             v3.4s, w5               // add

    lsr             w6, w6, #2
    eor             v4.16b, v4.16b, v4.16b
    eor             w10, w10, w10
    eor             v17.16b, v17.16b, v17.16b

.loop_quant:

    ld1             {v18.4h}, [x0], #8
    ld1             {v7.4s}, [x1], #16
    sxtl            v6.4s, v18.4h

    cmlt            v5.4s, v6.4s, #0        // sign mask: all-ones where coef < 0

    abs             v6.4s, v6.4s


    mul             v6.4s, v6.4s, v7.4s     // |coef| * quantCoeff

    add             v7.4s, v6.4s, v3.4s
    sshl            v7.4s, v7.4s, v1.4s     // level = (product + add) >> qBits

    mls             v6.4s, v7.4s, v0.s[0]   // product - (level << qBits)
    sshl            v16.4s, v6.4s, v2.4s    // ... >> (qBits - 8)
    st1             {v16.4s}, [x2], #16

    // numsig
    // w10 counts every coefficient processed; v4 collects -1 per zero level
    // (CMEQ yields all-ones), so w10 + sum(v4) is the non-zero count.
    cmeq            v16.4s, v7.4s, v17.4s
    add             v4.4s, v4.4s, v16.4s
    add             w10, w10, #4

    // level *= sign
    // (level ^ mask) - mask negates exactly the lanes whose mask is all-ones.
    eor             v16.16b, v7.16b, v5.16b
    sub             v16.4s, v16.4s, v5.4s
    sqxtn           v5.4h, v16.4s
    st1             {v5.4h}, [x3], #8

    subs            w6, w6, #1
    b.ne            .loop_quant

    addv            s4, v4.4s
    mov             w9, v4.s[0]
    add             w0, w10, w9
    ret
endfunc

// SATD of one 4x4 block.
// In:  x0 = pix1, x1 = stride_pix1, x2 = pix2, x3 = stride_pix2.
// Out: v0.d[0] = SATD (zero-extended by the two UADDLP widening steps).
// Four rows are consumed from each pointer, so x0 and x2 are advanced.
// Clobbers: v0-v7.
.macro satd_4x4_neon
    ld1             {v1.s}[0], [x2], x3
    ld1             {v0.s}[0], [x0], x1
    ld1             {v3.s}[0], [x2], x3
    ld1             {v2.s}[0], [x0], x1

    ld1             {v1.s}[1], [x2], x3
    ld1             {v0.s}[1], [x0], x1
    ld1             {v3.s}[1], [x2], x3
    ld1             {v2.s}[1], [x0], x1

    usubl           v4.8h, v0.8b, v1.8b
    usubl           v5.8h, v2.8b, v3.8b

    add             v6.8h, v4.8h, v5.8h
    sub             v7.8h, v4.8h, v5.8h

    mov             v4.d[0], v6.d[1]
    add             v0.8h, v6.8h, v4.8h
    sub             v2.8h, v6.8h, v4.8h

    mov             v5.d[0], v7.d[1]
    add             v1.8h, v7.8h, v5.8h
    sub             v3.8h, v7.8h, v5.8h

    trn1            v4.4h, v0.4h, v1.4h
    trn2            v5.4h, v0.4h, v1.4h

    trn1            v6.4h, v2.4h, v3.4h
    trn2            v7.4h, v2.4h, v3.4h

    add             v0.4h, v4.4h, v5.4h
    sub             v1.4h, v4.4h, v5.4h

    add             v2.4h, v6.4h, v7.4h
    sub             v3.4h, v6.4h, v7.4h

    trn1            v4.2s, v0.2s, v1.2s
    trn2            v5.2s, v0.2s, v1.2s

    trn1            v6.2s, v2.2s, v3.2s
    trn2            v7.2s, v2.2s, v3.2s

    abs             v4.4h, v4.4h
    abs             v5.4h, v5.4h
    abs             v6.4h, v6.4h
    abs             v7.4h, v7.4h

    smax            v1.4h, v4.4h, v5.4h
    smax            v2.4h, v6.4h, v7.4h

    add             v0.4h, v1.4h, v2.4h
    uaddlp          v0.2s, v0.4h
    uaddlp          v0.1d, v0.2s
.endm

// int satd_4x4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
function x265_pixel_satd_4x4_neon
    satd_4x4_neon
    umov            x0, v0.d[0]
    ret
endfunc

// int satd_8x4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
// Two side-by-side 4x4 blocks (byte offsets 0 and 4); x4/x5 keep the origins.
function x265_pixel_satd_8x4_neon
    mov             x4, x0
    mov             x5, x2
    satd_4x4_neon
    add             x0, x4, #4
    add             x2, x5, #4
    umov            x6, v0.d[0]
    satd_4x4_neon
    umov            x0, v0.d[0]
    add             x0, x0, x6
    ret
endfunc
View file
x265_3.4.tar.gz/source/common/aarch64/pixel-util.h
Added
@@ -0,0 +1,40 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Yimeng Su <yimeng.su@huawei.com> + * Hongbin Liu <liuhongbin1@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#ifndef X265_PIXEL_UTIL_AARCH64_H +#define X265_PIXEL_UTIL_AARCH64_H + +int x265_pixel_satd_4x4_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_4x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_4x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_4x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_8x4_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_8x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_12x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_12x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); + +uint32_t x265_quant_neon(const int16_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff); +int PFX(psyCost_4x4_neon)(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride); + +#endif // ifndef X265_PIXEL_UTIL_AARCH64_H
View file
x265_3.4.tar.gz/source/common/aarch64/pixel.h
Added
@@ -0,0 +1,105 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Hongbin Liu <liuhongbin1@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#ifndef X265_I386_PIXEL_AARCH64_H +#define X265_I386_PIXEL_AARCH64_H + +void x265_pixel_avg_pp_4x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_4x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_4x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_8x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_8x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_8x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_8x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_12x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_16x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_16x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_16x12_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_16x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_16x32_neon (pixel* dst, intptr_t dstride, const pixel* 
src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_16x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_24x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_32x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_32x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_32x24_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_32x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_32x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_48x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_64x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_64x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_64x48_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_64x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); + +void x265_sad_x3_4x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void 
x265_sad_x3_4x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_4x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_8x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_8x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_8x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_8x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_12x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_16x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_16x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_16x12_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_16x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_16x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_16x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_24x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); 
+void x265_sad_x3_32x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_32x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_32x24_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_32x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_32x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_48x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_64x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_64x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_64x48_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_64x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); + +void x265_sad_x4_4x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_4x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_4x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_8x4_neon(const pixel* fenc, const pixel* fref0, const 
pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_8x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_8x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_8x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_12x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_16x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_16x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_16x12_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_16x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_16x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_16x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_24x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_32x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, 
const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_32x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_32x24_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_32x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_32x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_48x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_64x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_64x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_64x48_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_64x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); + +#endif // ifndef X265_I386_PIXEL_AARCH64_H
View file
x265_3.4.tar.gz/source/common/aarch64/sad-a.S
Added
/*****************************************************************************
 * Copyright (C) 2020 MulticoreWare, Inc
 *
 * Authors: Hongbin Liu <liuhongbin1@huawei.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

#include "asm.S"

.section .rodata

.align 4

.text

// Multi-reference SAD for 8-pixel-wide blocks, one byte per pixel.
//
// void x265_sad_x3_8xH_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1,
//                           const pixel* fref2, intptr_t frefstride, int32_t* res)
//   x0 = fenc, x1..x3 = fref0..fref2, x4 = frefstride, x5 = res
//
// void x265_sad_x4_8xH_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1,
//                           const pixel* fref2, const pixel* fref3, intptr_t frefstride,
//                           int32_t* res)
//   x0 = fenc, x1..x4 = fref0..fref3, x5 = frefstride, x6 = res
//
// x9 holds FENC_STRIDE, the row stride of fenc. v16..v19 accumulate the
// per-reference absolute differences in eight 16-bit lanes each.

// First row: UABDL initialises the accumulators, so they need no clearing.
.macro SAD_X_START_8 x
    ld1             {v0.8b}, [x0], x9
.if \x == 3
    ld1             {v1.8b}, [x1], x4
    ld1             {v2.8b}, [x2], x4
    ld1             {v3.8b}, [x3], x4
.elseif \x == 4
    ld1             {v1.8b}, [x1], x5
    ld1             {v2.8b}, [x2], x5
    ld1             {v3.8b}, [x3], x5
    ld1             {v4.8b}, [x4], x5
.endif
    uabdl           v16.8h, v0.8b, v1.8b
    uabdl           v17.8h, v0.8b, v2.8b
    uabdl           v18.8h, v0.8b, v3.8b
.if \x == 4
    uabdl           v19.8h, v0.8b, v4.8b
.endif
.endm

// Every following row: UABAL adds the absolute differences to the accumulators.
.macro SAD_X_8 x
    ld1             {v0.8b}, [x0], x9
.if \x == 3
    ld1             {v1.8b}, [x1], x4
    ld1             {v2.8b}, [x2], x4
    ld1             {v3.8b}, [x3], x4
.elseif \x == 4
    ld1             {v1.8b}, [x1], x5
    ld1             {v2.8b}, [x2], x5
    ld1             {v3.8b}, [x3], x5
    ld1             {v4.8b}, [x4], x5
.endif
    uabal           v16.8h, v0.8b, v1.8b
    uabal           v17.8h, v0.8b, v2.8b
    uabal           v18.8h, v0.8b, v3.8b
.if \x == 4
    uabal           v19.8h, v0.8b, v4.8b
.endif
.endm

// Emits x265_sad_x<x>_8x<h>_neon: one start row, h - 1 accumulate rows, then
// a horizontal reduction of each accumulator to 32 bits and a store to res.
.macro SAD_X_8xN x, h
function x265_sad_x\x\()_8x\h\()_neon
    mov             x9, #FENC_STRIDE
    SAD_X_START_8 \x
.rept \h - 1
    SAD_X_8 \x
.endr
    uaddlv          s0, v16.8h
    uaddlv          s1, v17.8h
    uaddlv          s2, v18.8h
.if \x == 4
    uaddlv          s3, v19.8h
.endif

.if \x == 3
    stp             s0, s1, [x5]
    str             s2, [x5, #8]
.elseif \x == 4
    stp             s0, s1, [x6]
    stp             s2, s3, [x6, #8]
.endif
    ret
endfunc
.endm

SAD_X_8xN 3 4
SAD_X_8xN 3 8
SAD_X_8xN 3 16
SAD_X_8xN 3 32

SAD_X_8xN 4 4
SAD_X_8xN 4 8
SAD_X_8xN 4 16
SAD_X_8xN 4 32
View file
x265_3.3.tar.gz/source/common/arm/asm-primitives.cpp -> x265_3.4.tar.gz/source/common/arm/asm-primitives.cpp
Changed
@@ -5,6 +5,7 @@ * Praveen Kumar Tiwari <praveen@multicorewareinc.com> * Min Chen <chenm003@163.com> <min.chen@multicorewareinc.com> * Dnyaneshwar Gorade <dnyaneshwar@multicorewareinc.com> + * Hongbin Liu<liuhongbin1@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -48,77 +49,77 @@ p.ssim_4x4x2_core = PFX(ssim_4x4x2_core_neon); // addAvg - p.puLUMA_4x4.addAvg = PFX(addAvg_4x4_neon); - p.puLUMA_4x8.addAvg = PFX(addAvg_4x8_neon); - p.puLUMA_4x16.addAvg = PFX(addAvg_4x16_neon); - p.puLUMA_8x4.addAvg = PFX(addAvg_8x4_neon); - p.puLUMA_8x8.addAvg = PFX(addAvg_8x8_neon); - p.puLUMA_8x16.addAvg = PFX(addAvg_8x16_neon); - p.puLUMA_8x32.addAvg = PFX(addAvg_8x32_neon); - p.puLUMA_12x16.addAvg = PFX(addAvg_12x16_neon); - p.puLUMA_16x4.addAvg = PFX(addAvg_16x4_neon); - p.puLUMA_16x8.addAvg = PFX(addAvg_16x8_neon); - p.puLUMA_16x12.addAvg = PFX(addAvg_16x12_neon); - p.puLUMA_16x16.addAvg = PFX(addAvg_16x16_neon); - p.puLUMA_16x32.addAvg = PFX(addAvg_16x32_neon); - p.puLUMA_16x64.addAvg = PFX(addAvg_16x64_neon); - p.puLUMA_24x32.addAvg = PFX(addAvg_24x32_neon); - p.puLUMA_32x8.addAvg = PFX(addAvg_32x8_neon); - p.puLUMA_32x16.addAvg = PFX(addAvg_32x16_neon); - p.puLUMA_32x24.addAvg = PFX(addAvg_32x24_neon); - p.puLUMA_32x32.addAvg = PFX(addAvg_32x32_neon); - p.puLUMA_32x64.addAvg = PFX(addAvg_32x64_neon); - p.puLUMA_48x64.addAvg = PFX(addAvg_48x64_neon); - p.puLUMA_64x16.addAvg = PFX(addAvg_64x16_neon); - p.puLUMA_64x32.addAvg = PFX(addAvg_64x32_neon); - p.puLUMA_64x48.addAvg = PFX(addAvg_64x48_neon); - p.puLUMA_64x64.addAvg = PFX(addAvg_64x64_neon); + p.puLUMA_4x4.addAvgNONALIGNED = PFX(addAvg_4x4_neon); + p.puLUMA_4x8.addAvgNONALIGNED = PFX(addAvg_4x8_neon); + p.puLUMA_4x16.addAvgNONALIGNED = PFX(addAvg_4x16_neon); + p.puLUMA_8x4.addAvgNONALIGNED = PFX(addAvg_8x4_neon); + p.puLUMA_8x8.addAvgNONALIGNED = PFX(addAvg_8x8_neon); + p.puLUMA_8x16.addAvgNONALIGNED = 
PFX(addAvg_8x16_neon); + p.puLUMA_8x32.addAvgNONALIGNED = PFX(addAvg_8x32_neon); + p.puLUMA_12x16.addAvgNONALIGNED = PFX(addAvg_12x16_neon); + p.puLUMA_16x4.addAvgNONALIGNED = PFX(addAvg_16x4_neon); + p.puLUMA_16x8.addAvgNONALIGNED = PFX(addAvg_16x8_neon); + p.puLUMA_16x12.addAvgNONALIGNED = PFX(addAvg_16x12_neon); + p.puLUMA_16x16.addAvgNONALIGNED = PFX(addAvg_16x16_neon); + p.puLUMA_16x32.addAvgNONALIGNED = PFX(addAvg_16x32_neon); + p.puLUMA_16x64.addAvgNONALIGNED = PFX(addAvg_16x64_neon); + p.puLUMA_24x32.addAvgNONALIGNED = PFX(addAvg_24x32_neon); + p.puLUMA_32x8.addAvgNONALIGNED = PFX(addAvg_32x8_neon); + p.puLUMA_32x16.addAvgNONALIGNED = PFX(addAvg_32x16_neon); + p.puLUMA_32x24.addAvgNONALIGNED = PFX(addAvg_32x24_neon); + p.puLUMA_32x32.addAvgNONALIGNED = PFX(addAvg_32x32_neon); + p.puLUMA_32x64.addAvgNONALIGNED = PFX(addAvg_32x64_neon); + p.puLUMA_48x64.addAvgNONALIGNED = PFX(addAvg_48x64_neon); + p.puLUMA_64x16.addAvgNONALIGNED = PFX(addAvg_64x16_neon); + p.puLUMA_64x32.addAvgNONALIGNED = PFX(addAvg_64x32_neon); + p.puLUMA_64x48.addAvgNONALIGNED = PFX(addAvg_64x48_neon); + p.puLUMA_64x64.addAvgNONALIGNED = PFX(addAvg_64x64_neon); // chroma addAvg - p.chromaX265_CSP_I420.puCHROMA_420_4x2.addAvg = PFX(addAvg_4x2_neon); - p.chromaX265_CSP_I420.puCHROMA_420_4x4.addAvg = PFX(addAvg_4x4_neon); - p.chromaX265_CSP_I420.puCHROMA_420_4x8.addAvg = PFX(addAvg_4x8_neon); - p.chromaX265_CSP_I420.puCHROMA_420_4x16.addAvg = PFX(addAvg_4x16_neon); - p.chromaX265_CSP_I420.puCHROMA_420_6x8.addAvg = PFX(addAvg_6x8_neon); - p.chromaX265_CSP_I420.puCHROMA_420_8x2.addAvg = PFX(addAvg_8x2_neon); - p.chromaX265_CSP_I420.puCHROMA_420_8x4.addAvg = PFX(addAvg_8x4_neon); - p.chromaX265_CSP_I420.puCHROMA_420_8x6.addAvg = PFX(addAvg_8x6_neon); - p.chromaX265_CSP_I420.puCHROMA_420_8x8.addAvg = PFX(addAvg_8x8_neon); - p.chromaX265_CSP_I420.puCHROMA_420_8x16.addAvg = PFX(addAvg_8x16_neon); - p.chromaX265_CSP_I420.puCHROMA_420_8x32.addAvg = PFX(addAvg_8x32_neon); - 
p.chromaX265_CSP_I420.puCHROMA_420_12x16.addAvg = PFX(addAvg_12x16_neon); - p.chromaX265_CSP_I420.puCHROMA_420_16x4.addAvg = PFX(addAvg_16x4_neon); - p.chromaX265_CSP_I420.puCHROMA_420_16x8.addAvg = PFX(addAvg_16x8_neon); - p.chromaX265_CSP_I420.puCHROMA_420_16x12.addAvg = PFX(addAvg_16x12_neon); - p.chromaX265_CSP_I420.puCHROMA_420_16x16.addAvg = PFX(addAvg_16x16_neon); - p.chromaX265_CSP_I420.puCHROMA_420_16x32.addAvg = PFX(addAvg_16x32_neon); - p.chromaX265_CSP_I420.puCHROMA_420_24x32.addAvg = PFX(addAvg_24x32_neon); - p.chromaX265_CSP_I420.puCHROMA_420_32x8.addAvg = PFX(addAvg_32x8_neon); - p.chromaX265_CSP_I420.puCHROMA_420_32x16.addAvg = PFX(addAvg_32x16_neon); - p.chromaX265_CSP_I420.puCHROMA_420_32x24.addAvg = PFX(addAvg_32x24_neon); - p.chromaX265_CSP_I420.puCHROMA_420_32x32.addAvg = PFX(addAvg_32x32_neon); - - p.chromaX265_CSP_I422.puCHROMA_422_4x8.addAvg = PFX(addAvg_4x8_neon); - p.chromaX265_CSP_I422.puCHROMA_422_4x16.addAvg = PFX(addAvg_4x16_neon); - p.chromaX265_CSP_I422.puCHROMA_422_4x32.addAvg = PFX(addAvg_4x32_neon); - p.chromaX265_CSP_I422.puCHROMA_422_6x16.addAvg = PFX(addAvg_6x16_neon); - p.chromaX265_CSP_I422.puCHROMA_422_8x4.addAvg = PFX(addAvg_8x4_neon); - p.chromaX265_CSP_I422.puCHROMA_422_8x8.addAvg = PFX(addAvg_8x8_neon); - p.chromaX265_CSP_I422.puCHROMA_422_8x12.addAvg = PFX(addAvg_8x12_neon); - p.chromaX265_CSP_I422.puCHROMA_422_8x16.addAvg = PFX(addAvg_8x16_neon); - p.chromaX265_CSP_I422.puCHROMA_422_8x32.addAvg = PFX(addAvg_8x32_neon); - p.chromaX265_CSP_I422.puCHROMA_422_8x64.addAvg = PFX(addAvg_8x64_neon); - p.chromaX265_CSP_I422.puCHROMA_422_12x32.addAvg = PFX(addAvg_12x32_neon); - p.chromaX265_CSP_I422.puCHROMA_422_16x8.addAvg = PFX(addAvg_16x8_neon); - p.chromaX265_CSP_I422.puCHROMA_422_16x16.addAvg = PFX(addAvg_16x16_neon); - p.chromaX265_CSP_I422.puCHROMA_422_16x24.addAvg = PFX(addAvg_16x24_neon); - p.chromaX265_CSP_I422.puCHROMA_422_16x32.addAvg = PFX(addAvg_16x32_neon); - p.chromaX265_CSP_I422.puCHROMA_422_16x64.addAvg = 
PFX(addAvg_16x64_neon); - p.chromaX265_CSP_I422.puCHROMA_422_24x64.addAvg = PFX(addAvg_24x64_neon); - p.chromaX265_CSP_I422.puCHROMA_422_32x16.addAvg = PFX(addAvg_32x16_neon); - p.chromaX265_CSP_I422.puCHROMA_422_32x32.addAvg = PFX(addAvg_32x32_neon); - p.chromaX265_CSP_I422.puCHROMA_422_32x48.addAvg = PFX(addAvg_32x48_neon); - p.chromaX265_CSP_I422.puCHROMA_422_32x64.addAvg = PFX(addAvg_32x64_neon); + p.chromaX265_CSP_I420.puCHROMA_420_4x2.addAvgNONALIGNED = PFX(addAvg_4x2_neon); + p.chromaX265_CSP_I420.puCHROMA_420_4x4.addAvgNONALIGNED = PFX(addAvg_4x4_neon); + p.chromaX265_CSP_I420.puCHROMA_420_4x8.addAvgNONALIGNED = PFX(addAvg_4x8_neon); + p.chromaX265_CSP_I420.puCHROMA_420_4x16.addAvgNONALIGNED = PFX(addAvg_4x16_neon); + p.chromaX265_CSP_I420.puCHROMA_420_6x8.addAvgNONALIGNED = PFX(addAvg_6x8_neon); + p.chromaX265_CSP_I420.puCHROMA_420_8x2.addAvgNONALIGNED = PFX(addAvg_8x2_neon); + p.chromaX265_CSP_I420.puCHROMA_420_8x4.addAvgNONALIGNED = PFX(addAvg_8x4_neon); + p.chromaX265_CSP_I420.puCHROMA_420_8x6.addAvgNONALIGNED = PFX(addAvg_8x6_neon); + p.chromaX265_CSP_I420.puCHROMA_420_8x8.addAvgNONALIGNED = PFX(addAvg_8x8_neon); + p.chromaX265_CSP_I420.puCHROMA_420_8x16.addAvgNONALIGNED = PFX(addAvg_8x16_neon); + p.chromaX265_CSP_I420.puCHROMA_420_8x32.addAvgNONALIGNED = PFX(addAvg_8x32_neon); + p.chromaX265_CSP_I420.puCHROMA_420_12x16.addAvgNONALIGNED = PFX(addAvg_12x16_neon); + p.chromaX265_CSP_I420.puCHROMA_420_16x4.addAvgNONALIGNED = PFX(addAvg_16x4_neon); + p.chromaX265_CSP_I420.puCHROMA_420_16x8.addAvgNONALIGNED = PFX(addAvg_16x8_neon); + p.chromaX265_CSP_I420.puCHROMA_420_16x12.addAvgNONALIGNED = PFX(addAvg_16x12_neon); + p.chromaX265_CSP_I420.puCHROMA_420_16x16.addAvgNONALIGNED = PFX(addAvg_16x16_neon); + p.chromaX265_CSP_I420.puCHROMA_420_16x32.addAvgNONALIGNED = PFX(addAvg_16x32_neon); + p.chromaX265_CSP_I420.puCHROMA_420_24x32.addAvgNONALIGNED = PFX(addAvg_24x32_neon); + p.chromaX265_CSP_I420.puCHROMA_420_32x8.addAvgNONALIGNED = PFX(addAvg_32x8_neon); + 
p.chromaX265_CSP_I420.puCHROMA_420_32x16.addAvgNONALIGNED = PFX(addAvg_32x16_neon); + p.chromaX265_CSP_I420.puCHROMA_420_32x24.addAvgNONALIGNED = PFX(addAvg_32x24_neon); + p.chromaX265_CSP_I420.puCHROMA_420_32x32.addAvgNONALIGNED = PFX(addAvg_32x32_neon); + + p.chromaX265_CSP_I422.puCHROMA_422_4x8.addAvgNONALIGNED = PFX(addAvg_4x8_neon); + p.chromaX265_CSP_I422.puCHROMA_422_4x16.addAvgNONALIGNED = PFX(addAvg_4x16_neon); + p.chromaX265_CSP_I422.puCHROMA_422_4x32.addAvgNONALIGNED = PFX(addAvg_4x32_neon); + p.chromaX265_CSP_I422.puCHROMA_422_6x16.addAvgNONALIGNED = PFX(addAvg_6x16_neon); + p.chromaX265_CSP_I422.puCHROMA_422_8x4.addAvgNONALIGNED = PFX(addAvg_8x4_neon); + p.chromaX265_CSP_I422.puCHROMA_422_8x8.addAvgNONALIGNED = PFX(addAvg_8x8_neon); + p.chromaX265_CSP_I422.puCHROMA_422_8x12.addAvgNONALIGNED = PFX(addAvg_8x12_neon); + p.chromaX265_CSP_I422.puCHROMA_422_8x16.addAvgNONALIGNED = PFX(addAvg_8x16_neon); + p.chromaX265_CSP_I422.puCHROMA_422_8x32.addAvgNONALIGNED = PFX(addAvg_8x32_neon); + p.chromaX265_CSP_I422.puCHROMA_422_8x64.addAvgNONALIGNED = PFX(addAvg_8x64_neon); + p.chromaX265_CSP_I422.puCHROMA_422_12x32.addAvgNONALIGNED = PFX(addAvg_12x32_neon); + p.chromaX265_CSP_I422.puCHROMA_422_16x8.addAvgNONALIGNED = PFX(addAvg_16x8_neon); + p.chromaX265_CSP_I422.puCHROMA_422_16x16.addAvgNONALIGNED = PFX(addAvg_16x16_neon); + p.chromaX265_CSP_I422.puCHROMA_422_16x24.addAvgNONALIGNED = PFX(addAvg_16x24_neon); + p.chromaX265_CSP_I422.puCHROMA_422_16x32.addAvgNONALIGNED = PFX(addAvg_16x32_neon); + p.chromaX265_CSP_I422.puCHROMA_422_16x64.addAvgNONALIGNED = PFX(addAvg_16x64_neon); + p.chromaX265_CSP_I422.puCHROMA_422_24x64.addAvgNONALIGNED = PFX(addAvg_24x64_neon); + p.chromaX265_CSP_I422.puCHROMA_422_32x16.addAvgNONALIGNED = PFX(addAvg_32x16_neon); + p.chromaX265_CSP_I422.puCHROMA_422_32x32.addAvgNONALIGNED = PFX(addAvg_32x32_neon); + p.chromaX265_CSP_I422.puCHROMA_422_32x48.addAvgNONALIGNED = PFX(addAvg_32x48_neon); + 
p.chromaX265_CSP_I422.puCHROMA_422_32x64.addAvgNONALIGNED = PFX(addAvg_32x64_neon); // quant p.quant = PFX(quant_neon); @@ -402,7 +403,7 @@ p.scale2D_64to32 = PFX(scale2D_64to32_neon); // scale1D_128to64 - p.scale1D_128to64 = PFX(scale1D_128to64_neon); + p.scale1D_128to64NONALIGNED = PFX(scale1D_128to64_neon); // copy_count p.cuBLOCK_4x4.copy_cnt = PFX(copy_cnt_4_neon); @@ -411,37 +412,37 @@ p.cuBLOCK_32x32.copy_cnt = PFX(copy_cnt_32_neon); // filterPixelToShort - p.puLUMA_4x4.convert_p2s = PFX(filterPixelToShort_4x4_neon); - p.puLUMA_4x8.convert_p2s = PFX(filterPixelToShort_4x8_neon); - p.puLUMA_4x16.convert_p2s = PFX(filterPixelToShort_4x16_neon); - p.puLUMA_8x4.convert_p2s = PFX(filterPixelToShort_8x4_neon); - p.puLUMA_8x8.convert_p2s = PFX(filterPixelToShort_8x8_neon); - p.puLUMA_8x16.convert_p2s = PFX(filterPixelToShort_8x16_neon); - p.puLUMA_8x32.convert_p2s = PFX(filterPixelToShort_8x32_neon); - p.puLUMA_12x16.convert_p2s = PFX(filterPixelToShort_12x16_neon); - p.puLUMA_16x4.convert_p2s = PFX(filterPixelToShort_16x4_neon); - p.puLUMA_16x8.convert_p2s = PFX(filterPixelToShort_16x8_neon); - p.puLUMA_16x12.convert_p2s = PFX(filterPixelToShort_16x12_neon); - p.puLUMA_16x16.convert_p2s = PFX(filterPixelToShort_16x16_neon); - p.puLUMA_16x32.convert_p2s = PFX(filterPixelToShort_16x32_neon); - p.puLUMA_16x64.convert_p2s = PFX(filterPixelToShort_16x64_neon); - p.puLUMA_24x32.convert_p2s = PFX(filterPixelToShort_24x32_neon); - p.puLUMA_32x8.convert_p2s = PFX(filterPixelToShort_32x8_neon); - p.puLUMA_32x16.convert_p2s = PFX(filterPixelToShort_32x16_neon); - p.puLUMA_32x24.convert_p2s = PFX(filterPixelToShort_32x24_neon); - p.puLUMA_32x32.convert_p2s = PFX(filterPixelToShort_32x32_neon); - p.puLUMA_32x64.convert_p2s = PFX(filterPixelToShort_32x64_neon); - p.puLUMA_48x64.convert_p2s = PFX(filterPixelToShort_48x64_neon); - p.puLUMA_64x16.convert_p2s = PFX(filterPixelToShort_64x16_neon); - p.puLUMA_64x32.convert_p2s = PFX(filterPixelToShort_64x32_neon); - 
p.puLUMA_64x48.convert_p2s = PFX(filterPixelToShort_64x48_neon); - p.puLUMA_64x64.convert_p2s = PFX(filterPixelToShort_64x64_neon); + p.puLUMA_4x4.convert_p2sNONALIGNED = PFX(filterPixelToShort_4x4_neon); + p.puLUMA_4x8.convert_p2sNONALIGNED = PFX(filterPixelToShort_4x8_neon); + p.puLUMA_4x16.convert_p2sNONALIGNED = PFX(filterPixelToShort_4x16_neon); + p.puLUMA_8x4.convert_p2sNONALIGNED = PFX(filterPixelToShort_8x4_neon); + p.puLUMA_8x8.convert_p2sNONALIGNED = PFX(filterPixelToShort_8x8_neon); + p.puLUMA_8x16.convert_p2sNONALIGNED = PFX(filterPixelToShort_8x16_neon); + p.puLUMA_8x32.convert_p2sNONALIGNED = PFX(filterPixelToShort_8x32_neon); + p.puLUMA_12x16.convert_p2sNONALIGNED = PFX(filterPixelToShort_12x16_neon); + p.puLUMA_16x4.convert_p2sNONALIGNED = PFX(filterPixelToShort_16x4_neon); + p.puLUMA_16x8.convert_p2sNONALIGNED = PFX(filterPixelToShort_16x8_neon); + p.puLUMA_16x12.convert_p2sNONALIGNED = PFX(filterPixelToShort_16x12_neon); + p.puLUMA_16x16.convert_p2sNONALIGNED = PFX(filterPixelToShort_16x16_neon); + p.puLUMA_16x32.convert_p2sNONALIGNED = PFX(filterPixelToShort_16x32_neon); + p.puLUMA_16x64.convert_p2sNONALIGNED = PFX(filterPixelToShort_16x64_neon); + p.puLUMA_24x32.convert_p2sNONALIGNED = PFX(filterPixelToShort_24x32_neon); + p.puLUMA_32x8.convert_p2sNONALIGNED = PFX(filterPixelToShort_32x8_neon); + p.puLUMA_32x16.convert_p2sNONALIGNED = PFX(filterPixelToShort_32x16_neon); + p.puLUMA_32x24.convert_p2sNONALIGNED = PFX(filterPixelToShort_32x24_neon); + p.puLUMA_32x32.convert_p2sNONALIGNED = PFX(filterPixelToShort_32x32_neon); + p.puLUMA_32x64.convert_p2sNONALIGNED = PFX(filterPixelToShort_32x64_neon); + p.puLUMA_48x64.convert_p2sNONALIGNED = PFX(filterPixelToShort_48x64_neon); + p.puLUMA_64x16.convert_p2sNONALIGNED = PFX(filterPixelToShort_64x16_neon); + p.puLUMA_64x32.convert_p2sNONALIGNED = PFX(filterPixelToShort_64x32_neon); + p.puLUMA_64x48.convert_p2sNONALIGNED = PFX(filterPixelToShort_64x48_neon); + p.puLUMA_64x64.convert_p2sNONALIGNED = 
PFX(filterPixelToShort_64x64_neon); // Block_fill - p.cuBLOCK_4x4.blockfill_s = PFX(blockfill_s_4x4_neon); - p.cuBLOCK_8x8.blockfill_s = PFX(blockfill_s_8x8_neon); - p.cuBLOCK_16x16.blockfill_s = PFX(blockfill_s_16x16_neon); - p.cuBLOCK_32x32.blockfill_s = PFX(blockfill_s_32x32_neon); + p.cuBLOCK_4x4.blockfill_sNONALIGNED = PFX(blockfill_s_4x4_neon); + p.cuBLOCK_8x8.blockfill_sNONALIGNED = PFX(blockfill_s_8x8_neon); + p.cuBLOCK_16x16.blockfill_sNONALIGNED = PFX(blockfill_s_16x16_neon); + p.cuBLOCK_32x32.blockfill_sNONALIGNED = PFX(blockfill_s_32x32_neon); // Blockcopy_ss p.cuBLOCK_4x4.copy_ss = PFX(blockcopy_ss_4x4_neon); @@ -495,21 +496,21 @@ p.chromaX265_CSP_I422.cuBLOCK_422_32x64.copy_sp = PFX(blockcopy_sp_32x64_neon); // pixel_add_ps - p.cuBLOCK_4x4.add_ps = PFX(pixel_add_ps_4x4_neon); - p.cuBLOCK_8x8.add_ps = PFX(pixel_add_ps_8x8_neon); - p.cuBLOCK_16x16.add_ps = PFX(pixel_add_ps_16x16_neon); - p.cuBLOCK_32x32.add_ps = PFX(pixel_add_ps_32x32_neon); - p.cuBLOCK_64x64.add_ps = PFX(pixel_add_ps_64x64_neon); + p.cuBLOCK_4x4.add_psNONALIGNED = PFX(pixel_add_ps_4x4_neon); + p.cuBLOCK_8x8.add_psNONALIGNED = PFX(pixel_add_ps_8x8_neon); + p.cuBLOCK_16x16.add_psNONALIGNED = PFX(pixel_add_ps_16x16_neon); + p.cuBLOCK_32x32.add_psNONALIGNED = PFX(pixel_add_ps_32x32_neon); + p.cuBLOCK_64x64.add_psNONALIGNED = PFX(pixel_add_ps_64x64_neon); // chroma add_ps - p.chromaX265_CSP_I420.cuBLOCK_420_4x4.add_ps = PFX(pixel_add_ps_4x4_neon); - p.chromaX265_CSP_I420.cuBLOCK_420_8x8.add_ps = PFX(pixel_add_ps_8x8_neon); - p.chromaX265_CSP_I420.cuBLOCK_420_16x16.add_ps = PFX(pixel_add_ps_16x16_neon); - p.chromaX265_CSP_I420.cuBLOCK_420_32x32.add_ps = PFX(pixel_add_ps_32x32_neon); - p.chromaX265_CSP_I422.cuBLOCK_422_4x8.add_ps = PFX(pixel_add_ps_4x8_neon); - p.chromaX265_CSP_I422.cuBLOCK_422_8x16.add_ps = PFX(pixel_add_ps_8x16_neon); - p.chromaX265_CSP_I422.cuBLOCK_422_16x32.add_ps = PFX(pixel_add_ps_16x32_neon); - p.chromaX265_CSP_I422.cuBLOCK_422_32x64.add_ps = 
PFX(pixel_add_ps_32x64_neon); + p.chromaX265_CSP_I420.cuBLOCK_420_4x4.add_psNONALIGNED = PFX(pixel_add_ps_4x4_neon); + p.chromaX265_CSP_I420.cuBLOCK_420_8x8.add_psNONALIGNED = PFX(pixel_add_ps_8x8_neon); + p.chromaX265_CSP_I420.cuBLOCK_420_16x16.add_psNONALIGNED = PFX(pixel_add_ps_16x16_neon); + p.chromaX265_CSP_I420.cuBLOCK_420_32x32.add_psNONALIGNED = PFX(pixel_add_ps_32x32_neon); + p.chromaX265_CSP_I422.cuBLOCK_422_4x8.add_psNONALIGNED = PFX(pixel_add_ps_4x8_neon); + p.chromaX265_CSP_I422.cuBLOCK_422_8x16.add_psNONALIGNED = PFX(pixel_add_ps_8x16_neon); + p.chromaX265_CSP_I422.cuBLOCK_422_16x32.add_psNONALIGNED = PFX(pixel_add_ps_16x32_neon); + p.chromaX265_CSP_I422.cuBLOCK_422_32x64.add_psNONALIGNED = PFX(pixel_add_ps_32x64_neon); // cpy2Dto1D_shr p.cuBLOCK_4x4.cpy2Dto1D_shr = PFX(cpy2Dto1D_shr_4x4_neon); @@ -518,10 +519,10 @@ p.cuBLOCK_32x32.cpy2Dto1D_shr = PFX(cpy2Dto1D_shr_32x32_neon); // ssd_s - p.cuBLOCK_4x4.ssd_s = PFX(pixel_ssd_s_4x4_neon); - p.cuBLOCK_8x8.ssd_s = PFX(pixel_ssd_s_8x8_neon); - p.cuBLOCK_16x16.ssd_s = PFX(pixel_ssd_s_16x16_neon); - p.cuBLOCK_32x32.ssd_s = PFX(pixel_ssd_s_32x32_neon); + p.cuBLOCK_4x4.ssd_sNONALIGNED = PFX(pixel_ssd_s_4x4_neon); + p.cuBLOCK_8x8.ssd_sNONALIGNED = PFX(pixel_ssd_s_8x8_neon); + p.cuBLOCK_16x16.ssd_sNONALIGNED = PFX(pixel_ssd_s_16x16_neon); + p.cuBLOCK_32x32.ssd_sNONALIGNED = PFX(pixel_ssd_s_32x32_neon); // sse_ss p.cuBLOCK_4x4.sse_ss = PFX(pixel_sse_ss_4x4_neon); @@ -548,10 +549,10 @@ p.chromaX265_CSP_I422.cuBLOCK_422_32x64.sub_ps = PFX(pixel_sub_ps_32x64_neon); // calc_Residual - p.cuBLOCK_4x4.calcresidual = PFX(getResidual4_neon); - p.cuBLOCK_8x8.calcresidual = PFX(getResidual8_neon); - p.cuBLOCK_16x16.calcresidual = PFX(getResidual16_neon); - p.cuBLOCK_32x32.calcresidual = PFX(getResidual32_neon); + p.cuBLOCK_4x4.calcresidualNONALIGNED = PFX(getResidual4_neon); + p.cuBLOCK_8x8.calcresidualNONALIGNED = PFX(getResidual8_neon); + p.cuBLOCK_16x16.calcresidualNONALIGNED = PFX(getResidual16_neon); + 
p.cuBLOCK_32x32.calcresidualNONALIGNED = PFX(getResidual32_neon); // sse_pp p.cuBLOCK_4x4.sse_pp = PFX(pixel_sse_pp_4x4_neon); @@ -722,31 +723,31 @@ p.puLUMA_64x64.sad_x4 = PFX(sad_x4_64x64_neon); // pixel_avg_pp - p.puLUMA_4x4.pixelavg_pp = PFX(pixel_avg_pp_4x4_neon); - p.puLUMA_4x8.pixelavg_pp = PFX(pixel_avg_pp_4x8_neon); - p.puLUMA_4x16.pixelavg_pp = PFX(pixel_avg_pp_4x16_neon); - p.puLUMA_8x4.pixelavg_pp = PFX(pixel_avg_pp_8x4_neon); - p.puLUMA_8x8.pixelavg_pp = PFX(pixel_avg_pp_8x8_neon); - p.puLUMA_8x16.pixelavg_pp = PFX(pixel_avg_pp_8x16_neon); - p.puLUMA_8x32.pixelavg_pp = PFX(pixel_avg_pp_8x32_neon); - p.puLUMA_12x16.pixelavg_pp = PFX(pixel_avg_pp_12x16_neon); - p.puLUMA_16x4.pixelavg_pp = PFX(pixel_avg_pp_16x4_neon); - p.puLUMA_16x8.pixelavg_pp = PFX(pixel_avg_pp_16x8_neon); - p.puLUMA_16x12.pixelavg_pp = PFX(pixel_avg_pp_16x12_neon); - p.puLUMA_16x16.pixelavg_pp = PFX(pixel_avg_pp_16x16_neon); - p.puLUMA_16x32.pixelavg_pp = PFX(pixel_avg_pp_16x32_neon); - p.puLUMA_16x64.pixelavg_pp = PFX(pixel_avg_pp_16x64_neon); - p.puLUMA_24x32.pixelavg_pp = PFX(pixel_avg_pp_24x32_neon); - p.puLUMA_32x8.pixelavg_pp = PFX(pixel_avg_pp_32x8_neon); - p.puLUMA_32x16.pixelavg_pp = PFX(pixel_avg_pp_32x16_neon); - p.puLUMA_32x24.pixelavg_pp = PFX(pixel_avg_pp_32x24_neon); - p.puLUMA_32x32.pixelavg_pp = PFX(pixel_avg_pp_32x32_neon); - p.puLUMA_32x64.pixelavg_pp = PFX(pixel_avg_pp_32x64_neon); - p.puLUMA_48x64.pixelavg_pp = PFX(pixel_avg_pp_48x64_neon); - p.puLUMA_64x16.pixelavg_pp = PFX(pixel_avg_pp_64x16_neon); - p.puLUMA_64x32.pixelavg_pp = PFX(pixel_avg_pp_64x32_neon); - p.puLUMA_64x48.pixelavg_pp = PFX(pixel_avg_pp_64x48_neon); - p.puLUMA_64x64.pixelavg_pp = PFX(pixel_avg_pp_64x64_neon); + p.puLUMA_4x4.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_4x4_neon); + p.puLUMA_4x8.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_4x8_neon); + p.puLUMA_4x16.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_4x16_neon); + p.puLUMA_8x4.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_8x4_neon); + 
p.puLUMA_8x8.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_8x8_neon); + p.puLUMA_8x16.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_8x16_neon); + p.puLUMA_8x32.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_8x32_neon); + p.puLUMA_12x16.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_12x16_neon); + p.puLUMA_16x4.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_16x4_neon); + p.puLUMA_16x8.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_16x8_neon); + p.puLUMA_16x12.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_16x12_neon); + p.puLUMA_16x16.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_16x16_neon); + p.puLUMA_16x32.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_16x32_neon); + p.puLUMA_16x64.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_16x64_neon); + p.puLUMA_24x32.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_24x32_neon); + p.puLUMA_32x8.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_32x8_neon); + p.puLUMA_32x16.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_32x16_neon); + p.puLUMA_32x24.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_32x24_neon); + p.puLUMA_32x32.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_32x32_neon); + p.puLUMA_32x64.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_32x64_neon); + p.puLUMA_48x64.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_48x64_neon); + p.puLUMA_64x16.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_64x16_neon); + p.puLUMA_64x32.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_64x32_neon); + p.puLUMA_64x48.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_64x48_neon); + p.puLUMA_64x64.pixelavg_ppNONALIGNED = PFX(pixel_avg_pp_64x64_neon); // planecopy p.planecopy_cp = PFX(pixel_planecopy_cp_neon);
View file
x265_3.3.tar.gz/source/common/common.h -> x265_3.4.tar.gz/source/common/common.h
Changed
@@ -129,6 +129,7 @@ typedef uint64_t sum2_t; typedef uint64_t pixel4; typedef int64_t ssum2_t; +#define SHIFT_TO_BITPLANE 9 #define HISTOGRAM_BINS 1024 #else typedef uint8_t pixel; @@ -136,6 +137,7 @@ typedef uint32_t sum2_t; typedef uint32_t pixel4; typedef int32_t ssum2_t; // Signed sum +#define SHIFT_TO_BITPLANE 7 #define HISTOGRAM_BINS 256 #endif // if HIGH_BIT_DEPTH @@ -270,6 +272,9 @@ #define MAX_TR_SIZE (1 << MAX_LOG2_TR_SIZE) #define MAX_TS_SIZE (1 << MAX_LOG2_TS_SIZE) +#define RDCOST_BASED_RSKIP 1 +#define EDGE_BASED_RSKIP 2 + #define COEF_REMAIN_BIN_REDUCTION 3 // indicates the level at which the VLC // transitions from Golomb-Rice to TU+EG(k)
View file
x265_3.3.tar.gz/source/common/cpu.cpp -> x265_3.4.tar.gz/source/common/cpu.cpp
Changed
@@ -5,6 +5,8 @@ * Laurent Aimar <fenrir@via.ecp.fr> * Fiona Glaser <fiona@x264.com> * Steve Borho <steve@borho.org> + * Hongbin Liu <liuhongbin1@huawei.com> + * Yimeng Su <yimeng.su@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -367,6 +369,8 @@ flags |= PFX(cpu_fast_neon_mrc_test)() ? X265_CPU_FAST_NEON_MRC : 0; #endif // TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc) +#elif X265_ARCH_ARM64 + flags |= X265_CPU_NEON; #endif // if HAVE_ARMV6 return flags; }
View file
x265_3.3.tar.gz/source/common/frame.cpp -> x265_3.4.tar.gz/source/common/frame.cpp
Changed
@@ -61,6 +61,8 @@ m_edgePic = NULL; m_gaussianPic = NULL; m_thetaPic = NULL; + m_edgeBitPlane = NULL; + m_edgeBitPic = NULL; } bool Frame::create(x265_param *param, float* quantOffsets) @@ -115,6 +117,19 @@ m_thetaPic = X265_MALLOC(pixel, m_stride * (maxHeight + (m_lumaMarginY * 2))); } + if (param->recursionSkipMode == EDGE_BASED_RSKIP) + { + uint32_t numCuInWidth = (param->sourceWidth + param->maxCUSize - 1) / param->maxCUSize; + uint32_t numCuInHeight = (param->sourceHeight + param->maxCUSize - 1) / param->maxCUSize; + uint32_t lumaMarginX = param->maxCUSize + 32; + uint32_t lumaMarginY = param->maxCUSize + 16; + uint32_t stride = (numCuInWidth * param->maxCUSize) + (lumaMarginX << 1); + uint32_t maxHeight = numCuInHeight * param->maxCUSize; + uint32_t bitPlaneSize = stride * (maxHeight + (lumaMarginY * 2)); + CHECKED_MALLOC_ZERO(m_edgeBitPlane, pixel, bitPlaneSize); + m_edgeBitPic = m_edgeBitPlane + lumaMarginY * stride + lumaMarginX; + } + if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) && m_lowres.create(param, m_fencPic, param->rc.qgSize)) { X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized"); @@ -267,4 +282,10 @@ X265_FREE(m_gaussianPic); X265_FREE(m_thetaPic); } + + if (m_param->recursionSkipMode == EDGE_BASED_RSKIP) + { + X265_FREE_ZERO(m_edgeBitPlane); + m_edgeBitPic = NULL; + } }
View file
x265_3.3.tar.gz/source/common/frame.h -> x265_3.4.tar.gz/source/common/frame.h
Changed
@@ -99,7 +99,7 @@ float* m_quantOffsets; // points to quantOffsets in x265_picture x265_sei m_userSEI; uint32_t m_picStruct; // picture structure SEI message - x265_dolby_vision_rpu m_rpu; + x265_dolby_vision_rpu m_rpu; /* Frame Parallelism - notification between FrameEncoders of available motion reference rows */ ThreadSafeInteger* m_reconRowFlag; // flag of CTU rows completely reconstructed and extended for motion reference @@ -137,6 +137,10 @@ pixel* m_gaussianPic; pixel* m_thetaPic; + /* edge bit plane for rskips 2 and 3 */ + pixel* m_edgeBitPlane; + pixel* m_edgeBitPic; + Frame(); bool create(x265_param *param, float* quantOffsets);
View file
x265_3.3.tar.gz/source/common/param.cpp -> x265_3.4.tar.gz/source/common/param.cpp
Changed
@@ -198,7 +198,8 @@ param->bEnableWeightedPred = 1; param->bEnableWeightedBiPred = 0; param->bEnableEarlySkip = 1; - param->bEnableRecursionSkip = 1; + param->recursionSkipMode = 1; + param->edgeVarThreshold = 0.05f; param->bEnableAMP = 0; param->bEnableRectInter = 0; param->rdLevel = 3; @@ -285,6 +286,7 @@ param->rc.bEnableConstVbv = 0; param->bResetZoneConfig = 1; param->reconfigWindowSize = 0; + param->decoderVbvMaxRate = 0; /* Video Usability Information (VUI) */ param->vui.aspectRatioIdc = 0; @@ -546,7 +548,7 @@ param->maxNumMergeCand = 5; param->searchMethod = X265_STAR_SEARCH; param->bEnableTransformSkip = 1; - param->bEnableRecursionSkip = 0; + param->recursionSkipMode = 0; param->maxNumReferences = 5; param->limitReferences = 0; param->lookaheadSlices = 0; // disabled for best quality @@ -598,7 +600,7 @@ param->rc.hevcAq = 0; param->rc.qpStep = 1; param->rc.bEnableGrain = 1; - param->bEnableRecursionSkip = 0; + param->recursionSkipMode = 0; param->psyRd = 4.0; param->psyRdoq = 10.0; param->bEnableSAO = 0; @@ -702,8 +704,9 @@ OPT("ref") p->maxNumReferences = atoi(value); OPT("fast-intra") p->bEnableFastIntra = atobool(value); OPT("early-skip") p->bEnableEarlySkip = atobool(value); - OPT("rskip") p->bEnableRecursionSkip = atobool(value); - OPT("me")p->searchMethod = parseName(value, x265_motion_est_names, bError); + OPT("rskip") p->recursionSkipMode = atoi(value); + OPT("rskip-edge-threshold") p->edgeVarThreshold = atoi(value)/100.0f; + OPT("me") p->searchMethod = parseName(value, x265_motion_est_names, bError); OPT("subme") p->subpelRefine = atoi(value); OPT("merange") p->searchRange = atoi(value); OPT("rect") p->bEnableRectInter = atobool(value); @@ -919,7 +922,7 @@ OPT("max-merge") p->maxNumMergeCand = (uint32_t)atoi(value); OPT("temporal-mvp") p->bEnableTemporalMvp = atobool(value); OPT("early-skip") p->bEnableEarlySkip = atobool(value); - OPT("rskip") p->bEnableRecursionSkip = atobool(value); + OPT("rskip") p->recursionSkipMode = atoi(value); 
OPT("rdpenalty") p->rdPenalty = atoi(value); OPT("tskip") p->bEnableTransformSkip = atobool(value); OPT("no-tskip-fast") p->bEnableTSkipFast = atobool(value); @@ -1221,6 +1224,7 @@ } } OPT("hist-threshold") p->edgeTransitionThreshold = atof(value); + OPT("rskip-edge-threshold") p->edgeVarThreshold = atoi(value)/100.0f; OPT("lookahead-threads") p->lookaheadThreads = atoi(value); OPT("opt-cu-delta-qp") p->bOptCUDeltaQP = atobool(value); OPT("multi-pass-opt-analysis") p->analysisMultiPassRefine = atobool(value); @@ -1596,9 +1600,16 @@ CHECK(param->rdLevel < 1 || param->rdLevel > 6, "RD Level is out of range"); CHECK(param->rdoqLevel < 0 || param->rdoqLevel > 2, - "RDOQ Level is out of range"); + "RDOQ Level is out of range"); CHECK(param->dynamicRd < 0 || param->dynamicRd > x265_ADAPT_RD_STRENGTH, - "Dynamic RD strength must be between 0 and 4"); + "Dynamic RD strength must be between 0 and 4"); + CHECK(param->recursionSkipMode > 2 || param->recursionSkipMode < 0, + "Invalid Recursion skip mode. Valid modes 0,1,2"); + if (param->recursionSkipMode == EDGE_BASED_RSKIP) + { + CHECK(param->edgeVarThreshold < 0.0f || param->edgeVarThreshold > 1.0f, + "Minimum edge density percentage for a CU should be an integer between 0 to 100"); + } CHECK(param->bframes && param->bframes >= param->lookaheadDepth && !param->rc.bStatRead, "Lookahead depth must be greater than the max consecutive bframe count"); CHECK(param->bframes < 0, @@ -1789,6 +1800,7 @@ } CHECK(param->confWinRightOffset < 0, "Conformance Window Right Offset must be 0 or greater"); CHECK(param->confWinBottomOffset < 0, "Conformance Window Bottom Offset must be 0 or greater"); + CHECK(param->decoderVbvMaxRate < 0, "Invalid Decoder Vbv Maxrate. 
Value can not be less than zero"); return check_failed; } @@ -1908,7 +1920,9 @@ TOOLVAL(param->psyRdoq, "psy-rdoq=%.2lf"); TOOLOPT(param->bEnableRdRefine, "rd-refine"); TOOLOPT(param->bEnableEarlySkip, "early-skip"); - TOOLOPT(param->bEnableRecursionSkip, "rskip"); + TOOLVAL(param->recursionSkipMode, "rskip mode=%d"); + if (param->recursionSkipMode == EDGE_BASED_RSKIP) + TOOLVAL(param->edgeVarThreshold, "rskip-edge-threshold=%.2f"); TOOLOPT(param->bEnableSplitRdSkip, "splitrd-skip"); TOOLVAL(param->noiseReductionIntra, "nr-intra=%d"); TOOLVAL(param->noiseReductionInter, "nr-inter=%d"); @@ -2066,7 +2080,10 @@ s += sprintf(s, " rd=%d", p->rdLevel); s += sprintf(s, " selective-sao=%d", p->selectiveSAO); BOOL(p->bEnableEarlySkip, "early-skip"); - BOOL(p->bEnableRecursionSkip, "rskip"); + BOOL(p->recursionSkipMode, "rskip"); + if (p->recursionSkipMode == EDGE_BASED_RSKIP) + s += sprintf(s, " rskip-edge-threshold=%f", p->edgeVarThreshold); + BOOL(p->bEnableFastIntra, "fast-intra"); BOOL(p->bEnableTSkipFast, "tskip-fast"); BOOL(p->bCULossless, "cu-lossless"); @@ -2204,6 +2221,7 @@ if (p->bEnableSceneCutAwareQp) s += sprintf(s, " scenecut-window=%d max-qp-delta=%d", p->scenecutWindow, p->maxQpDelta); s += sprintf(s, "conformance-window-offsets right=%d bottom=%d", p->confWinRightOffset, p->confWinBottomOffset); + s += sprintf(s, " decoder-max-rate=%d", p->decoderVbvMaxRate); #undef BOOL return buf; } @@ -2373,7 +2391,8 @@ dst->bSaoNonDeblocked = src->bSaoNonDeblocked; dst->rdLevel = src->rdLevel; dst->bEnableEarlySkip = src->bEnableEarlySkip; - dst->bEnableRecursionSkip = src->bEnableRecursionSkip; + dst->recursionSkipMode = src->recursionSkipMode; + dst->edgeVarThreshold = src->edgeVarThreshold; dst->bEnableFastIntra = src->bEnableFastIntra; dst->bEnableTSkipFast = src->bEnableTSkipFast; dst->bCULossless = src->bCULossless; @@ -2419,8 +2438,9 @@ dst->rc.zonefileCount = src->rc.zonefileCount; dst->reconfigWindowSize = src->reconfigWindowSize; dst->bResetZoneConfig = 
src->bResetZoneConfig; + dst->decoderVbvMaxRate = src->decoderVbvMaxRate; - if (src->rc.zonefileCount && src->rc.zones) + if (src->rc.zonefileCount && src->rc.zones && src->bResetZoneConfig) { for (int i = 0; i < src->rc.zonefileCount; i++) {
View file
x265_3.3.tar.gz/source/common/pixel.cpp -> x265_3.4.tar.gz/source/common/pixel.cpp
Changed
@@ -5,6 +5,7 @@ * Mandar Gurav <mandar@multicorewareinc.com> * Mahesh Pittala <mahesh@multicorewareinc.com> * Min Chen <min.chen@multicorewareinc.com> + * Hongbin Liu<liuhongbin1@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -265,6 +266,10 @@ { int satd = 0; +#if ENABLE_ASSEMBLY && X265_ARCH_ARM64 + pixelcmp_t satd_4x4 = x265_pixel_satd_4x4_neon; +#endif + for (int row = 0; row < h; row += 4) for (int col = 0; col < w; col += 4) satd += satd_4x4(pix1 + row * stride_pix1 + col, stride_pix1, @@ -279,6 +284,10 @@ { int satd = 0; +#if ENABLE_ASSEMBLY && X265_ARCH_ARM64 + pixelcmp_t satd_8x4 = x265_pixel_satd_8x4_neon; +#endif + for (int row = 0; row < h; row += 4) for (int col = 0; col < w; col += 8) satd += satd_8x4(pix1 + row * stride_pix1 + col, stride_pix1, @@ -876,6 +885,18 @@ } } +static void planecopy_pp_shr_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift) +{ + for (int r = 0; r < height; r++) + { + for (int c = 0; c < width; c++) + dst[c] = (pixel)((src[c] >> shift)); + + dst += dstStride; + src += srcStride; + } +} + static void planecopy_sp_shl_c(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask) { for (int r = 0; r < height; r++) @@ -1316,6 +1337,7 @@ p.planecopy_cp = planecopy_cp_c; p.planecopy_sp = planecopy_sp_c; p.planecopy_sp_shl = planecopy_sp_shl_c; + p.planecopy_pp_shr = planecopy_pp_shr_c; #if HIGH_BIT_DEPTH p.planeClipAndMax = planeClipAndMax_c; #endif
View file
x265_3.3.tar.gz/source/common/primitives.h -> x265_3.4.tar.gz/source/common/primitives.h
Changed
@@ -8,6 +8,8 @@ * Rajesh Paulraj <rajesh@multicorewareinc.com> * Praveen Kumar Tiwari <praveen@multicorewareinc.com> * Min Chen <chenm003@163.com> + * Hongbin Liu<liuhongbin1@huawei.com> + * Yimeng Su <yimeng.su@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -204,6 +206,7 @@ typedef void (*sign_t)(int8_t *dst, const pixel *src1, const pixel *src2, const int endX); typedef void (*planecopy_cp_t) (const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift); typedef void (*planecopy_sp_t) (const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask); +typedef void (*planecopy_pp_t) (const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift); typedef pixel (*planeClipAndMax_t)(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum, const pixel minPix, const pixel maxPix); typedef void (*cutree_propagate_cost) (int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts, const int32_t* invQscales, const double* fpsFactor, int len); @@ -358,6 +361,7 @@ planecopy_cp_t planecopy_cp; planecopy_sp_t planecopy_sp; planecopy_sp_t planecopy_sp_shl; + planecopy_pp_t planecopy_pp_shr; planeClipAndMax_t planeClipAndMax; weightp_sp_t weight_sp; @@ -465,6 +469,9 @@ void setupInstrinsicPrimitives(EncoderPrimitives &p, int cpuMask); void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask); void setupAliasPrimitives(EncoderPrimitives &p); +#if X265_ARCH_ARM64 +void setupAliasCPrimitives(EncoderPrimitives &cp, EncoderPrimitives &asmp, int cpuMask); +#endif #if HAVE_ALTIVEC void setupPixelPrimitives_altivec(EncoderPrimitives &p); void setupDCTPrimitives_altivec(EncoderPrimitives &p); @@ -479,4 +486,10 @@ extern const char* PFX(build_info_str); #endif +#if ENABLE_ASSEMBLY && 
X265_ARCH_ARM64 +extern "C" { +#include "aarch64/pixel-util.h" +} +#endif + #endif // ifndef X265_PRIMITIVES_H
View file
x265_3.4.tar.gz/source/common/scaler.cpp
Added
@@ -0,0 +1,1110 @@ +/***************************************************************************** +* Copyright (C) 2013-2020 MulticoreWare, Inc +* +* Authors: Pooja Venkatesan <pooja@multicorewareinc.com> +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. +* +* This program is also available under a commercial proprietary license. +* For more information, contact us at license @ x265.com. 
+*****************************************************************************/ + +#include "scaler.h" + +#if _MSC_VER +#pragma warning(disable: 4706) // assignment within conditional +#pragma warning(disable: 4244) // '=' : possible loss of data +#endif + +#define SHORT_MIN (-(1 << 15)) +#define SHORT_MAX ((1 << 15) - 1) +#define SHORT_MAX_10 ((1 << 10) - 1) + +namespace X265_NS{ + +ScalerFilterManager::ScalerFilterManager() : + m_bitDepth(0), + m_algorithmFlags(0), + m_srcW(0), + m_srcH(0), + m_dstW(0), + m_dstH(0), + m_crSrcW(0), + m_crSrcH(0), + m_crDstW(0), + m_crDstH(0), + m_crSrcHSubSample(0), + m_crSrcVSubSample(0), + m_crDstHSubSample(0), + m_crDstVSubSample(0) +{ + for (int i = 0; i < m_numSlice; i++) + m_slices[i] = NULL; + for (int i = 0; i < m_numFilter; i++) + m_ScalerFilters[i] = NULL; +} + +inline static void filter_copy_c(int64_t* filter, int64_t* filter2, int size) +{ + for (int i = 0; i < size; i++) + filter2[i] = filter[i]; +} + +#if X265_DEPTH == 8 +static void doScaling_c(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) +{ + for (int i = 0; i < dstW; i++) + { + int val = 0; + int sourcePos = filterPos[i]; + for (int j = 0; j < filterSize; j++) + val += ((int)src[sourcePos + j]) * filter[filterSize * i + j]; + // the cubic equation does overflow ... 
+ dsti = x265_clip3(SHORT_MIN, SHORT_MAX, val >> 7); + } +} +static uint8_t clipUint8(int a) +{ + if (a&(~0xFF)) + return (-a) >> 31; + else + return a; +} + +static void yuv2PlaneX_c(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) +{ + for (int i = 0; i < dstW; i++) + { + int val = 64 << 12; + for (int j = 0; j < filterSize; j++) + val += srcji * filterj; + desti = clipUint8(val >> 19); + } +} +#else +static void yuv2PlaneX_c_h(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) +{ + for (int i = 0; i < dstW; i++) + { + int val = 1 << 16; + uint16_t* dst16bit = (uint16_t *)dest; + for (int j = 0; j < filterSize; j++) + val += srcji * filterj; + uint16_t d = x265_clip3(0, SHORT_MAX_10, val >> 17); + ((uint8_t*)(&dst16biti))0 = (d); + ((uint8_t*)(&dst16biti))1 = (d) >> 8; + } +} +static void doScaling_c_h(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) +{ + const uint16_t *srcLocal = (const uint16_t *)src; + for (int i = 0; i < dstW; i++) + { + int val = 0; + int sourcePos = filterPosi; + for (int j = 0; j < filterSize; j++) + val += ((int)srcLocalsourcePos + j) * filterfilterSize * i + j; + // the cubic equation does overflow + dsti = x265_clip3(SHORT_MIN, SHORT_MAX, val >> 9); + } +} +#endif + +ScalerFilter::ScalerFilter() : + m_filtLen(0), + m_filtPos(NULL), + m_filt(NULL), + m_sourceSlice(NULL), + m_destSlice(NULL) +{ +} + +ScalerFilter::~ScalerFilter() +{ + if (m_filtPos) { + delete m_filtPos; m_filtPos = NULL; + } + if (m_filt) { + delete m_filt; m_filt = NULL; + } +} + +void ScalerHLumFilter::process(int sliceVer, int sliceHor) +{ + uint8_t ** src = m_sourceSlice->m_plane0.lineBuf; + uint8_t ** dst = m_destSlice->m_plane0.lineBuf; + int sourcePos = sliceVer - m_sourceSlice->m_plane0.sliceVer; + int destPos = sliceVer - m_destSlice->m_plane0.sliceVer; + int dstW = m_destSlice->m_width; + for (int i = 0; i < sliceHor; ++i) + { + 
m_hFilterScaler->doScaling((int16_t*)dstdestPos + i, dstW, (const uint8_t *)srcsourcePos + i, m_filt, m_filtPos, m_filtLen); + m_destSlice->m_plane0.sliceHor += 1; + } +} + +void ScalerHCrFilter::process(int sliceVer, int sliceHor) +{ + uint8_t ** src1 = m_sourceSlice->m_plane1.lineBuf; + uint8_t ** dst1 = m_destSlice->m_plane1.lineBuf; + uint8_t ** src2 = m_sourceSlice->m_plane2.lineBuf; + uint8_t ** dst2 = m_destSlice->m_plane2.lineBuf; + + int sourcePos1 = sliceVer - m_sourceSlice->m_plane1.sliceVer; + int destPos1 = sliceVer - m_destSlice->m_plane1.sliceVer; + int sourcePos2 = sliceVer - m_sourceSlice->m_plane2.sliceVer; + int destPos2 = sliceVer - m_destSlice->m_plane2.sliceVer; + + int dstW = m_destSlice->m_width >> m_destSlice->m_hCrSubSample; + + for (int i = 0; i < sliceHor; ++i) + { + m_hFilterScaler->doScaling((int16_t*)dst1destPos1 + i, dstW, src1sourcePos1 + i, m_filt, m_filtPos, m_filtLen); + m_hFilterScaler->doScaling((int16_t*)dst2destPos2 + i, dstW, src2sourcePos2 + i, m_filt, m_filtPos, m_filtLen); + m_destSlice->m_plane1.sliceHor += 1; + m_destSlice->m_plane2.sliceHor += 1; + } +} + +void VFilterScaler8Bit::yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) +{ + int IdxW = FACTOR_4; + int IdxF = FIL_DEF; + + (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4); + (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4); + +#if X265_DEPTH == 8 + yuv2PlaneX_c(filter, filterSize, src, dest, dstW); +#else + yuv2PlaneX_c_h(filter, filterSize, src, dest, dstW); +#endif +} + +void VFilterScaler10Bit::yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) +{ + int IdxW = FACTOR_4; + int IdxF = FIL_DEF; + + (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4); + (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4); + +#if X265_DEPTH == 8 + yuv2PlaneX_c(filter, filterSize, src, dest, 
dstW); +#else + yuv2PlaneX_c_h(filter, filterSize, src, dest, dstW); +#endif +} + +void ScalerVLumFilter::process(int sliceVer, int sliceHor) +{ + (void)sliceHor; + int first = X265_MAX(1 - m_filtLen, m_filtPossliceVer); + int sp = first - m_sourceSlice->m_plane0.sliceVer; + int dp = sliceVer - m_destSlice->m_plane0.sliceVer; + uint8_t **src = m_sourceSlice->m_plane0.lineBuf + sp; + uint8_t **dst = m_destSlice->m_plane0.lineBuf + dp; + int16_t *filter = m_filt + (sliceVer * m_filtLen); + int dstW = m_destSlice->m_width; + m_vFilterScaler->yuv2PlaneX(filter, m_filtLen, (const int16_t**)src, dst0, dstW); +} + +void ScalerVCrFilter::process(int sliceVer, int sliceHor) +{ + (void)sliceHor; + + const int crSkipMask = (1 << m_destSlice->m_vCrSubSample) - 1; + if (sliceVer & crSkipMask) + return; + else + { + int dstW = m_destSlice->m_width >> m_destSlice->m_hCrSubSample; + int crSliceVer = sliceVer >> m_destSlice->m_vCrSubSample; + int first = X265_MAX(1 - m_filtLen, m_filtPoscrSliceVer); + int sp1 = first - m_sourceSlice->m_plane1.sliceVer; + int sp2 = first - m_sourceSlice->m_plane2.sliceVer; + int dp1 = crSliceVer - m_destSlice->m_plane1.sliceVer; + int dp2 = crSliceVer - m_destSlice->m_plane2.sliceVer; + uint8_t **src1 = m_sourceSlice->m_plane1.lineBuf + sp1; + uint8_t **src2 = m_sourceSlice->m_plane2.lineBuf + sp2; + uint8_t **dst1 = m_destSlice->m_plane1.lineBuf + dp1; + uint8_t **dst2 = m_destSlice->m_plane2.lineBuf + dp2; + int16_t *filter = m_filt + (crSliceVer * m_filtLen); + + m_vFilterScaler->yuv2PlaneX((int16_t*)filter, m_filtLen, (const int16_t**)src1, dst10, dstW); + m_vFilterScaler->yuv2PlaneX((int16_t*)filter, m_filtLen, (const int16_t**)src2, dst20, dstW); + } +} + +int ScalerFilter::initCoeff(int flag, int inc, int srcW, int dstW, int filtAlign, int one, int sourcePos, int destPos) +{ + int filterSize; + int filter2Size; + int minFilterSize; + int64_t *filter = NULL; + int64_t *filter2 = NULL; + const int64_t fone = 1LL << (54 - 
x265_min((int)X265_LOG2(srcW / dstW), 8)); + int *outFilterSize = &m_filtLen; + int64_t xDstInSrc; + int sizeFactor = flag; + + // Init filter pos, the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end + m_filtPos = new int32_tdstW + 3; + int32_t **filterPos = &m_filtPos; + + if (inc <= 1 << 16) + filterSize = 1 + sizeFactor; // upscale + else + filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW; + + filterSize = x265_min(filterSize, srcW - 2); + filterSize = x265_max(filterSize, 1); + filter = new int64_tdstW * sizeof(*filter) * filterSize; + + xDstInSrc = ((destPos*(int64_t)inc) >> 7) - ((sourcePos * 0x10000LL) >> 7); + for (int i = 0; i < dstW; i++) + { + int xx = (xDstInSrc - (filterSize - 2) * (1LL << 16)) / (1 << 17); + (*filterPos)i = xx; + for (int j = 0; j < filterSize; j++) + { + int64_t d = (X265_ABS(((int64_t)xx * (1 << 17)) - xDstInSrc)) << 13; + int64_t coeff = 0; + + if (inc > 1 << 16) + d = d * dstW / srcW; + + if (flag == 4) // BiCUBIC + { + int64_t B = (0) * (1 << 24); + int64_t C = (0.6) * (1 << 24); + + if (d >= 1LL << 31) + coeff = 0.0; + else + { + int64_t dd = (d * d) >> 30; + int64_t ddd = (dd * d) >> 30; + + if (d < 1LL << 30) + coeff = (12 * (1 << 24) - 9 * B - 6 * C) * ddd + (-18 * (1 << 24) + 12 * B + 6 * C) * dd + (6 * (1 << 24) - 2 * B) * (1 << 30); + else + coeff = (-B - 6 * C) * ddd + (6 * B + 30 * C) * dd + (-12 * B - 48 * C) * d + (8 * B + 24 * C) * (1 << 30); + } + coeff /= (1LL << 54) / fone; + } + else if (flag == 1) // BILINEAR + { + coeff = (1 << 30) - d; + if (coeff < 0) + coeff = 0; + coeff *= fone >> 30; + } + else + assert(0); + + filteri * filterSize + j = coeff; + xx++; + } + xDstInSrc += 2 * inc; + } + + //apply src & dst Filter to filter -> filter2 + X265_CHECK(filterSize > 0, "invalid filterSize value.\n"); + filter2Size = filterSize; + filter2 = new int64_tdstW * sizeof(*filter2) * filter2Size; + + /* This is hard to read code, but much faster. 
Speed is crucial here */ + int index = RES_FACTOR_DEF; + int size = dstW * filterSize; + + (size % 4 == 0) && (index = RES_FACTOR_4); + (size % 8 == 0) && (index = RES_FACTOR_8); + (size % 16 == 0) && (index = RES_FACTOR_16); + (size % 32 == 0) && (index = RES_FACTOR_32); + (size % 64 == 0) && (index = RES_FACTOR_64); + + filter_copy_c(filter, filter2, size); + + delete(filter); + + // try to reduce the filter-size (step1 find size and shift left) + // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not). + minFilterSize = 0; + for (int i = dstW - 1; i >= 0; i--) + { + int min = filter2Size; + int64_t cutOff = 0.0; + + // get rid of near zero elements on the left by shifting left + for (int j = 0; j < filter2Size; j++) + { + int k; + cutOff += X265_ABS(filter2i * filter2Size); + + if (cutOff > SCALER_MAX_REDUCE_CUTOFF * fone) + break; + // preserve monotonicity because the core can't handle the filter otherwise + if (i < dstW - 1 && (*filterPos)i >= (*filterPos)i + 1) + break; + + // move filter coefficients left + for (k = 1; k < filter2Size; k++) + filter2i * filter2Size + k - 1 = filter2i * filter2Size + k; + filter2i * filter2Size + k - 1 = 0; + (*filterPos)i++; + } + + cutOff = 0; + // count near zeros on the right + for (int j = filter2Size - 1; j > 0; j--) + { + cutOff += X265_ABS(filter2i * filter2Size + j); + + if (cutOff > SCALER_MAX_REDUCE_CUTOFF * fone) + break; + min--; + } + + if (min > minFilterSize) + minFilterSize = min; + } + + X265_CHECK(minFilterSize > 0, "invalid minFilterSize value.\n"); + filterSize = (minFilterSize + (filtAlign - 1)) & (~(filtAlign - 1)); + X265_CHECK(filterSize > 0, "invalid filterSize value.\n"); + filter = new int64_tdstW*filterSize * sizeof(*filter); + + *outFilterSize = filterSize; + + // try to reduce the filter-size (step2 reduce it) + for (int i = 0; i < dstW; i++) + { + for (int j = 0; j < filterSize; j++) + { + if (j >= filter2Size) + filteri * filterSize + j = 0; + else + filteri * filterSize + j = 
filter2i * filter2Size + j; + if ((flag & SCALER_BITEXACT) && j >= minFilterSize) + filteri * filterSize + j = 0; + } + } + + // fix borders + for (int i = 0; i < dstW; i++) + { + int j; + if ((*filterPos)i < 0) + { + // move filter coefficients left to compensate for filterPos + for (j = 1; j < filterSize; j++) + { + int left = x265_max(j + (*filterPos)i, 0); + filteri * filterSize + left += filteri * filterSize + j; + filteri * filterSize + j = 0; + } + (*filterPos)i = 0; + } + + if ((*filterPos)i + filterSize > srcW) + { + int shift = (*filterPos)i + x265_min(filterSize - srcW, 0); + int64_t acc = 0; + + for (j = filterSize - 1; j >= 0; j--) + { + if ((*filterPos)i + j >= srcW) + { + acc += filteri * filterSize + j; + filteri * filterSize + j = 0; + } + } + for (j = filterSize - 1; j >= 0; j--) + { + if (j < shift) + filteri * filterSize + j = 0; + else + filteri * filterSize + j = filteri * filterSize + j - shift; + } + + (*filterPos)i -= shift; + filteri * filterSize + srcW - 1 - (*filterPos)i += acc; + } + + X265_CHECK((*filterPos)i >= 0, "invalid: Value of (*filterPos)%d < 0.\n", i); + X265_CHECK((*filterPos)i < srcW, "invalid: Value of (*filterPos)%d > %d .\n", i, srcW); + if ((*filterPos)i + filterSize > srcW) + { + for (j = 0; j < filterSize; j++) + { + X265_CHECK(!filteri * filterSize + j, "invalid: Value of filter%d * filterSize + %d != 0.\n", i, j); + X265_CHECK((*filterPos)i + j < srcW, "invalid: (*filterPos)%d + %d > %d .\n", i, i, srcW); + } + } + } + + // init filter + m_filt = new int16_t(dstW + 3)*(*outFilterSize); + int16_t **outFilter = &m_filt; + + // normalize & store in outFilter + for (int i = 0; i < dstW; i++) + { + int64_t error = 0; + int64_t sum = 0; + + for (int j = 0; j < filterSize; j++) + sum += filteri * filterSize + j; + sum = (sum + one / 2) / one; + if (!sum) + { + x265_log(NULL, X265_LOG_WARNING, "Scaler: zero vector in scaling\n"); + sum = 1; + } + for (int j = 0; j < *outFilterSize; j++) + { + int64_t v = filteri * filterSize 
+ j + error; + int intV = ROUNDED_DIVISION(v, sum); + (*outFilter)i * (*outFilterSize) + j = intV; + error = v - intV * sum; + } + } + + (*filterPos)dstW + 0 = + (*filterPos)dstW + 1 = + (*filterPos)dstW + 2 = (*filterPos)dstW - 1; + for (int i = 0; i < *outFilterSize; i++) + { + int k = (dstW - 1) * (*outFilterSize) + i; + (*outFilter)k + 1 * (*outFilterSize) = + (*outFilter)k + 2 * (*outFilterSize) = + (*outFilter)k + 3 * (*outFilterSize) = (*outFilter)k; + } + + delete(filter); + delete(filter2); + return 0; +} + +int ScalerFilterManager::init(int algorithmFlags, VideoDesc *srcVideoDesc, VideoDesc *dstVideoDesc) +{ + int srcW = m_srcW = srcVideoDesc->m_width; + int srcH = m_srcH = srcVideoDesc->m_height; + int dstW = m_dstW = dstVideoDesc->m_width; + int dstH = m_dstH = dstVideoDesc->m_height; + int lumXInc, crXInc; + int lumYInc, crYInc; + int srcHCrPos; + int dstHCrPos; + int srcVCrPos; + int dstVCrPos; + int dst_stride = SCALER_ALIGN(dstW * sizeof(int16_t) + 66, 16); + m_bitDepth = dstVideoDesc->m_inputDepth; + if (m_bitDepth == 16) + dst_stride <<= 1; + + m_algorithmFlags = algorithmFlags; + lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW; + lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH; + + srcHCrPos = -513; + dstHCrPos = -513; + srcVCrPos = -513; + dstVCrPos = -513; + + int srcCsp = srcVideoDesc->m_csp; + if (x265_cli_cspssrcCsp.planes > 1) + { + m_crSrcHSubSample = x265_cli_cspssrcCsp.width1; + m_crSrcVSubSample = x265_cli_cspssrcCsp.height1; + m_crSrcW = srcVideoDesc->m_width >> m_crSrcHSubSample; + m_crSrcH = srcVideoDesc->m_height >> m_crSrcVSubSample; + if (srcCsp == 1)// i420 + srcVCrPos = 128; + } + else + { + m_crSrcW = 0; + m_crSrcH = 0; + m_crSrcHSubSample = 0; + m_crSrcVSubSample = 0; + } + int dstCsp = dstVideoDesc->m_csp; + if (x265_cli_cspsdstCsp.planes > 1) + { + m_crDstHSubSample = x265_cli_cspsdstCsp.width1; + m_crDstVSubSample = x265_cli_cspsdstCsp.height1; + m_crDstW = dstVideoDesc->m_width >> m_crDstHSubSample; + 
m_crDstH = dstVideoDesc->m_height >> m_crDstVSubSample; + if (dstCsp == 1)// i420 + dstVCrPos = 128; + } + else + { + m_crDstW = 0; + m_crDstH = 0; + m_crDstHSubSample = 0; + m_crDstVSubSample = 0; + } + // Only srcCsp == dstCsp is supported at present + if (srcCsp != dstCsp) + { + x265_log(NULL, X265_LOG_ERROR, "wrong, source csp != destination csp \n"); + return false; + } + + lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW; + lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH; + crXInc = (((int64_t)m_crSrcW << 16) + (m_crDstW >> 1)) / m_crDstW; + crYInc = (((int64_t)m_crSrcH << 16) + (m_crDstH >> 1)) / m_crDstH; + + const int filterAlign = 1; + + // init horizontal Luma Scaler filter + m_ScalerFilters0 = new ScalerHLumFilter(m_bitDepth); + m_ScalerFilters0->initCoeff(m_algorithmFlags, lumXInc, srcW, dstW, filterAlign, 1 << 14, getLocalPos(0, 0), getLocalPos(0, 0)); + + // init horizontal cr Scaler filter + m_ScalerFilters1 = new ScalerHCrFilter(m_bitDepth); + m_ScalerFilters1->initCoeff(m_algorithmFlags, crXInc, m_crSrcW, m_crDstW, filterAlign, 1 << 14, + getLocalPos(m_crSrcHSubSample, srcHCrPos), getLocalPos(m_crDstHSubSample, dstHCrPos)); + + // init vertical Luma scaler filter + m_ScalerFilters2 = new ScalerVLumFilter(m_bitDepth); + m_ScalerFilters2->initCoeff(m_algorithmFlags, lumYInc, srcH, dstH, filterAlign, 1 << 12, getLocalPos(0, 0), getLocalPos(0, 0)); + + // init vertical cr scaler filter + m_ScalerFilters3 = new ScalerVCrFilter(m_bitDepth); + m_ScalerFilters3->initCoeff(m_algorithmFlags, crYInc, m_crSrcH, m_crDstH, filterAlign, 1 << 12, + getLocalPos(m_crSrcVSubSample, srcVCrPos), getLocalPos(m_crDstVSubSample, dstVCrPos)); + + // init slice, must after filter initialization + initScalerSlice(); + + // set slice + m_ScalerFilters0->setSlice(m_slices0, m_slices1); + m_ScalerFilters1->setSlice(m_slices0, m_slices1); + + m_ScalerFilters2->setSlice(m_slices1, m_slices2); + m_ScalerFilters3->setSlice(m_slices1, m_slices2); + + return 0; +} + 
+void HFilterScaler8Bit::doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) +{ + int IdxW = FACTOR_4; + int IdxF = FIL_DEF; + + /* This is hard to read code, but much faster. Speed is crucial here */ + (dstW % 8 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 16) && (IdxF = FIL_16) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 11) && (IdxF = FIL_11) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 10) && (IdxF = FIL_10) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 9) && (IdxF = FIL_9) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 15) && (IdxF = FIL_15) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 13) && (IdxF = FIL_13) && (IdxW = FACTOR_8); + + /* Do not check multiple of width 4, if width is already multiple of 8 */ + !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4); + !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4); + !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 16) && (IdxF = FIL_16) && (IdxW = FACTOR_4); + + (dstW % 4 == 0) && (filterSize == 24) && (IdxF = FIL_24) && (IdxW = FACTOR_4); + (dstW % 4 == 0) && (filterSize == 22) && (IdxF = FIL_22) && (IdxW = FACTOR_4); + (dstW % 4 == 0) && (filterSize == 19) && (IdxF = FIL_19) && (IdxW = FACTOR_4); + (dstW % 4 == 0) && (filterSize == 17) && (IdxF = FIL_17) && (IdxW = FACTOR_4); + +#if X265_DEPTH == 8 + doScaling_c(dst, dstW, src, filter, filterPos, filterSize); +#else + doScaling_c_h(dst, dstW, src, filter, filterPos, filterSize); +#endif +} + +void HFilterScaler10Bit::doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) +{ + int IdxW = FACTOR_4; + int IdxF = FIL_DEF; + + /* This is hard to 
read code, but much faster. Speed is crucial here */ + (dstW % 8 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 16) && (IdxF = FIL_16) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 11) && (IdxF = FIL_11) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 10) && (IdxF = FIL_10) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 9) && (IdxF = FIL_9) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 15) && (IdxF = FIL_15) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 13) && (IdxF = FIL_13) && (IdxW = FACTOR_8); + + /* Do not check multiple of width 4, if width is already multiple of 8 */ + !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4); + !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4); + !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 16) && (IdxF = FIL_16) && (IdxW = FACTOR_4); + + (dstW % 4 == 0) && (filterSize == 24) && (IdxF = FIL_24) && (IdxW = FACTOR_4); + (dstW % 4 == 0) && (filterSize == 22) && (IdxF = FIL_22) && (IdxW = FACTOR_4); + (dstW % 4 == 0) && (filterSize == 19) && (IdxF = FIL_19) && (IdxW = FACTOR_4); + (dstW % 4 == 0) && (filterSize == 17) && (IdxF = FIL_17) && (IdxW = FACTOR_4); + +#if X265_DEPTH == 8 + doScaling_c(dst, dstW, src, filter, filterPos, filterSize); +#else + doScaling_c_h(dst, dstW, src, filter, filterPos, filterSize); +#endif +} + +int ScalerFilterManager::scale_pic(void ** src, void ** dst, int * srcStride, int * dstStride) +{ + uint8_t** src_8bit, **dst_8bit; + src_8bit = (uint8_t**)src; + dst_8bit = (uint8_t**)dst; + if (!src_8bit || !dst_8bit) + return -1; + + const int srcsliceHor = m_srcH; + const int dstW = m_dstW; + const int dstH = m_dstH; + int32_t *vLumFilterPos = m_ScalerFilters2->m_filtPos; + int32_t *vCrFilterPos = 
m_ScalerFilters3->m_filtPos; + const int vLumFilterSize = m_ScalerFilters2->m_filtLen; + const int vCrFilterSize = m_ScalerFilters3->m_filtLen; + const int crSrcsliceHor = UH_CEIL_SHIFTR(srcsliceHor, m_crSrcVSubSample); + + // vars which will change and which we need to store back in the context + int lumBufIndex = -1; + int crBufIndex = -1; + int lastInLumBuf = -1; + int lastInCrBuf = -1; + + int hasLumHoles = 1; + int hasCrHoles = 1; + + ScalerSlice *src_slice = m_slices0; + ScalerSlice *hout_slice = m_slices1; + ScalerSlice *vout_slice = m_slices2; + src_slice->initFromSrc((uint8_t**)src, srcStride, m_srcW, 0, srcsliceHor, 0, crSrcsliceHor, 1); + vout_slice->initFromSrc((uint8_t**)dst, dstStride, m_dstW, 0, dstH, 0, UH_CEIL_SHIFTR(dstH, m_crDstVSubSample), 0); + + hout_slice->m_plane0.sliceVer = 0; + hout_slice->m_plane1.sliceVer = 0; + hout_slice->m_plane2.sliceVer = 0; + hout_slice->m_plane3.sliceVer = 0; + hout_slice->m_plane0.sliceHor = 0; + hout_slice->m_plane1.sliceHor = 0; + hout_slice->m_plane2.sliceHor = 0; + hout_slice->m_plane3.sliceHor = 0; + hout_slice->m_width = dstW; + + for (int dstY = 0; dstY < dstH; dstY++) + { + const int crDstY = dstY >> m_crDstVSubSample; + const int firstLumSrcY = x265_max(1 - vLumFilterSize, vLumFilterPosdstY); + const int firstLumSrcY2 = x265_max(1 - vLumFilterSize, vLumFilterPosx265_min(dstY | ((1 << m_crDstVSubSample) - 1), dstH - 1)); + const int firstCrSrcY = x265_max(1 - vCrFilterSize, vCrFilterPoscrDstY); + + int lastLumSrcY = x265_min(m_srcH, firstLumSrcY + vLumFilterSize) - 1; + int lastLumSrcY2 = x265_min(m_srcH, firstLumSrcY2 + vLumFilterSize) - 1; + int lastCrSrcY = x265_min(m_crSrcH, firstCrSrcY + vCrFilterSize) - 1; + + // handle holes + if (firstLumSrcY > lastInLumBuf) + { + hasLumHoles = lastInLumBuf != firstLumSrcY - 1; + if (hasLumHoles) + { + hout_slice->m_plane0.sliceVer = firstLumSrcY; + hout_slice->m_plane3.sliceVer = firstLumSrcY; + hout_slice->m_plane0.sliceHor = + hout_slice->m_plane3.sliceHor = 0; 
+ } + + lastInLumBuf = firstLumSrcY - 1; + } + if (firstCrSrcY > lastInCrBuf) + { + hasCrHoles = lastInCrBuf != firstCrSrcY - 1; + if (hasCrHoles) + { + hout_slice->m_plane1.sliceVer = firstCrSrcY; + hout_slice->m_plane2.sliceVer = firstCrSrcY; + hout_slice->m_plane1.sliceHor = + hout_slice->m_plane2.sliceHor = 0; + } + + lastInCrBuf = firstCrSrcY - 1; + } + + // Do we have enough lines in this slice to output the dstY line + int enoughLines = lastLumSrcY2 < 0 + srcsliceHor && lastCrSrcY < UH_CEIL_SHIFTR(0 + srcsliceHor, m_crSrcVSubSample); + if (!enoughLines) + { + lastLumSrcY = 0 + srcsliceHor - 1; + lastCrSrcY = 0 + crSrcsliceHor - 1; + x265_log(NULL, X265_LOG_INFO, "buffering slice: lastLumSrcY %d lastCrSrcY %d\n", lastLumSrcY, lastCrSrcY); + } + + X265_CHECK(((lastLumSrcY - firstLumSrcY + 1) <= hout_slice->m_plane0.availLines), "invalid value %d", lastLumSrcY - firstLumSrcY + 1); + X265_CHECK((lastCrSrcY - firstCrSrcY + 1) <= hout_slice->m_plane1.availLines, "invalid value %d", lastCrSrcY - firstCrSrcY + 1); + + int firstPosY, lastPosY, firstCPosY, lastCPosY; + int posY = hout_slice->m_plane0.sliceVer + hout_slice->m_plane0.sliceHor; + if (posY <= lastLumSrcY && !hasLumHoles) + { + firstPosY = x265_max(firstLumSrcY, posY); + lastPosY = x265_min(firstLumSrcY + hout_slice->m_plane0.availLines - 1, 0 + srcsliceHor - 1); + } + else + { + firstPosY = posY; + lastPosY = lastLumSrcY; + } + + int cPosY = hout_slice->m_plane1.sliceVer + hout_slice->m_plane1.sliceHor; + if (cPosY <= lastCrSrcY && !hasCrHoles) + { + firstCPosY = x265_max(firstCrSrcY, cPosY); + lastCPosY = x265_min(firstCrSrcY + hout_slice->m_plane1.availLines - 1, UH_CEIL_SHIFTR(0 + srcsliceHor, m_crSrcVSubSample) - 1); + } + else + { + firstCPosY = cPosY; + lastCPosY = lastCrSrcY; + } + + hout_slice->rotate(lastPosY, lastCPosY); + // horizontal luma scale + if (posY < lastLumSrcY + 1) + m_ScalerFilters0->process(firstPosY, lastPosY - firstPosY + 1); + + lumBufIndex += lastLumSrcY - lastInLumBuf; + 
lastInLumBuf = lastLumSrcY; + // horizontal chroma Scale + if (cPosY < lastCrSrcY + 1) + m_ScalerFilters1->process(firstCPosY, lastCPosY - firstCPosY + 1); + + crBufIndex += lastCrSrcY - lastInCrBuf; + lastInCrBuf = lastCrSrcY; + + // wrap buf index around to stay inside the ring buffer + if (lumBufIndex >= vLumFilterSize) + lumBufIndex -= vLumFilterSize; + if (crBufIndex >= vCrFilterSize) + crBufIndex -= vCrFilterSize; + if (!enoughLines) + break; // we can't output a dstY line so let's try with the next slice + + // vertical scale(output converter) + for (int i = 2; i < m_numFilter; ++i) + m_ScalerFiltersi->process(dstY, 1); + } + return 0; +} + +void ScalerFilterManager::getMinBufferSize(int *out_lum_size, int *out_cr_size) +{ + int lumY; + int dstH = m_dstH; + int crDstH = m_crDstH; + int *lumFilterPos = m_ScalerFilters2->m_filtPos; + int *crFilterPos = m_ScalerFilters3->m_filtPos; + int lumFilterSize = m_ScalerFilters2->m_filtLen; + int crFilterSize = m_ScalerFilters3->m_filtLen; + int crSubSample = m_crSrcVSubSample; + + *out_lum_size = lumFilterSize; + *out_cr_size = crFilterSize; + + for (lumY = 0; lumY < dstH; lumY++) + { + int crY = (int64_t)lumY * crDstH / dstH; + int nextSlice = x265_max(lumFilterPoslumY + lumFilterSize - 1, ((crFilterPoscrY + crFilterSize - 1) << crSubSample)); + + nextSlice >>= crSubSample; + nextSlice <<= crSubSample; + (*out_lum_size) = x265_max((*out_lum_size), nextSlice - lumFilterPoslumY); + (*out_cr_size) = x265_max((*out_cr_size), (nextSlice >> crSubSample) - crFilterPoscrY); + } +} + +int ScalerFilterManager::initScalerSlice() +{ + int ret = 0; + int dst_stride = SCALER_ALIGN(m_dstW * sizeof(int16_t) + 66, 16); + if (m_bitDepth == 16) + dst_stride <<= 1; + + int lumBufSize; + int crBufSize; + int vLumFilterSize = m_ScalerFilters2->m_filtLen; // Vertical filter size for luma pixels. + int vCrFilterSize = m_ScalerFilters3->m_filtLen; // Vertical filter size for chroma pixels. 
+ getMinBufferSize(&lumBufSize, &crBufSize); + lumBufSize = X265_MAX(lumBufSize, vLumFilterSize + MAX_NUM_LINES_AHEAD); + crBufSize = X265_MAX(crBufSize, vCrFilterSize + MAX_NUM_LINES_AHEAD); + + for (int i = 0; i < m_numSlice; i++) + m_slicesi = new ScalerSlice; + ret = m_slices0->create(m_srcH, m_crSrcH, m_crSrcHSubSample, m_crSrcVSubSample, 0); + if (ret < 0) + { + x265_log(NULL, X265_LOG_ERROR, "alloc_slice m_slice0 failed\n"); + return -1; + } + + // horizontal scaler output + ret = m_slices1->create(lumBufSize, crBufSize, m_crDstHSubSample, m_crDstVSubSample, 1); + if (ret < 0) + { + x265_log(NULL, X265_LOG_ERROR, "m_slice1.create failed\n"); + return -1; + } + ret = m_slices1->createLines(dst_stride, m_dstW); + if (ret < 0) + { + x265_log(NULL, X265_LOG_ERROR, "m_slice1.createLines failed\n"); + return -1; + } + + m_slices1->fillOnes(dst_stride >> 1, m_bitDepth == 16); + + // vertical scaler output + ret = m_slices2->create(m_dstH, m_crDstH, m_crDstHSubSample, m_crDstVSubSample, 0); + if (ret < 0) + { + x265_log(NULL, X265_LOG_ERROR, "m_slice2.create failed\n"); + return -1; + } + + return 0; +} + +int ScalerFilterManager::getLocalPos(int crSubSample, int pos) +{ + if (pos == -1 || pos <= -513) + pos = (128 << crSubSample) - 128; + pos += 128; // relative to ideal left edge + return pos >> crSubSample; +} + +ScalerSlice::ScalerSlice() : + m_width(0), + m_hCrSubSample(0), + m_vCrSubSample(0), + m_isRing(0), + m_destroyLines(0) +{ + for (int i = 0; i < m_numSlicePlane; i++) + { + m_planei.availLines = 0; + m_planei.sliceVer = 0; + m_planei.sliceHor = 0; + m_planei.lineBuf = NULL; + } +} + +void ScalerSlice::destroy() +{ + if (m_destroyLines) + destroyLines(); + for (int i = 0; i < m_numSlicePlane; i++) + { + if (m_planei.lineBuf) + X265_FREE(m_planei.lineBuf); + } +} + +int ScalerSlice::create(int lumLines, int crLines, int h_sub_sample, int v_sub_sample, int ring) +{ + int i; + int size4 = { lumLines, crLines, crLines, lumLines }; + + m_hCrSubSample = 
h_sub_sample; + m_vCrSubSample = v_sub_sample; + m_isRing = ring; + m_destroyLines = 0; + + for (i = 0; i < m_numSlicePlane; ++i) + { + int n = sizei * (ring == 0 ? 1 : 3); + m_planei.lineBuf = X265_MALLOC(uint8_t*, n); + if (!m_planei.lineBuf) + return -1; + + m_planei.availLines = sizei; + m_planei.sliceVer = 0; + m_planei.sliceHor = 0; + } + return 0; +} + +/* +slice lines contains extra bytes for vectorial code thus @size +is the allocated memory size and @width is the number of pixels +*/ +int ScalerSlice::createLines(int size, int width) +{ + int i; + int idx2 = { 3, 2 }; + + m_destroyLines = 1; + m_width = width; + + for (i = 0; i < 2; ++i) { + int n = m_planei.availLines; + int j; + int ii = idxi; + assert(n == m_planeii.availLines); + for (j = 0; j < n; ++j) + { + // chroma plane line U and V are expected to be contiguous in memory + m_planei.lineBufj = (uint8_t*)X265_MALLOC(uint8_t, size * 2 + 32); + if (!m_planei.lineBufj) + { + destroyLines(); + return -1; + } + m_planeii.lineBufj = m_planei.lineBufj + size + 16; + if (m_isRing) + { + m_planei.lineBufj + n = m_planei.lineBufj; + m_planeii.lineBufj + n = m_planeii.lineBufj; + } + } + } + + return 0; +} + +void ScalerSlice::destroyLines() +{ + int i; + for (i = 0; i < 2; ++i) + { + int n = m_planei.availLines; + int j; + for (j = 0; j < n; ++j) + { + X265_FREE(m_planei.lineBufj); + m_planei.lineBufj = NULL; + if (m_isRing) + m_planei.lineBufj + n = NULL; + } + } + + for (i = 0; i < m_numSlicePlane; ++i) + memset(m_planei.lineBuf, 0, sizeof(uint8_t*) * m_planei.availLines * (m_isRing ? 3 : 1)); + m_destroyLines = 0; +} + +void ScalerSlice::fillOnes(int n, int is16bit) +{ + int i; + for (i = 0; i < m_numSlicePlane; ++i) + { + int j; + int size = m_planei.availLines; + for (j = 0; j < size; ++j) + { + int k; + int end = is16bit ? 
n >> 1 : n; + // fill also one extra element + end += 1; + if (is16bit) + for (k = 0; k < end; ++k) + ((int32_t*)(m_planei.lineBufj))k = 1 << 18; + else + for (k = 0; k < end; ++k) + ((int16_t*)(m_planei.lineBufj))k = 1 << 14; + } + } +} + +int ScalerSlice::rotate(int lum, int cr) +{ + int i; + if (lum) + { + for (i = 0; i < m_numSlicePlane; i += 3) + { + int n = m_planei.availLines; + int l = lum - m_planei.sliceVer; + + if (l >= n * 2) + { + m_planei.sliceVer += n; + m_planei.sliceHor -= n; + } + } + } + if (cr) + { + for (i = 1; i < 3; ++i) + { + int n = m_planei.availLines; + int l = cr - m_planei.sliceVer; + + if (l >= n * 2) + { + m_planei.sliceVer += n; + m_planei.sliceHor -= n; + } + } + } + return 0; +} + +int ScalerSlice::initFromSrc(uint8_t *src4, const int stride4, int srcW, int lumY, int lumH, int crY, int crH, int relative) +{ + int i = 0; + + const int startm_numSlicePlane = { lumY, crY, crY, lumY }; + + const int endm_numSlicePlane = { lumY + lumH, crY + crH, crY + crH, lumY + lumH }; + + uint8_t *const src_m_numSlicePlane = { src0 + (relative ? 0 : start0) * stride0, + src1 + (relative ? 0 : start1) * stride1, + src2 + (relative ? 0 : start2) * stride2, + src3 + (relative ? 0 : start3) * stride3 }; + + m_width = srcW; + + for (i = 0; i < m_numSlicePlane; ++i) + { + int j; + int first = m_planei.sliceVer; + int n = m_planei.availLines; + int lines = endi - starti; + int tot_lines = endi - first; + + if (starti >= first && n >= tot_lines) + { + m_planei.sliceHor = x265_max(tot_lines, m_planei.sliceHor); + for (j = 0; j < lines; j += 1) + m_planei.lineBufstarti - first + j = src_i + j * stridei; + } + else + { + m_planei.sliceVer = starti; + lines = lines > n ? n : lines; + m_planei.sliceHor = lines; + for (j = 0; j < lines; j += 1) + m_planei.lineBufj = src_i + j * stridei; + } + } + return 0; +} +}
View file
x265_3.4.tar.gz/source/common/scaler.h
Added
@@ -0,0 +1,254 @@ +/***************************************************************************** + * Copyright (C) 2013-2020 MulticoreWare, Inc + * + * Authors: Pooja Venkatesan <pooja@multicorewareinc.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. + *****************************************************************************/ + +#ifndef X265_SCALER_H +#define X265_SCALER_H + +#include "common.h" + +namespace X265_NS { +//x265 private namespace + +class ScalerSlice; +class VideoDesc; + +#define MAX_NUM_LINES_AHEAD 4 +#define SCALER_ALIGN(x, j) (((x)+(j)-1)&~((j)-1)) +#define X265_ABS(j) ((j) >= 0 ? (j) : (-(j))) +#define SCALER_MAX_REDUCE_CUTOFF 0.002 +#define SCALER_BITEXACT 0x80000 +#define ROUNDED_DIVISION(i,j) (((i)>0 ? (i) + ((j)>>1) : (i) - ((j)>>1))/(j)) +#define UH_CEIL_SHIFTR(i,j) (!scale_builtin_constant_p(j) ? 
-((-(i)) >> (j)) \ + : ((i) + (1<<(j)) - 1) >> (j)) + +#if defined(__GNUC__) || defined(__clang__) +# define scale_builtin_constant_p __builtin_constant_p +#else +# define scale_builtin_constant_p(x) 0 +#endif + +enum ResFactor +{ + RES_FACTOR_64, RES_FACTOR_32, RES_FACTOR_16, RES_FACTOR_8, + RES_FACTOR_4, RES_FACTOR_DEF, NUM_RES_FACTOR +}; + +enum ScalerFactor +{ + FACTOR_4, FACTOR_8, NUM_FACTOR +}; + +enum FilterSize +{ + FIL_4, FIL_6, FIL_8, FIL_9, FIL_10, FIL_11, FIL_13, FIL_15, + FIL_16, FIL_17, FIL_19, FIL_22, FIL_24, FIL_DEF, NUM_FIL +}; + +class ScalerFilter { +public: + int m_filtLen; + int32_t* m_filtPos; // Array of horizontal/vertical starting pos for each dst for luma / chroma planes. + int16_t* m_filt; // Array of horizontal/vertical filter coefficients for luma / chroma planes. + ScalerSlice* m_sourceSlice; // Source slice + ScalerSlice* m_destSlice; // Output slice + ScalerFilter(); + virtual ~ScalerFilter(); + virtual void process(int sliceVer, int sliceHor) = 0; + int initCoeff(int flag, int inc, int srcW, int dstW, int filtAlign, int one, int sourcePos, int destPos); + void setSlice(ScalerSlice* source, ScalerSlice* dest) { m_sourceSlice = source; m_destSlice = dest; } +}; + +class VideoDesc { +public: + int m_width; + int m_height; + int m_csp; + int m_inputDepth; + + VideoDesc(int w, int h, int csp, int bitDepth) + { + m_width = w; + m_height = h; + m_csp = csp; + m_inputDepth = bitDepth; + } +}; + +typedef struct ScalerPlane +{ + int availLines; // max number of lines that can be held by this plane + int sliceVer; // index of first line + int sliceHor; // number of lines + uint8_t** lineBuf; // line buffer +} ScalerPlane; + +// Assist horizontal filtering, base class +class HFilterScaler { +public: + int m_bitDepth; +public: + HFilterScaler() :m_bitDepth(0) {}; + virtual ~HFilterScaler() {}; + virtual void doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) = 0; +}; + +// 
Assist vertical filtering, base class +class VFilterScaler { +public: + int m_bitDepth; +public: + VFilterScaler() :m_bitDepth(0) {}; + virtual ~VFilterScaler() {}; + virtual void yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) = 0; +}; + +// Assist horizontal filtering, process 8 bit case +class HFilterScaler8Bit : public HFilterScaler { +public: + HFilterScaler8Bit() { m_bitDepth = 8; } + virtual void doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize); +}; + +// Assist horizontal filtering, process 10 bit case +class HFilterScaler10Bit : public HFilterScaler { +public: + HFilterScaler10Bit() { m_bitDepth = 10; } + virtual void doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize); +}; + +// Assist vertical filtering, process 8 bit case +class VFilterScaler8Bit : public VFilterScaler { +public: + VFilterScaler8Bit() { m_bitDepth = 8; } + virtual void yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW); +}; + +// Assist vertical filtering, process 10 bit case +class VFilterScaler10Bit : public VFilterScaler { +public: + VFilterScaler10Bit() { m_bitDepth = 10; } + virtual void yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW); +}; + +// Horizontal filter for luma +class ScalerHLumFilter : public ScalerFilter { +private: + HFilterScaler* m_hFilterScaler; +public: + ScalerHLumFilter(int bitDepth) { bitDepth == 8 ? m_hFilterScaler = new HFilterScaler8Bit : bitDepth == 10 ? 
m_hFilterScaler = new HFilterScaler10Bit : NULL;} + ~ScalerHLumFilter() { if (m_hFilterScaler) X265_FREE(m_hFilterScaler); } + virtual void process(int sliceVer, int sliceHor); +}; + +// Horizontal filter for chroma +class ScalerHCrFilter : public ScalerFilter { +private: + HFilterScaler* m_hFilterScaler; +public: + ScalerHCrFilter(int bitDepth) { bitDepth == 8 ? m_hFilterScaler = new HFilterScaler8Bit : bitDepth == 10 ? m_hFilterScaler = new HFilterScaler10Bit : NULL;} + ~ScalerHCrFilter() { if (m_hFilterScaler) X265_FREE(m_hFilterScaler); } + virtual void process(int sliceVer, int sliceHor); +}; + +// Vertical filter for luma +class ScalerVLumFilter : public ScalerFilter { +private: + VFilterScaler* m_vFilterScaler; +public: + ScalerVLumFilter(int bitDepth) { bitDepth == 8 ? m_vFilterScaler = new VFilterScaler8Bit : bitDepth == 10 ? m_vFilterScaler = new VFilterScaler10Bit : NULL;} + ~ScalerVLumFilter() { if (m_vFilterScaler) X265_FREE(m_vFilterScaler); } + virtual void process(int sliceVer, int sliceHor); +}; + +// Vertical filter for chroma +class ScalerVCrFilter : public ScalerFilter { +private: + VFilterScaler* m_vFilterScaler; +public: + ScalerVCrFilter(int bitDepth) { bitDepth == 8 ? m_vFilterScaler = new VFilterScaler8Bit : bitDepth == 10 ? 
m_vFilterScaler = new VFilterScaler10Bit : NULL;} + ~ScalerVCrFilter() { if (m_vFilterScaler) X265_FREE(m_vFilterScaler); } + virtual void process(int sliceVer, int sliceHor); +}; + +class ScalerSlice +{ +private: + enum ScalerSlicePlaneNum { m_numSlicePlane = 4 }; +public: + int m_width; // Slice line width + int m_hCrSubSample; // horizontal Chroma subsampling factor + int m_vCrSubSample; // vertical chroma subsampling factor + int m_isRing; // flag to identify if this ScalerSlice is a ring buffer + int m_destroyLines; // flag to identify if there are dynamic allocated lines + ScalerPlane m_planem_numSlicePlane; +public: + ScalerSlice(); + ~ScalerSlice() { destroy(); } + int rotate(int lum, int cr); + void fillOnes(int n, int is16bit); + int create(int lumLines, int crLines, int h_sub_sample, int v_sub_sample, int ring); + int createLines(int size, int width); + void destroyLines(); + void destroy(); + int initFromSrc(uint8_t *src4, const int stride4, int srcW, int lumY, int lumH, int crY, int crH, int relative); +}; + +class ScalerFilterManager { +private: + enum ScalerFilterNum { m_numSlice = 3, m_numFilter = 4 }; + +private: + int m_bitDepth; + int m_algorithmFlags; // 1, bilinear; 4 bicubic, default is bicubic + int m_srcW; // Width of source luma planes. + int m_srcH; // Height of source luma planes. + int m_dstW; // Width of dest luma planes. + int m_dstH; // Height of dest luma planes. + int m_crSrcW; // Width of source chroma planes. + int m_crSrcH; // Height of source chroma planes. + int m_crDstW; // Width of dest chroma planes. + int m_crDstH; // Height of dest chroma planes. + int m_crSrcHSubSample; // Binary log of horizontal subsampling factor between Y and Cr planes in src image. + int m_crSrcVSubSample; // Binary log of vertical subsampling factor between Y and Cr planes in src image. + int m_crDstHSubSample; // Binary log of horizontal subsampling factor between Y and Cr planes in dest image. 
+ int m_crDstVSubSample; // Binary log of vertical subsampling factor between Y and Cr planes in dest image. + ScalerSlice* m_slicesm_numSlice; + ScalerFilter* m_ScalerFiltersm_numFilter; +private: + int getLocalPos(int crSubSample, int pos); + void getMinBufferSize(int *out_lum_size, int *out_cr_size); + int initScalerSlice(); +public: + ScalerFilterManager(); + ~ScalerFilterManager() { + for (int i = 0; i < m_numSlice; i++) + if (m_slicesi) { m_slicesi->destroy(); delete m_slicesi; m_slicesi = NULL; } + for (int i = 0; i < m_numFilter; i++) + if (m_ScalerFiltersi) { delete m_ScalerFiltersi; m_ScalerFiltersi = NULL; } + } + int init(int algorithmFlags, VideoDesc* srcVideoDesc, VideoDesc* dstVideoDesc); + int scale_pic(void** src, void** dst, int* srcStride, int* dstStride); +}; +} + +#endif //ifndef X265_SCALER_H
View file
x265_3.3.tar.gz/source/common/threading.h -> x265_3.4.tar.gz/source/common/threading.h
Changed
@@ -238,6 +238,14 @@ LeaveCriticalSection(&m_cs); } + void decr() + { + EnterCriticalSection(&m_cs); + m_val--; + WakeAllConditionVariable(&m_cv); + LeaveCriticalSection(&m_cs); + } + protected: CRITICAL_SECTION m_cs; @@ -436,6 +444,14 @@ pthread_mutex_unlock(&m_mutex); } + void decr() + { + pthread_mutex_lock(&m_mutex); + m_val--; + pthread_cond_broadcast(&m_cond); + pthread_mutex_unlock(&m_mutex); + } + protected: pthread_mutex_t m_mutex;
View file
x265_3.3.tar.gz/source/encoder/analysis.cpp -> x265_3.4.tar.gz/source/encoder/analysis.cpp
Changed
@@ -1272,7 +1272,7 @@ md.predPRED_SKIP.cu.initSubCU(parentCTU, cuGeom, qp); checkMerge2Nx2N_rd0_4(md.predPRED_SKIP, md.predPRED_MERGE, cuGeom); - skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode; + skipRecursion = !!m_param->recursionSkipMode && md.bestMode; if (m_param->rdLevel) skipModes = m_param->bEnableEarlySkip && md.bestMode; } @@ -1296,7 +1296,7 @@ md.predPRED_SKIP.cu.initSubCU(parentCTU, cuGeom, qp); checkMerge2Nx2N_rd0_4(md.predPRED_SKIP, md.predPRED_MERGE, cuGeom); - skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode; + skipRecursion = !!m_param->recursionSkipMode && md.bestMode; if (m_param->rdLevel) skipModes = m_param->bEnableEarlySkip && md.bestMode; } @@ -1314,15 +1314,23 @@ skipModes = (m_param->bEnableEarlySkip || m_refineLevel == 2) && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth } - if (md.bestMode && m_param->bEnableRecursionSkip && !bCtuInfoCheck && !(m_param->bAnalysisType == AVC_INFO && m_param->analysisLoadReuseLevel == 7 && (m_modeFlag0 || m_modeFlag1))) + if (md.bestMode && m_param->recursionSkipMode && !bCtuInfoCheck && !(m_param->bAnalysisType == AVC_INFO && m_param->analysisLoadReuseLevel == 7 && (m_modeFlag0 || m_modeFlag1))) { skipRecursion = md.bestMode->cu.isSkipped(0); - if (mightSplit && depth >= minDepth && !skipRecursion) + if (mightSplit && !skipRecursion) { - if (depth) - skipRecursion = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode); - if (m_bHD && !skipRecursion && m_param->rdLevel == 2 && md.fencYuv.m_size != MAX_CU_SIZE) + if (depth >= minDepth && m_param->recursionSkipMode == RDCOST_BASED_RSKIP) + { + if (depth) + skipRecursion = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode); + if (m_bHD && !skipRecursion && m_param->rdLevel == 2 && md.fencYuv.m_size != MAX_CU_SIZE) + skipRecursion = complexityCheckCU(*md.bestMode); + } + else if (cuGeom.log2CUSize >= MAX_LOG2_CU_SIZE - 1 && m_param->recursionSkipMode == EDGE_BASED_RSKIP) + { skipRecursion = 
complexityCheckCU(*md.bestMode); + } + } } if (m_param->bAnalysisType == AVC_INFO && md.bestMode && cuGeom.numPartitions <= 16 && m_param->analysisLoadReuseLevel == 7) @@ -1972,7 +1980,7 @@ checkInter_rd5_6(md.predPRED_2Nx2N, cuGeom, SIZE_2Nx2N, refMasks); checkBestMode(md.predPRED_2Nx2N, cuGeom.depth); - if (m_param->bEnableRecursionSkip && depth && m_modeDepthdepth - 1.bestMode) + if (m_param->recursionSkipMode && depth && m_modeDepthdepth - 1.bestMode) skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0); } if (m_param->analysisLoadReuseLevel > 4 && m_reusePartSizecuGeom.absPartIdx == SIZE_2Nx2N) @@ -1996,7 +2004,7 @@ checkInter_rd5_6(md.predPRED_2Nx2N, cuGeom, SIZE_2Nx2N, refMasks); checkBestMode(md.predPRED_2Nx2N, cuGeom.depth); - if (m_param->bEnableRecursionSkip && depth && m_modeDepthdepth - 1.bestMode) + if (m_param->recursionSkipMode && depth && m_modeDepthdepth - 1.bestMode) skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0); } } @@ -2015,8 +2023,10 @@ checkInter_rd5_6(md.predPRED_2Nx2N, cuGeom, SIZE_2Nx2N, refMasks); checkBestMode(md.predPRED_2Nx2N, cuGeom.depth); - if (m_param->bEnableRecursionSkip && depth && m_modeDepthdepth - 1.bestMode) + if (m_param->recursionSkipMode == RDCOST_BASED_RSKIP && depth && m_modeDepthdepth - 1.bestMode) skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0); + else if (cuGeom.log2CUSize >= MAX_LOG2_CU_SIZE - 1 && m_param->recursionSkipMode == EDGE_BASED_RSKIP) + skipRecursion = md.bestMode && complexityCheckCU(*md.bestMode); } if (m_param->bAnalysisType == AVC_INFO && md.bestMode && cuGeom.numPartitions <= 16 && m_param->analysisLoadReuseLevel == 7) skipRecursion = true; @@ -3525,27 +3535,47 @@ bool Analysis::complexityCheckCU(const Mode& bestMode) { - uint32_t mean = 0; - uint32_t homo = 0; - uint32_t cuSize = bestMode.fencYuv->m_size; - for (uint32_t y = 0; y < cuSize; y++) { - for (uint32_t x = 0; x < cuSize; x++) { - mean += (bestMode.fencYuv->m_buf0y * cuSize + x); + if 
(m_param->recursionSkipMode == RDCOST_BASED_RSKIP) + { + uint32_t mean = 0; + uint32_t homo = 0; + uint32_t cuSize = bestMode.fencYuv->m_size; + for (uint32_t y = 0; y < cuSize; y++) { + for (uint32_t x = 0; x < cuSize; x++) { + mean += (bestMode.fencYuv->m_buf0y * cuSize + x); + } } - } - mean = mean / (cuSize * cuSize); - for (uint32_t y = 0 ; y < cuSize; y++){ - for (uint32_t x = 0 ; x < cuSize; x++){ - homo += abs(int(bestMode.fencYuv->m_buf0y * cuSize + x - mean)); + mean = mean / (cuSize * cuSize); + for (uint32_t y = 0; y < cuSize; y++) { + for (uint32_t x = 0; x < cuSize; x++) { + homo += abs(int(bestMode.fencYuv->m_buf0y * cuSize + x - mean)); + } } - } - homo = homo / (cuSize * cuSize); + homo = homo / (cuSize * cuSize); - if (homo < (.1 * mean)) - return true; + if (homo < (.1 * mean)) + return true; - return false; -} + return false; + } + else + { + int blockType = bestMode.cu.m_log2CUSize0 - LOG2_UNIT_SIZE; + int shift = bestMode.cu.m_log2CUSize0 * LOG2_UNIT_SIZE; + intptr_t stride = m_frame->m_fencPic->m_stride; + intptr_t blockOffsetLuma = bestMode.cu.m_cuPelX + bestMode.cu.m_cuPelY * stride; + uint64_t sum_ss = primitives.cublockType.var(m_frame->m_edgeBitPic + blockOffsetLuma, stride); + uint32_t sum = (uint32_t)sum_ss; + uint32_t ss = (uint32_t)(sum_ss >> 32); + uint32_t pixelCount = 1 << shift; + double cuEdgeVariance = (ss - ((double)sum * sum / pixelCount)) / pixelCount; + + if (cuEdgeVariance > (double)m_param->edgeVarThreshold) + return false; + else + return true; + } + } uint32_t Analysis::calculateCUVariance(const CUData& ctu, const CUGeom& cuGeom) { @@ -3570,7 +3600,6 @@ cnt++; } } - return cuVariance / cnt; }
View file
x265_3.3.tar.gz/source/encoder/analysis.h -> x265_3.4.tar.gz/source/encoder/analysis.h
Changed
@@ -52,7 +52,7 @@ splitRefs = 0; mvCost0 = 0; // L0 mvCost1 = 0; // L1 - sa8dCost = 0; + sa8dCost = 0; } }; @@ -120,7 +120,6 @@ Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext); int32_t loadTUDepth(CUGeom cuGeom, CUData parentCTU); - protected: /* Analysis data for save/load mode, writes/reads data based on absPartIdx */ x265_analysis_inter_data* m_reuseInterDataCTU;
View file
x265_3.3.tar.gz/source/encoder/api.cpp -> x265_3.4.tar.gz/source/encoder/api.cpp
Changed
@@ -1016,12 +1016,12 @@ void x265_zone_free(x265_param *param) { - if (param && param->rc.zonefileCount) { + if (param && param->rc.zones && (param->rc.zoneCount || param->rc.zonefileCount)) + { for (int i = 0; i < param->rc.zonefileCount; i++) x265_free(param->rc.zonesi.zoneParam); - } - if (param && (param->rc.zoneCount || param->rc.zonefileCount)) x265_free(param->rc.zones); + } } static const x265_api libapi = @@ -1294,6 +1294,8 @@ fprintf(csvfp, "RateFactor, "); if (param->rc.vbvBufferSize) fprintf(csvfp, "BufferFill, BufferFillFinal, "); + if (param->rc.vbvBufferSize && param->csvLogLevel >= 2) + fprintf(csvfp, "UnclippedBufferFillFinal, "); if (param->bEnablePsnr) fprintf(csvfp, "Y PSNR, U PSNR, V PSNR, YUV PSNR, "); if (param->bEnableSsim) @@ -1405,6 +1407,8 @@ fprintf(param->csvfpt, "%.3lf,", frameStats->rateFactor); if (param->rc.vbvBufferSize) fprintf(param->csvfpt, "%.3lf, %.3lf,", frameStats->bufferFill, frameStats->bufferFillFinal); + if (param->rc.vbvBufferSize && param->csvLogLevel >= 2) + fprintf(param->csvfpt, "%.3lf,", frameStats->unclippedBufferFillFinal); if (param->bEnablePsnr) fprintf(param->csvfpt, "%.3lf, %.3lf, %.3lf, %.3lf,", frameStats->psnrY, frameStats->psnrU, frameStats->psnrV, frameStats->psnr); if (param->bEnableSsim)
View file
x265_3.3.tar.gz/source/encoder/encoder.cpp -> x265_3.4.tar.gz/source/encoder/encoder.cpp
Changed
@@ -218,10 +218,7 @@ if (m_param->bHistBasedSceneCut) { - for (int i = 0; i < x265_cli_cspsm_param->internalCsp.planes; i++) - { - m_planeSizesi = (m_param->sourceWidth >> x265_cli_cspsp->internalCsp.widthi) * (m_param->sourceHeight >> x265_cli_cspsm_param->internalCsp.heighti); - } + m_planeSizes0 = (m_param->sourceWidth >> x265_cli_cspsp->internalCsp.width0) * (m_param->sourceHeight >> x265_cli_cspsm_param->internalCsp.height0); uint32_t pixelbytes = m_param->internalBitDepth > 8 ? 2 : 1; m_edgePic = X265_MALLOC(pixel, m_planeSizes0 * pixelbytes); m_edgeHistThreshold = m_param->edgeTransitionThreshold; @@ -1443,9 +1440,9 @@ int32_t planeCount = x265_cli_cspsm_param->internalCsp.planes; memset(m_edgePic, 0, bufSize); - if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, pic->width, false)) + if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, pic->width, false, 1)) { - x265_log(m_param, X265_LOG_ERROR, "Failed edge computation!"); + x265_log(m_param, X265_LOG_ERROR, "Failed to compute edge!"); return false; } @@ -1605,6 +1602,14 @@ if (m_param->bHistBasedSceneCut && pic_in) { x265_picture *pic = (x265_picture *) pic_in; + + if (pic->poc == 0) + { + /* for entire encode compute the chroma plane sizes only once */ + for (int i = 1; i < x265_cli_cspsm_param->internalCsp.planes; i++) + m_planeSizesi = (pic->width >> x265_cli_cspsm_param->internalCsp.widthi) * (pic->height >> x265_cli_cspsm_param->internalCsp.heighti); + } + if (computeHistograms(pic)) { double maxUVSad = 0.0, edgeSad = 0.0; @@ -1752,6 +1757,12 @@ } } } + if (m_param->recursionSkipMode == EDGE_BASED_RSKIP && m_param->bHistBasedSceneCut) + { + pixel* src = m_edgePic; + primitives.planecopy_pp_shr(src, inFrame->m_fencPic->m_picWidth, inFrame->m_edgeBitPic, inFrame->m_fencPic->m_stride, + inFrame->m_fencPic->m_picWidth, inFrame->m_fencPic->m_picHeight, 0); + } } else { @@ -2414,7 +2425,7 @@ encParam->maxNumReferences = param->maxNumReferences; // never uses more refs than specified 
in stream headers encParam->bEnableFastIntra = param->bEnableFastIntra; encParam->bEnableEarlySkip = param->bEnableEarlySkip; - encParam->bEnableRecursionSkip = param->bEnableRecursionSkip; + encParam->recursionSkipMode = param->recursionSkipMode; encParam->searchMethod = param->searchMethod; /* Scratch buffer prevents me_range from being increased for esa/tesa */ if (param->searchRange < encParam->searchRange) @@ -3006,6 +3017,8 @@ frameStats->ipCostRatio = curFrame->m_lowres.ipCostRatio; frameStats->bufferFill = m_rateControl->m_bufferFillActual; frameStats->bufferFillFinal = m_rateControl->m_bufferFillFinal; + if (m_param->csvLogLevel >= 2) + frameStats->unclippedBufferFillFinal = m_rateControl->m_unclippedBufferFillFinal; frameStats->frameLatency = inPoc - poc; if (m_param->rc.rateControlMode == X265_RC_CRF) frameStats->rateFactor = curEncData.m_rateFactor; @@ -3400,7 +3413,7 @@ p->maxNumReferences = zone->maxNumReferences; p->bEnableFastIntra = zone->bEnableFastIntra; p->bEnableEarlySkip = zone->bEnableEarlySkip; - p->bEnableRecursionSkip = zone->bEnableRecursionSkip; + p->recursionSkipMode = zone->recursionSkipMode; p->searchMethod = zone->searchMethod; p->searchRange = zone->searchRange; p->subpelRefine = zone->subpelRefine; @@ -3681,20 +3694,6 @@ if (p->analysisLoad && !p->analysisLoadReuseLevel) p->analysisLoadReuseLevel = 5; - if ((p->bAnalysisType == DEFAULT) && p->rc.cuTree) - { - if (p->analysisSaveReuseLevel && p->analysisSaveReuseLevel < 10) - { - x265_log(p, X265_LOG_WARNING, "cu-tree works only with analysis-save-reuse-level 10, Disabling cu-tree\n"); - p->rc.cuTree = 0; - } - if (p->analysisLoadReuseLevel && p->analysisLoadReuseLevel < 10) - { - x265_log(p, X265_LOG_WARNING, "cu-tree works only with analysis-load-reuse-level 10, Disabling cu-tree\n"); - p->rc.cuTree = 0; - } - } - if ((p->analysisLoad || p->analysisSave) && (p->bDistributeModeAnalysis || p->bDistributeMotionEstimation)) { x265_log(p, X265_LOG_WARNING, "Analysis load/save options 
incompatible with pmode/pme, Disabling pmode/pme\n"); @@ -3867,29 +3866,30 @@ } else { - if (fread(&m_conformanceWindow.rightOffset, sizeof(int), 1, m_analysisFileIn) != 1) + int rightOffset, bottomOffset; + if (fread(&rightOffset, sizeof(int), 1, m_analysisFileIn) != 1) { x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data. Conformance window right offset missing\n"); m_aborted = true; } - else if (m_conformanceWindow.rightOffset && p->analysisLoadReuseLevel > 1) + else if (rightOffset && p->analysisLoadReuseLevel > 1) { int scaleFactor = p->scaleFactor < 2 ? 1 : p->scaleFactor; - padsize = m_conformanceWindow.rightOffset * scaleFactor; + padsize = rightOffset * scaleFactor; p->sourceWidth += padsize; m_conformanceWindow.bEnabled = true; m_conformanceWindow.rightOffset = padsize; } - if (fread(&m_conformanceWindow.bottomOffset, sizeof(int), 1, m_analysisFileIn) != 1) + if (fread(&bottomOffset, sizeof(int), 1, m_analysisFileIn) != 1) { x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data. Conformance window bottom offset missing\n"); m_aborted = true; } - else if (m_conformanceWindow.bottomOffset && p->analysisLoadReuseLevel > 1) + else if (bottomOffset && p->analysisLoadReuseLevel > 1) { int scaleFactor = p->scaleFactor < 2 ? 1 : p->scaleFactor; - padsize = m_conformanceWindow.bottomOffset * scaleFactor; + padsize = bottomOffset * scaleFactor; p->sourceHeight += padsize; m_conformanceWindow.bEnabled = true; m_conformanceWindow.bottomOffset = padsize; @@ -4196,7 +4196,7 @@ x265_log(p, X265_LOG_WARNING, "Radl requires fixed gop-length (keyint == min-keyint). Disabling radl.\n"); } - if ((p->chunkStart || p->chunkEnd) && p->bOpenGOP) + if ((p->chunkStart || p->chunkEnd) && p->bOpenGOP && m_param->bResetZoneConfig) { p->chunkStart = p->chunkEnd = 0; x265_log(p, X265_LOG_WARNING, "Chunking requires closed gop structure. 
Disabling chunking.\n"); @@ -4229,12 +4229,6 @@ x265_log(p, X265_LOG_WARNING, "Turning on repeat - headers for zone encoding\n"); } - if (!m_param->bResetZoneConfig && (p->keyframeMax != p->keyframeMin)) - x265_log(p, X265_LOG_WARNING, "External zone reconfiguration requires a fixed GOP size to enable appropriate signaling of HRD info\n"); - - if (!m_param->bResetZoneConfig && (p->reconfigWindowSize != (uint64_t)p->keyframeMax)) - x265_log(p, X265_LOG_WARNING, "Zone size must be multiple of GOP size to enable appropriate signaling of HRD info\n"); - if (m_param->bEnableHME) { if (m_param->sourceHeight < 540) @@ -4311,18 +4305,27 @@ } } + uint32_t numCUsLoad, numCUsInHeightLoad; + /* Now arrived at the right frame, read the record */ analysis->poc = poc; analysis->frameRecordSize = frameRecordSize; X265_FREAD(&analysis->sliceType, sizeof(int), 1, m_analysisFileIn, &(picData->sliceType)); X265_FREAD(&analysis->bScenecut, sizeof(int), 1, m_analysisFileIn, &(picData->bScenecut)); X265_FREAD(&analysis->satdCost, sizeof(int64_t), 1, m_analysisFileIn, &(picData->satdCost)); - X265_FREAD(&analysis->numCUsInFrame, sizeof(int), 1, m_analysisFileIn, &(picData->numCUsInFrame)); + X265_FREAD(&numCUsLoad, sizeof(int), 1, m_analysisFileIn, &(picData->numCUsInFrame)); X265_FREAD(&analysis->numPartitions, sizeof(int), 1, m_analysisFileIn, &(picData->numPartitions)); + /* Update analysis info to save current settings */ + uint32_t widthInCU = (m_param->sourceWidth + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize; + uint32_t heightInCU = (m_param->sourceHeight + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize; + uint32_t numCUsInFrame = widthInCU * heightInCU; + analysis->numCUsInFrame = numCUsInFrame; + analysis->numCuInHeight = heightInCU; + if (m_param->bDisableLookahead) { - X265_FREAD(&analysis->numCuInHeight, sizeof(uint32_t), 1, m_analysisFileIn, &(picData->numCuInHeight)); + X265_FREAD(&numCUsInHeightLoad, sizeof(uint32_t), 1, m_analysisFileIn, 
&(picData->numCuInHeight)); X265_FREAD(&analysis->lookahead, sizeof(x265_lookahead_data), 1, m_analysisFileIn, &(picData->lookahead)); } int scaledNumPartition = analysis->numPartitions; @@ -4335,16 +4338,16 @@ if (m_param->ctuDistortionRefine == CTU_DISTORTION_INTERNAL) { - X265_FREAD((analysis->distortionData)->ctuDistortion, sizeof(sse_t), analysis->numCUsInFrame, m_analysisFileIn, picDistortion); + X265_FREAD((analysis->distortionData)->ctuDistortion, sizeof(sse_t), numCUsLoad, m_analysisFileIn, picDistortion); computeDistortionOffset(analysis); } if (m_param->bDisableLookahead && m_rateControl->m_isVbv) { size_t vbvCount = m_param->lookaheadDepth + m_param->bframes + 2; - X265_FREAD(analysis->lookahead.intraVbvCost, sizeof(uint32_t), analysis->numCUsInFrame, m_analysisFileIn, picData->lookahead.intraVbvCost); - X265_FREAD(analysis->lookahead.vbvCost, sizeof(uint32_t), analysis->numCUsInFrame, m_analysisFileIn, picData->lookahead.vbvCost); - X265_FREAD(analysis->lookahead.satdForVbv, sizeof(uint32_t), analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.satdForVbv); - X265_FREAD(analysis->lookahead.intraSatdForVbv, sizeof(uint32_t), analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.intraSatdForVbv); + X265_FREAD(analysis->lookahead.intraVbvCost, sizeof(uint32_t), numCUsLoad, m_analysisFileIn, picData->lookahead.intraVbvCost); + X265_FREAD(analysis->lookahead.vbvCost, sizeof(uint32_t), numCUsLoad, m_analysisFileIn, picData->lookahead.vbvCost); + X265_FREAD(analysis->lookahead.satdForVbv, sizeof(uint32_t), numCUsInHeightLoad, m_analysisFileIn, picData->lookahead.satdForVbv); + X265_FREAD(analysis->lookahead.intraSatdForVbv, sizeof(uint32_t), numCUsInHeightLoad, m_analysisFileIn, picData->lookahead.intraSatdForVbv); X265_FREAD(analysis->lookahead.plannedSatd, sizeof(int64_t), vbvCount, m_analysisFileIn, picData->lookahead.plannedSatd); if (m_param->scaleFactor) @@ -4352,12 +4355,12 @@ for (uint64_t index = 0; index < vbvCount; index++) 
analysis->lookahead.plannedSatdindex *= factor; - for (uint32_t i = 0; i < analysis->numCuInHeight; i++) + for (uint32_t i = 0; i < numCUsInHeightLoad; i++) { analysis->lookahead.satdForVbvi *= factor; analysis->lookahead.intraSatdForVbvi *= factor; } - for (uint32_t i = 0; i < analysis->numCUsInFrame; i++) + for (uint32_t i = 0; i < numCUsLoad; i++) { analysis->lookahead.vbvCosti *= factor; analysis->lookahead.intraVbvCosti *= factor; @@ -4407,13 +4410,13 @@ if (!m_param->scaleFactor) { - X265_FREAD((analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileIn, intraPic->modes); + X265_FREAD((analysis->intraData)->modes, sizeof(uint8_t), numCUsLoad * analysis->numPartitions, m_analysisFileIn, intraPic->modes); } else { - uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, analysis->numCUsInFrame * scaledNumPartition); - X265_FREAD(tempLumaBuf, sizeof(uint8_t), analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn, intraPic->modes); - for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx < analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++, cnt += factor) + uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, numCUsLoad * scaledNumPartition); + X265_FREAD(tempLumaBuf, sizeof(uint8_t), numCUsLoad * scaledNumPartition, m_analysisFileIn, intraPic->modes); + for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx < numCUsLoad * scaledNumPartition; ctu32Idx++, cnt += factor) memset(&(analysis->intraData)->modescnt, tempLumaBufctu32Idx, factor); X265_FREE(tempLumaBuf); } @@ -4447,7 +4450,7 @@ } if (m_param->bAnalysisType == HEVC_INFO) { - depthBytes = analysis->numCUsInFrame * analysis->numPartitions; + depthBytes = numCUsLoad * analysis->numPartitions; memcpy(((x265_analysis_inter_data *)analysis->interData)->depth, interPic->depth, depthBytes); } else @@ -4551,25 +4554,26 @@ { if (!m_param->scaleFactor) { - X265_FREAD((analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileIn, 
intraPic->modes); + X265_FREAD((analysis->intraData)->modes, sizeof(uint8_t), numCUsLoad * analysis->numPartitions, m_analysisFileIn, intraPic->modes); } else { - uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, analysis->numCUsInFrame * scaledNumPartition); - X265_FREAD(tempLumaBuf, sizeof(uint8_t), analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn, intraPic->modes); - for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx < analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++, cnt += factor) + uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, numCUsLoad * scaledNumPartition); + X265_FREAD(tempLumaBuf, sizeof(uint8_t), numCUsLoad * scaledNumPartition, m_analysisFileIn, intraPic->modes); + for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx < numCUsLoad * scaledNumPartition; ctu32Idx++, cnt += factor) memset(&(analysis->intraData)->modescnt, tempLumaBufctu32Idx, factor); X265_FREE(tempLumaBuf); } } } else - X265_FREAD((analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir, m_analysisFileIn, interPic->ref); + X265_FREAD((analysis->interData)->ref, sizeof(int32_t), numCUsLoad * X265_MAX_PRED_MODE_PER_CTU * numDir, m_analysisFileIn, interPic->ref); consumedBytes += frameRecordSize; if (numDir == 1) totalConsumedBytes = consumedBytes; } + #undef X265_FREAD } @@ -5032,13 +5036,14 @@ X265_PARAM_VALIDATE(saveParam->lookaheadDepth, sizeof(int), 1, &m_param->lookaheadDepth, rc - lookahead); X265_PARAM_VALIDATE(saveParam->chunkStart, sizeof(int), 1, &m_param->chunkStart, chunk-start); X265_PARAM_VALIDATE(saveParam->chunkEnd, sizeof(int), 1, &m_param->chunkEnd, chunk-end); - X265_PARAM_VALIDATE(saveParam->cuTree,sizeof(int),1,&m_param->rc.cuTree, cutree - offset); X265_PARAM_VALIDATE(saveParam->ctuDistortionRefine, sizeof(int), 1, &m_param->ctuDistortionRefine, ctu - distortion); + X265_PARAM_VALIDATE(saveParam->frameDuplication, sizeof(int), 1, &m_param->bEnableFrameDuplication, frame - dup); int sourceHeight, sourceWidth; if 
(writeFlag) { X265_PARAM_VALIDATE(saveParam->analysisReuseLevel, sizeof(int), 1, &m_param->analysisSaveReuseLevel, analysis - save - reuse - level); + X265_PARAM_VALIDATE(saveParam->cuTree, sizeof(int), 1, &m_param->rc.cuTree, cutree-offset); sourceHeight = m_param->sourceHeight - m_conformanceWindow.bottomOffset; sourceWidth = m_param->sourceWidth - m_conformanceWindow.rightOffset; X265_PARAM_VALIDATE(saveParam->sourceWidth, sizeof(int), 1, &sourceWidth, res-width); @@ -5073,6 +5078,15 @@ return -1; } + int bcutree; + X265_FREAD(&bcutree, sizeof(int), 1, m_analysisFileIn, &(saveParam->cuTree)); + if (loadLevel == 10 && m_param->rc.cuTree && (!bcutree || saveLevel < 2)) + { + x265_log(NULL, X265_LOG_ERROR, "Error reading cu-tree info. Disabling cutree offsets. \n"); + m_param->rc.cuTree = 0; + return -1; + } + bool error = false; int curSourceHeight = m_param->sourceHeight - m_conformanceWindow.bottomOffset; int curSourceWidth = m_param->sourceWidth - m_conformanceWindow.rightOffset; @@ -5701,7 +5715,7 @@ TOOLCMP(oldParam->maxNumReferences, newParam->maxNumReferences, "ref=%d to %d\n"); TOOLCMP(oldParam->bEnableFastIntra, newParam->bEnableFastIntra, "fast-intra=%d to %d\n"); TOOLCMP(oldParam->bEnableEarlySkip, newParam->bEnableEarlySkip, "early-skip=%d to %d\n"); - TOOLCMP(oldParam->bEnableRecursionSkip, newParam->bEnableRecursionSkip, "rskip=%d to %d\n"); + TOOLCMP(oldParam->recursionSkipMode, newParam->recursionSkipMode, "rskip=%d to %d\n"); TOOLCMP(oldParam->searchMethod, newParam->searchMethod, "me=%d to %d\n"); TOOLCMP(oldParam->searchRange, newParam->searchRange, "merange=%d to %d\n"); TOOLCMP(oldParam->subpelRefine, newParam->subpelRefine, "subme= %d to %d\n");
View file
x265_3.3.tar.gz/source/encoder/frameencoder.cpp -> x265_3.4.tar.gz/source/encoder/frameencoder.cpp
Changed
@@ -130,7 +130,7 @@ { rowSum += sliceGroupSizeAccu; m_sliceBaseRow[++sidx] = i; - } + } } X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!"); m_sliceBaseRow[0] = 0; @@ -448,6 +448,18 @@ m_ssimCnt = 0; memset(&(m_frame->m_encData->m_frameStats), 0, sizeof(m_frame->m_encData->m_frameStats)); + if (!m_param->bHistBasedSceneCut && m_param->rc.aqMode != X265_AQ_EDGE && m_param->recursionSkipMode == EDGE_BASED_RSKIP) + { + int height = m_frame->m_fencPic->m_picHeight; + int width = m_frame->m_fencPic->m_picWidth; + intptr_t stride = m_frame->m_fencPic->m_stride; + + if (!computeEdge(m_frame->m_edgeBitPic, m_frame->m_fencPic->m_picOrg[0], NULL, stride, height, width, false, 1)) + { + x265_log(m_param, X265_LOG_ERROR, " Failed to compute edge !"); + } + } + /* Emit access unit delimiter unless this is the first frame and the user is * not repeating headers (since AUD is supposed to be the first NAL in the access * unit) */
View file
x265_3.3.tar.gz/source/encoder/ratecontrol.cpp -> x265_3.4.tar.gz/source/encoder/ratecontrol.cpp
Changed
@@ -269,7 +269,7 @@ x265_log(m_param, X265_LOG_WARNING, "NAL HRD parameters require VBV parameters, ignored\n"); m_param->bEmitHRDSEI = 0; } - m_isCbr = m_param->rc.rateControlMode == X265_RC_ABR && m_isVbv && !m_2pass && m_param->rc.vbvMaxBitrate <= m_param->rc.bitrate; + m_isCbr = m_param->rc.rateControlMode == X265_RC_ABR && m_isVbv && m_param->rc.vbvMaxBitrate <= m_param->rc.bitrate; if (m_param->rc.bStrictCbr && !m_isCbr) { x265_log(m_param, X265_LOG_WARNING, "strict CBR set without CBR mode, ignored\n"); @@ -335,7 +335,7 @@ int vbvBufferSize = m_param->rc.vbvBufferSize * 1000; int vbvMaxBitrate = m_param->rc.vbvMaxBitrate * 1000; - if (m_param->bEmitHRDSEI) + if (m_param->bEmitHRDSEI && !m_param->decoderVbvMaxRate) { const HRDInfo* hrd = &sps.vuiParameters.hrdParameters; vbvBufferSize = hrd->cpbSizeValue << (hrd->cpbSizeScale + CPB_SHIFT); @@ -509,6 +509,7 @@ CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax); CMP_OPT_FIRST_PASS("scenecut", m_param->scenecutThreshold); CMP_OPT_FIRST_PASS("intra-refresh", m_param->bIntraRefresh); + CMP_OPT_FIRST_PASS("frame-dup", m_param->bEnableFrameDuplication); if (m_param->bMultiPassOptRPS) { CMP_OPT_FIRST_PASS("multi-pass-opt-rps", m_param->bMultiPassOptRPS); @@ -546,7 +547,7 @@ x265_log(m_param, X265_LOG_WARNING, "2nd pass has fewer frames than 1st pass (%d vs %d)\n", m_param->totalFrames, m_numEntries); } - if (m_param->totalFrames > m_numEntries) + if (m_param->totalFrames > m_numEntries && !m_param->bEnableFrameDuplication) { x265_log(m_param, X265_LOG_ERROR, "2nd pass has more frames than 1st pass (%d vs %d)\n", m_param->totalFrames, m_numEntries); @@ -781,6 +782,10 @@ // Init HRD HRDInfo* hrd = &sps.vuiParameters.hrdParameters; hrd->cbrFlag = m_isCbr; + if (m_param->reconfigWindowSize) { + hrd->cbrFlag = 0; + vbvMaxBitrate = m_param->decoderVbvMaxRate * 1000; + } // normalize HRD size and rate to the value / scale notation hrd->bitRateScale = x265_clip3(0, 15, calcScale(vbvMaxBitrate) - BR_SHIFT); @@ -829,7 +834,7 
@@ /* weighted average of cplx of future frames */ for (int j = 1; j < cplxBlur * 2 && j < m_numEntries - i; j++) { - int index = m_encOrderi + j; + int index = i+j; RateControlEntry *rcj = &m_rce2Passindex; weight *= 1 - pow(rcj->iCuCount / m_ncu, 2); if (weight < 0.0001) @@ -842,7 +847,7 @@ weight = 1.0; for (int j = 0; j <= cplxBlur * 2 && j <= i; j++) { - int index = m_encOrderi - j; + int index = i-j; RateControlEntry *rcj = &m_rce2Passindex; gaussianWeight = weight * exp(-j * j / 200.0); weightSum += gaussianWeight; @@ -851,7 +856,7 @@ if (weight < .0001) break; } - m_rce2Passm_encOrderi.blurredComplexity = cplxSum / weightSum; + m_rce2Passi.blurredComplexity= cplxSum / weightSum; } CHECKED_MALLOC(qScale, double, m_numEntries); if (filterSize > 1) @@ -870,7 +875,7 @@ expectedBits = 1; for (int i = 0; i < m_numEntries; i++) { - RateControlEntry* rce = &m_rce2Passm_encOrderi; + RateControlEntry* rce = &m_rce2Passi; double q = getQScale(rce, 1.0); expectedBits += qScale2bits(rce, q); m_lastQScaleForrce->sliceType = q; @@ -893,15 +898,15 @@ /* find qscale */ for (int i = 0; i < m_numEntries; i++) { - RateControlEntry *rce = &m_rce2Passm_encOrderi; + RateControlEntry *rce = &m_rce2Passi; qScalei = getQScale(rce, rateFactor); m_lastQScaleForrce->sliceType = qScalei; } /* fixed I/B qscale relative to P */ - for (int i = m_numEntries - 1; i >= 0; i--) + for (int i = 0; i < m_numEntries; i++) { - qScalei = getDiffLimitedQScale(&m_rce2Passm_encOrderi, qScalei); + qScalei = getDiffLimitedQScale(&m_rce2Passi, qScalei); X265_CHECK(qScalei >= 0, "qScale became negative\n"); } @@ -912,7 +917,6 @@ for (int i = 0; i < m_numEntries; i++) { double q = 0.0, sum = 0.0; - for (int j = 0; j < filterSize; j++) { int idx = i + j - filterSize / 2; @@ -920,7 +924,7 @@ double coeff = qBlur == 0 ? 
1.0 : exp(-d * d / (qBlur * qBlur)); if (idx < 0 || idx >= m_numEntries) continue; - if (m_rce2Passm_encOrderi.sliceType != m_rce2Passm_encOrderidx.sliceType) + if (m_rce2Passi.sliceType != m_rce2Passidx.sliceType) continue; q += qScaleidx * coeff; sum += coeff; @@ -932,7 +936,7 @@ /* find expected bits */ for (int i = 0; i < m_numEntries; i++) { - RateControlEntry *rce = &m_rce2Passm_encOrderi; + RateControlEntry *rce = &m_rce2Passi; rce->newQScale = clipQscale(NULL, rce, blurredQscalei); // check if needed X265_CHECK(rce->newQScale >= 0, "new Qscale is negative\n"); expectedBits += qScale2bits(rce, rce->newQScale); @@ -1279,6 +1283,7 @@ m_param->rc.vbvMaxBitrate = m_param->rc.zonesi.zoneParam->rc.vbvMaxBitrate; memcpy(m_relativeComplexity, m_param->rc.zonesi.relativeComplexity, sizeof(double) * m_param->reconfigWindowSize); reconfigureRC(); + m_isCbr = 1; /* Always vbvmaxrate == bitrate here*/ m_top->zoneReadCounti.incr(); } } @@ -1951,7 +1956,7 @@ /* Adjust quant based on the difference between * achieved and expected bitrate so far */ double curTime = (double)rce->encodeOrder / m_numEntries; - double w = x265_clip3(0.0, 1.0, curTime * 100); + double w = x265_clip3(0.0, 1.0, curTime); q *= pow((double)m_totalBits / m_expectedBitsSum, w); } if (m_framesDone == 0 && m_param->rc.rateControlMode == X265_RC_ABR && m_isGrainEnabled) @@ -2742,7 +2747,9 @@ x265_log(m_param, X265_LOG_WARNING, "poc:%d, VBV underflow (%.0f bits)\n", rce->poc, m_bufferFillFinal); m_bufferFillFinal = X265_MAX(m_bufferFillFinal, 0); - m_bufferFillFinal += m_bufferRate; + m_bufferFillFinal += rce->bufferRate; + if (m_param->csvLogLevel >= 2) + m_unclippedBufferFillFinal = m_bufferFillFinal; if (m_param->rc.bStrictCbr) { @@ -2752,14 +2759,14 @@ filler += FILLER_OVERHEAD * 8; } m_bufferFillFinal -= filler; - bufferBits = X265_MIN(bits + filler + m_bufferExcess, m_bufferRate); + bufferBits = X265_MIN(bits + filler + m_bufferExcess, rce->bufferRate); m_bufferExcess = X265_MAX(m_bufferExcess - 
bufferBits + bits + filler, 0); m_bufferFillActual += bufferBits - bits - filler; } else { m_bufferFillFinal = X265_MIN(m_bufferFillFinal, m_bufferSize); - bufferBits = X265_MIN(bits + m_bufferExcess, m_bufferRate); + bufferBits = X265_MIN(bits + m_bufferExcess, rce->bufferRate); m_bufferExcess = X265_MAX(m_bufferExcess - bufferBits + bits, 0); m_bufferFillActual += bufferBits - bits; m_bufferFillActual = X265_MIN(m_bufferFillActual, m_bufferSize);
View file
x265_3.3.tar.gz/source/encoder/ratecontrol.h -> x265_3.4.tar.gz/source/encoder/ratecontrol.h
Changed
@@ -157,6 +157,7 @@ double m_rateFactorConstant; double m_bufferSize; double m_bufferFillFinal; /* real buffer as of the last finished frame */ + double m_unclippedBufferFillFinal; /* real unclipped buffer as of the last finished frame used to log in CSV*/ double m_bufferFill; /* planned buffer, if all in-progress frames hit their bit budget */ double m_bufferRate; /* # of bits added to buffer_fill after each frame */ double m_vbvMaxRate; /* in kbps */
View file
x265_3.3.tar.gz/source/encoder/slicetype.cpp -> x265_3.4.tar.gz/source/encoder/slicetype.cpp
Changed
@@ -87,7 +87,7 @@ namespace X265_NS { -bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta) +bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel) { intptr_t rowOne = 0, rowTwo = 0, rowThree = 0, colOne = 0, colTwo = 0, colThree = 0; intptr_t middle = 0, topLeft = 0, topRight = 0, bottomLeft = 0, bottomRight = 0; @@ -141,7 +141,7 @@ theta = 180 + theta; edgeTheta[middle] = (pixel)theta; } - edgePic[middle] = (pixel)(gradientMagnitude >= edgeThreshold ? edgeThreshold : blackPixel); + edgePic[middle] = (pixel)(gradientMagnitude >= EDGE_THRESHOLD ? whitePixel : blackPixel); } } return true; @@ -519,6 +519,13 @@ if (param->rc.aqMode == X265_AQ_EDGE) edgeFilter(curFrame, param); + if (param->rc.aqMode == X265_AQ_EDGE && !param->bHistBasedSceneCut && param->recursionSkipMode == EDGE_BASED_RSKIP) + { + pixel* src = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX; + primitives.planecopy_pp_shr(src, curFrame->m_fencPic->m_stride, curFrame->m_edgeBitPic, + curFrame->m_fencPic->m_stride, curFrame->m_fencPic->m_picWidth, curFrame->m_fencPic->m_picHeight, SHIFT_TO_BITPLANE); + } + if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED || param->rc.aqMode == X265_AQ_EDGE) { double bit_depth_correction = 1.f / (1 << (2 * (X265_DEPTH - 8)));
View file
x265_3.3.tar.gz/source/encoder/slicetype.h -> x265_3.4.tar.gz/source/encoder/slicetype.h
Changed
@@ -44,9 +44,9 @@ #define EDGE_INCLINATION 45 #if HIGH_BIT_DEPTH -#define edgeThreshold 1023.0 +#define EDGE_THRESHOLD 1023.0 #else -#define edgeThreshold 255.0 +#define EDGE_THRESHOLD 255.0 #endif #define PI 3.14159265 @@ -101,7 +101,7 @@ protected: uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp, uint32_t qgSize); - uint32_t edgeDensityCu(Frame*curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize); + uint32_t edgeDensityCu(Frame* curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize); uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, uint32_t qgSize); uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp); bool allocWeightedRef(Lowres& fenc); @@ -265,7 +265,6 @@ CostEstimateGroup& operator=(const CostEstimateGroup&); }; -bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta); - +bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel = EDGE_THRESHOLD); } #endif // ifndef X265_SLICETYPE_H
View file
x265_3.3.tar.gz/source/test/CMakeLists.txt -> x265_3.4.tar.gz/source/test/CMakeLists.txt
Changed
@@ -23,13 +23,15 @@ # add ARM assembly files if(ARM OR CROSS_COMPILE_ARM) - enable_language(ASM) - set(NASM_SRC checkasm-arm.S) - add_custom_command( - OUTPUT checkasm-arm.obj - COMMAND ${CMAKE_CXX_COMPILER} - ARGS ${NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-arm.S -o checkasm-arm.obj - DEPENDS checkasm-arm.S) + if(NOT ARM64) + enable_language(ASM) + set(NASM_SRC checkasm-arm.S) + add_custom_command( + OUTPUT checkasm-arm.obj + COMMAND ${CMAKE_CXX_COMPILER} + ARGS ${NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-arm.S -o checkasm-arm.obj + DEPENDS checkasm-arm.S) + endif() endif(ARM OR CROSS_COMPILE_ARM) # add PowerPC assembly files
View file
x265_3.3.tar.gz/source/test/regression-tests.txt -> x265_3.4.tar.gz/source/test/regression-tests.txt
Changed
@@ -75,7 +75,7 @@ News-4k.y4m,--preset superfast --lookahead-slices 6 --aq-mode 0 News-4k.y4m,--preset superfast --slices 4 --aq-mode 0 News-4k.y4m,--preset medium --tune ssim --no-sao --qg-size 16 -News-4k.y4m,--preset veryslow --no-rskip +News-4k.y4m,--preset veryslow --rskip 0 News-4k.y4m,--preset veryslow --pme --crf 40 OldTownCross_1920x1080_50_10bit_422.yuv,--preset superfast --weightp OldTownCross_1920x1080_50_10bit_422.yuv,--preset medium --no-weightp @@ -162,7 +162,11 @@ sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut --hist-threshold 0.02 --frame-dup --dup-threshold 60 --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000 sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut --hist-threshold 0.02 sintel_trailer_2k_1920x1080_24.yuv, --preset ultrafast --hist-scenecut --hist-threshold 0.02 - +crowd_run_1920x1080_50.yuv, --preset faster --ctu 32 --rskip 2 --rskip-edge-threshold 5 +crowd_run_1920x1080_50.yuv, --preset fast --ctu 64 --rskip 2 --rskip-edge-threshold 5 --aq-mode 4 +crowd_run_1920x1080_50.yuv, --preset slow --ctu 32 --rskip 2 --rskip-edge-threshold 5 --hist-scenecut --hist-threshold 0.1 +crowd_run_1920x1080_50.yuv, --preset slower --ctu 16 --rskip 2 --rskip-edge-threshold 5 --hist-scenecut --hist-threshold 0.1 --aq-mode 4 + # Main12 intraCost overflow bug test 720p50_parkrun_ter.y4m,--preset medium
View file
x265_3.3.tar.gz/source/test/save-load-tests.txt -> x265_3.4.tar.gz/source/test/save-load-tests.txt
Changed
@@ -18,3 +18,4 @@ RaceHorses_416x240_30.y4m, --preset slow --no-cutree --ctu 16 --analysis-save x265_analysis.dat --analysis-save-reuse-level 10 --scale-factor 2 --crf 22 --vbv-maxrate 1000 --vbv-bufsize 1000::RaceHorses_832x480_30.y4m, --preset slow --no-cutree --ctu 32 --analysis-load x265_analysis.dat --analysis-save x265_analysis_2.dat --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --crf 16 --vbv-maxrate 4000 --vbv-bufsize 4000 --refine-intra 0 --refine-inter 1::RaceHorses_1664x960_30.y4m, --preset slow --no-cutree --ctu 64 --analysis-load x265_analysis_2.dat --analysis-load-reuse-level 10 --scale-factor 2 --crf 12 --vbv-maxrate 7000 --vbv-bufsize 7000 --refine-intra 2 --refine-inter 2 crowd_run_540p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_540.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 5000 --vbv-bufsize 15000 --vbv-maxrate 9000::crowd_run_1080p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_1080.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_1080p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_1080.dat --analysis-load x265_analysis_540.dat --refine-intra 4 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_2160p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_2160.dat --analysis-load x265_analysis_1080.dat --refine-intra 3 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000::crowd_run_2160p50.y4m, --preset veryslow --no-cutree --analysis-load x265_analysis_2160.dat --refine-intra 2 --dynamic-refine --analysis-load-reuse-level 10 --scale-factor 1 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000 crowd_run_540p50.y4m, --preset medium 
--no-cutree --analysis-save x265_analysis_540.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 5000 --vbv-bufsize 15000 --vbv-maxrate 9000::crowd_run_1080p50.y4m, --preset medium --no-cutree --analysis-save x265_analysis_1080.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_1080p50.y4m, --preset medium --no-cutree --analysis-save x265_analysis_1080.dat --analysis-load x265_analysis_540.dat --refine-intra 4 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_2160p50.y4m, --preset medium --no-cutree --analysis-save x265_analysis_2160.dat --analysis-load x265_analysis_1080.dat --refine-intra 3 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000::crowd_run_2160p50.y4m, --preset medium --no-cutree --analysis-load x265_analysis_2160.dat --refine-intra 2 --dynamic-refine --analysis-load-reuse-level 10 --scale-factor 1 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000 +News-4k.y4m, --preset medium --analysis-save x265_analysis_fdup.dat --frame-dup --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000::News-4k.y4m, --analysis-load x265_analysis_fdup.dat --frame-dup --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000
View file
x265_3.3.tar.gz/source/test/testbench.cpp -> x265_3.4.tar.gz/source/test/testbench.cpp
Changed
@@ -5,6 +5,7 @@ * Mandar Gurav <mandar@multicorewareinc.com> * Mahesh Pittala <mahesh@multicorewareinc.com> * Min Chen <chenm003@163.com> + * Yimeng Su <yimeng.su@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -208,6 +209,14 @@ EncoderPrimitives asmprim; memset(&asmprim, 0, sizeof(asmprim)); setupAssemblyPrimitives(asmprim, test_archi.flag); + +#if X265_ARCH_ARM64 + /* Temporary workaround because luma_vsp assembly primitive has not been completed + * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive. + * Otherwise, segment fault occurs. */ + setupAliasCPrimitives(cprim, asmprim, test_archi.flag); +#endif + setupAliasPrimitives(asmprim); memcpy(&primitives, &asmprim, sizeof(EncoderPrimitives)); for (size_t h = 0; h < sizeof(harness) / sizeof(TestHarness*); h++) @@ -232,6 +241,13 @@ #endif setupAssemblyPrimitives(optprim, cpuid); +#if X265_ARCH_ARM64 + /* Temporary workaround because luma_vsp assembly primitive has not been completed + * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive. + * Otherwise, segment fault occurs. */ + setupAliasCPrimitives(cprim, optprim, cpuid); +#endif + /* Note that we do not setup aliases for performance tests, that would be * redundant. The testbench only verifies they are correctly aliased */
View file
x265_3.3.tar.gz/source/test/testharness.h -> x265_3.4.tar.gz/source/test/testharness.h
Changed
@@ -3,6 +3,7 @@ * * Authors: Steve Borho <steve@borho.org> * Min Chen <chenm003@163.com> + * Yimeng Su <yimeng.su@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -81,12 +82,16 @@ #if X265_ARCH_X86 asm volatile("rdtsc" : "=a" (a) ::"edx"); #elif X265_ARCH_ARM +#if X265_ARCH_ARM64 + asm volatile("mrs %0, cntvct_el0" : "=r"(a)); +#else // TOD-DO: verify following inline asm to get cpu Timestamp Counter for ARM arch // asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(a)); // TO-DO: replace clock() function with appropriate ARM cpu instructions a = clock(); #endif +#endif return a; } #endif // ifdef _MSC_VER
View file
x265_3.3.tar.gz/source/x265.cpp -> x265_3.4.tar.gz/source/x265.cpp
Changed
@@ -27,11 +27,7 @@ #include "x265.h" #include "x265cli.h" - -#include "input/input.h" -#include "output/output.h" -#include "output/reconplay.h" -#include "svt.h" +#include "abrEncApp.h" #if HAVE_VLD /* Visual Leak Detector */ @@ -47,191 +43,59 @@ #include <fstream> #include <queue> -#define CONSOLE_TITLE_SIZE 200 -#ifdef _WIN32 -#include <windows.h> -#define SetThreadExecutionState(es) -static char orgConsoleTitleCONSOLE_TITLE_SIZE = ""; -#else -#define GetConsoleTitle(t, n) -#define SetConsoleTitle(t) -#define SetThreadExecutionState(es) -#endif - using namespace X265_NS; -/* Ctrl-C handler */ -static volatile sig_atomic_t b_ctrl_c /* = 0 */; -static void sigint_handler(int) -{ - b_ctrl_c = 1; -} -#define START_CODE 0x00000001 -#define START_CODE_BYTES 4 - -struct CLIOptions -{ - InputFile* input; - ReconFile* recon; - OutputFile* output; - FILE* qpfile; - FILE* zoneFile; - FILE* dolbyVisionRpu; /* File containing Dolby Vision BL RPU metadata */ - const char* reconPlayCmd; - const x265_api* api; - x265_param* param; - x265_vmaf_data* vmafData; - bool bProgress; - bool bForceY4m; - bool bDither; - uint32_t seek; // number of frames to skip from the beginning - uint32_t framesToBeEncoded; // number of frames to encode - uint64_t totalbytes; - int64_t startTime; - int64_t prevUpdateTime; - - /* in microseconds */ - static const int UPDATE_INTERVAL = 250000; - - CLIOptions() - { - input = NULL; - recon = NULL; - output = NULL; - qpfile = NULL; - zoneFile = NULL; - dolbyVisionRpu = NULL; - reconPlayCmd = NULL; - api = NULL; - param = NULL; - vmafData = NULL; - framesToBeEncoded = seek = 0; - totalbytes = 0; - bProgress = true; - bForceY4m = false; - startTime = x265_mdate(); - prevUpdateTime = 0; - bDither = false; - } +#define X265_HEAD_ENTRIES 3 - void destroy(); - void printStatus(uint32_t frameNum); - bool parse(int argc, char **argv); - bool parseZoneParam(int argc, char **argv, x265_param* globalParam, int zonefileCount); - bool parseQPFile(x265_picture 
&pic_org); - bool parseZoneFile(); -}; - -void CLIOptions::destroy() -{ - if (input) - input->release(); - input = NULL; - if (recon) - recon->release(); - recon = NULL; - if (qpfile) - fclose(qpfile); - qpfile = NULL; - if (zoneFile) - fclose(zoneFile); - zoneFile = NULL; - if (dolbyVisionRpu) - fclose(dolbyVisionRpu); - dolbyVisionRpu = NULL; - if (output) - output->release(); - output = NULL; -} - -void CLIOptions::printStatus(uint32_t frameNum) -{ - char buf200; - int64_t time = x265_mdate(); - - if (!bProgress || !frameNum || (prevUpdateTime && time - prevUpdateTime < UPDATE_INTERVAL)) - return; - - int64_t elapsed = time - startTime; - double fps = elapsed > 0 ? frameNum * 1000000. / elapsed : 0; - float bitrate = 0.008f * totalbytes * (param->fpsNum / param->fpsDenom) / ((float)frameNum); - if (framesToBeEncoded) - { - int eta = (int)(elapsed * (framesToBeEncoded - frameNum) / ((int64_t)frameNum * 1000000)); - sprintf(buf, "x265 %.1f%% %d/%d frames, %.2f fps, %.2f kb/s, eta %d:%02d:%02d", - 100. * frameNum / (param->chunkEnd ? param->chunkEnd : param->totalFrames), frameNum, (param->chunkEnd ? param->chunkEnd : param->totalFrames), fps, bitrate, - eta / 3600, (eta / 60) % 60, eta % 60); - } - else - sprintf(buf, "x265 %d frames: %.2f fps, %.2f kb/s", frameNum, fps, bitrate); - - fprintf(stderr, "%s \r", buf + 5); - SetConsoleTitle(buf); - fflush(stderr); // needed in windows - prevUpdateTime = time; -} +#ifdef _WIN32 +#define strdup _strdup +#endif -bool CLIOptions::parseZoneParam(int argc, char **argv, x265_param* globalParam, int zonefileCount) +#ifdef _WIN32 +/* Copy of x264 code, which allows for Unicode characters in the command line. + * Retrieve command line arguments as UTF-8. */ +static int get_argv_utf8(int *argc_ptr, char ***argv_ptr) { - bool bError = false; - int bShowHelp = false; - int outputBitDepth = 0; - const char *profile = NULL; - - /* Presets are applied before all other options. 
*/ - for (optind = 0;;) - { - int c = getopt_long(argc, argv, short_options, long_options, NULL); - if (c == -1) - break; - else if (c == 'D') - outputBitDepth = atoi(optarg); - else if (c == 'P') - profile = optarg; - else if (c == '?') - bShowHelp = true; - } - - if (!outputBitDepth && profile) - { - /* try to derive the output bit depth from the requested profile */ - if (strstr(profile, "10")) - outputBitDepth = 10; - else if (strstr(profile, "12")) - outputBitDepth = 12; - else - outputBitDepth = 8; - } - - api = x265_api_get(outputBitDepth); - if (!api) + int ret = 0; + wchar_t **argv_utf16 = CommandLineToArgvW(GetCommandLineW(), argc_ptr); + if (argv_utf16) { - x265_log(NULL, X265_LOG_WARNING, "falling back to default bit-depth\n"); - api = x265_api_get(0); - } + int argc = *argc_ptr; + int offset = (argc + 1) * sizeof(char*); + int size = offset; - if (bShowHelp) - { - printVersion(globalParam, api); - showHelp(globalParam); - } + for (int i = 0; i < argc; i++) + size += WideCharToMultiByte(CP_UTF8, 0, argv_utf16i, -1, NULL, 0, NULL, NULL); - globalParam->rc.zoneszonefileCount.zoneParam = api->param_alloc(); - if (!globalParam->rc.zoneszonefileCount.zoneParam) - { - x265_log(NULL, X265_LOG_ERROR, "param alloc failed\n"); - return true; + char **argv = *argv_ptr = (char**)malloc(size); + if (argv) + { + for (int i = 0; i < argc; i++) + { + argvi = (char*)argv + offset; + offset += WideCharToMultiByte(CP_UTF8, 0, argv_utf16i, -1, argvi, size - offset, NULL, NULL); + } + argvargc = NULL; + ret = 1; + } + LocalFree(argv_utf16); } + return ret; +} +#endif - memcpy(globalParam->rc.zoneszonefileCount.zoneParam, globalParam, sizeof(x265_param)); +/* Checks for abr-ladder config file in the command line. + * Returns true if abr-config file is present. 
Returns + * false otherwise */ +static bool checkAbrLadder(int argc, char **argv, FILE **abrConfig) +{ for (optind = 0;;) { int long_options_index = -1; int c = getopt_long(argc, argv, short_options, long_options, &long_options_index); if (c == -1) break; - if (long_options_index < 0 && c > 0) { for (size_t i = 0; i < sizeof(long_options) / sizeof(long_options0); i++) @@ -248,593 +112,138 @@ /* getopt_long might have already printed an error message */ if (c != 63) x265_log(NULL, X265_LOG_WARNING, "internal error: short option '%c' has no long option\n", c); - return true; + return false; } } if (long_options_index < 0) { x265_log(NULL, X265_LOG_WARNING, "short option '%c' unrecognized\n", c); - return true; + return false; } - - bError |= !!api->zone_param_parse(globalParam->rc.zoneszonefileCount.zoneParam, long_optionslong_options_index.name, optarg); - - if (bError) + if (!strcmp(long_optionslong_options_index.name, "abr-ladder")) { - const char *name = long_options_index > 0 ? long_optionslong_options_index.name : argvoptind - 2; - x265_log(NULL, X265_LOG_ERROR, "invalid argument: %s = %s\n", name, optarg); + *abrConfig = x265_fopen(optarg, "rb"); + if (!abrConfig) + x265_log_file(NULL, X265_LOG_ERROR, "%s abr-ladder config file not found or error in opening zone file\n", optarg); return true; } } - - if (optind < argc) - { - x265_log(param, X265_LOG_WARNING, "extra unused command arguments given <%s>\n", argvoptind); - return true; - } return false; } -bool CLIOptions::parse(int argc, char **argv) +static uint8_t getNumAbrEncodes(FILE* abrConfig) { - bool bError = false; - int bShowHelp = false; - int inputBitDepth = 8; - int outputBitDepth = 0; - int reconFileBitDepth = 0; - const char *inputfn = NULL; - const char *reconfn = NULL; - const char *outputfn = NULL; - const char *preset = NULL; - const char *tune = NULL; - const char *profile = NULL; - int svtEnabled = 0; - - if (argc <= 1) - { - x265_log(NULL, X265_LOG_ERROR, "No input file. 
Run x265 --help for a list of options.\n"); - return true; - } - - /* Presets are applied before all other options. */ - for (optind = 0;; ) - { - int optionsIndex = -1; - int c = getopt_long(argc, argv, short_options, long_options, &optionsIndex); - if (c == -1) - break; - else if (c == 'p') - preset = optarg; - else if (c == 't') - tune = optarg; - else if (c == 'D') - outputBitDepth = atoi(optarg); - else if (c == 'P') - profile = optarg; - else if (c == '?') - bShowHelp = true; - else if (!c && !strcmp(long_optionsoptionsIndex.name, "svt")) - svtEnabled = 1; - } + char line1024; + uint8_t numEncodes = 0; - if (!outputBitDepth && profile) + while (fgets(line, sizeof(line), abrConfig)) { - /* try to derive the output bit depth from the requested profile */ - if (strstr(profile, "10")) - outputBitDepth = 10; - else if (strstr(profile, "12")) - outputBitDepth = 12; - else - outputBitDepth = 8; - } - - api = x265_api_get(outputBitDepth); - if (!api) - { - x265_log(NULL, X265_LOG_WARNING, "falling back to default bit-depth\n"); - api = x265_api_get(0); - } - - param = api->param_alloc(); - if (!param) - { - x265_log(NULL, X265_LOG_ERROR, "param alloc failed\n"); - return true; - } -#if ENABLE_LIBVMAF - vmafData = (x265_vmaf_data*)x265_malloc(sizeof(x265_vmaf_data)); - if(!vmafData) - { - x265_log(NULL, X265_LOG_ERROR, "vmaf data alloc failed\n"); - return true; - } -#endif - - if (api->param_default_preset(param, preset, tune) < 0) - { - x265_log(NULL, X265_LOG_ERROR, "preset or tune unrecognized\n"); - return true; - } - - if (bShowHelp) - { - printVersion(param, api); - showHelp(param); + if (strcmp(line, "\n") == 0) + continue; + else if (!(*line == '#')) + numEncodes++; } + rewind(abrConfig); + return numEncodes; +} - //Set enable SVT-HEVC encoder first if found in the command line - if (svtEnabled) api->param_parse(param, "svt", NULL); +static bool parseAbrConfig(FILE* abrConfig, CLIOptions cliopt, uint8_t numEncodes) +{ + char line1024; + char* argLine; - for 
(optind = 0;; ) + for (uint32_t i = 0; i < numEncodes; i++) { - int long_options_index = -1; - int c = getopt_long(argc, argv, short_options, long_options, &long_options_index); - if (c == -1) - break; - - switch (c) + fgets(line, sizeof(line), abrConfig); + if (*line == '#' || (strcmp(line, "\r\n") == 0)) + continue; + int index = (int)strcspn(line, "\r\n"); + lineindex = '\0'; + argLine = line; + char* start = strchr(argLine, ' '); + while (isspace((unsigned char)*start)) start++; + int argc = 0; + char **argv = (char**)malloc(256 * sizeof(char *)); + // Adding a dummy string to avoid file parsing error + argvargc++ = (char *)"x265"; + + /* Parse CLI header to identify the ID of the load encode and the reuse level */ + char *header = strtok(argLine, ""); + uint32_t idCount = 0; + char *id = strtok(header, ":"); + char *headX265_HEAD_ENTRIES; + cliopti.encId = i; + cliopti.isAbrLadderConfig = true; + + while (id && (idCount <= X265_HEAD_ENTRIES)) { - case 'h': - printVersion(param, api); - showHelp(param); - break; - - case 'V': - printVersion(param, api); - x265_report_simd(param); - exit(0); - - default: - if (long_options_index < 0 && c > 0) - { - for (size_t i = 0; i < sizeof(long_options) / sizeof(long_options0); i++) - { - if (long_optionsi.val == c) - { - long_options_index = (int)i; - break; - } - } - - if (long_options_index < 0) - { - /* getopt_long might have already printed an error message */ - if (c != 63) - x265_log(NULL, X265_LOG_WARNING, "internal error: short option '%c' has no long option\n", c); - return true; - } - } - if (long_options_index < 0) - { - x265_log(NULL, X265_LOG_WARNING, "short option '%c' unrecognized\n", c); - return true; - } -#define OPT(longname) \ - else if (!strcmp(long_optionslong_options_index.name, longname)) -#define OPT2(name1, name2) \ - else if (!strcmp(long_optionslong_options_index.name, name1) || \ - !strcmp(long_optionslong_options_index.name, name2)) - - if (0) ; - OPT2("frame-skip", "seek") this->seek = 
(uint32_t)x265_atoi(optarg, bError); - OPT("frames") this->framesToBeEncoded = (uint32_t)x265_atoi(optarg, bError); - OPT("no-progress") this->bProgress = false; - OPT("output") outputfn = optarg; - OPT("input") inputfn = optarg; - OPT("recon") reconfn = optarg; - OPT("input-depth") inputBitDepth = (uint32_t)x265_atoi(optarg, bError); - OPT("dither") this->bDither = true; - OPT("recon-depth") reconFileBitDepth = (uint32_t)x265_atoi(optarg, bError); - OPT("y4m") this->bForceY4m = true; - OPT("profile") /* handled above */; - OPT("preset") /* handled above */; - OPT("tune") /* handled above */; - OPT("output-depth") /* handled above */; - OPT("recon-y4m-exec") reconPlayCmd = optarg; - OPT("svt") /* handled above */; - OPT("qpfile") - { - this->qpfile = x265_fopen(optarg, "rb"); - if (!this->qpfile) - x265_log_file(param, X265_LOG_ERROR, "%s qpfile not found or error in opening qp file\n", optarg); - } - OPT("dolby-vision-rpu") - { - this->dolbyVisionRpu = x265_fopen(optarg, "rb"); - if (!this->dolbyVisionRpu) - { - x265_log_file(param, X265_LOG_ERROR, "Dolby Vision RPU metadata file %s not found or error in opening file\n", optarg); - return true; - } - } - OPT("zonefile") - { - this->zoneFile = x265_fopen(optarg, "rb"); - if (!this->zoneFile) - x265_log_file(param, X265_LOG_ERROR, "%s zone file not found or error in opening zone file\n", optarg); - } - OPT("fullhelp") - { - param->logLevel = X265_LOG_FULL; - printVersion(param, api); - showHelp(param); - break; - } - else - bError |= !!api->param_parse(param, long_optionslong_options_index.name, optarg); - if (bError) - { - const char *name = long_options_index > 0 ? 
long_optionslong_options_index.name : argvoptind - 2; - x265_log(NULL, X265_LOG_ERROR, "invalid argument: %s = %s\n", name, optarg); - return true; - } -#undef OPT + headidCount = id; + id = strtok(NULL, ":"); + idCount++; } - } - - if (optind < argc && !inputfn) - inputfn = argvoptind++; - if (optind < argc && !outputfn) - outputfn = argvoptind++; - if (optind < argc) - { - x265_log(param, X265_LOG_WARNING, "extra unused command arguments given <%s>\n", argvoptind); - return true; - } - - if (argc <= 1) - { - api->param_default(param); - printVersion(param, api); - showHelp(param); - } - - if (!inputfn || !outputfn) - { - x265_log(param, X265_LOG_ERROR, "input or output file not specified, try --help for help\n"); - return true; - } - - if (param->internalBitDepth != api->bit_depth) - { - x265_log(param, X265_LOG_ERROR, "Only bit depths of %d are supported in this build\n", api->bit_depth); - return true; - } - -#ifdef SVT_HEVC - if (svtEnabled) - { - EB_H265_ENC_CONFIGURATION* svtParam = (EB_H265_ENC_CONFIGURATION*)param->svtHevcParam; - param->sourceWidth = svtParam->sourceWidth; - param->sourceHeight = svtParam->sourceHeight; - param->fpsNum = svtParam->frameRateNumerator; - param->fpsDenom = svtParam->frameRateDenominator; - svtParam->encoderBitDepth = inputBitDepth; - } -#endif - - InputFileInfo info; - info.filename = inputfn; - info.depth = inputBitDepth; - info.csp = param->internalCsp; - info.width = param->sourceWidth; - info.height = param->sourceHeight; - info.fpsNum = param->fpsNum; - info.fpsDenom = param->fpsDenom; - info.sarWidth = param->vui.sarWidth; - info.sarHeight = param->vui.sarHeight; - info.skipFrames = seek; - info.frameCount = 0; - getParamAspectRatio(param, info.sarWidth, info.sarHeight); - - - this->input = InputFile::open(info, this->bForceY4m); - if (!this->input || this->input->isFail()) - { - x265_log_file(param, X265_LOG_ERROR, "unable to open input file <%s>\n", inputfn); - return true; - } - - if (info.depth < 8 || info.depth > 
16) - { - x265_log(param, X265_LOG_ERROR, "Input bit depth (%d) must be between 8 and 16\n", inputBitDepth); - return true; - } - - /* Unconditionally accept height/width/csp/bitDepth from file info */ - param->sourceWidth = info.width; - param->sourceHeight = info.height; - param->internalCsp = info.csp; - param->sourceBitDepth = info.depth; - - /* Accept fps and sar from file info if not specified by user */ - if (param->fpsDenom == 0 || param->fpsNum == 0) - { - param->fpsDenom = info.fpsDenom; - param->fpsNum = info.fpsNum; - } - if (!param->vui.aspectRatioIdc && info.sarWidth && info.sarHeight) - setParamAspectRatio(param, info.sarWidth, info.sarHeight); - if (this->framesToBeEncoded == 0 && info.frameCount > (int)seek) - this->framesToBeEncoded = info.frameCount - seek; - param->totalFrames = this->framesToBeEncoded; - -#ifdef SVT_HEVC - if (svtEnabled) - { - EB_H265_ENC_CONFIGURATION* svtParam = (EB_H265_ENC_CONFIGURATION*)param->svtHevcParam; - svtParam->sourceWidth = param->sourceWidth; - svtParam->sourceHeight = param->sourceHeight; - svtParam->frameRateNumerator = param->fpsNum; - svtParam->frameRateDenominator = param->fpsDenom; - svtParam->framesToBeEncoded = param->totalFrames; - svtParam->encoderColorFormat = (EB_COLOR_FORMAT)param->internalCsp; - } -#endif - - /* Force CFR until we have support for VFR */ - info.timebaseNum = param->fpsDenom; - info.timebaseDenom = param->fpsNum; - - if (param->bField && param->interlaceMode) - { // Field FPS - param->fpsNum *= 2; - // Field height - param->sourceHeight = param->sourceHeight >> 1; - // Number of fields to encode - param->totalFrames *= 2; - } - - if (api->param_apply_profile(param, profile)) - return true; - - if (param->logLevel >= X265_LOG_INFO) - { - char buf128; - int p = sprintf(buf, "%dx%d fps %d/%d %sp%d", param->sourceWidth, param->sourceHeight, - param->fpsNum, param->fpsDenom, x265_source_csp_namesparam->internalCsp, info.depth); - - int width, height; - getParamAspectRatio(param, width, 
height); - if (width && height) - p += sprintf(buf + p, " sar %d:%d", width, height); - - if (framesToBeEncoded <= 0 || info.frameCount <= 0) - strcpy(buf + p, " unknown frame count"); - else - sprintf(buf + p, " frames %u - %d of %d", this->seek, this->seek + this->framesToBeEncoded - 1, info.frameCount); - - general_log(param, input->getName(), X265_LOG_INFO, "%s\n", buf); - } - - this->input->startReader(); - - if (reconfn) - { - if (reconFileBitDepth == 0) - reconFileBitDepth = param->internalBitDepth; - this->recon = ReconFile::open(reconfn, param->sourceWidth, param->sourceHeight, reconFileBitDepth, - param->fpsNum, param->fpsDenom, param->internalCsp); - if (this->recon->isFail()) + if (idCount != X265_HEAD_ENTRIES) { - x265_log(param, X265_LOG_WARNING, "unable to write reconstructed outputs file\n"); - this->recon->release(); - this->recon = 0; + x265_log(NULL, X265_LOG_ERROR, "Incorrect number of arguments in ABR CLI header at line %d\n", i); + return false; } else - general_log(param, this->recon->getName(), X265_LOG_INFO, - "reconstructed images %dx%d fps %d/%d %s\n", - param->sourceWidth, param->sourceHeight, param->fpsNum, param->fpsDenom, - x265_source_csp_namesparam->internalCsp); - } -#if ENABLE_LIBVMAF - if (!reconfn) - { - x265_log(param, X265_LOG_ERROR, "recon file must be specified to get VMAF score, try --help for help\n"); - return true; - } - const char *str = strrchr(info.filename, '.'); - - if (!strcmp(str, ".y4m")) - { - x265_log(param, X265_LOG_ERROR, "VMAF supports YUV file format only.\n"); - return true; - } - if(param->internalCsp == X265_CSP_I420 || param->internalCsp == X265_CSP_I422 || param->internalCsp == X265_CSP_I444) - { - vmafData->reference_file = x265_fopen(inputfn, "rb"); - vmafData->distorted_file = x265_fopen(reconfn, "rb"); - } - else - { - x265_log(param, X265_LOG_ERROR, "VMAF will support only yuv420p, yu422p, yu444p, yuv420p10le, yuv422p10le, yuv444p10le formats.\n"); - return true; - } -#endif - this->output = 
OutputFile::open(outputfn, info); - if (this->output->isFail()) - { - x265_log_file(param, X265_LOG_ERROR, "failed to open output file <%s> for writing\n", outputfn); - return true; - } - general_log_file(param, this->output->getName(), X265_LOG_INFO, "output file: %s\n", outputfn); - return false; -} - -bool CLIOptions::parseQPFile(x265_picture &pic_org) -{ - int32_t num = -1, qp, ret; - char type; - uint32_t filePos; - pic_org.forceqp = 0; - pic_org.sliceType = X265_TYPE_AUTO; - while (num < pic_org.poc) - { - filePos = ftell(qpfile); - qp = -1; - ret = fscanf(qpfile, "%d %c%* \t%d\n", &num, &type, &qp); - - if (num > pic_org.poc || ret == EOF) { - fseek(qpfile, filePos, SEEK_SET); - break; + cliopti.encName = strdup(head0); + cliopti.loadLevel = atoi(head1); + cliopti.reuseName = strdup(head2); } - if (num < pic_org.poc && ret >= 2) - continue; - if (ret == 3 && qp >= 0) - pic_org.forceqp = qp + 1; - if (type == 'I') pic_org.sliceType = X265_TYPE_IDR; - else if (type == 'i') pic_org.sliceType = X265_TYPE_I; - else if (type == 'K') pic_org.sliceType = param->bOpenGOP ? 
X265_TYPE_I : X265_TYPE_IDR; - else if (type == 'P') pic_org.sliceType = X265_TYPE_P; - else if (type == 'B') pic_org.sliceType = X265_TYPE_BREF; - else if (type == 'b') pic_org.sliceType = X265_TYPE_B; - else ret = 0; - if (ret < 2 || qp < -1 || qp > 51) - return 0; - } - return 1; -} -bool CLIOptions::parseZoneFile() -{ - char line256; - char* argLine; - param->rc.zonefileCount = 0; - - while (fgets(line, sizeof(line), zoneFile)) - { - if (!((*line == '#') || (strcmp(line, "\r\n") == 0))) - param->rc.zonefileCount++; - } - - rewind(zoneFile); - param->rc.zones = X265_MALLOC(x265_zone, param->rc.zonefileCount); - for (int i = 0; i < param->rc.zonefileCount; i++) - { - while (fgets(line, sizeof(line), zoneFile)) + char* token = strtok(start, " "); + while (token) { - if (*line == '#' || (strcmp(line, "\r\n") == 0)) - continue; - param->rc.zonesi.zoneParam = X265_MALLOC(x265_param, 1); - int index = (int)strcspn(line, "\r\n"); - lineindex = '\0'; - argLine = line; - while (isspace((unsigned char)*argLine)) argLine++; - char* start = strchr(argLine, ' '); - start++; - param->rc.zonesi.startFrame = atoi(argLine); - int argCount = 0; - char **args = (char**)malloc(256 * sizeof(char *)); - // Adding a dummy string to avoid file parsing error - argsargCount++ = (char *)"x265"; - char* token = strtok(start, " "); - while (token) - { - argsargCount++ = token; - token = strtok(NULL, " "); - } - argsargCount = NULL; - CLIOptions cliopt; - if (cliopt.parseZoneParam(argCount, args,param, i)) - { - cliopt.destroy(); - if (cliopt.api) - cliopt.api->param_free(cliopt.param); - exit(1); - } - break; + argvargc++ = strdup(token); + token = strtok(NULL, " "); } - } - return 1; -} - -#ifdef _WIN32 -/* Copy of x264 code, which allows for Unicode characters in the command line. - * Retrieve command line arguments as UTF-8. 
*/ -static int get_argv_utf8(int *argc_ptr, char ***argv_ptr) -{ - int ret = 0; - wchar_t **argv_utf16 = CommandLineToArgvW(GetCommandLineW(), argc_ptr); - if (argv_utf16) - { - int argc = *argc_ptr; - int offset = (argc + 1) * sizeof(char*); - int size = offset; - - for (int i = 0; i < argc; i++) - size += WideCharToMultiByte(CP_UTF8, 0, argv_utf16i, -1, NULL, 0, NULL, NULL); - - char **argv = *argv_ptr = (char**)malloc(size); - if (argv) + argvargc = NULL; + if (cliopti.parse(argc++, argv)) { - for (int i = 0; i < argc; i++) - { - argvi = (char*)argv + offset; - offset += WideCharToMultiByte(CP_UTF8, 0, argv_utf16i, -1, argvi, size - offset, NULL, NULL); - } - argvargc = NULL; - ret = 1; + cliopti.destroy(); + if (cliopti.api) + cliopti.api->param_free(cliopti.param); + exit(1); } - LocalFree(argv_utf16); } - return ret; + return true; } -#endif -/* Parse the RPU file and extract the RPU corresponding to the current picture - * and fill the rpu field of the input picture */ -static int rpuParser(x265_picture * pic, FILE * ptr) +static bool setRefContext(CLIOptions cliopt, uint32_t numEncodes) { - uint8_t byteVal; - uint32_t code = 0; - int bytesRead = 0; - pic->rpu.payloadSize = 0; + bool hasRef = false; + bool isRefFound = false; - if (!pic->pts) + /* Identify reference encode IDs and set save/load reuse levels */ + for (uint32_t curEnc = 0; curEnc < numEncodes; curEnc++) { - while (bytesRead++ < 4 && fread(&byteVal, sizeof(uint8_t), 1, ptr)) - code = (code << 8) | byteVal; - - if (code != START_CODE) + isRefFound = false; + hasRef = !strcmp(clioptcurEnc.reuseName, "nil") ? 
false : true; + if (hasRef) { - x265_log(NULL, X265_LOG_ERROR, "Invalid Dolby Vision RPU startcode in POC %d\n", pic->pts); - return 1; - } - } - - bytesRead = 0; - while (fread(&byteVal, sizeof(uint8_t), 1, ptr)) - { - code = (code << 8) | byteVal; - if (bytesRead++ < 3) - continue; - if (bytesRead >= 1024) - { - x265_log(NULL, X265_LOG_ERROR, "Invalid Dolby Vision RPU size in POC %d\n", pic->pts); - return 1; + for (uint32_t refEnc = 0; refEnc < numEncodes; refEnc++) + { + if (!strcmp(clioptcurEnc.reuseName, clioptrefEnc.encName)) + { + clioptcurEnc.refId = refEnc; + clioptrefEnc.numRefs++; + clioptrefEnc.saveLevel = X265_MAX(clioptrefEnc.saveLevel, clioptcurEnc.loadLevel); + isRefFound = true; + break; + } + } + if (!isRefFound) + { + x265_log(NULL, X265_LOG_ERROR, "Reference encode (%s) not found for %s\n", clioptcurEnc.reuseName, + clioptcurEnc.encName); + return false; + } } - - if (code != START_CODE) - pic->rpu.payloadpic->rpu.payloadSize++ = (code >> (3 * 8)) & 0xFF; - else - return 0; } - - int ShiftBytes = START_CODE_BYTES - (bytesRead - pic->rpu.payloadSize); - int bytesLeft = bytesRead - pic->rpu.payloadSize; - code = (code << ShiftBytes * 8); - for (int i = 0; i < bytesLeft; i++) - { - pic->rpu.payloadpic->rpu.payloadSize++ = (code >> (3 * 8)) & 0xFF; - code = (code << 8); - } - if (!pic->rpu.payloadSize) - x265_log(NULL, X265_LOG_WARNING, "Dolby Vision RPU not found for POC %d\n", pic->pts); - return 0; + return true; } - - /* CLI return codes: * * 0 - encode successful @@ -859,354 +268,57 @@ get_argv_utf8(&argc, &argv); #endif - ReconPlay* reconPlay = NULL; - CLIOptions cliopt; + uint8_t numEncodes = 1; + FILE *abrConfig = NULL; + bool isAbrLadder = checkAbrLadder(argc, argv, &abrConfig); - if (cliopt.parse(argc, argv)) - { - cliopt.destroy(); - if (cliopt.api) - cliopt.api->param_free(cliopt.param); - exit(1); - } + if (isAbrLadder) + numEncodes = getNumAbrEncodes(abrConfig); - x265_param* param = cliopt.param; - const x265_api* api = cliopt.api; 
-#if ENABLE_LIBVMAF - x265_vmaf_data* vmafdata = cliopt.vmafData; -#endif - /* This allows muxers to modify bitstream format */ - cliopt.output->setParam(param); + CLIOptions* cliopt = new CLIOptionsnumEncodes; - if (cliopt.reconPlayCmd) - reconPlay = new ReconPlay(cliopt.reconPlayCmd, *param); - - if (cliopt.zoneFile) + if (isAbrLadder) { - if (!cliopt.parseZoneFile()) - { - x265_log(NULL, X265_LOG_ERROR, "Unable to parse zonefile\n"); - fclose(cliopt.zoneFile); - cliopt.zoneFile = NULL; - } + if (!parseAbrConfig(abrConfig, cliopt, numEncodes)) + exit(1); + if (!setRefContext(cliopt, numEncodes)) + exit(1); } - - /* note: we could try to acquire a different libx265 API here based on - * the profile found during option parsing, but it must be done before - * opening an encoder */ - - x265_encoder *encoder = api->encoder_open(param); - if (!encoder) + else if (cliopt0.parse(argc, argv)) { - x265_log(param, X265_LOG_ERROR, "failed to open encoder\n"); - cliopt.destroy(); - api->param_free(param); - api->cleanup(); - exit(2); + cliopt0.destroy(); + if (cliopt0.api) + cliopt0.api->param_free(cliopt0.param); + exit(1); } - /* get the encoder parameters post-initialization */ - api->encoder_parameters(encoder, param); - - /* Control-C handler */ - if (signal(SIGINT, sigint_handler) == SIG_ERR) - x265_log(param, X265_LOG_ERROR, "Unable to register CTRL+C handler: %s\n", strerror(errno)); - - x265_picture pic_orig, pic_out; - x265_picture *pic_in = &pic_orig; - /* Allocate recon picture if analysis save/load is enabled */ - std::priority_queue<int64_t>* pts_queue = cliopt.output->needPTS() ? new std::priority_queue<int64_t>() : NULL; - x265_picture *pic_recon = (cliopt.recon || param->analysisSave || param->analysisLoad || pts_queue || reconPlay || param->csvLogLevel) ? 
&pic_out : NULL; - uint32_t inFrameCount = 0; - uint32_t outFrameCount = 0; - x265_nal *p_nal; - x265_stats stats; - uint32_t nal; - int16_t *errorBuf = NULL; - bool bDolbyVisionRPU = false; - uint8_t *rpuPayload = NULL; int ret = 0; - int inputPicNum = 1; - x265_picture picField1, picField2; - - if (!param->bRepeatHeaders && !param->bEnableSvtHevc) - { - if (api->encoder_headers(encoder, &p_nal, &nal) < 0) - { - x265_log(param, X265_LOG_ERROR, "Failure generating stream headers\n"); - ret = 3; - goto fail; - } - else - cliopt.totalbytes += cliopt.output->writeHeaders(p_nal, nal); - } - - if (param->bField && param->interlaceMode) - { - api->picture_init(param, &picField1); - api->picture_init(param, &picField2); - // return back the original height of input - param->sourceHeight *= 2; - api->picture_init(param, pic_in); - } - else - api->picture_init(param, pic_in); - - if (param->dolbyProfile && cliopt.dolbyVisionRpu) - { - rpuPayload = X265_MALLOC(uint8_t, 1024); - pic_in->rpu.payload = rpuPayload; - if (pic_in->rpu.payload) - bDolbyVisionRPU = true; - } - - if (cliopt.bDither) - { - errorBuf = X265_MALLOC(int16_t, param->sourceWidth + 1); - if (errorBuf) - memset(errorBuf, 0, (param->sourceWidth + 1) * sizeof(int16_t)); - else - cliopt.bDither = false; - } - - // main encoder loop - while (pic_in && !b_ctrl_c) - { - pic_orig.poc = (param->bField && param->interlaceMode) ? 
inFrameCount * 2 : inFrameCount; - if (cliopt.qpfile) - { - if (!cliopt.parseQPFile(pic_orig)) - { - x265_log(NULL, X265_LOG_ERROR, "can't parse qpfile for frame %d\n", pic_in->poc); - fclose(cliopt.qpfile); - cliopt.qpfile = NULL; - } - } - - if (cliopt.framesToBeEncoded && inFrameCount >= cliopt.framesToBeEncoded) - pic_in = NULL; - else if (cliopt.input->readPicture(pic_orig)) - inFrameCount++; - else - pic_in = NULL; - - if (pic_in) - { - if (pic_in->bitDepth > param->internalBitDepth && cliopt.bDither) - { - x265_dither_image(pic_in, cliopt.input->getWidth(), cliopt.input->getHeight(), errorBuf, param->internalBitDepth); - pic_in->bitDepth = param->internalBitDepth; - } - /* Overwrite PTS */ - pic_in->pts = pic_in->poc; - - // convert to field - if (param->bField && param->interlaceMode) - { - int height = pic_in->height >> 1; - - int static bCreated = 0; - if (bCreated == 0) - { - bCreated = 1; - inputPicNum = 2; - picField1.fieldNum = 1; - picField2.fieldNum = 2; - - picField1.bitDepth = picField2.bitDepth = pic_in->bitDepth; - picField1.colorSpace = picField2.colorSpace = pic_in->colorSpace; - picField1.height = picField2.height = pic_in->height >> 1; - picField1.framesize = picField2.framesize = pic_in->framesize >> 1; - - size_t fieldFrameSize = (size_t)pic_in->framesize >> 1; - char* field1Buf = X265_MALLOC(char, fieldFrameSize); - char* field2Buf = X265_MALLOC(char, fieldFrameSize); - - int stride = picField1.stride0 = picField2.stride0 = pic_in->stride0; - uint64_t framesize = stride * (height >> x265_cli_cspspic_in->colorSpace.height0); - picField1.planes0 = field1Buf; - picField2.planes0 = field2Buf; - for (int i = 1; i < x265_cli_cspspic_in->colorSpace.planes; i++) - { - picField1.planesi = field1Buf + framesize; - picField2.planesi = field2Buf + framesize; - - stride = picField1.stridei = picField2.stridei = pic_in->stridei; - framesize += (stride * (height >> x265_cli_cspspic_in->colorSpace.heighti)); - } - assert(framesize == 
picField1.framesize); - } - - picField1.pts = picField1.poc = pic_in->poc; - picField2.pts = picField2.poc = pic_in->poc + 1; - - picField1.userSEI = picField2.userSEI = pic_in->userSEI; - - //if (pic_in->userData) - //{ - // // Have to handle userData here - //} - - if (pic_in->framesize) - { - for (int i = 0; i < x265_cli_cspspic_in->colorSpace.planes; i++) - { - char* srcP1 = (char*)pic_in->planesi; - char* srcP2 = (char*)pic_in->planesi + pic_in->stridei; - char* p1 = (char*)picField1.planesi; - char* p2 = (char*)picField2.planesi; - int stride = picField1.stridei; - - for (int y = 0; y < (height >> x265_cli_cspspic_in->colorSpace.heighti); y++) - { - memcpy(p1, srcP1, stride); - memcpy(p2, srcP2, stride); - srcP1 += 2*stride; - srcP2 += 2*stride; - p1 += stride; - p2 += stride; - } - } - } - } - - if (bDolbyVisionRPU) - { - if (param->bField && param->interlaceMode) - { - if (rpuParser(&picField1, cliopt.dolbyVisionRpu) > 0) - goto fail; - if (rpuParser(&picField2, cliopt.dolbyVisionRpu) > 0) - goto fail; - } - else - { - if (rpuParser(pic_in, cliopt.dolbyVisionRpu) > 0) - goto fail; - } - } - } - - for (int inputNum = 0; inputNum < inputPicNum; inputNum++) - { - x265_picture *picInput = NULL; - if (inputPicNum == 2) - picInput = pic_in ? (inputNum ? 
&picField2 : &picField1) : NULL; - else - picInput = pic_in; - - int numEncoded = api->encoder_encode( encoder, &p_nal, &nal, picInput, pic_recon ); - if( numEncoded < 0 ) - { - b_ctrl_c = 1; - ret = 4; - break; - } - - if (reconPlay && numEncoded) - reconPlay->writePicture(*pic_recon); - - outFrameCount += numEncoded; - - if (numEncoded && pic_recon && cliopt.recon) - cliopt.recon->writePicture(pic_out); - if (nal) - { - cliopt.totalbytes += cliopt.output->writeFrame(p_nal, nal, pic_out); - if (pts_queue) - { - pts_queue->push(-pic_out.pts); - if (pts_queue->size() > 2) - pts_queue->pop(); - } - } - cliopt.printStatus( outFrameCount ); - } - } - - /* Flush the encoder */ - while (!b_ctrl_c) + AbrEncoder* abrEnc = new AbrEncoder(cliopt, numEncodes, ret); + int threadsActive = abrEnc->m_numActiveEncodes.get(); + while (threadsActive) { - int numEncoded = api->encoder_encode(encoder, &p_nal, &nal, NULL, pic_recon); - if (numEncoded < 0) - { - ret = 4; - break; - } - - if (reconPlay && numEncoded) - reconPlay->writePicture(*pic_recon); - - outFrameCount += numEncoded; - if (numEncoded && pic_recon && cliopt.recon) - cliopt.recon->writePicture(pic_out); - if (nal) + threadsActive = abrEnc->m_numActiveEncodes.waitForChange(threadsActive); + for (uint8_t idx = 0; idx < numEncodes; idx++) { - cliopt.totalbytes += cliopt.output->writeFrame(p_nal, nal, pic_out); - if (pts_queue) + if (abrEnc->m_passEncidx->m_ret) { - pts_queue->push(-pic_out.pts); - if (pts_queue->size() > 2) - pts_queue->pop(); - } + if (isAbrLadder) + x265_log(NULL, X265_LOG_INFO, "Error generating ABR-ladder \n"); + ret = abrEnc->m_passEncidx->m_ret; + threadsActive = 0; + break; + } } - - cliopt.printStatus(outFrameCount); - - if (!numEncoded) - break; - } - - if (bDolbyVisionRPU) - { - if(fgetc(cliopt.dolbyVisionRpu) != EOF) - x265_log(NULL, X265_LOG_WARNING, "Dolby Vision RPU count is greater than frame count\n"); - x265_log(NULL, X265_LOG_INFO, "VES muxing with Dolby Vision RPU file successful\n"); } 
- /* clear progress report */ - if (cliopt.bProgress) - fprintf(stderr, "%*s\r", 80, " "); - -fail: - - delete reconPlay; - - api->encoder_get_stats(encoder, &stats, sizeof(stats)); - if (param->csvfn && !b_ctrl_c) -#if ENABLE_LIBVMAF - api->vmaf_encoder_log(encoder, argc, argv, param, vmafdata); -#else - api->encoder_log(encoder, argc, argv); -#endif - api->encoder_close(encoder); - - int64_t second_largest_pts = 0; - int64_t largest_pts = 0; - if (pts_queue && pts_queue->size() >= 2) - { - second_largest_pts = -pts_queue->top(); - pts_queue->pop(); - largest_pts = -pts_queue->top(); - pts_queue->pop(); - delete pts_queue; - pts_queue = NULL; - } - cliopt.output->closeFile(largest_pts, second_largest_pts); - - if (b_ctrl_c) - general_log(param, NULL, X265_LOG_INFO, "aborted at input frame %d, output frame %d\n", - cliopt.seek + inFrameCount, stats.encodedPictureCount); - - api->cleanup(); /* Free library singletons */ - - cliopt.destroy(); + abrEnc->destroy(); + delete abrEnc; - api->param_free(param); + for (uint8_t idx = 0; idx < numEncodes; idx++) + clioptidx.destroy(); - X265_FREE(errorBuf); - X265_FREE(rpuPayload); + delete cliopt; SetConsoleTitle(orgConsoleTitle); SetThreadExecutionState(ES_CONTINUOUS);
View file
x265_3.3.tar.gz/source/x265.h -> x265_3.4.tar.gz/source/x265.h
Changed
@@ -134,6 +134,7 @@ int ctuDistortionRefine; int rightOffset; int bottomOffset; + int frameDuplication; }x265_analysis_validate; /* Stores intra analysis data for a single frame. This struct needs better packing */ @@ -304,6 +305,7 @@ double totalFrameTime; double vmafFrameScore; double bufferFillFinal; + double unclippedBufferFillFinal; } x265_frame_stats; typedef struct x265_ctu_info_t @@ -1255,9 +1257,9 @@ * skip blocks. Default is disabled */ int bEnableEarlySkip; - /* Enable early CU size decisions to avoid recursing to higher depths. + /* Enable early CU size decisions to avoid recursing to higher depths. * Default is enabled */ - int bEnableRecursionSkip; + int recursionSkipMode; /* Use a faster search method to find the best intra mode. Default is 0 */ int bEnableFastIntra; @@ -1857,7 +1859,7 @@ double edgeTransitionThreshold; /* Enables histogram based scenecut detection algorithm to detect scenecuts. Default disabled */ - int bHistBasedSceneCut; + int bHistBasedSceneCut; /* Enable HME search ranges for L0, L1 and L2 respectively. */ int hmeRange3; @@ -1874,7 +1876,7 @@ * analysis information stored in analysis-save. Higher the refine level higher * the information stored. Default is 5 */ int analysisSaveReuseLevel; - + /* A value between 1 and 10 (both inclusive) determines the level of * analysis information reused in analysis-load. Higher the refine level higher * the information reused. Default is 5 */ @@ -1901,6 +1903,12 @@ * info is available from the corresponding analysis-save. */ int confWinBottomOffset; + + /* Edge variance threshold for quad tree establishment. */ + float edgeVarThreshold; + + /* Maxrate that could be signaled to the decoder. Default 0. API only. */ + int decoderVbvMaxRate; } x265_param; /* x265_param_alloc:
View file
x265_3.4.tar.gz/source/x265cli.cpp
Added
@@ -0,0 +1,1062 @@ +/***************************************************************************** + * Copyright (C) 2013-2020 MulticoreWare, Inc + * + * Authors: Steve Borho <steve@borho.org> + * Min Chen <chenm003@163.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. + *****************************************************************************/ +#if _MSC_VER +#pragma warning(disable: 4127) // conditional expression is constant, yes I know +#endif + +#include "x265cli.h" +#include "svt.h" + +#define START_CODE 0x00000001 +#define START_CODE_BYTES 4 + +#ifdef __cplusplus +namespace X265_NS { +#endif + + static void printVersion(x265_param *param, const x265_api* api) + { + x265_log(param, X265_LOG_INFO, "HEVC encoder version %s\n", api->version_str); + x265_log(param, X265_LOG_INFO, "build info %s\n", api->build_info_str); + } + + static void showHelp(x265_param *param) + { + int level = param->logLevel; + +#define OPT(value) (value ? 
"enabled" : "disabled") +#define H0 printf +#define H1 if (level >= X265_LOG_DEBUG) printf + + H0("\nSyntax: x265 options infile -o outfile\n"); + H0(" infile can be YUV or Y4M\n"); + H0(" outfile is raw HEVC bitstream\n"); + H0("\nExecutable Options:\n"); + H0("-h/--help Show this help text and exit\n"); + H0(" --fullhelp Show all options and exit\n"); + H0("-V/--version Show version info and exit\n"); + H0("\nOutput Options:\n"); + H0("-o/--output <filename> Bitstream output file name\n"); + H0("-D/--output-depth 8|10|12 Output bit depth (also internal bit depth). Default %d\n", param->internalBitDepth); + H0(" --log-level <string> Logging level: none error warning info debug full. Default %s\n", X265_NS::logLevelNamesparam->logLevel + 1); + H0(" --no-progress Disable CLI progress reports\n"); + H0(" --csv <filename> Comma separated log file, if csv-log-level > 0 frame level statistics, else one line per run\n"); + H0(" --csv-log-level <integer> Level of csv logging, if csv-log-level > 0 frame level statistics, else one line per run: 0-2\n"); + H0("\nInput Options:\n"); + H0(" --input <filename> Raw YUV or Y4M input file name. `-` for stdin\n"); + H1(" --y4m Force parsing of input stream as YUV4MPEG2 regardless of file extension\n"); + H0(" --fps <float|rational> Source frame rate (float or num/denom), auto-detected if Y4M\n"); + H0(" --input-res WxH Source picture size w x h, auto-detected if Y4M\n"); + H1(" --input-depth <integer> Bit-depth of input file. Default 8\n"); + H1(" --input-csp <string> Chroma subsampling, auto-detected if Y4M\n"); + H1(" 0 - i400 (4:0:0 monochrome)\n"); + H1(" 1 - i420 (4:2:0 default)\n"); + H1(" 2 - i422 (4:2:2)\n"); + H1(" 3 - i444 (4:4:4)\n"); +#if ENABLE_HDR10_PLUS + H0(" --dhdr10-info <filename> JSON file containing the Creative Intent Metadata to be encoded as Dynamic Tone Mapping\n"); + H0(" --no-dhdr10-opt Insert tone mapping SEI only for IDR frames and when the tone mapping information changes. 
Default disabled\n"); +#endif + H0(" --dolby-vision-profile <float|integer> Specifies Dolby Vision profile ID. Currently only profile 5, profile 8.1 and profile 8.2 enabled. Specified as '5' or '50'. Default 0 (disabled).\n"); + H0(" --dolby-vision-rpu <filename> File containing Dolby Vision RPU metadata.\n" + " If given, x265's Dolby Vision metadata parser will fill the RPU field of input pictures with the metadata read from the file. Default NULL(disabled).\n"); + H0(" --nalu-file <filename> Text file containing SEI messages in the following format : <POC><space><PREFIX><space><NAL UNIT TYPE>/<SEI TYPE><space><SEI Payload>\n"); + H0("-f/--frames <integer> Maximum number of frames to encode. Default all\n"); + H0(" --seek <integer> First frame to encode\n"); + H1(" --no-interlace <bff|tff> Indicate input pictures are interlace fields in temporal order. Default progressive\n"); + H0(" --no-field Enable or disable field coding. Default %s\n", OPT(param->bField)); + H1(" --dither Enable dither if downscaling to 8 bit pixels. Default disabled\n"); + H0(" --no-copy-pic Copy buffers of input picture in frame. Default %s\n", OPT(param->bCopyPicToFrame)); + H0("\nQuality reporting metrics:\n"); + H0(" --no-ssim Enable reporting SSIM metric scores. Default %s\n", OPT(param->bEnableSsim)); + H0(" --no-psnr Enable reporting PSNR metric scores. Default %s\n", OPT(param->bEnablePsnr)); + H0("\nProfile, Level, Tier:\n"); + H0("-P/--profile <string> Enforce an encode profile: main, main10, mainstillpicture\n"); + H0(" --level-idc <integer|float> Force a minimum required decoder level (as '5.0' or '50')\n"); + H0(" --no-high-tier If a decoder level is specified, this modifier selects High tier of that level\n"); + H0(" --uhd-bd Enable UHD Bluray compatibility support\n"); + H0(" --no-allow-non-conformance Allow the encoder to generate profile NONE bitstreams. 
Default %s\n", OPT(param->bAllowNonConformance)); + H0("\nThreading, performance:\n"); + H0(" --pools <integer,...> Comma separated thread count per thread pool (pool per NUMA node)\n"); + H0(" '-' implies no threads on node, '+' implies one thread per core on node\n"); + H0("-F/--frame-threads <integer> Number of concurrently encoded frames. 0: auto-determined by core count\n"); + H0(" --no-wpp Enable Wavefront Parallel Processing. Default %s\n", OPT(param->bEnableWavefront)); + H0(" --no-slices <integer> Enable Multiple Slices feature. Default %d\n", param->maxSlices); + H0(" --no-pmode Parallel mode analysis. Default %s\n", OPT(param->bDistributeModeAnalysis)); + H0(" --no-pme Parallel motion estimation. Default %s\n", OPT(param->bDistributeMotionEstimation)); + H0(" --no-asm <bool|int|string> Override CPU detection. Default: auto\n"); + H0("\nPresets:\n"); + H0("-p/--preset <string> Trade off performance for compression efficiency. Default medium\n"); + H0(" ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow, or placebo\n"); + H0("-t/--tune <string> Tune the settings for a particular type of source or situation:\n"); + H0(" psnr, ssim, grain, zerolatency, fastdecode\n"); + H0("\nQuad-Tree size and depth:\n"); + H0("-s/--ctu <64|32|16> Maximum CU size (WxH). Default %d\n", param->maxCUSize); + H0(" --min-cu-size <64|32|16|8> Minimum CU size (WxH). Default %d\n", param->minCUSize); + H0(" --max-tu-size <32|16|8|4> Maximum TU size (WxH). Default %d\n", param->maxTUSize); + H0(" --tu-intra-depth <integer> Max TU recursive depth for intra CUs. Default %d\n", param->tuQTMaxIntraDepth); + H0(" --tu-inter-depth <integer> Max TU recursive depth for inter CUs. Default %d\n", param->tuQTMaxInterDepth); + H0(" --limit-tu <0..4> Enable early exit from TU recursion for inter coded blocks. Default %d\n", param->limitTU); + H0("\nAnalysis:\n"); + H0(" --rd <1..6> Level of RDO in mode decision 1:least....6:full RDO. 
Default %d\n", param->rdLevel); + H0(" --no-psy-rd <0..5.0> Strength of psycho-visual rate distortion optimization, 0 to disable. Default %.1f\n", param->psyRd); + H0(" --no-rdoq-level <0|1|2> Level of RDO in quantization 0:none, 1:levels, 2:levels & coding groups. Default %d\n", param->rdoqLevel); + H0(" --no-psy-rdoq <0..50.0> Strength of psycho-visual optimization in RDO quantization, 0 to disable. Default %.1f\n", param->psyRdoq); + H0(" --dynamic-rd <0..4.0> Strength of dynamic RD, 0 to disable. Default %.2f\n", param->dynamicRd); + H0(" --no-ssim-rd Enable ssim rate distortion optimization, 0 to disable. Default %s\n", OPT(param->bSsimRd)); + H0(" --no-rd-refine Enable QP based RD refinement for rd levels 5 and 6. Default %s\n", OPT(param->bEnableRdRefine)); + H0(" --no-early-skip Enable early SKIP detection. Default %s\n", OPT(param->bEnableEarlySkip)); + H0(" --rskip <mode> Set mode for early exit from recursion. Mode 1: exit using rdcost & CU homogenity. Mode 2: exit using CU edge density.\n" + " Mode 0: disabled. Default %d\n", param->recursionSkipMode); + H1(" --rskip-edge-threshold Threshold in terms of percentage (integer of range 0,100) for minimum edge density in CUs used to prun the recursion depth. Applicable only for rskip mode 2. Value is preset dependent. Default: %.f\n", param->edgeVarThreshold*100.0f); + H1(" --no-tskip-fast Enable fast intra transform skipping. Default %s\n", OPT(param->bEnableTSkipFast)); + H1(" --no-splitrd-skip Enable skipping split RD analysis when sum of split CU rdCost larger than one split CU rdCost for Intra CU. Default %s\n", OPT(param->bEnableSplitRdSkip)); + H1(" --nr-intra <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in intra CUs. Default 0\n"); + H1(" --nr-inter <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in inter CUs. 
Default 0\n"); + H0(" --ctu-info <integer> Enable receiving ctu information asynchronously and determine reaction to the CTU information (0, 1, 2, 4, 6) Default 0\n" + " - 1: force the partitions if CTU information is present\n" + " - 2: functionality of (1) and reduce qp if CTU information has changed\n" + " - 4: functionality of (1) and force Inter modes when CTU Information has changed, merge/skip otherwise\n" + " Enable this option only when planning to invoke the API function x265_encoder_ctu_info to copy ctu-info asynchronously\n"); + H0("\nCoding tools:\n"); + H0("-w/--no-weightp Enable weighted prediction in P slices. Default %s\n", OPT(param->bEnableWeightedPred)); + H0(" --no-weightb Enable weighted prediction in B slices. Default %s\n", OPT(param->bEnableWeightedBiPred)); + H0(" --no-cu-lossless Consider lossless mode in CU RDO decisions. Default %s\n", OPT(param->bCULossless)); + H0(" --no-signhide Hide sign bit of one coeff per TU (rdo). Default %s\n", OPT(param->bEnableSignHiding)); + H1(" --no-tskip Enable intra 4x4 transform skipping. Default %s\n", OPT(param->bEnableTransformSkip)); + H0("\nTemporal / motion search options:\n"); + H0(" --max-merge <1..5> Maximum number of merge candidates. Default %d\n", param->maxNumMergeCand); + H0(" --ref <integer> max number of L0 references to be allowed (1 .. 16) Default %d\n", param->maxNumReferences); + H0(" --limit-refs <0|1|2|3> Limit references per depth (1) or CU (2) or both (3). Default %d\n", param->limitReferences); + H0(" --me <string> Motion search method dia hex umh star full. Default %d\n", param->searchMethod); + H0("-m/--subme <integer> Amount of subpel refinement to perform (0:least .. 7:most). Default %d \n", param->subpelRefine); + H0(" --merange <integer> Motion search range. Default %d\n", param->searchRange); + H0(" --no-rect Enable rectangular motion partitions Nx2N and 2NxN. 
Default %s\n", OPT(param->bEnableRectInter)); + H0(" --no-amp Enable asymmetric motion partitions, requires --rect. Default %s\n", OPT(param->bEnableAMP)); + H0(" --no-limit-modes Limit rectangular and asymmetric motion predictions. Default %d\n", param->limitModes); + H1(" --no-temporal-mvp Enable temporal MV predictors. Default %s\n", OPT(param->bEnableTemporalMvp)); + H1(" --no-hme Enable Hierarchical Motion Estimation. Default %s\n", OPT(param->bEnableHME)); + H1(" --hme-search <string> Motion search-method for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeSearchMethod0, param->hmeSearchMethod1, param->hmeSearchMethod2); + H1(" --hme-range <int>,<int>,<int> Motion search-range for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeRange0, param->hmeRange1, param->hmeRange2); + H0("\nSpatial / intra options:\n"); + H0(" --no-strong-intra-smoothing Enable strong intra smoothing for 32x32 blocks. Default %s\n", OPT(param->bEnableStrongIntraSmoothing)); + H0(" --no-constrained-intra Constrained intra prediction (use only intra coded reference pixels) Default %s\n", OPT(param->bEnableConstrainedIntra)); + H0(" --no-b-intra Enable intra in B frames in veryslow presets. Default %s\n", OPT(param->bIntraInBFrames)); + H0(" --no-fast-intra Enable faster search method for angular intra predictions. Default %s\n", OPT(param->bEnableFastIntra)); + H0(" --rdpenalty <0..2> penalty for 32x32 intra TU in non-I slices. 0:disabled 1:RD-penalty 2:maximum. Default %d\n", param->rdPenalty); + H0("\nSlice decision options:\n"); + H0(" --no-open-gop Enable open-GOP, allows I slices to be non-IDR. Default %s\n", OPT(param->bOpenGOP)); + H0("-I/--keyint <integer> Max IDR period in frames. -1 for infinite-gop. Default %d\n", param->keyframeMax); + H0("-i/--min-keyint <integer> Scenecuts closer together than this are coded as I, not IDR. 
Default: auto\n"); + H0(" --gop-lookahead <integer> Extends gop boundary if a scenecut is found within this from keyint boundary. Default 0\n"); + H0(" --no-scenecut Disable adaptive I-frame decision\n"); + H0(" --scenecut <integer> How aggressively to insert extra I-frames. Default %d\n", param->scenecutThreshold); + H1(" --scenecut-bias <0..100.0> Bias for scenecut detection. Default %.2f\n", param->scenecutBias); + H0(" --hist-scenecut Enables histogram based scene-cut detection using histogram based algorithm.\n"); + H0(" --no-hist-scenecut Disables histogram based scene-cut detection using histogram based algorithm.\n"); + H1(" --hist-threshold <0.0..2.0> Luma Edge histogram's Normalized SAD threshold for histogram based scenecut detection Default %.2f\n", param->edgeTransitionThreshold); + H0(" --no-fades Enable detection and handling of fade-in regions. Default %s\n", OPT(param->bEnableFades)); + H1(" --no-scenecut-aware-qp Enable increasing QP for frames inside the scenecut window after scenecut. Default %s\n", OPT(param->bEnableSceneCutAwareQp)); + H1(" --scenecut-window <0..1000> QP incremental duration(in milliseconds) when scenecut-aware-qp is enabled. Default %d\n", param->scenecutWindow); + H1(" --max-qp-delta <0..10> QP offset to increment with base QP for inter-frames. Default %d\n", param->maxQpDelta); + H0(" --radl <integer> Number of RADL pictures allowed in front of IDR. Default %d\n", param->radl); + H0(" --intra-refresh Use Periodic Intra Refresh instead of IDR frames\n"); + H0(" --rc-lookahead <integer> Number of frames for frame-type lookahead (determines encoder latency) Default %d\n", param->lookaheadDepth); + H1(" --lookahead-slices <0..16> Number of slices to use per lookahead cost estimate. Default %d\n", param->lookaheadSlices); + H0(" --lookahead-threads <integer> Number of threads to be dedicated to perform lookahead only. Default %d\n", param->lookaheadThreads); + H0("-b/--bframes <0..16> Maximum number of consecutive b-frames. 
Default %d\n", param->bframes); + H1(" --bframe-bias <integer> Bias towards B frame decisions. Default %d\n", param->bFrameBias); + H0(" --b-adapt <0..2> 0 - none, 1 - fast, 2 - full (trellis) adaptive B frame scheduling. Default %d\n", param->bFrameAdaptive); + H0(" --no-b-pyramid Use B-frames as references. Default %s\n", OPT(param->bBPyramid)); + H1(" --qpfile <string> Force frametypes and QPs for some or all frames\n"); + H1(" Format of each line: framenumber frametype QP\n"); + H1(" QP is optional (none lets x265 choose). Frametypes: I,i,K,P,B,b.\n"); + H1(" QPs are restricted by qpmin/qpmax.\n"); + H1(" --force-flush <integer> Force the encoder to flush frames. Default %d\n", param->forceFlush); + H1(" 0 - flush the encoder only when all the input pictures are over.\n"); + H1(" 1 - flush all the frames even when the input is not over. Slicetype decision may change with this option.\n"); + H1(" 2 - flush the slicetype decided frames only.\n"); + H0(" --no--hrd-concat Set HRD concatenation flag for the first keyframe in the buffering period SEI. Default %s\n", OPT(param->bEnableHRDConcatFlag)); + H0("\nRate control, Adaptive Quantization:\n"); + H0(" --bitrate <integer> Target bitrate (kbps) for ABR (implied). Default %d\n", param->rc.bitrate); + H1("-q/--qp <integer> QP for P slices in CQP mode (implied). --ipratio and --pbration determine other slice QPs\n"); + H0(" --crf <float> Quality-based VBR (0-51). Default %.1f\n", param->rc.rfConstant); + H1(" --no-lossless Enable lossless: bypass transform, quant and loop filters globally. Default %s\n", OPT(param->bLossless)); + H1(" --crf-max <float> With CRF+VBV, limit RF to this value. Default %f\n", param->rc.rfConstantMax); + H1(" May cause VBV underflows!\n"); + H1(" --crf-min <float> With CRF+VBV, limit RF to this value. Default %f\n", param->rc.rfConstantMin); + H1(" this specifies a minimum rate factor value for encode!\n"); + H0(" --vbv-maxrate <integer> Max local bitrate (kbit/s). 
Default %d\n", param->rc.vbvMaxBitrate); + H0(" --vbv-bufsize <integer> Set size of the VBV buffer (kbit). Default %d\n", param->rc.vbvBufferSize); + H0(" --vbv-init <float> Initial VBV buffer occupancy (fraction of bufsize or in kbits). Default %.2f\n", param->rc.vbvBufferInit); + H0(" --vbv-end <float> Final VBV buffer emptiness (fraction of bufsize or in kbits). Default 0 (disabled)\n"); + H0(" --vbv-end-fr-adj <float> Frame from which qp has to be adjusted to achieve final decode buffer emptiness. Default 0\n"); + H0(" --chunk-start <integer> First frame of the chunk. Default 0 (disabled)\n"); + H0(" --chunk-end <integer> Last frame of the chunk. Default 0 (disabled)\n"); + H0(" --pass Multi pass rate control.\n" + " - 1 : First pass, creates stats file\n" + " - 2 : Last pass, does not overwrite stats file\n" + " - 3 : Nth pass, overwrites stats file\n"); + H0(" --no-multi-pass-opt-analysis Refine analysis in 2 pass based on analysis information from pass 1\n"); + H0(" --no-multi-pass-opt-distortion Use distortion of CTU from pass 1 to refine qp in 2 pass\n"); + H0(" --stats Filename for stats file in multipass pass rate control. Default x265_2pass.log\n"); + H0(" --no-analyze-src-pics Motion estimation uses source frame planes. Default disable\n"); + H0(" --no-slow-firstpass Enable a slow first pass in a multipass rate control mode. Default %s\n", OPT(param->rc.bEnableSlowFirstPass)); + H0(" --no-strict-cbr Enable stricter conditions and tolerance for bitrate deviations in CBR mode. Default %s\n", OPT(param->rc.bStrictCbr)); + H0(" --analysis-save <filename> Dump analysis info into the specified file. Default Disabled\n"); + H0(" --analysis-load <filename> Load analysis buffers from the file specified. Default Disabled\n"); + H0(" --analysis-reuse-file <filename> Specify file name used for either dumping or reading analysis data. 
Deault x265_analysis.dat\n"); + H0(" --analysis-reuse-level <1..10> Level of analysis reuse indicates amount of info stored/reused in save/load mode, 1:least..10:most. Now deprecated. Default %d\n", param->analysisReuseLevel); + H0(" --analysis-save-reuse-level <1..10> Indicates the amount of analysis info stored in save mode, 1:least..10:most. Default %d\n", param->analysisSaveReuseLevel); + H0(" --analysis-load-reuse-level <1..10> Indicates the amount of analysis info reused in load mode, 1:least..10:most. Default %d\n", param->analysisLoadReuseLevel); + H0(" --refine-analysis-type <string> Reuse anlaysis information received through API call. Supported options are avc and hevc. Default disabled - %d\n", param->bAnalysisType); + H0(" --scale-factor <int> Specify factor by which input video is scaled down for analysis save mode. Default %d\n", param->scaleFactor); + H0(" --refine-intra <0..4> Enable intra refinement for encode that uses analysis-load.\n" + " - 0 : Forces both mode and depth from the save encode.\n" + " - 1 : Functionality of (0) + evaluate all intra modes at min-cu-size's depth when current depth is one smaller than min-cu-size's depth.\n" + " - 2 : Functionality of (1) + irrespective of size evaluate all angular modes when the save encode decides the best mode as angular.\n" + " - 3 : Functionality of (1) + irrespective of size evaluate all intra modes.\n" + " - 4 : Re-evaluate all intra blocks, does not reuse data from save encode.\n" + " Default:%d\n", param->intraRefine); + H0(" --refine-inter <0..3> Enable inter refinement for encode that uses analysis-load.\n" + " - 0 : Forces both mode and depth from the save encode.\n" + " - 1 : Functionality of (0) + evaluate all inter modes at min-cu-size's depth when current depth is one smaller than\n" + " min-cu-size's depth. 
When save encode decides the current block as skip(for all sizes) evaluate skip/merge.\n" + " - 2 : Functionality of (1) + irrespective of size restrict the modes evaluated when specific modes are decided as the best mode by the save encode.\n" + " - 3 : Functionality of (1) + irrespective of size evaluate all inter modes.\n" + " Default:%d\n", param->interRefine); + H0(" --no-dynamic-refine Dynamically changes refine-inter level for each CU. Default %s\n", OPT(param->bDynamicRefine)); + H0(" --refine-mv <1..3> Enable mv refinement for load mode. Default %d\n", param->mvRefine); + H0(" --refine-ctu-distortion Store/normalize ctu distortion in analysis-save/load.\n" + " - 0 : Disabled.\n" + " - 1 : Store/Load ctu distortion to/from the file specified in analysis-save/load.\n" + " Default 0 - Disabled\n"); + H0(" --aq-mode <integer> Mode for Adaptive Quantization - 0:none 1:uniform AQ 2:auto variance 3:auto variance with bias to dark scenes 4:auto variance with edge information. Default %d\n", param->rc.aqMode); + H0(" --no-hevc-aq Mode for HEVC Adaptive Quantization. Default %s\n", OPT(param->rc.hevcAq)); + H0(" --aq-strength <float> Reduces blocking and blurring in flat and textured areas (0 to 3.0). Default %.2f\n", param->rc.aqStrength); + H0(" --qp-adaptation-range <float> Delta QP range by QP adaptation based on a psycho-visual model (1.0 to 6.0). Default %.2f\n", param->rc.qpAdaptationRange); + H0(" --no-aq-motion Block level QP adaptation based on the relative motion between the block and the frame. Default %s\n", OPT(param->bAQMotion)); + H0(" --qg-size <int> Specifies the size of the quantization group (64, 32, 16, 8). Default %d\n", param->rc.qgSize); + H0(" --no-cutree Enable cutree for Adaptive Quantization. Default %s\n", OPT(param->rc.cuTree)); + H0(" --no-rc-grain Enable ratecontrol mode to handle grains specifically. turned on with tune grain. Default %s\n", OPT(param->rc.bEnableGrain)); + H1(" --ipratio <float> QP factor between I and P. 
Default %.2f\n", param->rc.ipFactor); + H1(" --pbratio <float> QP factor between P and B. Default %.2f\n", param->rc.pbFactor); + H1(" --qcomp <float> Weight given to predicted complexity. Default %.2f\n", param->rc.qCompress); + H1(" --qpstep <integer> The maximum single adjustment in QP allowed to rate control. Default %d\n", param->rc.qpStep); + H1(" --qpmin <integer> sets a hard lower limit on QP allowed to ratecontrol. Default %d\n", param->rc.qpMin); + H1(" --qpmax <integer> sets a hard upper limit on QP allowed to ratecontrol. Default %d\n", param->rc.qpMax); + H0(" --no-const-vbv Enable consistent vbv. turned on with tune grain. Default %s\n", OPT(param->rc.bEnableConstVbv)); + H1(" --cbqpoffs <integer> Chroma Cb QP Offset -12..12. Default %d\n", param->cbQpOffset); + H1(" --crqpoffs <integer> Chroma Cr QP Offset -12..12. Default %d\n", param->crQpOffset); + H1(" --scaling-list <string> Specify a file containing HM style quant scaling lists or 'default' or 'off'. Default: off\n"); + H1(" --zones <zone0>/<zone1>/... Tweak the bitrate of regions of the video\n"); + H1(" Each zone is of the form\n"); + H1(" <start frame>,<end frame>,<option>\n"); + H1(" where <option> is either\n"); + H1(" q=<integer> (force QP)\n"); + H1(" or b=<float> (bitrate multiplier)\n"); + H0(" --zonefile <filename> Zone file containing the zone boundaries and the parameters to be reconfigured.\n"); + H1(" --lambda-file <string> Specify a file containing replacement values for the lambda tables\n"); + H1(" MAX_MAX_QP+1 floats for lambda table, then again for lambda2 table\n"); + H1(" Blank lines and lines starting with hash(#) are ignored\n"); + H1(" Comma is considered to be white-space\n"); + H0(" --max-ausize-factor <float> This value controls the maximum AU size defined in specification.\n"); + H0(" It represents the percentage of maximum AU size used. 
Default %.1f\n", param->maxAUSizeFactor); + H0("\nLoop filters (deblock and SAO):\n"); + H0(" --no-deblock Enable Deblocking Loop Filter, optionally specify tC:Beta offsets Default %s\n", OPT(param->bEnableLoopFilter)); + H0(" --no-sao Enable Sample Adaptive Offset. Default %s\n", OPT(param->bEnableSAO)); + H1(" --no-sao-non-deblock Use non-deblocked pixels, else right/bottom boundary areas skipped. Default %s\n", OPT(param->bSaoNonDeblocked)); + H0(" --no-limit-sao Limit Sample Adaptive Offset types. Default %s\n", OPT(param->bLimitSAO)); + H0(" --selective-sao <int> Enable slice-level SAO filter. Default %d\n", param->selectiveSAO); + H0("\nVUI options:\n"); + H0(" --sar <width:height|int> Sample Aspect Ratio, the ratio of width to height of an individual pixel.\n"); + H0(" Choose from 0=undef, 1=1:1(\"square\"), 2=12:11, 3=10:11, 4=16:11,\n"); + H0(" 5=40:33, 6=24:11, 7=20:11, 8=32:11, 9=80:33, 10=18:11, 11=15:11,\n"); + H0(" 12=64:33, 13=160:99, 14=4:3, 15=3:2, 16=2:1 or custom ratio of <int:int>. Default %d\n", param->vui.aspectRatioIdc); + H1(" --display-window <string> Describe overscan cropping region as 'left,top,right,bottom' in pixels\n"); + H1(" --overscan <string> Specify whether it is appropriate for decoder to show cropped region: undef, show or crop. Default undef\n"); + H0(" --videoformat <string> Specify video format from undef, component, pal, ntsc, secam, mac. Default undef\n"); + H0(" --range <string> Specify black level and range of luma and chroma signals as full or limited Default limited\n"); + H0(" --colorprim <string> Specify color primaries from bt709, unknown, reserved, bt470m, bt470bg, smpte170m,\n"); + H0(" smpte240m, film, bt2020, smpte428, smpte431, smpte432. 
Default undef\n"); + H0(" --transfer <string> Specify transfer characteristics from bt709, unknown, reserved, bt470m, bt470bg, smpte170m,\n"); + H0(" smpte240m, linear, log100, log316, iec61966-2-4, bt1361e, iec61966-2-1,\n"); + H0(" bt2020-10, bt2020-12, smpte2084, smpte428, arib-std-b67. Default undef\n"); + H1(" --colormatrix <string> Specify color matrix setting from undef, bt709, fcc, bt470bg, smpte170m,\n"); + H1(" smpte240m, GBR, YCgCo, bt2020nc, bt2020c, smpte2085, chroma-derived-nc, chroma-derived-c, ictcp. Default undef\n"); + H1(" --chromaloc <integer> Specify chroma sample location (0 to 5). Default of %d\n", param->vui.chromaSampleLocTypeTopField); + H0(" --master-display <string> SMPTE ST 2086 master display color volume info SEI (HDR)\n"); + H0(" format: G(x,y)B(x,y)R(x,y)WP(x,y)L(max,min)\n"); + H0(" --max-cll <string> Specify content light level info SEI as \"cll,fall\" (HDR).\n"); + H0(" --no-cll Emit content light level info SEI. Default %s\n", OPT(param->bEmitCLL)); + H0(" --no-hdr10 Control dumping of HDR10 SEI packet. If max-cll or master-display has non-zero values, this is enabled. Default %s\n", OPT(param->bEmitHDR10SEI)); + H0(" --no-hdr-opt Add luma and chroma offsets for HDR/WCG content. Default %s. Now deprecated.\n", OPT(param->bHDROpt)); + H0(" --no-hdr10-opt Block-level QP optimization for HDR10 content. Default %s.\n", OPT(param->bHDR10Opt)); + H0(" --min-luma <integer> Minimum luma plane value of input source picture\n"); + H0(" --max-luma <integer> Maximum luma plane value of input source picture\n"); + H0("\nBitstream options:\n"); + H0(" --no-repeat-headers Emit SPS and PPS headers at each keyframe. Default %s\n", OPT(param->bRepeatHeaders)); + H0(" --no-info Emit SEI identifying encoder and parameters. Default %s\n", OPT(param->bEmitInfoSEI)); + H0(" --no-hrd Enable HRD parameters signaling. 
Default %s\n", OPT(param->bEmitHRDSEI)); + H0(" --no-idr-recovery-sei Emit recovery point infor SEI at each IDR frame \n"); + H0(" --no-temporal-layers Enable a temporal sublayer for unreferenced B frames. Default %s\n", OPT(param->bEnableTemporalSubLayers)); + H0(" --no-aud Emit access unit delimiters at the start of each access unit. Default %s\n", OPT(param->bEnableAccessUnitDelimiters)); + H1(" --hash <integer> Decoded Picture Hash SEI 0: disabled, 1: MD5, 2: CRC, 3: Checksum. Default %d\n", param->decodedPictureHashSEI); + H0(" --atc-sei <integer> Emit the alternative transfer characteristics SEI message where the integer is the preferred transfer characteristics. Default disabled\n"); + H0(" --pic-struct <integer> Set the picture structure and emits it in the picture timing SEI message. Values in the range 0..12. See D.3.3 of the HEVC spec. for a detailed explanation.\n"); + H0(" --log2-max-poc-lsb <integer> Maximum of the picture order count\n"); + H0(" --no-vui-timing-info Emit VUI timing information in the bistream. Default %s\n", OPT(param->bEmitVUITimingInfo)); + H0(" --no-vui-hrd-info Emit VUI HRD information in the bistream. Default %s\n", OPT(param->bEmitVUIHRDInfo)); + H0(" --no-opt-qp-pps Dynamically optimize QP in PPS (instead of default 26) based on QPs in previous GOP. Default %s\n", OPT(param->bOptQpPPS)); + H0(" --no-opt-ref-list-length-pps Dynamically set L0 and L1 ref list length in PPS (instead of default 0) based on values in last GOP. Default %s\n", OPT(param->bOptRefListLengthPPS)); + H0(" --no-multi-pass-opt-rps Enable storing commonly used RPS in SPS in multi pass mode. Default %s\n", OPT(param->bMultiPassOptRPS)); + H0(" --no-opt-cu-delta-qp Optimize to signal consistent CU level delta QPs in frame. 
Default %s\n", OPT(param->bOptCUDeltaQP)); + H1("\nReconstructed video options (debugging):\n"); + H1("-r/--recon <filename> Reconstructed raw image YUV or Y4M output file name\n"); + H1(" --recon-depth <integer> Bit-depth of reconstructed raw image file. Defaults to input bit depth, or 8 if Y4M\n"); + H1(" --recon-y4m-exec <string> pipe reconstructed frames to Y4M viewer, ex:\"ffplay -i pipe:0 -autoexit\"\n"); + H0(" --lowpass-dct Use low-pass subband dct approximation. Default %s\n", OPT(param->bLowPassDct)); + H0(" --no-frame-dup Enable Frame duplication. Default %s\n", OPT(param->bEnableFrameDuplication)); + H0(" --dup-threshold <integer> PSNR threshold for Frame duplication. Default %d\n", param->dupThreshold); +#ifdef SVT_HEVC + H0(" --nosvt Enable SVT HEVC encoder %s\n", OPT(param->bEnableSvtHevc)); + H0(" --no-svt-hme Enable Hierarchial motion estimation(HME) in SVT HEVC encoder \n"); + H0(" --svt-search-width Motion estimation search area width for SVT HEVC encoder \n"); + H0(" --svt-search-height Motion estimation search area height for SVT HEVC encoder \n"); + H0(" --no-svt-compressed-ten-bit-format Enable 8+2 encoding mode for 10bit input in SVT HEVC encoder \n"); + H0(" --no-svt-speed-control Enable speed control functionality to achieve real time encoding speed for SVT HEVC encoder \n"); + H0(" --svt-preset-tuner Enable additional faster presets of SVT; This only has to be used on top of x265's ultrafast preset. Accepts values in the range of 0-2 \n"); + H0(" --svt-hierarchical-level Hierarchical layer for SVT-HEVC encoder; Accepts inputs in the range 0-3 \n"); + H0(" --svt-base-layer-switch-mode Select whether B/P slice should be used in base layer for SVT-HEVC encoder. 
0-Use B-frames; 1-Use P frames in the base layer \n"); + H0(" --svt-pred-struct Select pred structure for SVT HEVC encoder; Accepts inputs in the range 0-2 \n"); + H0(" --no-svt-fps-in-vps Enable VPS timing info for SVT HEVC encoder \n"); +#endif + H0(" ABR-ladder settings\n"); + H0(" --abr-ladder <file> File containing config settings required for the generation of ABR-ladder\n"); + H1("\nExecutable return codes:\n"); + H1(" 0 - encode successful\n"); + H1(" 1 - unable to parse command line\n"); + H1(" 2 - unable to open encoder\n"); + H1(" 3 - unable to generate stream headers\n"); + H1(" 4 - encoder abort\n"); +#undef OPT +#undef H0 +#undef H1 + if (level < X265_LOG_DEBUG) + printf("\nUse --fullhelp for a full listing (or --log-level full --help)\n"); + printf("\n\nComplete documentation may be found at http://x265.readthedocs.org/en/default/cli.html\n"); + exit(1); + } + + void CLIOptions::destroy() + { + if (isAbrLadderConfig) + { + for (int idx = 1; idx < argCnt; idx++) + free(argStringidx); + free(argString); + } + + if (input) + input->release(); + input = NULL; + if (recon) + recon->release(); + recon = NULL; + if (qpfile) + fclose(qpfile); + qpfile = NULL; + if (zoneFile) + fclose(zoneFile); + zoneFile = NULL; + if (dolbyVisionRpu) + fclose(dolbyVisionRpu); + dolbyVisionRpu = NULL; + if (output) + output->release(); + output = NULL; + } + + void CLIOptions::printStatus(uint32_t frameNum) + { + char buf200; + int64_t time = x265_mdate(); + + if (!bProgress || !frameNum || (prevUpdateTime && time - prevUpdateTime < UPDATE_INTERVAL)) + return; + + int64_t elapsed = time - startTime; + double fps = elapsed > 0 ? frameNum * 1000000. / elapsed : 0; + float bitrate = 0.008f * totalbytes * (param->fpsNum / param->fpsDenom) / ((float)frameNum); + if (framesToBeEncoded) + { + int eta = (int)(elapsed * (framesToBeEncoded - frameNum) / ((int64_t)frameNum * 1000000)); + sprintf(buf, "x265 %.1f%% %d/%d frames, %.2f fps, %.2f kb/s, eta %d:%02d:%02d", + 100. 
* frameNum / (param->chunkEnd ? param->chunkEnd : param->totalFrames), frameNum, (param->chunkEnd ? param->chunkEnd : param->totalFrames), fps, bitrate, + eta / 3600, (eta / 60) % 60, eta % 60); + } + else + sprintf(buf, "x265 %d frames: %.2f fps, %.2f kb/s", frameNum, fps, bitrate); + + fprintf(stderr, "%s \r", buf + 5); + SetConsoleTitle(buf); + fflush(stderr); // needed in windows + prevUpdateTime = time; + } + + bool CLIOptions::parseZoneParam(int argc, char **argv, x265_param* globalParam, int zonefileCount) + { + bool bError = false; + int bShowHelp = false; + int outputBitDepth = 0; + const char *profile = NULL; + + /* Presets are applied before all other options. */ + for (optind = 0;;) + { + int c = getopt_long(argc, argv, short_options, long_options, NULL); + if (c == -1) + break; + else if (c == 'D') + outputBitDepth = atoi(optarg); + else if (c == 'P') + profile = optarg; + else if (c == '?') + bShowHelp = true; + } + + if (!outputBitDepth && profile) + { + /* try to derive the output bit depth from the requested profile */ + if (strstr(profile, "10")) + outputBitDepth = 10; + else if (strstr(profile, "12")) + outputBitDepth = 12; + else + outputBitDepth = 8; + } + + api = x265_api_get(outputBitDepth); + if (!api) + { + x265_log(NULL, X265_LOG_WARNING, "falling back to default bit-depth\n"); + api = x265_api_get(0); + } + + if (bShowHelp) + { + printVersion(globalParam, api); + showHelp(globalParam); + } + + globalParam->rc.zoneszonefileCount.zoneParam = api->param_alloc(); + if (!globalParam->rc.zoneszonefileCount.zoneParam) + { + x265_log(NULL, X265_LOG_ERROR, "param alloc failed\n"); + return true; + } + + memcpy(globalParam->rc.zoneszonefileCount.zoneParam, globalParam, sizeof(x265_param)); + + for (optind = 0;;) + { + int long_options_index = -1; + int c = getopt_long(argc, argv, short_options, long_options, &long_options_index); + if (c == -1) + break; + + if (long_options_index < 0 && c > 0) + { + for (size_t i = 0; i < sizeof(long_options) / 
sizeof(long_options0); i++) + { + if (long_optionsi.val == c) + { + long_options_index = (int)i; + break; + } + } + + if (long_options_index < 0) + { + /* getopt_long might have already printed an error message */ + if (c != 63) + x265_log(NULL, X265_LOG_WARNING, "internal error: short option '%c' has no long option\n", c); + return true; + } + } + if (long_options_index < 0) + { + x265_log(NULL, X265_LOG_WARNING, "short option '%c' unrecognized\n", c); + return true; + } + + bError |= !!api->zone_param_parse(globalParam->rc.zoneszonefileCount.zoneParam, long_optionslong_options_index.name, optarg); + + if (bError) + { + const char *name = long_options_index > 0 ? long_optionslong_options_index.name : argvoptind - 2; + x265_log(NULL, X265_LOG_ERROR, "invalid argument: %s = %s\n", name, optarg); + return true; + } + } + + if (optind < argc) + { + x265_log(param, X265_LOG_WARNING, "extra unused command arguments given <%s>\n", argvoptind); + return true; + } + return false; + } + + bool CLIOptions::parse(int argc, char **argv) + { + bool bError = false; + int bShowHelp = false; + int inputBitDepth = 8; + int outputBitDepth = 0; + int reconFileBitDepth = 0; + const char *inputfn = NULL; + const char *reconfn = NULL; + const char *outputfn = NULL; + const char *preset = NULL; + const char *tune = NULL; + const char *profile = NULL; + int svtEnabled = 0; + argCnt = argc; + argString = argv; + + if (argc <= 1) + { + x265_log(NULL, X265_LOG_ERROR, "No input file. Run x265 --help for a list of options.\n"); + return true; + } + + /* Presets are applied before all other options. 
*/ + for (optind = 0;;) + { + int optionsIndex = -1; + int c = getopt_long(argc, argv, short_options, long_options, &optionsIndex); + if (c == -1) + break; + else if (c == 'p') + preset = optarg; + else if (c == 't') + tune = optarg; + else if (c == 'D') + outputBitDepth = atoi(optarg); + else if (c == 'P') + profile = optarg; + else if (c == '?') + bShowHelp = true; + else if (!c && !strcmp(long_optionsoptionsIndex.name, "svt")) + svtEnabled = 1; + } + + if (!outputBitDepth && profile) + { + /* try to derive the output bit depth from the requested profile */ + if (strstr(profile, "10")) + outputBitDepth = 10; + else if (strstr(profile, "12")) + outputBitDepth = 12; + else + outputBitDepth = 8; + } + + api = x265_api_get(outputBitDepth); + if (!api) + { + x265_log(NULL, X265_LOG_WARNING, "falling back to default bit-depth\n"); + api = x265_api_get(0); + } + + param = api->param_alloc(); + if (!param) + { + x265_log(NULL, X265_LOG_ERROR, "param alloc failed\n"); + return true; + } +#if ENABLE_LIBVMAF + vmafData = (x265_vmaf_data*)x265_malloc(sizeof(x265_vmaf_data)); + if (!vmafData) + { + x265_log(NULL, X265_LOG_ERROR, "vmaf data alloc failed\n"); + return true; + } +#endif + + if (api->param_default_preset(param, preset, tune) < 0) + { + x265_log(NULL, X265_LOG_ERROR, "preset or tune unrecognized\n"); + return true; + } + + if (bShowHelp) + { + printVersion(param, api); + showHelp(param); + } + + //Set enable SVT-HEVC encoder first if found in the command line + if (svtEnabled) api->param_parse(param, "svt", NULL); + + for (optind = 0;;) + { + int long_options_index = -1; + int c = getopt_long(argc, argv, short_options, long_options, &long_options_index); + if (c == -1) + break; + + switch (c) + { + case 'h': + printVersion(param, api); + showHelp(param); + break; + + case 'V': + printVersion(param, api); + x265_report_simd(param); + exit(0); + + default: + if (long_options_index < 0 && c > 0) + { + for (size_t i = 0; i < sizeof(long_options) / 
sizeof(long_options0); i++) + { + if (long_optionsi.val == c) + { + long_options_index = (int)i; + break; + } + } + + if (long_options_index < 0) + { + /* getopt_long might have already printed an error message */ + if (c != 63) + x265_log(NULL, X265_LOG_WARNING, "internal error: short option '%c' has no long option\n", c); + return true; + } + } + if (long_options_index < 0) + { + x265_log(NULL, X265_LOG_WARNING, "short option '%c' unrecognized\n", c); + return true; + } +#define OPT(longname) \ + else if (!strcmp(long_optionslong_options_index.name, longname)) +#define OPT2(name1, name2) \ + else if (!strcmp(long_optionslong_options_index.name, name1) || \ + !strcmp(long_optionslong_options_index.name, name2)) + + if (0); + OPT2("frame-skip", "seek") this->seek = (uint32_t)x265_atoi(optarg, bError); + OPT("frames") this->framesToBeEncoded = (uint32_t)x265_atoi(optarg, bError); + OPT("no-progress") this->bProgress = false; + OPT("output") outputfn = optarg; + OPT("input") inputfn = optarg; + OPT("recon") reconfn = optarg; + OPT("input-depth") inputBitDepth = (uint32_t)x265_atoi(optarg, bError); + OPT("dither") this->bDither = true; + OPT("recon-depth") reconFileBitDepth = (uint32_t)x265_atoi(optarg, bError); + OPT("y4m") this->bForceY4m = true; + OPT("profile") /* handled above */; + OPT("preset") /* handled above */; + OPT("tune") /* handled above */; + OPT("output-depth") /* handled above */; + OPT("recon-y4m-exec") reconPlayCmd = optarg; + OPT("svt") /* handled above */; + OPT("qpfile") + { + this->qpfile = x265_fopen(optarg, "rb"); + if (!this->qpfile) + x265_log_file(param, X265_LOG_ERROR, "%s qpfile not found or error in opening qp file\n", optarg); + } + OPT("dolby-vision-rpu") + { + this->dolbyVisionRpu = x265_fopen(optarg, "rb"); + if (!this->dolbyVisionRpu) + { + x265_log_file(param, X265_LOG_ERROR, "Dolby Vision RPU metadata file %s not found or error in opening file\n", optarg); + return true; + } + } + OPT("zonefile") + { + this->zoneFile = 
x265_fopen(optarg, "rb"); + if (!this->zoneFile) + x265_log_file(param, X265_LOG_ERROR, "%s zone file not found or error in opening zone file\n", optarg); + } + OPT("fullhelp") + { + param->logLevel = X265_LOG_FULL; + printVersion(param, api); + showHelp(param); + break; + } + else + bError |= !!api->param_parse(param, long_optionslong_options_index.name, optarg); + if (bError) + { + const char *name = long_options_index > 0 ? long_optionslong_options_index.name : argvoptind - 2; + x265_log(NULL, X265_LOG_ERROR, "invalid argument: %s = %s\n", name, optarg); + return true; + } +#undef OPT + } + } + + if (optind < argc && !inputfn) + inputfn = argvoptind++; + if (optind < argc && !outputfn) + outputfn = argvoptind++; + if (optind < argc) + { + x265_log(param, X265_LOG_WARNING, "extra unused command arguments given <%s>\n", argvoptind); + return true; + } + + if (argc <= 1) + { + api->param_default(param); + printVersion(param, api); + showHelp(param); + } + + if (!inputfn || !outputfn) + { + x265_log(param, X265_LOG_ERROR, "input or output file not specified, try --help for help\n"); + return true; + } + + if (param->internalBitDepth != api->bit_depth) + { + x265_log(param, X265_LOG_ERROR, "Only bit depths of %d are supported in this build\n", api->bit_depth); + return true; + } + +#ifdef SVT_HEVC + if (svtEnabled) + { + EB_H265_ENC_CONFIGURATION* svtParam = (EB_H265_ENC_CONFIGURATION*)param->svtHevcParam; + param->sourceWidth = svtParam->sourceWidth; + param->sourceHeight = svtParam->sourceHeight; + param->fpsNum = svtParam->frameRateNumerator; + param->fpsDenom = svtParam->frameRateDenominator; + svtParam->encoderBitDepth = inputBitDepth; + } +#endif + + InputFileInfo info; + info.filename = inputfn; + info.depth = inputBitDepth; + info.csp = param->internalCsp; + info.width = param->sourceWidth; + info.height = param->sourceHeight; + info.fpsNum = param->fpsNum; + info.fpsDenom = param->fpsDenom; + info.sarWidth = param->vui.sarWidth; + info.sarHeight = 
param->vui.sarHeight; + info.skipFrames = seek; + info.frameCount = 0; + getParamAspectRatio(param, info.sarWidth, info.sarHeight); + + + this->input = InputFile::open(info, this->bForceY4m); + if (!this->input || this->input->isFail()) + { + x265_log_file(param, X265_LOG_ERROR, "unable to open input file <%s>\n", inputfn); + return true; + } + + if (info.depth < 8 || info.depth > 16) + { + x265_log(param, X265_LOG_ERROR, "Input bit depth (%d) must be between 8 and 16\n", inputBitDepth); + return true; + } + + /* Unconditionally accept height/width/csp/bitDepth from file info */ + param->sourceWidth = info.width; + param->sourceHeight = info.height; + param->internalCsp = info.csp; + param->sourceBitDepth = info.depth; + + /* Accept fps and sar from file info if not specified by user */ + if (param->fpsDenom == 0 || param->fpsNum == 0) + { + param->fpsDenom = info.fpsDenom; + param->fpsNum = info.fpsNum; + } + if (!param->vui.aspectRatioIdc && info.sarWidth && info.sarHeight) + setParamAspectRatio(param, info.sarWidth, info.sarHeight); + if (this->framesToBeEncoded == 0 && info.frameCount > (int)seek) + this->framesToBeEncoded = info.frameCount - seek; + param->totalFrames = this->framesToBeEncoded; + +#ifdef SVT_HEVC + if (svtEnabled) + { + EB_H265_ENC_CONFIGURATION* svtParam = (EB_H265_ENC_CONFIGURATION*)param->svtHevcParam; + svtParam->sourceWidth = param->sourceWidth; + svtParam->sourceHeight = param->sourceHeight; + svtParam->frameRateNumerator = param->fpsNum; + svtParam->frameRateDenominator = param->fpsDenom; + svtParam->framesToBeEncoded = param->totalFrames; + svtParam->encoderColorFormat = (EB_COLOR_FORMAT)param->internalCsp; + } +#endif + + /* Force CFR until we have support for VFR */ + info.timebaseNum = param->fpsDenom; + info.timebaseDenom = param->fpsNum; + + if (param->bField && param->interlaceMode) + { // Field FPS + param->fpsNum *= 2; + // Field height + param->sourceHeight = param->sourceHeight >> 1; + // Number of fields to encode + 
param->totalFrames *= 2; + } + + if (api->param_apply_profile(param, profile)) + return true; + + if (param->logLevel >= X265_LOG_INFO) + { + char buf128; + int p = sprintf(buf, "%dx%d fps %d/%d %sp%d", param->sourceWidth, param->sourceHeight, + param->fpsNum, param->fpsDenom, x265_source_csp_namesparam->internalCsp, info.depth); + + int width, height; + getParamAspectRatio(param, width, height); + if (width && height) + p += sprintf(buf + p, " sar %d:%d", width, height); + + if (framesToBeEncoded <= 0 || info.frameCount <= 0) + strcpy(buf + p, " unknown frame count"); + else + sprintf(buf + p, " frames %u - %d of %d", this->seek, this->seek + this->framesToBeEncoded - 1, info.frameCount); + + general_log(param, input->getName(), X265_LOG_INFO, "%s\n", buf); + } + + this->input->startReader(); + + if (reconfn) + { + if (reconFileBitDepth == 0) + reconFileBitDepth = param->internalBitDepth; + this->recon = ReconFile::open(reconfn, param->sourceWidth, param->sourceHeight, reconFileBitDepth, + param->fpsNum, param->fpsDenom, param->internalCsp); + if (this->recon->isFail()) + { + x265_log(param, X265_LOG_WARNING, "unable to write reconstructed outputs file\n"); + this->recon->release(); + this->recon = 0; + } + else + general_log(param, this->recon->getName(), X265_LOG_INFO, + "reconstructed images %dx%d fps %d/%d %s\n", + param->sourceWidth, param->sourceHeight, param->fpsNum, param->fpsDenom, + x265_source_csp_namesparam->internalCsp); + } +#if ENABLE_LIBVMAF + if (!reconfn) + { + x265_log(param, X265_LOG_ERROR, "recon file must be specified to get VMAF score, try --help for help\n"); + return true; + } + const char *str = strrchr(info.filename, '.'); + + if (!strcmp(str, ".y4m")) + { + x265_log(param, X265_LOG_ERROR, "VMAF supports YUV file format only.\n"); + return true; + } + if (param->internalCsp == X265_CSP_I420 || param->internalCsp == X265_CSP_I422 || param->internalCsp == X265_CSP_I444) + { + vmafData->reference_file = x265_fopen(inputfn, "rb"); + 
vmafData->distorted_file = x265_fopen(reconfn, "rb"); + } + else + { + x265_log(param, X265_LOG_ERROR, "VMAF will support only yuv420p, yu422p, yu444p, yuv420p10le, yuv422p10le, yuv444p10le formats.\n"); + return true; + } +#endif + this->output = OutputFile::open(outputfn, info); + if (this->output->isFail()) + { + x265_log_file(param, X265_LOG_ERROR, "failed to open output file <%s> for writing\n", outputfn); + return true; + } + general_log_file(param, this->output->getName(), X265_LOG_INFO, "output file: %s\n", outputfn); + return false; + } + + bool CLIOptions::parseQPFile(x265_picture &pic_org) + { + int32_t num = -1, qp, ret; + char type; + uint32_t filePos; + pic_org.forceqp = 0; + pic_org.sliceType = X265_TYPE_AUTO; + while (num < pic_org.poc) + { + filePos = ftell(qpfile); + qp = -1; + ret = fscanf(qpfile, "%d %c%* \t%d\n", &num, &type, &qp); + + if (num > pic_org.poc || ret == EOF) + { + fseek(qpfile, filePos, SEEK_SET); + break; + } + if (num < pic_org.poc && ret >= 2) + continue; + if (ret == 3 && qp >= 0) + pic_org.forceqp = qp + 1; + if (type == 'I') pic_org.sliceType = X265_TYPE_IDR; + else if (type == 'i') pic_org.sliceType = X265_TYPE_I; + else if (type == 'K') pic_org.sliceType = param->bOpenGOP ? 
X265_TYPE_I : X265_TYPE_IDR; + else if (type == 'P') pic_org.sliceType = X265_TYPE_P; + else if (type == 'B') pic_org.sliceType = X265_TYPE_BREF; + else if (type == 'b') pic_org.sliceType = X265_TYPE_B; + else ret = 0; + if (ret < 2 || qp < -1 || qp > 51) + return 0; + } + return 1; + } + + bool CLIOptions::parseZoneFile() + { + char line256; + char* argLine; + param->rc.zonefileCount = 0; + + while (fgets(line, sizeof(line), zoneFile)) + { + if (!((*line == '#') || (strcmp(line, "\r\n") == 0))) + param->rc.zonefileCount++; + } + + rewind(zoneFile); + param->rc.zones = X265_MALLOC(x265_zone, param->rc.zonefileCount); + for (int i = 0; i < param->rc.zonefileCount; i++) + { + while (fgets(line, sizeof(line), zoneFile)) + { + if (*line == '#' || (strcmp(line, "\r\n") == 0)) + continue; + param->rc.zonesi.zoneParam = X265_MALLOC(x265_param, 1); + int index = (int)strcspn(line, "\r\n"); + lineindex = '\0'; + argLine = line; + while (isspace((unsigned char)*argLine)) argLine++; + char* start = strchr(argLine, ' '); + start++; + param->rc.zonesi.startFrame = atoi(argLine); + int argCount = 0; + char **args = (char**)malloc(256 * sizeof(char *)); + // Adding a dummy string to avoid file parsing error + argsargCount++ = (char *)"x265"; + char* token = strtok(start, " "); + while (token) + { + argsargCount++ = token; + token = strtok(NULL, " "); + } + argsargCount = NULL; + CLIOptions cliopt; + if (cliopt.parseZoneParam(argCount, args, param, i)) + { + cliopt.destroy(); + if (cliopt.api) + cliopt.api->param_free(cliopt.param); + exit(1); + } + break; + } + } + return 1; + } + + /* Parse the RPU file and extract the RPU corresponding to the current picture + * and fill the rpu field of the input picture */ + int CLIOptions::rpuParser(x265_picture * pic) + { + uint8_t byteVal; + uint32_t code = 0; + int bytesRead = 0; + pic->rpu.payloadSize = 0; + + if (!pic->pts) + { + while (bytesRead++ < 4 && fread(&byteVal, sizeof(uint8_t), 1, dolbyVisionRpu)) + code = (code << 8) | 
byteVal; + + if (code != START_CODE) + { + x265_log(NULL, X265_LOG_ERROR, "Invalid Dolby Vision RPU startcode in POC %d\n", pic->pts); + return 1; + } + } + + bytesRead = 0; + while (fread(&byteVal, sizeof(uint8_t), 1, dolbyVisionRpu)) + { + code = (code << 8) | byteVal; + if (bytesRead++ < 3) + continue; + if (bytesRead >= 1024) + { + x265_log(NULL, X265_LOG_ERROR, "Invalid Dolby Vision RPU size in POC %d\n", pic->pts); + return 1; + } + + if (code != START_CODE) + pic->rpu.payloadpic->rpu.payloadSize++ = (code >> (3 * 8)) & 0xFF; + else + return 0; + } + + int ShiftBytes = START_CODE_BYTES - (bytesRead - pic->rpu.payloadSize); + int bytesLeft = bytesRead - pic->rpu.payloadSize; + code = (code << ShiftBytes * 8); + for (int i = 0; i < bytesLeft; i++) + { + pic->rpu.payloadpic->rpu.payloadSize++ = (code >> (3 * 8)) & 0xFF; + code = (code << 8); + } + if (!pic->rpu.payloadSize) + x265_log(NULL, X265_LOG_WARNING, "Dolby Vision RPU not found for POC %d\n", pic->pts); + return 0; + } + +#ifdef __cplusplus +} +#endif \ No newline at end of file
View file
x265_3.3.tar.gz/source/x265cli.h -> x265_3.4.tar.gz/source/x265cli.h
Changed
@@ -27,9 +27,23 @@ #include "common.h" #include "param.h" +#include "input/input.h" +#include "output/output.h" +#include "output/reconplay.h" #include <getopt.h> +#define CONSOLE_TITLE_SIZE 200 +#ifdef _WIN32 +#include <windows.h> +#define SetThreadExecutionState(es) +static char orgConsoleTitleCONSOLE_TITLE_SIZE = ""; +#else +#define GetConsoleTitle(t, n) +#define SetConsoleTitle(t) +#define SetThreadExecutionState(es) +#endif + #ifdef __cplusplus namespace X265_NS { #endif @@ -105,8 +119,8 @@ { "amp", no_argument, NULL, 0 }, { "no-early-skip", no_argument, NULL, 0 }, { "early-skip", no_argument, NULL, 0 }, - { "no-rskip", no_argument, NULL, 0 }, - { "rskip", no_argument, NULL, 0 }, + { "rskip", required_argument, NULL, 0 }, + { "rskip-edge-threshold", required_argument, NULL, 0 }, { "no-fast-cbf", no_argument, NULL, 0 }, { "fast-cbf", no_argument, NULL, 0 }, { "no-tskip", no_argument, NULL, 0 }, @@ -358,6 +372,7 @@ { "cll", no_argument, NULL, 0 }, { "no-cll", no_argument, NULL, 0 }, { "hme-range", required_argument, NULL, 0 }, + { "abr-ladder", required_argument, NULL, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, @@ -365,336 +380,82 @@ { 0, 0, 0, 0 } }; -static void printVersion(x265_param *param, const x265_api* api) -{ - x265_log(param, X265_LOG_INFO, "HEVC encoder version %s\n", api->version_str); - x265_log(param, X265_LOG_INFO, "build info %s\n", api->build_info_str); -} + struct CLIOptions + { + InputFile* input; + ReconFile* recon; + OutputFile* output; + FILE* qpfile; + FILE* zoneFile; + FILE* dolbyVisionRpu; /* File containing Dolby Vision BL RPU metadata */ + const char* reconPlayCmd; + const x265_api* api; + x265_param* param; + x265_vmaf_data* vmafData; + bool bProgress; + bool bForceY4m; + bool bDither; + uint32_t seek; // number of frames to skip from the beginning + uint32_t framesToBeEncoded; // number of frames to encode + uint64_t totalbytes; + int64_t startTime; + int64_t prevUpdateTime; -static void showHelp(x265_param *param) -{ - 
int level = param->logLevel; + int argCnt; + char** argString; -#define OPT(value) (value ? "enabled" : "disabled") -#define H0 printf -#define H1 if (level >= X265_LOG_DEBUG) printf + /* ABR ladder settings */ + bool isAbrLadderConfig; + bool enableScaler; + char* encName; + char* reuseName; + uint32_t encId; + int refId; + uint32_t loadLevel; + uint32_t saveLevel; + uint32_t numRefs; - H0("\nSyntax: x265 options infile -o outfile\n"); - H0(" infile can be YUV or Y4M\n"); - H0(" outfile is raw HEVC bitstream\n"); - H0("\nExecutable Options:\n"); - H0("-h/--help Show this help text and exit\n"); - H0(" --fullhelp Show all options and exit\n"); - H0("-V/--version Show version info and exit\n"); - H0("\nOutput Options:\n"); - H0("-o/--output <filename> Bitstream output file name\n"); - H0("-D/--output-depth 8|10|12 Output bit depth (also internal bit depth). Default %d\n", param->internalBitDepth); - H0(" --log-level <string> Logging level: none error warning info debug full. Default %s\n", X265_NS::logLevelNamesparam->logLevel + 1); - H0(" --no-progress Disable CLI progress reports\n"); - H0(" --csv <filename> Comma separated log file, if csv-log-level > 0 frame level statistics, else one line per run\n"); - H0(" --csv-log-level <integer> Level of csv logging, if csv-log-level > 0 frame level statistics, else one line per run: 0-2\n"); - H0("\nInput Options:\n"); - H0(" --input <filename> Raw YUV or Y4M input file name. `-` for stdin\n"); - H1(" --y4m Force parsing of input stream as YUV4MPEG2 regardless of file extension\n"); - H0(" --fps <float|rational> Source frame rate (float or num/denom), auto-detected if Y4M\n"); - H0(" --input-res WxH Source picture size w x h, auto-detected if Y4M\n"); - H1(" --input-depth <integer> Bit-depth of input file. 
Default 8\n"); - H1(" --input-csp <string> Chroma subsampling, auto-detected if Y4M\n"); - H1(" 0 - i400 (4:0:0 monochrome)\n"); - H1(" 1 - i420 (4:2:0 default)\n"); - H1(" 2 - i422 (4:2:2)\n"); - H1(" 3 - i444 (4:4:4)\n"); -#if ENABLE_HDR10_PLUS - H0(" --dhdr10-info <filename> JSON file containing the Creative Intent Metadata to be encoded as Dynamic Tone Mapping\n"); - H0(" --no-dhdr10-opt Insert tone mapping SEI only for IDR frames and when the tone mapping information changes. Default disabled\n"); -#endif - H0(" --dolby-vision-profile <float|integer> Specifies Dolby Vision profile ID. Currently only profile 5, profile 8.1 and profile 8.2 enabled. Specified as '5' or '50'. Default 0 (disabled).\n"); - H0(" --dolby-vision-rpu <filename> File containing Dolby Vision RPU metadata.\n" - " If given, x265's Dolby Vision metadata parser will fill the RPU field of input pictures with the metadata read from the file. Default NULL(disabled).\n"); - H0(" --nalu-file <filename> Text file containing SEI messages in the following format : <POC><space><PREFIX><space><NAL UNIT TYPE>/<SEI TYPE><space><SEI Payload>\n"); - H0("-f/--frames <integer> Maximum number of frames to encode. Default all\n"); - H0(" --seek <integer> First frame to encode\n"); - H1(" --no-interlace <bff|tff> Indicate input pictures are interlace fields in temporal order. Default progressive\n"); - H0(" --no-field Enable or disable field coding. Default %s\n", OPT( param->bField)); - H1(" --dither Enable dither if downscaling to 8 bit pixels. Default disabled\n"); - H0(" --no-copy-pic Copy buffers of input picture in frame. Default %s\n", OPT(param->bCopyPicToFrame)); - H0("\nQuality reporting metrics:\n"); - H0(" --no-ssim Enable reporting SSIM metric scores. Default %s\n", OPT(param->bEnableSsim)); - H0(" --no-psnr Enable reporting PSNR metric scores. 
Default %s\n", OPT(param->bEnablePsnr)); - H0("\nProfile, Level, Tier:\n"); - H0("-P/--profile <string> Enforce an encode profile: main, main10, mainstillpicture\n"); - H0(" --level-idc <integer|float> Force a minimum required decoder level (as '5.0' or '50')\n"); - H0(" --no-high-tier If a decoder level is specified, this modifier selects High tier of that level\n"); - H0(" --uhd-bd Enable UHD Bluray compatibility support\n"); - H0(" --no-allow-non-conformance Allow the encoder to generate profile NONE bitstreams. Default %s\n", OPT(param->bAllowNonConformance)); - H0("\nThreading, performance:\n"); - H0(" --pools <integer,...> Comma separated thread count per thread pool (pool per NUMA node)\n"); - H0(" '-' implies no threads on node, '+' implies one thread per core on node\n"); - H0("-F/--frame-threads <integer> Number of concurrently encoded frames. 0: auto-determined by core count\n"); - H0(" --no-wpp Enable Wavefront Parallel Processing. Default %s\n", OPT(param->bEnableWavefront)); - H0(" --no-slices <integer> Enable Multiple Slices feature. Default %d\n", param->maxSlices); - H0(" --no-pmode Parallel mode analysis. Default %s\n", OPT(param->bDistributeModeAnalysis)); - H0(" --no-pme Parallel motion estimation. Default %s\n", OPT(param->bDistributeMotionEstimation)); - H0(" --no-asm <bool|int|string> Override CPU detection. Default: auto\n"); - H0("\nPresets:\n"); - H0("-p/--preset <string> Trade off performance for compression efficiency. Default medium\n"); - H0(" ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow, or placebo\n"); - H0("-t/--tune <string> Tune the settings for a particular type of source or situation:\n"); - H0(" psnr, ssim, grain, zerolatency, fastdecode\n"); - H0("\nQuad-Tree size and depth:\n"); - H0("-s/--ctu <64|32|16> Maximum CU size (WxH). Default %d\n", param->maxCUSize); - H0(" --min-cu-size <64|32|16|8> Minimum CU size (WxH). 
Default %d\n", param->minCUSize); - H0(" --max-tu-size <32|16|8|4> Maximum TU size (WxH). Default %d\n", param->maxTUSize); - H0(" --tu-intra-depth <integer> Max TU recursive depth for intra CUs. Default %d\n", param->tuQTMaxIntraDepth); - H0(" --tu-inter-depth <integer> Max TU recursive depth for inter CUs. Default %d\n", param->tuQTMaxInterDepth); - H0(" --limit-tu <0..4> Enable early exit from TU recursion for inter coded blocks. Default %d\n", param->limitTU); - H0("\nAnalysis:\n"); - H0(" --rd <1..6> Level of RDO in mode decision 1:least....6:full RDO. Default %d\n", param->rdLevel); - H0(" --no-psy-rd <0..5.0> Strength of psycho-visual rate distortion optimization, 0 to disable. Default %.1f\n", param->psyRd); - H0(" --no-rdoq-level <0|1|2> Level of RDO in quantization 0:none, 1:levels, 2:levels & coding groups. Default %d\n", param->rdoqLevel); - H0(" --no-psy-rdoq <0..50.0> Strength of psycho-visual optimization in RDO quantization, 0 to disable. Default %.1f\n", param->psyRdoq); - H0(" --dynamic-rd <0..4.0> Strength of dynamic RD, 0 to disable. Default %.2f\n", param->dynamicRd); - H0(" --no-ssim-rd Enable ssim rate distortion optimization, 0 to disable. Default %s\n", OPT(param->bSsimRd)); - H0(" --no-rd-refine Enable QP based RD refinement for rd levels 5 and 6. Default %s\n", OPT(param->bEnableRdRefine)); - H0(" --no-early-skip Enable early SKIP detection. Default %s\n", OPT(param->bEnableEarlySkip)); - H0(" --no-rskip Enable early exit from recursion. Default %s\n", OPT(param->bEnableRecursionSkip)); - H1(" --no-tskip-fast Enable fast intra transform skipping. Default %s\n", OPT(param->bEnableTSkipFast)); - H1(" --no-splitrd-skip Enable skipping split RD analysis when sum of split CU rdCost larger than one split CU rdCost for Intra CU. Default %s\n", OPT(param->bEnableSplitRdSkip)); - H1(" --nr-intra <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in intra CUs. 
Default 0\n"); - H1(" --nr-inter <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in inter CUs. Default 0\n"); - H0(" --ctu-info <integer> Enable receiving ctu information asynchronously and determine reaction to the CTU information (0, 1, 2, 4, 6) Default 0\n" - " - 1: force the partitions if CTU information is present\n" - " - 2: functionality of (1) and reduce qp if CTU information has changed\n" - " - 4: functionality of (1) and force Inter modes when CTU Information has changed, merge/skip otherwise\n" - " Enable this option only when planning to invoke the API function x265_encoder_ctu_info to copy ctu-info asynchronously\n"); - H0("\nCoding tools:\n"); - H0("-w/--no-weightp Enable weighted prediction in P slices. Default %s\n", OPT(param->bEnableWeightedPred)); - H0(" --no-weightb Enable weighted prediction in B slices. Default %s\n", OPT(param->bEnableWeightedBiPred)); - H0(" --no-cu-lossless Consider lossless mode in CU RDO decisions. Default %s\n", OPT(param->bCULossless)); - H0(" --no-signhide Hide sign bit of one coeff per TU (rdo). Default %s\n", OPT(param->bEnableSignHiding)); - H1(" --no-tskip Enable intra 4x4 transform skipping. Default %s\n", OPT(param->bEnableTransformSkip)); - H0("\nTemporal / motion search options:\n"); - H0(" --max-merge <1..5> Maximum number of merge candidates. Default %d\n", param->maxNumMergeCand); - H0(" --ref <integer> max number of L0 references to be allowed (1 .. 16) Default %d\n", param->maxNumReferences); - H0(" --limit-refs <0|1|2|3> Limit references per depth (1) or CU (2) or both (3). Default %d\n", param->limitReferences); - H0(" --me <string> Motion search method dia hex umh star full. Default %d\n", param->searchMethod); - H0("-m/--subme <integer> Amount of subpel refinement to perform (0:least .. 7:most). Default %d \n", param->subpelRefine); - H0(" --merange <integer> Motion search range. 
Default %d\n", param->searchRange); - H0(" --no-rect Enable rectangular motion partitions Nx2N and 2NxN. Default %s\n", OPT(param->bEnableRectInter)); - H0(" --no-amp Enable asymmetric motion partitions, requires --rect. Default %s\n", OPT(param->bEnableAMP)); - H0(" --no-limit-modes Limit rectangular and asymmetric motion predictions. Default %d\n", param->limitModes); - H1(" --no-temporal-mvp Enable temporal MV predictors. Default %s\n", OPT(param->bEnableTemporalMvp)); - H1(" --no-hme Enable Hierarchical Motion Estimation. Default %s\n", OPT(param->bEnableHME)); - H1(" --hme-search <string> Motion search-method for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeSearchMethod0, param->hmeSearchMethod1, param->hmeSearchMethod2); - H1(" --hme-range <int>,<int>,<int> Motion search-range for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeRange0, param->hmeRange1, param->hmeRange2); - H0("\nSpatial / intra options:\n"); - H0(" --no-strong-intra-smoothing Enable strong intra smoothing for 32x32 blocks. Default %s\n", OPT(param->bEnableStrongIntraSmoothing)); - H0(" --no-constrained-intra Constrained intra prediction (use only intra coded reference pixels) Default %s\n", OPT(param->bEnableConstrainedIntra)); - H0(" --no-b-intra Enable intra in B frames in veryslow presets. Default %s\n", OPT(param->bIntraInBFrames)); - H0(" --no-fast-intra Enable faster search method for angular intra predictions. Default %s\n", OPT(param->bEnableFastIntra)); - H0(" --rdpenalty <0..2> penalty for 32x32 intra TU in non-I slices. 0:disabled 1:RD-penalty 2:maximum. Default %d\n", param->rdPenalty); - H0("\nSlice decision options:\n"); - H0(" --no-open-gop Enable open-GOP, allows I slices to be non-IDR. Default %s\n", OPT(param->bOpenGOP)); - H0("-I/--keyint <integer> Max IDR period in frames. -1 for infinite-gop. Default %d\n", param->keyframeMax); - H0("-i/--min-keyint <integer> Scenecuts closer together than this are coded as I, not IDR. 
Default: auto\n"); - H0(" --gop-lookahead <integer> Extends gop boundary if a scenecut is found within this from keyint boundary. Default 0\n"); - H0(" --no-scenecut Disable adaptive I-frame decision\n"); - H0(" --scenecut <integer> How aggressively to insert extra I-frames. Default %d\n", param->scenecutThreshold); - H1(" --scenecut-bias <0..100.0> Bias for scenecut detection. Default %.2f\n", param->scenecutBias); - H0(" --hist-scenecut Enables histogram based scene-cut detection using histogram based algorithm.\n"); - H0(" --no-hist-scenecut Disables histogram based scene-cut detection using histogram based algorithm.\n"); - H1(" --hist-threshold <0.0..2.0> Luma Edge histogram's Normalized SAD threshold for histogram based scenecut detection Default %.2f\n", param->edgeTransitionThreshold); - H0(" --no-fades Enable detection and handling of fade-in regions. Default %s\n", OPT(param->bEnableFades)); - H1(" --no-scenecut-aware-qp Enable increasing QP for frames inside the scenecut window after scenecut. Default %s\n", OPT(param->bEnableSceneCutAwareQp)); - H1(" --scenecut-window <0..1000> QP incremental duration(in milliseconds) when scenecut-aware-qp is enabled. Default %d\n", param->scenecutWindow); - H1(" --max-qp-delta <0..10> QP offset to increment with base QP for inter-frames. Default %d\n", param->maxQpDelta); - H0(" --radl <integer> Number of RADL pictures allowed in front of IDR. Default %d\n", param->radl); - H0(" --intra-refresh Use Periodic Intra Refresh instead of IDR frames\n"); - H0(" --rc-lookahead <integer> Number of frames for frame-type lookahead (determines encoder latency) Default %d\n", param->lookaheadDepth); - H1(" --lookahead-slices <0..16> Number of slices to use per lookahead cost estimate. Default %d\n", param->lookaheadSlices); - H0(" --lookahead-threads <integer> Number of threads to be dedicated to perform lookahead only. Default %d\n", param->lookaheadThreads); - H0("-b/--bframes <0..16> Maximum number of consecutive b-frames. 
Default %d\n", param->bframes); - H1(" --bframe-bias <integer> Bias towards B frame decisions. Default %d\n", param->bFrameBias); - H0(" --b-adapt <0..2> 0 - none, 1 - fast, 2 - full (trellis) adaptive B frame scheduling. Default %d\n", param->bFrameAdaptive); - H0(" --no-b-pyramid Use B-frames as references. Default %s\n", OPT(param->bBPyramid)); - H1(" --qpfile <string> Force frametypes and QPs for some or all frames\n"); - H1(" Format of each line: framenumber frametype QP\n"); - H1(" QP is optional (none lets x265 choose). Frametypes: I,i,K,P,B,b.\n"); - H1(" QPs are restricted by qpmin/qpmax.\n"); - H1(" --force-flush <integer> Force the encoder to flush frames. Default %d\n", param->forceFlush); - H1(" 0 - flush the encoder only when all the input pictures are over.\n"); - H1(" 1 - flush all the frames even when the input is not over. Slicetype decision may change with this option.\n"); - H1(" 2 - flush the slicetype decided frames only.\n"); - H0(" --no--hrd-concat Set HRD concatenation flag for the first keyframe in the buffering period SEI. Default %s\n", OPT(param->bEnableHRDConcatFlag)); - H0("\nRate control, Adaptive Quantization:\n"); - H0(" --bitrate <integer> Target bitrate (kbps) for ABR (implied). Default %d\n", param->rc.bitrate); - H1("-q/--qp <integer> QP for P slices in CQP mode (implied). --ipratio and --pbration determine other slice QPs\n"); - H0(" --crf <float> Quality-based VBR (0-51). Default %.1f\n", param->rc.rfConstant); - H1(" --no-lossless Enable lossless: bypass transform, quant and loop filters globally. Default %s\n", OPT(param->bLossless)); - H1(" --crf-max <float> With CRF+VBV, limit RF to this value. Default %f\n", param->rc.rfConstantMax); - H1(" May cause VBV underflows!\n"); - H1(" --crf-min <float> With CRF+VBV, limit RF to this value. Default %f\n", param->rc.rfConstantMin); - H1(" this specifies a minimum rate factor value for encode!\n"); - H0(" --vbv-maxrate <integer> Max local bitrate (kbit/s). 
Default %d\n", param->rc.vbvMaxBitrate); - H0(" --vbv-bufsize <integer> Set size of the VBV buffer (kbit). Default %d\n", param->rc.vbvBufferSize); - H0(" --vbv-init <float> Initial VBV buffer occupancy (fraction of bufsize or in kbits). Default %.2f\n", param->rc.vbvBufferInit); - H0(" --vbv-end <float> Final VBV buffer emptiness (fraction of bufsize or in kbits). Default 0 (disabled)\n"); - H0(" --vbv-end-fr-adj <float> Frame from which qp has to be adjusted to achieve final decode buffer emptiness. Default 0\n"); - H0(" --chunk-start <integer> First frame of the chunk. Default 0 (disabled)\n"); - H0(" --chunk-end <integer> Last frame of the chunk. Default 0 (disabled)\n"); - H0(" --pass Multi pass rate control.\n" - " - 1 : First pass, creates stats file\n" - " - 2 : Last pass, does not overwrite stats file\n" - " - 3 : Nth pass, overwrites stats file\n"); - H0(" --no-multi-pass-opt-analysis Refine analysis in 2 pass based on analysis information from pass 1\n"); - H0(" --no-multi-pass-opt-distortion Use distortion of CTU from pass 1 to refine qp in 2 pass\n"); - H0(" --stats Filename for stats file in multipass pass rate control. Default x265_2pass.log\n"); - H0(" --no-analyze-src-pics Motion estimation uses source frame planes. Default disable\n"); - H0(" --no-slow-firstpass Enable a slow first pass in a multipass rate control mode. Default %s\n", OPT(param->rc.bEnableSlowFirstPass)); - H0(" --no-strict-cbr Enable stricter conditions and tolerance for bitrate deviations in CBR mode. Default %s\n", OPT(param->rc.bStrictCbr)); - H0(" --analysis-save <filename> Dump analysis info into the specified file. Default Disabled\n"); - H0(" --analysis-load <filename> Load analysis buffers from the file specified. Default Disabled\n"); - H0(" --analysis-reuse-file <filename> Specify file name used for either dumping or reading analysis data. 
Deault x265_analysis.dat\n"); - H0(" --analysis-reuse-level <1..10> Level of analysis reuse indicates amount of info stored/reused in save/load mode, 1:least..10:most. Now deprecated. Default %d\n", param->analysisReuseLevel); - H0(" --analysis-save-reuse-level <1..10> Indicates the amount of analysis info stored in save mode, 1:least..10:most. Default %d\n", param->analysisSaveReuseLevel); - H0(" --analysis-load-reuse-level <1..10> Indicates the amount of analysis info reused in load mode, 1:least..10:most. Default %d\n", param->analysisLoadReuseLevel); - H0(" --refine-analysis-type <string> Reuse anlaysis information received through API call. Supported options are avc and hevc. Default disabled - %d\n", param->bAnalysisType); - H0(" --scale-factor <int> Specify factor by which input video is scaled down for analysis save mode. Default %d\n", param->scaleFactor); - H0(" --refine-intra <0..4> Enable intra refinement for encode that uses analysis-load.\n" - " - 0 : Forces both mode and depth from the save encode.\n" - " - 1 : Functionality of (0) + evaluate all intra modes at min-cu-size's depth when current depth is one smaller than min-cu-size's depth.\n" - " - 2 : Functionality of (1) + irrespective of size evaluate all angular modes when the save encode decides the best mode as angular.\n" - " - 3 : Functionality of (1) + irrespective of size evaluate all intra modes.\n" - " - 4 : Re-evaluate all intra blocks, does not reuse data from save encode.\n" - " Default:%d\n", param->intraRefine); - H0(" --refine-inter <0..3> Enable inter refinement for encode that uses analysis-load.\n" - " - 0 : Forces both mode and depth from the save encode.\n" - " - 1 : Functionality of (0) + evaluate all inter modes at min-cu-size's depth when current depth is one smaller than\n" - " min-cu-size's depth. 
When save encode decides the current block as skip(for all sizes) evaluate skip/merge.\n" - " - 2 : Functionality of (1) + irrespective of size restrict the modes evaluated when specific modes are decided as the best mode by the save encode.\n" - " - 3 : Functionality of (1) + irrespective of size evaluate all inter modes.\n" - " Default:%d\n", param->interRefine); - H0(" --no-dynamic-refine Dynamically changes refine-inter level for each CU. Default %s\n", OPT(param->bDynamicRefine)); - H0(" --refine-mv <1..3> Enable mv refinement for load mode. Default %d\n", param->mvRefine); - H0(" --refine-ctu-distortion Store/normalize ctu distortion in analysis-save/load.\n" - " - 0 : Disabled.\n" - " - 1 : Store/Load ctu distortion to/from the file specified in analysis-save/load.\n" - " Default 0 - Disabled\n"); - H0(" --aq-mode <integer> Mode for Adaptive Quantization - 0:none 1:uniform AQ 2:auto variance 3:auto variance with bias to dark scenes 4:auto variance with edge information. Default %d\n", param->rc.aqMode); - H0(" --no-hevc-aq Mode for HEVC Adaptive Quantization. Default %s\n", OPT(param->rc.hevcAq)); - H0(" --aq-strength <float> Reduces blocking and blurring in flat and textured areas (0 to 3.0). Default %.2f\n", param->rc.aqStrength); - H0(" --qp-adaptation-range <float> Delta QP range by QP adaptation based on a psycho-visual model (1.0 to 6.0). Default %.2f\n", param->rc.qpAdaptationRange); - H0(" --no-aq-motion Block level QP adaptation based on the relative motion between the block and the frame. Default %s\n", OPT(param->bAQMotion)); - H0(" --qg-size <int> Specifies the size of the quantization group (64, 32, 16, 8). Default %d\n", param->rc.qgSize); - H0(" --no-cutree Enable cutree for Adaptive Quantization. Default %s\n", OPT(param->rc.cuTree)); - H0(" --no-rc-grain Enable ratecontrol mode to handle grains specifically. turned on with tune grain. Default %s\n", OPT(param->rc.bEnableGrain)); - H1(" --ipratio <float> QP factor between I and P. 
Default %.2f\n", param->rc.ipFactor); - H1(" --pbratio <float> QP factor between P and B. Default %.2f\n", param->rc.pbFactor); - H1(" --qcomp <float> Weight given to predicted complexity. Default %.2f\n", param->rc.qCompress); - H1(" --qpstep <integer> The maximum single adjustment in QP allowed to rate control. Default %d\n", param->rc.qpStep); - H1(" --qpmin <integer> sets a hard lower limit on QP allowed to ratecontrol. Default %d\n", param->rc.qpMin); - H1(" --qpmax <integer> sets a hard upper limit on QP allowed to ratecontrol. Default %d\n", param->rc.qpMax); - H0(" --no-const-vbv Enable consistent vbv. turned on with tune grain. Default %s\n", OPT(param->rc.bEnableConstVbv)); - H1(" --cbqpoffs <integer> Chroma Cb QP Offset -12..12. Default %d\n", param->cbQpOffset); - H1(" --crqpoffs <integer> Chroma Cr QP Offset -12..12. Default %d\n", param->crQpOffset); - H1(" --scaling-list <string> Specify a file containing HM style quant scaling lists or 'default' or 'off'. Default: off\n"); - H1(" --zones <zone0>/<zone1>/... Tweak the bitrate of regions of the video\n"); - H1(" Each zone is of the form\n"); - H1(" <start frame>,<end frame>,<option>\n"); - H1(" where <option> is either\n"); - H1(" q=<integer> (force QP)\n"); - H1(" or b=<float> (bitrate multiplier)\n"); - H0(" --zonefile <filename> Zone file containing the zone boundaries and the parameters to be reconfigured.\n"); - H1(" --lambda-file <string> Specify a file containing replacement values for the lambda tables\n"); - H1(" MAX_MAX_QP+1 floats for lambda table, then again for lambda2 table\n"); - H1(" Blank lines and lines starting with hash(#) are ignored\n"); - H1(" Comma is considered to be white-space\n"); - H0(" --max-ausize-factor <float> This value controls the maximum AU size defined in specification.\n"); - H0(" It represents the percentage of maximum AU size used. 
Default %.1f\n", param->maxAUSizeFactor); - H0("\nLoop filters (deblock and SAO):\n"); - H0(" --no-deblock Enable Deblocking Loop Filter, optionally specify tC:Beta offsets Default %s\n", OPT(param->bEnableLoopFilter)); - H0(" --no-sao Enable Sample Adaptive Offset. Default %s\n", OPT(param->bEnableSAO)); - H1(" --no-sao-non-deblock Use non-deblocked pixels, else right/bottom boundary areas skipped. Default %s\n", OPT(param->bSaoNonDeblocked)); - H0(" --no-limit-sao Limit Sample Adaptive Offset types. Default %s\n", OPT(param->bLimitSAO)); - H0(" --selective-sao <int> Enable slice-level SAO filter. Default %d\n", param->selectiveSAO); - H0("\nVUI options:\n"); - H0(" --sar <width:height|int> Sample Aspect Ratio, the ratio of width to height of an individual pixel.\n"); - H0(" Choose from 0=undef, 1=1:1(\"square\"), 2=12:11, 3=10:11, 4=16:11,\n"); - H0(" 5=40:33, 6=24:11, 7=20:11, 8=32:11, 9=80:33, 10=18:11, 11=15:11,\n"); - H0(" 12=64:33, 13=160:99, 14=4:3, 15=3:2, 16=2:1 or custom ratio of <int:int>. Default %d\n", param->vui.aspectRatioIdc); - H1(" --display-window <string> Describe overscan cropping region as 'left,top,right,bottom' in pixels\n"); - H1(" --overscan <string> Specify whether it is appropriate for decoder to show cropped region: undef, show or crop. Default undef\n"); - H0(" --videoformat <string> Specify video format from undef, component, pal, ntsc, secam, mac. Default undef\n"); - H0(" --range <string> Specify black level and range of luma and chroma signals as full or limited Default limited\n"); - H0(" --colorprim <string> Specify color primaries from bt709, unknown, reserved, bt470m, bt470bg, smpte170m,\n"); - H0(" smpte240m, film, bt2020, smpte428, smpte431, smpte432. 
Default undef\n"); - H0(" --transfer <string> Specify transfer characteristics from bt709, unknown, reserved, bt470m, bt470bg, smpte170m,\n"); - H0(" smpte240m, linear, log100, log316, iec61966-2-4, bt1361e, iec61966-2-1,\n"); - H0(" bt2020-10, bt2020-12, smpte2084, smpte428, arib-std-b67. Default undef\n"); - H1(" --colormatrix <string> Specify color matrix setting from undef, bt709, fcc, bt470bg, smpte170m,\n"); - H1(" smpte240m, GBR, YCgCo, bt2020nc, bt2020c, smpte2085, chroma-derived-nc, chroma-derived-c, ictcp. Default undef\n"); - H1(" --chromaloc <integer> Specify chroma sample location (0 to 5). Default of %d\n", param->vui.chromaSampleLocTypeTopField); - H0(" --master-display <string> SMPTE ST 2086 master display color volume info SEI (HDR)\n"); - H0(" format: G(x,y)B(x,y)R(x,y)WP(x,y)L(max,min)\n"); - H0(" --max-cll <string> Specify content light level info SEI as \"cll,fall\" (HDR).\n"); - H0(" --no-cll Emit content light level info SEI. Default %s\n", OPT(param->bEmitCLL)); - H0(" --no-hdr10 Control dumping of HDR10 SEI packet. If max-cll or master-display has non-zero values, this is enabled. Default %s\n", OPT(param->bEmitHDR10SEI)); - H0(" --no-hdr-opt Add luma and chroma offsets for HDR/WCG content. Default %s. Now deprecated.\n", OPT(param->bHDROpt)); - H0(" --no-hdr10-opt Block-level QP optimization for HDR10 content. Default %s.\n", OPT(param->bHDR10Opt)); - H0(" --min-luma <integer> Minimum luma plane value of input source picture\n"); - H0(" --max-luma <integer> Maximum luma plane value of input source picture\n"); - H0("\nBitstream options:\n"); - H0(" --no-repeat-headers Emit SPS and PPS headers at each keyframe. Default %s\n", OPT(param->bRepeatHeaders)); - H0(" --no-info Emit SEI identifying encoder and parameters. Default %s\n", OPT(param->bEmitInfoSEI)); - H0(" --no-hrd Enable HRD parameters signaling. 
Default %s\n", OPT(param->bEmitHRDSEI)); - H0(" --no-idr-recovery-sei Emit recovery point infor SEI at each IDR frame \n"); - H0(" --no-temporal-layers Enable a temporal sublayer for unreferenced B frames. Default %s\n", OPT(param->bEnableTemporalSubLayers)); - H0(" --no-aud Emit access unit delimiters at the start of each access unit. Default %s\n", OPT(param->bEnableAccessUnitDelimiters)); - H1(" --hash <integer> Decoded Picture Hash SEI 0: disabled, 1: MD5, 2: CRC, 3: Checksum. Default %d\n", param->decodedPictureHashSEI); - H0(" --atc-sei <integer> Emit the alternative transfer characteristics SEI message where the integer is the preferred transfer characteristics. Default disabled\n"); - H0(" --pic-struct <integer> Set the picture structure and emits it in the picture timing SEI message. Values in the range 0..12. See D.3.3 of the HEVC spec. for a detailed explanation.\n"); - H0(" --log2-max-poc-lsb <integer> Maximum of the picture order count\n"); - H0(" --no-vui-timing-info Emit VUI timing information in the bistream. Default %s\n", OPT(param->bEmitVUITimingInfo)); - H0(" --no-vui-hrd-info Emit VUI HRD information in the bistream. Default %s\n", OPT(param->bEmitVUIHRDInfo)); - H0(" --no-opt-qp-pps Dynamically optimize QP in PPS (instead of default 26) based on QPs in previous GOP. Default %s\n", OPT(param->bOptQpPPS)); - H0(" --no-opt-ref-list-length-pps Dynamically set L0 and L1 ref list length in PPS (instead of default 0) based on values in last GOP. Default %s\n", OPT(param->bOptRefListLengthPPS)); - H0(" --no-multi-pass-opt-rps Enable storing commonly used RPS in SPS in multi pass mode. Default %s\n", OPT(param->bMultiPassOptRPS)); - H0(" --no-opt-cu-delta-qp Optimize to signal consistent CU level delta QPs in frame. 
Default %s\n", OPT(param->bOptCUDeltaQP)); - H1("\nReconstructed video options (debugging):\n"); - H1("-r/--recon <filename> Reconstructed raw image YUV or Y4M output file name\n"); - H1(" --recon-depth <integer> Bit-depth of reconstructed raw image file. Defaults to input bit depth, or 8 if Y4M\n"); - H1(" --recon-y4m-exec <string> pipe reconstructed frames to Y4M viewer, ex:\"ffplay -i pipe:0 -autoexit\"\n"); - H0(" --lowpass-dct Use low-pass subband dct approximation. Default %s\n", OPT(param->bLowPassDct)); - H0(" --no-frame-dup Enable Frame duplication. Default %s\n", OPT(param->bEnableFrameDuplication)); - H0(" --dup-threshold <integer> PSNR threshold for Frame duplication. Default %d\n", param->dupThreshold); -#ifdef SVT_HEVC - H0(" --nosvt Enable SVT HEVC encoder %s\n", OPT(param->bEnableSvtHevc)); - H0(" --no-svt-hme Enable Hierarchial motion estimation(HME) in SVT HEVC encoder \n"); - H0(" --svt-search-width Motion estimation search area width for SVT HEVC encoder \n"); - H0(" --svt-search-height Motion estimation search area height for SVT HEVC encoder \n"); - H0(" --no-svt-compressed-ten-bit-format Enable 8+2 encoding mode for 10bit input in SVT HEVC encoder \n"); - H0(" --no-svt-speed-control Enable speed control functionality to achieve real time encoding speed for SVT HEVC encoder \n"); - H0(" --svt-preset-tuner Enable additional faster presets of SVT; This only has to be used on top of x265's ultrafast preset. Accepts values in the range of 0-2 \n"); - H0(" --svt-hierarchical-level Hierarchical layer for SVT-HEVC encoder; Accepts inputs in the range 0-3 \n"); - H0(" --svt-base-layer-switch-mode Select whether B/P slice should be used in base layer for SVT-HEVC encoder. 
0-Use B-frames; 1-Use P frames in the base layer \n"); - H0(" --svt-pred-struct Select pred structure for SVT HEVC encoder; Accepts inputs in the range 0-2 \n"); - H0(" --no-svt-fps-in-vps Enable VPS timing info for SVT HEVC encoder \n"); -#endif - H1("\nExecutable return codes:\n"); - H1(" 0 - encode successful\n"); - H1(" 1 - unable to parse command line\n"); - H1(" 2 - unable to open encoder\n"); - H1(" 3 - unable to generate stream headers\n"); - H1(" 4 - encoder abort\n"); -#undef OPT -#undef H0 -#undef H1 - if (level < X265_LOG_DEBUG) - printf("\nUse --fullhelp for a full listing (or --log-level full --help)\n"); - printf("\n\nComplete documentation may be found at http://x265.readthedocs.org/en/default/cli.html\n"); - exit(1); -} + /* in microseconds */ + static const int UPDATE_INTERVAL = 250000; + CLIOptions() + { + input = NULL; + recon = NULL; + output = NULL; + qpfile = NULL; + zoneFile = NULL; + dolbyVisionRpu = NULL; + reconPlayCmd = NULL; + api = NULL; + param = NULL; + vmafData = NULL; + framesToBeEncoded = seek = 0; + totalbytes = 0; + bProgress = true; + bForceY4m = false; + startTime = x265_mdate(); + prevUpdateTime = 0; + bDither = false; + isAbrLadderConfig = false; + enableScaler = false; + encName = NULL; + reuseName = NULL; + encId = 0; + refId = -1; + loadLevel = 0; + saveLevel = 0; + numRefs = 0; + argCnt = 0; + } + void destroy(); + void printStatus(uint32_t frameNum); + bool parse(int argc, char **argv); + bool parseZoneParam(int argc, char **argv, x265_param* globalParam, int zonefileCount); + bool parseQPFile(x265_picture &pic_org); + bool parseZoneFile(); + int rpuParser(x265_picture * pic); + }; #ifdef __cplusplus } #endif
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.