diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2024-03-29 18:11:18 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2024-03-29 18:11:18 +0000 |
commit | e91b3430dd4106d54524333bde0062343a801770 (patch) | |
tree | 7aa355fd0b89ec0b2611e17ee84a14c6fa449e22 | |
parent | eaf9564965f32c3a944ed1adf82cf4f80fdba734 (diff) | |
parent | c334ec011622efb99936768259a97d2a9b8dcd83 (diff) | |
download | libyuv-androidx-vectordrawable-release.tar.gz |
Merge "Snap for 11647390 from 488a2af021e3e7473f083a9435b1472c0d411f3d to androidx-vectordrawable-release" into androidx-vectordrawable-releaseandroidx-vectordrawable-release
-rw-r--r-- | .clang-format (renamed from files/.clang-format) | 0 | ||||
-rw-r--r-- | .gitignore (renamed from files/.gitignore) | 0 | ||||
-rw-r--r-- | .gn (renamed from files/.gn) | 0 | ||||
-rw-r--r-- | .vpython (renamed from files/.vpython) | 0 | ||||
-rw-r--r-- | .vpython3 (renamed from files/.vpython3) | 0 | ||||
-rw-r--r-- | AUTHORS (renamed from files/AUTHORS) | 0 | ||||
-rw-r--r-- | Android.bp | 179 | ||||
-rw-r--r-- | BUILD | 14 | ||||
-rw-r--r-- | BUILD.gn (renamed from files/BUILD.gn) | 55 | ||||
-rw-r--r-- | CM_linux_packages.cmake (renamed from files/CM_linux_packages.cmake) | 4 | ||||
-rw-r--r-- | CMakeLists.txt (renamed from files/CMakeLists.txt) | 4 | ||||
-rw-r--r-- | DEPS (renamed from files/DEPS) | 113 | ||||
-rw-r--r-- | DIR_METADATA (renamed from files/DIR_METADATA) | 0 | ||||
-rw-r--r-- | LICENSE | 2 | ||||
-rw-r--r-- | METADATA | 23 | ||||
-rw-r--r-- | OWNERS | 15 | ||||
-rw-r--r-- | OWNERS.android | 1 | ||||
-rw-r--r-- | PATENTS (renamed from files/PATENTS) | 0 | ||||
-rw-r--r-- | PRESUBMIT.py (renamed from files/PRESUBMIT.py) | 0 | ||||
-rw-r--r-- | README.chromium (renamed from files/README.chromium) | 5 | ||||
-rw-r--r-- | README.md (renamed from files/README.md) | 0 | ||||
-rw-r--r-- | README.version | 8 | ||||
-rw-r--r-- | UPDATING | 36 | ||||
-rw-r--r-- | build_overrides/build.gni (renamed from files/build_overrides/build.gni) | 0 | ||||
-rw-r--r-- | build_overrides/gtest.gni (renamed from files/build_overrides/gtest.gni) | 0 | ||||
-rw-r--r-- | build_overrides/partition_alloc.gni (renamed from files/build_overrides/partition_alloc.gni) | 0 | ||||
-rwxr-xr-x | cleanup_links.py (renamed from files/cleanup_links.py) | 0 | ||||
-rw-r--r-- | codereview.settings | 4 | ||||
-rw-r--r-- | docs/deprecated_builds.md (renamed from files/docs/deprecated_builds.md) | 4 | ||||
-rw-r--r-- | docs/environment_variables.md (renamed from files/docs/environment_variables.md) | 0 | ||||
-rw-r--r-- | docs/filtering.md (renamed from files/docs/filtering.md) | 0 | ||||
-rw-r--r-- | docs/formats.md (renamed from files/docs/formats.md) | 0 | ||||
-rw-r--r-- | docs/getting_started.md (renamed from files/docs/getting_started.md) | 18 | ||||
-rw-r--r-- | docs/rotation.md (renamed from files/docs/rotation.md) | 0 | ||||
-rw-r--r-- | download_vs_toolchain.py (renamed from files/download_vs_toolchain.py) | 0 | ||||
-rw-r--r-- | files/Android.bp | 197 | ||||
-rw-r--r-- | files/LICENSE | 29 | ||||
-rw-r--r-- | files/codereview.settings | 5 | ||||
-rw-r--r-- | files/public.mk | 13 | ||||
-rw-r--r-- | include/libyuv.h (renamed from files/include/libyuv.h) | 0 | ||||
-rw-r--r-- | include/libyuv/basic_types.h (renamed from files/include/libyuv/basic_types.h) | 0 | ||||
-rw-r--r-- | include/libyuv/compare.h (renamed from files/include/libyuv/compare.h) | 0 | ||||
-rw-r--r-- | include/libyuv/compare_row.h (renamed from files/include/libyuv/compare_row.h) | 5 | ||||
-rw-r--r-- | include/libyuv/convert.h (renamed from files/include/libyuv/convert.h) | 0 | ||||
-rw-r--r-- | include/libyuv/convert_argb.h (renamed from files/include/libyuv/convert_argb.h) | 0 | ||||
-rw-r--r-- | include/libyuv/convert_from.h (renamed from files/include/libyuv/convert_from.h) | 0 | ||||
-rw-r--r-- | include/libyuv/convert_from_argb.h (renamed from files/include/libyuv/convert_from_argb.h) | 0 | ||||
-rw-r--r-- | include/libyuv/cpu_id.h (renamed from files/include/libyuv/cpu_id.h) | 23 | ||||
-rw-r--r-- | include/libyuv/loongson_intrinsics.h (renamed from files/include/libyuv/loongson_intrinsics.h) | 0 | ||||
-rw-r--r-- | include/libyuv/macros_msa.h (renamed from files/include/libyuv/macros_msa.h) | 0 | ||||
-rw-r--r-- | include/libyuv/mjpeg_decoder.h (renamed from files/include/libyuv/mjpeg_decoder.h) | 0 | ||||
-rw-r--r-- | include/libyuv/planar_functions.h (renamed from files/include/libyuv/planar_functions.h) | 14 | ||||
-rw-r--r-- | include/libyuv/rotate.h (renamed from files/include/libyuv/rotate.h) | 0 | ||||
-rw-r--r-- | include/libyuv/rotate_argb.h (renamed from files/include/libyuv/rotate_argb.h) | 0 | ||||
-rw-r--r-- | include/libyuv/rotate_row.h (renamed from files/include/libyuv/rotate_row.h) | 5 | ||||
-rw-r--r-- | include/libyuv/row.h (renamed from files/include/libyuv/row.h) | 121 | ||||
-rw-r--r-- | include/libyuv/scale.h (renamed from files/include/libyuv/scale.h) | 55 | ||||
-rw-r--r-- | include/libyuv/scale_argb.h (renamed from files/include/libyuv/scale_argb.h) | 0 | ||||
-rw-r--r-- | include/libyuv/scale_rgb.h (renamed from files/include/libyuv/scale_rgb.h) | 0 | ||||
-rw-r--r-- | include/libyuv/scale_row.h (renamed from files/include/libyuv/scale_row.h) | 144 | ||||
-rw-r--r-- | include/libyuv/scale_uv.h (renamed from files/include/libyuv/scale_uv.h) | 0 | ||||
-rw-r--r-- | include/libyuv/version.h (renamed from files/include/libyuv/version.h) | 2 | ||||
-rw-r--r-- | include/libyuv/video_common.h (renamed from files/include/libyuv/video_common.h) | 0 | ||||
-rw-r--r-- | infra/config/OWNERS | 3 | ||||
-rw-r--r-- | infra/config/PRESUBMIT.py (renamed from files/infra/config/PRESUBMIT.py) | 0 | ||||
-rw-r--r-- | infra/config/README.md (renamed from files/infra/config/README.md) | 0 | ||||
-rw-r--r-- | infra/config/codereview.settings (renamed from files/infra/config/codereview.settings) | 0 | ||||
-rw-r--r-- | infra/config/commit-queue.cfg (renamed from files/infra/config/commit-queue.cfg) | 0 | ||||
-rw-r--r-- | infra/config/cr-buildbucket.cfg (renamed from files/infra/config/cr-buildbucket.cfg) | 259 | ||||
-rw-r--r-- | infra/config/luci-logdog.cfg (renamed from files/infra/config/luci-logdog.cfg) | 0 | ||||
-rw-r--r-- | infra/config/luci-milo.cfg (renamed from files/infra/config/luci-milo.cfg) | 0 | ||||
-rw-r--r-- | infra/config/luci-scheduler.cfg (renamed from files/infra/config/luci-scheduler.cfg) | 0 | ||||
-rwxr-xr-x | infra/config/main.star (renamed from files/infra/config/main.star) | 39 | ||||
-rw-r--r-- | infra/config/project.cfg (renamed from files/infra/config/project.cfg) | 2 | ||||
-rw-r--r-- | infra/config/realms.cfg (renamed from files/infra/config/realms.cfg) | 0 | ||||
-rw-r--r-- | libyuv.gni (renamed from files/libyuv.gni) | 5 | ||||
-rw-r--r-- | libyuv.gyp (renamed from files/libyuv.gyp) | 0 | ||||
-rw-r--r-- | libyuv.gypi (renamed from files/libyuv.gypi) | 0 | ||||
-rw-r--r-- | linux.mk (renamed from files/linux.mk) | 6 | ||||
-rw-r--r-- | public.mk | 2 | ||||
-rw-r--r-- | pylintrc (renamed from files/pylintrc) | 0 | ||||
-rwxr-xr-x | riscv_script/prepare_toolchain_qemu.sh (renamed from files/riscv_script/prepare_toolchain_qemu.sh) | 0 | ||||
-rw-r--r-- | riscv_script/riscv-clang.cmake (renamed from files/riscv_script/riscv-clang.cmake) | 23 | ||||
-rwxr-xr-x | riscv_script/run_qemu.sh (renamed from files/riscv_script/run_qemu.sh) | 0 | ||||
-rw-r--r-- | source/compare.cc (renamed from files/source/compare.cc) | 0 | ||||
-rw-r--r-- | source/compare_common.cc (renamed from files/source/compare_common.cc) | 0 | ||||
-rw-r--r-- | source/compare_gcc.cc (renamed from files/source/compare_gcc.cc) | 0 | ||||
-rw-r--r-- | source/compare_msa.cc (renamed from files/source/compare_msa.cc) | 0 | ||||
-rw-r--r-- | source/compare_neon.cc (renamed from files/source/compare_neon.cc) | 0 | ||||
-rw-r--r-- | source/compare_neon64.cc (renamed from files/source/compare_neon64.cc) | 0 | ||||
-rw-r--r-- | source/compare_win.cc (renamed from files/source/compare_win.cc) | 0 | ||||
-rw-r--r-- | source/convert.cc (renamed from files/source/convert.cc) | 127 | ||||
-rw-r--r-- | source/convert_argb.cc (renamed from files/source/convert_argb.cc) | 279 | ||||
-rw-r--r-- | source/convert_from.cc (renamed from files/source/convert_from.cc) | 90 | ||||
-rw-r--r-- | source/convert_from_argb.cc (renamed from files/source/convert_from_argb.cc) | 76 | ||||
-rw-r--r-- | source/convert_jpeg.cc (renamed from files/source/convert_jpeg.cc) | 0 | ||||
-rw-r--r-- | source/convert_to_argb.cc (renamed from files/source/convert_to_argb.cc) | 0 | ||||
-rw-r--r-- | source/convert_to_i420.cc (renamed from files/source/convert_to_i420.cc) | 0 | ||||
-rw-r--r-- | source/cpu_id.cc (renamed from files/source/cpu_id.cc) | 9 | ||||
-rw-r--r-- | source/mjpeg_decoder.cc (renamed from files/source/mjpeg_decoder.cc) | 0 | ||||
-rw-r--r-- | source/mjpeg_validate.cc (renamed from files/source/mjpeg_validate.cc) | 0 | ||||
-rw-r--r-- | source/planar_functions.cc (renamed from files/source/planar_functions.cc) | 92 | ||||
-rw-r--r-- | source/rotate.cc (renamed from files/source/rotate.cc) | 93 | ||||
-rw-r--r-- | source/rotate_any.cc (renamed from files/source/rotate_any.cc) | 0 | ||||
-rw-r--r-- | source/rotate_argb.cc (renamed from files/source/rotate_argb.cc) | 9 | ||||
-rw-r--r-- | source/rotate_common.cc (renamed from files/source/rotate_common.cc) | 31 | ||||
-rw-r--r-- | source/rotate_gcc.cc (renamed from files/source/rotate_gcc.cc) | 0 | ||||
-rw-r--r-- | source/rotate_lsx.cc (renamed from files/source/rotate_lsx.cc) | 0 | ||||
-rw-r--r-- | source/rotate_msa.cc (renamed from files/source/rotate_msa.cc) | 0 | ||||
-rw-r--r-- | source/rotate_neon.cc (renamed from files/source/rotate_neon.cc) | 0 | ||||
-rw-r--r-- | source/rotate_neon64.cc (renamed from files/source/rotate_neon64.cc) | 0 | ||||
-rw-r--r-- | source/rotate_win.cc (renamed from files/source/rotate_win.cc) | 0 | ||||
-rw-r--r-- | source/row_any.cc (renamed from files/source/row_any.cc) | 0 | ||||
-rw-r--r-- | source/row_common.cc (renamed from files/source/row_common.cc) | 95 | ||||
-rw-r--r-- | source/row_gcc.cc (renamed from files/source/row_gcc.cc) | 101 | ||||
-rw-r--r-- | source/row_lasx.cc (renamed from files/source/row_lasx.cc) | 36 | ||||
-rw-r--r-- | source/row_lsx.cc (renamed from files/source/row_lsx.cc) | 28 | ||||
-rw-r--r-- | source/row_msa.cc (renamed from files/source/row_msa.cc) | 0 | ||||
-rw-r--r-- | source/row_neon.cc (renamed from files/source/row_neon.cc) | 102 | ||||
-rw-r--r-- | source/row_neon64.cc (renamed from files/source/row_neon64.cc) | 215 | ||||
-rw-r--r-- | source/row_rvv.cc (renamed from files/source/row_rvv.cc) | 574 | ||||
-rw-r--r-- | source/row_win.cc (renamed from files/source/row_win.cc) | 0 | ||||
-rw-r--r-- | source/scale.cc (renamed from files/source/scale.cc) | 610 | ||||
-rw-r--r-- | source/scale_any.cc (renamed from files/source/scale_any.cc) | 0 | ||||
-rw-r--r-- | source/scale_argb.cc (renamed from files/source/scale_argb.cc) | 234 | ||||
-rw-r--r-- | source/scale_common.cc (renamed from files/source/scale_common.cc) | 29 | ||||
-rw-r--r-- | source/scale_gcc.cc (renamed from files/source/scale_gcc.cc) | 0 | ||||
-rw-r--r-- | source/scale_lsx.cc (renamed from files/source/scale_lsx.cc) | 0 | ||||
-rw-r--r-- | source/scale_msa.cc (renamed from files/source/scale_msa.cc) | 0 | ||||
-rw-r--r-- | source/scale_neon.cc (renamed from files/source/scale_neon.cc) | 0 | ||||
-rw-r--r-- | source/scale_neon64.cc (renamed from files/source/scale_neon64.cc) | 95 | ||||
-rw-r--r-- | source/scale_rgb.cc (renamed from files/source/scale_rgb.cc) | 0 | ||||
-rw-r--r-- | source/scale_rvv.cc | 1040 | ||||
-rw-r--r-- | source/scale_uv.cc (renamed from files/source/scale_uv.cc) | 249 | ||||
-rw-r--r-- | source/scale_win.cc (renamed from files/source/scale_win.cc) | 0 | ||||
-rwxr-xr-x | source/test.sh (renamed from files/source/test.sh) | 0 | ||||
-rw-r--r-- | source/video_common.cc (renamed from files/source/video_common.cc) | 0 | ||||
-rw-r--r-- | tools_libyuv/OWNERS | 4 | ||||
-rwxr-xr-x | tools_libyuv/autoroller/roll_deps.py (renamed from files/tools_libyuv/autoroller/roll_deps.py) | 1 | ||||
-rwxr-xr-x | tools_libyuv/autoroller/unittests/roll_deps_test.py (renamed from files/tools_libyuv/autoroller/unittests/roll_deps_test.py) | 0 | ||||
-rw-r--r-- | tools_libyuv/autoroller/unittests/testdata/DEPS (renamed from files/tools_libyuv/autoroller/unittests/testdata/DEPS) | 0 | ||||
-rw-r--r-- | tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new (renamed from files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new) | 0 | ||||
-rw-r--r-- | tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old (renamed from files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old) | 0 | ||||
-rwxr-xr-x | tools_libyuv/get_landmines.py (renamed from files/tools_libyuv/get_landmines.py) | 0 | ||||
-rw-r--r-- | tools_libyuv/msan/OWNERS | 3 | ||||
-rw-r--r-- | tools_libyuv/msan/blacklist.txt (renamed from files/tools_libyuv/msan/blacklist.txt) | 0 | ||||
-rw-r--r-- | tools_libyuv/ubsan/OWNERS | 3 | ||||
-rw-r--r-- | tools_libyuv/ubsan/blacklist.txt (renamed from files/tools_libyuv/ubsan/blacklist.txt) | 0 | ||||
-rw-r--r-- | tools_libyuv/ubsan/vptr_blacklist.txt (renamed from files/tools_libyuv/ubsan/vptr_blacklist.txt) | 0 | ||||
-rw-r--r-- | unit_test/basictypes_test.cc (renamed from files/unit_test/basictypes_test.cc) | 0 | ||||
-rw-r--r-- | unit_test/color_test.cc (renamed from files/unit_test/color_test.cc) | 0 | ||||
-rw-r--r-- | unit_test/compare_test.cc (renamed from files/unit_test/compare_test.cc) | 0 | ||||
-rw-r--r-- | unit_test/convert_argb_test.cc (renamed from files/unit_test/convert_test.cc) | 2479 | ||||
-rw-r--r-- | unit_test/convert_test.cc | 2110 | ||||
-rw-r--r-- | unit_test/cpu_test.cc (renamed from files/unit_test/cpu_test.cc) | 15 | ||||
-rw-r--r-- | unit_test/cpu_thread_test.cc (renamed from files/unit_test/cpu_thread_test.cc) | 0 | ||||
-rw-r--r-- | unit_test/math_test.cc (renamed from files/unit_test/math_test.cc) | 0 | ||||
-rw-r--r-- | unit_test/planar_test.cc (renamed from files/unit_test/planar_test.cc) | 208 | ||||
-rw-r--r-- | unit_test/rotate_argb_test.cc (renamed from files/unit_test/rotate_argb_test.cc) | 0 | ||||
-rw-r--r-- | unit_test/rotate_test.cc (renamed from files/unit_test/rotate_test.cc) | 0 | ||||
-rw-r--r-- | unit_test/scale_argb_test.cc (renamed from files/unit_test/scale_argb_test.cc) | 0 | ||||
-rw-r--r-- | unit_test/scale_plane_test.cc | 470 | ||||
-rw-r--r-- | unit_test/scale_rgb_test.cc (renamed from files/unit_test/scale_rgb_test.cc) | 0 | ||||
-rw-r--r-- | unit_test/scale_test.cc (renamed from files/unit_test/scale_test.cc) | 478 | ||||
-rw-r--r-- | unit_test/scale_uv_test.cc (renamed from files/unit_test/scale_uv_test.cc) | 0 | ||||
-rw-r--r-- | unit_test/testdata/arm_v7.txt (renamed from files/unit_test/testdata/arm_v7.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/juno.txt (renamed from files/unit_test/testdata/juno.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/mips.txt (renamed from files/unit_test/testdata/mips.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/mips_loongson2k.txt (renamed from files/unit_test/testdata/mips_loongson2k.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/mips_loongson3.txt (renamed from files/unit_test/testdata/mips_loongson3.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/mips_loongson_mmi.txt (renamed from files/unit_test/testdata/mips_loongson_mmi.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/mips_msa.txt (renamed from files/unit_test/testdata/mips_msa.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/riscv64.txt (renamed from files/unit_test/testdata/riscv64.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/riscv64_rvv.txt (renamed from files/unit_test/testdata/riscv64_rvv.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/riscv64_rvv_zvfh.txt (renamed from files/unit_test/testdata/riscv64_rvv_zvfh.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/tegra3.txt (renamed from files/unit_test/testdata/tegra3.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/test0.jpg (renamed from files/unit_test/testdata/test0.jpg) | bin | 421 -> 421 bytes | |||
-rw-r--r-- | unit_test/testdata/test1.jpg (renamed from files/unit_test/testdata/test1.jpg) | bin | 735 -> 735 bytes | |||
-rw-r--r-- | unit_test/testdata/test2.jpg (renamed from files/unit_test/testdata/test2.jpg) | bin | 685 -> 685 bytes | |||
-rw-r--r-- | unit_test/testdata/test3.jpg (renamed from files/unit_test/testdata/test3.jpg) | bin | 704 -> 704 bytes | |||
-rw-r--r-- | unit_test/testdata/test4.jpg (renamed from files/unit_test/testdata/test4.jpg) | bin | 701 -> 701 bytes | |||
-rw-r--r-- | unit_test/unit_test.cc (renamed from files/unit_test/unit_test.cc) | 11 | ||||
-rw-r--r-- | unit_test/unit_test.h (renamed from files/unit_test/unit_test.h) | 0 | ||||
-rw-r--r-- | unit_test/video_common_test.cc (renamed from files/unit_test/video_common_test.cc) | 0 | ||||
-rw-r--r-- | util/Makefile (renamed from files/util/Makefile) | 0 | ||||
-rw-r--r-- | util/color.cc (renamed from files/util/color.cc) | 0 | ||||
-rw-r--r-- | util/compare.cc (renamed from files/util/compare.cc) | 0 | ||||
-rw-r--r-- | util/cpuid.c (renamed from files/util/cpuid.c) | 10 | ||||
-rw-r--r-- | util/i444tonv12_eg.cc (renamed from files/util/i444tonv12_eg.cc) | 0 | ||||
-rw-r--r-- | util/psnr.cc (renamed from files/util/psnr.cc) | 0 | ||||
-rw-r--r-- | util/psnr.h (renamed from files/util/psnr.h) | 0 | ||||
-rw-r--r-- | util/psnr_main.cc (renamed from files/util/psnr_main.cc) | 0 | ||||
-rw-r--r-- | util/ssim.cc (renamed from files/util/ssim.cc) | 0 | ||||
-rw-r--r-- | util/ssim.h (renamed from files/util/ssim.h) | 0 | ||||
-rw-r--r-- | util/yuvconstants.c (renamed from files/util/yuvconstants.c) | 0 | ||||
-rw-r--r-- | util/yuvconvert.cc (renamed from files/util/yuvconvert.cc) | 0 | ||||
-rw-r--r-- | winarm.mk (renamed from files/winarm.mk) | 0 |
197 files changed, 6979 insertions, 4464 deletions
diff --git a/files/.clang-format b/.clang-format index 59d48705..59d48705 100644 --- a/files/.clang-format +++ b/.clang-format diff --git a/files/.gitignore b/.gitignore index 20d679b7..20d679b7 100644 --- a/files/.gitignore +++ b/.gitignore diff --git a/files/.vpython b/.vpython index 4a64fd21..4a64fd21 100644 --- a/files/.vpython +++ b/.vpython diff --git a/files/.vpython3 b/.vpython3 index 28d819e7..28d819e7 100644 --- a/files/.vpython3 +++ b/.vpython3 @@ -1,7 +1,6 @@ package { default_applicable_licenses: ["external_libyuv_license"], } - // Added automatically by a large-scale-change // See: http://go/android-license-faq license { @@ -12,7 +11,183 @@ license { ], license_text: [ "LICENSE", + "PATENTS", ], } - subdirs = ["files"] + +cc_library { + name: "libyuv", + vendor_available: true, + product_available: true, + host_supported: true, + + srcs: [ + "source/compare.cc", + "source/compare_common.cc", + "source/compare_gcc.cc", + "source/compare_msa.cc", + "source/compare_neon.cc", + "source/compare_neon64.cc", + "source/convert.cc", + "source/convert_argb.cc", + "source/convert_from.cc", + "source/convert_from_argb.cc", + "source/convert_jpeg.cc", + "source/convert_to_argb.cc", + "source/convert_to_i420.cc", + "source/cpu_id.cc", + "source/mjpeg_decoder.cc", + "source/mjpeg_validate.cc", + "source/planar_functions.cc", + "source/rotate.cc", + "source/rotate_any.cc", + "source/rotate_argb.cc", + "source/rotate_common.cc", + "source/rotate_gcc.cc", + "source/rotate_msa.cc", + "source/rotate_neon.cc", + "source/rotate_neon64.cc", + "source/row_any.cc", + "source/row_common.cc", + "source/row_gcc.cc", + "source/row_msa.cc", + "source/row_neon.cc", + "source/row_neon64.cc", + "source/row_rvv.cc", + "source/scale.cc", + "source/scale_any.cc", + "source/scale_argb.cc", + "source/scale_common.cc", + "source/scale_gcc.cc", + "source/scale_msa.cc", + "source/scale_neon.cc", + "source/scale_neon64.cc", + "source/scale_rgb.cc", + "source/scale_rvv.cc", + 
"source/scale_uv.cc", + "source/video_common.cc", + ], + + cflags: [ + "-Wall", + "-Werror", + "-Wno-unused-parameter", + "-fexceptions", + "-DHAVE_JPEG", + "-DLIBYUV_UNLIMITED_DATA", + ], + + arch: { + arm: { + cflags: ["-mfpu=neon"], + }, + }, + + shared_libs: ["libjpeg"], + + export_include_dirs: ["include"], + + apex_available: [ + "//apex_available:platform", + "com.android.media.swcodec", + "com.android.virt", + ], + min_sdk_version: "29", +} + +// compatibilty static library until all uses of libyuv_static are replaced +// with libyuv (b/37646797) +cc_library_static { + name: "libyuv_static", + vendor_available: true, + whole_static_libs: ["libyuv"], + apex_available: [ + "//apex_available:platform", + "com.android.media.swcodec", + ], + min_sdk_version: "29", +} + +cc_test { + name: "libyuv_unittest", + static_libs: ["libyuv"], + shared_libs: ["libjpeg"], + cflags: ["-Wall", "-Werror"], + srcs: [ + "unit_test/basictypes_test.cc", + "unit_test/color_test.cc", + "unit_test/compare_test.cc", + "unit_test/convert_test.cc", + "unit_test/cpu_test.cc", + "unit_test/cpu_thread_test.cc", + "unit_test/math_test.cc", + "unit_test/planar_test.cc", + "unit_test/rotate_argb_test.cc", + "unit_test/rotate_test.cc", + "unit_test/scale_argb_test.cc", + "unit_test/scale_plane_test.cc", + "unit_test/scale_rgb_test.cc", + "unit_test/scale_test.cc", + "unit_test/scale_uv_test.cc", + "unit_test/unit_test.cc", + "unit_test/video_common_test.cc", + ], +} + +cc_test { + name: "compare", + gtest: false, + srcs: [ + "util/compare.cc", + ], + static_libs: ["libyuv"], +} + +cc_test { + name: "cpuid", + gtest: false, + srcs: [ + "util/cpuid.c", + ], + static_libs: ["libyuv"], +} + +cc_test { + name: "i444tonv12_eg", + gtest: false, + srcs: [ + "util/i444tonv12_eg.cc", + ], + static_libs: ["libyuv"], +} + +cc_test { + name: "psnr", + gtest: false, + srcs: [ + "util/psnr_main.cc", + "util/psnr.cc", + "util/ssim.cc", + ], + static_libs: ["libyuv"], +} + +cc_test { + name: "yuvconstants", + 
gtest: false, + srcs: [ + "util/yuvconstants.c", + ], + static_libs: ["libyuv"], +} + +cc_test { + name: "yuvconvert", + gtest: false, + srcs: [ + "util/yuvconvert.cc", + ], + static_libs: ["libyuv"], + shared_libs: ["libjpeg"], +} @@ -1,14 +0,0 @@ -# Copyright 2011 Google Inc. All Rights Reserved. -# -# Description: -# The libyuv package provides implementation yuv image conversion and -# scaling. -# -# This library is used by Talk Video and WebRTC. -# - -licenses(['notice']) # 3-clause BSD - -exports_files(['LICENSE']) - -package(default_visibility = ['//visibility:public']) diff --git a/files/BUILD.gn b/BUILD.gn index adaae9d8..2c600b22 100644 --- a/files/BUILD.gn +++ b/BUILD.gn @@ -36,6 +36,12 @@ config("libyuv_config") { if (libyuv_disable_rvv) { defines += [ "LIBYUV_DISABLE_RVV" ] } + if (!libyuv_use_lsx) { + defines += [ "LIBYUV_DISABLE_LSX" ] + } + if (!libyuv_use_lasx) { + defines += [ "LIBYUV_DISABLE_LASX" ] + } } # This target is built when no specific target is specified on the command line. @@ -74,6 +80,14 @@ group("libyuv") { deps += [ ":libyuv_msa" ] } + if (libyuv_use_lsx) { + deps += [ ":libyuv_lsx" ] + } + + if (libyuv_use_lasx) { + deps += [ ":libyuv_lasx" ] + } + if (!is_ios && !libyuv_disable_jpeg) { # Make sure that clients of libyuv link with libjpeg. 
This can't go in # libyuv_internal because in Windows x64 builds that will generate a clang @@ -142,6 +156,7 @@ static_library("libyuv_internal") { "source/scale_common.cc", "source/scale_gcc.cc", "source/scale_rgb.cc", + "source/scale_rvv.cc", "source/scale_uv.cc", "source/scale_win.cc", "source/video_common.cc", @@ -235,6 +250,44 @@ if (libyuv_use_msa) { } } +if (libyuv_use_lsx) { + static_library("libyuv_lsx") { + sources = [ + # LSX Source Files + "source/rotate_lsx.cc", + "source/row_lsx.cc", + "source/scale_lsx.cc", + ] + + cflags_cc = [ + "-mlsx", + "-Wno-c++11-narrowing", + ] + + deps = [ ":libyuv_internal" ] + + public_configs = [ ":libyuv_config" ] + } +} + +if (libyuv_use_lasx) { + static_library("libyuv_lasx") { + sources = [ + # LASX Source Files + "source/row_lasx.cc", + ] + + cflags_cc = [ + "-mlasx", + "-Wno-c++11-narrowing", + ] + + deps = [ ":libyuv_internal" ] + + public_configs = [ ":libyuv_config" ] + } +} + if (libyuv_include_tests) { config("libyuv_unittest_warnings_config") { if (!is_win) { @@ -262,6 +315,7 @@ if (libyuv_include_tests) { "unit_test/basictypes_test.cc", "unit_test/color_test.cc", "unit_test/compare_test.cc", + "unit_test/convert_argb_test.cc", "unit_test/convert_test.cc", "unit_test/cpu_test.cc", "unit_test/cpu_thread_test.cc", @@ -270,6 +324,7 @@ if (libyuv_include_tests) { "unit_test/rotate_argb_test.cc", "unit_test/rotate_test.cc", "unit_test/scale_argb_test.cc", + "unit_test/scale_plane_test.cc", "unit_test/scale_rgb_test.cc", "unit_test/scale_test.cc", "unit_test/scale_uv_test.cc", diff --git a/files/CM_linux_packages.cmake b/CM_linux_packages.cmake index 5f676f89..a073edfa 100644 --- a/files/CM_linux_packages.cmake +++ b/CM_linux_packages.cmake @@ -8,7 +8,7 @@ SET ( YUV_VER_MAJOR 0 ) SET ( YUV_VER_MINOR 0 ) SET ( YUV_VER_PATCH ${YUV_VERSION_NUMBER} ) SET ( YUV_VERSION ${YUV_VER_MAJOR}.${YUV_VER_MINOR}.${YUV_VER_PATCH} ) -MESSAGE ( "Building ver.: ${YUV_VERSION}" ) +MESSAGE ( VERBOSE "Building ver.: ${YUV_VERSION}" ) # 
is this a 32-bit or 64-bit build? IF ( CMAKE_SIZEOF_VOID_P EQUAL 8 ) @@ -45,7 +45,7 @@ ELSE () SET ( YUV_SYSTEM_NAME "amd-${YUV_BIT_SIZE}" ) ENDIF () ENDIF () -MESSAGE ( "Packaging for: ${YUV_SYSTEM_NAME}" ) +MESSAGE ( VERBOSE "Packaging for: ${YUV_SYSTEM_NAME}" ) # define all the variables needed by CPack to create .deb and .rpm packages SET ( CPACK_PACKAGE_VENDOR "Frank Barchard" ) diff --git a/files/CMakeLists.txt b/CMakeLists.txt index 7a4a1994..9abfa74b 100644 --- a/files/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,6 +37,10 @@ if(WIN32) SET_TARGET_PROPERTIES ( ${ly_lib_shared} PROPERTIES IMPORT_PREFIX "lib" ) endif() +# this creates the cpuid tool +ADD_EXECUTABLE ( cpuid ${ly_base_dir}/util/cpuid.c ) +TARGET_LINK_LIBRARIES ( cpuid ${ly_lib_static} ) + # this creates the conversion tool ADD_EXECUTABLE ( yuvconvert ${ly_base_dir}/util/yuvconvert.cc ) TARGET_LINK_LIBRARIES ( yuvconvert ${ly_lib_static} ) @@ -5,39 +5,39 @@ gclient_gn_args = [ vars = { 'chromium_git': 'https://chromium.googlesource.com', - 'chromium_revision': 'd1501576384de23ddf8d8815ee7c95be2f708de5', - 'gn_version': 'git_revision:e3978de3e8dafb50a2b11efa784e08699a43faf8', + 'chromium_revision': 'af3d01376bec75a68f90160bfd38057d60510a2b', + 'gn_version': 'git_revision:fae280eabe5d31accc53100137459ece19a7a295', # ninja CIPD package version. # https://chrome-infra-packages.appspot.com/p/infra/3pp/tools/ninja 'ninja_version': 'version:2@1.11.1.chromium.6', # reclient CIPD package version - 'reclient_version': 're_client_version:0.107.1.0b39c4c-gomaip', + 'reclient_version': 're_client_version:0.110.0.43ec6b1-gomaip', # Keep the Chromium default of generating location tags. 'generate_location_tags': True, # By default, download the fuchsia sdk from the public sdk directory. 
- 'fuchsia_sdk_cipd_prefix': 'fuchsia/sdk/gn/', - 'fuchsia_version': 'version:12.20230530.1.1', + 'fuchsia_sdk_cipd_prefix': 'fuchsia/sdk/core/', + 'fuchsia_version': 'version:15.20230909.2.1', # By default, download the fuchsia images from the fuchsia GCS bucket. 'fuchsia_images_bucket': 'fuchsia', 'checkout_fuchsia': False, # Since the images are hundreds of MB, default to only downloading the image # most commonly useful for developers. Bots and developers that need to use # other images can override this with additional images. - 'checkout_fuchsia_boot_images': "terminal.qemu-x64", + 'checkout_fuchsia_boot_images': "terminal.qemu-x64,terminal.x64", 'checkout_fuchsia_product_bundles': '"{checkout_fuchsia_boot_images}" != ""', } deps = { 'src/build': - Var('chromium_git') + '/chromium/src/build' + '@' + 'd0c2b4cf4fdd43866e066fb6722099aa8bf4ce79', + Var('chromium_git') + '/chromium/src/build' + '@' + '5885d3c24833ad72845a52a1b913a2b8bc651b56', 'src/buildtools': - Var('chromium_git') + '/chromium/src/buildtools' + '@' + 'edbefcee3d2cc45cdb0c60c2b01b673f8ba728bc', + Var('chromium_git') + '/chromium/src/buildtools' + '@' + '79ab87fa54614258c4c95891e873223371194525', 'src/testing': - Var('chromium_git') + '/chromium/src/testing' + '@' + 'a13817e1ea0255a375d13aeb3bb2527bd528495b', + Var('chromium_git') + '/chromium/src/testing' + '@' + '51e9a02297057cc0e917763a51e16680b7d16fb6', 'src/third_party': - Var('chromium_git') + '/chromium/src/third_party' + '@' + '824e26c9fcbd00fccf6cdb712f8f127aae133042', + Var('chromium_git') + '/chromium/src/third_party' + '@' + '2dc4b18abd1003ce7b1eda509dc96f12d49a9667', 'src/buildtools/linux64': { 'packages': [ @@ -82,17 +82,10 @@ deps = { 'dep_type': 'cipd', }, - 'src/buildtools/clang_format/script': - Var('chromium_git') + '/external/github.com/llvm/llvm-project/clang/tools/clang-format.git' + '@' + 'f97059df7f8b205064625cdb5f97b56668a125ef', - 'src/buildtools/third_party/libc++/trunk': - Var('chromium_git') + 
'/external/github.com/llvm/llvm-project/libcxx.git' + '@' + 'f8279b01085b800724f5c5629dc365b9f040dc53', - 'src/buildtools/third_party/libc++abi/trunk': - Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + '5c8dbff7a4911fe1e0af0bc1628891e4187a3c90', - 'src/buildtools/third_party/libunwind/trunk': - Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + 'cd144ced35285edaa064a91561969e5b22c219b1', - 'src/third_party/catapult': - Var('chromium_git') + '/catapult.git' + '@' + '9f3ef9c2eae9b1adabde88efe5dcc438ba76e205', + Var('chromium_git') + '/catapult.git' + '@' + 'fa05d995e152efdae488a2aeba397cd609fdbc9d', + 'src/third_party/clang-format/script': + Var('chromium_git') + '/external/github.com/llvm/llvm-project/clang/tools/clang-format.git' + '@' + 'f97059df7f8b205064625cdb5f97b56668a125ef', 'src/third_party/colorama/src': Var('chromium_git') + '/external/colorama.git' + '@' + '3de9f013df4b470069d03d250224062e8cf15c49', 'src/third_party/cpu_features/src': { @@ -100,19 +93,29 @@ deps = { 'condition': 'checkout_android', }, 'src/third_party/depot_tools': - Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + '05ab73be51774f098eb580eda6e96a49e1010b1b', + Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + 'd3e43dd4319ba169c0aaf44547eecf861f2fe5da', 'src/third_party/freetype/src': - Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + '80a507a6b8e3d2906ad2c8ba69329bd2fb2a85ef', + Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + '9e3c5d7e183c1a8d5ed8868d7d28ef18d3ec9ec8', + 'third_party/fuchsia-gn-sdk': { + 'url': Var('chromium_git') + '/chromium/src/third_party/fuchsia-gn-sdk.git' + '@' + '0d6902558d92fe3d49ba9a8f638ddea829be595b', + 'condition': 'checkout_fuchsia', + }, 'src/third_party/googletest/src': Var('chromium_git') + '/external/github.com/google/googletest.git' + '@' + 'af29db7ec28d6df1c7f0f745186884091e602e07', 
'src/third_party/harfbuzz-ng/src': - Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + '8df5cdbcda495a582e72a7e2ce35d6106401edce', + Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + 'db700b5670d9475cc8ed4880cc9447b232c5e432', + 'src/third_party/libc++/src': + Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + '84fb809dd6dae36d556dc0bb702c6cc2ce9d4b80', + 'src/third_party/libc++abi/src': + Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + '8d21803b9076b16d46c32e2f10da191ee758520c', + 'src/third_party/libunwind/src': + Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + 'f1c687e0aaf0d70b9a53a150e9be5cb63af9215f', 'src/third_party/libjpeg_turbo': - Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + 'aa4075f116e4312537d0d3e9dbd5e31096539f94', + Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + '30bdb85e302ecfc52593636b2f44af438e05e784', 'src/third_party/nasm': Var('chromium_git') + '/chromium/deps/nasm.git' + '@' + '7fc833e889d1afda72c06220e5bed8fb43b2e5ce', 'src/tools': - Var('chromium_git') + '/chromium/src/tools' + '@' + '916dfffd61cbf61075c47d7b480425d7de1483fd', + Var('chromium_git') + '/chromium/src/tools' + '@' + 'a76c0dbb64c603a0d45e0c6dfae3a351b6e1adf1', # libyuv-only dependencies (not present in Chromium). 
'src/third_party/gtest-parallel': @@ -139,7 +142,7 @@ deps = { 'packages': [ { 'package': 'chromium/third_party/kotlin_stdlib', - 'version': 'z4_AYYz2Tw5GKikuiDLTuxxf0NJVGLkC3CVcyiIpc-gC', + 'version': 'Z1gsqhL967kFQecxKrRwXHbl-vwQjpv0l7PMUZ0EVO8C', }, ], 'condition': 'checkout_android', @@ -149,7 +152,7 @@ deps = { 'packages': [ { 'package': 'chromium/third_party/kotlinc', - 'version': 'J3BAlA7yf4corBopDhlwuT9W4jR1Z9R55KD3BUTVldQC', + 'version': 'Rr02Gf2EkaeSs3EhSUHhPqDHSd1AzimrM6cRYUJCPjQC', }, ], 'condition': 'checkout_android', @@ -157,9 +160,9 @@ deps = { }, 'src/third_party/boringssl/src': - 'https://boringssl.googlesource.com/boringssl.git' + '@' + 'dd5219451c3ce26221762a15d867edf43b463bb2', + 'https://boringssl.googlesource.com/boringssl.git' + '@' + '20a06474c0b4a16779311bfe98ba69dc2402101d', 'src/base': { - 'url': Var('chromium_git') + '/chromium/src/base' + '@' + 'b4c5ce6cb1a7c90de3fdddc80ed439fe87eab443', + 'url': Var('chromium_git') + '/chromium/src/base' + '@' + 'd407b7061bce341bb6e11b539ea86c46c949ac4c', 'condition': 'checkout_android', }, 'src/third_party/bazel': { @@ -182,16 +185,22 @@ deps = { 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_ndk': { - 'url': Var('chromium_git') + '/android_ndk.git' + '@' + '310956bd122ec2b96049f8d7398de6b717f3452e', - 'condition': 'checkout_android', + 'src/third_party/android_toolchain': { + 'packages': [ + { + 'package': 'chromium/third_party/android_toolchain/android_toolchain', + 'version': 'R_8suM8m0oHbZ1awdxGXvKEFpAOETscbfZxkkMthyk8C', + }, + ], + 'condition': 'checkout_android', + 'dep_type': 'cipd', }, 'src/third_party/androidx': { 'packages': [ { 'package': 'chromium/third_party/androidx', - 'version': 'Wr5b9WJiFAzJcmjmvQIePIxk5IgpDl62kaGY_SiLxJEC', + 'version': 'y7rF_rx56mD3FGhMiqnlbQ6HOqHJ95xUFNX1m-_a988C', }, ], 'condition': 'checkout_android', @@ -211,8 +220,8 @@ deps = { 'src/third_party/android_sdk/public': { 'packages': [ { - 'package': 
'chromium/third_party/android_sdk/public/build-tools/33.0.0', - 'version': '-VRKr36Uw8L_iFqqo9nevIBgNMggND5iWxjidyjnCgsC', + 'package': 'chromium/third_party/android_sdk/public/build-tools/34.0.0', + 'version': 'YK9Rzw3fDzMHVzatNN6VlyoD_81amLZpN1AbmkdOd6AC', }, { 'package': 'chromium/third_party/android_sdk/public/emulator', @@ -224,11 +233,11 @@ deps = { }, { 'package': 'chromium/third_party/android_sdk/public/platform-tools', - 'version': 'RSI3iwryh7URLGRgJHsCvUxj092woTPnKt4pwFcJ6L8C', + 'version': 'HWVsGs2HCKgSVv41FsOcsfJbNcB0UFiNrF6Tc4yRArYC', }, { - 'package': 'chromium/third_party/android_sdk/public/platforms/android-33', - 'version': 'eo5KvW6UVor92LwZai8Zulc624BQZoCu-yn7wa1z_YcC', + 'package': 'chromium/third_party/android_sdk/public/platforms/android-34', + 'version': 'u-bhWbTME6u-DjypTgr3ZikCyeAeU6txkR9ET6Uudc8C', }, { 'package': 'chromium/third_party/android_sdk/public/platforms/android-tiramisuprivacysandbox', @@ -330,7 +339,7 @@ deps = { }, 'src/third_party/icu': { - 'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + 'a2961dc659b4ae847a9c6120718cc2517ee57d9e', + 'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + 'e8c3bc9ea97d4423ad0515e5f1c064f486dae8b1', }, 'src/third_party/icu4j': { 'packages': [ @@ -356,7 +365,7 @@ deps = { 'packages': [ { 'package': 'chromium/third_party/jdk', - 'version': '2Of9Pe_OdO4xoAATuiLDiMVNebKTNO3WrwJGqil4RosC', + 'version': 'GCFtf5t6M4HlrHj6NXedHbpHp2xjgognF8ptNci4478C', }, ], 'condition': 'checkout_android', @@ -411,7 +420,7 @@ deps = { 'packages': [ { 'package': 'chromium/third_party/r8', - 'version': '4Oq32DG2vuDh7Frxj6tH5xyi77sVgBWpvvl4hwvZRR4C', + 'version': 'O1BBWiBTIeNUcraX8STMtQXVaCleu6SJJjWCcnfhPLkC', }, ], 'condition': 'checkout_android', @@ -424,7 +433,7 @@ deps = { 'packages': [ { 'package': 'chromium/third_party/r8', - 'version': 'PwglNZFRNPkBBXdnY9NfrZFk2ULWDTRxhV9rl2kvkpUC', + 'version': 'vw5kLlW3-suSlCKSO9OQpFWpR8oDnvQ8k1RgKNUapQYC', }, ], 'condition': 'checkout_android', @@ -441,7 
+450,7 @@ deps = { 'dep_type': 'cipd', }, 'src/third_party/requests/src': { - 'url': Var('chromium_git') + '/external/github.com/kennethreitz/requests.git' + '@' + 'refs/tags/v2.23.0', + 'url': Var('chromium_git') + '/external/github.com/kennethreitz/requests.git' + '@' + 'c7e0fc087ceeadb8b4c84a0953a422c474093d6d', 'condition': 'checkout_android', }, 'src/third_party/robolectric': { @@ -468,7 +477,7 @@ deps = { 'packages': [ { 'package': 'chromium/third_party/turbine', - 'version': 'Foa7uRpVoKr4YoayCKc9EERkjpmGOE3DAUTWFLL7gKEC', + 'version': '2I2Nz480QsuCxpQ1lMfbigX8l5HAhX3_ykWU4TKRGo4C', }, ], 'condition': 'checkout_android', @@ -481,7 +490,7 @@ deps = { # iOS deps: 'src/ios': { - 'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '241921896b64f85de9a32d461462913cbff4baeb', + 'url': Var('chromium_git') + '/chromium/src/ios' + '@' + 'ddd58e86cf4ebdc0db60a5d0f3c323de49bb295c', 'condition': 'checkout_ios' }, @@ -1680,7 +1689,7 @@ deps = { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_squareup_okio_okio_jvm', - 'version': 'version:2@3.0.0.cr1', + 'version': 'version:2@3.3.0.cr1', }, ], 'condition': 'checkout_android', @@ -1691,7 +1700,7 @@ deps = { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_squareup_wire_wire_runtime_jvm', - 'version': 'version:2@4.5.1.cr1', + 'version': 'version:2@4.7.0.cr1', }, ], 'condition': 'checkout_android', @@ -1823,7 +1832,7 @@ deps = { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/net_bytebuddy_byte_buddy', - 'version': 'version:2@1.14.4.cr1', + 'version': 'version:2@1.14.5.cr1', }, ], 'condition': 'checkout_android', @@ -1834,7 +1843,7 @@ deps = { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/net_bytebuddy_byte_buddy_agent', - 'version': 'version:2@1.14.4.cr1', + 'version': 'version:2@1.14.5.cr1', }, ], 'condition': 'checkout_android', @@ -2043,7 +2052,7 @@ deps = { 'packages': [ { 'package': 
'chromium/third_party/android_deps/libs/org_mockito_mockito_android', - 'version': 'version:2@5.3.1.cr1', + 'version': 'version:2@5.4.0.cr1', }, ], 'condition': 'checkout_android', @@ -2054,7 +2063,7 @@ deps = { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_mockito_mockito_core', - 'version': 'version:2@5.3.1.cr1', + 'version': 'version:2@5.4.0.cr1', }, ], 'condition': 'checkout_android', @@ -2065,7 +2074,7 @@ deps = { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_mockito_mockito_subclass', - 'version': 'version:2@5.3.1.cr1', + 'version': 'version:2@5.4.0.cr1', }, ], 'condition': 'checkout_android', diff --git a/files/DIR_METADATA b/DIR_METADATA index 8bc04f15..8bc04f15 100644 --- a/files/DIR_METADATA +++ b/DIR_METADATA @@ -1,4 +1,4 @@ -Copyright (c) 2011, Google Inc. All rights reserved. +Copyright 2011 The LibYuv Project Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -1,14 +1,19 @@ -name: "libyuv" -description: - "libyuv is an open source project that includes YUV scaling and conversion " - "functionality." +# This project was upgraded with external_updater. +# Usage: tools/external_updater/updater.sh update libyuv +# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md +name: "libyuv" +description: "libyuv is an open source project that includes YUV scaling and conversion functionality." 
third_party { - url { - type: GIT + license_type: NOTICE + last_upgrade_date { + year: 2024 + month: 1 + day: 11 + } + identifier { + type: "Git" value: "https://chromium.googlesource.com/libyuv/libyuv/" + version: "af6ac8265bbd07bcf977526458b60305c4304288" } - version: "2a6cb7431939faba1b40d3f08883847f0cf63572" - last_upgrade_date { year: 2023 month: 6 day: 1 } - license_type: NOTICE } @@ -1,4 +1,11 @@ -fbarchard@google.com -phoglund@google.com -magjed@google.com -chz@google.com +mbonadei@chromium.org +fbarchard@chromium.org +magjed@chromium.org +wtc@google.com +jansson@google.com + +per-file *.gn=mbonadei@chromium.org,jansson@google.com +per-file .gitignore=* +per-file AUTHORS=* +per-file DEPS=* +per-file PRESUBMIT.py=mbonadei@chromium.org,jansson@google.com diff --git a/OWNERS.android b/OWNERS.android new file mode 100644 index 00000000..7529cb92 --- /dev/null +++ b/OWNERS.android @@ -0,0 +1 @@ +include platform/system/core:/janitors/OWNERS diff --git a/files/PRESUBMIT.py b/PRESUBMIT.py index d3901caf..d3901caf 100644 --- a/files/PRESUBMIT.py +++ b/PRESUBMIT.py diff --git a/files/README.chromium b/README.chromium index 880191e4..1389f285 100644 --- a/files/README.chromium +++ b/README.chromium @@ -1,8 +1,9 @@ Name: libyuv -URL: http://code.google.com/p/libyuv/ -Version: 1871 +URL: https://chromium.googlesource.com/libyuv/libyuv/ +Version: 1883 License: BSD License File: LICENSE +Shipped: yes Description: libyuv is an open source project that includes YUV conversion and scaling functionality. diff --git a/files/README.md b/README.md index 95eeb04c..95eeb04c 100644 --- a/files/README.md +++ b/README.md diff --git a/README.version b/README.version deleted file mode 100644 index 6eb9dc8c..00000000 --- a/README.version +++ /dev/null @@ -1,8 +0,0 @@ -Version: r1871 -BugComponent: 42195 -Owner: lajos -Local Modifications: - * Remove files/Android.mk (it messes with the android build system). 
- * Remove OWNERS files within files/ and all the subdirectories (except for - files/fuzz). Having these files breaks repo presubmit hooks since they - contain non @google.com email addresses. diff --git a/UPDATING b/UPDATING deleted file mode 100644 index 2679284c..00000000 --- a/UPDATING +++ /dev/null @@ -1,36 +0,0 @@ -To sync the libyuv checkout to an upstream revision, do the following: - -These commands are known to work from the external/libyuv directory of the -Android tree's checkout. - -Step 1: Remove the files/ subdirectory. - -$ rm -rf files - -Step 2: Clone the libyuv repository from upstream. - -$ git clone https://chromium.googlesource.com/libyuv/libyuv files - -Step 3 (optional): Checkout a specific commit/tag. - -$ cd files -$ git checkout <commit_or_tag> -$ cd .. - -Step 4: Remove files that aren't necessary (Android.mk, .git and OWNERS). - -$ rm files/Android.mk -$ rm -rf files/.git -$ find files/ -name "OWNERS" | xargs rm - -Step 5: Update the version and last_upgrade_date fields in the METADATA file. - -Step 6: Update README.version with the version (can be found in - files/include/libyuv/version.h) - -Step 7: If any local modifications are being done, update README.version and - this file with updated instructions. - -Step 8: Ensure that libyuv builds and camera and media related CTS tests are - passing. If there are any linker errors about missing symbols, try - updating frameworks/av/media/libstagefright/export.lds. 
diff --git a/files/build_overrides/build.gni b/build_overrides/build.gni index d9d01d51..d9d01d51 100644 --- a/files/build_overrides/build.gni +++ b/build_overrides/build.gni diff --git a/files/build_overrides/gtest.gni b/build_overrides/gtest.gni index d3c3f68c..d3c3f68c 100644 --- a/files/build_overrides/gtest.gni +++ b/build_overrides/gtest.gni diff --git a/files/build_overrides/partition_alloc.gni b/build_overrides/partition_alloc.gni index dcf8ac2d..dcf8ac2d 100644 --- a/files/build_overrides/partition_alloc.gni +++ b/build_overrides/partition_alloc.gni diff --git a/files/cleanup_links.py b/cleanup_links.py index 7d1eba9b..7d1eba9b 100755 --- a/files/cleanup_links.py +++ b/cleanup_links.py diff --git a/codereview.settings b/codereview.settings index 9782886f..b226fae5 100644 --- a/codereview.settings +++ b/codereview.settings @@ -1,5 +1,5 @@ -# This file is used by git cl to get repository specific information. +# This file is used by `git cl` to get repository specific information. 
+CODE_REVIEW_SERVER: codereview.chromium.org GERRIT_HOST: True PROJECT: libyuv -TRY_ON_UPLOAD: False VIEW_VC: https://chromium.googlesource.com/libyuv/libyuv/+/ diff --git a/files/docs/deprecated_builds.md b/docs/deprecated_builds.md index ba42966c..8edefd78 100644 --- a/files/docs/deprecated_builds.md +++ b/docs/deprecated_builds.md @@ -165,11 +165,11 @@ mipsel arm32 disassembly: - third_party/android_ndk/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-objdump -d out/Release/obj/source/libyuv.row_neon.o + llvm-objdump -d out/Release/obj/source/libyuv.row_neon.o arm64 disassembly: - third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d out/Release/obj/source/libyuv.row_neon64.o + llvm-objdump -d out/Release/obj/source/libyuv.row_neon64.o Running tests: diff --git a/files/docs/environment_variables.md b/docs/environment_variables.md index 4eb09659..4eb09659 100644 --- a/files/docs/environment_variables.md +++ b/docs/environment_variables.md diff --git a/files/docs/filtering.md b/docs/filtering.md index 8696976e..8696976e 100644 --- a/files/docs/filtering.md +++ b/docs/filtering.md diff --git a/files/docs/formats.md b/docs/formats.md index 12ea9465..12ea9465 100644 --- a/files/docs/formats.md +++ b/docs/formats.md diff --git a/files/docs/getting_started.md b/docs/getting_started.md index b19f0009..f2f71b8b 100644 --- a/files/docs/getting_started.md +++ b/docs/getting_started.md @@ -139,11 +139,11 @@ mips arm disassembly: - third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv/row_common.o >row_common.txt + llvm-objdump -d ./out/Release/obj/libyuv/row_common.o >row_common.txt - third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon.o >row_neon.txt + llvm-objdump -d 
./out/Release/obj/libyuv_neon/row_neon.o >row_neon.txt - third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon64.o >row_neon64.txt + llvm-objdump -d ./out/Release/obj/libyuv_neon/row_neon64.o >row_neon64.txt Caveat: Disassembly may require optimize_max be disabled in BUILD.gn @@ -238,6 +238,18 @@ After running script, clang & qemu are built in `build-toolchain-qemu/riscv-clan -DUSE_RVV=ON . cmake --build out/Release/ +#### Customized Compiler Flags + +Customized compiler flags are supported by `-DRISCV_COMPILER_FLAGS="xxx"`. +If `-DRISCV_COMPILER_FLAGS="xxx"` is manually assigned, other compile flags(e.g disable -march=xxx) will not be appended. + +Example: + + cmake -B out/Release/ -DUNIT_TEST=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_TOOLCHAIN_FILE="./riscv_script/riscv-clang.cmake" \ + -DRISCV_COMPILER_FLAGS="-mcpu=sifive-x280" \ + . ### Run on QEMU diff --git a/files/docs/rotation.md b/docs/rotation.md index a08430fd..a08430fd 100644 --- a/files/docs/rotation.md +++ b/docs/rotation.md diff --git a/files/download_vs_toolchain.py b/download_vs_toolchain.py index 6bc086d6..6bc086d6 100644 --- a/files/download_vs_toolchain.py +++ b/download_vs_toolchain.py diff --git a/files/Android.bp b/files/Android.bp deleted file mode 100644 index 9811a7b3..00000000 --- a/files/Android.bp +++ /dev/null @@ -1,197 +0,0 @@ -package { - default_applicable_licenses: ["external_libyuv_files_license"], -} - -// Added automatically by a large-scale-change -// -// large-scale-change included anything that looked like it might be a license -// text as a license_text. e.g. LICENSE, NOTICE, COPYING etc. -// -// Please consider removing redundant or irrelevant files from 'license_text:'. 
-// See: http://go/android-license-faq -license { - name: "external_libyuv_files_license", - visibility: [":__subpackages__"], - license_kinds: [ - "SPDX-license-identifier-BSD", - ], - license_text: [ - "LICENSE", - "PATENTS", - ], -} - -cc_library { - name: "libyuv", - vendor_available: true, - product_available: true, - host_supported: true, - vndk: { - enabled: true, - }, - - srcs: [ - "source/compare.cc", - "source/compare_common.cc", - "source/compare_gcc.cc", - "source/compare_msa.cc", - "source/compare_neon.cc", - "source/compare_neon64.cc", - "source/convert.cc", - "source/convert_argb.cc", - "source/convert_from.cc", - "source/convert_from_argb.cc", - "source/convert_jpeg.cc", - "source/convert_to_argb.cc", - "source/convert_to_i420.cc", - "source/cpu_id.cc", - "source/mjpeg_decoder.cc", - "source/mjpeg_validate.cc", - "source/planar_functions.cc", - "source/rotate.cc", - "source/rotate_any.cc", - "source/rotate_argb.cc", - "source/rotate_common.cc", - "source/rotate_gcc.cc", - "source/rotate_msa.cc", - "source/rotate_neon.cc", - "source/rotate_neon64.cc", - "source/row_any.cc", - "source/row_common.cc", - "source/row_gcc.cc", - "source/row_msa.cc", - "source/row_neon.cc", - "source/row_neon64.cc", - "source/row_rvv.cc", - "source/scale.cc", - "source/scale_any.cc", - "source/scale_argb.cc", - "source/scale_common.cc", - "source/scale_gcc.cc", - "source/scale_msa.cc", - "source/scale_neon.cc", - "source/scale_neon64.cc", - "source/scale_rgb.cc", - "source/scale_uv.cc", - "source/video_common.cc", - ], - - cflags: [ - "-Wall", - "-Werror", - "-Wno-unused-parameter", - "-fexceptions", - "-DHAVE_JPEG", - ], - - arch: { - arm: { - cflags: ["-mfpu=neon"], - }, - }, - - shared_libs: ["libjpeg"], - - export_include_dirs: ["include"], - - apex_available: [ - "//apex_available:platform", - "com.android.media.swcodec", - ], - min_sdk_version: "29", -} - -// compatibilty static library until all uses of libyuv_static are replaced -// with libyuv (b/37646797) 
-cc_library_static { - name: "libyuv_static", - vendor_available: true, - whole_static_libs: ["libyuv"], - apex_available: [ - "//apex_available:platform", - "com.android.media.swcodec", - ], - min_sdk_version: "29", -} - -cc_test { - name: "libyuv_unittest", - static_libs: ["libyuv"], - shared_libs: ["libjpeg"], - cflags: ["-Wall", "-Werror"], - srcs: [ - "unit_test/basictypes_test.cc", - "unit_test/color_test.cc", - "unit_test/compare_test.cc", - "unit_test/convert_test.cc", - "unit_test/cpu_test.cc", - "unit_test/cpu_thread_test.cc", - "unit_test/math_test.cc", - "unit_test/planar_test.cc", - "unit_test/rotate_argb_test.cc", - "unit_test/rotate_test.cc", - "unit_test/scale_argb_test.cc", - "unit_test/scale_rgb_test.cc", - "unit_test/scale_test.cc", - "unit_test/scale_uv_test.cc", - "unit_test/unit_test.cc", - "unit_test/video_common_test.cc", - ], -} - -cc_test { - name: "compare", - gtest: false, - srcs: [ - "util/compare.cc", - ], - static_libs: ["libyuv"], -} - -cc_test { - name: "cpuid", - gtest: false, - srcs: [ - "util/cpuid.c", - ], - static_libs: ["libyuv"], -} - -cc_test { - name: "i444tonv12_eg", - gtest: false, - srcs: [ - "util/i444tonv12_eg.cc", - ], - static_libs: ["libyuv"], -} - -cc_test { - name: "psnr", - gtest: false, - srcs: [ - "util/psnr_main.cc", - "util/psnr.cc", - "util/ssim.cc", - ], - static_libs: ["libyuv"], -} - -cc_test { - name: "yuvconstants", - gtest: false, - srcs: [ - "util/yuvconstants.c", - ], - static_libs: ["libyuv"], -} - -cc_test { - name: "yuvconvert", - gtest: false, - srcs: [ - "util/yuvconvert.cc", - ], - static_libs: ["libyuv"], - shared_libs: ["libjpeg"], -} diff --git a/files/LICENSE b/files/LICENSE deleted file mode 100644 index c911747a..00000000 --- a/files/LICENSE +++ /dev/null @@ -1,29 +0,0 @@ -Copyright 2011 The LibYuv Project Authors. All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - * Neither the name of Google nor the names of its contributors may - be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/files/codereview.settings b/files/codereview.settings deleted file mode 100644 index b226fae5..00000000 --- a/files/codereview.settings +++ /dev/null @@ -1,5 +0,0 @@ -# This file is used by `git cl` to get repository specific information. 
-CODE_REVIEW_SERVER: codereview.chromium.org -GERRIT_HOST: True -PROJECT: libyuv -VIEW_VC: https://chromium.googlesource.com/libyuv/libyuv/+/ diff --git a/files/public.mk b/files/public.mk deleted file mode 100644 index 1342307a..00000000 --- a/files/public.mk +++ /dev/null @@ -1,13 +0,0 @@ -# This file contains all the common make variables which are useful for -# anyone depending on this library. -# Note that dependencies on NDK are not directly listed since NDK auto adds -# them. - -LIBYUV_INCLUDES := $(LIBYUV_PATH)/include - -LIBYUV_C_FLAGS := - -LIBYUV_CPP_FLAGS := - -LIBYUV_LDLIBS := -LIBYUV_DEP_MODULES := diff --git a/files/include/libyuv.h b/include/libyuv.h index a06e1233..a06e1233 100644 --- a/files/include/libyuv.h +++ b/include/libyuv.h diff --git a/files/include/libyuv/basic_types.h b/include/libyuv/basic_types.h index 1bea67f2..1bea67f2 100644 --- a/files/include/libyuv/basic_types.h +++ b/include/libyuv/basic_types.h diff --git a/files/include/libyuv/compare.h b/include/libyuv/compare.h index 3353ad71..3353ad71 100644 --- a/files/include/libyuv/compare.h +++ b/include/libyuv/compare.h diff --git a/files/include/libyuv/compare_row.h b/include/libyuv/compare_row.h index d8e82d72..8293c919 100644 --- a/files/include/libyuv/compare_row.h +++ b/include/libyuv/compare_row.h @@ -28,7 +28,10 @@ extern "C" { #endif // MemorySanitizer does not support assembly code yet. 
http://crbug.com/344505 #if defined(__has_feature) -#if __has_feature(memory_sanitizer) +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON) +#define LIBYUV_DISABLE_NEON +#endif +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86) #define LIBYUV_DISABLE_X86 #endif #endif diff --git a/files/include/libyuv/convert.h b/include/libyuv/convert.h index 88619a4f..88619a4f 100644 --- a/files/include/libyuv/convert.h +++ b/include/libyuv/convert.h diff --git a/files/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h index 35eeac9b..35eeac9b 100644 --- a/files/include/libyuv/convert_argb.h +++ b/include/libyuv/convert_argb.h diff --git a/files/include/libyuv/convert_from.h b/include/libyuv/convert_from.h index 32f42a63..32f42a63 100644 --- a/files/include/libyuv/convert_from.h +++ b/include/libyuv/convert_from.h diff --git a/files/include/libyuv/convert_from_argb.h b/include/libyuv/convert_from_argb.h index ff2a581a..ff2a581a 100644 --- a/files/include/libyuv/convert_from_argb.h +++ b/include/libyuv/convert_from_argb.h diff --git a/files/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h index 203f7e0d..5a81e7c9 100644 --- a/files/include/libyuv/cpu_id.h +++ b/include/libyuv/cpu_id.h @@ -31,24 +31,25 @@ static const int kCpuHasX86 = 0x10; static const int kCpuHasSSE2 = 0x20; static const int kCpuHasSSSE3 = 0x40; static const int kCpuHasSSE41 = 0x80; -static const int kCpuHasSSE42 = 0x100; // unused at this time. 
+static const int kCpuHasSSE42 = 0x100; static const int kCpuHasAVX = 0x200; static const int kCpuHasAVX2 = 0x400; static const int kCpuHasERMS = 0x800; static const int kCpuHasFMA3 = 0x1000; static const int kCpuHasF16C = 0x2000; -static const int kCpuHasGFNI = 0x4000; -static const int kCpuHasAVX512BW = 0x8000; -static const int kCpuHasAVX512VL = 0x10000; -static const int kCpuHasAVX512VNNI = 0x20000; -static const int kCpuHasAVX512VBMI = 0x40000; -static const int kCpuHasAVX512VBMI2 = 0x80000; -static const int kCpuHasAVX512VBITALG = 0x100000; -static const int kCpuHasAVX512VPOPCNTDQ = 0x200000; +static const int kCpuHasAVX512BW = 0x4000; +static const int kCpuHasAVX512VL = 0x8000; +static const int kCpuHasAVX512VNNI = 0x10000; +static const int kCpuHasAVX512VBMI = 0x20000; +static const int kCpuHasAVX512VBMI2 = 0x40000; +static const int kCpuHasAVX512VBITALG = 0x80000; +static const int kCpuHasAVX10 = 0x100000; +static const int kCpuHasAVXVNNI = 0x200000; +static const int kCpuHasAVXVNNIINT8 = 0x400000; // These flags are only valid on MIPS processors. -static const int kCpuHasMIPS = 0x400000; -static const int kCpuHasMSA = 0x800000; +static const int kCpuHasMIPS = 0x800000; +static const int kCpuHasMSA = 0x1000000; // These flags are only valid on LOONGARCH processors. 
static const int kCpuHasLOONGARCH = 0x2000000; diff --git a/files/include/libyuv/loongson_intrinsics.h b/include/libyuv/loongson_intrinsics.h index 1d613def..1d613def 100644 --- a/files/include/libyuv/loongson_intrinsics.h +++ b/include/libyuv/loongson_intrinsics.h diff --git a/files/include/libyuv/macros_msa.h b/include/libyuv/macros_msa.h index b9a44fcc..b9a44fcc 100644 --- a/files/include/libyuv/macros_msa.h +++ b/include/libyuv/macros_msa.h diff --git a/files/include/libyuv/mjpeg_decoder.h b/include/libyuv/mjpeg_decoder.h index 275f8d4c..275f8d4c 100644 --- a/files/include/libyuv/mjpeg_decoder.h +++ b/include/libyuv/mjpeg_decoder.h diff --git a/files/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index 154f2f21..f9344721 100644 --- a/files/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -30,7 +30,10 @@ extern "C" { #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) -#if __has_feature(memory_sanitizer) +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON) +#define LIBYUV_DISABLE_NEON +#endif +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86) #define LIBYUV_DISABLE_X86 #endif #endif @@ -827,15 +830,6 @@ int ARGBCopyYToAlpha(const uint8_t* src_y, int width, int height); -typedef void (*ARGBBlendRow)(const uint8_t* src_argb0, - const uint8_t* src_argb1, - uint8_t* dst_argb, - int width); - -// Get function to Alpha Blend ARGB pixels and store to destination. -LIBYUV_API -ARGBBlendRow GetARGBBlend(); - // Alpha Blend ARGB images and store to destination. // Source is pre-multiplied by alpha using ARGBAttenuate. // Alpha of destination is set to 255. 
diff --git a/files/include/libyuv/rotate.h b/include/libyuv/rotate.h index 37460c4a..37460c4a 100644 --- a/files/include/libyuv/rotate.h +++ b/include/libyuv/rotate.h diff --git a/files/include/libyuv/rotate_argb.h b/include/libyuv/rotate_argb.h index 20432949..20432949 100644 --- a/files/include/libyuv/rotate_argb.h +++ b/include/libyuv/rotate_argb.h diff --git a/files/include/libyuv/rotate_row.h b/include/libyuv/rotate_row.h index 2dd8c03d..3e6a2fef 100644 --- a/files/include/libyuv/rotate_row.h +++ b/include/libyuv/rotate_row.h @@ -28,7 +28,10 @@ extern "C" { #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) -#if __has_feature(memory_sanitizer) +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON) +#define LIBYUV_DISABLE_NEON +#endif +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86) #define LIBYUV_DISABLE_X86 #endif #endif diff --git a/files/include/libyuv/row.h b/include/libyuv/row.h index 5b244d77..46685a50 100644 --- a/files/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -31,7 +31,10 @@ extern "C" { #endif // MemorySanitizer does not support assembly code yet. 
http://crbug.com/344505 #if defined(__has_feature) -#if __has_feature(memory_sanitizer) +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON) +#define LIBYUV_DISABLE_NEON +#endif +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86) #define LIBYUV_DISABLE_X86 #endif #endif @@ -161,7 +164,6 @@ extern "C" { #define HAS_ARGBSEPIAROW_SSSE3 #define HAS_ARGBSHADEROW_SSE2 #define HAS_ARGBSUBTRACTROW_SSE2 -#define HAS_ARGBUNATTENUATEROW_SSE2 #define HAS_BLENDPLANEROW_SSSE3 #define HAS_COMPUTECUMULATIVESUMROW_SSE2 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 @@ -171,9 +173,6 @@ extern "C" { #define HAS_SOBELXROW_SSE2 #define HAS_SOBELXYROW_SSE2 #define HAS_SOBELYROW_SSE2 -#if !defined(LIBYUV_BIT_EXACT) -#define HAS_ARGBATTENUATEROW_SSSE3 -#endif // The following functions fail on gcc/clang 32 bit with fpic and framepointer. // caveat: clangcl uses row_win.cc which works. @@ -241,11 +240,7 @@ extern "C" { #define HAS_ARGBADDROW_AVX2 #define HAS_ARGBMULTIPLYROW_AVX2 #define HAS_ARGBSUBTRACTROW_AVX2 -#define HAS_ARGBUNATTENUATEROW_AVX2 #define HAS_BLENDPLANEROW_AVX2 -#if !defined(LIBYUV_BIT_EXACT) -#define HAS_ARGBATTENUATEROW_AVX2 -#endif #if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \ defined(_MSC_VER) @@ -285,14 +280,15 @@ extern "C" { #define HAS_ABGRTOAR30ROW_SSSE3 #define HAS_ABGRTOYJROW_SSSE3 #define HAS_AR64TOARGBROW_SSSE3 +#define HAS_ARGBATTENUATEROW_SSSE3 #define HAS_ARGBTOAB64ROW_SSSE3 #define HAS_ARGBTOAR30ROW_SSSE3 #define HAS_ARGBTOAR64ROW_SSSE3 +#define HAS_ARGBUNATTENUATEROW_SSE2 #define HAS_CONVERT16TO8ROW_SSSE3 #define HAS_CONVERT8TO16ROW_SSE2 -#define HAS_DETILEROW_SSE2 #define HAS_DETILEROW_16_SSE2 -#define HAS_DETILEROW_16_AVX +#define HAS_DETILEROW_SSE2 #define HAS_DETILESPLITUVROW_SSSE3 #define HAS_DETILETOYUY2_SSE2 #define HAS_HALFMERGEUVROW_SSSE3 @@ -345,13 +341,16 @@ extern "C" { #define HAS_ABGRTOYJROW_AVX2 #define HAS_ABGRTOYROW_AVX2 #define HAS_AR64TOARGBROW_AVX2 +#define 
HAS_ARGBATTENUATEROW_AVX2 #define HAS_ARGBTOAB64ROW_AVX2 #define HAS_ARGBTOAR30ROW_AVX2 #define HAS_ARGBTOAR64ROW_AVX2 #define HAS_ARGBTORAWROW_AVX2 #define HAS_ARGBTORGB24ROW_AVX2 +#define HAS_ARGBUNATTENUATEROW_AVX2 #define HAS_CONVERT16TO8ROW_AVX2 #define HAS_CONVERT8TO16ROW_AVX2 +#define HAS_DETILEROW_16_AVX #define HAS_DIVIDEROW_16_AVX2 #define HAS_HALFMERGEUVROW_AVX2 #define HAS_I210TOAR30ROW_AVX2 @@ -795,19 +794,29 @@ extern "C" { #endif #if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) +#define HAS_COPYROW_RVV +#if __riscv_v_intrinsic == 11000 #define HAS_AB64TOARGBROW_RVV +#define HAS_ABGRTOYJROW_RVV +#define HAS_ABGRTOYROW_RVV #define HAS_AR64TOARGBROW_RVV +#define HAS_AR64TOAB64ROW_RVV #define HAS_ARGBATTENUATEROW_RVV +#define HAS_ARGBBLENDROW_RVV +#define HAS_ARGBCOPYYTOALPHAROW_RVV +#define HAS_ARGBEXTRACTALPHAROW_RVV #define HAS_ARGBTOAB64ROW_RVV +#define HAS_ARGBTOABGRROW_RVV #define HAS_ARGBTOAR64ROW_RVV +#define HAS_ARGBTOBGRAROW_RVV #define HAS_ARGBTORAWROW_RVV #define HAS_ARGBTORGB24ROW_RVV -#define HAS_ARGBTOYROW_RVV +#define HAS_ARGBTORGBAROW_RVV #define HAS_ARGBTOYJROW_RVV -#define HAS_ABGRTOYROW_RVV -#define HAS_ABGRTOYJROW_RVV +#define HAS_ARGBTOYMATRIXROW_RVV +#define HAS_ARGBTOYROW_RVV #define HAS_BGRATOYROW_RVV -#define HAS_COPYROW_RVV +#define HAS_BLENDPLANEROW_RVV #define HAS_I400TOARGBROW_RVV #define HAS_I422ALPHATOARGBROW_RVV #define HAS_I422TOARGBROW_RVV @@ -822,10 +831,10 @@ extern "C" { #define HAS_MERGERGBROW_RVV #define HAS_MERGEUVROW_RVV #define HAS_MERGEXRGBROW_RVV -#define HAS_SPLITARGBROW_RVV -#define HAS_SPLITRGBROW_RVV -#define HAS_SPLITUVROW_RVV -#define HAS_SPLITXRGBROW_RVV +#define HAS_NV12TOARGBROW_RVV +#define HAS_NV12TORGB24ROW_RVV +#define HAS_NV21TOARGBROW_RVV +#define HAS_NV21TORGB24ROW_RVV #define HAS_RAWTOARGBROW_RVV #define HAS_RAWTORGB24ROW_RVV #define HAS_RAWTORGBAROW_RVV @@ -834,8 +843,16 @@ extern "C" { #define HAS_RGB24TOARGBROW_RVV #define HAS_RGB24TOYJROW_RVV #define HAS_RGB24TOYROW_RVV -#define 
HAS_RGBATOYROW_RVV +#define HAS_RGBATOARGBROW_RVV #define HAS_RGBATOYJROW_RVV +#define HAS_RGBATOYMATRIXROW_RVV +#define HAS_RGBATOYROW_RVV +#define HAS_RGBTOYMATRIXROW_RVV +#define HAS_SPLITARGBROW_RVV +#define HAS_SPLITRGBROW_RVV +#define HAS_SPLITUVROW_RVV +#define HAS_SPLITXRGBROW_RVV +#endif #endif #if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__) @@ -932,14 +949,6 @@ struct YuvConstants { free(var##_mem); \ var = NULL -#define align_buffer_64_16(var, size) \ - void* var##_mem = malloc((size)*2 + 63); /* NOLINT */ \ - uint16_t* var = (uint16_t*)(((intptr_t)var##_mem + 63) & ~63) /* NOLINT */ - -#define free_aligned_buffer_64_16(var) \ - free(var##_mem); \ - var = NULL - #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) #define OMITFP #else @@ -1351,6 +1360,26 @@ void UYVYToARGBRow_LSX(const uint8_t* src_uyvy, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void NV12ToARGBRow_RVV(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToARGBRow_RVV(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB24Row_RVV(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToRGB24Row_RVV(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width); void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -3081,6 +3110,9 @@ void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb, void ARGBExtractAlphaRow_LSX(const uint8_t* src_argb, uint8_t* dst_a, int width); +void ARGBExtractAlphaRow_RVV(const uint8_t* src_argb, + uint8_t* dst_a, + int width); void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* 
dst_ptr, int width); @@ -3100,6 +3132,7 @@ void ARGBExtractAlphaRow_Any_LSX(const uint8_t* src_ptr, void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width); +void ARGBCopyYToAlphaRow_RVV(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -3458,8 +3491,13 @@ void ARGBToARGB4444Row_LASX(const uint8_t* src_argb, int width); void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width); +void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width); +void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width); +void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB24Row_RVV(const uint8_t* src_argb, uint8_t* dst_rgb24, int width); +void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width); +void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width); void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); @@ -3473,6 +3511,8 @@ void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width); void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width); void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width); void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width); +void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width); +void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width); void AR64ShuffleRow_C(const uint8_t* src_ar64, uint8_t* dst_ar64, const uint8_t* shuffler, @@ -3501,6 +3541,8 @@ void ARGBToAR64Row_RVV(const 
uint8_t* src_argb, uint16_t* dst_ar64, int width); void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width); void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width); void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width); +void AR64ToAB64Row_RVV(const uint16_t* src_ar64, uint16_t* dst_ab64, int width); +void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width); void ARGBToAR64Row_Any_SSSE3(const uint8_t* src_ptr, uint16_t* dst_ptr, int width); @@ -4515,6 +4557,10 @@ void ARGBBlendRow_LSX(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); +void ARGBBlendRow_RVV(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); void ARGBBlendRow_C(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, @@ -4541,6 +4587,11 @@ void BlendPlaneRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); +void BlendPlaneRow_RVV(const uint8_t* src0, + const uint8_t* src1, + const uint8_t* alpha, + uint8_t* dst, + int width); void BlendPlaneRow_C(const uint8_t* src0, const uint8_t* src1, const uint8_t* alpha, @@ -6180,7 +6231,19 @@ void ByteToFloatRow_Any_NEON(const uint8_t* src_ptr, float* dst_ptr, float param, int width); - +// Convert FP16 Half Floats to FP32 Floats +void ConvertFP16ToFP32Row_NEON(const uint16_t* src, // fp16 + float* dst, + int width); +// Convert a column of FP16 Half Floats to a row of FP32 Floats +void ConvertFP16ToFP32Column_NEON(const uint16_t* src, // fp16 + int src_stride, // stride in elements + float* dst, + int width); +// Convert FP32 Floats to FP16 Half Floats +void ConvertFP32ToFP16Row_NEON(const float* src, + uint16_t* dst, // fp16 + int width); void ARGBLumaColorTableRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width, diff --git a/files/include/libyuv/scale.h b/include/libyuv/scale.h index 443f89c2..bfe4a344 100644 --- a/files/include/libyuv/scale.h 
+++ b/include/libyuv/scale.h @@ -27,39 +27,40 @@ typedef enum FilterMode { } FilterModeEnum; // Scale a YUV plane. +// Returns 0 if successful. LIBYUV_API -void ScalePlane(const uint8_t* src, - int src_stride, - int src_width, - int src_height, - uint8_t* dst, - int dst_stride, - int dst_width, - int dst_height, - enum FilterMode filtering); +int ScalePlane(const uint8_t* src, + int src_stride, + int src_width, + int src_height, + uint8_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering); LIBYUV_API -void ScalePlane_16(const uint16_t* src, - int src_stride, - int src_width, - int src_height, - uint16_t* dst, - int dst_stride, - int dst_width, - int dst_height, - enum FilterMode filtering); +int ScalePlane_16(const uint16_t* src, + int src_stride, + int src_width, + int src_height, + uint16_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering); // Sample is expected to be in the low 12 bits. LIBYUV_API -void ScalePlane_12(const uint16_t* src, - int src_stride, - int src_width, - int src_height, - uint16_t* dst, - int dst_stride, - int dst_width, - int dst_height, - enum FilterMode filtering); +int ScalePlane_12(const uint16_t* src, + int src_stride, + int src_width, + int src_height, + uint16_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering); // Scales a YUV 4:2:0 image from the src width and height to the // dst width and height. 
diff --git a/files/include/libyuv/scale_argb.h b/include/libyuv/scale_argb.h index 7641f18e..7641f18e 100644 --- a/files/include/libyuv/scale_argb.h +++ b/include/libyuv/scale_argb.h diff --git a/files/include/libyuv/scale_rgb.h b/include/libyuv/scale_rgb.h index d17c39fd..d17c39fd 100644 --- a/files/include/libyuv/scale_rgb.h +++ b/include/libyuv/scale_rgb.h diff --git a/files/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index a7957c3f..02ed61ca 100644 --- a/files/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -29,7 +29,10 @@ extern "C" { #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) -#if __has_feature(memory_sanitizer) +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON) +#define LIBYUV_DISABLE_NEON +#endif +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86) #define LIBYUV_DISABLE_X86 #endif #endif @@ -175,6 +178,38 @@ extern "C" { #define HAS_SCALEROWDOWN34_LSX #endif +#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) +#define HAS_SCALEADDROW_RVV +// TODO: Test ScaleARGBRowDownEven_RVV and enable it +// #define HAS_SCALEARGBROWDOWNEVEN_RVV +#define HAS_SCALEUVROWDOWN4_RVV +#define HAS_SCALEUVROWDOWNEVEN_RVV +#if __riscv_v_intrinsic == 11000 +#define HAS_SCALEARGBROWDOWN2_RVV +#define HAS_SCALEARGBROWDOWN2BOX_RVV +#define HAS_SCALEARGBROWDOWN2LINEAR_RVV +#define HAS_SCALEARGBROWDOWNEVENBOX_RVV +#define HAS_SCALEROWDOWN2_RVV +#define HAS_SCALEROWDOWN2BOX_RVV +#define HAS_SCALEROWDOWN2LINEAR_RVV +#define HAS_SCALEROWDOWN34_0_BOX_RVV +#define HAS_SCALEROWDOWN34_1_BOX_RVV +#define HAS_SCALEROWDOWN34_RVV +#define HAS_SCALEROWDOWN38_2_BOX_RVV +#define HAS_SCALEROWDOWN38_3_BOX_RVV +#define HAS_SCALEROWDOWN38_RVV +#define HAS_SCALEROWDOWN4_RVV +#define HAS_SCALEROWDOWN4BOX_RVV +#define HAS_SCALEROWUP2_BILINEAR_RVV +#define HAS_SCALEROWUP2_LINEAR_RVV +#define HAS_SCALEUVROWDOWN2_RVV +#define HAS_SCALEUVROWDOWN2BOX_RVV +#define 
HAS_SCALEUVROWDOWN2LINEAR_RVV +#define HAS_SCALEUVROWUP2_BILINEAR_RVV +#define HAS_SCALEUVROWUP2_LINEAR_RVV +#endif +#endif + // Scale ARGB vertically with bilinear interpolation. void ScalePlaneVertical(int src_height, int dst_width, @@ -949,6 +984,18 @@ void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); +void ScaleARGBRowDown2_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Linear_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); void ScaleARGBRowDown2_MSA(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, @@ -1061,6 +1108,16 @@ void ScaleARGBRowDownEvenBox_LSX(const uint8_t* src_argb, int src_stepx, uint8_t* dst_argb, int dst_width); +void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + int32_t src_stepx, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width); void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, @@ -1143,6 +1200,18 @@ void ScaleUVRowDown2Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_uv, int dst_width); +void ScaleUVRowDown2_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDown2Linear_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDown2Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); void ScaleUVRowDown2_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, @@ -1203,6 +1272,16 @@ void ScaleUVRowDownEvenBox_NEON(const uint8_t* src_ptr, int src_stepx, uint8_t* dst_uv, int 
dst_width); +void ScaleUVRowDown4_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int32_t src_stepx, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDownEven_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int32_t src_stepx, + uint8_t* dst_uv, + int dst_width); void ScaleUVRowDownEven_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, int32_t src_stepx, @@ -1292,6 +1371,14 @@ void ScaleUVRowUp2_Bilinear_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width); +void ScaleUVRowUp2_Linear_RVV(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowUp2_Bilinear_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr, uint16_t* dst_ptr, int dst_width); @@ -1744,6 +1831,61 @@ void ScaleRowDown34_1_Box_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width); +void ScaleAddRow_RVV(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); +void ScaleRowDown2_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Linear_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); + +void ScaleRowDown4_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_0_Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_1_Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_RVV(const uint8_t* src_ptr, + ptrdiff_t 
src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown38_3_Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_2_Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); + +void ScaleRowUp2_Linear_RVV(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/files/include/libyuv/scale_uv.h b/include/libyuv/scale_uv.h index 8e74e319..8e74e319 100644 --- a/files/include/libyuv/scale_uv.h +++ b/include/libyuv/scale_uv.h diff --git a/files/include/libyuv/version.h b/include/libyuv/version.h index b6623dbb..a9c54400 100644 --- a/files/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1871 +#define LIBYUV_VERSION 1883 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/files/include/libyuv/video_common.h b/include/libyuv/video_common.h index 32b8a521..32b8a521 100644 --- a/files/include/libyuv/video_common.h +++ b/include/libyuv/video_common.h diff --git a/infra/config/OWNERS b/infra/config/OWNERS new file mode 100644 index 00000000..2c4f90a0 --- /dev/null +++ b/infra/config/OWNERS @@ -0,0 +1,3 @@ +fbarchard@chromium.org +mbonadei@chromium.org +jansson@google.com diff --git a/files/infra/config/PRESUBMIT.py b/infra/config/PRESUBMIT.py index f79e08ad..f79e08ad 100644 --- a/files/infra/config/PRESUBMIT.py +++ b/infra/config/PRESUBMIT.py diff --git a/files/infra/config/README.md b/infra/config/README.md index e5e3b5f8..e5e3b5f8 100644 --- a/files/infra/config/README.md +++ b/infra/config/README.md diff --git a/files/infra/config/codereview.settings b/infra/config/codereview.settings index 6d742273..6d742273 100644 --- 
a/files/infra/config/codereview.settings +++ b/infra/config/codereview.settings diff --git a/files/infra/config/commit-queue.cfg b/infra/config/commit-queue.cfg index 4a8d77f4..4a8d77f4 100644 --- a/files/infra/config/commit-queue.cfg +++ b/infra/config/commit-queue.cfg diff --git a/files/infra/config/cr-buildbucket.cfg b/infra/config/cr-buildbucket.cfg index be9d1d28..7415851b 100644 --- a/files/infra/config/cr-buildbucket.cfg +++ b/infra/config/cr-buildbucket.cfg @@ -29,11 +29,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -64,11 +59,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -99,11 +89,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -132,10 +117,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -164,10 +145,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -196,10 +173,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' 
' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -230,11 +203,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -265,11 +233,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -300,11 +263,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -335,11 +293,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -370,11 +323,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -405,11 +353,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -440,11 +383,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": 
"rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -475,11 +413,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -510,11 +443,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -545,11 +473,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -580,11 +503,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -614,10 +532,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -647,10 +561,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -680,10 +590,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -714,11 +620,6 @@ 
buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -749,11 +650,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -784,11 +680,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -819,11 +710,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -854,11 +740,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -889,11 +770,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -924,11 +800,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -959,11 +830,6 @@ buckets { } 
properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -993,10 +859,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1026,10 +888,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-trusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1114,10 +972,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1146,10 +1000,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1178,10 +1028,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1212,11 +1058,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1247,11 +1088,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": 
true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1281,10 +1117,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1314,10 +1146,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1348,11 +1176,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1383,11 +1206,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1418,11 +1236,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1453,11 +1266,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1488,11 +1296,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' 
"instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1523,11 +1326,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1558,11 +1356,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1593,11 +1386,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1627,10 +1415,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1660,10 +1444,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1693,10 +1473,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1727,11 +1503,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": 
"chromium-reclient-metrics"' @@ -1764,11 +1535,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": false,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1799,11 +1565,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": false,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1834,11 +1595,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": false,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1869,11 +1625,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": false,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1904,11 +1655,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": false,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' @@ -1939,11 +1685,6 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": false,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' - ' },' ' "$build/reclient": {' ' "instance": "rbe-webrtc-untrusted",' ' "metrics_project": "chromium-reclient-metrics"' diff --git a/files/infra/config/luci-logdog.cfg b/infra/config/luci-logdog.cfg index adc75bef..adc75bef 100644 --- a/files/infra/config/luci-logdog.cfg +++ b/infra/config/luci-logdog.cfg diff --git a/files/infra/config/luci-milo.cfg 
b/infra/config/luci-milo.cfg index baf786f2..baf786f2 100644 --- a/files/infra/config/luci-milo.cfg +++ b/infra/config/luci-milo.cfg diff --git a/files/infra/config/luci-scheduler.cfg b/infra/config/luci-scheduler.cfg index 0ec5dd0e..0ec5dd0e 100644 --- a/files/infra/config/luci-scheduler.cfg +++ b/infra/config/luci-scheduler.cfg diff --git a/files/infra/config/main.star b/infra/config/main.star index 7490a599..e83afe4f 100755 --- a/files/infra/config/main.star +++ b/infra/config/main.star @@ -8,24 +8,6 @@ lucicfg.check_version("1.30.9") LIBYUV_GIT = "https://chromium.googlesource.com/libyuv/libyuv" LIBYUV_GERRIT = "https://chromium-review.googlesource.com/libyuv/libyuv" -GOMA_BACKEND_RBE_PROD = { - "server_host": "goma.chromium.org", - "use_luci_auth": True, -} - -GOMA_BACKEND_RBE_ATS_PROD = { - "server_host": "goma.chromium.org", - "use_luci_auth": True, - "enable_ats": True, -} - -# Disable ATS on Windows CQ/try. -GOMA_BACKEND_RBE_NO_ATS_PROD = { - "server_host": "goma.chromium.org", - "use_luci_auth": True, - "enable_ats": False, -} - RECLIENT_CI = { "instance": "rbe-webrtc-trusted", "metrics_project": "chromium-reclient-metrics", @@ -80,7 +62,7 @@ luci.project( ], bindings = [ luci.binding( - roles = "role/swarming.taskTriggerer", # for LED tasks. + roles = "role/swarming.taskTriggerer", # for LED tasks. 
groups = "project-libyuv-admins", ), luci.binding( @@ -218,19 +200,6 @@ def get_os_dimensions(os): return {"os": "Ubuntu-18.04", "cores": "8", "cpu": "x86-64"} return {} -def get_os_properties(os, try_builder = False): - if os == "android": - return {"$build/goma": GOMA_BACKEND_RBE_PROD} - elif os in ("ios", "mac"): - return {"$build/goma": GOMA_BACKEND_RBE_PROD} - elif os == "win" and try_builder: - return {"$build/goma": GOMA_BACKEND_RBE_NO_ATS_PROD} - elif os == "win": - return {"$build/goma": GOMA_BACKEND_RBE_ATS_PROD} - elif os == "linux": - return {"$build/goma": GOMA_BACKEND_RBE_ATS_PROD} - return {} - def libyuv_ci_builder(name, dimensions, properties, triggered_by): return luci.builder( name = name, @@ -268,8 +237,7 @@ def libyuv_try_builder(name, dimensions, properties, recipe_name = "libyuv/libyu def ci_builder(name, os, category, short_name = None): dimensions = get_os_dimensions(os) - properties = get_os_properties(os) - properties["$build/reclient"] = RECLIENT_CI + properties = {"$build/reclient": RECLIENT_CI} dimensions["pool"] = "luci.flex.ci" properties["builder_group"] = "client.libyuv" @@ -280,8 +248,7 @@ def ci_builder(name, os, category, short_name = None): def try_builder(name, os, experiment_percentage = None): dimensions = get_os_dimensions(os) - properties = get_os_properties(os, try_builder = True) - properties["$build/reclient"] = RECLIENT_CQ + properties = {"$build/reclient": RECLIENT_CQ} dimensions["pool"] = "luci.flex.try" properties["builder_group"] = "tryserver.libyuv" diff --git a/files/infra/config/project.cfg b/infra/config/project.cfg index af79cfb2..3c327118 100644 --- a/files/infra/config/project.cfg +++ b/infra/config/project.cfg @@ -7,7 +7,7 @@ name: "libyuv" access: "group:all" lucicfg { - version: "1.39.8" + version: "1.39.14" package_dir: "." config_dir: "." 
entry_point: "main.star" diff --git a/files/infra/config/realms.cfg b/infra/config/realms.cfg index 16ffaac9..16ffaac9 100644 --- a/files/infra/config/realms.cfg +++ b/infra/config/realms.cfg diff --git a/files/libyuv.gni b/libyuv.gni index 0a6c4453..343160c3 100644 --- a/files/libyuv.gni +++ b/libyuv.gni @@ -7,6 +7,7 @@ # be found in the AUTHORS file in the root of the source tree. import("//build/config/arm.gni") +import("//build/config/loongarch64.gni") import("//build/config/mips.gni") import("//build_overrides/build.gni") @@ -21,4 +22,8 @@ declare_args() { (current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_msa libyuv_use_mmi = (current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_mmi + libyuv_use_lsx = + (current_cpu == "loong64") && loongarch64_use_lsx + libyuv_use_lasx = + (current_cpu == "loong64") && loongarch64_use_lasx } diff --git a/files/libyuv.gyp b/libyuv.gyp index f73a1a4b..f73a1a4b 100644 --- a/files/libyuv.gyp +++ b/libyuv.gyp diff --git a/files/libyuv.gypi b/libyuv.gypi index 48936aa7..48936aa7 100644 --- a/files/libyuv.gypi +++ b/libyuv.gypi diff --git a/files/linux.mk b/linux.mk index b541b47c..d19a888a 100644 --- a/files/linux.mk +++ b/linux.mk @@ -33,6 +33,7 @@ LOCAL_OBJ_FILES := \ source/rotate_argb.o \ source/rotate_common.o \ source/rotate_gcc.o \ + source/rotate_lsx.o \ source/rotate_msa.o \ source/rotate_neon.o \ source/rotate_neon64.o \ @@ -40,19 +41,24 @@ LOCAL_OBJ_FILES := \ source/row_any.o \ source/row_common.o \ source/row_gcc.o \ + source/row_lasx.o \ + source/row_lsx.o \ source/row_msa.o \ source/row_neon.o \ source/row_neon64.o \ + source/row_rvv.o \ source/row_win.o \ source/scale.o \ source/scale_any.o \ source/scale_argb.o \ source/scale_common.o \ source/scale_gcc.o \ + source/scale_lsx.o \ source/scale_msa.o \ source/scale_neon.o \ source/scale_neon64.o \ source/scale_rgb.o \ + source/scale_rvv.o \ source/scale_uv.o \ source/scale_win.o \ source/video_common.o @@ -3,7 +3,7 @@ # Note that 
dependencies on NDK are not directly listed since NDK auto adds # them. -LIBYUV_INCLUDES := $(LIBYUV_PATH)/files/include +LIBYUV_INCLUDES := $(LIBYUV_PATH)/include LIBYUV_C_FLAGS := diff --git a/files/pylintrc b/pylintrc index b8bea334..b8bea334 100644 --- a/files/pylintrc +++ b/pylintrc diff --git a/files/riscv_script/prepare_toolchain_qemu.sh b/riscv_script/prepare_toolchain_qemu.sh index 2a901739..2a901739 100755 --- a/files/riscv_script/prepare_toolchain_qemu.sh +++ b/riscv_script/prepare_toolchain_qemu.sh diff --git a/files/riscv_script/riscv-clang.cmake b/riscv_script/riscv-clang.cmake index 47dd5067..e287941f 100644 --- a/files/riscv_script/riscv-clang.cmake +++ b/riscv_script/riscv-clang.cmake @@ -28,17 +28,20 @@ set(CMAKE_OBJDUMP "${TOOLCHAIN_PATH}/bin/llvm-objdump") set(CMAKE_OBJCOPY "${TOOLCHAIN_PATH}/bin/llvm-objcopy") # compile options -message(STATUS "USE_RVV: ${USE_RVV}") -message(STATUS "USE_AUTO_VECTORIZER: ${USE_AUTO_VECTORIZER}") -set(RISCV_COMPILER_FLAGS) -if(USE_RVV) - list(APPEND RISCV_COMPILER_FLAGS "-march=rv64gcv") - if(NOT USE_AUTO_VECTORIZER) - # Disable auto-vectorizer - add_compile_options(-fno-vectorize -fno-slp-vectorize) +set(RISCV_COMPILER_FLAGS "" CACHE STRING "Compile flags") +# if user provides RISCV_COMPILER_FLAGS, appeding compile flags is avoided. 
+if(RISCV_COMPILER_FLAGS STREQUAL "") + message(STATUS "USE_RVV: ${USE_RVV}") + message(STATUS "USE_AUTO_VECTORIZER: ${USE_AUTO_VECTORIZER}") + if(USE_RVV) + list(APPEND RISCV_COMPILER_FLAGS "-march=rv64gcv") + if(NOT USE_AUTO_VECTORIZER) + # Disable auto-vectorizer + add_compile_options(-fno-vectorize -fno-slp-vectorize) + endif() + else() + list(APPEND RISCV_COMPILER_FLAGS "-march=rv64gc") endif() -else() - list(APPEND RISCV_COMPILER_FLAGS "-march=rv64gc") endif() message(STATUS "RISCV_COMPILER_FLAGS: ${RISCV_COMPILER_FLAGS}") diff --git a/files/riscv_script/run_qemu.sh b/riscv_script/run_qemu.sh index 080af3b1..080af3b1 100755 --- a/files/riscv_script/run_qemu.sh +++ b/riscv_script/run_qemu.sh diff --git a/files/source/compare.cc b/source/compare.cc index 50a736bd..50a736bd 100644 --- a/files/source/compare.cc +++ b/source/compare.cc diff --git a/files/source/compare_common.cc b/source/compare_common.cc index d1cab8d2..d1cab8d2 100644 --- a/files/source/compare_common.cc +++ b/source/compare_common.cc diff --git a/files/source/compare_gcc.cc b/source/compare_gcc.cc index 33cbe25d..33cbe25d 100644 --- a/files/source/compare_gcc.cc +++ b/source/compare_gcc.cc diff --git a/files/source/compare_msa.cc b/source/compare_msa.cc index 0b807d37..0b807d37 100644 --- a/files/source/compare_msa.cc +++ b/source/compare_msa.cc diff --git a/files/source/compare_neon.cc b/source/compare_neon.cc index afdd6012..afdd6012 100644 --- a/files/source/compare_neon.cc +++ b/source/compare_neon.cc diff --git a/files/source/compare_neon64.cc b/source/compare_neon64.cc index 70fb9b91..70fb9b91 100644 --- a/files/source/compare_neon64.cc +++ b/source/compare_neon64.cc diff --git a/files/source/compare_win.cc b/source/compare_win.cc index 9bb27f1d..9bb27f1d 100644 --- a/files/source/compare_win.cc +++ b/source/compare_win.cc diff --git a/files/source/convert.cc b/source/convert.cc index b11ab1bf..6ac5bc43 100644 --- a/files/source/convert.cc +++ b/source/convert.cc @@ -54,18 +54,25 @@ 
static int I4xxToI420(const uint8_t* src_y, const int dst_y_height = Abs(src_y_height); const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1); const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1); + int r; if (src_uv_width <= 0 || src_uv_height == 0) { return -1; } if (dst_y) { - ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y, - dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear); + r = ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y, + dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear); + if (r != 0) { + return r; + } } - ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, - dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); - ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, - dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); - return 0; + r = ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, + dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); + if (r != 0) { + return r; + } + r = ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, + dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); + return r; } // Copy I420 with optional flipping. 
@@ -526,18 +533,25 @@ static int Ix10ToI010(const uint16_t* src_y, const int src_uv_height = SUBSAMPLE(height, subsample_y, subsample_y); const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1); const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1); + int r; if (width <= 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - dst_y_width, dst_y_height, kFilterBilinear); + r = ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + dst_y_width, dst_y_height, kFilterBilinear); + if (r != 0) { + return r; + } } - ScalePlane_12(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, - dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); - ScalePlane_12(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, - dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); - return 0; + r = ScalePlane_12(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, + dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, + dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); + return r; } LIBYUV_API @@ -777,6 +791,8 @@ int I422ToNV21(const uint8_t* src_y, // Allocate u and v buffers align_buffer_64(plane_u, halfwidth * halfheight * 2); uint8_t* plane_v = plane_u + halfwidth * halfheight; + if (!plane_u) + return 1; I422ToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_y, dst_stride_y, plane_u, halfwidth, plane_v, halfwidth, width, @@ -892,6 +908,8 @@ int MT2TToP010(const uint8_t* src_y, void (*UnpackMT2T)(const uint8_t* src, uint16_t* dst, size_t size) = UnpackMT2T_C; align_buffer_64(row_buf, row_buf_size); + if (!row_buf) + return 1; #if defined(HAS_UNPACKMT2T_NEON) if (TestCpuFlag(kCpuHasNEON)) { @@ -1092,6 +1110,8 @@ int I422ToNV21(const uint8_t* src_y, int awidth = halfwidth * 2; align_buffer_64(row_vu_0, awidth * 2); uint8_t* row_vu_1 = 
row_vu_0 + awidth; + if (!row_vu_0) + return 1; for (y = 0; y < height - 1; y += 2) { MergeUVRow(src_v, src_u, row_vu_0, halfwidth); @@ -1330,18 +1350,22 @@ int NV12ToNV24(const uint8_t* src_y, int dst_stride_uv, int width, int height) { + int r; if (width <= 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); + r = ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } } - UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), - SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width), - Abs(height), kFilterBilinear); - return 0; + r = UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), + SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width), + Abs(height), kFilterBilinear); + return r; } LIBYUV_API @@ -1355,17 +1379,21 @@ int NV16ToNV24(const uint8_t* src_y, int dst_stride_uv, int width, int height) { + int r; if (width <= 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); + r = ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } } - UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv, - dst_stride_uv, Abs(width), Abs(height), kFilterBilinear); - return 0; + r = UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv, + dst_stride_uv, Abs(width), Abs(height), kFilterBilinear); + return r; } // Any P[420]1[02] to I[420]1[02] format with mirroring. 
@@ -1443,18 +1471,22 @@ int P010ToP410(const uint16_t* src_y, int dst_stride_uv, int width, int height) { + int r; if (width <= 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); + r = ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } } - UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), - SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width), - Abs(height), kFilterBilinear); - return 0; + r = UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), + SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width), + Abs(height), kFilterBilinear); + return r; } LIBYUV_API @@ -1468,17 +1500,21 @@ int P210ToP410(const uint16_t* src_y, int dst_stride_uv, int width, int height) { + int r; if (width <= 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); + r = ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } } - UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv, - dst_stride_uv, Abs(width), Abs(height), kFilterBilinear); - return 0; + r = UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv, + dst_stride_uv, Abs(width), Abs(height), kFilterBilinear); + return r; } // Convert YUY2 to I420. 
@@ -2128,6 +2164,11 @@ int ARGBToI420Alpha(const uint8_t* src_argb, : ARGBExtractAlphaRow_Any_LSX; } #endif +#if defined(HAS_ARGBEXTRACTALPHAROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBExtractAlphaRow = ARGBExtractAlphaRow_RVV; + } +#endif for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width); @@ -2655,6 +2696,8 @@ int RGB24ToI420(const uint8_t* src_rgb24, // Allocate 2 rows of ARGB. const int row_size = (width * 4 + 31) & ~31; align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { @@ -2831,6 +2874,8 @@ int RGB24ToJ420(const uint8_t* src_rgb24, // Allocate 2 rows of ARGB. const int row_size = (width * 4 + 31) & ~31; align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { @@ -3010,6 +3055,8 @@ int RAWToI420(const uint8_t* src_raw, // Allocate 2 rows of ARGB. const int row_size = (width * 4 + 31) & ~31; align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { @@ -3186,6 +3233,8 @@ int RAWToJ420(const uint8_t* src_raw, // Allocate 2 rows of ARGB. const int row_size = (width * 4 + 31) & ~31; align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { @@ -3364,6 +3413,8 @@ int RGB565ToI420(const uint8_t* src_rgb565, // Allocate 2 rows of ARGB. const int row_size = (width * 4 + 31) & ~31; align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ @@ -3544,6 +3595,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, // Allocate 2 rows of ARGB. const int row_size = (width * 4 + 31) & ~31; align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { @@ -3757,6 +3810,8 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, // Allocate 2 rows of ARGB. 
const int row_size = (width * 4 + 31) & ~31; align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { diff --git a/files/source/convert_argb.cc b/source/convert_argb.cc index cc6560de..871fea59 100644 --- a/files/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -3003,6 +3003,7 @@ int J400ToARGB(const uint8_t* src_y, return 0; } +#ifndef __riscv // Shuffle table for converting BGRA to ARGB. static const uvec8 kShuffleMaskBGRAToARGB = { 3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u}; @@ -3090,6 +3091,195 @@ int AR64ToAB64(const uint16_t* src_ar64, return AR64Shuffle(src_ar64, src_stride_ar64, dst_ab64, dst_stride_ab64, (const uint8_t*)&kShuffleMaskAR64ToAB64, width, height); } +#else +// Convert BGRA to ARGB (same as ARGBToBGRA). +LIBYUV_API +int BGRAToARGB(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return ARGBToBGRA(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb, width, + height); +} + +// Convert ARGB to BGRA. +LIBYUV_API +int ARGBToBGRA(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_bgra, + int dst_stride_bgra, + int width, + int height) { + int y; + void (*ARGBToBGRARow)(const uint8_t* src_argb, uint8_t* dst_bgra, int width) = + ARGBToBGRARow_C; + if (!src_argb || !dst_bgra || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce rows. 
+ if (src_stride_argb == width * 4 && dst_stride_bgra == width * 4) { + width *= height; + height = 1; + src_stride_argb = dst_stride_bgra = 0; + } + +#if defined(HAS_ARGBTOBGRAROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToBGRARow = ARGBToBGRARow_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + ARGBToBGRARow(src_argb, dst_bgra, width); + src_argb += src_stride_argb; + dst_bgra += dst_stride_bgra; + } + return 0; +} + +// Convert ARGB to ABGR. +LIBYUV_API +int ARGBToABGR(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + int y; + void (*ARGBToABGRRow)(const uint8_t* src_argb, uint8_t* dst_abgr, int width) = + ARGBToABGRRow_C; + if (!src_argb || !dst_abgr || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce rows. + if (src_stride_argb == width * 4 && dst_stride_abgr == width * 4) { + width *= height; + height = 1; + src_stride_argb = dst_stride_abgr = 0; + } + +#if defined(HAS_ARGBTOABGRROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToABGRRow = ARGBToABGRRow_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + ARGBToABGRRow(src_argb, dst_abgr, width); + src_argb += src_stride_argb; + dst_abgr += dst_stride_abgr; + } + return 0; +} + +// Convert ABGR to ARGB (same as ARGBToABGR). +LIBYUV_API +int ABGRToARGB(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return ARGBToABGR(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb, width, + height); +} + +// Convert RGBA to ARGB. 
+LIBYUV_API +int RGBAToARGB(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + int y; + void (*RGBAToARGBRow)(const uint8_t* src_rgba, uint8_t* dst_argb, int width) = + RGBAToARGBRow_C; + if (!src_rgba || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_rgba = src_rgba + (height - 1) * src_stride_rgba; + src_stride_rgba = -src_stride_rgba; + } + // Coalesce rows. + if (src_stride_rgba == width * 4 && dst_stride_argb == width * 4) { + width *= height; + height = 1; + src_stride_rgba = dst_stride_argb = 0; + } + +#if defined(HAS_RGBATOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGBAToARGBRow = RGBAToARGBRow_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + RGBAToARGBRow(src_rgba, dst_argb, width); + src_rgba += src_stride_rgba; + dst_argb += dst_stride_argb; + } + return 0; +} + +// Convert AR64 To AB64. +LIBYUV_API +int AR64ToAB64(const uint16_t* src_ar64, + int src_stride_ar64, + uint16_t* dst_ab64, + int dst_stride_ab64, + int width, + int height) { + int y; + void (*AR64ToAB64Row)(const uint16_t* src_ar64, uint16_t* dst_ab64, + int width) = AR64ToAB64Row_C; + if (!src_ar64 || !dst_ab64 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_ar64 = src_ar64 + (height - 1) * src_stride_ar64; + src_stride_ar64 = -src_stride_ar64; + } + // Coalesce rows. + if (src_stride_ar64 == width * 4 && dst_stride_ab64 == width * 4) { + width *= height; + height = 1; + src_stride_ar64 = dst_stride_ab64 = 0; + } + +#if defined(HAS_AR64TOAB64ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + AR64ToAB64Row = AR64ToAB64Row_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + AR64ToAB64Row(src_ar64, dst_ab64, width); + src_ar64 += src_stride_ar64; + dst_ab64 += dst_stride_ab64; + } + return 0; +} +#endif // Convert RGB24 to ARGB. 
LIBYUV_API @@ -3853,6 +4043,11 @@ int NV12ToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_NV12TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + NV12ToARGBRow = NV12ToARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { NV12ToARGBRow(src_y, src_uv, dst_argb, yuvconstants, width); @@ -3938,6 +4133,11 @@ int NV21ToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_NV21TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + NV21ToARGBRow = NV21ToARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { NV21ToARGBRow(src_y, src_vu, dst_argb, yuvconstants, width); @@ -4058,6 +4258,11 @@ int NV12ToRGB24Matrix(const uint8_t* src_y, } } #endif +#if defined(HAS_NV12TORGB24ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + NV12ToRGB24Row = NV12ToRGB24Row_RVV; + } +#endif for (y = 0; y < height; ++y) { NV12ToRGB24Row(src_y, src_uv, dst_rgb24, yuvconstants, width); @@ -4119,6 +4324,11 @@ int NV21ToRGB24Matrix(const uint8_t* src_y, } } #endif +#if defined(HAS_NV21TORGB24ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + NV21ToRGB24Row = NV21ToRGB24Row_RVV; + } +#endif for (y = 0; y < height; ++y) { NV21ToRGB24Row(src_y, src_vu, dst_rgb24, yuvconstants, width); @@ -4460,6 +4670,8 @@ int Android420ToARGBMatrix(const uint8_t* src_y, // General case fallback creates NV12 align_buffer_64(plane_uv, halfwidth * 2 * halfheight); + if (!plane_uv) + return 1; dst_uv = plane_uv; for (y = 0; y < halfheight; ++y) { WeavePixels(src_u, src_v, src_pixel_stride_uv, dst_uv, halfwidth); @@ -5772,6 +5984,8 @@ int I420ToRGB565Dither(const uint8_t* src_y, { // Allocate a row of argb. 
align_buffer_64(row_argb, width * 4); + if (!row_argb) + return 1; for (y = 0; y < height; ++y) { I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width); ARGBToRGB565DitherRow(row_argb, dst_rgb565, @@ -6020,6 +6234,12 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif +#if defined(HAS_SCALEROWUP2_BILINEAR_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_RVV; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV; + } +#endif // alloc 4 lines temp const int row_size = (width + 31) & ~31; @@ -6028,6 +6248,8 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, uint8_t* temp_u_2 = row + row_size; uint8_t* temp_v_1 = row + row_size * 2; uint8_t* temp_v_2 = row + row_size * 3; + if (!row) + return 1; ScaleRowUp2_Linear(src_u, temp_u_1, width); ScaleRowUp2_Linear(src_v, temp_v_1, width); @@ -6151,12 +6373,19 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y, ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif +#if defined(HAS_SCALEROWUP2_LINEAR_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV; + } +#endif // alloc 2 lines temp const int row_size = (width + 31) & ~31; align_buffer_64(row, row_size * 2); uint8_t* temp_u = row; uint8_t* temp_v = row + row_size; + if (!row) + return 1; for (y = 0; y < height; ++y) { ScaleRowUp2_Linear(src_u, temp_u, width); @@ -6276,6 +6505,12 @@ static int I420ToRGB24MatrixBilinear(const uint8_t* src_y, ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif +#if defined(HAS_SCALEROWUP2_BILINEAR_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_RVV; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV; + } +#endif // alloc 4 lines temp const int row_size = (width + 31) & ~31; @@ -6284,6 +6519,8 @@ static int I420ToRGB24MatrixBilinear(const uint8_t* src_y, uint8_t* temp_u_2 = row + row_size; uint8_t* temp_v_1 = row + row_size * 2; uint8_t* 
temp_v_2 = row + row_size * 3; + if (!row) + return 1; ScaleRowUp2_Linear(src_u, temp_u_1, width); ScaleRowUp2_Linear(src_v, temp_v_1, width); @@ -6390,6 +6627,8 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, uint16_t* temp_u_2 = (uint16_t*)(row) + row_size; uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; + if (!row) + return 1; ScaleRowUp2_Linear_12(src_u, temp_u_1, width); ScaleRowUp2_Linear_12(src_v, temp_v_1, width); @@ -6487,6 +6726,8 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y, align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_u = (uint16_t*)(row); uint16_t* temp_v = (uint16_t*)(row) + row_size; + if (!row) + return 1; for (y = 0; y < height; ++y) { ScaleRowUp2_Linear_12(src_u, temp_u, width); @@ -6577,6 +6818,8 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y, uint16_t* temp_u_2 = (uint16_t*)(row) + row_size; uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; + if (!row) + return 1; ScaleRowUp2_Linear_12(src_u, temp_u_1, width); ScaleRowUp2_Linear_12(src_v, temp_v_1, width); @@ -6673,6 +6916,8 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y, align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_u = (uint16_t*)(row); uint16_t* temp_v = (uint16_t*)(row) + row_size; + if (!row) + return 1; for (y = 0; y < height; ++y) { ScaleRowUp2_Linear_12(src_u, temp_u, width); @@ -6837,6 +7082,12 @@ static int I420AlphaToARGBMatrixBilinear( ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif +#if defined(HAS_SCALEROWUP2_BILINEAR_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_RVV; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV; + } +#endif // alloc 4 lines temp const int row_size = (width + 31) & ~31; @@ -6845,6 +7096,8 @@ static int I420AlphaToARGBMatrixBilinear( uint8_t* temp_u_2 = row + row_size; uint8_t* temp_v_1 = row + 
row_size * 2; uint8_t* temp_v_2 = row + row_size * 3; + if (!row) + return 1; ScaleRowUp2_Linear(src_u, temp_u_1, width); ScaleRowUp2_Linear(src_v, temp_v_1, width); @@ -7032,12 +7285,19 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif +#if defined(HAS_SCALEROWUP2_LINEAR_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV; + } +#endif // alloc 2 lines temp const int row_size = (width + 31) & ~31; align_buffer_64(row, row_size * 2); uint8_t* temp_u = row; uint8_t* temp_v = row + row_size; + if (!row) + return 1; for (y = 0; y < height; ++y) { ScaleRowUp2_Linear(src_u, temp_u, width); @@ -7179,6 +7439,8 @@ static int I010AlphaToARGBMatrixBilinear( uint16_t* temp_u_2 = (uint16_t*)(row) + row_size; uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; + if (!row) + return 1; ScaleRowUp2_Linear_12(src_u, temp_u_1, width); ScaleRowUp2_Linear_12(src_v, temp_v_1, width); @@ -7338,6 +7600,8 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y, align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_u = (uint16_t*)(row); uint16_t* temp_v = (uint16_t*)(row) + row_size; + if (!row) + return 1; for (y = 0; y < height; ++y) { ScaleRowUp2_Linear(src_u, temp_u, width); @@ -7423,6 +7687,8 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_uv_1 = (uint16_t*)(row); uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size; + if (!row) + return 1; Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); @@ -7512,6 +7778,8 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y, const int row_size = (2 * width + 31) & ~31; align_buffer_64(row, row_size * sizeof(uint16_t)); uint16_t* temp_uv = (uint16_t*)(row); + if (!row) + return 1; for (y = 0; y < height; 
++y) { ScaleRowUp2_Linear(src_uv, temp_uv, width); @@ -7591,6 +7859,8 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_uv_1 = (uint16_t*)(row); uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size; + if (!row) + return 1; Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); @@ -7680,6 +7950,8 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, const int row_size = (2 * width + 31) & ~31; align_buffer_64(row, row_size * sizeof(uint16_t)); uint16_t* temp_uv = (uint16_t*)(row); + if (!row) + return 1; for (y = 0; y < height; ++y) { ScaleRowUp2_Linear(src_uv, temp_uv, width); @@ -7770,12 +8042,19 @@ static int I422ToRGB24MatrixLinear(const uint8_t* src_y, ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif +#if defined(HAS_SCALEROWUP2_LINEAR_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV; + } +#endif // alloc 2 lines temp const int row_size = (width + 31) & ~31; align_buffer_64(row, row_size * 2); uint8_t* temp_u = row; uint8_t* temp_v = row + row_size; + if (!row) + return 1; for (y = 0; y < height; ++y) { ScaleRowUp2_Linear(src_u, temp_u, width); diff --git a/files/source/convert_from.cc b/source/convert_from.cc index 4102d610..e69da9e9 100644 --- a/files/source/convert_from.cc +++ b/source/convert_from.cc @@ -52,19 +52,26 @@ static int I420ToI4xx(const uint8_t* src_y, const int dst_y_height = Abs(src_y_height); const int src_uv_width = SUBSAMPLE(src_y_width, 1, 1); const int src_uv_height = SUBSAMPLE(src_y_height, 1, 1); + int r; if (src_y_width == 0 || src_y_height == 0 || dst_uv_width <= 0 || dst_uv_height <= 0) { return -1; } if (dst_y) { - ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y, - dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear); + r = ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y, + dst_stride_y, 
dst_y_width, dst_y_height, kFilterBilinear); + if (r != 0) { + return r; + } } - ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, - dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); - ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, - dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); - return 0; + r = ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, + dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); + if (r != 0) { + return r; + } + r = ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, + dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); + return r; } // Convert 8 bit YUV to 10 bit. @@ -223,21 +230,28 @@ int I010ToI410(const uint16_t* src_y, int dst_stride_v, int width, int height) { + int r; if (width == 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); - } - ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), - SUBSAMPLE(height, 1, 1), dst_u, dst_stride_u, Abs(width), - Abs(height), kFilterBilinear); - ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), - SUBSAMPLE(height, 1, 1), dst_v, dst_stride_v, Abs(width), - Abs(height), kFilterBilinear); - return 0; + r = ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } + } + r = ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), + SUBSAMPLE(height, 1, 1), dst_u, dst_stride_u, Abs(width), + Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), + SUBSAMPLE(height, 1, 1), dst_v, dst_stride_v, Abs(width), + Abs(height), kFilterBilinear); + return r; } // 422 chroma to 444 chroma, 10/12 bit version @@ -256,19 +270,26 @@ int I210ToI410(const uint16_t* src_y, int dst_stride_v, int width, int height) { + int 
r; if (width == 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); + r = ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } } - ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u, - dst_stride_u, Abs(width), Abs(height), kFilterBilinear); - ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v, - dst_stride_v, Abs(width), Abs(height), kFilterBilinear); - return 0; + r = ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u, + dst_stride_u, Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v, + dst_stride_v, Abs(width), Abs(height), kFilterBilinear); + return r; } // 422 chroma is 1/2 width, 1x height @@ -288,19 +309,26 @@ int I422ToI444(const uint8_t* src_y, int dst_stride_v, int width, int height) { + int r; if (width == 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); + r = ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } } - ScalePlane(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u, - dst_stride_u, Abs(width), Abs(height), kFilterBilinear); - ScalePlane(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v, - dst_stride_v, Abs(width), Abs(height), kFilterBilinear); - return 0; + r = ScalePlane(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u, + dst_stride_u, Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } + r = ScalePlane(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v, + dst_stride_v, Abs(width), Abs(height), kFilterBilinear); + 
return r; } // Copy to I400. Source can be I420,422,444,400,NV12,NV21 diff --git a/files/source/convert_from_argb.cc b/source/convert_from_argb.cc index c3d037c4..b45de8c8 100644 --- a/files/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -463,6 +463,8 @@ int ARGBToNV12(const uint8_t* src_argb, // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + if (!row_u) + return 1; for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); @@ -661,6 +663,8 @@ int ARGBToNV21(const uint8_t* src_argb, // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + if (!row_u) + return 1; for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); @@ -846,6 +850,8 @@ int ABGRToNV12(const uint8_t* src_abgr, // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + if (!row_u) + return 1; for (y = 0; y < height - 1; y += 2) { ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width); @@ -1032,6 +1038,8 @@ int ABGRToNV21(const uint8_t* src_abgr, // Allocate a rows of uv. 
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + if (!row_u) + return 1; for (y = 0; y < height - 1; y += 2) { ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width); @@ -1232,6 +1240,8 @@ int ARGBToYUY2(const uint8_t* src_argb, align_buffer_64(row_y, ((width + 63) & ~63) * 2); uint8_t* row_u = row_y + ((width + 63) & ~63); uint8_t* row_v = row_u + ((width + 63) & ~63) / 2; + if (!row_y) + return 1; for (y = 0; y < height; ++y) { ARGBToUVRow(src_argb, 0, row_u, row_v, width); @@ -1426,6 +1436,8 @@ int ARGBToUYVY(const uint8_t* src_argb, align_buffer_64(row_y, ((width + 63) & ~63) * 2); uint8_t* row_u = row_y + ((width + 63) & ~63); uint8_t* row_v = row_u + ((width + 63) & ~63) / 2; + if (!row_y) + return 1; for (y = 0; y < height; ++y) { ARGBToUVRow(src_argb, 0, row_u, row_v, width); @@ -1527,6 +1539,7 @@ int ARGBToI400(const uint8_t* src_argb, return 0; } +#ifndef __riscv // Shuffle table for converting ARGB to RGBA. static const uvec8 kShuffleMaskARGBToRGBA = { 3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u}; @@ -1542,6 +1555,47 @@ int ARGBToRGBA(const uint8_t* src_argb, return ARGBShuffle(src_argb, src_stride_argb, dst_rgba, dst_stride_rgba, (const uint8_t*)(&kShuffleMaskARGBToRGBA), width, height); } +#else +// Convert ARGB to RGBA. +LIBYUV_API +int ARGBToRGBA(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_rgba, + int dst_stride_rgba, + int width, + int height) { + int y; + void (*ARGBToRGBARow)(const uint8_t* src_argb, uint8_t* dst_rgba, int width) = + ARGBToRGBARow_C; + if (!src_argb || !dst_rgba || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce rows. 
+ if (src_stride_argb == width * 4 && dst_stride_rgba == width * 4) { + width *= height; + height = 1; + src_stride_argb = dst_stride_rgba = 0; + } + +#if defined(HAS_ARGBTORGBAROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToRGBARow = ARGBToRGBARow_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + ARGBToRGBARow(src_argb, dst_rgba, width); + src_argb += src_stride_argb; + dst_rgba += dst_stride_rgba; + } + return 0; +} +#endif // Convert ARGB To RGB24. LIBYUV_API @@ -3230,14 +3284,21 @@ int RAWToJNV21(const uint8_t* src_raw, } #endif { +#if defined(HAS_RAWTOYJROW) // Allocate a row of uv. - align_buffer_64(row_uj, ((halfwidth + 31) & ~31) * 2); - uint8_t* row_vj = row_uj + ((halfwidth + 31) & ~31); -#if !defined(HAS_RAWTOYJROW) - // Allocate 2 rows of ARGB. - const int row_size = (width * 4 + 31) & ~31; - align_buffer_64(row, row_size * 2); + const int row_uv_size = ((halfwidth + 31) & ~31); + align_buffer_64(row_uj, row_uv_size * 2); + uint8_t* row_vj = row_uj + row_uv_size; +#else + // Allocate row of uv and 2 rows of ARGB. 
+ const int row_size = ((width * 4 + 31) & ~31); + const int row_uv_size = ((halfwidth + 31) & ~31); + align_buffer_64(row_uj, row_uv_size * 2 + row_size * 2); + uint8_t* row_vj = row_uj + row_uv_size; + uint8_t* row = row_vj + row_uv_size; #endif + if (!row_uj) + return 1; for (y = 0; y < height - 1; y += 2) { #if defined(HAS_RAWTOYJROW) @@ -3269,9 +3330,6 @@ int RAWToJNV21(const uint8_t* src_raw, ARGBToYJRow(row, dst_y, width); #endif } -#if !defined(HAS_RAWTOYJROW) - free_aligned_buffer_64(row); -#endif free_aligned_buffer_64(row_uj); } return 0; diff --git a/files/source/convert_jpeg.cc b/source/convert_jpeg.cc index d7556ee9..d7556ee9 100644 --- a/files/source/convert_jpeg.cc +++ b/source/convert_jpeg.cc diff --git a/files/source/convert_to_argb.cc b/source/convert_to_argb.cc index 84df16c8..84df16c8 100644 --- a/files/source/convert_to_argb.cc +++ b/source/convert_to_argb.cc diff --git a/files/source/convert_to_i420.cc b/source/convert_to_i420.cc index 5869ecd7..5869ecd7 100644 --- a/files/source/convert_to_i420.cc +++ b/source/convert_to_i420.cc diff --git a/files/source/cpu_id.cc b/source/cpu_id.cc index 0c4a1581..eedce16b 100644 --- a/files/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -292,10 +292,12 @@ static SAFEBUFFERS int GetCpuFlags(void) { int cpu_info0[4] = {0, 0, 0, 0}; int cpu_info1[4] = {0, 0, 0, 0}; int cpu_info7[4] = {0, 0, 0, 0}; + int cpu_einfo7[4] = {0, 0, 0, 0}; CpuId(0, 0, cpu_info0); CpuId(1, 0, cpu_info1); if (cpu_info0[0] >= 7) { CpuId(7, 0, cpu_info7); + CpuId(7, 1, cpu_einfo7); } cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | @@ -308,7 +310,9 @@ static SAFEBUFFERS int GetCpuFlags(void) { ((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers cpu_info |= kCpuHasAVX | ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) | - ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0); + ((cpu_info1[2] & 0x20000000) ? 
kCpuHasF16C : 0) | + ((cpu_einfo7[0] & 0x00000010) ? kCpuHasAVXVNNI : 0) | + ((cpu_einfo7[3] & 0x00000010) ? kCpuHasAVXVNNIINT8 : 0); // Detect AVX512bw if ((GetXCR0() & 0xe0) == 0xe0) { @@ -318,8 +322,7 @@ static SAFEBUFFERS int GetCpuFlags(void) { cpu_info |= (cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0; cpu_info |= (cpu_info7[2] & 0x00000800) ? kCpuHasAVX512VNNI : 0; cpu_info |= (cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0; - cpu_info |= (cpu_info7[2] & 0x00004000) ? kCpuHasAVX512VPOPCNTDQ : 0; - cpu_info |= (cpu_info7[2] & 0x00000100) ? kCpuHasGFNI : 0; + cpu_info |= (cpu_einfo7[3] & 0x00080000) ? kCpuHasAVX10 : 0; } } #endif diff --git a/files/source/mjpeg_decoder.cc b/source/mjpeg_decoder.cc index 0141da8a..0141da8a 100644 --- a/files/source/mjpeg_decoder.cc +++ b/source/mjpeg_decoder.cc diff --git a/files/source/mjpeg_validate.cc b/source/mjpeg_validate.cc index ba0a03ab..ba0a03ab 100644 --- a/files/source/mjpeg_validate.cc +++ b/source/mjpeg_validate.cc diff --git a/files/source/planar_functions.cc b/source/planar_functions.cc index d115a2a1..1c94e260 100644 --- a/files/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -2783,37 +2783,6 @@ int RGB24Mirror(const uint8_t* src_rgb24, return 0; } -// Get a blender that optimized for the CPU and pixel count. -// As there are 6 blenders to choose from, the caller should try to use -// the same blend function for all pixels if possible. 
-LIBYUV_API -ARGBBlendRow GetARGBBlend() { - void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1, - uint8_t* dst_argb, int width) = ARGBBlendRow_C; -#if defined(HAS_ARGBBLENDROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBBlendRow = ARGBBlendRow_SSSE3; - return ARGBBlendRow; - } -#endif -#if defined(HAS_ARGBBLENDROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBBlendRow = ARGBBlendRow_NEON; - } -#endif -#if defined(HAS_ARGBBLENDROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBBlendRow = ARGBBlendRow_MSA; - } -#endif -#if defined(HAS_ARGBBLENDROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBBlendRow = ARGBBlendRow_LSX; - } -#endif - return ARGBBlendRow; -} - // Alpha Blend 2 ARGB images and store to destination. LIBYUV_API int ARGBBlend(const uint8_t* src_argb0, @@ -2826,7 +2795,7 @@ int ARGBBlend(const uint8_t* src_argb0, int height) { int y; void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1, - uint8_t* dst_argb, int width) = GetARGBBlend(); + uint8_t* dst_argb, int width) = ARGBBlendRow_C; if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -2843,7 +2812,31 @@ int ARGBBlend(const uint8_t* src_argb0, height = 1; src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; } - +#if defined(HAS_ARGBBLENDROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBBlendRow = ARGBBlendRow_SSSE3; + } +#endif +#if defined(HAS_ARGBBLENDROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBBlendRow = ARGBBlendRow_NEON; + } +#endif +#if defined(HAS_ARGBBLENDROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBBlendRow = ARGBBlendRow_MSA; + } +#endif +#if defined(HAS_ARGBBLENDROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBBlendRow = ARGBBlendRow_LSX; + } +#endif +#if defined(HAS_ARGBBLENDROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBBlendRow = ARGBBlendRow_RVV; + } +#endif for (y = 0; y < height; ++y) { ARGBBlendRow(src_argb0, src_argb1, dst_argb, width); src_argb0 += src_stride_argb0; @@ -2903,6 +2896,11 @@ int 
BlendPlane(const uint8_t* src_y0, } } #endif +#if defined(HAS_BLENDPLANEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + BlendPlaneRow = BlendPlaneRow_RVV; + } +#endif for (y = 0; y < height; ++y) { BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width); @@ -2980,6 +2978,11 @@ int I420Blend(const uint8_t* src_y0, } } #endif +#if defined(HAS_BLENDPLANEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + BlendPlaneRow = BlendPlaneRow_RVV; + } +#endif if (!IS_ALIGNED(width, 2)) { ScaleRowDown2 = ScaleRowDown2Box_Odd_C; } @@ -3016,9 +3019,16 @@ int I420Blend(const uint8_t* src_y0, } } #endif +#if defined(HAS_SCALEROWDOWN2_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleRowDown2 = ScaleRowDown2Box_RVV; + } +#endif // Row buffer for intermediate alpha pixels. align_buffer_64(halfalpha, halfwidth); + if (!halfalpha) + return 1; for (y = 0; y < height; y += 2) { // last row of odd height image use 1 row of alpha instead of 2. if (y == (height - 1)) { @@ -4702,6 +4712,8 @@ int GaussPlane_F32(const float* src, { // 2 pixels on each side, but aligned out to 16 bytes. align_buffer_64(rowbuf, (4 + width + 4) * 4); + if (!rowbuf) + return 1; memset(rowbuf, 0, 16); memset(rowbuf + (4 + width) * 4, 0, 16); float* row = (float*)(rowbuf + 16); @@ -4860,6 +4872,8 @@ static int ARGBSobelize(const uint8_t* src_argb, uint8_t* row_y0 = row_y + kEdge; uint8_t* row_y1 = row_y0 + row_size; uint8_t* row_y2 = row_y1 + row_size; + if (!rows) + return 1; ARGBToYJRow(src_argb, row_y0, width); row_y0[-1] = row_y0[0]; memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind. 
@@ -5340,6 +5354,11 @@ int ARGBExtractAlpha(const uint8_t* src_argb, : ARGBExtractAlphaRow_Any_LSX; } #endif +#if defined(HAS_ARGBEXTRACTALPHAROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBExtractAlphaRow = ARGBExtractAlphaRow_RVV; + } +#endif for (int y = 0; y < height; ++y) { ARGBExtractAlphaRow(src_argb, dst_a, width); @@ -5391,6 +5410,11 @@ int ARGBCopyYToAlpha(const uint8_t* src_y, } } #endif +#if defined(HAS_ARGBCOPYYTOALPHAROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_RVV; + } +#endif for (y = 0; y < height; ++y) { ARGBCopyYToAlphaRow(src_y, dst_argb, width); @@ -5636,6 +5660,8 @@ int UYVYToNV12(const uint8_t* src_uyvy, int awidth = halfwidth * 2; // row of y and 2 rows of uv align_buffer_64(rows, awidth * 3); + if (!rows) + return 1; for (y = 0; y < height - 1; y += 2) { // Split Y from UV. diff --git a/files/source/rotate.cc b/source/rotate.cc index 8d3978c7..3f8332c3 100644 --- a/files/source/rotate.cc +++ b/source/rotate.cc @@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include <assert.h> + #include "libyuv/rotate.h" #include "libyuv/convert.h" @@ -140,6 +142,9 @@ void RotatePlane180(const uint8_t* src, int height) { // Swap top and bottom row and mirror the content. Uses a temporary row. 
align_buffer_64(row, width); + assert(row); + if (!row) + return; const uint8_t* src_bot = src + src_stride * (height - 1); uint8_t* dst_bot = dst + dst_stride * (height - 1); int half_height = (height + 1) >> 1; @@ -489,13 +494,12 @@ int RotatePlane(const uint8_t* src, return -1; } -LIBYUV_API -void TransposePlane_16(const uint16_t* src, - int src_stride, - uint16_t* dst, - int dst_stride, - int width, - int height) { +static void TransposePlane_16(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width, + int height) { int i = height; // Work across the source in 8x8 tiles while (i >= 8) { @@ -544,24 +548,29 @@ static void RotatePlane180_16(const uint16_t* src, int dst_stride, int width, int height) { - // Swap top and bottom row and mirror the content. Uses a temporary row. - align_buffer_64_16(row, width); const uint16_t* src_bot = src + src_stride * (height - 1); uint16_t* dst_bot = dst + dst_stride * (height - 1); int half_height = (height + 1) >> 1; int y; + // Swap top and bottom row and mirror the content. Uses a temporary row. + align_buffer_64(row, width * 2); + uint16_t* row_tmp = (uint16_t*)row; + assert(row); + if (!row) + return; + // Odd height will harmlessly mirror the middle row twice. 
for (y = 0; y < half_height; ++y) { - CopyRow_16_C(src, row, width); // Copy top row into buffer - MirrorRow_16_C(src_bot, dst, width); // Mirror bottom row into top row - MirrorRow_16_C(row, dst_bot, width); // Mirror buffer into bottom row + CopyRow_16_C(src, row_tmp, width); // Copy top row into buffer + MirrorRow_16_C(src_bot, dst, width); // Mirror bottom row into top row + MirrorRow_16_C(row_tmp, dst_bot, width); // Mirror buffer into bottom row src += src_stride; dst += dst_stride; src_bot -= src_stride; dst_bot -= dst_stride; } - free_aligned_buffer_64_16(row); + free_aligned_buffer_64(row); } LIBYUV_API @@ -691,6 +700,7 @@ int I422Rotate(const uint8_t* src_y, enum RotationMode mode) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; + int r; if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || !dst_u || !dst_v) { return -1; @@ -726,23 +736,35 @@ int I422Rotate(const uint8_t* src_y, case kRotate90: RotatePlane90(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth, height); - ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u, - halfheight, width, kFilterBilinear); + r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, + dst_stride_u, halfheight, width, kFilterBilinear); + if (r != 0) { + return r; + } RotatePlane90(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth, height); - ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v, - halfheight, width, kFilterLinear); + r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, + dst_stride_v, halfheight, width, kFilterLinear); + if (r != 0) { + return r; + } RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height); return 0; case kRotate270: RotatePlane270(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth, height); - ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u, - halfheight, width, kFilterBilinear); + r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, + 
dst_stride_u, halfheight, width, kFilterBilinear); + if (r != 0) { + return r; + } RotatePlane270(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth, height); - ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v, - halfheight, width, kFilterLinear); + r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, + dst_stride_v, halfheight, width, kFilterLinear); + if (r != 0) { + return r; + } RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height); return 0; case kRotate180: @@ -1056,6 +1078,7 @@ int I210Rotate(const uint16_t* src_y, enum RotationMode mode) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; + int r; if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || !dst_u || !dst_v) { return -1; @@ -1091,23 +1114,35 @@ int I210Rotate(const uint16_t* src_y, case kRotate90: RotatePlane90_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth, height); - ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u, - halfheight, width, kFilterBilinear); + r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, + dst_stride_u, halfheight, width, kFilterBilinear); + if (r != 0) { + return r; + } RotatePlane90_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth, height); - ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v, - halfheight, width, kFilterLinear); + r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, + dst_stride_v, halfheight, width, kFilterLinear); + if (r != 0) { + return r; + } RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); return 0; case kRotate270: RotatePlane270_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth, height); - ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u, - halfheight, width, kFilterBilinear); + r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, + dst_stride_u, halfheight, width, kFilterBilinear); + if (r != 0) { + return 
r; + } RotatePlane270_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth, height); - ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v, - halfheight, width, kFilterLinear); + r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, + dst_stride_v, halfheight, width, kFilterLinear); + if (r != 0) { + return r; + } RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); return 0; diff --git a/files/source/rotate_any.cc b/source/rotate_any.cc index 88ca7876..88ca7876 100644 --- a/files/source/rotate_any.cc +++ b/source/rotate_any.cc diff --git a/files/source/rotate_argb.cc b/source/rotate_argb.cc index c7239010..d55fac4f 100644 --- a/files/source/rotate_argb.cc +++ b/source/rotate_argb.cc @@ -69,6 +69,11 @@ static int ARGBTranspose(const uint8_t* src_argb, } } #endif +#if defined(HAS_SCALEARGBROWDOWNEVEN_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleARGBRowDownEven = ScaleARGBRowDownEven_RVV; + } +#endif for (i = 0; i < width; ++i) { // column of source to row of dest. ScaleARGBRowDownEven(src_argb, 0, src_pixel_step, dst_argb, height); @@ -115,7 +120,6 @@ static int ARGBRotate180(const uint8_t* src_argb, int width, int height) { // Swap first and last row and mirror the content. Uses a temporary row. 
- align_buffer_64(row, width * 4); const uint8_t* src_bot = src_argb + src_stride_argb * (height - 1); uint8_t* dst_bot = dst_argb + dst_stride_argb * (height - 1); int half_height = (height + 1) >> 1; @@ -124,6 +128,9 @@ static int ARGBRotate180(const uint8_t* src_argb, ARGBMirrorRow_C; void (*CopyRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = CopyRow_C; + align_buffer_64(row, width * 4); + if (!row) + return 1; #if defined(HAS_ARGBMIRRORROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBMirrorRow = ARGBMirrorRow_Any_NEON; diff --git a/files/source/rotate_common.cc b/source/rotate_common.cc index 4b496d1b..e72608e9 100644 --- a/files/source/rotate_common.cc +++ b/source/rotate_common.cc @@ -120,37 +120,6 @@ void TransposeWx8_16_C(const uint16_t* src, } } -void TransposeUVWx8_16_C(const uint16_t* src, - int src_stride, - uint16_t* dst_a, - int dst_stride_a, - uint16_t* dst_b, - int dst_stride_b, - int width) { - int i; - for (i = 0; i < width; ++i) { - dst_a[0] = src[0 * src_stride + 0]; - dst_b[0] = src[0 * src_stride + 1]; - dst_a[1] = src[1 * src_stride + 0]; - dst_b[1] = src[1 * src_stride + 1]; - dst_a[2] = src[2 * src_stride + 0]; - dst_b[2] = src[2 * src_stride + 1]; - dst_a[3] = src[3 * src_stride + 0]; - dst_b[3] = src[3 * src_stride + 1]; - dst_a[4] = src[4 * src_stride + 0]; - dst_b[4] = src[4 * src_stride + 1]; - dst_a[5] = src[5 * src_stride + 0]; - dst_b[5] = src[5 * src_stride + 1]; - dst_a[6] = src[6 * src_stride + 0]; - dst_b[6] = src[6 * src_stride + 1]; - dst_a[7] = src[7 * src_stride + 0]; - dst_b[7] = src[7 * src_stride + 1]; - src += 2; - dst_a += dst_stride_a; - dst_b += dst_stride_b; - } -} - void TransposeWxH_16_C(const uint16_t* src, int src_stride, uint16_t* dst, diff --git a/files/source/rotate_gcc.cc b/source/rotate_gcc.cc index fd5eee05..fd5eee05 100644 --- a/files/source/rotate_gcc.cc +++ b/source/rotate_gcc.cc diff --git a/files/source/rotate_lsx.cc b/source/rotate_lsx.cc index 94a2b91c..94a2b91c 100644 --- 
a/files/source/rotate_lsx.cc +++ b/source/rotate_lsx.cc diff --git a/files/source/rotate_msa.cc b/source/rotate_msa.cc index 99bdca65..99bdca65 100644 --- a/files/source/rotate_msa.cc +++ b/source/rotate_msa.cc diff --git a/files/source/rotate_neon.cc b/source/rotate_neon.cc index 569a7318..569a7318 100644 --- a/files/source/rotate_neon.cc +++ b/source/rotate_neon.cc diff --git a/files/source/rotate_neon64.cc b/source/rotate_neon64.cc index 95047fa7..95047fa7 100644 --- a/files/source/rotate_neon64.cc +++ b/source/rotate_neon64.cc diff --git a/files/source/rotate_win.cc b/source/rotate_win.cc index a78873f8..a78873f8 100644 --- a/files/source/rotate_win.cc +++ b/source/rotate_win.cc diff --git a/files/source/row_any.cc b/source/row_any.cc index e574543c..e574543c 100644 --- a/files/source/row_any.cc +++ b/source/row_any.cc diff --git a/files/source/row_common.cc b/source/row_common.cc index 8be37fb5..3afc4b4d 100644 --- a/files/source/row_common.cc +++ b/source/row_common.cc @@ -48,7 +48,6 @@ extern "C" { defined(__i386__) || defined(_M_IX86)) #define LIBYUV_ARGBTOUV_PAVGB 1 #define LIBYUV_RGBTOU_TRUNCATE 1 -#define LIBYUV_ATTENUATE_DUP 1 #endif #if defined(LIBYUV_BIT_EXACT) #define LIBYUV_UNATTENUATE_DUP 1 @@ -282,6 +281,54 @@ void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) { } } +void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width) { + int x; + for (x = 0; x < width; ++x) { + uint8_t b = src_argb[0]; + uint8_t g = src_argb[1]; + uint8_t r = src_argb[2]; + uint8_t a = src_argb[3]; + dst_abgr[0] = r; + dst_abgr[1] = g; + dst_abgr[2] = b; + dst_abgr[3] = a; + dst_abgr += 4; + src_argb += 4; + } +} + +void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width) { + int x; + for (x = 0; x < width; ++x) { + uint8_t b = src_argb[0]; + uint8_t g = src_argb[1]; + uint8_t r = src_argb[2]; + uint8_t a = src_argb[3]; + dst_bgra[0] = a; + dst_bgra[1] = r; + dst_bgra[2] = g; + dst_bgra[3] = b; + dst_bgra += 4; + 
src_argb += 4; + } +} + +void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgba, int width) { + int x; + for (x = 0; x < width; ++x) { + uint8_t b = src_argb[0]; + uint8_t g = src_argb[1]; + uint8_t r = src_argb[2]; + uint8_t a = src_argb[3]; + dst_rgba[0] = a; + dst_rgba[1] = b; + dst_rgba[2] = g; + dst_rgba[3] = r; + dst_rgba += 4; + src_argb += 4; + } +} + void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; for (x = 0; x < width; ++x) { @@ -310,6 +357,22 @@ void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { } } +void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width) { + int x; + for (x = 0; x < width; ++x) { + uint8_t a = src_rgba[0]; + uint8_t b = src_rgba[1]; + uint8_t g = src_rgba[2]; + uint8_t r = src_rgba[3]; + dst_argb[0] = b; + dst_argb[1] = g; + dst_argb[2] = r; + dst_argb[3] = a; + dst_argb += 4; + src_rgba += 4; + } +} + void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; for (x = 0; x < width - 1; x += 2) { @@ -518,6 +581,22 @@ void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) { } } +void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width) { + int x; + for (x = 0; x < width; ++x) { + uint16_t b = src_ar64[0]; + uint16_t g = src_ar64[1]; + uint16_t r = src_ar64[2]; + uint16_t a = src_ar64[3]; + dst_ab64[0] = r; + dst_ab64[1] = g; + dst_ab64[2] = b; + dst_ab64[3] = a; + dst_ab64 += 4; + src_ar64 += 4; + } +} + // TODO(fbarchard): Make shuffle compatible with SIMD versions void AR64ShuffleRow_C(const uint8_t* src_ar64, uint8_t* dst_ar64, @@ -1876,9 +1955,10 @@ static __inline void YPixel(uint8_t y, int yg = yuvconstants->kYToRgb[0]; #endif uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16; - *b = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6)); - *g = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6)); - *r = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6)); + uint8_t 
b8 = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6)); + *b = b8; + *g = b8; + *r = b8; } void I444ToARGBRow_C(const uint8_t* src_y, @@ -3369,12 +3449,7 @@ void BlendPlaneRow_C(const uint8_t* src0, } #undef UBLEND -#if LIBYUV_ATTENUATE_DUP -// This code mimics the SSSE3 version for better testability. -#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24 -#else -#define ATTENUATE(f, a) (f * a + 128) >> 8 -#endif +#define ATTENUATE(f, a) (f * a + 255) >> 8 // Multiply source RGB by alpha and store to destination. void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) { diff --git a/files/source/row_gcc.cc b/source/row_gcc.cc index e94fd04d..d8074987 100644 --- a/files/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -7441,93 +7441,106 @@ void BlendPlaneRow_AVX2(const uint8_t* src0, #ifdef HAS_ARGBATTENUATEROW_SSSE3 // Shuffle table duplicating alpha. -static const uvec8 kShuffleAlpha0 = {3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, - 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u}; -static const uvec8 kShuffleAlpha1 = {11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, - 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u}; +static const vec8 kAttenuateShuffle = {6, -128, 6, -128, 6, -128, + -128, -128, 14, -128, 14, -128, + 14, -128, -128, -128}; + // Attenuate 4 pixels at a time. void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width) { asm volatile( - "pcmpeqb %%xmm3,%%xmm3 \n" - "pslld $0x18,%%xmm3 \n" "movdqa %3,%%xmm4 \n" - "movdqa %4,%%xmm5 \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + "pslld $0x18,%%xmm5 \n" + "pxor %%xmm6,%%xmm6 \n" + "pcmpeqb %%xmm7,%%xmm7 \n" + "punpcklbw %%xmm6,%%xmm7 \n" + "sub %0,%1 \n" // 4 pixel loop. 
LABELALIGN "1: \n" - "movdqu (%0),%%xmm0 \n" - "pshufb %%xmm4,%%xmm0 \n" - "movdqu (%0),%%xmm1 \n" - "punpcklbw %%xmm1,%%xmm1 \n" - "pmulhuw %%xmm1,%%xmm0 \n" - "movdqu (%0),%%xmm1 \n" - "pshufb %%xmm5,%%xmm1 \n" - "movdqu (%0),%%xmm2 \n" - "punpckhbw %%xmm2,%%xmm2 \n" - "pmulhuw %%xmm2,%%xmm1 \n" - "movdqu (%0),%%xmm2 \n" - "lea 0x10(%0),%0 \n" - "pand %%xmm3,%%xmm2 \n" + "movdqu (%0),%%xmm6 \n" + "movdqa %%xmm6,%%xmm0 \n" + "movdqa %%xmm6,%%xmm1 \n" + "punpcklbw %%xmm5,%%xmm0 \n" + "punpckhbw %%xmm5,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "movdqa %%xmm1,%%xmm3 \n" + "pshufb %%xmm4,%%xmm2 \n" // a,a,a,0 + "pshufb %%xmm4,%%xmm3 \n" + "pmullw %%xmm2,%%xmm0 \n" // rgb * alpha + "pmullw %%xmm3,%%xmm1 \n" + "paddw %%xmm7,%%xmm0 \n" // + 255 + "paddw %%xmm7,%%xmm1 \n" "psrlw $0x8,%%xmm0 \n" "psrlw $0x8,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" - "por %%xmm2,%%xmm0 \n" - "movdqu %%xmm0,(%1) \n" - "lea 0x10(%1),%1 \n" + "pand %%xmm5,%%xmm6 \n" + "por %%xmm6,%%xmm0 \n" + "movdqu %%xmm0,(%0,%1) \n" + "lea 0x10(%0),%0 \n" "sub $0x4,%2 \n" "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleAlpha0), // %3 - "m"(kShuffleAlpha1) // %4 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kAttenuateShuffle) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBATTENUATEROW_SSSE3 #ifdef HAS_ARGBATTENUATEROW_AVX2 + // Shuffle table duplicating alpha. -static const uvec8 kShuffleAlpha_AVX2 = {6u, 7u, 6u, 7u, 6u, 7u, - 128u, 128u, 14u, 15u, 14u, 15u, - 14u, 15u, 128u, 128u}; +static const lvec8 kAttenuateShuffle_AVX2 = { + 6, -128, 6, -128, 6, -128, -128, -128, 14, -128, 14, + -128, 14, -128, -128, -128, 22, -128, 22, -128, 22, -128, + -128, -128, 30, -128, 30, -128, 30, -128, -128, -128}; + // Attenuate 8 pixels at a time. 
void ARGBAttenuateRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, int width) { asm volatile( - "vbroadcastf128 %3,%%ymm4 \n" + "vmovdqa %3,%%ymm4 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpslld $0x18,%%ymm5,%%ymm5 \n" + "vpxor %%ymm6,%%ymm6,%%ymm6 \n" + "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" + "vpunpcklbw %%ymm6,%%ymm7,%%ymm7 \n" "sub %0,%1 \n" // 8 pixel loop. LABELALIGN "1: \n" "vmovdqu (%0),%%ymm6 \n" - "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n" - "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n" + "vpunpcklbw %%ymm5,%%ymm6,%%ymm0 \n" + "vpunpckhbw %%ymm5,%%ymm6,%%ymm1 \n" "vpshufb %%ymm4,%%ymm0,%%ymm2 \n" "vpshufb %%ymm4,%%ymm1,%%ymm3 \n" - "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" - "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" - "vpand %%ymm5,%%ymm6,%%ymm6 \n" + "vpmullw %%ymm2,%%ymm0,%%ymm0 \n" + "vpmullw %%ymm3,%%ymm1,%%ymm1 \n" + "vpaddw %%ymm7,%%ymm0,%%ymm0 \n" + "vpaddw %%ymm7,%%ymm1,%%ymm1 \n" "vpsrlw $0x8,%%ymm0,%%ymm0 \n" "vpsrlw $0x8,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpor %%ymm6,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm6,%%ymm1 \n" + "vpor %%ymm1,%%ymm0,%%ymm0 \n" "vmovdqu %%ymm0,0x00(%0,%1,1) \n" "lea 0x20(%0),%0 \n" "sub $0x8,%2 \n" "jg 1b \n" "vzeroupper \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleAlpha_AVX2) // %3 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kAttenuateShuffle_AVX2) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBATTENUATEROW_AVX2 diff --git a/files/source/row_lasx.cc b/source/row_lasx.cc index 1082ad80..be85022e 100644 --- a/files/source/row_lasx.cc +++ b/source/row_lasx.cc @@ -543,8 +543,8 @@ void I422ToARGB4444Row_LASX(const uint8_t* src_y, __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; __m256i vec_ubvr, vec_ugvg; __m256i const_0x80 = __lasx_xvldi(0x80); - __m256i alpha = {0xF000F000F000F000, 0xF000F000F000F000, 
0xF000F000F000F000, - 0xF000F000F000F000}; + __m256i alpha = (__m256i)v4u64{0xF000F000F000F000, 0xF000F000F000F000, + 0xF000F000F000F000, 0xF000F000F000F000}; __m256i mask = {0x00F000F000F000F0, 0x00F000F000F000F0, 0x00F000F000F000F0, 0x00F000F000F000F0}; @@ -595,8 +595,8 @@ void I422ToARGB1555Row_LASX(const uint8_t* src_y, __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; __m256i vec_ubvr, vec_ugvg; __m256i const_0x80 = __lasx_xvldi(0x80); - __m256i alpha = {0x8000800080008000, 0x8000800080008000, 0x8000800080008000, - 0x8000800080008000}; + __m256i alpha = (__m256i)v4u64{0x8000800080008000, 0x8000800080008000, + 0x8000800080008000, 0x8000800080008000}; YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); vec_ubvr = __lasx_xvilvl_h(vec_ub, vec_vr); @@ -799,8 +799,8 @@ void ARGBToUVRow_LASX(const uint8_t* src_argb0, 0x0009000900090009, 0x0009000900090009}; __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, 0x0000000700000003}; - __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_0x8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64, @@ -1037,8 +1037,8 @@ void ARGBToUV444Row_LASX(const uint8_t* src_argb, __m256i const_38 = __lasx_xvldi(38); __m256i const_94 = __lasx_xvldi(94); __m256i const_18 = __lasx_xvldi(18); - __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_0x8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, 0x0000000700000003}; for (x = 0; x < len; x++) { @@ -1609,8 +1609,8 @@ void ARGB1555ToUVRow_LASX(const uint8_t* src_argb1555, __m256i const_38 = __lasx_xvldi(0x413); __m256i 
const_94 = __lasx_xvldi(0x42F); __m256i const_18 = __lasx_xvldi(0x409); - __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lasx_xvld, src_argb1555, 0, src_argb1555, 32, next_argb1555, 0, @@ -1726,8 +1726,8 @@ void RGB565ToUVRow_LASX(const uint8_t* src_rgb565, __m256i const_38 = __lasx_xvldi(0x413); __m256i const_94 = __lasx_xvldi(0x42F); __m256i const_18 = __lasx_xvldi(0x409); - __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lasx_xvld, src_rgb565, 0, src_rgb565, 32, next_rgb565, 0, @@ -1793,8 +1793,8 @@ void RGB24ToUVRow_LASX(const uint8_t* src_rgb24, __m256i const_38 = __lasx_xvldi(0x413); __m256i const_94 = __lasx_xvldi(0x42F); __m256i const_18 = __lasx_xvldi(0x409); - __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; __m256i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18, 0x15120F0C09060300, 0x00000000001E1B18}; __m256i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908, @@ -1856,8 +1856,8 @@ void RAWToUVRow_LASX(const uint8_t* src_raw, __m256i const_38 = __lasx_xvldi(0x413); __m256i const_94 = __lasx_xvldi(0x42F); __m256i const_18 = __lasx_xvldi(0x409); - __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; __m256i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18, 0x15120F0C09060300, 
0x00000000001E1B18}; __m256i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908, @@ -2242,8 +2242,8 @@ void ARGBToUVJRow_LASX(const uint8_t* src_argb, __m256i const_21 = __lasx_xvldi(0x415); __m256i const_53 = __lasx_xvldi(0x435); __m256i const_10 = __lasx_xvldi(0x40A); - __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; __m256i shuff = {0x1614060412100200, 0x1E1C0E0C1A180A08, 0x1715070513110301, 0x1F1D0F0D1B190B09}; diff --git a/files/source/row_lsx.cc b/source/row_lsx.cc index e626072a..fa088c9e 100644 --- a/files/source/row_lsx.cc +++ b/source/row_lsx.cc @@ -565,7 +565,7 @@ void I422ToARGB4444Row_LSX(const uint8_t* src_y, __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; __m128i vec_ubvr, vec_ugvg; __m128i const_80 = __lsx_vldi(0x80); - __m128i alpha = {0xF000F000F000F000, 0xF000F000F000F000}; + __m128i alpha = (__m128i)v2u64{0xF000F000F000F000, 0xF000F000F000F000}; __m128i mask = {0x00F000F000F000F0, 0x00F000F000F000F0}; YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb); @@ -612,7 +612,7 @@ void I422ToARGB1555Row_LSX(const uint8_t* src_y, __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; __m128i vec_ubvr, vec_ugvg; __m128i const_80 = __lsx_vldi(0x80); - __m128i alpha = {0x8000800080008000, 0x8000800080008000}; + __m128i alpha = (__m128i)v2u64{0x8000800080008000, 0x8000800080008000}; YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb); vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr); @@ -792,7 +792,7 @@ void ARGBToUVRow_LSX(const uint8_t* src_argb0, __m128i const_0x26 = {0x0013001300130013, 0x0013001300130013}; __m128i const_0x5E = {0x002f002f002f002f, 0x002f002f002f002f}; __m128i const_0x12 = {0x0009000900090009, 0x0009000900090009}; - __m128i const_0x8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_0x8080 = 
(__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_argb0, 0, src_argb0, 16, src_argb0, 32, src_argb0, 48, src0, src1, src2, src3); @@ -991,7 +991,7 @@ void ARGBToUV444Row_LSX(const uint8_t* src_argb, __m128i const_38 = __lsx_vldi(38); __m128i const_94 = __lsx_vldi(94); __m128i const_18 = __lsx_vldi(18); - __m128i const_0x8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_0x8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, src0, src1, src2, src3); @@ -1533,7 +1533,7 @@ void ARGB1555ToUVRow_LSX(const uint8_t* src_argb1555, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_argb1555, 0, src_argb1555, 16, next_argb1555, 0, @@ -1642,7 +1642,7 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_rgb565, 0, src_rgb565, 16, next_rgb565, 0, @@ -1703,7 +1703,7 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; __m128i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18}; __m128i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908}; __m128i shuff0_g = {0x1613100D0A070401, 
0x00000000001F1C19}; @@ -1756,7 +1756,7 @@ void RAWToUVRow_LSX(const uint8_t* src_raw, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; __m128i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18}; __m128i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908}; __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19}; @@ -1991,7 +1991,7 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, src_bgra, 32, src_bgra, 48, @@ -2039,7 +2039,7 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48, @@ -2087,7 +2087,7 @@ void RGBAToUVRow_LSX(const uint8_t* src_rgba, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48, @@ -2136,7 +2136,7 @@ void ARGBToUVJRow_LSX(const uint8_t* src_argb, __m128i const_21 = __lsx_vldi(0x415); __m128i const_53 = __lsx_vldi(0x435); __m128i const_10 = 
__lsx_vldi(0x40A); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, @@ -2514,7 +2514,7 @@ void ARGBBlendRow_LSX(const uint8_t* src_argb, __m128i const_256 = __lsx_vldi(0x500); __m128i zero = __lsx_vldi(0); __m128i alpha = __lsx_vldi(0xFF); - __m128i control = {0xFF000000FF000000, 0xFF000000FF000000}; + __m128i control = (__m128i)v2u64{0xFF000000FF000000, 0xFF000000FF000000}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb1, 0, src_argb1, 16, @@ -2560,7 +2560,7 @@ void ARGBQuantizeRow_LSX(uint8_t* dst_argb, __m128i vec_offset = __lsx_vreplgr2vr_b(interval_offset); __m128i vec_scale = __lsx_vreplgr2vr_w(scale); __m128i zero = __lsx_vldi(0); - __m128i control = {0xFF000000FF000000, 0xFF000000FF000000}; + __m128i control = (__m128i)v2u64{0xFF000000FF000000, 0xFF000000FF000000}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, dst_argb, 0, dst_argb, 16, dst_argb, 32, dst_argb, 48, diff --git a/files/source/row_msa.cc b/source/row_msa.cc index b7d5bb5e..b7d5bb5e 100644 --- a/files/source/row_msa.cc +++ b/source/row_msa.cc diff --git a/files/source/row_neon.cc b/source/row_neon.cc index 4ed13638..31142a90 100644 --- a/files/source/row_neon.cc +++ b/source/row_neon.cc @@ -1827,19 +1827,27 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, ); } +struct RgbUVConstants { + uint8_t kRGBToU[4]; + uint8_t kRGBToV[4]; +}; + // 8x1 pixels. 
-void ARGBToUV444Row_NEON(const uint8_t* src_argb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - asm volatile( - "vmov.u8 d24, #112 \n" // UB / VR 0.875 - // coefficient - "vmov.u8 d25, #74 \n" // UG -0.5781 coefficient - "vmov.u8 d26, #38 \n" // UR -0.2969 coefficient - "vmov.u8 d27, #18 \n" // VB -0.1406 coefficient - "vmov.u8 d28, #94 \n" // VG -0.7344 coefficient +void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const struct RgbUVConstants* rgbuvconstants) { + asm volatile( + + "vld1.8 {d0}, [%4] \n" // load rgbuvconstants + "vdup.u8 d24, d0[0] \n" // UB 0.875 coefficient + "vdup.u8 d25, d0[1] \n" // UG -0.5781 coefficient + "vdup.u8 d26, d0[2] \n" // UR -0.2969 coefficient + "vdup.u8 d27, d0[4] \n" // VB -0.1406 coefficient + "vdup.u8 d28, d0[5] \n" // VG -0.7344 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 + "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. "subs %3, %3, #8 \n" // 8 processed per loop. @@ -1857,15 +1865,53 @@ void ARGBToUV444Row_NEON(const uint8_t* src_argb, "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. "vst1.8 {d1}, [%2]! \n" // store 8 pixels V. 
"bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : + : "+r"(src_argb), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : "r"(rgbuvconstants) // %4 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15"); } +// RGB to bt601 coefficients +// UB 0.875 coefficient = 112 +// UG -0.5781 coefficient = 74 +// UR -0.2969 coefficient = 38 +// VB -0.1406 coefficient = 18 +// VG -0.7344 coefficient = 94 +// VR 0.875 coefficient = 112 (ignored) + +static const struct RgbUVConstants kRgb24I601UVConstants = {{112, 74, 38, 0}, + {18, 94, 112, 0}}; + +// RGB to JPeg coefficients +// UB coeff 0.500 = 127 +// UG coeff -0.33126 = 84 +// UR coeff -0.16874 = 43 +// VB coeff -0.08131 = 20 +// VG coeff -0.41869 = 107 +// VR coeff 0.500 = 127 (ignored) + +static const struct RgbUVConstants kRgb24JPegUVConstants = {{127, 84, 43, 0}, + {20, 107, 127, 0}}; + +void ARGBToUV444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width, + &kRgb24I601UVConstants); +} + +void ARGBToUVJ444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width, + &kRgb24JPegUVConstants); +} + // clang-format off // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. 
#define RGBTOUV(QB, QG, QR) \ @@ -2702,7 +2748,6 @@ void AB64ToARGBRow_NEON(const uint16_t* src_ab64, struct RgbConstants { uint8_t kRGBToY[4]; uint16_t kAddY; - uint16_t pad; }; // RGB to JPeg coefficients @@ -2710,11 +2755,9 @@ struct RgbConstants { // G * 0.5870 coefficient = 150 // R * 0.2990 coefficient = 77 // Add 0.5 = 0x80 -static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, - 128, - 0}; +static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, 128}; -static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0}; +static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128}; // RGB to BT.601 coefficients // B * 0.1016 coefficient = 25 @@ -2723,12 +2766,9 @@ static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0}; // Add 16.5 = 0x1080 static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, - 0x1080, - 0}; + 0x1080}; -static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, - 0x1080, - 0}; +static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, 0x1080}; // ARGB expects first 3 values to contain RGB and 4th value is ignored. void ARGBToYMatrixRow_NEON(const uint8_t* src_argb, @@ -3058,6 +3098,8 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width) { asm volatile( + "vmov.u16 q15, #0x00ff \n" // 255 for rounding up + // Attenuate 8 pixels. "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB. @@ -3065,16 +3107,16 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb, "vmull.u8 q10, d0, d3 \n" // b * a "vmull.u8 q11, d1, d3 \n" // g * a "vmull.u8 q12, d2, d3 \n" // r * a - "vqrshrn.u16 d0, q10, #8 \n" // b >>= 8 - "vqrshrn.u16 d1, q11, #8 \n" // g >>= 8 - "vqrshrn.u16 d2, q12, #8 \n" // r >>= 8 + "vaddhn.u16 d0, q10, q15 \n" // (b + 255) >> 8 + "vaddhn.u16 d1, q11, q15 \n" // (g + 255) >> 8 + "vaddhn.u16 d2, q12, q15 \n" // (r + 255) >> 8 "vst4.8 {d0, d1, d2, d3}, [%1]! 
\n" // store 8 pixels of ARGB. "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : - : "cc", "memory", "q0", "q1", "q10", "q11", "q12"); + : "cc", "memory", "q0", "q1", "q10", "q11", "q12", "q15"); } // Quantize 8 ARGB pixels (32 bytes). diff --git a/files/source/row_neon64.cc b/source/row_neon64.cc index 74190d61..1679f87c 100644 --- a/files/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -2198,19 +2198,26 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, ); } +struct RgbUVConstants { + uint8_t kRGBToU[4]; + uint8_t kRGBToV[4]; +}; + // 8x1 pixels. -void ARGBToUV444Row_NEON(const uint8_t* src_argb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - asm volatile( - "movi v24.8b, #112 \n" // UB / VR 0.875 - // coefficient - "movi v25.8b, #74 \n" // UG -0.5781 coefficient - "movi v26.8b, #38 \n" // UR -0.2969 coefficient - "movi v27.8b, #18 \n" // VB -0.1406 coefficient - "movi v28.8b, #94 \n" // VG -0.7344 coefficient - "movi v29.16b,#0x80 \n" // 128.5 +void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const struct RgbUVConstants* rgbuvconstants) { + asm volatile( + "ldr d0, [%4] \n" // load rgbuvconstants + "dup v24.16b, v0.b[0] \n" // UB 0.875 coefficient + "dup v25.16b, v0.b[1] \n" // UG -0.5781 coefficient + "dup v26.16b, v0.b[2] \n" // UR -0.2969 coefficient + "dup v27.16b, v0.b[4] \n" // VB -0.1406 coefficient + "dup v28.16b, v0.b[5] \n" // VG -0.7344 coefficient + "movi v29.16b, #0x80 \n" // 128.5 + "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB "subs %w3, %w3, #8 \n" // 8 processed per loop. @@ -2229,15 +2236,53 @@ void ARGBToUV444Row_NEON(const uint8_t* src_argb, "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U. "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V. 
"b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : + : "+r"(src_argb), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : "r"(rgbuvconstants) // %4 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v24", "v25", "v26", "v27", "v28", "v29"); } +// RGB to bt601 coefficients +// UB 0.875 coefficient = 112 +// UG -0.5781 coefficient = 74 +// UR -0.2969 coefficient = 38 +// VB -0.1406 coefficient = 18 +// VG -0.7344 coefficient = 94 +// VR 0.875 coefficient = 112 (ignored) + +static const struct RgbUVConstants kRgb24I601UVConstants = {{112, 74, 38, 0}, + {18, 94, 112, 0}}; + +// RGB to JPeg coefficients +// UB coeff 0.500 = 127 +// UG coeff -0.33126 = 84 +// UR coeff -0.16874 = 43 +// VB coeff -0.08131 = 20 +// VG coeff -0.41869 = 107 +// VR coeff 0.500 = 127 (ignored) + +static const struct RgbUVConstants kRgb24JPegUVConstants = {{127, 84, 43, 0}, + {20, 107, 127, 0}}; + +void ARGBToUV444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width, + &kRgb24I601UVConstants); +} + +void ARGBToUVJ444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width, + &kRgb24JPegUVConstants); +} + #define RGBTOUV_SETUP_REG \ "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ @@ -2943,34 +2988,8 @@ void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, struct RgbConstants { uint8_t kRGBToY[4]; uint16_t kAddY; - uint16_t pad; }; -// RGB to JPeg coefficients -// B * 0.1140 coefficient = 29 -// G * 0.5870 coefficient = 150 -// R * 0.2990 coefficient = 77 -// Add 0.5 = 0x80 -static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, - 128, - 0}; - -static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0}; - -// RGB to BT.601 
coefficients -// B * 0.1016 coefficient = 25 -// G * 0.5078 coefficient = 129 -// R * 0.2578 coefficient = 66 -// Add 16.5 = 0x1080 - -static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, - 0x1080, - 0}; - -static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, - 0x1080, - 0}; - // ARGB expects first 3 values to contain RGB and 4th value is ignored. void ARGBToYMatrixRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, @@ -3005,6 +3024,26 @@ void ARGBToYMatrixRow_NEON(const uint8_t* src_argb, "v17"); } +// RGB to JPeg coefficients +// B * 0.1140 coefficient = 29 +// G * 0.5870 coefficient = 150 +// R * 0.2990 coefficient = 77 +// Add 0.5 = 0x80 +static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, 128}; + +static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128}; + +// RGB to BT.601 coefficients +// B * 0.1016 coefficient = 25 +// G * 0.5078 coefficient = 129 +// R * 0.2578 coefficient = 66 +// Add 16.5 = 0x1080 + +static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, + 0x1080}; + +static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, 0x1080}; + void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { ARGBToYMatrixRow_NEON(src_argb, dst_y, width, &kRgb24I601Constants); } @@ -3402,6 +3441,8 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width) { asm volatile( + "movi v7.8h, #0x00ff \n" // 255 for rounding up + // Attenuate 8 pixels. 
"1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB @@ -3410,16 +3451,16 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb, "prfm pldl1keep, [%0, 448] \n" "umull v5.8h, v1.8b, v3.8b \n" // g * a "umull v6.8h, v2.8b, v3.8b \n" // r * a - "uqrshrn v0.8b, v4.8h, #8 \n" // b >>= 8 - "uqrshrn v1.8b, v5.8h, #8 \n" // g >>= 8 - "uqrshrn v2.8b, v6.8h, #8 \n" // r >>= 8 + "addhn v0.8b, v4.8h, v7.8h \n" // (b + 255) >> 8 + "addhn v1.8b, v5.8h, v7.8h \n" // (g + 255) >> 8 + "addhn v2.8b, v6.8h, v7.8h \n" // (r + 255) >> 8 "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"); + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); } // Quantize 8 ARGB pixels (32 bytes). @@ -3960,6 +4001,86 @@ void ByteToFloatRow_NEON(const uint8_t* src, : "cc", "memory", "v1", "v2", "v3"); } +// Convert FP16 Half Floats to FP32 Floats +void ConvertFP16ToFP32Row_NEON(const uint16_t* src, // fp16 + float* dst, + int width) { + asm volatile( + "1: \n" + "ld1 {v1.8h}, [%0], #16 \n" // load 8 halffloats + "subs %w2, %w2, #8 \n" // 8 floats per loop + "prfm pldl1keep, [%0, 448] \n" + "fcvtl v2.4s, v1.4h \n" // 8 floats + "fcvtl2 v3.4s, v1.8h \n" + "stp q2, q3, [%1], #32 \n" // store 8 floats + "b.gt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v1", "v2", "v3"); +} + +// Convert FP16 Half Floats to FP32 Floats +// Read a column and write a row +void ConvertFP16ToFP32Column_NEON(const uint16_t* src, // fp16 + int src_stride, // stride in elements + float* dst, + int width) { + asm volatile( + "cmp %w2, #8 \n" // Is there 8 rows? 
+ "b.lo 2f \n" + "1: \n" + "ld1 {v0.h}[0], [%0], %3 \n" // load 8 halffloats + "ld1 {v0.h}[1], [%0], %3 \n" + "ld1 {v0.h}[2], [%0], %3 \n" + "ld1 {v0.h}[3], [%0], %3 \n" + "ld1 {v1.h}[0], [%0], %3 \n" + "ld1 {v1.h}[1], [%0], %3 \n" + "ld1 {v1.h}[2], [%0], %3 \n" + "ld1 {v1.h}[3], [%0], %3 \n" + "subs %w2, %w2, #8 \n" // 8 rows per loop + "prfm pldl1keep, [%0, 448] \n" + "fcvtl v2.4s, v0.4h \n" // 4 floats + "fcvtl v3.4s, v1.4h \n" // 4 more floats + "stp q2, q3, [%1], #32 \n" // store 8 floats + "b.gt 1b \n" + "cmp %w2, #1 \n" // Is there 1 value? + "b.lo 3f \n" + "2: \n" + "ld1 {v1.h}[0], [%0], %3 \n" // load 1 halffloats + "subs %w2, %w2, #1 \n" // 1 floats per loop + "fcvtl v2.4s, v1.4h \n" // 1 floats + "str s2, [%1], #4 \n" // store 1 floats + "b.gt 2b \n" + "3: \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "r"((ptrdiff_t)(src_stride * 2)) // %3 + : "cc", "memory", "v0", "v1", "v2", "v3"); +} + +// Convert FP32 Floats to FP16 Half Floats +void ConvertFP32ToFP16Row_NEON(const float* src, + uint16_t* dst, // fp16 + int width) { + asm volatile( + "1: \n" + "ldp q2, q3, [%0], #32 \n" // load 8 floats + "subs %w2, %w2, #8 \n" // 8 floats per loop + "prfm pldl1keep, [%0, 448] \n" + "fcvtn v1.4h, v2.4s \n" // 8 fp16 halffloats + "fcvtn2 v1.8h, v3.4s \n" + "str q1, [%1], #16 \n" // store 8 fp16 halffloats + "b.gt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v1", "v2", "v3"); +} + float ScaleMaxSamples_NEON(const float* src, float* dst, float scale, diff --git a/files/source/row_rvv.cc b/source/row_rvv.cc index 27e91a3b..0bf2bef6 100644 --- a/files/source/row_rvv.cc +++ b/source/row_rvv.cc @@ -17,7 +17,9 @@ #include "libyuv/row.h" -#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) +// This module is for clang rvv. GCC hasn't supported segment load & store. 
+#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) && \ + defined(__clang__) #include <assert.h> #include <riscv_vector.h> @@ -29,48 +31,48 @@ extern "C" { // Fill YUV -> RGB conversion constants into vectors // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode // register) is set to round-to-nearest-up mode(0). -#define YUVTORGB_SETUP(vl, yuvconst, ub, vr, ug, vg, yg, bb, bg, br) \ - { \ - asm volatile("csrwi vxrm, 0"); \ - ub = yuvconst->kUVCoeff[0]; \ - vr = yuvconst->kUVCoeff[1]; \ - ug = yuvconst->kUVCoeff[2]; \ - vg = yuvconst->kUVCoeff[3]; \ - yg = yuvconst->kRGBCoeffBias[0]; \ - bb = yuvconst->kRGBCoeffBias[1] + 32; \ - bg = yuvconst->kRGBCoeffBias[2] - 32; \ - br = yuvconst->kRGBCoeffBias[3] + 32; \ +#define YUVTORGB_SETUP(yuvconst, ub, vr, ug, vg, yg, bb, bg, br) \ + { \ + asm volatile("csrwi vxrm, 0"); \ + ub = yuvconst->kUVCoeff[0]; \ + vr = yuvconst->kUVCoeff[1]; \ + ug = yuvconst->kUVCoeff[2]; \ + vg = yuvconst->kUVCoeff[3]; \ + yg = yuvconst->kRGBCoeffBias[0]; \ + bb = yuvconst->kRGBCoeffBias[1] + 32; \ + bg = yuvconst->kRGBCoeffBias[2] - 32; \ + br = yuvconst->kRGBCoeffBias[3] + 32; \ } -// Read [VLEN/8] Y, [VLEN/(8 * 2)] U and [VLEN/(8 * 2)] V from 422 -#define READYUV422(vl, v_u, v_v, v_y_16) \ - { \ - vuint8m1_t v_tmp0, v_tmp1; \ - vuint8m2_t v_y; \ - vuint16m2_t v_u_16, v_v_16; \ - vl = __riscv_vsetvl_e8m1((w + 1) / 2); \ - v_tmp0 = __riscv_vle8_v_u8m1(src_u, vl); \ - v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl); \ - v_tmp1 = __riscv_vle8_v_u8m1(src_v, vl); \ - v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl); \ - v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl); \ - v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl); \ - v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16); \ - v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16); \ - vl = __riscv_vsetvl_e8m2(w); \ - v_y = __riscv_vle8_v_u8m2(src_y, vl); \ - v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \ +// Read [2*VLEN/8] Y, [VLEN/8] U and [VLEN/8] V from 422 
+#define READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16) \ + { \ + vuint8m1_t v_tmp0, v_tmp1; \ + vuint8m2_t v_y; \ + vuint16m2_t v_u_16, v_v_16; \ + vl = __riscv_vsetvl_e8m1((w + 1) / 2); \ + v_tmp0 = __riscv_vle8_v_u8m1(src_u, vl); \ + v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl); \ + v_tmp1 = __riscv_vle8_v_u8m1(src_v, vl); \ + v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl); \ + v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl); \ + v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl); \ + v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16); \ + v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16); \ + vl = __riscv_vsetvl_e8m2(w); \ + v_y = __riscv_vle8_v_u8m2(src_y, vl); \ + v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \ } -// Read [VLEN/8] Y, [VLEN/8] U, and [VLEN/8] V from 444 -#define READYUV444(vl, v_u, v_v, v_y_16) \ - { \ - vuint8m2_t v_y; \ - vl = __riscv_vsetvl_e8m2(w); \ - v_y = __riscv_vle8_v_u8m2(src_y, vl); \ - v_u = __riscv_vle8_v_u8m2(src_u, vl); \ - v_v = __riscv_vle8_v_u8m2(src_v, vl); \ - v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \ +// Read [2*VLEN/8] Y, [2*VLEN/8] U, and [2*VLEN/8] V from 444 +#define READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16) \ + { \ + vuint8m2_t v_y; \ + vl = __riscv_vsetvl_e8m2(w); \ + v_y = __riscv_vle8_v_u8m2(src_y, vl); \ + v_u = __riscv_vle8_v_u8m2(src_u, vl); \ + v_v = __riscv_vle8_v_u8m2(src_v, vl); \ + v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \ } // Convert from YUV to fixed point RGB @@ -101,6 +103,45 @@ extern "C" { v_r = __riscv_vnclipu_wx_u8m2(v_r_16, 6, vl); \ } +// Read [2*VLEN/8] Y from src_y; Read [VLEN/8] U and [VLEN/8] V from src_uv +#define READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16) \ + { \ + vuint8m1_t v_tmp0, v_tmp1; \ + vuint8m2_t v_y; \ + vuint16m2_t v_u_16, v_v_16; \ + vl = __riscv_vsetvl_e8m1((w + 1) / 2); \ + __riscv_vlseg2e8_v_u8m1(&v_tmp0, &v_tmp1, src_uv, vl); \ + v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl); \ + v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl); 
\ + v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl); \ + v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl); \ + v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16); \ + v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16); \ + vl = __riscv_vsetvl_e8m2(w); \ + v_y = __riscv_vle8_v_u8m2(src_y, vl); \ + v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \ + } + +// Read 2*[VLEN/8] Y from src_y; Read [VLEN/8] U and [VLEN/8] V from src_vu +#define READNV21(vl, w, src_y, src_vu, v_u, v_v, v_y_16) \ + { \ + vuint8m1_t v_tmp0, v_tmp1; \ + vuint8m2_t v_y; \ + vuint16m2_t v_u_16, v_v_16; \ + vl = __riscv_vsetvl_e8m1((w + 1) / 2); \ + __riscv_vlseg2e8_v_u8m1(&v_tmp0, &v_tmp1, src_vu, vl); \ + v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl); \ + v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl); \ + v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl); \ + v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl); \ + v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16); \ + v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16); \ + vl = __riscv_vsetvl_e8m2(w); \ + v_y = __riscv_vle8_v_u8m2(src_y, vl); \ + v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \ + } + +#ifdef HAS_ARGBTOAR64ROW_RVV void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width) { size_t avl = (size_t)4 * width; do { @@ -116,7 +157,9 @@ void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width) { dst_ar64 += vl; } while (avl > 0); } +#endif +#ifdef HAS_ARGBTOAB64ROW_RVV void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width) { size_t avl = (size_t)width; do { @@ -138,7 +181,9 @@ void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width) { dst_ab64 += 4 * vl; } while (avl > 0); } +#endif +#ifdef HAS_AR64TOARGBROW_RVV void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width) { size_t avl = (size_t)4 * width; do { @@ -153,7 +198,26 @@ void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width) { dst_argb += vl; } while 
(avl > 0); } +#endif + +#ifdef HAS_AR64TOAB64ROW_RVV +void AR64ToAB64Row_RVV(const uint16_t* src_ar64, + uint16_t* dst_ab64, + int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e16m2(w); + vuint16m2_t v_b, v_g, v_r, v_a; + __riscv_vlseg4e16_v_u16m2(&v_b, &v_g, &v_r, &v_a, src_ar64, vl); + __riscv_vsseg4e16_v_u16m2(dst_ab64, v_r, v_g, v_b, v_a, vl); + w -= vl; + src_ar64 += vl * 4; + dst_ab64 += vl * 4; + } while (w > 0); +} +#endif +#ifdef HAS_AB64TOARGBROW_RVV void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width) { size_t avl = (size_t)width; do { @@ -171,7 +235,9 @@ void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width) { dst_argb += 4 * vl; } while (avl > 0); } +#endif +#ifdef HAS_RAWTOARGBROW_RVV void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width) { size_t w = (size_t)width; size_t vl = __riscv_vsetvl_e8m2(w); @@ -186,7 +252,9 @@ void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width) { vl = __riscv_vsetvl_e8m2(w); } while (w > 0); } +#endif +#ifdef HAS_RAWTORGBAROW_RVV void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width) { size_t w = (size_t)width; size_t vl = __riscv_vsetvl_e8m2(w); @@ -201,7 +269,9 @@ void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width) { vl = __riscv_vsetvl_e8m2(w); } while (w > 0); } +#endif +#ifdef HAS_RAWTORGB24ROW_RVV void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { size_t w = (size_t)width; do { @@ -214,7 +284,9 @@ void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { dst_rgb24 += vl * 3; } while (w > 0); } +#endif +#ifdef HAS_ARGBTORAWROW_RVV void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width) { size_t w = (size_t)width; do { @@ -227,7 +299,9 @@ void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width) { dst_raw += vl * 3; } while (w > 0); } +#endif +#ifdef 
HAS_ARGBTORGB24ROW_RVV void ARGBToRGB24Row_RVV(const uint8_t* src_argb, uint8_t* dst_rgb24, int width) { @@ -242,7 +316,69 @@ void ARGBToRGB24Row_RVV(const uint8_t* src_argb, dst_rgb24 += vl * 3; } while (w > 0); } +#endif + +#ifdef HAS_ARGBTOABGRROW_RVV +void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_a, v_r, v_g, v_b; + __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + __riscv_vsseg4e8_v_u8m2(dst_abgr, v_r, v_g, v_b, v_a, vl); + w -= vl; + src_argb += vl * 4; + dst_abgr += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_ARGBTOBGRAROW_RVV +void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_bgra, int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_a, v_r, v_g, v_b; + __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + __riscv_vsseg4e8_v_u8m2(dst_bgra, v_a, v_r, v_g, v_b, vl); + w -= vl; + src_argb += vl * 4; + dst_bgra += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_ARGBTORGBAROW_RVV +void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_a, v_r, v_g, v_b; + __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + __riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl); + w -= vl; + src_argb += vl * 4; + dst_rgba += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_RGBATOARGBROW_RVV +void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_a, v_r, v_g, v_b; + __riscv_vlseg4e8_v_u8m2(&v_a, &v_b, &v_g, &v_r, src_rgba, vl); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); + w -= vl; + src_rgba += vl * 4; + dst_argb += vl * 4; + } while (w > 0); +} +#endif +#ifdef HAS_RGB24TOARGBROW_RVV void 
RGB24ToARGBRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) { @@ -259,24 +395,26 @@ void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24, vl = __riscv_vsetvl_e8m2(w); } while (w > 0); } +#endif +#ifdef HAS_I444TOARGBROW_RVV void I444ToARGBRow_RVV(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - size_t vl; size_t w = (size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); uint8_t ub, vr, ug, vg; int16_t yg, bb, bg, br; vuint8m2_t v_u, v_v; vuint8m2_t v_b, v_g, v_r, v_a; vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; - YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); v_a = __riscv_vmv_v_x_u8m2(255u, vl); do { - READYUV444(vl, v_u, v_v, v_y_16); + READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, v_b_16, v_r_16); RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); @@ -288,7 +426,9 @@ void I444ToARGBRow_RVV(const uint8_t* src_y, dst_argb += vl * 4; } while (w > 0); } +#endif +#ifdef HAS_I444ALPHATOARGBROW_RVV void I444AlphaToARGBRow_RVV(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -303,9 +443,9 @@ void I444AlphaToARGBRow_RVV(const uint8_t* src_y, vuint8m2_t v_u, v_v; vuint8m2_t v_b, v_g, v_r, v_a; vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; - YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); do { - READYUV444(vl, v_u, v_v, v_y_16); + READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); v_a = __riscv_vle8_v_u8m2(src_a, vl); YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, v_b_16, v_r_16); @@ -319,7 +459,9 @@ void I444AlphaToARGBRow_RVV(const uint8_t* src_y, dst_argb += vl * 4; } while (w > 0); } +#endif +#ifdef HAS_I444TORGB24ROW_RVV void I444ToRGB24Row_RVV(const uint8_t* src_y, const 
uint8_t* src_u, const uint8_t* src_v, @@ -333,9 +475,9 @@ void I444ToRGB24Row_RVV(const uint8_t* src_y, vuint8m2_t v_u, v_v; vuint8m2_t v_b, v_g, v_r; vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; - YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); do { - READYUV444(vl, v_u, v_v, v_y_16); + READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, v_b_16, v_r_16); RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); @@ -347,24 +489,26 @@ void I444ToRGB24Row_RVV(const uint8_t* src_y, dst_rgb24 += vl * 3; } while (w > 0); } +#endif +#ifdef HAS_I422TOARGBROW_RVV void I422ToARGBRow_RVV(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - size_t vl; size_t w = (size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); uint8_t ub, vr, ug, vg; int16_t yg, bb, bg, br; vuint8m2_t v_u, v_v; vuint8m2_t v_b, v_g, v_r, v_a; vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; - YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); v_a = __riscv_vmv_v_x_u8m2(255u, vl); do { - READYUV422(vl, v_u, v_v, v_y_16); + READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, v_b_16, v_r_16); RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); @@ -376,7 +520,9 @@ void I422ToARGBRow_RVV(const uint8_t* src_y, dst_argb += vl * 4; } while (w > 0); } +#endif +#ifdef HAS_I422ALPHATOARGBROW_RVV void I422AlphaToARGBRow_RVV(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -391,9 +537,9 @@ void I422AlphaToARGBRow_RVV(const uint8_t* src_y, vuint8m2_t v_u, v_v; vuint8m2_t v_b, v_g, v_r, v_a; vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; - YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + 
YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); do { - READYUV422(vl, v_u, v_v, v_y_16); + READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); v_a = __riscv_vle8_v_u8m2(src_a, vl); YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, v_b_16, v_r_16); @@ -407,24 +553,26 @@ void I422AlphaToARGBRow_RVV(const uint8_t* src_y, dst_argb += vl * 4; } while (w > 0); } +#endif +#ifdef HAS_I422TORGBAROW_RVV void I422ToRGBARow_RVV(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width) { - size_t vl; size_t w = (size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); uint8_t ub, vr, ug, vg; int16_t yg, bb, bg, br; vuint8m2_t v_u, v_v; vuint8m2_t v_b, v_g, v_r, v_a; vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; - YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); v_a = __riscv_vmv_v_x_u8m2(255u, vl); do { - READYUV422(vl, v_u, v_v, v_y_16); + READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, v_b_16, v_r_16); RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); @@ -436,7 +584,9 @@ void I422ToRGBARow_RVV(const uint8_t* src_y, dst_rgba += vl * 4; } while (w > 0); } +#endif +#ifdef HAS_I422TORGB24ROW_RVV void I422ToRGB24Row_RVV(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -450,9 +600,9 @@ void I422ToRGB24Row_RVV(const uint8_t* src_y, vuint8m2_t v_u, v_v; vuint8m2_t v_b, v_g, v_r; vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; - YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); do { - READYUV422(vl, v_u, v_v, v_y_16); + READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, v_b_16, v_r_16); RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); 
@@ -464,7 +614,9 @@ void I422ToRGB24Row_RVV(const uint8_t* src_y, dst_rgb24 += vl * 3; } while (w > 0); } +#endif +#ifdef HAS_I400TOARGBROW_RVV void I400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, @@ -503,7 +655,9 @@ void I400ToARGBRow_RVV(const uint8_t* src_y, dst_argb += vl * 4; } while (w > 0); } +#endif +#ifdef HAS_J400TOARGBROW_RVV void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width) { size_t w = (size_t)width; size_t vl = __riscv_vsetvl_e8m2(w); @@ -518,7 +672,9 @@ void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width) { vl = __riscv_vsetvl_e8m2(w); } while (w > 0); } +#endif +#ifdef HAS_COPYROW_RVV void CopyRow_RVV(const uint8_t* src, uint8_t* dst, int width) { size_t w = (size_t)width; do { @@ -530,8 +686,125 @@ void CopyRow_RVV(const uint8_t* src, uint8_t* dst, int width) { dst += vl; } while (w > 0); } +#endif + +#ifdef HAS_NV12TOARGBROW_RVV +void NV12ToARGBRow_RVV(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + size_t w = (size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); + uint8_t ub, vr, ug, vg; + int16_t yg, bb, bg, br; + vuint8m2_t v_u, v_v; + vuint8m2_t v_b, v_g, v_r, v_a; + vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + v_a = __riscv_vmv_v_x_u8m2(255u, vl); + do { + READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16); + YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, + v_b_16, v_r_16); + RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); + w -= vl; + src_y += vl; + src_uv += vl; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_NV12TORGB24ROW_RVV +void NV12ToRGB24Row_RVV(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + size_t w = 
(size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); + uint8_t ub, vr, ug, vg; + int16_t yg, bb, bg, br; + vuint8m2_t v_u, v_v; + vuint8m2_t v_b, v_g, v_r; + vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + do { + READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16); + YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, + v_b_16, v_r_16); + RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); + __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl); + w -= vl; + src_y += vl; + src_uv += vl; + dst_rgb24 += vl * 3; + } while (w > 0); +} +#endif + +#ifdef HAS_NV21TOARGBROW_RVV +void NV21ToARGBRow_RVV(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + size_t w = (size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); + uint8_t ub, vr, ug, vg; + int16_t yg, bb, bg, br; + vuint8m2_t v_u, v_v; + vuint8m2_t v_b, v_g, v_r, v_a; + vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + v_a = __riscv_vmv_v_x_u8m2(255u, vl); + do { + READNV21(vl, w, src_y, src_vu, v_u, v_v, v_y_16); + YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, + v_b_16, v_r_16); + RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); + w -= vl; + src_y += vl; + src_vu += vl; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_NV21TORGB24ROW_RVV +void NV21ToRGB24Row_RVV(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + size_t w = (size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); + uint8_t ub, vr, ug, vg; + int16_t yg, bb, bg, br; + vuint8m2_t v_u, v_v; + vuint8m2_t v_b, v_g, v_r; + vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + do { + READNV21(vl, w, src_y, src_vu, 
v_u, v_v, v_y_16); + YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, + v_b_16, v_r_16); + RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); + __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl); + w -= vl; + src_y += vl; + src_vu += vl; + dst_rgb24 += vl * 3; + } while (w > 0); +} +#endif // Bilinear filter [VLEN/8]x2 -> [VLEN/8]x1 + +#ifdef HAS_INTERPOLATEROW_RVV void InterpolateRow_RVV(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, @@ -554,13 +827,16 @@ void InterpolateRow_RVV(uint8_t* dst_ptr, } while (dst_w > 0); return; } + // To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up(0). + asm volatile("csrwi vxrm, 0"); // Blend 50 / 50. if (y1_fraction == 128) { do { size_t vl = __riscv_vsetvl_e8m8(dst_w); vuint8m8_t row0 = __riscv_vle8_v_u8m8(src_ptr, vl); vuint8m8_t row1 = __riscv_vle8_v_u8m8(src_ptr1, vl); - // Averaging add + // Use round-to-nearest-up mode for averaging add vuint8m8_t row_out = __riscv_vaaddu_vv_u8m8(row0, row1, vl); __riscv_vse8_v_u8m8(dst_ptr, row_out, vl); dst_w -= vl; @@ -571,15 +847,13 @@ void InterpolateRow_RVV(uint8_t* dst_ptr, return; } // General purpose row blend. - // To match behavior on other platforms, vxrm (fixed-point rounding mode - // register) is set to round-to-nearest-up(0). 
- asm volatile("csrwi vxrm, 0"); do { size_t vl = __riscv_vsetvl_e8m4(dst_w); vuint8m4_t row0 = __riscv_vle8_v_u8m4(src_ptr, vl); vuint16m8_t acc = __riscv_vwmulu_vx_u16m8(row0, y0_fraction, vl); vuint8m4_t row1 = __riscv_vle8_v_u8m4(src_ptr1, vl); acc = __riscv_vwmaccu_vx_u16m8(acc, y1_fraction, row1, vl); + // Use round-to-nearest-up mode for vnclip __riscv_vse8_v_u8m4(dst_ptr, __riscv_vnclipu_wx_u8m4(acc, 8, vl), vl); dst_w -= vl; src_ptr += vl; @@ -587,7 +861,9 @@ void InterpolateRow_RVV(uint8_t* dst_ptr, dst_ptr += vl; } while (dst_w > 0); } +#endif +#ifdef HAS_SPLITRGBROW_RVV void SplitRGBRow_RVV(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, @@ -608,7 +884,9 @@ void SplitRGBRow_RVV(const uint8_t* src_rgb, src_rgb += vl * 3; } while (w > 0); } +#endif +#ifdef HAS_MERGERGBROW_RVV void MergeRGBRow_RVV(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, @@ -628,7 +906,9 @@ void MergeRGBRow_RVV(const uint8_t* src_r, dst_rgb += vl * 3; } while (w > 0); } +#endif +#ifdef HAS_SPLITARGBROW_RVV void SplitARGBRow_RVV(const uint8_t* src_argb, uint8_t* dst_r, uint8_t* dst_g, @@ -652,7 +932,9 @@ void SplitARGBRow_RVV(const uint8_t* src_argb, src_argb += vl * 4; } while (w > 0); } +#endif +#ifdef HAS_MERGEARGBROW_RVV void MergeARGBRow_RVV(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, @@ -675,7 +957,9 @@ void MergeARGBRow_RVV(const uint8_t* src_r, dst_argb += vl * 4; } while (w > 0); } +#endif +#ifdef HAS_SPLITXRGBROW_RVV void SplitXRGBRow_RVV(const uint8_t* src_argb, uint8_t* dst_r, uint8_t* dst_g, @@ -696,7 +980,9 @@ void SplitXRGBRow_RVV(const uint8_t* src_argb, src_argb += vl * 4; } while (w > 0); } +#endif +#ifdef HAS_MERGEXRGBROW_RVV void MergeXRGBRow_RVV(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, @@ -719,7 +1005,9 @@ void MergeXRGBRow_RVV(const uint8_t* src_r, vl = __riscv_vsetvl_e8m2(w); } while (w > 0); } +#endif +#ifdef HAS_SPLITUVROW_RVV void SplitUVRow_RVV(const uint8_t* src_uv, uint8_t* dst_u, 
uint8_t* dst_v, @@ -737,7 +1025,9 @@ void SplitUVRow_RVV(const uint8_t* src_uv, src_uv += 2 * vl; } while (w > 0); } +#endif +#ifdef HAS_MERGEUVROW_RVV void MergeUVRow_RVV(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, @@ -755,6 +1045,7 @@ void MergeUVRow_RVV(const uint8_t* src_u, dst_uv += 2 * vl; } while (w > 0); } +#endif struct RgbConstants { uint8_t kRGBToY[4]; @@ -787,7 +1078,8 @@ static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, 0x1080, 0}; -// ARGB expects first 3 values to contain RGB and 4th value is ignored. +// ARGB expects first 3 values to contain RGB and 4th value is ignored +#ifdef HAS_ARGBTOYMATRIXROW_RVV void ARGBToYMatrixRow_RVV(const uint8_t* src_argb, uint8_t* dst_y, int width, @@ -817,24 +1109,34 @@ void ARGBToYMatrixRow_RVV(const uint8_t* src_argb, dst_y += vl; } while (w > 0); } +#endif +#ifdef HAS_ARGBTOYROW_RVV void ARGBToYRow_RVV(const uint8_t* src_argb, uint8_t* dst_y, int width) { ARGBToYMatrixRow_RVV(src_argb, dst_y, width, &kRgb24I601Constants); } +#endif +#ifdef HAS_ARGBTOYJROW_RVV void ARGBToYJRow_RVV(const uint8_t* src_argb, uint8_t* dst_yj, int width) { ARGBToYMatrixRow_RVV(src_argb, dst_yj, width, &kRgb24JPEGConstants); } +#endif +#ifdef HAS_ABGRTOYROW_RVV void ABGRToYRow_RVV(const uint8_t* src_abgr, uint8_t* dst_y, int width) { ARGBToYMatrixRow_RVV(src_abgr, dst_y, width, &kRawI601Constants); } +#endif +#ifdef HAS_ABGRTOYJROW_RVV void ABGRToYJRow_RVV(const uint8_t* src_abgr, uint8_t* dst_yj, int width) { ARGBToYMatrixRow_RVV(src_abgr, dst_yj, width, &kRawJPEGConstants); } +#endif // RGBA expects first value to be A and ignored, then 3 values to contain RGB. 
+#ifdef HAS_RGBATOYMATRIXROW_RVV void RGBAToYMatrixRow_RVV(const uint8_t* src_rgba, uint8_t* dst_y, int width, @@ -864,19 +1166,27 @@ void RGBAToYMatrixRow_RVV(const uint8_t* src_rgba, dst_y += vl; } while (w > 0); } +#endif +#ifdef HAS_RGBATOYROW_RVV void RGBAToYRow_RVV(const uint8_t* src_rgba, uint8_t* dst_y, int width) { RGBAToYMatrixRow_RVV(src_rgba, dst_y, width, &kRgb24I601Constants); } +#endif +#ifdef HAS_RGBATOYJROW_RVV void RGBAToYJRow_RVV(const uint8_t* src_rgba, uint8_t* dst_yj, int width) { RGBAToYMatrixRow_RVV(src_rgba, dst_yj, width, &kRgb24JPEGConstants); } +#endif +#ifdef HAS_BGRATOYROW_RVV void BGRAToYRow_RVV(const uint8_t* src_bgra, uint8_t* dst_y, int width) { RGBAToYMatrixRow_RVV(src_bgra, dst_y, width, &kRawI601Constants); } +#endif +#ifdef HAS_RGBTOYMATRIXROW_RVV void RGBToYMatrixRow_RVV(const uint8_t* src_rgb, uint8_t* dst_y, int width, @@ -906,51 +1216,179 @@ void RGBToYMatrixRow_RVV(const uint8_t* src_rgb, dst_y += vl; } while (w > 0); } +#endif +#ifdef HAS_RGB24TOYJROW_RVV void RGB24ToYJRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) { RGBToYMatrixRow_RVV(src_rgb24, dst_yj, width, &kRgb24JPEGConstants); } +#endif +#ifdef HAS_RAWTOYJROW_RVV void RAWToYJRow_RVV(const uint8_t* src_raw, uint8_t* dst_yj, int width) { RGBToYMatrixRow_RVV(src_raw, dst_yj, width, &kRawJPEGConstants); } +#endif +#ifdef HAS_RGB24TOYROW_RVV void RGB24ToYRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_y, int width) { RGBToYMatrixRow_RVV(src_rgb24, dst_y, width, &kRgb24I601Constants); } +#endif +#ifdef HAS_RAWTOYROW_RVV void RAWToYRow_RVV(const uint8_t* src_raw, uint8_t* dst_y, int width) { RGBToYMatrixRow_RVV(src_raw, dst_y, width, &kRawI601Constants); } +#endif + +// Blend src_argb over src_argb1 and store to dst_argb. +// dst_argb may be src_argb or src_argb1. +// src_argb: RGB values have already been pre-multiplied by the a. 
+#ifdef HAS_ARGBBLENDROW_RVV +void ARGBBlendRow_RVV(const uint8_t* src_argb, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { + size_t w = (size_t)width; + size_t vl = __riscv_vsetvlmax_e8m2(); + // clamp255((((256 - a) * b) >> 8) + f) + // = b * (256 - a) / 256 + f + // = b - (b * a / 256) + f + vuint8m2_t v_255 = __riscv_vmv_v_x_u8m2(255, vl); + do { + vuint8m2_t v_src0_b, v_src0_g, v_src0_r, v_src0_a; + vuint8m2_t v_src1_b, v_src1_g, v_src1_r, v_src1_a; + vuint8m2_t v_tmp_b, v_tmp_g, v_tmp_r; + vuint8m2_t v_dst_b, v_dst_g, v_dst_r; + vl = __riscv_vsetvl_e8m2(w); + __riscv_vlseg4e8_v_u8m2(&v_src0_b, &v_src0_g, &v_src0_r, &v_src0_a, + src_argb, vl); + __riscv_vlseg4e8_v_u8m2(&v_src1_b, &v_src1_g, &v_src1_r, &v_src1_a, + src_argb1, vl); + + v_tmp_b = __riscv_vmulhu_vv_u8m2(v_src1_b, v_src0_a, vl); + v_tmp_g = __riscv_vmulhu_vv_u8m2(v_src1_g, v_src0_a, vl); + v_tmp_r = __riscv_vmulhu_vv_u8m2(v_src1_r, v_src0_a, vl); + + v_dst_b = __riscv_vsub_vv_u8m2(v_src1_b, v_tmp_b, vl); + v_dst_g = __riscv_vsub_vv_u8m2(v_src1_g, v_tmp_g, vl); + v_dst_r = __riscv_vsub_vv_u8m2(v_src1_r, v_tmp_r, vl); + + v_dst_b = __riscv_vsaddu_vv_u8m2(v_dst_b, v_src0_b, vl); + v_dst_g = __riscv_vsaddu_vv_u8m2(v_dst_g, v_src0_g, vl); + v_dst_r = __riscv_vsaddu_vv_u8m2(v_dst_r, v_src0_r, vl); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_dst_b, v_dst_g, v_dst_r, v_255, vl); + + w -= vl; + src_argb += 4 * vl; + src_argb1 += 4 * vl; + dst_argb += 4 * vl; + } while (w > 0); +} +#endif + +#ifdef HAS_BLENDPLANEROW_RVV +void BlendPlaneRow_RVV(const uint8_t* src0, + const uint8_t* src1, + const uint8_t* alpha, + uint8_t* dst, + int width) { + size_t w = (size_t)width; + do { + vuint16m8_t v_dst_u16; + vuint8m4_t v_dst; + size_t vl = __riscv_vsetvl_e8m4(w); + vuint8m4_t v_src0 = __riscv_vle8_v_u8m4(src0, vl); + vuint8m4_t v_src1 = __riscv_vle8_v_u8m4(src1, vl); + vuint8m4_t v_alpha = __riscv_vle8_v_u8m4(alpha, vl); + vuint8m4_t v_255_minus_alpha = __riscv_vrsub_vx_u8m4(v_alpha, 255u, vl); + // (a * 
foreground) + (1-a) * background + v_dst_u16 = __riscv_vwmulu_vv_u16m8(v_alpha, v_src0, vl); + v_dst_u16 = + __riscv_vwmaccu_vv_u16m8(v_dst_u16, v_255_minus_alpha, v_src1, vl); + v_dst_u16 = __riscv_vadd_vx_u16m8(v_dst_u16, 255u, vl); + v_dst = __riscv_vnsrl_wx_u8m4(v_dst_u16, 8, vl); + + __riscv_vse8_v_u8m4(dst, v_dst, vl); + w -= vl; + src0 += vl; + src1 += vl; + alpha += vl; + dst += vl; + } while (w > 0); +} +#endif + +// Attenuate: (f * a + 255) >> 8 +#ifdef HAS_ARGBATTENUATEROW_RVV void ARGBAttenuateRow_RVV(const uint8_t* src_argb, uint8_t* dst_argb, int width) { size_t w = (size_t)width; - // To match behavior on other platforms, vxrm (fixed-point rounding mode - // register) is set to round-to-nearest-up(0). - asm volatile("csrwi vxrm, 0"); do { vuint8m2_t v_b, v_g, v_r, v_a; vuint16m4_t v_ba_16, v_ga_16, v_ra_16; size_t vl = __riscv_vsetvl_e8m2(w); __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + // f * a v_ba_16 = __riscv_vwmulu_vv_u16m4(v_b, v_a, vl); v_ga_16 = __riscv_vwmulu_vv_u16m4(v_g, v_a, vl); v_ra_16 = __riscv_vwmulu_vv_u16m4(v_r, v_a, vl); - v_b = __riscv_vnclipu_wx_u8m2(v_ba_16, 8, vl); - v_g = __riscv_vnclipu_wx_u8m2(v_ga_16, 8, vl); - v_r = __riscv_vnclipu_wx_u8m2(v_ra_16, 8, vl); + // f * a + 255 + v_ba_16 = __riscv_vadd_vx_u16m4(v_ba_16, 255u, vl); + v_ga_16 = __riscv_vadd_vx_u16m4(v_ga_16, 255u, vl); + v_ra_16 = __riscv_vadd_vx_u16m4(v_ra_16, 255u, vl); + // (f * a + 255) >> 8 + v_b = __riscv_vnsrl_wx_u8m2(v_ba_16, 8, vl); + v_g = __riscv_vnsrl_wx_u8m2(v_ga_16, 8, vl); + v_r = __riscv_vnsrl_wx_u8m2(v_ra_16, 8, vl); __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); w -= vl; src_argb += vl * 4; dst_argb += vl * 4; } while (w > 0); } +#endif + +#ifdef HAS_ARGBEXTRACTALPHAROW_RVV +void ARGBExtractAlphaRow_RVV(const uint8_t* src_argb, + uint8_t* dst_a, + int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_b, v_g, v_r, v_a; + __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, 
src_argb, vl); + __riscv_vse8_v_u8m2(dst_a, v_a, vl); + w -= vl; + src_argb += vl * 4; + dst_a += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_ARGBCOPYYTOALPHAROW_RVV +void ARGBCopyYToAlphaRow_RVV(const uint8_t* src, uint8_t* dst, int width) { + size_t w = (size_t)width; + const ptrdiff_t dst_stride = 4; + dst += 3; + do { + size_t vl = __riscv_vsetvl_e8m8(w); + vuint8m8_t v_a = __riscv_vle8_v_u8m8(src, vl); + __riscv_vsse8_v_u8m8(dst, dst_stride, v_a, vl); + w -= vl; + src += vl; + dst += vl * dst_stride; + } while (w > 0); +} +#endif #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif -#endif // !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) +#endif // !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) && + // defined(__clang__) diff --git a/files/source/row_win.cc b/source/row_win.cc index 5fb28521..5fb28521 100644 --- a/files/source/row_win.cc +++ b/source/row_win.cc diff --git a/files/source/scale.cc b/source/scale.cc index 80b030dc..b7a602ba 100644 --- a/files/source/scale.cc +++ b/source/scale.cc @@ -135,6 +135,14 @@ static void ScalePlaneDown2(int src_width, } } #endif +#if defined(HAS_SCALEROWDOWN2_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleRowDown2 = filtering == kFilterNone + ? ScaleRowDown2_RVV + : (filtering == kFilterLinear ? ScaleRowDown2Linear_RVV + : ScaleRowDown2Box_RVV); + } +#endif if (filtering == kFilterLinear) { src_stride = 0; @@ -312,6 +320,11 @@ static void ScalePlaneDown4(int src_width, } } #endif +#if defined(HAS_SCALEROWDOWN4_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleRowDown4 = filtering ? 
ScaleRowDown4Box_RVV : ScaleRowDown4_RVV; + } +#endif if (filtering == kFilterLinear) { src_stride = 0; @@ -472,6 +485,17 @@ static void ScalePlaneDown34(int src_width, } } #endif +#if defined(HAS_SCALEROWDOWN34_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + if (!filtering) { + ScaleRowDown34_0 = ScaleRowDown34_RVV; + ScaleRowDown34_1 = ScaleRowDown34_RVV; + } else { + ScaleRowDown34_0 = ScaleRowDown34_0_Box_RVV; + ScaleRowDown34_1 = ScaleRowDown34_1_Box_RVV; + } + } +#endif for (y = 0; y < dst_height - 2; y += 3) { ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); @@ -687,6 +711,17 @@ static void ScalePlaneDown38(int src_width, } } #endif +#if defined(HAS_SCALEROWDOWN38_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + if (!filtering) { + ScaleRowDown38_3 = ScaleRowDown38_RVV; + ScaleRowDown38_2 = ScaleRowDown38_RVV; + } else { + ScaleRowDown38_3 = ScaleRowDown38_3_Box_RVV; + ScaleRowDown38_2 = ScaleRowDown38_2_Box_RVV; + } + } +#endif for (y = 0; y < dst_height - 2; y += 3) { ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); @@ -904,14 +939,14 @@ static void ScaleAddCols1_16_C(int dst_width, // one pixel of destination using fixed point (16.16) to step // through source, sampling a box of pixel with simple // averaging. -static void ScalePlaneBox(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_ptr, - uint8_t* dst_ptr) { +static int ScalePlaneBox(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr) { int j, k; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -925,6 +960,8 @@ static void ScalePlaneBox(int src_width, { // Allocate a row buffer of uint16_t. align_buffer_64(row16, src_width * 2); + if (!row16) + return 1; void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, const uint16_t* src_ptr, uint8_t* dst_ptr) = (dx & 0xffff) ? 
ScaleAddCols2_C @@ -971,6 +1008,11 @@ static void ScalePlaneBox(int src_width, } } #endif +#if defined(HAS_SCALEADDROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleAddRow = ScaleAddRow_RVV; + } +#endif for (j = 0; j < dst_height; ++j) { int boxheight; @@ -991,16 +1033,17 @@ static void ScalePlaneBox(int src_width, } free_aligned_buffer_64(row16); } + return 0; } -static void ScalePlaneBox_16(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr) { +static int ScalePlaneBox_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr) { int j, k; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -1014,6 +1057,8 @@ static void ScalePlaneBox_16(int src_width, { // Allocate a row buffer of uint32_t. align_buffer_64(row32, src_width * 4); + if (!row32) + return 1; void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, const uint32_t* src_ptr, uint16_t* dst_ptr) = (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C; @@ -1045,18 +1090,19 @@ static void ScalePlaneBox_16(int src_width, } free_aligned_buffer_64(row32); } + return 0; } // Scale plane down with bilinear interpolation. -void ScalePlaneBilinearDown(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_ptr, - uint8_t* dst_ptr, - enum FilterMode filtering) { +static int ScalePlaneBilinearDown(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr, + enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; @@ -1065,6 +1111,8 @@ void ScalePlaneBilinearDown(int src_width, // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. 
// Allocate a row buffer. align_buffer_64(row, src_width); + if (!row) + return 1; const int max_y = (src_height - 1) << 16; int j; @@ -1174,17 +1222,18 @@ void ScalePlaneBilinearDown(int src_width, } } free_aligned_buffer_64(row); + return 0; } -void ScalePlaneBilinearDown_16(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr, - enum FilterMode filtering) { +static int ScalePlaneBilinearDown_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr, + enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; @@ -1193,6 +1242,8 @@ void ScalePlaneBilinearDown_16(int src_width, // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. // Allocate a row buffer. align_buffer_64(row, src_width * 2); + if (!row) + return 1; const int max_y = (src_height - 1) << 16; int j; @@ -1265,18 +1316,19 @@ void ScalePlaneBilinearDown_16(int src_width, } } free_aligned_buffer_64(row); + return 0; } // Scale up down with bilinear interpolation. -void ScalePlaneBilinearUp(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_ptr, - uint8_t* dst_ptr, - enum FilterMode filtering) { +static int ScalePlaneBilinearUp(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr, + enum FilterMode filtering) { int j; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -1375,6 +1427,8 @@ void ScalePlaneBilinearUp(int src_width, // Allocate 2 row buffers. 
const int row_size = (dst_width + 31) & ~31; align_buffer_64(row, row_size * 2); + if (!row) + return 1; uint8_t* rowptr = row; int rowstride = row_size; @@ -1418,6 +1472,7 @@ void ScalePlaneBilinearUp(int src_width, } free_aligned_buffer_64(row); } + return 0; } // Scale plane, horizontally up by 2 times. @@ -1425,20 +1480,21 @@ void ScalePlaneBilinearUp(int src_width, // This is an optimized version for scaling up a plane to 2 times of // its original width, using linear interpolation. // This is used to scale U and V planes of I422 to I444. -void ScalePlaneUp2_Linear(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_ptr, - uint8_t* dst_ptr) { +static void ScalePlaneUp2_Linear(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr) { void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = ScaleRowUp2_Linear_Any_C; int i; int y; int dy; + (void)src_width; // This function can only scale up by 2 times horizontally. assert(src_width == ((dst_width + 1) / 2)); @@ -1465,6 +1521,11 @@ void ScalePlaneUp2_Linear(int src_width, ScaleRowUp = ScaleRowUp2_Linear_Any_NEON; } #endif +#ifdef HAS_SCALEROWUP2_LINEAR_RVV + if (TestCpuFlag(kCpuHasRVV)) { + ScaleRowUp = ScaleRowUp2_Linear_RVV; + } +#endif if (dst_height == 1) { ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr, @@ -1484,19 +1545,20 @@ void ScalePlaneUp2_Linear(int src_width, // This is an optimized version for scaling up a plane to 2 times of // its original size, using bilinear interpolation. // This is used to scale U and V planes of I420 to I444. 
-void ScalePlaneUp2_Bilinear(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_ptr, - uint8_t* dst_ptr) { +static void ScalePlaneUp2_Bilinear(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr) { void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_Any_C; int x; + (void)src_width; // This function can only scale up by 2 times. assert(src_width == ((dst_width + 1) / 2)); assert(src_height == ((dst_height + 1) / 2)); @@ -1524,6 +1586,11 @@ void ScalePlaneUp2_Bilinear(int src_width, Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; } #endif +#ifdef HAS_SCALEROWUP2_BILINEAR_RVV + if (TestCpuFlag(kCpuHasRVV)) { + Scale2RowUp = ScaleRowUp2_Bilinear_RVV; + } +#endif Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width); dst_ptr += dst_stride; @@ -1544,20 +1611,21 @@ void ScalePlaneUp2_Bilinear(int src_width, // its original width, using linear interpolation. // stride is in count of uint16_t. // This is used to scale U and V planes of I210 to I410 and I212 to I412. -void ScalePlaneUp2_12_Linear(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr) { +static void ScalePlaneUp2_12_Linear(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr) { void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, int dst_width) = ScaleRowUp2_Linear_16_Any_C; int i; int y; int dy; + (void)src_width; // This function can only scale up by 2 times horizontally. assert(src_width == ((dst_width + 1) / 2)); @@ -1598,19 +1666,20 @@ void ScalePlaneUp2_12_Linear(int src_width, // its original size, using bilinear interpolation. 
// stride is in count of uint16_t. // This is used to scale U and V planes of I010 to I410 and I012 to I412. -void ScalePlaneUp2_12_Bilinear(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr) { +static void ScalePlaneUp2_12_Bilinear(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr) { void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C; int x; + (void)src_width; // This function can only scale up by 2 times. assert(src_width == ((dst_width + 1) / 2)); assert(src_height == ((dst_height + 1) / 2)); @@ -1645,20 +1714,21 @@ void ScalePlaneUp2_12_Bilinear(int src_width, } } -void ScalePlaneUp2_16_Linear(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr) { +static void ScalePlaneUp2_16_Linear(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr) { void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, int dst_width) = ScaleRowUp2_Linear_16_Any_C; int i; int y; int dy; + (void)src_width; // This function can only scale up by 2 times horizontally. 
assert(src_width == ((dst_width + 1) / 2)); @@ -1694,19 +1764,20 @@ void ScalePlaneUp2_16_Linear(int src_width, } } -void ScalePlaneUp2_16_Bilinear(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr) { +static void ScalePlaneUp2_16_Bilinear(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr) { void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C; int x; + (void)src_width; // This function can only scale up by 2 times. assert(src_width == ((dst_width + 1) / 2)); assert(src_height == ((dst_height + 1) / 2)); @@ -1741,15 +1812,15 @@ void ScalePlaneUp2_16_Bilinear(int src_width, } } -void ScalePlaneBilinearUp_16(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr, - enum FilterMode filtering) { +static int ScalePlaneBilinearUp_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr, + enum FilterMode filtering) { int j; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -1826,10 +1897,11 @@ void ScalePlaneBilinearUp_16(int src_width, // Allocate 2 row buffers. const int row_size = (dst_width + 31) & ~31; align_buffer_64(row, row_size * 4); - - uint16_t* rowptr = (uint16_t*)row; int rowstride = row_size; int lasty = yi; + uint16_t* rowptr = (uint16_t*)row; + if (!row) + return 1; ScaleFilterCols(rowptr, src, dst_width, x, dx); if (src_height > 1) { @@ -1869,6 +1941,7 @@ void ScalePlaneBilinearUp_16(int src_width, } free_aligned_buffer_64(row); } + return 0; } // Scale Plane to/from any dimensions, without interpolation. 
@@ -1953,15 +2026,15 @@ static void ScalePlaneSimple_16(int src_width, // Scale a plane. // This function dispatches to a specialized scaler based on scale factor. LIBYUV_API -void ScalePlane(const uint8_t* src, - int src_stride, - int src_width, - int src_height, - uint8_t* dst, - int dst_stride, - int dst_width, - int dst_height, - enum FilterMode filtering) { +int ScalePlane(const uint8_t* src, + int src_stride, + int src_width, + int src_height, + uint8_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering) { // Simplify filtering when possible. filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, filtering); @@ -1977,7 +2050,7 @@ void ScalePlane(const uint8_t* src, if (dst_width == src_width && dst_height == src_height) { // Straight copy. CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height); - return; + return 0; } if (dst_width == src_width && filtering != kFilterBox) { int dy = 0; @@ -1993,7 +2066,7 @@ void ScalePlane(const uint8_t* src, // Arbitrary scale vertically, but unscaled horizontally. ScalePlaneVertical(src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering); - return; + return 0; } if (dst_width <= Abs(src_width) && dst_height <= src_height) { // Scale down. @@ -2001,69 +2074,67 @@ void ScalePlane(const uint8_t* src, // optimized, 3/4 ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } if (2 * dst_width == src_width && 2 * dst_height == src_height) { // optimized, 1/2 ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } // 3/8 rounded up for odd sized chroma height. 
if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) { // optimized, 3/8 ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } if (4 * dst_width == src_width && 4 * dst_height == src_height && (filtering == kFilterBox || filtering == kFilterNone)) { // optimized, 1/4 ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } } if (filtering == kFilterBox && dst_height * 2 < src_height) { - ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride, - dst_stride, src, dst); - return; + return ScalePlaneBox(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst); } if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) { ScalePlaneUp2_Linear(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); - return; + return 0; } if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width && (filtering == kFilterBilinear || filtering == kFilterBox)) { ScalePlaneUp2_Bilinear(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); - return; + return 0; } if (filtering && dst_height > src_height) { - ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - return; + return ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst, filtering); } if (filtering) { - ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - return; + return ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst, filtering); } ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); + return 0; } LIBYUV_API -void ScalePlane_16(const uint16_t* src, - int src_stride, - 
int src_width, - int src_height, - uint16_t* dst, - int dst_stride, - int dst_width, - int dst_height, - enum FilterMode filtering) { +int ScalePlane_16(const uint16_t* src, + int src_stride, + int src_width, + int src_height, + uint16_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering) { // Simplify filtering when possible. filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, filtering); @@ -2079,7 +2150,7 @@ void ScalePlane_16(const uint16_t* src, if (dst_width == src_width && dst_height == src_height) { // Straight copy. CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height); - return; + return 0; } if (dst_width == src_width && filtering != kFilterBox) { int dy = 0; @@ -2098,7 +2169,7 @@ void ScalePlane_16(const uint16_t* src, // Arbitrary scale vertically, but unscaled horizontally. ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering); - return; + return 0; } if (dst_width <= Abs(src_width) && dst_height <= src_height) { // Scale down. @@ -2106,69 +2177,68 @@ void ScalePlane_16(const uint16_t* src, // optimized, 3/4 ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } if (2 * dst_width == src_width && 2 * dst_height == src_height) { // optimized, 1/2 ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } // 3/8 rounded up for odd sized chroma height. 
if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) { // optimized, 3/8 ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } if (4 * dst_width == src_width && 4 * dst_height == src_height && (filtering == kFilterBox || filtering == kFilterNone)) { // optimized, 1/4 ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } } if (filtering == kFilterBox && dst_height * 2 < src_height) { - ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, src_stride, - dst_stride, src, dst); - return; + return ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst); } if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) { ScalePlaneUp2_16_Linear(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); - return; + return 0; } if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width && (filtering == kFilterBilinear || filtering == kFilterBox)) { ScalePlaneUp2_16_Bilinear(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); - return; + return 0; } if (filtering && dst_height > src_height) { - ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - return; + return ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst, filtering); } if (filtering) { - ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - return; + return ScalePlaneBilinearDown_16(src_width, src_height, dst_width, + dst_height, src_stride, dst_stride, src, + dst, filtering); } ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); + return 0; } LIBYUV_API -void ScalePlane_12(const 
uint16_t* src, - int src_stride, - int src_width, - int src_height, - uint16_t* dst, - int dst_stride, - int dst_width, - int dst_height, - enum FilterMode filtering) { +int ScalePlane_12(const uint16_t* src, + int src_stride, + int src_width, + int src_height, + uint16_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering) { // Simplify filtering when possible. filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, filtering); @@ -2183,17 +2253,17 @@ void ScalePlane_12(const uint16_t* src, if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) { ScalePlaneUp2_12_Linear(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); - return; + return 0; } if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width && (filtering == kFilterBilinear || filtering == kFilterBox)) { ScalePlaneUp2_12_Bilinear(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); - return; + return 0; } - ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride, - dst_width, dst_height, filtering); + return ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride, + dst_width, dst_height, filtering); } // Scale an I420 image. 
@@ -2221,6 +2291,7 @@ int I420Scale(const uint8_t* src_y, int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); + int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || @@ -2228,13 +2299,19 @@ int I420Scale(const uint8_t* src_y, return -1; } - ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, - dst_stride_u, dst_halfwidth, dst_halfheight, filtering); - ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, - dst_stride_v, dst_halfwidth, dst_halfheight, filtering); - return 0; + r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, + dst_stride_u, dst_halfwidth, dst_halfheight, filtering); + if (r != 0) { + return r; + } + r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, + dst_stride_v, dst_halfwidth, dst_halfheight, filtering); + return r; } LIBYUV_API @@ -2259,6 +2336,7 @@ int I420Scale_16(const uint16_t* src_y, int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); + int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || @@ -2266,13 +2344,19 @@ int I420Scale_16(const uint16_t* src_y, return -1; } - ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, - dst_stride_u, dst_halfwidth, dst_halfheight, filtering); - 
ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, - dst_stride_v, dst_halfwidth, dst_halfheight, filtering); - return 0; + r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, + dst_stride_u, dst_halfwidth, dst_halfheight, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, + dst_stride_v, dst_halfwidth, dst_halfheight, filtering); + return r; } LIBYUV_API @@ -2297,6 +2381,7 @@ int I420Scale_12(const uint16_t* src_y, int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); + int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || @@ -2304,13 +2389,19 @@ int I420Scale_12(const uint16_t* src_y, return -1; } - ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, - dst_stride_u, dst_halfwidth, dst_halfheight, filtering); - ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, - dst_stride_v, dst_halfwidth, dst_halfheight, filtering); - return 0; + r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, + dst_stride_u, dst_halfwidth, dst_halfheight, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, + dst_stride_v, dst_halfwidth, dst_halfheight, filtering); + return r; } // Scale an I444 image. 
@@ -2334,19 +2425,27 @@ int I444Scale(const uint8_t* src_y, int dst_width, int dst_height, enum FilterMode filtering) { + int r; + if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { return -1; } - ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u, - dst_width, dst_height, filtering); - ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v, - dst_width, dst_height, filtering); - return 0; + r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u, + dst_stride_u, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v, + dst_stride_v, dst_width, dst_height, filtering); + return r; } LIBYUV_API @@ -2367,19 +2466,27 @@ int I444Scale_16(const uint16_t* src_y, int dst_width, int dst_height, enum FilterMode filtering) { + int r; + if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { return -1; } - ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u, - dst_width, dst_height, filtering); - ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v, - dst_width, dst_height, filtering); - return 0; + r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = 
ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u, + dst_stride_u, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v, + dst_stride_v, dst_width, dst_height, filtering); + return r; } LIBYUV_API @@ -2400,19 +2507,27 @@ int I444Scale_12(const uint16_t* src_y, int dst_width, int dst_height, enum FilterMode filtering) { + int r; + if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { return -1; } - ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane_12(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u, - dst_width, dst_height, filtering); - ScalePlane_12(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v, - dst_width, dst_height, filtering); - return 0; + r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_u, src_stride_u, src_width, src_height, dst_u, + dst_stride_u, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_v, src_stride_v, src_width, src_height, dst_v, + dst_stride_v, dst_width, dst_height, filtering); + return r; } // Scale an I422 image. 
@@ -2438,6 +2553,7 @@ int I422Scale(const uint8_t* src_y, enum FilterMode filtering) { int src_halfwidth = SUBSAMPLE(src_width, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); + int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || @@ -2445,13 +2561,19 @@ int I422Scale(const uint8_t* src_y, return -1; } - ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane(src_u, src_stride_u, src_halfwidth, src_height, dst_u, - dst_stride_u, dst_halfwidth, dst_height, filtering); - ScalePlane(src_v, src_stride_v, src_halfwidth, src_height, dst_v, - dst_stride_v, dst_halfwidth, dst_height, filtering); - return 0; + r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_height, dst_u, + dst_stride_u, dst_halfwidth, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_height, dst_v, + dst_stride_v, dst_halfwidth, dst_height, filtering); + return r; } LIBYUV_API @@ -2474,6 +2596,7 @@ int I422Scale_16(const uint16_t* src_y, enum FilterMode filtering) { int src_halfwidth = SUBSAMPLE(src_width, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); + int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || @@ -2481,13 +2604,19 @@ int I422Scale_16(const uint16_t* src_y, return -1; } - ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_height, dst_u, - dst_stride_u, dst_halfwidth, dst_height, filtering); - ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_height, dst_v, - dst_stride_v, 
dst_halfwidth, dst_height, filtering); - return 0; + r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_height, dst_u, + dst_stride_u, dst_halfwidth, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_height, dst_v, + dst_stride_v, dst_halfwidth, dst_height, filtering); + return r; } LIBYUV_API @@ -2510,6 +2639,7 @@ int I422Scale_12(const uint16_t* src_y, enum FilterMode filtering) { int src_halfwidth = SUBSAMPLE(src_width, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); + int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || @@ -2517,13 +2647,19 @@ int I422Scale_12(const uint16_t* src_y, return -1; } - ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_height, dst_u, - dst_stride_u, dst_halfwidth, dst_height, filtering); - ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_height, dst_v, - dst_stride_v, dst_halfwidth, dst_height, filtering); - return 0; + r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_height, dst_u, + dst_stride_u, dst_halfwidth, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_height, dst_v, + dst_stride_v, dst_halfwidth, dst_height, filtering); + return r; } // Scale an NV12 image. 
@@ -2547,6 +2683,7 @@ int NV12Scale(const uint8_t* src_y, int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); + int r; if (!src_y || !src_uv || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv || @@ -2554,11 +2691,14 @@ int NV12Scale(const uint8_t* src_y, return -1; } - ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - UVScale(src_uv, src_stride_uv, src_halfwidth, src_halfheight, dst_uv, - dst_stride_uv, dst_halfwidth, dst_halfheight, filtering); - return 0; + r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = UVScale(src_uv, src_stride_uv, src_halfwidth, src_halfheight, dst_uv, + dst_stride_uv, dst_halfwidth, dst_halfheight, filtering); + return r; } // Deprecated api diff --git a/files/source/scale_any.cc b/source/scale_any.cc index f6576874..f6576874 100644 --- a/files/source/scale_any.cc +++ b/source/scale_any.cc diff --git a/files/source/scale_argb.cc b/source/scale_argb.cc index ddd8d29e..18bdeb86 100644 --- a/files/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -16,6 +16,7 @@ #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" // For CopyARGB #include "libyuv/row.h" +#include "libyuv/scale_argb.h" #include "libyuv/scale_row.h" #ifdef __cplusplus @@ -127,6 +128,15 @@ static void ScaleARGBDown2(int src_width, } } #endif +#if defined(HAS_SCALEARGBROWDOWN2_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleARGBRowDown2 = + filtering == kFilterNone + ? ScaleARGBRowDown2_RVV + : (filtering == kFilterLinear ? 
ScaleARGBRowDown2Linear_RVV + : ScaleARGBRowDown2Box_RVV); + } +#endif if (filtering == kFilterLinear) { src_stride = 0; @@ -141,22 +151,27 @@ static void ScaleARGBDown2(int src_width, // ScaleARGB ARGB, 1/4 // This is an optimized version for scaling down a ARGB to 1/4 of // its original size. -static void ScaleARGBDown4Box(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_argb, - uint8_t* dst_argb, - int x, - int dx, - int y, - int dy) { +static int ScaleARGBDown4Box(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_argb, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy) { int j; // Allocate 2 rows of ARGB. const int row_size = (dst_width * 2 * 4 + 31) & ~31; + // TODO(fbarchard): Remove this row buffer and implement a ScaleARGBRowDown4 + // but implemented via a 2 pass wrapper that uses a very small array on the + // stack with a horizontal loop. 
align_buffer_64(row, row_size * 2); + if (!row) + return 1; int row_stride = src_stride * (dy >> 16); void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) = @@ -184,6 +199,11 @@ static void ScaleARGBDown4Box(int src_width, } } #endif +#if defined(HAS_SCALEARGBROWDOWN2_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleARGBRowDown2 = ScaleARGBRowDown2Box_RVV; + } +#endif for (j = 0; j < dst_height; ++j) { ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2); @@ -194,6 +214,7 @@ static void ScaleARGBDown4Box(int src_width, dst_argb += dst_stride; } free_aligned_buffer_64(row); + return 0; } // ScaleARGB ARGB Even @@ -263,6 +284,16 @@ static void ScaleARGBDownEven(int src_width, } } #endif +#if defined(HAS_SCALEARGBROWDOWNEVENBOX_RVV) + if (filtering && TestCpuFlag(kCpuHasRVV)) { + ScaleARGBRowDownEven = ScaleARGBRowDownEvenBox_RVV; + } +#endif +#if defined(HAS_SCALEARGBROWDOWNEVEN_RVV) + if (!filtering && TestCpuFlag(kCpuHasRVV)) { + ScaleARGBRowDownEven = ScaleARGBRowDownEven_RVV; + } +#endif if (filtering == kFilterLinear) { src_stride = 0; @@ -275,19 +306,19 @@ static void ScaleARGBDownEven(int src_width, } // Scale ARGB down with bilinear interpolation. -static void ScaleARGBBilinearDown(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_argb, - uint8_t* dst_argb, - int x, - int dx, - int y, - int dy, - enum FilterMode filtering) { +static int ScaleARGBBilinearDown(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_argb, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy, + enum FilterMode filtering) { int j; void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb, ptrdiff_t src_stride, int dst_width, @@ -386,6 +417,8 @@ static void ScaleARGBBilinearDown(int src_width, // Allocate a row of ARGB. 
{ align_buffer_64(row, clip_src_width * 4); + if (!row) + return 1; const int max_y = (src_height - 1) << 16; if (y > max_y) { @@ -409,22 +442,23 @@ static void ScaleARGBBilinearDown(int src_width, } free_aligned_buffer_64(row); } + return 0; } // Scale ARGB up with bilinear interpolation. -static void ScaleARGBBilinearUp(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_argb, - uint8_t* dst_argb, - int x, - int dx, - int y, - int dy, - enum FilterMode filtering) { +static int ScaleARGBBilinearUp(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_argb, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy, + enum FilterMode filtering) { int j; void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb, ptrdiff_t src_stride, int dst_width, @@ -560,6 +594,8 @@ static void ScaleARGBBilinearUp(int src_width, // Allocate 2 rows of ARGB. const int row_size = (dst_width * 4 + 31) & ~31; align_buffer_64(row, row_size * 2); + if (!row) + return 1; uint8_t* rowptr = row; int rowstride = row_size; @@ -603,27 +639,28 @@ static void ScaleARGBBilinearUp(int src_width, } free_aligned_buffer_64(row); } + return 0; } #ifdef YUVSCALEUP // Scale YUV to ARGB up with bilinear interpolation. 
-static void ScaleYUVToARGBBilinearUp(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride_y, - int src_stride_u, - int src_stride_v, - int dst_stride_argb, - const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb, - int x, - int dx, - int y, - int dy, - enum FilterMode filtering) { +static int ScaleYUVToARGBBilinearUp(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride_y, + int src_stride_u, + int src_stride_v, + int dst_stride_argb, + const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy, + enum FilterMode filtering) { int j; void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* rgb_buf, int width) = @@ -825,16 +862,17 @@ static void ScaleYUVToARGBBilinearUp(int src_width, const uint8_t* src_row_u = src_u + uv_yi * (intptr_t)src_stride_u; const uint8_t* src_row_v = src_v + uv_yi * (intptr_t)src_stride_v; - // Allocate 2 rows of ARGB. + // Allocate 1 row of ARGB for source conversion and 2 rows of ARGB + // scaled horizontally to the destination width. const int row_size = (dst_width * 4 + 31) & ~31; - align_buffer_64(row, row_size * 2); - - // Allocate 1 row of ARGB for source conversion. - align_buffer_64(argb_row, src_width * 4); + align_buffer_64(row, row_size * 2 + src_width * 4); + uint8_t* argb_row = row + row_size * 2; uint8_t* rowptr = row; int rowstride = row_size; int lasty = yi; + if (!row) + return 1; // TODO(fbarchard): Convert first 2 rows of YUV to ARGB. ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx); @@ -889,7 +927,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, y += dy; } free_aligned_buffer_64(row); - free_aligned_buffer_64(row_argb); + return 0; } #endif @@ -964,19 +1002,19 @@ static void ScaleARGBSimple(int src_width, // ScaleARGB a ARGB. 
// This function in turn calls a scaling function // suitable for handling the desired resolutions. -static void ScaleARGB(const uint8_t* src, - int src_stride, - int src_width, - int src_height, - uint8_t* dst, - int dst_stride, - int dst_width, - int dst_height, - int clip_x, - int clip_y, - int clip_width, - int clip_height, - enum FilterMode filtering) { +static int ScaleARGB(const uint8_t* src, + int src_stride, + int src_width, + int src_height, + uint8_t* dst, + int dst_stride, + int dst_width, + int dst_height, + int clip_x, + int clip_y, + int clip_width, + int clip_height, + enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; @@ -1021,18 +1059,18 @@ static void ScaleARGB(const uint8_t* src, ScaleARGBDown2(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy, filtering); - return; + return 0; } if (dx == 0x40000 && filtering == kFilterBox) { // Optimized 1/4 box downsample. - ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, x, dx, y, dy); - return; + return ScaleARGBDown4Box(src_width, src_height, clip_width, + clip_height, src_stride, dst_stride, src, + dst, x, dx, y, dy); } ScaleARGBDownEven(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy, filtering); - return; + return 0; } // Optimized odd scale down. ie 3, 5, 7, 9x. if ((dx & 0x10000) && (dy & 0x10000)) { @@ -1041,7 +1079,7 @@ static void ScaleARGB(const uint8_t* src, // Straight copy. ARGBCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4, src_stride, dst, dst_stride, clip_width, clip_height); - return; + return 0; } } } @@ -1050,22 +1088,21 @@ static void ScaleARGB(const uint8_t* src, // Arbitrary scale vertically, but unscaled horizontally. 
ScalePlaneVertical(src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, y, dy, /*bpp=*/4, filtering); - return; + return 0; } if (filtering && dy < 65536) { - ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, x, dx, y, dy, - filtering); - return; + return ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, dy, + filtering); } if (filtering) { - ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, x, dx, y, dy, - filtering); - return; + return ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, dy, + filtering); } ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy); + return 0; } LIBYUV_API @@ -1089,10 +1126,9 @@ int ARGBScaleClip(const uint8_t* src_argb, (clip_y + clip_height) > dst_height) { return -1; } - ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb, - dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width, - clip_height, filtering); - return 0; + return ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb, + dst_stride_argb, dst_width, dst_height, clip_x, clip_y, + clip_width, clip_height, filtering); } // Scale an ARGB image. @@ -1110,10 +1146,9 @@ int ARGBScale(const uint8_t* src_argb, src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) { return -1; } - ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb, - dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, dst_height, - filtering); - return 0; + return ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb, + dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, + dst_height, filtering); } // Scale with YUV conversion to ARGB and clipping. 
@@ -1137,8 +1172,11 @@ int YUVToARGBScaleClip(const uint8_t* src_y, int clip_width, int clip_height, enum FilterMode filtering) { - uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4); int r; + uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4); + if (!argb_buffer) { + return 1; // Out of memory runtime error. + } (void)src_fourcc; // TODO(fbarchard): implement and/or assert. (void)dst_fourcc; I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, diff --git a/files/source/scale_common.cc b/source/scale_common.cc index 77455903..d07a39af 100644 --- a/files/source/scale_common.cc +++ b/source/scale_common.cc @@ -1964,35 +1964,6 @@ void ScaleSlope(int src_width, } #undef CENTERSTART -// Read 8x2 upsample with filtering and write 16x1. -// actually reads an extra pixel, so 9x2. -void ScaleRowUp2_16_C(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width) { - const uint16_t* src2 = src_ptr + src_stride; - - int x; - for (x = 0; x < dst_width - 1; x += 2) { - uint16_t p0 = src_ptr[0]; - uint16_t p1 = src_ptr[1]; - uint16_t p2 = src2[0]; - uint16_t p3 = src2[1]; - dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4; - dst[1] = (p0 * 3 + p1 * 9 + p2 + p3 * 3 + 8) >> 4; - ++src_ptr; - ++src2; - dst += 2; - } - if (dst_width & 1) { - uint16_t p0 = src_ptr[0]; - uint16_t p1 = src_ptr[1]; - uint16_t p2 = src2[0]; - uint16_t p3 = src2[1]; - dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4; - } -} - #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/files/source/scale_gcc.cc b/source/scale_gcc.cc index 17eeffad..17eeffad 100644 --- a/files/source/scale_gcc.cc +++ b/source/scale_gcc.cc diff --git a/files/source/scale_lsx.cc b/source/scale_lsx.cc index bfe5e9fb..bfe5e9fb 100644 --- a/files/source/scale_lsx.cc +++ b/source/scale_lsx.cc diff --git a/files/source/scale_msa.cc b/source/scale_msa.cc index 482a521f..482a521f 100644 --- a/files/source/scale_msa.cc +++ b/source/scale_msa.cc 
diff --git a/files/source/scale_neon.cc b/source/scale_neon.cc index ccc75106..ccc75106 100644 --- a/files/source/scale_neon.cc +++ b/source/scale_neon.cc diff --git a/files/source/scale_neon64.cc b/source/scale_neon64.cc index ad06ee83..7c072380 100644 --- a/files/source/scale_neon64.cc +++ b/source/scale_neon64.cc @@ -1118,101 +1118,6 @@ void ScaleFilterCols_NEON(uint8_t* dst_ptr, #undef LOAD2_DATA8_LANE -// 16x2 -> 16x1 -void ScaleFilterRows_NEON(uint8_t* dst_ptr, - const uint8_t* src_ptr, - ptrdiff_t src_stride, - int dst_width, - int source_y_fraction) { - int y_fraction = 256 - source_y_fraction; - asm volatile( - "cmp %w4, #0 \n" - "b.eq 100f \n" - "add %2, %2, %1 \n" - "cmp %w4, #64 \n" - "b.eq 75f \n" - "cmp %w4, #128 \n" - "b.eq 50f \n" - "cmp %w4, #192 \n" - "b.eq 25f \n" - - "dup v5.8b, %w4 \n" - "dup v4.8b, %w5 \n" - // General purpose row blend. - "1: \n" - "ld1 {v0.16b}, [%1], #16 \n" - "ld1 {v1.16b}, [%2], #16 \n" - "subs %w3, %w3, #16 \n" - "umull v6.8h, v0.8b, v4.8b \n" - "umull2 v7.8h, v0.16b, v4.16b \n" - "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead - "umlal v6.8h, v1.8b, v5.8b \n" - "umlal2 v7.8h, v1.16b, v5.16b \n" - "prfm pldl1keep, [%2, 448] \n" - "rshrn v0.8b, v6.8h, #8 \n" - "rshrn2 v0.16b, v7.8h, #8 \n" - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 1b \n" - "b 99f \n" - - // Blend 25 / 75. - "25: \n" - "ld1 {v0.16b}, [%1], #16 \n" - "ld1 {v1.16b}, [%2], #16 \n" - "subs %w3, %w3, #16 \n" - "urhadd v0.16b, v0.16b, v1.16b \n" - "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead - "urhadd v0.16b, v0.16b, v1.16b \n" - "prfm pldl1keep, [%2, 448] \n" - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 25b \n" - "b 99f \n" - - // Blend 50 / 50. - "50: \n" - "ld1 {v0.16b}, [%1], #16 \n" - "ld1 {v1.16b}, [%2], #16 \n" - "subs %w3, %w3, #16 \n" - "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead - "urhadd v0.16b, v0.16b, v1.16b \n" - "prfm pldl1keep, [%2, 448] \n" - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 50b \n" - "b 99f \n" - - // Blend 75 / 25. 
- "75: \n" - "ld1 {v1.16b}, [%1], #16 \n" - "ld1 {v0.16b}, [%2], #16 \n" - "subs %w3, %w3, #16 \n" - "urhadd v0.16b, v0.16b, v1.16b \n" - "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead - "urhadd v0.16b, v0.16b, v1.16b \n" - "prfm pldl1keep, [%2, 448] \n" - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 75b \n" - "b 99f \n" - - // Blend 100 / 0 - Copy row unchanged. - "100: \n" - "ld1 {v0.16b}, [%1], #16 \n" - "subs %w3, %w3, #16 \n" - "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 100b \n" - - "99: \n" - "st1 {v0.b}[15], [%0] \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(src_stride), // %2 - "+r"(dst_width), // %3 - "+r"(source_y_fraction), // %4 - "+r"(y_fraction) // %5 - : - : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "memory", "cc"); -} - void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, diff --git a/files/source/scale_rgb.cc b/source/scale_rgb.cc index 8db59b56..8db59b56 100644 --- a/files/source/scale_rgb.cc +++ b/source/scale_rgb.cc diff --git a/source/scale_rvv.cc b/source/scale_rvv.cc new file mode 100644 index 00000000..de037e45 --- /dev/null +++ b/source/scale_rvv.cc @@ -0,0 +1,1040 @@ +/* + * Copyright 2023 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * Copyright (c) 2023 SiFive, Inc. All rights reserved. + * + * Contributed by Darren Hsieh <darren.hsieh@sifive.com> + * Contributed by Bruce Lai <bruce.lai@sifive.com> + */ + +#include "libyuv/row.h" +#include "libyuv/scale_row.h" + +// This module is for clang rvv. GCC hasn't supported segment load & store. 
+#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) && \ + defined(__clang__) +#include <assert.h> +#include <riscv_vector.h> +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +#ifdef HAS_SCALEADDROW_RVV +void ScaleAddRow_RVV(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { + size_t w = (size_t)src_width; + do { + size_t vl = __riscv_vsetvl_e8m4(w); + vuint8m4_t v_src = __riscv_vle8_v_u8m4(src_ptr, vl); + vuint16m8_t v_dst = __riscv_vle16_v_u16m8(dst_ptr, vl); + // Use widening multiply-add instead of widening + add + v_dst = __riscv_vwmaccu_vx_u16m8(v_dst, 1, v_src, vl); + __riscv_vse16_v_u16m8(dst_ptr, v_dst, vl); + w -= vl; + src_ptr += vl; + dst_ptr += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEARGBROWDOWN2_RVV +void ScaleARGBRowDown2_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width) { + (void)src_stride; + size_t w = (size_t)dst_width; + const uint64_t* src = (const uint64_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); + do { + size_t vl = __riscv_vsetvl_e64m8(w); + vuint64m8_t v_data = __riscv_vle64_v_u64m8(src, vl); + vuint32m4_t v_dst = __riscv_vnsrl_wx_u32m4(v_data, 32, vl); + __riscv_vse32_v_u32m4(dst, v_dst, vl); + w -= vl; + src += vl; + dst += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEARGBROWDOWN2LINEAR_RVV +void ScaleARGBRowDown2Linear_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width) { + (void)src_stride; + size_t w = (size_t)dst_width; + const uint32_t* src = (const uint32_t*)(src_argb); + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). 
+ asm volatile("csrwi vxrm, 0"); + do { + vuint8m4_t v_odd, v_even, v_dst; + vuint32m4_t v_odd_32, v_even_32; + size_t vl = __riscv_vsetvl_e32m4(w); + __riscv_vlseg2e32_v_u32m4(&v_even_32, &v_odd_32, src, vl); + v_even = __riscv_vreinterpret_v_u32m4_u8m4(v_even_32); + v_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_odd_32); + // Use round-to-nearest-up mode for averaging add + v_dst = __riscv_vaaddu_vv_u8m4(v_even, v_odd, vl * 4); + __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4); + w -= vl; + src += vl * 2; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEARGBROWDOWN2BOX_RVV +void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width) { + size_t w = (size_t)dst_width; + const uint32_t* src0 = (const uint32_t*)(src_argb); + const uint32_t* src1 = (const uint32_t*)(src_argb + src_stride); + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). 
+ asm volatile("csrwi vxrm, 0"); + do { + vuint8m4_t v_row0_odd, v_row0_even, v_row1_odd, v_row1_even, v_dst; + vuint16m8_t v_row0_sum, v_row1_sum, v_dst_16; + vuint32m4_t v_row0_odd_32, v_row0_even_32, v_row1_odd_32, v_row1_even_32; + size_t vl = __riscv_vsetvl_e32m4(w); + __riscv_vlseg2e32_v_u32m4(&v_row0_even_32, &v_row0_odd_32, src0, vl); + __riscv_vlseg2e32_v_u32m4(&v_row1_even_32, &v_row1_odd_32, src1, vl); + v_row0_even = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_even_32); + v_row0_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_odd_32); + v_row1_even = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_even_32); + v_row1_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_odd_32); + v_row0_sum = __riscv_vwaddu_vv_u16m8(v_row0_even, v_row0_odd, vl * 4); + v_row1_sum = __riscv_vwaddu_vv_u16m8(v_row1_even, v_row1_odd, vl * 4); + v_dst_16 = __riscv_vadd_vv_u16m8(v_row0_sum, v_row1_sum, vl * 4); + // Use round-to-nearest-up mode for vnclip + v_dst = __riscv_vnclipu_wx_u8m4(v_dst_16, 2, vl * 4); + __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4); + w -= vl; + src0 += vl * 2; + src1 += vl * 2; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEARGBROWDOWNEVEN_RVV +void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width) { + size_t w = (size_t)dst_width; + const uint32_t* src = (const uint32_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); + const int stride_byte = src_stepx * 4; + do { + size_t vl = __riscv_vsetvl_e32m8(w); + vuint32m8_t v_row = __riscv_vlse32_v_u32m8(src, stride_byte, vl); + __riscv_vse32_v_u32m8(dst, v_row, vl); + w -= vl; + src += vl * src_stepx; + dst += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEARGBROWDOWNEVENBOX_RVV +void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width) { + size_t w = (size_t)dst_width; + const uint32_t* src0 = (const uint32_t*)(src_argb); + const 
uint32_t* src1 = (const uint32_t*)(src_argb + src_stride); + const int stride_byte = src_stepx * 4; + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). + asm volatile("csrwi vxrm, 0"); + do { + vuint8m4_t v_row0_low, v_row0_high, v_row1_low, v_row1_high, v_dst; + vuint16m8_t v_row0_sum, v_row1_sum, v_sum; + vuint32m4_t v_row0_low_32, v_row0_high_32, v_row1_low_32, v_row1_high_32; + size_t vl = __riscv_vsetvl_e32m4(w); + __riscv_vlsseg2e32_v_u32m4(&v_row0_low_32, &v_row0_high_32, src0, + stride_byte, vl); + __riscv_vlsseg2e32_v_u32m4(&v_row1_low_32, &v_row1_high_32, src1, + stride_byte, vl); + v_row0_low = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_low_32); + v_row0_high = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_high_32); + v_row1_low = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_low_32); + v_row1_high = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_high_32); + v_row0_sum = __riscv_vwaddu_vv_u16m8(v_row0_low, v_row0_high, vl * 4); + v_row1_sum = __riscv_vwaddu_vv_u16m8(v_row1_low, v_row1_high, vl * 4); + v_sum = __riscv_vadd_vv_u16m8(v_row0_sum, v_row1_sum, vl * 4); + // Use round-to-nearest-up mode for vnclip + v_dst = __riscv_vnclipu_wx_u8m4(v_sum, 2, vl * 4); + __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4); + w -= vl; + src0 += vl * src_stepx; + src1 += vl * src_stepx; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN2_RVV +void ScaleRowDown2_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + size_t w = (size_t)dst_width; + const uint16_t* src = (const uint16_t*)src_ptr; + (void)src_stride; + do { + size_t vl = __riscv_vsetvl_e16m8(w); + vuint16m8_t v_src = __riscv_vle16_v_u16m8(src, vl); + vuint8m4_t v_dst = __riscv_vnsrl_wx_u8m4(v_src, 8, vl); + __riscv_vse8_v_u8m4(dst, v_dst, vl); + w -= vl; + src += vl; + dst += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN2LINEAR_RVV +void ScaleRowDown2Linear_RVV(const 
uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + size_t w = (size_t)dst_width; + (void)src_stride; + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). + asm volatile("csrwi vxrm, 0"); + do { + vuint8m4_t v_s0, v_s1, v_dst; + size_t vl = __riscv_vsetvl_e8m4(w); + __riscv_vlseg2e8_v_u8m4(&v_s0, &v_s1, src_ptr, vl); + // Use round-to-nearest-up mode for averaging add + v_dst = __riscv_vaaddu_vv_u8m4(v_s0, v_s1, vl); + __riscv_vse8_v_u8m4(dst, v_dst, vl); + w -= vl; + src_ptr += 2 * vl; + dst += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN2BOX_RVV +void ScaleRowDown2Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; + size_t w = (size_t)dst_width; + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). 
+ asm volatile("csrwi vxrm, 0"); + do { + size_t vl = __riscv_vsetvl_e8m4(w); + vuint8m4_t v_s0, v_s1, v_t0, v_t1; + vuint16m8_t v_s01, v_t01, v_st01; + vuint8m4_t v_dst; + __riscv_vlseg2e8_v_u8m4(&v_s0, &v_s1, s, vl); + __riscv_vlseg2e8_v_u8m4(&v_t0, &v_t1, t, vl); + v_s01 = __riscv_vwaddu_vv_u16m8(v_s0, v_s1, vl); + v_t01 = __riscv_vwaddu_vv_u16m8(v_t0, v_t1, vl); + v_st01 = __riscv_vadd_vv_u16m8(v_s01, v_t01, vl); + // Use round-to-nearest-up mode for vnclip + v_dst = __riscv_vnclipu_wx_u8m4(v_st01, 2, vl); + __riscv_vse8_v_u8m4(dst, v_dst, vl); + w -= vl; + s += 2 * vl; + t += 2 * vl; + dst += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN4_RVV +void ScaleRowDown4_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + size_t w = (size_t)dst_width; + (void)src_stride; + do { + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_s0, v_s1, v_s2, v_s3; + __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl); + __riscv_vse8_v_u8m2(dst_ptr, v_s2, vl); + w -= vl; + src_ptr += (4 * vl); + dst_ptr += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN4BOX_RVV +void ScaleRowDown4Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + const uint8_t* src_ptr1 = src_ptr + src_stride; + const uint8_t* src_ptr2 = src_ptr + src_stride * 2; + const uint8_t* src_ptr3 = src_ptr + src_stride * 3; + size_t w = (size_t)dst_width; + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). 
+ asm volatile("csrwi vxrm, 0"); + do { + vuint8m2_t v_s0, v_s1, v_s2, v_s3; + vuint8m2_t v_t0, v_t1, v_t2, v_t3; + vuint8m2_t v_u0, v_u1, v_u2, v_u3; + vuint8m2_t v_v0, v_v1, v_v2, v_v3; + vuint16m4_t v_s01, v_s23, v_t01, v_t23; + vuint16m4_t v_u01, v_u23, v_v01, v_v23; + vuint16m4_t v_st01, v_st23, v_uv01, v_uv23; + vuint16m4_t v_st0123, v_uv0123, v_stuv0123; + vuint8m2_t v_dst; + size_t vl = __riscv_vsetvl_e8m2(w); + + __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl); + v_s01 = __riscv_vwaddu_vv_u16m4(v_s0, v_s1, vl); + + __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, src_ptr1, vl); + v_t01 = __riscv_vwaddu_vv_u16m4(v_t0, v_t1, vl); + + __riscv_vlseg4e8_v_u8m2(&v_u0, &v_u1, &v_u2, &v_u3, src_ptr2, vl); + v_u01 = __riscv_vwaddu_vv_u16m4(v_u0, v_u1, vl); + v_u23 = __riscv_vwaddu_vv_u16m4(v_u2, v_u3, vl); + + v_s23 = __riscv_vwaddu_vv_u16m4(v_s2, v_s3, vl); + v_t23 = __riscv_vwaddu_vv_u16m4(v_t2, v_t3, vl); + v_st01 = __riscv_vadd_vv_u16m4(v_s01, v_t01, vl); + v_st23 = __riscv_vadd_vv_u16m4(v_s23, v_t23, vl); + + __riscv_vlseg4e8_v_u8m2(&v_v0, &v_v1, &v_v2, &v_v3, src_ptr3, vl); + + v_v01 = __riscv_vwaddu_vv_u16m4(v_v0, v_v1, vl); + v_v23 = __riscv_vwaddu_vv_u16m4(v_v2, v_v3, vl); + + v_uv01 = __riscv_vadd_vv_u16m4(v_u01, v_v01, vl); + v_uv23 = __riscv_vadd_vv_u16m4(v_u23, v_v23, vl); + + v_st0123 = __riscv_vadd_vv_u16m4(v_st01, v_st23, vl); + v_uv0123 = __riscv_vadd_vv_u16m4(v_uv01, v_uv23, vl); + v_stuv0123 = __riscv_vadd_vv_u16m4(v_st0123, v_uv0123, vl); + // Use round-to-nearest-up mode for vnclip + v_dst = __riscv_vnclipu_wx_u8m2(v_stuv0123, 4, vl); + __riscv_vse8_v_u8m2(dst_ptr, v_dst, vl); + w -= vl; + src_ptr += 4 * vl; + src_ptr1 += 4 * vl; + src_ptr2 += 4 * vl; + src_ptr3 += 4 * vl; + dst_ptr += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN34_RVV +void ScaleRowDown34_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + size_t w = (size_t)dst_width / 3u; + do { + size_t vl = 
__riscv_vsetvl_e8m2(w); + vuint8m2_t v_s0, v_s1, v_s2, v_s3; + __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl); + __riscv_vsseg3e8_v_u8m2(dst_ptr, v_s0, v_s1, v_s3, vl); + w -= vl; + src_ptr += 4 * vl; + dst_ptr += 3 * vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN34_0_BOX_RVV +void ScaleRowDown34_0_Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + size_t w = (size_t)dst_width / 3u; + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). + asm volatile("csrwi vxrm, 0"); + do { + vuint8m2_t v_s0, v_s1, v_s2, v_s3; + vuint16m4_t v_t0_u16, v_t1_u16, v_t2_u16, v_t3_u16; + vuint8m2_t v_u0, v_u1, v_u2, v_u3; + vuint16m4_t v_u1_u16; + vuint8m2_t v_a0, v_a1, v_a2; + size_t vl = __riscv_vsetvl_e8m2(w); + __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, s, vl); + + if (src_stride == 0) { + v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_s0, 2, vl); + v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_s1, 2, vl); + v_t2_u16 = __riscv_vwaddu_vx_u16m4(v_s2, 2, vl); + v_t3_u16 = __riscv_vwaddu_vx_u16m4(v_s3, 2, vl); + } else { + vuint8m2_t v_t0, v_t1, v_t2, v_t3; + __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, t, vl); + v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_t0, 0, vl); + v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_t1, 0, vl); + v_t2_u16 = __riscv_vwaddu_vx_u16m4(v_t2, 0, vl); + v_t3_u16 = __riscv_vwaddu_vx_u16m4(v_t3, 0, vl); + t += 4 * vl; + } + + v_t0_u16 = __riscv_vwmaccu_vx_u16m4(v_t0_u16, 3, v_s0, vl); + v_t1_u16 = __riscv_vwmaccu_vx_u16m4(v_t1_u16, 3, v_s1, vl); + v_t2_u16 = __riscv_vwmaccu_vx_u16m4(v_t2_u16, 3, v_s2, vl); + v_t3_u16 = __riscv_vwmaccu_vx_u16m4(v_t3_u16, 3, v_s3, vl); + + // Use round-to-nearest-up mode for vnclip & averaging add + v_u0 = __riscv_vnclipu_wx_u8m2(v_t0_u16, 2, vl); + v_u1 = __riscv_vnclipu_wx_u8m2(v_t1_u16, 2, vl); + v_u2 = 
__riscv_vnclipu_wx_u8m2(v_t2_u16, 2, vl); + v_u3 = __riscv_vnclipu_wx_u8m2(v_t3_u16, 2, vl); + + // a0 = (src[0] * 3 + s[1] * 1 + 2) >> 2 + v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_u1, 0, vl); + v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_u0, vl); + v_a0 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl); + + // a1 = (src[1] * 1 + s[2] * 1 + 1) >> 1 + v_a1 = __riscv_vaaddu_vv_u8m2(v_u1, v_u2, vl); + + // a2 = (src[2] * 1 + s[3] * 3 + 2) >> 2 + v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_u2, 0, vl); + v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_u3, vl); + v_a2 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl); + + __riscv_vsseg3e8_v_u8m2(dst_ptr, v_a0, v_a1, v_a2, vl); + + w -= vl; + s += 4 * vl; + dst_ptr += 3 * vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN34_1_BOX_RVV +void ScaleRowDown34_1_Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + size_t w = (size_t)dst_width / 3u; + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). 
+ asm volatile("csrwi vxrm, 0"); + do { + vuint8m2_t v_s0, v_s1, v_s2, v_s3; + vuint8m2_t v_ave0, v_ave1, v_ave2, v_ave3; + vuint16m4_t v_u1_u16; + vuint8m2_t v_a0, v_a1, v_a2; + size_t vl = __riscv_vsetvl_e8m2(w); + __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, s, vl); + + // Use round-to-nearest-up mode for vnclip & averaging add + if (src_stride == 0) { + v_ave0 = __riscv_vaaddu_vv_u8m2(v_s0, v_s0, vl); + v_ave1 = __riscv_vaaddu_vv_u8m2(v_s1, v_s1, vl); + v_ave2 = __riscv_vaaddu_vv_u8m2(v_s2, v_s2, vl); + v_ave3 = __riscv_vaaddu_vv_u8m2(v_s3, v_s3, vl); + } else { + vuint8m2_t v_t0, v_t1, v_t2, v_t3; + __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, t, vl); + v_ave0 = __riscv_vaaddu_vv_u8m2(v_s0, v_t0, vl); + v_ave1 = __riscv_vaaddu_vv_u8m2(v_s1, v_t1, vl); + v_ave2 = __riscv_vaaddu_vv_u8m2(v_s2, v_t2, vl); + v_ave3 = __riscv_vaaddu_vv_u8m2(v_s3, v_t3, vl); + t += 4 * vl; + } + // a0 = (src[0] * 3 + s[1] * 1 + 2) >> 2 + v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_ave1, 0, vl); + v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_ave0, vl); + v_a0 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl); + + // a1 = (src[1] * 1 + s[2] * 1 + 1) >> 1 + v_a1 = __riscv_vaaddu_vv_u8m2(v_ave1, v_ave2, vl); + + // a2 = (src[2] * 1 + s[3] * 3 + 2) >> 2 + v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_ave2, 0, vl); + v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_ave3, vl); + v_a2 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl); + + __riscv_vsseg3e8_v_u8m2(dst_ptr, v_a0, v_a1, v_a2, vl); + + w -= vl; + s += 4 * vl; + dst_ptr += 3 * vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN38_RVV +void ScaleRowDown38_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + size_t w = (size_t)dst_width / 3u; + (void)src_stride; + assert(dst_width % 3 == 0); + do { + vuint8m1_t v_s0, v_s1, v_s2, v_s3, v_s4, v_s5, v_s6, v_s7; + size_t vl = __riscv_vsetvl_e8m1(w); + __riscv_vlseg8e8_v_u8m1(&v_s0, &v_s1, &v_s2, &v_s3, &v_s4, &v_s5, &v_s6, + &v_s7, src_ptr, vl); 
+ __riscv_vsseg3e8_v_u8m1(dst_ptr, v_s0, v_s3, v_s6, vl); + w -= vl; + src_ptr += 8 * vl; + dst_ptr += 3 * vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN38_2_BOX_RVV +void ScaleRowDown38_2_Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + size_t w = (size_t)dst_width / 3u; + const uint16_t coeff_a = (65536u / 6u); + const uint16_t coeff_b = (65536u / 4u); + assert((dst_width % 3 == 0) && (dst_width > 0)); + do { + vuint8m1_t v_s0, v_s1, v_s2, v_s3, v_s4, v_s5, v_s6, v_s7; + vuint8m1_t v_t0, v_t1, v_t2, v_t3, v_t4, v_t5, v_t6, v_t7; + vuint16m2_t v_e0, v_e1, v_e2, v_e; + vuint16m2_t v_f0, v_f1, v_f2, v_f; + vuint16m2_t v_g0, v_g1, v_g; + vuint8m1_t v_dst_e, v_dst_f, v_dst_g; + size_t vl = __riscv_vsetvl_e8m1(w); + // s: e00, e10, e20, f00, f10, f20, g00, g10 + // t: e01, e11, e21, f01, f11, f21, g01, g11 + __riscv_vlseg8e8_v_u8m1(&v_s0, &v_s1, &v_s2, &v_s3, &v_s4, &v_s5, &v_s6, + &v_s7, src_ptr, vl); + __riscv_vlseg8e8_v_u8m1(&v_t0, &v_t1, &v_t2, &v_t3, &v_t4, &v_t5, &v_t6, + &v_t7, src_ptr + src_stride, vl); + // Calculate sum of [e00, e21] to v_e + // Calculate sum of [f00, f21] to v_f + // Calculate sum of [g00, g11] to v_g + v_e0 = __riscv_vwaddu_vv_u16m2(v_s0, v_t0, vl); + v_e1 = __riscv_vwaddu_vv_u16m2(v_s1, v_t1, vl); + v_e2 = __riscv_vwaddu_vv_u16m2(v_s2, v_t2, vl); + v_f0 = __riscv_vwaddu_vv_u16m2(v_s3, v_t3, vl); + v_f1 = __riscv_vwaddu_vv_u16m2(v_s4, v_t4, vl); + v_f2 = __riscv_vwaddu_vv_u16m2(v_s5, v_t5, vl); + v_g0 = __riscv_vwaddu_vv_u16m2(v_s6, v_t6, vl); + v_g1 = __riscv_vwaddu_vv_u16m2(v_s7, v_t7, vl); + + v_e0 = __riscv_vadd_vv_u16m2(v_e0, v_e1, vl); + v_f0 = __riscv_vadd_vv_u16m2(v_f0, v_f1, vl); + v_e = __riscv_vadd_vv_u16m2(v_e0, v_e2, vl); + v_f = __riscv_vadd_vv_u16m2(v_f0, v_f2, vl); + v_g = __riscv_vadd_vv_u16m2(v_g0, v_g1, vl); + + // Average in 16-bit fixed-point + v_e = __riscv_vmulhu_vx_u16m2(v_e, coeff_a, vl); + v_f = __riscv_vmulhu_vx_u16m2(v_f, coeff_a, vl); + v_g = 
__riscv_vmulhu_vx_u16m2(v_g, coeff_b, vl); + + v_dst_e = __riscv_vnsrl_wx_u8m1(v_e, 0, vl); + v_dst_f = __riscv_vnsrl_wx_u8m1(v_f, 0, vl); + v_dst_g = __riscv_vnsrl_wx_u8m1(v_g, 0, vl); + + __riscv_vsseg3e8_v_u8m1(dst_ptr, v_dst_e, v_dst_f, v_dst_g, vl); + w -= vl; + src_ptr += 8 * vl; + dst_ptr += 3 * vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN38_3_BOX_RVV +void ScaleRowDown38_3_Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + size_t w = (size_t)dst_width / 3u; + const uint16_t coeff_a = (65536u / 9u); + const uint16_t coeff_b = (65536u / 6u); + assert((dst_width % 3 == 0) && (dst_width > 0)); + do { + vuint8m1_t v_s0, v_s1, v_s2, v_s3, v_s4, v_s5, v_s6, v_s7; + vuint8m1_t v_t0, v_t1, v_t2, v_t3, v_t4, v_t5, v_t6, v_t7; + vuint8m1_t v_u0, v_u1, v_u2, v_u3, v_u4, v_u5, v_u6, v_u7; + vuint16m2_t v_e0, v_e1, v_e2, v_e3, v_e4, v_e; + vuint16m2_t v_f0, v_f1, v_f2, v_f3, v_f4, v_f; + vuint16m2_t v_g0, v_g1, v_g2, v_g; + vuint8m1_t v_dst_e, v_dst_f, v_dst_g; + size_t vl = __riscv_vsetvl_e8m1(w); + // s: e00, e10, e20, f00, f10, f20, g00, g10 + // t: e01, e11, e21, f01, f11, f21, g01, g11 + // u: e02, e12, e22, f02, f12, f22, g02, g12 + __riscv_vlseg8e8_v_u8m1(&v_s0, &v_s1, &v_s2, &v_s3, &v_s4, &v_s5, &v_s6, + &v_s7, src_ptr, vl); + __riscv_vlseg8e8_v_u8m1(&v_t0, &v_t1, &v_t2, &v_t3, &v_t4, &v_t5, &v_t6, + &v_t7, src_ptr + src_stride, vl); + __riscv_vlseg8e8_v_u8m1(&v_u0, &v_u1, &v_u2, &v_u3, &v_u4, &v_u5, &v_u6, + &v_u7, src_ptr + 2 * src_stride, vl); + // Calculate sum of [e00, e22] + v_e0 = __riscv_vwaddu_vv_u16m2(v_s0, v_t0, vl); + v_e1 = __riscv_vwaddu_vv_u16m2(v_s1, v_t1, vl); + v_e2 = __riscv_vwaddu_vv_u16m2(v_s2, v_t2, vl); + v_e3 = __riscv_vwaddu_vv_u16m2(v_u0, v_u1, vl); + v_e4 = __riscv_vwaddu_vx_u16m2(v_u2, 0, vl); + + v_e0 = __riscv_vadd_vv_u16m2(v_e0, v_e1, vl); + v_e2 = __riscv_vadd_vv_u16m2(v_e2, v_e3, vl); + v_e0 = __riscv_vadd_vv_u16m2(v_e0, v_e4, vl); + v_e = __riscv_vadd_vv_u16m2(v_e0, 
v_e2, vl); + // Calculate sum of [f00, f22] + v_f0 = __riscv_vwaddu_vv_u16m2(v_s3, v_t3, vl); + v_f1 = __riscv_vwaddu_vv_u16m2(v_s4, v_t4, vl); + v_f2 = __riscv_vwaddu_vv_u16m2(v_s5, v_t5, vl); + v_f3 = __riscv_vwaddu_vv_u16m2(v_u3, v_u4, vl); + v_f4 = __riscv_vwaddu_vx_u16m2(v_u5, 0, vl); + + v_f0 = __riscv_vadd_vv_u16m2(v_f0, v_f1, vl); + v_f2 = __riscv_vadd_vv_u16m2(v_f2, v_f3, vl); + v_f0 = __riscv_vadd_vv_u16m2(v_f0, v_f4, vl); + v_f = __riscv_vadd_vv_u16m2(v_f0, v_f2, vl); + // Calculate sum of [g00, g12] + v_g0 = __riscv_vwaddu_vv_u16m2(v_s6, v_t6, vl); + v_g1 = __riscv_vwaddu_vv_u16m2(v_s7, v_t7, vl); + v_g2 = __riscv_vwaddu_vv_u16m2(v_u6, v_u7, vl); + + v_g = __riscv_vadd_vv_u16m2(v_g0, v_g1, vl); + v_g = __riscv_vadd_vv_u16m2(v_g, v_g2, vl); + + // Average in 16-bit fixed-point + v_e = __riscv_vmulhu_vx_u16m2(v_e, coeff_a, vl); + v_f = __riscv_vmulhu_vx_u16m2(v_f, coeff_a, vl); + v_g = __riscv_vmulhu_vx_u16m2(v_g, coeff_b, vl); + + v_dst_e = __riscv_vnsrl_wx_u8m1(v_e, 0, vl); + v_dst_f = __riscv_vnsrl_wx_u8m1(v_f, 0, vl); + v_dst_g = __riscv_vnsrl_wx_u8m1(v_g, 0, vl); + __riscv_vsseg3e8_v_u8m1(dst_ptr, v_dst_e, v_dst_f, v_dst_g, vl); + w -= vl; + src_ptr += 8 * vl; + dst_ptr += 3 * vl; + } while (w > 0); +} +#endif + +// ScaleRowUp2_(Bi)linear_RVV function is equal to other platforms' +// ScaleRowUp2_(Bi)linear_Any_XXX. We process entire row in this function. Other +// platforms only implement non-edge part of image and process edge with scalar. 
+ +#ifdef HAS_SCALEROWUP2_LINEAR_RVV +void ScaleRowUp2_Linear_RVV(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width) { + size_t work_width = (size_t)dst_width - 1u; + size_t src_width = work_width >> 1u; + const uint8_t* work_src_ptr = src_ptr; + uint8_t* work_dst_ptr = dst_ptr + 1; + size_t vl = __riscv_vsetvlmax_e8m4(); + vuint8m4_t v_3 = __riscv_vmv_v_x_u8m4(3, vl); + dst_ptr[0] = src_ptr[0]; + while (src_width > 0) { + vuint8m4_t v_src0, v_src1, v_dst_odd, v_dst_even; + vuint16m8_t v_src0_u16, v_src1_u16; + size_t vl = __riscv_vsetvl_e8m4(src_width); + v_src0 = __riscv_vle8_v_u8m4(work_src_ptr, vl); + v_src1 = __riscv_vle8_v_u8m4(work_src_ptr + 1, vl); + + v_src0_u16 = __riscv_vwaddu_vx_u16m8(v_src0, 2, vl); + v_src1_u16 = __riscv_vwaddu_vx_u16m8(v_src1, 2, vl); + v_src0_u16 = __riscv_vwmaccu_vv_u16m8(v_src0_u16, v_3, v_src1, vl); + v_src1_u16 = __riscv_vwmaccu_vv_u16m8(v_src1_u16, v_3, v_src0, vl); + + v_dst_odd = __riscv_vnsrl_wx_u8m4(v_src0_u16, 2, vl); + v_dst_even = __riscv_vnsrl_wx_u8m4(v_src1_u16, 2, vl); + + __riscv_vsseg2e8_v_u8m4(work_dst_ptr, v_dst_even, v_dst_odd, vl); + + src_width -= vl; + work_src_ptr += vl; + work_dst_ptr += 2 * vl; + } + dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2]; +} +#endif + +#ifdef HAS_SCALEROWUP2_BILINEAR_RVV +void ScaleRowUp2_Bilinear_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width) { + size_t work_width = ((size_t)dst_width - 1u) & ~1u; + size_t src_width = work_width >> 1u; + const uint8_t* work_s = src_ptr; + const uint8_t* work_t = src_ptr + src_stride; + const uint8_t* s = work_s; + const uint8_t* t = work_t; + uint8_t* d = dst_ptr; + uint8_t* e = dst_ptr + dst_stride; + uint8_t* work_d = d + 1; + uint8_t* work_e = e + 1; + size_t vl = __riscv_vsetvlmax_e16m4(); + vuint16m4_t v_3_u16 = __riscv_vmv_v_x_u16m4(3, vl); + vuint8m2_t v_3_u8 = __riscv_vmv_v_x_u8m2(3, vl); + d[0] = (3 * s[0] + t[0] + 2) >> 2; + e[0] = (s[0] + 3 * t[0] + 2) >> 
2; + while (src_width > 0) { + vuint8m2_t v_s0, v_s1, v_t0, v_t1; + vuint16m4_t v_s0_u16, v_s1_u16, v_t0_u16, v_t1_u16; + vuint16m4_t v_t0_u16_, v_t1_u16_; + vuint8m2_t v_dst0_even, v_dst0_odd, v_dst1_even, v_dst1_odd; + size_t vl = __riscv_vsetvl_e8m2(src_width); + v_s0 = __riscv_vle8_v_u8m2(work_s, vl); + v_s1 = __riscv_vle8_v_u8m2(work_s + 1, vl); + + v_s0_u16 = __riscv_vwaddu_vx_u16m4(v_s0, 2, vl); + v_s1_u16 = __riscv_vwaddu_vx_u16m4(v_s1, 2, vl); + v_s0_u16 = __riscv_vwmaccu_vv_u16m4(v_s0_u16, v_3_u8, v_s1, vl); + v_s1_u16 = __riscv_vwmaccu_vv_u16m4(v_s1_u16, v_3_u8, v_s0, vl); + + v_t0 = __riscv_vle8_v_u8m2(work_t, vl); + v_t1 = __riscv_vle8_v_u8m2(work_t + 1, vl); + + v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_t0, 2, vl); + v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_t1, 2, vl); + v_t0_u16 = __riscv_vwmaccu_vv_u16m4(v_t0_u16, v_3_u8, v_t1, vl); + v_t1_u16 = __riscv_vwmaccu_vv_u16m4(v_t1_u16, v_3_u8, v_t0, vl); + + v_t0_u16_ = __riscv_vmv_v_v_u16m4(v_t0_u16, vl); + v_t1_u16_ = __riscv_vmv_v_v_u16m4(v_t1_u16, vl); + + v_t0_u16 = __riscv_vmacc_vv_u16m4(v_t0_u16, v_3_u16, v_s0_u16, vl); + v_t1_u16 = __riscv_vmacc_vv_u16m4(v_t1_u16, v_3_u16, v_s1_u16, vl); + v_s0_u16 = __riscv_vmacc_vv_u16m4(v_s0_u16, v_3_u16, v_t0_u16_, vl); + v_s1_u16 = __riscv_vmacc_vv_u16m4(v_s1_u16, v_3_u16, v_t1_u16_, vl); + + v_dst0_odd = __riscv_vnsrl_wx_u8m2(v_t0_u16, 4, vl); + v_dst0_even = __riscv_vnsrl_wx_u8m2(v_t1_u16, 4, vl); + v_dst1_odd = __riscv_vnsrl_wx_u8m2(v_s0_u16, 4, vl); + v_dst1_even = __riscv_vnsrl_wx_u8m2(v_s1_u16, 4, vl); + + __riscv_vsseg2e8_v_u8m2(work_d, v_dst0_even, v_dst0_odd, vl); + __riscv_vsseg2e8_v_u8m2(work_e, v_dst1_even, v_dst1_odd, vl); + + src_width -= vl; + work_s += vl; + work_t += vl; + work_d += 2 * vl; + work_e += 2 * vl; + } + d[dst_width - 1] = + (3 * s[(dst_width - 1) / 2] + t[(dst_width - 1) / 2] + 2) >> 2; + e[dst_width - 1] = + (s[(dst_width - 1) / 2] + 3 * t[(dst_width - 1) / 2] + 2) >> 2; +} +#endif + +#ifdef HAS_SCALEUVROWDOWN2_RVV +void 
ScaleUVRowDown2_RVV(const uint8_t* src_uv, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width) { + size_t w = (size_t)dst_width; + const uint32_t* src = (const uint32_t*)src_uv; + uint16_t* dst = (uint16_t*)dst_uv; + (void)src_stride; + do { + size_t vl = __riscv_vsetvl_e32m8(w); + vuint32m8_t v_data = __riscv_vle32_v_u32m8(src, vl); + vuint16m4_t v_u1v1 = __riscv_vnsrl_wx_u16m4(v_data, 16, vl); + __riscv_vse16_v_u16m4(dst, v_u1v1, vl); + w -= vl; + src += vl; + dst += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEUVROWDOWN2LINEAR_RVV +void ScaleUVRowDown2Linear_RVV(const uint8_t* src_uv, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width) { + size_t w = (size_t)dst_width; + const uint16_t* src = (const uint16_t*)src_uv; + (void)src_stride; + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). + asm volatile("csrwi vxrm, 0"); + do { + vuint8m4_t v_u0v0, v_u1v1, v_avg; + vuint16m4_t v_u0v0_16, v_u1v1_16; + size_t vl = __riscv_vsetvl_e16m4(w); + __riscv_vlseg2e16_v_u16m4(&v_u0v0_16, &v_u1v1_16, src, vl); + v_u0v0 = __riscv_vreinterpret_v_u16m4_u8m4(v_u0v0_16); + v_u1v1 = __riscv_vreinterpret_v_u16m4_u8m4(v_u1v1_16); + // Use round-to-nearest-up mode for averaging add + v_avg = __riscv_vaaddu_vv_u8m4(v_u0v0, v_u1v1, vl * 2); + __riscv_vse8_v_u8m4(dst_uv, v_avg, vl * 2); + w -= vl; + src += vl * 2; + dst_uv += vl * 2; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEUVROWDOWN2BOX_RVV +void ScaleUVRowDown2Box_RVV(const uint8_t* src_uv, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width) { + const uint8_t* src_uv_row1 = src_uv + src_stride; + size_t w = (size_t)dst_width; + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). 
+ asm volatile("csrwi vxrm, 0"); + do { + vuint8m2_t v_u0_row0, v_v0_row0, v_u1_row0, v_v1_row0; + vuint8m2_t v_u0_row1, v_v0_row1, v_u1_row1, v_v1_row1; + vuint16m4_t v_u0u1_row0, v_u0u1_row1, v_v0v1_row0, v_v0v1_row1; + vuint16m4_t v_sum0, v_sum1; + vuint8m2_t v_dst_u, v_dst_v; + size_t vl = __riscv_vsetvl_e8m2(w); + + __riscv_vlseg4e8_v_u8m2(&v_u0_row0, &v_v0_row0, &v_u1_row0, &v_v1_row0, + src_uv, vl); + __riscv_vlseg4e8_v_u8m2(&v_u0_row1, &v_v0_row1, &v_u1_row1, &v_v1_row1, + src_uv_row1, vl); + + v_u0u1_row0 = __riscv_vwaddu_vv_u16m4(v_u0_row0, v_u1_row0, vl); + v_u0u1_row1 = __riscv_vwaddu_vv_u16m4(v_u0_row1, v_u1_row1, vl); + v_v0v1_row0 = __riscv_vwaddu_vv_u16m4(v_v0_row0, v_v1_row0, vl); + v_v0v1_row1 = __riscv_vwaddu_vv_u16m4(v_v0_row1, v_v1_row1, vl); + + v_sum0 = __riscv_vadd_vv_u16m4(v_u0u1_row0, v_u0u1_row1, vl); + v_sum1 = __riscv_vadd_vv_u16m4(v_v0v1_row0, v_v0v1_row1, vl); + // Use round-to-nearest-up mode for vnclip + v_dst_u = __riscv_vnclipu_wx_u8m2(v_sum0, 2, vl); + v_dst_v = __riscv_vnclipu_wx_u8m2(v_sum1, 2, vl); + + __riscv_vsseg2e8_v_u8m2(dst_uv, v_dst_u, v_dst_v, vl); + + dst_uv += 2 * vl; + src_uv += 4 * vl; + w -= vl; + src_uv_row1 += 4 * vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEUVROWDOWN4_RVV +void ScaleUVRowDown4_RVV(const uint8_t* src_uv, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_uv, + int dst_width) { + // Overflow will never happen here, since sizeof(size_t)/sizeof(int)=2. + // dst_width = src_width / 4 and src_width is also int. 
+ size_t w = (size_t)dst_width * 8; + (void)src_stride; + (void)src_stepx; + do { + size_t vl = __riscv_vsetvl_e8m8(w); + vuint8m8_t v_row = __riscv_vle8_v_u8m8(src_uv, vl); + vuint64m8_t v_row_64 = __riscv_vreinterpret_v_u8m8_u64m8(v_row); + // Narrowing without clipping + vuint32m4_t v_tmp = __riscv_vncvt_x_x_w_u32m4(v_row_64, vl / 8); + vuint16m2_t v_dst_16 = __riscv_vncvt_x_x_w_u16m2(v_tmp, vl / 8); + vuint8m2_t v_dst = __riscv_vreinterpret_v_u16m2_u8m2(v_dst_16); + __riscv_vse8_v_u8m2(dst_uv, v_dst, vl / 4); + w -= vl; + src_uv += vl; + dst_uv += vl / 4; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEUVROWDOWNEVEN_RVV +void ScaleUVRowDownEven_RVV(const uint8_t* src_uv, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_uv, + int dst_width) { + size_t w = (size_t)dst_width; + const ptrdiff_t stride_byte = (ptrdiff_t)src_stepx * 2; + const uint16_t* src = (const uint16_t*)(src_uv); + uint16_t* dst = (uint16_t*)(dst_uv); + (void)src_stride; + do { + size_t vl = __riscv_vsetvl_e16m8(w); + vuint16m8_t v_row = __riscv_vlse16_v_u16m8(src, stride_byte, vl); + __riscv_vse16_v_u16m8(dst, v_row, vl); + w -= vl; + src += vl * src_stepx; + dst += vl; + } while (w > 0); +} +#endif + +// ScaleUVRowUp2_(Bi)linear_RVV function is equal to other platforms' +// ScaleUVRowUp2_(Bi)linear_Any_XXX. We process entire row in this function. +// Other platforms only implement non-edge part of image and process edge with +// scalar. 
+ +#ifdef HAS_SCALEUVROWUP2_LINEAR_RVV +void ScaleUVRowUp2_Linear_RVV(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width) { + size_t work_width = ((size_t)dst_width - 1u) & ~1u; + uint16_t* work_dst_ptr = (uint16_t*)dst_ptr + 1; + const uint8_t* work_src_ptr = src_ptr; + size_t vl = __riscv_vsetvlmax_e8m4(); + vuint8m4_t v_3_u8 = __riscv_vmv_v_x_u8m4(3, vl); + dst_ptr[0] = src_ptr[0]; + dst_ptr[1] = src_ptr[1]; + while (work_width > 0) { + vuint8m4_t v_uv0, v_uv1, v_dst_odd_u8, v_dst_even_u8; + vuint16m4_t v_dst_odd, v_dst_even; + vuint16m8_t v_uv0_u16, v_uv1_u16; + size_t vl = __riscv_vsetvl_e8m4(work_width); + v_uv0 = __riscv_vle8_v_u8m4(work_src_ptr, vl); + v_uv1 = __riscv_vle8_v_u8m4(work_src_ptr + 2, vl); + + v_uv0_u16 = __riscv_vwaddu_vx_u16m8(v_uv0, 2, vl); + v_uv1_u16 = __riscv_vwaddu_vx_u16m8(v_uv1, 2, vl); + + v_uv0_u16 = __riscv_vwmaccu_vv_u16m8(v_uv0_u16, v_3_u8, v_uv1, vl); + v_uv1_u16 = __riscv_vwmaccu_vv_u16m8(v_uv1_u16, v_3_u8, v_uv0, vl); + + v_dst_odd_u8 = __riscv_vnsrl_wx_u8m4(v_uv0_u16, 2, vl); + v_dst_even_u8 = __riscv_vnsrl_wx_u8m4(v_uv1_u16, 2, vl); + + v_dst_even = __riscv_vreinterpret_v_u8m4_u16m4(v_dst_even_u8); + v_dst_odd = __riscv_vreinterpret_v_u8m4_u16m4(v_dst_odd_u8); + + __riscv_vsseg2e16_v_u16m4(work_dst_ptr, v_dst_even, v_dst_odd, vl / 2); + + work_width -= vl; + work_src_ptr += vl; + work_dst_ptr += vl; + } + dst_ptr[2 * dst_width - 2] = src_ptr[((dst_width + 1) & ~1) - 2]; + dst_ptr[2 * dst_width - 1] = src_ptr[((dst_width + 1) & ~1) - 1]; +} +#endif + +#ifdef HAS_SCALEUVROWUP2_BILINEAR_RVV +void ScaleUVRowUp2_Bilinear_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width) { + size_t work_width = ((size_t)dst_width - 1u) & ~1u; + const uint8_t* work_s = src_ptr; + const uint8_t* work_t = src_ptr + src_stride; + const uint8_t* s = work_s; + const uint8_t* t = work_t; + uint8_t* d = dst_ptr; + uint8_t* e = dst_ptr + dst_stride; + uint16_t* work_d = (uint16_t*)d + 
1; + uint16_t* work_e = (uint16_t*)e + 1; + size_t vl = __riscv_vsetvlmax_e16m4(); + vuint16m4_t v_3_u16 = __riscv_vmv_v_x_u16m4(3, vl); + vuint8m2_t v_3_u8 = __riscv_vmv_v_x_u8m2(3, vl); + d[0] = (3 * s[0] + t[0] + 2) >> 2; + e[0] = (s[0] + 3 * t[0] + 2) >> 2; + d[1] = (3 * s[1] + t[1] + 2) >> 2; + e[1] = (s[1] + 3 * t[1] + 2) >> 2; + while (work_width > 0) { + vuint8m2_t v_s0, v_s1, v_t0, v_t1; + vuint16m4_t v_s0_u16, v_s1_u16, v_t0_u16, v_t1_u16; + vuint16m4_t v_t0_u16_, v_t1_u16_; + vuint8m2_t v_dst0_odd_u8, v_dst0_even_u8, v_dst1_odd_u8, v_dst1_even_u8; + vuint16m2_t v_dst0_even, v_dst0_odd, v_dst1_even, v_dst1_odd; + size_t vl = __riscv_vsetvl_e8m2(work_width); + v_s0 = __riscv_vle8_v_u8m2(work_s, vl); + v_s1 = __riscv_vle8_v_u8m2(work_s + 2, vl); + + v_s0_u16 = __riscv_vwaddu_vx_u16m4(v_s0, 2, vl); + v_s1_u16 = __riscv_vwaddu_vx_u16m4(v_s1, 2, vl); + v_s0_u16 = __riscv_vwmaccu_vv_u16m4(v_s0_u16, v_3_u8, v_s1, vl); + v_s1_u16 = __riscv_vwmaccu_vv_u16m4(v_s1_u16, v_3_u8, v_s0, vl); + + v_t0 = __riscv_vle8_v_u8m2(work_t, vl); + v_t1 = __riscv_vle8_v_u8m2(work_t + 2, vl); + + v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_t0, 2, vl); + v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_t1, 2, vl); + v_t0_u16 = __riscv_vwmaccu_vv_u16m4(v_t0_u16, v_3_u8, v_t1, vl); + v_t1_u16 = __riscv_vwmaccu_vv_u16m4(v_t1_u16, v_3_u8, v_t0, vl); + + v_t0_u16_ = __riscv_vmv_v_v_u16m4(v_t0_u16, vl); + v_t1_u16_ = __riscv_vmv_v_v_u16m4(v_t1_u16, vl); + + v_t0_u16 = __riscv_vmacc_vv_u16m4(v_t0_u16, v_3_u16, v_s0_u16, vl); + v_t1_u16 = __riscv_vmacc_vv_u16m4(v_t1_u16, v_3_u16, v_s1_u16, vl); + v_s0_u16 = __riscv_vmacc_vv_u16m4(v_s0_u16, v_3_u16, v_t0_u16_, vl); + v_s1_u16 = __riscv_vmacc_vv_u16m4(v_s1_u16, v_3_u16, v_t1_u16_, vl); + + v_dst0_odd_u8 = __riscv_vnsrl_wx_u8m2(v_t0_u16, 4, vl); + v_dst0_even_u8 = __riscv_vnsrl_wx_u8m2(v_t1_u16, 4, vl); + v_dst1_odd_u8 = __riscv_vnsrl_wx_u8m2(v_s0_u16, 4, vl); + v_dst1_even_u8 = __riscv_vnsrl_wx_u8m2(v_s1_u16, 4, vl); + + v_dst0_even = 
__riscv_vreinterpret_v_u8m2_u16m2(v_dst0_even_u8); + v_dst0_odd = __riscv_vreinterpret_v_u8m2_u16m2(v_dst0_odd_u8); + v_dst1_even = __riscv_vreinterpret_v_u8m2_u16m2(v_dst1_even_u8); + v_dst1_odd = __riscv_vreinterpret_v_u8m2_u16m2(v_dst1_odd_u8); + + __riscv_vsseg2e16_v_u16m2(work_d, v_dst0_even, v_dst0_odd, vl / 2); + __riscv_vsseg2e16_v_u16m2(work_e, v_dst1_even, v_dst1_odd, vl / 2); + + work_width -= vl; + work_s += vl; + work_t += vl; + work_d += vl; + work_e += vl; + } + d[2 * dst_width - 2] = + (3 * s[((dst_width + 1) & ~1) - 2] + t[((dst_width + 1) & ~1) - 2] + 2) >> + 2; + e[2 * dst_width - 2] = + (s[((dst_width + 1) & ~1) - 2] + 3 * t[((dst_width + 1) & ~1) - 2] + 2) >> + 2; + d[2 * dst_width - 1] = + (3 * s[((dst_width + 1) & ~1) - 1] + t[((dst_width + 1) & ~1) - 1] + 2) >> + 2; + e[2 * dst_width - 1] = + (s[((dst_width + 1) & ~1) - 1] + 3 * t[((dst_width + 1) & ~1) - 1] + 2) >> + 2; +} +#endif + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif + +#endif // !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) && + // defined(__clang__) diff --git a/files/source/scale_uv.cc b/source/scale_uv.cc index 1556071d..0931c89a 100644 --- a/files/source/scale_uv.cc +++ b/source/scale_uv.cc @@ -128,6 +128,15 @@ static void ScaleUVDown2(int src_width, } } #endif +#if defined(HAS_SCALEUVROWDOWN2_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleUVRowDown2 = + filtering == kFilterNone + ? ScaleUVRowDown2_RVV + : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_RVV + : ScaleUVRowDown2Box_RVV); + } +#endif // This code is not enabled. Only box filter is available at this time. #if defined(HAS_SCALEUVROWDOWN2_SSSE3) @@ -179,22 +188,24 @@ static void ScaleUVDown2(int src_width, // This is an optimized version for scaling down a UV to 1/4 of // its original size. 
#if HAS_SCALEUVDOWN4BOX -static void ScaleUVDown4Box(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_uv, - uint8_t* dst_uv, - int x, - int dx, - int y, - int dy) { +static int ScaleUVDown4Box(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_uv, + uint8_t* dst_uv, + int x, + int dx, + int y, + int dy) { int j; // Allocate 2 rows of UV. const int row_size = (dst_width * 2 * 2 + 15) & ~15; align_buffer_64(row, row_size * 2); + if (!row) + return 1; int row_stride = src_stride * (dy >> 16); void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride, uint8_t* dst_uv, int dst_width) = @@ -231,6 +242,11 @@ static void ScaleUVDown4Box(int src_width, } } #endif +#if defined(HAS_SCALEUVROWDOWN2BOX_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleUVRowDown2 = ScaleUVRowDown2Box_RVV; + } +#endif for (j = 0; j < dst_height; ++j) { ScaleUVRowDown2(src_uv, src_stride, row, dst_width * 2); @@ -241,6 +257,7 @@ static void ScaleUVDown4Box(int src_width, dst_uv += dst_stride; } free_aligned_buffer_64(row); + return 0; } #endif // HAS_SCALEUVDOWN4BOX @@ -310,6 +327,12 @@ static void ScaleUVDownEven(int src_width, } } #endif +#if defined(HAS_SCALEUVROWDOWNEVEN_RVV) + if (TestCpuFlag(kCpuHasRVV) && !filtering) { + ScaleUVRowDownEven = + (col_step == 4) ? ScaleUVRowDown4_RVV : ScaleUVRowDownEven_RVV; + } +#endif if (filtering == kFilterLinear) { src_stride = 0; @@ -324,19 +347,19 @@ static void ScaleUVDownEven(int src_width, // Scale UV down with bilinear interpolation. 
#if HAS_SCALEUVBILINEARDOWN -static void ScaleUVBilinearDown(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_uv, - uint8_t* dst_uv, - int x, - int dx, - int y, - int dy, - enum FilterMode filtering) { +static int ScaleUVBilinearDown(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_uv, + uint8_t* dst_uv, + int x, + int dx, + int y, + int dy, + enum FilterMode filtering) { int j; void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv, ptrdiff_t src_stride, int dst_width, @@ -426,9 +449,10 @@ static void ScaleUVBilinearDown(int src_width, // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. // Allocate a row of UV. { - align_buffer_64(row, clip_src_width * 2); - const int max_y = (src_height - 1) << 16; + align_buffer_64(row, clip_src_width * 2); + if (!row) + return 1; if (y > max_y) { y = max_y; } @@ -450,24 +474,25 @@ static void ScaleUVBilinearDown(int src_width, } free_aligned_buffer_64(row); } + return 0; } #endif // Scale UV up with bilinear interpolation. #if HAS_SCALEUVBILINEARUP -static void ScaleUVBilinearUp(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_uv, - uint8_t* dst_uv, - int x, - int dx, - int y, - int dy, - enum FilterMode filtering) { +static int ScaleUVBilinearUp(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_uv, + uint8_t* dst_uv, + int x, + int dx, + int y, + int dy, + enum FilterMode filtering) { int j; void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv, ptrdiff_t src_stride, int dst_width, @@ -586,6 +611,8 @@ static void ScaleUVBilinearUp(int src_width, // Allocate 2 rows of UV. 
const int row_size = (dst_width * 2 + 15) & ~15; align_buffer_64(row, row_size * 2); + if (!row) + return 1; uint8_t* rowptr = row; int rowstride = row_size; @@ -629,6 +656,7 @@ static void ScaleUVBilinearUp(int src_width, } free_aligned_buffer_64(row); } + return 0; } #endif // HAS_SCALEUVBILINEARUP @@ -637,14 +665,14 @@ static void ScaleUVBilinearUp(int src_width, // This is an optimized version for scaling up a plane to 2 times of // its original width, using linear interpolation. // This is used to scale U and V planes of NV16 to NV24. -void ScaleUVLinearUp2(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_uv, - uint8_t* dst_uv) { +static void ScaleUVLinearUp2(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_uv, + uint8_t* dst_uv) { void (*ScaleRowUp)(const uint8_t* src_uv, uint8_t* dst_uv, int dst_width) = ScaleUVRowUp2_Linear_Any_C; int i; @@ -672,6 +700,12 @@ void ScaleUVLinearUp2(int src_width, } #endif +#ifdef HAS_SCALEUVROWUP2_LINEAR_RVV + if (TestCpuFlag(kCpuHasRVV)) { + ScaleRowUp = ScaleUVRowUp2_Linear_RVV; + } +#endif + if (dst_height == 1) { ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv, dst_width); @@ -690,14 +724,14 @@ void ScaleUVLinearUp2(int src_width, // This is an optimized version for scaling up a plane to 2 times of // its original size, using bilinear interpolation. // This is used to scale U and V planes of NV12 to NV24. 
-void ScaleUVBilinearUp2(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_ptr, - uint8_t* dst_ptr) { +static void ScaleUVBilinearUp2(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr) { void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = ScaleUVRowUp2_Bilinear_Any_C; @@ -725,6 +759,12 @@ void ScaleUVBilinearUp2(int src_width, } #endif +#ifdef HAS_SCALEUVROWUP2_BILINEAR_RVV + if (TestCpuFlag(kCpuHasRVV)) { + Scale2RowUp = ScaleUVRowUp2_Bilinear_RVV; + } +#endif + Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width); dst_ptr += dst_stride; for (x = 0; x < src_height - 1; ++x) { @@ -744,14 +784,14 @@ void ScaleUVBilinearUp2(int src_width, // This is an optimized version for scaling up a plane to 2 times of // its original width, using linear interpolation. // This is used to scale U and V planes of P210 to P410. -void ScaleUVLinearUp2_16(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_uv, - uint16_t* dst_uv) { +static void ScaleUVLinearUp2_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_uv, + uint16_t* dst_uv) { void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) = ScaleUVRowUp2_Linear_16_Any_C; int i; @@ -797,14 +837,14 @@ void ScaleUVLinearUp2_16(int src_width, // This is an optimized version for scaling up a plane to 2 times of // its original size, using bilinear interpolation. // This is used to scale U and V planes of P010 to P410. 
-void ScaleUVBilinearUp2_16(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr) { +static void ScaleUVBilinearUp2_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr) { void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = ScaleUVRowUp2_Bilinear_16_Any_C; @@ -952,19 +992,19 @@ static int UVCopy_16(const uint16_t* src_uv, // Scale a UV plane (from NV12) // This function in turn calls a scaling function // suitable for handling the desired resolutions. -static void ScaleUV(const uint8_t* src, - int src_stride, - int src_width, - int src_height, - uint8_t* dst, - int dst_stride, - int dst_width, - int dst_height, - int clip_x, - int clip_y, - int clip_width, - int clip_height, - enum FilterMode filtering) { +static int ScaleUV(const uint8_t* src, + int src_stride, + int src_width, + int src_height, + uint8_t* dst, + int dst_stride, + int dst_width, + int dst_height, + int clip_x, + int clip_y, + int clip_width, + int clip_height, + enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; @@ -1010,22 +1050,22 @@ static void ScaleUV(const uint8_t* src, ScaleUVDown2(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy, filtering); - return; + return 0; } #endif #if HAS_SCALEUVDOWN4BOX if (dx == 0x40000 && filtering == kFilterBox) { // Optimized 1/4 box downsample. 
- ScaleUVDown4Box(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, x, dx, y, dy); - return; + return ScaleUVDown4Box(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, + dy); } #endif #if HAS_SCALEUVDOWNEVEN ScaleUVDownEven(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy, filtering); - return; + return 0; #endif } // Optimized odd scale down. ie 3, 5, 7, 9x. @@ -1036,7 +1076,7 @@ static void ScaleUV(const uint8_t* src, // Straight copy. UVCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2, src_stride, dst, dst_stride, clip_width, clip_height); - return; + return 0; } #endif } @@ -1047,38 +1087,37 @@ static void ScaleUV(const uint8_t* src, // Arbitrary scale vertically, but unscaled horizontally. ScalePlaneVertical(src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, y, dy, /*bpp=*/2, filtering); - return; + return 0; } if ((filtering == kFilterLinear) && ((dst_width + 1) / 2 == src_width)) { ScaleUVLinearUp2(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst); - return; + return 0; } if ((clip_height + 1) / 2 == src_height && (clip_width + 1) / 2 == src_width && (filtering == kFilterBilinear || filtering == kFilterBox)) { ScaleUVBilinearUp2(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst); - return; + return 0; } #if HAS_SCALEUVBILINEARUP if (filtering && dy < 65536) { - ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, x, dx, y, dy, - filtering); - return; + return ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, dy, + filtering); } #endif #if HAS_SCALEUVBILINEARDOWN if (filtering) { - ScaleUVBilinearDown(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, x, dx, y, dy, - filtering); - return; + return 
ScaleUVBilinearDown(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, dy, + filtering); } #endif ScaleUVSimple(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy); + return 0; } // Scale an UV image. @@ -1096,9 +1135,9 @@ int UVScale(const uint8_t* src_uv, src_height > 32768 || !dst_uv || dst_width <= 0 || dst_height <= 0) { return -1; } - ScaleUV(src_uv, src_stride_uv, src_width, src_height, dst_uv, dst_stride_uv, - dst_width, dst_height, 0, 0, dst_width, dst_height, filtering); - return 0; + return ScaleUV(src_uv, src_stride_uv, src_width, src_height, dst_uv, + dst_stride_uv, dst_width, dst_height, 0, 0, dst_width, + dst_height, filtering); } // Scale a 16 bit UV image. diff --git a/files/source/scale_win.cc b/source/scale_win.cc index ea1f95c6..ea1f95c6 100644 --- a/files/source/scale_win.cc +++ b/source/scale_win.cc diff --git a/files/source/test.sh b/source/test.sh index 7f12c3c1..7f12c3c1 100755 --- a/files/source/test.sh +++ b/source/test.sh diff --git a/files/source/video_common.cc b/source/video_common.cc index 92384c05..92384c05 100644 --- a/files/source/video_common.cc +++ b/source/video_common.cc diff --git a/tools_libyuv/OWNERS b/tools_libyuv/OWNERS new file mode 100644 index 00000000..aae4fb6e --- /dev/null +++ b/tools_libyuv/OWNERS @@ -0,0 +1,4 @@ +mbonadei@chromium.org +fbarchard@chromium.org +pbos@chromium.org + diff --git a/files/tools_libyuv/autoroller/roll_deps.py b/tools_libyuv/autoroller/roll_deps.py index 2b57eb65..d5c1089f 100755 --- a/files/tools_libyuv/autoroller/roll_deps.py +++ b/tools_libyuv/autoroller/roll_deps.py @@ -31,6 +31,7 @@ def FindSrcDirPath(): # Skip these dependencies (list without solution name prefix). 
DONT_AUTOROLL_THESE = [ + 'third_party/fuchsia-gn-sdk', 'src/third_party/gflags/src', 'src/third_party/mockito/src', ] diff --git a/files/tools_libyuv/autoroller/unittests/roll_deps_test.py b/tools_libyuv/autoroller/unittests/roll_deps_test.py index af86bdd5..af86bdd5 100755 --- a/files/tools_libyuv/autoroller/unittests/roll_deps_test.py +++ b/tools_libyuv/autoroller/unittests/roll_deps_test.py diff --git a/files/tools_libyuv/autoroller/unittests/testdata/DEPS b/tools_libyuv/autoroller/unittests/testdata/DEPS index 4f45860c..4f45860c 100644 --- a/files/tools_libyuv/autoroller/unittests/testdata/DEPS +++ b/tools_libyuv/autoroller/unittests/testdata/DEPS diff --git a/files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new b/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new index d53083ce..d53083ce 100644 --- a/files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new +++ b/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new diff --git a/files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old b/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old index dd6ddaec..dd6ddaec 100644 --- a/files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old +++ b/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old diff --git a/files/tools_libyuv/get_landmines.py b/tools_libyuv/get_landmines.py index 8b33483e..8b33483e 100755 --- a/files/tools_libyuv/get_landmines.py +++ b/tools_libyuv/get_landmines.py diff --git a/tools_libyuv/msan/OWNERS b/tools_libyuv/msan/OWNERS new file mode 100644 index 00000000..9b67a8f6 --- /dev/null +++ b/tools_libyuv/msan/OWNERS @@ -0,0 +1,3 @@ +mbonadei@chromium.org +fbarchard@chromium.org +pbos@chromium.org diff --git a/files/tools_libyuv/msan/blacklist.txt b/tools_libyuv/msan/blacklist.txt index 8b5e42a7..8b5e42a7 100644 --- a/files/tools_libyuv/msan/blacklist.txt +++ b/tools_libyuv/msan/blacklist.txt diff --git a/tools_libyuv/ubsan/OWNERS b/tools_libyuv/ubsan/OWNERS new file mode 
100644 index 00000000..9b67a8f6 --- /dev/null +++ b/tools_libyuv/ubsan/OWNERS @@ -0,0 +1,3 @@ +mbonadei@chromium.org +fbarchard@chromium.org +pbos@chromium.org diff --git a/files/tools_libyuv/ubsan/blacklist.txt b/tools_libyuv/ubsan/blacklist.txt index 8bcb2907..8bcb2907 100644 --- a/files/tools_libyuv/ubsan/blacklist.txt +++ b/tools_libyuv/ubsan/blacklist.txt diff --git a/files/tools_libyuv/ubsan/vptr_blacklist.txt b/tools_libyuv/ubsan/vptr_blacklist.txt index 23cfca53..23cfca53 100644 --- a/files/tools_libyuv/ubsan/vptr_blacklist.txt +++ b/tools_libyuv/ubsan/vptr_blacklist.txt diff --git a/files/unit_test/basictypes_test.cc b/unit_test/basictypes_test.cc index 9aaa2dcd..9aaa2dcd 100644 --- a/files/unit_test/basictypes_test.cc +++ b/unit_test/basictypes_test.cc diff --git a/files/unit_test/color_test.cc b/unit_test/color_test.cc index 01267ff1..01267ff1 100644 --- a/files/unit_test/color_test.cc +++ b/unit_test/color_test.cc diff --git a/files/unit_test/compare_test.cc b/unit_test/compare_test.cc index c29562cb..c29562cb 100644 --- a/files/unit_test/compare_test.cc +++ b/unit_test/compare_test.cc diff --git a/files/unit_test/convert_test.cc b/unit_test/convert_argb_test.cc index 1f1896b0..aeee8a7f 100644 --- a/files/unit_test/convert_test.cc +++ b/unit_test/convert_argb_test.cc @@ -1,5 +1,5 @@ /* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. + * Copyright 2023 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -31,6 +31,13 @@ #include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */ #endif +#if defined(__riscv) && !defined(__clang__) +#define DISABLE_SLOW_TESTS +#undef ENABLE_FULL_TESTS +#undef ENABLE_ROW_TESTS +#define LEAN_TESTS +#endif + // Some functions fail on big endian. Enable these tests on all cpus except // PowerPC, but they are not optimized so disabled by default. 
#if !defined(DISABLE_SLOW_TESTS) && !defined(__powerpc__) @@ -51,501 +58,7 @@ namespace libyuv { // subsample amount uses a divide. #define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) -// Planar test - -#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \ - SRC_DEPTH) \ - TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ - static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ - static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ - static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ - "SRC_SUBSAMP_X unsupported"); \ - static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ - "SRC_SUBSAMP_Y unsupported"); \ - static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ - "DST_SUBSAMP_X unsupported"); \ - static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ - "DST_SUBSAMP_Y unsupported"); \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ - const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ - const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ - const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \ - align_buffer_page_end(src_u, \ - kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ - align_buffer_page_end(src_v, \ - kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ - align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ - align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - MemRandomize(src_y + 
OFF, kWidth * kHeight * SRC_BPC); \ - MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ - MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ - SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ - SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF); \ - SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF); \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1); \ - } \ - for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) { \ - src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \ - src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \ - } \ - memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ - memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ - memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - MaskCpuFlags(disable_cpu_flags_); \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \ - reinterpret_cast<DST_T*>(dst_y_c), kWidth, \ - reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth, \ - reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth, \ - NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \ - reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \ - reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth, \ - reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth, \ - NEG kHeight); \ - } \ - for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ - EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ - } \ - for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \ - EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \ - EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \ - } \ - 
free_aligned_buffer_page_end(dst_y_c); \ - free_aligned_buffer_page_end(dst_u_c); \ - free_aligned_buffer_page_end(dst_v_c); \ - free_aligned_buffer_page_end(dst_y_opt); \ - free_aligned_buffer_page_end(dst_u_opt); \ - free_aligned_buffer_page_end(dst_v_opt); \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_u); \ - free_aligned_buffer_page_end(src_v); \ - } - -#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ - benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ - benchmark_width_, _Unaligned, +, 2, SRC_DEPTH) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0, SRC_DEPTH) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0, SRC_DEPTH) - -TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8) -TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I420, uint8_t, 1, 2, 2, 8) -TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I420, uint8_t, 1, 2, 2, 8) -TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I422, uint8_t, 1, 2, 1, 8) -TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I444, uint8_t, 1, 1, 1, 8) -TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420Mirror, uint8_t, 1, 2, 2, 8) -TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I422, uint8_t, 1, 2, 1, 8) -TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I444, uint8_t, 1, 1, 1, 8) -TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I444, uint8_t, 1, 1, 1, 8) -TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10) 
-TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I010, uint16_t, 2, 2, 2, 8) -TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I012, uint16_t, 2, 2, 2, 8) -TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H010, uint16_t, 2, 2, 2, 10) -TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H420, uint8_t, 1, 2, 2, 10) -TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2, 8) -TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H012, uint16_t, 2, 2, 2, 8) -TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I410, uint16_t, 2, 1, 1, 10) -TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I410, uint16_t, 2, 1, 1, 10) -TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I412, uint16_t, 2, 1, 1, 12) -TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I412, uint16_t, 2, 1, 1, 12) -TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I010, uint16_t, 2, 2, 2, 10) -TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I010, uint16_t, 2, 2, 2, 10) -TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I012, uint16_t, 2, 2, 2, 12) -TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12) -TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10) -TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10) -TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10) -TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 10) -TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10) -TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12) -TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 12) -TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 12) -TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 12) -TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12) - -// Test Android 420 to I420 -#define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - W1280, N, NEG, OFF, PN, OFF_U, OFF_V) \ - TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##To##PN##N) { \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int 
kSizeUV = \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ - align_buffer_page_end(src_uv, \ - kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight); \ - align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ - align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - uint8_t* src_u = src_uv + OFF_U; \ - uint8_t* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V); \ - int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE; \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kWidth; ++j) \ - src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ - src_u[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \ - (fastrand() & 0xff); \ - src_v[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \ - (fastrand() & 0xff); \ - } \ - } \ - memset(dst_y_c, 1, kWidth* kHeight); \ - memset(dst_u_c, 2, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_v_c, 3, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_opt, 101, kWidth* kHeight); \ - memset(dst_u_opt, 102, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_v_opt, 103, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - MaskCpuFlags(disable_cpu_flags_); \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - src_v + OFF, SUBSAMPLE(kWidth, 
SRC_SUBSAMP_X), PIXEL_STRIDE, dst_y_c, \ - kWidth, dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, \ - dst_y_opt, kWidth, dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ - dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \ - } \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ - } \ - } \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - EXPECT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ - dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \ - } \ - } \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - EXPECT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ - dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \ - } \ - } \ - free_aligned_buffer_page_end(dst_y_c); \ - free_aligned_buffer_page_end(dst_u_c); \ - free_aligned_buffer_page_end(dst_v_c); \ - free_aligned_buffer_page_end(dst_y_opt); \ - free_aligned_buffer_page_end(dst_u_opt); \ - free_aligned_buffer_page_end(dst_v_opt); \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_uv); \ - } - -#define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V, \ - SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, \ - SUBSAMP_Y) \ - TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_ + 1, \ - _Any, +, 0, PN, OFF_U, OFF_V) \ - TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, 
SUBSAMP_Y, benchmark_width_, \ - _Unaligned, +, 2, PN, OFF_U, OFF_V) \ - TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, \ - -, 0, PN, OFF_U, OFF_V) \ - TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \ - 0, PN, OFF_U, OFF_V) - -TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2) -TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2) -TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2) -#undef TESTAPLANARTOP -#undef TESTAPLANARTOPI - -// wrapper to keep API the same -int I400ToNV21(const uint8_t* src_y, - int src_stride_y, - const uint8_t* /* src_u */, - int /* src_stride_u */, - const uint8_t* /* src_v */, - int /* src_stride_v */, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_vu, - int dst_stride_vu, - int width, - int height) { - return I400ToNV21(src_y, src_stride_y, dst_y, dst_stride_y, dst_vu, - dst_stride_vu, width, height); -} - -#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \ - SRC_DEPTH) \ - TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ - static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ - static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ - static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ - "SRC_SUBSAMP_X unsupported"); \ - static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ - "SRC_SUBSAMP_Y unsupported"); \ - static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ - "DST_SUBSAMP_X unsupported"); \ - static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ - "DST_SUBSAMP_Y unsupported"); \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ - const int 
kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ - const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ - const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \ - align_buffer_page_end(src_u, \ - kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ - align_buffer_page_end(src_v, \ - kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ - align_buffer_page_end(dst_uv_c, \ - kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ - align_buffer_page_end(dst_uv_opt, \ - kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ - MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \ - MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ - MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ - SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ - SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF); \ - SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF); \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1); \ - } \ - for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) { \ - src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \ - src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \ - } \ - memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ - memset(dst_uv_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ - memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ - memset(dst_uv_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ - MaskCpuFlags(disable_cpu_flags_); \ - SRC_FMT_PLANAR##To##FMT_PLANAR(src_y_p, kWidth, src_u_p, kSrcHalfWidth, \ - src_v_p, kSrcHalfWidth, \ - reinterpret_cast<DST_T*>(dst_y_c), kWidth, \ - reinterpret_cast<DST_T*>(dst_uv_c), \ - kDstHalfWidth * 2, kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ 
- SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \ - reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \ - reinterpret_cast<DST_T*>(dst_uv_opt), kDstHalfWidth * 2, kWidth, \ - NEG kHeight); \ - } \ - for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ - EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ - } \ - for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC * 2; ++i) { \ - EXPECT_EQ(dst_uv_c[i], dst_uv_opt[i]); \ - } \ - free_aligned_buffer_page_end(dst_y_c); \ - free_aligned_buffer_page_end(dst_uv_c); \ - free_aligned_buffer_page_end(dst_y_opt); \ - free_aligned_buffer_page_end(dst_uv_opt); \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_u); \ - free_aligned_buffer_page_end(src_v); \ - } - -#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ - TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \ - TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \ - SRC_DEPTH) \ - TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH) \ - TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH) - -TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8) -TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8) -TESTPLANARTOBP(I422, uint8_t, 1, 2, 1, NV21, uint8_t, 1, 2, 2, 8) -TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV12, uint8_t, 1, 2, 2, 8) 
-TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV21, uint8_t, 1, 2, 2, 8) -TESTPLANARTOBP(I400, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8) -TESTPLANARTOBP(I010, uint16_t, 2, 2, 2, P010, uint16_t, 2, 2, 2, 10) -TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10) -TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12) -TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12) - -#define TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, W1280, N, NEG, OFF, DOY, SRC_DEPTH, \ - TILE_WIDTH, TILE_HEIGHT) \ - TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ - static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ - static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ - "SRC_SUBSAMP_X unsupported"); \ - static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ - "SRC_SUBSAMP_Y unsupported"); \ - static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ - "DST_SUBSAMP_X unsupported"); \ - static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ - "DST_SUBSAMP_Y unsupported"); \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ - const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ - const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ - const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \ - const int kPaddedHeight = \ - (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \ - const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \ - const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \ - align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \ - align_buffer_page_end( \ - src_uv, \ - 2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ - 
align_buffer_page_end(dst_uv_c, \ - 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ - align_buffer_page_end(dst_uv_opt, \ - 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ - SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \ - for (int i = 0; \ - i < kPaddedWidth * kPaddedHeight * SRC_BPC / (int)sizeof(SRC_T); \ - ++i) { \ - src_y_p[i] = \ - (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ - } \ - for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2 * \ - SRC_BPC / (int)sizeof(SRC_T); \ - ++i) { \ - src_uv_p[i] = \ - (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ - } \ - memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ - memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ - memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - MaskCpuFlags(disable_cpu_flags_); \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \ - 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \ - DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \ - reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \ - NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \ - 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \ - DOY ? 
reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \ - reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \ - NEG kHeight); \ - } \ - if (DOY) { \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ - } \ - } \ - } \ - for (int i = 0; i < kDstHalfHeight; ++i) { \ - for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \ - EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \ - dst_uv_opt[i * 2 * kDstHalfWidth + j]); \ - } \ - } \ - free_aligned_buffer_page_end(dst_y_c); \ - free_aligned_buffer_page_end(dst_uv_c); \ - free_aligned_buffer_page_end(dst_y_opt); \ - free_aligned_buffer_page_end(dst_uv_opt); \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_uv); \ - } - -#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ - TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ - benchmark_width_ + 1, _Any, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \ - TILE_HEIGHT) \ - TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ - benchmark_width_, _Unaligned, +, 2, 1, SRC_DEPTH, TILE_WIDTH, \ - TILE_HEIGHT) \ - TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0, 1, SRC_DEPTH, TILE_WIDTH, \ - TILE_HEIGHT) \ - TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \ - TILE_HEIGHT) \ - TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, 
DST_SUBSAMP_Y, \ - benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH, \ - TILE_HEIGHT) - -TESTBPTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1) -TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1) -TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1) -TESTBPTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1) -TESTBPTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1) -TESTBPTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1) -TESTBPTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1) -TESTBPTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1) -TESTBPTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1) -TESTBPTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1) -TESTBPTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32) -TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32) +#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) #define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ @@ -630,6 +143,7 @@ TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32) free_aligned_buffer_page_end(src_uv); \ } +#if defined(ENABLE_FULL_TESTS) #define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ @@ -648,6 +162,14 @@ TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32) TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) +#else +#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ + 
TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) +#endif TESTBPTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) TESTBPTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) @@ -696,8 +218,6 @@ TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1) I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ kFilterBilinear) -#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) - #define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ YALIGN, W1280, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ @@ -761,8 +281,6 @@ TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1) #define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ YALIGN) \ TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_ + 1, _Any, +, 0) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ YALIGN, benchmark_width_, _Opt, +, 0) #endif @@ -837,7 +355,7 @@ TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1) TESTPLANARTOB(I422, 2, 2, RGB24Filter, 3, 3, 1) -#else +#else // FULL_TESTS TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1) @@ -866,218 +384,6 @@ TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) #endif -#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, W1280, N, NEG, OFF, ATTEN) \ - TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ - const int kWidth = W1280; \ - const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ - const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ - const int 
kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ - const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ - align_buffer_page_end(src_u, kSizeUV + OFF); \ - align_buffer_page_end(src_v, kSizeUV + OFF); \ - align_buffer_page_end(src_a, kWidth* kHeight + OFF); \ - align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ - align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - src_y[i + OFF] = (fastrand() & 0xff); \ - src_a[i + OFF] = (fastrand() & 0xff); \ - } \ - for (int i = 0; i < kSizeUV; ++i) { \ - src_u[i + OFF] = (fastrand() & 0xff); \ - src_v[i + OFF] = (fastrand() & 0xff); \ - } \ - memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ - memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ - src_v + OFF, kStrideUV, src_a + OFF, kWidth, \ - dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight, \ - ATTEN); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ - src_v + OFF, kStrideUV, src_a + OFF, kWidth, \ - dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, \ - ATTEN); \ - } \ - for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ - EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ - } \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_u); \ - free_aligned_buffer_page_end(src_v); \ - free_aligned_buffer_page_end(src_a); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ - } - -#if defined(ENABLE_FULL_TESTS) -#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN) \ - TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_ + 1, _Any, +, 0, 0) \ - 
TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Unaligned, +, 2, 0) \ - TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Invert, -, 0, 0) \ - TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Opt, +, 0, 0) \ - TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Premult, +, 0, 1) -#else -#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN) \ - TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Opt, +, 0, 0) -#endif - -#define J420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define J420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define F420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define F420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define H420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define H420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define U420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define U420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define V420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, 
l, m) \ - I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define V420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define J422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define J422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define F422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define F422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define H422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define H422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define U422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define U422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define V422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define V422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define J444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define J444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) 
\ - I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define F444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define F444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define H444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define H444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define U444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define U444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define V444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define V444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) - -#define I420AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ - &kYuvI601Constants, k, l, m, kFilterBilinear) -#define I422AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ - &kYuvI601Constants, k, l, m, kFilterBilinear) - -#if defined(ENABLE_FULL_TESTS) -TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1) -TESTQPLANARTOB(J420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(J420Alpha, 2, 2, ABGR, 4, 4, 1) -TESTQPLANARTOB(H420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(H420Alpha, 2, 2, ABGR, 4, 4, 
1) -TESTQPLANARTOB(F420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(F420Alpha, 2, 2, ABGR, 4, 4, 1) -TESTQPLANARTOB(U420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(U420Alpha, 2, 2, ABGR, 4, 4, 1) -TESTQPLANARTOB(V420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(V420Alpha, 2, 2, ABGR, 4, 4, 1) -TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(I422Alpha, 2, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(J422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(J422Alpha, 2, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(H422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(H422Alpha, 2, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(F422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(F422Alpha, 2, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(U422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(U422Alpha, 2, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(V422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(V422Alpha, 2, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(I444Alpha, 1, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(J444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(J444Alpha, 1, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(H444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(H444Alpha, 1, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(F444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(F444Alpha, 1, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(U444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(U444Alpha, 1, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(V444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(V444Alpha, 1, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1) -TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) -#else -TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1) -TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) -#endif - #define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ W1280, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ @@ -1132,6 +438,7 @@ 
TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) free_aligned_buffer_page_end(dst_argb32_opt); \ } +#if defined(ENABLE_FULL_TESTS) #define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \ TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ benchmark_width_ + 1, _Any, +, 0) \ @@ -1141,6 +448,11 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) benchmark_width_, _Invert, -, 0) \ TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ benchmark_width_, _Opt, +, 0) +#else +#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, _Opt, +, 0) +#endif #define JNV12ToARGB(a, b, c, d, e, f, g, h) \ NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) @@ -1186,241 +498,6 @@ TESTBPTOB(NV21, 2, 2, YUV24, RAW, 3) TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2) #endif -#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - W1280, N, NEG, OFF) \ - TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ - const int kWidth = W1280; \ - const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ - const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ - const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \ - align_buffer_page_end(src_argb, kStride* kHeight + OFF); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight); \ - align_buffer_page_end(dst_uv_c, \ - kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ - align_buffer_page_end(dst_uv_opt, \ - kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_c, 1, kWidth* kHeight); \ - memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_opt, 101, kWidth* kHeight); \ - memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kStride; ++j) \ - src_argb[(i * kStride) + j + 
OFF] = (fastrand() & 0xff); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \ - kStrideUV * 2, dst_uv_c + kStrideUV, kStrideUV * 2, \ - kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \ - dst_uv_opt, kStrideUV * 2, dst_uv_opt + kStrideUV, \ - kStrideUV * 2, kWidth, NEG kHeight); \ - } \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ - } \ - } \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \ - for (int j = 0; j < kStrideUV; ++j) { \ - EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \ - } \ - } \ - free_aligned_buffer_page_end(dst_y_c); \ - free_aligned_buffer_page_end(dst_uv_c); \ - free_aligned_buffer_page_end(dst_y_opt); \ - free_aligned_buffer_page_end(dst_uv_opt); \ - free_aligned_buffer_page_end(src_argb); \ - } - -#if defined(ENABLE_FULL_TESTS) -#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ + 1, _Any, +, 0) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Unaligned, +, 2) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0) -#else -#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ + 1, _Any, +, 0) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0) -#endif - -TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2) 
-TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2) -TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1) -TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1) -TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2) -TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1) -TESTATOPLANAR(ABGR, 4, 1, J420, 2, 2) -TESTATOPLANAR(ABGR, 4, 1, J422, 2, 1) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2) -TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2) -TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2) -#endif -TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2) -TESTATOPLANAR(I400, 1, 1, I420, 2, 2) -TESTATOPLANAR(J400, 1, 1, J420, 2, 2) -TESTATOPLANAR(RAW, 3, 1, I420, 2, 2) -TESTATOPLANAR(RAW, 3, 1, J420, 2, 2) -TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2) -TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2) -TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2) -TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2) -TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1) -TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2) -TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1) - -#define TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, \ - SUBSAMP_Y, W1280, N, NEG, OFF) \ - TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ - const int kWidth = W1280; \ - const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ - const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ - const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \ - align_buffer_page_end(src_argb, kStride* kHeight + OFF); \ - align_buffer_page_end(dst_a_c, kWidth* kHeight); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight); \ - align_buffer_page_end(dst_uv_c, \ - kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_a_opt, kWidth* kHeight); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ - align_buffer_page_end(dst_uv_opt, \ - kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_a_c, 1, kWidth* kHeight); \ - memset(dst_y_c, 2, kWidth* kHeight); \ - memset(dst_uv_c, 3, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_a_opt, 101, kWidth* kHeight); \ - memset(dst_y_opt, 102, kWidth* kHeight); \ - 
memset(dst_uv_opt, 103, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kStride; ++j) \ - src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \ - kStrideUV * 2, dst_uv_c + kStrideUV, kStrideUV * 2, \ - dst_a_c, kWidth, kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \ - dst_uv_opt, kStrideUV * 2, dst_uv_opt + kStrideUV, \ - kStrideUV * 2, dst_a_opt, kWidth, kWidth, \ - NEG kHeight); \ - } \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ - EXPECT_EQ(dst_a_c[i * kWidth + j], dst_a_opt[i * kWidth + j]); \ - } \ - } \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \ - for (int j = 0; j < kStrideUV; ++j) { \ - EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \ - } \ - } \ - free_aligned_buffer_page_end(dst_a_c); \ - free_aligned_buffer_page_end(dst_y_c); \ - free_aligned_buffer_page_end(dst_uv_c); \ - free_aligned_buffer_page_end(dst_a_opt); \ - free_aligned_buffer_page_end(dst_y_opt); \ - free_aligned_buffer_page_end(dst_uv_opt); \ - free_aligned_buffer_page_end(src_argb); \ - } - -#if defined(ENABLE_FULL_TESTS) -#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ + 1, _Any, +, 0) \ - TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Unaligned, +, 2) \ - TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0) \ - TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - 
benchmark_width_, _Opt, +, 0) -#else -#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ + 1, _Any, +, 0) \ - TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0) -#endif - -TESTATOPLANARA(ARGB, 4, 1, I420Alpha, 2, 2) - -#define TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - W1280, N, NEG, OFF) \ - TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kStride = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \ - const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ - align_buffer_page_end(src_argb, kStride* kHeight + OFF); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight); \ - align_buffer_page_end(dst_uv_c, \ - kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ - align_buffer_page_end(dst_uv_opt, \ - kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kStride; ++j) \ - src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ - memset(dst_y_c, 1, kWidth* kHeight); \ - memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_opt, 101, kWidth* kHeight); \ - memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \ - kStrideUV * 2, kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \ - dst_uv_opt, kStrideUV * 2, kWidth, NEG kHeight); \ - } \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ - } \ - } \ 
- for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < kStrideUV * 2; ++j) { \ - EXPECT_EQ(dst_uv_c[i * kStrideUV * 2 + j], \ - dst_uv_opt[i * kStrideUV * 2 + j]); \ - } \ - } \ - free_aligned_buffer_page_end(dst_y_c); \ - free_aligned_buffer_page_end(dst_uv_c); \ - free_aligned_buffer_page_end(dst_y_opt); \ - free_aligned_buffer_page_end(dst_uv_opt); \ - free_aligned_buffer_page_end(src_argb); \ - } - -#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ + 1, _Any, +, 0) \ - TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Unaligned, +, 2) \ - TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0) \ - TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0) - -TESTATOBP(ARGB, 1, 4, NV12, 2, 2) -TESTATOBP(ARGB, 1, 4, NV21, 2, 2) -TESTATOBP(ABGR, 1, 4, NV12, 2, 2) -TESTATOBP(ABGR, 1, 4, NV21, 2, 2) -TESTATOBP(RAW, 1, 3, JNV21, 2, 2) -TESTATOBP(YUY2, 2, 4, NV12, 2, 2) -TESTATOBP(UYVY, 2, 4, NV12, 2, 2) -TESTATOBP(AYUV, 1, 4, NV12, 2, 2) -TESTATOBP(AYUV, 1, 4, NV21, 2, 2) - #define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \ @@ -1776,6 +853,7 @@ TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) } \ } +#if defined(ENABLE_FULL_TESTS) #define TESTATOBD(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ HEIGHT_B) \ TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ @@ -1788,6 +866,12 @@ TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) HEIGHT_B, benchmark_width_, _Opt, +, 0) \ TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ HEIGHT_B) +#else +#define TESTATOBD(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, 
STRIDE_B, \ + HEIGHT_B) \ + TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ + HEIGHT_B) +#endif #ifdef LITTLE_ENDIAN_ONLY_TEST TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1) @@ -1856,1117 +940,217 @@ TESTEND(BGRAToARGB, uint8_t, 4, 4, 1) TESTEND(ABGRToARGB, uint8_t, 4, 4, 1) TESTEND(AB64ToAR64, uint16_t, 4, 4, 1) -#ifdef HAVE_JPEG -TEST_F(LibYUVConvertTest, ValidateJpeg) { - const int kOff = 10; - const int kMinJpeg = 64; - const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg - ? benchmark_width_ * benchmark_height_ - : kMinJpeg; - const int kSize = kImageSize + kOff; - align_buffer_page_end(orig_pixels, kSize); - - // No SOI or EOI. Expect fail. - memset(orig_pixels, 0, kSize); - EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); - - // Test special value that matches marker start. - memset(orig_pixels, 0xff, kSize); - EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); - - // EOI, SOI. Expect pass. - orig_pixels[0] = 0xff; - orig_pixels[1] = 0xd8; // SOI. - orig_pixels[2] = 0xff; - orig_pixels[kSize - kOff + 0] = 0xff; - orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. - for (int times = 0; times < benchmark_iterations_; ++times) { - EXPECT_TRUE(ValidateJpeg(orig_pixels, kSize)); - } - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVConvertTest, ValidateJpegLarge) { - const int kOff = 10; - const int kMinJpeg = 64; - const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg - ? benchmark_width_ * benchmark_height_ - : kMinJpeg; - const int kSize = kImageSize + kOff; - const int kMultiple = 10; - const int kBufSize = kImageSize * kMultiple + kOff; - align_buffer_page_end(orig_pixels, kBufSize); - - // No SOI or EOI. Expect fail. - memset(orig_pixels, 0, kBufSize); - EXPECT_FALSE(ValidateJpeg(orig_pixels, kBufSize)); - - // EOI, SOI. Expect pass. - orig_pixels[0] = 0xff; - orig_pixels[1] = 0xd8; // SOI. 
- orig_pixels[2] = 0xff; - orig_pixels[kSize - kOff + 0] = 0xff; - orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. - for (int times = 0; times < benchmark_iterations_; ++times) { - EXPECT_TRUE(ValidateJpeg(orig_pixels, kBufSize)); - } - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVConvertTest, InvalidateJpeg) { - const int kOff = 10; - const int kMinJpeg = 64; - const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg - ? benchmark_width_ * benchmark_height_ - : kMinJpeg; - const int kSize = kImageSize + kOff; - align_buffer_page_end(orig_pixels, kSize); - - // NULL pointer. Expect fail. - EXPECT_FALSE(ValidateJpeg(NULL, kSize)); - - // Negative size. Expect fail. - EXPECT_FALSE(ValidateJpeg(orig_pixels, -1)); - - // Too large size. Expect fail. - EXPECT_FALSE(ValidateJpeg(orig_pixels, 0xfb000000ull)); - - // No SOI or EOI. Expect fail. - memset(orig_pixels, 0, kSize); - EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); - - // SOI but no EOI. Expect fail. - orig_pixels[0] = 0xff; - orig_pixels[1] = 0xd8; // SOI. - orig_pixels[2] = 0xff; - for (int times = 0; times < benchmark_iterations_; ++times) { - EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); - } - - // EOI but no SOI. Expect fail. - orig_pixels[0] = 0; - orig_pixels[1] = 0; - orig_pixels[kSize - kOff + 0] = 0xff; - orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. - EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); - - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVConvertTest, FuzzJpeg) { - // SOI but no EOI. Expect fail. - for (int times = 0; times < benchmark_iterations_; ++times) { - const int kSize = fastrand() % 5000 + 3; - align_buffer_page_end(orig_pixels, kSize); - MemRandomize(orig_pixels, kSize); - - // Add SOI so frame will be scanned. - orig_pixels[0] = 0xff; - orig_pixels[1] = 0xd8; // SOI. - orig_pixels[2] = 0xff; - orig_pixels[kSize - 1] = 0xff; - ValidateJpeg(orig_pixels, - kSize); // Failure normally expected. 
- free_aligned_buffer_page_end(orig_pixels); - } -} - -// Test data created in GIMP. In export jpeg, disable -// thumbnails etc, choose a subsampling, and use low quality -// (50) to keep size small. Generated with xxd -i test.jpg -// test 0 is J400 -static const uint8_t kTest0Jpg[] = { - 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, - 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, - 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, - 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, - 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, - 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, - 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, - 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xc2, 0x00, 0x0b, 0x08, 0x00, 0x10, - 0x00, 0x20, 0x01, 0x01, 0x11, 0x00, 0xff, 0xc4, 0x00, 0x17, 0x00, 0x01, - 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xda, 0x00, 0x08, 0x01, - 0x01, 0x00, 0x00, 0x00, 0x01, 0x43, 0x7e, 0xa7, 0x97, 0x57, 0xff, 0xc4, - 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, - 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, - 0x02, 0x3b, 0xc0, 0x6f, 0x66, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, - 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, - 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, - 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, - 0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, - 0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, - 0x31, 
0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, - 0x3f, 0x21, 0x65, 0x6e, 0x31, 0x86, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, - 0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, - 0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x08, - 0x01, 0x01, 0x00, 0x00, 0x00, 0x10, 0x35, 0xff, 0xc4, 0x00, 0x1f, 0x10, - 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, - 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, - 0x3f, 0x10, 0x0b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x88, 0xab, 0x8b, - 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, - 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, - 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, - 0xd9}; -static const size_t kTest0JpgLen = 421; - -// test 1 is J444 -static const uint8_t kTest1Jpg[] = { - 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, - 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, - 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, - 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, - 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, - 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, - 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, - 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, - 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 
0x00, 0x10, 0x00, 0x20, 0x03, - 0x01, 0x11, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, - 0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4, - 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x01, 0x03, 0xff, 0xda, - 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, 0x01, - 0x40, 0x8f, 0x26, 0xe8, 0xf4, 0xcc, 0xf9, 0x69, 0x2b, 0x1b, 0x2a, 0xcb, - 0xff, 0xc4, 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, - 0x00, 0x03, 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, - 0x01, 0x05, 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, - 0x0d, 0x26, 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x01, 0x00, - 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x01, 0x00, 0x10, 0x11, 0x02, 0x12, 0xff, 0xda, 0x00, 0x08, - 0x01, 0x03, 0x01, 0x01, 0x3f, 0x01, 0xf1, 0x00, 0x27, 0x45, 0xbb, 0x31, - 0xaf, 0xff, 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x02, 0x03, 0x01, 0x01, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, - 0x02, 0x10, 0x11, 0x41, 0x12, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, - 0x01, 0x3f, 0x01, 0xf6, 0x4b, 0x5f, 0x48, 0xb3, 0x69, 0x63, 0x35, 0x72, - 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, - 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00, - 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2, - 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c, - 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61, - 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 
0x01, 0x00, 0x01, 0x3f, 0x21, - 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01, - 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48, - 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, - 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x26, 0x61, 0xd4, 0xff, - 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x03, 0x01, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, - 0x31, 0x41, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, - 0x10, 0x54, 0xa8, 0xbf, 0x50, 0x87, 0xb0, 0x9d, 0x8b, 0xc4, 0x6a, 0x26, - 0x6b, 0x2a, 0x9c, 0x1f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x01, 0x00, 0x11, 0x21, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, - 0x01, 0x01, 0x3f, 0x10, 0x70, 0xe1, 0x3e, 0xd1, 0x8e, 0x0d, 0xe1, 0xb5, - 0xd5, 0x91, 0x76, 0x43, 0x82, 0x45, 0x4c, 0x7b, 0x7f, 0xff, 0xc4, 0x00, - 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, - 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, - 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a, - 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, - 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, - 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, - 0xd4, 0xff, 0xd9}; -static const size_t kTest1JpgLen = 735; - -// test 2 is J420 -static const uint8_t kTest2Jpg[] = { - 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, - 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, - 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, - 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, - 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 
0x40, - 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, - 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, - 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, - 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, - 0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, - 0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x05, 0x01, 0x02, 0x04, 0xff, - 0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x01, 0x02, 0xff, - 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, - 0x01, 0x20, 0xe7, 0x28, 0xa3, 0x0b, 0x2e, 0x2d, 0xcf, 0xff, 0xc4, 0x00, - 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, 0x10, - 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, 0x02, - 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, 0x62, - 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x00, 0x03, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, - 0x01, 0xc8, 0x53, 0xff, 0xc4, 0x00, 0x16, 0x11, 0x01, 0x01, 0x01, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x11, 0x32, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f, - 0x01, 0xd2, 0xc7, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 
0x03, - 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, - 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, - 0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, - 0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, - 0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, - 0x3f, 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, - 0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, - 0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, - 0x03, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x13, 0x5f, - 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, - 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x0e, - 0xa1, 0x3a, 0x76, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x21, 0x11, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, - 0x3f, 0x10, 0x57, 0x0b, 0x08, 0x70, 0xdb, 0xff, 0xc4, 0x00, 0x1f, 0x10, - 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, - 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, - 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b, - 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, - 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, - 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, - 0xd9}; -static const size_t kTest2JpgLen = 685; - -// test 3 is J422 -static const uint8_t kTest3Jpg[] = { - 0xff, 0xd8, 0xff, 0xe0, 0x00, 
0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, - 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, - 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, - 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, - 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, - 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, - 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, - 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, - 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, - 0x01, 0x21, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, - 0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4, - 0x00, 0x17, 0x01, 0x00, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x00, 0xff, - 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, - 0x01, 0x43, 0x8d, 0x1f, 0xa2, 0xb3, 0xca, 0x1b, 0x57, 0x0f, 0xff, 0xc4, - 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, - 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, - 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, - 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x01, 0x02, 0x10, 0x11, 
0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, - 0x01, 0x01, 0x3f, 0x01, 0x51, 0xce, 0x8c, 0x75, 0xff, 0xc4, 0x00, 0x18, - 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x61, 0x21, 0xff, 0xda, - 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xa6, 0xd9, 0x2f, 0x84, - 0xe8, 0xf0, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, - 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, - 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, - 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, - 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, - 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, - 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, - 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, - 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x2e, 0x45, 0xff, - 0xc4, 0x00, 0x18, 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, - 0x31, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x53, - 0x50, 0xba, 0x54, 0xc1, 0x67, 0x4f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00, - 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, 0x00, 0x10, 0xff, 0xda, 0x00, 0x08, - 0x01, 0x02, 0x01, 0x01, 0x3f, 0x10, 0x18, 0x81, 0x5c, 0x04, 0x1a, 0xca, - 0x91, 0xbf, 0xff, 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, - 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, - 0xda, 0x00, 0x08, 0x01, 0x01, 
0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, - 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, - 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, - 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, - 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, 0xd9}; -static const size_t kTest3JpgLen = 704; - -// test 4 is J422 vertical - not supported -static const uint8_t kTest4Jpg[] = { - 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, - 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, - 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, - 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, - 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, - 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, - 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, - 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, - 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, - 0x01, 0x12, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, - 0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x05, 0x01, 0x02, 0x03, 0xff, - 0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03, 0xff, - 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, - 0x01, 0xd2, 0x98, 0xe9, 0x03, 0x0c, 0x00, 0x46, 0x21, 0xd9, 0xff, 0xc4, - 
0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, - 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, - 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, - 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x11, 0x01, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, - 0x3f, 0x01, 0x98, 0xb1, 0xbd, 0x47, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00, - 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x12, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, - 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xb6, 0x35, 0xa2, 0xe1, 0x47, 0xff, - 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x21, 0x02, - 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00, 0x08, 0x01, - 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2, 0xed, 0xf9, - 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c, 0x10, 0x01, - 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61, 0x81, 0xf0, - 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21, 0x75, 0x6e, - 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01, 0xf3, 0xde, - 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48, 0x5d, 0x7a, - 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, - 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x24, 0xaf, 0xff, 0xc4, 0x00, 0x19, - 0x11, 0x00, 0x03, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x51, 0x21, 0x31, 0xff, - 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x59, 0x11, 0xca, - 0x42, 0x60, 0x9f, 0x69, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03, - 
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x01, 0x11, 0x21, 0x31, 0x61, 0xff, 0xda, 0x00, 0x08, 0x01, - 0x02, 0x01, 0x01, 0x3f, 0x10, 0xb0, 0xd7, 0x27, 0x51, 0xb6, 0x41, 0xff, - 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, - 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, - 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, - 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, - 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, - 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, - 0x0b, 0xb7, 0xd4, 0xff, 0xd9}; -static const size_t kTest4JpgLen = 701; - -TEST_F(LibYUVConvertTest, TestMJPGSize) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - printf("test jpeg size %d x %d\n", width, height); -} - -TEST_F(LibYUVConvertTest, TestMJPGToI420) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - if (benchmark_iterations < 1) { - benchmark_iterations = 1; - } - - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_u, half_width * half_height); - align_buffer_page_end(dst_v, half_width * half_height); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_u, half_width, - dst_v, half_width, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Test result matches known hash value. 
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); - uint32_t dst_u_hash = HashDjb2(dst_u, half_width * half_height, 5381); - uint32_t dst_v_hash = HashDjb2(dst_v, half_width * half_height, 5381); - EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_u_hash, 2501859930u); - EXPECT_EQ(dst_v_hash, 2126459123u); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_u); - free_aligned_buffer_page_end(dst_v); -} - -TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - if (benchmark_iterations < 1) { - benchmark_iterations = 1; - } - - // Convert to NV21 - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_vu, half_width * half_height * 2); - - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_vu, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Convert to I420 - align_buffer_page_end(dst2_y, width * height); - align_buffer_page_end(dst2_u, half_width * half_height); - align_buffer_page_end(dst2_v, half_width * half_height); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width, - dst2_v, half_width, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Convert I420 to NV21 - align_buffer_page_end(dst3_y, width * height); - align_buffer_page_end(dst3_vu, half_width * half_height * 2); - - I420ToNV21(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y, - width, dst3_vu, half_width * 2, width, height); - - for (int i = 0; i < width * height; ++i) { - EXPECT_EQ(dst_y[i], 
dst3_y[i]); - } - for (int i = 0; i < half_width * half_height * 2; ++i) { - EXPECT_EQ(dst_vu[i], dst3_vu[i]); - EXPECT_EQ(dst_vu[i], dst3_vu[i]); - } - - free_aligned_buffer_page_end(dst3_y); - free_aligned_buffer_page_end(dst3_vu); - - free_aligned_buffer_page_end(dst2_y); - free_aligned_buffer_page_end(dst2_u); - free_aligned_buffer_page_end(dst2_v); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_vu); -} - -TEST_F(LibYUVConvertTest, TestMJPGToI420_NV12) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - if (benchmark_iterations < 1) { - benchmark_iterations = 1; - } - - // Convert to NV12 - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_uv, half_width * half_height * 2); - - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Convert to I420 - align_buffer_page_end(dst2_y, width * height); - align_buffer_page_end(dst2_u, half_width * half_height); - align_buffer_page_end(dst2_v, half_width * half_height); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width, - dst2_v, half_width, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Convert I420 to NV12 - align_buffer_page_end(dst3_y, width * height); - align_buffer_page_end(dst3_uv, half_width * half_height * 2); - - I420ToNV12(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y, - width, dst3_uv, half_width * 2, width, height); - - for (int i = 0; i < width * height; ++i) { - EXPECT_EQ(dst_y[i], 
dst3_y[i]); - } - for (int i = 0; i < half_width * half_height * 2; ++i) { - EXPECT_EQ(dst_uv[i], dst3_uv[i]); - EXPECT_EQ(dst_uv[i], dst3_uv[i]); - } - - free_aligned_buffer_page_end(dst3_y); - free_aligned_buffer_page_end(dst3_uv); - - free_aligned_buffer_page_end(dst2_y); - free_aligned_buffer_page_end(dst2_u); - free_aligned_buffer_page_end(dst2_v); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_uv); -} - -TEST_F(LibYUVConvertTest, TestMJPGToNV21_420) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - if (benchmark_iterations < 1) { - benchmark_iterations = 1; - } - - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_uv, half_width * half_height * 2); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Test result matches known hash value. 
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); - uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); - EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_uv_hash, 1069662856u); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_uv); -} - -TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - if (benchmark_iterations < 1) { - benchmark_iterations = 1; - } - - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_uv, half_width * half_height * 2); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Test result matches known hash value. Hashes are for VU so flip the plane. 
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); - align_buffer_page_end(dst_vu, half_width * half_height * 2); - SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, - half_height); - uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); - EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_vu_hash, 1069662856u); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_uv); - free_aligned_buffer_page_end(dst_vu); -} - -// TODO(fbarchard): Improve test to compare against I422, not checksum -TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - if (benchmark_iterations < 1) { - benchmark_iterations = 1; - } - - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_uv, half_width * half_height * 2); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV21(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Test result matches known hash value. 
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); - uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); - EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_uv_hash, 493520167u); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_uv); -} - -TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - if (benchmark_iterations < 1) { - benchmark_iterations = 1; - } - - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_uv, half_width * half_height * 2); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV12(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Test result matches known hash value. Hashes are for VU so flip the plane. 
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); - align_buffer_page_end(dst_vu, half_width * half_height * 2); - SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, - half_height); - uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); - EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_vu_hash, 493520167u); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_uv); - free_aligned_buffer_page_end(dst_vu); -} - -TEST_F(LibYUVConvertTest, TestMJPGToNV21_400) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - if (benchmark_iterations < 1) { - benchmark_iterations = 1; - } - - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_uv, half_width * half_height * 2); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV21(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Test result matches known hash value. 
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); - uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); - EXPECT_EQ(dst_y_hash, 330644005u); - EXPECT_EQ(dst_uv_hash, 135214341u); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_uv); -} - -TEST_F(LibYUVConvertTest, TestMJPGToNV12_400) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - if (benchmark_iterations < 1) { - benchmark_iterations = 1; - } - - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_uv, half_width * half_height * 2); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV12(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Test result matches known hash value. Hashes are for VU so flip the plane. 
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); - align_buffer_page_end(dst_vu, half_width * half_height * 2); - SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, - half_height); - uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); - EXPECT_EQ(dst_y_hash, 330644005u); - EXPECT_EQ(dst_vu_hash, 135214341u); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_uv); - free_aligned_buffer_page_end(dst_vu); -} - -TEST_F(LibYUVConvertTest, TestMJPGToNV21_444) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - if (benchmark_iterations < 1) { - benchmark_iterations = 1; - } - - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_uv, half_width * half_height * 2); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV21(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Test result matches known hash value. 
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); - uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); - EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_uv_hash, 506143297u); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_uv); -} - -TEST_F(LibYUVConvertTest, TestMJPGToNV12_444) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - if (benchmark_iterations < 1) { - benchmark_iterations = 1; - } - - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_uv, half_width * half_height * 2); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV12(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Test result matches known hash value. Hashes are for VU so flip the plane. 
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); - align_buffer_page_end(dst_vu, half_width * half_height * 2); - SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, - half_height); - uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); - EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_vu_hash, 506143297u); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_uv); - free_aligned_buffer_page_end(dst_vu); -} - -TEST_F(LibYUVConvertTest, TestMJPGToARGB) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - if (benchmark_iterations < 1) { - benchmark_iterations = 1; - } - - align_buffer_page_end(dst_argb, width * height * 4); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToARGB(kTest3Jpg, kTest3JpgLen, dst_argb, width * 4, width, - height, width, height); +#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, W1280, N, NEG, OFF, ATTEN) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_u, kSizeUV + OFF); \ + align_buffer_page_end(src_v, kSizeUV + OFF); \ + align_buffer_page_end(src_a, kWidth* kHeight + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y[i + OFF] = (fastrand() & 0xff); \ + src_a[i + OFF] = (fastrand() & 0xff); \ + } \ + for 
(int i = 0; i < kSizeUV; ++i) { \ + src_u[i + OFF] = (fastrand() & 0xff); \ + src_v[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, src_a + OFF, kWidth, \ + dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight, \ + ATTEN); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, src_a + OFF, kWidth, \ + dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, \ + ATTEN); \ + } \ + for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(src_a); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ } - // Expect sucesss - EXPECT_EQ(0, ret); - // Test result matches known hash value. 
- uint32_t dst_argb_hash = HashDjb2(dst_argb, width * height, 5381); -#ifdef LIBYUV_UNLIMITED_DATA - EXPECT_EQ(dst_argb_hash, 3900633302u); +#if defined(ENABLE_FULL_TESTS) +#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_ + 1, _Any, +, 0, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Unaligned, +, 2, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Invert, -, 0, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Premult, +, 0, 1) #else - EXPECT_EQ(dst_argb_hash, 2355976473u); +#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0, 0) #endif - free_aligned_buffer_page_end(dst_argb); -} - -static int ShowJPegInfo(const uint8_t* sample, size_t sample_size) { - MJpegDecoder mjpeg_decoder; - LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); - - int width = mjpeg_decoder.GetWidth(); - int height = mjpeg_decoder.GetHeight(); - - // YUV420 - if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && - mjpeg_decoder.GetNumComponents() == 3 && - mjpeg_decoder.GetVertSampFactor(0) == 2 && - mjpeg_decoder.GetHorizSampFactor(0) == 2 && - mjpeg_decoder.GetVertSampFactor(1) == 1 && - mjpeg_decoder.GetHorizSampFactor(1) == 1 && - mjpeg_decoder.GetVertSampFactor(2) == 1 && - mjpeg_decoder.GetHorizSampFactor(2) == 1) { - printf("JPeg is J420, %dx%d %d bytes\n", width, height, - static_cast<int>(sample_size)); - // YUV422 - } else if (mjpeg_decoder.GetColorSpace() == 
MJpegDecoder::kColorSpaceYCbCr && - mjpeg_decoder.GetNumComponents() == 3 && - mjpeg_decoder.GetVertSampFactor(0) == 1 && - mjpeg_decoder.GetHorizSampFactor(0) == 2 && - mjpeg_decoder.GetVertSampFactor(1) == 1 && - mjpeg_decoder.GetHorizSampFactor(1) == 1 && - mjpeg_decoder.GetVertSampFactor(2) == 1 && - mjpeg_decoder.GetHorizSampFactor(2) == 1) { - printf("JPeg is J422, %dx%d %d bytes\n", width, height, - static_cast<int>(sample_size)); - // YUV444 - } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && - mjpeg_decoder.GetNumComponents() == 3 && - mjpeg_decoder.GetVertSampFactor(0) == 1 && - mjpeg_decoder.GetHorizSampFactor(0) == 1 && - mjpeg_decoder.GetVertSampFactor(1) == 1 && - mjpeg_decoder.GetHorizSampFactor(1) == 1 && - mjpeg_decoder.GetVertSampFactor(2) == 1 && - mjpeg_decoder.GetHorizSampFactor(2) == 1) { - printf("JPeg is J444, %dx%d %d bytes\n", width, height, - static_cast<int>(sample_size)); - // YUV400 - } else if (mjpeg_decoder.GetColorSpace() == - MJpegDecoder::kColorSpaceGrayscale && - mjpeg_decoder.GetNumComponents() == 1 && - mjpeg_decoder.GetVertSampFactor(0) == 1 && - mjpeg_decoder.GetHorizSampFactor(0) == 1) { - printf("JPeg is J400, %dx%d %d bytes\n", width, height, - static_cast<int>(sample_size)); - } else { - // Unknown colorspace. - printf("JPeg is Unknown colorspace.\n"); - } - mjpeg_decoder.UnloadFrame(); - return ret; -} - -TEST_F(LibYUVConvertTest, TestMJPGInfo) { - EXPECT_EQ(1, ShowJPegInfo(kTest0Jpg, kTest0JpgLen)); - EXPECT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen)); - EXPECT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen)); - EXPECT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen)); - EXPECT_EQ(1, ShowJPegInfo(kTest4Jpg, - kTest4JpgLen)); // Valid but unsupported. 
-} -#endif // HAVE_JPEG - -TEST_F(LibYUVConvertTest, NV12Crop) { - const int SUBSAMP_X = 2; - const int SUBSAMP_Y = 2; - const int kWidth = benchmark_width_; - const int kHeight = benchmark_height_; - const int crop_y = - ((benchmark_height_ - (benchmark_height_ * 360 / 480)) / 2 + 1) & ~1; - const int kDestWidth = benchmark_width_; - const int kDestHeight = benchmark_height_ - crop_y * 2; - const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); - const int sample_size = - kWidth * kHeight + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; - align_buffer_page_end(src_y, sample_size); - uint8_t* src_uv = src_y + kWidth * kHeight; - - align_buffer_page_end(dst_y, kDestWidth * kDestHeight); - align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) * - SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) * - SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - - align_buffer_page_end(dst_y_2, kDestWidth * kDestHeight); - align_buffer_page_end(dst_u_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) * - SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - align_buffer_page_end(dst_v_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) * - SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - - for (int i = 0; i < kHeight * kWidth; ++i) { - src_y[i] = (fastrand() & 0xff); - } - for (int i = 0; i < (SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideUV) * 2; ++i) { - src_uv[i] = (fastrand() & 0xff); - } - memset(dst_y, 1, kDestWidth * kDestHeight); - memset(dst_u, 2, - SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - memset(dst_v, 3, - SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - memset(dst_y_2, 1, kDestWidth * kDestHeight); - memset(dst_u_2, 2, - SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - memset(dst_v_2, 3, - SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - - ConvertToI420(src_y, sample_size, dst_y_2, kDestWidth, dst_u_2, - SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v_2, - SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, 
crop_y, kWidth, kHeight, - kDestWidth, kDestHeight, libyuv::kRotate0, libyuv::FOURCC_NV12); - - NV12ToI420(src_y + crop_y * kWidth, kWidth, - src_uv + (crop_y / 2) * kStrideUV * 2, kStrideUV * 2, dst_y, - kDestWidth, dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v, - SUBSAMPLE(kDestWidth, SUBSAMP_X), kDestWidth, kDestHeight); - - for (int i = 0; i < kDestHeight; ++i) { - for (int j = 0; j < kDestWidth; ++j) { - EXPECT_EQ(dst_y[i * kWidth + j], dst_y_2[i * kWidth + j]); - } - } - for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { - for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { - EXPECT_EQ(dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j], - dst_u_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); - } - } - for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { - for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { - EXPECT_EQ(dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j], - dst_v_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); - } - } - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_u); - free_aligned_buffer_page_end(dst_v); - free_aligned_buffer_page_end(dst_y_2); - free_aligned_buffer_page_end(dst_u_2); - free_aligned_buffer_page_end(dst_v_2); - free_aligned_buffer_page_end(src_y); -} - -TEST_F(LibYUVConvertTest, I420CropOddY) { - const int SUBSAMP_X = 2; - const int SUBSAMP_Y = 2; - const int kWidth = benchmark_width_; - const int kHeight = benchmark_height_; - const int crop_y = benchmark_height_ > 1 ? 
1 : 0; - const int kDestWidth = benchmark_width_; - const int kDestHeight = benchmark_height_ - crop_y * 2; - const int kStrideU = SUBSAMPLE(kWidth, SUBSAMP_X); - const int kStrideV = SUBSAMPLE(kWidth, SUBSAMP_X); - const int sample_size = kWidth * kHeight + - kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y) + - kStrideV * SUBSAMPLE(kHeight, SUBSAMP_Y); - align_buffer_page_end(src_y, sample_size); - uint8_t* src_u = src_y + kWidth * kHeight; - uint8_t* src_v = src_u + kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y); - - align_buffer_page_end(dst_y, kDestWidth * kDestHeight); - align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) * - SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) * - SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - - for (int i = 0; i < kHeight * kWidth; ++i) { - src_y[i] = (fastrand() & 0xff); - } - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideU; ++i) { - src_u[i] = (fastrand() & 0xff); - } - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideV; ++i) { - src_v[i] = (fastrand() & 0xff); - } - memset(dst_y, 1, kDestWidth * kDestHeight); - memset(dst_u, 2, - SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - memset(dst_v, 3, - SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - - MaskCpuFlags(benchmark_cpu_info_); - for (int i = 0; i < benchmark_iterations_; ++i) { - ConvertToI420(src_y, sample_size, dst_y, kDestWidth, dst_u, - SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v, - SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, crop_y, kWidth, kHeight, - kDestWidth, kDestHeight, libyuv::kRotate0, - libyuv::FOURCC_I420); - } +#define J420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define J420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define F420AlphaToARGB(a, b, c, d, e, f, g, h, 
i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define H420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define H420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define U420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define U420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define V420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define V420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define J422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define J422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define F422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define H422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define H422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, 
l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define U422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define U422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define V422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define V422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define J444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define J444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define F444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define H444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define H444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define U444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define U444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define V444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ 
+ I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define V444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) - for (int i = 0; i < kDestHeight; ++i) { - for (int j = 0; j < kDestWidth; ++j) { - EXPECT_EQ(src_y[crop_y * kWidth + i * kWidth + j], - dst_y[i * kDestWidth + j]); - } - } - for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { - for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { - EXPECT_EQ(src_u[(crop_y / 2 + i) * kStrideU + j], - dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); - } - } - for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { - for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { - EXPECT_EQ(src_v[(crop_y / 2 + i) * kStrideV + j], - dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); - } - } +#define I420AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ + &kYuvI601Constants, k, l, m, kFilterBilinear) +#define I422AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ + &kYuvI601Constants, k, l, m, kFilterBilinear) - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_u); - free_aligned_buffer_page_end(dst_v); - free_aligned_buffer_page_end(src_y); -} +#if defined(ENABLE_FULL_TESTS) +TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(J420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(J420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(H420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(H420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(F420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(F420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(U420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(U420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(V420Alpha, 2, 2, ARGB, 4, 4, 1) 
+TESTQPLANARTOB(V420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(J422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(J422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(H422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(H422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(F422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(F422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(U422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(U422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(V422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(V422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(I444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(J444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(J444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(H444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(H444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(F444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(F444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(U444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(U444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(V444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(V444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) +#else +TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) +#endif TEST_F(LibYUVConvertTest, TestYToARGB) { uint8_t y[32]; @@ -3104,6 +1288,7 @@ TEST_F(LibYUVConvertTest, TestDither) { free_aligned_buffer_page_end(dst_argb32_opt); \ } +#if defined(ENABLE_FULL_TESTS) #define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ YALIGN, FMT_C, BPP_C) \ TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ @@ -3114,116 +1299,17 @@ 
TEST_F(LibYUVConvertTest, TestDither) { YALIGN, benchmark_width_, _Invert, -, 0, FMT_C, BPP_C) \ TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ YALIGN, benchmark_width_, _Opt, +, 0, FMT_C, BPP_C) +#else +#define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, FMT_C, BPP_C) \ + TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0, FMT_C, BPP_C) +#endif #ifdef LITTLE_ENDIAN_ONLY_TEST TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4) #endif -#define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12) \ - TEST_F(LibYUVConvertTest, NAME) { \ - const int kWidth = benchmark_width_; \ - const int kHeight = benchmark_height_; \ - \ - align_buffer_page_end(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight); \ - align_buffer_page_end(orig_y, kWidth* kHeight); \ - align_buffer_page_end(orig_u, \ - SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ - align_buffer_page_end(orig_v, \ - SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ - \ - align_buffer_page_end(dst_y_orig, kWidth* kHeight); \ - align_buffer_page_end(dst_uv_orig, \ - 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ - \ - align_buffer_page_end(dst_y, kWidth* kHeight); \ - align_buffer_page_end(dst_uv, \ - 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ - \ - MemRandomize(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight); \ - \ - /* Convert UYVY to NV12 in 2 steps for reference */ \ - libyuv::UYVYTOI420(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), orig_y, kWidth, \ - orig_u, SUBSAMPLE(kWidth, 2), orig_v, \ - SUBSAMPLE(kWidth, 2), kWidth, kHeight); \ - libyuv::I420ToNV12(orig_y, kWidth, orig_u, SUBSAMPLE(kWidth, 2), orig_v, \ - SUBSAMPLE(kWidth, 2), dst_y_orig, kWidth, dst_uv_orig, \ - 2 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); \ - \ - /* Convert to NV12 */ \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - libyuv::UYVYTONV12(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), dst_y, kWidth, \ - dst_uv, 2 * SUBSAMPLE(kWidth, 
2), kWidth, kHeight); \ - } \ - \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - EXPECT_EQ(orig_y[i], dst_y[i]); \ - } \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - EXPECT_EQ(dst_y_orig[i], dst_y[i]); \ - } \ - for (int i = 0; i < 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2); \ - ++i) { \ - EXPECT_EQ(dst_uv_orig[i], dst_uv[i]); \ - } \ - \ - free_aligned_buffer_page_end(orig_uyvy); \ - free_aligned_buffer_page_end(orig_y); \ - free_aligned_buffer_page_end(orig_u); \ - free_aligned_buffer_page_end(orig_v); \ - free_aligned_buffer_page_end(dst_y_orig); \ - free_aligned_buffer_page_end(dst_uv_orig); \ - free_aligned_buffer_page_end(dst_y); \ - free_aligned_buffer_page_end(dst_uv); \ - } - -TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12) -TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12) - -TEST_F(LibYUVConvertTest, MM21ToYUY2) { - const int kWidth = (benchmark_width_ + 15) & (~15); - const int kHeight = (benchmark_height_ + 31) & (~31); - - align_buffer_page_end(orig_y, kWidth * kHeight); - align_buffer_page_end(orig_uv, - 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); - - align_buffer_page_end(tmp_y, kWidth * kHeight); - align_buffer_page_end(tmp_u, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); - align_buffer_page_end(tmp_v, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); - - align_buffer_page_end(dst_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight); - align_buffer_page_end(golden_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight); - - MemRandomize(orig_y, kWidth * kHeight); - MemRandomize(orig_uv, 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); - - /* Convert MM21 to YUY2 in 2 steps for reference */ - libyuv::MM21ToI420(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2), tmp_y, - kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v, - SUBSAMPLE(kWidth, 2), kWidth, kHeight); - libyuv::I420ToYUY2(tmp_y, kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v, - SUBSAMPLE(kWidth, 2), golden_yuyv, - 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); - - /* Convert to NV12 */ - for (int i = 0; i 
< benchmark_iterations_; ++i) { - libyuv::MM21ToYUY2(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2), - dst_yuyv, 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); - } - - for (int i = 0; i < 4 * SUBSAMPLE(kWidth, 2) * kHeight; ++i) { - EXPECT_EQ(dst_yuyv[i], golden_yuyv[i]); - } - - free_aligned_buffer_page_end(orig_y); - free_aligned_buffer_page_end(orig_uv); - free_aligned_buffer_page_end(tmp_y); - free_aligned_buffer_page_end(tmp_u); - free_aligned_buffer_page_end(tmp_v); - free_aligned_buffer_page_end(dst_yuyv); - free_aligned_buffer_page_end(golden_yuyv); -} - // Transitive test. A to B to C is same as A to C. // Benchmarks A To B to C for comparison to 1 step, benchmarked elsewhere. #define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ @@ -3526,6 +1612,7 @@ TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) free_aligned_buffer_page_end(dst_argb_bc); \ } +#if defined(ENABLE_FULL_TESTS) #define TESTPLANETOE(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, FMT_C, BPP_C) \ TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, \ benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C) \ @@ -3535,6 +1622,11 @@ TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) _Invert, -, 0, FMT_C, BPP_C) \ TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ _Opt, +, 0, FMT_C, BPP_C) +#else +#define TESTPLANETOE(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ + _Opt, +, 0, FMT_C, BPP_C) +#endif // Caveat: Destination needs to be 4 bytes #ifdef LITTLE_ENDIAN_ONLY_TEST @@ -3651,6 +1743,8 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) { } #endif // HAS_ABGRTOAR30ROW_AVX2 +#if !defined(LEAN_TESTS) + // Provide matrix wrappers for 12 bit YUV #define I012ToARGB(a, b, c, d, e, f, g, h, i, j) \ I012ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) @@ -3745,6 +1839,7 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) { 
free_aligned_buffer_page_end(dst_argb_opt); \ } +#if defined(ENABLE_FULL_TESTS) #define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \ BPP_B, ALIGN, YALIGN) \ TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ @@ -3755,6 +1850,12 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) { ALIGN, YALIGN, benchmark_width_, _Invert, -, 0, 0) \ TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ ALIGN, YALIGN, benchmark_width_, _Opt, +, 0, 0) +#else +#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \ + BPP_B, ALIGN, YALIGN) \ + TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ + ALIGN, YALIGN, benchmark_width_, _Opt, +, 0, 0) +#endif // These conversions are only optimized for x86 #if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) @@ -4083,6 +2184,7 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10) free_aligned_buffer_page_end(dst_argb_opt); \ } +#if defined(ENABLE_FULL_TESTS) #define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ YALIGN, S_DEPTH) \ TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ @@ -4093,6 +2195,12 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10) benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \ TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ benchmark_width_, _Opt, +, 0, 0, S_DEPTH) +#else +#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_, _Opt, +, 0, 0, S_DEPTH) +#endif #define P010ToARGB(a, b, c, d, e, f, g, h) \ P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) @@ -4587,61 +2695,6 @@ TEST_F(LibYUVConvertTest, Test565) { uint32_t checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381); EXPECT_EQ(610919429u, checksum); } - -// Test RGB24 to J420 
is exact -#if defined(LIBYUV_BIT_EXACT) -TEST_F(LibYUVConvertTest, TestRGB24ToJ420) { - const int kSize = 256; - align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24 - align_buffer_page_end(dest_j420, kSize * 3 / 2 * 2); - int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) / - (kSize * 2) * benchmark_iterations_; - - for (int i = 0; i < kSize * 3 * 2; ++i) { - orig_rgb24[i] = i; - } - - for (int i = 0; i < iterations256; ++i) { - RGB24ToJ420(orig_rgb24, kSize * 3, dest_j420, kSize, // Y plane - dest_j420 + kSize * 2, kSize / 2, // U plane - dest_j420 + kSize * 5 / 2, kSize / 2, // V plane - kSize, 2); - } - - uint32_t checksum = HashDjb2(dest_j420, kSize * 3 / 2 * 2, 5381); - EXPECT_EQ(2755440272u, checksum); - - free_aligned_buffer_page_end(orig_rgb24); - free_aligned_buffer_page_end(dest_j420); -} -#endif - -// Test RGB24 to I420 is exact -#if defined(LIBYUV_BIT_EXACT) -TEST_F(LibYUVConvertTest, TestRGB24ToI420) { - const int kSize = 256; - align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24 - align_buffer_page_end(dest_i420, kSize * 3 / 2 * 2); - int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) / - (kSize * 2) * benchmark_iterations_; - - for (int i = 0; i < kSize * 3 * 2; ++i) { - orig_rgb24[i] = i; - } - - for (int i = 0; i < iterations256; ++i) { - RGB24ToI420(orig_rgb24, kSize * 3, dest_i420, kSize, // Y plane - dest_i420 + kSize * 2, kSize / 2, // U plane - dest_i420 + kSize * 5 / 2, kSize / 2, // V plane - kSize, 2); - } - - uint32_t checksum = HashDjb2(dest_i420, kSize * 3 / 2 * 2, 5381); - EXPECT_EQ(1526656597u, checksum); - - free_aligned_buffer_page_end(orig_rgb24); - free_aligned_buffer_page_end(dest_i420); -} -#endif +#endif // !defined(LEAN_TESTS) } // namespace libyuv diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc new file mode 100644 index 00000000..f55bace3 --- /dev/null +++ b/unit_test/convert_test.cc @@ -0,0 +1,2110 @@ +/* + * Copyright 
2011 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> +#include <stdlib.h> +#include <time.h> + +#include "libyuv/basic_types.h" +#include "libyuv/compare.h" +#include "libyuv/convert.h" +#include "libyuv/convert_argb.h" +#include "libyuv/convert_from.h" +#include "libyuv/convert_from_argb.h" +#include "libyuv/cpu_id.h" +#ifdef HAVE_JPEG +#include "libyuv/mjpeg_decoder.h" +#endif +#include "../unit_test/unit_test.h" +#include "libyuv/planar_functions.h" +#include "libyuv/rotate.h" +#include "libyuv/video_common.h" + +#ifdef ENABLE_ROW_TESTS +#include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */ +#endif + +#if defined(__riscv) && !defined(__clang__) +#define DISABLE_SLOW_TESTS +#undef ENABLE_FULL_TESTS +#undef ENABLE_ROW_TESTS +#define LEAN_TESTS +#endif + +// Some functions fail on big endian. Enable these tests on all cpus except +// PowerPC, but they are not optimized so disabled by default. +#if !defined(DISABLE_SLOW_TESTS) && !defined(__powerpc__) +#define LITTLE_ENDIAN_ONLY_TEST 1 +#endif +#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) +// SLOW TESTS are those that are unoptimized C code. +// FULL TESTS are optimized but test many variations of the same code. +#define ENABLE_FULL_TESTS +#endif + +namespace libyuv { + +// Alias to copy pixels as is +#define AR30ToAR30 ARGBCopy +#define ABGRToABGR ARGBCopy + +// subsample amount uses a divide. 
+#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) + +#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) + +// Planar test + +#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \ + SRC_DEPTH) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ + static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ + static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ + "SRC_SUBSAMP_X unsupported"); \ + static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ + "SRC_SUBSAMP_Y unsupported"); \ + static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ + "DST_SUBSAMP_X unsupported"); \ + static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ + "DST_SUBSAMP_Y unsupported"); \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ + const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ + const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ + const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \ + align_buffer_page_end(src_u, \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(src_v, \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \ + MemRandomize(src_u + 
OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ + MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ + SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ + SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF); \ + SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1); \ + } \ + for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) { \ + src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \ + src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \ + } \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \ + reinterpret_cast<DST_T*>(dst_y_c), kWidth, \ + reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth, \ + reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth, \ + NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \ + reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \ + reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth, \ + reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth, \ + NEG kHeight); \ + } \ + for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ + EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ + } \ + for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \ + EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \ + EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + 
free_aligned_buffer_page_end(dst_u_c); \ + free_aligned_buffer_page_end(dst_v_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_u_opt); \ + free_aligned_buffer_page_end(dst_v_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2, SRC_DEPTH) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0, SRC_DEPTH) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0, SRC_DEPTH) +#else +#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0, SRC_DEPTH) +#endif + +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8) +TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I420, uint8_t, 1, 2, 2, 8) +TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I420, uint8_t, 1, 2, 2, 8) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I422, uint8_t, 1, 2, 1, 8) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I444, uint8_t, 1, 1, 
1, 8) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420Mirror, uint8_t, 1, 2, 2, 8) +TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I422, uint8_t, 1, 2, 1, 8) +TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I444, uint8_t, 1, 1, 1, 8) +TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I444, uint8_t, 1, 1, 1, 8) +TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I010, uint16_t, 2, 2, 2, 8) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I012, uint16_t, 2, 2, 2, 8) +TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H010, uint16_t, 2, 2, 2, 10) +TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H420, uint8_t, 1, 2, 2, 10) +TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2, 8) +TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H012, uint16_t, 2, 2, 2, 8) +TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I410, uint16_t, 2, 1, 1, 10) +TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I410, uint16_t, 2, 1, 1, 10) +TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I412, uint16_t, 2, 1, 1, 12) +TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I412, uint16_t, 2, 1, 1, 12) +TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I010, uint16_t, 2, 2, 2, 10) +TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I010, uint16_t, 2, 2, 2, 10) +TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I012, uint16_t, 2, 2, 2, 12) +TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12) +TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10) +TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10) +TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10) +TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 10) +TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10) +TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12) +TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 12) +TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 12) +TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 12) +TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12) + +// Test 
Android 420 to I420 +#define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + W1280, N, NEG, OFF, PN, OFF_U, OFF_V) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##To##PN##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kSizeUV = \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_uv, \ + kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ + align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + uint8_t* src_u = src_uv + OFF_U; \ + uint8_t* src_v = src_uv + (PIXEL_STRIDE == 1 ? 
kSizeUV : OFF_V); \ + int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE; \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kWidth; ++j) \ + src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ + src_u[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \ + (fastrand() & 0xff); \ + src_v[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \ + (fastrand() & 0xff); \ + } \ + } \ + memset(dst_y_c, 1, kWidth* kHeight); \ + memset(dst_u_c, 2, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_v_c, 3, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_opt, 101, kWidth* kHeight); \ + memset(dst_u_opt, 102, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_v_opt, 103, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ + src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, dst_y_c, \ + kWidth, dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \ + SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ + src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, \ + dst_y_opt, kWidth, dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ + dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \ + } \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { 
\ + EXPECT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ + dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ + EXPECT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ + dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_u_c); \ + free_aligned_buffer_page_end(dst_v_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_u_opt); \ + free_aligned_buffer_page_end(dst_v_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V, \ + SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, \ + SUBSAMP_Y) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_ + 1, \ + _Any, +, 0, PN, OFF_U, OFF_V) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, \ + _Unaligned, +, 2, PN, OFF_U, OFF_V) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, \ + -, 0, PN, OFF_U, OFF_V) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \ + 0, PN, OFF_U, OFF_V) +#else +#define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V, \ + SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, \ + SUBSAMP_Y) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \ + 0, PN, OFF_U, OFF_V) +#endif + +TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2) 
+TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2) +TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2) +#undef TESTAPLANARTOP +#undef TESTAPLANARTOPI + +// wrapper to keep API the same +int I400ToNV21(const uint8_t* src_y, + int src_stride_y, + const uint8_t* /* src_u */, + int /* src_stride_u */, + const uint8_t* /* src_v */, + int /* src_stride_v */, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height) { + return I400ToNV21(src_y, src_stride_y, dst_y, dst_stride_y, dst_vu, + dst_stride_vu, width, height); +} + +#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \ + SRC_DEPTH) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ + static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ + static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ + "SRC_SUBSAMP_X unsupported"); \ + static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ + "SRC_SUBSAMP_Y unsupported"); \ + static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ + "DST_SUBSAMP_X unsupported"); \ + static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ + "DST_SUBSAMP_Y unsupported"); \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ + const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ + const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ + const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \ + align_buffer_page_end(src_u, \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(src_v, \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ + 
align_buffer_page_end(dst_uv_c, \ + kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_uv_opt, \ + kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ + MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \ + MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ + MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ + SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ + SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF); \ + SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1); \ + } \ + for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) { \ + src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \ + src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \ + } \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_uv_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_uv_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR(src_y_p, kWidth, src_u_p, kSrcHalfWidth, \ + src_v_p, kSrcHalfWidth, \ + reinterpret_cast<DST_T*>(dst_y_c), kWidth, \ + reinterpret_cast<DST_T*>(dst_uv_c), \ + kDstHalfWidth * 2, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \ + reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \ + reinterpret_cast<DST_T*>(dst_uv_opt), kDstHalfWidth * 2, kWidth, \ + NEG kHeight); \ + } \ + for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ + EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ + } \ + for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC * 2; ++i) { \ + EXPECT_EQ(dst_uv_c[i], dst_uv_opt[i]); \ + } \ + 
free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \ + SRC_DEPTH) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH) +#else +#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH) +#endif + +TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8) +TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8) +TESTPLANARTOBP(I422, uint8_t, 1, 2, 1, NV21, uint8_t, 1, 2, 2, 8) +TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV12, uint8_t, 1, 2, 2, 8) +TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV21, uint8_t, 1, 2, 2, 8) +TESTPLANARTOBP(I400, 
uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8) +TESTPLANARTOBP(I010, uint16_t, 2, 2, 2, P010, uint16_t, 2, 2, 2, 10) +TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10) +TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12) +TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12) + +#define TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, W1280, N, NEG, OFF, DOY, SRC_DEPTH, \ + TILE_WIDTH, TILE_HEIGHT) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ + static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ + "SRC_SUBSAMP_X unsupported"); \ + static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ + "SRC_SUBSAMP_Y unsupported"); \ + static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ + "DST_SUBSAMP_X unsupported"); \ + static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ + "DST_SUBSAMP_Y unsupported"); \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ + const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ + const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ + const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \ + const int kPaddedHeight = \ + (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \ + const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \ + const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \ + align_buffer_page_end( \ + src_uv, \ + 2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_uv_c, \ + 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + 
align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_uv_opt, \ + 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ + SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \ + for (int i = 0; \ + i < kPaddedWidth * kPaddedHeight * SRC_BPC / (int)sizeof(SRC_T); \ + ++i) { \ + src_y_p[i] = \ + (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ + } \ + for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2 * \ + SRC_BPC / (int)sizeof(SRC_T); \ + ++i) { \ + src_uv_p[i] = \ + (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ + } \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \ + 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \ + DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \ + reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \ + NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \ + 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \ + DOY ? 
reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \ + reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \ + NEG kHeight); \ + } \ + if (DOY) { \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + } \ + } \ + } \ + for (int i = 0; i < kDstHalfHeight; ++i) { \ + for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \ + EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \ + dst_uv_opt[i * 2 * kDstHalfWidth + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, 
DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) +#else +#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) +#endif + +TESTBPTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1) +TESTBPTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1) +TESTBPTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1) +TESTBPTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1) +TESTBPTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1) +TESTBPTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1) +TESTBPTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1) +TESTBPTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1) +TESTBPTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32) +TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32) + +#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \ + align_buffer_page_end(src_argb, kStride* kHeight + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_c, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + 
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_opt, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_c, 1, kWidth* kHeight); \ + memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_opt, 101, kWidth* kHeight); \ + memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kStride; ++j) \ + src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \ + kStrideUV * 2, dst_uv_c + kStrideUV, kStrideUV * 2, \ + kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \ + dst_uv_opt, kStrideUV * 2, dst_uv_opt + kStrideUV, \ + kStrideUV * 2, kWidth, NEG kHeight); \ + } \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \ + for (int j = 0; j < kStrideUV; ++j) { \ + EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_argb); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, 
_Invert, -, 0) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#else +#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#endif + +TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2) +TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2) +TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1) +TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1) +TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2) +TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1) +TESTATOPLANAR(ABGR, 4, 1, J420, 2, 2) +TESTATOPLANAR(ABGR, 4, 1, J422, 2, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2) +TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2) +TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2) +#endif +TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2) +TESTATOPLANAR(I400, 1, 1, I420, 2, 2) +TESTATOPLANAR(J400, 1, 1, J420, 2, 2) +TESTATOPLANAR(RAW, 3, 1, I420, 2, 2) +TESTATOPLANAR(RAW, 3, 1, J420, 2, 2) +TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2) +TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2) +TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2) +TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2) +TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1) +TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2) +TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1) + +#define TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, \ + SUBSAMP_Y, W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \ + align_buffer_page_end(src_argb, kStride* kHeight + OFF); \ + align_buffer_page_end(dst_a_c, kWidth* kHeight); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_c, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_a_opt, kWidth* kHeight); \ + 
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_opt, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_a_c, 1, kWidth* kHeight); \ + memset(dst_y_c, 2, kWidth* kHeight); \ + memset(dst_uv_c, 3, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_a_opt, 101, kWidth* kHeight); \ + memset(dst_y_opt, 102, kWidth* kHeight); \ + memset(dst_uv_opt, 103, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kStride; ++j) \ + src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \ + kStrideUV * 2, dst_uv_c + kStrideUV, kStrideUV * 2, \ + dst_a_c, kWidth, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \ + dst_uv_opt, kStrideUV * 2, dst_uv_opt + kStrideUV, \ + kStrideUV * 2, dst_a_opt, kWidth, kWidth, \ + NEG kHeight); \ + } \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + EXPECT_EQ(dst_a_c[i * kWidth + j], dst_a_opt[i * kWidth + j]); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \ + for (int j = 0; j < kStrideUV; ++j) { \ + EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_a_c); \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_a_opt); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_argb); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, 
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#else +#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#endif + +TESTATOPLANARA(ARGB, 4, 1, I420Alpha, 2, 2) + +#define TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kStride = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + align_buffer_page_end(src_argb, kStride* kHeight + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_c, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_opt, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kStride; ++j) \ + src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ + memset(dst_y_c, 1, kWidth* kHeight); \ + memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_opt, 101, kWidth* kHeight); \ + memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \ + kStrideUV * 2, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + 
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \ + dst_uv_opt, kStrideUV * 2, kWidth, NEG kHeight); \ + } \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < kStrideUV * 2; ++j) { \ + EXPECT_EQ(dst_uv_c[i * kStrideUV * 2 + j], \ + dst_uv_opt[i * kStrideUV * 2 + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_argb); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#else +#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#endif + +TESTATOBP(ARGB, 1, 4, NV12, 2, 2) +TESTATOBP(ARGB, 1, 4, NV21, 2, 2) +TESTATOBP(ABGR, 1, 4, NV12, 2, 2) +TESTATOBP(ABGR, 1, 4, NV21, 2, 2) +TESTATOBP(RAW, 1, 3, JNV21, 2, 2) +TESTATOBP(YUY2, 2, 4, NV12, 2, 2) +TESTATOBP(UYVY, 2, 4, NV12, 2, 2) +TESTATOBP(AYUV, 1, 4, NV12, 2, 2) +TESTATOBP(AYUV, 1, 4, NV21, 2, 2) + +#if !defined(LEAN_TESTS) + +#ifdef HAVE_JPEG +TEST_F(LibYUVConvertTest, ValidateJpeg) { + const int kOff = 10; + const int kMinJpeg = 64; + const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg + ? 
benchmark_width_ * benchmark_height_ + : kMinJpeg; + const int kSize = kImageSize + kOff; + align_buffer_page_end(orig_pixels, kSize); + + // No SOI or EOI. Expect fail. + memset(orig_pixels, 0, kSize); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + + // Test special value that matches marker start. + memset(orig_pixels, 0xff, kSize); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + + // EOI, SOI. Expect pass. + orig_pixels[0] = 0xff; + orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; + orig_pixels[kSize - kOff + 0] = 0xff; + orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. + for (int times = 0; times < benchmark_iterations_; ++times) { + EXPECT_TRUE(ValidateJpeg(orig_pixels, kSize)); + } + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVConvertTest, ValidateJpegLarge) { + const int kOff = 10; + const int kMinJpeg = 64; + const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg + ? benchmark_width_ * benchmark_height_ + : kMinJpeg; + const int kSize = kImageSize + kOff; + const int kMultiple = 10; + const int kBufSize = kImageSize * kMultiple + kOff; + align_buffer_page_end(orig_pixels, kBufSize); + + // No SOI or EOI. Expect fail. + memset(orig_pixels, 0, kBufSize); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kBufSize)); + + // EOI, SOI. Expect pass. + orig_pixels[0] = 0xff; + orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; + orig_pixels[kSize - kOff + 0] = 0xff; + orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. + for (int times = 0; times < benchmark_iterations_; ++times) { + EXPECT_TRUE(ValidateJpeg(orig_pixels, kBufSize)); + } + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVConvertTest, InvalidateJpeg) { + const int kOff = 10; + const int kMinJpeg = 64; + const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg + ? benchmark_width_ * benchmark_height_ + : kMinJpeg; + const int kSize = kImageSize + kOff; + align_buffer_page_end(orig_pixels, kSize); + + // NULL pointer. Expect fail. 
+ EXPECT_FALSE(ValidateJpeg(NULL, kSize)); + + // Negative size. Expect fail. + EXPECT_FALSE(ValidateJpeg(orig_pixels, -1)); + + // Too large size. Expect fail. + EXPECT_FALSE(ValidateJpeg(orig_pixels, 0xfb000000ull)); + + // No SOI or EOI. Expect fail. + memset(orig_pixels, 0, kSize); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + + // SOI but no EOI. Expect fail. + orig_pixels[0] = 0xff; + orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; + for (int times = 0; times < benchmark_iterations_; ++times) { + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + } + + // EOI but no SOI. Expect fail. + orig_pixels[0] = 0; + orig_pixels[1] = 0; + orig_pixels[kSize - kOff + 0] = 0xff; + orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVConvertTest, FuzzJpeg) { + // SOI but no EOI. Expect fail. + for (int times = 0; times < benchmark_iterations_; ++times) { + const int kSize = fastrand() % 5000 + 3; + align_buffer_page_end(orig_pixels, kSize); + MemRandomize(orig_pixels, kSize); + + // Add SOI so frame will be scanned. + orig_pixels[0] = 0xff; + orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; + orig_pixels[kSize - 1] = 0xff; + ValidateJpeg(orig_pixels, + kSize); // Failure normally expected. + free_aligned_buffer_page_end(orig_pixels); + } +} + +// Test data created in GIMP. In export jpeg, disable +// thumbnails etc, choose a subsampling, and use low quality +// (50) to keep size small. 
Generated with xxd -i test.jpg +// test 0 is J400 +static const uint8_t kTest0Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xc2, 0x00, 0x0b, 0x08, 0x00, 0x10, + 0x00, 0x20, 0x01, 0x01, 0x11, 0x00, 0xff, 0xc4, 0x00, 0x17, 0x00, 0x01, + 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xda, 0x00, 0x08, 0x01, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x43, 0x7e, 0xa7, 0x97, 0x57, 0xff, 0xc4, + 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, + 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, + 0x02, 0x3b, 0xc0, 0x6f, 0x66, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, + 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, + 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, + 0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, + 0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, + 0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, + 0x3f, 0x21, 0x65, 0x6e, 0x31, 0x86, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, + 0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 
0x23, 0xf9, + 0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x01, 0x00, 0x00, 0x00, 0x10, 0x35, 0xff, 0xc4, 0x00, 0x1f, 0x10, + 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, + 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, + 0x3f, 0x10, 0x0b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x88, 0xab, 0x8b, + 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, + 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, + 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, + 0xd9}; +static const size_t kTest0JpgLen = 421; + +// test 1 is J444 +static const uint8_t kTest1Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, + 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, + 0x01, 0x11, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, + 0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4, + 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x01, 0x03, 0xff, 0xda, + 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, 0x01, + 0x40, 0x8f, 0x26, 0xe8, 0xf4, 0xcc, 0xf9, 0x69, 0x2b, 0x1b, 0x2a, 0xcb, + 0xff, 0xc4, 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, + 0x00, 0x03, 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, + 0x01, 0x05, 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, + 0x0d, 0x26, 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x01, 0x00, + 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x10, 0x11, 0x02, 0x12, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x03, 0x01, 0x01, 0x3f, 0x01, 0xf1, 0x00, 0x27, 0x45, 0xbb, 0x31, + 0xaf, 0xff, 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x02, 0x03, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x02, 0x10, 0x11, 0x41, 0x12, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, + 0x01, 0x3f, 0x01, 0xf6, 0x4b, 0x5f, 0x48, 0xb3, 0x69, 0x63, 0x35, 0x72, + 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, + 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00, + 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2, + 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c, + 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61, + 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21, + 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01, + 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48, + 0x5d, 0x7a, 0x35, 0x02, 
0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, + 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x26, 0x61, 0xd4, 0xff, + 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x03, 0x01, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, + 0x31, 0x41, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, + 0x10, 0x54, 0xa8, 0xbf, 0x50, 0x87, 0xb0, 0x9d, 0x8b, 0xc4, 0x6a, 0x26, + 0x6b, 0x2a, 0x9c, 0x1f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x11, 0x21, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, + 0x01, 0x01, 0x3f, 0x10, 0x70, 0xe1, 0x3e, 0xd1, 0x8e, 0x0d, 0xe1, 0xb5, + 0xd5, 0x91, 0x76, 0x43, 0x82, 0x45, 0x4c, 0x7b, 0x7f, 0xff, 0xc4, 0x00, + 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, + 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, + 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a, + 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, + 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, + 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, + 0xd4, 0xff, 0xd9}; +static const size_t kTest1JpgLen = 735; + +// test 2 is J420 +static const uint8_t kTest2Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 
0x43, 0x01, 0x11, 0x12, + 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, + 0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, + 0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x05, 0x01, 0x02, 0x04, 0xff, + 0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x01, 0x02, 0xff, + 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, + 0x01, 0x20, 0xe7, 0x28, 0xa3, 0x0b, 0x2e, 0x2d, 0xcf, 0xff, 0xc4, 0x00, + 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, 0x10, + 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, 0x02, + 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, 0x62, + 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x00, 0x03, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, + 0x01, 0xc8, 0x53, 0xff, 0xc4, 0x00, 0x16, 0x11, 0x01, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x11, 0x32, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f, + 0x01, 0xd2, 0xc7, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, + 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 
0x02, 0x4b, 0xb3, 0x28, + 0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, + 0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, + 0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, + 0x3f, 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, + 0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, + 0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, + 0x03, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x13, 0x5f, + 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, + 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x0e, + 0xa1, 0x3a, 0x76, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x21, 0x11, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, + 0x3f, 0x10, 0x57, 0x0b, 0x08, 0x70, 0xdb, 0xff, 0xc4, 0x00, 0x1f, 0x10, + 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, + 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, + 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b, + 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, + 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, + 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, + 0xd9}; +static const size_t kTest2JpgLen = 685; + +// test 3 is J422 +static const uint8_t kTest3Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 
0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, + 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, + 0x01, 0x21, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, + 0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4, + 0x00, 0x17, 0x01, 0x00, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x00, 0xff, + 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, + 0x01, 0x43, 0x8d, 0x1f, 0xa2, 0xb3, 0xca, 0x1b, 0x57, 0x0f, 0xff, 0xc4, + 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, + 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, + 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, + 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x02, 0x10, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, + 0x01, 0x01, 0x3f, 0x01, 0x51, 0xce, 0x8c, 0x75, 0xff, 0xc4, 0x00, 0x18, + 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x61, 0x21, 0xff, 0xda, + 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xa6, 0xd9, 0x2f, 0x84, + 0xe8, 0xf0, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, + 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, + 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, + 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, + 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, + 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, + 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, + 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, + 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x2e, 0x45, 0xff, + 0xc4, 0x00, 0x18, 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, + 0x31, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x53, + 0x50, 0xba, 0x54, 0xc1, 0x67, 0x4f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00, + 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, 0x00, 0x10, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x02, 0x01, 0x01, 0x3f, 0x10, 0x18, 0x81, 0x5c, 0x04, 0x1a, 0xca, + 0x91, 0xbf, 0xff, 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, + 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, + 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, + 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, + 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, + 0x47, 0xa7, 
0x30, 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, + 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, 0xd9}; +static const size_t kTest3JpgLen = 704; + +// test 4 is J422 vertical - not supported +static const uint8_t kTest4Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, + 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, + 0x01, 0x12, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, + 0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x05, 0x01, 0x02, 0x03, 0xff, + 0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03, 0xff, + 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, + 0x01, 0xd2, 0x98, 0xe9, 0x03, 0x0c, 0x00, 0x46, 0x21, 0xd9, 0xff, 0xc4, + 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, + 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 
0x01, 0x05, + 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, + 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x11, 0x01, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, + 0x3f, 0x01, 0x98, 0xb1, 0xbd, 0x47, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00, + 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x12, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xb6, 0x35, 0xa2, 0xe1, 0x47, 0xff, + 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x21, 0x02, + 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00, 0x08, 0x01, + 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2, 0xed, 0xf9, + 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c, 0x10, 0x01, + 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61, 0x81, 0xf0, + 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21, 0x75, 0x6e, + 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01, 0xf3, 0xde, + 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48, 0x5d, 0x7a, + 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x24, 0xaf, 0xff, 0xc4, 0x00, 0x19, + 0x11, 0x00, 0x03, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x51, 0x21, 0x31, 0xff, + 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x59, 0x11, 0xca, + 0x42, 0x60, 0x9f, 0x69, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x11, 0x21, 0x31, 0x61, 0xff, 0xda, 0x00, 0x08, 0x01, + 0x02, 0x01, 0x01, 0x3f, 0x10, 0xb0, 0xd7, 0x27, 0x51, 0xb6, 
0x41, 0xff, + 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, + 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, + 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, + 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, + 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, + 0x0b, 0xb7, 0xd4, 0xff, 0xd9}; +static const size_t kTest4JpgLen = 701; + +TEST_F(LibYUVConvertTest, TestMJPGSize) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + printf("test jpeg size %d x %d\n", width, height); +} + +TEST_F(LibYUVConvertTest, TestMJPGToI420) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_u, half_width * half_height); + align_buffer_page_end(dst_v, half_width * half_height); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_u, half_width, + dst_v, half_width, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_u_hash = HashDjb2(dst_u, half_width * half_height, 5381); + uint32_t dst_v_hash = HashDjb2(dst_v, half_width * half_height, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_u_hash, 2501859930u); + EXPECT_EQ(dst_v_hash, 2126459123u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_u); + free_aligned_buffer_page_end(dst_v); +} + +TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + // Convert to NV21 + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_vu, half_width * half_height * 2); + + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_vu, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Convert to I420 + align_buffer_page_end(dst2_y, width * height); + align_buffer_page_end(dst2_u, half_width * half_height); + align_buffer_page_end(dst2_v, half_width * half_height); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width, + dst2_v, half_width, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Convert I420 to NV21 + align_buffer_page_end(dst3_y, width * height); + align_buffer_page_end(dst3_vu, half_width * half_height * 2); + + I420ToNV21(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y, + width, dst3_vu, half_width * 2, width, height); + + for (int i = 0; i < width * height; ++i) { + EXPECT_EQ(dst_y[i], 
dst3_y[i]); + } + for (int i = 0; i < half_width * half_height * 2; ++i) { + EXPECT_EQ(dst_vu[i], dst3_vu[i]); + EXPECT_EQ(dst_vu[i], dst3_vu[i]); + } + + free_aligned_buffer_page_end(dst3_y); + free_aligned_buffer_page_end(dst3_vu); + + free_aligned_buffer_page_end(dst2_y); + free_aligned_buffer_page_end(dst2_u); + free_aligned_buffer_page_end(dst2_v); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_vu); +} + +TEST_F(LibYUVConvertTest, TestMJPGToI420_NV12) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + // Convert to NV12 + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Convert to I420 + align_buffer_page_end(dst2_y, width * height); + align_buffer_page_end(dst2_u, half_width * half_height); + align_buffer_page_end(dst2_v, half_width * half_height); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width, + dst2_v, half_width, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Convert I420 to NV12 + align_buffer_page_end(dst3_y, width * height); + align_buffer_page_end(dst3_uv, half_width * half_height * 2); + + I420ToNV12(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y, + width, dst3_uv, half_width * 2, width, height); + + for (int i = 0; i < width * height; ++i) { + EXPECT_EQ(dst_y[i], 
dst3_y[i]); + } + for (int i = 0; i < half_width * half_height * 2; ++i) { + EXPECT_EQ(dst_uv[i], dst3_uv[i]); + EXPECT_EQ(dst_uv[i], dst3_uv[i]); + } + + free_aligned_buffer_page_end(dst3_y); + free_aligned_buffer_page_end(dst3_uv); + + free_aligned_buffer_page_end(dst2_y); + free_aligned_buffer_page_end(dst2_u); + free_aligned_buffer_page_end(dst2_v); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV21_420) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_uv_hash, 1069662856u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. Hashes are for VU so flip the plane. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + align_buffer_page_end(dst_vu, half_width * half_height * 2); + SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, + half_height); + uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_vu_hash, 1069662856u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); + free_aligned_buffer_page_end(dst_vu); +} + +// TODO(fbarchard): Improve test to compare against I422, not checksum +TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_uv_hash, 493520167u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV12(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. Hashes are for VU so flip the plane. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + align_buffer_page_end(dst_vu, half_width * half_height * 2); + SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, + half_height); + uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_vu_hash, 493520167u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); + free_aligned_buffer_page_end(dst_vu); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV21_400) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 330644005u); + EXPECT_EQ(dst_uv_hash, 135214341u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV12_400) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV12(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. Hashes are for VU so flip the plane. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + align_buffer_page_end(dst_vu, half_width * half_height * 2); + SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, + half_height); + uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 330644005u); + EXPECT_EQ(dst_vu_hash, 135214341u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); + free_aligned_buffer_page_end(dst_vu); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV21_444) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_uv_hash, 506143297u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV12_444) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV12(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. Hashes are for VU so flip the plane. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + align_buffer_page_end(dst_vu, half_width * half_height * 2); + SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, + half_height); + uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_vu_hash, 506143297u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); + free_aligned_buffer_page_end(dst_vu); +} + +TEST_F(LibYUVConvertTest, TestMJPGToARGB) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_argb, width * height * 4); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToARGB(kTest3Jpg, kTest3JpgLen, dst_argb, width * 4, width, + height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_argb_hash = HashDjb2(dst_argb, width * height, 5381); +#ifdef LIBYUV_UNLIMITED_DATA + EXPECT_EQ(dst_argb_hash, 3900633302u); +#else + EXPECT_EQ(dst_argb_hash, 2355976473u); +#endif + + free_aligned_buffer_page_end(dst_argb); +} + +static int ShowJPegInfo(const uint8_t* sample, size_t sample_size) { + MJpegDecoder mjpeg_decoder; + LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); + + int width = mjpeg_decoder.GetWidth(); + int height = mjpeg_decoder.GetHeight(); + + // YUV420 + if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && + mjpeg_decoder.GetNumComponents() == 3 && + mjpeg_decoder.GetVertSampFactor(0) == 2 && + mjpeg_decoder.GetHorizSampFactor(0) == 2 && + mjpeg_decoder.GetVertSampFactor(1) == 1 && + mjpeg_decoder.GetHorizSampFactor(1) == 1 && + mjpeg_decoder.GetVertSampFactor(2) == 1 && + mjpeg_decoder.GetHorizSampFactor(2) == 1) { + printf("JPeg is J420, %dx%d %d bytes\n", width, height, + static_cast<int>(sample_size)); + // YUV422 + } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && + mjpeg_decoder.GetNumComponents() == 3 && + mjpeg_decoder.GetVertSampFactor(0) == 1 && + mjpeg_decoder.GetHorizSampFactor(0) == 2 && + mjpeg_decoder.GetVertSampFactor(1) == 1 && + mjpeg_decoder.GetHorizSampFactor(1) == 1 && + mjpeg_decoder.GetVertSampFactor(2) == 1 && + mjpeg_decoder.GetHorizSampFactor(2) == 1) { + printf("JPeg is J422, %dx%d %d bytes\n", width, height, + static_cast<int>(sample_size)); + // YUV444 + } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && + mjpeg_decoder.GetNumComponents() == 3 && + mjpeg_decoder.GetVertSampFactor(0) == 1 && + mjpeg_decoder.GetHorizSampFactor(0) == 1 && + mjpeg_decoder.GetVertSampFactor(1) == 1 && + mjpeg_decoder.GetHorizSampFactor(1) == 1 && + mjpeg_decoder.GetVertSampFactor(2) == 1 && + mjpeg_decoder.GetHorizSampFactor(2) == 1) { + printf("JPeg is J444, %dx%d %d bytes\n", width, height, + static_cast<int>(sample_size)); + // 
YUV400 + } else if (mjpeg_decoder.GetColorSpace() == + MJpegDecoder::kColorSpaceGrayscale && + mjpeg_decoder.GetNumComponents() == 1 && + mjpeg_decoder.GetVertSampFactor(0) == 1 && + mjpeg_decoder.GetHorizSampFactor(0) == 1) { + printf("JPeg is J400, %dx%d %d bytes\n", width, height, + static_cast<int>(sample_size)); + } else { + // Unknown colorspace. + printf("JPeg is Unknown colorspace.\n"); + } + mjpeg_decoder.UnloadFrame(); + return ret; +} + +TEST_F(LibYUVConvertTest, TestMJPGInfo) { + EXPECT_EQ(1, ShowJPegInfo(kTest0Jpg, kTest0JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest4Jpg, + kTest4JpgLen)); // Valid but unsupported. +} +#endif // HAVE_JPEG + +TEST_F(LibYUVConvertTest, NV12Crop) { + const int SUBSAMP_X = 2; + const int SUBSAMP_Y = 2; + const int kWidth = benchmark_width_; + const int kHeight = benchmark_height_; + const int crop_y = + ((benchmark_height_ - (benchmark_height_ * 360 / 480)) / 2 + 1) & ~1; + const int kDestWidth = benchmark_width_; + const int kDestHeight = benchmark_height_ - crop_y * 2; + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); + const int sample_size = + kWidth * kHeight + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; + align_buffer_page_end(src_y, sample_size); + uint8_t* src_uv = src_y + kWidth * kHeight; + + align_buffer_page_end(dst_y, kDestWidth * kDestHeight); + align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + align_buffer_page_end(dst_y_2, kDestWidth * kDestHeight); + align_buffer_page_end(dst_u_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + align_buffer_page_end(dst_v_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + for (int i = 0; i < 
kHeight * kWidth; ++i) { + src_y[i] = (fastrand() & 0xff); + } + for (int i = 0; i < (SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideUV) * 2; ++i) { + src_uv[i] = (fastrand() & 0xff); + } + memset(dst_y, 1, kDestWidth * kDestHeight); + memset(dst_u, 2, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + memset(dst_v, 3, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + memset(dst_y_2, 1, kDestWidth * kDestHeight); + memset(dst_u_2, 2, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + memset(dst_v_2, 3, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + ConvertToI420(src_y, sample_size, dst_y_2, kDestWidth, dst_u_2, + SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v_2, + SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, crop_y, kWidth, kHeight, + kDestWidth, kDestHeight, libyuv::kRotate0, libyuv::FOURCC_NV12); + + NV12ToI420(src_y + crop_y * kWidth, kWidth, + src_uv + (crop_y / 2) * kStrideUV * 2, kStrideUV * 2, dst_y, + kDestWidth, dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v, + SUBSAMPLE(kDestWidth, SUBSAMP_X), kDestWidth, kDestHeight); + + for (int i = 0; i < kDestHeight; ++i) { + for (int j = 0; j < kDestWidth; ++j) { + EXPECT_EQ(dst_y[i * kWidth + j], dst_y_2[i * kWidth + j]); + } + } + for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { + for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { + EXPECT_EQ(dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j], + dst_u_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); + } + } + for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { + for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { + EXPECT_EQ(dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j], + dst_v_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); + } + } + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_u); + free_aligned_buffer_page_end(dst_v); + free_aligned_buffer_page_end(dst_y_2); + free_aligned_buffer_page_end(dst_u_2); + 
free_aligned_buffer_page_end(dst_v_2); + free_aligned_buffer_page_end(src_y); +} + +TEST_F(LibYUVConvertTest, I420CropOddY) { + const int SUBSAMP_X = 2; + const int SUBSAMP_Y = 2; + const int kWidth = benchmark_width_; + const int kHeight = benchmark_height_; + const int crop_y = benchmark_height_ > 1 ? 1 : 0; + const int kDestWidth = benchmark_width_; + const int kDestHeight = benchmark_height_ - crop_y * 2; + const int kStrideU = SUBSAMPLE(kWidth, SUBSAMP_X); + const int kStrideV = SUBSAMPLE(kWidth, SUBSAMP_X); + const int sample_size = kWidth * kHeight + + kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y) + + kStrideV * SUBSAMPLE(kHeight, SUBSAMP_Y); + align_buffer_page_end(src_y, sample_size); + uint8_t* src_u = src_y + kWidth * kHeight; + uint8_t* src_v = src_u + kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y); + + align_buffer_page_end(dst_y, kDestWidth * kDestHeight); + align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + for (int i = 0; i < kHeight * kWidth; ++i) { + src_y[i] = (fastrand() & 0xff); + } + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideU; ++i) { + src_u[i] = (fastrand() & 0xff); + } + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideV; ++i) { + src_v[i] = (fastrand() & 0xff); + } + memset(dst_y, 1, kDestWidth * kDestHeight); + memset(dst_u, 2, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + memset(dst_v, 3, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + MaskCpuFlags(benchmark_cpu_info_); + for (int i = 0; i < benchmark_iterations_; ++i) { + ConvertToI420(src_y, sample_size, dst_y, kDestWidth, dst_u, + SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v, + SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, crop_y, kWidth, kHeight, + kDestWidth, kDestHeight, libyuv::kRotate0, + libyuv::FOURCC_I420); + } + + for (int i = 0; i < kDestHeight; ++i) { + for 
(int j = 0; j < kDestWidth; ++j) { + EXPECT_EQ(src_y[crop_y * kWidth + i * kWidth + j], + dst_y[i * kDestWidth + j]); + } + } + for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { + for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { + EXPECT_EQ(src_u[(crop_y / 2 + i) * kStrideU + j], + dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); + } + } + for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { + for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { + EXPECT_EQ(src_v[(crop_y / 2 + i) * kStrideV + j], + dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); + } + } + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_u); + free_aligned_buffer_page_end(dst_v); + free_aligned_buffer_page_end(src_y); +} + +#define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12) \ + TEST_F(LibYUVConvertTest, NAME) { \ + const int kWidth = benchmark_width_; \ + const int kHeight = benchmark_height_; \ + \ + align_buffer_page_end(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight); \ + align_buffer_page_end(orig_y, kWidth* kHeight); \ + align_buffer_page_end(orig_u, \ + SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ + align_buffer_page_end(orig_v, \ + SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ + \ + align_buffer_page_end(dst_y_orig, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_orig, \ + 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ + \ + align_buffer_page_end(dst_y, kWidth* kHeight); \ + align_buffer_page_end(dst_uv, \ + 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ + \ + MemRandomize(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight); \ + \ + /* Convert UYVY to NV12 in 2 steps for reference */ \ + libyuv::UYVYTOI420(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), orig_y, kWidth, \ + orig_u, SUBSAMPLE(kWidth, 2), orig_v, \ + SUBSAMPLE(kWidth, 2), kWidth, kHeight); \ + libyuv::I420ToNV12(orig_y, kWidth, orig_u, SUBSAMPLE(kWidth, 2), orig_v, \ + SUBSAMPLE(kWidth, 2), dst_y_orig, kWidth, dst_uv_orig, \ + 2 * SUBSAMPLE(kWidth, 2), kWidth, 
kHeight); \ + \ + /* Convert to NV12 */ \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + libyuv::UYVYTONV12(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), dst_y, kWidth, \ + dst_uv, 2 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); \ + } \ + \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + EXPECT_EQ(orig_y[i], dst_y[i]); \ + } \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + EXPECT_EQ(dst_y_orig[i], dst_y[i]); \ + } \ + for (int i = 0; i < 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2); \ + ++i) { \ + EXPECT_EQ(dst_uv_orig[i], dst_uv[i]); \ + } \ + \ + free_aligned_buffer_page_end(orig_uyvy); \ + free_aligned_buffer_page_end(orig_y); \ + free_aligned_buffer_page_end(orig_u); \ + free_aligned_buffer_page_end(orig_v); \ + free_aligned_buffer_page_end(dst_y_orig); \ + free_aligned_buffer_page_end(dst_uv_orig); \ + free_aligned_buffer_page_end(dst_y); \ + free_aligned_buffer_page_end(dst_uv); \ + } + +TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12) +TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12) + +TEST_F(LibYUVConvertTest, MM21ToYUY2) { + const int kWidth = (benchmark_width_ + 15) & (~15); + const int kHeight = (benchmark_height_ + 31) & (~31); + + align_buffer_page_end(orig_y, kWidth * kHeight); + align_buffer_page_end(orig_uv, + 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); + + align_buffer_page_end(tmp_y, kWidth * kHeight); + align_buffer_page_end(tmp_u, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); + align_buffer_page_end(tmp_v, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); + + align_buffer_page_end(dst_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight); + align_buffer_page_end(golden_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight); + + MemRandomize(orig_y, kWidth * kHeight); + MemRandomize(orig_uv, 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); + + /* Convert MM21 to YUY2 in 2 steps for reference */ + libyuv::MM21ToI420(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2), tmp_y, + kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v, + SUBSAMPLE(kWidth, 2), kWidth, 
kHeight); + libyuv::I420ToYUY2(tmp_y, kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v, + SUBSAMPLE(kWidth, 2), golden_yuyv, + 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); + + /* Convert to NV12 */ + for (int i = 0; i < benchmark_iterations_; ++i) { + libyuv::MM21ToYUY2(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2), + dst_yuyv, 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); + } + + for (int i = 0; i < 4 * SUBSAMPLE(kWidth, 2) * kHeight; ++i) { + EXPECT_EQ(dst_yuyv[i], golden_yuyv[i]); + } + + free_aligned_buffer_page_end(orig_y); + free_aligned_buffer_page_end(orig_uv); + free_aligned_buffer_page_end(tmp_y); + free_aligned_buffer_page_end(tmp_u); + free_aligned_buffer_page_end(tmp_v); + free_aligned_buffer_page_end(dst_yuyv); + free_aligned_buffer_page_end(golden_yuyv); +} + +// Test RGB24 to J420 is exact +#if defined(LIBYUV_BIT_EXACT) +TEST_F(LibYUVConvertTest, TestRGB24ToJ420) { + const int kSize = 256; + align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24 + align_buffer_page_end(dest_j420, kSize * 3 / 2 * 2); + int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) / + (kSize * 2) * benchmark_iterations_; + + for (int i = 0; i < kSize * 3 * 2; ++i) { + orig_rgb24[i] = i; + } + + for (int i = 0; i < iterations256; ++i) { + RGB24ToJ420(orig_rgb24, kSize * 3, dest_j420, kSize, // Y plane + dest_j420 + kSize * 2, kSize / 2, // U plane + dest_j420 + kSize * 5 / 2, kSize / 2, // V plane + kSize, 2); + } + + uint32_t checksum = HashDjb2(dest_j420, kSize * 3 / 2 * 2, 5381); + EXPECT_EQ(2755440272u, checksum); + + free_aligned_buffer_page_end(orig_rgb24); + free_aligned_buffer_page_end(dest_j420); +} +#endif + +// Test RGB24 to I420 is exact +#if defined(LIBYUV_BIT_EXACT) +TEST_F(LibYUVConvertTest, TestRGB24ToI420) { + const int kSize = 256; + align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24 + align_buffer_page_end(dest_i420, kSize * 3 / 2 * 2); + int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 
- 1)) / + (kSize * 2) * benchmark_iterations_; + + for (int i = 0; i < kSize * 3 * 2; ++i) { + orig_rgb24[i] = i; + } + + for (int i = 0; i < iterations256; ++i) { + RGB24ToI420(orig_rgb24, kSize * 3, dest_i420, kSize, // Y plane + dest_i420 + kSize * 2, kSize / 2, // U plane + dest_i420 + kSize * 5 / 2, kSize / 2, // V plane + kSize, 2); + } + + uint32_t checksum = HashDjb2(dest_i420, kSize * 3 / 2 * 2, 5381); + EXPECT_EQ(1526656597u, checksum); + + free_aligned_buffer_page_end(orig_rgb24); + free_aligned_buffer_page_end(dest_i420); +} +#endif + +#endif // !defined(LEAN_TESTS) + +} // namespace libyuv diff --git a/files/unit_test/cpu_test.cc b/unit_test/cpu_test.cc index 93867fa7..437b6632 100644 --- a/files/unit_test/cpu_test.cc +++ b/unit_test/cpu_test.cc @@ -47,14 +47,15 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { int has_erms = TestCpuFlag(kCpuHasERMS); int has_fma3 = TestCpuFlag(kCpuHasFMA3); int has_f16c = TestCpuFlag(kCpuHasF16C); - int has_gfni = TestCpuFlag(kCpuHasGFNI); int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW); int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL); int has_avx512vnni = TestCpuFlag(kCpuHasAVX512VNNI); int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI); int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2); int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG); - int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ); + int has_avx10 = TestCpuFlag(kCpuHasAVX10); + int has_avxvnni = TestCpuFlag(kCpuHasAVXVNNI); + int has_avxvnniint8 = TestCpuFlag(kCpuHasAVXVNNIINT8); printf("Has X86 0x%x\n", has_x86); printf("Has SSE2 0x%x\n", has_sse2); printf("Has SSSE3 0x%x\n", has_ssse3); @@ -65,14 +66,15 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { printf("Has ERMS 0x%x\n", has_erms); printf("Has FMA3 0x%x\n", has_fma3); printf("Has F16C 0x%x\n", has_f16c); - printf("Has GFNI 0x%x\n", has_gfni); printf("Has AVX512BW 0x%x\n", has_avx512bw); printf("Has AVX512VL 0x%x\n", has_avx512vl); printf("Has AVX512VNNI 0x%x\n", has_avx512vnni); printf("Has 
AVX512VBMI 0x%x\n", has_avx512vbmi); printf("Has AVX512VBMI2 0x%x\n", has_avx512vbmi2); printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg); - printf("Has AVX512VPOPCNTDQ 0x%x\n", has_avx512vpopcntdq); + printf("Has AVX10 0x%x\n", has_avx10); + printf("HAS AVXVNNI 0x%x\n", has_avxvnni); + printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8); #endif #if defined(__mips__) int has_mips = TestCpuFlag(kCpuHasMIPS); @@ -137,6 +139,9 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) { #ifdef __riscv_vector printf("__riscv_vector %d\n", __riscv_vector); #endif +#ifdef __riscv_v_intrinsic + printf("__riscv_v_intrinsic %d\n", __riscv_v_intrinsic); +#endif #ifdef __APPLE__ printf("__APPLE__ %d\n", __APPLE__); #endif @@ -180,7 +185,7 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) { printf("__pnacl__ %d\n", __pnacl__); #endif #ifdef GG_LONGLONG - printf("GG_LONGLONG %d\n", GG_LONGLONG); + printf("GG_LONGLONG %lld\n", GG_LONGLONG(1)); #endif #ifdef INT_TYPES_DEFINED printf("INT_TYPES_DEFINED\n"); diff --git a/files/unit_test/cpu_thread_test.cc b/unit_test/cpu_thread_test.cc index 69aab74e..69aab74e 100644 --- a/files/unit_test/cpu_thread_test.cc +++ b/unit_test/cpu_thread_test.cc diff --git a/files/unit_test/math_test.cc b/unit_test/math_test.cc index a1544c12..a1544c12 100644 --- a/files/unit_test/math_test.cc +++ b/unit_test/math_test.cc diff --git a/files/unit_test/planar_test.cc b/unit_test/planar_test.cc index ad97b87e..ec1d72eb 100644 --- a/files/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -30,9 +30,9 @@ #endif #if defined(LIBYUV_BIT_EXACT) -#define EXPECTED_ATTENUATE_DIFF 0 +#define EXPECTED_UNATTENUATE_DIFF 0 #else -#define EXPECTED_ATTENUATE_DIFF 2 +#define EXPECTED_UNATTENUATE_DIFF 2 #endif namespace libyuv { @@ -57,12 +57,17 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) { orig_pixels[2 * 4 + 0] = 16u; orig_pixels[2 * 4 + 1] = 64u; orig_pixels[2 * 4 + 2] = 192u; - orig_pixels[2 * 4 + 3] = 255u; + orig_pixels[2 * 4 + 3] = 128u; orig_pixels[3 * 4 + 0] = 16u; 
orig_pixels[3 * 4 + 1] = 64u; orig_pixels[3 * 4 + 2] = 192u; - orig_pixels[3 * 4 + 3] = 128u; - ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 4, 1); + orig_pixels[3 * 4 + 3] = 255u; + orig_pixels[4 * 4 + 0] = 255u; + orig_pixels[4 * 4 + 1] = 255u; + orig_pixels[4 * 4 + 2] = 255u; + orig_pixels[4 * 4 + 3] = 255u; + + ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 5, 1); EXPECT_EQ(255u, unatten_pixels[0 * 4 + 0]); EXPECT_EQ(255u, unatten_pixels[0 * 4 + 1]); EXPECT_EQ(254u, unatten_pixels[0 * 4 + 2]); @@ -71,14 +76,55 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) { EXPECT_EQ(0u, unatten_pixels[1 * 4 + 1]); EXPECT_EQ(0u, unatten_pixels[1 * 4 + 2]); EXPECT_EQ(0u, unatten_pixels[1 * 4 + 3]); - EXPECT_EQ(16u, unatten_pixels[2 * 4 + 0]); - EXPECT_EQ(64u, unatten_pixels[2 * 4 + 1]); - EXPECT_EQ(192u, unatten_pixels[2 * 4 + 2]); - EXPECT_EQ(255u, unatten_pixels[2 * 4 + 3]); - EXPECT_EQ(32u, unatten_pixels[3 * 4 + 0]); - EXPECT_EQ(128u, unatten_pixels[3 * 4 + 1]); - EXPECT_EQ(255u, unatten_pixels[3 * 4 + 2]); - EXPECT_EQ(128u, unatten_pixels[3 * 4 + 3]); + EXPECT_EQ(32u, unatten_pixels[2 * 4 + 0]); + EXPECT_EQ(128u, unatten_pixels[2 * 4 + 1]); + EXPECT_EQ(255u, unatten_pixels[2 * 4 + 2]); + EXPECT_EQ(128u, unatten_pixels[2 * 4 + 3]); + EXPECT_EQ(16u, unatten_pixels[3 * 4 + 0]); + EXPECT_EQ(64u, unatten_pixels[3 * 4 + 1]); + EXPECT_EQ(192u, unatten_pixels[3 * 4 + 2]); + EXPECT_EQ(255u, unatten_pixels[3 * 4 + 3]); + EXPECT_EQ(255u, unatten_pixels[4 * 4 + 0]); + EXPECT_EQ(255u, unatten_pixels[4 * 4 + 1]); + EXPECT_EQ(255u, unatten_pixels[4 * 4 + 2]); + EXPECT_EQ(255u, unatten_pixels[4 * 4 + 3]); + + ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 5, 1); + EXPECT_EQ(100u, atten_pixels[0 * 4 + 0]); + EXPECT_EQ(65u, atten_pixels[0 * 4 + 1]); + EXPECT_EQ(64u, atten_pixels[0 * 4 + 2]); + EXPECT_EQ(128u, atten_pixels[0 * 4 + 3]); + EXPECT_EQ(0u, atten_pixels[1 * 4 + 0]); + EXPECT_EQ(0u, atten_pixels[1 * 4 + 1]); + EXPECT_EQ(0u, atten_pixels[1 * 4 + 2]); + EXPECT_EQ(0u, 
atten_pixels[1 * 4 + 3]); + EXPECT_EQ(8u, atten_pixels[2 * 4 + 0]); + EXPECT_EQ(32u, atten_pixels[2 * 4 + 1]); + EXPECT_EQ(96u, atten_pixels[2 * 4 + 2]); + EXPECT_EQ(128u, atten_pixels[2 * 4 + 3]); + EXPECT_EQ(16u, atten_pixels[3 * 4 + 0]); + EXPECT_EQ(64u, atten_pixels[3 * 4 + 1]); + EXPECT_EQ(192u, atten_pixels[3 * 4 + 2]); + EXPECT_EQ(255u, atten_pixels[3 * 4 + 3]); + EXPECT_EQ(255u, atten_pixels[4 * 4 + 0]); + EXPECT_EQ(255u, atten_pixels[4 * 4 + 1]); + EXPECT_EQ(255u, atten_pixels[4 * 4 + 2]); + EXPECT_EQ(255u, atten_pixels[4 * 4 + 3]); + + // test 255 + for (int i = 0; i < 256; ++i) { + orig_pixels[i * 4 + 0] = i; + orig_pixels[i * 4 + 1] = 0; + orig_pixels[i * 4 + 2] = 0; + orig_pixels[i * 4 + 3] = 255; + } + ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 256, 1); + for (int i = 0; i < 256; ++i) { + EXPECT_EQ(orig_pixels[i * 4 + 0], atten_pixels[i * 4 + 0]); + EXPECT_EQ(0, atten_pixels[i * 4 + 1]); + EXPECT_EQ(0, atten_pixels[i * 4 + 2]); + EXPECT_EQ(255, atten_pixels[i * 4 + 3]); + } for (int i = 0; i < 1280; ++i) { orig_pixels[i * 4 + 0] = i; @@ -92,10 +138,10 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) { ARGBAttenuate(unatten_pixels, 0, atten2_pixels, 0, 1280, 1); } for (int i = 0; i < 1280; ++i) { - EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 2); - EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 2); - EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 2); - EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 2); + EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 1); + EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 1); + EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 1); + EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 1); } // Make sure transparent, 50% and opaque are fully accurate. 
EXPECT_EQ(0, atten_pixels[0 * 4 + 0]); @@ -106,9 +152,9 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) { EXPECT_EQ(32, atten_pixels[128 * 4 + 1]); EXPECT_EQ(21, atten_pixels[128 * 4 + 2]); EXPECT_EQ(128, atten_pixels[128 * 4 + 3]); - EXPECT_NEAR(254, atten_pixels[255 * 4 + 0], EXPECTED_ATTENUATE_DIFF); - EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], EXPECTED_ATTENUATE_DIFF); - EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], EXPECTED_ATTENUATE_DIFF); + EXPECT_EQ(255, atten_pixels[255 * 4 + 0]); + EXPECT_EQ(127, atten_pixels[255 * 4 + 1]); + EXPECT_EQ(85, atten_pixels[255 * 4 + 2]); EXPECT_EQ(255, atten_pixels[255 * 4 + 3]); free_aligned_buffer_page_end(atten2_pixels); @@ -165,28 +211,28 @@ TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) { benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); + EXPECT_EQ(max_diff, 0); } TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) { int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1); - EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); + EXPECT_EQ(max_diff, 0); } TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) { int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0); - EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); + EXPECT_EQ(max_diff, 0); } TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) { int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); + EXPECT_EQ(max_diff, 0); } static int TestUnattenuateI(int width, @@ -238,28 +284,28 @@ TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) { int max_diff = TestUnattenuateI(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); + EXPECT_LE(max_diff, 
EXPECTED_UNATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) { int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1); - EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); + EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) { int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0); - EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); + EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) { int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); + EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) { @@ -2749,12 +2795,23 @@ TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) { MaskCpuFlags(disable_cpu_flags_); ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_c, benchmark_width_, benchmark_width_, benchmark_height_); - MaskCpuFlags(benchmark_cpu_info_); + double c_time = get_time(); + ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_c, + benchmark_width_, benchmark_width_, benchmark_height_); + c_time = (get_time() - c_time); + MaskCpuFlags(benchmark_cpu_info_); + ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_opt, + benchmark_width_, benchmark_width_, benchmark_height_); + double opt_time = get_time(); for (int i = 0; i < benchmark_iterations_; ++i) { ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_opt, benchmark_width_, benchmark_width_, benchmark_height_); } + opt_time = (get_time() - opt_time) / benchmark_iterations_; + // Report performance of C vs OPT + printf("%8d us C - %8d us OPT\n", static_cast<int>(c_time * 1e6), + static_cast<int>(opt_time * 1e6)); for (int 
i = 0; i < kPixels; ++i) { EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } @@ -2777,12 +2834,24 @@ TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) { MaskCpuFlags(disable_cpu_flags_); ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_c, benchmark_width_ * 4, benchmark_width_, benchmark_height_); - MaskCpuFlags(benchmark_cpu_info_); + double c_time = get_time(); + ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_c, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + c_time = (get_time() - c_time); + MaskCpuFlags(benchmark_cpu_info_); + ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_opt, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + double opt_time = get_time(); for (int i = 0; i < benchmark_iterations_; ++i) { ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_opt, benchmark_width_ * 4, benchmark_width_, benchmark_height_); } + opt_time = (get_time() - opt_time) / benchmark_iterations_; + + // Report performance of C vs OPT + printf("%8d us C - %8d us OPT\n", static_cast<int>(c_time * 1e6), + static_cast<int>(opt_time * 1e6)); for (int i = 0; i < kPixels * 4; ++i) { EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } @@ -4468,4 +4537,83 @@ TEST_F(LibYUVPlanarTest, NV21Copy) { free_aligned_buffer_page_end(dst_vu); } +#if defined(ENABLE_ROW_TESTS) && !defined(LIBYUV_DISABLE_NEON) && \ + defined(__aarch64__) + +TEST_F(LibYUVPlanarTest, TestConvertFP16ToFP32) { + int i, j; + const int y_plane_size = benchmark_width_ * benchmark_height_; + + align_buffer_page_end(orig_f, y_plane_size * 4); + align_buffer_page_end(orig_y, y_plane_size * 2); + align_buffer_page_end(dst_opt, y_plane_size * 4); + align_buffer_page_end(rec_opt, y_plane_size * 2); + + for (i = 0; i < y_plane_size; ++i) { + ((float*)orig_f)[i] = (float)(i % 10000) * 3.14f; + } + memset(orig_y, 1, y_plane_size * 2); + memset(dst_opt, 2, y_plane_size * 4); + memset(rec_opt, 3, y_plane_size * 2); + + ConvertFP32ToFP16Row_NEON((const float*)orig_f, 
(uint16_t*)orig_y, + y_plane_size); + + for (j = 0; j < benchmark_iterations_; j++) { + ConvertFP16ToFP32Row_NEON((const uint16_t*)orig_y, (float*)dst_opt, + y_plane_size); + } + + ConvertFP32ToFP16Row_NEON((const float*)dst_opt, (uint16_t*)rec_opt, + y_plane_size); + + for (i = 0; i < y_plane_size; ++i) { + EXPECT_EQ(((const uint16_t*)orig_y)[i], ((const uint16_t*)rec_opt)[i]); + } + + free_aligned_buffer_page_end(orig_f); + free_aligned_buffer_page_end(orig_y); + free_aligned_buffer_page_end(dst_opt); + free_aligned_buffer_page_end(rec_opt); +} + +TEST_F(LibYUVPlanarTest, TestConvertFP16ToFP32Column) { + int i, j; + const int y_plane_size = benchmark_width_ * benchmark_height_; + + align_buffer_page_end(orig_f, y_plane_size * 4); + align_buffer_page_end(orig_y, y_plane_size * 2); + align_buffer_page_end(dst_opt, y_plane_size * 4); + align_buffer_page_end(rec_opt, y_plane_size * 2); + + for (i = 0; i < y_plane_size; ++i) { + ((float*)orig_f)[i] = (float)(i % 10000) * 3.14f; + } + memset(orig_y, 1, y_plane_size * 2); + memset(dst_opt, 2, y_plane_size * 4); + memset(rec_opt, 3, y_plane_size * 2); + + ConvertFP32ToFP16Row_NEON((const float*)orig_f, (uint16_t*)orig_y, + y_plane_size); + + for (j = 0; j < benchmark_iterations_; j++) { + ConvertFP16ToFP32Column_NEON((const uint16_t*)orig_y, 1, (float*)dst_opt, + y_plane_size); + } + + ConvertFP32ToFP16Row_NEON((const float*)dst_opt, (uint16_t*)rec_opt, + y_plane_size); + + for (i = 0; i < y_plane_size; ++i) { + EXPECT_EQ(((const uint16_t*)orig_y)[i], ((const uint16_t*)rec_opt)[i]); + } + + free_aligned_buffer_page_end(orig_f); + free_aligned_buffer_page_end(orig_y); + free_aligned_buffer_page_end(dst_opt); + free_aligned_buffer_page_end(rec_opt); +} + +#endif // defined(ENABLE_ROW_TESTS) && defined(__aarch64__) + } // namespace libyuv diff --git a/files/unit_test/rotate_argb_test.cc b/unit_test/rotate_argb_test.cc index 74952c4e..74952c4e 100644 --- a/files/unit_test/rotate_argb_test.cc +++ 
b/unit_test/rotate_argb_test.cc diff --git a/files/unit_test/rotate_test.cc b/unit_test/rotate_test.cc index abc08efa..abc08efa 100644 --- a/files/unit_test/rotate_test.cc +++ b/unit_test/rotate_test.cc diff --git a/files/unit_test/scale_argb_test.cc b/unit_test/scale_argb_test.cc index f54a68f1..f54a68f1 100644 --- a/files/unit_test/scale_argb_test.cc +++ b/unit_test/scale_argb_test.cc diff --git a/unit_test/scale_plane_test.cc b/unit_test/scale_plane_test.cc new file mode 100644 index 00000000..9ce47a02 --- /dev/null +++ b/unit_test/scale_plane_test.cc @@ -0,0 +1,470 @@ +/* + * Copyright 2023 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdlib.h> +#include <time.h> + +#include "../unit_test/unit_test.h" +#include "libyuv/cpu_id.h" +#include "libyuv/scale.h" + +#ifdef ENABLE_ROW_TESTS +#include "libyuv/scale_row.h" // For ScaleRowDown2Box_Odd_C +#endif + +#define STRINGIZE(line) #line +#define FILELINESTR(file, line) file ":" STRINGIZE(line) + +#if defined(__riscv) && !defined(__clang__) +#define DISABLE_SLOW_TESTS +#undef ENABLE_FULL_TESTS +#undef ENABLE_ROW_TESTS +#define LEAN_TESTS +#endif + +#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) +// SLOW TESTS are those that are unoptimized C code. +// FULL TESTS are optimized but test many variations of the same code. 
+#define ENABLE_FULL_TESTS +#endif + +namespace libyuv { + +#ifdef ENABLE_ROW_TESTS +#ifdef HAS_SCALEROWDOWN2_SSSE3 +TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) { + SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]); + SIMD_ALIGNED(uint8_t dst_pixels_opt[64]); + SIMD_ALIGNED(uint8_t dst_pixels_c[64]); + memset(orig_pixels, 0, sizeof(orig_pixels)); + memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt)); + memset(dst_pixels_c, 0, sizeof(dst_pixels_c)); + + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + if (!has_ssse3) { + printf("Warning SSSE3 not detected; Skipping test.\n"); + } else { + // TL. + orig_pixels[0] = 255u; + orig_pixels[1] = 0u; + orig_pixels[128 + 0] = 0u; + orig_pixels[128 + 1] = 0u; + // TR. + orig_pixels[2] = 0u; + orig_pixels[3] = 100u; + orig_pixels[128 + 2] = 0u; + orig_pixels[128 + 3] = 0u; + // BL. + orig_pixels[4] = 0u; + orig_pixels[5] = 0u; + orig_pixels[128 + 4] = 50u; + orig_pixels[128 + 5] = 0u; + // BR. + orig_pixels[6] = 0u; + orig_pixels[7] = 0u; + orig_pixels[128 + 6] = 0u; + orig_pixels[128 + 7] = 20u; + // Odd. + orig_pixels[126] = 4u; + orig_pixels[127] = 255u; + orig_pixels[128 + 126] = 16u; + orig_pixels[128 + 127] = 255u; + + // Test regular half size. + ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64); + + EXPECT_EQ(64u, dst_pixels_c[0]); + EXPECT_EQ(25u, dst_pixels_c[1]); + EXPECT_EQ(13u, dst_pixels_c[2]); + EXPECT_EQ(5u, dst_pixels_c[3]); + EXPECT_EQ(0u, dst_pixels_c[4]); + EXPECT_EQ(133u, dst_pixels_c[63]); + + // Test Odd width version - Last pixel is just 1 horizontal pixel. + ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64); + + EXPECT_EQ(64u, dst_pixels_c[0]); + EXPECT_EQ(25u, dst_pixels_c[1]); + EXPECT_EQ(13u, dst_pixels_c[2]); + EXPECT_EQ(5u, dst_pixels_c[3]); + EXPECT_EQ(0u, dst_pixels_c[4]); + EXPECT_EQ(10u, dst_pixels_c[63]); + + // Test one pixel less, should skip the last pixel. 
+ memset(dst_pixels_c, 0, sizeof(dst_pixels_c)); + ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63); + + EXPECT_EQ(64u, dst_pixels_c[0]); + EXPECT_EQ(25u, dst_pixels_c[1]); + EXPECT_EQ(13u, dst_pixels_c[2]); + EXPECT_EQ(5u, dst_pixels_c[3]); + EXPECT_EQ(0u, dst_pixels_c[4]); + EXPECT_EQ(0u, dst_pixels_c[63]); + + // Test regular half size SSSE3. + ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64); + + EXPECT_EQ(64u, dst_pixels_opt[0]); + EXPECT_EQ(25u, dst_pixels_opt[1]); + EXPECT_EQ(13u, dst_pixels_opt[2]); + EXPECT_EQ(5u, dst_pixels_opt[3]); + EXPECT_EQ(0u, dst_pixels_opt[4]); + EXPECT_EQ(133u, dst_pixels_opt[63]); + + // Compare C and SSSE3 match. + ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64); + ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64); + for (int i = 0; i < 64; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + } +} +#endif // HAS_SCALEROWDOWN2_SSSE3 + +extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); + +TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) { + SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]); + SIMD_ALIGNED(uint16_t dst_pixels_c[1280]); + SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]); + + memset(orig_pixels, 0, sizeof(orig_pixels)); + memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); + memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); + + for (int i = 0; i < 2560 * 2; ++i) { + orig_pixels[i] = i; + } + ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { +#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) + int has_neon = TestCpuFlag(kCpuHasNEON); + if (has_neon) { + ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); + } else { + ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); + } +#else + ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); +#endif + } + + for (int i 
= 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4); + EXPECT_EQ(dst_pixels_c[1279], 3839); +} +#endif // ENABLE_ROW_TESTS + +// Test scaling plane with 8 bit C vs 12 bit C and return maximum pixel +// difference. +// 0 = exact. +static int TestPlaneFilter_16(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i; + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int src_stride_y = Abs(src_width); + int dst_y_plane_size = dst_width * dst_height; + int dst_stride_y = dst_width; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_y_16, src_y_plane_size * 2); + align_buffer_page_end(dst_y_8, dst_y_plane_size); + align_buffer_page_end(dst_y_16, dst_y_plane_size * 2); + uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16); + uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16); + + MemRandomize(src_y, src_y_plane_size); + memset(dst_y_8, 0, dst_y_plane_size); + memset(dst_y_16, 1, dst_y_plane_size * 2); + + for (i = 0; i < src_y_plane_size; ++i) { + p_src_y_16[i] = src_y[i] & 255; + } + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y, + dst_width, dst_height, f); + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + + for (i = 0; i < benchmark_iterations; ++i) { + ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16, + dst_stride_y, dst_width, dst_height, f); + } + + // Expect an exact match. 
+ int max_diff = 0; + for (i = 0; i < dst_y_plane_size; ++i) { + int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(dst_y_8); + free_aligned_buffer_page_end(dst_y_16); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_y_16); + + return max_diff; +} + +// The following adjustments in dimensions ensure the scale factor will be +// exactly achieved. +// 2 is chroma subsample. +#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2) +#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2) + +#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \ + TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \ + int diff = TestPlaneFilter_16( \ + SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ + DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ + kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } + +// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but +// filtering is different fixed point implementations for SSSE3, Neon and C. +#define TEST_FACTOR(name, nom, denom, boxdiff) \ + TEST_FACTOR1(name, None, nom, denom, 0) \ + TEST_FACTOR1(name, Linear, nom, denom, boxdiff) \ + TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \ + TEST_FACTOR1(name, Box, nom, denom, boxdiff) + +TEST_FACTOR(2, 1, 2, 0) +TEST_FACTOR(4, 1, 4, 0) +// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds. 
+TEST_FACTOR(3by4, 3, 4, 1) +TEST_FACTOR(3by8, 3, 8, 1) +TEST_FACTOR(3, 1, 3, 0) +#undef TEST_FACTOR1 +#undef TEST_FACTOR +#undef SX +#undef DX + +TEST_F(LibYUVScaleTest, PlaneTest3x) { + const int kSrcStride = 480; + const int kDstStride = 160; + const int kSize = kSrcStride * 3; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < 480 * 3; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_pixels, kDstStride); + + int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * + benchmark_iterations_; + for (int i = 0; i < iterations160; ++i) { + ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, + kFilterBilinear); + } + + EXPECT_EQ(225, dest_pixels[0]); + + ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, + kFilterNone); + + EXPECT_EQ(225, dest_pixels[0]); + + free_aligned_buffer_page_end(dest_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, PlaneTest4x) { + const int kSrcStride = 640; + const int kDstStride = 160; + const int kSize = kSrcStride * 4; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < 640 * 4; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_pixels, kDstStride); + + int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * + benchmark_iterations_; + for (int i = 0; i < iterations160; ++i) { + ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, + kFilterBilinear); + } + + EXPECT_EQ(66, dest_pixels[0]); + + ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, + kFilterNone); + + EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row + + free_aligned_buffer_page_end(dest_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +// Intent is to test 200x50 to 50x200 but width and height can be parameters. 
+TEST_F(LibYUVScaleTest, PlaneTestRotate_None) { + const int kSize = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < kSize; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_opt_pixels, kSize); + align_buffer_page_end(dest_c_pixels, kSize); + + MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, + dest_c_pixels, benchmark_height_, benchmark_height_, + benchmark_width_, kFilterNone); + MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. + + for (int i = 0; i < benchmark_iterations_; ++i) { + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, + benchmark_height_, dest_opt_pixels, benchmark_height_, + benchmark_height_, benchmark_width_, kFilterNone); + } + + for (int i = 0; i < kSize; ++i) { + EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); + } + + free_aligned_buffer_page_end(dest_c_pixels); + free_aligned_buffer_page_end(dest_opt_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) { + const int kSize = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < kSize; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_opt_pixels, kSize); + align_buffer_page_end(dest_c_pixels, kSize); + + MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, + dest_c_pixels, benchmark_height_, benchmark_height_, + benchmark_width_, kFilterBilinear); + MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. 
+ + for (int i = 0; i < benchmark_iterations_; ++i) { + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, + benchmark_height_, dest_opt_pixels, benchmark_height_, + benchmark_height_, benchmark_width_, kFilterBilinear); + } + + for (int i = 0; i < kSize; ++i) { + EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); + } + + free_aligned_buffer_page_end(dest_c_pixels); + free_aligned_buffer_page_end(dest_opt_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +// Intent is to test 200x50 to 50x200 but width and height can be parameters. +TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) { + const int kSize = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < kSize; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_opt_pixels, kSize); + align_buffer_page_end(dest_c_pixels, kSize); + + MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, + dest_c_pixels, benchmark_height_, benchmark_height_, + benchmark_width_, kFilterBox); + MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. + + for (int i = 0; i < benchmark_iterations_; ++i) { + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, + benchmark_height_, dest_opt_pixels, benchmark_height_, + benchmark_height_, benchmark_width_, kFilterBox); + } + + for (int i = 0; i < kSize; ++i) { + EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); + } + + free_aligned_buffer_page_end(dest_c_pixels); + free_aligned_buffer_page_end(dest_opt_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, PlaneTest1_Box) { + align_buffer_page_end(orig_pixels, 3); + align_buffer_page_end(dst_pixels, 3); + + // Pad the 1x1 byte image with invalid values before and after in case libyuv + // reads outside the memory boundaries. 
+ orig_pixels[0] = 0; + orig_pixels[1] = 1; // scale this pixel + orig_pixels[2] = 2; + dst_pixels[0] = 3; + dst_pixels[1] = 3; + dst_pixels[2] = 3; + + libyuv::ScalePlane(orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, + /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, + /* dst_width= */ 1, /* dst_height= */ 2, + libyuv::kFilterBox); + + EXPECT_EQ(dst_pixels[0], 1); + EXPECT_EQ(dst_pixels[1], 1); + EXPECT_EQ(dst_pixels[2], 3); + + free_aligned_buffer_page_end(dst_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) { + align_buffer_page_end(orig_pixels_alloc, 3 * 2); + align_buffer_page_end(dst_pixels_alloc, 3 * 2); + uint16_t* orig_pixels = (uint16_t*)orig_pixels_alloc; + uint16_t* dst_pixels = (uint16_t*)dst_pixels_alloc; + + // Pad the 1x1 byte image with invalid values before and after in case libyuv + // reads outside the memory boundaries. + orig_pixels[0] = 0; + orig_pixels[1] = 1; // scale this pixel + orig_pixels[2] = 2; + dst_pixels[0] = 3; + dst_pixels[1] = 3; + dst_pixels[2] = 3; + + libyuv::ScalePlane_16( + orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, + /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, + /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone); + + EXPECT_EQ(dst_pixels[0], 1); + EXPECT_EQ(dst_pixels[1], 1); + EXPECT_EQ(dst_pixels[2], 3); + + free_aligned_buffer_page_end(dst_pixels_alloc); + free_aligned_buffer_page_end(orig_pixels_alloc); +} +} // namespace libyuv diff --git a/files/unit_test/scale_rgb_test.cc b/unit_test/scale_rgb_test.cc index 8296abe3..8296abe3 100644 --- a/files/unit_test/scale_rgb_test.cc +++ b/unit_test/scale_rgb_test.cc diff --git a/files/unit_test/scale_test.cc b/unit_test/scale_test.cc index a8c95268..6e3b9271 100644 --- a/files/unit_test/scale_test.cc +++ b/unit_test/scale_test.cc @@ -22,6 +22,11 @@ #define STRINGIZE(line) #line #define FILELINESTR(file, line) file ":" STRINGIZE(line) +#if defined(__riscv) && 
!defined(__clang__) +#define DISABLE_SLOW_TESTS +#undef ENABLE_FULL_TESTS +#endif + #if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) // SLOW TESTS are those that are unoptimized C code. // FULL TESTS are optimized but test many variations of the same code. @@ -1123,479 +1128,6 @@ TEST_SCALESWAPXY1(DISABLED_, Scale, Bilinear, 3) TEST_SCALESWAPXY1(DISABLED_, Scale, Box, 3) #endif #endif - #undef TEST_SCALESWAPXY1 -#ifdef ENABLE_ROW_TESTS -#ifdef HAS_SCALEROWDOWN2_SSSE3 -TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) { - SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]); - SIMD_ALIGNED(uint8_t dst_pixels_opt[64]); - SIMD_ALIGNED(uint8_t dst_pixels_c[64]); - memset(orig_pixels, 0, sizeof(orig_pixels)); - memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt)); - memset(dst_pixels_c, 0, sizeof(dst_pixels_c)); - - int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); - if (!has_ssse3) { - printf("Warning SSSE3 not detected; Skipping test.\n"); - } else { - // TL. - orig_pixels[0] = 255u; - orig_pixels[1] = 0u; - orig_pixels[128 + 0] = 0u; - orig_pixels[128 + 1] = 0u; - // TR. - orig_pixels[2] = 0u; - orig_pixels[3] = 100u; - orig_pixels[128 + 2] = 0u; - orig_pixels[128 + 3] = 0u; - // BL. - orig_pixels[4] = 0u; - orig_pixels[5] = 0u; - orig_pixels[128 + 4] = 50u; - orig_pixels[128 + 5] = 0u; - // BR. - orig_pixels[6] = 0u; - orig_pixels[7] = 0u; - orig_pixels[128 + 6] = 0u; - orig_pixels[128 + 7] = 20u; - // Odd. - orig_pixels[126] = 4u; - orig_pixels[127] = 255u; - orig_pixels[128 + 126] = 16u; - orig_pixels[128 + 127] = 255u; - - // Test regular half size. - ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64); - - EXPECT_EQ(64u, dst_pixels_c[0]); - EXPECT_EQ(25u, dst_pixels_c[1]); - EXPECT_EQ(13u, dst_pixels_c[2]); - EXPECT_EQ(5u, dst_pixels_c[3]); - EXPECT_EQ(0u, dst_pixels_c[4]); - EXPECT_EQ(133u, dst_pixels_c[63]); - - // Test Odd width version - Last pixel is just 1 horizontal pixel. 
- ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64); - - EXPECT_EQ(64u, dst_pixels_c[0]); - EXPECT_EQ(25u, dst_pixels_c[1]); - EXPECT_EQ(13u, dst_pixels_c[2]); - EXPECT_EQ(5u, dst_pixels_c[3]); - EXPECT_EQ(0u, dst_pixels_c[4]); - EXPECT_EQ(10u, dst_pixels_c[63]); - - // Test one pixel less, should skip the last pixel. - memset(dst_pixels_c, 0, sizeof(dst_pixels_c)); - ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63); - - EXPECT_EQ(64u, dst_pixels_c[0]); - EXPECT_EQ(25u, dst_pixels_c[1]); - EXPECT_EQ(13u, dst_pixels_c[2]); - EXPECT_EQ(5u, dst_pixels_c[3]); - EXPECT_EQ(0u, dst_pixels_c[4]); - EXPECT_EQ(0u, dst_pixels_c[63]); - - // Test regular half size SSSE3. - ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64); - - EXPECT_EQ(64u, dst_pixels_opt[0]); - EXPECT_EQ(25u, dst_pixels_opt[1]); - EXPECT_EQ(13u, dst_pixels_opt[2]); - EXPECT_EQ(5u, dst_pixels_opt[3]); - EXPECT_EQ(0u, dst_pixels_opt[4]); - EXPECT_EQ(133u, dst_pixels_opt[63]); - - // Compare C and SSSE3 match. - ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64); - ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64); - for (int i = 0; i < 64; ++i) { - EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); - } - } -} -#endif // HAS_SCALEROWDOWN2_SSSE3 - -extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width); -extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width); - -TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) { - SIMD_ALIGNED(uint16_t orig_pixels[640 * 2 + 1]); // 2 rows + 1 pixel overrun. 
- SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]); - SIMD_ALIGNED(uint16_t dst_pixels_c[1280]); - - memset(orig_pixels, 0, sizeof(orig_pixels)); - memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt)); - memset(dst_pixels_c, 2, sizeof(dst_pixels_c)); - - for (int i = 0; i < 640 * 2 + 1; ++i) { - orig_pixels[i] = i; - } - ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_c[0], 1280); - for (int i = 0; i < benchmark_pixels_div1280_; ++i) { -#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) - int has_neon = TestCpuFlag(kCpuHasNEON); - if (has_neon) { - ScaleRowUp2_16_NEON(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); - } else { - ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); - } -#else - ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); -#endif - } - - for (int i = 0; i < 1280; ++i) { - EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); - } - EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16); - EXPECT_EQ(dst_pixels_c[1279], 800); -} - -extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width); - -TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) { - SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]); - SIMD_ALIGNED(uint16_t dst_pixels_c[1280]); - SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]); - - memset(orig_pixels, 0, sizeof(orig_pixels)); - memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); - memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); - - for (int i = 0; i < 2560 * 2; ++i) { - orig_pixels[i] = i; - } - ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280); - for (int i = 0; i < benchmark_pixels_div1280_; ++i) { -#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) - int has_neon = TestCpuFlag(kCpuHasNEON); - if (has_neon) { - ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); - } else { - ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); - } -#else - 
ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); -#endif - } - - for (int i = 0; i < 1280; ++i) { - EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); - } - - EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4); - EXPECT_EQ(dst_pixels_c[1279], 3839); -} -#endif // ENABLE_ROW_TESTS - -// Test scaling plane with 8 bit C vs 12 bit C and return maximum pixel -// difference. -// 0 = exact. -static int TestPlaneFilter_16(int src_width, - int src_height, - int dst_width, - int dst_height, - FilterMode f, - int benchmark_iterations, - int disable_cpu_flags, - int benchmark_cpu_info) { - if (!SizeValid(src_width, src_height, dst_width, dst_height)) { - return 0; - } - - int i; - int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); - int src_stride_y = Abs(src_width); - int dst_y_plane_size = dst_width * dst_height; - int dst_stride_y = dst_width; - - align_buffer_page_end(src_y, src_y_plane_size); - align_buffer_page_end(src_y_16, src_y_plane_size * 2); - align_buffer_page_end(dst_y_8, dst_y_plane_size); - align_buffer_page_end(dst_y_16, dst_y_plane_size * 2); - uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16); - uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16); - - MemRandomize(src_y, src_y_plane_size); - memset(dst_y_8, 0, dst_y_plane_size); - memset(dst_y_16, 1, dst_y_plane_size * 2); - - for (i = 0; i < src_y_plane_size; ++i) { - p_src_y_16[i] = src_y[i] & 255; - } - - MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. - ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y, - dst_width, dst_height, f); - MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. - - for (i = 0; i < benchmark_iterations; ++i) { - ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16, - dst_stride_y, dst_width, dst_height, f); - } - - // Expect an exact match. 
- int max_diff = 0; - for (i = 0; i < dst_y_plane_size; ++i) { - int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]); - if (abs_diff > max_diff) { - max_diff = abs_diff; - } - } - - free_aligned_buffer_page_end(dst_y_8); - free_aligned_buffer_page_end(dst_y_16); - free_aligned_buffer_page_end(src_y); - free_aligned_buffer_page_end(src_y_16); - - return max_diff; -} - -// The following adjustments in dimensions ensure the scale factor will be -// exactly achieved. -// 2 is chroma subsample. -#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2) -#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2) - -#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \ - TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \ - int diff = TestPlaneFilter_16( \ - SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ - DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ - kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ - benchmark_cpu_info_); \ - EXPECT_LE(diff, max_diff); \ - } - -// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but -// filtering is different fixed point implementations for SSSE3, Neon and C. -#define TEST_FACTOR(name, nom, denom, boxdiff) \ - TEST_FACTOR1(name, None, nom, denom, 0) \ - TEST_FACTOR1(name, Linear, nom, denom, boxdiff) \ - TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \ - TEST_FACTOR1(name, Box, nom, denom, boxdiff) - -TEST_FACTOR(2, 1, 2, 0) -TEST_FACTOR(4, 1, 4, 0) -// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds. 
-TEST_FACTOR(3by4, 3, 4, 1) -TEST_FACTOR(3by8, 3, 8, 1) -TEST_FACTOR(3, 1, 3, 0) -#undef TEST_FACTOR1 -#undef TEST_FACTOR -#undef SX -#undef DX - -TEST_F(LibYUVScaleTest, PlaneTest3x) { - const int kSrcStride = 480; - const int kDstStride = 160; - const int kSize = kSrcStride * 3; - align_buffer_page_end(orig_pixels, kSize); - for (int i = 0; i < 480 * 3; ++i) { - orig_pixels[i] = i; - } - align_buffer_page_end(dest_pixels, kDstStride); - - int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * - benchmark_iterations_; - for (int i = 0; i < iterations160; ++i) { - ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, - kFilterBilinear); - } - - EXPECT_EQ(225, dest_pixels[0]); - - ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, - kFilterNone); - - EXPECT_EQ(225, dest_pixels[0]); - - free_aligned_buffer_page_end(dest_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVScaleTest, PlaneTest4x) { - const int kSrcStride = 640; - const int kDstStride = 160; - const int kSize = kSrcStride * 4; - align_buffer_page_end(orig_pixels, kSize); - for (int i = 0; i < 640 * 4; ++i) { - orig_pixels[i] = i; - } - align_buffer_page_end(dest_pixels, kDstStride); - - int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * - benchmark_iterations_; - for (int i = 0; i < iterations160; ++i) { - ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, - kFilterBilinear); - } - - EXPECT_EQ(66, dest_pixels[0]); - - ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, - kFilterNone); - - EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row - - free_aligned_buffer_page_end(dest_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -// Intent is to test 200x50 to 50x200 but width and height can be parameters. 
-TEST_F(LibYUVScaleTest, PlaneTestRotate_None) { - const int kSize = benchmark_width_ * benchmark_height_; - align_buffer_page_end(orig_pixels, kSize); - for (int i = 0; i < kSize; ++i) { - orig_pixels[i] = i; - } - align_buffer_page_end(dest_opt_pixels, kSize); - align_buffer_page_end(dest_c_pixels, kSize); - - MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. - ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, - dest_c_pixels, benchmark_height_, benchmark_height_, - benchmark_width_, kFilterNone); - MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. - - for (int i = 0; i < benchmark_iterations_; ++i) { - ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, - benchmark_height_, dest_opt_pixels, benchmark_height_, - benchmark_height_, benchmark_width_, kFilterNone); - } - - for (int i = 0; i < kSize; ++i) { - EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); - } - - free_aligned_buffer_page_end(dest_c_pixels); - free_aligned_buffer_page_end(dest_opt_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) { - const int kSize = benchmark_width_ * benchmark_height_; - align_buffer_page_end(orig_pixels, kSize); - for (int i = 0; i < kSize; ++i) { - orig_pixels[i] = i; - } - align_buffer_page_end(dest_opt_pixels, kSize); - align_buffer_page_end(dest_c_pixels, kSize); - - MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. - ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, - dest_c_pixels, benchmark_height_, benchmark_height_, - benchmark_width_, kFilterBilinear); - MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. 
- - for (int i = 0; i < benchmark_iterations_; ++i) { - ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, - benchmark_height_, dest_opt_pixels, benchmark_height_, - benchmark_height_, benchmark_width_, kFilterBilinear); - } - - for (int i = 0; i < kSize; ++i) { - EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); - } - - free_aligned_buffer_page_end(dest_c_pixels); - free_aligned_buffer_page_end(dest_opt_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -// Intent is to test 200x50 to 50x200 but width and height can be parameters. -TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) { - const int kSize = benchmark_width_ * benchmark_height_; - align_buffer_page_end(orig_pixels, kSize); - for (int i = 0; i < kSize; ++i) { - orig_pixels[i] = i; - } - align_buffer_page_end(dest_opt_pixels, kSize); - align_buffer_page_end(dest_c_pixels, kSize); - - MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. - ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, - dest_c_pixels, benchmark_height_, benchmark_height_, - benchmark_width_, kFilterBox); - MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. - - for (int i = 0; i < benchmark_iterations_; ++i) { - ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, - benchmark_height_, dest_opt_pixels, benchmark_height_, - benchmark_height_, benchmark_width_, kFilterBox); - } - - for (int i = 0; i < kSize; ++i) { - EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); - } - - free_aligned_buffer_page_end(dest_c_pixels); - free_aligned_buffer_page_end(dest_opt_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVScaleTest, PlaneTest1_Box) { - align_buffer_page_end(orig_pixels, 3); - align_buffer_page_end(dst_pixels, 3); - - // Pad the 1x1 byte image with invalid values before and after in case libyuv - // reads outside the memory boundaries. 
- orig_pixels[0] = 0; - orig_pixels[1] = 1; // scale this pixel - orig_pixels[2] = 2; - dst_pixels[0] = 3; - dst_pixels[1] = 3; - dst_pixels[2] = 3; - - libyuv::ScalePlane(orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, - /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, - /* dst_width= */ 1, /* dst_height= */ 2, - libyuv::kFilterBox); - - EXPECT_EQ(dst_pixels[0], 1); - EXPECT_EQ(dst_pixels[1], 1); - EXPECT_EQ(dst_pixels[2], 3); - - free_aligned_buffer_page_end(dst_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) { - align_buffer_page_end(orig_pixels_alloc, 3 * 2); - align_buffer_page_end(dst_pixels_alloc, 3 * 2); - uint16_t* orig_pixels = (uint16_t*)orig_pixels_alloc; - uint16_t* dst_pixels = (uint16_t*)dst_pixels_alloc; - - // Pad the 1x1 byte image with invalid values before and after in case libyuv - // reads outside the memory boundaries. - orig_pixels[0] = 0; - orig_pixels[1] = 1; // scale this pixel - orig_pixels[2] = 2; - dst_pixels[0] = 3; - dst_pixels[1] = 3; - dst_pixels[2] = 3; - - libyuv::ScalePlane_16( - orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, - /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, - /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone); - - EXPECT_EQ(dst_pixels[0], 1); - EXPECT_EQ(dst_pixels[1], 1); - EXPECT_EQ(dst_pixels[2], 3); - - free_aligned_buffer_page_end(dst_pixels_alloc); - free_aligned_buffer_page_end(orig_pixels_alloc); -} } // namespace libyuv diff --git a/files/unit_test/scale_uv_test.cc b/unit_test/scale_uv_test.cc index dab217c9..dab217c9 100644 --- a/files/unit_test/scale_uv_test.cc +++ b/unit_test/scale_uv_test.cc diff --git a/files/unit_test/testdata/arm_v7.txt b/unit_test/testdata/arm_v7.txt index 5d7dbd04..5d7dbd04 100644 --- a/files/unit_test/testdata/arm_v7.txt +++ b/unit_test/testdata/arm_v7.txt diff --git a/files/unit_test/testdata/juno.txt b/unit_test/testdata/juno.txt index dd465272..dd465272 100644 --- 
a/files/unit_test/testdata/juno.txt +++ b/unit_test/testdata/juno.txt diff --git a/files/unit_test/testdata/mips.txt b/unit_test/testdata/mips.txt index d9f28cbf..d9f28cbf 100644 --- a/files/unit_test/testdata/mips.txt +++ b/unit_test/testdata/mips.txt diff --git a/files/unit_test/testdata/mips_loongson2k.txt b/unit_test/testdata/mips_loongson2k.txt index 8a88d38f..8a88d38f 100644 --- a/files/unit_test/testdata/mips_loongson2k.txt +++ b/unit_test/testdata/mips_loongson2k.txt diff --git a/files/unit_test/testdata/mips_loongson3.txt b/unit_test/testdata/mips_loongson3.txt index 1f540b12..1f540b12 100644 --- a/files/unit_test/testdata/mips_loongson3.txt +++ b/unit_test/testdata/mips_loongson3.txt diff --git a/files/unit_test/testdata/mips_loongson_mmi.txt b/unit_test/testdata/mips_loongson_mmi.txt index 0f10b8bb..0f10b8bb 100644 --- a/files/unit_test/testdata/mips_loongson_mmi.txt +++ b/unit_test/testdata/mips_loongson_mmi.txt diff --git a/files/unit_test/testdata/mips_msa.txt b/unit_test/testdata/mips_msa.txt index ac930615..ac930615 100644 --- a/files/unit_test/testdata/mips_msa.txt +++ b/unit_test/testdata/mips_msa.txt diff --git a/files/unit_test/testdata/riscv64.txt b/unit_test/testdata/riscv64.txt index fbb4200f..fbb4200f 100644 --- a/files/unit_test/testdata/riscv64.txt +++ b/unit_test/testdata/riscv64.txt diff --git a/files/unit_test/testdata/riscv64_rvv.txt b/unit_test/testdata/riscv64_rvv.txt index af1b3f36..af1b3f36 100644 --- a/files/unit_test/testdata/riscv64_rvv.txt +++ b/unit_test/testdata/riscv64_rvv.txt diff --git a/files/unit_test/testdata/riscv64_rvv_zvfh.txt b/unit_test/testdata/riscv64_rvv_zvfh.txt index c416c1af..c416c1af 100644 --- a/files/unit_test/testdata/riscv64_rvv_zvfh.txt +++ b/unit_test/testdata/riscv64_rvv_zvfh.txt diff --git a/files/unit_test/testdata/tegra3.txt b/unit_test/testdata/tegra3.txt index d1b09f6b..d1b09f6b 100644 --- a/files/unit_test/testdata/tegra3.txt +++ b/unit_test/testdata/tegra3.txt diff --git 
a/files/unit_test/testdata/test0.jpg b/unit_test/testdata/test0.jpg Binary files differindex f4461a81..f4461a81 100644 --- a/files/unit_test/testdata/test0.jpg +++ b/unit_test/testdata/test0.jpg diff --git a/files/unit_test/testdata/test1.jpg b/unit_test/testdata/test1.jpg Binary files differindex a0210e9d..a0210e9d 100644 --- a/files/unit_test/testdata/test1.jpg +++ b/unit_test/testdata/test1.jpg diff --git a/files/unit_test/testdata/test2.jpg b/unit_test/testdata/test2.jpg Binary files differindex 816ca767..816ca767 100644 --- a/files/unit_test/testdata/test2.jpg +++ b/unit_test/testdata/test2.jpg diff --git a/files/unit_test/testdata/test3.jpg b/unit_test/testdata/test3.jpg Binary files differindex 792d91dc..792d91dc 100644 --- a/files/unit_test/testdata/test3.jpg +++ b/unit_test/testdata/test3.jpg diff --git a/files/unit_test/testdata/test4.jpg b/unit_test/testdata/test4.jpg Binary files differindex 1ef41668..1ef41668 100644 --- a/files/unit_test/testdata/test4.jpg +++ b/unit_test/testdata/test4.jpg diff --git a/files/unit_test/unit_test.cc b/unit_test/unit_test.cc index b66ebfab..239d5b92 100644 --- a/files/unit_test/unit_test.cc +++ b/unit_test/unit_test.cc @@ -144,11 +144,14 @@ int TestCpuEnv(int cpu_info) { if (TestEnv("LIBYUV_DISABLE_AVX512VBITALG")) { cpu_info &= ~libyuv::kCpuHasAVX512VBITALG; } - if (TestEnv("LIBYUV_DISABLE_AVX512VPOPCNTDQ")) { - cpu_info &= ~libyuv::kCpuHasAVX512VPOPCNTDQ; + if (TestEnv("LIBYUV_DISABLE_AVX10")) { + cpu_info &= ~libyuv::kCpuHasAVX10; } - if (TestEnv("LIBYUV_DISABLE_GFNI")) { - cpu_info &= ~libyuv::kCpuHasGFNI; + if (TestEnv("LIBYUV_DISABLE_AVXVNNI")) { + cpu_info &= ~libyuv::kCpuHasAVXVNNI; + } + if (TestEnv("LIBYUV_DISABLE_AVXVNNIINT8")) { + cpu_info &= ~libyuv::kCpuHasAVXVNNIINT8; } #endif if (TestEnv("LIBYUV_DISABLE_ASM")) { diff --git a/files/unit_test/unit_test.h b/unit_test/unit_test.h index 99cc8d19..99cc8d19 100644 --- a/files/unit_test/unit_test.h +++ b/unit_test/unit_test.h diff --git 
a/files/unit_test/video_common_test.cc b/unit_test/video_common_test.cc index 36728ea9..36728ea9 100644 --- a/files/unit_test/video_common_test.cc +++ b/unit_test/video_common_test.cc diff --git a/files/util/Makefile b/util/Makefile index 40e74b65..40e74b65 100644 --- a/files/util/Makefile +++ b/util/Makefile diff --git a/files/util/color.cc b/util/color.cc index 8c3bbefd..8c3bbefd 100644 --- a/files/util/color.cc +++ b/util/color.cc diff --git a/files/util/compare.cc b/util/compare.cc index a16613ee..a16613ee 100644 --- a/files/util/compare.cc +++ b/util/compare.cc diff --git a/files/util/cpuid.c b/util/cpuid.c index edc6a26e..c07e6e95 100644 --- a/files/util/cpuid.c +++ b/util/cpuid.c @@ -96,14 +96,15 @@ int main(int argc, const char* argv[]) { int has_erms = TestCpuFlag(kCpuHasERMS); int has_fma3 = TestCpuFlag(kCpuHasFMA3); int has_f16c = TestCpuFlag(kCpuHasF16C); - int has_gfni = TestCpuFlag(kCpuHasGFNI); int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW); int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL); int has_avx512vnni = TestCpuFlag(kCpuHasAVX512VNNI); int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI); int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2); int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG); - int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ); + int has_avx10 = TestCpuFlag(kCpuHasAVX10); + int has_avxvnni = TestCpuFlag(kCpuHasAVXVNNI); + int has_avxvnniint8 = TestCpuFlag(kCpuHasAVXVNNIINT8); printf("Has X86 0x%x\n", has_x86); printf("Has SSE2 0x%x\n", has_sse2); printf("Has SSSE3 0x%x\n", has_ssse3); @@ -114,14 +115,15 @@ int main(int argc, const char* argv[]) { printf("Has ERMS 0x%x\n", has_erms); printf("Has FMA3 0x%x\n", has_fma3); printf("Has F16C 0x%x\n", has_f16c); - printf("Has GFNI 0x%x\n", has_gfni); printf("Has AVX512BW 0x%x\n", has_avx512bw); printf("Has AVX512VL 0x%x\n", has_avx512vl); printf("Has AVX512VNNI 0x%x\n", has_avx512vnni); printf("Has AVX512VBMI 0x%x\n", has_avx512vbmi); printf("Has AVX512VBMI2 
0x%x\n", has_avx512vbmi2); printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg); - printf("Has AVX512VPOPCNTDQ 0x%x\n", has_avx512vpopcntdq); + printf("Has AVX10 0x%x\n", has_avx10); + printf("HAS AVXVNNI 0x%x\n", has_avxvnni); + printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8); } return 0; } diff --git a/files/util/i444tonv12_eg.cc b/util/i444tonv12_eg.cc index 0fcb4095..0fcb4095 100644 --- a/files/util/i444tonv12_eg.cc +++ b/util/i444tonv12_eg.cc diff --git a/files/util/psnr.cc b/util/psnr.cc index c7bee7f9..c7bee7f9 100644 --- a/files/util/psnr.cc +++ b/util/psnr.cc diff --git a/files/util/psnr.h b/util/psnr.h index aac128cb..aac128cb 100644 --- a/files/util/psnr.h +++ b/util/psnr.h diff --git a/files/util/psnr_main.cc b/util/psnr_main.cc index 8b9fd972..8b9fd972 100644 --- a/files/util/psnr_main.cc +++ b/util/psnr_main.cc diff --git a/files/util/ssim.cc b/util/ssim.cc index 096fbcf0..096fbcf0 100644 --- a/files/util/ssim.cc +++ b/util/ssim.cc diff --git a/files/util/ssim.h b/util/ssim.h index a855f1d1..a855f1d1 100644 --- a/files/util/ssim.h +++ b/util/ssim.h diff --git a/files/util/yuvconstants.c b/util/yuvconstants.c index 4e5185af..4e5185af 100644 --- a/files/util/yuvconstants.c +++ b/util/yuvconstants.c diff --git a/files/util/yuvconvert.cc b/util/yuvconvert.cc index 93b52668..93b52668 100644 --- a/files/util/yuvconvert.cc +++ b/util/yuvconvert.cc diff --git a/files/winarm.mk b/winarm.mk index b0a344ae..b0a344ae 100644 --- a/files/winarm.mk +++ b/winarm.mk |