Merge "Snap for 11647390 from 488a2af021e3e7473f083a9435b1472c0d411f3d to androidx-vectordrawable-release" into androidx-vectordrawable-releaseandroidx-vectordrawable-release

author: Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2024-03-29 18:11:18 +0000
committer: Gerrit Code Review <noreply-gerritcodereview@google.com> 2024-03-29 18:11:18 +0000
commit: e91b3430dd4106d54524333bde0062343a801770 (patch)
tree: 7aa355fd0b89ec0b2611e17ee84a14c6fa449e22
parent: eaf9564965f32c3a944ed1adf82cf4f80fdba734 (diff)
parent: c334ec011622efb99936768259a97d2a9b8dcd83 (diff)
download: libyuv-androidx-vectordrawable-release.tar.gz
197 files changed, 6979 insertions, 4464 deletions
diff --git a/files/.clang-format b/.clang-format
index 59d48705..59d48705 100644
--- a/files/.clang-format
+++ b/.clang-format
diff --git a/files/.gitignore b/.gitignore
index 20d679b7..20d679b7 100644
--- a/files/.gitignore
+++ b/.gitignore
diff --git a/files/.gn b/.gn
index f9a5ee6c..f9a5ee6c 100644
--- a/files/.gn
+++ b/.gn
diff --git a/files/.vpython b/.vpython
index 4a64fd21..4a64fd21 100644
--- a/files/.vpython
+++ b/.vpython
diff --git a/files/.vpython3 b/.vpython3
index 28d819e7..28d819e7 100644
--- a/files/.vpython3
+++ b/.vpython3
diff --git a/files/AUTHORS b/AUTHORS
index 28c08956..28c08956 100644
--- a/files/AUTHORS
+++ b/AUTHORS
diff --git a/Android.bp b/Android.bp
index e4ed511c..506184e0 100644
--- a/Android.bp
+++ b/Android.bp
@@ -1,7 +1,6 @@
 package {
     default_applicable_licenses: ["external_libyuv_license"],
 }
-
 // Added automatically by a large-scale-change
 // See: http://go/android-license-faq
 license {
@@ -12,7 +11,183 @@ license {
     ],
     license_text: [
         "LICENSE",
+        "PATENTS",
     ],
 }
-
 subdirs = ["files"]
+
+cc_library {
+    name: "libyuv",
+    vendor_available: true,
+    product_available: true,
+    host_supported: true,
+
+    srcs: [
+        "source/compare.cc",
+        "source/compare_common.cc",
+        "source/compare_gcc.cc",
+        "source/compare_msa.cc",
+        "source/compare_neon.cc",
+        "source/compare_neon64.cc",
+        "source/convert.cc",
+        "source/convert_argb.cc",
+        "source/convert_from.cc",
+        "source/convert_from_argb.cc",
+        "source/convert_jpeg.cc",
+        "source/convert_to_argb.cc",
+        "source/convert_to_i420.cc",
+        "source/cpu_id.cc",
+        "source/mjpeg_decoder.cc",
+        "source/mjpeg_validate.cc",
+        "source/planar_functions.cc",
+        "source/rotate.cc",
+        "source/rotate_any.cc",
+        "source/rotate_argb.cc",
+        "source/rotate_common.cc",
+        "source/rotate_gcc.cc",
+        "source/rotate_msa.cc",
+        "source/rotate_neon.cc",
+        "source/rotate_neon64.cc",
+        "source/row_any.cc",
+        "source/row_common.cc",
+        "source/row_gcc.cc",
+        "source/row_msa.cc",
+        "source/row_neon.cc",
+        "source/row_neon64.cc",
+        "source/row_rvv.cc",
+        "source/scale.cc",
+        "source/scale_any.cc",
+        "source/scale_argb.cc",
+        "source/scale_common.cc",
+        "source/scale_gcc.cc",
+        "source/scale_msa.cc",
+        "source/scale_neon.cc",
+        "source/scale_neon64.cc",
+        "source/scale_rgb.cc",
+        "source/scale_rvv.cc",
+        "source/scale_uv.cc",
+        "source/video_common.cc",
+    ],
+
+    cflags: [
+        "-Wall",
+        "-Werror",
+        "-Wno-unused-parameter",
+        "-fexceptions",
+        "-DHAVE_JPEG",
+        "-DLIBYUV_UNLIMITED_DATA",
+    ],
+
+    arch: {
+        arm: {
+            cflags: ["-mfpu=neon"],
+        },
+    },
+
+    shared_libs: ["libjpeg"],
+
+    export_include_dirs: ["include"],
+
+    apex_available: [
+        "//apex_available:platform",
+        "com.android.media.swcodec",
+        "com.android.virt",
+    ],
+    min_sdk_version: "29",
+}
+
+// compatibility static library until all uses of libyuv_static are replaced
+// with libyuv (b/37646797)
+cc_library_static {
+    name: "libyuv_static",
+    vendor_available: true,
+    whole_static_libs: ["libyuv"],
+    apex_available: [
+        "//apex_available:platform",
+        "com.android.media.swcodec",
+    ],
+    min_sdk_version: "29",
+}
+
+cc_test {
+    name: "libyuv_unittest",
+    static_libs: ["libyuv"],
+    shared_libs: ["libjpeg"],
+    cflags: ["-Wall", "-Werror"],
+    srcs: [
+        "unit_test/basictypes_test.cc",
+        "unit_test/color_test.cc",
+        "unit_test/compare_test.cc",
+        "unit_test/convert_test.cc",
+        "unit_test/cpu_test.cc",
+        "unit_test/cpu_thread_test.cc",
+        "unit_test/math_test.cc",
+        "unit_test/planar_test.cc",
+        "unit_test/rotate_argb_test.cc",
+        "unit_test/rotate_test.cc",
+        "unit_test/scale_argb_test.cc",
+        "unit_test/scale_plane_test.cc",
+        "unit_test/scale_rgb_test.cc",
+        "unit_test/scale_test.cc",
+        "unit_test/scale_uv_test.cc",
+        "unit_test/unit_test.cc",
+        "unit_test/video_common_test.cc",
+    ],
+}
+
+cc_test {
+    name: "compare",
+    gtest: false,
+    srcs: [
+        "util/compare.cc",
+    ],
+    static_libs: ["libyuv"],
+}
+
+cc_test {
+    name: "cpuid",
+    gtest: false,
+    srcs: [
+        "util/cpuid.c",
+    ],
+    static_libs: ["libyuv"],
+}
+
+cc_test {
+    name: "i444tonv12_eg",
+    gtest: false,
+    srcs: [
+        "util/i444tonv12_eg.cc",
+    ],
+    static_libs: ["libyuv"],
+}
+
+cc_test {
+    name: "psnr",
+    gtest: false,
+    srcs: [
+        "util/psnr_main.cc",
+        "util/psnr.cc",
+        "util/ssim.cc",
+    ],
+    static_libs: ["libyuv"],
+}
+
+cc_test {
+    name: "yuvconstants",
+    gtest: false,
+    srcs: [
+        "util/yuvconstants.c",
+    ],
+    static_libs: ["libyuv"],
+}
+
+cc_test {
+    name: "yuvconvert",
+    gtest: false,
+    srcs: [
+        "util/yuvconvert.cc",
+    ],
+    static_libs: ["libyuv"],
+    shared_libs: ["libjpeg"],
+}
diff --git a/BUILD b/BUILD
deleted file mode 100644
index 3145e36a..00000000
--- a/BUILD
+++ /dev/null
@@ -1,14 +0,0 @@
-# Copyright 2011 Google Inc. All Rights Reserved.
-#
-# Description:
-#   The libyuv package provides implementation yuv image conversion and
-#   scaling.
-#
-#   This library is used by Talk Video and WebRTC.
-#
-
-licenses(['notice'])  #  3-clause BSD
-
-exports_files(['LICENSE'])
-
-package(default_visibility = ['//visibility:public'])
diff --git a/files/BUILD.gn b/BUILD.gn
index adaae9d8..2c600b22 100644
--- a/files/BUILD.gn
+++ b/BUILD.gn
@@ -36,6 +36,12 @@ config("libyuv_config") {
   if (libyuv_disable_rvv) {
     defines += [ "LIBYUV_DISABLE_RVV" ]
   }
+  if (!libyuv_use_lsx) {
+    defines += [ "LIBYUV_DISABLE_LSX" ]
+  }
+  if (!libyuv_use_lasx) {
+    defines += [ "LIBYUV_DISABLE_LASX" ]
+  }
 }
 
 # This target is built when no specific target is specified on the command line.
@@ -74,6 +80,14 @@ group("libyuv") {
     deps += [ ":libyuv_msa" ]
   }
 
+  if (libyuv_use_lsx) {
+    deps += [ ":libyuv_lsx" ]
+  }
+
+  if (libyuv_use_lasx) {
+    deps += [ ":libyuv_lasx" ]
+  }
+
   if (!is_ios && !libyuv_disable_jpeg) {
     # Make sure that clients of libyuv link with libjpeg. This can't go in
     # libyuv_internal because in Windows x64 builds that will generate a clang
@@ -142,6 +156,7 @@ static_library("libyuv_internal") {
     "source/scale_common.cc",
     "source/scale_gcc.cc",
     "source/scale_rgb.cc",
+    "source/scale_rvv.cc",
     "source/scale_uv.cc",
     "source/scale_win.cc",
     "source/video_common.cc",
@@ -235,6 +250,44 @@ if (libyuv_use_msa) {
   }
 }
 
+if (libyuv_use_lsx) {
+  static_library("libyuv_lsx") {
+    sources = [
+      # LSX Source Files
+      "source/rotate_lsx.cc",
+      "source/row_lsx.cc",
+      "source/scale_lsx.cc",
+    ]
+
+    cflags_cc = [
+      "-mlsx",
+      "-Wno-c++11-narrowing",
+    ]
+
+    deps = [ ":libyuv_internal" ]
+
+    public_configs = [ ":libyuv_config" ]
+  }
+}
+
+if (libyuv_use_lasx) {
+  static_library("libyuv_lasx") {
+    sources = [
+      # LASX Source Files
+      "source/row_lasx.cc",
+    ]
+
+    cflags_cc = [
+      "-mlasx",
+      "-Wno-c++11-narrowing",
+    ]
+
+    deps = [ ":libyuv_internal" ]
+
+    public_configs = [ ":libyuv_config" ]
+  }
+}
+
 if (libyuv_include_tests) {
   config("libyuv_unittest_warnings_config") {
     if (!is_win) {
@@ -262,6 +315,7 @@ if (libyuv_include_tests) {
       "unit_test/basictypes_test.cc",
       "unit_test/color_test.cc",
       "unit_test/compare_test.cc",
+      "unit_test/convert_argb_test.cc",
       "unit_test/convert_test.cc",
       "unit_test/cpu_test.cc",
       "unit_test/cpu_thread_test.cc",
@@ -270,6 +324,7 @@ if (libyuv_include_tests) {
       "unit_test/rotate_argb_test.cc",
       "unit_test/rotate_test.cc",
       "unit_test/scale_argb_test.cc",
+      "unit_test/scale_plane_test.cc",
       "unit_test/scale_rgb_test.cc",
       "unit_test/scale_test.cc",
       "unit_test/scale_uv_test.cc",
diff --git a/files/CM_linux_packages.cmake b/CM_linux_packages.cmake
index 5f676f89..a073edfa 100644
--- a/files/CM_linux_packages.cmake
+++ b/CM_linux_packages.cmake
@@ -8,7 +8,7 @@ SET ( YUV_VER_MAJOR 0 )
 SET ( YUV_VER_MINOR 0 )
 SET ( YUV_VER_PATCH ${YUV_VERSION_NUMBER} )
 SET ( YUV_VERSION ${YUV_VER_MAJOR}.${YUV_VER_MINOR}.${YUV_VER_PATCH} )
-MESSAGE ( "Building ver.: ${YUV_VERSION}" )
+MESSAGE ( VERBOSE "Building ver.: ${YUV_VERSION}" )
 
 # is this a 32-bit or 64-bit build?
 IF ( CMAKE_SIZEOF_VOID_P EQUAL 8 )
@@ -45,7 +45,7 @@ ELSE ()
 		SET ( YUV_SYSTEM_NAME "amd-${YUV_BIT_SIZE}" )
 	ENDIF ()
 ENDIF ()
-MESSAGE ( "Packaging for: ${YUV_SYSTEM_NAME}" )
+MESSAGE ( VERBOSE "Packaging for: ${YUV_SYSTEM_NAME}" )
 
 # define all the variables needed by CPack to create .deb and .rpm packages
 SET ( CPACK_PACKAGE_VENDOR					"Frank Barchard" )
diff --git a/files/CMakeLists.txt b/CMakeLists.txt
index 7a4a1994..9abfa74b 100644
--- a/files/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -37,6 +37,10 @@ if(WIN32)
   SET_TARGET_PROPERTIES	( ${ly_lib_shared} PROPERTIES IMPORT_PREFIX "lib" )
 endif()
 
+# this creates the cpuid tool
+ADD_EXECUTABLE      ( cpuid ${ly_base_dir}/util/cpuid.c )
+TARGET_LINK_LIBRARIES  ( cpuid ${ly_lib_static} )
+
 # this creates the conversion tool
 ADD_EXECUTABLE			( yuvconvert ${ly_base_dir}/util/yuvconvert.cc )
 TARGET_LINK_LIBRARIES	( yuvconvert ${ly_lib_static} )
diff --git a/files/DEPS b/DEPS
index a7bec8d3..70ed1d58 100644
--- a/files/DEPS
+++ b/DEPS
@@ -5,39 +5,39 @@ gclient_gn_args = [
 
 vars = {
   'chromium_git': 'https://chromium.googlesource.com',
-  'chromium_revision': 'd1501576384de23ddf8d8815ee7c95be2f708de5',
-  'gn_version': 'git_revision:e3978de3e8dafb50a2b11efa784e08699a43faf8',
+  'chromium_revision': 'af3d01376bec75a68f90160bfd38057d60510a2b',
+  'gn_version': 'git_revision:fae280eabe5d31accc53100137459ece19a7a295',
   # ninja CIPD package version.
   # https://chrome-infra-packages.appspot.com/p/infra/3pp/tools/ninja
   'ninja_version': 'version:2@1.11.1.chromium.6',
   # reclient CIPD package version
-  'reclient_version': 're_client_version:0.107.1.0b39c4c-gomaip',
+  'reclient_version': 're_client_version:0.110.0.43ec6b1-gomaip',
 
   # Keep the Chromium default of generating location tags.
   'generate_location_tags': True,
 
   # By default, download the fuchsia sdk from the public sdk directory.
-  'fuchsia_sdk_cipd_prefix': 'fuchsia/sdk/gn/',
-  'fuchsia_version': 'version:12.20230530.1.1',
+  'fuchsia_sdk_cipd_prefix': 'fuchsia/sdk/core/',
+  'fuchsia_version': 'version:15.20230909.2.1',
   # By default, download the fuchsia images from the fuchsia GCS bucket.
   'fuchsia_images_bucket': 'fuchsia',
   'checkout_fuchsia': False,
   # Since the images are hundreds of MB, default to only downloading the image
   # most commonly useful for developers. Bots and developers that need to use
   # other images can override this with additional images.
-  'checkout_fuchsia_boot_images': "terminal.qemu-x64",
+  'checkout_fuchsia_boot_images': "terminal.qemu-x64,terminal.x64",
   'checkout_fuchsia_product_bundles': '"{checkout_fuchsia_boot_images}" != ""',
 }
 
 deps = {
   'src/build':
-    Var('chromium_git') + '/chromium/src/build' + '@' + 'd0c2b4cf4fdd43866e066fb6722099aa8bf4ce79',
+    Var('chromium_git') + '/chromium/src/build' + '@' + '5885d3c24833ad72845a52a1b913a2b8bc651b56',
   'src/buildtools':
-    Var('chromium_git') + '/chromium/src/buildtools' + '@' + 'edbefcee3d2cc45cdb0c60c2b01b673f8ba728bc',
+    Var('chromium_git') + '/chromium/src/buildtools' + '@' + '79ab87fa54614258c4c95891e873223371194525',
   'src/testing':
-    Var('chromium_git') + '/chromium/src/testing' + '@' + 'a13817e1ea0255a375d13aeb3bb2527bd528495b',
+    Var('chromium_git') + '/chromium/src/testing' + '@' + '51e9a02297057cc0e917763a51e16680b7d16fb6',
   'src/third_party':
-    Var('chromium_git') + '/chromium/src/third_party' + '@' + '824e26c9fcbd00fccf6cdb712f8f127aae133042',
+    Var('chromium_git') + '/chromium/src/third_party' + '@' + '2dc4b18abd1003ce7b1eda509dc96f12d49a9667',
 
   'src/buildtools/linux64': {
     'packages': [
@@ -82,17 +82,10 @@ deps = {
     'dep_type': 'cipd',
   },
 
-  'src/buildtools/clang_format/script':
-    Var('chromium_git') + '/external/github.com/llvm/llvm-project/clang/tools/clang-format.git' + '@' + 'f97059df7f8b205064625cdb5f97b56668a125ef',
-  'src/buildtools/third_party/libc++/trunk':
-    Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + 'f8279b01085b800724f5c5629dc365b9f040dc53',
-  'src/buildtools/third_party/libc++abi/trunk':
-    Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + '5c8dbff7a4911fe1e0af0bc1628891e4187a3c90',
-  'src/buildtools/third_party/libunwind/trunk':
-    Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + 'cd144ced35285edaa064a91561969e5b22c219b1',
-
   'src/third_party/catapult':
-    Var('chromium_git') + '/catapult.git' + '@' + '9f3ef9c2eae9b1adabde88efe5dcc438ba76e205',
+    Var('chromium_git') + '/catapult.git' + '@' + 'fa05d995e152efdae488a2aeba397cd609fdbc9d',
+  'src/third_party/clang-format/script':
+      Var('chromium_git') + '/external/github.com/llvm/llvm-project/clang/tools/clang-format.git' + '@' + 'f97059df7f8b205064625cdb5f97b56668a125ef',
   'src/third_party/colorama/src':
     Var('chromium_git') + '/external/colorama.git' + '@' + '3de9f013df4b470069d03d250224062e8cf15c49',
   'src/third_party/cpu_features/src': {
@@ -100,19 +93,29 @@ deps = {
     'condition': 'checkout_android',
   },
   'src/third_party/depot_tools':
-    Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + '05ab73be51774f098eb580eda6e96a49e1010b1b',
+    Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + 'd3e43dd4319ba169c0aaf44547eecf861f2fe5da',
   'src/third_party/freetype/src':
-    Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + '80a507a6b8e3d2906ad2c8ba69329bd2fb2a85ef',
+    Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + '9e3c5d7e183c1a8d5ed8868d7d28ef18d3ec9ec8',
+  'third_party/fuchsia-gn-sdk': {
+    'url': Var('chromium_git') + '/chromium/src/third_party/fuchsia-gn-sdk.git' + '@' + '0d6902558d92fe3d49ba9a8f638ddea829be595b',
+    'condition': 'checkout_fuchsia',
+  },
   'src/third_party/googletest/src':
     Var('chromium_git') + '/external/github.com/google/googletest.git' + '@' + 'af29db7ec28d6df1c7f0f745186884091e602e07',
   'src/third_party/harfbuzz-ng/src':
-    Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + '8df5cdbcda495a582e72a7e2ce35d6106401edce',
+    Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + 'db700b5670d9475cc8ed4880cc9447b232c5e432',
+  'src/third_party/libc++/src':
+    Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + '84fb809dd6dae36d556dc0bb702c6cc2ce9d4b80',
+  'src/third_party/libc++abi/src':
+    Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + '8d21803b9076b16d46c32e2f10da191ee758520c',
+  'src/third_party/libunwind/src':
+    Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + 'f1c687e0aaf0d70b9a53a150e9be5cb63af9215f',
   'src/third_party/libjpeg_turbo':
-    Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + 'aa4075f116e4312537d0d3e9dbd5e31096539f94',
+    Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + '30bdb85e302ecfc52593636b2f44af438e05e784',
   'src/third_party/nasm':
     Var('chromium_git') + '/chromium/deps/nasm.git' + '@' + '7fc833e889d1afda72c06220e5bed8fb43b2e5ce',
   'src/tools':
-    Var('chromium_git') + '/chromium/src/tools' + '@' + '916dfffd61cbf61075c47d7b480425d7de1483fd',
+    Var('chromium_git') + '/chromium/src/tools' + '@' + 'a76c0dbb64c603a0d45e0c6dfae3a351b6e1adf1',
 
   # libyuv-only dependencies (not present in Chromium).
   'src/third_party/gtest-parallel':
@@ -139,7 +142,7 @@ deps = {
       'packages': [
           {
               'package': 'chromium/third_party/kotlin_stdlib',
-              'version': 'z4_AYYz2Tw5GKikuiDLTuxxf0NJVGLkC3CVcyiIpc-gC',
+              'version': 'Z1gsqhL967kFQecxKrRwXHbl-vwQjpv0l7PMUZ0EVO8C',
           },
       ],
       'condition': 'checkout_android',
@@ -149,7 +152,7 @@ deps = {
       'packages': [
           {
               'package': 'chromium/third_party/kotlinc',
-              'version': 'J3BAlA7yf4corBopDhlwuT9W4jR1Z9R55KD3BUTVldQC',
+              'version': 'Rr02Gf2EkaeSs3EhSUHhPqDHSd1AzimrM6cRYUJCPjQC',
           },
       ],
       'condition': 'checkout_android',
@@ -157,9 +160,9 @@ deps = {
   },
 
   'src/third_party/boringssl/src':
-    'https://boringssl.googlesource.com/boringssl.git' + '@' + 'dd5219451c3ce26221762a15d867edf43b463bb2',
+    'https://boringssl.googlesource.com/boringssl.git' + '@' + '20a06474c0b4a16779311bfe98ba69dc2402101d',
   'src/base': {
-    'url': Var('chromium_git') + '/chromium/src/base' + '@' + 'b4c5ce6cb1a7c90de3fdddc80ed439fe87eab443',
+    'url': Var('chromium_git') + '/chromium/src/base' + '@' + 'd407b7061bce341bb6e11b539ea86c46c949ac4c',
     'condition': 'checkout_android',
   },
   'src/third_party/bazel': {
@@ -182,16 +185,22 @@ deps = {
       'condition': 'checkout_android',
       'dep_type': 'cipd',
   },
-  'src/third_party/android_ndk': {
-    'url': Var('chromium_git') + '/android_ndk.git' + '@' + '310956bd122ec2b96049f8d7398de6b717f3452e',
-    'condition': 'checkout_android',
+  'src/third_party/android_toolchain': {
+      'packages': [
+            {
+                'package': 'chromium/third_party/android_toolchain/android_toolchain',
+                'version': 'R_8suM8m0oHbZ1awdxGXvKEFpAOETscbfZxkkMthyk8C',
+            },
+      ],
+      'condition': 'checkout_android',
+      'dep_type': 'cipd',
   },
 
   'src/third_party/androidx': {
     'packages': [
       {
           'package': 'chromium/third_party/androidx',
-          'version': 'Wr5b9WJiFAzJcmjmvQIePIxk5IgpDl62kaGY_SiLxJEC',
+          'version': 'y7rF_rx56mD3FGhMiqnlbQ6HOqHJ95xUFNX1m-_a988C',
       },
     ],
     'condition': 'checkout_android',
@@ -211,8 +220,8 @@ deps = {
   'src/third_party/android_sdk/public': {
       'packages': [
           {
-              'package': 'chromium/third_party/android_sdk/public/build-tools/33.0.0',
-              'version': '-VRKr36Uw8L_iFqqo9nevIBgNMggND5iWxjidyjnCgsC',
+              'package': 'chromium/third_party/android_sdk/public/build-tools/34.0.0',
+              'version': 'YK9Rzw3fDzMHVzatNN6VlyoD_81amLZpN1AbmkdOd6AC',
           },
           {
               'package': 'chromium/third_party/android_sdk/public/emulator',
@@ -224,11 +233,11 @@ deps = {
           },
           {
               'package': 'chromium/third_party/android_sdk/public/platform-tools',
-              'version': 'RSI3iwryh7URLGRgJHsCvUxj092woTPnKt4pwFcJ6L8C',
+              'version': 'HWVsGs2HCKgSVv41FsOcsfJbNcB0UFiNrF6Tc4yRArYC',
           },
           {
-              'package': 'chromium/third_party/android_sdk/public/platforms/android-33',
-              'version': 'eo5KvW6UVor92LwZai8Zulc624BQZoCu-yn7wa1z_YcC',
+              'package': 'chromium/third_party/android_sdk/public/platforms/android-34',
+              'version': 'u-bhWbTME6u-DjypTgr3ZikCyeAeU6txkR9ET6Uudc8C',
           },
    {
               'package': 'chromium/third_party/android_sdk/public/platforms/android-tiramisuprivacysandbox',
@@ -330,7 +339,7 @@ deps = {
   },
 
   'src/third_party/icu': {
-    'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + 'a2961dc659b4ae847a9c6120718cc2517ee57d9e',
+    'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + 'e8c3bc9ea97d4423ad0515e5f1c064f486dae8b1',
   },
   'src/third_party/icu4j': {
       'packages': [
@@ -356,7 +365,7 @@ deps = {
       'packages': [
           {
               'package': 'chromium/third_party/jdk',
-              'version': '2Of9Pe_OdO4xoAATuiLDiMVNebKTNO3WrwJGqil4RosC',
+              'version': 'GCFtf5t6M4HlrHj6NXedHbpHp2xjgognF8ptNci4478C',
           },
       ],
       'condition': 'checkout_android',
@@ -411,7 +420,7 @@ deps = {
       'packages': [
           {
               'package': 'chromium/third_party/r8',
-              'version': '4Oq32DG2vuDh7Frxj6tH5xyi77sVgBWpvvl4hwvZRR4C',
+              'version': 'O1BBWiBTIeNUcraX8STMtQXVaCleu6SJJjWCcnfhPLkC',
           },
       ],
       'condition': 'checkout_android',
@@ -424,7 +433,7 @@ deps = {
       'packages': [
           {
               'package': 'chromium/third_party/r8',
-              'version': 'PwglNZFRNPkBBXdnY9NfrZFk2ULWDTRxhV9rl2kvkpUC',
+              'version': 'vw5kLlW3-suSlCKSO9OQpFWpR8oDnvQ8k1RgKNUapQYC',
           },
       ],
       'condition': 'checkout_android',
@@ -441,7 +450,7 @@ deps = {
       'dep_type': 'cipd',
   },
   'src/third_party/requests/src': {
-    'url': Var('chromium_git') + '/external/github.com/kennethreitz/requests.git' + '@' + 'refs/tags/v2.23.0',
+    'url': Var('chromium_git') + '/external/github.com/kennethreitz/requests.git' + '@' + 'c7e0fc087ceeadb8b4c84a0953a422c474093d6d',
     'condition': 'checkout_android',
   },
   'src/third_party/robolectric': {
@@ -468,7 +477,7 @@ deps = {
       'packages': [
           {
               'package': 'chromium/third_party/turbine',
-              'version': 'Foa7uRpVoKr4YoayCKc9EERkjpmGOE3DAUTWFLL7gKEC',
+              'version': '2I2Nz480QsuCxpQ1lMfbigX8l5HAhX3_ykWU4TKRGo4C',
           },
       ],
       'condition': 'checkout_android',
@@ -481,7 +490,7 @@ deps = {
 
   # iOS deps:
   'src/ios': {
-    'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '241921896b64f85de9a32d461462913cbff4baeb',
+    'url': Var('chromium_git') + '/chromium/src/ios' + '@' + 'ddd58e86cf4ebdc0db60a5d0f3c323de49bb295c',
     'condition': 'checkout_ios'
   },
 
@@ -1680,7 +1689,7 @@ deps = {
       'packages': [
           {
               'package': 'chromium/third_party/android_deps/libs/com_squareup_okio_okio_jvm',
-              'version': 'version:2@3.0.0.cr1',
+              'version': 'version:2@3.3.0.cr1',
           },
       ],
       'condition': 'checkout_android',
@@ -1691,7 +1700,7 @@ deps = {
       'packages': [
           {
               'package': 'chromium/third_party/android_deps/libs/com_squareup_wire_wire_runtime_jvm',
-              'version': 'version:2@4.5.1.cr1',
+              'version': 'version:2@4.7.0.cr1',
           },
       ],
       'condition': 'checkout_android',
@@ -1823,7 +1832,7 @@ deps = {
       'packages': [
           {
               'package': 'chromium/third_party/android_deps/libs/net_bytebuddy_byte_buddy',
-              'version': 'version:2@1.14.4.cr1',
+              'version': 'version:2@1.14.5.cr1',
           },
       ],
       'condition': 'checkout_android',
@@ -1834,7 +1843,7 @@ deps = {
       'packages': [
           {
               'package': 'chromium/third_party/android_deps/libs/net_bytebuddy_byte_buddy_agent',
-              'version': 'version:2@1.14.4.cr1',
+              'version': 'version:2@1.14.5.cr1',
           },
       ],
       'condition': 'checkout_android',
@@ -2043,7 +2052,7 @@ deps = {
       'packages': [
           {
               'package': 'chromium/third_party/android_deps/libs/org_mockito_mockito_android',
-              'version': 'version:2@5.3.1.cr1',
+              'version': 'version:2@5.4.0.cr1',
           },
       ],
       'condition': 'checkout_android',
@@ -2054,7 +2063,7 @@ deps = {
       'packages': [
           {
               'package': 'chromium/third_party/android_deps/libs/org_mockito_mockito_core',
-              'version': 'version:2@5.3.1.cr1',
+              'version': 'version:2@5.4.0.cr1',
           },
       ],
       'condition': 'checkout_android',
@@ -2065,7 +2074,7 @@ deps = {
       'packages': [
           {
               'package': 'chromium/third_party/android_deps/libs/org_mockito_mockito_subclass',
-              'version': 'version:2@5.3.1.cr1',
+              'version': 'version:2@5.4.0.cr1',
           },
       ],
       'condition': 'checkout_android',
diff --git a/files/DIR_METADATA b/DIR_METADATA
index 8bc04f15..8bc04f15 100644
--- a/files/DIR_METADATA
+++ b/DIR_METADATA
diff --git a/LICENSE b/LICENSE
index da40b336..c911747a 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2011, Google Inc. All rights reserved.
+Copyright 2011 The LibYuv Project Authors. All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are
diff --git a/METADATA b/METADATA
index 5508de20..19d0436e 100644
--- a/METADATA
+++ b/METADATA
@@ -1,14 +1,19 @@
-name: "libyuv"
-description:
-    "libyuv is an open source project that includes YUV scaling and conversion "
-    "functionality."
+# This project was upgraded with external_updater.
+# Usage: tools/external_updater/updater.sh update libyuv
+# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md
 
+name: "libyuv"
+description: "libyuv is an open source project that includes YUV scaling and conversion functionality."
 third_party {
-  url {
-    type: GIT
+  license_type: NOTICE
+  last_upgrade_date {
+    year: 2024
+    month: 1
+    day: 11
+  }
+  identifier {
+    type: "Git"
     value: "https://chromium.googlesource.com/libyuv/libyuv/"
+    version: "af6ac8265bbd07bcf977526458b60305c4304288"
   }
-  version: "2a6cb7431939faba1b40d3f08883847f0cf63572"
-  last_upgrade_date { year: 2023 month: 6 day: 1 }
-  license_type: NOTICE
 }
diff --git a/OWNERS b/OWNERS
index a607e727..f11a7bfd 100644
--- a/OWNERS
+++ b/OWNERS
@@ -1,4 +1,11 @@
-fbarchard@google.com
-phoglund@google.com
-magjed@google.com
-chz@google.com
+mbonadei@chromium.org
+fbarchard@chromium.org
+magjed@chromium.org
+wtc@google.com
+jansson@google.com
+
+per-file *.gn=mbonadei@chromium.org,jansson@google.com
+per-file .gitignore=*
+per-file AUTHORS=*
+per-file DEPS=*
+per-file PRESUBMIT.py=mbonadei@chromium.org,jansson@google.com
diff --git a/OWNERS.android b/OWNERS.android
new file mode 100644
index 00000000..7529cb92
--- /dev/null
+++ b/OWNERS.android
@@ -0,0 +1 @@
+include platform/system/core:/janitors/OWNERS
diff --git a/files/PATENTS b/PATENTS
index 64aa5c90..64aa5c90 100644
--- a/files/PATENTS
+++ b/PATENTS
diff --git a/files/PRESUBMIT.py b/PRESUBMIT.py
index d3901caf..d3901caf 100644
--- a/files/PRESUBMIT.py
+++ b/PRESUBMIT.py
diff --git a/files/README.chromium b/README.chromium
index 880191e4..1389f285 100644
--- a/files/README.chromium
+++ b/README.chromium
@@ -1,8 +1,9 @@
 Name: libyuv
-URL: http://code.google.com/p/libyuv/
-Version: 1871
+URL: https://chromium.googlesource.com/libyuv/libyuv/
+Version: 1883
 License: BSD
 License File: LICENSE
+Shipped: yes
 
 Description:
 libyuv is an open source project that includes YUV conversion and scaling functionality.
diff --git a/files/README.md b/README.md
index 95eeb04c..95eeb04c 100644
--- a/files/README.md
+++ b/README.md
diff --git a/README.version b/README.version
deleted file mode 100644
index 6eb9dc8c..00000000
--- a/README.version
+++ /dev/null
@@ -1,8 +0,0 @@
-Version: r1871
-BugComponent: 42195
-Owner: lajos
-Local Modifications:
- * Remove files/Android.mk (it messes with the android build system).
- * Remove OWNERS files within files/ and all the subdirectories (except for
-   files/fuzz). Having these files breaks repo presubmit hooks since they
-   contain non @google.com email addresses.
diff --git a/UPDATING b/UPDATING
deleted file mode 100644
index 2679284c..00000000
--- a/UPDATING
+++ /dev/null
@@ -1,36 +0,0 @@
-To sync the libyuv checkout to an upstream revision, do the following:
-
-These commands are known to work from the external/libyuv directory of the
-Android tree's checkout.
-
-Step 1: Remove the files/ subdirectory.
-
-$ rm -rf files
-
-Step 2: Clone the libyuv repository from upstream.
-
-$ git clone https://chromium.googlesource.com/libyuv/libyuv files
-
-Step 3 (optional): Checkout a specific commit/tag.
-
-$ cd files
-$ git checkout <commit_or_tag>
-$ cd ..
-
-Step 4: Remove files that aren't necessary (Android.mk, .git and OWNERS).
-
-$ rm files/Android.mk
-$ rm -rf files/.git
-$ find files/ -name "OWNERS" | xargs rm
-
-Step 5: Update the version and last_upgrade_date fields in the METADATA file.
-
-Step 6: Update README.version with the version (can be found in
-        files/include/libyuv/version.h)
-
-Step 7: If any local modifications are being done, update README.version and
-        this file with updated instructions.
-
-Step 8: Ensure that libyuv builds and camera and media related CTS tests are
-        passing. If there are any linker errors about missing symbols, try
-	updating frameworks/av/media/libstagefright/export.lds.
diff --git a/files/build_overrides/build.gni b/build_overrides/build.gni
index d9d01d51..d9d01d51 100644
--- a/files/build_overrides/build.gni
+++ b/build_overrides/build.gni
diff --git a/files/build_overrides/gtest.gni b/build_overrides/gtest.gni
index d3c3f68c..d3c3f68c 100644
--- a/files/build_overrides/gtest.gni
+++ b/build_overrides/gtest.gni
diff --git a/files/build_overrides/partition_alloc.gni b/build_overrides/partition_alloc.gni
index dcf8ac2d..dcf8ac2d 100644
--- a/files/build_overrides/partition_alloc.gni
+++ b/build_overrides/partition_alloc.gni
diff --git a/files/cleanup_links.py b/cleanup_links.py
index 7d1eba9b..7d1eba9b 100755
--- a/files/cleanup_links.py
+++ b/cleanup_links.py
diff --git a/codereview.settings b/codereview.settings
index 9782886f..b226fae5 100644
--- a/codereview.settings
+++ b/codereview.settings
@@ -1,5 +1,5 @@
-# This file is used by git cl to get repository specific information.
+# This file is used by `git cl` to get repository specific information.
+CODE_REVIEW_SERVER: codereview.chromium.org
 GERRIT_HOST: True
 PROJECT: libyuv
-TRY_ON_UPLOAD: False
 VIEW_VC: https://chromium.googlesource.com/libyuv/libyuv/+/
diff --git a/files/docs/deprecated_builds.md b/docs/deprecated_builds.md
index ba42966c..8edefd78 100644
--- a/files/docs/deprecated_builds.md
+++ b/docs/deprecated_builds.md
@@ -165,11 +165,11 @@ mipsel
 
 arm32 disassembly:
 
-    third_party/android_ndk/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-objdump -d out/Release/obj/source/libyuv.row_neon.o
+    llvm-objdump -d out/Release/obj/source/libyuv.row_neon.o
 
 arm64 disassembly:
 
-    third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d out/Release/obj/source/libyuv.row_neon64.o
+    llvm-objdump -d out/Release/obj/source/libyuv.row_neon64.o
 
 Running tests:
 
diff --git a/files/docs/environment_variables.md b/docs/environment_variables.md
index 4eb09659..4eb09659 100644
--- a/files/docs/environment_variables.md
+++ b/docs/environment_variables.md
diff --git a/files/docs/filtering.md b/docs/filtering.md
index 8696976e..8696976e 100644
--- a/files/docs/filtering.md
+++ b/docs/filtering.md
diff --git a/files/docs/formats.md b/docs/formats.md
index 12ea9465..12ea9465 100644
--- a/files/docs/formats.md
+++ b/docs/formats.md
diff --git a/files/docs/getting_started.md b/docs/getting_started.md
index b19f0009..f2f71b8b 100644
--- a/files/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -139,11 +139,11 @@ mips
 
 arm disassembly:
 
-    third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv/row_common.o >row_common.txt
+    llvm-objdump -d ./out/Release/obj/libyuv/row_common.o >row_common.txt
 
-    third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon.o >row_neon.txt
+    llvm-objdump -d ./out/Release/obj/libyuv_neon/row_neon.o >row_neon.txt
 
-    third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon64.o >row_neon64.txt
+    llvm-objdump -d ./out/Release/obj/libyuv_neon/row_neon64.o >row_neon64.txt
 
     Caveat: Disassembly may require optimize_max be disabled in BUILD.gn
 
@@ -238,6 +238,18 @@ After running script, clang & qemu are built in `build-toolchain-qemu/riscv-clan
           -DUSE_RVV=ON .
     cmake --build out/Release/
 
+#### Customized Compiler Flags
+
+Custom compiler flags can be passed with `-DRISCV_COMPILER_FLAGS="xxx"`.
+If `-DRISCV_COMPILER_FLAGS="xxx"` is set manually, no other compile flags are appended (e.g. the `-march=xxx` flag is disabled).
+
+Example:
+
+    cmake -B out/Release/ -DUNIT_TEST=ON \
+          -DCMAKE_BUILD_TYPE=Release \
+          -DCMAKE_TOOLCHAIN_FILE="./riscv_script/riscv-clang.cmake" \
+          -DRISCV_COMPILER_FLAGS="-mcpu=sifive-x280" \
+          .
 
 ### Run on QEMU
 
diff --git a/files/docs/rotation.md b/docs/rotation.md
index a08430fd..a08430fd 100644
--- a/files/docs/rotation.md
+++ b/docs/rotation.md
diff --git a/files/download_vs_toolchain.py b/download_vs_toolchain.py
index 6bc086d6..6bc086d6 100644
--- a/files/download_vs_toolchain.py
+++ b/download_vs_toolchain.py
diff --git a/files/Android.bp b/files/Android.bp
deleted file mode 100644
index 9811a7b3..00000000
--- a/files/Android.bp
+++ /dev/null
@@ -1,197 +0,0 @@
-package {
-    default_applicable_licenses: ["external_libyuv_files_license"],
-}
-
-// Added automatically by a large-scale-change
-//
-// large-scale-change included anything that looked like it might be a license
-// text as a license_text. e.g. LICENSE, NOTICE, COPYING etc.
-//
-// Please consider removing redundant or irrelevant files from 'license_text:'.
-// See: http://go/android-license-faq
-license {
-    name: "external_libyuv_files_license",
-    visibility: [":__subpackages__"],
-    license_kinds: [
-        "SPDX-license-identifier-BSD",
-    ],
-    license_text: [
-        "LICENSE",
-        "PATENTS",
-    ],
-}
-
-cc_library {
-    name: "libyuv",
-    vendor_available: true,
-    product_available: true,
-    host_supported: true,
-    vndk: {
-        enabled: true,
-    },
-
-    srcs: [
-        "source/compare.cc",
-        "source/compare_common.cc",
-        "source/compare_gcc.cc",
-        "source/compare_msa.cc",
-        "source/compare_neon.cc",
-        "source/compare_neon64.cc",
-        "source/convert.cc",
-        "source/convert_argb.cc",
-        "source/convert_from.cc",
-        "source/convert_from_argb.cc",
-        "source/convert_jpeg.cc",
-        "source/convert_to_argb.cc",
-        "source/convert_to_i420.cc",
-        "source/cpu_id.cc",
-        "source/mjpeg_decoder.cc",
-        "source/mjpeg_validate.cc",
-        "source/planar_functions.cc",
-        "source/rotate.cc",
-        "source/rotate_any.cc",
-        "source/rotate_argb.cc",
-        "source/rotate_common.cc",
-        "source/rotate_gcc.cc",
-        "source/rotate_msa.cc",
-        "source/rotate_neon.cc",
-        "source/rotate_neon64.cc",
-        "source/row_any.cc",
-        "source/row_common.cc",
-        "source/row_gcc.cc",
-        "source/row_msa.cc",
-        "source/row_neon.cc",
-        "source/row_neon64.cc",
-        "source/row_rvv.cc",
-        "source/scale.cc",
-        "source/scale_any.cc",
-        "source/scale_argb.cc",
-        "source/scale_common.cc",
-        "source/scale_gcc.cc",
-        "source/scale_msa.cc",
-        "source/scale_neon.cc",
-        "source/scale_neon64.cc",
-        "source/scale_rgb.cc",
-        "source/scale_uv.cc",
-        "source/video_common.cc",
-    ],
-
-    cflags: [
-        "-Wall",
-        "-Werror",
-        "-Wno-unused-parameter",
-        "-fexceptions",
-        "-DHAVE_JPEG",
-    ],
-
-    arch: {
-        arm: {
-            cflags: ["-mfpu=neon"],
-        },
-    },
-
-    shared_libs: ["libjpeg"],
-
-    export_include_dirs: ["include"],
-
-    apex_available: [
-        "//apex_available:platform",
-        "com.android.media.swcodec",
-    ],
-    min_sdk_version: "29",
-}
-
-// compatibilty static library until all uses of libyuv_static are replaced
-// with libyuv (b/37646797)
-cc_library_static {
-    name: "libyuv_static",
-    vendor_available: true,
-    whole_static_libs: ["libyuv"],
-    apex_available: [
-        "//apex_available:platform",
-        "com.android.media.swcodec",
-    ],
-    min_sdk_version: "29",
-}
-
-cc_test {
-    name: "libyuv_unittest",
-    static_libs: ["libyuv"],
-    shared_libs: ["libjpeg"],
-    cflags: ["-Wall", "-Werror"],
-    srcs: [
-        "unit_test/basictypes_test.cc",
-        "unit_test/color_test.cc",
-        "unit_test/compare_test.cc",
-        "unit_test/convert_test.cc",
-        "unit_test/cpu_test.cc",
-        "unit_test/cpu_thread_test.cc",
-        "unit_test/math_test.cc",
-        "unit_test/planar_test.cc",
-        "unit_test/rotate_argb_test.cc",
-        "unit_test/rotate_test.cc",
-        "unit_test/scale_argb_test.cc",
-        "unit_test/scale_rgb_test.cc",
-        "unit_test/scale_test.cc",
-        "unit_test/scale_uv_test.cc",
-        "unit_test/unit_test.cc",
-        "unit_test/video_common_test.cc",
-    ],
-}
-
-cc_test {
-    name: "compare",
-    gtest: false,
-    srcs: [
-        "util/compare.cc",
-    ],
-    static_libs: ["libyuv"],
-}
-
-cc_test {
-    name: "cpuid",
-    gtest: false,
-    srcs: [
-        "util/cpuid.c",
-    ],
-    static_libs: ["libyuv"],
-}
-
-cc_test {
-    name: "i444tonv12_eg",
-    gtest: false,
-    srcs: [
-        "util/i444tonv12_eg.cc",
-    ],
-    static_libs: ["libyuv"],
-}
-
-cc_test {
-    name: "psnr",
-    gtest: false,
-    srcs: [
-        "util/psnr_main.cc",
-        "util/psnr.cc",
-        "util/ssim.cc",
-    ],
-    static_libs: ["libyuv"],
-}
-
-cc_test {
-    name: "yuvconstants",
-    gtest: false,
-    srcs: [
-        "util/yuvconstants.c",
-    ],
-    static_libs: ["libyuv"],
-}
-
-cc_test {
-    name: "yuvconvert",
-    gtest: false,
-    srcs: [
-        "util/yuvconvert.cc",
-    ],
-    static_libs: ["libyuv"],
-    shared_libs: ["libjpeg"],
-}
diff --git a/files/LICENSE b/files/LICENSE
deleted file mode 100644
index c911747a..00000000
--- a/files/LICENSE
+++ /dev/null
@@ -1,29 +0,0 @@
-Copyright 2011 The LibYuv Project Authors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-  * Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-
-  * Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in
-    the documentation and/or other materials provided with the
-    distribution.
-
-  * Neither the name of Google nor the names of its contributors may
-    be used to endorse or promote products derived from this software
-    without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/files/codereview.settings b/files/codereview.settings
deleted file mode 100644
index b226fae5..00000000
--- a/files/codereview.settings
+++ /dev/null
@@ -1,5 +0,0 @@
-# This file is used by `git cl` to get repository specific information.
-CODE_REVIEW_SERVER: codereview.chromium.org
-GERRIT_HOST: True
-PROJECT: libyuv
-VIEW_VC: https://chromium.googlesource.com/libyuv/libyuv/+/
diff --git a/files/public.mk b/files/public.mk
deleted file mode 100644
index 1342307a..00000000
--- a/files/public.mk
+++ /dev/null
@@ -1,13 +0,0 @@
-# This file contains all the common make variables which are useful for
-# anyone depending on this library.
-# Note that dependencies on NDK are not directly listed since NDK auto adds
-# them.
-
-LIBYUV_INCLUDES := $(LIBYUV_PATH)/include
-
-LIBYUV_C_FLAGS :=
-
-LIBYUV_CPP_FLAGS :=
-
-LIBYUV_LDLIBS :=
-LIBYUV_DEP_MODULES :=
diff --git a/files/include/libyuv.h b/include/libyuv.h
index a06e1233..a06e1233 100644
--- a/files/include/libyuv.h
+++ b/include/libyuv.h
diff --git a/files/include/libyuv/basic_types.h b/include/libyuv/basic_types.h
index 1bea67f2..1bea67f2 100644
--- a/files/include/libyuv/basic_types.h
+++ b/include/libyuv/basic_types.h
diff --git a/files/include/libyuv/compare.h b/include/libyuv/compare.h
index 3353ad71..3353ad71 100644
--- a/files/include/libyuv/compare.h
+++ b/include/libyuv/compare.h
diff --git a/files/include/libyuv/compare_row.h b/include/libyuv/compare_row.h
index d8e82d72..8293c919 100644
--- a/files/include/libyuv/compare_row.h
+++ b/include/libyuv/compare_row.h
@@ -28,7 +28,10 @@ extern "C" {
 #endif
 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
 #if defined(__has_feature)
-#if __has_feature(memory_sanitizer)
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
+#define LIBYUV_DISABLE_NEON
+#endif
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
 #define LIBYUV_DISABLE_X86
 #endif
 #endif
diff --git a/files/include/libyuv/convert.h b/include/libyuv/convert.h
index 88619a4f..88619a4f 100644
--- a/files/include/libyuv/convert.h
+++ b/include/libyuv/convert.h
diff --git a/files/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h
index 35eeac9b..35eeac9b 100644
--- a/files/include/libyuv/convert_argb.h
+++ b/include/libyuv/convert_argb.h
diff --git a/files/include/libyuv/convert_from.h b/include/libyuv/convert_from.h
index 32f42a63..32f42a63 100644
--- a/files/include/libyuv/convert_from.h
+++ b/include/libyuv/convert_from.h
diff --git a/files/include/libyuv/convert_from_argb.h b/include/libyuv/convert_from_argb.h
index ff2a581a..ff2a581a 100644
--- a/files/include/libyuv/convert_from_argb.h
+++ b/include/libyuv/convert_from_argb.h
diff --git a/files/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h
index 203f7e0d..5a81e7c9 100644
--- a/files/include/libyuv/cpu_id.h
+++ b/include/libyuv/cpu_id.h
@@ -31,24 +31,25 @@ static const int kCpuHasX86 = 0x10;
 static const int kCpuHasSSE2 = 0x20;
 static const int kCpuHasSSSE3 = 0x40;
 static const int kCpuHasSSE41 = 0x80;
-static const int kCpuHasSSE42 = 0x100;  // unused at this time.
+static const int kCpuHasSSE42 = 0x100;
 static const int kCpuHasAVX = 0x200;
 static const int kCpuHasAVX2 = 0x400;
 static const int kCpuHasERMS = 0x800;
 static const int kCpuHasFMA3 = 0x1000;
 static const int kCpuHasF16C = 0x2000;
-static const int kCpuHasGFNI = 0x4000;
-static const int kCpuHasAVX512BW = 0x8000;
-static const int kCpuHasAVX512VL = 0x10000;
-static const int kCpuHasAVX512VNNI = 0x20000;
-static const int kCpuHasAVX512VBMI = 0x40000;
-static const int kCpuHasAVX512VBMI2 = 0x80000;
-static const int kCpuHasAVX512VBITALG = 0x100000;
-static const int kCpuHasAVX512VPOPCNTDQ = 0x200000;
+static const int kCpuHasAVX512BW = 0x4000;
+static const int kCpuHasAVX512VL = 0x8000;
+static const int kCpuHasAVX512VNNI = 0x10000;
+static const int kCpuHasAVX512VBMI = 0x20000;
+static const int kCpuHasAVX512VBMI2 = 0x40000;
+static const int kCpuHasAVX512VBITALG = 0x80000;
+static const int kCpuHasAVX10 = 0x100000;
+static const int kCpuHasAVXVNNI = 0x200000;
+static const int kCpuHasAVXVNNIINT8 = 0x400000;
 
 // These flags are only valid on MIPS processors.
-static const int kCpuHasMIPS = 0x400000;
-static const int kCpuHasMSA = 0x800000;
+static const int kCpuHasMIPS = 0x800000;
+static const int kCpuHasMSA = 0x1000000;
 
 // These flags are only valid on LOONGARCH processors.
 static const int kCpuHasLOONGARCH = 0x2000000;
diff --git a/files/include/libyuv/loongson_intrinsics.h b/include/libyuv/loongson_intrinsics.h
index 1d613def..1d613def 100644
--- a/files/include/libyuv/loongson_intrinsics.h
+++ b/include/libyuv/loongson_intrinsics.h
diff --git a/files/include/libyuv/macros_msa.h b/include/libyuv/macros_msa.h
index b9a44fcc..b9a44fcc 100644
--- a/files/include/libyuv/macros_msa.h
+++ b/include/libyuv/macros_msa.h
diff --git a/files/include/libyuv/mjpeg_decoder.h b/include/libyuv/mjpeg_decoder.h
index 275f8d4c..275f8d4c 100644
--- a/files/include/libyuv/mjpeg_decoder.h
+++ b/include/libyuv/mjpeg_decoder.h
diff --git a/files/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h
index 154f2f21..f9344721 100644
--- a/files/include/libyuv/planar_functions.h
+++ b/include/libyuv/planar_functions.h
@@ -30,7 +30,10 @@ extern "C" {
 #endif
 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
 #if defined(__has_feature)
-#if __has_feature(memory_sanitizer)
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
+#define LIBYUV_DISABLE_NEON
+#endif
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
 #define LIBYUV_DISABLE_X86
 #endif
 #endif
@@ -827,15 +830,6 @@ int ARGBCopyYToAlpha(const uint8_t* src_y,
                      int width,
                      int height);
 
-typedef void (*ARGBBlendRow)(const uint8_t* src_argb0,
-                             const uint8_t* src_argb1,
-                             uint8_t* dst_argb,
-                             int width);
-
-// Get function to Alpha Blend ARGB pixels and store to destination.
-LIBYUV_API
-ARGBBlendRow GetARGBBlend();
-
 // Alpha Blend ARGB images and store to destination.
 // Source is pre-multiplied by alpha using ARGBAttenuate.
 // Alpha of destination is set to 255.
diff --git a/files/include/libyuv/rotate.h b/include/libyuv/rotate.h
index 37460c4a..37460c4a 100644
--- a/files/include/libyuv/rotate.h
+++ b/include/libyuv/rotate.h
diff --git a/files/include/libyuv/rotate_argb.h b/include/libyuv/rotate_argb.h
index 20432949..20432949 100644
--- a/files/include/libyuv/rotate_argb.h
+++ b/include/libyuv/rotate_argb.h
diff --git a/files/include/libyuv/rotate_row.h b/include/libyuv/rotate_row.h
index 2dd8c03d..3e6a2fef 100644
--- a/files/include/libyuv/rotate_row.h
+++ b/include/libyuv/rotate_row.h
@@ -28,7 +28,10 @@ extern "C" {
 #endif
 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
 #if defined(__has_feature)
-#if __has_feature(memory_sanitizer)
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
+#define LIBYUV_DISABLE_NEON
+#endif
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
 #define LIBYUV_DISABLE_X86
 #endif
 #endif
diff --git a/files/include/libyuv/row.h b/include/libyuv/row.h
index 5b244d77..46685a50 100644
--- a/files/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -31,7 +31,10 @@ extern "C" {
 #endif
 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
 #if defined(__has_feature)
-#if __has_feature(memory_sanitizer)
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
+#define LIBYUV_DISABLE_NEON
+#endif
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
 #define LIBYUV_DISABLE_X86
 #endif
 #endif
@@ -161,7 +164,6 @@ extern "C" {
 #define HAS_ARGBSEPIAROW_SSSE3
 #define HAS_ARGBSHADEROW_SSE2
 #define HAS_ARGBSUBTRACTROW_SSE2
-#define HAS_ARGBUNATTENUATEROW_SSE2
 #define HAS_BLENDPLANEROW_SSSE3
 #define HAS_COMPUTECUMULATIVESUMROW_SSE2
 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
@@ -171,9 +173,6 @@ extern "C" {
 #define HAS_SOBELXROW_SSE2
 #define HAS_SOBELXYROW_SSE2
 #define HAS_SOBELYROW_SSE2
-#if !defined(LIBYUV_BIT_EXACT)
-#define HAS_ARGBATTENUATEROW_SSSE3
-#endif
 
 // The following functions fail on gcc/clang 32 bit with fpic and framepointer.
 // caveat: clangcl uses row_win.cc which works.
@@ -241,11 +240,7 @@ extern "C" {
 #define HAS_ARGBADDROW_AVX2
 #define HAS_ARGBMULTIPLYROW_AVX2
 #define HAS_ARGBSUBTRACTROW_AVX2
-#define HAS_ARGBUNATTENUATEROW_AVX2
 #define HAS_BLENDPLANEROW_AVX2
-#if !defined(LIBYUV_BIT_EXACT)
-#define HAS_ARGBATTENUATEROW_AVX2
-#endif
 
 #if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
     defined(_MSC_VER)
@@ -285,14 +280,15 @@ extern "C" {
 #define HAS_ABGRTOAR30ROW_SSSE3
 #define HAS_ABGRTOYJROW_SSSE3
 #define HAS_AR64TOARGBROW_SSSE3
+#define HAS_ARGBATTENUATEROW_SSSE3
 #define HAS_ARGBTOAB64ROW_SSSE3
 #define HAS_ARGBTOAR30ROW_SSSE3
 #define HAS_ARGBTOAR64ROW_SSSE3
+#define HAS_ARGBUNATTENUATEROW_SSE2
 #define HAS_CONVERT16TO8ROW_SSSE3
 #define HAS_CONVERT8TO16ROW_SSE2
-#define HAS_DETILEROW_SSE2
 #define HAS_DETILEROW_16_SSE2
-#define HAS_DETILEROW_16_AVX
+#define HAS_DETILEROW_SSE2
 #define HAS_DETILESPLITUVROW_SSSE3
 #define HAS_DETILETOYUY2_SSE2
 #define HAS_HALFMERGEUVROW_SSSE3
@@ -345,13 +341,16 @@ extern "C" {
 #define HAS_ABGRTOYJROW_AVX2
 #define HAS_ABGRTOYROW_AVX2
 #define HAS_AR64TOARGBROW_AVX2
+#define HAS_ARGBATTENUATEROW_AVX2
 #define HAS_ARGBTOAB64ROW_AVX2
 #define HAS_ARGBTOAR30ROW_AVX2
 #define HAS_ARGBTOAR64ROW_AVX2
 #define HAS_ARGBTORAWROW_AVX2
 #define HAS_ARGBTORGB24ROW_AVX2
+#define HAS_ARGBUNATTENUATEROW_AVX2
 #define HAS_CONVERT16TO8ROW_AVX2
 #define HAS_CONVERT8TO16ROW_AVX2
+#define HAS_DETILEROW_16_AVX
 #define HAS_DIVIDEROW_16_AVX2
 #define HAS_HALFMERGEUVROW_AVX2
 #define HAS_I210TOAR30ROW_AVX2
@@ -795,19 +794,29 @@ extern "C" {
 #endif
 
 #if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
+#define HAS_COPYROW_RVV
+#if __riscv_v_intrinsic == 11000
 #define HAS_AB64TOARGBROW_RVV
+#define HAS_ABGRTOYJROW_RVV
+#define HAS_ABGRTOYROW_RVV
 #define HAS_AR64TOARGBROW_RVV
+#define HAS_AR64TOAB64ROW_RVV
 #define HAS_ARGBATTENUATEROW_RVV
+#define HAS_ARGBBLENDROW_RVV
+#define HAS_ARGBCOPYYTOALPHAROW_RVV
+#define HAS_ARGBEXTRACTALPHAROW_RVV
 #define HAS_ARGBTOAB64ROW_RVV
+#define HAS_ARGBTOABGRROW_RVV
 #define HAS_ARGBTOAR64ROW_RVV
+#define HAS_ARGBTOBGRAROW_RVV
 #define HAS_ARGBTORAWROW_RVV
 #define HAS_ARGBTORGB24ROW_RVV
-#define HAS_ARGBTOYROW_RVV
+#define HAS_ARGBTORGBAROW_RVV
 #define HAS_ARGBTOYJROW_RVV
-#define HAS_ABGRTOYROW_RVV
-#define HAS_ABGRTOYJROW_RVV
+#define HAS_ARGBTOYMATRIXROW_RVV
+#define HAS_ARGBTOYROW_RVV
 #define HAS_BGRATOYROW_RVV
-#define HAS_COPYROW_RVV
+#define HAS_BLENDPLANEROW_RVV
 #define HAS_I400TOARGBROW_RVV
 #define HAS_I422ALPHATOARGBROW_RVV
 #define HAS_I422TOARGBROW_RVV
@@ -822,10 +831,10 @@ extern "C" {
 #define HAS_MERGERGBROW_RVV
 #define HAS_MERGEUVROW_RVV
 #define HAS_MERGEXRGBROW_RVV
-#define HAS_SPLITARGBROW_RVV
-#define HAS_SPLITRGBROW_RVV
-#define HAS_SPLITUVROW_RVV
-#define HAS_SPLITXRGBROW_RVV
+#define HAS_NV12TOARGBROW_RVV
+#define HAS_NV12TORGB24ROW_RVV
+#define HAS_NV21TOARGBROW_RVV
+#define HAS_NV21TORGB24ROW_RVV
 #define HAS_RAWTOARGBROW_RVV
 #define HAS_RAWTORGB24ROW_RVV
 #define HAS_RAWTORGBAROW_RVV
@@ -834,8 +843,16 @@ extern "C" {
 #define HAS_RGB24TOARGBROW_RVV
 #define HAS_RGB24TOYJROW_RVV
 #define HAS_RGB24TOYROW_RVV
-#define HAS_RGBATOYROW_RVV
+#define HAS_RGBATOARGBROW_RVV
 #define HAS_RGBATOYJROW_RVV
+#define HAS_RGBATOYMATRIXROW_RVV
+#define HAS_RGBATOYROW_RVV
+#define HAS_RGBTOYMATRIXROW_RVV
+#define HAS_SPLITARGBROW_RVV
+#define HAS_SPLITRGBROW_RVV
+#define HAS_SPLITUVROW_RVV
+#define HAS_SPLITXRGBROW_RVV
+#endif
 #endif
 
 #if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
@@ -932,14 +949,6 @@ struct YuvConstants {
   free(var##_mem);                  \
   var = NULL
 
-#define align_buffer_64_16(var, size)                                        \
-  void* var##_mem = malloc((size)*2 + 63);                      /* NOLINT */ \
-  uint16_t* var = (uint16_t*)(((intptr_t)var##_mem + 63) & ~63) /* NOLINT */
-
-#define free_aligned_buffer_64_16(var) \
-  free(var##_mem);                     \
-  var = NULL
-
 #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
 #define OMITFP
 #else
@@ -1351,6 +1360,26 @@ void UYVYToARGBRow_LSX(const uint8_t* src_uyvy,
                        uint8_t* dst_argb,
                        const struct YuvConstants* yuvconstants,
                        int width);
+void NV12ToARGBRow_RVV(const uint8_t* src_y,
+                       const uint8_t* src_uv,
+                       uint8_t* dst_argb,
+                       const struct YuvConstants* yuvconstants,
+                       int width);
+void NV21ToARGBRow_RVV(const uint8_t* src_y,
+                       const uint8_t* src_vu,
+                       uint8_t* dst_argb,
+                       const struct YuvConstants* yuvconstants,
+                       int width);
+void NV12ToRGB24Row_RVV(const uint8_t* src_y,
+                        const uint8_t* src_uv,
+                        uint8_t* dst_rgb24,
+                        const struct YuvConstants* yuvconstants,
+                        int width);
+void NV21ToRGB24Row_RVV(const uint8_t* src_y,
+                        const uint8_t* src_vu,
+                        uint8_t* dst_rgb24,
+                        const struct YuvConstants* yuvconstants,
+                        int width);
 
 void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width);
 void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
@@ -3081,6 +3110,9 @@ void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb,
 void ARGBExtractAlphaRow_LSX(const uint8_t* src_argb,
                              uint8_t* dst_a,
                              int width);
+void ARGBExtractAlphaRow_RVV(const uint8_t* src_argb,
+                             uint8_t* dst_a,
+                             int width);
 void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_ptr,
                                   uint8_t* dst_ptr,
                                   int width);
@@ -3100,6 +3132,7 @@ void ARGBExtractAlphaRow_Any_LSX(const uint8_t* src_ptr,
 void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width);
 void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
 void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyYToAlphaRow_RVV(const uint8_t* src, uint8_t* dst, int width);
 void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_ptr,
                                   uint8_t* dst_ptr,
                                   int width);
@@ -3458,8 +3491,13 @@ void ARGBToARGB4444Row_LASX(const uint8_t* src_argb,
                             int width);
 
 void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width);
+void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width);
+void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width);
+void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
 void ARGBToRGB24Row_RVV(const uint8_t* src_argb, uint8_t* dst_rgb24, int width);
 
+void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width);
+void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width);
 void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
 void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
 void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
@@ -3473,6 +3511,8 @@ void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
 void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
 void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
 void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
+void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width);
+void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width);
 void AR64ShuffleRow_C(const uint8_t* src_ar64,
                       uint8_t* dst_ar64,
                       const uint8_t* shuffler,
@@ -3501,6 +3541,8 @@ void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
 void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
 void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
 void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
+void AR64ToAB64Row_RVV(const uint16_t* src_ar64, uint16_t* dst_ab64, int width);
+void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width);
 void ARGBToAR64Row_Any_SSSE3(const uint8_t* src_ptr,
                              uint16_t* dst_ptr,
                              int width);
@@ -4515,6 +4557,10 @@ void ARGBBlendRow_LSX(const uint8_t* src_argb0,
                       const uint8_t* src_argb1,
                       uint8_t* dst_argb,
                       int width);
+void ARGBBlendRow_RVV(const uint8_t* src_argb0,
+                      const uint8_t* src_argb1,
+                      uint8_t* dst_argb,
+                      int width);
 void ARGBBlendRow_C(const uint8_t* src_argb,
                     const uint8_t* src_argb1,
                     uint8_t* dst_argb,
@@ -4541,6 +4587,11 @@ void BlendPlaneRow_Any_AVX2(const uint8_t* y_buf,
                             const uint8_t* v_buf,
                             uint8_t* dst_ptr,
                             int width);
+void BlendPlaneRow_RVV(const uint8_t* src0,
+                       const uint8_t* src1,
+                       const uint8_t* alpha,
+                       uint8_t* dst,
+                       int width);
 void BlendPlaneRow_C(const uint8_t* src0,
                      const uint8_t* src1,
                      const uint8_t* alpha,
@@ -6180,7 +6231,19 @@ void ByteToFloatRow_Any_NEON(const uint8_t* src_ptr,
                              float* dst_ptr,
                              float param,
                              int width);
-
+// Convert FP16 Half Floats to FP32 Floats
+void ConvertFP16ToFP32Row_NEON(const uint16_t* src,  // fp16
+                               float* dst,
+                               int width);
+// Convert a column of FP16 Half Floats to a row of FP32 Floats
+void ConvertFP16ToFP32Column_NEON(const uint16_t* src,  // fp16
+                                  int src_stride,       // stride in elements
+                                  float* dst,
+                                  int width);
+// Convert FP32 Floats to FP16 Half Floats
+void ConvertFP32ToFP16Row_NEON(const float* src,
+                               uint16_t* dst,  // fp16
+                               int width);
 void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
                              uint8_t* dst_argb,
                              int width,
diff --git a/files/include/libyuv/scale.h b/include/libyuv/scale.h
index 443f89c2..bfe4a344 100644
--- a/files/include/libyuv/scale.h
+++ b/include/libyuv/scale.h
@@ -27,39 +27,40 @@ typedef enum FilterMode {
 } FilterModeEnum;
 
 // Scale a YUV plane.
+// Returns 0 if successful.
 LIBYUV_API
-void ScalePlane(const uint8_t* src,
-                int src_stride,
-                int src_width,
-                int src_height,
-                uint8_t* dst,
-                int dst_stride,
-                int dst_width,
-                int dst_height,
-                enum FilterMode filtering);
+int ScalePlane(const uint8_t* src,
+               int src_stride,
+               int src_width,
+               int src_height,
+               uint8_t* dst,
+               int dst_stride,
+               int dst_width,
+               int dst_height,
+               enum FilterMode filtering);
 
 LIBYUV_API
-void ScalePlane_16(const uint16_t* src,
-                   int src_stride,
-                   int src_width,
-                   int src_height,
-                   uint16_t* dst,
-                   int dst_stride,
-                   int dst_width,
-                   int dst_height,
-                   enum FilterMode filtering);
+int ScalePlane_16(const uint16_t* src,
+                  int src_stride,
+                  int src_width,
+                  int src_height,
+                  uint16_t* dst,
+                  int dst_stride,
+                  int dst_width,
+                  int dst_height,
+                  enum FilterMode filtering);
 
 // Sample is expected to be in the low 12 bits.
 LIBYUV_API
-void ScalePlane_12(const uint16_t* src,
-                   int src_stride,
-                   int src_width,
-                   int src_height,
-                   uint16_t* dst,
-                   int dst_stride,
-                   int dst_width,
-                   int dst_height,
-                   enum FilterMode filtering);
+int ScalePlane_12(const uint16_t* src,
+                  int src_stride,
+                  int src_width,
+                  int src_height,
+                  uint16_t* dst,
+                  int dst_stride,
+                  int dst_width,
+                  int dst_height,
+                  enum FilterMode filtering);
 
 // Scales a YUV 4:2:0 image from the src width and height to the
 // dst width and height.
diff --git a/files/include/libyuv/scale_argb.h b/include/libyuv/scale_argb.h
index 7641f18e..7641f18e 100644
--- a/files/include/libyuv/scale_argb.h
+++ b/include/libyuv/scale_argb.h
diff --git a/files/include/libyuv/scale_rgb.h b/include/libyuv/scale_rgb.h
index d17c39fd..d17c39fd 100644
--- a/files/include/libyuv/scale_rgb.h
+++ b/include/libyuv/scale_rgb.h
diff --git a/files/include/libyuv/scale_row.h b/include/libyuv/scale_row.h
index a7957c3f..02ed61ca 100644
--- a/files/include/libyuv/scale_row.h
+++ b/include/libyuv/scale_row.h
@@ -29,7 +29,10 @@ extern "C" {
 #endif
 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
 #if defined(__has_feature)
-#if __has_feature(memory_sanitizer)
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
+#define LIBYUV_DISABLE_NEON
+#endif
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
 #define LIBYUV_DISABLE_X86
 #endif
 #endif
@@ -175,6 +178,38 @@ extern "C" {
 #define HAS_SCALEROWDOWN34_LSX
 #endif
 
+#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
+#define HAS_SCALEADDROW_RVV
+// TODO: Test ScaleARGBRowDownEven_RVV and enable it
+// #define HAS_SCALEARGBROWDOWNEVEN_RVV
+#define HAS_SCALEUVROWDOWN4_RVV
+#define HAS_SCALEUVROWDOWNEVEN_RVV
+#if __riscv_v_intrinsic == 11000
+#define HAS_SCALEARGBROWDOWN2_RVV
+#define HAS_SCALEARGBROWDOWN2BOX_RVV
+#define HAS_SCALEARGBROWDOWN2LINEAR_RVV
+#define HAS_SCALEARGBROWDOWNEVENBOX_RVV
+#define HAS_SCALEROWDOWN2_RVV
+#define HAS_SCALEROWDOWN2BOX_RVV
+#define HAS_SCALEROWDOWN2LINEAR_RVV
+#define HAS_SCALEROWDOWN34_0_BOX_RVV
+#define HAS_SCALEROWDOWN34_1_BOX_RVV
+#define HAS_SCALEROWDOWN34_RVV
+#define HAS_SCALEROWDOWN38_2_BOX_RVV
+#define HAS_SCALEROWDOWN38_3_BOX_RVV
+#define HAS_SCALEROWDOWN38_RVV
+#define HAS_SCALEROWDOWN4_RVV
+#define HAS_SCALEROWDOWN4BOX_RVV
+#define HAS_SCALEROWUP2_BILINEAR_RVV
+#define HAS_SCALEROWUP2_LINEAR_RVV
+#define HAS_SCALEUVROWDOWN2_RVV
+#define HAS_SCALEUVROWDOWN2BOX_RVV
+#define HAS_SCALEUVROWDOWN2LINEAR_RVV
+#define HAS_SCALEUVROWUP2_BILINEAR_RVV
+#define HAS_SCALEUVROWUP2_LINEAR_RVV
+#endif
+#endif
+
 // Scale ARGB vertically with bilinear interpolation.
 void ScalePlaneVertical(int src_height,
                         int dst_width,
@@ -949,6 +984,18 @@ void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr,
                                ptrdiff_t src_stride,
                                uint8_t* dst,
                                int dst_width);
+void ScaleARGBRowDown2_RVV(const uint8_t* src_argb,
+                           ptrdiff_t src_stride,
+                           uint8_t* dst_argb,
+                           int dst_width);
+void ScaleARGBRowDown2Linear_RVV(const uint8_t* src_argb,
+                                 ptrdiff_t src_stride,
+                                 uint8_t* dst_argb,
+                                 int dst_width);
+void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb,
+                              ptrdiff_t src_stride,
+                              uint8_t* dst_argb,
+                              int dst_width);
 void ScaleARGBRowDown2_MSA(const uint8_t* src_argb,
                            ptrdiff_t src_stride,
                            uint8_t* dst_argb,
@@ -1061,6 +1108,16 @@ void ScaleARGBRowDownEvenBox_LSX(const uint8_t* src_argb,
                                  int src_stepx,
                                  uint8_t* dst_argb,
                                  int dst_width);
+void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb,
+                              ptrdiff_t src_stride,
+                              int32_t src_stepx,
+                              uint8_t* dst_argb,
+                              int dst_width);
+void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb,
+                                 ptrdiff_t src_stride,
+                                 int src_stepx,
+                                 uint8_t* dst_argb,
+                                 int dst_width);
 void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr,
                                    ptrdiff_t src_stride,
                                    int src_stepx,
@@ -1143,6 +1200,18 @@ void ScaleUVRowDown2Box_MSA(const uint8_t* src_ptr,
                             ptrdiff_t src_stride,
                             uint8_t* dst_uv,
                             int dst_width);
+void ScaleUVRowDown2_RVV(const uint8_t* src_ptr,
+                         ptrdiff_t src_stride,
+                         uint8_t* dst_uv,
+                         int dst_width);
+void ScaleUVRowDown2Linear_RVV(const uint8_t* src_ptr,
+                               ptrdiff_t src_stride,
+                               uint8_t* dst_uv,
+                               int dst_width);
+void ScaleUVRowDown2Box_RVV(const uint8_t* src_ptr,
+                            ptrdiff_t src_stride,
+                            uint8_t* dst,
+                            int dst_width);
 void ScaleUVRowDown2_Any_SSSE3(const uint8_t* src_ptr,
                                ptrdiff_t src_stride,
                                uint8_t* dst_ptr,
@@ -1203,6 +1272,16 @@ void ScaleUVRowDownEvenBox_NEON(const uint8_t* src_ptr,
                                 int src_stepx,
                                 uint8_t* dst_uv,
                                 int dst_width);
+void ScaleUVRowDown4_RVV(const uint8_t* src_ptr,
+                         ptrdiff_t src_stride,
+                         int32_t src_stepx,
+                         uint8_t* dst_uv,
+                         int dst_width);
+void ScaleUVRowDownEven_RVV(const uint8_t* src_ptr,
+                            ptrdiff_t src_stride,
+                            int32_t src_stepx,
+                            uint8_t* dst_uv,
+                            int dst_width);
 void ScaleUVRowDownEven_MSA(const uint8_t* src_ptr,
                             ptrdiff_t src_stride,
                             int32_t src_stepx,
@@ -1292,6 +1371,14 @@ void ScaleUVRowUp2_Bilinear_Any_NEON(const uint8_t* src_ptr,
                                      uint8_t* dst_ptr,
                                      ptrdiff_t dst_stride,
                                      int dst_width);
+void ScaleUVRowUp2_Linear_RVV(const uint8_t* src_ptr,
+                              uint8_t* dst_ptr,
+                              int dst_width);
+void ScaleUVRowUp2_Bilinear_RVV(const uint8_t* src_ptr,
+                                ptrdiff_t src_stride,
+                                uint8_t* dst_ptr,
+                                ptrdiff_t dst_stride,
+                                int dst_width);
 void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr,
                                    uint16_t* dst_ptr,
                                    int dst_width);
@@ -1744,6 +1831,61 @@ void ScaleRowDown34_1_Box_Any_LSX(const uint8_t* src_ptr,
                                   uint8_t* dst_ptr,
                                   int dst_width);
 
+void ScaleAddRow_RVV(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
+void ScaleRowDown2_RVV(const uint8_t* src_ptr,
+                       ptrdiff_t src_stride,
+                       uint8_t* dst,
+                       int dst_width);
+void ScaleRowDown2Linear_RVV(const uint8_t* src_ptr,
+                             ptrdiff_t src_stride,
+                             uint8_t* dst,
+                             int dst_width);
+void ScaleRowDown2Box_RVV(const uint8_t* src_ptr,
+                          ptrdiff_t src_stride,
+                          uint8_t* dst,
+                          int dst_width);
+
+void ScaleRowDown4_RVV(const uint8_t* src_ptr,
+                       ptrdiff_t src_stride,
+                       uint8_t* dst_ptr,
+                       int dst_width);
+void ScaleRowDown4Box_RVV(const uint8_t* src_ptr,
+                          ptrdiff_t src_stride,
+                          uint8_t* dst_ptr,
+                          int dst_width);
+void ScaleRowDown34_RVV(const uint8_t* src_ptr,
+                        ptrdiff_t src_stride,
+                        uint8_t* dst_ptr,
+                        int dst_width);
+void ScaleRowDown34_0_Box_RVV(const uint8_t* src_ptr,
+                              ptrdiff_t src_stride,
+                              uint8_t* dst_ptr,
+                              int dst_width);
+void ScaleRowDown34_1_Box_RVV(const uint8_t* src_ptr,
+                              ptrdiff_t src_stride,
+                              uint8_t* dst_ptr,
+                              int dst_width);
+void ScaleRowDown38_RVV(const uint8_t* src_ptr,
+                        ptrdiff_t src_stride,
+                        uint8_t* dst,
+                        int dst_width);
+void ScaleRowDown38_3_Box_RVV(const uint8_t* src_ptr,
+                              ptrdiff_t src_stride,
+                              uint8_t* dst_ptr,
+                              int dst_width);
+void ScaleRowDown38_2_Box_RVV(const uint8_t* src_ptr,
+                              ptrdiff_t src_stride,
+                              uint8_t* dst_ptr,
+                              int dst_width);
+
+void ScaleRowUp2_Linear_RVV(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
+                            int dst_width);
+void ScaleRowUp2_Bilinear_RVV(const uint8_t* src_ptr,
+                              ptrdiff_t src_stride,
+                              uint8_t* dst_ptr,
+                              ptrdiff_t dst_stride,
+                              int dst_width);
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
diff --git a/files/include/libyuv/scale_uv.h b/include/libyuv/scale_uv.h
index 8e74e319..8e74e319 100644
--- a/files/include/libyuv/scale_uv.h
+++ b/include/libyuv/scale_uv.h
diff --git a/files/include/libyuv/version.h b/include/libyuv/version.h
index b6623dbb..a9c54400 100644
--- a/files/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 1871
+#define LIBYUV_VERSION 1883
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_
diff --git a/files/include/libyuv/video_common.h b/include/libyuv/video_common.h
index 32b8a521..32b8a521 100644
--- a/files/include/libyuv/video_common.h
+++ b/include/libyuv/video_common.h
diff --git a/infra/config/OWNERS b/infra/config/OWNERS
new file mode 100644
index 00000000..2c4f90a0
--- /dev/null
+++ b/infra/config/OWNERS
@@ -0,0 +1,3 @@
+fbarchard@chromium.org
+mbonadei@chromium.org
+jansson@google.com
diff --git a/files/infra/config/PRESUBMIT.py b/infra/config/PRESUBMIT.py
index f79e08ad..f79e08ad 100644
--- a/files/infra/config/PRESUBMIT.py
+++ b/infra/config/PRESUBMIT.py
diff --git a/files/infra/config/README.md b/infra/config/README.md
index e5e3b5f8..e5e3b5f8 100644
--- a/files/infra/config/README.md
+++ b/infra/config/README.md
diff --git a/files/infra/config/codereview.settings b/infra/config/codereview.settings
index 6d742273..6d742273 100644
--- a/files/infra/config/codereview.settings
+++ b/infra/config/codereview.settings
diff --git a/files/infra/config/commit-queue.cfg b/infra/config/commit-queue.cfg
index 4a8d77f4..4a8d77f4 100644
--- a/files/infra/config/commit-queue.cfg
+++ b/infra/config/commit-queue.cfg
diff --git a/files/infra/config/cr-buildbucket.cfg b/infra/config/cr-buildbucket.cfg
index be9d1d28..7415851b 100644
--- a/files/infra/config/cr-buildbucket.cfg
+++ b/infra/config/cr-buildbucket.cfg
@@ -29,11 +29,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -64,11 +59,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -99,11 +89,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -132,10 +117,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -164,10 +145,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -196,10 +173,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -230,11 +203,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -265,11 +233,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -300,11 +263,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -335,11 +293,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -370,11 +323,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -405,11 +353,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -440,11 +383,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -475,11 +413,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -510,11 +443,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -545,11 +473,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -580,11 +503,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -614,10 +532,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -647,10 +561,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -680,10 +590,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -714,11 +620,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -749,11 +650,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -784,11 +680,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -819,11 +710,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -854,11 +740,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -889,11 +770,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -924,11 +800,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -959,11 +830,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -993,10 +859,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1026,10 +888,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-trusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1114,10 +972,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1146,10 +1000,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1178,10 +1028,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1212,11 +1058,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1247,11 +1088,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1281,10 +1117,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1314,10 +1146,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1348,11 +1176,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1383,11 +1206,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1418,11 +1236,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1453,11 +1266,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1488,11 +1296,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1523,11 +1326,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1558,11 +1356,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1593,11 +1386,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1627,10 +1415,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1660,10 +1444,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1693,10 +1473,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1727,11 +1503,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": true,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1764,11 +1535,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": false,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1799,11 +1565,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": false,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1834,11 +1595,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": false,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1869,11 +1625,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": false,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1904,11 +1655,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": false,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
@@ -1939,11 +1685,6 @@ buckets {
       }
       properties:
         '{'
-        '  "$build/goma": {'
-        '    "enable_ats": false,'
-        '    "server_host": "goma.chromium.org",'
-        '    "use_luci_auth": true'
-        '  },'
         '  "$build/reclient": {'
         '    "instance": "rbe-webrtc-untrusted",'
         '    "metrics_project": "chromium-reclient-metrics"'
diff --git a/files/infra/config/luci-logdog.cfg b/infra/config/luci-logdog.cfg
index adc75bef..adc75bef 100644
--- a/files/infra/config/luci-logdog.cfg
+++ b/infra/config/luci-logdog.cfg
diff --git a/files/infra/config/luci-milo.cfg b/infra/config/luci-milo.cfg
index baf786f2..baf786f2 100644
--- a/files/infra/config/luci-milo.cfg
+++ b/infra/config/luci-milo.cfg
diff --git a/files/infra/config/luci-scheduler.cfg b/infra/config/luci-scheduler.cfg
index 0ec5dd0e..0ec5dd0e 100644
--- a/files/infra/config/luci-scheduler.cfg
+++ b/infra/config/luci-scheduler.cfg
diff --git a/files/infra/config/main.star b/infra/config/main.star
index 7490a599..e83afe4f 100755
--- a/files/infra/config/main.star
+++ b/infra/config/main.star
@@ -8,24 +8,6 @@ lucicfg.check_version("1.30.9")
 LIBYUV_GIT = "https://chromium.googlesource.com/libyuv/libyuv"
 LIBYUV_GERRIT = "https://chromium-review.googlesource.com/libyuv/libyuv"
 
-GOMA_BACKEND_RBE_PROD = {
-    "server_host": "goma.chromium.org",
-    "use_luci_auth": True,
-}
-
-GOMA_BACKEND_RBE_ATS_PROD = {
-    "server_host": "goma.chromium.org",
-    "use_luci_auth": True,
-    "enable_ats": True,
-}
-
-# Disable ATS on Windows CQ/try.
-GOMA_BACKEND_RBE_NO_ATS_PROD = {
-    "server_host": "goma.chromium.org",
-    "use_luci_auth": True,
-    "enable_ats": False,
-}
-
 RECLIENT_CI = {
     "instance": "rbe-webrtc-trusted",
     "metrics_project": "chromium-reclient-metrics",
@@ -80,7 +62,7 @@ luci.project(
     ],
     bindings = [
         luci.binding(
-            roles = "role/swarming.taskTriggerer", # for LED tasks.
+            roles = "role/swarming.taskTriggerer",  # for LED tasks.
             groups = "project-libyuv-admins",
         ),
         luci.binding(
@@ -218,19 +200,6 @@ def get_os_dimensions(os):
         return {"os": "Ubuntu-18.04", "cores": "8", "cpu": "x86-64"}
     return {}
 
-def get_os_properties(os, try_builder = False):
-    if os == "android":
-        return {"$build/goma": GOMA_BACKEND_RBE_PROD}
-    elif os in ("ios", "mac"):
-        return {"$build/goma": GOMA_BACKEND_RBE_PROD}
-    elif os == "win" and try_builder:
-        return {"$build/goma": GOMA_BACKEND_RBE_NO_ATS_PROD}
-    elif os == "win":
-        return {"$build/goma": GOMA_BACKEND_RBE_ATS_PROD}
-    elif os == "linux":
-        return {"$build/goma": GOMA_BACKEND_RBE_ATS_PROD}
-    return {}
-
 def libyuv_ci_builder(name, dimensions, properties, triggered_by):
     return luci.builder(
         name = name,
@@ -268,8 +237,7 @@ def libyuv_try_builder(name, dimensions, properties, recipe_name = "libyuv/libyu
 
 def ci_builder(name, os, category, short_name = None):
     dimensions = get_os_dimensions(os)
-    properties = get_os_properties(os)
-    properties["$build/reclient"] = RECLIENT_CI
+    properties = {"$build/reclient": RECLIENT_CI}
 
     dimensions["pool"] = "luci.flex.ci"
     properties["builder_group"] = "client.libyuv"
@@ -280,8 +248,7 @@ def ci_builder(name, os, category, short_name = None):
 
 def try_builder(name, os, experiment_percentage = None):
     dimensions = get_os_dimensions(os)
-    properties = get_os_properties(os, try_builder = True)
-    properties["$build/reclient"] = RECLIENT_CQ
+    properties = {"$build/reclient": RECLIENT_CQ}
 
     dimensions["pool"] = "luci.flex.try"
     properties["builder_group"] = "tryserver.libyuv"
diff --git a/files/infra/config/project.cfg b/infra/config/project.cfg
index af79cfb2..3c327118 100644
--- a/files/infra/config/project.cfg
+++ b/infra/config/project.cfg
@@ -7,7 +7,7 @@
 name: "libyuv"
 access: "group:all"
 lucicfg {
-  version: "1.39.8"
+  version: "1.39.14"
   package_dir: "."
   config_dir: "."
   entry_point: "main.star"
diff --git a/files/infra/config/realms.cfg b/infra/config/realms.cfg
index 16ffaac9..16ffaac9 100644
--- a/files/infra/config/realms.cfg
+++ b/infra/config/realms.cfg
diff --git a/files/libyuv.gni b/libyuv.gni
index 0a6c4453..343160c3 100644
--- a/files/libyuv.gni
+++ b/libyuv.gni
@@ -7,6 +7,7 @@
 # be found in the AUTHORS file in the root of the source tree.
 
 import("//build/config/arm.gni")
+import("//build/config/loongarch64.gni")
 import("//build/config/mips.gni")
 import("//build_overrides/build.gni")
 
@@ -21,4 +22,8 @@ declare_args() {
       (current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_msa
   libyuv_use_mmi =
       (current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_mmi
+  libyuv_use_lsx =
+      (current_cpu == "loong64") && loongarch64_use_lsx
+  libyuv_use_lasx =
+      (current_cpu == "loong64") && loongarch64_use_lasx
 }
diff --git a/files/libyuv.gyp b/libyuv.gyp
index f73a1a4b..f73a1a4b 100644
--- a/files/libyuv.gyp
+++ b/libyuv.gyp
diff --git a/files/libyuv.gypi b/libyuv.gypi
index 48936aa7..48936aa7 100644
--- a/files/libyuv.gypi
+++ b/libyuv.gypi
diff --git a/files/linux.mk b/linux.mk
index b541b47c..d19a888a 100644
--- a/files/linux.mk
+++ b/linux.mk
@@ -33,6 +33,7 @@ LOCAL_OBJ_FILES := \
 	source/rotate_argb.o       \
 	source/rotate_common.o     \
 	source/rotate_gcc.o        \
+	source/rotate_lsx.o        \
 	source/rotate_msa.o        \
 	source/rotate_neon.o       \
 	source/rotate_neon64.o     \
@@ -40,19 +41,24 @@ LOCAL_OBJ_FILES := \
 	source/row_any.o           \
 	source/row_common.o        \
 	source/row_gcc.o           \
+	source/row_lasx.o          \
+	source/row_lsx.o           \
 	source/row_msa.o           \
 	source/row_neon.o          \
 	source/row_neon64.o        \
+	source/row_rvv.o           \
 	source/row_win.o           \
 	source/scale.o             \
 	source/scale_any.o         \
 	source/scale_argb.o        \
 	source/scale_common.o      \
 	source/scale_gcc.o         \
+	source/scale_lsx.o         \
 	source/scale_msa.o         \
 	source/scale_neon.o        \
 	source/scale_neon64.o      \
 	source/scale_rgb.o         \
+	source/scale_rvv.o         \
 	source/scale_uv.o          \
 	source/scale_win.o         \
 	source/video_common.o
diff --git a/public.mk b/public.mk
index 259ece21..1342307a 100644
--- a/public.mk
+++ b/public.mk
@@ -3,7 +3,7 @@
 # Note that dependencies on NDK are not directly listed since NDK auto adds
 # them.
 
-LIBYUV_INCLUDES := $(LIBYUV_PATH)/files/include
+LIBYUV_INCLUDES := $(LIBYUV_PATH)/include
 
 LIBYUV_C_FLAGS :=
 
diff --git a/files/pylintrc b/pylintrc
index b8bea334..b8bea334 100644
--- a/files/pylintrc
+++ b/pylintrc
diff --git a/files/riscv_script/prepare_toolchain_qemu.sh b/riscv_script/prepare_toolchain_qemu.sh
index 2a901739..2a901739 100755
--- a/files/riscv_script/prepare_toolchain_qemu.sh
+++ b/riscv_script/prepare_toolchain_qemu.sh
diff --git a/files/riscv_script/riscv-clang.cmake b/riscv_script/riscv-clang.cmake
index 47dd5067..e287941f 100644
--- a/files/riscv_script/riscv-clang.cmake
+++ b/riscv_script/riscv-clang.cmake
@@ -28,17 +28,20 @@ set(CMAKE_OBJDUMP "${TOOLCHAIN_PATH}/bin/llvm-objdump")
 set(CMAKE_OBJCOPY "${TOOLCHAIN_PATH}/bin/llvm-objcopy")
 
 # compile options
-message(STATUS "USE_RVV: ${USE_RVV}")
-message(STATUS "USE_AUTO_VECTORIZER: ${USE_AUTO_VECTORIZER}")
-set(RISCV_COMPILER_FLAGS)
-if(USE_RVV)
-  list(APPEND RISCV_COMPILER_FLAGS "-march=rv64gcv")
-  if(NOT USE_AUTO_VECTORIZER)
-    # Disable auto-vectorizer
-    add_compile_options(-fno-vectorize -fno-slp-vectorize)
+set(RISCV_COMPILER_FLAGS "" CACHE STRING "Compile flags")
+# If the user provides RISCV_COMPILER_FLAGS, appending compile flags is avoided.
+if(RISCV_COMPILER_FLAGS STREQUAL "")
+  message(STATUS "USE_RVV: ${USE_RVV}")
+  message(STATUS "USE_AUTO_VECTORIZER: ${USE_AUTO_VECTORIZER}")
+  if(USE_RVV)
+    list(APPEND RISCV_COMPILER_FLAGS "-march=rv64gcv")
+    if(NOT USE_AUTO_VECTORIZER)
+      # Disable auto-vectorizer
+      add_compile_options(-fno-vectorize -fno-slp-vectorize)
+    endif()
+  else()
+    list(APPEND RISCV_COMPILER_FLAGS "-march=rv64gc")
   endif()
-else()
-  list(APPEND RISCV_COMPILER_FLAGS "-march=rv64gc")
 endif()
 message(STATUS "RISCV_COMPILER_FLAGS: ${RISCV_COMPILER_FLAGS}")
 
diff --git a/files/riscv_script/run_qemu.sh b/riscv_script/run_qemu.sh
index 080af3b1..080af3b1 100755
--- a/files/riscv_script/run_qemu.sh
+++ b/riscv_script/run_qemu.sh
diff --git a/files/source/compare.cc b/source/compare.cc
index 50a736bd..50a736bd 100644
--- a/files/source/compare.cc
+++ b/source/compare.cc
diff --git a/files/source/compare_common.cc b/source/compare_common.cc
index d1cab8d2..d1cab8d2 100644
--- a/files/source/compare_common.cc
+++ b/source/compare_common.cc
diff --git a/files/source/compare_gcc.cc b/source/compare_gcc.cc
index 33cbe25d..33cbe25d 100644
--- a/files/source/compare_gcc.cc
+++ b/source/compare_gcc.cc
diff --git a/files/source/compare_msa.cc b/source/compare_msa.cc
index 0b807d37..0b807d37 100644
--- a/files/source/compare_msa.cc
+++ b/source/compare_msa.cc
diff --git a/files/source/compare_neon.cc b/source/compare_neon.cc
index afdd6012..afdd6012 100644
--- a/files/source/compare_neon.cc
+++ b/source/compare_neon.cc
diff --git a/files/source/compare_neon64.cc b/source/compare_neon64.cc
index 70fb9b91..70fb9b91 100644
--- a/files/source/compare_neon64.cc
+++ b/source/compare_neon64.cc
diff --git a/files/source/compare_win.cc b/source/compare_win.cc
index 9bb27f1d..9bb27f1d 100644
--- a/files/source/compare_win.cc
+++ b/source/compare_win.cc
diff --git a/files/source/convert.cc b/source/convert.cc
index b11ab1bf..6ac5bc43 100644
--- a/files/source/convert.cc
+++ b/source/convert.cc
@@ -54,18 +54,25 @@ static int I4xxToI420(const uint8_t* src_y,
   const int dst_y_height = Abs(src_y_height);
   const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1);
   const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1);
+  int r;
   if (src_uv_width <= 0 || src_uv_height == 0) {
     return -1;
   }
   if (dst_y) {
-    ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y,
-               dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear);
+    r = ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y,
+                   dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
   }
-  ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
-             dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
-  ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
-             dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
-  return 0;
+  r = ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
+                 dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
+                 dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
+  return r;
 }
 
 // Copy I420 with optional flipping.
@@ -526,18 +533,25 @@ static int Ix10ToI010(const uint16_t* src_y,
   const int src_uv_height = SUBSAMPLE(height, subsample_y, subsample_y);
   const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1);
   const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1);
+  int r;
   if (width <= 0 || height == 0) {
     return -1;
   }
   if (dst_y) {
-    ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
-                  dst_y_width, dst_y_height, kFilterBilinear);
+    r = ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+                      dst_y_width, dst_y_height, kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
   }
-  ScalePlane_12(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
-                dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
-  ScalePlane_12(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
-                dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
-  return 0;
+  r = ScalePlane_12(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
+                    dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_12(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
+                    dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
+  return r;
 }
 
 LIBYUV_API
@@ -777,6 +791,8 @@ int I422ToNV21(const uint8_t* src_y,
   // Allocate u and v buffers
   align_buffer_64(plane_u, halfwidth * halfheight * 2);
   uint8_t* plane_v = plane_u + halfwidth * halfheight;
+  if (!plane_u)
+    return 1;
 
   I422ToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
              dst_y, dst_stride_y, plane_u, halfwidth, plane_v, halfwidth, width,
@@ -892,6 +908,8 @@ int MT2TToP010(const uint8_t* src_y,
     void (*UnpackMT2T)(const uint8_t* src, uint16_t* dst, size_t size) =
         UnpackMT2T_C;
     align_buffer_64(row_buf, row_buf_size);
+    if (!row_buf)
+      return 1;
 
 #if defined(HAS_UNPACKMT2T_NEON)
     if (TestCpuFlag(kCpuHasNEON)) {
@@ -1092,6 +1110,8 @@ int I422ToNV21(const uint8_t* src_y,
     int awidth = halfwidth * 2;
     align_buffer_64(row_vu_0, awidth * 2);
     uint8_t* row_vu_1 = row_vu_0 + awidth;
+    if (!row_vu_0)
+      return 1;
 
     for (y = 0; y < height - 1; y += 2) {
       MergeUVRow(src_v, src_u, row_vu_0, halfwidth);
@@ -1330,18 +1350,22 @@ int NV12ToNV24(const uint8_t* src_y,
                int dst_stride_uv,
                int width,
                int height) {
+  int r;
   if (width <= 0 || height == 0) {
     return -1;
   }
 
   if (dst_y) {
-    ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
-               Abs(width), Abs(height), kFilterBilinear);
+    r = ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+                   Abs(width), Abs(height), kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
   }
-  UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1),
-          SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width),
-          Abs(height), kFilterBilinear);
-  return 0;
+  r = UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1),
+              SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width),
+              Abs(height), kFilterBilinear);
+  return r;
 }
 
 LIBYUV_API
@@ -1355,17 +1379,21 @@ int NV16ToNV24(const uint8_t* src_y,
                int dst_stride_uv,
                int width,
                int height) {
+  int r;
   if (width <= 0 || height == 0) {
     return -1;
   }
 
   if (dst_y) {
-    ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
-               Abs(width), Abs(height), kFilterBilinear);
+    r = ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+                   Abs(width), Abs(height), kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
   }
-  UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv,
-          dst_stride_uv, Abs(width), Abs(height), kFilterBilinear);
-  return 0;
+  r = UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv,
+              dst_stride_uv, Abs(width), Abs(height), kFilterBilinear);
+  return r;
 }
 
 // Any P[420]1[02] to I[420]1[02] format with mirroring.
@@ -1443,18 +1471,22 @@ int P010ToP410(const uint16_t* src_y,
                int dst_stride_uv,
                int width,
                int height) {
+  int r;
   if (width <= 0 || height == 0) {
     return -1;
   }
 
   if (dst_y) {
-    ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
-                  Abs(width), Abs(height), kFilterBilinear);
+    r = ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+                      Abs(width), Abs(height), kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
   }
-  UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1),
-             SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width),
-             Abs(height), kFilterBilinear);
-  return 0;
+  r = UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1),
+                 SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width),
+                 Abs(height), kFilterBilinear);
+  return r;
 }
 
 LIBYUV_API
@@ -1468,17 +1500,21 @@ int P210ToP410(const uint16_t* src_y,
                int dst_stride_uv,
                int width,
                int height) {
+  int r;
   if (width <= 0 || height == 0) {
     return -1;
   }
 
   if (dst_y) {
-    ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
-                  Abs(width), Abs(height), kFilterBilinear);
+    r = ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+                      Abs(width), Abs(height), kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
   }
-  UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv,
-             dst_stride_uv, Abs(width), Abs(height), kFilterBilinear);
-  return 0;
+  r = UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv,
+                 dst_stride_uv, Abs(width), Abs(height), kFilterBilinear);
+  return r;
 }
 
 // Convert YUY2 to I420.
@@ -2128,6 +2164,11 @@ int ARGBToI420Alpha(const uint8_t* src_argb,
                                                 : ARGBExtractAlphaRow_Any_LSX;
   }
 #endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ARGBExtractAlphaRow = ARGBExtractAlphaRow_RVV;
+  }
+#endif
 
   for (y = 0; y < height - 1; y += 2) {
     ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width);
@@ -2655,6 +2696,8 @@ int RGB24ToI420(const uint8_t* src_rgb24,
     // Allocate 2 rows of ARGB.
     const int row_size = (width * 4 + 31) & ~31;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 #endif
 
     for (y = 0; y < height - 1; y += 2) {
@@ -2831,6 +2874,8 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
     // Allocate 2 rows of ARGB.
     const int row_size = (width * 4 + 31) & ~31;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 #endif
 
     for (y = 0; y < height - 1; y += 2) {
@@ -3010,6 +3055,8 @@ int RAWToI420(const uint8_t* src_raw,
     // Allocate 2 rows of ARGB.
     const int row_size = (width * 4 + 31) & ~31;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 #endif
 
     for (y = 0; y < height - 1; y += 2) {
@@ -3186,6 +3233,8 @@ int RAWToJ420(const uint8_t* src_raw,
     // Allocate 2 rows of ARGB.
     const int row_size = (width * 4 + 31) & ~31;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 #endif
 
     for (y = 0; y < height - 1; y += 2) {
@@ -3364,6 +3413,8 @@ int RGB565ToI420(const uint8_t* src_rgb565,
     // Allocate 2 rows of ARGB.
     const int row_size = (width * 4 + 31) & ~31;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 #endif
     for (y = 0; y < height - 1; y += 2) {
 #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
@@ -3544,6 +3595,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
     // Allocate 2 rows of ARGB.
     const int row_size = (width * 4 + 31) & ~31;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 #endif
 
     for (y = 0; y < height - 1; y += 2) {
@@ -3757,6 +3810,8 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
     // Allocate 2 rows of ARGB.
     const int row_size = (width * 4 + 31) & ~31;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 #endif
 
     for (y = 0; y < height - 1; y += 2) {
diff --git a/files/source/convert_argb.cc b/source/convert_argb.cc
index cc6560de..871fea59 100644
--- a/files/source/convert_argb.cc
+++ b/source/convert_argb.cc
@@ -3003,6 +3003,7 @@ int J400ToARGB(const uint8_t* src_y,
   return 0;
 }
 
+#ifndef __riscv
 // Shuffle table for converting BGRA to ARGB.
 static const uvec8 kShuffleMaskBGRAToARGB = {
     3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u};
@@ -3090,6 +3091,195 @@ int AR64ToAB64(const uint16_t* src_ar64,
   return AR64Shuffle(src_ar64, src_stride_ar64, dst_ab64, dst_stride_ab64,
                      (const uint8_t*)&kShuffleMaskAR64ToAB64, width, height);
 }
+#else
+// Convert BGRA to ARGB (same as ARGBToBGRA).
+LIBYUV_API
+int BGRAToARGB(const uint8_t* src_bgra,
+               int src_stride_bgra,
+               uint8_t* dst_argb,
+               int dst_stride_argb,
+               int width,
+               int height) {
+  return ARGBToBGRA(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb, width,
+                    height);
+}
+
+// Convert ARGB to BGRA.
+LIBYUV_API
+int ARGBToBGRA(const uint8_t* src_argb,
+               int src_stride_argb,
+               uint8_t* dst_bgra,
+               int dst_stride_bgra,
+               int width,
+               int height) {
+  int y;
+  void (*ARGBToBGRARow)(const uint8_t* src_argb, uint8_t* dst_bgra, int width) =
+      ARGBToBGRARow_C;
+  if (!src_argb || !dst_bgra || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 && dst_stride_bgra == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_bgra = 0;
+  }
+
+#if defined(HAS_ARGBTOBGRAROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ARGBToBGRARow = ARGBToBGRARow_RVV;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBToBGRARow(src_argb, dst_bgra, width);
+    src_argb += src_stride_argb;
+    dst_bgra += dst_stride_bgra;
+  }
+  return 0;
+}
+
+// Convert ARGB to ABGR.
+LIBYUV_API
+int ARGBToABGR(const uint8_t* src_argb,
+               int src_stride_argb,
+               uint8_t* dst_abgr,
+               int dst_stride_abgr,
+               int width,
+               int height) {
+  int y;
+  void (*ARGBToABGRRow)(const uint8_t* src_argb, uint8_t* dst_abgr, int width) =
+      ARGBToABGRRow_C;
+  if (!src_argb || !dst_abgr || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 && dst_stride_abgr == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_abgr = 0;
+  }
+
+#if defined(HAS_ARGBTOABGRROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ARGBToABGRRow = ARGBToABGRRow_RVV;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBToABGRRow(src_argb, dst_abgr, width);
+    src_argb += src_stride_argb;
+    dst_abgr += dst_stride_abgr;
+  }
+  return 0;
+}
+
+// Convert ABGR to ARGB (same as ARGBToABGR).
+LIBYUV_API
+int ABGRToARGB(const uint8_t* src_abgr,
+               int src_stride_abgr,
+               uint8_t* dst_argb,
+               int dst_stride_argb,
+               int width,
+               int height) {
+  return ARGBToABGR(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb, width,
+                    height);
+}
+
+// Convert RGBA to ARGB.
+LIBYUV_API
+int RGBAToARGB(const uint8_t* src_rgba,
+               int src_stride_rgba,
+               uint8_t* dst_argb,
+               int dst_stride_argb,
+               int width,
+               int height) {
+  int y;
+  void (*RGBAToARGBRow)(const uint8_t* src_rgba, uint8_t* dst_argb, int width) =
+      RGBAToARGBRow_C;
+  if (!src_rgba || !dst_argb || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_rgba = src_rgba + (height - 1) * src_stride_rgba;
+    src_stride_rgba = -src_stride_rgba;
+  }
+  // Coalesce rows.
+  if (src_stride_rgba == width * 4 && dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_rgba = dst_stride_argb = 0;
+  }
+
+#if defined(HAS_RGBATOARGBROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    RGBAToARGBRow = RGBAToARGBRow_RVV;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    RGBAToARGBRow(src_rgba, dst_argb, width);
+    src_rgba += src_stride_rgba;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Convert AR64 To AB64.
+LIBYUV_API
+int AR64ToAB64(const uint16_t* src_ar64,
+               int src_stride_ar64,
+               uint16_t* dst_ab64,
+               int dst_stride_ab64,
+               int width,
+               int height) {
+  int y;
+  void (*AR64ToAB64Row)(const uint16_t* src_ar64, uint16_t* dst_ab64,
+                        int width) = AR64ToAB64Row_C;
+  if (!src_ar64 || !dst_ab64 || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_ar64 = src_ar64 + (height - 1) * src_stride_ar64;
+    src_stride_ar64 = -src_stride_ar64;
+  }
+  // Coalesce rows.
+  if (src_stride_ar64 == width * 4 && dst_stride_ab64 == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_ar64 = dst_stride_ab64 = 0;
+  }
+
+#if defined(HAS_AR64TOAB64ROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    AR64ToAB64Row = AR64ToAB64Row_RVV;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    AR64ToAB64Row(src_ar64, dst_ab64, width);
+    src_ar64 += src_stride_ar64;
+    dst_ab64 += dst_stride_ab64;
+  }
+  return 0;
+}
+#endif
 
 // Convert RGB24 to ARGB.
 LIBYUV_API
@@ -3853,6 +4043,11 @@ int NV12ToARGBMatrix(const uint8_t* src_y,
     }
   }
 #endif
+#if defined(HAS_NV12TOARGBROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    NV12ToARGBRow = NV12ToARGBRow_RVV;
+  }
+#endif
 
   for (y = 0; y < height; ++y) {
     NV12ToARGBRow(src_y, src_uv, dst_argb, yuvconstants, width);
@@ -3938,6 +4133,11 @@ int NV21ToARGBMatrix(const uint8_t* src_y,
     }
   }
 #endif
+#if defined(HAS_NV21TOARGBROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    NV21ToARGBRow = NV21ToARGBRow_RVV;
+  }
+#endif
 
   for (y = 0; y < height; ++y) {
     NV21ToARGBRow(src_y, src_vu, dst_argb, yuvconstants, width);
@@ -4058,6 +4258,11 @@ int NV12ToRGB24Matrix(const uint8_t* src_y,
     }
   }
 #endif
+#if defined(HAS_NV12TORGB24ROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    NV12ToRGB24Row = NV12ToRGB24Row_RVV;
+  }
+#endif
 
   for (y = 0; y < height; ++y) {
     NV12ToRGB24Row(src_y, src_uv, dst_rgb24, yuvconstants, width);
@@ -4119,6 +4324,11 @@ int NV21ToRGB24Matrix(const uint8_t* src_y,
     }
   }
 #endif
+#if defined(HAS_NV21TORGB24ROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    NV21ToRGB24Row = NV21ToRGB24Row_RVV;
+  }
+#endif
 
   for (y = 0; y < height; ++y) {
     NV21ToRGB24Row(src_y, src_vu, dst_rgb24, yuvconstants, width);
@@ -4460,6 +4670,8 @@ int Android420ToARGBMatrix(const uint8_t* src_y,
 
   // General case fallback creates NV12
   align_buffer_64(plane_uv, halfwidth * 2 * halfheight);
+  if (!plane_uv)
+    return 1;
   dst_uv = plane_uv;
   for (y = 0; y < halfheight; ++y) {
     WeavePixels(src_u, src_v, src_pixel_stride_uv, dst_uv, halfwidth);
@@ -5772,6 +5984,8 @@ int I420ToRGB565Dither(const uint8_t* src_y,
   {
     // Allocate a row of argb.
     align_buffer_64(row_argb, width * 4);
+    if (!row_argb)
+      return 1;
     for (y = 0; y < height; ++y) {
       I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width);
       ARGBToRGB565DitherRow(row_argb, dst_rgb565,
@@ -6020,6 +6234,12 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y,
     ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
   }
 #endif
+#if defined(HAS_SCALEROWUP2_BILINEAR_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_RVV;
+    ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV;
+  }
+#endif
 
   // alloc 4 lines temp
   const int row_size = (width + 31) & ~31;
@@ -6028,6 +6248,8 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y,
   uint8_t* temp_u_2 = row + row_size;
   uint8_t* temp_v_1 = row + row_size * 2;
   uint8_t* temp_v_2 = row + row_size * 3;
+  if (!row)
+    return 1;
 
   ScaleRowUp2_Linear(src_u, temp_u_1, width);
   ScaleRowUp2_Linear(src_v, temp_v_1, width);
@@ -6151,12 +6373,19 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y,
     ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
   }
 #endif
+#if defined(HAS_SCALEROWUP2_LINEAR_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV;
+  }
+#endif
 
   // alloc 2 lines temp
   const int row_size = (width + 31) & ~31;
   align_buffer_64(row, row_size * 2);
   uint8_t* temp_u = row;
   uint8_t* temp_v = row + row_size;
+  if (!row)
+    return 1;
 
   for (y = 0; y < height; ++y) {
     ScaleRowUp2_Linear(src_u, temp_u, width);
@@ -6276,6 +6505,12 @@ static int I420ToRGB24MatrixBilinear(const uint8_t* src_y,
     ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
   }
 #endif
+#if defined(HAS_SCALEROWUP2_BILINEAR_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_RVV;
+    ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV;
+  }
+#endif
 
   // alloc 4 lines temp
   const int row_size = (width + 31) & ~31;
@@ -6284,6 +6519,8 @@ static int I420ToRGB24MatrixBilinear(const uint8_t* src_y,
   uint8_t* temp_u_2 = row + row_size;
   uint8_t* temp_v_1 = row + row_size * 2;
   uint8_t* temp_v_2 = row + row_size * 3;
+  if (!row)
+    return 1;
 
   ScaleRowUp2_Linear(src_u, temp_u_1, width);
   ScaleRowUp2_Linear(src_v, temp_v_1, width);
@@ -6390,6 +6627,8 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y,
   uint16_t* temp_u_2 = (uint16_t*)(row) + row_size;
   uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2;
   uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3;
+  if (!row)
+    return 1;
 
   ScaleRowUp2_Linear_12(src_u, temp_u_1, width);
   ScaleRowUp2_Linear_12(src_v, temp_v_1, width);
@@ -6487,6 +6726,8 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y,
   align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
   uint16_t* temp_u = (uint16_t*)(row);
   uint16_t* temp_v = (uint16_t*)(row) + row_size;
+  if (!row)
+    return 1;
 
   for (y = 0; y < height; ++y) {
     ScaleRowUp2_Linear_12(src_u, temp_u, width);
@@ -6577,6 +6818,8 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y,
   uint16_t* temp_u_2 = (uint16_t*)(row) + row_size;
   uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2;
   uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3;
+  if (!row)
+    return 1;
 
   ScaleRowUp2_Linear_12(src_u, temp_u_1, width);
   ScaleRowUp2_Linear_12(src_v, temp_v_1, width);
@@ -6673,6 +6916,8 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y,
   align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
   uint16_t* temp_u = (uint16_t*)(row);
   uint16_t* temp_v = (uint16_t*)(row) + row_size;
+  if (!row)
+    return 1;
 
   for (y = 0; y < height; ++y) {
     ScaleRowUp2_Linear_12(src_u, temp_u, width);
@@ -6837,6 +7082,12 @@ static int I420AlphaToARGBMatrixBilinear(
     ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
   }
 #endif
+#if defined(HAS_SCALEROWUP2_BILINEAR_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_RVV;
+    ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV;
+  }
+#endif
 
   // alloc 4 lines temp
   const int row_size = (width + 31) & ~31;
@@ -6845,6 +7096,8 @@ static int I420AlphaToARGBMatrixBilinear(
   uint8_t* temp_u_2 = row + row_size;
   uint8_t* temp_v_1 = row + row_size * 2;
   uint8_t* temp_v_2 = row + row_size * 3;
+  if (!row)
+    return 1;
 
   ScaleRowUp2_Linear(src_u, temp_u_1, width);
   ScaleRowUp2_Linear(src_v, temp_v_1, width);
@@ -7032,12 +7285,19 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y,
     ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
   }
 #endif
+#if defined(HAS_SCALEROWUP2_LINEAR_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV;
+  }
+#endif
 
   // alloc 2 lines temp
   const int row_size = (width + 31) & ~31;
   align_buffer_64(row, row_size * 2);
   uint8_t* temp_u = row;
   uint8_t* temp_v = row + row_size;
+  if (!row)
+    return 1;
 
   for (y = 0; y < height; ++y) {
     ScaleRowUp2_Linear(src_u, temp_u, width);
@@ -7179,6 +7439,8 @@ static int I010AlphaToARGBMatrixBilinear(
   uint16_t* temp_u_2 = (uint16_t*)(row) + row_size;
   uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2;
   uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3;
+  if (!row)
+    return 1;
 
   ScaleRowUp2_Linear_12(src_u, temp_u_1, width);
   ScaleRowUp2_Linear_12(src_v, temp_v_1, width);
@@ -7338,6 +7600,8 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y,
   align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
   uint16_t* temp_u = (uint16_t*)(row);
   uint16_t* temp_v = (uint16_t*)(row) + row_size;
+  if (!row)
+    return 1;
 
   for (y = 0; y < height; ++y) {
     ScaleRowUp2_Linear(src_u, temp_u, width);
@@ -7423,6 +7687,8 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y,
   align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
   uint16_t* temp_uv_1 = (uint16_t*)(row);
   uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size;
+  if (!row)
+    return 1;
 
   Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width);
   P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width);
@@ -7512,6 +7778,8 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y,
   const int row_size = (2 * width + 31) & ~31;
   align_buffer_64(row, row_size * sizeof(uint16_t));
   uint16_t* temp_uv = (uint16_t*)(row);
+  if (!row)
+    return 1;
 
   for (y = 0; y < height; ++y) {
     ScaleRowUp2_Linear(src_uv, temp_uv, width);
@@ -7591,6 +7859,8 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y,
   align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
   uint16_t* temp_uv_1 = (uint16_t*)(row);
   uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size;
+  if (!row)
+    return 1;
 
   Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width);
   P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width);
@@ -7680,6 +7950,8 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y,
   const int row_size = (2 * width + 31) & ~31;
   align_buffer_64(row, row_size * sizeof(uint16_t));
   uint16_t* temp_uv = (uint16_t*)(row);
+  if (!row)
+    return 1;
 
   for (y = 0; y < height; ++y) {
     ScaleRowUp2_Linear(src_uv, temp_uv, width);
@@ -7770,12 +8042,19 @@ static int I422ToRGB24MatrixLinear(const uint8_t* src_y,
     ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
   }
 #endif
+#if defined(HAS_SCALEROWUP2_LINEAR_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV;
+  }
+#endif
 
   // alloc 2 lines temp
   const int row_size = (width + 31) & ~31;
   align_buffer_64(row, row_size * 2);
   uint8_t* temp_u = row;
   uint8_t* temp_v = row + row_size;
+  if (!row)
+    return 1;
 
   for (y = 0; y < height; ++y) {
     ScaleRowUp2_Linear(src_u, temp_u, width);
diff --git a/files/source/convert_from.cc b/source/convert_from.cc
index 4102d610..e69da9e9 100644
--- a/files/source/convert_from.cc
+++ b/source/convert_from.cc
@@ -52,19 +52,26 @@ static int I420ToI4xx(const uint8_t* src_y,
   const int dst_y_height = Abs(src_y_height);
   const int src_uv_width = SUBSAMPLE(src_y_width, 1, 1);
   const int src_uv_height = SUBSAMPLE(src_y_height, 1, 1);
+  int r;
   if (src_y_width == 0 || src_y_height == 0 || dst_uv_width <= 0 ||
       dst_uv_height <= 0) {
     return -1;
   }
   if (dst_y) {
-    ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y,
-               dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear);
+    r = ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y,
+                   dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
   }
-  ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
-             dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
-  ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
-             dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
-  return 0;
+  r = ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
+                 dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
+                 dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
+  return r;
 }
 
 // Convert 8 bit YUV to 10 bit.
@@ -223,21 +230,28 @@ int I010ToI410(const uint16_t* src_y,
                int dst_stride_v,
                int width,
                int height) {
+  int r;
   if (width == 0 || height == 0) {
     return -1;
   }
 
   if (dst_y) {
-    ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
-                  Abs(width), Abs(height), kFilterBilinear);
-  }
-  ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1),
-                SUBSAMPLE(height, 1, 1), dst_u, dst_stride_u, Abs(width),
-                Abs(height), kFilterBilinear);
-  ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1),
-                SUBSAMPLE(height, 1, 1), dst_v, dst_stride_v, Abs(width),
-                Abs(height), kFilterBilinear);
-  return 0;
+    r = ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+                      Abs(width), Abs(height), kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
+  }
+  r = ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1),
+                    SUBSAMPLE(height, 1, 1), dst_u, dst_stride_u, Abs(width),
+                    Abs(height), kFilterBilinear);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1),
+                    SUBSAMPLE(height, 1, 1), dst_v, dst_stride_v, Abs(width),
+                    Abs(height), kFilterBilinear);
+  return r;
 }
 
 // 422 chroma to 444 chroma, 10/12 bit version
@@ -256,19 +270,26 @@ int I210ToI410(const uint16_t* src_y,
                int dst_stride_v,
                int width,
                int height) {
+  int r;
   if (width == 0 || height == 0) {
     return -1;
   }
 
   if (dst_y) {
-    ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
-                  Abs(width), Abs(height), kFilterBilinear);
+    r = ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+                      Abs(width), Abs(height), kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
   }
-  ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u,
-                dst_stride_u, Abs(width), Abs(height), kFilterBilinear);
-  ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v,
-                dst_stride_v, Abs(width), Abs(height), kFilterBilinear);
-  return 0;
+  r = ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u,
+                    dst_stride_u, Abs(width), Abs(height), kFilterBilinear);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v,
+                    dst_stride_v, Abs(width), Abs(height), kFilterBilinear);
+  return r;
 }
 
 // 422 chroma is 1/2 width, 1x height
@@ -288,19 +309,26 @@ int I422ToI444(const uint8_t* src_y,
                int dst_stride_v,
                int width,
                int height) {
+  int r;
   if (width == 0 || height == 0) {
     return -1;
   }
 
   if (dst_y) {
-    ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
-               Abs(width), Abs(height), kFilterBilinear);
+    r = ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+                   Abs(width), Abs(height), kFilterBilinear);
+    if (r != 0) {
+      return r;
+    }
   }
-  ScalePlane(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u,
-             dst_stride_u, Abs(width), Abs(height), kFilterBilinear);
-  ScalePlane(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v,
-             dst_stride_v, Abs(width), Abs(height), kFilterBilinear);
-  return 0;
+  r = ScalePlane(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u,
+                 dst_stride_u, Abs(width), Abs(height), kFilterBilinear);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v,
+                 dst_stride_v, Abs(width), Abs(height), kFilterBilinear);
+  return r;
 }
 
 // Copy to I400. Source can be I420,422,444,400,NV12,NV21
diff --git a/files/source/convert_from_argb.cc b/source/convert_from_argb.cc
index c3d037c4..b45de8c8 100644
--- a/files/source/convert_from_argb.cc
+++ b/source/convert_from_argb.cc
@@ -463,6 +463,8 @@ int ARGBToNV12(const uint8_t* src_argb,
     // Allocate a rows of uv.
     align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
     uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+    if (!row_u)
+      return 1;
 
     for (y = 0; y < height - 1; y += 2) {
       ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
@@ -661,6 +663,8 @@ int ARGBToNV21(const uint8_t* src_argb,
     // Allocate a rows of uv.
     align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
     uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+    if (!row_u)
+      return 1;
 
     for (y = 0; y < height - 1; y += 2) {
       ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
@@ -846,6 +850,8 @@ int ABGRToNV12(const uint8_t* src_abgr,
     // Allocate a rows of uv.
     align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
     uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+    if (!row_u)
+      return 1;
 
     for (y = 0; y < height - 1; y += 2) {
       ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width);
@@ -1032,6 +1038,8 @@ int ABGRToNV21(const uint8_t* src_abgr,
     // Allocate a rows of uv.
     align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
     uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+    if (!row_u)
+      return 1;
 
     for (y = 0; y < height - 1; y += 2) {
       ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width);
@@ -1232,6 +1240,8 @@ int ARGBToYUY2(const uint8_t* src_argb,
     align_buffer_64(row_y, ((width + 63) & ~63) * 2);
     uint8_t* row_u = row_y + ((width + 63) & ~63);
     uint8_t* row_v = row_u + ((width + 63) & ~63) / 2;
+    if (!row_y)
+      return 1;
 
     for (y = 0; y < height; ++y) {
       ARGBToUVRow(src_argb, 0, row_u, row_v, width);
@@ -1426,6 +1436,8 @@ int ARGBToUYVY(const uint8_t* src_argb,
     align_buffer_64(row_y, ((width + 63) & ~63) * 2);
     uint8_t* row_u = row_y + ((width + 63) & ~63);
     uint8_t* row_v = row_u + ((width + 63) & ~63) / 2;
+    if (!row_y)
+      return 1;
 
     for (y = 0; y < height; ++y) {
       ARGBToUVRow(src_argb, 0, row_u, row_v, width);
@@ -1527,6 +1539,7 @@ int ARGBToI400(const uint8_t* src_argb,
   return 0;
 }
 
+#ifndef __riscv
 // Shuffle table for converting ARGB to RGBA.
 static const uvec8 kShuffleMaskARGBToRGBA = {
     3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u};
@@ -1542,6 +1555,47 @@ int ARGBToRGBA(const uint8_t* src_argb,
   return ARGBShuffle(src_argb, src_stride_argb, dst_rgba, dst_stride_rgba,
                      (const uint8_t*)(&kShuffleMaskARGBToRGBA), width, height);
 }
+#else
+// Convert ARGB to RGBA. A negative height inverts the image.
+LIBYUV_API
+int ARGBToRGBA(const uint8_t* src_argb,
+               int src_stride_argb,
+               uint8_t* dst_rgba,
+               int dst_stride_rgba,
+               int width,
+               int height) {
+  void (*ARGBToRGBARow)(const uint8_t* src_argb, uint8_t* dst_rgba, int width) =
+      ARGBToRGBARow_C;
+  int rows_left;
+  if (!src_argb || !dst_rgba || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height: begin at the last source row and walk upwards.
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Both images are tightly packed: treat the frame as one long row.
+  if (src_stride_argb == width * 4 && dst_stride_rgba == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_rgba = 0;
+  }
+#if defined(HAS_ARGBTORGBAROW_RVV)
+  // Switch to the RVV row function when the CPU flag is set.
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ARGBToRGBARow = ARGBToRGBARow_RVV;
+  }
+#endif
+  for (rows_left = height; rows_left > 0; --rows_left) {
+    ARGBToRGBARow(src_argb, dst_rgba, width);
+    src_argb += src_stride_argb;
+    dst_rgba += dst_stride_rgba;
+  }
+  return 0;
+}
+#endif
 
 // Convert ARGB To RGB24.
 LIBYUV_API
@@ -3230,14 +3284,21 @@ int RAWToJNV21(const uint8_t* src_raw,
   }
 #endif
   {
+#if defined(HAS_RAWTOYJROW)
     // Allocate a row of uv.
-    align_buffer_64(row_uj, ((halfwidth + 31) & ~31) * 2);
-    uint8_t* row_vj = row_uj + ((halfwidth + 31) & ~31);
-#if !defined(HAS_RAWTOYJROW)
-    // Allocate 2 rows of ARGB.
-    const int row_size = (width * 4 + 31) & ~31;
-    align_buffer_64(row, row_size * 2);
+    const int row_uv_size = ((halfwidth + 31) & ~31);
+    align_buffer_64(row_uj, row_uv_size * 2);
+    uint8_t* row_vj = row_uj + row_uv_size;
+#else
+    // Allocate row of uv and 2 rows of ARGB.
+    const int row_size = ((width * 4 + 31) & ~31);
+    const int row_uv_size = ((halfwidth + 31) & ~31);
+    align_buffer_64(row_uj, row_uv_size * 2 + row_size * 2);
+    uint8_t* row_vj = row_uj + row_uv_size;
+    uint8_t* row = row_vj + row_uv_size;
 #endif
+    if (!row_uj)
+      return 1;
 
     for (y = 0; y < height - 1; y += 2) {
 #if defined(HAS_RAWTOYJROW)
@@ -3269,9 +3330,6 @@ int RAWToJNV21(const uint8_t* src_raw,
       ARGBToYJRow(row, dst_y, width);
 #endif
     }
-#if !defined(HAS_RAWTOYJROW)
-    free_aligned_buffer_64(row);
-#endif
     free_aligned_buffer_64(row_uj);
   }
   return 0;
diff --git a/files/source/convert_jpeg.cc b/source/convert_jpeg.cc
index d7556ee9..d7556ee9 100644
--- a/files/source/convert_jpeg.cc
+++ b/source/convert_jpeg.cc
diff --git a/files/source/convert_to_argb.cc b/source/convert_to_argb.cc
index 84df16c8..84df16c8 100644
--- a/files/source/convert_to_argb.cc
+++ b/source/convert_to_argb.cc
diff --git a/files/source/convert_to_i420.cc b/source/convert_to_i420.cc
index 5869ecd7..5869ecd7 100644
--- a/files/source/convert_to_i420.cc
+++ b/source/convert_to_i420.cc
diff --git a/files/source/cpu_id.cc b/source/cpu_id.cc
index 0c4a1581..eedce16b 100644
--- a/files/source/cpu_id.cc
+++ b/source/cpu_id.cc
@@ -292,10 +292,12 @@ static SAFEBUFFERS int GetCpuFlags(void) {
   int cpu_info0[4] = {0, 0, 0, 0};
   int cpu_info1[4] = {0, 0, 0, 0};
   int cpu_info7[4] = {0, 0, 0, 0};
+  int cpu_einfo7[4] = {0, 0, 0, 0};
   CpuId(0, 0, cpu_info0);
   CpuId(1, 0, cpu_info1);
   if (cpu_info0[0] >= 7) {
     CpuId(7, 0, cpu_info7);
+    CpuId(7, 1, cpu_einfo7);
   }
   cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
              ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
@@ -308,7 +310,9 @@ static SAFEBUFFERS int GetCpuFlags(void) {
       ((GetXCR0() & 6) == 6)) {  // Test OS saves YMM registers
     cpu_info |= kCpuHasAVX | ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
                 ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
-                ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0);
+                ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0) |
+                ((cpu_einfo7[0] & 0x00000010) ? kCpuHasAVXVNNI : 0) |
+                ((cpu_einfo7[3] & 0x00000010) ? kCpuHasAVXVNNIINT8 : 0);
 
     // Detect AVX512bw
     if ((GetXCR0() & 0xe0) == 0xe0) {
@@ -318,8 +322,7 @@ static SAFEBUFFERS int GetCpuFlags(void) {
       cpu_info |= (cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0;
       cpu_info |= (cpu_info7[2] & 0x00000800) ? kCpuHasAVX512VNNI : 0;
       cpu_info |= (cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0;
-      cpu_info |= (cpu_info7[2] & 0x00004000) ? kCpuHasAVX512VPOPCNTDQ : 0;
-      cpu_info |= (cpu_info7[2] & 0x00000100) ? kCpuHasGFNI : 0;
+      cpu_info |= (cpu_einfo7[3] & 0x00080000) ? kCpuHasAVX10 : 0;
     }
   }
 #endif
diff --git a/files/source/mjpeg_decoder.cc b/source/mjpeg_decoder.cc
index 0141da8a..0141da8a 100644
--- a/files/source/mjpeg_decoder.cc
+++ b/source/mjpeg_decoder.cc
diff --git a/files/source/mjpeg_validate.cc b/source/mjpeg_validate.cc
index ba0a03ab..ba0a03ab 100644
--- a/files/source/mjpeg_validate.cc
+++ b/source/mjpeg_validate.cc
diff --git a/files/source/planar_functions.cc b/source/planar_functions.cc
index d115a2a1..1c94e260 100644
--- a/files/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -2783,37 +2783,6 @@ int RGB24Mirror(const uint8_t* src_rgb24,
   return 0;
 }
 
-// Get a blender that optimized for the CPU and pixel count.
-// As there are 6 blenders to choose from, the caller should try to use
-// the same blend function for all pixels if possible.
-LIBYUV_API
-ARGBBlendRow GetARGBBlend() {
-  void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
-                       uint8_t* dst_argb, int width) = ARGBBlendRow_C;
-#if defined(HAS_ARGBBLENDROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ARGBBlendRow = ARGBBlendRow_SSSE3;
-    return ARGBBlendRow;
-  }
-#endif
-#if defined(HAS_ARGBBLENDROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ARGBBlendRow = ARGBBlendRow_NEON;
-  }
-#endif
-#if defined(HAS_ARGBBLENDROW_MSA)
-  if (TestCpuFlag(kCpuHasMSA)) {
-    ARGBBlendRow = ARGBBlendRow_MSA;
-  }
-#endif
-#if defined(HAS_ARGBBLENDROW_LSX)
-  if (TestCpuFlag(kCpuHasLSX)) {
-    ARGBBlendRow = ARGBBlendRow_LSX;
-  }
-#endif
-  return ARGBBlendRow;
-}
-
 // Alpha Blend 2 ARGB images and store to destination.
 LIBYUV_API
 int ARGBBlend(const uint8_t* src_argb0,
@@ -2826,7 +2795,7 @@ int ARGBBlend(const uint8_t* src_argb0,
               int height) {
   int y;
   void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
-                       uint8_t* dst_argb, int width) = GetARGBBlend();
+                       uint8_t* dst_argb, int width) = ARGBBlendRow_C;
   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
     return -1;
   }
@@ -2843,7 +2812,31 @@ int ARGBBlend(const uint8_t* src_argb0,
     height = 1;
     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
   }
-
+#if defined(HAS_ARGBBLENDROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    ARGBBlendRow = ARGBBlendRow_SSSE3;
+  }
+#endif
+#if defined(HAS_ARGBBLENDROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGBBlendRow = ARGBBlendRow_NEON;
+  }
+#endif
+#if defined(HAS_ARGBBLENDROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    ARGBBlendRow = ARGBBlendRow_MSA;
+  }
+#endif
+#if defined(HAS_ARGBBLENDROW_LSX)
+  if (TestCpuFlag(kCpuHasLSX)) {
+    ARGBBlendRow = ARGBBlendRow_LSX;
+  }
+#endif
+#if defined(HAS_ARGBBLENDROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ARGBBlendRow = ARGBBlendRow_RVV;
+  }
+#endif
   for (y = 0; y < height; ++y) {
     ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
     src_argb0 += src_stride_argb0;
@@ -2903,6 +2896,11 @@ int BlendPlane(const uint8_t* src_y0,
     }
   }
 #endif
+#if defined(HAS_BLENDPLANEROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    BlendPlaneRow = BlendPlaneRow_RVV;
+  }
+#endif
 
   for (y = 0; y < height; ++y) {
     BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
@@ -2980,6 +2978,11 @@ int I420Blend(const uint8_t* src_y0,
     }
   }
 #endif
+#if defined(HAS_BLENDPLANEROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    BlendPlaneRow = BlendPlaneRow_RVV;
+  }
+#endif
   if (!IS_ALIGNED(width, 2)) {
     ScaleRowDown2 = ScaleRowDown2Box_Odd_C;
   }
@@ -3016,9 +3019,16 @@ int I420Blend(const uint8_t* src_y0,
     }
   }
 #endif
+#if defined(HAS_SCALEROWDOWN2_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleRowDown2 = ScaleRowDown2Box_RVV;
+  }
+#endif
 
   // Row buffer for intermediate alpha pixels.
   align_buffer_64(halfalpha, halfwidth);
+  if (!halfalpha)
+    return 1;
   for (y = 0; y < height; y += 2) {
     // last row of odd height image use 1 row of alpha instead of 2.
     if (y == (height - 1)) {
@@ -4702,6 +4712,8 @@ int GaussPlane_F32(const float* src,
   {
     // 2 pixels on each side, but aligned out to 16 bytes.
     align_buffer_64(rowbuf, (4 + width + 4) * 4);
+    if (!rowbuf)
+      return 1;
     memset(rowbuf, 0, 16);
     memset(rowbuf + (4 + width) * 4, 0, 16);
     float* row = (float*)(rowbuf + 16);
@@ -4860,6 +4872,8 @@ static int ARGBSobelize(const uint8_t* src_argb,
     uint8_t* row_y0 = row_y + kEdge;
     uint8_t* row_y1 = row_y0 + row_size;
     uint8_t* row_y2 = row_y1 + row_size;
+    if (!rows)
+      return 1;
     ARGBToYJRow(src_argb, row_y0, width);
     row_y0[-1] = row_y0[0];
     memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
@@ -5340,6 +5354,11 @@ int ARGBExtractAlpha(const uint8_t* src_argb,
                                                 : ARGBExtractAlphaRow_Any_LSX;
   }
 #endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ARGBExtractAlphaRow = ARGBExtractAlphaRow_RVV;
+  }
+#endif
 
   for (int y = 0; y < height; ++y) {
     ARGBExtractAlphaRow(src_argb, dst_a, width);
@@ -5391,6 +5410,11 @@ int ARGBCopyYToAlpha(const uint8_t* src_y,
     }
   }
 #endif
+#if defined(HAS_ARGBCOPYYTOALPHAROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_RVV;
+  }
+#endif
 
   for (y = 0; y < height; ++y) {
     ARGBCopyYToAlphaRow(src_y, dst_argb, width);
@@ -5636,6 +5660,8 @@ int UYVYToNV12(const uint8_t* src_uyvy,
     int awidth = halfwidth * 2;
     // row of y and 2 rows of uv
     align_buffer_64(rows, awidth * 3);
+    if (!rows)
+      return 1;
 
     for (y = 0; y < height - 1; y += 2) {
       // Split Y from UV.
diff --git a/files/source/rotate.cc b/source/rotate.cc
index 8d3978c7..3f8332c3 100644
--- a/files/source/rotate.cc
+++ b/source/rotate.cc
@@ -8,6 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <assert.h>
+
 #include "libyuv/rotate.h"
 
 #include "libyuv/convert.h"
@@ -140,6 +142,9 @@ void RotatePlane180(const uint8_t* src,
                     int height) {
   // Swap top and bottom row and mirror the content. Uses a temporary row.
   align_buffer_64(row, width);
+  assert(row);
+  if (!row)
+    return;
   const uint8_t* src_bot = src + src_stride * (height - 1);
   uint8_t* dst_bot = dst + dst_stride * (height - 1);
   int half_height = (height + 1) >> 1;
@@ -489,13 +494,12 @@ int RotatePlane(const uint8_t* src,
   return -1;
 }
 
-LIBYUV_API
-void TransposePlane_16(const uint16_t* src,
-                       int src_stride,
-                       uint16_t* dst,
-                       int dst_stride,
-                       int width,
-                       int height) {
+static void TransposePlane_16(const uint16_t* src,
+                              int src_stride,
+                              uint16_t* dst,
+                              int dst_stride,
+                              int width,
+                              int height) {
   int i = height;
   // Work across the source in 8x8 tiles
   while (i >= 8) {
@@ -544,24 +548,29 @@ static void RotatePlane180_16(const uint16_t* src,
                               int dst_stride,
                               int width,
                               int height) {
-  // Swap top and bottom row and mirror the content. Uses a temporary row.
-  align_buffer_64_16(row, width);
   const uint16_t* src_bot = src + src_stride * (height - 1);
   uint16_t* dst_bot = dst + dst_stride * (height - 1);
   int half_height = (height + 1) >> 1;
   int y;
 
+  // Swap top and bottom row and mirror the content. Uses a temporary row.
+  align_buffer_64(row, width * 2);
+  uint16_t* row_tmp = (uint16_t*)row;
+  assert(row);
+  if (!row)
+    return;
+
   // Odd height will harmlessly mirror the middle row twice.
   for (y = 0; y < half_height; ++y) {
-    CopyRow_16_C(src, row, width);        // Copy top row into buffer
-    MirrorRow_16_C(src_bot, dst, width);  // Mirror bottom row into top row
-    MirrorRow_16_C(row, dst_bot, width);  // Mirror buffer into bottom row
+    CopyRow_16_C(src, row_tmp, width);        // Copy top row into buffer
+    MirrorRow_16_C(src_bot, dst, width);      // Mirror bottom row into top row
+    MirrorRow_16_C(row_tmp, dst_bot, width);  // Mirror buffer into bottom row
     src += src_stride;
     dst += dst_stride;
     src_bot -= src_stride;
     dst_bot -= dst_stride;
   }
-  free_aligned_buffer_64_16(row);
+  free_aligned_buffer_64(row);
 }
 
 LIBYUV_API
@@ -691,6 +700,7 @@ int I422Rotate(const uint8_t* src_y,
                enum RotationMode mode) {
   int halfwidth = (width + 1) >> 1;
   int halfheight = (height + 1) >> 1;
+  int r;
   if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
       !dst_u || !dst_v) {
     return -1;
@@ -726,23 +736,35 @@ int I422Rotate(const uint8_t* src_y,
     case kRotate90:
       RotatePlane90(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
                     height);
-      ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
-                 halfheight, width, kFilterBilinear);
+      r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u,
+                     dst_stride_u, halfheight, width, kFilterBilinear);
+      if (r != 0) {
+        return r;
+      }
       RotatePlane90(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
                     height);
-      ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
-                 halfheight, width, kFilterLinear);
+      r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v,
+                     dst_stride_v, halfheight, width, kFilterLinear);
+      if (r != 0) {
+        return r;
+      }
       RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
       return 0;
     case kRotate270:
       RotatePlane270(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
                      height);
-      ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
-                 halfheight, width, kFilterBilinear);
+      r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u,
+                     dst_stride_u, halfheight, width, kFilterBilinear);
+      if (r != 0) {
+        return r;
+      }
       RotatePlane270(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
                      height);
-      ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
-                 halfheight, width, kFilterLinear);
+      r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v,
+                     dst_stride_v, halfheight, width, kFilterLinear);
+      if (r != 0) {
+        return r;
+      }
       RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
       return 0;
     case kRotate180:
@@ -1056,6 +1078,7 @@ int I210Rotate(const uint16_t* src_y,
                enum RotationMode mode) {
   int halfwidth = (width + 1) >> 1;
   int halfheight = (height + 1) >> 1;
+  int r;
   if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
       !dst_u || !dst_v) {
     return -1;
@@ -1091,23 +1114,35 @@ int I210Rotate(const uint16_t* src_y,
     case kRotate90:
       RotatePlane90_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
                        height);
-      ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
-                    halfheight, width, kFilterBilinear);
+      r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u,
+                        dst_stride_u, halfheight, width, kFilterBilinear);
+      if (r != 0) {
+        return r;
+      }
       RotatePlane90_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
                        height);
-      ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
-                    halfheight, width, kFilterLinear);
+      r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v,
+                        dst_stride_v, halfheight, width, kFilterLinear);
+      if (r != 0) {
+        return r;
+      }
       RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
       return 0;
     case kRotate270:
       RotatePlane270_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
                         height);
-      ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
-                    halfheight, width, kFilterBilinear);
+      r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u,
+                        dst_stride_u, halfheight, width, kFilterBilinear);
+      if (r != 0) {
+        return r;
+      }
       RotatePlane270_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
                         height);
-      ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
-                    halfheight, width, kFilterLinear);
+      r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v,
+                        dst_stride_v, halfheight, width, kFilterLinear);
+      if (r != 0) {
+        return r;
+      }
       RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
                         height);
       return 0;
diff --git a/files/source/rotate_any.cc b/source/rotate_any.cc
index 88ca7876..88ca7876 100644
--- a/files/source/rotate_any.cc
+++ b/source/rotate_any.cc
diff --git a/files/source/rotate_argb.cc b/source/rotate_argb.cc
index c7239010..d55fac4f 100644
--- a/files/source/rotate_argb.cc
+++ b/source/rotate_argb.cc
@@ -69,6 +69,11 @@ static int ARGBTranspose(const uint8_t* src_argb,
     }
   }
 #endif
+#if defined(HAS_SCALEARGBROWDOWNEVEN_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleARGBRowDownEven = ScaleARGBRowDownEven_RVV;
+  }
+#endif
 
   for (i = 0; i < width; ++i) {  // column of source to row of dest.
     ScaleARGBRowDownEven(src_argb, 0, src_pixel_step, dst_argb, height);
@@ -115,7 +120,6 @@ static int ARGBRotate180(const uint8_t* src_argb,
                          int width,
                          int height) {
   // Swap first and last row and mirror the content. Uses a temporary row.
-  align_buffer_64(row, width * 4);
   const uint8_t* src_bot = src_argb + src_stride_argb * (height - 1);
   uint8_t* dst_bot = dst_argb + dst_stride_argb * (height - 1);
   int half_height = (height + 1) >> 1;
@@ -124,6 +128,9 @@ static int ARGBRotate180(const uint8_t* src_argb,
       ARGBMirrorRow_C;
   void (*CopyRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
       CopyRow_C;
+  align_buffer_64(row, width * 4);
+  if (!row)
+    return 1;
 #if defined(HAS_ARGBMIRRORROW_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
diff --git a/files/source/rotate_common.cc b/source/rotate_common.cc
index 4b496d1b..e72608e9 100644
--- a/files/source/rotate_common.cc
+++ b/source/rotate_common.cc
@@ -120,37 +120,6 @@ void TransposeWx8_16_C(const uint16_t* src,
   }
 }
 
-void TransposeUVWx8_16_C(const uint16_t* src,
-                         int src_stride,
-                         uint16_t* dst_a,
-                         int dst_stride_a,
-                         uint16_t* dst_b,
-                         int dst_stride_b,
-                         int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    dst_a[0] = src[0 * src_stride + 0];
-    dst_b[0] = src[0 * src_stride + 1];
-    dst_a[1] = src[1 * src_stride + 0];
-    dst_b[1] = src[1 * src_stride + 1];
-    dst_a[2] = src[2 * src_stride + 0];
-    dst_b[2] = src[2 * src_stride + 1];
-    dst_a[3] = src[3 * src_stride + 0];
-    dst_b[3] = src[3 * src_stride + 1];
-    dst_a[4] = src[4 * src_stride + 0];
-    dst_b[4] = src[4 * src_stride + 1];
-    dst_a[5] = src[5 * src_stride + 0];
-    dst_b[5] = src[5 * src_stride + 1];
-    dst_a[6] = src[6 * src_stride + 0];
-    dst_b[6] = src[6 * src_stride + 1];
-    dst_a[7] = src[7 * src_stride + 0];
-    dst_b[7] = src[7 * src_stride + 1];
-    src += 2;
-    dst_a += dst_stride_a;
-    dst_b += dst_stride_b;
-  }
-}
-
 void TransposeWxH_16_C(const uint16_t* src,
                        int src_stride,
                        uint16_t* dst,
diff --git a/files/source/rotate_gcc.cc b/source/rotate_gcc.cc
index fd5eee05..fd5eee05 100644
--- a/files/source/rotate_gcc.cc
+++ b/source/rotate_gcc.cc
diff --git a/files/source/rotate_lsx.cc b/source/rotate_lsx.cc
index 94a2b91c..94a2b91c 100644
--- a/files/source/rotate_lsx.cc
+++ b/source/rotate_lsx.cc
diff --git a/files/source/rotate_msa.cc b/source/rotate_msa.cc
index 99bdca65..99bdca65 100644
--- a/files/source/rotate_msa.cc
+++ b/source/rotate_msa.cc
diff --git a/files/source/rotate_neon.cc b/source/rotate_neon.cc
index 569a7318..569a7318 100644
--- a/files/source/rotate_neon.cc
+++ b/source/rotate_neon.cc
diff --git a/files/source/rotate_neon64.cc b/source/rotate_neon64.cc
index 95047fa7..95047fa7 100644
--- a/files/source/rotate_neon64.cc
+++ b/source/rotate_neon64.cc
diff --git a/files/source/rotate_win.cc b/source/rotate_win.cc
index a78873f8..a78873f8 100644
--- a/files/source/rotate_win.cc
+++ b/source/rotate_win.cc
diff --git a/files/source/row_any.cc b/source/row_any.cc
index e574543c..e574543c 100644
--- a/files/source/row_any.cc
+++ b/source/row_any.cc
diff --git a/files/source/row_common.cc b/source/row_common.cc
index 8be37fb5..3afc4b4d 100644
--- a/files/source/row_common.cc
+++ b/source/row_common.cc
@@ -48,7 +48,6 @@ extern "C" {
                                    defined(__i386__) || defined(_M_IX86))
 #define LIBYUV_ARGBTOUV_PAVGB 1
 #define LIBYUV_RGBTOU_TRUNCATE 1
-#define LIBYUV_ATTENUATE_DUP 1
 #endif
 #if defined(LIBYUV_BIT_EXACT)
 #define LIBYUV_UNATTENUATE_DUP 1
@@ -282,6 +281,54 @@ void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
   }
 }
 
+void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8_t b = src_argb[0];
+    uint8_t g = src_argb[1];
+    uint8_t r = src_argb[2];
+    uint8_t a = src_argb[3];
+    dst_abgr[0] = r;
+    dst_abgr[1] = g;
+    dst_abgr[2] = b;
+    dst_abgr[3] = a;
+    dst_abgr += 4;
+    src_argb += 4;
+  }
+}
+
+void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8_t b = src_argb[0];
+    uint8_t g = src_argb[1];
+    uint8_t r = src_argb[2];
+    uint8_t a = src_argb[3];
+    dst_bgra[0] = a;
+    dst_bgra[1] = r;
+    dst_bgra[2] = g;
+    dst_bgra[3] = b;
+    dst_bgra += 4;
+    src_argb += 4;
+  }
+}
+
+void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgba, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8_t b = src_argb[0];
+    uint8_t g = src_argb[1];
+    uint8_t r = src_argb[2];
+    uint8_t a = src_argb[3];
+    dst_rgba[0] = a;
+    dst_rgba[1] = b;
+    dst_rgba[2] = g;
+    dst_rgba[3] = r;
+    dst_rgba += 4;
+    src_argb += 4;
+  }
+}
+
 void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
   int x;
   for (x = 0; x < width; ++x) {
@@ -310,6 +357,22 @@ void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
   }
 }
 
+void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8_t a = src_rgba[0];
+    uint8_t b = src_rgba[1];
+    uint8_t g = src_rgba[2];
+    uint8_t r = src_rgba[3];
+    dst_argb[0] = b;
+    dst_argb[1] = g;
+    dst_argb[2] = r;
+    dst_argb[3] = a;
+    dst_argb += 4;
+    src_rgba += 4;
+  }
+}
+
 void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
   int x;
   for (x = 0; x < width - 1; x += 2) {
@@ -518,6 +581,22 @@ void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
   }
 }
 
+void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint16_t b = src_ar64[0];
+    uint16_t g = src_ar64[1];
+    uint16_t r = src_ar64[2];
+    uint16_t a = src_ar64[3];
+    dst_ab64[0] = r;
+    dst_ab64[1] = g;
+    dst_ab64[2] = b;
+    dst_ab64[3] = a;
+    dst_ab64 += 4;
+    src_ar64 += 4;
+  }
+}
+
 // TODO(fbarchard): Make shuffle compatible with SIMD versions
 void AR64ShuffleRow_C(const uint8_t* src_ar64,
                       uint8_t* dst_ar64,
@@ -1876,9 +1955,10 @@ static __inline void YPixel(uint8_t y,
   int yg = yuvconstants->kYToRgb[0];
 #endif
   uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
-  *b = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
-  *g = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
-  *r = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
+  uint8_t b8 = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
+  *b = b8;
+  *g = b8;
+  *r = b8;
 }
 
 void I444ToARGBRow_C(const uint8_t* src_y,
@@ -3369,12 +3449,7 @@ void BlendPlaneRow_C(const uint8_t* src0,
 }
 #undef UBLEND
 
-#if LIBYUV_ATTENUATE_DUP
-// This code mimics the SSSE3 version for better testability.
-#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
-#else
-#define ATTENUATE(f, a) (f * a + 128) >> 8
-#endif
+#define ATTENUATE(f, a) (f * a + 255) >> 8
 
 // Multiply source RGB by alpha and store to destination.
 void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
diff --git a/files/source/row_gcc.cc b/source/row_gcc.cc
index e94fd04d..d8074987 100644
--- a/files/source/row_gcc.cc
+++ b/source/row_gcc.cc
@@ -7441,93 +7441,106 @@ void BlendPlaneRow_AVX2(const uint8_t* src0,
 
 #ifdef HAS_ARGBATTENUATEROW_SSSE3
 // Shuffle table duplicating alpha.
-static const uvec8 kShuffleAlpha0 = {3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u,
-                                     7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u};
-static const uvec8 kShuffleAlpha1 = {11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
-                                     15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u};
+static const vec8 kAttenuateShuffle = {6,    -128, 6,    -128, 6,  -128,
+                                       -128, -128, 14,   -128, 14, -128,
+                                       14,   -128, -128, -128};
+
 // Attenuate 4 pixels at a time.
 void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb,
                             uint8_t* dst_argb,
                             int width) {
   asm volatile(
-      "pcmpeqb     %%xmm3,%%xmm3                 \n"
-      "pslld       $0x18,%%xmm3                  \n"
       "movdqa      %3,%%xmm4                     \n"
-      "movdqa      %4,%%xmm5                     \n"
+      "pcmpeqb     %%xmm5,%%xmm5                 \n"
+      "pslld       $0x18,%%xmm5                  \n"
+      "pxor        %%xmm6,%%xmm6                 \n"
+      "pcmpeqb     %%xmm7,%%xmm7                 \n"
+      "punpcklbw   %%xmm6,%%xmm7                 \n"
+      "sub         %0,%1                         \n"
 
       // 4 pixel loop.
       LABELALIGN
       "1:                                        \n"
-      "movdqu      (%0),%%xmm0                   \n"
-      "pshufb      %%xmm4,%%xmm0                 \n"
-      "movdqu      (%0),%%xmm1                   \n"
-      "punpcklbw   %%xmm1,%%xmm1                 \n"
-      "pmulhuw     %%xmm1,%%xmm0                 \n"
-      "movdqu      (%0),%%xmm1                   \n"
-      "pshufb      %%xmm5,%%xmm1                 \n"
-      "movdqu      (%0),%%xmm2                   \n"
-      "punpckhbw   %%xmm2,%%xmm2                 \n"
-      "pmulhuw     %%xmm2,%%xmm1                 \n"
-      "movdqu      (%0),%%xmm2                   \n"
-      "lea         0x10(%0),%0                   \n"
-      "pand        %%xmm3,%%xmm2                 \n"
+      "movdqu      (%0),%%xmm6                   \n"
+      "movdqa      %%xmm6,%%xmm0                 \n"
+      "movdqa      %%xmm6,%%xmm1                 \n"
+      "punpcklbw   %%xmm5,%%xmm0                 \n"
+      "punpckhbw   %%xmm5,%%xmm1                 \n"
+      "movdqa      %%xmm0,%%xmm2                 \n"
+      "movdqa      %%xmm1,%%xmm3                 \n"
+      "pshufb      %%xmm4,%%xmm2                 \n"  // a,a,a,0
+      "pshufb      %%xmm4,%%xmm3                 \n"
+      "pmullw      %%xmm2,%%xmm0                 \n"  // rgb * alpha
+      "pmullw      %%xmm3,%%xmm1                 \n"
+      "paddw       %%xmm7,%%xmm0                 \n"  // + 255
+      "paddw       %%xmm7,%%xmm1                 \n"
       "psrlw       $0x8,%%xmm0                   \n"
       "psrlw       $0x8,%%xmm1                   \n"
       "packuswb    %%xmm1,%%xmm0                 \n"
-      "por         %%xmm2,%%xmm0                 \n"
-      "movdqu      %%xmm0,(%1)                   \n"
-      "lea         0x10(%1),%1                   \n"
+      "pand        %%xmm5,%%xmm6                 \n"
+      "por         %%xmm6,%%xmm0                 \n"
+      "movdqu      %%xmm0,(%0,%1)                \n"
+      "lea         0x10(%0),%0                   \n"
       "sub         $0x4,%2                       \n"
       "jg          1b                            \n"
-      : "+r"(src_argb),       // %0
-        "+r"(dst_argb),       // %1
-        "+r"(width)           // %2
-      : "m"(kShuffleAlpha0),  // %3
-        "m"(kShuffleAlpha1)   // %4
-      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+      : "+r"(src_argb),         // %0
+        "+r"(dst_argb),         // %1
+        "+r"(width)             // %2
+      : "m"(kAttenuateShuffle)  // %3
+      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+        "xmm7");
 }
 #endif  // HAS_ARGBATTENUATEROW_SSSE3
 
 #ifdef HAS_ARGBATTENUATEROW_AVX2
+
 // Shuffle table duplicating alpha.
-static const uvec8 kShuffleAlpha_AVX2 = {6u,   7u,   6u,   7u,  6u,  7u,
-                                         128u, 128u, 14u,  15u, 14u, 15u,
-                                         14u,  15u,  128u, 128u};
+static const lvec8 kAttenuateShuffle_AVX2 = {
+    6,    -128, 6,    -128, 6,    -128, -128, -128, 14,   -128, 14,
+    -128, 14,   -128, -128, -128, 22,   -128, 22,   -128, 22,   -128,
+    -128, -128, 30,   -128, 30,   -128, 30,   -128, -128, -128};
+
 // Attenuate 8 pixels at a time.
 void ARGBAttenuateRow_AVX2(const uint8_t* src_argb,
                            uint8_t* dst_argb,
                            int width) {
   asm volatile(
-      "vbroadcastf128 %3,%%ymm4                  \n"
+      "vmovdqa     %3,%%ymm4                     \n"
       "vpcmpeqb    %%ymm5,%%ymm5,%%ymm5          \n"
       "vpslld      $0x18,%%ymm5,%%ymm5           \n"
+      "vpxor       %%ymm6,%%ymm6,%%ymm6          \n"
+      "vpcmpeqb    %%ymm7,%%ymm7,%%ymm7          \n"
+      "vpunpcklbw  %%ymm6,%%ymm7,%%ymm7          \n"
       "sub         %0,%1                         \n"
 
       // 8 pixel loop.
       LABELALIGN
       "1:                                        \n"
       "vmovdqu     (%0),%%ymm6                   \n"
-      "vpunpcklbw  %%ymm6,%%ymm6,%%ymm0          \n"
-      "vpunpckhbw  %%ymm6,%%ymm6,%%ymm1          \n"
+      "vpunpcklbw  %%ymm5,%%ymm6,%%ymm0          \n"
+      "vpunpckhbw  %%ymm5,%%ymm6,%%ymm1          \n"
       "vpshufb     %%ymm4,%%ymm0,%%ymm2          \n"
       "vpshufb     %%ymm4,%%ymm1,%%ymm3          \n"
-      "vpmulhuw    %%ymm2,%%ymm0,%%ymm0          \n"
-      "vpmulhuw    %%ymm3,%%ymm1,%%ymm1          \n"
-      "vpand       %%ymm5,%%ymm6,%%ymm6          \n"
+      "vpmullw     %%ymm2,%%ymm0,%%ymm0          \n"
+      "vpmullw     %%ymm3,%%ymm1,%%ymm1          \n"
+      "vpaddw      %%ymm7,%%ymm0,%%ymm0          \n"
+      "vpaddw      %%ymm7,%%ymm1,%%ymm1          \n"
       "vpsrlw      $0x8,%%ymm0,%%ymm0            \n"
       "vpsrlw      $0x8,%%ymm1,%%ymm1            \n"
       "vpackuswb   %%ymm1,%%ymm0,%%ymm0          \n"
-      "vpor        %%ymm6,%%ymm0,%%ymm0          \n"
+      "vpand       %%ymm5,%%ymm6,%%ymm1          \n"
+      "vpor        %%ymm1,%%ymm0,%%ymm0          \n"
       "vmovdqu     %%ymm0,0x00(%0,%1,1)          \n"
       "lea         0x20(%0),%0                   \n"
       "sub         $0x8,%2                       \n"
       "jg          1b                            \n"
       "vzeroupper                                \n"
-      : "+r"(src_argb),          // %0
-        "+r"(dst_argb),          // %1
-        "+r"(width)              // %2
-      : "m"(kShuffleAlpha_AVX2)  // %3
-      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+      : "+r"(src_argb),              // %0
+        "+r"(dst_argb),              // %1
+        "+r"(width)                  // %2
+      : "m"(kAttenuateShuffle_AVX2)  // %3
+      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+        "xmm7");
 }
 #endif  // HAS_ARGBATTENUATEROW_AVX2
 
diff --git a/files/source/row_lasx.cc b/source/row_lasx.cc
index 1082ad80..be85022e 100644
--- a/files/source/row_lasx.cc
+++ b/source/row_lasx.cc
@@ -543,8 +543,8 @@ void I422ToARGB4444Row_LASX(const uint8_t* src_y,
   __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
   __m256i vec_ubvr, vec_ugvg;
   __m256i const_0x80 = __lasx_xvldi(0x80);
-  __m256i alpha = {0xF000F000F000F000, 0xF000F000F000F000, 0xF000F000F000F000,
-                   0xF000F000F000F000};
+  __m256i alpha = (__m256i)v4u64{0xF000F000F000F000, 0xF000F000F000F000,
+                                 0xF000F000F000F000, 0xF000F000F000F000};
   __m256i mask = {0x00F000F000F000F0, 0x00F000F000F000F0, 0x00F000F000F000F0,
                   0x00F000F000F000F0};
 
@@ -595,8 +595,8 @@ void I422ToARGB1555Row_LASX(const uint8_t* src_y,
   __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
   __m256i vec_ubvr, vec_ugvg;
   __m256i const_0x80 = __lasx_xvldi(0x80);
-  __m256i alpha = {0x8000800080008000, 0x8000800080008000, 0x8000800080008000,
-                   0x8000800080008000};
+  __m256i alpha = (__m256i)v4u64{0x8000800080008000, 0x8000800080008000,
+                                 0x8000800080008000, 0x8000800080008000};
 
   YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb);
   vec_ubvr = __lasx_xvilvl_h(vec_ub, vec_vr);
@@ -799,8 +799,8 @@ void ARGBToUVRow_LASX(const uint8_t* src_argb0,
                         0x0009000900090009, 0x0009000900090009};
   __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002,
                      0x0000000700000003};
-  __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080,
-                          0x8080808080808080, 0x8080808080808080};
+  __m256i const_0x8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+                                        0x8080808080808080, 0x8080808080808080};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64,
@@ -1037,8 +1037,8 @@ void ARGBToUV444Row_LASX(const uint8_t* src_argb,
   __m256i const_38 = __lasx_xvldi(38);
   __m256i const_94 = __lasx_xvldi(94);
   __m256i const_18 = __lasx_xvldi(18);
-  __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080,
-                          0x8080808080808080, 0x8080808080808080};
+  __m256i const_0x8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+                                        0x8080808080808080, 0x8080808080808080};
   __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002,
                      0x0000000700000003};
   for (x = 0; x < len; x++) {
@@ -1609,8 +1609,8 @@ void ARGB1555ToUVRow_LASX(const uint8_t* src_argb1555,
   __m256i const_38 = __lasx_xvldi(0x413);
   __m256i const_94 = __lasx_xvldi(0x42F);
   __m256i const_18 = __lasx_xvldi(0x409);
-  __m256i const_8080 = {0x8080808080808080, 0x8080808080808080,
-                        0x8080808080808080, 0x8080808080808080};
+  __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+                                      0x8080808080808080, 0x8080808080808080};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lasx_xvld, src_argb1555, 0, src_argb1555, 32, next_argb1555, 0,
@@ -1726,8 +1726,8 @@ void RGB565ToUVRow_LASX(const uint8_t* src_rgb565,
   __m256i const_38 = __lasx_xvldi(0x413);
   __m256i const_94 = __lasx_xvldi(0x42F);
   __m256i const_18 = __lasx_xvldi(0x409);
-  __m256i const_8080 = {0x8080808080808080, 0x8080808080808080,
-                        0x8080808080808080, 0x8080808080808080};
+  __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+                                      0x8080808080808080, 0x8080808080808080};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lasx_xvld, src_rgb565, 0, src_rgb565, 32, next_rgb565, 0,
@@ -1793,8 +1793,8 @@ void RGB24ToUVRow_LASX(const uint8_t* src_rgb24,
   __m256i const_38 = __lasx_xvldi(0x413);
   __m256i const_94 = __lasx_xvldi(0x42F);
   __m256i const_18 = __lasx_xvldi(0x409);
-  __m256i const_8080 = {0x8080808080808080, 0x8080808080808080,
-                        0x8080808080808080, 0x8080808080808080};
+  __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+                                      0x8080808080808080, 0x8080808080808080};
   __m256i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18,
                       0x15120F0C09060300, 0x00000000001E1B18};
   __m256i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908,
@@ -1856,8 +1856,8 @@ void RAWToUVRow_LASX(const uint8_t* src_raw,
   __m256i const_38 = __lasx_xvldi(0x413);
   __m256i const_94 = __lasx_xvldi(0x42F);
   __m256i const_18 = __lasx_xvldi(0x409);
-  __m256i const_8080 = {0x8080808080808080, 0x8080808080808080,
-                        0x8080808080808080, 0x8080808080808080};
+  __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+                                      0x8080808080808080, 0x8080808080808080};
   __m256i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18,
                       0x15120F0C09060300, 0x00000000001E1B18};
   __m256i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908,
@@ -2242,8 +2242,8 @@ void ARGBToUVJRow_LASX(const uint8_t* src_argb,
   __m256i const_21 = __lasx_xvldi(0x415);
   __m256i const_53 = __lasx_xvldi(0x435);
   __m256i const_10 = __lasx_xvldi(0x40A);
-  __m256i const_8080 = {0x8080808080808080, 0x8080808080808080,
-                        0x8080808080808080, 0x8080808080808080};
+  __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+                                      0x8080808080808080, 0x8080808080808080};
   __m256i shuff = {0x1614060412100200, 0x1E1C0E0C1A180A08, 0x1715070513110301,
                    0x1F1D0F0D1B190B09};
 
diff --git a/files/source/row_lsx.cc b/source/row_lsx.cc
index e626072a..fa088c9e 100644
--- a/files/source/row_lsx.cc
+++ b/source/row_lsx.cc
@@ -565,7 +565,7 @@ void I422ToARGB4444Row_LSX(const uint8_t* src_y,
   __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
   __m128i vec_ubvr, vec_ugvg;
   __m128i const_80 = __lsx_vldi(0x80);
-  __m128i alpha = {0xF000F000F000F000, 0xF000F000F000F000};
+  __m128i alpha = (__m128i)v2u64{0xF000F000F000F000, 0xF000F000F000F000};
   __m128i mask = {0x00F000F000F000F0, 0x00F000F000F000F0};
 
   YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
@@ -612,7 +612,7 @@ void I422ToARGB1555Row_LSX(const uint8_t* src_y,
   __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
   __m128i vec_ubvr, vec_ugvg;
   __m128i const_80 = __lsx_vldi(0x80);
-  __m128i alpha = {0x8000800080008000, 0x8000800080008000};
+  __m128i alpha = (__m128i)v2u64{0x8000800080008000, 0x8000800080008000};
 
   YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
   vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
@@ -792,7 +792,7 @@ void ARGBToUVRow_LSX(const uint8_t* src_argb0,
   __m128i const_0x26 = {0x0013001300130013, 0x0013001300130013};
   __m128i const_0x5E = {0x002f002f002f002f, 0x002f002f002f002f};
   __m128i const_0x12 = {0x0009000900090009, 0x0009000900090009};
-  __m128i const_0x8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_0x8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, src_argb0, 0, src_argb0, 16, src_argb0, 32, src_argb0,
               48, src0, src1, src2, src3);
@@ -991,7 +991,7 @@ void ARGBToUV444Row_LSX(const uint8_t* src_argb,
   __m128i const_38 = __lsx_vldi(38);
   __m128i const_94 = __lsx_vldi(94);
   __m128i const_18 = __lsx_vldi(18);
-  __m128i const_0x8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_0x8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
               src0, src1, src2, src3);
@@ -1533,7 +1533,7 @@ void ARGB1555ToUVRow_LSX(const uint8_t* src_argb1555,
   __m128i const_38 = __lsx_vldi(0x413);
   __m128i const_94 = __lsx_vldi(0x42F);
   __m128i const_18 = __lsx_vldi(0x409);
-  __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, src_argb1555, 0, src_argb1555, 16, next_argb1555, 0,
@@ -1642,7 +1642,7 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565,
   __m128i const_38 = __lsx_vldi(0x413);
   __m128i const_94 = __lsx_vldi(0x42F);
   __m128i const_18 = __lsx_vldi(0x409);
-  __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, src_rgb565, 0, src_rgb565, 16, next_rgb565, 0,
@@ -1703,7 +1703,7 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24,
   __m128i const_38 = __lsx_vldi(0x413);
   __m128i const_94 = __lsx_vldi(0x42F);
   __m128i const_18 = __lsx_vldi(0x409);
-  __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
   __m128i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18};
   __m128i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908};
   __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19};
@@ -1756,7 +1756,7 @@ void RAWToUVRow_LSX(const uint8_t* src_raw,
   __m128i const_38 = __lsx_vldi(0x413);
   __m128i const_94 = __lsx_vldi(0x42F);
   __m128i const_18 = __lsx_vldi(0x409);
-  __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
   __m128i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18};
   __m128i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908};
   __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19};
@@ -1991,7 +1991,7 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra,
   __m128i const_38 = __lsx_vldi(0x413);
   __m128i const_94 = __lsx_vldi(0x42F);
   __m128i const_18 = __lsx_vldi(0x409);
-  __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, src_bgra, 32, src_bgra, 48,
@@ -2039,7 +2039,7 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr,
   __m128i const_38 = __lsx_vldi(0x413);
   __m128i const_94 = __lsx_vldi(0x42F);
   __m128i const_18 = __lsx_vldi(0x409);
-  __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48,
@@ -2087,7 +2087,7 @@ void RGBAToUVRow_LSX(const uint8_t* src_rgba,
   __m128i const_38 = __lsx_vldi(0x413);
   __m128i const_94 = __lsx_vldi(0x42F);
   __m128i const_18 = __lsx_vldi(0x409);
-  __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48,
@@ -2136,7 +2136,7 @@ void ARGBToUVJRow_LSX(const uint8_t* src_argb,
   __m128i const_21 = __lsx_vldi(0x415);
   __m128i const_53 = __lsx_vldi(0x435);
   __m128i const_10 = __lsx_vldi(0x40A);
-  __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+  __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
@@ -2514,7 +2514,7 @@ void ARGBBlendRow_LSX(const uint8_t* src_argb,
   __m128i const_256 = __lsx_vldi(0x500);
   __m128i zero = __lsx_vldi(0);
   __m128i alpha = __lsx_vldi(0xFF);
-  __m128i control = {0xFF000000FF000000, 0xFF000000FF000000};
+  __m128i control = (__m128i)v2u64{0xFF000000FF000000, 0xFF000000FF000000};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb1, 0, src_argb1, 16,
@@ -2560,7 +2560,7 @@ void ARGBQuantizeRow_LSX(uint8_t* dst_argb,
   __m128i vec_offset = __lsx_vreplgr2vr_b(interval_offset);
   __m128i vec_scale = __lsx_vreplgr2vr_w(scale);
   __m128i zero = __lsx_vldi(0);
-  __m128i control = {0xFF000000FF000000, 0xFF000000FF000000};
+  __m128i control = (__m128i)v2u64{0xFF000000FF000000, 0xFF000000FF000000};
 
   for (x = 0; x < len; x++) {
     DUP4_ARG2(__lsx_vld, dst_argb, 0, dst_argb, 16, dst_argb, 32, dst_argb, 48,
diff --git a/files/source/row_msa.cc b/source/row_msa.cc
index b7d5bb5e..b7d5bb5e 100644
--- a/files/source/row_msa.cc
+++ b/source/row_msa.cc
diff --git a/files/source/row_neon.cc b/source/row_neon.cc
index 4ed13638..31142a90 100644
--- a/files/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -1827,19 +1827,27 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
   );
 }
 
+struct RgbUVConstants {
+  uint8_t kRGBToU[4];
+  uint8_t kRGBToV[4];
+};
+
 // 8x1 pixels.
-void ARGBToUV444Row_NEON(const uint8_t* src_argb,
-                         uint8_t* dst_u,
-                         uint8_t* dst_v,
-                         int width) {
-  asm volatile(
-      "vmov.u8     d24, #112                     \n"  // UB / VR 0.875
-                                                      // coefficient
-      "vmov.u8     d25, #74                      \n"  // UG -0.5781 coefficient
-      "vmov.u8     d26, #38                      \n"  // UR -0.2969 coefficient
-      "vmov.u8     d27, #18                      \n"  // VB -0.1406 coefficient
-      "vmov.u8     d28, #94                      \n"  // VG -0.7344 coefficient
+void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb,
+                               uint8_t* dst_u,
+                               uint8_t* dst_v,
+                               int width,
+                               const struct RgbUVConstants* rgbuvconstants) {
+  asm volatile(
+
+      "vld1.8      {d0}, [%4]                    \n"  // load rgbuvconstants
+      "vdup.u8     d24, d0[0]                    \n"  // UB  0.875  coefficient
+      "vdup.u8     d25, d0[1]                    \n"  // UG -0.5781 coefficient
+      "vdup.u8     d26, d0[2]                    \n"  // UR -0.2969 coefficient
+      "vdup.u8     d27, d0[4]                    \n"  // VB -0.1406 coefficient
+      "vdup.u8     d28, d0[5]                    \n"  // VG -0.7344 coefficient
       "vmov.u16    q15, #0x8080                  \n"  // 128.5
+
       "1:                                        \n"
       "vld4.8      {d0, d1, d2, d3}, [%0]!       \n"  // load 8 ARGB pixels.
       "subs        %3, %3, #8                    \n"  // 8 processed per loop.
@@ -1857,15 +1865,53 @@ void ARGBToUV444Row_NEON(const uint8_t* src_argb,
       "vst1.8      {d0}, [%1]!                   \n"  // store 8 pixels U.
       "vst1.8      {d1}, [%2]!                   \n"  // store 8 pixels V.
       "bgt         1b                            \n"
-      : "+r"(src_argb),  // %0
-        "+r"(dst_u),     // %1
-        "+r"(dst_v),     // %2
-        "+r"(width)      // %3
-      :
+      : "+r"(src_argb),      // %0
+        "+r"(dst_u),         // %1
+        "+r"(dst_v),         // %2
+        "+r"(width)          // %3
+      : "r"(rgbuvconstants)  // %4
       : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14",
         "q15");
 }
 
+// RGB to bt601 coefficients
+// UB   0.875 coefficient = 112
+// UG -0.5781 coefficient = 74
+// UR -0.2969 coefficient = 38
+// VB -0.1406 coefficient = 18
+// VG -0.7344 coefficient = 94
+// VR   0.875 coefficient = 112 (ignored)
+
+static const struct RgbUVConstants kRgb24I601UVConstants = {{112, 74, 38, 0},
+                                                            {18, 94, 112, 0}};
+
+// RGB to JPeg coefficients
+// UB coeff 0.500    = 127
+// UG coeff -0.33126 = 84
+// UR coeff -0.16874 = 43
+// VB coeff -0.08131 = 20
+// VG coeff -0.41869 = 107
+// VR coeff 0.500    = 127 (ignored)
+
+static const struct RgbUVConstants kRgb24JPegUVConstants = {{127, 84, 43, 0},
+                                                            {20, 107, 127, 0}};
+
+void ARGBToUV444Row_NEON(const uint8_t* src_argb,
+                         uint8_t* dst_u,
+                         uint8_t* dst_v,
+                         int width) {
+  ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
+                            &kRgb24I601UVConstants);
+}
+
+void ARGBToUVJ444Row_NEON(const uint8_t* src_argb,
+                          uint8_t* dst_u,
+                          uint8_t* dst_v,
+                          int width) {
+  ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
+                            &kRgb24JPegUVConstants);
+}
+
 // clang-format off
 // 16x2 pixels -> 8x1.  width is number of argb pixels. e.g. 16.
 #define RGBTOUV(QB, QG, QR)                                                 \
@@ -2702,7 +2748,6 @@ void AB64ToARGBRow_NEON(const uint16_t* src_ab64,
 struct RgbConstants {
   uint8_t kRGBToY[4];
   uint16_t kAddY;
-  uint16_t pad;
 };
 
 // RGB to JPeg coefficients
@@ -2710,11 +2755,9 @@ struct RgbConstants {
 // G * 0.5870 coefficient = 150
 // R * 0.2990 coefficient = 77
 // Add 0.5 = 0x80
-static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
-                                                        128,
-                                                        0};
+static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, 128};
 
-static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
+static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128};
 
 // RGB to BT.601 coefficients
 // B * 0.1016 coefficient = 25
@@ -2723,12 +2766,9 @@ static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
 // Add 16.5 = 0x1080
 
 static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
-                                                        0x1080,
-                                                        0};
+                                                        0x1080};
 
-static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
-                                                      0x1080,
-                                                      0};
+static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, 0x1080};
 
 // ARGB expects first 3 values to contain RGB and 4th value is ignored.
 void ARGBToYMatrixRow_NEON(const uint8_t* src_argb,
@@ -3058,6 +3098,8 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
                            uint8_t* dst_argb,
                            int width) {
   asm volatile(
+      "vmov.u16    q15, #0x00ff                  \n"  // 255 for rounding up
+
       // Attenuate 8 pixels.
       "1:                                        \n"
       "vld4.8      {d0, d1, d2, d3}, [%0]!       \n"  // load 8 pixels of ARGB.
@@ -3065,16 +3107,16 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
       "vmull.u8    q10, d0, d3                   \n"  // b * a
       "vmull.u8    q11, d1, d3                   \n"  // g * a
       "vmull.u8    q12, d2, d3                   \n"  // r * a
-      "vqrshrn.u16 d0, q10, #8                   \n"  // b >>= 8
-      "vqrshrn.u16 d1, q11, #8                   \n"  // g >>= 8
-      "vqrshrn.u16 d2, q12, #8                   \n"  // r >>= 8
+      "vaddhn.u16  d0, q10, q15                  \n"  // (b + 255) >> 8
+      "vaddhn.u16  d1, q11, q15                  \n"  // (g + 255) >> 8
+      "vaddhn.u16  d2, q12, q15                  \n"  // (r + 255) >> 8
       "vst4.8      {d0, d1, d2, d3}, [%1]!       \n"  // store 8 pixels of ARGB.
       "bgt         1b                            \n"
       : "+r"(src_argb),  // %0
         "+r"(dst_argb),  // %1
         "+r"(width)      // %2
       :
-      : "cc", "memory", "q0", "q1", "q10", "q11", "q12");
+      : "cc", "memory", "q0", "q1", "q10", "q11", "q12", "q15");
 }
 
 // Quantize 8 ARGB pixels (32 bytes).
diff --git a/files/source/row_neon64.cc b/source/row_neon64.cc
index 74190d61..1679f87c 100644
--- a/files/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -2198,19 +2198,26 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
   );
 }
 
+struct RgbUVConstants {  // 8-bit RGB->U/V weights read by the UV444 matrix kernel
+  uint8_t kRGBToU[4];  // U weights in B, G, R order; 4th byte is padding
+  uint8_t kRGBToV[4];  // V weights in B, G, R order; 4th byte is padding
+};
+
 // 8x1 pixels.
-void ARGBToUV444Row_NEON(const uint8_t* src_argb,
-                         uint8_t* dst_u,
-                         uint8_t* dst_v,
-                         int width) {
-  asm volatile(
-      "movi        v24.8b, #112                  \n"  // UB / VR 0.875
-                                                      // coefficient
-      "movi        v25.8b, #74                   \n"  // UG -0.5781 coefficient
-      "movi        v26.8b, #38                   \n"  // UR -0.2969 coefficient
-      "movi        v27.8b, #18                   \n"  // VB -0.1406 coefficient
-      "movi        v28.8b, #94                   \n"  // VG -0.7344 coefficient
-      "movi        v29.16b,#0x80                 \n"  // 128.5
+void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb,
+                               uint8_t* dst_u,
+                               uint8_t* dst_v,
+                               int width,
+                               const struct RgbUVConstants* rgbuvconstants) {
+  asm volatile(
+      "ldr         d0, [%4]                      \n"  // load rgbuvconstants
+      "dup         v24.16b, v0.b[0]              \n"  // UB  0.875 coefficient
+      "dup         v25.16b, v0.b[1]              \n"  // UG -0.5781 coefficient
+      "dup         v26.16b, v0.b[2]              \n"  // UR -0.2969 coefficient
+      "dup         v27.16b, v0.b[4]              \n"  // VB -0.1406 coefficient
+      "dup         v28.16b, v0.b[5]              \n"  // VG -0.7344 coefficient
+      "movi        v29.16b, #0x80                \n"  // 128.5
+
       "1:                                        \n"
       "ld4         {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"  // load 8 ARGB
       "subs        %w3, %w3, #8                  \n"  // 8 processed per loop.
@@ -2229,15 +2236,53 @@ void ARGBToUV444Row_NEON(const uint8_t* src_argb,
       "st1         {v0.8b}, [%1], #8             \n"  // store 8 pixels U.
       "st1         {v1.8b}, [%2], #8             \n"  // store 8 pixels V.
       "b.gt        1b                            \n"
-      : "+r"(src_argb),  // %0
-        "+r"(dst_u),     // %1
-        "+r"(dst_v),     // %2
-        "+r"(width)      // %3
-      :
+      : "+r"(src_argb),      // %0
+        "+r"(dst_u),         // %1
+        "+r"(dst_v),         // %2
+        "+r"(width)          // %3
+      : "r"(rgbuvconstants)  // %4
       : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v24", "v25", "v26",
         "v27", "v28", "v29");
 }
 
+// RGB to bt601 coefficients
+// UB   0.875 coefficient = 112
+// UG -0.5781 coefficient = 74
+// UR -0.2969 coefficient = 38
+// VB -0.1406 coefficient = 18
+// VG -0.7344 coefficient = 94
+// VR   0.875 coefficient = 112 (ignored)
+
+static const struct RgbUVConstants kRgb24I601UVConstants = {{112, 74, 38, 0},
+                                                            {18, 94, 112, 0}};
+
+// RGB to JPeg coefficients
+// UB coeff 0.500    = 127
+// UG coeff -0.33126 = 84
+// UR coeff -0.16874 = 43
+// VB coeff -0.08131 = 20
+// VG coeff -0.41869 = 107
+// VR coeff 0.500    = 127 (ignored)
+
+static const struct RgbUVConstants kRgb24JPegUVConstants = {{127, 84, 43, 0},
+                                                            {20, 107, 127, 0}};
+
+void ARGBToUV444Row_NEON(const uint8_t* src_argb,  // source ARGB pixels
+                         uint8_t* dst_u,           // receives one U per pixel
+                         uint8_t* dst_v,           // receives one V per pixel
+                         int width) {  // pixel count; the kernel steps by 8
+  ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
+                            &kRgb24I601UVConstants);  // bt601 weights
+}
+
+void ARGBToUVJ444Row_NEON(const uint8_t* src_argb,  // source ARGB pixels
+                          uint8_t* dst_u,           // receives one U per pixel
+                          uint8_t* dst_v,           // receives one V per pixel
+                          int width) {  // pixel count; the kernel steps by 8
+  ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
+                            &kRgb24JPegUVConstants);  // JPeg weights
+}
+
 #define RGBTOUV_SETUP_REG                                                  \
   "movi       v20.8h, #56, lsl #0  \n" /* UB/VR coefficient (0.875) / 2 */ \
   "movi       v21.8h, #37, lsl #0  \n" /* UG coefficient (-0.5781) / 2  */ \
@@ -2943,34 +2988,8 @@ void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
 struct RgbConstants {
   uint8_t kRGBToY[4];
   uint16_t kAddY;
-  uint16_t pad;
 };
 
-// RGB to JPeg coefficients
-// B * 0.1140 coefficient = 29
-// G * 0.5870 coefficient = 150
-// R * 0.2990 coefficient = 77
-// Add 0.5 = 0x80
-static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
-                                                        128,
-                                                        0};
-
-static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
-
-// RGB to BT.601 coefficients
-// B * 0.1016 coefficient = 25
-// G * 0.5078 coefficient = 129
-// R * 0.2578 coefficient = 66
-// Add 16.5 = 0x1080
-
-static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
-                                                        0x1080,
-                                                        0};
-
-static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
-                                                      0x1080,
-                                                      0};
-
 // ARGB expects first 3 values to contain RGB and 4th value is ignored.
 void ARGBToYMatrixRow_NEON(const uint8_t* src_argb,
                            uint8_t* dst_y,
@@ -3005,6 +3024,26 @@ void ARGBToYMatrixRow_NEON(const uint8_t* src_argb,
         "v17");
 }
 
+// RGB to JPeg coefficients
+// B * 0.1140 coefficient = 29
+// G * 0.5870 coefficient = 150
+// R * 0.2990 coefficient = 77
+// Add 0.5 = 0x80
+static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, 128};
+
+static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128};
+
+// RGB to BT.601 coefficients
+// B * 0.1016 coefficient = 25
+// G * 0.5078 coefficient = 129
+// R * 0.2578 coefficient = 66
+// Add 16.5 = 0x1080
+
+static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
+                                                        0x1080};
+
+static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, 0x1080};
+
 void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
   ARGBToYMatrixRow_NEON(src_argb, dst_y, width, &kRgb24I601Constants);
 }
@@ -3402,6 +3441,8 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
                            uint8_t* dst_argb,
                            int width) {
   asm volatile(
+      "movi        v7.8h, #0x00ff                \n"  // 255 for rounding up
+
       // Attenuate 8 pixels.
       "1:                                        \n"
       "ld4         {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"  // load 8 ARGB
@@ -3410,16 +3451,16 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
       "prfm        pldl1keep, [%0, 448]          \n"
       "umull       v5.8h, v1.8b, v3.8b           \n"         // g * a
       "umull       v6.8h, v2.8b, v3.8b           \n"         // r * a
-      "uqrshrn     v0.8b, v4.8h, #8              \n"         // b >>= 8
-      "uqrshrn     v1.8b, v5.8h, #8              \n"         // g >>= 8
-      "uqrshrn     v2.8b, v6.8h, #8              \n"         // r >>= 8
+      "addhn       v0.8b, v4.8h, v7.8h           \n"         // (b + 255) >> 8
+      "addhn       v1.8b, v5.8h, v7.8h           \n"         // (g + 255) >> 8
+      "addhn       v2.8b, v6.8h, v7.8h           \n"         // (r + 255) >> 8
       "st4         {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n"  // store 8 ARGB
       "b.gt        1b                            \n"
       : "+r"(src_argb),  // %0
         "+r"(dst_argb),  // %1
         "+r"(width)      // %2
       :
-      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6");
+      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
 }
 
 // Quantize 8 ARGB pixels (32 bytes).
@@ -3960,6 +4001,86 @@ void ByteToFloatRow_NEON(const uint8_t* src,
       : "cc", "memory", "v1", "v2", "v3");
 }
 
+// Convert a row of FP16 Half Floats to FP32 Floats, 8 values per iteration
+void ConvertFP16ToFP32Row_NEON(const uint16_t* src,  // fp16
+                               float* dst,           // fp32 output
+                               int width) {  // values to convert, taken 8 at a time
+  asm volatile(
+      "1:                                        \n"
+      "ld1         {v1.8h}, [%0], #16            \n"  // load 8 halffloats
+      "subs        %w2, %w2, #8                  \n"  // 8 floats per loop
+      "prfm        pldl1keep, [%0, 448]          \n"  // prefetch ahead of src
+      "fcvtl       v2.4s, v1.4h                  \n"  // low 4 halffloats -> floats
+      "fcvtl2      v3.4s, v1.8h                  \n"  // high 4 halffloats -> floats
+      "stp         q2, q3, [%1], #32             \n"  // store 8 floats
+      "b.gt        1b                            \n"  // no tail loop: repeat while > 0
+      : "+r"(src),   // %0
+        "+r"(dst),   // %1
+        "+r"(width)  // %2
+      :
+      : "cc", "memory", "v1", "v2", "v3");
+}
+
+// Convert FP16 Half Floats to FP32 Floats
+// Read a column (one value every src_stride elements) and write a row
+void ConvertFP16ToFP32Column_NEON(const uint16_t* src,  // fp16
+                                  int src_stride,       // stride in elements
+                                  float* dst,           // contiguous fp32 output
+                                  int width) {  // values to convert; <= 0 does nothing
+  asm volatile(
+      "cmp         %w2, #8                       \n"  // At least 8 rows?
+      "b.lt        2f                            \n"
+      "1:                                        \n"
+      "ld1         {v0.h}[0], [%0], %3           \n"  // load 8 halffloats
+      "ld1         {v0.h}[1], [%0], %3           \n"
+      "ld1         {v0.h}[2], [%0], %3           \n"
+      "ld1         {v0.h}[3], [%0], %3           \n"
+      "ld1         {v1.h}[0], [%0], %3           \n"
+      "ld1         {v1.h}[1], [%0], %3           \n"
+      "ld1         {v1.h}[2], [%0], %3           \n"
+      "ld1         {v1.h}[3], [%0], %3           \n"
+      "sub         %w2, %w2, #8                  \n"  // 8 rows per loop
+      "prfm        pldl1keep, [%0, 448]          \n"
+      "fcvtl       v2.4s, v0.4h                  \n"  // 4 floats
+      "fcvtl       v3.4s, v1.4h                  \n"  // 4 more floats
+      "stp         q2, q3, [%1], #32             \n"  // store 8 floats
+      "cmp         %w2, #8                       \n"  // Another full group of 8?
+      "b.ge        1b                            \n"
+      "2:                                        \n"  // tail: fewer than 8 rows left
+      "subs        %w2, %w2, #1                  \n"  // Is there 1 more value?
+      "b.lt        3f                            \n"
+      "ld1         {v1.h}[0], [%0], %3           \n"  // load 1 halffloat
+      "fcvtl       v2.4s, v1.4h                  \n"  // lane 0 holds the float
+      "str         s2, [%1], #4                  \n"  // store 1 float
+      "b           2b                            \n"
+      "3:                                        \n"
+      : "+r"(src),                        // %0
+        "+r"(dst),                        // %1
+        "+r"(width)                       // %2
+      : "r"((ptrdiff_t)src_stride * 2)    // %3 byte stride, widened before scaling
+      : "cc", "memory", "v0", "v1", "v2", "v3");
+}
+
+// Convert a row of FP32 Floats to FP16 Half Floats, 8 values per iteration
+void ConvertFP32ToFP16Row_NEON(const float* src,  // fp32 input
+                               uint16_t* dst,  // fp16
+                               int width) {  // values to convert, taken 8 at a time
+  asm volatile(
+      "1:                                        \n"
+      "ldp         q2, q3, [%0], #32             \n"  // load 8 floats
+      "subs        %w2, %w2, #8                  \n"  // 8 floats per loop
+      "prfm        pldl1keep, [%0, 448]          \n"  // prefetch ahead of src
+      "fcvtn       v1.4h, v2.4s                  \n"  // first 4 floats -> low half
+      "fcvtn2      v1.8h, v3.4s                  \n"  // next 4 floats -> high half
+      "str         q1, [%1], #16                 \n"  // store 8 fp16 halffloats
+      "b.gt        1b                            \n"  // no tail loop: repeat while > 0
+      : "+r"(src),   // %0
+        "+r"(dst),   // %1
+        "+r"(width)  // %2
+      :
+      : "cc", "memory", "v1", "v2", "v3");
+}
+
 float ScaleMaxSamples_NEON(const float* src,
                            float* dst,
                            float scale,
diff --git a/files/source/row_rvv.cc b/source/row_rvv.cc
index 27e91a3b..0bf2bef6 100644
--- a/files/source/row_rvv.cc
+++ b/source/row_rvv.cc
@@ -17,7 +17,9 @@
 
 #include "libyuv/row.h"
 
-#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
+// This module is for clang RVV. GCC does not yet support segment load & store.
+#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) && \
+    defined(__clang__)
 #include <assert.h>
 #include <riscv_vector.h>
 
@@ -29,48 +31,48 @@ extern "C" {
 // Fill YUV -> RGB conversion constants into vectors
 // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
 // register) is set to round-to-nearest-up mode(0).
-#define YUVTORGB_SETUP(vl, yuvconst, ub, vr, ug, vg, yg, bb, bg, br) \
-  {                                                                  \
-    asm volatile("csrwi vxrm, 0");                                   \
-    ub = yuvconst->kUVCoeff[0];                                      \
-    vr = yuvconst->kUVCoeff[1];                                      \
-    ug = yuvconst->kUVCoeff[2];                                      \
-    vg = yuvconst->kUVCoeff[3];                                      \
-    yg = yuvconst->kRGBCoeffBias[0];                                 \
-    bb = yuvconst->kRGBCoeffBias[1] + 32;                            \
-    bg = yuvconst->kRGBCoeffBias[2] - 32;                            \
-    br = yuvconst->kRGBCoeffBias[3] + 32;                            \
+#define YUVTORGB_SETUP(yuvconst, ub, vr, ug, vg, yg, bb, bg, br) \
+  {                                                              \
+    asm volatile("csrwi vxrm, 0");                               \
+    ub = yuvconst->kUVCoeff[0];                                  \
+    vr = yuvconst->kUVCoeff[1];                                  \
+    ug = yuvconst->kUVCoeff[2];                                  \
+    vg = yuvconst->kUVCoeff[3];                                  \
+    yg = yuvconst->kRGBCoeffBias[0];                             \
+    bb = yuvconst->kRGBCoeffBias[1] + 32;                        \
+    bg = yuvconst->kRGBCoeffBias[2] - 32;                        \
+    br = yuvconst->kRGBCoeffBias[3] + 32;                        \
   }
 
-// Read [VLEN/8] Y, [VLEN/(8 * 2)] U and [VLEN/(8 * 2)] V from 422
-#define READYUV422(vl, v_u, v_v, v_y_16)                \
-  {                                                     \
-    vuint8m1_t v_tmp0, v_tmp1;                          \
-    vuint8m2_t v_y;                                     \
-    vuint16m2_t v_u_16, v_v_16;                         \
-    vl = __riscv_vsetvl_e8m1((w + 1) / 2);              \
-    v_tmp0 = __riscv_vle8_v_u8m1(src_u, vl);            \
-    v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl);    \
-    v_tmp1 = __riscv_vle8_v_u8m1(src_v, vl);            \
-    v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl);    \
-    v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl); \
-    v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl); \
-    v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16);    \
-    v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16);    \
-    vl = __riscv_vsetvl_e8m2(w);                        \
-    v_y = __riscv_vle8_v_u8m2(src_y, vl);               \
-    v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl);       \
+// Read [2*VLEN/8] Y, [VLEN/8] U and [VLEN/8] V from 422
+#define READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16) \
+  {                                                              \
+    vuint8m1_t v_tmp0, v_tmp1;                                   \
+    vuint8m2_t v_y;                                              \
+    vuint16m2_t v_u_16, v_v_16;                                  \
+    vl = __riscv_vsetvl_e8m1((w + 1) / 2);                       \
+    v_tmp0 = __riscv_vle8_v_u8m1(src_u, vl);                     \
+    v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl);             \
+    v_tmp1 = __riscv_vle8_v_u8m1(src_v, vl);                     \
+    v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl);             \
+    v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl);          \
+    v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl);          \
+    v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16);             \
+    v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16);             \
+    vl = __riscv_vsetvl_e8m2(w);                                 \
+    v_y = __riscv_vle8_v_u8m2(src_y, vl);                        \
+    v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl);                \
   }
 
-// Read [VLEN/8] Y, [VLEN/8] U, and [VLEN/8] V from 444
-#define READYUV444(vl, v_u, v_v, v_y_16)          \
-  {                                               \
-    vuint8m2_t v_y;                               \
-    vl = __riscv_vsetvl_e8m2(w);                  \
-    v_y = __riscv_vle8_v_u8m2(src_y, vl);         \
-    v_u = __riscv_vle8_v_u8m2(src_u, vl);         \
-    v_v = __riscv_vle8_v_u8m2(src_v, vl);         \
-    v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \
+// Read [2*VLEN/8] Y, [2*VLEN/8] U, and [2*VLEN/8] V from 444
+#define READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16) \
+  {                                                              \
+    vuint8m2_t v_y;                                              \
+    vl = __riscv_vsetvl_e8m2(w);                                 \
+    v_y = __riscv_vle8_v_u8m2(src_y, vl);                        \
+    v_u = __riscv_vle8_v_u8m2(src_u, vl);                        \
+    v_v = __riscv_vle8_v_u8m2(src_v, vl);                        \
+    v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl);                \
   }
 
 // Convert from YUV to fixed point RGB
@@ -101,6 +103,45 @@ extern "C" {
     v_r = __riscv_vnclipu_wx_u8m2(v_r_16, 6, vl);            \
   }
 
+// Read [2*VLEN/8] Y from src_y; de-interleave [VLEN/8] U and [VLEN/8] V from src_uv
+#define READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16)   \
+  {                                                        \
+    vuint8m1_t v_tmp0, v_tmp1;                             \
+    vuint8m2_t v_y;                                        \
+    vuint16m2_t v_u_16, v_v_16;                            \
+    vl = __riscv_vsetvl_e8m1((w + 1) / 2);                 \
+    __riscv_vlseg2e8_v_u8m1(&v_tmp0, &v_tmp1, src_uv, vl); \
+    v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl);       \
+    v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl);       \
+    v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl);    \
+    v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl);    \
+    v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16);       \
+    v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16);       \
+    vl = __riscv_vsetvl_e8m2(w);                           \
+    v_y = __riscv_vle8_v_u8m2(src_y, vl);                  \
+    v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl);          \
+  }
+
+// Read [2*VLEN/8] Y from src_y; de-interleave [VLEN/8] V and [VLEN/8] U from src_vu
+#define READNV21(vl, w, src_y, src_vu, v_u, v_v, v_y_16)   \
+  {                                                        \
+    vuint8m1_t v_tmp0, v_tmp1;                             \
+    vuint8m2_t v_y;                                        \
+    vuint16m2_t v_u_16, v_v_16;                            \
+    vl = __riscv_vsetvl_e8m1((w + 1) / 2);                 \
+    __riscv_vlseg2e8_v_u8m1(&v_tmp0, &v_tmp1, src_vu, vl); \
+    v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl);       \
+    v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl);       \
+    v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl);    \
+    v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl);    \
+    v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16);       \
+    v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16);       \
+    vl = __riscv_vsetvl_e8m2(w);                           \
+    v_y = __riscv_vle8_v_u8m2(src_y, vl);                  \
+    v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl);          \
+  }
+
+#ifdef HAS_ARGBTOAR64ROW_RVV
 void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
   size_t avl = (size_t)4 * width;
   do {
@@ -116,7 +157,9 @@ void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
     dst_ar64 += vl;
   } while (avl > 0);
 }
+#endif
 
+#ifdef HAS_ARGBTOAB64ROW_RVV
 void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
   size_t avl = (size_t)width;
   do {
@@ -138,7 +181,9 @@ void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
     dst_ab64 += 4 * vl;
   } while (avl > 0);
 }
+#endif
 
+#ifdef HAS_AR64TOARGBROW_RVV
 void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
   size_t avl = (size_t)4 * width;
   do {
@@ -153,7 +198,26 @@ void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
     dst_argb += vl;
   } while (avl > 0);
 }
+#endif
+
+#ifdef HAS_AR64TOAB64ROW_RVV  // AR64 -> AB64: exchange 16-bit channels 0 and 2
+void AR64ToAB64Row_RVV(const uint16_t* src_ar64,
+                       uint16_t* dst_ab64,
+                       int width) {
+  size_t w = (size_t)width;  // pixels still to convert
+  do {
+    size_t vl = __riscv_vsetvl_e16m2(w);  // pixels handled in this pass
+    vuint16m2_t v_b, v_g, v_r, v_a;
+    __riscv_vlseg4e16_v_u16m2(&v_b, &v_g, &v_r, &v_a, src_ar64, vl);  // ch 0..3
+    __riscv_vsseg4e16_v_u16m2(dst_ab64, v_r, v_g, v_b, v_a, vl);  // ch 2,1,0,3
+    w -= vl;
+    src_ar64 += vl * 4;  // 4 uint16_t per pixel
+    dst_ab64 += vl * 4;
+  } while (w > 0);
+}
+#endif
 
+#ifdef HAS_AB64TOARGBROW_RVV
 void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
   size_t avl = (size_t)width;
   do {
@@ -171,7 +235,9 @@ void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
     dst_argb += 4 * vl;
   } while (avl > 0);
 }
+#endif
 
+#ifdef HAS_RAWTOARGBROW_RVV
 void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
   size_t w = (size_t)width;
   size_t vl = __riscv_vsetvl_e8m2(w);
@@ -186,7 +252,9 @@ void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
     vl = __riscv_vsetvl_e8m2(w);
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_RAWTORGBAROW_RVV
 void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
   size_t w = (size_t)width;
   size_t vl = __riscv_vsetvl_e8m2(w);
@@ -201,7 +269,9 @@ void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
     vl = __riscv_vsetvl_e8m2(w);
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_RAWTORGB24ROW_RVV
 void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
   size_t w = (size_t)width;
   do {
@@ -214,7 +284,9 @@ void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
     dst_rgb24 += vl * 3;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_ARGBTORAWROW_RVV
 void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
   size_t w = (size_t)width;
   do {
@@ -227,7 +299,9 @@ void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
     dst_raw += vl * 3;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_ARGBTORGB24ROW_RVV
 void ARGBToRGB24Row_RVV(const uint8_t* src_argb,
                         uint8_t* dst_rgb24,
                         int width) {
@@ -242,7 +316,69 @@ void ARGBToRGB24Row_RVV(const uint8_t* src_argb,
     dst_rgb24 += vl * 3;
   } while (w > 0);
 }
+#endif
+
+#ifdef HAS_ARGBTOABGRROW_RVV  // ARGB -> ABGR: exchange bytes 0 and 2 of each pixel
+void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width) {
+  size_t w = (size_t)width;  // pixels still to convert
+  do {
+    size_t vl = __riscv_vsetvl_e8m2(w);  // pixels handled in this pass
+    vuint8m2_t v_a, v_r, v_g, v_b;
+    __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);  // bytes 0..3
+    __riscv_vsseg4e8_v_u8m2(dst_abgr, v_r, v_g, v_b, v_a, vl);  // bytes 2,1,0,3
+    w -= vl;
+    src_argb += vl * 4;  // 4 bytes per pixel
+    dst_abgr += vl * 4;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_ARGBTOBGRAROW_RVV  // ARGB -> BGRA: reverse the 4 bytes of each pixel
+void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_bgra, int width) {
+  size_t w = (size_t)width;  // pixels still to convert
+  do {
+    size_t vl = __riscv_vsetvl_e8m2(w);  // pixels handled in this pass
+    vuint8m2_t v_a, v_r, v_g, v_b;
+    __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);  // bytes 0..3
+    __riscv_vsseg4e8_v_u8m2(dst_bgra, v_a, v_r, v_g, v_b, vl);  // bytes 3,2,1,0
+    w -= vl;
+    src_argb += vl * 4;  // 4 bytes per pixel
+    dst_bgra += vl * 4;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_ARGBTORGBAROW_RVV  // ARGB -> RGBA: move byte 3 of each pixel to the front
+void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width) {
+  size_t w = (size_t)width;  // pixels still to convert
+  do {
+    size_t vl = __riscv_vsetvl_e8m2(w);  // pixels handled in this pass
+    vuint8m2_t v_a, v_r, v_g, v_b;
+    __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);  // bytes 0..3
+    __riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl);  // bytes 3,0,1,2
+    w -= vl;
+    src_argb += vl * 4;  // 4 bytes per pixel
+    dst_rgba += vl * 4;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_RGBATOARGBROW_RVV  // RGBA -> ARGB: move byte 0 of each pixel to the end
+void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width) {
+  size_t w = (size_t)width;  // pixels still to convert
+  do {
+    size_t vl = __riscv_vsetvl_e8m2(w);  // pixels handled in this pass
+    vuint8m2_t v_a, v_r, v_g, v_b;
+    __riscv_vlseg4e8_v_u8m2(&v_a, &v_b, &v_g, &v_r, src_rgba, vl);  // bytes 0..3
+    __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);  // bytes 1,2,3,0
+    w -= vl;
+    src_rgba += vl * 4;  // 4 bytes per pixel
+    dst_argb += vl * 4;
+  } while (w > 0);
+}
+#endif
 
+#ifdef HAS_RGB24TOARGBROW_RVV
 void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24,
                         uint8_t* dst_argb,
                         int width) {
@@ -259,24 +395,26 @@ void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24,
     vl = __riscv_vsetvl_e8m2(w);
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_I444TOARGBROW_RVV
 void I444ToARGBRow_RVV(const uint8_t* src_y,
                        const uint8_t* src_u,
                        const uint8_t* src_v,
                        uint8_t* dst_argb,
                        const struct YuvConstants* yuvconstants,
                        int width) {
-  size_t vl;
   size_t w = (size_t)width;
+  size_t vl = __riscv_vsetvl_e8m2(w);
   uint8_t ub, vr, ug, vg;
   int16_t yg, bb, bg, br;
   vuint8m2_t v_u, v_v;
   vuint8m2_t v_b, v_g, v_r, v_a;
   vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
-  YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+  YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
   v_a = __riscv_vmv_v_x_u8m2(255u, vl);
   do {
-    READYUV444(vl, v_u, v_v, v_y_16);
+    READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
     YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
              v_b_16, v_r_16);
     RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
@@ -288,7 +426,9 @@ void I444ToARGBRow_RVV(const uint8_t* src_y,
     dst_argb += vl * 4;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_I444ALPHATOARGBROW_RVV
 void I444AlphaToARGBRow_RVV(const uint8_t* src_y,
                             const uint8_t* src_u,
                             const uint8_t* src_v,
@@ -303,9 +443,9 @@ void I444AlphaToARGBRow_RVV(const uint8_t* src_y,
   vuint8m2_t v_u, v_v;
   vuint8m2_t v_b, v_g, v_r, v_a;
   vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
-  YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+  YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
   do {
-    READYUV444(vl, v_u, v_v, v_y_16);
+    READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
     v_a = __riscv_vle8_v_u8m2(src_a, vl);
     YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
              v_b_16, v_r_16);
@@ -319,7 +459,9 @@ void I444AlphaToARGBRow_RVV(const uint8_t* src_y,
     dst_argb += vl * 4;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_I444TORGB24ROW_RVV
 void I444ToRGB24Row_RVV(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
@@ -333,9 +475,9 @@ void I444ToRGB24Row_RVV(const uint8_t* src_y,
   vuint8m2_t v_u, v_v;
   vuint8m2_t v_b, v_g, v_r;
   vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
-  YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+  YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
   do {
-    READYUV444(vl, v_u, v_v, v_y_16);
+    READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
     YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
              v_b_16, v_r_16);
     RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
@@ -347,24 +489,26 @@ void I444ToRGB24Row_RVV(const uint8_t* src_y,
     dst_rgb24 += vl * 3;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_I422TOARGBROW_RVV
 void I422ToARGBRow_RVV(const uint8_t* src_y,
                        const uint8_t* src_u,
                        const uint8_t* src_v,
                        uint8_t* dst_argb,
                        const struct YuvConstants* yuvconstants,
                        int width) {
-  size_t vl;
   size_t w = (size_t)width;
+  size_t vl = __riscv_vsetvl_e8m2(w);
   uint8_t ub, vr, ug, vg;
   int16_t yg, bb, bg, br;
   vuint8m2_t v_u, v_v;
   vuint8m2_t v_b, v_g, v_r, v_a;
   vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
-  YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+  YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
   v_a = __riscv_vmv_v_x_u8m2(255u, vl);
   do {
-    READYUV422(vl, v_u, v_v, v_y_16);
+    READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
     YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
              v_b_16, v_r_16);
     RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
@@ -376,7 +520,9 @@ void I422ToARGBRow_RVV(const uint8_t* src_y,
     dst_argb += vl * 4;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_I422ALPHATOARGBROW_RVV
 void I422AlphaToARGBRow_RVV(const uint8_t* src_y,
                             const uint8_t* src_u,
                             const uint8_t* src_v,
@@ -391,9 +537,9 @@ void I422AlphaToARGBRow_RVV(const uint8_t* src_y,
   vuint8m2_t v_u, v_v;
   vuint8m2_t v_b, v_g, v_r, v_a;
   vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
-  YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+  YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
   do {
-    READYUV422(vl, v_u, v_v, v_y_16);
+    READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
     v_a = __riscv_vle8_v_u8m2(src_a, vl);
     YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
              v_b_16, v_r_16);
@@ -407,24 +553,26 @@ void I422AlphaToARGBRow_RVV(const uint8_t* src_y,
     dst_argb += vl * 4;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_I422TORGBAROW_RVV
 void I422ToRGBARow_RVV(const uint8_t* src_y,
                        const uint8_t* src_u,
                        const uint8_t* src_v,
                        uint8_t* dst_rgba,
                        const struct YuvConstants* yuvconstants,
                        int width) {
-  size_t vl;
   size_t w = (size_t)width;
+  size_t vl = __riscv_vsetvl_e8m2(w);
   uint8_t ub, vr, ug, vg;
   int16_t yg, bb, bg, br;
   vuint8m2_t v_u, v_v;
   vuint8m2_t v_b, v_g, v_r, v_a;
   vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
-  YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+  YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
   v_a = __riscv_vmv_v_x_u8m2(255u, vl);
   do {
-    READYUV422(vl, v_u, v_v, v_y_16);
+    READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
     YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
              v_b_16, v_r_16);
     RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
@@ -436,7 +584,9 @@ void I422ToRGBARow_RVV(const uint8_t* src_y,
     dst_rgba += vl * 4;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_I422TORGB24ROW_RVV
 void I422ToRGB24Row_RVV(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
@@ -450,9 +600,9 @@ void I422ToRGB24Row_RVV(const uint8_t* src_y,
   vuint8m2_t v_u, v_v;
   vuint8m2_t v_b, v_g, v_r;
   vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
-  YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+  YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
   do {
-    READYUV422(vl, v_u, v_v, v_y_16);
+    READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
     YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
              v_b_16, v_r_16);
     RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
@@ -464,7 +614,9 @@ void I422ToRGB24Row_RVV(const uint8_t* src_y,
     dst_rgb24 += vl * 3;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_I400TOARGBROW_RVV
 void I400ToARGBRow_RVV(const uint8_t* src_y,
                        uint8_t* dst_argb,
                        const struct YuvConstants* yuvconstants,
@@ -503,7 +655,9 @@ void I400ToARGBRow_RVV(const uint8_t* src_y,
     dst_argb += vl * 4;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_J400TOARGBROW_RVV
 void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width) {
   size_t w = (size_t)width;
   size_t vl = __riscv_vsetvl_e8m2(w);
@@ -518,7 +672,9 @@ void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width) {
     vl = __riscv_vsetvl_e8m2(w);
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_COPYROW_RVV
 void CopyRow_RVV(const uint8_t* src, uint8_t* dst, int width) {
   size_t w = (size_t)width;
   do {
@@ -530,8 +686,125 @@ void CopyRow_RVV(const uint8_t* src, uint8_t* dst, int width) {
     dst += vl;
   } while (w > 0);
 }
+#endif
+
+#ifdef HAS_NV12TOARGBROW_RVV
+void NV12ToARGBRow_RVV(const uint8_t* src_y,
+                       const uint8_t* src_uv,
+                       uint8_t* dst_argb,
+                       const struct YuvConstants* yuvconstants,
+                       int width) {
+  size_t w = (size_t)width;
+  size_t vl = __riscv_vsetvl_e8m2(w);
+  uint8_t ub, vr, ug, vg;
+  int16_t yg, bb, bg, br;
+  vuint8m2_t v_u, v_v;
+  vuint8m2_t v_b, v_g, v_r, v_a;
+  vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+  YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+  v_a = __riscv_vmv_v_x_u8m2(255u, vl);  // Constant 255 alpha lanes.
+  do {
+    READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16);
+    YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+             v_b_16, v_r_16);
+    RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+    __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+    w -= vl;
+    src_y += vl;
+    src_uv += vl;  // NOTE(review): presumably vl/2 two-byte UV pairs.
+    dst_argb += vl * 4;  // Four bytes written per pixel: B, G, R, A.
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_NV12TORGB24ROW_RVV
+void NV12ToRGB24Row_RVV(const uint8_t* src_y,
+                        const uint8_t* src_uv,
+                        uint8_t* dst_rgb24,
+                        const struct YuvConstants* yuvconstants,
+                        int width) {
+  size_t w = (size_t)width;
+  size_t vl = __riscv_vsetvl_e8m2(w);
+  uint8_t ub, vr, ug, vg;
+  int16_t yg, bb, bg, br;
+  vuint8m2_t v_u, v_v;
+  vuint8m2_t v_b, v_g, v_r;
+  vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+  YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+  do {
+    READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16);
+    YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+             v_b_16, v_r_16);
+    RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+    __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl);
+    w -= vl;
+    src_y += vl;
+    src_uv += vl;  // NOTE(review): presumably vl/2 two-byte UV pairs.
+    dst_rgb24 += vl * 3;  // Three bytes written per pixel: B, G, R.
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_NV21TOARGBROW_RVV
+void NV21ToARGBRow_RVV(const uint8_t* src_y,
+                       const uint8_t* src_vu,
+                       uint8_t* dst_argb,
+                       const struct YuvConstants* yuvconstants,
+                       int width) {
+  size_t w = (size_t)width;
+  size_t vl = __riscv_vsetvl_e8m2(w);
+  uint8_t ub, vr, ug, vg;
+  int16_t yg, bb, bg, br;
+  vuint8m2_t v_u, v_v;
+  vuint8m2_t v_b, v_g, v_r, v_a;
+  vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+  YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+  v_a = __riscv_vmv_v_x_u8m2(255u, vl);  // Constant 255 alpha lanes.
+  do {
+    READNV21(vl, w, src_y, src_vu, v_u, v_v, v_y_16);
+    YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+             v_b_16, v_r_16);
+    RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+    __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+    w -= vl;
+    src_y += vl;
+    src_vu += vl;  // NOTE(review): presumably vl/2 two-byte VU pairs.
+    dst_argb += vl * 4;  // Four bytes written per pixel: B, G, R, A.
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_NV21TORGB24ROW_RVV
+void NV21ToRGB24Row_RVV(const uint8_t* src_y,
+                        const uint8_t* src_vu,
+                        uint8_t* dst_rgb24,
+                        const struct YuvConstants* yuvconstants,
+                        int width) {
+  size_t w = (size_t)width;
+  size_t vl = __riscv_vsetvl_e8m2(w);
+  uint8_t ub, vr, ug, vg;
+  int16_t yg, bb, bg, br;
+  vuint8m2_t v_u, v_v;
+  vuint8m2_t v_b, v_g, v_r;
+  vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+  YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+  do {
+    READNV21(vl, w, src_y, src_vu, v_u, v_v, v_y_16);
+    YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+             v_b_16, v_r_16);
+    RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+    __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl);
+    w -= vl;
+    src_y += vl;
+    src_vu += vl;  // NOTE(review): presumably vl/2 two-byte VU pairs.
+    dst_rgb24 += vl * 3;  // Three bytes written per pixel: B, G, R.
+  } while (w > 0);
+}
+#endif
 
 // Bilinear filter [VLEN/8]x2 -> [VLEN/8]x1
+
+#ifdef HAS_INTERPOLATEROW_RVV
 void InterpolateRow_RVV(uint8_t* dst_ptr,
                         const uint8_t* src_ptr,
                         ptrdiff_t src_stride,
@@ -554,13 +827,16 @@ void InterpolateRow_RVV(uint8_t* dst_ptr,
     } while (dst_w > 0);
     return;
   }
+  // To match behavior on other platforms, vxrm (fixed-point rounding mode
+  // register) is set to round-to-nearest-up(0).
+  asm volatile("csrwi vxrm, 0");
   // Blend 50 / 50.
   if (y1_fraction == 128) {
     do {
       size_t vl = __riscv_vsetvl_e8m8(dst_w);
       vuint8m8_t row0 = __riscv_vle8_v_u8m8(src_ptr, vl);
       vuint8m8_t row1 = __riscv_vle8_v_u8m8(src_ptr1, vl);
-      // Averaging add
+      // Use round-to-nearest-up mode for averaging add
       vuint8m8_t row_out = __riscv_vaaddu_vv_u8m8(row0, row1, vl);
       __riscv_vse8_v_u8m8(dst_ptr, row_out, vl);
       dst_w -= vl;
@@ -571,15 +847,13 @@ void InterpolateRow_RVV(uint8_t* dst_ptr,
     return;
   }
   // General purpose row blend.
-  // To match behavior on other platforms, vxrm (fixed-point rounding mode
-  // register) is set to round-to-nearest-up(0).
-  asm volatile("csrwi vxrm, 0");
   do {
     size_t vl = __riscv_vsetvl_e8m4(dst_w);
     vuint8m4_t row0 = __riscv_vle8_v_u8m4(src_ptr, vl);
     vuint16m8_t acc = __riscv_vwmulu_vx_u16m8(row0, y0_fraction, vl);
     vuint8m4_t row1 = __riscv_vle8_v_u8m4(src_ptr1, vl);
     acc = __riscv_vwmaccu_vx_u16m8(acc, y1_fraction, row1, vl);
+    // Use round-to-nearest-up mode for vnclip
     __riscv_vse8_v_u8m4(dst_ptr, __riscv_vnclipu_wx_u8m4(acc, 8, vl), vl);
     dst_w -= vl;
     src_ptr += vl;
@@ -587,7 +861,9 @@ void InterpolateRow_RVV(uint8_t* dst_ptr,
     dst_ptr += vl;
   } while (dst_w > 0);
 }
+#endif
 
+#ifdef HAS_SPLITRGBROW_RVV
 void SplitRGBRow_RVV(const uint8_t* src_rgb,
                      uint8_t* dst_r,
                      uint8_t* dst_g,
@@ -608,7 +884,9 @@ void SplitRGBRow_RVV(const uint8_t* src_rgb,
     src_rgb += vl * 3;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_MERGERGBROW_RVV
 void MergeRGBRow_RVV(const uint8_t* src_r,
                      const uint8_t* src_g,
                      const uint8_t* src_b,
@@ -628,7 +906,9 @@ void MergeRGBRow_RVV(const uint8_t* src_r,
     dst_rgb += vl * 3;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_SPLITARGBROW_RVV
 void SplitARGBRow_RVV(const uint8_t* src_argb,
                       uint8_t* dst_r,
                       uint8_t* dst_g,
@@ -652,7 +932,9 @@ void SplitARGBRow_RVV(const uint8_t* src_argb,
     src_argb += vl * 4;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_MERGEARGBROW_RVV
 void MergeARGBRow_RVV(const uint8_t* src_r,
                       const uint8_t* src_g,
                       const uint8_t* src_b,
@@ -675,7 +957,9 @@ void MergeARGBRow_RVV(const uint8_t* src_r,
     dst_argb += vl * 4;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_SPLITXRGBROW_RVV
 void SplitXRGBRow_RVV(const uint8_t* src_argb,
                       uint8_t* dst_r,
                       uint8_t* dst_g,
@@ -696,7 +980,9 @@ void SplitXRGBRow_RVV(const uint8_t* src_argb,
     src_argb += vl * 4;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_MERGEXRGBROW_RVV
 void MergeXRGBRow_RVV(const uint8_t* src_r,
                       const uint8_t* src_g,
                       const uint8_t* src_b,
@@ -719,7 +1005,9 @@ void MergeXRGBRow_RVV(const uint8_t* src_r,
     vl = __riscv_vsetvl_e8m2(w);
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_SPLITUVROW_RVV
 void SplitUVRow_RVV(const uint8_t* src_uv,
                     uint8_t* dst_u,
                     uint8_t* dst_v,
@@ -737,7 +1025,9 @@ void SplitUVRow_RVV(const uint8_t* src_uv,
     src_uv += 2 * vl;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_MERGEUVROW_RVV
 void MergeUVRow_RVV(const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_uv,
@@ -755,6 +1045,7 @@ void MergeUVRow_RVV(const uint8_t* src_u,
     dst_uv += 2 * vl;
   } while (w > 0);
 }
+#endif
 
 struct RgbConstants {
   uint8_t kRGBToY[4];
@@ -787,7 +1078,8 @@ static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
                                                       0x1080,
                                                       0};
 
-// ARGB expects first 3 values to contain RGB and 4th value is ignored.
+// ARGB expects first 3 values to contain RGB and 4th value is ignored
+#ifdef HAS_ARGBTOYMATRIXROW_RVV
 void ARGBToYMatrixRow_RVV(const uint8_t* src_argb,
                           uint8_t* dst_y,
                           int width,
@@ -817,24 +1109,34 @@ void ARGBToYMatrixRow_RVV(const uint8_t* src_argb,
     dst_y += vl;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_ARGBTOYROW_RVV
 void ARGBToYRow_RVV(const uint8_t* src_argb, uint8_t* dst_y, int width) {
   ARGBToYMatrixRow_RVV(src_argb, dst_y, width, &kRgb24I601Constants);
 }
+#endif
 
+#ifdef HAS_ARGBTOYJROW_RVV
 void ARGBToYJRow_RVV(const uint8_t* src_argb, uint8_t* dst_yj, int width) {
   ARGBToYMatrixRow_RVV(src_argb, dst_yj, width, &kRgb24JPEGConstants);
 }
+#endif
 
+#ifdef HAS_ABGRTOYROW_RVV
 void ABGRToYRow_RVV(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
   ARGBToYMatrixRow_RVV(src_abgr, dst_y, width, &kRawI601Constants);
 }
+#endif
 
+#ifdef HAS_ABGRTOYJROW_RVV
 void ABGRToYJRow_RVV(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
   ARGBToYMatrixRow_RVV(src_abgr, dst_yj, width, &kRawJPEGConstants);
 }
+#endif
 
 // RGBA expects first value to be A and ignored, then 3 values to contain RGB.
+#ifdef HAS_RGBATOYMATRIXROW_RVV
 void RGBAToYMatrixRow_RVV(const uint8_t* src_rgba,
                           uint8_t* dst_y,
                           int width,
@@ -864,19 +1166,27 @@ void RGBAToYMatrixRow_RVV(const uint8_t* src_rgba,
     dst_y += vl;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_RGBATOYROW_RVV
 void RGBAToYRow_RVV(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
   RGBAToYMatrixRow_RVV(src_rgba, dst_y, width, &kRgb24I601Constants);
 }
+#endif
 
+#ifdef HAS_RGBATOYJROW_RVV
 void RGBAToYJRow_RVV(const uint8_t* src_rgba, uint8_t* dst_yj, int width) {
   RGBAToYMatrixRow_RVV(src_rgba, dst_yj, width, &kRgb24JPEGConstants);
 }
+#endif
 
+#ifdef HAS_BGRATOYROW_RVV
 void BGRAToYRow_RVV(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
   RGBAToYMatrixRow_RVV(src_bgra, dst_y, width, &kRawI601Constants);
 }
+#endif
 
+#ifdef HAS_RGBTOYMATRIXROW_RVV
 void RGBToYMatrixRow_RVV(const uint8_t* src_rgb,
                          uint8_t* dst_y,
                          int width,
@@ -906,51 +1216,179 @@ void RGBToYMatrixRow_RVV(const uint8_t* src_rgb,
     dst_y += vl;
   } while (w > 0);
 }
+#endif
 
+#ifdef HAS_RGB24TOYJROW_RVV
 void RGB24ToYJRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
   RGBToYMatrixRow_RVV(src_rgb24, dst_yj, width, &kRgb24JPEGConstants);
 }
+#endif
 
+#ifdef HAS_RAWTOYJROW_RVV
 void RAWToYJRow_RVV(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
   RGBToYMatrixRow_RVV(src_raw, dst_yj, width, &kRawJPEGConstants);
 }
+#endif
 
+#ifdef HAS_RGB24TOYROW_RVV
 void RGB24ToYRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
   RGBToYMatrixRow_RVV(src_rgb24, dst_y, width, &kRgb24I601Constants);
 }
+#endif
 
+#ifdef HAS_RAWTOYROW_RVV
 void RAWToYRow_RVV(const uint8_t* src_raw, uint8_t* dst_y, int width) {
   RGBToYMatrixRow_RVV(src_raw, dst_y, width, &kRawI601Constants);
 }
+#endif
+
+// Blend src_argb over src_argb1 and store to dst_argb.
+// dst_argb may be src_argb or src_argb1.
+// src_argb: RGB values have already been pre-multiplied by its alpha (a).
+#ifdef HAS_ARGBBLENDROW_RVV
+void ARGBBlendRow_RVV(const uint8_t* src_argb,
+                      const uint8_t* src_argb1,
+                      uint8_t* dst_argb,
+                      int width) {
+  size_t w = (size_t)width;
+  size_t vl = __riscv_vsetvlmax_e8m2();
+  // clamp255((((256 - a) * b) >> 8) + f), f = src_argb, b = src_argb1
+  // = b * (256 - a) / 256 + f
+  // = b - (b * a / 256) + f, with a = alpha of src_argb
+  vuint8m2_t v_255 = __riscv_vmv_v_x_u8m2(255, vl);  // Output alpha.
+  do {
+    vuint8m2_t v_src0_b, v_src0_g, v_src0_r, v_src0_a;
+    vuint8m2_t v_src1_b, v_src1_g, v_src1_r, v_src1_a;
+    vuint8m2_t v_tmp_b, v_tmp_g, v_tmp_r;
+    vuint8m2_t v_dst_b, v_dst_g, v_dst_r;
+    vl = __riscv_vsetvl_e8m2(w);
+    __riscv_vlseg4e8_v_u8m2(&v_src0_b, &v_src0_g, &v_src0_r, &v_src0_a,
+                            src_argb, vl);
+    __riscv_vlseg4e8_v_u8m2(&v_src1_b, &v_src1_g, &v_src1_r, &v_src1_a,
+                            src_argb1, vl);
+
+    v_tmp_b = __riscv_vmulhu_vv_u8m2(v_src1_b, v_src0_a, vl);  // (b * a) >> 8
+    v_tmp_g = __riscv_vmulhu_vv_u8m2(v_src1_g, v_src0_a, vl);
+    v_tmp_r = __riscv_vmulhu_vv_u8m2(v_src1_r, v_src0_a, vl);
+
+    v_dst_b = __riscv_vsub_vv_u8m2(v_src1_b, v_tmp_b, vl);  // b - tmp
+    v_dst_g = __riscv_vsub_vv_u8m2(v_src1_g, v_tmp_g, vl);
+    v_dst_r = __riscv_vsub_vv_u8m2(v_src1_r, v_tmp_r, vl);
+
+    v_dst_b = __riscv_vsaddu_vv_u8m2(v_dst_b, v_src0_b, vl);  // + f, saturated
+    v_dst_g = __riscv_vsaddu_vv_u8m2(v_dst_g, v_src0_g, vl);
+    v_dst_r = __riscv_vsaddu_vv_u8m2(v_dst_r, v_src0_r, vl);
+    __riscv_vsseg4e8_v_u8m2(dst_argb, v_dst_b, v_dst_g, v_dst_r, v_255, vl);
+
+    w -= vl;
+    src_argb += 4 * vl;
+    src_argb1 += 4 * vl;
+    dst_argb += 4 * vl;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_BLENDPLANEROW_RVV
+void BlendPlaneRow_RVV(const uint8_t* src0,
+                       const uint8_t* src1,
+                       const uint8_t* alpha,
+                       uint8_t* dst,
+                       int width) {
+  size_t w = (size_t)width;
+  do {
+    vuint16m8_t v_dst_u16;
+    vuint8m4_t v_dst;
+    size_t vl = __riscv_vsetvl_e8m4(w);
+    vuint8m4_t v_src0 = __riscv_vle8_v_u8m4(src0, vl);
+    vuint8m4_t v_src1 = __riscv_vle8_v_u8m4(src1, vl);
+    vuint8m4_t v_alpha = __riscv_vle8_v_u8m4(alpha, vl);
+    vuint8m4_t v_255_minus_alpha = __riscv_vrsub_vx_u8m4(v_alpha, 255u, vl);
 
+    // dst = (alpha * src0 + (255 - alpha) * src1 + 255) >> 8
+    v_dst_u16 = __riscv_vwmulu_vv_u16m8(v_alpha, v_src0, vl);
+    v_dst_u16 =
+        __riscv_vwmaccu_vv_u16m8(v_dst_u16, v_255_minus_alpha, v_src1, vl);
+    v_dst_u16 = __riscv_vadd_vx_u16m8(v_dst_u16, 255u, vl);  // Sum <= 65280.
+    v_dst = __riscv_vnsrl_wx_u8m4(v_dst_u16, 8, vl);
+
+    __riscv_vse8_v_u8m4(dst, v_dst, vl);
+    w -= vl;
+    src0 += vl;
+    src1 += vl;
+    alpha += vl;
+    dst += vl;
+  } while (w > 0);
+}
+#endif
+
+// Attenuate: (f * a + 255) >> 8
+#ifdef HAS_ARGBATTENUATEROW_RVV
 void ARGBAttenuateRow_RVV(const uint8_t* src_argb,
                           uint8_t* dst_argb,
                           int width) {
   size_t w = (size_t)width;
-  // To match behavior on other platforms, vxrm (fixed-point rounding mode
-  // register) is set to round-to-nearest-up(0).
-  asm volatile("csrwi vxrm, 0");
   do {
     vuint8m2_t v_b, v_g, v_r, v_a;
     vuint16m4_t v_ba_16, v_ga_16, v_ra_16;
     size_t vl = __riscv_vsetvl_e8m2(w);
     __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+    // f * a
     v_ba_16 = __riscv_vwmulu_vv_u16m4(v_b, v_a, vl);
     v_ga_16 = __riscv_vwmulu_vv_u16m4(v_g, v_a, vl);
     v_ra_16 = __riscv_vwmulu_vv_u16m4(v_r, v_a, vl);
-    v_b = __riscv_vnclipu_wx_u8m2(v_ba_16, 8, vl);
-    v_g = __riscv_vnclipu_wx_u8m2(v_ga_16, 8, vl);
-    v_r = __riscv_vnclipu_wx_u8m2(v_ra_16, 8, vl);
+    // f * a + 255
+    v_ba_16 = __riscv_vadd_vx_u16m4(v_ba_16, 255u, vl);
+    v_ga_16 = __riscv_vadd_vx_u16m4(v_ga_16, 255u, vl);
+    v_ra_16 = __riscv_vadd_vx_u16m4(v_ra_16, 255u, vl);
+    // (f * a + 255) >> 8
+    v_b = __riscv_vnsrl_wx_u8m2(v_ba_16, 8, vl);
+    v_g = __riscv_vnsrl_wx_u8m2(v_ga_16, 8, vl);
+    v_r = __riscv_vnsrl_wx_u8m2(v_ra_16, 8, vl);
     __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
     w -= vl;
     src_argb += vl * 4;
     dst_argb += vl * 4;
   } while (w > 0);
 }
+#endif
+
+#ifdef HAS_ARGBEXTRACTALPHAROW_RVV
+void ARGBExtractAlphaRow_RVV(const uint8_t* src_argb,
+                             uint8_t* dst_a,
+                             int width) {
+  size_t w = (size_t)width;
+  do {
+    size_t vl = __riscv_vsetvl_e8m2(w);
+    vuint8m2_t v_b, v_g, v_r, v_a;
+    __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+    __riscv_vse8_v_u8m2(dst_a, v_a, vl);  // Only the 4th byte (A) is kept.
+    w -= vl;
+    src_argb += vl * 4;  // Four source bytes consumed per pixel.
+    dst_a += vl;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_ARGBCOPYYTOALPHAROW_RVV
+void ARGBCopyYToAlphaRow_RVV(const uint8_t* src, uint8_t* dst, int width) {
+  size_t w = (size_t)width;
+  const ptrdiff_t dst_stride = 4;  // Byte distance between written bytes.
+  dst += 3;  // Start at byte 3 of the first 4-byte pixel.
+  do {
+    size_t vl = __riscv_vsetvl_e8m8(w);
+    vuint8m8_t v_a = __riscv_vle8_v_u8m8(src, vl);
+    __riscv_vsse8_v_u8m8(dst, dst_stride, v_a, vl);  // Strided byte store.
+    w -= vl;
+    src += vl;
+    dst += vl * dst_stride;
+  } while (w > 0);
+}
+#endif
 
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
 #endif
 
-#endif  // !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
+#endif  // !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) &&
+        // defined(__clang__)
diff --git a/files/source/row_win.cc b/source/row_win.cc
index 5fb28521..5fb28521 100644
--- a/files/source/row_win.cc
+++ b/source/row_win.cc
diff --git a/files/source/scale.cc b/source/scale.cc
index 80b030dc..b7a602ba 100644
--- a/files/source/scale.cc
+++ b/source/scale.cc
@@ -135,6 +135,14 @@ static void ScalePlaneDown2(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEROWDOWN2_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleRowDown2 = filtering == kFilterNone
+                        ? ScaleRowDown2_RVV
+                        : (filtering == kFilterLinear ? ScaleRowDown2Linear_RVV
+                                                      : ScaleRowDown2Box_RVV);
+  }
+#endif
 
   if (filtering == kFilterLinear) {
     src_stride = 0;
@@ -312,6 +320,11 @@ static void ScalePlaneDown4(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEROWDOWN4_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleRowDown4 = filtering ? ScaleRowDown4Box_RVV : ScaleRowDown4_RVV;
+  }
+#endif
 
   if (filtering == kFilterLinear) {
     src_stride = 0;
@@ -472,6 +485,17 @@ static void ScalePlaneDown34(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEROWDOWN34_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    if (!filtering) {
+      ScaleRowDown34_0 = ScaleRowDown34_RVV;
+      ScaleRowDown34_1 = ScaleRowDown34_RVV;
+    } else {
+      ScaleRowDown34_0 = ScaleRowDown34_0_Box_RVV;
+      ScaleRowDown34_1 = ScaleRowDown34_1_Box_RVV;
+    }
+  }
+#endif
 
   for (y = 0; y < dst_height - 2; y += 3) {
     ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
@@ -687,6 +711,17 @@ static void ScalePlaneDown38(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEROWDOWN38_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    if (!filtering) {
+      ScaleRowDown38_3 = ScaleRowDown38_RVV;
+      ScaleRowDown38_2 = ScaleRowDown38_RVV;
+    } else {
+      ScaleRowDown38_3 = ScaleRowDown38_3_Box_RVV;
+      ScaleRowDown38_2 = ScaleRowDown38_2_Box_RVV;
+    }
+  }
+#endif
 
   for (y = 0; y < dst_height - 2; y += 3) {
     ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
@@ -904,14 +939,14 @@ static void ScaleAddCols1_16_C(int dst_width,
 // one pixel of destination using fixed point (16.16) to step
 // through source, sampling a box of pixel with simple
 // averaging.
-static void ScalePlaneBox(int src_width,
-                          int src_height,
-                          int dst_width,
-                          int dst_height,
-                          int src_stride,
-                          int dst_stride,
-                          const uint8_t* src_ptr,
-                          uint8_t* dst_ptr) {
+static int ScalePlaneBox(int src_width,
+                         int src_height,
+                         int dst_width,
+                         int dst_height,
+                         int src_stride,
+                         int dst_stride,
+                         const uint8_t* src_ptr,
+                         uint8_t* dst_ptr) {
   int j, k;
   // Initial source x/y coordinate and step values as 16.16 fixed point.
   int x = 0;
@@ -925,6 +960,8 @@ static void ScalePlaneBox(int src_width,
   {
     // Allocate a row buffer of uint16_t.
     align_buffer_64(row16, src_width * 2);
+    if (!row16)
+      return 1;
     void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
                          const uint16_t* src_ptr, uint8_t* dst_ptr) =
         (dx & 0xffff) ? ScaleAddCols2_C
@@ -971,6 +1008,11 @@ static void ScalePlaneBox(int src_width,
       }
     }
 #endif
+#if defined(HAS_SCALEADDROW_RVV)
+    if (TestCpuFlag(kCpuHasRVV)) {
+      ScaleAddRow = ScaleAddRow_RVV;
+    }
+#endif
 
     for (j = 0; j < dst_height; ++j) {
       int boxheight;
@@ -991,16 +1033,17 @@ static void ScalePlaneBox(int src_width,
     }
     free_aligned_buffer_64(row16);
   }
+  return 0;
 }
 
-static void ScalePlaneBox_16(int src_width,
-                             int src_height,
-                             int dst_width,
-                             int dst_height,
-                             int src_stride,
-                             int dst_stride,
-                             const uint16_t* src_ptr,
-                             uint16_t* dst_ptr) {
+static int ScalePlaneBox_16(int src_width,
+                            int src_height,
+                            int dst_width,
+                            int dst_height,
+                            int src_stride,
+                            int dst_stride,
+                            const uint16_t* src_ptr,
+                            uint16_t* dst_ptr) {
   int j, k;
   // Initial source x/y coordinate and step values as 16.16 fixed point.
   int x = 0;
@@ -1014,6 +1057,8 @@ static void ScalePlaneBox_16(int src_width,
   {
     // Allocate a row buffer of uint32_t.
     align_buffer_64(row32, src_width * 4);
+    if (!row32)
+      return 1;
     void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
                          const uint32_t* src_ptr, uint16_t* dst_ptr) =
         (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
@@ -1045,18 +1090,19 @@ static void ScalePlaneBox_16(int src_width,
     }
     free_aligned_buffer_64(row32);
   }
+  return 0;
 }
 
 // Scale plane down with bilinear interpolation.
-void ScalePlaneBilinearDown(int src_width,
-                            int src_height,
-                            int dst_width,
-                            int dst_height,
-                            int src_stride,
-                            int dst_stride,
-                            const uint8_t* src_ptr,
-                            uint8_t* dst_ptr,
-                            enum FilterMode filtering) {
+static int ScalePlaneBilinearDown(int src_width,
+                                  int src_height,
+                                  int dst_width,
+                                  int dst_height,
+                                  int src_stride,
+                                  int dst_stride,
+                                  const uint8_t* src_ptr,
+                                  uint8_t* dst_ptr,
+                                  enum FilterMode filtering) {
   // Initial source x/y coordinate and step values as 16.16 fixed point.
   int x = 0;
   int y = 0;
@@ -1065,6 +1111,8 @@ void ScalePlaneBilinearDown(int src_width,
   // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
   // Allocate a row buffer.
   align_buffer_64(row, src_width);
+  if (!row)
+    return 1;
 
   const int max_y = (src_height - 1) << 16;
   int j;
@@ -1174,17 +1222,18 @@ void ScalePlaneBilinearDown(int src_width,
     }
   }
   free_aligned_buffer_64(row);
+  return 0;
 }
 
-void ScalePlaneBilinearDown_16(int src_width,
-                               int src_height,
-                               int dst_width,
-                               int dst_height,
-                               int src_stride,
-                               int dst_stride,
-                               const uint16_t* src_ptr,
-                               uint16_t* dst_ptr,
-                               enum FilterMode filtering) {
+static int ScalePlaneBilinearDown_16(int src_width,
+                                     int src_height,
+                                     int dst_width,
+                                     int dst_height,
+                                     int src_stride,
+                                     int dst_stride,
+                                     const uint16_t* src_ptr,
+                                     uint16_t* dst_ptr,
+                                     enum FilterMode filtering) {
   // Initial source x/y coordinate and step values as 16.16 fixed point.
   int x = 0;
   int y = 0;
@@ -1193,6 +1242,8 @@ void ScalePlaneBilinearDown_16(int src_width,
   // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
   // Allocate a row buffer.
   align_buffer_64(row, src_width * 2);
+  if (!row)
+    return 1;
 
   const int max_y = (src_height - 1) << 16;
   int j;
@@ -1265,18 +1316,19 @@ void ScalePlaneBilinearDown_16(int src_width,
     }
   }
   free_aligned_buffer_64(row);
+  return 0;
 }
 
 // Scale up down with bilinear interpolation.
-void ScalePlaneBilinearUp(int src_width,
-                          int src_height,
-                          int dst_width,
-                          int dst_height,
-                          int src_stride,
-                          int dst_stride,
-                          const uint8_t* src_ptr,
-                          uint8_t* dst_ptr,
-                          enum FilterMode filtering) {
+static int ScalePlaneBilinearUp(int src_width,
+                                int src_height,
+                                int dst_width,
+                                int dst_height,
+                                int src_stride,
+                                int dst_stride,
+                                const uint8_t* src_ptr,
+                                uint8_t* dst_ptr,
+                                enum FilterMode filtering) {
   int j;
   // Initial source x/y coordinate and step values as 16.16 fixed point.
   int x = 0;
@@ -1375,6 +1427,8 @@ void ScalePlaneBilinearUp(int src_width,
     // Allocate 2 row buffers.
     const int row_size = (dst_width + 31) & ~31;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 
     uint8_t* rowptr = row;
     int rowstride = row_size;
@@ -1418,6 +1472,7 @@ void ScalePlaneBilinearUp(int src_width,
     }
     free_aligned_buffer_64(row);
   }
+  return 0;
 }
 
 // Scale plane, horizontally up by 2 times.
@@ -1425,20 +1480,21 @@ void ScalePlaneBilinearUp(int src_width,
 // This is an optimized version for scaling up a plane to 2 times of
 // its original width, using linear interpolation.
 // This is used to scale U and V planes of I422 to I444.
-void ScalePlaneUp2_Linear(int src_width,
-                          int src_height,
-                          int dst_width,
-                          int dst_height,
-                          int src_stride,
-                          int dst_stride,
-                          const uint8_t* src_ptr,
-                          uint8_t* dst_ptr) {
+static void ScalePlaneUp2_Linear(int src_width,
+                                 int src_height,
+                                 int dst_width,
+                                 int dst_height,
+                                 int src_stride,
+                                 int dst_stride,
+                                 const uint8_t* src_ptr,
+                                 uint8_t* dst_ptr) {
   void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) =
       ScaleRowUp2_Linear_Any_C;
   int i;
   int y;
   int dy;
 
+  (void)src_width;
   // This function can only scale up by 2 times horizontally.
   assert(src_width == ((dst_width + 1) / 2));
 
@@ -1465,6 +1521,11 @@ void ScalePlaneUp2_Linear(int src_width,
     ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
   }
 #endif
+#ifdef HAS_SCALEROWUP2_LINEAR_RVV
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleRowUp = ScaleRowUp2_Linear_RVV;
+  }
+#endif
 
   if (dst_height == 1) {
     ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
@@ -1484,19 +1545,20 @@ void ScalePlaneUp2_Linear(int src_width,
 // This is an optimized version for scaling up a plane to 2 times of
 // its original size, using bilinear interpolation.
 // This is used to scale U and V planes of I420 to I444.
-void ScalePlaneUp2_Bilinear(int src_width,
-                            int src_height,
-                            int dst_width,
-                            int dst_height,
-                            int src_stride,
-                            int dst_stride,
-                            const uint8_t* src_ptr,
-                            uint8_t* dst_ptr) {
+static void ScalePlaneUp2_Bilinear(int src_width,
+                                   int src_height,
+                                   int dst_width,
+                                   int dst_height,
+                                   int src_stride,
+                                   int dst_stride,
+                                   const uint8_t* src_ptr,
+                                   uint8_t* dst_ptr) {
   void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                       uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
       ScaleRowUp2_Bilinear_Any_C;
   int x;
 
+  (void)src_width;
   // This function can only scale up by 2 times.
   assert(src_width == ((dst_width + 1) / 2));
   assert(src_height == ((dst_height + 1) / 2));
@@ -1524,6 +1586,11 @@ void ScalePlaneUp2_Bilinear(int src_width,
     Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
   }
 #endif
+#ifdef HAS_SCALEROWUP2_BILINEAR_RVV
+  if (TestCpuFlag(kCpuHasRVV)) {
+    Scale2RowUp = ScaleRowUp2_Bilinear_RVV;
+  }
+#endif
 
   Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
   dst_ptr += dst_stride;
@@ -1544,20 +1611,21 @@ void ScalePlaneUp2_Bilinear(int src_width,
 // its original width, using linear interpolation.
 // stride is in count of uint16_t.
 // This is used to scale U and V planes of I210 to I410 and I212 to I412.
-void ScalePlaneUp2_12_Linear(int src_width,
-                             int src_height,
-                             int dst_width,
-                             int dst_height,
-                             int src_stride,
-                             int dst_stride,
-                             const uint16_t* src_ptr,
-                             uint16_t* dst_ptr) {
+static void ScalePlaneUp2_12_Linear(int src_width,
+                                    int src_height,
+                                    int dst_width,
+                                    int dst_height,
+                                    int src_stride,
+                                    int dst_stride,
+                                    const uint16_t* src_ptr,
+                                    uint16_t* dst_ptr) {
   void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
                      int dst_width) = ScaleRowUp2_Linear_16_Any_C;
   int i;
   int y;
   int dy;
 
+  (void)src_width;
   // This function can only scale up by 2 times horizontally.
   assert(src_width == ((dst_width + 1) / 2));
 
@@ -1598,19 +1666,20 @@ void ScalePlaneUp2_12_Linear(int src_width,
 // its original size, using bilinear interpolation.
 // stride is in count of uint16_t.
 // This is used to scale U and V planes of I010 to I410 and I012 to I412.
-void ScalePlaneUp2_12_Bilinear(int src_width,
-                               int src_height,
-                               int dst_width,
-                               int dst_height,
-                               int src_stride,
-                               int dst_stride,
-                               const uint16_t* src_ptr,
-                               uint16_t* dst_ptr) {
+static void ScalePlaneUp2_12_Bilinear(int src_width,
+                                      int src_height,
+                                      int dst_width,
+                                      int dst_height,
+                                      int src_stride,
+                                      int dst_stride,
+                                      const uint16_t* src_ptr,
+                                      uint16_t* dst_ptr) {
   void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                       uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
       ScaleRowUp2_Bilinear_16_Any_C;
   int x;
 
+  (void)src_width;
   // This function can only scale up by 2 times.
   assert(src_width == ((dst_width + 1) / 2));
   assert(src_height == ((dst_height + 1) / 2));
@@ -1645,20 +1714,21 @@ void ScalePlaneUp2_12_Bilinear(int src_width,
   }
 }
 
-void ScalePlaneUp2_16_Linear(int src_width,
-                             int src_height,
-                             int dst_width,
-                             int dst_height,
-                             int src_stride,
-                             int dst_stride,
-                             const uint16_t* src_ptr,
-                             uint16_t* dst_ptr) {
+static void ScalePlaneUp2_16_Linear(int src_width,
+                                    int src_height,
+                                    int dst_width,
+                                    int dst_height,
+                                    int src_stride,
+                                    int dst_stride,
+                                    const uint16_t* src_ptr,
+                                    uint16_t* dst_ptr) {
   void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
                      int dst_width) = ScaleRowUp2_Linear_16_Any_C;
   int i;
   int y;
   int dy;
 
+  (void)src_width;
   // This function can only scale up by 2 times horizontally.
   assert(src_width == ((dst_width + 1) / 2));
 
@@ -1694,19 +1764,20 @@ void ScalePlaneUp2_16_Linear(int src_width,
   }
 }
 
-void ScalePlaneUp2_16_Bilinear(int src_width,
-                               int src_height,
-                               int dst_width,
-                               int dst_height,
-                               int src_stride,
-                               int dst_stride,
-                               const uint16_t* src_ptr,
-                               uint16_t* dst_ptr) {
+static void ScalePlaneUp2_16_Bilinear(int src_width,
+                                      int src_height,
+                                      int dst_width,
+                                      int dst_height,
+                                      int src_stride,
+                                      int dst_stride,
+                                      const uint16_t* src_ptr,
+                                      uint16_t* dst_ptr) {
   void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                       uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
       ScaleRowUp2_Bilinear_16_Any_C;
   int x;
 
+  (void)src_width;
   // This function can only scale up by 2 times.
   assert(src_width == ((dst_width + 1) / 2));
   assert(src_height == ((dst_height + 1) / 2));
@@ -1741,15 +1812,15 @@ void ScalePlaneUp2_16_Bilinear(int src_width,
   }
 }
 
-void ScalePlaneBilinearUp_16(int src_width,
-                             int src_height,
-                             int dst_width,
-                             int dst_height,
-                             int src_stride,
-                             int dst_stride,
-                             const uint16_t* src_ptr,
-                             uint16_t* dst_ptr,
-                             enum FilterMode filtering) {
+static int ScalePlaneBilinearUp_16(int src_width,
+                                   int src_height,
+                                   int dst_width,
+                                   int dst_height,
+                                   int src_stride,
+                                   int dst_stride,
+                                   const uint16_t* src_ptr,
+                                   uint16_t* dst_ptr,
+                                   enum FilterMode filtering) {
   int j;
   // Initial source x/y coordinate and step values as 16.16 fixed point.
   int x = 0;
@@ -1826,10 +1897,11 @@ void ScalePlaneBilinearUp_16(int src_width,
     // Allocate 2 row buffers.
     const int row_size = (dst_width + 31) & ~31;
     align_buffer_64(row, row_size * 4);
-
-    uint16_t* rowptr = (uint16_t*)row;
     int rowstride = row_size;
     int lasty = yi;
+    uint16_t* rowptr = (uint16_t*)row;
+    if (!row)
+      return 1;
 
     ScaleFilterCols(rowptr, src, dst_width, x, dx);
     if (src_height > 1) {
@@ -1869,6 +1941,7 @@ void ScalePlaneBilinearUp_16(int src_width,
     }
     free_aligned_buffer_64(row);
   }
+  return 0;
 }
 
 // Scale Plane to/from any dimensions, without interpolation.
@@ -1953,15 +2026,15 @@ static void ScalePlaneSimple_16(int src_width,
 // Scale a plane.
 // This function dispatches to a specialized scaler based on scale factor.
 LIBYUV_API
-void ScalePlane(const uint8_t* src,
-                int src_stride,
-                int src_width,
-                int src_height,
-                uint8_t* dst,
-                int dst_stride,
-                int dst_width,
-                int dst_height,
-                enum FilterMode filtering) {
+int ScalePlane(const uint8_t* src,
+               int src_stride,
+               int src_width,
+               int src_height,
+               uint8_t* dst,
+               int dst_stride,
+               int dst_width,
+               int dst_height,
+               enum FilterMode filtering) {
   // Simplify filtering when possible.
   filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                 filtering);
@@ -1977,7 +2050,7 @@ void ScalePlane(const uint8_t* src,
   if (dst_width == src_width && dst_height == src_height) {
     // Straight copy.
     CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
-    return;
+    return 0;
   }
   if (dst_width == src_width && filtering != kFilterBox) {
     int dy = 0;
@@ -1993,7 +2066,7 @@ void ScalePlane(const uint8_t* src,
     // Arbitrary scale vertically, but unscaled horizontally.
     ScalePlaneVertical(src_height, dst_width, dst_height, src_stride,
                        dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering);
-    return;
+    return 0;
   }
   if (dst_width <= Abs(src_width) && dst_height <= src_height) {
     // Scale down.
@@ -2001,69 +2074,67 @@ void ScalePlane(const uint8_t* src,
       // optimized, 3/4
       ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride,
                        dst_stride, src, dst, filtering);
-      return;
+      return 0;
     }
     if (2 * dst_width == src_width && 2 * dst_height == src_height) {
       // optimized, 1/2
       ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride,
                       dst_stride, src, dst, filtering);
-      return;
+      return 0;
     }
     // 3/8 rounded up for odd sized chroma height.
     if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
       // optimized, 3/8
       ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride,
                        dst_stride, src, dst, filtering);
-      return;
+      return 0;
     }
     if (4 * dst_width == src_width && 4 * dst_height == src_height &&
         (filtering == kFilterBox || filtering == kFilterNone)) {
       // optimized, 1/4
       ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride,
                       dst_stride, src, dst, filtering);
-      return;
+      return 0;
     }
   }
   if (filtering == kFilterBox && dst_height * 2 < src_height) {
-    ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride,
-                  dst_stride, src, dst);
-    return;
+    return ScalePlaneBox(src_width, src_height, dst_width, dst_height,
+                         src_stride, dst_stride, src, dst);
   }
   if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
     ScalePlaneUp2_Linear(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst);
-    return;
+    return 0;
   }
   if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
       (filtering == kFilterBilinear || filtering == kFilterBox)) {
     ScalePlaneUp2_Bilinear(src_width, src_height, dst_width, dst_height,
                            src_stride, dst_stride, src, dst);
-    return;
+    return 0;
   }
   if (filtering && dst_height > src_height) {
-    ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
-                         src_stride, dst_stride, src, dst, filtering);
-    return;
+    return ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
+                                src_stride, dst_stride, src, dst, filtering);
   }
   if (filtering) {
-    ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
-                           src_stride, dst_stride, src, dst, filtering);
-    return;
+    return ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
+                                  src_stride, dst_stride, src, dst, filtering);
   }
   ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride,
                    dst_stride, src, dst);
+  return 0;
 }
 
 LIBYUV_API
-void ScalePlane_16(const uint16_t* src,
-                   int src_stride,
-                   int src_width,
-                   int src_height,
-                   uint16_t* dst,
-                   int dst_stride,
-                   int dst_width,
-                   int dst_height,
-                   enum FilterMode filtering) {
+int ScalePlane_16(const uint16_t* src,
+                  int src_stride,
+                  int src_width,
+                  int src_height,
+                  uint16_t* dst,
+                  int dst_stride,
+                  int dst_width,
+                  int dst_height,
+                  enum FilterMode filtering) {
   // Simplify filtering when possible.
   filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                 filtering);
@@ -2079,7 +2150,7 @@ void ScalePlane_16(const uint16_t* src,
   if (dst_width == src_width && dst_height == src_height) {
     // Straight copy.
     CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
-    return;
+    return 0;
   }
   if (dst_width == src_width && filtering != kFilterBox) {
     int dy = 0;
@@ -2098,7 +2169,7 @@ void ScalePlane_16(const uint16_t* src,
     // Arbitrary scale vertically, but unscaled horizontally.
     ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
                           dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering);
-    return;
+    return 0;
   }
   if (dst_width <= Abs(src_width) && dst_height <= src_height) {
     // Scale down.
@@ -2106,69 +2177,68 @@ void ScalePlane_16(const uint16_t* src,
       // optimized, 3/4
       ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
                           src_stride, dst_stride, src, dst, filtering);
-      return;
+      return 0;
     }
     if (2 * dst_width == src_width && 2 * dst_height == src_height) {
       // optimized, 1/2
       ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
-      return;
+      return 0;
     }
     // 3/8 rounded up for odd sized chroma height.
     if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
       // optimized, 3/8
       ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
                           src_stride, dst_stride, src, dst, filtering);
-      return;
+      return 0;
     }
     if (4 * dst_width == src_width && 4 * dst_height == src_height &&
         (filtering == kFilterBox || filtering == kFilterNone)) {
       // optimized, 1/4
       ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
-      return;
+      return 0;
     }
   }
   if (filtering == kFilterBox && dst_height * 2 < src_height) {
-    ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, src_stride,
-                     dst_stride, src, dst);
-    return;
+    return ScalePlaneBox_16(src_width, src_height, dst_width, dst_height,
+                            src_stride, dst_stride, src, dst);
   }
   if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
     ScalePlaneUp2_16_Linear(src_width, src_height, dst_width, dst_height,
                             src_stride, dst_stride, src, dst);
-    return;
+    return 0;
   }
   if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
       (filtering == kFilterBilinear || filtering == kFilterBox)) {
     ScalePlaneUp2_16_Bilinear(src_width, src_height, dst_width, dst_height,
                               src_stride, dst_stride, src, dst);
-    return;
+    return 0;
   }
   if (filtering && dst_height > src_height) {
-    ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
-                            src_stride, dst_stride, src, dst, filtering);
-    return;
+    return ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
+                                   src_stride, dst_stride, src, dst, filtering);
   }
   if (filtering) {
-    ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
-                              src_stride, dst_stride, src, dst, filtering);
-    return;
+    return ScalePlaneBilinearDown_16(src_width, src_height, dst_width,
+                                     dst_height, src_stride, dst_stride, src,
+                                     dst, filtering);
   }
   ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride,
                       dst_stride, src, dst);
+  return 0;
 }
 
 LIBYUV_API
-void ScalePlane_12(const uint16_t* src,
-                   int src_stride,
-                   int src_width,
-                   int src_height,
-                   uint16_t* dst,
-                   int dst_stride,
-                   int dst_width,
-                   int dst_height,
-                   enum FilterMode filtering) {
+int ScalePlane_12(const uint16_t* src,
+                  int src_stride,
+                  int src_width,
+                  int src_height,
+                  uint16_t* dst,
+                  int dst_stride,
+                  int dst_width,
+                  int dst_height,
+                  enum FilterMode filtering) {
   // Simplify filtering when possible.
   filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                 filtering);
@@ -2183,17 +2253,17 @@ void ScalePlane_12(const uint16_t* src,
   if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
     ScalePlaneUp2_12_Linear(src_width, src_height, dst_width, dst_height,
                             src_stride, dst_stride, src, dst);
-    return;
+    return 0;
   }
   if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
       (filtering == kFilterBilinear || filtering == kFilterBox)) {
     ScalePlaneUp2_12_Bilinear(src_width, src_height, dst_width, dst_height,
                               src_stride, dst_stride, src, dst);
-    return;
+    return 0;
   }
 
-  ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride,
-                dst_width, dst_height, filtering);
+  return ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride,
+                       dst_width, dst_height, filtering);
 }
 
 // Scale an I420 image.
@@ -2221,6 +2291,7 @@ int I420Scale(const uint8_t* src_y,
   int src_halfheight = SUBSAMPLE(src_height, 1, 1);
   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
   int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
+  int r;
 
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
@@ -2228,13 +2299,19 @@ int I420Scale(const uint8_t* src_y,
     return -1;
   }
 
-  ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-             dst_width, dst_height, filtering);
-  ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
-             dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
-  ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
-             dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
-  return 0;
+  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
+                 dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
+                 dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
+                 dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
+  return r;
 }
 
 LIBYUV_API
@@ -2259,6 +2336,7 @@ int I420Scale_16(const uint16_t* src_y,
   int src_halfheight = SUBSAMPLE(src_height, 1, 1);
   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
   int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
+  int r;
 
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
@@ -2266,13 +2344,19 @@ int I420Scale_16(const uint16_t* src_y,
     return -1;
   }
 
-  ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-                dst_width, dst_height, filtering);
-  ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
-                dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
-  ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
-                dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
-  return 0;
+  r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
+                    dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
+                    dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
+                    dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
+  return r;
 }
 
 LIBYUV_API
@@ -2297,6 +2381,7 @@ int I420Scale_12(const uint16_t* src_y,
   int src_halfheight = SUBSAMPLE(src_height, 1, 1);
   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
   int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
+  int r;
 
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
@@ -2304,13 +2389,19 @@ int I420Scale_12(const uint16_t* src_y,
     return -1;
   }
 
-  ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-                dst_width, dst_height, filtering);
-  ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
-                dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
-  ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
-                dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
-  return 0;
+  r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
+                    dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
+                    dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
+                    dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
+  return r;
 }
 
 // Scale an I444 image.
@@ -2334,19 +2425,27 @@ int I444Scale(const uint8_t* src_y,
               int dst_width,
               int dst_height,
               enum FilterMode filtering) {
+  int r;
+
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
       dst_width <= 0 || dst_height <= 0) {
     return -1;
   }
 
-  ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-             dst_width, dst_height, filtering);
-  ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
-             dst_width, dst_height, filtering);
-  ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
-             dst_width, dst_height, filtering);
-  return 0;
+  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
+                 dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u,
+                 dst_stride_u, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v,
+                 dst_stride_v, dst_width, dst_height, filtering);
+  return r;
 }
 
 LIBYUV_API
@@ -2367,19 +2466,27 @@ int I444Scale_16(const uint16_t* src_y,
                  int dst_width,
                  int dst_height,
                  enum FilterMode filtering) {
+  int r;
+
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
       dst_width <= 0 || dst_height <= 0) {
     return -1;
   }
 
-  ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-                dst_width, dst_height, filtering);
-  ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
-                dst_width, dst_height, filtering);
-  ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
-                dst_width, dst_height, filtering);
-  return 0;
+  r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
+                    dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u,
+                    dst_stride_u, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v,
+                    dst_stride_v, dst_width, dst_height, filtering);
+  return r;
 }
 
 LIBYUV_API
@@ -2400,19 +2507,27 @@ int I444Scale_12(const uint16_t* src_y,
                  int dst_width,
                  int dst_height,
                  enum FilterMode filtering) {
+  int r;
+
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
       dst_width <= 0 || dst_height <= 0) {
     return -1;
   }
 
-  ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-                dst_width, dst_height, filtering);
-  ScalePlane_12(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
-                dst_width, dst_height, filtering);
-  ScalePlane_12(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
-                dst_width, dst_height, filtering);
-  return 0;
+  r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
+                    dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_12(src_u, src_stride_u, src_width, src_height, dst_u,
+                    dst_stride_u, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_12(src_v, src_stride_v, src_width, src_height, dst_v,
+                    dst_stride_v, dst_width, dst_height, filtering);
+  return r;
 }
 
 // Scale an I422 image.
@@ -2438,6 +2553,7 @@ int I422Scale(const uint8_t* src_y,
               enum FilterMode filtering) {
   int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
+  int r;
 
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
@@ -2445,13 +2561,19 @@ int I422Scale(const uint8_t* src_y,
     return -1;
   }
 
-  ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-             dst_width, dst_height, filtering);
-  ScalePlane(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
-             dst_stride_u, dst_halfwidth, dst_height, filtering);
-  ScalePlane(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
-             dst_stride_v, dst_halfwidth, dst_height, filtering);
-  return 0;
+  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
+                 dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
+                 dst_stride_u, dst_halfwidth, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
+                 dst_stride_v, dst_halfwidth, dst_height, filtering);
+  return r;
 }
 
 LIBYUV_API
@@ -2474,6 +2596,7 @@ int I422Scale_16(const uint16_t* src_y,
                  enum FilterMode filtering) {
   int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
+  int r;
 
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
@@ -2481,13 +2604,19 @@ int I422Scale_16(const uint16_t* src_y,
     return -1;
   }
 
-  ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-                dst_width, dst_height, filtering);
-  ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
-                dst_stride_u, dst_halfwidth, dst_height, filtering);
-  ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
-                dst_stride_v, dst_halfwidth, dst_height, filtering);
-  return 0;
+  r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
+                    dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
+                    dst_stride_u, dst_halfwidth, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
+                    dst_stride_v, dst_halfwidth, dst_height, filtering);
+  return r;
 }
 
 LIBYUV_API
@@ -2510,6 +2639,7 @@ int I422Scale_12(const uint16_t* src_y,
                  enum FilterMode filtering) {
   int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
+  int r;
 
   if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
@@ -2517,13 +2647,19 @@ int I422Scale_12(const uint16_t* src_y,
     return -1;
   }
 
-  ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-                dst_width, dst_height, filtering);
-  ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
-                dst_stride_u, dst_halfwidth, dst_height, filtering);
-  ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
-                dst_stride_v, dst_halfwidth, dst_height, filtering);
-  return 0;
+  r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
+                    dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
+                    dst_stride_u, dst_halfwidth, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
+                    dst_stride_v, dst_halfwidth, dst_height, filtering);
+  return r;
 }
 
 // Scale an NV12 image.
@@ -2547,6 +2683,7 @@ int NV12Scale(const uint8_t* src_y,
   int src_halfheight = SUBSAMPLE(src_height, 1, 1);
   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
   int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
+  int r;
 
   if (!src_y || !src_uv || src_width <= 0 || src_height == 0 ||
       src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv ||
@@ -2554,11 +2691,14 @@ int NV12Scale(const uint8_t* src_y,
     return -1;
   }
 
-  ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
-             dst_width, dst_height, filtering);
-  UVScale(src_uv, src_stride_uv, src_halfwidth, src_halfheight, dst_uv,
-          dst_stride_uv, dst_halfwidth, dst_halfheight, filtering);
-  return 0;
+  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
+                 dst_stride_y, dst_width, dst_height, filtering);
+  if (r != 0) {
+    return r;
+  }
+  r = UVScale(src_uv, src_stride_uv, src_halfwidth, src_halfheight, dst_uv,
+              dst_stride_uv, dst_halfwidth, dst_halfheight, filtering);
+  return r;
 }
 
 // Deprecated api
diff --git a/files/source/scale_any.cc b/source/scale_any.cc
index f6576874..f6576874 100644
--- a/files/source/scale_any.cc
+++ b/source/scale_any.cc
diff --git a/files/source/scale_argb.cc b/source/scale_argb.cc
index ddd8d29e..18bdeb86 100644
--- a/files/source/scale_argb.cc
+++ b/source/scale_argb.cc
@@ -16,6 +16,7 @@
 #include "libyuv/cpu_id.h"
 #include "libyuv/planar_functions.h"  // For CopyARGB
 #include "libyuv/row.h"
+#include "libyuv/scale_argb.h"
 #include "libyuv/scale_row.h"
 
 #ifdef __cplusplus
@@ -127,6 +128,15 @@ static void ScaleARGBDown2(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEARGBROWDOWN2_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleARGBRowDown2 =
+        filtering == kFilterNone
+            ? ScaleARGBRowDown2_RVV
+            : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_RVV
+                                          : ScaleARGBRowDown2Box_RVV);
+  }
+#endif
 
   if (filtering == kFilterLinear) {
     src_stride = 0;
@@ -141,22 +151,27 @@ static void ScaleARGBDown2(int src_width,
 // ScaleARGB ARGB, 1/4
 // This is an optimized version for scaling down a ARGB to 1/4 of
 // its original size.
-static void ScaleARGBDown4Box(int src_width,
-                              int src_height,
-                              int dst_width,
-                              int dst_height,
-                              int src_stride,
-                              int dst_stride,
-                              const uint8_t* src_argb,
-                              uint8_t* dst_argb,
-                              int x,
-                              int dx,
-                              int y,
-                              int dy) {
+static int ScaleARGBDown4Box(int src_width,
+                             int src_height,
+                             int dst_width,
+                             int dst_height,
+                             int src_stride,
+                             int dst_stride,
+                             const uint8_t* src_argb,
+                             uint8_t* dst_argb,
+                             int x,
+                             int dx,
+                             int y,
+                             int dy) {
   int j;
   // Allocate 2 rows of ARGB.
   const int row_size = (dst_width * 2 * 4 + 31) & ~31;
+  // TODO(fbarchard): Remove this row buffer and implement a ScaleARGBRowDown4
+  // but implemented via a 2 pass wrapper that uses a very small array on the
+  // stack with a horizontal loop.
   align_buffer_64(row, row_size * 2);
+  if (!row)
+    return 1;
   int row_stride = src_stride * (dy >> 16);
   void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
                             uint8_t* dst_argb, int dst_width) =
@@ -184,6 +199,11 @@ static void ScaleARGBDown4Box(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEARGBROWDOWN2_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleARGBRowDown2 = ScaleARGBRowDown2Box_RVV;
+  }
+#endif
 
   for (j = 0; j < dst_height; ++j) {
     ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
@@ -194,6 +214,7 @@ static void ScaleARGBDown4Box(int src_width,
     dst_argb += dst_stride;
   }
   free_aligned_buffer_64(row);
+  return 0;
 }
 
 // ScaleARGB ARGB Even
@@ -263,6 +284,16 @@ static void ScaleARGBDownEven(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEARGBROWDOWNEVENBOX_RVV)
+  if (filtering && TestCpuFlag(kCpuHasRVV)) {
+    ScaleARGBRowDownEven = ScaleARGBRowDownEvenBox_RVV;
+  }
+#endif
+#if defined(HAS_SCALEARGBROWDOWNEVEN_RVV)
+  if (!filtering && TestCpuFlag(kCpuHasRVV)) {
+    ScaleARGBRowDownEven = ScaleARGBRowDownEven_RVV;
+  }
+#endif
 
   if (filtering == kFilterLinear) {
     src_stride = 0;
@@ -275,19 +306,19 @@ static void ScaleARGBDownEven(int src_width,
 }
 
 // Scale ARGB down with bilinear interpolation.
-static void ScaleARGBBilinearDown(int src_width,
-                                  int src_height,
-                                  int dst_width,
-                                  int dst_height,
-                                  int src_stride,
-                                  int dst_stride,
-                                  const uint8_t* src_argb,
-                                  uint8_t* dst_argb,
-                                  int x,
-                                  int dx,
-                                  int y,
-                                  int dy,
-                                  enum FilterMode filtering) {
+static int ScaleARGBBilinearDown(int src_width,
+                                 int src_height,
+                                 int dst_width,
+                                 int dst_height,
+                                 int src_stride,
+                                 int dst_stride,
+                                 const uint8_t* src_argb,
+                                 uint8_t* dst_argb,
+                                 int x,
+                                 int dx,
+                                 int y,
+                                 int dy,
+                                 enum FilterMode filtering) {
   int j;
   void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
                          ptrdiff_t src_stride, int dst_width,
@@ -386,6 +417,8 @@ static void ScaleARGBBilinearDown(int src_width,
   // Allocate a row of ARGB.
   {
     align_buffer_64(row, clip_src_width * 4);
+    if (!row)
+      return 1;
 
     const int max_y = (src_height - 1) << 16;
     if (y > max_y) {
@@ -409,22 +442,23 @@ static void ScaleARGBBilinearDown(int src_width,
     }
     free_aligned_buffer_64(row);
   }
+  return 0;
 }
 
 // Scale ARGB up with bilinear interpolation.
-static void ScaleARGBBilinearUp(int src_width,
-                                int src_height,
-                                int dst_width,
-                                int dst_height,
-                                int src_stride,
-                                int dst_stride,
-                                const uint8_t* src_argb,
-                                uint8_t* dst_argb,
-                                int x,
-                                int dx,
-                                int y,
-                                int dy,
-                                enum FilterMode filtering) {
+static int ScaleARGBBilinearUp(int src_width,
+                               int src_height,
+                               int dst_width,
+                               int dst_height,
+                               int src_stride,
+                               int dst_stride,
+                               const uint8_t* src_argb,
+                               uint8_t* dst_argb,
+                               int x,
+                               int dx,
+                               int y,
+                               int dy,
+                               enum FilterMode filtering) {
   int j;
   void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
                          ptrdiff_t src_stride, int dst_width,
@@ -560,6 +594,8 @@ static void ScaleARGBBilinearUp(int src_width,
     // Allocate 2 rows of ARGB.
     const int row_size = (dst_width * 4 + 31) & ~31;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 
     uint8_t* rowptr = row;
     int rowstride = row_size;
@@ -603,27 +639,28 @@ static void ScaleARGBBilinearUp(int src_width,
     }
     free_aligned_buffer_64(row);
   }
+  return 0;
 }
 
 #ifdef YUVSCALEUP
 // Scale YUV to ARGB up with bilinear interpolation.
-static void ScaleYUVToARGBBilinearUp(int src_width,
-                                     int src_height,
-                                     int dst_width,
-                                     int dst_height,
-                                     int src_stride_y,
-                                     int src_stride_u,
-                                     int src_stride_v,
-                                     int dst_stride_argb,
-                                     const uint8_t* src_y,
-                                     const uint8_t* src_u,
-                                     const uint8_t* src_v,
-                                     uint8_t* dst_argb,
-                                     int x,
-                                     int dx,
-                                     int y,
-                                     int dy,
-                                     enum FilterMode filtering) {
+static int ScaleYUVToARGBBilinearUp(int src_width,
+                                    int src_height,
+                                    int dst_width,
+                                    int dst_height,
+                                    int src_stride_y,
+                                    int src_stride_u,
+                                    int src_stride_v,
+                                    int dst_stride_argb,
+                                    const uint8_t* src_y,
+                                    const uint8_t* src_u,
+                                    const uint8_t* src_v,
+                                    uint8_t* dst_argb,
+                                    int x,
+                                    int dx,
+                                    int y,
+                                    int dy,
+                                    enum FilterMode filtering) {
   int j;
   void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
                         const uint8_t* v_buf, uint8_t* rgb_buf, int width) =
@@ -825,16 +862,17 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
   const uint8_t* src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
   const uint8_t* src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
 
-  // Allocate 2 rows of ARGB.
+  // Allocate 1 row of ARGB for source conversion and 2 rows of ARGB
+  // scaled horizontally to the destination width.
   const int row_size = (dst_width * 4 + 31) & ~31;
-  align_buffer_64(row, row_size * 2);
-
-  // Allocate 1 row of ARGB for source conversion.
-  align_buffer_64(argb_row, src_width * 4);
+  align_buffer_64(row, row_size * 2 + src_width * 4);
 
+  uint8_t* argb_row = row + row_size * 2;
   uint8_t* rowptr = row;
   int rowstride = row_size;
   int lasty = yi;
+  if (!row)
+    return 1;
 
   // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
   ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
@@ -889,7 +927,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
     y += dy;
   }
   free_aligned_buffer_64(row);
-  free_aligned_buffer_64(row_argb);
+  return 0;
 }
 #endif
 
@@ -964,19 +1002,19 @@ static void ScaleARGBSimple(int src_width,
 // ScaleARGB a ARGB.
 // This function in turn calls a scaling function
 // suitable for handling the desired resolutions.
-static void ScaleARGB(const uint8_t* src,
-                      int src_stride,
-                      int src_width,
-                      int src_height,
-                      uint8_t* dst,
-                      int dst_stride,
-                      int dst_width,
-                      int dst_height,
-                      int clip_x,
-                      int clip_y,
-                      int clip_width,
-                      int clip_height,
-                      enum FilterMode filtering) {
+static int ScaleARGB(const uint8_t* src,
+                     int src_stride,
+                     int src_width,
+                     int src_height,
+                     uint8_t* dst,
+                     int dst_stride,
+                     int dst_width,
+                     int dst_height,
+                     int clip_x,
+                     int clip_y,
+                     int clip_width,
+                     int clip_height,
+                     enum FilterMode filtering) {
   // Initial source x/y coordinate and step values as 16.16 fixed point.
   int x = 0;
   int y = 0;
@@ -1021,18 +1059,18 @@ static void ScaleARGB(const uint8_t* src,
           ScaleARGBDown2(src_width, src_height, clip_width, clip_height,
                          src_stride, dst_stride, src, dst, x, dx, y, dy,
                          filtering);
-          return;
+          return 0;
         }
         if (dx == 0x40000 && filtering == kFilterBox) {
           // Optimized 1/4 box downsample.
-          ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height,
-                            src_stride, dst_stride, src, dst, x, dx, y, dy);
-          return;
+          return ScaleARGBDown4Box(src_width, src_height, clip_width,
+                                   clip_height, src_stride, dst_stride, src,
+                                   dst, x, dx, y, dy);
         }
         ScaleARGBDownEven(src_width, src_height, clip_width, clip_height,
                           src_stride, dst_stride, src, dst, x, dx, y, dy,
                           filtering);
-        return;
+        return 0;
       }
       // Optimized odd scale down. ie 3, 5, 7, 9x.
       if ((dx & 0x10000) && (dy & 0x10000)) {
@@ -1041,7 +1079,7 @@ static void ScaleARGB(const uint8_t* src,
           // Straight copy.
           ARGBCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4,
                    src_stride, dst, dst_stride, clip_width, clip_height);
-          return;
+          return 0;
         }
       }
     }
@@ -1050,22 +1088,21 @@ static void ScaleARGB(const uint8_t* src,
     // Arbitrary scale vertically, but unscaled horizontally.
     ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
                        dst_stride, src, dst, x, y, dy, /*bpp=*/4, filtering);
-    return;
+    return 0;
   }
   if (filtering && dy < 65536) {
-    ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height,
-                        src_stride, dst_stride, src, dst, x, dx, y, dy,
-                        filtering);
-    return;
+    return ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height,
+                               src_stride, dst_stride, src, dst, x, dx, y, dy,
+                               filtering);
   }
   if (filtering) {
-    ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height,
-                          src_stride, dst_stride, src, dst, x, dx, y, dy,
-                          filtering);
-    return;
+    return ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height,
+                                 src_stride, dst_stride, src, dst, x, dx, y, dy,
+                                 filtering);
   }
   ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride,
                   dst_stride, src, dst, x, dx, y, dy);
+  return 0;
 }
 
 LIBYUV_API
@@ -1089,10 +1126,9 @@ int ARGBScaleClip(const uint8_t* src_argb,
       (clip_y + clip_height) > dst_height) {
     return -1;
   }
-  ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
-            dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width,
-            clip_height, filtering);
-  return 0;
+  return ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
+                   dst_stride_argb, dst_width, dst_height, clip_x, clip_y,
+                   clip_width, clip_height, filtering);
 }
 
 // Scale an ARGB image.
@@ -1110,10 +1146,9 @@ int ARGBScale(const uint8_t* src_argb,
       src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) {
     return -1;
   }
-  ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
-            dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, dst_height,
-            filtering);
-  return 0;
+  return ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
+                   dst_stride_argb, dst_width, dst_height, 0, 0, dst_width,
+                   dst_height, filtering);
 }
 
 // Scale with YUV conversion to ARGB and clipping.
@@ -1137,8 +1172,11 @@ int YUVToARGBScaleClip(const uint8_t* src_y,
                        int clip_width,
                        int clip_height,
                        enum FilterMode filtering) {
-  uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4);
   int r;
+  uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4);
+  if (!argb_buffer) {
+    return 1;  // Out of memory runtime error.
+  }
   (void)src_fourcc;  // TODO(fbarchard): implement and/or assert.
   (void)dst_fourcc;
   I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
diff --git a/files/source/scale_common.cc b/source/scale_common.cc
index 77455903..d07a39af 100644
--- a/files/source/scale_common.cc
+++ b/source/scale_common.cc
@@ -1964,35 +1964,6 @@ void ScaleSlope(int src_width,
 }
 #undef CENTERSTART
 
-// Read 8x2 upsample with filtering and write 16x1.
-// actually reads an extra pixel, so 9x2.
-void ScaleRowUp2_16_C(const uint16_t* src_ptr,
-                      ptrdiff_t src_stride,
-                      uint16_t* dst,
-                      int dst_width) {
-  const uint16_t* src2 = src_ptr + src_stride;
-
-  int x;
-  for (x = 0; x < dst_width - 1; x += 2) {
-    uint16_t p0 = src_ptr[0];
-    uint16_t p1 = src_ptr[1];
-    uint16_t p2 = src2[0];
-    uint16_t p3 = src2[1];
-    dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4;
-    dst[1] = (p0 * 3 + p1 * 9 + p2 + p3 * 3 + 8) >> 4;
-    ++src_ptr;
-    ++src2;
-    dst += 2;
-  }
-  if (dst_width & 1) {
-    uint16_t p0 = src_ptr[0];
-    uint16_t p1 = src_ptr[1];
-    uint16_t p2 = src2[0];
-    uint16_t p3 = src2[1];
-    dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4;
-  }
-}
-
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
diff --git a/files/source/scale_gcc.cc b/source/scale_gcc.cc
index 17eeffad..17eeffad 100644
--- a/files/source/scale_gcc.cc
+++ b/source/scale_gcc.cc
diff --git a/files/source/scale_lsx.cc b/source/scale_lsx.cc
index bfe5e9fb..bfe5e9fb 100644
--- a/files/source/scale_lsx.cc
+++ b/source/scale_lsx.cc
diff --git a/files/source/scale_msa.cc b/source/scale_msa.cc
index 482a521f..482a521f 100644
--- a/files/source/scale_msa.cc
+++ b/source/scale_msa.cc
diff --git a/files/source/scale_neon.cc b/source/scale_neon.cc
index ccc75106..ccc75106 100644
--- a/files/source/scale_neon.cc
+++ b/source/scale_neon.cc
diff --git a/files/source/scale_neon64.cc b/source/scale_neon64.cc
index ad06ee83..7c072380 100644
--- a/files/source/scale_neon64.cc
+++ b/source/scale_neon64.cc
@@ -1118,101 +1118,6 @@ void ScaleFilterCols_NEON(uint8_t* dst_ptr,
 
 #undef LOAD2_DATA8_LANE
 
-// 16x2 -> 16x1
-void ScaleFilterRows_NEON(uint8_t* dst_ptr,
-                          const uint8_t* src_ptr,
-                          ptrdiff_t src_stride,
-                          int dst_width,
-                          int source_y_fraction) {
-  int y_fraction = 256 - source_y_fraction;
-  asm volatile(
-      "cmp         %w4, #0                       \n"
-      "b.eq        100f                          \n"
-      "add         %2, %2, %1                    \n"
-      "cmp         %w4, #64                      \n"
-      "b.eq        75f                           \n"
-      "cmp         %w4, #128                     \n"
-      "b.eq        50f                           \n"
-      "cmp         %w4, #192                     \n"
-      "b.eq        25f                           \n"
-
-      "dup         v5.8b, %w4                    \n"
-      "dup         v4.8b, %w5                    \n"
-      // General purpose row blend.
-      "1:                                        \n"
-      "ld1         {v0.16b}, [%1], #16           \n"
-      "ld1         {v1.16b}, [%2], #16           \n"
-      "subs        %w3, %w3, #16                 \n"
-      "umull       v6.8h, v0.8b, v4.8b           \n"
-      "umull2      v7.8h, v0.16b, v4.16b         \n"
-      "prfm        pldl1keep, [%1, 448]          \n"  // prefetch 7 lines ahead
-      "umlal       v6.8h, v1.8b, v5.8b           \n"
-      "umlal2      v7.8h, v1.16b, v5.16b         \n"
-      "prfm        pldl1keep, [%2, 448]          \n"
-      "rshrn       v0.8b, v6.8h, #8              \n"
-      "rshrn2      v0.16b, v7.8h, #8             \n"
-      "st1         {v0.16b}, [%0], #16           \n"
-      "b.gt        1b                            \n"
-      "b           99f                           \n"
-
-      // Blend 25 / 75.
-      "25:                                       \n"
-      "ld1         {v0.16b}, [%1], #16           \n"
-      "ld1         {v1.16b}, [%2], #16           \n"
-      "subs        %w3, %w3, #16                 \n"
-      "urhadd      v0.16b, v0.16b, v1.16b        \n"
-      "prfm        pldl1keep, [%1, 448]          \n"  // prefetch 7 lines ahead
-      "urhadd      v0.16b, v0.16b, v1.16b        \n"
-      "prfm        pldl1keep, [%2, 448]          \n"
-      "st1         {v0.16b}, [%0], #16           \n"
-      "b.gt        25b                           \n"
-      "b           99f                           \n"
-
-      // Blend 50 / 50.
-      "50:                                       \n"
-      "ld1         {v0.16b}, [%1], #16           \n"
-      "ld1         {v1.16b}, [%2], #16           \n"
-      "subs        %w3, %w3, #16                 \n"
-      "prfm        pldl1keep, [%1, 448]          \n"  // prefetch 7 lines ahead
-      "urhadd      v0.16b, v0.16b, v1.16b        \n"
-      "prfm        pldl1keep, [%2, 448]          \n"
-      "st1         {v0.16b}, [%0], #16           \n"
-      "b.gt        50b                           \n"
-      "b           99f                           \n"
-
-      // Blend 75 / 25.
-      "75:                                       \n"
-      "ld1         {v1.16b}, [%1], #16           \n"
-      "ld1         {v0.16b}, [%2], #16           \n"
-      "subs        %w3, %w3, #16                 \n"
-      "urhadd      v0.16b, v0.16b, v1.16b        \n"
-      "prfm        pldl1keep, [%1, 448]          \n"  // prefetch 7 lines ahead
-      "urhadd      v0.16b, v0.16b, v1.16b        \n"
-      "prfm        pldl1keep, [%2, 448]          \n"
-      "st1         {v0.16b}, [%0], #16           \n"
-      "b.gt        75b                           \n"
-      "b           99f                           \n"
-
-      // Blend 100 / 0 - Copy row unchanged.
-      "100:                                      \n"
-      "ld1         {v0.16b}, [%1], #16           \n"
-      "subs        %w3, %w3, #16                 \n"
-      "prfm        pldl1keep, [%1, 448]          \n"  // prefetch 7 lines ahead
-      "st1         {v0.16b}, [%0], #16           \n"
-      "b.gt        100b                          \n"
-
-      "99:                                       \n"
-      "st1         {v0.b}[15], [%0]              \n"
-      : "+r"(dst_ptr),            // %0
-        "+r"(src_ptr),            // %1
-        "+r"(src_stride),         // %2
-        "+r"(dst_width),          // %3
-        "+r"(source_y_fraction),  // %4
-        "+r"(y_fraction)          // %5
-      :
-      : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "memory", "cc");
-}
-
 void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr,
                             ptrdiff_t src_stride,
                             uint8_t* dst,
diff --git a/files/source/scale_rgb.cc b/source/scale_rgb.cc
index 8db59b56..8db59b56 100644
--- a/files/source/scale_rgb.cc
+++ b/source/scale_rgb.cc
diff --git a/source/scale_rvv.cc b/source/scale_rvv.cc
new file mode 100644
index 00000000..de037e45
--- /dev/null
+++ b/source/scale_rvv.cc
@@ -0,0 +1,1040 @@
+/*
+ *  Copyright 2023 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * Copyright (c) 2023 SiFive, Inc. All rights reserved.
+ *
+ * Contributed by Darren Hsieh <darren.hsieh@sifive.com>
+ * Contributed by Bruce Lai <bruce.lai@sifive.com>
+ */
+
+#include "libyuv/row.h"
+#include "libyuv/scale_row.h"
+
+// This module is for clang rvv. GCC hasn't supported segment load & store.
+#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) && \
+    defined(__clang__)
+#include <assert.h>
+#include <riscv_vector.h>
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#ifdef HAS_SCALEADDROW_RVV  // Accumulate a row of bytes into 16-bit sums.
+void ScaleAddRow_RVV(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
+  size_t w = (size_t)src_width;  // Elements still to be processed.
+  do {  // Note: the body runs at least once, even when src_width is 0.
+    size_t vl = __riscv_vsetvl_e8m4(w);  // Elements handled this pass.
+    vuint8m4_t v_src = __riscv_vle8_v_u8m4(src_ptr, vl);
+    vuint16m8_t v_dst = __riscv_vle16_v_u16m8(dst_ptr, vl);
+    // Use widening multiply-add instead of widening + add
+    v_dst = __riscv_vwmaccu_vx_u16m8(v_dst, 1, v_src, vl);  // dst += 1 * src
+    __riscv_vse16_v_u16m8(dst_ptr, v_dst, vl);
+    w -= vl;
+    src_ptr += vl;
+    dst_ptr += vl;
+  } while (w > 0);
+}
+#endif  // HAS_SCALEADDROW_RVV
+
+#ifdef HAS_SCALEARGBROWDOWN2_RVV  // 1/2 point sample: keep 1 of every 2 pixels.
+void ScaleARGBRowDown2_RVV(const uint8_t* src_argb,
+                           ptrdiff_t src_stride,
+                           uint8_t* dst_argb,
+                           int dst_width) {
+  (void)src_stride;  // Only one source row is read.
+  size_t w = (size_t)dst_width;  // Output pixels still to be written.
+  const uint64_t* src = (const uint64_t*)(src_argb);  // Read 8 bytes at a time.
+  uint32_t* dst = (uint32_t*)(dst_argb);  // Write 4 bytes at a time.
+  do {
+    size_t vl = __riscv_vsetvl_e64m8(w);  // Output pixels handled this pass.
+    vuint64m8_t v_data = __riscv_vle64_v_u64m8(src, vl);
+    vuint32m4_t v_dst = __riscv_vnsrl_wx_u32m4(v_data, 32, vl);  // Bits 63..32.
+    __riscv_vse32_v_u32m4(dst, v_dst, vl);
+    w -= vl;
+    src += vl;
+    dst += vl;
+  } while (w > 0);
+}
+#endif  // HAS_SCALEARGBROWDOWN2_RVV
+
+#ifdef HAS_SCALEARGBROWDOWN2LINEAR_RVV  // 1/2 width: average each pixel pair.
+void ScaleARGBRowDown2Linear_RVV(const uint8_t* src_argb,
+                                 ptrdiff_t src_stride,
+                                 uint8_t* dst_argb,
+                                 int dst_width) {
+  (void)src_stride;  // Only one source row is read.
+  size_t w = (size_t)dst_width;  // Output pixels still to be written.
+  const uint32_t* src = (const uint32_t*)(src_argb);  // One 32-bit pixel each.
+  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+  // register) is set to round-to-nearest-up mode(0).
+  asm volatile("csrwi vxrm, 0");  // vxrm is not restored by this function.
+  do {
+    vuint8m4_t v_odd, v_even, v_dst;
+    vuint32m4_t v_odd_32, v_even_32;
+    size_t vl = __riscv_vsetvl_e32m4(w);  // Output pixels handled this pass.
+    __riscv_vlseg2e32_v_u32m4(&v_even_32, &v_odd_32, src, vl);  // Deinterleave.
+    v_even = __riscv_vreinterpret_v_u32m4_u8m4(v_even_32);  // View as bytes.
+    v_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_odd_32);
+    // Use round-to-nearest-up mode for averaging add
+    v_dst = __riscv_vaaddu_vv_u8m4(v_even, v_odd, vl * 4);  // Over vl * 4 bytes.
+    __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4);
+    w -= vl;
+    src += vl * 2;  // Two source pixels are consumed per output pixel.
+    dst_argb += vl * 4;  // Four bytes are written per output pixel.
+  } while (w > 0);
+}
+#endif  // HAS_SCALEARGBROWDOWN2LINEAR_RVV
+
+#ifdef HAS_SCALEARGBROWDOWN2BOX_RVV  // 1/2 size: average each 2x2 pixel box.
+void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb,
+                              ptrdiff_t src_stride,
+                              uint8_t* dst_argb,
+                              int dst_width) {
+  size_t w = (size_t)dst_width;  // Output pixels still to be written.
+  const uint32_t* src0 = (const uint32_t*)(src_argb);  // First source row.
+  const uint32_t* src1 = (const uint32_t*)(src_argb + src_stride);  // Second.
+  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+  // register) is set to round-to-nearest-up mode(0).
+  asm volatile("csrwi vxrm, 0");  // vxrm is not restored by this function.
+  do {
+    vuint8m4_t v_row0_odd, v_row0_even, v_row1_odd, v_row1_even, v_dst;
+    vuint16m8_t v_row0_sum, v_row1_sum, v_dst_16;
+    vuint32m4_t v_row0_odd_32, v_row0_even_32, v_row1_odd_32, v_row1_even_32;
+    size_t vl = __riscv_vsetvl_e32m4(w);  // Output pixels handled this pass.
+    __riscv_vlseg2e32_v_u32m4(&v_row0_even_32, &v_row0_odd_32, src0, vl);
+    __riscv_vlseg2e32_v_u32m4(&v_row1_even_32, &v_row1_odd_32, src1, vl);
+    v_row0_even = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_even_32);
+    v_row0_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_odd_32);
+    v_row1_even = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_even_32);
+    v_row1_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_odd_32);
+    v_row0_sum = __riscv_vwaddu_vv_u16m8(v_row0_even, v_row0_odd, vl * 4);
+    v_row1_sum = __riscv_vwaddu_vv_u16m8(v_row1_even, v_row1_odd, vl * 4);
+    v_dst_16 = __riscv_vadd_vv_u16m8(v_row0_sum, v_row1_sum, vl * 4);  // 4 bytes.
+    // Use round-to-nearest-up mode for vnclip
+    v_dst = __riscv_vnclipu_wx_u8m4(v_dst_16, 2, vl * 4);  // Rounded sum >> 2.
+    __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4);
+    w -= vl;
+    src0 += vl * 2;  // Two source pixels are consumed per output pixel.
+    src1 += vl * 2;
+    dst_argb += vl * 4;  // Four bytes are written per output pixel.
+  } while (w > 0);
+}
+#endif  // HAS_SCALEARGBROWDOWN2BOX_RVV
+
+#ifdef HAS_SCALEARGBROWDOWNEVEN_RVV  // Point sample 1 pixel per src_stepx.
+void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb,
+                              ptrdiff_t src_stride,  // Not used in this body.
+                              int src_stepx,
+                              uint8_t* dst_argb,
+                              int dst_width) {
+  size_t w = (size_t)dst_width;  // Output pixels still to be written.
+  const uint32_t* src = (const uint32_t*)(src_argb);  // One 32-bit pixel each.
+  uint32_t* dst = (uint32_t*)(dst_argb);
+  const int stride_byte = src_stepx * 4;  // Byte step between loaded pixels.
+  do {
+    size_t vl = __riscv_vsetvl_e32m8(w);  // Output pixels handled this pass.
+    vuint32m8_t v_row = __riscv_vlse32_v_u32m8(src, stride_byte, vl);  // Strided.
+    __riscv_vse32_v_u32m8(dst, v_row, vl);
+    w -= vl;
+    src += vl * src_stepx;  // Advance by src_stepx pixels per output pixel.
+    dst += vl;
+  } while (w > 0);
+}
+#endif  // HAS_SCALEARGBROWDOWNEVEN_RVV
+
+#ifdef HAS_SCALEARGBROWDOWNEVENBOX_RVV  // Average a 2x2 box every src_stepx.
+void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb,
+                                 ptrdiff_t src_stride,
+                                 int src_stepx,
+                                 uint8_t* dst_argb,
+                                 int dst_width) {
+  size_t w = (size_t)dst_width;  // Output pixels still to be written.
+  const uint32_t* src0 = (const uint32_t*)(src_argb);  // First source row.
+  const uint32_t* src1 = (const uint32_t*)(src_argb + src_stride);  // Second.
+  const int stride_byte = src_stepx * 4;  // Byte step between loaded pairs.
+  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+  // register) is set to round-to-nearest-up mode(0).
+  asm volatile("csrwi vxrm, 0");  // vxrm is not restored by this function.
+  do {
+    vuint8m4_t v_row0_low, v_row0_high, v_row1_low, v_row1_high, v_dst;
+    vuint16m8_t v_row0_sum, v_row1_sum, v_sum;
+    vuint32m4_t v_row0_low_32, v_row0_high_32, v_row1_low_32, v_row1_high_32;
+    size_t vl = __riscv_vsetvl_e32m4(w);  // Output pixels handled this pass.
+    __riscv_vlsseg2e32_v_u32m4(&v_row0_low_32, &v_row0_high_32, src0,
+                               stride_byte, vl);  // 2 adjacent pixels per step.
+    __riscv_vlsseg2e32_v_u32m4(&v_row1_low_32, &v_row1_high_32, src1,
+                               stride_byte, vl);
+    v_row0_low = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_low_32);
+    v_row0_high = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_high_32);
+    v_row1_low = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_low_32);
+    v_row1_high = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_high_32);
+    v_row0_sum = __riscv_vwaddu_vv_u16m8(v_row0_low, v_row0_high, vl * 4);
+    v_row1_sum = __riscv_vwaddu_vv_u16m8(v_row1_low, v_row1_high, vl * 4);
+    v_sum = __riscv_vadd_vv_u16m8(v_row0_sum, v_row1_sum, vl * 4);  // 4 bytes.
+    // Use round-to-nearest-up mode for vnclip
+    v_dst = __riscv_vnclipu_wx_u8m4(v_sum, 2, vl * 4);  // Rounded sum >> 2.
+    __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4);
+    w -= vl;
+    src0 += vl * src_stepx;  // Advance by src_stepx pixels per output pixel.
+    src1 += vl * src_stepx;
+    dst_argb += vl * 4;  // Four bytes are written per output pixel.
+  } while (w > 0);
+}
+#endif  // HAS_SCALEARGBROWDOWNEVENBOX_RVV
+
+#ifdef HAS_SCALEROWDOWN2_RVV  // 1/2 point sample: keep 1 of every 2 bytes.
+void ScaleRowDown2_RVV(const uint8_t* src_ptr,
+                       ptrdiff_t src_stride,
+                       uint8_t* dst,
+                       int dst_width) {
+  size_t w = (size_t)dst_width;  // Output bytes still to be written.
+  const uint16_t* src = (const uint16_t*)src_ptr;  // Read 2 bytes at a time.
+  (void)src_stride;  // Only one source row is read.
+  do {
+    size_t vl = __riscv_vsetvl_e16m8(w);  // Output bytes handled this pass.
+    vuint16m8_t v_src = __riscv_vle16_v_u16m8(src, vl);
+    vuint8m4_t v_dst = __riscv_vnsrl_wx_u8m4(v_src, 8, vl);  // Keep bits 15..8.
+    __riscv_vse8_v_u8m4(dst, v_dst, vl);
+    w -= vl;
+    src += vl;
+    dst += vl;
+  } while (w > 0);
+}
+#endif  // HAS_SCALEROWDOWN2_RVV
+
+#ifdef HAS_SCALEROWDOWN2LINEAR_RVV  // 1/2 width: average each byte pair.
+void ScaleRowDown2Linear_RVV(const uint8_t* src_ptr,
+                             ptrdiff_t src_stride,
+                             uint8_t* dst,
+                             int dst_width) {
+  size_t w = (size_t)dst_width;  // Output bytes still to be written.
+  (void)src_stride;  // Only one source row is read.
+  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+  // register) is set to round-to-nearest-up mode(0).
+  asm volatile("csrwi vxrm, 0");  // vxrm is not restored by this function.
+  do {
+    vuint8m4_t v_s0, v_s1, v_dst;
+    size_t vl = __riscv_vsetvl_e8m4(w);  // Output bytes handled this pass.
+    __riscv_vlseg2e8_v_u8m4(&v_s0, &v_s1, src_ptr, vl);  // Split even/odd bytes.
+    // Use round-to-nearest-up mode for averaging add
+    v_dst = __riscv_vaaddu_vv_u8m4(v_s0, v_s1, vl);
+    __riscv_vse8_v_u8m4(dst, v_dst, vl);
+    w -= vl;
+    src_ptr += 2 * vl;  // Two source bytes are consumed per output byte.
+    dst += vl;
+  } while (w > 0);
+}
+#endif  // HAS_SCALEROWDOWN2LINEAR_RVV
+
+#ifdef HAS_SCALEROWDOWN2BOX_RVV
+void ScaleRowDown2Box_RVV(const uint8_t* src_ptr,  // 2:1 using a 2x2 box average
+                          ptrdiff_t src_stride,    // byte offset to second row
+                          uint8_t* dst,            // receives dst_width bytes
+                          int dst_width) {         // number of output bytes
+  const uint8_t* s = src_ptr;               // first source row
+  const uint8_t* t = src_ptr + src_stride;  // second source row
+  size_t w = (size_t)dst_width;
+  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+  // register) is set to round-to-nearest-up mode(0).
+  asm volatile("csrwi vxrm, 0");  // vxrm is not restored before returning
+  do {
+    size_t vl = __riscv_vsetvl_e8m4(w);
+    vuint8m4_t v_s0, v_s1, v_t0, v_t1;
+    vuint16m8_t v_s01, v_t01, v_st01;
+    vuint8m4_t v_dst;
+    __riscv_vlseg2e8_v_u8m4(&v_s0, &v_s1, s, vl);  // even/odd bytes of row s
+    __riscv_vlseg2e8_v_u8m4(&v_t0, &v_t1, t, vl);  // even/odd bytes of row t
+    v_s01 = __riscv_vwaddu_vv_u16m8(v_s0, v_s1, vl);  // widened to 16 bits
+    v_t01 = __riscv_vwaddu_vv_u16m8(v_t0, v_t1, vl);
+    v_st01 = __riscv_vadd_vv_u16m8(v_s01, v_t01, vl);  // sum of the 4 bytes
+    // Narrow with round-to-nearest-up: dst = (sum + 2) >> 2
+    v_dst = __riscv_vnclipu_wx_u8m4(v_st01, 2, vl);
+    __riscv_vse8_v_u8m4(dst, v_dst, vl);
+    w -= vl;
+    s += 2 * vl;
+    t += 2 * vl;
+    dst += vl;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEROWDOWN4_RVV
+void ScaleRowDown4_RVV(const uint8_t* src_ptr,  // 4:1 downsample, no filter
+                       ptrdiff_t src_stride,    // unused (single-row kernel)
+                       uint8_t* dst_ptr,        // receives dst_width bytes
+                       int dst_width) {         // number of output bytes
+  size_t w = (size_t)dst_width;
+  (void)src_stride;
+  do {
+    size_t vl = __riscv_vsetvl_e8m2(w);
+    vuint8m2_t v_s0, v_s1, v_s2, v_s3;
+    __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl);
+    __riscv_vse8_v_u8m2(dst_ptr, v_s2, vl);  // keep byte 2 of each 4-byte group
+    w -= vl;
+    src_ptr += (4 * vl);
+    dst_ptr += vl;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEROWDOWN4BOX_RVV
+void ScaleRowDown4Box_RVV(const uint8_t* src_ptr,  // 4:1 using a 4x4 box average
+                          ptrdiff_t src_stride,    // byte offset between rows
+                          uint8_t* dst_ptr,        // receives dst_width bytes
+                          int dst_width) {         // number of output bytes
+  const uint8_t* src_ptr1 = src_ptr + src_stride;      // second source row
+  const uint8_t* src_ptr2 = src_ptr + src_stride * 2;  // third source row
+  const uint8_t* src_ptr3 = src_ptr + src_stride * 3;  // fourth source row
+  size_t w = (size_t)dst_width;
+  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+  // register) is set to round-to-nearest-up mode(0).
+  asm volatile("csrwi vxrm, 0");  // vxrm is not restored before returning
+  do {
+    vuint8m2_t v_s0, v_s1, v_s2, v_s3;  // row 0, bytes 0..3 of each group
+    vuint8m2_t v_t0, v_t1, v_t2, v_t3;  // row 1
+    vuint8m2_t v_u0, v_u1, v_u2, v_u3;  // row 2
+    vuint8m2_t v_v0, v_v1, v_v2, v_v3;  // row 3
+    vuint16m4_t v_s01, v_s23, v_t01, v_t23;
+    vuint16m4_t v_u01, v_u23, v_v01, v_v23;
+    vuint16m4_t v_st01, v_st23, v_uv01, v_uv23;
+    vuint16m4_t v_st0123, v_uv0123, v_stuv0123;
+    vuint8m2_t v_dst;
+    size_t vl = __riscv_vsetvl_e8m2(w);
+
+    __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl);
+    v_s01 = __riscv_vwaddu_vv_u16m4(v_s0, v_s1, vl);
+
+    __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, src_ptr1, vl);
+    v_t01 = __riscv_vwaddu_vv_u16m4(v_t0, v_t1, vl);
+
+    __riscv_vlseg4e8_v_u8m2(&v_u0, &v_u1, &v_u2, &v_u3, src_ptr2, vl);
+    v_u01 = __riscv_vwaddu_vv_u16m4(v_u0, v_u1, vl);
+    v_u23 = __riscv_vwaddu_vv_u16m4(v_u2, v_u3, vl);
+
+    v_s23 = __riscv_vwaddu_vv_u16m4(v_s2, v_s3, vl);
+    v_t23 = __riscv_vwaddu_vv_u16m4(v_t2, v_t3, vl);
+    v_st01 = __riscv_vadd_vv_u16m4(v_s01, v_t01, vl);
+    v_st23 = __riscv_vadd_vv_u16m4(v_s23, v_t23, vl);
+
+    __riscv_vlseg4e8_v_u8m2(&v_v0, &v_v1, &v_v2, &v_v3, src_ptr3, vl);
+
+    v_v01 = __riscv_vwaddu_vv_u16m4(v_v0, v_v1, vl);
+    v_v23 = __riscv_vwaddu_vv_u16m4(v_v2, v_v3, vl);
+
+    v_uv01 = __riscv_vadd_vv_u16m4(v_u01, v_v01, vl);
+    v_uv23 = __riscv_vadd_vv_u16m4(v_u23, v_v23, vl);
+
+    v_st0123 = __riscv_vadd_vv_u16m4(v_st01, v_st23, vl);  // rows 0 and 1
+    v_uv0123 = __riscv_vadd_vv_u16m4(v_uv01, v_uv23, vl);  // rows 2 and 3
+    v_stuv0123 = __riscv_vadd_vv_u16m4(v_st0123, v_uv0123, vl);  // all 16 bytes
+    // Narrow with round-to-nearest-up: dst = (sum + 8) >> 4
+    v_dst = __riscv_vnclipu_wx_u8m2(v_stuv0123, 4, vl);
+    __riscv_vse8_v_u8m2(dst_ptr, v_dst, vl);
+    w -= vl;
+    src_ptr += 4 * vl;
+    src_ptr1 += 4 * vl;
+    src_ptr2 += 4 * vl;
+    src_ptr3 += 4 * vl;
+    dst_ptr += vl;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEROWDOWN34_RVV
+void ScaleRowDown34_RVV(const uint8_t* src_ptr,  // 4:3 downsample, no filter
+                        ptrdiff_t src_stride,  // NOTE(review): unused, no (void)
+                        uint8_t* dst_ptr,      // 3 bytes written per 4 read
+                        int dst_width) {       // output bytes; only /3 is used
+  size_t w = (size_t)dst_width / 3u;  // number of 3-byte output groups
+  do {
+    size_t vl = __riscv_vsetvl_e8m2(w);  // groups handled this pass
+    vuint8m2_t v_s0, v_s1, v_s2, v_s3;
+    __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl);
+    __riscv_vsseg3e8_v_u8m2(dst_ptr, v_s0, v_s1, v_s3, vl);  // drop byte 2
+    w -= vl;
+    src_ptr += 4 * vl;
+    dst_ptr += 3 * vl;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEROWDOWN34_0_BOX_RVV
+void ScaleRowDown34_0_Box_RVV(const uint8_t* src_ptr,  // 4:3, rows blended 3:1
+                              ptrdiff_t src_stride,  // offset to row t; 0 = none
+                              uint8_t* dst_ptr,      // 3 bytes written per 4 read
+                              int dst_width) {       // output bytes; only /3 used
+  size_t w = (size_t)dst_width / 3u;  // number of 3-byte output groups
+  const uint8_t* s = src_ptr;
+  const uint8_t* t = src_ptr + src_stride;
+  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+  // register) is set to round-to-nearest-up mode(0).
+  asm volatile("csrwi vxrm, 0");  // vxrm is not restored before returning
+  do {
+    vuint8m2_t v_s0, v_s1, v_s2, v_s3;
+    vuint16m4_t v_t0_u16, v_t1_u16, v_t2_u16, v_t3_u16;
+    vuint8m2_t v_u0, v_u1, v_u2, v_u3;  // vertically blended bytes 0..3
+    vuint16m4_t v_u1_u16;
+    vuint8m2_t v_a0, v_a1, v_a2;
+    size_t vl = __riscv_vsetvl_e8m2(w);
+    __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, s, vl);
+
+    if (src_stride == 0) {  // t aliases s: reuse s instead of loading it again
+      v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_s0, 0, vl);  // no bias: vnclipu rounds
+      v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_s1, 0, vl);  // (an extra +2 here gave
+      v_t2_u16 = __riscv_vwaddu_vx_u16m4(v_s2, 0, vl);  // s + 1 instead of s)
+      v_t3_u16 = __riscv_vwaddu_vx_u16m4(v_s3, 0, vl);
+    } else {
+      vuint8m2_t v_t0, v_t1, v_t2, v_t3;
+      __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, t, vl);
+      v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_t0, 0, vl);  // widen t to 16 bits
+      v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_t1, 0, vl);
+      v_t2_u16 = __riscv_vwaddu_vx_u16m4(v_t2, 0, vl);
+      v_t3_u16 = __riscv_vwaddu_vx_u16m4(v_t3, 0, vl);
+      t += 4 * vl;
+    }
+
+    v_t0_u16 = __riscv_vwmaccu_vx_u16m4(v_t0_u16, 3, v_s0, vl);  // t + 3 * s
+    v_t1_u16 = __riscv_vwmaccu_vx_u16m4(v_t1_u16, 3, v_s1, vl);
+    v_t2_u16 = __riscv_vwmaccu_vx_u16m4(v_t2_u16, 3, v_s2, vl);
+    v_t3_u16 = __riscv_vwmaccu_vx_u16m4(v_t3_u16, 3, v_s3, vl);
+
+    // vnclipu adds the rounding term itself: u = (t + 3 * s + 2) >> 2
+    v_u0 = __riscv_vnclipu_wx_u8m2(v_t0_u16, 2, vl);
+    v_u1 = __riscv_vnclipu_wx_u8m2(v_t1_u16, 2, vl);
+    v_u2 = __riscv_vnclipu_wx_u8m2(v_t2_u16, 2, vl);
+    v_u3 = __riscv_vnclipu_wx_u8m2(v_t3_u16, 2, vl);
+
+    // a0 = (u[0] * 3 + u[1] * 1 + 2) >> 2
+    v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_u1, 0, vl);
+    v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_u0, vl);
+    v_a0 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl);
+
+    // a1 = (u[1] * 1 + u[2] * 1 + 1) >> 1
+    v_a1 = __riscv_vaaddu_vv_u8m2(v_u1, v_u2, vl);
+
+    // a2 = (u[2] * 1 + u[3] * 3 + 2) >> 2
+    v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_u2, 0, vl);
+    v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_u3, vl);
+    v_a2 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl);
+
+    __riscv_vsseg3e8_v_u8m2(dst_ptr, v_a0, v_a1, v_a2, vl);
+
+    w -= vl;
+    s += 4 * vl;
+    dst_ptr += 3 * vl;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEROWDOWN34_1_BOX_RVV
+void ScaleRowDown34_1_Box_RVV(const uint8_t* src_ptr,  // 4:3, rows averaged 1:1
+                              ptrdiff_t src_stride,  // offset to row t; 0 = none
+                              uint8_t* dst_ptr,      // 3 bytes written per 4 read
+                              int dst_width) {       // output bytes; only /3 used
+  size_t w = (size_t)dst_width / 3u;  // number of 3-byte output groups
+  const uint8_t* s = src_ptr;
+  const uint8_t* t = src_ptr + src_stride;
+  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+  // register) is set to round-to-nearest-up mode(0).
+  asm volatile("csrwi vxrm, 0");  // vxrm is not restored before returning
+  do {
+    vuint8m2_t v_s0, v_s1, v_s2, v_s3;
+    vuint8m2_t v_ave0, v_ave1, v_ave2, v_ave3;  // vertically averaged bytes 0..3
+    vuint16m4_t v_u1_u16;
+    vuint8m2_t v_a0, v_a1, v_a2;
+    size_t vl = __riscv_vsetvl_e8m2(w);
+    __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, s, vl);
+
+    // Vertical step: ave = (s + t + 1) >> 1 via the rounding averaging add
+    if (src_stride == 0) {  // t aliases s: average s with itself
+      v_ave0 = __riscv_vaaddu_vv_u8m2(v_s0, v_s0, vl);
+      v_ave1 = __riscv_vaaddu_vv_u8m2(v_s1, v_s1, vl);
+      v_ave2 = __riscv_vaaddu_vv_u8m2(v_s2, v_s2, vl);
+      v_ave3 = __riscv_vaaddu_vv_u8m2(v_s3, v_s3, vl);
+    } else {
+      vuint8m2_t v_t0, v_t1, v_t2, v_t3;
+      __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, t, vl);
+      v_ave0 = __riscv_vaaddu_vv_u8m2(v_s0, v_t0, vl);
+      v_ave1 = __riscv_vaaddu_vv_u8m2(v_s1, v_t1, vl);
+      v_ave2 = __riscv_vaaddu_vv_u8m2(v_s2, v_t2, vl);
+      v_ave3 = __riscv_vaaddu_vv_u8m2(v_s3, v_t3, vl);
+      t += 4 * vl;
+    }
+    // a0 = (ave[0] * 3 + ave[1] * 1 + 2) >> 2
+    v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_ave1, 0, vl);
+    v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_ave0, vl);
+    v_a0 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl);
+
+    // a1 = (ave[1] * 1 + ave[2] * 1 + 1) >> 1
+    v_a1 = __riscv_vaaddu_vv_u8m2(v_ave1, v_ave2, vl);
+
+    // a2 = (ave[2] * 1 + ave[3] * 3 + 2) >> 2
+    v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_ave2, 0, vl);
+    v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_ave3, vl);
+    v_a2 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl);
+
+    __riscv_vsseg3e8_v_u8m2(dst_ptr, v_a0, v_a1, v_a2, vl);
+
+    w -= vl;
+    s += 4 * vl;
+    dst_ptr += 3 * vl;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEROWDOWN38_RVV
+void ScaleRowDown38_RVV(const uint8_t* src_ptr,  // 8:3 downsample, no filter
+                        ptrdiff_t src_stride,    // unused (single-row kernel)
+                        uint8_t* dst_ptr,        // 3 bytes written per 8 read
+                        int dst_width) {         // output bytes, multiple of 3
+  size_t w = (size_t)dst_width / 3u;  // number of 3-byte output groups
+  (void)src_stride;
+  assert(dst_width % 3 == 0);
+  do {
+    vuint8m1_t v_s0, v_s1, v_s2, v_s3, v_s4, v_s5, v_s6, v_s7;
+    size_t vl = __riscv_vsetvl_e8m1(w);  // groups handled this pass
+    __riscv_vlseg8e8_v_u8m1(&v_s0, &v_s1, &v_s2, &v_s3, &v_s4, &v_s5, &v_s6,
+                            &v_s7, src_ptr, vl);
+    __riscv_vsseg3e8_v_u8m1(dst_ptr, v_s0, v_s3, v_s6, vl);  // bytes 0, 3, 6
+    w -= vl;
+    src_ptr += 8 * vl;
+    dst_ptr += 3 * vl;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEROWDOWN38_2_BOX_RVV
+void ScaleRowDown38_2_Box_RVV(const uint8_t* src_ptr,  // 8:3, box over 2 rows
+                              ptrdiff_t src_stride,    // byte offset to row t
+                              uint8_t* dst_ptr,        // 3 bytes per 8 read
+                              int dst_width) {  // output bytes, multiple of 3
+  size_t w = (size_t)dst_width / 3u;  // number of 3-byte output groups
+  const uint16_t coeff_a = (65536u / 6u);  // mulhu by this ~ divide by 6
+  const uint16_t coeff_b = (65536u / 4u);  // mulhu by this = divide by 4
+  assert((dst_width % 3 == 0) && (dst_width > 0));
+  do {
+    vuint8m1_t v_s0, v_s1, v_s2, v_s3, v_s4, v_s5, v_s6, v_s7;
+    vuint8m1_t v_t0, v_t1, v_t2, v_t3, v_t4, v_t5, v_t6, v_t7;
+    vuint16m2_t v_e0, v_e1, v_e2, v_e;
+    vuint16m2_t v_f0, v_f1, v_f2, v_f;
+    vuint16m2_t v_g0, v_g1, v_g;
+    vuint8m1_t v_dst_e, v_dst_f, v_dst_g;
+    size_t vl = __riscv_vsetvl_e8m1(w);
+    // s: e00, e10, e20, f00, f10, f20, g00, g10
+    // t: e01, e11, e21, f01, f11, f21, g01, g11
+    __riscv_vlseg8e8_v_u8m1(&v_s0, &v_s1, &v_s2, &v_s3, &v_s4, &v_s5, &v_s6,
+                            &v_s7, src_ptr, vl);
+    __riscv_vlseg8e8_v_u8m1(&v_t0, &v_t1, &v_t2, &v_t3, &v_t4, &v_t5, &v_t6,
+                            &v_t7, src_ptr + src_stride, vl);
+    // v_e = sum of the 6 bytes e00..e21 (3 columns x 2 rows)
+    // v_f = sum of the 6 bytes f00..f21 (3 columns x 2 rows)
+    // v_g = sum of the 4 bytes g00..g11 (2 columns x 2 rows)
+    v_e0 = __riscv_vwaddu_vv_u16m2(v_s0, v_t0, vl);
+    v_e1 = __riscv_vwaddu_vv_u16m2(v_s1, v_t1, vl);
+    v_e2 = __riscv_vwaddu_vv_u16m2(v_s2, v_t2, vl);
+    v_f0 = __riscv_vwaddu_vv_u16m2(v_s3, v_t3, vl);
+    v_f1 = __riscv_vwaddu_vv_u16m2(v_s4, v_t4, vl);
+    v_f2 = __riscv_vwaddu_vv_u16m2(v_s5, v_t5, vl);
+    v_g0 = __riscv_vwaddu_vv_u16m2(v_s6, v_t6, vl);
+    v_g1 = __riscv_vwaddu_vv_u16m2(v_s7, v_t7, vl);
+
+    v_e0 = __riscv_vadd_vv_u16m2(v_e0, v_e1, vl);
+    v_f0 = __riscv_vadd_vv_u16m2(v_f0, v_f1, vl);
+    v_e = __riscv_vadd_vv_u16m2(v_e0, v_e2, vl);
+    v_f = __riscv_vadd_vv_u16m2(v_f0, v_f2, vl);
+    v_g = __riscv_vadd_vv_u16m2(v_g0, v_g1, vl);
+
+    // Average in 16-bit fixed-point: (sum * coeff) >> 16
+    v_e = __riscv_vmulhu_vx_u16m2(v_e, coeff_a, vl);
+    v_f = __riscv_vmulhu_vx_u16m2(v_f, coeff_a, vl);
+    v_g = __riscv_vmulhu_vx_u16m2(v_g, coeff_b, vl);
+
+    v_dst_e = __riscv_vnsrl_wx_u8m1(v_e, 0, vl);  // shift 0: keep the low byte
+    v_dst_f = __riscv_vnsrl_wx_u8m1(v_f, 0, vl);
+    v_dst_g = __riscv_vnsrl_wx_u8m1(v_g, 0, vl);
+
+    __riscv_vsseg3e8_v_u8m1(dst_ptr, v_dst_e, v_dst_f, v_dst_g, vl);
+    w -= vl;
+    src_ptr += 8 * vl;
+    dst_ptr += 3 * vl;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEROWDOWN38_3_BOX_RVV
+void ScaleRowDown38_3_Box_RVV(const uint8_t* src_ptr,  // 8:3, box over 3 rows
+                              ptrdiff_t src_stride,    // byte offset between rows
+                              uint8_t* dst_ptr,        // 3 bytes per 8 read
+                              int dst_width) {  // output bytes, multiple of 3
+  size_t w = (size_t)dst_width / 3u;  // number of 3-byte output groups
+  const uint16_t coeff_a = (65536u / 9u);  // mulhu by this ~ divide by 9
+  const uint16_t coeff_b = (65536u / 6u);  // mulhu by this ~ divide by 6
+  assert((dst_width % 3 == 0) && (dst_width > 0));
+  do {
+    vuint8m1_t v_s0, v_s1, v_s2, v_s3, v_s4, v_s5, v_s6, v_s7;
+    vuint8m1_t v_t0, v_t1, v_t2, v_t3, v_t4, v_t5, v_t6, v_t7;
+    vuint8m1_t v_u0, v_u1, v_u2, v_u3, v_u4, v_u5, v_u6, v_u7;
+    vuint16m2_t v_e0, v_e1, v_e2, v_e3, v_e4, v_e;
+    vuint16m2_t v_f0, v_f1, v_f2, v_f3, v_f4, v_f;
+    vuint16m2_t v_g0, v_g1, v_g2, v_g;
+    vuint8m1_t v_dst_e, v_dst_f, v_dst_g;
+    size_t vl = __riscv_vsetvl_e8m1(w);
+    // s: e00, e10, e20, f00, f10, f20, g00, g10
+    // t: e01, e11, e21, f01, f11, f21, g01, g11
+    // u: e02, e12, e22, f02, f12, f22, g02, g12
+    __riscv_vlseg8e8_v_u8m1(&v_s0, &v_s1, &v_s2, &v_s3, &v_s4, &v_s5, &v_s6,
+                            &v_s7, src_ptr, vl);
+    __riscv_vlseg8e8_v_u8m1(&v_t0, &v_t1, &v_t2, &v_t3, &v_t4, &v_t5, &v_t6,
+                            &v_t7, src_ptr + src_stride, vl);
+    __riscv_vlseg8e8_v_u8m1(&v_u0, &v_u1, &v_u2, &v_u3, &v_u4, &v_u5, &v_u6,
+                            &v_u7, src_ptr + 2 * src_stride, vl);
+    // v_e = sum of the 9 bytes e00..e22 (3 columns x 3 rows)
+    v_e0 = __riscv_vwaddu_vv_u16m2(v_s0, v_t0, vl);
+    v_e1 = __riscv_vwaddu_vv_u16m2(v_s1, v_t1, vl);
+    v_e2 = __riscv_vwaddu_vv_u16m2(v_s2, v_t2, vl);
+    v_e3 = __riscv_vwaddu_vv_u16m2(v_u0, v_u1, vl);
+    v_e4 = __riscv_vwaddu_vx_u16m2(v_u2, 0, vl);  // widen only (adds 0)
+
+    v_e0 = __riscv_vadd_vv_u16m2(v_e0, v_e1, vl);
+    v_e2 = __riscv_vadd_vv_u16m2(v_e2, v_e3, vl);
+    v_e0 = __riscv_vadd_vv_u16m2(v_e0, v_e4, vl);
+    v_e = __riscv_vadd_vv_u16m2(v_e0, v_e2, vl);
+    // v_f = sum of the 9 bytes f00..f22 (3 columns x 3 rows)
+    v_f0 = __riscv_vwaddu_vv_u16m2(v_s3, v_t3, vl);
+    v_f1 = __riscv_vwaddu_vv_u16m2(v_s4, v_t4, vl);
+    v_f2 = __riscv_vwaddu_vv_u16m2(v_s5, v_t5, vl);
+    v_f3 = __riscv_vwaddu_vv_u16m2(v_u3, v_u4, vl);
+    v_f4 = __riscv_vwaddu_vx_u16m2(v_u5, 0, vl);  // widen only (adds 0)
+
+    v_f0 = __riscv_vadd_vv_u16m2(v_f0, v_f1, vl);
+    v_f2 = __riscv_vadd_vv_u16m2(v_f2, v_f3, vl);
+    v_f0 = __riscv_vadd_vv_u16m2(v_f0, v_f4, vl);
+    v_f = __riscv_vadd_vv_u16m2(v_f0, v_f2, vl);
+    // v_g = sum of the 6 bytes g00..g12 (2 columns x 3 rows)
+    v_g0 = __riscv_vwaddu_vv_u16m2(v_s6, v_t6, vl);
+    v_g1 = __riscv_vwaddu_vv_u16m2(v_s7, v_t7, vl);
+    v_g2 = __riscv_vwaddu_vv_u16m2(v_u6, v_u7, vl);
+
+    v_g = __riscv_vadd_vv_u16m2(v_g0, v_g1, vl);
+    v_g = __riscv_vadd_vv_u16m2(v_g, v_g2, vl);
+
+    // Average in 16-bit fixed-point: (sum * coeff) >> 16
+    v_e = __riscv_vmulhu_vx_u16m2(v_e, coeff_a, vl);
+    v_f = __riscv_vmulhu_vx_u16m2(v_f, coeff_a, vl);
+    v_g = __riscv_vmulhu_vx_u16m2(v_g, coeff_b, vl);
+
+    v_dst_e = __riscv_vnsrl_wx_u8m1(v_e, 0, vl);  // shift 0: keep the low byte
+    v_dst_f = __riscv_vnsrl_wx_u8m1(v_f, 0, vl);
+    v_dst_g = __riscv_vnsrl_wx_u8m1(v_g, 0, vl);
+    __riscv_vsseg3e8_v_u8m1(dst_ptr, v_dst_e, v_dst_f, v_dst_g, vl);
+    w -= vl;
+    src_ptr += 8 * vl;
+    dst_ptr += 3 * vl;
+  } while (w > 0);
+}
+#endif
+
+// ScaleRowUp2_(Bi)linear_RVV functions are equivalent to other platforms'
+// ScaleRowUp2_(Bi)linear_Any_XXX. The entire row is processed here; other
+// platforms implement only the non-edge part and handle edges with scalar code.
+
+#ifdef HAS_SCALEROWUP2_LINEAR_RVV
+void ScaleRowUp2_Linear_RVV(const uint8_t* src_ptr,  // 1:2 upsample, 3:1 blend
+                            uint8_t* dst_ptr,        // receives dst_width bytes
+                            int dst_width) {         // number of output bytes
+  size_t work_width = (size_t)dst_width - 1u;
+  size_t src_width = work_width >> 1u;  // adjacent source pairs to blend
+  const uint8_t* work_src_ptr = src_ptr;
+  uint8_t* work_dst_ptr = dst_ptr + 1;  // dst_ptr[0] is written separately
+  size_t vl = __riscv_vsetvlmax_e8m4();  // only used to splat the constant 3
+  vuint8m4_t v_3 = __riscv_vmv_v_x_u8m4(3, vl);
+  dst_ptr[0] = src_ptr[0];  // first output is a plain copy
+  while (src_width > 0) {
+    vuint8m4_t v_src0, v_src1, v_dst_odd, v_dst_even;
+    vuint16m8_t v_src0_u16, v_src1_u16;
+    size_t vl = __riscv_vsetvl_e8m4(src_width);  // shadows the outer vl
+    v_src0 = __riscv_vle8_v_u8m4(work_src_ptr, vl);      // s[x]
+    v_src1 = __riscv_vle8_v_u8m4(work_src_ptr + 1, vl);  // s[x + 1]
+
+    v_src0_u16 = __riscv_vwaddu_vx_u16m8(v_src0, 2, vl);  // +2 is the rounding
+    v_src1_u16 = __riscv_vwaddu_vx_u16m8(v_src1, 2, vl);
+    v_src0_u16 = __riscv_vwmaccu_vv_u16m8(v_src0_u16, v_3, v_src1, vl);
+    v_src1_u16 = __riscv_vwmaccu_vv_u16m8(v_src1_u16, v_3, v_src0, vl);
+
+    v_dst_odd = __riscv_vnsrl_wx_u8m4(v_src0_u16, 2, vl);  // (s0 + 3*s1 + 2) >> 2
+    v_dst_even = __riscv_vnsrl_wx_u8m4(v_src1_u16, 2, vl);  // (3*s0 + s1 + 2) >> 2
+
+    __riscv_vsseg2e8_v_u8m4(work_dst_ptr, v_dst_even, v_dst_odd, vl);
+
+    src_width -= vl;
+    work_src_ptr += vl;
+    work_dst_ptr += 2 * vl;  // two output bytes per source pair
+  }
+  dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2];  // last: plain copy
+}
+#endif
+
+#ifdef HAS_SCALEROWUP2_BILINEAR_RVV
+void ScaleRowUp2_Bilinear_RVV(const uint8_t* src_ptr,  // 2 rows in, 2 rows out
+                              ptrdiff_t src_stride,    // offset to source row t
+                              uint8_t* dst_ptr,        // first output row
+                              ptrdiff_t dst_stride,    // offset to output row e
+                              int dst_width) {         // bytes per output row
+  size_t work_width = ((size_t)dst_width - 1u) & ~1u;
+  size_t src_width = work_width >> 1u;  // adjacent source pairs to blend
+  const uint8_t* work_s = src_ptr;
+  const uint8_t* work_t = src_ptr + src_stride;
+  const uint8_t* s = work_s;  // row starts, kept for the edge pixels
+  const uint8_t* t = work_t;
+  uint8_t* d = dst_ptr;
+  uint8_t* e = dst_ptr + dst_stride;
+  uint8_t* work_d = d + 1;  // d[0] and e[0] are written separately
+  uint8_t* work_e = e + 1;
+  size_t vl = __riscv_vsetvlmax_e16m4();  // only used to splat the constant 3
+  vuint16m4_t v_3_u16 = __riscv_vmv_v_x_u16m4(3, vl);
+  vuint8m2_t v_3_u8 = __riscv_vmv_v_x_u8m2(3, vl);
+  d[0] = (3 * s[0] + t[0] + 2) >> 2;  // left edge: vertical blend only
+  e[0] = (s[0] + 3 * t[0] + 2) >> 2;
+  while (src_width > 0) {
+    vuint8m2_t v_s0, v_s1, v_t0, v_t1;
+    vuint16m4_t v_s0_u16, v_s1_u16, v_t0_u16, v_t1_u16;
+    vuint16m4_t v_t0_u16_, v_t1_u16_;  // copies of row t's horizontal sums
+    vuint8m2_t v_dst0_even, v_dst0_odd, v_dst1_even, v_dst1_odd;
+    size_t vl = __riscv_vsetvl_e8m2(src_width);  // shadows the outer vl
+    v_s0 = __riscv_vle8_v_u8m2(work_s, vl);      // s[x]
+    v_s1 = __riscv_vle8_v_u8m2(work_s + 1, vl);  // s[x + 1]
+
+    v_s0_u16 = __riscv_vwaddu_vx_u16m4(v_s0, 2, vl);
+    v_s1_u16 = __riscv_vwaddu_vx_u16m4(v_s1, 2, vl);
+    v_s0_u16 = __riscv_vwmaccu_vv_u16m4(v_s0_u16, v_3_u8, v_s1, vl);  // s0+3*s1+2
+    v_s1_u16 = __riscv_vwmaccu_vv_u16m4(v_s1_u16, v_3_u8, v_s0, vl);  // 3*s0+s1+2
+
+    v_t0 = __riscv_vle8_v_u8m2(work_t, vl);      // t[x]
+    v_t1 = __riscv_vle8_v_u8m2(work_t + 1, vl);  // t[x + 1]
+
+    v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_t0, 2, vl);
+    v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_t1, 2, vl);
+    v_t0_u16 = __riscv_vwmaccu_vv_u16m4(v_t0_u16, v_3_u8, v_t1, vl);  // t0+3*t1+2
+    v_t1_u16 = __riscv_vwmaccu_vv_u16m4(v_t1_u16, v_3_u8, v_t0, vl);  // 3*t0+t1+2
+
+    v_t0_u16_ = __riscv_vmv_v_v_u16m4(v_t0_u16, vl);  // saved before overwrite
+    v_t1_u16_ = __riscv_vmv_v_v_u16m4(v_t1_u16, vl);
+
+    v_t0_u16 = __riscv_vmacc_vv_u16m4(v_t0_u16, v_3_u16, v_s0_u16, vl);  // t+3*s
+    v_t1_u16 = __riscv_vmacc_vv_u16m4(v_t1_u16, v_3_u16, v_s1_u16, vl);
+    v_s0_u16 = __riscv_vmacc_vv_u16m4(v_s0_u16, v_3_u16, v_t0_u16_, vl);  // s+3*t
+    v_s1_u16 = __riscv_vmacc_vv_u16m4(v_s1_u16, v_3_u16, v_t1_u16_, vl);
+
+    v_dst0_odd = __riscv_vnsrl_wx_u8m2(v_t0_u16, 4, vl);  // weights sum to 16
+    v_dst0_even = __riscv_vnsrl_wx_u8m2(v_t1_u16, 4, vl);
+    v_dst1_odd = __riscv_vnsrl_wx_u8m2(v_s0_u16, 4, vl);
+    v_dst1_even = __riscv_vnsrl_wx_u8m2(v_s1_u16, 4, vl);
+
+    __riscv_vsseg2e8_v_u8m2(work_d, v_dst0_even, v_dst0_odd, vl);  // row d
+    __riscv_vsseg2e8_v_u8m2(work_e, v_dst1_even, v_dst1_odd, vl);  // row e
+
+    src_width -= vl;
+    work_s += vl;
+    work_t += vl;
+    work_d += 2 * vl;  // two output bytes per source pair
+    work_e += 2 * vl;
+  }
+  d[dst_width - 1] =  // right edge: vertical blend only
+      (3 * s[(dst_width - 1) / 2] + t[(dst_width - 1) / 2] + 2) >> 2;
+  e[dst_width - 1] =
+      (s[(dst_width - 1) / 2] + 3 * t[(dst_width - 1) / 2] + 2) >> 2;
+}
+#endif
+
+#ifdef HAS_SCALEUVROWDOWN2_RVV
+void ScaleUVRowDown2_RVV(const uint8_t* src_uv,  // 2:1 on 2-byte UV pairs
+                         ptrdiff_t src_stride,   // unused (single-row kernel)
+                         uint8_t* dst_uv,        // receives dst_width UV pairs
+                         int dst_width) {        // number of output UV pairs
+  size_t w = (size_t)dst_width;
+  const uint32_t* src = (const uint32_t*)src_uv;  // two UV pairs per element
+  uint16_t* dst = (uint16_t*)dst_uv;              // one UV pair per element
+  (void)src_stride;
+  do {
+    size_t vl = __riscv_vsetvl_e32m8(w);  // output pairs handled this pass
+    vuint32m8_t v_data = __riscv_vle32_v_u32m8(src, vl);
+    vuint16m4_t v_u1v1 = __riscv_vnsrl_wx_u16m4(v_data, 16, vl);  // bits 31..16
+    __riscv_vse16_v_u16m4(dst, v_u1v1, vl);
+    w -= vl;
+    src += vl;
+    dst += vl;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEUVROWDOWN2LINEAR_RVV
+void ScaleUVRowDown2Linear_RVV(const uint8_t* src_uv,  // 2:1, averages UV pairs
+                               ptrdiff_t src_stride,   // unused
+                               uint8_t* dst_uv,  // receives dst_width UV pairs
+                               int dst_width) {  // number of output UV pairs
+  size_t w = (size_t)dst_width;
+  const uint16_t* src = (const uint16_t*)src_uv;  // one UV pair per element
+  (void)src_stride;
+  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+  // register) is set to round-to-nearest-up mode(0).
+  asm volatile("csrwi vxrm, 0");  // vxrm is not restored before returning
+  do {
+    vuint8m4_t v_u0v0, v_u1v1, v_avg;
+    vuint16m4_t v_u0v0_16, v_u1v1_16;
+    size_t vl = __riscv_vsetvl_e16m4(w);  // output pairs handled this pass
+    __riscv_vlseg2e16_v_u16m4(&v_u0v0_16, &v_u1v1_16, src, vl);  // even/odd pairs
+    v_u0v0 = __riscv_vreinterpret_v_u16m4_u8m4(v_u0v0_16);
+    v_u1v1 = __riscv_vreinterpret_v_u16m4_u8m4(v_u1v1_16);
+    // Byte-wise averaging add with round-to-nearest-up, on vl * 2 bytes
+    v_avg = __riscv_vaaddu_vv_u8m4(v_u0v0, v_u1v1, vl * 2);
+    __riscv_vse8_v_u8m4(dst_uv, v_avg, vl * 2);
+    w -= vl;
+    src += vl * 2;  // two source pairs consumed per output pair
+    dst_uv += vl * 2;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEUVROWDOWN2BOX_RVV
+void ScaleUVRowDown2Box_RVV(const uint8_t* src_uv,  // 2:1 UV, 2x2 box average
+                            ptrdiff_t src_stride,   // byte offset to second row
+                            uint8_t* dst_uv,        // receives dst_width UV pairs
+                            int dst_width) {        // number of output UV pairs
+  const uint8_t* src_uv_row1 = src_uv + src_stride;  // second source row
+  size_t w = (size_t)dst_width;
+  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+  // register) is set to round-to-nearest-up mode(0).
+  asm volatile("csrwi vxrm, 0");  // vxrm is not restored before returning
+  do {
+    vuint8m2_t v_u0_row0, v_v0_row0, v_u1_row0, v_v1_row0;
+    vuint8m2_t v_u0_row1, v_v0_row1, v_u1_row1, v_v1_row1;
+    vuint16m4_t v_u0u1_row0, v_u0u1_row1, v_v0v1_row0, v_v0v1_row1;
+    vuint16m4_t v_sum0, v_sum1;
+    vuint8m2_t v_dst_u, v_dst_v;
+    size_t vl = __riscv_vsetvl_e8m2(w);  // output pairs handled this pass
+
+    __riscv_vlseg4e8_v_u8m2(&v_u0_row0, &v_v0_row0, &v_u1_row0, &v_v1_row0,
+                            src_uv, vl);
+    __riscv_vlseg4e8_v_u8m2(&v_u0_row1, &v_v0_row1, &v_u1_row1, &v_v1_row1,
+                            src_uv_row1, vl);
+
+    v_u0u1_row0 = __riscv_vwaddu_vv_u16m4(v_u0_row0, v_u1_row0, vl);
+    v_u0u1_row1 = __riscv_vwaddu_vv_u16m4(v_u0_row1, v_u1_row1, vl);
+    v_v0v1_row0 = __riscv_vwaddu_vv_u16m4(v_v0_row0, v_v1_row0, vl);
+    v_v0v1_row1 = __riscv_vwaddu_vv_u16m4(v_v0_row1, v_v1_row1, vl);
+
+    v_sum0 = __riscv_vadd_vv_u16m4(v_u0u1_row0, v_u0u1_row1, vl);  // 4 U bytes
+    v_sum1 = __riscv_vadd_vv_u16m4(v_v0v1_row0, v_v0v1_row1, vl);  // 4 V bytes
+    // Narrow with round-to-nearest-up: dst = (sum + 2) >> 2
+    v_dst_u = __riscv_vnclipu_wx_u8m2(v_sum0, 2, vl);
+    v_dst_v = __riscv_vnclipu_wx_u8m2(v_sum1, 2, vl);
+
+    __riscv_vsseg2e8_v_u8m2(dst_uv, v_dst_u, v_dst_v, vl);  // re-interleave U, V
+
+    dst_uv += 2 * vl;
+    src_uv += 4 * vl;
+    w -= vl;
+    src_uv_row1 += 4 * vl;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEUVROWDOWN4_RVV
+void ScaleUVRowDown4_RVV(const uint8_t* src_uv,  // 4:1 on UV pairs, no filter
+                         ptrdiff_t src_stride,   // unused
+                         int src_stepx,          // unused
+                         uint8_t* dst_uv,        // 2 bytes written per 8 read
+                         int dst_width) {        // number of output UV pairs
+  // w counts source bytes (8 per output pair). The original note reads:
+  // no overflow since sizeof(size_t)/sizeof(int)=2 and dst_width=src_width/4.
+  size_t w = (size_t)dst_width * 8;
+  (void)src_stride;
+  (void)src_stepx;
+  do {
+    size_t vl = __riscv_vsetvl_e8m8(w);  // NOTE(review): assumes vl % 8 == 0
+    vuint8m8_t v_row = __riscv_vle8_v_u8m8(src_uv, vl);
+    vuint64m8_t v_row_64 = __riscv_vreinterpret_v_u8m8_u64m8(v_row);
+    // Narrowing without clipping: keeps the low 16 bits of each 64-bit element
+    vuint32m4_t v_tmp = __riscv_vncvt_x_x_w_u32m4(v_row_64, vl / 8);
+    vuint16m2_t v_dst_16 = __riscv_vncvt_x_x_w_u16m2(v_tmp, vl / 8);
+    vuint8m2_t v_dst = __riscv_vreinterpret_v_u16m2_u8m2(v_dst_16);
+    __riscv_vse8_v_u8m2(dst_uv, v_dst, vl / 4);  // 2 bytes per 8 source bytes
+    w -= vl;
+    src_uv += vl;
+    dst_uv += vl / 4;
+  } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEUVROWDOWNEVEN_RVV
+void ScaleUVRowDownEven_RVV(const uint8_t* src_uv,  // picks every Nth UV pair
+                            ptrdiff_t src_stride,   // unused
+                            int src_stepx,          // N, in UV pairs
+                            uint8_t* dst_uv,        // receives dst_width UV pairs
+                            int dst_width) {        // number of output UV pairs
+  size_t w = (size_t)dst_width;
+  const ptrdiff_t stride_byte = (ptrdiff_t)src_stepx * 2;  // 2 bytes per pair
+  const uint16_t* src = (const uint16_t*)(src_uv);  // one UV pair per element
+  uint16_t* dst = (uint16_t*)(dst_uv);
+  (void)src_stride;
+  do {
+    size_t vl = __riscv_vsetvl_e16m8(w);  // output pairs handled this pass
+    vuint16m8_t v_row = __riscv_vlse16_v_u16m8(src, stride_byte, vl);
+    __riscv_vse16_v_u16m8(dst, v_row, vl);  // stored contiguously
+    w -= vl;
+    src += vl * src_stepx;
+    dst += vl;
+  } while (w > 0);
+}
+#endif
+
+// ScaleUVRowUp2_(Bi)linear_RVV functions are equivalent to other platforms'
+// ScaleUVRowUp2_(Bi)linear_Any_XXX. The entire row is processed here; other
+// platforms implement only the non-edge part and handle edges with scalar
+// code.
+
+#ifdef HAS_SCALEUVROWUP2_LINEAR_RVV
+void ScaleUVRowUp2_Linear_RVV(const uint8_t* src_ptr,  // 1:2 UV, 3:1 blend
+                              uint8_t* dst_ptr,  // receives dst_width UV pairs
+                              int dst_width) {   // number of output UV pairs
+  size_t work_width = ((size_t)dst_width - 1u) & ~1u;  // source bytes to blend
+  uint16_t* work_dst_ptr = (uint16_t*)dst_ptr + 1;  // skips the first UV pair
+  const uint8_t* work_src_ptr = src_ptr;
+  size_t vl = __riscv_vsetvlmax_e8m4();  // only used to splat the constant 3
+  vuint8m4_t v_3_u8 = __riscv_vmv_v_x_u8m4(3, vl);
+  dst_ptr[0] = src_ptr[0];  // first UV pair is a plain copy
+  dst_ptr[1] = src_ptr[1];
+  while (work_width > 0) {
+    vuint8m4_t v_uv0, v_uv1, v_dst_odd_u8, v_dst_even_u8;
+    vuint16m4_t v_dst_odd, v_dst_even;
+    vuint16m8_t v_uv0_u16, v_uv1_u16;
+    size_t vl = __riscv_vsetvl_e8m4(work_width);  // shadows the outer vl
+    v_uv0 = __riscv_vle8_v_u8m4(work_src_ptr, vl);      // pair x
+    v_uv1 = __riscv_vle8_v_u8m4(work_src_ptr + 2, vl);  // pair x + 1
+
+    v_uv0_u16 = __riscv_vwaddu_vx_u16m8(v_uv0, 2, vl);  // +2 is the rounding
+    v_uv1_u16 = __riscv_vwaddu_vx_u16m8(v_uv1, 2, vl);
+
+    v_uv0_u16 = __riscv_vwmaccu_vv_u16m8(v_uv0_u16, v_3_u8, v_uv1, vl);
+    v_uv1_u16 = __riscv_vwmaccu_vv_u16m8(v_uv1_u16, v_3_u8, v_uv0, vl);
+
+    v_dst_odd_u8 = __riscv_vnsrl_wx_u8m4(v_uv0_u16, 2, vl);  // (a + 3*b + 2) >> 2
+    v_dst_even_u8 = __riscv_vnsrl_wx_u8m4(v_uv1_u16, 2, vl);  // (3*a + b + 2) >> 2
+
+    v_dst_even = __riscv_vreinterpret_v_u8m4_u16m4(v_dst_even_u8);
+    v_dst_odd = __riscv_vreinterpret_v_u8m4_u16m4(v_dst_odd_u8);
+
+    __riscv_vsseg2e16_v_u16m4(work_dst_ptr, v_dst_even, v_dst_odd, vl / 2);
+
+    work_width -= vl;
+    work_src_ptr += vl;
+    work_dst_ptr += vl;  // uint16_t pointer: advances 2 * vl bytes
+  }
+  dst_ptr[2 * dst_width - 2] = src_ptr[((dst_width + 1) & ~1) - 2];  // last pair
+  dst_ptr[2 * dst_width - 1] = src_ptr[((dst_width + 1) & ~1) - 1];  // is copied
+}
+#endif
+
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_RVV
+void ScaleUVRowUp2_Bilinear_RVV(const uint8_t* src_ptr,  // 2 rows in, 2 out
+                                ptrdiff_t src_stride,  // offset to source row t
+                                uint8_t* dst_ptr,      // first output row
+                                ptrdiff_t dst_stride,  // offset to output row e
+                                int dst_width) {  // UV pairs per output row
+  size_t work_width = ((size_t)dst_width - 1u) & ~1u;  // source bytes to blend
+  const uint8_t* work_s = src_ptr;
+  const uint8_t* work_t = src_ptr + src_stride;
+  const uint8_t* s = work_s;  // row starts, kept for the edge pairs
+  const uint8_t* t = work_t;
+  uint8_t* d = dst_ptr;
+  uint8_t* e = dst_ptr + dst_stride;
+  uint16_t* work_d = (uint16_t*)d + 1;  // skips the first UV pair of row d
+  uint16_t* work_e = (uint16_t*)e + 1;  // skips the first UV pair of row e
+  size_t vl = __riscv_vsetvlmax_e16m4();  // only used to splat the constant 3
+  vuint16m4_t v_3_u16 = __riscv_vmv_v_x_u16m4(3, vl);
+  vuint8m2_t v_3_u8 = __riscv_vmv_v_x_u8m2(3, vl);
+  d[0] = (3 * s[0] + t[0] + 2) >> 2;  // left edge pair: vertical blend only
+  e[0] = (s[0] + 3 * t[0] + 2) >> 2;
+  d[1] = (3 * s[1] + t[1] + 2) >> 2;
+  e[1] = (s[1] + 3 * t[1] + 2) >> 2;
+  while (work_width > 0) {
+    vuint8m2_t v_s0, v_s1, v_t0, v_t1;
+    vuint16m4_t v_s0_u16, v_s1_u16, v_t0_u16, v_t1_u16;
+    vuint16m4_t v_t0_u16_, v_t1_u16_;  // copies of row t's horizontal sums
+    vuint8m2_t v_dst0_odd_u8, v_dst0_even_u8, v_dst1_odd_u8, v_dst1_even_u8;
+    vuint16m2_t v_dst0_even, v_dst0_odd, v_dst1_even, v_dst1_odd;
+    size_t vl = __riscv_vsetvl_e8m2(work_width);  // shadows the outer vl
+    v_s0 = __riscv_vle8_v_u8m2(work_s, vl);      // pair x of row s
+    v_s1 = __riscv_vle8_v_u8m2(work_s + 2, vl);  // pair x + 1 of row s
+
+    v_s0_u16 = __riscv_vwaddu_vx_u16m4(v_s0, 2, vl);
+    v_s1_u16 = __riscv_vwaddu_vx_u16m4(v_s1, 2, vl);
+    v_s0_u16 = __riscv_vwmaccu_vv_u16m4(v_s0_u16, v_3_u8, v_s1, vl);  // s0+3*s1+2
+    v_s1_u16 = __riscv_vwmaccu_vv_u16m4(v_s1_u16, v_3_u8, v_s0, vl);  // 3*s0+s1+2
+
+    v_t0 = __riscv_vle8_v_u8m2(work_t, vl);      // pair x of row t
+    v_t1 = __riscv_vle8_v_u8m2(work_t + 2, vl);  // pair x + 1 of row t
+
+    v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_t0, 2, vl);
+    v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_t1, 2, vl);
+    v_t0_u16 = __riscv_vwmaccu_vv_u16m4(v_t0_u16, v_3_u8, v_t1, vl);  // t0+3*t1+2
+    v_t1_u16 = __riscv_vwmaccu_vv_u16m4(v_t1_u16, v_3_u8, v_t0, vl);  // 3*t0+t1+2
+
+    v_t0_u16_ = __riscv_vmv_v_v_u16m4(v_t0_u16, vl);  // saved before overwrite
+    v_t1_u16_ = __riscv_vmv_v_v_u16m4(v_t1_u16, vl);
+
+    v_t0_u16 = __riscv_vmacc_vv_u16m4(v_t0_u16, v_3_u16, v_s0_u16, vl);  // t+3*s
+    v_t1_u16 = __riscv_vmacc_vv_u16m4(v_t1_u16, v_3_u16, v_s1_u16, vl);
+    v_s0_u16 = __riscv_vmacc_vv_u16m4(v_s0_u16, v_3_u16, v_t0_u16_, vl);  // s+3*t
+    v_s1_u16 = __riscv_vmacc_vv_u16m4(v_s1_u16, v_3_u16, v_t1_u16_, vl);
+
+    v_dst0_odd_u8 = __riscv_vnsrl_wx_u8m2(v_t0_u16, 4, vl);  // weights sum to 16
+    v_dst0_even_u8 = __riscv_vnsrl_wx_u8m2(v_t1_u16, 4, vl);
+    v_dst1_odd_u8 = __riscv_vnsrl_wx_u8m2(v_s0_u16, 4, vl);
+    v_dst1_even_u8 = __riscv_vnsrl_wx_u8m2(v_s1_u16, 4, vl);
+
+    v_dst0_even = __riscv_vreinterpret_v_u8m2_u16m2(v_dst0_even_u8);
+    v_dst0_odd = __riscv_vreinterpret_v_u8m2_u16m2(v_dst0_odd_u8);
+    v_dst1_even = __riscv_vreinterpret_v_u8m2_u16m2(v_dst1_even_u8);
+    v_dst1_odd = __riscv_vreinterpret_v_u8m2_u16m2(v_dst1_odd_u8);
+
+    __riscv_vsseg2e16_v_u16m2(work_d, v_dst0_even, v_dst0_odd, vl / 2);  // row d
+    __riscv_vsseg2e16_v_u16m2(work_e, v_dst1_even, v_dst1_odd, vl / 2);  // row e
+
+    work_width -= vl;
+    work_s += vl;
+    work_t += vl;
+    work_d += vl;  // uint16_t pointer: advances 2 * vl bytes
+    work_e += vl;
+  }
+  d[2 * dst_width - 2] =  // right edge pair: vertical blend only
+      (3 * s[((dst_width + 1) & ~1) - 2] + t[((dst_width + 1) & ~1) - 2] + 2) >>
+      2;
+  e[2 * dst_width - 2] =
+      (s[((dst_width + 1) & ~1) - 2] + 3 * t[((dst_width + 1) & ~1) - 2] + 2) >>
+      2;
+  d[2 * dst_width - 1] =
+      (3 * s[((dst_width + 1) & ~1) - 1] + t[((dst_width + 1) & ~1) - 1] + 2) >>
+      2;
+  e[2 * dst_width - 1] =
+      (s[((dst_width + 1) & ~1) - 1] + 3 * t[((dst_width + 1) & ~1) - 1] + 2) >>
+      2;
+}
+#endif
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) &&
+        // defined(__clang__)
diff --git a/files/source/scale_uv.cc b/source/scale_uv.cc
index 1556071d..0931c89a 100644
--- a/files/source/scale_uv.cc
+++ b/source/scale_uv.cc
@@ -128,6 +128,15 @@ static void ScaleUVDown2(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEUVROWDOWN2_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleUVRowDown2 =
+        filtering == kFilterNone
+            ? ScaleUVRowDown2_RVV
+            : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_RVV
+                                          : ScaleUVRowDown2Box_RVV);
+  }
+#endif
 
 // This code is not enabled.  Only box filter is available at this time.
 #if defined(HAS_SCALEUVROWDOWN2_SSSE3)
@@ -179,22 +188,24 @@ static void ScaleUVDown2(int src_width,
 // This is an optimized version for scaling down a UV to 1/4 of
 // its original size.
 #if HAS_SCALEUVDOWN4BOX
-static void ScaleUVDown4Box(int src_width,
-                            int src_height,
-                            int dst_width,
-                            int dst_height,
-                            int src_stride,
-                            int dst_stride,
-                            const uint8_t* src_uv,
-                            uint8_t* dst_uv,
-                            int x,
-                            int dx,
-                            int y,
-                            int dy) {
+static int ScaleUVDown4Box(int src_width,
+                           int src_height,
+                           int dst_width,
+                           int dst_height,
+                           int src_stride,
+                           int dst_stride,
+                           const uint8_t* src_uv,
+                           uint8_t* dst_uv,
+                           int x,
+                           int dx,
+                           int y,
+                           int dy) {
   int j;
   // Allocate 2 rows of UV.
   const int row_size = (dst_width * 2 * 2 + 15) & ~15;
   align_buffer_64(row, row_size * 2);
+  if (!row)
+    return 1;
   int row_stride = src_stride * (dy >> 16);
   void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride,
                           uint8_t* dst_uv, int dst_width) =
@@ -231,6 +242,11 @@ static void ScaleUVDown4Box(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEUVROWDOWN2BOX_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleUVRowDown2 = ScaleUVRowDown2Box_RVV;
+  }
+#endif
 
   for (j = 0; j < dst_height; ++j) {
     ScaleUVRowDown2(src_uv, src_stride, row, dst_width * 2);
@@ -241,6 +257,7 @@ static void ScaleUVDown4Box(int src_width,
     dst_uv += dst_stride;
   }
   free_aligned_buffer_64(row);
+  return 0;
 }
 #endif  // HAS_SCALEUVDOWN4BOX
 
@@ -310,6 +327,12 @@ static void ScaleUVDownEven(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEUVROWDOWNEVEN_RVV)
+  if (TestCpuFlag(kCpuHasRVV) && !filtering) {
+    ScaleUVRowDownEven =
+        (col_step == 4) ? ScaleUVRowDown4_RVV : ScaleUVRowDownEven_RVV;
+  }
+#endif
 
   if (filtering == kFilterLinear) {
     src_stride = 0;
@@ -324,19 +347,19 @@ static void ScaleUVDownEven(int src_width,
 
 // Scale UV down with bilinear interpolation.
 #if HAS_SCALEUVBILINEARDOWN
-static void ScaleUVBilinearDown(int src_width,
-                                int src_height,
-                                int dst_width,
-                                int dst_height,
-                                int src_stride,
-                                int dst_stride,
-                                const uint8_t* src_uv,
-                                uint8_t* dst_uv,
-                                int x,
-                                int dx,
-                                int y,
-                                int dy,
-                                enum FilterMode filtering) {
+static int ScaleUVBilinearDown(int src_width,
+                               int src_height,
+                               int dst_width,
+                               int dst_height,
+                               int src_stride,
+                               int dst_stride,
+                               const uint8_t* src_uv,
+                               uint8_t* dst_uv,
+                               int x,
+                               int dx,
+                               int y,
+                               int dy,
+                               enum FilterMode filtering) {
   int j;
   void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
                          ptrdiff_t src_stride, int dst_width,
@@ -426,9 +449,10 @@ static void ScaleUVBilinearDown(int src_width,
   // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
   // Allocate a row of UV.
   {
-    align_buffer_64(row, clip_src_width * 2);
-
     const int max_y = (src_height - 1) << 16;
+    align_buffer_64(row, clip_src_width * 2);
+    if (!row)
+      return 1;
     if (y > max_y) {
       y = max_y;
     }
@@ -450,24 +474,25 @@ static void ScaleUVBilinearDown(int src_width,
     }
     free_aligned_buffer_64(row);
   }
+  return 0;
 }
 #endif
 
 // Scale UV up with bilinear interpolation.
 #if HAS_SCALEUVBILINEARUP
-static void ScaleUVBilinearUp(int src_width,
-                              int src_height,
-                              int dst_width,
-                              int dst_height,
-                              int src_stride,
-                              int dst_stride,
-                              const uint8_t* src_uv,
-                              uint8_t* dst_uv,
-                              int x,
-                              int dx,
-                              int y,
-                              int dy,
-                              enum FilterMode filtering) {
+static int ScaleUVBilinearUp(int src_width,
+                             int src_height,
+                             int dst_width,
+                             int dst_height,
+                             int src_stride,
+                             int dst_stride,
+                             const uint8_t* src_uv,
+                             uint8_t* dst_uv,
+                             int x,
+                             int dx,
+                             int y,
+                             int dy,
+                             enum FilterMode filtering) {
   int j;
   void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
                          ptrdiff_t src_stride, int dst_width,
@@ -586,6 +611,8 @@ static void ScaleUVBilinearUp(int src_width,
     // Allocate 2 rows of UV.
     const int row_size = (dst_width * 2 + 15) & ~15;
     align_buffer_64(row, row_size * 2);
+    if (!row)
+      return 1;
 
     uint8_t* rowptr = row;
     int rowstride = row_size;
@@ -629,6 +656,7 @@ static void ScaleUVBilinearUp(int src_width,
     }
     free_aligned_buffer_64(row);
   }
+  return 0;
 }
 #endif  // HAS_SCALEUVBILINEARUP
 
@@ -637,14 +665,14 @@ static void ScaleUVBilinearUp(int src_width,
 // This is an optimized version for scaling up a plane to 2 times of
 // its original width, using linear interpolation.
 // This is used to scale U and V planes of NV16 to NV24.
-void ScaleUVLinearUp2(int src_width,
-                      int src_height,
-                      int dst_width,
-                      int dst_height,
-                      int src_stride,
-                      int dst_stride,
-                      const uint8_t* src_uv,
-                      uint8_t* dst_uv) {
+static void ScaleUVLinearUp2(int src_width,
+                             int src_height,
+                             int dst_width,
+                             int dst_height,
+                             int src_stride,
+                             int dst_stride,
+                             const uint8_t* src_uv,
+                             uint8_t* dst_uv) {
   void (*ScaleRowUp)(const uint8_t* src_uv, uint8_t* dst_uv, int dst_width) =
       ScaleUVRowUp2_Linear_Any_C;
   int i;
@@ -672,6 +700,12 @@ void ScaleUVLinearUp2(int src_width,
   }
 #endif
 
+#ifdef HAS_SCALEUVROWUP2_LINEAR_RVV
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleRowUp = ScaleUVRowUp2_Linear_RVV;
+  }
+#endif
+
   if (dst_height == 1) {
     ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
                dst_width);
@@ -690,14 +724,14 @@ void ScaleUVLinearUp2(int src_width,
 // This is an optimized version for scaling up a plane to 2 times of
 // its original size, using bilinear interpolation.
 // This is used to scale U and V planes of NV12 to NV24.
-void ScaleUVBilinearUp2(int src_width,
-                        int src_height,
-                        int dst_width,
-                        int dst_height,
-                        int src_stride,
-                        int dst_stride,
-                        const uint8_t* src_ptr,
-                        uint8_t* dst_ptr) {
+static void ScaleUVBilinearUp2(int src_width,
+                               int src_height,
+                               int dst_width,
+                               int dst_height,
+                               int src_stride,
+                               int dst_stride,
+                               const uint8_t* src_ptr,
+                               uint8_t* dst_ptr) {
   void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                       uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
       ScaleUVRowUp2_Bilinear_Any_C;
@@ -725,6 +759,12 @@ void ScaleUVBilinearUp2(int src_width,
   }
 #endif
 
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_RVV
+  if (TestCpuFlag(kCpuHasRVV)) {
+    Scale2RowUp = ScaleUVRowUp2_Bilinear_RVV;
+  }
+#endif
+
   Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
   dst_ptr += dst_stride;
   for (x = 0; x < src_height - 1; ++x) {
@@ -744,14 +784,14 @@ void ScaleUVBilinearUp2(int src_width,
 // This is an optimized version for scaling up a plane to 2 times of
 // its original width, using linear interpolation.
 // This is used to scale U and V planes of P210 to P410.
-void ScaleUVLinearUp2_16(int src_width,
-                         int src_height,
-                         int dst_width,
-                         int dst_height,
-                         int src_stride,
-                         int dst_stride,
-                         const uint16_t* src_uv,
-                         uint16_t* dst_uv) {
+static void ScaleUVLinearUp2_16(int src_width,
+                                int src_height,
+                                int dst_width,
+                                int dst_height,
+                                int src_stride,
+                                int dst_stride,
+                                const uint16_t* src_uv,
+                                uint16_t* dst_uv) {
   void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) =
       ScaleUVRowUp2_Linear_16_Any_C;
   int i;
@@ -797,14 +837,14 @@ void ScaleUVLinearUp2_16(int src_width,
 // This is an optimized version for scaling up a plane to 2 times of
 // its original size, using bilinear interpolation.
 // This is used to scale U and V planes of P010 to P410.
-void ScaleUVBilinearUp2_16(int src_width,
-                           int src_height,
-                           int dst_width,
-                           int dst_height,
-                           int src_stride,
-                           int dst_stride,
-                           const uint16_t* src_ptr,
-                           uint16_t* dst_ptr) {
+static void ScaleUVBilinearUp2_16(int src_width,
+                                  int src_height,
+                                  int dst_width,
+                                  int dst_height,
+                                  int src_stride,
+                                  int dst_stride,
+                                  const uint16_t* src_ptr,
+                                  uint16_t* dst_ptr) {
   void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                       uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
       ScaleUVRowUp2_Bilinear_16_Any_C;
@@ -952,19 +992,19 @@ static int UVCopy_16(const uint16_t* src_uv,
 // Scale a UV plane (from NV12)
 // This function in turn calls a scaling function
 // suitable for handling the desired resolutions.
-static void ScaleUV(const uint8_t* src,
-                    int src_stride,
-                    int src_width,
-                    int src_height,
-                    uint8_t* dst,
-                    int dst_stride,
-                    int dst_width,
-                    int dst_height,
-                    int clip_x,
-                    int clip_y,
-                    int clip_width,
-                    int clip_height,
-                    enum FilterMode filtering) {
+static int ScaleUV(const uint8_t* src,
+                   int src_stride,
+                   int src_width,
+                   int src_height,
+                   uint8_t* dst,
+                   int dst_stride,
+                   int dst_width,
+                   int dst_height,
+                   int clip_x,
+                   int clip_y,
+                   int clip_width,
+                   int clip_height,
+                   enum FilterMode filtering) {
   // Initial source x/y coordinate and step values as 16.16 fixed point.
   int x = 0;
   int y = 0;
@@ -1010,22 +1050,22 @@ static void ScaleUV(const uint8_t* src,
           ScaleUVDown2(src_width, src_height, clip_width, clip_height,
                        src_stride, dst_stride, src, dst, x, dx, y, dy,
                        filtering);
-          return;
+          return 0;
         }
 #endif
 #if HAS_SCALEUVDOWN4BOX
         if (dx == 0x40000 && filtering == kFilterBox) {
           // Optimized 1/4 box downsample.
-          ScaleUVDown4Box(src_width, src_height, clip_width, clip_height,
-                          src_stride, dst_stride, src, dst, x, dx, y, dy);
-          return;
+          return ScaleUVDown4Box(src_width, src_height, clip_width, clip_height,
+                                 src_stride, dst_stride, src, dst, x, dx, y,
+                                 dy);
         }
 #endif
 #if HAS_SCALEUVDOWNEVEN
         ScaleUVDownEven(src_width, src_height, clip_width, clip_height,
                         src_stride, dst_stride, src, dst, x, dx, y, dy,
                         filtering);
-        return;
+        return 0;
 #endif
       }
       // Optimized odd scale down. ie 3, 5, 7, 9x.
@@ -1036,7 +1076,7 @@ static void ScaleUV(const uint8_t* src,
           // Straight copy.
           UVCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2,
                  src_stride, dst, dst_stride, clip_width, clip_height);
-          return;
+          return 0;
         }
 #endif
       }
@@ -1047,38 +1087,37 @@ static void ScaleUV(const uint8_t* src,
     // Arbitrary scale vertically, but unscaled horizontally.
     ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
                        dst_stride, src, dst, x, y, dy, /*bpp=*/2, filtering);
-    return;
+    return 0;
   }
   if ((filtering == kFilterLinear) && ((dst_width + 1) / 2 == src_width)) {
     ScaleUVLinearUp2(src_width, src_height, clip_width, clip_height, src_stride,
                      dst_stride, src, dst);
-    return;
+    return 0;
   }
   if ((clip_height + 1) / 2 == src_height &&
       (clip_width + 1) / 2 == src_width &&
       (filtering == kFilterBilinear || filtering == kFilterBox)) {
     ScaleUVBilinearUp2(src_width, src_height, clip_width, clip_height,
                        src_stride, dst_stride, src, dst);
-    return;
+    return 0;
   }
 #if HAS_SCALEUVBILINEARUP
   if (filtering && dy < 65536) {
-    ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height,
-                      src_stride, dst_stride, src, dst, x, dx, y, dy,
-                      filtering);
-    return;
+    return ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height,
+                             src_stride, dst_stride, src, dst, x, dx, y, dy,
+                             filtering);
   }
 #endif
 #if HAS_SCALEUVBILINEARDOWN
   if (filtering) {
-    ScaleUVBilinearDown(src_width, src_height, clip_width, clip_height,
-                        src_stride, dst_stride, src, dst, x, dx, y, dy,
-                        filtering);
-    return;
+    return ScaleUVBilinearDown(src_width, src_height, clip_width, clip_height,
+                               src_stride, dst_stride, src, dst, x, dx, y, dy,
+                               filtering);
   }
 #endif
   ScaleUVSimple(src_width, src_height, clip_width, clip_height, src_stride,
                 dst_stride, src, dst, x, dx, y, dy);
+  return 0;
 }
 
 // Scale an UV image.
@@ -1096,9 +1135,9 @@ int UVScale(const uint8_t* src_uv,
       src_height > 32768 || !dst_uv || dst_width <= 0 || dst_height <= 0) {
     return -1;
   }
-  ScaleUV(src_uv, src_stride_uv, src_width, src_height, dst_uv, dst_stride_uv,
-          dst_width, dst_height, 0, 0, dst_width, dst_height, filtering);
-  return 0;
+  return ScaleUV(src_uv, src_stride_uv, src_width, src_height, dst_uv,
+                 dst_stride_uv, dst_width, dst_height, 0, 0, dst_width,
+                 dst_height, filtering);
 }
 
 // Scale a 16 bit UV image.
diff --git a/files/source/scale_win.cc b/source/scale_win.cc
index ea1f95c6..ea1f95c6 100644
--- a/files/source/scale_win.cc
+++ b/source/scale_win.cc
diff --git a/files/source/test.sh b/source/test.sh
index 7f12c3c1..7f12c3c1 100755
--- a/files/source/test.sh
+++ b/source/test.sh
diff --git a/files/source/video_common.cc b/source/video_common.cc
index 92384c05..92384c05 100644
--- a/files/source/video_common.cc
+++ b/source/video_common.cc
diff --git a/tools_libyuv/OWNERS b/tools_libyuv/OWNERS
new file mode 100644
index 00000000..aae4fb6e
--- /dev/null
+++ b/tools_libyuv/OWNERS
@@ -0,0 +1,4 @@
+mbonadei@chromium.org
+fbarchard@chromium.org
+pbos@chromium.org
+
diff --git a/files/tools_libyuv/autoroller/roll_deps.py b/tools_libyuv/autoroller/roll_deps.py
index 2b57eb65..d5c1089f 100755
--- a/files/tools_libyuv/autoroller/roll_deps.py
+++ b/tools_libyuv/autoroller/roll_deps.py
@@ -31,6 +31,7 @@ def FindSrcDirPath():
 
 # Skip these dependencies (list without solution name prefix).
 DONT_AUTOROLL_THESE = [
+    'third_party/fuchsia-gn-sdk',
     'src/third_party/gflags/src',
     'src/third_party/mockito/src',
 ]
diff --git a/files/tools_libyuv/autoroller/unittests/roll_deps_test.py b/tools_libyuv/autoroller/unittests/roll_deps_test.py
index af86bdd5..af86bdd5 100755
--- a/files/tools_libyuv/autoroller/unittests/roll_deps_test.py
+++ b/tools_libyuv/autoroller/unittests/roll_deps_test.py
diff --git a/files/tools_libyuv/autoroller/unittests/testdata/DEPS b/tools_libyuv/autoroller/unittests/testdata/DEPS
index 4f45860c..4f45860c 100644
--- a/files/tools_libyuv/autoroller/unittests/testdata/DEPS
+++ b/tools_libyuv/autoroller/unittests/testdata/DEPS
diff --git a/files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new b/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new
index d53083ce..d53083ce 100644
--- a/files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new
+++ b/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new
diff --git a/files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old b/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old
index dd6ddaec..dd6ddaec 100644
--- a/files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old
+++ b/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old
diff --git a/files/tools_libyuv/get_landmines.py b/tools_libyuv/get_landmines.py
index 8b33483e..8b33483e 100755
--- a/files/tools_libyuv/get_landmines.py
+++ b/tools_libyuv/get_landmines.py
diff --git a/tools_libyuv/msan/OWNERS b/tools_libyuv/msan/OWNERS
new file mode 100644
index 00000000..9b67a8f6
--- /dev/null
+++ b/tools_libyuv/msan/OWNERS
@@ -0,0 +1,3 @@
+mbonadei@chromium.org
+fbarchard@chromium.org
+pbos@chromium.org
diff --git a/files/tools_libyuv/msan/blacklist.txt b/tools_libyuv/msan/blacklist.txt
index 8b5e42a7..8b5e42a7 100644
--- a/files/tools_libyuv/msan/blacklist.txt
+++ b/tools_libyuv/msan/blacklist.txt
diff --git a/tools_libyuv/ubsan/OWNERS b/tools_libyuv/ubsan/OWNERS
new file mode 100644
index 00000000..9b67a8f6
--- /dev/null
+++ b/tools_libyuv/ubsan/OWNERS
@@ -0,0 +1,3 @@
+mbonadei@chromium.org
+fbarchard@chromium.org
+pbos@chromium.org
diff --git a/files/tools_libyuv/ubsan/blacklist.txt b/tools_libyuv/ubsan/blacklist.txt
index 8bcb2907..8bcb2907 100644
--- a/files/tools_libyuv/ubsan/blacklist.txt
+++ b/tools_libyuv/ubsan/blacklist.txt
diff --git a/files/tools_libyuv/ubsan/vptr_blacklist.txt b/tools_libyuv/ubsan/vptr_blacklist.txt
index 23cfca53..23cfca53 100644
--- a/files/tools_libyuv/ubsan/vptr_blacklist.txt
+++ b/tools_libyuv/ubsan/vptr_blacklist.txt
diff --git a/files/unit_test/basictypes_test.cc b/unit_test/basictypes_test.cc
index 9aaa2dcd..9aaa2dcd 100644
--- a/files/unit_test/basictypes_test.cc
+++ b/unit_test/basictypes_test.cc
diff --git a/files/unit_test/color_test.cc b/unit_test/color_test.cc
index 01267ff1..01267ff1 100644
--- a/files/unit_test/color_test.cc
+++ b/unit_test/color_test.cc
diff --git a/files/unit_test/compare_test.cc b/unit_test/compare_test.cc
index c29562cb..c29562cb 100644
--- a/files/unit_test/compare_test.cc
+++ b/unit_test/compare_test.cc
diff --git a/files/unit_test/convert_test.cc b/unit_test/convert_argb_test.cc
index 1f1896b0..aeee8a7f 100644
--- a/files/unit_test/convert_test.cc
+++ b/unit_test/convert_argb_test.cc
@@ -1,5 +1,5 @@
 /*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *  Copyright 2023 The LibYuv Project Authors. All rights reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
@@ -31,6 +31,13 @@
 #include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */
 #endif
 
+#if defined(__riscv) && !defined(__clang__)
+#define DISABLE_SLOW_TESTS
+#undef ENABLE_FULL_TESTS
+#undef ENABLE_ROW_TESTS
+#define LEAN_TESTS
+#endif
+
 // Some functions fail on big endian. Enable these tests on all cpus except
 // PowerPC, but they are not optimized so disabled by default.
 #if !defined(DISABLE_SLOW_TESTS) && !defined(__powerpc__)
@@ -51,501 +58,7 @@ namespace libyuv {
 // subsample amount uses a divide.
 #define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
 
-// Planar test
-
-#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,         \
-                       SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,             \
-                       DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF,      \
-                       SRC_DEPTH)                                             \
-  TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) {              \
-    static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported");       \
-    static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported");       \
-    static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2,                   \
-                  "SRC_SUBSAMP_X unsupported");                               \
-    static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2,                   \
-                  "SRC_SUBSAMP_Y unsupported");                               \
-    static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2,                   \
-                  "DST_SUBSAMP_X unsupported");                               \
-    static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2,                   \
-                  "DST_SUBSAMP_Y unsupported");                               \
-    const int kWidth = W1280;                                                 \
-    const int kHeight = benchmark_height_;                                    \
-    const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X);               \
-    const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y);             \
-    const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X);               \
-    const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y);             \
-    align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF);             \
-    align_buffer_page_end(src_u,                                              \
-                          kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF);      \
-    align_buffer_page_end(src_v,                                              \
-                          kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF);      \
-    align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC);                 \
-    align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC);   \
-    align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC);   \
-    align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC);               \
-    align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
-    align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
-    MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC);                    \
-    MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC);      \
-    MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC);      \
-    SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF);                   \
-    SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF);                   \
-    SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF);                   \
-    for (int i = 0; i < kWidth * kHeight; ++i) {                              \
-      src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1);                       \
-    }                                                                         \
-    for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) {                \
-      src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1);                       \
-      src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1);                       \
-    }                                                                         \
-    memset(dst_y_c, 1, kWidth* kHeight* DST_BPC);                             \
-    memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC);               \
-    memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC);               \
-    memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC);                         \
-    memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC);           \
-    memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC);           \
-    MaskCpuFlags(disable_cpu_flags_);                                         \
-    SRC_FMT_PLANAR##To##FMT_PLANAR(                                           \
-        src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth,      \
-        reinterpret_cast<DST_T*>(dst_y_c), kWidth,                            \
-        reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth,                     \
-        reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth,             \
-        NEG kHeight);                                                         \
-    MaskCpuFlags(benchmark_cpu_info_);                                        \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
-      SRC_FMT_PLANAR##To##FMT_PLANAR(                                         \
-          src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth,    \
-          reinterpret_cast<DST_T*>(dst_y_opt), kWidth,                        \
-          reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth,                 \
-          reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth,         \
-          NEG kHeight);                                                       \
-    }                                                                         \
-    for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) {                    \
-      EXPECT_EQ(dst_y_c[i], dst_y_opt[i]);                                    \
-    }                                                                         \
-    for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) {      \
-      EXPECT_EQ(dst_u_c[i], dst_u_opt[i]);                                    \
-      EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);                                    \
-    }                                                                         \
-    free_aligned_buffer_page_end(dst_y_c);                                    \
-    free_aligned_buffer_page_end(dst_u_c);                                    \
-    free_aligned_buffer_page_end(dst_v_c);                                    \
-    free_aligned_buffer_page_end(dst_y_opt);                                  \
-    free_aligned_buffer_page_end(dst_u_opt);                                  \
-    free_aligned_buffer_page_end(dst_v_opt);                                  \
-    free_aligned_buffer_page_end(src_y);                                      \
-    free_aligned_buffer_page_end(src_u);                                      \
-    free_aligned_buffer_page_end(src_v);                                      \
-  }
-
-#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,           \
-                      SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,               \
-                      DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH)                 \
-  TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
-                 FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,     \
-                 benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH)                  \
-  TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
-                 FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,     \
-                 benchmark_width_, _Unaligned, +, 2, SRC_DEPTH)                \
-  TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
-                 FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,     \
-                 benchmark_width_, _Invert, -, 0, SRC_DEPTH)                   \
-  TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
-                 FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,     \
-                 benchmark_width_, _Opt, +, 0, SRC_DEPTH)
-
-TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I420, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I420, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I422, uint8_t, 1, 2, 1, 8)
-TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I444, uint8_t, 1, 1, 1, 8)
-TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420Mirror, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I422, uint8_t, 1, 2, 1, 8)
-TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I444, uint8_t, 1, 1, 1, 8)
-TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I444, uint8_t, 1, 1, 1, 8)
-TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10)
-TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I010, uint16_t, 2, 2, 2, 8)
-TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I012, uint16_t, 2, 2, 2, 8)
-TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H010, uint16_t, 2, 2, 2, 10)
-TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H420, uint8_t, 1, 2, 2, 10)
-TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2, 8)
-TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H012, uint16_t, 2, 2, 2, 8)
-TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I410, uint16_t, 2, 1, 1, 10)
-TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I410, uint16_t, 2, 1, 1, 10)
-TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I412, uint16_t, 2, 1, 1, 12)
-TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I412, uint16_t, 2, 1, 1, 12)
-TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I010, uint16_t, 2, 2, 2, 10)
-TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I010, uint16_t, 2, 2, 2, 10)
-TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I012, uint16_t, 2, 2, 2, 12)
-TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12)
-TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10)
-TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10)
-TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10)
-TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 10)
-TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10)
-TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12)
-TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 12)
-TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 12)
-TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 12)
-TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12)
-
-// Test Android 420 to I420
-#define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X,          \
-                        SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                        W1280, N, NEG, OFF, PN, OFF_U, OFF_V)                 \
-  TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##To##PN##N) {      \
-    const int kWidth = W1280;                                                 \
-    const int kHeight = benchmark_height_;                                    \
-    const int kSizeUV =                                                       \
-        SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
-    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                      \
-    align_buffer_page_end(src_uv,                                             \
-                          kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF);       \
-    align_buffer_page_end(dst_y_c, kWidth* kHeight);                          \
-    align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) *             \
-                                       SUBSAMPLE(kHeight, SUBSAMP_Y));        \
-    align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) *             \
-                                       SUBSAMPLE(kHeight, SUBSAMP_Y));        \
-    align_buffer_page_end(dst_y_opt, kWidth* kHeight);                        \
-    align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) *           \
-                                         SUBSAMPLE(kHeight, SUBSAMP_Y));      \
-    align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) *           \
-                                         SUBSAMPLE(kHeight, SUBSAMP_Y));      \
-    uint8_t* src_u = src_uv + OFF_U;                                          \
-    uint8_t* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V);          \
-    int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE;          \
-    for (int i = 0; i < kHeight; ++i)                                         \
-      for (int j = 0; j < kWidth; ++j)                                        \
-        src_y[i * kWidth + j + OFF] = (fastrand() & 0xff);                    \
-    for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) {             \
-      for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) {            \
-        src_u[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] =                 \
-            (fastrand() & 0xff);                                              \
-        src_v[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] =                 \
-            (fastrand() & 0xff);                                              \
-      }                                                                       \
-    }                                                                         \
-    memset(dst_y_c, 1, kWidth* kHeight);                                      \
-    memset(dst_u_c, 2,                                                        \
-           SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y));     \
-    memset(dst_v_c, 3,                                                        \
-           SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y));     \
-    memset(dst_y_opt, 101, kWidth* kHeight);                                  \
-    memset(dst_u_opt, 102,                                                    \
-           SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y));     \
-    memset(dst_v_opt, 103,                                                    \
-           SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y));     \
-    MaskCpuFlags(disable_cpu_flags_);                                         \
-    SRC_FMT_PLANAR##To##FMT_PLANAR(                                           \
-        src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X),   \
-        src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, dst_y_c, \
-        kWidth, dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c,               \
-        SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight);                   \
-    MaskCpuFlags(benchmark_cpu_info_);                                        \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
-      SRC_FMT_PLANAR##To##FMT_PLANAR(                                         \
-          src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
-          src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE,        \
-          dst_y_opt, kWidth, dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X),         \
-          dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight);      \
-    }                                                                         \
-    for (int i = 0; i < kHeight; ++i) {                                       \
-      for (int j = 0; j < kWidth; ++j) {                                      \
-        EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]);        \
-      }                                                                       \
-    }                                                                         \
-    for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) {                 \
-      for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) {                \
-        EXPECT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j],              \
-                  dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]);           \
-      }                                                                       \
-    }                                                                         \
-    for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) {                 \
-      for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) {                \
-        EXPECT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j],              \
-                  dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]);           \
-      }                                                                       \
-    }                                                                         \
-    free_aligned_buffer_page_end(dst_y_c);                                    \
-    free_aligned_buffer_page_end(dst_u_c);                                    \
-    free_aligned_buffer_page_end(dst_v_c);                                    \
-    free_aligned_buffer_page_end(dst_y_opt);                                  \
-    free_aligned_buffer_page_end(dst_u_opt);                                  \
-    free_aligned_buffer_page_end(dst_v_opt);                                  \
-    free_aligned_buffer_page_end(src_y);                                      \
-    free_aligned_buffer_page_end(src_uv);                                     \
-  }
-
-#define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V,         \
-                       SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X,    \
-                       SUBSAMP_Y)                                              \
-  TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
-                  FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_ + 1,      \
-                  _Any, +, 0, PN, OFF_U, OFF_V)                                \
-  TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
-                  FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_,          \
-                  _Unaligned, +, 2, PN, OFF_U, OFF_V)                          \
-  TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
-                  FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, \
-                  -, 0, PN, OFF_U, OFF_V)                                      \
-  TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
-                  FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \
-                  0, PN, OFF_U, OFF_V)
-
-TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2)
-TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2)
-TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2)
-#undef TESTAPLANARTOP
-#undef TESTAPLANARTOPI
-
-// wrapper to keep API the same
-int I400ToNV21(const uint8_t* src_y,
-               int src_stride_y,
-               const uint8_t* /* src_u */,
-               int /* src_stride_u */,
-               const uint8_t* /* src_v */,
-               int /* src_stride_v */,
-               uint8_t* dst_y,
-               int dst_stride_y,
-               uint8_t* dst_vu,
-               int dst_stride_vu,
-               int width,
-               int height) {
-  return I400ToNV21(src_y, src_stride_y, dst_y, dst_stride_y, dst_vu,
-                    dst_stride_vu, width, height);
-}
-
-#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,        \
-                        SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,            \
-                        DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF,     \
-                        SRC_DEPTH)                                            \
-  TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) {              \
-    static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported");       \
-    static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported");       \
-    static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2,                   \
-                  "SRC_SUBSAMP_X unsupported");                               \
-    static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2,                   \
-                  "SRC_SUBSAMP_Y unsupported");                               \
-    static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2,                   \
-                  "DST_SUBSAMP_X unsupported");                               \
-    static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2,                   \
-                  "DST_SUBSAMP_Y unsupported");                               \
-    const int kWidth = W1280;                                                 \
-    const int kHeight = benchmark_height_;                                    \
-    const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X);               \
-    const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y);             \
-    const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X);               \
-    const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y);             \
-    align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF);             \
-    align_buffer_page_end(src_u,                                              \
-                          kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF);      \
-    align_buffer_page_end(src_v,                                              \
-                          kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF);      \
-    align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC);                 \
-    align_buffer_page_end(dst_uv_c,                                           \
-                          kDstHalfWidth* kDstHalfHeight* DST_BPC * 2);        \
-    align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC);               \
-    align_buffer_page_end(dst_uv_opt,                                         \
-                          kDstHalfWidth* kDstHalfHeight* DST_BPC * 2);        \
-    MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC);                    \
-    MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC);      \
-    MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC);      \
-    SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF);                   \
-    SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF);                   \
-    SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF);                   \
-    for (int i = 0; i < kWidth * kHeight; ++i) {                              \
-      src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1);                       \
-    }                                                                         \
-    for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) {                \
-      src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1);                       \
-      src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1);                       \
-    }                                                                         \
-    memset(dst_y_c, 1, kWidth* kHeight* DST_BPC);                             \
-    memset(dst_uv_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2);          \
-    memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC);                         \
-    memset(dst_uv_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2);      \
-    MaskCpuFlags(disable_cpu_flags_);                                         \
-    SRC_FMT_PLANAR##To##FMT_PLANAR(src_y_p, kWidth, src_u_p, kSrcHalfWidth,   \
-                                   src_v_p, kSrcHalfWidth,                    \
-                                   reinterpret_cast<DST_T*>(dst_y_c), kWidth, \
-                                   reinterpret_cast<DST_T*>(dst_uv_c),        \
-                                   kDstHalfWidth * 2, kWidth, NEG kHeight);   \
-    MaskCpuFlags(benchmark_cpu_info_);                                        \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
-      SRC_FMT_PLANAR##To##FMT_PLANAR(                                         \
-          src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth,    \
-          reinterpret_cast<DST_T*>(dst_y_opt), kWidth,                        \
-          reinterpret_cast<DST_T*>(dst_uv_opt), kDstHalfWidth * 2, kWidth,    \
-          NEG kHeight);                                                       \
-    }                                                                         \
-    for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) {                    \
-      EXPECT_EQ(dst_y_c[i], dst_y_opt[i]);                                    \
-    }                                                                         \
-    for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC * 2; ++i) {  \
-      EXPECT_EQ(dst_uv_c[i], dst_uv_opt[i]);                                  \
-    }                                                                         \
-    free_aligned_buffer_page_end(dst_y_c);                                    \
-    free_aligned_buffer_page_end(dst_uv_c);                                   \
-    free_aligned_buffer_page_end(dst_y_opt);                                  \
-    free_aligned_buffer_page_end(dst_uv_opt);                                 \
-    free_aligned_buffer_page_end(src_y);                                      \
-    free_aligned_buffer_page_end(src_u);                                      \
-    free_aligned_buffer_page_end(src_v);                                      \
-  }
-
-#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,         \
-                       SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,             \
-                       DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH)               \
-  TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,              \
-                  SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,   \
-                  DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \
-  TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,              \
-                  SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,   \
-                  DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2,          \
-                  SRC_DEPTH)                                                  \
-  TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,              \
-                  SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,   \
-                  DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH)  \
-  TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,              \
-                  SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,   \
-                  DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH)
-
-TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOBP(I422, uint8_t, 1, 2, 1, NV21, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV12, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV21, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOBP(I400, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOBP(I010, uint16_t, 2, 2, 2, P010, uint16_t, 2, 2, 2, 10)
-TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10)
-TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12)
-TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
-
-#define TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
-                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
-                    DST_SUBSAMP_Y, W1280, N, NEG, OFF, DOY, SRC_DEPTH,        \
-                    TILE_WIDTH, TILE_HEIGHT)                                  \
-  TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) {              \
-    static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported");       \
-    static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2,                   \
-                  "SRC_SUBSAMP_X unsupported");                               \
-    static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2,                   \
-                  "SRC_SUBSAMP_Y unsupported");                               \
-    static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2,                   \
-                  "DST_SUBSAMP_X unsupported");                               \
-    static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2,                   \
-                  "DST_SUBSAMP_Y unsupported");                               \
-    const int kWidth = W1280;                                                 \
-    const int kHeight = benchmark_height_;                                    \
-    const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X);               \
-    const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X);               \
-    const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y);             \
-    const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
-    const int kPaddedHeight =                                                 \
-        (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1);                   \
-    const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X);   \
-    const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
-    align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
-    align_buffer_page_end(                                                    \
-        src_uv,                                                               \
-        2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF);      \
-    align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC);                 \
-    align_buffer_page_end(dst_uv_c,                                           \
-                          2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);      \
-    align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC);               \
-    align_buffer_page_end(dst_uv_opt,                                         \
-                          2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);      \
-    SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF);                   \
-    SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF);                 \
-    for (int i = 0;                                                           \
-         i < kPaddedWidth * kPaddedHeight * SRC_BPC / (int)sizeof(SRC_T);     \
-         ++i) {                                                               \
-      src_y_p[i] =                                                            \
-          (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH)));      \
-    }                                                                         \
-    for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2 *      \
-                            SRC_BPC / (int)sizeof(SRC_T);                     \
-         ++i) {                                                               \
-      src_uv_p[i] =                                                           \
-          (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH)));      \
-    }                                                                         \
-    memset(dst_y_c, 1, kWidth* kHeight* DST_BPC);                             \
-    memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);        \
-    memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC);                         \
-    memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);    \
-    MaskCpuFlags(disable_cpu_flags_);                                         \
-    SRC_FMT_PLANAR##To##FMT_PLANAR(                                           \
-        src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p,              \
-        2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T),                     \
-        DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth,               \
-        reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth,        \
-        NEG kHeight);                                                         \
-    MaskCpuFlags(benchmark_cpu_info_);                                        \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
-      SRC_FMT_PLANAR##To##FMT_PLANAR(                                         \
-          src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p,            \
-          2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T),                   \
-          DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth,           \
-          reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth,    \
-          NEG kHeight);                                                       \
-    }                                                                         \
-    if (DOY) {                                                                \
-      for (int i = 0; i < kHeight; ++i) {                                     \
-        for (int j = 0; j < kWidth; ++j) {                                    \
-          EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]);      \
-        }                                                                     \
-      }                                                                       \
-    }                                                                         \
-    for (int i = 0; i < kDstHalfHeight; ++i) {                                \
-      for (int j = 0; j < 2 * kDstHalfWidth; ++j) {                           \
-        EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j],                        \
-                  dst_uv_opt[i * 2 * kDstHalfWidth + j]);                     \
-      }                                                                       \
-    }                                                                         \
-    free_aligned_buffer_page_end(dst_y_c);                                    \
-    free_aligned_buffer_page_end(dst_uv_c);                                   \
-    free_aligned_buffer_page_end(dst_y_opt);                                  \
-    free_aligned_buffer_page_end(dst_uv_opt);                                 \
-    free_aligned_buffer_page_end(src_y);                                      \
-    free_aligned_buffer_page_end(src_uv);                                     \
-  }
-
-#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
-                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
-                   DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)        \
-  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
-              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
-              benchmark_width_ + 1, _Any, +, 0, 1, SRC_DEPTH, TILE_WIDTH,    \
-              TILE_HEIGHT)                                                   \
-  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
-              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
-              benchmark_width_, _Unaligned, +, 2, 1, SRC_DEPTH, TILE_WIDTH,  \
-              TILE_HEIGHT)                                                   \
-  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
-              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
-              benchmark_width_, _Invert, -, 0, 1, SRC_DEPTH, TILE_WIDTH,     \
-              TILE_HEIGHT)                                                   \
-  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
-              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
-              benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, TILE_WIDTH,        \
-              TILE_HEIGHT)                                                   \
-  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
-              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
-              benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH,      \
-              TILE_HEIGHT)
-
-TESTBPTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
-TESTBPTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
-TESTBPTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
-TESTBPTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
-TESTBPTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1)
-TESTBPTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
-TESTBPTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
-TESTBPTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
-TESTBPTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
-TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32)
+#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN))
 
 #define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,              \
                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,   \
@@ -630,6 +143,7 @@ TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32)
     free_aligned_buffer_page_end(src_uv);                                      \
   }
 
+#if defined(ENABLE_FULL_TESTS)
 #define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
                   DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)        \
@@ -648,6 +162,14 @@ TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32)
   TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
              benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
+#else
+#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
+                  SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+                  DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)        \
+  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+             benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
+#endif
 
 TESTBPTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
 TESTBPTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
@@ -696,8 +218,6 @@ TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1)
   I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
                           kFilterBilinear)
 
-#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN))
-
 #define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
                        YALIGN, W1280, N, NEG, OFF)                            \
   TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                       \
@@ -761,8 +281,6 @@ TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1)
 #define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
                       YALIGN)                                                \
   TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                 YALIGN, benchmark_width_ + 1, _Any, +, 0)                   \
-  TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
                  YALIGN, benchmark_width_, _Opt, +, 0)
 #endif
 
@@ -837,7 +355,7 @@ TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
 TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1)
 TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1)
 TESTPLANARTOB(I422, 2, 2, RGB24Filter, 3, 3, 1)
-#else
+#else  // FULL_TESTS
 TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1)
 TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1)
 TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1)
@@ -866,218 +384,6 @@ TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1)
 TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1)
 #endif
 
-#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                        YALIGN, W1280, N, NEG, OFF, ATTEN)                     \
-  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                        \
-    const int kWidth = W1280;                                                  \
-    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                   \
-    const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN);                      \
-    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                        \
-    const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y);             \
-    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                       \
-    align_buffer_page_end(src_u, kSizeUV + OFF);                               \
-    align_buffer_page_end(src_v, kSizeUV + OFF);                               \
-    align_buffer_page_end(src_a, kWidth* kHeight + OFF);                       \
-    align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF);                \
-    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF);              \
-    for (int i = 0; i < kWidth * kHeight; ++i) {                               \
-      src_y[i + OFF] = (fastrand() & 0xff);                                    \
-      src_a[i + OFF] = (fastrand() & 0xff);                                    \
-    }                                                                          \
-    for (int i = 0; i < kSizeUV; ++i) {                                        \
-      src_u[i + OFF] = (fastrand() & 0xff);                                    \
-      src_v[i + OFF] = (fastrand() & 0xff);                                    \
-    }                                                                          \
-    memset(dst_argb_c + OFF, 1, kStrideB * kHeight);                           \
-    memset(dst_argb_opt + OFF, 101, kStrideB * kHeight);                       \
-    MaskCpuFlags(disable_cpu_flags_);                                          \
-    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV,         \
-                          src_v + OFF, kStrideUV, src_a + OFF, kWidth,         \
-                          dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight,     \
-                          ATTEN);                                              \
-    MaskCpuFlags(benchmark_cpu_info_);                                         \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
-      FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV,       \
-                            src_v + OFF, kStrideUV, src_a + OFF, kWidth,       \
-                            dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, \
-                            ATTEN);                                            \
-    }                                                                          \
-    for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) {                       \
-      EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]);                   \
-    }                                                                          \
-    free_aligned_buffer_page_end(src_y);                                       \
-    free_aligned_buffer_page_end(src_u);                                       \
-    free_aligned_buffer_page_end(src_v);                                       \
-    free_aligned_buffer_page_end(src_a);                                       \
-    free_aligned_buffer_page_end(dst_argb_c);                                  \
-    free_aligned_buffer_page_end(dst_argb_opt);                                \
-  }
-
-#if defined(ENABLE_FULL_TESTS)
-#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                       YALIGN)                                                \
-  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                  YALIGN, benchmark_width_ + 1, _Any, +, 0, 0)                \
-  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                  YALIGN, benchmark_width_, _Unaligned, +, 2, 0)              \
-  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                  YALIGN, benchmark_width_, _Invert, -, 0, 0)                 \
-  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                  YALIGN, benchmark_width_, _Opt, +, 0, 0)                    \
-  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                  YALIGN, benchmark_width_, _Premult, +, 0, 1)
-#else
-#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                       YALIGN)                                                \
-  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-                  YALIGN, benchmark_width_, _Opt, +, 0, 0)
-#endif
-
-#define J420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
-                        l, m)
-#define J420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
-                        l, m)
-#define F420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
-                        l, m)
-#define F420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
-                        l, m)
-#define H420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
-                        l, m)
-#define H420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
-                        l, m)
-#define U420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
-                        l, m)
-#define U420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
-                        l, m)
-#define V420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
-  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
-                        l, m)
-#define V420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
-  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
-                        l, m)
-#define J422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
-                        l, m)
-#define J422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
-                        l, m)
-#define F422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
-                        l, m)
-#define F422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
-                        l, m)
-#define H422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
-                        l, m)
-#define H422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
-                        l, m)
-#define U422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
-                        l, m)
-#define U422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
-                        l, m)
-#define V422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
-  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
-                        l, m)
-#define V422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
-  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
-                        l, m)
-#define J444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
-                        l, m)
-#define J444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
-                        l, m)
-#define F444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
-                        l, m)
-#define F444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
-                        l, m)
-#define H444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
-                        l, m)
-#define H444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
-                        l, m)
-#define U444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
-                        l, m)
-#define U444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
-  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
-                        l, m)
-#define V444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
-  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
-                        l, m)
-#define V444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
-  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
-                        l, m)
-
-#define I420AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
-  I420AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j,          \
-                              &kYuvI601Constants, k, l, m, kFilterBilinear)
-#define I422AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
-  I422AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j,          \
-                              &kYuvI601Constants, k, l, m, kFilterBilinear)
-
-#if defined(ENABLE_FULL_TESTS)
-TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1)
-TESTQPLANARTOB(J420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(J420Alpha, 2, 2, ABGR, 4, 4, 1)
-TESTQPLANARTOB(H420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(H420Alpha, 2, 2, ABGR, 4, 4, 1)
-TESTQPLANARTOB(F420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(F420Alpha, 2, 2, ABGR, 4, 4, 1)
-TESTQPLANARTOB(U420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(U420Alpha, 2, 2, ABGR, 4, 4, 1)
-TESTQPLANARTOB(V420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(V420Alpha, 2, 2, ABGR, 4, 4, 1)
-TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(I422Alpha, 2, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(J422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(J422Alpha, 2, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(H422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(H422Alpha, 2, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(F422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(F422Alpha, 2, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(U422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(U422Alpha, 2, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(V422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(V422Alpha, 2, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(I444Alpha, 1, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(J444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(J444Alpha, 1, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(H444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(H444Alpha, 1, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(F444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(F444Alpha, 1, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(U444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(U444Alpha, 1, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(V444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(V444Alpha, 1, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
-TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
-#else
-TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
-TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
-#endif
-
 #define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
                    W1280, N, NEG, OFF)                                         \
   TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                        \
@@ -1132,6 +438,7 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
     free_aligned_buffer_page_end(dst_argb32_opt);                              \
   }
 
+#if defined(ENABLE_FULL_TESTS)
 #define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
   TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
              benchmark_width_ + 1, _Any, +, 0)                           \
@@ -1141,6 +448,11 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
              benchmark_width_, _Invert, -, 0)                            \
   TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
              benchmark_width_, _Opt, +, 0)
+#else
+#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
+  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
+             benchmark_width_, _Opt, +, 0)
+#endif
 
 #define JNV12ToARGB(a, b, c, d, e, f, g, h) \
   NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
@@ -1186,241 +498,6 @@ TESTBPTOB(NV21, 2, 2, YUV24, RAW, 3)
 TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2)
 #endif
 
-#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
-                       W1280, N, NEG, OFF)                                     \
-  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) {                        \
-    const int kWidth = W1280;                                                  \
-    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                   \
-    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                        \
-    const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8;           \
-    align_buffer_page_end(src_argb, kStride* kHeight + OFF);                   \
-    align_buffer_page_end(dst_y_c, kWidth* kHeight);                           \
-    align_buffer_page_end(dst_uv_c,                                            \
-                          kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y));      \
-    align_buffer_page_end(dst_y_opt, kWidth* kHeight);                         \
-    align_buffer_page_end(dst_uv_opt,                                          \
-                          kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y));      \
-    memset(dst_y_c, 1, kWidth* kHeight);                                       \
-    memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y));        \
-    memset(dst_y_opt, 101, kWidth* kHeight);                                   \
-    memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y));    \
-    for (int i = 0; i < kHeight; ++i)                                          \
-      for (int j = 0; j < kStride; ++j)                                        \
-        src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff);               \
-    MaskCpuFlags(disable_cpu_flags_);                                          \
-    FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c,  \
-                          kStrideUV * 2, dst_uv_c + kStrideUV, kStrideUV * 2,  \
-                          kWidth, NEG kHeight);                                \
-    MaskCpuFlags(benchmark_cpu_info_);                                         \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
-      FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth,        \
-                            dst_uv_opt, kStrideUV * 2, dst_uv_opt + kStrideUV, \
-                            kStrideUV * 2, kWidth, NEG kHeight);               \
-    }                                                                          \
-    for (int i = 0; i < kHeight; ++i) {                                        \
-      for (int j = 0; j < kWidth; ++j) {                                       \
-        EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]);         \
-      }                                                                        \
-    }                                                                          \
-    for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) {              \
-      for (int j = 0; j < kStrideUV; ++j) {                                    \
-        EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \
-      }                                                                        \
-    }                                                                          \
-    free_aligned_buffer_page_end(dst_y_c);                                     \
-    free_aligned_buffer_page_end(dst_uv_c);                                    \
-    free_aligned_buffer_page_end(dst_y_opt);                                   \
-    free_aligned_buffer_page_end(dst_uv_opt);                                  \
-    free_aligned_buffer_page_end(src_argb);                                    \
-  }
-
-#if defined(ENABLE_FULL_TESTS)
-#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
-  TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                 benchmark_width_ + 1, _Any, +, 0)                            \
-  TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                 benchmark_width_, _Unaligned, +, 2)                          \
-  TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                 benchmark_width_, _Invert, -, 0)                             \
-  TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                 benchmark_width_, _Opt, +, 0)
-#else
-#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
-  TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                 benchmark_width_ + 1, _Any, +, 0)                            \
-  TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                 benchmark_width_, _Opt, +, 0)
-#endif
-
-TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2)
-TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2)
-TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1)
-TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1)
-TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2)
-TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1)
-TESTATOPLANAR(ABGR, 4, 1, J420, 2, 2)
-TESTATOPLANAR(ABGR, 4, 1, J422, 2, 1)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2)
-TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2)
-TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2)
-#endif
-TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2)
-TESTATOPLANAR(I400, 1, 1, I420, 2, 2)
-TESTATOPLANAR(J400, 1, 1, J420, 2, 2)
-TESTATOPLANAR(RAW, 3, 1, I420, 2, 2)
-TESTATOPLANAR(RAW, 3, 1, J420, 2, 2)
-TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2)
-TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2)
-TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2)
-TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2)
-TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1)
-TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2)
-TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
-
-#define TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X,           \
-                        SUBSAMP_Y, W1280, N, NEG, OFF)                         \
-  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) {                        \
-    const int kWidth = W1280;                                                  \
-    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                   \
-    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                        \
-    const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8;           \
-    align_buffer_page_end(src_argb, kStride* kHeight + OFF);                   \
-    align_buffer_page_end(dst_a_c, kWidth* kHeight);                           \
-    align_buffer_page_end(dst_y_c, kWidth* kHeight);                           \
-    align_buffer_page_end(dst_uv_c,                                            \
-                          kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y));      \
-    align_buffer_page_end(dst_a_opt, kWidth* kHeight);                         \
-    align_buffer_page_end(dst_y_opt, kWidth* kHeight);                         \
-    align_buffer_page_end(dst_uv_opt,                                          \
-                          kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y));      \
-    memset(dst_a_c, 1, kWidth* kHeight);                                       \
-    memset(dst_y_c, 2, kWidth* kHeight);                                       \
-    memset(dst_uv_c, 3, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y));        \
-    memset(dst_a_opt, 101, kWidth* kHeight);                                   \
-    memset(dst_y_opt, 102, kWidth* kHeight);                                   \
-    memset(dst_uv_opt, 103, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y));    \
-    for (int i = 0; i < kHeight; ++i)                                          \
-      for (int j = 0; j < kStride; ++j)                                        \
-        src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff);               \
-    MaskCpuFlags(disable_cpu_flags_);                                          \
-    FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c,  \
-                          kStrideUV * 2, dst_uv_c + kStrideUV, kStrideUV * 2,  \
-                          dst_a_c, kWidth, kWidth, NEG kHeight);               \
-    MaskCpuFlags(benchmark_cpu_info_);                                         \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
-      FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth,        \
-                            dst_uv_opt, kStrideUV * 2, dst_uv_opt + kStrideUV, \
-                            kStrideUV * 2, dst_a_opt, kWidth, kWidth,          \
-                            NEG kHeight);                                      \
-    }                                                                          \
-    for (int i = 0; i < kHeight; ++i) {                                        \
-      for (int j = 0; j < kWidth; ++j) {                                       \
-        EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]);         \
-        EXPECT_EQ(dst_a_c[i * kWidth + j], dst_a_opt[i * kWidth + j]);         \
-      }                                                                        \
-    }                                                                          \
-    for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) {              \
-      for (int j = 0; j < kStrideUV; ++j) {                                    \
-        EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \
-      }                                                                        \
-    }                                                                          \
-    free_aligned_buffer_page_end(dst_a_c);                                     \
-    free_aligned_buffer_page_end(dst_y_c);                                     \
-    free_aligned_buffer_page_end(dst_uv_c);                                    \
-    free_aligned_buffer_page_end(dst_a_opt);                                   \
-    free_aligned_buffer_page_end(dst_y_opt);                                   \
-    free_aligned_buffer_page_end(dst_uv_opt);                                  \
-    free_aligned_buffer_page_end(src_argb);                                    \
-  }
-
-#if defined(ENABLE_FULL_TESTS)
-#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
-  TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                  benchmark_width_ + 1, _Any, +, 0)                            \
-  TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                  benchmark_width_, _Unaligned, +, 2)                          \
-  TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                  benchmark_width_, _Invert, -, 0)                             \
-  TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                  benchmark_width_, _Opt, +, 0)
-#else
-#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
-  TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                  benchmark_width_ + 1, _Any, +, 0)                            \
-  TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                  benchmark_width_, _Opt, +, 0)
-#endif
-
-TESTATOPLANARA(ARGB, 4, 1, I420Alpha, 2, 2)
-
-#define TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,     \
-                   W1280, N, NEG, OFF)                                        \
-  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) {                       \
-    const int kWidth = W1280;                                                 \
-    const int kHeight = benchmark_height_;                                    \
-    const int kStride = SUBSAMPLE(kWidth, SUB_A) * BPP_A;                     \
-    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                       \
-    align_buffer_page_end(src_argb, kStride* kHeight + OFF);                  \
-    align_buffer_page_end(dst_y_c, kWidth* kHeight);                          \
-    align_buffer_page_end(dst_uv_c,                                           \
-                          kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y));     \
-    align_buffer_page_end(dst_y_opt, kWidth* kHeight);                        \
-    align_buffer_page_end(dst_uv_opt,                                         \
-                          kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y));     \
-    for (int i = 0; i < kHeight; ++i)                                         \
-      for (int j = 0; j < kStride; ++j)                                       \
-        src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff);              \
-    memset(dst_y_c, 1, kWidth* kHeight);                                      \
-    memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y));       \
-    memset(dst_y_opt, 101, kWidth* kHeight);                                  \
-    memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y));   \
-    MaskCpuFlags(disable_cpu_flags_);                                         \
-    FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \
-                          kStrideUV * 2, kWidth, NEG kHeight);                \
-    MaskCpuFlags(benchmark_cpu_info_);                                        \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
-      FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth,       \
-                            dst_uv_opt, kStrideUV * 2, kWidth, NEG kHeight);  \
-    }                                                                         \
-    for (int i = 0; i < kHeight; ++i) {                                       \
-      for (int j = 0; j < kWidth; ++j) {                                      \
-        EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]);        \
-      }                                                                       \
-    }                                                                         \
-    for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) {                 \
-      for (int j = 0; j < kStrideUV * 2; ++j) {                               \
-        EXPECT_EQ(dst_uv_c[i * kStrideUV * 2 + j],                            \
-                  dst_uv_opt[i * kStrideUV * 2 + j]);                         \
-      }                                                                       \
-    }                                                                         \
-    free_aligned_buffer_page_end(dst_y_c);                                    \
-    free_aligned_buffer_page_end(dst_uv_c);                                   \
-    free_aligned_buffer_page_end(dst_y_opt);                                  \
-    free_aligned_buffer_page_end(dst_uv_opt);                                 \
-    free_aligned_buffer_page_end(src_argb);                                   \
-  }
-
-#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
-  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-             benchmark_width_ + 1, _Any, +, 0)                           \
-  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-             benchmark_width_, _Unaligned, +, 2)                         \
-  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-             benchmark_width_, _Invert, -, 0)                            \
-  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-             benchmark_width_, _Opt, +, 0)
-
-TESTATOBP(ARGB, 1, 4, NV12, 2, 2)
-TESTATOBP(ARGB, 1, 4, NV21, 2, 2)
-TESTATOBP(ABGR, 1, 4, NV12, 2, 2)
-TESTATOBP(ABGR, 1, 4, NV21, 2, 2)
-TESTATOBP(RAW, 1, 3, JNV21, 2, 2)
-TESTATOBP(YUY2, 2, 4, NV12, 2, 2)
-TESTATOBP(UYVY, 2, 4, NV12, 2, 2)
-TESTATOBP(AYUV, 1, 4, NV12, 2, 2)
-TESTATOBP(AYUV, 1, 4, NV21, 2, 2)
-
 #define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B,     \
                   EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF)               \
   TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) {                             \
@@ -1776,6 +853,7 @@ TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
     }                                                                          \
   }
 
+#if defined(ENABLE_FULL_TESTS)
 #define TESTATOBD(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
                   HEIGHT_B)                                                 \
   TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B,      \
@@ -1788,6 +866,12 @@ TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
              HEIGHT_B, benchmark_width_, _Opt, +, 0)                        \
   TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
                   HEIGHT_B)
+#else
+#define TESTATOBD(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
+                  HEIGHT_B)                                                 \
+  TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
+                  HEIGHT_B)
+#endif
 
 #ifdef LITTLE_ENDIAN_ONLY_TEST
 TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
@@ -1856,1117 +940,217 @@ TESTEND(BGRAToARGB, uint8_t, 4, 4, 1)
 TESTEND(ABGRToARGB, uint8_t, 4, 4, 1)
 TESTEND(AB64ToAR64, uint16_t, 4, 4, 1)
 
-#ifdef HAVE_JPEG
-TEST_F(LibYUVConvertTest, ValidateJpeg) {
-  const int kOff = 10;
-  const int kMinJpeg = 64;
-  const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg
-                             ? benchmark_width_ * benchmark_height_
-                             : kMinJpeg;
-  const int kSize = kImageSize + kOff;
-  align_buffer_page_end(orig_pixels, kSize);
-
-  // No SOI or EOI. Expect fail.
-  memset(orig_pixels, 0, kSize);
-  EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
-
-  // Test special value that matches marker start.
-  memset(orig_pixels, 0xff, kSize);
-  EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
-
-  // EOI, SOI. Expect pass.
-  orig_pixels[0] = 0xff;
-  orig_pixels[1] = 0xd8;  // SOI.
-  orig_pixels[2] = 0xff;
-  orig_pixels[kSize - kOff + 0] = 0xff;
-  orig_pixels[kSize - kOff + 1] = 0xd9;  // EOI.
-  for (int times = 0; times < benchmark_iterations_; ++times) {
-    EXPECT_TRUE(ValidateJpeg(orig_pixels, kSize));
-  }
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(LibYUVConvertTest, ValidateJpegLarge) {
-  const int kOff = 10;
-  const int kMinJpeg = 64;
-  const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg
-                             ? benchmark_width_ * benchmark_height_
-                             : kMinJpeg;
-  const int kSize = kImageSize + kOff;
-  const int kMultiple = 10;
-  const int kBufSize = kImageSize * kMultiple + kOff;
-  align_buffer_page_end(orig_pixels, kBufSize);
-
-  // No SOI or EOI. Expect fail.
-  memset(orig_pixels, 0, kBufSize);
-  EXPECT_FALSE(ValidateJpeg(orig_pixels, kBufSize));
-
-  // EOI, SOI. Expect pass.
-  orig_pixels[0] = 0xff;
-  orig_pixels[1] = 0xd8;  // SOI.
-  orig_pixels[2] = 0xff;
-  orig_pixels[kSize - kOff + 0] = 0xff;
-  orig_pixels[kSize - kOff + 1] = 0xd9;  // EOI.
-  for (int times = 0; times < benchmark_iterations_; ++times) {
-    EXPECT_TRUE(ValidateJpeg(orig_pixels, kBufSize));
-  }
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(LibYUVConvertTest, InvalidateJpeg) {
-  const int kOff = 10;
-  const int kMinJpeg = 64;
-  const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg
-                             ? benchmark_width_ * benchmark_height_
-                             : kMinJpeg;
-  const int kSize = kImageSize + kOff;
-  align_buffer_page_end(orig_pixels, kSize);
-
-  // NULL pointer. Expect fail.
-  EXPECT_FALSE(ValidateJpeg(NULL, kSize));
-
-  // Negative size. Expect fail.
-  EXPECT_FALSE(ValidateJpeg(orig_pixels, -1));
-
-  // Too large size. Expect fail.
-  EXPECT_FALSE(ValidateJpeg(orig_pixels, 0xfb000000ull));
-
-  // No SOI or EOI. Expect fail.
-  memset(orig_pixels, 0, kSize);
-  EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
-
-  // SOI but no EOI. Expect fail.
-  orig_pixels[0] = 0xff;
-  orig_pixels[1] = 0xd8;  // SOI.
-  orig_pixels[2] = 0xff;
-  for (int times = 0; times < benchmark_iterations_; ++times) {
-    EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
-  }
-
-  // EOI but no SOI. Expect fail.
-  orig_pixels[0] = 0;
-  orig_pixels[1] = 0;
-  orig_pixels[kSize - kOff + 0] = 0xff;
-  orig_pixels[kSize - kOff + 1] = 0xd9;  // EOI.
-  EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
-
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(LibYUVConvertTest, FuzzJpeg) {
-  // SOI but no EOI. Expect fail.
-  for (int times = 0; times < benchmark_iterations_; ++times) {
-    const int kSize = fastrand() % 5000 + 3;
-    align_buffer_page_end(orig_pixels, kSize);
-    MemRandomize(orig_pixels, kSize);
-
-    // Add SOI so frame will be scanned.
-    orig_pixels[0] = 0xff;
-    orig_pixels[1] = 0xd8;  // SOI.
-    orig_pixels[2] = 0xff;
-    orig_pixels[kSize - 1] = 0xff;
-    ValidateJpeg(orig_pixels,
-                 kSize);  // Failure normally expected.
-    free_aligned_buffer_page_end(orig_pixels);
-  }
-}
-
-// Test data created in GIMP.  In export jpeg, disable
-// thumbnails etc, choose a subsampling, and use low quality
-// (50) to keep size small. Generated with xxd -i test.jpg
-// test 0 is J400
-static const uint8_t kTest0Jpg[] = {
-    0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
-    0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
-    0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
-    0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
-    0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
-    0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
-    0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
-    0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xc2, 0x00, 0x0b, 0x08, 0x00, 0x10,
-    0x00, 0x20, 0x01, 0x01, 0x11, 0x00, 0xff, 0xc4, 0x00, 0x17, 0x00, 0x01,
-    0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xda, 0x00, 0x08, 0x01,
-    0x01, 0x00, 0x00, 0x00, 0x01, 0x43, 0x7e, 0xa7, 0x97, 0x57, 0xff, 0xc4,
-    0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03,
-    0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05,
-    0x02, 0x3b, 0xc0, 0x6f, 0x66, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26,
-    0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03,
-    0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff,
-    0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28,
-    0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4,
-    0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51,
-    0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01,
-    0x3f, 0x21, 0x65, 0x6e, 0x31, 0x86, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb,
-    0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9,
-    0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x08,
-    0x01, 0x01, 0x00, 0x00, 0x00, 0x10, 0x35, 0xff, 0xc4, 0x00, 0x1f, 0x10,
-    0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91,
-    0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01,
-    0x3f, 0x10, 0x0b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x88, 0xab, 0x8b,
-    0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec,
-    0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c,
-    0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff,
-    0xd9};
-static const size_t kTest0JpgLen = 421;
-
-// test 1 is J444
-static const uint8_t kTest1Jpg[] = {
-    0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
-    0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
-    0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
-    0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
-    0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
-    0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
-    0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
-    0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12,
-    0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42,
-    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
-    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
-    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
-    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
-    0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03,
-    0x01, 0x11, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00,
-    0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4,
-    0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x01, 0x03, 0xff, 0xda,
-    0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, 0x01,
-    0x40, 0x8f, 0x26, 0xe8, 0xf4, 0xcc, 0xf9, 0x69, 0x2b, 0x1b, 0x2a, 0xcb,
-    0xff, 0xc4, 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11,
-    0x00, 0x03, 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00,
-    0x01, 0x05, 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99,
-    0x0d, 0x26, 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x01, 0x00,
-    0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x01, 0x00, 0x10, 0x11, 0x02, 0x12, 0xff, 0xda, 0x00, 0x08,
-    0x01, 0x03, 0x01, 0x01, 0x3f, 0x01, 0xf1, 0x00, 0x27, 0x45, 0xbb, 0x31,
-    0xaf, 0xff, 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x02, 0x03, 0x01, 0x01, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
-    0x02, 0x10, 0x11, 0x41, 0x12, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01,
-    0x01, 0x3f, 0x01, 0xf6, 0x4b, 0x5f, 0x48, 0xb3, 0x69, 0x63, 0x35, 0x72,
-    0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11,
-    0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00,
-    0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2,
-    0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c,
-    0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61,
-    0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21,
-    0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01,
-    0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48,
-    0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01,
-    0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x26, 0x61, 0xd4, 0xff,
-    0xc4, 0x00, 0x1a, 0x11, 0x00, 0x03, 0x01, 0x00, 0x03, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21,
-    0x31, 0x41, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f,
-    0x10, 0x54, 0xa8, 0xbf, 0x50, 0x87, 0xb0, 0x9d, 0x8b, 0xc4, 0x6a, 0x26,
-    0x6b, 0x2a, 0x9c, 0x1f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x01, 0x01, 0x01,
-    0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x01, 0x00, 0x11, 0x21, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02,
-    0x01, 0x01, 0x3f, 0x10, 0x70, 0xe1, 0x3e, 0xd1, 0x8e, 0x0d, 0xe1, 0xb5,
-    0xd5, 0x91, 0x76, 0x43, 0x82, 0x45, 0x4c, 0x7b, 0x7f, 0xff, 0xc4, 0x00,
-    0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61,
-    0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01,
-    0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a,
-    0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96,
-    0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad,
-    0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7,
-    0xd4, 0xff, 0xd9};
-static const size_t kTest1JpgLen = 735;
-
-// test 2 is J420
-static const uint8_t kTest2Jpg[] = {
-    0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
-    0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
-    0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
-    0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
-    0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
-    0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
-    0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
-    0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12,
-    0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42,
-    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
-    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
-    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
-    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
-    0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03,
-    0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00,
-    0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x05, 0x01, 0x02, 0x04, 0xff,
-    0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x01, 0x02, 0xff,
-    0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00,
-    0x01, 0x20, 0xe7, 0x28, 0xa3, 0x0b, 0x2e, 0x2d, 0xcf, 0xff, 0xc4, 0x00,
-    0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, 0x10,
-    0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, 0x02,
-    0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, 0x62,
-    0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x00, 0x03, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x01, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f,
-    0x01, 0xc8, 0x53, 0xff, 0xc4, 0x00, 0x16, 0x11, 0x01, 0x01, 0x01, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x11, 0x32, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f,
-    0x01, 0xd2, 0xc7, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03,
-    0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff,
-    0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28,
-    0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4,
-    0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51,
-    0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01,
-    0x3f, 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb,
-    0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9,
-    0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c,
-    0x03, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x13, 0x5f,
-    0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11,
-    0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x0e,
-    0xa1, 0x3a, 0x76, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x21, 0x11, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01,
-    0x3f, 0x10, 0x57, 0x0b, 0x08, 0x70, 0xdb, 0xff, 0xc4, 0x00, 0x1f, 0x10,
-    0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91,
-    0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01,
-    0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b,
-    0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec,
-    0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c,
-    0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff,
-    0xd9};
-static const size_t kTest2JpgLen = 685;
-
-// test 3 is J422
-static const uint8_t kTest3Jpg[] = {
-    0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
-    0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
-    0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
-    0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
-    0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
-    0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
-    0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
-    0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12,
-    0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42,
-    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
-    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
-    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
-    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
-    0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03,
-    0x01, 0x21, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00,
-    0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4,
-    0x00, 0x17, 0x01, 0x00, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x00, 0xff,
-    0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00,
-    0x01, 0x43, 0x8d, 0x1f, 0xa2, 0xb3, 0xca, 0x1b, 0x57, 0x0f, 0xff, 0xc4,
-    0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03,
-    0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05,
-    0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26,
-    0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03, 0x01,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x01, 0x02, 0x10, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03,
-    0x01, 0x01, 0x3f, 0x01, 0x51, 0xce, 0x8c, 0x75, 0xff, 0xc4, 0x00, 0x18,
-    0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x61, 0x21, 0xff, 0xda,
-    0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xa6, 0xd9, 0x2f, 0x84,
-    0xe8, 0xf0, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda,
-    0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32,
-    0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00,
-    0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31,
-    0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f,
-    0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9,
-    0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6,
-    0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03,
-    0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x2e, 0x45, 0xff,
-    0xc4, 0x00, 0x18, 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21,
-    0x31, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x53,
-    0x50, 0xba, 0x54, 0xc1, 0x67, 0x4f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00,
-    0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x01, 0x11, 0x21, 0x00, 0x10, 0xff, 0xda, 0x00, 0x08,
-    0x01, 0x02, 0x01, 0x01, 0x3f, 0x10, 0x18, 0x81, 0x5c, 0x04, 0x1a, 0xca,
-    0x91, 0xbf, 0xff, 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04,
-    0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
-    0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff,
-    0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9,
-    0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5,
-    0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c,
-    0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00,
-    0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, 0xd9};
-static const size_t kTest3JpgLen = 704;
-
-// test 4 is J422 vertical - not supported
-static const uint8_t kTest4Jpg[] = {
-    0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
-    0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
-    0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
-    0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
-    0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
-    0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
-    0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
-    0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12,
-    0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42,
-    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
-    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
-    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
-    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
-    0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03,
-    0x01, 0x12, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00,
-    0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x05, 0x01, 0x02, 0x03, 0xff,
-    0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03, 0xff,
-    0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00,
-    0x01, 0xd2, 0x98, 0xe9, 0x03, 0x0c, 0x00, 0x46, 0x21, 0xd9, 0xff, 0xc4,
-    0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03,
-    0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05,
-    0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26,
-    0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x11, 0x01, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01,
-    0x3f, 0x01, 0x98, 0xb1, 0xbd, 0x47, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00,
-    0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x01, 0x12, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08,
-    0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xb6, 0x35, 0xa2, 0xe1, 0x47, 0xff,
-    0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x21, 0x02,
-    0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00, 0x08, 0x01,
-    0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2, 0xed, 0xf9,
-    0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c, 0x10, 0x01,
-    0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61, 0x81, 0xf0,
-    0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21, 0x75, 0x6e,
-    0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01, 0xf3, 0xde,
-    0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48, 0x5d, 0x7a,
-    0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02,
-    0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x24, 0xaf, 0xff, 0xc4, 0x00, 0x19,
-    0x11, 0x00, 0x03, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x51, 0x21, 0x31, 0xff,
-    0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x59, 0x11, 0xca,
-    0x42, 0x60, 0x9f, 0x69, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03,
-    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x01, 0x11, 0x21, 0x31, 0x61, 0xff, 0xda, 0x00, 0x08, 0x01,
-    0x02, 0x01, 0x01, 0x3f, 0x10, 0xb0, 0xd7, 0x27, 0x51, 0xb6, 0x41, 0xff,
-    0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31,
-    0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08,
-    0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a,
-    0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd,
-    0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30,
-    0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03,
-    0x0b, 0xb7, 0xd4, 0xff, 0xd9};
-static const size_t kTest4JpgLen = 701;
-
-TEST_F(LibYUVConvertTest, TestMJPGSize) {
-  int width = 0;
-  int height = 0;
-  int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
-  EXPECT_EQ(0, ret);
-
-  printf("test jpeg size %d x %d\n", width, height);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToI420) {
-  int width = 0;
-  int height = 0;
-  int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
-  EXPECT_EQ(0, ret);
-
-  int half_width = (width + 1) / 2;
-  int half_height = (height + 1) / 2;
-  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
-                             benchmark_height_ / (width * height);
-  if (benchmark_iterations < 1) {
-    benchmark_iterations = 1;
-  }
-
-  align_buffer_page_end(dst_y, width * height);
-  align_buffer_page_end(dst_u, half_width * half_height);
-  align_buffer_page_end(dst_v, half_width * half_height);
-  for (int times = 0; times < benchmark_iterations; ++times) {
-    ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_u, half_width,
-                     dst_v, half_width, width, height, width, height);
-  }
-  // Expect sucesss
-  EXPECT_EQ(0, ret);
-
-  // Test result matches known hash value.
-  uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
-  uint32_t dst_u_hash = HashDjb2(dst_u, half_width * half_height, 5381);
-  uint32_t dst_v_hash = HashDjb2(dst_v, half_width * half_height, 5381);
-  EXPECT_EQ(dst_y_hash, 2682851208u);
-  EXPECT_EQ(dst_u_hash, 2501859930u);
-  EXPECT_EQ(dst_v_hash, 2126459123u);
-
-  free_aligned_buffer_page_end(dst_y);
-  free_aligned_buffer_page_end(dst_u);
-  free_aligned_buffer_page_end(dst_v);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) {
-  int width = 0;
-  int height = 0;
-  int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
-  EXPECT_EQ(0, ret);
-
-  int half_width = (width + 1) / 2;
-  int half_height = (height + 1) / 2;
-  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
-                             benchmark_height_ / (width * height);
-  if (benchmark_iterations < 1) {
-    benchmark_iterations = 1;
-  }
-
-  // Convert to NV21
-  align_buffer_page_end(dst_y, width * height);
-  align_buffer_page_end(dst_vu, half_width * half_height * 2);
-
-  for (int times = 0; times < benchmark_iterations; ++times) {
-    ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_vu,
-                     half_width * 2, width, height, width, height);
-  }
-  // Expect sucesss
-  EXPECT_EQ(0, ret);
-
-  // Convert to I420
-  align_buffer_page_end(dst2_y, width * height);
-  align_buffer_page_end(dst2_u, half_width * half_height);
-  align_buffer_page_end(dst2_v, half_width * half_height);
-  for (int times = 0; times < benchmark_iterations; ++times) {
-    ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width,
-                     dst2_v, half_width, width, height, width, height);
-  }
-  // Expect sucesss
-  EXPECT_EQ(0, ret);
-
-  // Convert I420 to NV21
-  align_buffer_page_end(dst3_y, width * height);
-  align_buffer_page_end(dst3_vu, half_width * half_height * 2);
-
-  I420ToNV21(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y,
-             width, dst3_vu, half_width * 2, width, height);
-
-  for (int i = 0; i < width * height; ++i) {
-    EXPECT_EQ(dst_y[i], dst3_y[i]);
-  }
-  for (int i = 0; i < half_width * half_height * 2; ++i) {
-    EXPECT_EQ(dst_vu[i], dst3_vu[i]);
-    EXPECT_EQ(dst_vu[i], dst3_vu[i]);
-  }
-
-  free_aligned_buffer_page_end(dst3_y);
-  free_aligned_buffer_page_end(dst3_vu);
-
-  free_aligned_buffer_page_end(dst2_y);
-  free_aligned_buffer_page_end(dst2_u);
-  free_aligned_buffer_page_end(dst2_v);
-
-  free_aligned_buffer_page_end(dst_y);
-  free_aligned_buffer_page_end(dst_vu);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToI420_NV12) {
-  int width = 0;
-  int height = 0;
-  int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
-  EXPECT_EQ(0, ret);
-
-  int half_width = (width + 1) / 2;
-  int half_height = (height + 1) / 2;
-  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
-                             benchmark_height_ / (width * height);
-  if (benchmark_iterations < 1) {
-    benchmark_iterations = 1;
-  }
-
-  // Convert to NV12
-  align_buffer_page_end(dst_y, width * height);
-  align_buffer_page_end(dst_uv, half_width * half_height * 2);
-
-  for (int times = 0; times < benchmark_iterations; ++times) {
-    ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv,
-                     half_width * 2, width, height, width, height);
-  }
-  // Expect sucesss
-  EXPECT_EQ(0, ret);
-
-  // Convert to I420
-  align_buffer_page_end(dst2_y, width * height);
-  align_buffer_page_end(dst2_u, half_width * half_height);
-  align_buffer_page_end(dst2_v, half_width * half_height);
-  for (int times = 0; times < benchmark_iterations; ++times) {
-    ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width,
-                     dst2_v, half_width, width, height, width, height);
-  }
-  // Expect sucesss
-  EXPECT_EQ(0, ret);
-
-  // Convert I420 to NV12
-  align_buffer_page_end(dst3_y, width * height);
-  align_buffer_page_end(dst3_uv, half_width * half_height * 2);
-
-  I420ToNV12(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y,
-             width, dst3_uv, half_width * 2, width, height);
-
-  for (int i = 0; i < width * height; ++i) {
-    EXPECT_EQ(dst_y[i], dst3_y[i]);
-  }
-  for (int i = 0; i < half_width * half_height * 2; ++i) {
-    EXPECT_EQ(dst_uv[i], dst3_uv[i]);
-    EXPECT_EQ(dst_uv[i], dst3_uv[i]);
-  }
-
-  free_aligned_buffer_page_end(dst3_y);
-  free_aligned_buffer_page_end(dst3_uv);
-
-  free_aligned_buffer_page_end(dst2_y);
-  free_aligned_buffer_page_end(dst2_u);
-  free_aligned_buffer_page_end(dst2_v);
-
-  free_aligned_buffer_page_end(dst_y);
-  free_aligned_buffer_page_end(dst_uv);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToNV21_420) {
-  int width = 0;
-  int height = 0;
-  int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
-  EXPECT_EQ(0, ret);
-
-  int half_width = (width + 1) / 2;
-  int half_height = (height + 1) / 2;
-  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
-                             benchmark_height_ / (width * height);
-  if (benchmark_iterations < 1) {
-    benchmark_iterations = 1;
-  }
-
-  align_buffer_page_end(dst_y, width * height);
-  align_buffer_page_end(dst_uv, half_width * half_height * 2);
-  for (int times = 0; times < benchmark_iterations; ++times) {
-    ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv,
-                     half_width * 2, width, height, width, height);
-  }
-  // Expect sucesss
-  EXPECT_EQ(0, ret);
-
-  // Test result matches known hash value.
-  uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
-  uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381);
-  EXPECT_EQ(dst_y_hash, 2682851208u);
-  EXPECT_EQ(dst_uv_hash, 1069662856u);
-
-  free_aligned_buffer_page_end(dst_y);
-  free_aligned_buffer_page_end(dst_uv);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) {
-  int width = 0;
-  int height = 0;
-  int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
-  EXPECT_EQ(0, ret);
-
-  int half_width = (width + 1) / 2;
-  int half_height = (height + 1) / 2;
-  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
-                             benchmark_height_ / (width * height);
-  if (benchmark_iterations < 1) {
-    benchmark_iterations = 1;
-  }
-
-  align_buffer_page_end(dst_y, width * height);
-  align_buffer_page_end(dst_uv, half_width * half_height * 2);
-  for (int times = 0; times < benchmark_iterations; ++times) {
-    ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv,
-                     half_width * 2, width, height, width, height);
-  }
-  // Expect sucesss
-  EXPECT_EQ(0, ret);
-
-  // Test result matches known hash value. Hashes are for VU so flip the plane.
-  uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
-  align_buffer_page_end(dst_vu, half_width * half_height * 2);
-  SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
-              half_height);
-  uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
-  EXPECT_EQ(dst_y_hash, 2682851208u);
-  EXPECT_EQ(dst_vu_hash, 1069662856u);
-
-  free_aligned_buffer_page_end(dst_y);
-  free_aligned_buffer_page_end(dst_uv);
-  free_aligned_buffer_page_end(dst_vu);
-}
-
-// TODO(fbarchard): Improve test to compare against I422, not checksum
-TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) {
-  int width = 0;
-  int height = 0;
-  int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height);
-  EXPECT_EQ(0, ret);
-
-  int half_width = (width + 1) / 2;
-  int half_height = (height + 1) / 2;
-  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
-                             benchmark_height_ / (width * height);
-  if (benchmark_iterations < 1) {
-    benchmark_iterations = 1;
-  }
-
-  align_buffer_page_end(dst_y, width * height);
-  align_buffer_page_end(dst_uv, half_width * half_height * 2);
-  for (int times = 0; times < benchmark_iterations; ++times) {
-    ret = MJPGToNV21(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv,
-                     half_width * 2, width, height, width, height);
-  }
-  // Expect sucesss
-  EXPECT_EQ(0, ret);
-
-  // Test result matches known hash value.
-  uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
-  uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381);
-  EXPECT_EQ(dst_y_hash, 2682851208u);
-  EXPECT_EQ(dst_uv_hash, 493520167u);
-
-  free_aligned_buffer_page_end(dst_y);
-  free_aligned_buffer_page_end(dst_uv);
-}
-
-TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) {
-  int width = 0;
-  int height = 0;
-  int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height);
-  EXPECT_EQ(0, ret);
-
-  int half_width = (width + 1) / 2;
-  int half_height = (height + 1) / 2;
-  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
-                             benchmark_height_ / (width * height);
-  if (benchmark_iterations < 1) {
-    benchmark_iterations = 1;
-  }
-
-  align_buffer_page_end(dst_y, width * height);
-  align_buffer_page_end(dst_uv, half_width * half_height * 2);
-  for (int times = 0; times < benchmark_iterations; ++times) {
-    ret = MJPGToNV12(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv,
-                     half_width * 2, width, height, width, height);
-  }
-  // Expect sucesss
-  EXPECT_EQ(0, ret);
-
-  // Test result matches known hash value. Hashes are for VU so flip the plane.
-  uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
-  align_buffer_page_end(dst_vu, half_width * half_height * 2);
-  SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
-              half_height);
-  uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
-  EXPECT_EQ(dst_y_hash, 2682851208u);
-  EXPECT_EQ(dst_vu_hash, 493520167u);
-
-  free_aligned_buffer_page_end(dst_y);
-  free_aligned_buffer_page_end(dst_uv);
-  free_aligned_buffer_page_end(dst_vu);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToNV21_400) {
-  int width = 0;
-  int height = 0;
-  int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height);
-  EXPECT_EQ(0, ret);
-
-  int half_width = (width + 1) / 2;
-  int half_height = (height + 1) / 2;
-  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
-                             benchmark_height_ / (width * height);
-  if (benchmark_iterations < 1) {
-    benchmark_iterations = 1;
-  }
-
-  align_buffer_page_end(dst_y, width * height);
-  align_buffer_page_end(dst_uv, half_width * half_height * 2);
-  for (int times = 0; times < benchmark_iterations; ++times) {
-    ret = MJPGToNV21(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv,
-                     half_width * 2, width, height, width, height);
-  }
-  // Expect sucesss
-  EXPECT_EQ(0, ret);
-
-  // Test result matches known hash value.
-  uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
-  uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381);
-  EXPECT_EQ(dst_y_hash, 330644005u);
-  EXPECT_EQ(dst_uv_hash, 135214341u);
-
-  free_aligned_buffer_page_end(dst_y);
-  free_aligned_buffer_page_end(dst_uv);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToNV12_400) {
-  int width = 0;
-  int height = 0;
-  int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height);
-  EXPECT_EQ(0, ret);
-
-  int half_width = (width + 1) / 2;
-  int half_height = (height + 1) / 2;
-  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
-                             benchmark_height_ / (width * height);
-  if (benchmark_iterations < 1) {
-    benchmark_iterations = 1;
-  }
-
-  align_buffer_page_end(dst_y, width * height);
-  align_buffer_page_end(dst_uv, half_width * half_height * 2);
-  for (int times = 0; times < benchmark_iterations; ++times) {
-    ret = MJPGToNV12(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv,
-                     half_width * 2, width, height, width, height);
-  }
-  // Expect sucesss
-  EXPECT_EQ(0, ret);
-
-  // Test result matches known hash value. Hashes are for VU so flip the plane.
-  uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
-  align_buffer_page_end(dst_vu, half_width * half_height * 2);
-  SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
-              half_height);
-  uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
-  EXPECT_EQ(dst_y_hash, 330644005u);
-  EXPECT_EQ(dst_vu_hash, 135214341u);
-
-  free_aligned_buffer_page_end(dst_y);
-  free_aligned_buffer_page_end(dst_uv);
-  free_aligned_buffer_page_end(dst_vu);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToNV21_444) {
-  int width = 0;
-  int height = 0;
-  int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height);
-  EXPECT_EQ(0, ret);
-
-  int half_width = (width + 1) / 2;
-  int half_height = (height + 1) / 2;
-  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
-                             benchmark_height_ / (width * height);
-  if (benchmark_iterations < 1) {
-    benchmark_iterations = 1;
-  }
-
-  align_buffer_page_end(dst_y, width * height);
-  align_buffer_page_end(dst_uv, half_width * half_height * 2);
-  for (int times = 0; times < benchmark_iterations; ++times) {
-    ret = MJPGToNV21(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv,
-                     half_width * 2, width, height, width, height);
-  }
-  // Expect sucesss
-  EXPECT_EQ(0, ret);
-
-  // Test result matches known hash value.
-  uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
-  uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381);
-  EXPECT_EQ(dst_y_hash, 2682851208u);
-  EXPECT_EQ(dst_uv_hash, 506143297u);
-
-  free_aligned_buffer_page_end(dst_y);
-  free_aligned_buffer_page_end(dst_uv);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToNV12_444) {
-  int width = 0;
-  int height = 0;
-  int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height);
-  EXPECT_EQ(0, ret);
-
-  int half_width = (width + 1) / 2;
-  int half_height = (height + 1) / 2;
-  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
-                             benchmark_height_ / (width * height);
-  if (benchmark_iterations < 1) {
-    benchmark_iterations = 1;
-  }
-
-  align_buffer_page_end(dst_y, width * height);
-  align_buffer_page_end(dst_uv, half_width * half_height * 2);
-  for (int times = 0; times < benchmark_iterations; ++times) {
-    ret = MJPGToNV12(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv,
-                     half_width * 2, width, height, width, height);
-  }
-  // Expect sucesss
-  EXPECT_EQ(0, ret);
-
-  // Test result matches known hash value. Hashes are for VU so flip the plane.
-  uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
-  align_buffer_page_end(dst_vu, half_width * half_height * 2);
-  SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
-              half_height);
-  uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
-  EXPECT_EQ(dst_y_hash, 2682851208u);
-  EXPECT_EQ(dst_vu_hash, 506143297u);
-
-  free_aligned_buffer_page_end(dst_y);
-  free_aligned_buffer_page_end(dst_uv);
-  free_aligned_buffer_page_end(dst_vu);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToARGB) {
-  int width = 0;
-  int height = 0;
-  int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height);
-  EXPECT_EQ(0, ret);
-
-  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
-                             benchmark_height_ / (width * height);
-  if (benchmark_iterations < 1) {
-    benchmark_iterations = 1;
-  }
-
-  align_buffer_page_end(dst_argb, width * height * 4);
-  for (int times = 0; times < benchmark_iterations; ++times) {
-    ret = MJPGToARGB(kTest3Jpg, kTest3JpgLen, dst_argb, width * 4, width,
-                     height, width, height);
+#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                        YALIGN, W1280, N, NEG, OFF, ATTEN)                     \
+  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                        \
+    const int kWidth = W1280;                                                  \
+    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                   \
+    const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN);                      \
+    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                        \
+    const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y);             \
+    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                       \
+    align_buffer_page_end(src_u, kSizeUV + OFF);                               \
+    align_buffer_page_end(src_v, kSizeUV + OFF);                               \
+    align_buffer_page_end(src_a, kWidth* kHeight + OFF);                       \
+    align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF);                \
+    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF);              \
+    for (int i = 0; i < kWidth * kHeight; ++i) {                               \
+      src_y[i + OFF] = (fastrand() & 0xff);                                    \
+      src_a[i + OFF] = (fastrand() & 0xff);                                    \
+    }                                                                          \
+    for (int i = 0; i < kSizeUV; ++i) {                                        \
+      src_u[i + OFF] = (fastrand() & 0xff);                                    \
+      src_v[i + OFF] = (fastrand() & 0xff);                                    \
+    }                                                                          \
+    memset(dst_argb_c + OFF, 1, kStrideB * kHeight);                           \
+    memset(dst_argb_opt + OFF, 101, kStrideB * kHeight);                       \
+    MaskCpuFlags(disable_cpu_flags_);                                          \
+    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV,         \
+                          src_v + OFF, kStrideUV, src_a + OFF, kWidth,         \
+                          dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight,     \
+                          ATTEN);                                              \
+    MaskCpuFlags(benchmark_cpu_info_);                                         \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
+      FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV,       \
+                            src_v + OFF, kStrideUV, src_a + OFF, kWidth,       \
+                            dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, \
+                            ATTEN);                                            \
+    }                                                                          \
+    for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) {                       \
+      EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]);                   \
+    }                                                                          \
+    free_aligned_buffer_page_end(src_y);                                       \
+    free_aligned_buffer_page_end(src_u);                                       \
+    free_aligned_buffer_page_end(src_v);                                       \
+    free_aligned_buffer_page_end(src_a);                                       \
+    free_aligned_buffer_page_end(dst_argb_c);                                  \
+    free_aligned_buffer_page_end(dst_argb_opt);                                \
   }
-  // Expect sucesss
-  EXPECT_EQ(0, ret);
 
-  // Test result matches known hash value.
-  uint32_t dst_argb_hash = HashDjb2(dst_argb, width * height, 5381);
-#ifdef LIBYUV_UNLIMITED_DATA
-  EXPECT_EQ(dst_argb_hash, 3900633302u);
+#if defined(ENABLE_FULL_TESTS)
+#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                       YALIGN)                                                \
+  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                  YALIGN, benchmark_width_ + 1, _Any, +, 0, 0)                \
+  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                  YALIGN, benchmark_width_, _Unaligned, +, 2, 0)              \
+  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                  YALIGN, benchmark_width_, _Invert, -, 0, 0)                 \
+  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                  YALIGN, benchmark_width_, _Opt, +, 0, 0)                    \
+  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                  YALIGN, benchmark_width_, _Premult, +, 0, 1)
 #else
-  EXPECT_EQ(dst_argb_hash, 2355976473u);
+#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                       YALIGN)                                                \
+  TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                  YALIGN, benchmark_width_, _Opt, +, 0, 0)
 #endif
 
-  free_aligned_buffer_page_end(dst_argb);
-}
-
-static int ShowJPegInfo(const uint8_t* sample, size_t sample_size) {
-  MJpegDecoder mjpeg_decoder;
-  LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
-
-  int width = mjpeg_decoder.GetWidth();
-  int height = mjpeg_decoder.GetHeight();
-
-  // YUV420
-  if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
-      mjpeg_decoder.GetNumComponents() == 3 &&
-      mjpeg_decoder.GetVertSampFactor(0) == 2 &&
-      mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
-      mjpeg_decoder.GetVertSampFactor(1) == 1 &&
-      mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
-      mjpeg_decoder.GetVertSampFactor(2) == 1 &&
-      mjpeg_decoder.GetHorizSampFactor(2) == 1) {
-    printf("JPeg is J420, %dx%d %d bytes\n", width, height,
-           static_cast<int>(sample_size));
-    // YUV422
-  } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
-             mjpeg_decoder.GetNumComponents() == 3 &&
-             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
-             mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
-             mjpeg_decoder.GetVertSampFactor(1) == 1 &&
-             mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
-             mjpeg_decoder.GetVertSampFactor(2) == 1 &&
-             mjpeg_decoder.GetHorizSampFactor(2) == 1) {
-    printf("JPeg is J422, %dx%d %d bytes\n", width, height,
-           static_cast<int>(sample_size));
-    // YUV444
-  } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
-             mjpeg_decoder.GetNumComponents() == 3 &&
-             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
-             mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
-             mjpeg_decoder.GetVertSampFactor(1) == 1 &&
-             mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
-             mjpeg_decoder.GetVertSampFactor(2) == 1 &&
-             mjpeg_decoder.GetHorizSampFactor(2) == 1) {
-    printf("JPeg is J444, %dx%d %d bytes\n", width, height,
-           static_cast<int>(sample_size));
-    // YUV400
-  } else if (mjpeg_decoder.GetColorSpace() ==
-                 MJpegDecoder::kColorSpaceGrayscale &&
-             mjpeg_decoder.GetNumComponents() == 1 &&
-             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
-             mjpeg_decoder.GetHorizSampFactor(0) == 1) {
-    printf("JPeg is J400, %dx%d %d bytes\n", width, height,
-           static_cast<int>(sample_size));
-  } else {
-    // Unknown colorspace.
-    printf("JPeg is Unknown colorspace.\n");
-  }
-  mjpeg_decoder.UnloadFrame();
-  return ret;
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGInfo) {
-  EXPECT_EQ(1, ShowJPegInfo(kTest0Jpg, kTest0JpgLen));
-  EXPECT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen));
-  EXPECT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen));
-  EXPECT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen));
-  EXPECT_EQ(1, ShowJPegInfo(kTest4Jpg,
-                            kTest4JpgLen));  // Valid but unsupported.
-}
-#endif  // HAVE_JPEG
-
-TEST_F(LibYUVConvertTest, NV12Crop) {
-  const int SUBSAMP_X = 2;
-  const int SUBSAMP_Y = 2;
-  const int kWidth = benchmark_width_;
-  const int kHeight = benchmark_height_;
-  const int crop_y =
-      ((benchmark_height_ - (benchmark_height_ * 360 / 480)) / 2 + 1) & ~1;
-  const int kDestWidth = benchmark_width_;
-  const int kDestHeight = benchmark_height_ - crop_y * 2;
-  const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);
-  const int sample_size =
-      kWidth * kHeight + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2;
-  align_buffer_page_end(src_y, sample_size);
-  uint8_t* src_uv = src_y + kWidth * kHeight;
-
-  align_buffer_page_end(dst_y, kDestWidth * kDestHeight);
-  align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
-                                   SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-  align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
-                                   SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-
-  align_buffer_page_end(dst_y_2, kDestWidth * kDestHeight);
-  align_buffer_page_end(dst_u_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
-                                     SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-  align_buffer_page_end(dst_v_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
-                                     SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-
-  for (int i = 0; i < kHeight * kWidth; ++i) {
-    src_y[i] = (fastrand() & 0xff);
-  }
-  for (int i = 0; i < (SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideUV) * 2; ++i) {
-    src_uv[i] = (fastrand() & 0xff);
-  }
-  memset(dst_y, 1, kDestWidth * kDestHeight);
-  memset(dst_u, 2,
-         SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-  memset(dst_v, 3,
-         SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-  memset(dst_y_2, 1, kDestWidth * kDestHeight);
-  memset(dst_u_2, 2,
-         SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-  memset(dst_v_2, 3,
-         SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-
-  ConvertToI420(src_y, sample_size, dst_y_2, kDestWidth, dst_u_2,
-                SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v_2,
-                SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, crop_y, kWidth, kHeight,
-                kDestWidth, kDestHeight, libyuv::kRotate0, libyuv::FOURCC_NV12);
-
-  NV12ToI420(src_y + crop_y * kWidth, kWidth,
-             src_uv + (crop_y / 2) * kStrideUV * 2, kStrideUV * 2, dst_y,
-             kDestWidth, dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v,
-             SUBSAMPLE(kDestWidth, SUBSAMP_X), kDestWidth, kDestHeight);
-
-  for (int i = 0; i < kDestHeight; ++i) {
-    for (int j = 0; j < kDestWidth; ++j) {
-      EXPECT_EQ(dst_y[i * kWidth + j], dst_y_2[i * kWidth + j]);
-    }
-  }
-  for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) {
-    for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) {
-      EXPECT_EQ(dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j],
-                dst_u_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]);
-    }
-  }
-  for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) {
-    for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) {
-      EXPECT_EQ(dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j],
-                dst_v_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]);
-    }
-  }
-  free_aligned_buffer_page_end(dst_y);
-  free_aligned_buffer_page_end(dst_u);
-  free_aligned_buffer_page_end(dst_v);
-  free_aligned_buffer_page_end(dst_y_2);
-  free_aligned_buffer_page_end(dst_u_2);
-  free_aligned_buffer_page_end(dst_v_2);
-  free_aligned_buffer_page_end(src_y);
-}
-
-TEST_F(LibYUVConvertTest, I420CropOddY) {
-  const int SUBSAMP_X = 2;
-  const int SUBSAMP_Y = 2;
-  const int kWidth = benchmark_width_;
-  const int kHeight = benchmark_height_;
-  const int crop_y = benchmark_height_ > 1 ? 1 : 0;
-  const int kDestWidth = benchmark_width_;
-  const int kDestHeight = benchmark_height_ - crop_y * 2;
-  const int kStrideU = SUBSAMPLE(kWidth, SUBSAMP_X);
-  const int kStrideV = SUBSAMPLE(kWidth, SUBSAMP_X);
-  const int sample_size = kWidth * kHeight +
-                          kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y) +
-                          kStrideV * SUBSAMPLE(kHeight, SUBSAMP_Y);
-  align_buffer_page_end(src_y, sample_size);
-  uint8_t* src_u = src_y + kWidth * kHeight;
-  uint8_t* src_v = src_u + kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y);
-
-  align_buffer_page_end(dst_y, kDestWidth * kDestHeight);
-  align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
-                                   SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-  align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
-                                   SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-
-  for (int i = 0; i < kHeight * kWidth; ++i) {
-    src_y[i] = (fastrand() & 0xff);
-  }
-  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideU; ++i) {
-    src_u[i] = (fastrand() & 0xff);
-  }
-  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideV; ++i) {
-    src_v[i] = (fastrand() & 0xff);
-  }
-  memset(dst_y, 1, kDestWidth * kDestHeight);
-  memset(dst_u, 2,
-         SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-  memset(dst_v, 3,
-         SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-
-  MaskCpuFlags(benchmark_cpu_info_);
-  for (int i = 0; i < benchmark_iterations_; ++i) {
-    ConvertToI420(src_y, sample_size, dst_y, kDestWidth, dst_u,
-                  SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v,
-                  SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, crop_y, kWidth, kHeight,
-                  kDestWidth, kDestHeight, libyuv::kRotate0,
-                  libyuv::FOURCC_I420);
-  }
+#define J420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+                        l, m)
+#define J420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+                        l, m)
+#define F420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+                        l, m)
+#define F420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+                        l, m)
+#define H420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+                        l, m)
+#define H420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+                        l, m)
+#define U420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+                        l, m)
+#define U420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+                        l, m)
+#define V420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
+  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define V420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
+  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define J422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+                        l, m)
+#define J422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+                        l, m)
+#define F422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+                        l, m)
+#define F422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+                        l, m)
+#define H422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+                        l, m)
+#define H422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+                        l, m)
+#define U422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+                        l, m)
+#define U422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+                        l, m)
+#define V422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
+  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define V422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
+  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define J444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+                        l, m)
+#define J444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+                        l, m)
+#define F444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+                        l, m)
+#define F444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+                        l, m)
+#define H444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+                        l, m)
+#define H444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+                        l, m)
+#define U444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+                        l, m)
+#define U444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
+  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+                        l, m)
+#define V444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
+  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define V444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
+  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
 
-  for (int i = 0; i < kDestHeight; ++i) {
-    for (int j = 0; j < kDestWidth; ++j) {
-      EXPECT_EQ(src_y[crop_y * kWidth + i * kWidth + j],
-                dst_y[i * kDestWidth + j]);
-    }
-  }
-  for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) {
-    for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) {
-      EXPECT_EQ(src_u[(crop_y / 2 + i) * kStrideU + j],
-                dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]);
-    }
-  }
-  for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) {
-    for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) {
-      EXPECT_EQ(src_v[(crop_y / 2 + i) * kStrideV + j],
-                dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]);
-    }
-  }
+#define I420AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+  I420AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j,          \
+                              &kYuvI601Constants, k, l, m, kFilterBilinear)
+#define I422AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+  I422AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j,          \
+                              &kYuvI601Constants, k, l, m, kFilterBilinear)
 
-  free_aligned_buffer_page_end(dst_y);
-  free_aligned_buffer_page_end(dst_u);
-  free_aligned_buffer_page_end(dst_v);
-  free_aligned_buffer_page_end(src_y);
-}
+#if defined(ENABLE_FULL_TESTS)
+TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(J420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(J420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(H420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(H420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(F420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(F420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(U420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(U420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(V420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(V420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(J422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(J422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(H422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(H422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(F422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(F422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(U422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(U422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(V422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(V422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(J444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(J444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(H444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(H444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(F444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(F444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(U444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(U444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(V444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(V444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
+#else
+TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
+#endif
 
 TEST_F(LibYUVConvertTest, TestYToARGB) {
   uint8_t y[32];
@@ -3104,6 +1288,7 @@ TEST_F(LibYUVConvertTest, TestDither) {
     free_aligned_buffer_page_end(dst_argb32_opt);                              \
   }
 
+#if defined(ENABLE_FULL_TESTS)
 #define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
                        YALIGN, FMT_C, BPP_C)                                  \
   TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
@@ -3114,116 +1299,17 @@ TEST_F(LibYUVConvertTest, TestDither) {
                   YALIGN, benchmark_width_, _Invert, -, 0, FMT_C, BPP_C)      \
   TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
                   YALIGN, benchmark_width_, _Opt, +, 0, FMT_C, BPP_C)
+#else
+#define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                       YALIGN, FMT_C, BPP_C)                                  \
+  TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                  YALIGN, benchmark_width_, _Opt, +, 0, FMT_C, BPP_C)
+#endif
 
 #ifdef LITTLE_ENDIAN_ONLY_TEST
 TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4)
 #endif
 
-#define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12)                                \
-  TEST_F(LibYUVConvertTest, NAME) {                                           \
-    const int kWidth = benchmark_width_;                                      \
-    const int kHeight = benchmark_height_;                                    \
-                                                                              \
-    align_buffer_page_end(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight);     \
-    align_buffer_page_end(orig_y, kWidth* kHeight);                           \
-    align_buffer_page_end(orig_u,                                             \
-                          SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));      \
-    align_buffer_page_end(orig_v,                                             \
-                          SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));      \
-                                                                              \
-    align_buffer_page_end(dst_y_orig, kWidth* kHeight);                       \
-    align_buffer_page_end(dst_uv_orig,                                        \
-                          2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));  \
-                                                                              \
-    align_buffer_page_end(dst_y, kWidth* kHeight);                            \
-    align_buffer_page_end(dst_uv,                                             \
-                          2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));  \
-                                                                              \
-    MemRandomize(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight);              \
-                                                                              \
-    /* Convert UYVY to NV12 in 2 steps for reference */                       \
-    libyuv::UYVYTOI420(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), orig_y, kWidth,   \
-                       orig_u, SUBSAMPLE(kWidth, 2), orig_v,                  \
-                       SUBSAMPLE(kWidth, 2), kWidth, kHeight);                \
-    libyuv::I420ToNV12(orig_y, kWidth, orig_u, SUBSAMPLE(kWidth, 2), orig_v,  \
-                       SUBSAMPLE(kWidth, 2), dst_y_orig, kWidth, dst_uv_orig, \
-                       2 * SUBSAMPLE(kWidth, 2), kWidth, kHeight);            \
-                                                                              \
-    /* Convert to NV12 */                                                     \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
-      libyuv::UYVYTONV12(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), dst_y, kWidth,  \
-                         dst_uv, 2 * SUBSAMPLE(kWidth, 2), kWidth, kHeight);  \
-    }                                                                         \
-                                                                              \
-    for (int i = 0; i < kWidth * kHeight; ++i) {                              \
-      EXPECT_EQ(orig_y[i], dst_y[i]);                                         \
-    }                                                                         \
-    for (int i = 0; i < kWidth * kHeight; ++i) {                              \
-      EXPECT_EQ(dst_y_orig[i], dst_y[i]);                                     \
-    }                                                                         \
-    for (int i = 0; i < 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2);     \
-         ++i) {                                                               \
-      EXPECT_EQ(dst_uv_orig[i], dst_uv[i]);                                   \
-    }                                                                         \
-                                                                              \
-    free_aligned_buffer_page_end(orig_uyvy);                                  \
-    free_aligned_buffer_page_end(orig_y);                                     \
-    free_aligned_buffer_page_end(orig_u);                                     \
-    free_aligned_buffer_page_end(orig_v);                                     \
-    free_aligned_buffer_page_end(dst_y_orig);                                 \
-    free_aligned_buffer_page_end(dst_uv_orig);                                \
-    free_aligned_buffer_page_end(dst_y);                                      \
-    free_aligned_buffer_page_end(dst_uv);                                     \
-  }
-
-TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12)
-TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12)
-
-TEST_F(LibYUVConvertTest, MM21ToYUY2) {
-  const int kWidth = (benchmark_width_ + 15) & (~15);
-  const int kHeight = (benchmark_height_ + 31) & (~31);
-
-  align_buffer_page_end(orig_y, kWidth * kHeight);
-  align_buffer_page_end(orig_uv,
-                        2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
-
-  align_buffer_page_end(tmp_y, kWidth * kHeight);
-  align_buffer_page_end(tmp_u, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
-  align_buffer_page_end(tmp_v, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
-
-  align_buffer_page_end(dst_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight);
-  align_buffer_page_end(golden_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight);
-
-  MemRandomize(orig_y, kWidth * kHeight);
-  MemRandomize(orig_uv, 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
-
-  /* Convert MM21 to YUY2 in 2 steps for reference */
-  libyuv::MM21ToI420(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2), tmp_y,
-                     kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v,
-                     SUBSAMPLE(kWidth, 2), kWidth, kHeight);
-  libyuv::I420ToYUY2(tmp_y, kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v,
-                     SUBSAMPLE(kWidth, 2), golden_yuyv,
-                     4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight);
-
-  /* Convert to NV12 */
-  for (int i = 0; i < benchmark_iterations_; ++i) {
-    libyuv::MM21ToYUY2(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2),
-                       dst_yuyv, 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight);
-  }
-
-  for (int i = 0; i < 4 * SUBSAMPLE(kWidth, 2) * kHeight; ++i) {
-    EXPECT_EQ(dst_yuyv[i], golden_yuyv[i]);
-  }
-
-  free_aligned_buffer_page_end(orig_y);
-  free_aligned_buffer_page_end(orig_uv);
-  free_aligned_buffer_page_end(tmp_y);
-  free_aligned_buffer_page_end(tmp_u);
-  free_aligned_buffer_page_end(tmp_v);
-  free_aligned_buffer_page_end(dst_yuyv);
-  free_aligned_buffer_page_end(golden_yuyv);
-}
-
 // Transitive test.  A to B to C is same as A to C.
 // Benchmarks A To B to C for comparison to 1 step, benchmarked elsewhere.
 #define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
@@ -3526,6 +1612,7 @@ TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
     free_aligned_buffer_page_end(dst_argb_bc);                                 \
   }
 
+#if defined(ENABLE_FULL_TESTS)
 #define TESTPLANETOE(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, FMT_C, BPP_C) \
   TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B,                    \
                 benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C)              \
@@ -3535,6 +1622,11 @@ TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
                 _Invert, -, 0, FMT_C, BPP_C)                                 \
   TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_,  \
                 _Opt, +, 0, FMT_C, BPP_C)
+#else
+#define TESTPLANETOE(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, FMT_C, BPP_C) \
+  TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_,  \
+                _Opt, +, 0, FMT_C, BPP_C)
+#endif
 
 // Caveat: Destination needs to be 4 bytes
 #ifdef LITTLE_ENDIAN_ONLY_TEST
@@ -3651,6 +1743,8 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
 }
 #endif  // HAS_ABGRTOAR30ROW_AVX2
 
+#if !defined(LEAN_TESTS)
+
 // Provide matrix wrappers for 12 bit YUV
 #define I012ToARGB(a, b, c, d, e, f, g, h, i, j) \
   I012ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
@@ -3745,6 +1839,7 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
     free_aligned_buffer_page_end(dst_argb_opt);                               \
   }
 
+#if defined(ENABLE_FULL_TESTS)
 #define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B,   \
                         BPP_B, ALIGN, YALIGN)                                \
   TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
@@ -3755,6 +1850,12 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
                    ALIGN, YALIGN, benchmark_width_, _Invert, -, 0, 0)        \
   TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
                    ALIGN, YALIGN, benchmark_width_, _Opt, +, 0, 0)
+#else
+#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B,   \
+                        BPP_B, ALIGN, YALIGN)                                \
+  TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
+                   ALIGN, YALIGN, benchmark_width_, _Opt, +, 0, 0)
+#endif
 
 // These conversions are only optimized for x86
 #if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
@@ -4083,6 +2184,7 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
     free_aligned_buffer_page_end(dst_argb_opt);                                \
   }
 
+#if defined(ENABLE_FULL_TESTS)
 #define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,    \
                     YALIGN, S_DEPTH)                                          \
   TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
@@ -4093,6 +2195,12 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
                benchmark_width_, _Invert, -, 0, 0, S_DEPTH)                   \
   TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
                benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
+#else
+#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,    \
+                    YALIGN, S_DEPTH)                                          \
+  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+               benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
+#endif
 
 #define P010ToARGB(a, b, c, d, e, f, g, h) \
   P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
@@ -4587,61 +2695,6 @@ TEST_F(LibYUVConvertTest, Test565) {
   uint32_t checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381);
   EXPECT_EQ(610919429u, checksum);
 }
-
-// Test RGB24 to J420 is exact
-#if defined(LIBYUV_BIT_EXACT)
-TEST_F(LibYUVConvertTest, TestRGB24ToJ420) {
-  const int kSize = 256;
-  align_buffer_page_end(orig_rgb24, kSize * 3 * 2);  // 2 rows of RGB24
-  align_buffer_page_end(dest_j420, kSize * 3 / 2 * 2);
-  int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) /
-                      (kSize * 2) * benchmark_iterations_;
-
-  for (int i = 0; i < kSize * 3 * 2; ++i) {
-    orig_rgb24[i] = i;
-  }
-
-  for (int i = 0; i < iterations256; ++i) {
-    RGB24ToJ420(orig_rgb24, kSize * 3, dest_j420, kSize,  // Y plane
-                dest_j420 + kSize * 2, kSize / 2,         // U plane
-                dest_j420 + kSize * 5 / 2, kSize / 2,     // V plane
-                kSize, 2);
-  }
-
-  uint32_t checksum = HashDjb2(dest_j420, kSize * 3 / 2 * 2, 5381);
-  EXPECT_EQ(2755440272u, checksum);
-
-  free_aligned_buffer_page_end(orig_rgb24);
-  free_aligned_buffer_page_end(dest_j420);
-}
-#endif
-
-// Test RGB24 to I420 is exact
-#if defined(LIBYUV_BIT_EXACT)
-TEST_F(LibYUVConvertTest, TestRGB24ToI420) {
-  const int kSize = 256;
-  align_buffer_page_end(orig_rgb24, kSize * 3 * 2);  // 2 rows of RGB24
-  align_buffer_page_end(dest_i420, kSize * 3 / 2 * 2);
-  int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) /
-                      (kSize * 2) * benchmark_iterations_;
-
-  for (int i = 0; i < kSize * 3 * 2; ++i) {
-    orig_rgb24[i] = i;
-  }
-
-  for (int i = 0; i < iterations256; ++i) {
-    RGB24ToI420(orig_rgb24, kSize * 3, dest_i420, kSize,  // Y plane
-                dest_i420 + kSize * 2, kSize / 2,         // U plane
-                dest_i420 + kSize * 5 / 2, kSize / 2,     // V plane
-                kSize, 2);
-  }
-
-  uint32_t checksum = HashDjb2(dest_i420, kSize * 3 / 2 * 2, 5381);
-  EXPECT_EQ(1526656597u, checksum);
-
-  free_aligned_buffer_page_end(orig_rgb24);
-  free_aligned_buffer_page_end(dest_i420);
-}
-#endif
+#endif  // !defined(LEAN_TESTS)
 
 }  // namespace libyuv
diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc
new file mode 100644
index 00000000..f55bace3
--- /dev/null
+++ b/unit_test/convert_test.cc
@@ -0,0 +1,2110 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "libyuv/basic_types.h"
+#include "libyuv/compare.h"
+#include "libyuv/convert.h"
+#include "libyuv/convert_argb.h"
+#include "libyuv/convert_from.h"
+#include "libyuv/convert_from_argb.h"
+#include "libyuv/cpu_id.h"
+#ifdef HAVE_JPEG
+#include "libyuv/mjpeg_decoder.h"
+#endif
+#include "../unit_test/unit_test.h"
+#include "libyuv/planar_functions.h"
+#include "libyuv/rotate.h"
+#include "libyuv/video_common.h"
+
+#ifdef ENABLE_ROW_TESTS
+#include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */
+#endif
+
+#if defined(__riscv) && !defined(__clang__)  // RISC-V with a non-clang compiler.
+#define DISABLE_SLOW_TESTS
+#undef ENABLE_FULL_TESTS
+#undef ENABLE_ROW_TESTS
+#define LEAN_TESTS
+#endif  // defined(__riscv) && !defined(__clang__)
+
+// Some functions fail on big endian, so these tests are enabled on all cpus
+// except PowerPC; they are unoptimized, so DISABLE_SLOW_TESTS turns them off.
+#if !defined(DISABLE_SLOW_TESTS) && !defined(__powerpc__)
+#define LITTLE_ENDIAN_ONLY_TEST 1
+#endif  // !defined(DISABLE_SLOW_TESTS) && !defined(__powerpc__)
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+// SLOW TESTS are those that are unoptimized C code.
+// FULL TESTS are optimized but test many variations of the same code.
+#define ENABLE_FULL_TESTS
+#endif  // !defined(DISABLE_SLOW_TESTS) || x86
+
+namespace libyuv {
+
+// Same-format conversions are aliases of ARGBCopy, which copies pixels as is.
+#define AR30ToAR30 ARGBCopy
+#define ABGRToABGR ARGBCopy
+
+// Subsampled size: v / a rounded up for non-negative v (uses a divide).
+#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
+// Rounds V up to the next multiple of ALIGN (for non-negative V).
+#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN))
+
+// Planar test: one pass under MaskCpuFlags(disable_cpu_flags_) fills dst_*_c,
+// looped passes under benchmark_cpu_info_ fill dst_*_opt; outputs must match.
+#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,         \
+                       SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,             \
+                       DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF,      \
+                       SRC_DEPTH)                                             \
+  TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) {              \
+    static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported");       \
+    static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported");       \
+    static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2,                   \
+                  "SRC_SUBSAMP_X unsupported");                               \
+    static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2,                   \
+                  "SRC_SUBSAMP_Y unsupported");                               \
+    static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2,                   \
+                  "DST_SUBSAMP_X unsupported");                               \
+    static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2,                   \
+                  "DST_SUBSAMP_Y unsupported");                               \
+    const int kWidth = W1280;                                                 \
+    const int kHeight = benchmark_height_;                                    \
+    const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X);               \
+    const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y);             \
+    const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X);               \
+    const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y);             \
+    align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF);             \
+    align_buffer_page_end(src_u,                                              \
+                          kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF);      \
+    align_buffer_page_end(src_v,                                              \
+                          kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF);      \
+    align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC);                 \
+    align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC);   \
+    align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC);   \
+    align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC);               \
+    align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+    align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+    MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC);                    \
+    MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC);      \
+    MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC);      \
+    SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF);                   \
+    SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF);                   \
+    SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF);                   \
+    for (int i = 0; i < kWidth * kHeight; ++i) { /* mask to SRC_DEPTH bits */ \
+      src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1);                       \
+    }                                                                         \
+    for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) {                \
+      src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1);                       \
+      src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1);                       \
+    }                                                                         \
+    memset(dst_y_c, 1, kWidth* kHeight* DST_BPC);  /* c/opt fills differ */   \
+    memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC);               \
+    memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC);               \
+    memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC);                         \
+    memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC);           \
+    memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC);           \
+    MaskCpuFlags(disable_cpu_flags_);                                         \
+    SRC_FMT_PLANAR##To##FMT_PLANAR(                                           \
+        src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth,      \
+        reinterpret_cast<DST_T*>(dst_y_c), kWidth,                            \
+        reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth,                     \
+        reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth,             \
+        NEG kHeight);                                                         \
+    MaskCpuFlags(benchmark_cpu_info_);                                        \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
+      SRC_FMT_PLANAR##To##FMT_PLANAR(                                         \
+          src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth,    \
+          reinterpret_cast<DST_T*>(dst_y_opt), kWidth,                        \
+          reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth,                 \
+          reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth,         \
+          NEG kHeight);                                                       \
+    }                                                                         \
+    for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) {                    \
+      EXPECT_EQ(dst_y_c[i], dst_y_opt[i]);                                    \
+    }                                                                         \
+    for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) {      \
+      EXPECT_EQ(dst_u_c[i], dst_u_opt[i]);                                    \
+      EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);                                    \
+    }                                                                         \
+    free_aligned_buffer_page_end(dst_y_c);                                    \
+    free_aligned_buffer_page_end(dst_u_c);                                    \
+    free_aligned_buffer_page_end(dst_v_c);                                    \
+    free_aligned_buffer_page_end(dst_y_opt);                                  \
+    free_aligned_buffer_page_end(dst_u_opt);                                  \
+    free_aligned_buffer_page_end(dst_v_opt);                                  \
+    free_aligned_buffer_page_end(src_y);                                      \
+    free_aligned_buffer_page_end(src_u);                                      \
+    free_aligned_buffer_page_end(src_v);                                      \
+  }
+
+#if defined(ENABLE_FULL_TESTS)  // Variants: _Any, _Unaligned, _Invert, _Opt.
+#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,           \
+                      SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,               \
+                      DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH)                 \
+  TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                 FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,     \
+                 benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH)                  \
+  TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                 FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,     \
+                 benchmark_width_, _Unaligned, +, 2, SRC_DEPTH)                \
+  TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                 FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,     \
+                 benchmark_width_, _Invert, -, 0, SRC_DEPTH)                   \
+  TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                 FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,     \
+                 benchmark_width_, _Opt, +, 0, SRC_DEPTH)
+#else  // Only the _Opt variant is generated.
+#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,           \
+                      SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,               \
+                      DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH)                 \
+  TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                 FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,     \
+                 benchmark_width_, _Opt, +, 0, SRC_DEPTH)
+#endif  // defined(ENABLE_FULL_TESTS)
+// Args: src format, type, BPC, x/y subsampling; dst (same five); SRC_DEPTH.
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I420, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I420, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I422, uint8_t, 1, 2, 1, 8)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I444, uint8_t, 1, 1, 1, 8)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420Mirror, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I422, uint8_t, 1, 2, 1, 8)
+TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I444, uint8_t, 1, 1, 1, 8)
+TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I444, uint8_t, 1, 1, 1, 8)
+TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I010, uint16_t, 2, 2, 2, 8)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I012, uint16_t, 2, 2, 2, 8)
+TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H010, uint16_t, 2, 2, 2, 10)
+TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H420, uint8_t, 1, 2, 2, 10)
+TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2, 8)
+TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H012, uint16_t, 2, 2, 2, 8)
+TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I410, uint16_t, 2, 1, 1, 10)
+TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I410, uint16_t, 2, 1, 1, 10)
+TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I412, uint16_t, 2, 1, 1, 12)
+TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I412, uint16_t, 2, 1, 1, 12)
+TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I010, uint16_t, 2, 2, 2, 10)
+TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I010, uint16_t, 2, 2, 2, 10)
+TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I012, uint16_t, 2, 2, 2, 12)
+TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12)
+TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10)
+TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10)
+TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10)
+TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 10)
+TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10)
+TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12)
+TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 12)
+TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 12)
+TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 12)
+TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12)
+
+// Android 420 to I420: U/V rows are written and read at src_stride_uv bytes.
+#define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X,          \
+                        SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+                        W1280, N, NEG, OFF, PN, OFF_U, OFF_V)                 \
+  TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##To##PN##N) {      \
+    const int kWidth = W1280;                                                 \
+    const int kHeight = benchmark_height_;                                    \
+    const int kSizeUV =                                                       \
+        SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
+    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                      \
+    align_buffer_page_end(src_uv,                                             \
+                          kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF);       \
+    align_buffer_page_end(dst_y_c, kWidth* kHeight);                          \
+    align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) *             \
+                                       SUBSAMPLE(kHeight, SUBSAMP_Y));        \
+    align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) *             \
+                                       SUBSAMPLE(kHeight, SUBSAMP_Y));        \
+    align_buffer_page_end(dst_y_opt, kWidth* kHeight);                        \
+    align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) *           \
+                                         SUBSAMPLE(kHeight, SUBSAMP_Y));      \
+    align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) *           \
+                                         SUBSAMPLE(kHeight, SUBSAMP_Y));      \
+    uint8_t* src_u = src_uv + OFF_U;  /* U at byte OFF_U of src_uv */         \
+    uint8_t* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V);          \
+    int src_stride_uv = SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * PIXEL_STRIDE;      \
+    for (int i = 0; i < kHeight; ++i)                                         \
+      for (int j = 0; j < kWidth; ++j)                                        \
+        src_y[i * kWidth + j + OFF] = (fastrand() & 0xff);                    \
+    for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) {             \
+      for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) {            \
+        src_u[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] =                 \
+            (fastrand() & 0xff);                                              \
+        src_v[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] =                 \
+            (fastrand() & 0xff);                                              \
+      }                                                                       \
+    }                                                                         \
+    memset(dst_y_c, 1, kWidth* kHeight);                                      \
+    memset(dst_u_c, 2,                                                        \
+           SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y));     \
+    memset(dst_v_c, 3,                                                        \
+           SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y));     \
+    memset(dst_y_opt, 101, kWidth* kHeight);                                  \
+    memset(dst_u_opt, 102,                                                    \
+           SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y));     \
+    memset(dst_v_opt, 103,                                                    \
+           SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y));     \
+    MaskCpuFlags(disable_cpu_flags_);                                         \
+    SRC_FMT_PLANAR##To##FMT_PLANAR(                                           \
+        src_y + OFF, kWidth, src_u + OFF, src_stride_uv, src_v + OFF,         \
+        src_stride_uv, PIXEL_STRIDE, dst_y_c, kWidth, dst_u_c,                \
+        SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X),  \
+        kWidth, NEG kHeight);                                                 \
+    MaskCpuFlags(benchmark_cpu_info_);                                        \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
+      SRC_FMT_PLANAR##To##FMT_PLANAR(                                         \
+          src_y + OFF, kWidth, src_u + OFF, src_stride_uv, src_v + OFF,       \
+          src_stride_uv, PIXEL_STRIDE, dst_y_opt, kWidth, dst_u_opt,          \
+          SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_opt,                            \
+          SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight);                 \
+    }                                                                         \
+    for (int i = 0; i < kHeight; ++i) {                                       \
+      for (int j = 0; j < kWidth; ++j) {                                      \
+        EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]);        \
+      }                                                                       \
+    }                                                                         \
+    for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) {                 \
+      for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) {                \
+        EXPECT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j],              \
+                  dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]);           \
+      }                                                                       \
+    }                                                                         \
+    for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) {                 \
+      for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) {                \
+        EXPECT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j],              \
+                  dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]);           \
+      }                                                                       \
+    }                                                                         \
+    free_aligned_buffer_page_end(dst_y_c);                                    \
+    free_aligned_buffer_page_end(dst_u_c);                                    \
+    free_aligned_buffer_page_end(dst_v_c);                                    \
+    free_aligned_buffer_page_end(dst_y_opt);                                  \
+    free_aligned_buffer_page_end(dst_u_opt);                                  \
+    free_aligned_buffer_page_end(dst_v_opt);                                  \
+    free_aligned_buffer_page_end(src_y);                                      \
+    free_aligned_buffer_page_end(src_uv);                                     \
+  }
+
+#if defined(ENABLE_FULL_TESTS)  // Variants: _Any, _Unaligned, _Invert, _Opt.
+#define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V,         \
+                       SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X,    \
+                       SUBSAMP_Y)                                              \
+  TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+                  FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_ + 1,      \
+                  _Any, +, 0, PN, OFF_U, OFF_V)                                \
+  TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+                  FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_,          \
+                  _Unaligned, +, 2, PN, OFF_U, OFF_V)                          \
+  TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+                  FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, \
+                  -, 0, PN, OFF_U, OFF_V)                                      \
+  TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+                  FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \
+                  0, PN, OFF_U, OFF_V)
+#else  // Only the _Opt variant is generated.
+#define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V,         \
+                       SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X,    \
+                       SUBSAMP_Y)                                              \
+  TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+                  FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \
+                  0, PN, OFF_U, OFF_V)
+#endif  // defined(ENABLE_FULL_TESTS)
+
+TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2)  // V plane after U
+TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2)  // U, V interleaved
+TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2)  // V, U interleaved
+#undef TESTAPLANARTOP
+#undef TESTAPLANARTOPI
+
+// Overload that accepts and ignores U/V planes so TESTPLANARTOBP can call it.
+int I400ToNV21(const uint8_t* src_y,
+               int src_stride_y,
+               const uint8_t* /* src_u */,
+               int /* src_stride_u */,
+               const uint8_t* /* src_v */,
+               int /* src_stride_v */,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               uint8_t* dst_vu,
+               int dst_stride_vu,
+               int width,
+               int height) {  // Only the Y source is forwarded.
+  return I400ToNV21(src_y, src_stride_y, dst_y, dst_stride_y, dst_vu,
+                    dst_stride_vu, width, height);
+}
+// Planar source to Y + single UV buffer: dst_*_c and dst_*_opt must match.
+#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,        \
+                        SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,            \
+                        DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF,     \
+                        SRC_DEPTH)                                            \
+  TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) {              \
+    static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported");       \
+    static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported");       \
+    static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2,                   \
+                  "SRC_SUBSAMP_X unsupported");                               \
+    static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2,                   \
+                  "SRC_SUBSAMP_Y unsupported");                               \
+    static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2,                   \
+                  "DST_SUBSAMP_X unsupported");                               \
+    static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2,                   \
+                  "DST_SUBSAMP_Y unsupported");                               \
+    const int kWidth = W1280;                                                 \
+    const int kHeight = benchmark_height_;                                    \
+    const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X);               \
+    const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y);             \
+    const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X);               \
+    const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y);             \
+    align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF);             \
+    align_buffer_page_end(src_u,                                              \
+                          kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF);      \
+    align_buffer_page_end(src_v,                                              \
+                          kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF);      \
+    align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC);                 \
+    align_buffer_page_end(dst_uv_c,                                           \
+                          kDstHalfWidth* kDstHalfHeight* DST_BPC * 2);        \
+    align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC);               \
+    align_buffer_page_end(dst_uv_opt,                                         \
+                          kDstHalfWidth* kDstHalfHeight* DST_BPC * 2);        \
+    MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC);                    \
+    MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC);      \
+    MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC);      \
+    SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF);                   \
+    SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF);                   \
+    SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF);                   \
+    for (int i = 0; i < kWidth * kHeight; ++i) { /* mask to SRC_DEPTH bits */ \
+      src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1);                       \
+    }                                                                         \
+    for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) {                \
+      src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1);                       \
+      src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1);                       \
+    }                                                                         \
+    memset(dst_y_c, 1, kWidth* kHeight* DST_BPC);  /* c/opt fills differ */   \
+    memset(dst_uv_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2);          \
+    memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC);                         \
+    memset(dst_uv_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2);      \
+    MaskCpuFlags(disable_cpu_flags_);                                         \
+    SRC_FMT_PLANAR##To##FMT_PLANAR(src_y_p, kWidth, src_u_p, kSrcHalfWidth,   \
+                                   src_v_p, kSrcHalfWidth,                    \
+                                   reinterpret_cast<DST_T*>(dst_y_c), kWidth, \
+                                   reinterpret_cast<DST_T*>(dst_uv_c),        \
+                                   kDstHalfWidth * 2, kWidth, NEG kHeight);   \
+    MaskCpuFlags(benchmark_cpu_info_);                                        \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
+      SRC_FMT_PLANAR##To##FMT_PLANAR(                                         \
+          src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth,    \
+          reinterpret_cast<DST_T*>(dst_y_opt), kWidth,                        \
+          reinterpret_cast<DST_T*>(dst_uv_opt), kDstHalfWidth * 2, kWidth,    \
+          NEG kHeight);                                                       \
+    }                                                                         \
+    for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) {                    \
+      EXPECT_EQ(dst_y_c[i], dst_y_opt[i]);                                    \
+    }                                                                         \
+    for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC * 2; ++i) {  \
+      EXPECT_EQ(dst_uv_c[i], dst_uv_opt[i]);                                  \
+    }                                                                         \
+    free_aligned_buffer_page_end(dst_y_c);                                    \
+    free_aligned_buffer_page_end(dst_uv_c);                                   \
+    free_aligned_buffer_page_end(dst_y_opt);                                  \
+    free_aligned_buffer_page_end(dst_uv_opt);                                 \
+    free_aligned_buffer_page_end(src_y);                                      \
+    free_aligned_buffer_page_end(src_u);                                      \
+    free_aligned_buffer_page_end(src_v);                                      \
+  }
+
+#if defined(ENABLE_FULL_TESTS)  // Variants: _Any, _Unaligned, _Invert, _Opt.
+#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,         \
+                       SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,             \
+                       DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH)               \
+  TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,              \
+                  SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,   \
+                  DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \
+  TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,              \
+                  SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,   \
+                  DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2,          \
+                  SRC_DEPTH)                                                  \
+  TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,              \
+                  SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,   \
+                  DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH)  \
+  TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,              \
+                  SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,   \
+                  DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH)
+#else  // Only the _Opt variant is generated.
+#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,       \
+                       SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,           \
+                       DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH)             \
+  TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
+                  SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+                  DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH)
+#endif  // defined(ENABLE_FULL_TESTS)
+// Planar sources to Y + UV; I400ToNV21 resolves to the 12-argument overload.
+TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I422, uint8_t, 1, 2, 1, NV21, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV12, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV21, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I400, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I010, uint16_t, 2, 2, 2, P010, uint16_t, 2, 2, 2, 10)
+TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10)
+TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12)
+TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
+
+// Defines one biplanar -> biplanar conversion test named
+// SRC_FMT_PLANAR##To##FMT_PLANAR##N.
+//
+// The conversion is called once after MaskCpuFlags(disable_cpu_flags_) and
+// benchmark_iterations_ times after MaskCpuFlags(benchmark_cpu_info_); the two
+// sets of output planes must be byte-for-byte identical.
+//
+//   SRC_T / DST_T      sample types of the source / destination planes.
+//   SRC_BPC / DST_BPC  bytes per sample. DST_BPC must be 1 or 2. SRC_BPC is
+//                      substituted textually and may be a fraction written as
+//                      `10 / 8`; every use below keeps it as the trailing
+//                      factor of a product so integer division happens last.
+//                      Do not parenthesize or reorder those expressions.
+//   *_SUBSAMP_X/Y      chroma subsampling factors (1 or 2).
+//   W1280              image width expression.
+//   N                  test-name suffix.
+//   NEG                sign applied to the height argument.
+//   OFF                byte offset added to both source pointers.
+//   DOY                when 0, NULL is passed as the destination Y plane and
+//                      the Y comparison is skipped.
+//   SRC_DEPTH          number of most-significant bits kept in each random
+//                      source sample; the remaining low bits are cleared.
+//   TILE_WIDTH/HEIGHT  the source buffers are sized for the width/height
+//                      rounded up to these values (expected to be powers of
+//                      two); the strides passed to the conversion still use
+//                      the unpadded width.
+//
+// The result buffers are uint8_t arrays, so the comparison loops walk
+// DST_BPC bytes per sample to cover the whole plane for 16-bit outputs.
+#define TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
+                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+                    DST_SUBSAMP_Y, W1280, N, NEG, OFF, DOY, SRC_DEPTH,        \
+                    TILE_WIDTH, TILE_HEIGHT)                                  \
+  TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) {              \
+    static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported");       \
+    static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2,                   \
+                  "SRC_SUBSAMP_X unsupported");                               \
+    static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2,                   \
+                  "SRC_SUBSAMP_Y unsupported");                               \
+    static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2,                   \
+                  "DST_SUBSAMP_X unsupported");                               \
+    static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2,                   \
+                  "DST_SUBSAMP_Y unsupported");                               \
+    const int kWidth = W1280;                                                 \
+    const int kHeight = benchmark_height_;                                    \
+    const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X);               \
+    const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X);               \
+    const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y);             \
+    const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
+    const int kPaddedHeight =                                                 \
+        (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1);                   \
+    const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X);   \
+    const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
+    align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
+    align_buffer_page_end(                                                    \
+        src_uv,                                                               \
+        2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF);      \
+    align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC);                 \
+    align_buffer_page_end(dst_uv_c,                                           \
+                          2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);      \
+    align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC);               \
+    align_buffer_page_end(dst_uv_opt,                                         \
+                          2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);      \
+    SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF);                   \
+    SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF);                 \
+    for (int i = 0;                                                           \
+         i < kPaddedWidth * kPaddedHeight * SRC_BPC / (int)sizeof(SRC_T);     \
+         ++i) {                                                               \
+      src_y_p[i] =                                                            \
+          (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH)));      \
+    }                                                                         \
+    for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2 *      \
+                            SRC_BPC / (int)sizeof(SRC_T);                     \
+         ++i) {                                                               \
+      src_uv_p[i] =                                                           \
+          (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH)));      \
+    }                                                                         \
+    memset(dst_y_c, 1, kWidth* kHeight* DST_BPC);                             \
+    memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);        \
+    memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC);                         \
+    memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);    \
+    MaskCpuFlags(disable_cpu_flags_);                                         \
+    SRC_FMT_PLANAR##To##FMT_PLANAR(                                           \
+        src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p,              \
+        2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T),                     \
+        DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth,               \
+        reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth,        \
+        NEG kHeight);                                                         \
+    MaskCpuFlags(benchmark_cpu_info_);                                        \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
+      SRC_FMT_PLANAR##To##FMT_PLANAR(                                         \
+          src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p,            \
+          2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T),                   \
+          DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth,           \
+          reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth,    \
+          NEG kHeight);                                                       \
+    }                                                                         \
+    if (DOY) {                                                                \
+      for (int i = 0; i < kHeight; ++i) {                                     \
+        for (int j = 0; j < kWidth * DST_BPC; ++j) {                          \
+          EXPECT_EQ(dst_y_c[i * kWidth * DST_BPC + j],                        \
+                    dst_y_opt[i * kWidth * DST_BPC + j]);                     \
+        }                                                                     \
+      }                                                                       \
+    }                                                                         \
+    for (int i = 0; i < kDstHalfHeight; ++i) {                                \
+      for (int j = 0; j < 2 * kDstHalfWidth * DST_BPC; ++j) {                 \
+        EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth * DST_BPC + j],              \
+                  dst_uv_opt[i * 2 * kDstHalfWidth * DST_BPC + j]);           \
+      }                                                                       \
+    }                                                                         \
+    free_aligned_buffer_page_end(dst_y_c);                                    \
+    free_aligned_buffer_page_end(dst_uv_c);                                   \
+    free_aligned_buffer_page_end(dst_y_opt);                                  \
+    free_aligned_buffer_page_end(dst_uv_opt);                                 \
+    free_aligned_buffer_page_end(src_y);                                      \
+    free_aligned_buffer_page_end(src_uv);                                     \
+  }
+
+// TESTBPTOBP expands to one or more TESTBPTOBPI tests for a source/destination
+// format pair. With ENABLE_FULL_TESTS it generates:
+//   _Any        width benchmark_width_ + 1
+//   _Unaligned  source pointers offset by 2 bytes
+//   _Invert     negated height
+//   _Opt        plain benchmark-sized run
+//   _NullY      DOY = 0, i.e. NULL destination Y plane
+// Without ENABLE_FULL_TESTS only _NullY is generated.
+// NOTE(review): the other test wrappers in this file keep _Opt in the reduced
+// configuration; confirm that _NullY is the intended single variant here.
+#if defined(ENABLE_FULL_TESTS)
+#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
+                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+                   DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)        \
+  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+              benchmark_width_ + 1, _Any, +, 0, 1, SRC_DEPTH, TILE_WIDTH,    \
+              TILE_HEIGHT)                                                   \
+  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+              benchmark_width_, _Unaligned, +, 2, 1, SRC_DEPTH, TILE_WIDTH,  \
+              TILE_HEIGHT)                                                   \
+  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+              benchmark_width_, _Invert, -, 0, 1, SRC_DEPTH, TILE_WIDTH,     \
+              TILE_HEIGHT)                                                   \
+  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+              benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, TILE_WIDTH,        \
+              TILE_HEIGHT)                                                   \
+  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+              benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH,      \
+              TILE_HEIGHT)
+#else
+#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
+                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+                   DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)        \
+  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+              benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH,      \
+              TILE_HEIGHT)
+#endif
+
+// Biplanar -> biplanar conversion tests. Arguments: source format, sample
+// type, bytes per sample, x/y chroma subsampling; the same five for the
+// destination; then the source bit depth (number of most-significant bits
+// randomized in each source sample) and the tile width/height used to pad the
+// source buffers (1, 1 means no padding).
+TESTBPTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
+TESTBPTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
+TESTBPTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
+TESTBPTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
+// P012 carries 12 significant bits, matching P212 below and the I012 -> P012
+// test, so all 12 bits are randomized.
+TESTBPTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 12, 1, 1)
+TESTBPTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
+// NOTE(review): P016/P216 use a source depth of 12, so the low 4 bits of every
+// source sample are always zero; 16 may be intended -- confirm before changing.
+TESTBPTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
+TESTBPTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
+TESTBPTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
+// The bytes-per-sample argument `10 / 8` is pasted textually into size
+// expressions such as width * height * 10 / 8; it must stay unparenthesized.
+TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32)
+
+// Defines the test FMT_A##To##FMT_PLANAR##N: a packed FMT_A image filled with
+// random bytes is converted to planar FMT_PLANAR once after
+// MaskCpuFlags(disable_cpu_flags_) and benchmark_iterations_ times after
+// MaskCpuFlags(benchmark_cpu_info_); the Y and chroma outputs of both runs
+// must match byte for byte.
+//
+// BPP_A is the source bytes per pixel, YALIGN the height alignment, W1280 the
+// width expression, NEG the sign applied to the height and OFF the byte
+// offset of the source pointer. The two chroma planes share one buffer: the
+// first starts at offset 0, the second at offset kChromaStride, and both use
+// a row stride of 2 * kChromaStride.
+#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                       W1280, N, NEG, OFF)                                     \
+  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) {                        \
+    const int kWidth = W1280;                                                  \
+    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                   \
+    const int kChromaStride = SUBSAMPLE(kWidth, SUBSAMP_X);                    \
+    const int kChromaRows = SUBSAMPLE(kHeight, SUBSAMP_Y);                     \
+    const int kSrcStride = (kChromaStride * SUBSAMP_X * 8 * BPP_A + 7) / 8;    \
+    const int kLumaBytes = kWidth * kHeight;                                   \
+    const int kChromaBytes = kChromaStride * 2 * kChromaRows;                  \
+    align_buffer_page_end(src_argb, kSrcStride * kHeight + OFF);               \
+    align_buffer_page_end(dst_y_c, kLumaBytes);                                \
+    align_buffer_page_end(dst_uv_c, kChromaBytes);                             \
+    align_buffer_page_end(dst_y_opt, kLumaBytes);                              \
+    align_buffer_page_end(dst_uv_opt, kChromaBytes);                           \
+    memset(dst_y_c, 1, kLumaBytes);                                            \
+    memset(dst_uv_c, 2, kChromaBytes);                                         \
+    memset(dst_y_opt, 101, kLumaBytes);                                        \
+    memset(dst_uv_opt, 102, kChromaBytes);                                     \
+    for (int row = 0; row < kHeight; ++row) {                                  \
+      for (int col = 0; col < kSrcStride; ++col) {                             \
+        src_argb[row * kSrcStride + col + OFF] = (fastrand() & 0xff);          \
+      }                                                                        \
+    }                                                                          \
+    MaskCpuFlags(disable_cpu_flags_);                                          \
+    FMT_A##To##FMT_PLANAR(src_argb + OFF, kSrcStride, dst_y_c, kWidth,         \
+                          dst_uv_c, kChromaStride * 2,                         \
+                          dst_uv_c + kChromaStride, kChromaStride * 2, kWidth, \
+                          NEG kHeight);                                        \
+    MaskCpuFlags(benchmark_cpu_info_);                                         \
+    for (int iter = 0; iter < benchmark_iterations_; ++iter) {                 \
+      FMT_A##To##FMT_PLANAR(src_argb + OFF, kSrcStride, dst_y_opt, kWidth,     \
+                            dst_uv_opt, kChromaStride * 2,                     \
+                            dst_uv_opt + kChromaStride, kChromaStride * 2,     \
+                            kWidth, NEG kHeight);                              \
+    }                                                                          \
+    for (int row = 0; row < kHeight; ++row) {                                  \
+      for (int col = 0; col < kWidth; ++col) {                                 \
+        EXPECT_EQ(dst_y_c[row * kWidth + col], dst_y_opt[row * kWidth + col]); \
+      }                                                                        \
+    }                                                                          \
+    for (int row = 0; row < kChromaRows * 2; ++row) {                          \
+      for (int col = 0; col < kChromaStride; ++col) {                          \
+        EXPECT_EQ(dst_uv_c[row * kChromaStride + col],                         \
+                  dst_uv_opt[row * kChromaStride + col]);                      \
+      }                                                                        \
+    }                                                                          \
+    free_aligned_buffer_page_end(dst_y_c);                                     \
+    free_aligned_buffer_page_end(dst_uv_c);                                    \
+    free_aligned_buffer_page_end(dst_y_opt);                                   \
+    free_aligned_buffer_page_end(dst_uv_opt);                                  \
+    free_aligned_buffer_page_end(src_argb);                                    \
+  }
+
+// TESTATOPLANAR expands to TESTATOPLANARI tests for one packed -> planar
+// format pair. With ENABLE_FULL_TESTS it generates _Any (width
+// benchmark_width_ + 1), _Unaligned (source offset 2), _Invert (negated
+// height) and _Opt; otherwise only _Opt.
+#if defined(ENABLE_FULL_TESTS)
+#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+  TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+                 benchmark_width_ + 1, _Any, +, 0)                            \
+  TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+                 benchmark_width_, _Unaligned, +, 2)                          \
+  TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+                 benchmark_width_, _Invert, -, 0)                             \
+  TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+                 benchmark_width_, _Opt, +, 0)
+#else
+#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+  TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+                 benchmark_width_, _Opt, +, 0)
+#endif
+
+// Packed -> planar conversion tests. Arguments: source format, source bytes
+// per pixel, height alignment, destination format, destination chroma x/y
+// subsampling.
+TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2)
+TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2)
+TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1)
+TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1)
+TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2)
+TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1)
+TESTATOPLANAR(ABGR, 4, 1, J420, 2, 2)
+TESTATOPLANAR(ABGR, 4, 1, J422, 2, 1)
+// The 16-bit RGB source formats are only tested when LITTLE_ENDIAN_ONLY_TEST
+// is defined.
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2)
+TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2)
+TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2)
+#endif
+TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2)
+TESTATOPLANAR(I400, 1, 1, I420, 2, 2)
+TESTATOPLANAR(J400, 1, 1, J420, 2, 2)
+TESTATOPLANAR(RAW, 3, 1, I420, 2, 2)
+TESTATOPLANAR(RAW, 3, 1, J420, 2, 2)
+TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2)
+TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2)
+TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2)
+TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2)
+TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1)
+TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2)
+TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
+
+// Defines the test FMT_A##To##FMT_PLANAR##N for conversions that also produce
+// an alpha plane: a packed FMT_A image filled with random bytes is converted
+// once after MaskCpuFlags(disable_cpu_flags_) and benchmark_iterations_ times
+// after MaskCpuFlags(benchmark_cpu_info_); the Y, alpha and chroma outputs of
+// both runs must match byte for byte.
+//
+// BPP_A is the source bytes per pixel, YALIGN the height alignment, W1280 the
+// width expression, NEG the sign applied to the height and OFF the byte
+// offset of the source pointer. The two chroma planes share one buffer: the
+// first starts at offset 0, the second at offset kChromaStride, and both use
+// a row stride of 2 * kChromaStride.
+#define TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X,           \
+                        SUBSAMP_Y, W1280, N, NEG, OFF)                         \
+  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) {                        \
+    const int kWidth = W1280;                                                  \
+    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                   \
+    const int kChromaStride = SUBSAMPLE(kWidth, SUBSAMP_X);                    \
+    const int kChromaRows = SUBSAMPLE(kHeight, SUBSAMP_Y);                     \
+    const int kSrcStride = (kChromaStride * SUBSAMP_X * 8 * BPP_A + 7) / 8;    \
+    const int kLumaBytes = kWidth * kHeight;                                   \
+    const int kChromaBytes = kChromaStride * 2 * kChromaRows;                  \
+    align_buffer_page_end(src_argb, kSrcStride * kHeight + OFF);               \
+    align_buffer_page_end(dst_a_c, kLumaBytes);                                \
+    align_buffer_page_end(dst_y_c, kLumaBytes);                                \
+    align_buffer_page_end(dst_uv_c, kChromaBytes);                             \
+    align_buffer_page_end(dst_a_opt, kLumaBytes);                              \
+    align_buffer_page_end(dst_y_opt, kLumaBytes);                              \
+    align_buffer_page_end(dst_uv_opt, kChromaBytes);                           \
+    memset(dst_a_c, 1, kLumaBytes);                                            \
+    memset(dst_y_c, 2, kLumaBytes);                                            \
+    memset(dst_uv_c, 3, kChromaBytes);                                         \
+    memset(dst_a_opt, 101, kLumaBytes);                                        \
+    memset(dst_y_opt, 102, kLumaBytes);                                        \
+    memset(dst_uv_opt, 103, kChromaBytes);                                     \
+    for (int row = 0; row < kHeight; ++row) {                                  \
+      for (int col = 0; col < kSrcStride; ++col) {                             \
+        src_argb[row * kSrcStride + col + OFF] = (fastrand() & 0xff);          \
+      }                                                                        \
+    }                                                                          \
+    MaskCpuFlags(disable_cpu_flags_);                                          \
+    FMT_A##To##FMT_PLANAR(src_argb + OFF, kSrcStride, dst_y_c, kWidth,         \
+                          dst_uv_c, kChromaStride * 2,                         \
+                          dst_uv_c + kChromaStride, kChromaStride * 2,         \
+                          dst_a_c, kWidth, kWidth, NEG kHeight);               \
+    MaskCpuFlags(benchmark_cpu_info_);                                         \
+    for (int iter = 0; iter < benchmark_iterations_; ++iter) {                 \
+      FMT_A##To##FMT_PLANAR(src_argb + OFF, kSrcStride, dst_y_opt, kWidth,     \
+                            dst_uv_opt, kChromaStride * 2,                     \
+                            dst_uv_opt + kChromaStride, kChromaStride * 2,     \
+                            dst_a_opt, kWidth, kWidth, NEG kHeight);           \
+    }                                                                          \
+    for (int row = 0; row < kHeight; ++row) {                                  \
+      for (int col = 0; col < kWidth; ++col) {                                 \
+        EXPECT_EQ(dst_y_c[row * kWidth + col], dst_y_opt[row * kWidth + col]); \
+        EXPECT_EQ(dst_a_c[row * kWidth + col], dst_a_opt[row * kWidth + col]); \
+      }                                                                        \
+    }                                                                          \
+    for (int row = 0; row < kChromaRows * 2; ++row) {                          \
+      for (int col = 0; col < kChromaStride; ++col) {                          \
+        EXPECT_EQ(dst_uv_c[row * kChromaStride + col],                         \
+                  dst_uv_opt[row * kChromaStride + col]);                      \
+      }                                                                        \
+    }                                                                          \
+    free_aligned_buffer_page_end(dst_a_c);                                     \
+    free_aligned_buffer_page_end(dst_y_c);                                     \
+    free_aligned_buffer_page_end(dst_uv_c);                                    \
+    free_aligned_buffer_page_end(dst_a_opt);                                   \
+    free_aligned_buffer_page_end(dst_y_opt);                                   \
+    free_aligned_buffer_page_end(dst_uv_opt);                                  \
+    free_aligned_buffer_page_end(src_argb);                                    \
+  }
+
+// TESTATOPLANARA expands to TESTATOPLANARAI tests for one packed ->
+// planar-with-alpha format pair. With ENABLE_FULL_TESTS it generates _Any
+// (width benchmark_width_ + 1), _Unaligned (source offset 2), _Invert (negated
+// height) and _Opt; otherwise only _Opt.
+#if defined(ENABLE_FULL_TESTS)
+#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+  TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+                  benchmark_width_ + 1, _Any, +, 0)                            \
+  TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+                  benchmark_width_, _Unaligned, +, 2)                          \
+  TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+                  benchmark_width_, _Invert, -, 0)                             \
+  TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+                  benchmark_width_, _Opt, +, 0)
+#else
+#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+  TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+                  benchmark_width_, _Opt, +, 0)
+#endif
+
+// Arguments: source format, source bytes per pixel, height alignment,
+// destination format, destination chroma x/y subsampling.
+TESTATOPLANARA(ARGB, 4, 1, I420Alpha, 2, 2)
+
+// Defines the test FMT_A##To##FMT_PLANAR##N: a packed FMT_A image filled with
+// random bytes is converted to a biplanar format (Y plane plus one
+// interleaved chroma plane) once after MaskCpuFlags(disable_cpu_flags_) and
+// benchmark_iterations_ times after MaskCpuFlags(benchmark_cpu_info_); both
+// output planes of the two runs must match byte for byte.
+//
+// The source row size is SUBSAMPLE(width, SUB_A) * BPP_A bytes. W1280 is the
+// width expression, NEG the sign applied to the height and OFF the byte
+// offset of the source pointer.
+#define TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,     \
+                   W1280, N, NEG, OFF)                                        \
+  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) {                       \
+    const int kWidth = W1280;                                                 \
+    const int kHeight = benchmark_height_;                                    \
+    const int kSrcStride = SUBSAMPLE(kWidth, SUB_A) * BPP_A;                  \
+    const int kChromaWidth = SUBSAMPLE(kWidth, SUBSAMP_X);                    \
+    const int kUVStride = kChromaWidth * 2;                                   \
+    const int kUVRows = SUBSAMPLE(kHeight, SUBSAMP_Y);                        \
+    const int kLumaBytes = kWidth * kHeight;                                  \
+    const int kUVBytes = kUVStride * kUVRows;                                 \
+    align_buffer_page_end(src_argb, kSrcStride * kHeight + OFF);              \
+    align_buffer_page_end(dst_y_c, kLumaBytes);                               \
+    align_buffer_page_end(dst_uv_c, kUVBytes);                                \
+    align_buffer_page_end(dst_y_opt, kLumaBytes);                             \
+    align_buffer_page_end(dst_uv_opt, kUVBytes);                              \
+    for (int row = 0; row < kHeight; ++row) {                                 \
+      for (int col = 0; col < kSrcStride; ++col) {                            \
+        src_argb[row * kSrcStride + col + OFF] = (fastrand() & 0xff);         \
+      }                                                                       \
+    }                                                                         \
+    memset(dst_y_c, 1, kLumaBytes);                                           \
+    memset(dst_uv_c, 2, kUVBytes);                                            \
+    memset(dst_y_opt, 101, kLumaBytes);                                       \
+    memset(dst_uv_opt, 102, kUVBytes);                                        \
+    MaskCpuFlags(disable_cpu_flags_);                                         \
+    FMT_A##To##FMT_PLANAR(src_argb + OFF, kSrcStride, dst_y_c, kWidth,        \
+                          dst_uv_c, kUVStride, kWidth, NEG kHeight);          \
+    MaskCpuFlags(benchmark_cpu_info_);                                        \
+    for (int iter = 0; iter < benchmark_iterations_; ++iter) {                \
+      FMT_A##To##FMT_PLANAR(src_argb + OFF, kSrcStride, dst_y_opt, kWidth,    \
+                            dst_uv_opt, kUVStride, kWidth, NEG kHeight);      \
+    }                                                                         \
+    for (int row = 0; row < kHeight; ++row) {                                 \
+      for (int col = 0; col < kWidth; ++col) {                                \
+        EXPECT_EQ(dst_y_c[row * kWidth + col],                                \
+                  dst_y_opt[row * kWidth + col]);                             \
+      }                                                                       \
+    }                                                                         \
+    for (int row = 0; row < kUVRows; ++row) {                                 \
+      for (int col = 0; col < kUVStride; ++col) {                             \
+        EXPECT_EQ(dst_uv_c[row * kUVStride + col],                            \
+                  dst_uv_opt[row * kUVStride + col]);                         \
+      }                                                                       \
+    }                                                                         \
+    free_aligned_buffer_page_end(dst_y_c);                                    \
+    free_aligned_buffer_page_end(dst_uv_c);                                   \
+    free_aligned_buffer_page_end(dst_y_opt);                                  \
+    free_aligned_buffer_page_end(dst_uv_opt);                                 \
+    free_aligned_buffer_page_end(src_argb);                                   \
+  }
+
+// TESTATOBP expands to TESTATOBPI tests for one packed -> biplanar format
+// pair. With ENABLE_FULL_TESTS it generates _Any (width benchmark_width_ + 1),
+// _Unaligned (source offset 2), _Invert (negated height) and _Opt; otherwise
+// only _Opt.
+#if defined(ENABLE_FULL_TESTS)
+#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+             benchmark_width_ + 1, _Any, +, 0)                           \
+  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+             benchmark_width_, _Unaligned, +, 2)                         \
+  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+             benchmark_width_, _Invert, -, 0)                            \
+  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+             benchmark_width_, _Opt, +, 0)
+#else
+#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+             benchmark_width_, _Opt, +, 0)
+#endif
+
+// Packed -> biplanar conversion tests. Arguments: source format, pixels per
+// source group (SUB_A), bytes per source group (BPP_A), destination format,
+// destination chroma x/y subsampling. The source row size is
+// SUBSAMPLE(width, SUB_A) * BPP_A bytes, so YUY2/UYVY use 2 pixels per 4 bytes.
+TESTATOBP(ARGB, 1, 4, NV12, 2, 2)
+TESTATOBP(ARGB, 1, 4, NV21, 2, 2)
+TESTATOBP(ABGR, 1, 4, NV12, 2, 2)
+TESTATOBP(ABGR, 1, 4, NV21, 2, 2)
+TESTATOBP(RAW, 1, 3, JNV21, 2, 2)
+TESTATOBP(YUY2, 2, 4, NV12, 2, 2)
+TESTATOBP(UYVY, 2, 4, NV12, 2, 2)
+TESTATOBP(AYUV, 1, 4, NV12, 2, 2)
+TESTATOBP(AYUV, 1, 4, NV21, 2, 2)
+
+#if !defined(LEAN_TESTS)
+
+#ifdef HAVE_JPEG
+// Checks ValidateJpeg() on three buffers of the same size: all zeros (expected
+// to fail), all 0xff (expected to fail), and 0xff filler with an SOI marker at
+// the start and an EOI marker kTrailer bytes before the end (expected to pass).
+TEST_F(LibYUVConvertTest, ValidateJpeg) {
+  const int kTrailer = 10;     // Bytes from the EOI marker to the buffer end.
+  const int kMinPayload = 64;  // Lower bound for the payload size.
+  const int kPixels = benchmark_width_ * benchmark_height_;
+  const int kPayload = (kPixels < kMinPayload) ? kMinPayload : kPixels;
+  const int kTotal = kPayload + kTrailer;
+  align_buffer_page_end(jpeg_buf, kTotal);
+
+  // All zeros: neither SOI nor EOI is present.
+  memset(jpeg_buf, 0, kTotal);
+  EXPECT_FALSE(ValidateJpeg(jpeg_buf, kTotal));
+
+  // All 0xff: every byte matches the first byte of a marker.
+  memset(jpeg_buf, 0xff, kTotal);
+  EXPECT_FALSE(ValidateJpeg(jpeg_buf, kTotal));
+
+  // SOI at the front, EOI at offset kPayload.
+  jpeg_buf[0] = 0xff;
+  jpeg_buf[1] = 0xd8;  // SOI.
+  jpeg_buf[2] = 0xff;
+  jpeg_buf[kPayload + 0] = 0xff;
+  jpeg_buf[kPayload + 1] = 0xd9;  // EOI.
+  for (int pass = benchmark_iterations_; pass > 0; --pass) {
+    EXPECT_TRUE(ValidateJpeg(jpeg_buf, kTotal));
+  }
+  free_aligned_buffer_page_end(jpeg_buf);
+}
+
+// Checks ValidateJpeg() on a buffer kScale times larger than the payload: the
+// EOI marker sits at offset kPayload, so most of the buffer follows it.
+TEST_F(LibYUVConvertTest, ValidateJpegLarge) {
+  const int kTrailer = 10;
+  const int kMinPayload = 64;  // Lower bound for the payload size.
+  const int kScale = 10;       // Buffer size multiplier.
+  const int kPixels = benchmark_width_ * benchmark_height_;
+  const int kPayload = (kPixels < kMinPayload) ? kMinPayload : kPixels;
+  const int kTotal = kPayload * kScale + kTrailer;
+  align_buffer_page_end(jpeg_buf, kTotal);
+
+  // All zeros: neither SOI nor EOI is present.
+  memset(jpeg_buf, 0, kTotal);
+  EXPECT_FALSE(ValidateJpeg(jpeg_buf, kTotal));
+
+  // SOI at the front, EOI at offset kPayload, zeros everywhere else.
+  jpeg_buf[0] = 0xff;
+  jpeg_buf[1] = 0xd8;  // SOI.
+  jpeg_buf[2] = 0xff;
+  jpeg_buf[kPayload + 0] = 0xff;
+  jpeg_buf[kPayload + 1] = 0xd9;  // EOI.
+  for (int pass = benchmark_iterations_; pass > 0; --pass) {
+    EXPECT_TRUE(ValidateJpeg(jpeg_buf, kTotal));
+  }
+  free_aligned_buffer_page_end(jpeg_buf);
+}
+
+// Checks that ValidateJpeg() rejects bad arguments (NULL pointer, negative
+// size, oversized length) and buffers missing either the SOI or EOI marker.
+TEST_F(LibYUVConvertTest, InvalidateJpeg) {
+  const int kTrailer = 10;     // Bytes from the EOI marker to the buffer end.
+  const int kMinPayload = 64;  // Lower bound for the payload size.
+  const int kPixels = benchmark_width_ * benchmark_height_;
+  const int kPayload = (kPixels < kMinPayload) ? kMinPayload : kPixels;
+  const int kTotal = kPayload + kTrailer;
+  align_buffer_page_end(jpeg_buf, kTotal);
+
+  // Bad arguments: NULL pointer, negative size, too large size.
+  EXPECT_FALSE(ValidateJpeg(NULL, kTotal));
+  EXPECT_FALSE(ValidateJpeg(jpeg_buf, -1));
+  EXPECT_FALSE(ValidateJpeg(jpeg_buf, 0xfb000000ull));
+
+  // All zeros: neither SOI nor EOI is present.
+  memset(jpeg_buf, 0, kTotal);
+  EXPECT_FALSE(ValidateJpeg(jpeg_buf, kTotal));
+
+  // SOI only.
+  jpeg_buf[0] = 0xff;
+  jpeg_buf[1] = 0xd8;  // SOI.
+  jpeg_buf[2] = 0xff;
+  for (int pass = benchmark_iterations_; pass > 0; --pass) {
+    EXPECT_FALSE(ValidateJpeg(jpeg_buf, kTotal));
+  }
+
+  // EOI only: clear the SOI bytes and place EOI at offset kPayload.
+  jpeg_buf[0] = 0;
+  jpeg_buf[1] = 0;
+  jpeg_buf[kPayload + 0] = 0xff;
+  jpeg_buf[kPayload + 1] = 0xd9;  // EOI.
+  EXPECT_FALSE(ValidateJpeg(jpeg_buf, kTotal));
+
+  free_aligned_buffer_page_end(jpeg_buf);
+}
+
+// Runs ValidateJpeg() on randomly sized, randomly filled buffers that start
+// with an SOI marker and end with 0xff. The return value is not checked; the
+// test only exercises the scanner on arbitrary data.
+TEST_F(LibYUVConvertTest, FuzzJpeg) {
+  for (int round = benchmark_iterations_; round > 0; --round) {
+    const int kFuzzSize = fastrand() % 5000 + 3;
+    align_buffer_page_end(fuzz_buf, kFuzzSize);
+    MemRandomize(fuzz_buf, kFuzzSize);
+
+    // Add SOI so the frame will be scanned.
+    fuzz_buf[0] = 0xff;
+    fuzz_buf[1] = 0xd8;  // SOI.
+    fuzz_buf[2] = 0xff;
+    fuzz_buf[kFuzzSize - 1] = 0xff;
+    // Failure normally expected.
+    ValidateJpeg(fuzz_buf, kFuzzSize);
+    free_aligned_buffer_page_end(fuzz_buf);
+  }
+}
+
+// Test data created in GIMP. In the JPEG export dialog, disable thumbnails
+// etc., choose a subsampling, and use low quality (50) to keep the size
+// small. The arrays were generated with `xxd -i test.jpg`.
+//
+// Test 0 is J400.
+// kTest0JpgLen is hard-coded and must stay equal to sizeof(kTest0Jpg):
+// 35 rows of 12 bytes plus the final 0xd9 byte = 421.
+static const uint8_t kTest0Jpg[] = {
+    0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
+    0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
+    0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
+    0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
+    0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
+    0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
+    0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
+    0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xc2, 0x00, 0x0b, 0x08, 0x00, 0x10,
+    0x00, 0x20, 0x01, 0x01, 0x11, 0x00, 0xff, 0xc4, 0x00, 0x17, 0x00, 0x01,
+    0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xda, 0x00, 0x08, 0x01,
+    0x01, 0x00, 0x00, 0x00, 0x01, 0x43, 0x7e, 0xa7, 0x97, 0x57, 0xff, 0xc4,
+    0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03,
+    0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05,
+    0x02, 0x3b, 0xc0, 0x6f, 0x66, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26,
+    0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03,
+    0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff,
+    0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28,
+    0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4,
+    0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51,
+    0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01,
+    0x3f, 0x21, 0x65, 0x6e, 0x31, 0x86, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb,
+    0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9,
+    0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x08,
+    0x01, 0x01, 0x00, 0x00, 0x00, 0x10, 0x35, 0xff, 0xc4, 0x00, 0x1f, 0x10,
+    0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91,
+    0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01,
+    0x3f, 0x10, 0x0b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x88, 0xab, 0x8b,
+    0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec,
+    0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c,
+    0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff,
+    0xd9};
+static const size_t kTest0JpgLen = 421;
+
+// test 1 is J444
+static const uint8_t kTest1Jpg[] = {
+    0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
+    0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
+    0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
+    0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
+    0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
+    0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
+    0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
+    0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12,
+    0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42,
+    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+    0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03,
+    0x01, 0x11, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00,
+    0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4,
+    0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x01, 0x03, 0xff, 0xda,
+    0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, 0x01,
+    0x40, 0x8f, 0x26, 0xe8, 0xf4, 0xcc, 0xf9, 0x69, 0x2b, 0x1b, 0x2a, 0xcb,
+    0xff, 0xc4, 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11,
+    0x00, 0x03, 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00,
+    0x01, 0x05, 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99,
+    0x0d, 0x26, 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x01, 0x00,
+    0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x01, 0x00, 0x10, 0x11, 0x02, 0x12, 0xff, 0xda, 0x00, 0x08,
+    0x01, 0x03, 0x01, 0x01, 0x3f, 0x01, 0xf1, 0x00, 0x27, 0x45, 0xbb, 0x31,
+    0xaf, 0xff, 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x02, 0x03, 0x01, 0x01, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+    0x02, 0x10, 0x11, 0x41, 0x12, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01,
+    0x01, 0x3f, 0x01, 0xf6, 0x4b, 0x5f, 0x48, 0xb3, 0x69, 0x63, 0x35, 0x72,
+    0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11,
+    0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00,
+    0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2,
+    0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c,
+    0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61,
+    0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21,
+    0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01,
+    0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48,
+    0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01,
+    0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x26, 0x61, 0xd4, 0xff,
+    0xc4, 0x00, 0x1a, 0x11, 0x00, 0x03, 0x01, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21,
+    0x31, 0x41, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f,
+    0x10, 0x54, 0xa8, 0xbf, 0x50, 0x87, 0xb0, 0x9d, 0x8b, 0xc4, 0x6a, 0x26,
+    0x6b, 0x2a, 0x9c, 0x1f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x01, 0x01, 0x01,
+    0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x01, 0x00, 0x11, 0x21, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02,
+    0x01, 0x01, 0x3f, 0x10, 0x70, 0xe1, 0x3e, 0xd1, 0x8e, 0x0d, 0xe1, 0xb5,
+    0xd5, 0x91, 0x76, 0x43, 0x82, 0x45, 0x4c, 0x7b, 0x7f, 0xff, 0xc4, 0x00,
+    0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61,
+    0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01,
+    0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a,
+    0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96,
+    0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad,
+    0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7,
+    0xd4, 0xff, 0xd9};
+static const size_t kTest1JpgLen = 735;
+
+// test 2 is J420
+static const uint8_t kTest2Jpg[] = {
+    0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
+    0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
+    0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
+    0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
+    0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
+    0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
+    0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
+    0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12,
+    0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42,
+    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+    0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03,
+    0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00,
+    0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x05, 0x01, 0x02, 0x04, 0xff,
+    0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x01, 0x02, 0xff,
+    0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00,
+    0x01, 0x20, 0xe7, 0x28, 0xa3, 0x0b, 0x2e, 0x2d, 0xcf, 0xff, 0xc4, 0x00,
+    0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, 0x10,
+    0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, 0x02,
+    0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, 0x62,
+    0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x00, 0x03, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x01, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f,
+    0x01, 0xc8, 0x53, 0xff, 0xc4, 0x00, 0x16, 0x11, 0x01, 0x01, 0x01, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x11, 0x32, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f,
+    0x01, 0xd2, 0xc7, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03,
+    0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff,
+    0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28,
+    0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4,
+    0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51,
+    0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01,
+    0x3f, 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb,
+    0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9,
+    0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c,
+    0x03, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x13, 0x5f,
+    0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11,
+    0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x0e,
+    0xa1, 0x3a, 0x76, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x21, 0x11, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01,
+    0x3f, 0x10, 0x57, 0x0b, 0x08, 0x70, 0xdb, 0xff, 0xc4, 0x00, 0x1f, 0x10,
+    0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91,
+    0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01,
+    0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b,
+    0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec,
+    0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c,
+    0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff,
+    0xd9};
+static const size_t kTest2JpgLen = 685;
+
+// test 3 is J422
+static const uint8_t kTest3Jpg[] = {
+    0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
+    0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
+    0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
+    0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
+    0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
+    0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
+    0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
+    0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12,
+    0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42,
+    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+    0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03,
+    0x01, 0x21, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00,
+    0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4,
+    0x00, 0x17, 0x01, 0x00, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x00, 0xff,
+    0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00,
+    0x01, 0x43, 0x8d, 0x1f, 0xa2, 0xb3, 0xca, 0x1b, 0x57, 0x0f, 0xff, 0xc4,
+    0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03,
+    0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05,
+    0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26,
+    0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03, 0x01,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x01, 0x02, 0x10, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03,
+    0x01, 0x01, 0x3f, 0x01, 0x51, 0xce, 0x8c, 0x75, 0xff, 0xc4, 0x00, 0x18,
+    0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x61, 0x21, 0xff, 0xda,
+    0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xa6, 0xd9, 0x2f, 0x84,
+    0xe8, 0xf0, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda,
+    0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32,
+    0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00,
+    0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31,
+    0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f,
+    0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9,
+    0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6,
+    0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03,
+    0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x2e, 0x45, 0xff,
+    0xc4, 0x00, 0x18, 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21,
+    0x31, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x53,
+    0x50, 0xba, 0x54, 0xc1, 0x67, 0x4f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00,
+    0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x01, 0x11, 0x21, 0x00, 0x10, 0xff, 0xda, 0x00, 0x08,
+    0x01, 0x02, 0x01, 0x01, 0x3f, 0x10, 0x18, 0x81, 0x5c, 0x04, 0x1a, 0xca,
+    0x91, 0xbf, 0xff, 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04,
+    0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+    0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff,
+    0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9,
+    0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5,
+    0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c,
+    0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00,
+    0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, 0xd9};
+static const size_t kTest3JpgLen = 704;
+
+// test 4 is J422 vertical - not supported
+static const uint8_t kTest4Jpg[] = {
+    0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
+    0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
+    0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
+    0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
+    0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
+    0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
+    0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
+    0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12,
+    0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42,
+    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+    0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+    0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03,
+    0x01, 0x12, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00,
+    0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x05, 0x01, 0x02, 0x03, 0xff,
+    0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03, 0xff,
+    0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00,
+    0x01, 0xd2, 0x98, 0xe9, 0x03, 0x0c, 0x00, 0x46, 0x21, 0xd9, 0xff, 0xc4,
+    0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03,
+    0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05,
+    0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26,
+    0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x11, 0x01, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01,
+    0x3f, 0x01, 0x98, 0xb1, 0xbd, 0x47, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00,
+    0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x12, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08,
+    0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xb6, 0x35, 0xa2, 0xe1, 0x47, 0xff,
+    0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x21, 0x02,
+    0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00, 0x08, 0x01,
+    0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2, 0xed, 0xf9,
+    0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c, 0x10, 0x01,
+    0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61, 0x81, 0xf0,
+    0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21, 0x75, 0x6e,
+    0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01, 0xf3, 0xde,
+    0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48, 0x5d, 0x7a,
+    0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02,
+    0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x24, 0xaf, 0xff, 0xc4, 0x00, 0x19,
+    0x11, 0x00, 0x03, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x51, 0x21, 0x31, 0xff,
+    0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x59, 0x11, 0xca,
+    0x42, 0x60, 0x9f, 0x69, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03,
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x01, 0x11, 0x21, 0x31, 0x61, 0xff, 0xda, 0x00, 0x08, 0x01,
+    0x02, 0x01, 0x01, 0x3f, 0x10, 0xb0, 0xd7, 0x27, 0x51, 0xb6, 0x41, 0xff,
+    0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31,
+    0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08,
+    0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a,
+    0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd,
+    0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30,
+    0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03,
+    0x0b, 0xb7, 0xd4, 0xff, 0xd9};
+static const size_t kTest4JpgLen = 701;
+
+// Checks that MJPGSize() succeeds on kTest2Jpg (the J420 test image) and
+// reports the dimensions stored in that image's frame header.
+TEST_F(LibYUVConvertTest, TestMJPGSize) {
+  int width = 0;
+  int height = 0;
+  int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
+  EXPECT_EQ(0, ret);
+
+  // The SOF segment of kTest2Jpg encodes height 0x0010 and width 0x0020.
+  EXPECT_EQ(32, width);
+  EXPECT_EQ(16, height);
+
+  printf("test jpeg size %d x %d\n", width, height);
+}
+
+// Decodes kTest2Jpg (the J420 test image) to I420 and checks each output plane
+// against a known Djb2 hash.
+TEST_F(LibYUVConvertTest, TestMJPGToI420) {
+  int width = 0;
+  int height = 0;
+  int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
+  // Stop here on failure: width * height is used as a divisor below and may
+  // still be 0.
+  ASSERT_EQ(0, ret);
+
+  int half_width = (width + 1) / 2;
+  int half_height = (height + 1) / 2;
+  // Scale the iteration count so the total pixel work matches the configured
+  // benchmark size, but always decode at least once.
+  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+                             benchmark_height_ / (width * height);
+  if (benchmark_iterations < 1) {
+    benchmark_iterations = 1;
+  }
+
+  align_buffer_page_end(dst_y, width * height);
+  align_buffer_page_end(dst_u, half_width * half_height);
+  align_buffer_page_end(dst_v, half_width * half_height);
+  for (int times = 0; times < benchmark_iterations; ++times) {
+    ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_u, half_width,
+                     dst_v, half_width, width, height, width, height);
+  }
+  // Expect success
+  EXPECT_EQ(0, ret);
+
+  // Test result matches known hash value.
+  uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+  uint32_t dst_u_hash = HashDjb2(dst_u, half_width * half_height, 5381);
+  uint32_t dst_v_hash = HashDjb2(dst_v, half_width * half_height, 5381);
+  EXPECT_EQ(dst_y_hash, 2682851208u);
+  EXPECT_EQ(dst_u_hash, 2501859930u);
+  EXPECT_EQ(dst_v_hash, 2126459123u);
+
+  free_aligned_buffer_page_end(dst_y);
+  free_aligned_buffer_page_end(dst_u);
+  free_aligned_buffer_page_end(dst_v);
+}
+
+// Decodes kTest2Jpg (the J420 test image) directly to NV21 and checks that the
+// result equals decoding to I420 followed by I420ToNV21().
+TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) {
+  int width = 0;
+  int height = 0;
+  int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
+  // Stop here on failure: width * height is used as a divisor below and may
+  // still be 0.
+  ASSERT_EQ(0, ret);
+
+  int half_width = (width + 1) / 2;
+  int half_height = (height + 1) / 2;
+  // Scale the iteration count so the total pixel work matches the configured
+  // benchmark size, but always decode at least once.
+  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+                             benchmark_height_ / (width * height);
+  if (benchmark_iterations < 1) {
+    benchmark_iterations = 1;
+  }
+
+  // Convert to NV21
+  align_buffer_page_end(dst_y, width * height);
+  align_buffer_page_end(dst_vu, half_width * half_height * 2);
+
+  for (int times = 0; times < benchmark_iterations; ++times) {
+    ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_vu,
+                     half_width * 2, width, height, width, height);
+  }
+  // Expect success
+  EXPECT_EQ(0, ret);
+
+  // Convert to I420
+  align_buffer_page_end(dst2_y, width * height);
+  align_buffer_page_end(dst2_u, half_width * half_height);
+  align_buffer_page_end(dst2_v, half_width * half_height);
+  for (int times = 0; times < benchmark_iterations; ++times) {
+    ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width,
+                     dst2_v, half_width, width, height, width, height);
+  }
+  // Expect success
+  EXPECT_EQ(0, ret);
+
+  // Convert I420 to NV21
+  align_buffer_page_end(dst3_y, width * height);
+  align_buffer_page_end(dst3_vu, half_width * half_height * 2);
+
+  I420ToNV21(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y,
+             width, dst3_vu, half_width * 2, width, height);
+
+  // The direct and the two-step results must agree byte for byte.
+  for (int i = 0; i < width * height; ++i) {
+    EXPECT_EQ(dst_y[i], dst3_y[i]);
+  }
+  // The loop bound already covers both bytes of every VU pair.
+  for (int i = 0; i < half_width * half_height * 2; ++i) {
+    EXPECT_EQ(dst_vu[i], dst3_vu[i]);
+  }
+
+  free_aligned_buffer_page_end(dst3_y);
+  free_aligned_buffer_page_end(dst3_vu);
+
+  free_aligned_buffer_page_end(dst2_y);
+  free_aligned_buffer_page_end(dst2_u);
+  free_aligned_buffer_page_end(dst2_v);
+
+  free_aligned_buffer_page_end(dst_y);
+  free_aligned_buffer_page_end(dst_vu);
+}
+
+// Decodes kTest2Jpg (the J420 test image) directly to NV12 and checks that the
+// result equals decoding to I420 followed by I420ToNV12().
+TEST_F(LibYUVConvertTest, TestMJPGToI420_NV12) {
+  int width = 0;
+  int height = 0;
+  int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
+  // Stop here on failure: width * height is used as a divisor below and may
+  // still be 0.
+  ASSERT_EQ(0, ret);
+
+  int half_width = (width + 1) / 2;
+  int half_height = (height + 1) / 2;
+  // Scale the iteration count so the total pixel work matches the configured
+  // benchmark size, but always decode at least once.
+  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+                             benchmark_height_ / (width * height);
+  if (benchmark_iterations < 1) {
+    benchmark_iterations = 1;
+  }
+
+  // Convert to NV12
+  align_buffer_page_end(dst_y, width * height);
+  align_buffer_page_end(dst_uv, half_width * half_height * 2);
+
+  for (int times = 0; times < benchmark_iterations; ++times) {
+    ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv,
+                     half_width * 2, width, height, width, height);
+  }
+  // Expect success
+  EXPECT_EQ(0, ret);
+
+  // Convert to I420
+  align_buffer_page_end(dst2_y, width * height);
+  align_buffer_page_end(dst2_u, half_width * half_height);
+  align_buffer_page_end(dst2_v, half_width * half_height);
+  for (int times = 0; times < benchmark_iterations; ++times) {
+    ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width,
+                     dst2_v, half_width, width, height, width, height);
+  }
+  // Expect success
+  EXPECT_EQ(0, ret);
+
+  // Convert I420 to NV12
+  align_buffer_page_end(dst3_y, width * height);
+  align_buffer_page_end(dst3_uv, half_width * half_height * 2);
+
+  I420ToNV12(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y,
+             width, dst3_uv, half_width * 2, width, height);
+
+  // The direct and the two-step results must agree byte for byte.
+  for (int i = 0; i < width * height; ++i) {
+    EXPECT_EQ(dst_y[i], dst3_y[i]);
+  }
+  // The loop bound already covers both bytes of every UV pair.
+  for (int i = 0; i < half_width * half_height * 2; ++i) {
+    EXPECT_EQ(dst_uv[i], dst3_uv[i]);
+  }
+
+  free_aligned_buffer_page_end(dst3_y);
+  free_aligned_buffer_page_end(dst3_uv);
+
+  free_aligned_buffer_page_end(dst2_y);
+  free_aligned_buffer_page_end(dst2_u);
+  free_aligned_buffer_page_end(dst2_v);
+
+  free_aligned_buffer_page_end(dst_y);
+  free_aligned_buffer_page_end(dst_uv);
+}
+
+// Decodes kTest2Jpg (the J420 test image) to NV21 and checks the Y and
+// interleaved chroma planes against known Djb2 hashes.
+TEST_F(LibYUVConvertTest, TestMJPGToNV21_420) {
+  int width = 0;
+  int height = 0;
+  int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
+  // Stop here on failure: width * height is used as a divisor below and may
+  // still be 0.
+  ASSERT_EQ(0, ret);
+
+  int half_width = (width + 1) / 2;
+  int half_height = (height + 1) / 2;
+  // Scale the iteration count so the total pixel work matches the configured
+  // benchmark size, but always decode at least once.
+  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+                             benchmark_height_ / (width * height);
+  if (benchmark_iterations < 1) {
+    benchmark_iterations = 1;
+  }
+
+  align_buffer_page_end(dst_y, width * height);
+  align_buffer_page_end(dst_uv, half_width * half_height * 2);
+  for (int times = 0; times < benchmark_iterations; ++times) {
+    ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv,
+                     half_width * 2, width, height, width, height);
+  }
+  // Expect success
+  EXPECT_EQ(0, ret);
+
+  // Test result matches known hash value.
+  uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+  uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381);
+  EXPECT_EQ(dst_y_hash, 2682851208u);
+  EXPECT_EQ(dst_uv_hash, 1069662856u);
+
+  free_aligned_buffer_page_end(dst_y);
+  free_aligned_buffer_page_end(dst_uv);
+}
+
+// Decodes kTest2Jpg (the J420 test image) to NV12, swaps the chroma plane to
+// VU order, and checks the planes against the same hashes the NV21 test uses.
+TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) {
+  int width = 0;
+  int height = 0;
+  int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
+  // Stop here on failure: width * height is used as a divisor below and may
+  // still be 0.
+  ASSERT_EQ(0, ret);
+
+  int half_width = (width + 1) / 2;
+  int half_height = (height + 1) / 2;
+  // Scale the iteration count so the total pixel work matches the configured
+  // benchmark size, but always decode at least once.
+  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+                             benchmark_height_ / (width * height);
+  if (benchmark_iterations < 1) {
+    benchmark_iterations = 1;
+  }
+
+  align_buffer_page_end(dst_y, width * height);
+  align_buffer_page_end(dst_uv, half_width * half_height * 2);
+  for (int times = 0; times < benchmark_iterations; ++times) {
+    ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv,
+                     half_width * 2, width, height, width, height);
+  }
+  // Expect success
+  EXPECT_EQ(0, ret);
+
+  // Test result matches known hash value. Hashes are for VU so flip the plane.
+  uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+  align_buffer_page_end(dst_vu, half_width * half_height * 2);
+  SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
+              half_height);
+  uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
+  EXPECT_EQ(dst_y_hash, 2682851208u);
+  EXPECT_EQ(dst_vu_hash, 1069662856u);
+
+  free_aligned_buffer_page_end(dst_y);
+  free_aligned_buffer_page_end(dst_uv);
+  free_aligned_buffer_page_end(dst_vu);
+}
+
+// TODO(fbarchard): Improve test to compare against I422, not checksum
+// Decodes kTest3Jpg (the J422 test image) to NV21 and checks the Y and
+// interleaved chroma planes against known Djb2 hashes. Currently disabled.
+TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) {
+  int width = 0;
+  int height = 0;
+  int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height);
+  // Stop here on failure: width * height is used as a divisor below and may
+  // still be 0.
+  ASSERT_EQ(0, ret);
+
+  int half_width = (width + 1) / 2;
+  int half_height = (height + 1) / 2;
+  // Scale the iteration count so the total pixel work matches the configured
+  // benchmark size, but always decode at least once.
+  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+                             benchmark_height_ / (width * height);
+  if (benchmark_iterations < 1) {
+    benchmark_iterations = 1;
+  }
+
+  align_buffer_page_end(dst_y, width * height);
+  align_buffer_page_end(dst_uv, half_width * half_height * 2);
+  for (int times = 0; times < benchmark_iterations; ++times) {
+    ret = MJPGToNV21(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv,
+                     half_width * 2, width, height, width, height);
+  }
+  // Expect success
+  EXPECT_EQ(0, ret);
+
+  // Test result matches known hash value.
+  uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+  uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381);
+  EXPECT_EQ(dst_y_hash, 2682851208u);
+  EXPECT_EQ(dst_uv_hash, 493520167u);
+
+  free_aligned_buffer_page_end(dst_y);
+  free_aligned_buffer_page_end(dst_uv);
+}
+
+// Decodes kTest3Jpg (the J422 test image) to NV12, swaps the chroma plane to
+// VU order, and checks the planes against the same hashes the NV21 test uses.
+// Currently disabled.
+TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) {
+  int width = 0;
+  int height = 0;
+  int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height);
+  // Stop here on failure: width * height is used as a divisor below and may
+  // still be 0.
+  ASSERT_EQ(0, ret);
+
+  int half_width = (width + 1) / 2;
+  int half_height = (height + 1) / 2;
+  // Scale the iteration count so the total pixel work matches the configured
+  // benchmark size, but always decode at least once.
+  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+                             benchmark_height_ / (width * height);
+  if (benchmark_iterations < 1) {
+    benchmark_iterations = 1;
+  }
+
+  align_buffer_page_end(dst_y, width * height);
+  align_buffer_page_end(dst_uv, half_width * half_height * 2);
+  for (int times = 0; times < benchmark_iterations; ++times) {
+    ret = MJPGToNV12(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv,
+                     half_width * 2, width, height, width, height);
+  }
+  // Expect success
+  EXPECT_EQ(0, ret);
+
+  // Test result matches known hash value. Hashes are for VU so flip the plane.
+  uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+  align_buffer_page_end(dst_vu, half_width * half_height * 2);
+  SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
+              half_height);
+  uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
+  EXPECT_EQ(dst_y_hash, 2682851208u);
+  EXPECT_EQ(dst_vu_hash, 493520167u);
+
+  free_aligned_buffer_page_end(dst_y);
+  free_aligned_buffer_page_end(dst_uv);
+  free_aligned_buffer_page_end(dst_vu);
+}
+
+// Decodes kTest0Jpg to NV21 and checks the Y and interleaved chroma planes
+// against known Djb2 hashes.
+TEST_F(LibYUVConvertTest, TestMJPGToNV21_400) {
+  int width = 0;
+  int height = 0;
+  int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height);
+  // Stop here on failure: width * height is used as a divisor below and may
+  // still be 0.
+  ASSERT_EQ(0, ret);
+
+  int half_width = (width + 1) / 2;
+  int half_height = (height + 1) / 2;
+  // Scale the iteration count so the total pixel work matches the configured
+  // benchmark size, but always decode at least once.
+  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+                             benchmark_height_ / (width * height);
+  if (benchmark_iterations < 1) {
+    benchmark_iterations = 1;
+  }
+
+  align_buffer_page_end(dst_y, width * height);
+  align_buffer_page_end(dst_uv, half_width * half_height * 2);
+  for (int times = 0; times < benchmark_iterations; ++times) {
+    ret = MJPGToNV21(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv,
+                     half_width * 2, width, height, width, height);
+  }
+  // Expect success
+  EXPECT_EQ(0, ret);
+
+  // Test result matches known hash value.
+  uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+  uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381);
+  EXPECT_EQ(dst_y_hash, 330644005u);
+  EXPECT_EQ(dst_uv_hash, 135214341u);
+
+  free_aligned_buffer_page_end(dst_y);
+  free_aligned_buffer_page_end(dst_uv);
+}
+
+// Decodes kTest0Jpg to NV12, swaps the chroma plane to VU order, and checks
+// the planes against the same hashes the NV21 test uses.
+TEST_F(LibYUVConvertTest, TestMJPGToNV12_400) {
+  int width = 0;
+  int height = 0;
+  int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height);
+  // Stop here on failure: width * height is used as a divisor below and may
+  // still be 0.
+  ASSERT_EQ(0, ret);
+
+  int half_width = (width + 1) / 2;
+  int half_height = (height + 1) / 2;
+  // Scale the iteration count so the total pixel work matches the configured
+  // benchmark size, but always decode at least once.
+  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+                             benchmark_height_ / (width * height);
+  if (benchmark_iterations < 1) {
+    benchmark_iterations = 1;
+  }
+
+  align_buffer_page_end(dst_y, width * height);
+  align_buffer_page_end(dst_uv, half_width * half_height * 2);
+  for (int times = 0; times < benchmark_iterations; ++times) {
+    ret = MJPGToNV12(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv,
+                     half_width * 2, width, height, width, height);
+  }
+  // Expect success
+  EXPECT_EQ(0, ret);
+
+  // Test result matches known hash value. Hashes are for VU so flip the plane.
+  uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+  align_buffer_page_end(dst_vu, half_width * half_height * 2);
+  SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
+              half_height);
+  uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
+  EXPECT_EQ(dst_y_hash, 330644005u);
+  EXPECT_EQ(dst_vu_hash, 135214341u);
+
+  free_aligned_buffer_page_end(dst_y);
+  free_aligned_buffer_page_end(dst_uv);
+  free_aligned_buffer_page_end(dst_vu);
+}
+
+TEST_F(LibYUVConvertTest, TestMJPGToNV21_444) {
+  int width = 0;
+  int height = 0;
+  int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height);
+  EXPECT_EQ(0, ret);
+
+  int half_width = (width + 1) / 2;
+  int half_height = (height + 1) / 2;
+  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+                             benchmark_height_ / (width * height);
+  if (benchmark_iterations < 1) {
+    benchmark_iterations = 1;
+  }
+
+  align_buffer_page_end(dst_y, width * height);
+  align_buffer_page_end(dst_uv, half_width * half_height * 2);
+  for (int times = 0; times < benchmark_iterations; ++times) {
+    ret = MJPGToNV21(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv,
+                     half_width * 2, width, height, width, height);
+  }
+  // Expect success
+  EXPECT_EQ(0, ret);
+
+  // Test result matches known hash value.
+  uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+  uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381);
+  EXPECT_EQ(dst_y_hash, 2682851208u);
+  EXPECT_EQ(dst_uv_hash, 506143297u);
+
+  free_aligned_buffer_page_end(dst_y);
+  free_aligned_buffer_page_end(dst_uv);
+}
+
+TEST_F(LibYUVConvertTest, TestMJPGToNV12_444) {
+  int width = 0;
+  int height = 0;
+  int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height);
+  EXPECT_EQ(0, ret);
+
+  int half_width = (width + 1) / 2;
+  int half_height = (height + 1) / 2;
+  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+                             benchmark_height_ / (width * height);
+  if (benchmark_iterations < 1) {
+    benchmark_iterations = 1;
+  }
+
+  align_buffer_page_end(dst_y, width * height);
+  align_buffer_page_end(dst_uv, half_width * half_height * 2);
+  for (int times = 0; times < benchmark_iterations; ++times) {
+    ret = MJPGToNV12(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv,
+                     half_width * 2, width, height, width, height);
+  }
+  // Expect success
+  EXPECT_EQ(0, ret);
+
+  // Test result matches known hash value. Hashes are for VU so flip the plane.
+  uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+  align_buffer_page_end(dst_vu, half_width * half_height * 2);
+  SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
+              half_height);
+  uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
+  EXPECT_EQ(dst_y_hash, 2682851208u);
+  EXPECT_EQ(dst_vu_hash, 506143297u);
+
+  free_aligned_buffer_page_end(dst_y);
+  free_aligned_buffer_page_end(dst_uv);
+  free_aligned_buffer_page_end(dst_vu);
+}
+
+TEST_F(LibYUVConvertTest, TestMJPGToARGB) {
+  int width = 0;
+  int height = 0;
+  int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height);
+  EXPECT_EQ(0, ret);
+
+  int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+                             benchmark_height_ / (width * height);
+  if (benchmark_iterations < 1) {
+    benchmark_iterations = 1;
+  }
+
+  align_buffer_page_end(dst_argb, width * height * 4);
+  for (int times = 0; times < benchmark_iterations; ++times) {
+    ret = MJPGToARGB(kTest3Jpg, kTest3JpgLen, dst_argb, width * 4, width,
+                     height, width, height);
+  }
+  // Expect success
+  EXPECT_EQ(0, ret);
+
+  // Test result matches known hash value.
+  uint32_t dst_argb_hash = HashDjb2(dst_argb, width * height, 5381);
+#ifdef LIBYUV_UNLIMITED_DATA
+  EXPECT_EQ(dst_argb_hash, 3900633302u);
+#else
+  EXPECT_EQ(dst_argb_hash, 2355976473u);
+#endif
+
+  free_aligned_buffer_page_end(dst_argb);
+}
+
+static int ShowJPegInfo(const uint8_t* sample, size_t sample_size) {
+  MJpegDecoder mjpeg_decoder;
+  LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
+
+  int width = mjpeg_decoder.GetWidth();
+  int height = mjpeg_decoder.GetHeight();
+
+  // YUV420
+  if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
+      mjpeg_decoder.GetNumComponents() == 3 &&
+      mjpeg_decoder.GetVertSampFactor(0) == 2 &&
+      mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
+      mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+      mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+      mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+      mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+    printf("JPeg is J420, %dx%d %d bytes\n", width, height,
+           static_cast<int>(sample_size));
+    // YUV422
+  } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
+             mjpeg_decoder.GetNumComponents() == 3 &&
+             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+             mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
+             mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+             mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+             mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+             mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+    printf("JPeg is J422, %dx%d %d bytes\n", width, height,
+           static_cast<int>(sample_size));
+    // YUV444
+  } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
+             mjpeg_decoder.GetNumComponents() == 3 &&
+             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+             mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
+             mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+             mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+             mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+             mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+    printf("JPeg is J444, %dx%d %d bytes\n", width, height,
+           static_cast<int>(sample_size));
+    // YUV400
+  } else if (mjpeg_decoder.GetColorSpace() ==
+                 MJpegDecoder::kColorSpaceGrayscale &&
+             mjpeg_decoder.GetNumComponents() == 1 &&
+             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+             mjpeg_decoder.GetHorizSampFactor(0) == 1) {
+    printf("JPeg is J400, %dx%d %d bytes\n", width, height,
+           static_cast<int>(sample_size));
+  } else {
+    // Unknown colorspace.
+    printf("JPeg is Unknown colorspace.\n");
+  }
+  mjpeg_decoder.UnloadFrame();
+  return ret;
+}
+
+TEST_F(LibYUVConvertTest, TestMJPGInfo) {
+  EXPECT_EQ(1, ShowJPegInfo(kTest0Jpg, kTest0JpgLen));
+  EXPECT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen));
+  EXPECT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen));
+  EXPECT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen));
+  EXPECT_EQ(1, ShowJPegInfo(kTest4Jpg,
+                            kTest4JpgLen));  // Valid but unsupported.
+}
+#endif  // HAVE_JPEG
+
+TEST_F(LibYUVConvertTest, NV12Crop) {
+  const int SUBSAMP_X = 2;
+  const int SUBSAMP_Y = 2;
+  const int kWidth = benchmark_width_;
+  const int kHeight = benchmark_height_;
+  const int crop_y =
+      ((benchmark_height_ - (benchmark_height_ * 360 / 480)) / 2 + 1) & ~1;
+  const int kDestWidth = benchmark_width_;
+  const int kDestHeight = benchmark_height_ - crop_y * 2;
+  const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);
+  const int sample_size =
+      kWidth * kHeight + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2;
+  align_buffer_page_end(src_y, sample_size);
+  uint8_t* src_uv = src_y + kWidth * kHeight;
+
+  align_buffer_page_end(dst_y, kDestWidth * kDestHeight);
+  align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
+                                   SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+  align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
+                                   SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+
+  align_buffer_page_end(dst_y_2, kDestWidth * kDestHeight);
+  align_buffer_page_end(dst_u_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
+                                     SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+  align_buffer_page_end(dst_v_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
+                                     SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+
+  for (int i = 0; i < kHeight * kWidth; ++i) {
+    src_y[i] = (fastrand() & 0xff);
+  }
+  for (int i = 0; i < (SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideUV) * 2; ++i) {
+    src_uv[i] = (fastrand() & 0xff);
+  }
+  memset(dst_y, 1, kDestWidth * kDestHeight);
+  memset(dst_u, 2,
+         SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+  memset(dst_v, 3,
+         SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+  memset(dst_y_2, 1, kDestWidth * kDestHeight);
+  memset(dst_u_2, 2,
+         SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+  memset(dst_v_2, 3,
+         SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+
+  ConvertToI420(src_y, sample_size, dst_y_2, kDestWidth, dst_u_2,
+                SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v_2,
+                SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, crop_y, kWidth, kHeight,
+                kDestWidth, kDestHeight, libyuv::kRotate0, libyuv::FOURCC_NV12);
+
+  NV12ToI420(src_y + crop_y * kWidth, kWidth,
+             src_uv + (crop_y / 2) * kStrideUV * 2, kStrideUV * 2, dst_y,
+             kDestWidth, dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v,
+             SUBSAMPLE(kDestWidth, SUBSAMP_X), kDestWidth, kDestHeight);
+
+  for (int i = 0; i < kDestHeight; ++i) {
+    for (int j = 0; j < kDestWidth; ++j) {
+      EXPECT_EQ(dst_y[i * kWidth + j], dst_y_2[i * kWidth + j]);
+    }
+  }
+  for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) {
+    for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) {
+      EXPECT_EQ(dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j],
+                dst_u_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]);
+    }
+  }
+  for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) {
+    for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) {
+      EXPECT_EQ(dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j],
+                dst_v_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]);
+    }
+  }
+  free_aligned_buffer_page_end(dst_y);
+  free_aligned_buffer_page_end(dst_u);
+  free_aligned_buffer_page_end(dst_v);
+  free_aligned_buffer_page_end(dst_y_2);
+  free_aligned_buffer_page_end(dst_u_2);
+  free_aligned_buffer_page_end(dst_v_2);
+  free_aligned_buffer_page_end(src_y);
+}
+
+TEST_F(LibYUVConvertTest, I420CropOddY) {
+  const int SUBSAMP_X = 2;
+  const int SUBSAMP_Y = 2;
+  const int kWidth = benchmark_width_;
+  const int kHeight = benchmark_height_;
+  const int crop_y = benchmark_height_ > 1 ? 1 : 0;
+  const int kDestWidth = benchmark_width_;
+  const int kDestHeight = benchmark_height_ - crop_y * 2;
+  const int kStrideU = SUBSAMPLE(kWidth, SUBSAMP_X);
+  const int kStrideV = SUBSAMPLE(kWidth, SUBSAMP_X);
+  const int sample_size = kWidth * kHeight +
+                          kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y) +
+                          kStrideV * SUBSAMPLE(kHeight, SUBSAMP_Y);
+  align_buffer_page_end(src_y, sample_size);
+  uint8_t* src_u = src_y + kWidth * kHeight;
+  uint8_t* src_v = src_u + kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y);
+
+  align_buffer_page_end(dst_y, kDestWidth * kDestHeight);
+  align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
+                                   SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+  align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
+                                   SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+
+  for (int i = 0; i < kHeight * kWidth; ++i) {
+    src_y[i] = (fastrand() & 0xff);
+  }
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideU; ++i) {
+    src_u[i] = (fastrand() & 0xff);
+  }
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideV; ++i) {
+    src_v[i] = (fastrand() & 0xff);
+  }
+  memset(dst_y, 1, kDestWidth * kDestHeight);
+  memset(dst_u, 2,
+         SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+  memset(dst_v, 3,
+         SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+
+  MaskCpuFlags(benchmark_cpu_info_);
+  for (int i = 0; i < benchmark_iterations_; ++i) {
+    ConvertToI420(src_y, sample_size, dst_y, kDestWidth, dst_u,
+                  SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v,
+                  SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, crop_y, kWidth, kHeight,
+                  kDestWidth, kDestHeight, libyuv::kRotate0,
+                  libyuv::FOURCC_I420);
+  }
+
+  for (int i = 0; i < kDestHeight; ++i) {
+    for (int j = 0; j < kDestWidth; ++j) {
+      EXPECT_EQ(src_y[crop_y * kWidth + i * kWidth + j],
+                dst_y[i * kDestWidth + j]);
+    }
+  }
+  for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) {
+    for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) {
+      EXPECT_EQ(src_u[(crop_y / 2 + i) * kStrideU + j],
+                dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]);
+    }
+  }
+  for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) {
+    for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) {
+      EXPECT_EQ(src_v[(crop_y / 2 + i) * kStrideV + j],
+                dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]);
+    }
+  }
+
+  free_aligned_buffer_page_end(dst_y);
+  free_aligned_buffer_page_end(dst_u);
+  free_aligned_buffer_page_end(dst_v);
+  free_aligned_buffer_page_end(src_y);
+}
+
+#define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12)                                \
+  TEST_F(LibYUVConvertTest, NAME) {                                           \
+    const int kWidth = benchmark_width_;                                      \
+    const int kHeight = benchmark_height_;                                    \
+                                                                              \
+    align_buffer_page_end(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight);     \
+    align_buffer_page_end(orig_y, kWidth* kHeight);                           \
+    align_buffer_page_end(orig_u,                                             \
+                          SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));      \
+    align_buffer_page_end(orig_v,                                             \
+                          SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));      \
+                                                                              \
+    align_buffer_page_end(dst_y_orig, kWidth* kHeight);                       \
+    align_buffer_page_end(dst_uv_orig,                                        \
+                          2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));  \
+                                                                              \
+    align_buffer_page_end(dst_y, kWidth* kHeight);                            \
+    align_buffer_page_end(dst_uv,                                             \
+                          2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));  \
+                                                                              \
+    MemRandomize(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight);              \
+                                                                              \
+    /* Convert UYVY to NV12 in 2 steps for reference */                       \
+    libyuv::UYVYTOI420(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), orig_y, kWidth,   \
+                       orig_u, SUBSAMPLE(kWidth, 2), orig_v,                  \
+                       SUBSAMPLE(kWidth, 2), kWidth, kHeight);                \
+    libyuv::I420ToNV12(orig_y, kWidth, orig_u, SUBSAMPLE(kWidth, 2), orig_v,  \
+                       SUBSAMPLE(kWidth, 2), dst_y_orig, kWidth, dst_uv_orig, \
+                       2 * SUBSAMPLE(kWidth, 2), kWidth, kHeight);            \
+                                                                              \
+    /* Convert to NV12 */                                                     \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
+      libyuv::UYVYTONV12(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), dst_y, kWidth,  \
+                         dst_uv, 2 * SUBSAMPLE(kWidth, 2), kWidth, kHeight);  \
+    }                                                                         \
+                                                                              \
+    for (int i = 0; i < kWidth * kHeight; ++i) {                              \
+      EXPECT_EQ(orig_y[i], dst_y[i]);                                         \
+    }                                                                         \
+    for (int i = 0; i < kWidth * kHeight; ++i) {                              \
+      EXPECT_EQ(dst_y_orig[i], dst_y[i]);                                     \
+    }                                                                         \
+    for (int i = 0; i < 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2);     \
+         ++i) {                                                               \
+      EXPECT_EQ(dst_uv_orig[i], dst_uv[i]);                                   \
+    }                                                                         \
+                                                                              \
+    free_aligned_buffer_page_end(orig_uyvy);                                  \
+    free_aligned_buffer_page_end(orig_y);                                     \
+    free_aligned_buffer_page_end(orig_u);                                     \
+    free_aligned_buffer_page_end(orig_v);                                     \
+    free_aligned_buffer_page_end(dst_y_orig);                                 \
+    free_aligned_buffer_page_end(dst_uv_orig);                                \
+    free_aligned_buffer_page_end(dst_y);                                      \
+    free_aligned_buffer_page_end(dst_uv);                                     \
+  }
+
+TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12)
+TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12)
+
+TEST_F(LibYUVConvertTest, MM21ToYUY2) {
+  const int kWidth = (benchmark_width_ + 15) & (~15);
+  const int kHeight = (benchmark_height_ + 31) & (~31);
+
+  align_buffer_page_end(orig_y, kWidth * kHeight);
+  align_buffer_page_end(orig_uv,
+                        2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
+
+  align_buffer_page_end(tmp_y, kWidth * kHeight);
+  align_buffer_page_end(tmp_u, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
+  align_buffer_page_end(tmp_v, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
+
+  align_buffer_page_end(dst_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight);
+  align_buffer_page_end(golden_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight);
+
+  MemRandomize(orig_y, kWidth * kHeight);
+  MemRandomize(orig_uv, 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
+
+  /* Convert MM21 to YUY2 in 2 steps for reference */
+  libyuv::MM21ToI420(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2), tmp_y,
+                     kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v,
+                     SUBSAMPLE(kWidth, 2), kWidth, kHeight);
+  libyuv::I420ToYUY2(tmp_y, kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v,
+                     SUBSAMPLE(kWidth, 2), golden_yuyv,
+                     4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight);
+
+  /* Convert to YUY2 */
+  for (int i = 0; i < benchmark_iterations_; ++i) {
+    libyuv::MM21ToYUY2(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2),
+                       dst_yuyv, 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight);
+  }
+
+  for (int i = 0; i < 4 * SUBSAMPLE(kWidth, 2) * kHeight; ++i) {
+    EXPECT_EQ(dst_yuyv[i], golden_yuyv[i]);
+  }
+
+  free_aligned_buffer_page_end(orig_y);
+  free_aligned_buffer_page_end(orig_uv);
+  free_aligned_buffer_page_end(tmp_y);
+  free_aligned_buffer_page_end(tmp_u);
+  free_aligned_buffer_page_end(tmp_v);
+  free_aligned_buffer_page_end(dst_yuyv);
+  free_aligned_buffer_page_end(golden_yuyv);
+}
+
+// Test RGB24 to J420 is exact
+#if defined(LIBYUV_BIT_EXACT)
+TEST_F(LibYUVConvertTest, TestRGB24ToJ420) {
+  const int kSize = 256;
+  align_buffer_page_end(orig_rgb24, kSize * 3 * 2);  // 2 rows of RGB24
+  align_buffer_page_end(dest_j420, kSize * 3 / 2 * 2);
+  int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) /
+                      (kSize * 2) * benchmark_iterations_;
+
+  for (int i = 0; i < kSize * 3 * 2; ++i) {
+    orig_rgb24[i] = i;
+  }
+
+  for (int i = 0; i < iterations256; ++i) {
+    RGB24ToJ420(orig_rgb24, kSize * 3, dest_j420, kSize,  // Y plane
+                dest_j420 + kSize * 2, kSize / 2,         // U plane
+                dest_j420 + kSize * 5 / 2, kSize / 2,     // V plane
+                kSize, 2);
+  }
+
+  uint32_t checksum = HashDjb2(dest_j420, kSize * 3 / 2 * 2, 5381);
+  EXPECT_EQ(2755440272u, checksum);
+
+  free_aligned_buffer_page_end(orig_rgb24);
+  free_aligned_buffer_page_end(dest_j420);
+}
+#endif
+
+// Test RGB24 to I420 is exact
+#if defined(LIBYUV_BIT_EXACT)
+TEST_F(LibYUVConvertTest, TestRGB24ToI420) {
+  const int kSize = 256;
+  align_buffer_page_end(orig_rgb24, kSize * 3 * 2);  // 2 rows of RGB24
+  align_buffer_page_end(dest_i420, kSize * 3 / 2 * 2);
+  int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) /
+                      (kSize * 2) * benchmark_iterations_;
+
+  for (int i = 0; i < kSize * 3 * 2; ++i) {
+    orig_rgb24[i] = i;
+  }
+
+  for (int i = 0; i < iterations256; ++i) {
+    RGB24ToI420(orig_rgb24, kSize * 3, dest_i420, kSize,  // Y plane
+                dest_i420 + kSize * 2, kSize / 2,         // U plane
+                dest_i420 + kSize * 5 / 2, kSize / 2,     // V plane
+                kSize, 2);
+  }
+
+  uint32_t checksum = HashDjb2(dest_i420, kSize * 3 / 2 * 2, 5381);
+  EXPECT_EQ(1526656597u, checksum);
+
+  free_aligned_buffer_page_end(orig_rgb24);
+  free_aligned_buffer_page_end(dest_i420);
+}
+#endif
+
+#endif  // !defined(LEAN_TESTS)
+
+}  // namespace libyuv
diff --git a/files/unit_test/cpu_test.cc b/unit_test/cpu_test.cc
index 93867fa7..437b6632 100644
--- a/files/unit_test/cpu_test.cc
+++ b/unit_test/cpu_test.cc
@@ -47,14 +47,15 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
   int has_erms = TestCpuFlag(kCpuHasERMS);
   int has_fma3 = TestCpuFlag(kCpuHasFMA3);
   int has_f16c = TestCpuFlag(kCpuHasF16C);
-  int has_gfni = TestCpuFlag(kCpuHasGFNI);
   int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW);
   int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL);
   int has_avx512vnni = TestCpuFlag(kCpuHasAVX512VNNI);
   int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI);
   int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2);
   int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG);
-  int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ);
+  int has_avx10 = TestCpuFlag(kCpuHasAVX10);
+  int has_avxvnni = TestCpuFlag(kCpuHasAVXVNNI);
+  int has_avxvnniint8 = TestCpuFlag(kCpuHasAVXVNNIINT8);
   printf("Has X86 0x%x\n", has_x86);
   printf("Has SSE2 0x%x\n", has_sse2);
   printf("Has SSSE3 0x%x\n", has_ssse3);
@@ -65,14 +66,15 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
   printf("Has ERMS 0x%x\n", has_erms);
   printf("Has FMA3 0x%x\n", has_fma3);
   printf("Has F16C 0x%x\n", has_f16c);
-  printf("Has GFNI 0x%x\n", has_gfni);
   printf("Has AVX512BW 0x%x\n", has_avx512bw);
   printf("Has AVX512VL 0x%x\n", has_avx512vl);
   printf("Has AVX512VNNI 0x%x\n", has_avx512vnni);
   printf("Has AVX512VBMI 0x%x\n", has_avx512vbmi);
   printf("Has AVX512VBMI2 0x%x\n", has_avx512vbmi2);
   printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg);
-  printf("Has AVX512VPOPCNTDQ 0x%x\n", has_avx512vpopcntdq);
+  printf("Has AVX10 0x%x\n", has_avx10);
+  printf("HAS AVXVNNI 0x%x\n", has_avxvnni);
+  printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8);
 #endif
 #if defined(__mips__)
   int has_mips = TestCpuFlag(kCpuHasMIPS);
@@ -137,6 +139,9 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) {
 #ifdef __riscv_vector
   printf("__riscv_vector %d\n", __riscv_vector);
 #endif
+#ifdef __riscv_v_intrinsic
+  printf("__riscv_v_intrinsic %d\n", __riscv_v_intrinsic);
+#endif
 #ifdef __APPLE__
   printf("__APPLE__ %d\n", __APPLE__);
 #endif
@@ -180,7 +185,7 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) {
   printf("__pnacl__ %d\n", __pnacl__);
 #endif
 #ifdef GG_LONGLONG
-  printf("GG_LONGLONG %d\n", GG_LONGLONG);
+  printf("GG_LONGLONG %lld\n", GG_LONGLONG(1));
 #endif
 #ifdef INT_TYPES_DEFINED
   printf("INT_TYPES_DEFINED\n");
diff --git a/files/unit_test/cpu_thread_test.cc b/unit_test/cpu_thread_test.cc
index 69aab74e..69aab74e 100644
--- a/files/unit_test/cpu_thread_test.cc
+++ b/unit_test/cpu_thread_test.cc
diff --git a/files/unit_test/math_test.cc b/unit_test/math_test.cc
index a1544c12..a1544c12 100644
--- a/files/unit_test/math_test.cc
+++ b/unit_test/math_test.cc
diff --git a/files/unit_test/planar_test.cc b/unit_test/planar_test.cc
index ad97b87e..ec1d72eb 100644
--- a/files/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@@ -30,9 +30,9 @@
 #endif
 
 #if defined(LIBYUV_BIT_EXACT)
-#define EXPECTED_ATTENUATE_DIFF 0
+#define EXPECTED_UNATTENUATE_DIFF 0
 #else
-#define EXPECTED_ATTENUATE_DIFF 2
+#define EXPECTED_UNATTENUATE_DIFF 2
 #endif
 
 namespace libyuv {
@@ -57,12 +57,17 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) {
   orig_pixels[2 * 4 + 0] = 16u;
   orig_pixels[2 * 4 + 1] = 64u;
   orig_pixels[2 * 4 + 2] = 192u;
-  orig_pixels[2 * 4 + 3] = 255u;
+  orig_pixels[2 * 4 + 3] = 128u;
   orig_pixels[3 * 4 + 0] = 16u;
   orig_pixels[3 * 4 + 1] = 64u;
   orig_pixels[3 * 4 + 2] = 192u;
-  orig_pixels[3 * 4 + 3] = 128u;
-  ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 4, 1);
+  orig_pixels[3 * 4 + 3] = 255u;
+  orig_pixels[4 * 4 + 0] = 255u;
+  orig_pixels[4 * 4 + 1] = 255u;
+  orig_pixels[4 * 4 + 2] = 255u;
+  orig_pixels[4 * 4 + 3] = 255u;
+
+  ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 5, 1);
   EXPECT_EQ(255u, unatten_pixels[0 * 4 + 0]);
   EXPECT_EQ(255u, unatten_pixels[0 * 4 + 1]);
   EXPECT_EQ(254u, unatten_pixels[0 * 4 + 2]);
@@ -71,14 +76,55 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) {
   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 1]);
   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 2]);
   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 3]);
-  EXPECT_EQ(16u, unatten_pixels[2 * 4 + 0]);
-  EXPECT_EQ(64u, unatten_pixels[2 * 4 + 1]);
-  EXPECT_EQ(192u, unatten_pixels[2 * 4 + 2]);
-  EXPECT_EQ(255u, unatten_pixels[2 * 4 + 3]);
-  EXPECT_EQ(32u, unatten_pixels[3 * 4 + 0]);
-  EXPECT_EQ(128u, unatten_pixels[3 * 4 + 1]);
-  EXPECT_EQ(255u, unatten_pixels[3 * 4 + 2]);
-  EXPECT_EQ(128u, unatten_pixels[3 * 4 + 3]);
+  EXPECT_EQ(32u, unatten_pixels[2 * 4 + 0]);
+  EXPECT_EQ(128u, unatten_pixels[2 * 4 + 1]);
+  EXPECT_EQ(255u, unatten_pixels[2 * 4 + 2]);
+  EXPECT_EQ(128u, unatten_pixels[2 * 4 + 3]);
+  EXPECT_EQ(16u, unatten_pixels[3 * 4 + 0]);
+  EXPECT_EQ(64u, unatten_pixels[3 * 4 + 1]);
+  EXPECT_EQ(192u, unatten_pixels[3 * 4 + 2]);
+  EXPECT_EQ(255u, unatten_pixels[3 * 4 + 3]);
+  EXPECT_EQ(255u, unatten_pixels[4 * 4 + 0]);
+  EXPECT_EQ(255u, unatten_pixels[4 * 4 + 1]);
+  EXPECT_EQ(255u, unatten_pixels[4 * 4 + 2]);
+  EXPECT_EQ(255u, unatten_pixels[4 * 4 + 3]);
+
+  ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 5, 1);
+  EXPECT_EQ(100u, atten_pixels[0 * 4 + 0]);
+  EXPECT_EQ(65u, atten_pixels[0 * 4 + 1]);
+  EXPECT_EQ(64u, atten_pixels[0 * 4 + 2]);
+  EXPECT_EQ(128u, atten_pixels[0 * 4 + 3]);
+  EXPECT_EQ(0u, atten_pixels[1 * 4 + 0]);
+  EXPECT_EQ(0u, atten_pixels[1 * 4 + 1]);
+  EXPECT_EQ(0u, atten_pixels[1 * 4 + 2]);
+  EXPECT_EQ(0u, atten_pixels[1 * 4 + 3]);
+  EXPECT_EQ(8u, atten_pixels[2 * 4 + 0]);
+  EXPECT_EQ(32u, atten_pixels[2 * 4 + 1]);
+  EXPECT_EQ(96u, atten_pixels[2 * 4 + 2]);
+  EXPECT_EQ(128u, atten_pixels[2 * 4 + 3]);
+  EXPECT_EQ(16u, atten_pixels[3 * 4 + 0]);
+  EXPECT_EQ(64u, atten_pixels[3 * 4 + 1]);
+  EXPECT_EQ(192u, atten_pixels[3 * 4 + 2]);
+  EXPECT_EQ(255u, atten_pixels[3 * 4 + 3]);
+  EXPECT_EQ(255u, atten_pixels[4 * 4 + 0]);
+  EXPECT_EQ(255u, atten_pixels[4 * 4 + 1]);
+  EXPECT_EQ(255u, atten_pixels[4 * 4 + 2]);
+  EXPECT_EQ(255u, atten_pixels[4 * 4 + 3]);
+
+  // Test that alpha 255 (opaque) leaves all 256 color values unchanged.
+  for (int i = 0; i < 256; ++i) {
+    orig_pixels[i * 4 + 0] = i;
+    orig_pixels[i * 4 + 1] = 0;
+    orig_pixels[i * 4 + 2] = 0;
+    orig_pixels[i * 4 + 3] = 255;
+  }
+  ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 256, 1);
+  for (int i = 0; i < 256; ++i) {
+    EXPECT_EQ(orig_pixels[i * 4 + 0], atten_pixels[i * 4 + 0]);
+    EXPECT_EQ(0, atten_pixels[i * 4 + 1]);
+    EXPECT_EQ(0, atten_pixels[i * 4 + 2]);
+    EXPECT_EQ(255, atten_pixels[i * 4 + 3]);
+  }
 
   for (int i = 0; i < 1280; ++i) {
     orig_pixels[i * 4 + 0] = i;
@@ -92,10 +138,10 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) {
     ARGBAttenuate(unatten_pixels, 0, atten2_pixels, 0, 1280, 1);
   }
   for (int i = 0; i < 1280; ++i) {
-    EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 2);
-    EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 2);
-    EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 2);
-    EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 2);
+    EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 1);
+    EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 1);
+    EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 1);
+    EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 1);
   }
   // Make sure transparent, 50% and opaque are fully accurate.
   EXPECT_EQ(0, atten_pixels[0 * 4 + 0]);
@@ -106,9 +152,9 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) {
   EXPECT_EQ(32, atten_pixels[128 * 4 + 1]);
   EXPECT_EQ(21, atten_pixels[128 * 4 + 2]);
   EXPECT_EQ(128, atten_pixels[128 * 4 + 3]);
-  EXPECT_NEAR(254, atten_pixels[255 * 4 + 0], EXPECTED_ATTENUATE_DIFF);
-  EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], EXPECTED_ATTENUATE_DIFF);
-  EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], EXPECTED_ATTENUATE_DIFF);
+  EXPECT_EQ(255, atten_pixels[255 * 4 + 0]);
+  EXPECT_EQ(127, atten_pixels[255 * 4 + 1]);
+  EXPECT_EQ(85, atten_pixels[255 * 4 + 2]);
   EXPECT_EQ(255, atten_pixels[255 * 4 + 3]);
 
   free_aligned_buffer_page_end(atten2_pixels);
@@ -165,28 +211,28 @@ TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) {
                                 benchmark_iterations_, disable_cpu_flags_,
                                 benchmark_cpu_info_, +1, 0);
 
-  EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
+  EXPECT_EQ(max_diff, 0);
 }
 
 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) {
   int max_diff =
       TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
                      disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
-  EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
+  EXPECT_EQ(max_diff, 0);
 }
 
 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) {
   int max_diff =
       TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
                      disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
-  EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
+  EXPECT_EQ(max_diff, 0);
 }
 
 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) {
   int max_diff =
       TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
                      disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
-  EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
+  EXPECT_EQ(max_diff, 0);
 }
 
 static int TestUnattenuateI(int width,
@@ -238,28 +284,28 @@ TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) {
   int max_diff = TestUnattenuateI(benchmark_width_ + 1, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, +1, 0);
-  EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
+  EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
 }
 
 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
   int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, +1, 1);
-  EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
+  EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
 }
 
 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
   int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, -1, 0);
-  EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
+  EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
 }
 
 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
   int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, +1, 0);
-  EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
+  EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
 }
 
 TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
@@ -2749,12 +2795,23 @@ TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
   MaskCpuFlags(disable_cpu_flags_);
   ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_c,
                    benchmark_width_, benchmark_width_, benchmark_height_);
-  MaskCpuFlags(benchmark_cpu_info_);
+  double c_time = get_time();
+  ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_c,
+                   benchmark_width_, benchmark_width_, benchmark_height_);
+  c_time = (get_time() - c_time);
 
+  MaskCpuFlags(benchmark_cpu_info_);
+  ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_opt,
+                   benchmark_width_, benchmark_width_, benchmark_height_);
+  double opt_time = get_time();
   for (int i = 0; i < benchmark_iterations_; ++i) {
     ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_opt,
                      benchmark_width_, benchmark_width_, benchmark_height_);
   }
+  opt_time = (get_time() - opt_time) / benchmark_iterations_;
+  // Report performance of C vs OPT
+  printf("%8d us C - %8d us OPT\n", static_cast<int>(c_time * 1e6),
+         static_cast<int>(opt_time * 1e6));
   for (int i = 0; i < kPixels; ++i) {
     EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
   }
@@ -2777,12 +2834,24 @@ TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
   MaskCpuFlags(disable_cpu_flags_);
   ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_c,
                    benchmark_width_ * 4, benchmark_width_, benchmark_height_);
-  MaskCpuFlags(benchmark_cpu_info_);
+  double c_time = get_time();
+  ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_c,
+                   benchmark_width_ * 4, benchmark_width_, benchmark_height_);
+  c_time = (get_time() - c_time);
 
+  MaskCpuFlags(benchmark_cpu_info_);
+  ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_opt,
+                   benchmark_width_ * 4, benchmark_width_, benchmark_height_);
+  double opt_time = get_time();
   for (int i = 0; i < benchmark_iterations_; ++i) {
     ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_opt,
                      benchmark_width_ * 4, benchmark_width_, benchmark_height_);
   }
+  opt_time = (get_time() - opt_time) / benchmark_iterations_;
+
+  // Report performance of C vs OPT
+  printf("%8d us C - %8d us OPT\n", static_cast<int>(c_time * 1e6),
+         static_cast<int>(opt_time * 1e6));
   for (int i = 0; i < kPixels * 4; ++i) {
     EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
   }
@@ -4468,4 +4537,83 @@ TEST_F(LibYUVPlanarTest, NV21Copy) {
   free_aligned_buffer_page_end(dst_vu);
 }
 
+#if defined(ENABLE_ROW_TESTS) && !defined(LIBYUV_DISABLE_NEON) && \
+    defined(__aarch64__)
+
+TEST_F(LibYUVPlanarTest, TestConvertFP16ToFP32) {
+  int i, j;
+  const int y_plane_size = benchmark_width_ * benchmark_height_;
+
+  align_buffer_page_end(orig_f, y_plane_size * 4);
+  align_buffer_page_end(orig_y, y_plane_size * 2);
+  align_buffer_page_end(dst_opt, y_plane_size * 4);
+  align_buffer_page_end(rec_opt, y_plane_size * 2);
+
+  for (i = 0; i < y_plane_size; ++i) {
+    ((float*)orig_f)[i] = (float)(i % 10000) * 3.14f;
+  }
+  memset(orig_y, 1, y_plane_size * 2);
+  memset(dst_opt, 2, y_plane_size * 4);
+  memset(rec_opt, 3, y_plane_size * 2);
+
+  ConvertFP32ToFP16Row_NEON((const float*)orig_f, (uint16_t*)orig_y,
+                            y_plane_size);
+
+  for (j = 0; j < benchmark_iterations_; j++) {
+    ConvertFP16ToFP32Row_NEON((const uint16_t*)orig_y, (float*)dst_opt,
+                              y_plane_size);
+  }
+
+  ConvertFP32ToFP16Row_NEON((const float*)dst_opt, (uint16_t*)rec_opt,
+                            y_plane_size);
+
+  for (i = 0; i < y_plane_size; ++i) {
+    EXPECT_EQ(((const uint16_t*)orig_y)[i], ((const uint16_t*)rec_opt)[i]);
+  }
+
+  free_aligned_buffer_page_end(orig_f);
+  free_aligned_buffer_page_end(orig_y);
+  free_aligned_buffer_page_end(dst_opt);
+  free_aligned_buffer_page_end(rec_opt);
+}
+
+TEST_F(LibYUVPlanarTest, TestConvertFP16ToFP32Column) {
+  int i, j;
+  const int y_plane_size = benchmark_width_ * benchmark_height_;
+
+  align_buffer_page_end(orig_f, y_plane_size * 4);
+  align_buffer_page_end(orig_y, y_plane_size * 2);
+  align_buffer_page_end(dst_opt, y_plane_size * 4);
+  align_buffer_page_end(rec_opt, y_plane_size * 2);
+
+  for (i = 0; i < y_plane_size; ++i) {
+    ((float*)orig_f)[i] = (float)(i % 10000) * 3.14f;
+  }
+  memset(orig_y, 1, y_plane_size * 2);
+  memset(dst_opt, 2, y_plane_size * 4);
+  memset(rec_opt, 3, y_plane_size * 2);
+
+  ConvertFP32ToFP16Row_NEON((const float*)orig_f, (uint16_t*)orig_y,
+                            y_plane_size);
+
+  for (j = 0; j < benchmark_iterations_; j++) {
+    ConvertFP16ToFP32Column_NEON((const uint16_t*)orig_y, 1, (float*)dst_opt,
+                                 y_plane_size);
+  }
+
+  ConvertFP32ToFP16Row_NEON((const float*)dst_opt, (uint16_t*)rec_opt,
+                            y_plane_size);
+
+  for (i = 0; i < y_plane_size; ++i) {
+    EXPECT_EQ(((const uint16_t*)orig_y)[i], ((const uint16_t*)rec_opt)[i]);
+  }
+
+  free_aligned_buffer_page_end(orig_f);
+  free_aligned_buffer_page_end(orig_y);
+  free_aligned_buffer_page_end(dst_opt);
+  free_aligned_buffer_page_end(rec_opt);
+}
+
+#endif  // ENABLE_ROW_TESTS && !LIBYUV_DISABLE_NEON && __aarch64__
+
 }  // namespace libyuv
diff --git a/files/unit_test/rotate_argb_test.cc b/unit_test/rotate_argb_test.cc
index 74952c4e..74952c4e 100644
--- a/files/unit_test/rotate_argb_test.cc
+++ b/unit_test/rotate_argb_test.cc
diff --git a/files/unit_test/rotate_test.cc b/unit_test/rotate_test.cc
index abc08efa..abc08efa 100644
--- a/files/unit_test/rotate_test.cc
+++ b/unit_test/rotate_test.cc
diff --git a/files/unit_test/scale_argb_test.cc b/unit_test/scale_argb_test.cc
index f54a68f1..f54a68f1 100644
--- a/files/unit_test/scale_argb_test.cc
+++ b/unit_test/scale_argb_test.cc
diff --git a/unit_test/scale_plane_test.cc b/unit_test/scale_plane_test.cc
new file mode 100644
index 00000000..9ce47a02
--- /dev/null
+++ b/unit_test/scale_plane_test.cc
@@ -0,0 +1,470 @@
+/*
+ *  Copyright 2023 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <time.h>
+
+#include "../unit_test/unit_test.h"
+#include "libyuv/cpu_id.h"
+#include "libyuv/scale.h"
+
+#ifdef ENABLE_ROW_TESTS
+#include "libyuv/scale_row.h"  // For ScaleRowDown2Box_Odd_C
+#endif
+
+#define STRINGIZE(line) #line
+#define FILELINESTR(file, line) file ":" STRINGIZE(line)
+
+#if defined(__riscv) && !defined(__clang__)
+#define DISABLE_SLOW_TESTS
+#undef ENABLE_FULL_TESTS
+#undef ENABLE_ROW_TESTS
+#define LEAN_TESTS
+#endif
+
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+// SLOW TESTS are those that are unoptimized C code.
+// FULL TESTS are optimized but test many variations of the same code.
+#define ENABLE_FULL_TESTS
+#endif
+
+namespace libyuv {
+
+#ifdef ENABLE_ROW_TESTS
+#ifdef HAS_SCALEROWDOWN2_SSSE3
+TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
+  SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
+  SIMD_ALIGNED(uint8_t dst_pixels_opt[64]);
+  SIMD_ALIGNED(uint8_t dst_pixels_c[64]);
+  memset(orig_pixels, 0, sizeof(orig_pixels));
+  memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt));
+  memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
+
+  int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
+  if (!has_ssse3) {
+    printf("Warning SSSE3 not detected; Skipping test.\n");
+  } else {
+    // TL.
+    orig_pixels[0] = 255u;
+    orig_pixels[1] = 0u;
+    orig_pixels[128 + 0] = 0u;
+    orig_pixels[128 + 1] = 0u;
+    // TR.
+    orig_pixels[2] = 0u;
+    orig_pixels[3] = 100u;
+    orig_pixels[128 + 2] = 0u;
+    orig_pixels[128 + 3] = 0u;
+    // BL.
+    orig_pixels[4] = 0u;
+    orig_pixels[5] = 0u;
+    orig_pixels[128 + 4] = 50u;
+    orig_pixels[128 + 5] = 0u;
+    // BR.
+    orig_pixels[6] = 0u;
+    orig_pixels[7] = 0u;
+    orig_pixels[128 + 6] = 0u;
+    orig_pixels[128 + 7] = 20u;
+    // Odd.
+    orig_pixels[126] = 4u;
+    orig_pixels[127] = 255u;
+    orig_pixels[128 + 126] = 16u;
+    orig_pixels[128 + 127] = 255u;
+
+    // Test regular half size.
+    ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64);
+
+    EXPECT_EQ(64u, dst_pixels_c[0]);
+    EXPECT_EQ(25u, dst_pixels_c[1]);
+    EXPECT_EQ(13u, dst_pixels_c[2]);
+    EXPECT_EQ(5u, dst_pixels_c[3]);
+    EXPECT_EQ(0u, dst_pixels_c[4]);
+    EXPECT_EQ(133u, dst_pixels_c[63]);
+
+    // Test Odd width version - Last pixel is just 1 horizontal pixel.
+    ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
+
+    EXPECT_EQ(64u, dst_pixels_c[0]);
+    EXPECT_EQ(25u, dst_pixels_c[1]);
+    EXPECT_EQ(13u, dst_pixels_c[2]);
+    EXPECT_EQ(5u, dst_pixels_c[3]);
+    EXPECT_EQ(0u, dst_pixels_c[4]);
+    EXPECT_EQ(10u, dst_pixels_c[63]);
+
+    // Test one pixel less, should skip the last pixel.
+    memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
+    ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63);
+
+    EXPECT_EQ(64u, dst_pixels_c[0]);
+    EXPECT_EQ(25u, dst_pixels_c[1]);
+    EXPECT_EQ(13u, dst_pixels_c[2]);
+    EXPECT_EQ(5u, dst_pixels_c[3]);
+    EXPECT_EQ(0u, dst_pixels_c[4]);
+    EXPECT_EQ(0u, dst_pixels_c[63]);
+
+    // Test regular half size SSSE3.
+    ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
+
+    EXPECT_EQ(64u, dst_pixels_opt[0]);
+    EXPECT_EQ(25u, dst_pixels_opt[1]);
+    EXPECT_EQ(13u, dst_pixels_opt[2]);
+    EXPECT_EQ(5u, dst_pixels_opt[3]);
+    EXPECT_EQ(0u, dst_pixels_opt[4]);
+    EXPECT_EQ(133u, dst_pixels_opt[63]);
+
+    // Compare C and SSSE3 match.
+    ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
+    ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
+    for (int i = 0; i < 64; ++i) {
+      EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+    }
+  }
+}
+#endif  // HAS_SCALEROWDOWN2_SSSE3
+
+extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
+                                         ptrdiff_t src_stride,
+                                         uint16_t* dst,
+                                         int dst_width);
+
+TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
+  SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]);
+  SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
+  SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
+
+  memset(orig_pixels, 0, sizeof(orig_pixels));
+  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
+  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
+
+  for (int i = 0; i < 2560 * 2; ++i) {
+    orig_pixels[i] = i;
+  }
+  ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280);
+  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+    int has_neon = TestCpuFlag(kCpuHasNEON);
+    if (has_neon) {
+      ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
+    } else {
+      ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
+    }
+#else
+    ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
+#endif
+  }
+
+  for (int i = 0; i < 1280; ++i) {
+    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+  }
+
+  EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
+  EXPECT_EQ(dst_pixels_c[1279], 3839);
+}
+#endif  // ENABLE_ROW_TESTS
+
+// Scale a MemRandomize() filled plane with 8 bit ScalePlane under
+// disable_cpu_flags and with ScalePlane_16 (same samples widened to 16 bit)
+// under benchmark_cpu_info. Returns max absolute pixel difference; 0 = exact.
+static int TestPlaneFilter_16(int src_width,
+                              int src_height,
+                              int dst_width,
+                              int dst_height,
+                              FilterMode f,
+                              int benchmark_iterations,
+                              int disable_cpu_flags,
+                              int benchmark_cpu_info) {
+  if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
+    return 0;  // Size rejected by SizeValid(); nothing is scaled or compared.
+  }
+
+  int i;
+  int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+  int src_stride_y = Abs(src_width);
+  int dst_y_plane_size = dst_width * dst_height;
+  int dst_stride_y = dst_width;
+
+  align_buffer_page_end(src_y, src_y_plane_size);
+  align_buffer_page_end(src_y_16, src_y_plane_size * 2);
+  align_buffer_page_end(dst_y_8, dst_y_plane_size);
+  align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
+  uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
+  uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
+
+  MemRandomize(src_y, src_y_plane_size);
+  memset(dst_y_8, 0, dst_y_plane_size);
+  memset(dst_y_16, 1, dst_y_plane_size * 2);
+
+  for (i = 0; i < src_y_plane_size; ++i) {
+    p_src_y_16[i] = src_y[i] & 255;
+  }
+
+  MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
+  ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y,
+             dst_width, dst_height, f);
+  MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
+
+  for (i = 0; i < benchmark_iterations; ++i) {
+    ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16,
+                  dst_stride_y, dst_width, dst_height, f);
+  }
+
+  // Track the largest absolute 8 bit vs 16 bit difference; caller checks it.
+  int max_diff = 0;
+  for (i = 0; i < dst_y_plane_size; ++i) {
+    int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
+    if (abs_diff > max_diff) {
+      max_diff = abs_diff;
+    }
+  }
+
+  free_aligned_buffer_page_end(dst_y_8);
+  free_aligned_buffer_page_end(dst_y_16);
+  free_aligned_buffer_page_end(src_y);
+  free_aligned_buffer_page_end(src_y_16);
+
+  return max_diff;
+}
+
+// The following adjustments in dimensions ensure the scale factor will be
+// exactly achieved.
+// 2 is chroma subsample.
+#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
+#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
+
+#define TEST_FACTOR1(name, filter, nom, denom, max_diff)                       \
+  TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \
+    int diff = TestPlaneFilter_16(                                             \
+        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom),   \
+        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom),   \
+        kFilter##filter, benchmark_iterations_, disable_cpu_flags_,            \
+        benchmark_cpu_info_);                                                  \
+    EXPECT_LE(diff, max_diff);                                                 \
+  }
+
+// Test a scale factor with all 4 filters.  Expect unfiltered to be exact, but
+// filtered output may differ: SSSE3, Neon and C use different fixed point math.
+#define TEST_FACTOR(name, nom, denom, boxdiff)      \
+  TEST_FACTOR1(name, None, nom, denom, 0)           \
+  TEST_FACTOR1(name, Linear, nom, denom, boxdiff)   \
+  TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \
+  TEST_FACTOR1(name, Box, nom, denom, boxdiff)
+
+TEST_FACTOR(2, 1, 2, 0)
+TEST_FACTOR(4, 1, 4, 0)
+// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance.  Takes 90 seconds.
+TEST_FACTOR(3by4, 3, 4, 1)
+TEST_FACTOR(3by8, 3, 8, 1)
+TEST_FACTOR(3, 1, 3, 0)
+#undef TEST_FACTOR1
+#undef TEST_FACTOR
+#undef SX
+#undef DX
+
+TEST_F(LibYUVScaleTest, PlaneTest3x) {
+  const int kSrcStride = 480;
+  const int kDstStride = 160;
+  const int kSize = kSrcStride * 3;
+  align_buffer_page_end(orig_pixels, kSize);
+  for (int i = 0; i < 480 * 3; ++i) {
+    orig_pixels[i] = i;
+  }
+  align_buffer_page_end(dest_pixels, kDstStride);
+
+  int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
+                      benchmark_iterations_;
+  for (int i = 0; i < iterations160; ++i) {
+    ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
+               kFilterBilinear);
+  }
+
+  EXPECT_EQ(225, dest_pixels[0]);
+
+  ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
+             kFilterNone);
+
+  EXPECT_EQ(225, dest_pixels[0]);
+
+  free_aligned_buffer_page_end(dest_pixels);
+  free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, PlaneTest4x) {
+  const int kSrcStride = 640;
+  const int kDstStride = 160;
+  const int kSize = kSrcStride * 4;
+  align_buffer_page_end(orig_pixels, kSize);
+  for (int i = 0; i < 640 * 4; ++i) {
+    orig_pixels[i] = i;
+  }
+  align_buffer_page_end(dest_pixels, kDstStride);
+
+  int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
+                      benchmark_iterations_;
+  for (int i = 0; i < iterations160; ++i) {
+    ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
+               kFilterBilinear);
+  }
+
+  EXPECT_EQ(66, dest_pixels[0]);
+
+  ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
+             kFilterNone);
+
+  EXPECT_EQ(2, dest_pixels[0]);  // expect the 3rd pixel of the 3rd row
+
+  free_aligned_buffer_page_end(dest_pixels);
+  free_aligned_buffer_page_end(orig_pixels);
+}
+
+// Intent is to test 200x50 to 50x200 but width and height can be parameters.
+TEST_F(LibYUVScaleTest, PlaneTestRotate_None) {
+  const int kSize = benchmark_width_ * benchmark_height_;
+  align_buffer_page_end(orig_pixels, kSize);
+  for (int i = 0; i < kSize; ++i) {
+    orig_pixels[i] = i;
+  }
+  align_buffer_page_end(dest_opt_pixels, kSize);
+  align_buffer_page_end(dest_c_pixels, kSize);
+
+  MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
+  ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
+             dest_c_pixels, benchmark_height_, benchmark_height_,
+             benchmark_width_, kFilterNone);
+  MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.
+
+  for (int i = 0; i < benchmark_iterations_; ++i) {
+    ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
+               benchmark_height_, dest_opt_pixels, benchmark_height_,
+               benchmark_height_, benchmark_width_, kFilterNone);
+  }
+
+  for (int i = 0; i < kSize; ++i) {
+    EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
+  }
+
+  free_aligned_buffer_page_end(dest_c_pixels);
+  free_aligned_buffer_page_end(dest_opt_pixels);
+  free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) {
+  const int kSize = benchmark_width_ * benchmark_height_;
+  align_buffer_page_end(orig_pixels, kSize);
+  for (int i = 0; i < kSize; ++i) {
+    orig_pixels[i] = i;
+  }
+  align_buffer_page_end(dest_opt_pixels, kSize);
+  align_buffer_page_end(dest_c_pixels, kSize);
+
+  MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
+  ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
+             dest_c_pixels, benchmark_height_, benchmark_height_,
+             benchmark_width_, kFilterBilinear);
+  MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.
+
+  for (int i = 0; i < benchmark_iterations_; ++i) {
+    ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
+               benchmark_height_, dest_opt_pixels, benchmark_height_,
+               benchmark_height_, benchmark_width_, kFilterBilinear);
+  }
+
+  for (int i = 0; i < kSize; ++i) {
+    EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
+  }
+
+  free_aligned_buffer_page_end(dest_c_pixels);
+  free_aligned_buffer_page_end(dest_opt_pixels);
+  free_aligned_buffer_page_end(orig_pixels);
+}
+
+// Intent is to test 200x50 to 50x200 but width and height can be parameters.
+TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) {
+  const int kSize = benchmark_width_ * benchmark_height_;
+  align_buffer_page_end(orig_pixels, kSize);
+  for (int i = 0; i < kSize; ++i) {
+    orig_pixels[i] = i;
+  }
+  align_buffer_page_end(dest_opt_pixels, kSize);
+  align_buffer_page_end(dest_c_pixels, kSize);
+
+  MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
+  ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
+             dest_c_pixels, benchmark_height_, benchmark_height_,
+             benchmark_width_, kFilterBox);
+  MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.
+
+  for (int i = 0; i < benchmark_iterations_; ++i) {
+    ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
+               benchmark_height_, dest_opt_pixels, benchmark_height_,
+               benchmark_height_, benchmark_width_, kFilterBox);
+  }
+
+  for (int i = 0; i < kSize; ++i) {
+    EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
+  }
+
+  free_aligned_buffer_page_end(dest_c_pixels);
+  free_aligned_buffer_page_end(dest_opt_pixels);
+  free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, PlaneTest1_Box) {
+  align_buffer_page_end(orig_pixels, 3);
+  align_buffer_page_end(dst_pixels, 3);
+
+  // Surround the 1x1 source pixel with sentinel values and pre-fill dst with 3
+  // so an access outside the 1x1 image can change the values checked below.
+  orig_pixels[0] = 0;
+  orig_pixels[1] = 1;  // scale this pixel
+  orig_pixels[2] = 2;
+  dst_pixels[0] = 3;
+  dst_pixels[1] = 3;
+  dst_pixels[2] = 3;
+
+  libyuv::ScalePlane(orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1,
+                     /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
+                     /* dst_width= */ 1, /* dst_height= */ 2,
+                     libyuv::kFilterBox);
+
+  EXPECT_EQ(dst_pixels[0], 1);
+  EXPECT_EQ(dst_pixels[1], 1);
+  EXPECT_EQ(dst_pixels[2], 3);  // Sentinel past the 1x2 output is untouched.
+
+  free_aligned_buffer_page_end(dst_pixels);
+  free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) {  // NOTE(review): uses kFilterNone.
+  align_buffer_page_end(orig_pixels_alloc, 3 * 2);
+  align_buffer_page_end(dst_pixels_alloc, 3 * 2);
+  uint16_t* orig_pixels = (uint16_t*)orig_pixels_alloc;
+  uint16_t* dst_pixels = (uint16_t*)dst_pixels_alloc;
+
+  // Surround the 1x1 16-bit source pixel with sentinel values and pre-fill dst
+  // with 3 so an access outside the 1x1 image can change the values checked.
+  orig_pixels[0] = 0;
+  orig_pixels[1] = 1;  // scale this pixel
+  orig_pixels[2] = 2;
+  dst_pixels[0] = 3;
+  dst_pixels[1] = 3;
+  dst_pixels[2] = 3;
+
+  libyuv::ScalePlane_16(
+      orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1,
+      /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
+      /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone);
+
+  EXPECT_EQ(dst_pixels[0], 1);
+  EXPECT_EQ(dst_pixels[1], 1);
+  EXPECT_EQ(dst_pixels[2], 3);  // Sentinel past the 1x2 output is untouched.
+
+  free_aligned_buffer_page_end(dst_pixels_alloc);
+  free_aligned_buffer_page_end(orig_pixels_alloc);
+}
+}  // namespace libyuv
diff --git a/files/unit_test/scale_rgb_test.cc b/unit_test/scale_rgb_test.cc
index 8296abe3..8296abe3 100644
--- a/files/unit_test/scale_rgb_test.cc
+++ b/unit_test/scale_rgb_test.cc
diff --git a/files/unit_test/scale_test.cc b/unit_test/scale_test.cc
index a8c95268..6e3b9271 100644
--- a/files/unit_test/scale_test.cc
+++ b/unit_test/scale_test.cc
@@ -22,6 +22,11 @@
 #define STRINGIZE(line) #line
 #define FILELINESTR(file, line) file ":" STRINGIZE(line)
 
+#if defined(__riscv) && !defined(__clang__)
+#define DISABLE_SLOW_TESTS
+#undef ENABLE_FULL_TESTS
+#endif
+
 #if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
 // SLOW TESTS are those that are unoptimized C code.
 // FULL TESTS are optimized but test many variations of the same code.
@@ -1123,479 +1128,6 @@ TEST_SCALESWAPXY1(DISABLED_, Scale, Bilinear, 3)
 TEST_SCALESWAPXY1(DISABLED_, Scale, Box, 3)
 #endif
 #endif
-
 #undef TEST_SCALESWAPXY1
 
-#ifdef ENABLE_ROW_TESTS
-#ifdef HAS_SCALEROWDOWN2_SSSE3
-TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
-  SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
-  SIMD_ALIGNED(uint8_t dst_pixels_opt[64]);
-  SIMD_ALIGNED(uint8_t dst_pixels_c[64]);
-  memset(orig_pixels, 0, sizeof(orig_pixels));
-  memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt));
-  memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
-
-  int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
-  if (!has_ssse3) {
-    printf("Warning SSSE3 not detected; Skipping test.\n");
-  } else {
-    // TL.
-    orig_pixels[0] = 255u;
-    orig_pixels[1] = 0u;
-    orig_pixels[128 + 0] = 0u;
-    orig_pixels[128 + 1] = 0u;
-    // TR.
-    orig_pixels[2] = 0u;
-    orig_pixels[3] = 100u;
-    orig_pixels[128 + 2] = 0u;
-    orig_pixels[128 + 3] = 0u;
-    // BL.
-    orig_pixels[4] = 0u;
-    orig_pixels[5] = 0u;
-    orig_pixels[128 + 4] = 50u;
-    orig_pixels[128 + 5] = 0u;
-    // BR.
-    orig_pixels[6] = 0u;
-    orig_pixels[7] = 0u;
-    orig_pixels[128 + 6] = 0u;
-    orig_pixels[128 + 7] = 20u;
-    // Odd.
-    orig_pixels[126] = 4u;
-    orig_pixels[127] = 255u;
-    orig_pixels[128 + 126] = 16u;
-    orig_pixels[128 + 127] = 255u;
-
-    // Test regular half size.
-    ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64);
-
-    EXPECT_EQ(64u, dst_pixels_c[0]);
-    EXPECT_EQ(25u, dst_pixels_c[1]);
-    EXPECT_EQ(13u, dst_pixels_c[2]);
-    EXPECT_EQ(5u, dst_pixels_c[3]);
-    EXPECT_EQ(0u, dst_pixels_c[4]);
-    EXPECT_EQ(133u, dst_pixels_c[63]);
-
-    // Test Odd width version - Last pixel is just 1 horizontal pixel.
-    ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
-
-    EXPECT_EQ(64u, dst_pixels_c[0]);
-    EXPECT_EQ(25u, dst_pixels_c[1]);
-    EXPECT_EQ(13u, dst_pixels_c[2]);
-    EXPECT_EQ(5u, dst_pixels_c[3]);
-    EXPECT_EQ(0u, dst_pixels_c[4]);
-    EXPECT_EQ(10u, dst_pixels_c[63]);
-
-    // Test one pixel less, should skip the last pixel.
-    memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
-    ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63);
-
-    EXPECT_EQ(64u, dst_pixels_c[0]);
-    EXPECT_EQ(25u, dst_pixels_c[1]);
-    EXPECT_EQ(13u, dst_pixels_c[2]);
-    EXPECT_EQ(5u, dst_pixels_c[3]);
-    EXPECT_EQ(0u, dst_pixels_c[4]);
-    EXPECT_EQ(0u, dst_pixels_c[63]);
-
-    // Test regular half size SSSE3.
-    ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
-
-    EXPECT_EQ(64u, dst_pixels_opt[0]);
-    EXPECT_EQ(25u, dst_pixels_opt[1]);
-    EXPECT_EQ(13u, dst_pixels_opt[2]);
-    EXPECT_EQ(5u, dst_pixels_opt[3]);
-    EXPECT_EQ(0u, dst_pixels_opt[4]);
-    EXPECT_EQ(133u, dst_pixels_opt[63]);
-
-    // Compare C and SSSE3 match.
-    ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
-    ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
-    for (int i = 0; i < 64; ++i) {
-      EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
-    }
-  }
-}
-#endif  // HAS_SCALEROWDOWN2_SSSE3
-
-extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
-                                    ptrdiff_t src_stride,
-                                    uint16_t* dst,
-                                    int dst_width);
-extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr,
-                                 ptrdiff_t src_stride,
-                                 uint16_t* dst,
-                                 int dst_width);
-
-TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
-  SIMD_ALIGNED(uint16_t orig_pixels[640 * 2 + 1]);  // 2 rows + 1 pixel overrun.
-  SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
-  SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
-
-  memset(orig_pixels, 0, sizeof(orig_pixels));
-  memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt));
-  memset(dst_pixels_c, 2, sizeof(dst_pixels_c));
-
-  for (int i = 0; i < 640 * 2 + 1; ++i) {
-    orig_pixels[i] = i;
-  }
-  ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_c[0], 1280);
-  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-    int has_neon = TestCpuFlag(kCpuHasNEON);
-    if (has_neon) {
-      ScaleRowUp2_16_NEON(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
-    } else {
-      ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
-    }
-#else
-    ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
-#endif
-  }
-
-  for (int i = 0; i < 1280; ++i) {
-    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
-  }
-  EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16);
-  EXPECT_EQ(dst_pixels_c[1279], 800);
-}
-
-extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
-                                         ptrdiff_t src_stride,
-                                         uint16_t* dst,
-                                         int dst_width);
-
-TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
-  SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]);
-  SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
-  SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
-
-  memset(orig_pixels, 0, sizeof(orig_pixels));
-  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
-  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
-
-  for (int i = 0; i < 2560 * 2; ++i) {
-    orig_pixels[i] = i;
-  }
-  ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280);
-  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-    int has_neon = TestCpuFlag(kCpuHasNEON);
-    if (has_neon) {
-      ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
-    } else {
-      ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
-    }
-#else
-    ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
-#endif
-  }
-
-  for (int i = 0; i < 1280; ++i) {
-    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
-  }
-
-  EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
-  EXPECT_EQ(dst_pixels_c[1279], 3839);
-}
-#endif  // ENABLE_ROW_TESTS
-
-// Test scaling plane with 8 bit C vs 12 bit C and return maximum pixel
-// difference.
-// 0 = exact.
-static int TestPlaneFilter_16(int src_width,
-                              int src_height,
-                              int dst_width,
-                              int dst_height,
-                              FilterMode f,
-                              int benchmark_iterations,
-                              int disable_cpu_flags,
-                              int benchmark_cpu_info) {
-  if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
-    return 0;
-  }
-
-  int i;
-  int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
-  int src_stride_y = Abs(src_width);
-  int dst_y_plane_size = dst_width * dst_height;
-  int dst_stride_y = dst_width;
-
-  align_buffer_page_end(src_y, src_y_plane_size);
-  align_buffer_page_end(src_y_16, src_y_plane_size * 2);
-  align_buffer_page_end(dst_y_8, dst_y_plane_size);
-  align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
-  uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
-  uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
-
-  MemRandomize(src_y, src_y_plane_size);
-  memset(dst_y_8, 0, dst_y_plane_size);
-  memset(dst_y_16, 1, dst_y_plane_size * 2);
-
-  for (i = 0; i < src_y_plane_size; ++i) {
-    p_src_y_16[i] = src_y[i] & 255;
-  }
-
-  MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
-  ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y,
-             dst_width, dst_height, f);
-  MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
-
-  for (i = 0; i < benchmark_iterations; ++i) {
-    ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16,
-                  dst_stride_y, dst_width, dst_height, f);
-  }
-
-  // Expect an exact match.
-  int max_diff = 0;
-  for (i = 0; i < dst_y_plane_size; ++i) {
-    int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
-    if (abs_diff > max_diff) {
-      max_diff = abs_diff;
-    }
-  }
-
-  free_aligned_buffer_page_end(dst_y_8);
-  free_aligned_buffer_page_end(dst_y_16);
-  free_aligned_buffer_page_end(src_y);
-  free_aligned_buffer_page_end(src_y_16);
-
-  return max_diff;
-}
-
-// The following adjustments in dimensions ensure the scale factor will be
-// exactly achieved.
-// 2 is chroma subsample.
-#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
-#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
-
-#define TEST_FACTOR1(name, filter, nom, denom, max_diff)                       \
-  TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \
-    int diff = TestPlaneFilter_16(                                             \
-        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom),   \
-        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom),   \
-        kFilter##filter, benchmark_iterations_, disable_cpu_flags_,            \
-        benchmark_cpu_info_);                                                  \
-    EXPECT_LE(diff, max_diff);                                                 \
-  }
-
-// Test a scale factor with all 4 filters.  Expect unfiltered to be exact, but
-// filtering is different fixed point implementations for SSSE3, Neon and C.
-#define TEST_FACTOR(name, nom, denom, boxdiff)      \
-  TEST_FACTOR1(name, None, nom, denom, 0)           \
-  TEST_FACTOR1(name, Linear, nom, denom, boxdiff)   \
-  TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \
-  TEST_FACTOR1(name, Box, nom, denom, boxdiff)
-
-TEST_FACTOR(2, 1, 2, 0)
-TEST_FACTOR(4, 1, 4, 0)
-// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance.  Takes 90 seconds.
-TEST_FACTOR(3by4, 3, 4, 1)
-TEST_FACTOR(3by8, 3, 8, 1)
-TEST_FACTOR(3, 1, 3, 0)
-#undef TEST_FACTOR1
-#undef TEST_FACTOR
-#undef SX
-#undef DX
-
-TEST_F(LibYUVScaleTest, PlaneTest3x) {
-  const int kSrcStride = 480;
-  const int kDstStride = 160;
-  const int kSize = kSrcStride * 3;
-  align_buffer_page_end(orig_pixels, kSize);
-  for (int i = 0; i < 480 * 3; ++i) {
-    orig_pixels[i] = i;
-  }
-  align_buffer_page_end(dest_pixels, kDstStride);
-
-  int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
-                      benchmark_iterations_;
-  for (int i = 0; i < iterations160; ++i) {
-    ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
-               kFilterBilinear);
-  }
-
-  EXPECT_EQ(225, dest_pixels[0]);
-
-  ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
-             kFilterNone);
-
-  EXPECT_EQ(225, dest_pixels[0]);
-
-  free_aligned_buffer_page_end(dest_pixels);
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(LibYUVScaleTest, PlaneTest4x) {
-  const int kSrcStride = 640;
-  const int kDstStride = 160;
-  const int kSize = kSrcStride * 4;
-  align_buffer_page_end(orig_pixels, kSize);
-  for (int i = 0; i < 640 * 4; ++i) {
-    orig_pixels[i] = i;
-  }
-  align_buffer_page_end(dest_pixels, kDstStride);
-
-  int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
-                      benchmark_iterations_;
-  for (int i = 0; i < iterations160; ++i) {
-    ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
-               kFilterBilinear);
-  }
-
-  EXPECT_EQ(66, dest_pixels[0]);
-
-  ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
-             kFilterNone);
-
-  EXPECT_EQ(2, dest_pixels[0]);  // expect the 3rd pixel of the 3rd row
-
-  free_aligned_buffer_page_end(dest_pixels);
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-// Intent is to test 200x50 to 50x200 but width and height can be parameters.
-TEST_F(LibYUVScaleTest, PlaneTestRotate_None) {
-  const int kSize = benchmark_width_ * benchmark_height_;
-  align_buffer_page_end(orig_pixels, kSize);
-  for (int i = 0; i < kSize; ++i) {
-    orig_pixels[i] = i;
-  }
-  align_buffer_page_end(dest_opt_pixels, kSize);
-  align_buffer_page_end(dest_c_pixels, kSize);
-
-  MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
-  ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
-             dest_c_pixels, benchmark_height_, benchmark_height_,
-             benchmark_width_, kFilterNone);
-  MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.
-
-  for (int i = 0; i < benchmark_iterations_; ++i) {
-    ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
-               benchmark_height_, dest_opt_pixels, benchmark_height_,
-               benchmark_height_, benchmark_width_, kFilterNone);
-  }
-
-  for (int i = 0; i < kSize; ++i) {
-    EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
-  }
-
-  free_aligned_buffer_page_end(dest_c_pixels);
-  free_aligned_buffer_page_end(dest_opt_pixels);
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) {
-  const int kSize = benchmark_width_ * benchmark_height_;
-  align_buffer_page_end(orig_pixels, kSize);
-  for (int i = 0; i < kSize; ++i) {
-    orig_pixels[i] = i;
-  }
-  align_buffer_page_end(dest_opt_pixels, kSize);
-  align_buffer_page_end(dest_c_pixels, kSize);
-
-  MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
-  ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
-             dest_c_pixels, benchmark_height_, benchmark_height_,
-             benchmark_width_, kFilterBilinear);
-  MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.
-
-  for (int i = 0; i < benchmark_iterations_; ++i) {
-    ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
-               benchmark_height_, dest_opt_pixels, benchmark_height_,
-               benchmark_height_, benchmark_width_, kFilterBilinear);
-  }
-
-  for (int i = 0; i < kSize; ++i) {
-    EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
-  }
-
-  free_aligned_buffer_page_end(dest_c_pixels);
-  free_aligned_buffer_page_end(dest_opt_pixels);
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-// Intent is to test 200x50 to 50x200 but width and height can be parameters.
-TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) {
-  const int kSize = benchmark_width_ * benchmark_height_;
-  align_buffer_page_end(orig_pixels, kSize);
-  for (int i = 0; i < kSize; ++i) {
-    orig_pixels[i] = i;
-  }
-  align_buffer_page_end(dest_opt_pixels, kSize);
-  align_buffer_page_end(dest_c_pixels, kSize);
-
-  MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
-  ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
-             dest_c_pixels, benchmark_height_, benchmark_height_,
-             benchmark_width_, kFilterBox);
-  MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.
-
-  for (int i = 0; i < benchmark_iterations_; ++i) {
-    ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
-               benchmark_height_, dest_opt_pixels, benchmark_height_,
-               benchmark_height_, benchmark_width_, kFilterBox);
-  }
-
-  for (int i = 0; i < kSize; ++i) {
-    EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
-  }
-
-  free_aligned_buffer_page_end(dest_c_pixels);
-  free_aligned_buffer_page_end(dest_opt_pixels);
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(LibYUVScaleTest, PlaneTest1_Box) {
-  align_buffer_page_end(orig_pixels, 3);
-  align_buffer_page_end(dst_pixels, 3);
-
-  // Pad the 1x1 byte image with invalid values before and after in case libyuv
-  // reads outside the memory boundaries.
-  orig_pixels[0] = 0;
-  orig_pixels[1] = 1;  // scale this pixel
-  orig_pixels[2] = 2;
-  dst_pixels[0] = 3;
-  dst_pixels[1] = 3;
-  dst_pixels[2] = 3;
-
-  libyuv::ScalePlane(orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1,
-                     /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
-                     /* dst_width= */ 1, /* dst_height= */ 2,
-                     libyuv::kFilterBox);
-
-  EXPECT_EQ(dst_pixels[0], 1);
-  EXPECT_EQ(dst_pixels[1], 1);
-  EXPECT_EQ(dst_pixels[2], 3);
-
-  free_aligned_buffer_page_end(dst_pixels);
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) {
-  align_buffer_page_end(orig_pixels_alloc, 3 * 2);
-  align_buffer_page_end(dst_pixels_alloc, 3 * 2);
-  uint16_t* orig_pixels = (uint16_t*)orig_pixels_alloc;
-  uint16_t* dst_pixels = (uint16_t*)dst_pixels_alloc;
-
-  // Pad the 1x1 byte image with invalid values before and after in case libyuv
-  // reads outside the memory boundaries.
-  orig_pixels[0] = 0;
-  orig_pixels[1] = 1;  // scale this pixel
-  orig_pixels[2] = 2;
-  dst_pixels[0] = 3;
-  dst_pixels[1] = 3;
-  dst_pixels[2] = 3;
-
-  libyuv::ScalePlane_16(
-      orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1,
-      /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
-      /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone);
-
-  EXPECT_EQ(dst_pixels[0], 1);
-  EXPECT_EQ(dst_pixels[1], 1);
-  EXPECT_EQ(dst_pixels[2], 3);
-
-  free_aligned_buffer_page_end(dst_pixels_alloc);
-  free_aligned_buffer_page_end(orig_pixels_alloc);
-}
 }  // namespace libyuv
diff --git a/files/unit_test/scale_uv_test.cc b/unit_test/scale_uv_test.cc
index dab217c9..dab217c9 100644
--- a/files/unit_test/scale_uv_test.cc
+++ b/unit_test/scale_uv_test.cc
diff --git a/files/unit_test/testdata/arm_v7.txt b/unit_test/testdata/arm_v7.txt
index 5d7dbd04..5d7dbd04 100644
--- a/files/unit_test/testdata/arm_v7.txt
+++ b/unit_test/testdata/arm_v7.txt
diff --git a/files/unit_test/testdata/juno.txt b/unit_test/testdata/juno.txt
index dd465272..dd465272 100644
--- a/files/unit_test/testdata/juno.txt
+++ b/unit_test/testdata/juno.txt
diff --git a/files/unit_test/testdata/mips.txt b/unit_test/testdata/mips.txt
index d9f28cbf..d9f28cbf 100644
--- a/files/unit_test/testdata/mips.txt
+++ b/unit_test/testdata/mips.txt
diff --git a/files/unit_test/testdata/mips_loongson2k.txt b/unit_test/testdata/mips_loongson2k.txt
index 8a88d38f..8a88d38f 100644
--- a/files/unit_test/testdata/mips_loongson2k.txt
+++ b/unit_test/testdata/mips_loongson2k.txt
diff --git a/files/unit_test/testdata/mips_loongson3.txt b/unit_test/testdata/mips_loongson3.txt
index 1f540b12..1f540b12 100644
--- a/files/unit_test/testdata/mips_loongson3.txt
+++ b/unit_test/testdata/mips_loongson3.txt
diff --git a/files/unit_test/testdata/mips_loongson_mmi.txt b/unit_test/testdata/mips_loongson_mmi.txt
index 0f10b8bb..0f10b8bb 100644
--- a/files/unit_test/testdata/mips_loongson_mmi.txt
+++ b/unit_test/testdata/mips_loongson_mmi.txt
diff --git a/files/unit_test/testdata/mips_msa.txt b/unit_test/testdata/mips_msa.txt
index ac930615..ac930615 100644
--- a/files/unit_test/testdata/mips_msa.txt
+++ b/unit_test/testdata/mips_msa.txt
diff --git a/files/unit_test/testdata/riscv64.txt b/unit_test/testdata/riscv64.txt
index fbb4200f..fbb4200f 100644
--- a/files/unit_test/testdata/riscv64.txt
+++ b/unit_test/testdata/riscv64.txt
diff --git a/files/unit_test/testdata/riscv64_rvv.txt b/unit_test/testdata/riscv64_rvv.txt
index af1b3f36..af1b3f36 100644
--- a/files/unit_test/testdata/riscv64_rvv.txt
+++ b/unit_test/testdata/riscv64_rvv.txt
diff --git a/files/unit_test/testdata/riscv64_rvv_zvfh.txt b/unit_test/testdata/riscv64_rvv_zvfh.txt
index c416c1af..c416c1af 100644
--- a/files/unit_test/testdata/riscv64_rvv_zvfh.txt
+++ b/unit_test/testdata/riscv64_rvv_zvfh.txt
diff --git a/files/unit_test/testdata/tegra3.txt b/unit_test/testdata/tegra3.txt
index d1b09f6b..d1b09f6b 100644
--- a/files/unit_test/testdata/tegra3.txt
+++ b/unit_test/testdata/tegra3.txt
diff --git a/files/unit_test/testdata/test0.jpg b/unit_test/testdata/test0.jpg
index f4461a81..f4461a81 100644
--- a/files/unit_test/testdata/test0.jpg
+++ b/unit_test/testdata/test0.jpg
diff --git a/files/unit_test/testdata/test1.jpg b/unit_test/testdata/test1.jpg
index a0210e9d..a0210e9d 100644
--- a/files/unit_test/testdata/test1.jpg
+++ b/unit_test/testdata/test1.jpg
diff --git a/files/unit_test/testdata/test2.jpg b/unit_test/testdata/test2.jpg
index 816ca767..816ca767 100644
--- a/files/unit_test/testdata/test2.jpg
+++ b/unit_test/testdata/test2.jpg
diff --git a/files/unit_test/testdata/test3.jpg b/unit_test/testdata/test3.jpg
index 792d91dc..792d91dc 100644
--- a/files/unit_test/testdata/test3.jpg
+++ b/unit_test/testdata/test3.jpg
diff --git a/files/unit_test/testdata/test4.jpg b/unit_test/testdata/test4.jpg
index 1ef41668..1ef41668 100644
--- a/files/unit_test/testdata/test4.jpg
+++ b/unit_test/testdata/test4.jpg
diff --git a/files/unit_test/unit_test.cc b/unit_test/unit_test.cc
index b66ebfab..239d5b92 100644
--- a/files/unit_test/unit_test.cc
+++ b/unit_test/unit_test.cc
@@ -144,11 +144,14 @@ int TestCpuEnv(int cpu_info) {
   if (TestEnv("LIBYUV_DISABLE_AVX512VBITALG")) {
     cpu_info &= ~libyuv::kCpuHasAVX512VBITALG;
   }
-  if (TestEnv("LIBYUV_DISABLE_AVX512VPOPCNTDQ")) {
-    cpu_info &= ~libyuv::kCpuHasAVX512VPOPCNTDQ;
+  if (TestEnv("LIBYUV_DISABLE_AVX10")) {
+    cpu_info &= ~libyuv::kCpuHasAVX10;
   }
-  if (TestEnv("LIBYUV_DISABLE_GFNI")) {
-    cpu_info &= ~libyuv::kCpuHasGFNI;
+  if (TestEnv("LIBYUV_DISABLE_AVXVNNI")) {
+    cpu_info &= ~libyuv::kCpuHasAVXVNNI;
+  }
+  if (TestEnv("LIBYUV_DISABLE_AVXVNNIINT8")) {
+    cpu_info &= ~libyuv::kCpuHasAVXVNNIINT8;
   }
 #endif
   if (TestEnv("LIBYUV_DISABLE_ASM")) {
diff --git a/files/unit_test/unit_test.h b/unit_test/unit_test.h
index 99cc8d19..99cc8d19 100644
--- a/files/unit_test/unit_test.h
+++ b/unit_test/unit_test.h
diff --git a/files/unit_test/video_common_test.cc b/unit_test/video_common_test.cc
index 36728ea9..36728ea9 100644
--- a/files/unit_test/video_common_test.cc
+++ b/unit_test/video_common_test.cc
diff --git a/files/util/Makefile b/util/Makefile
index 40e74b65..40e74b65 100644
--- a/files/util/Makefile
+++ b/util/Makefile
diff --git a/files/util/color.cc b/util/color.cc
index 8c3bbefd..8c3bbefd 100644
--- a/files/util/color.cc
+++ b/util/color.cc
diff --git a/files/util/compare.cc b/util/compare.cc
index a16613ee..a16613ee 100644
--- a/files/util/compare.cc
+++ b/util/compare.cc
diff --git a/files/util/cpuid.c b/util/cpuid.c
index edc6a26e..c07e6e95 100644
--- a/files/util/cpuid.c
+++ b/util/cpuid.c
@@ -96,14 +96,15 @@ int main(int argc, const char* argv[]) {
     int has_erms = TestCpuFlag(kCpuHasERMS);
     int has_fma3 = TestCpuFlag(kCpuHasFMA3);
     int has_f16c = TestCpuFlag(kCpuHasF16C);
-    int has_gfni = TestCpuFlag(kCpuHasGFNI);
     int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW);
     int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL);
     int has_avx512vnni = TestCpuFlag(kCpuHasAVX512VNNI);
     int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI);
     int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2);
     int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG);
-    int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ);
+    int has_avx10 = TestCpuFlag(kCpuHasAVX10);
+    int has_avxvnni = TestCpuFlag(kCpuHasAVXVNNI);
+    int has_avxvnniint8 = TestCpuFlag(kCpuHasAVXVNNIINT8);
     printf("Has X86 0x%x\n", has_x86);
     printf("Has SSE2 0x%x\n", has_sse2);
     printf("Has SSSE3 0x%x\n", has_ssse3);
@@ -114,14 +115,15 @@ int main(int argc, const char* argv[]) {
     printf("Has ERMS 0x%x\n", has_erms);
     printf("Has FMA3 0x%x\n", has_fma3);
     printf("Has F16C 0x%x\n", has_f16c);
-    printf("Has GFNI 0x%x\n", has_gfni);
     printf("Has AVX512BW 0x%x\n", has_avx512bw);
     printf("Has AVX512VL 0x%x\n", has_avx512vl);
     printf("Has AVX512VNNI 0x%x\n", has_avx512vnni);
     printf("Has AVX512VBMI 0x%x\n", has_avx512vbmi);
     printf("Has AVX512VBMI2 0x%x\n", has_avx512vbmi2);
     printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg);
-    printf("Has AVX512VPOPCNTDQ 0x%x\n", has_avx512vpopcntdq);
+    printf("Has AVX10 0x%x\n", has_avx10);
+    printf("HAS AVXVNNI 0x%x\n", has_avxvnni);
+    printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8);
   }
   return 0;
 }
diff --git a/files/util/i444tonv12_eg.cc b/util/i444tonv12_eg.cc
index 0fcb4095..0fcb4095 100644
--- a/files/util/i444tonv12_eg.cc
+++ b/util/i444tonv12_eg.cc
diff --git a/files/util/psnr.cc b/util/psnr.cc
index c7bee7f9..c7bee7f9 100644
--- a/files/util/psnr.cc
+++ b/util/psnr.cc
diff --git a/files/util/psnr.h b/util/psnr.h
index aac128cb..aac128cb 100644
--- a/files/util/psnr.h
+++ b/util/psnr.h
diff --git a/files/util/psnr_main.cc b/util/psnr_main.cc
index 8b9fd972..8b9fd972 100644
--- a/files/util/psnr_main.cc
+++ b/util/psnr_main.cc
diff --git a/files/util/ssim.cc b/util/ssim.cc
index 096fbcf0..096fbcf0 100644
--- a/files/util/ssim.cc
+++ b/util/ssim.cc
diff --git a/files/util/ssim.h b/util/ssim.h
index a855f1d1..a855f1d1 100644
--- a/files/util/ssim.h
+++ b/util/ssim.h
diff --git a/files/util/yuvconstants.c b/util/yuvconstants.c
index 4e5185af..4e5185af 100644
--- a/files/util/yuvconstants.c
+++ b/util/yuvconstants.c
diff --git a/files/util/yuvconvert.cc b/util/yuvconvert.cc
index 93b52668..93b52668 100644
--- a/files/util/yuvconvert.cc
+++ b/util/yuvconvert.cc
diff --git a/files/winarm.mk b/winarm.mk
index b0a344ae..b0a344ae 100644
--- a/files/winarm.mk
+++ b/winarm.mk
author	Android Build Coastguard Worker <android-build-coastguard-worker@google.com>	2024-03-29 18:11:18 +0000
committer	Gerrit Code Review <noreply-gerritcodereview@google.com>	2024-03-29 18:11:18 +0000
commit	e91b3430dd4106d54524333bde0062343a801770 (patch)
tree	7aa355fd0b89ec0b2611e17ee84a14c6fa449e22
parent	eaf9564965f32c3a944ed1adf82cf4f80fdba734 (diff)
parent	c334ec011622efb99936768259a97d2a9b8dcd83 (diff)
download	libyuv-androidx-vectordrawable-release.tar.gz