about summary refs log tree commit diff
diff options
context:
space:
mode:
author Nathan E. Egge <unlord@xiph.org> 2024-02-24 06:57:29 -0500
committer Nathan E. Egge <unlord@xiph.org> 2024-02-27 04:47:36 -0500
commit 701225128a9fab71fb31bb5017c77c2bee7df709 (patch)
tree d38767e690ad4c9fbb9351120f44dbba914bf746
parent afeeb3cc901a94ded8e20086a06beb45c728fbf0 (diff)
download libdav1d-701225128a9fab71fb31bb5017c77c2bee7df709.tar.gz
riscv64/itx: Add 8x16 8bpc eob test
Kendryte K230                                   Before          After
inv_txfm_add_8x16_adst_adst_0_8bpc_rvv:         853.9 ( 9.00x)  698.3 (11.03x)
inv_txfm_add_8x16_adst_adst_1_8bpc_rvv:         853.8 ( 9.00x)  698.3 (11.03x)
inv_txfm_add_8x16_adst_dct_0_8bpc_rvv:          763.0 ( 9.55x)  609.2 (12.00x)
inv_txfm_add_8x16_adst_dct_1_8bpc_rvv:          763.1 ( 9.55x)  609.3 (11.94x)
inv_txfm_add_8x16_adst_flipadst_0_8bpc_rvv:     857.1 ( 8.99x)  701.6 (11.00x)
inv_txfm_add_8x16_adst_flipadst_1_8bpc_rvv:     856.8 ( 8.98x)  701.3 (10.97x)
inv_txfm_add_8x16_adst_identity_0_8bpc_rvv:     622.9 ( 9.22x)  468.5 (12.36x)
inv_txfm_add_8x16_adst_identity_1_8bpc_rvv:     622.9 ( 9.23x)  468.6 (12.37x)
inv_txfm_add_8x16_dct_adst_0_8bpc_rvv:          770.1 ( 9.32x)  655.1 (10.93x)
inv_txfm_add_8x16_dct_adst_1_8bpc_rvv:          770.1 ( 9.34x)  655.4 (10.93x)
inv_txfm_add_8x16_dct_dct_0_8bpc_rvv:           679.8 ( 1.23x)  566.1 ( 1.48x)
inv_txfm_add_8x16_dct_dct_1_8bpc_rvv:           679.8 ( 9.98x)  566.5 (11.89x)
inv_txfm_add_8x16_dct_flipadst_0_8bpc_rvv:      771.1 ( 9.34x)  667.4 (10.75x)
inv_txfm_add_8x16_dct_flipadst_1_8bpc_rvv:      771.1 ( 9.34x)  667.3 (10.76x)
inv_txfm_add_8x16_dct_identity_0_8bpc_rvv:      532.3 ( 9.84x)  422.1 (12.42x)
inv_txfm_add_8x16_dct_identity_1_8bpc_rvv:      532.4 ( 9.85x)  422.2 (12.40x)
inv_txfm_add_8x16_flipadst_adst_0_8bpc_rvv:     858.4 ( 8.98x)  699.2 (11.03x)
inv_txfm_add_8x16_flipadst_adst_1_8bpc_rvv:     858.5 ( 8.98x)  699.3 (11.03x)
inv_txfm_add_8x16_flipadst_dct_0_8bpc_rvv:      768.6 ( 9.52x)  609.7 (11.97x)
inv_txfm_add_8x16_flipadst_dct_1_8bpc_rvv:      768.4 ( 9.52x)  609.6 (11.97x)
inv_txfm_add_8x16_flipadst_flipadst_0_8bpc_rvv: 866.5 ( 8.91x)  706.5 (10.92x)
inv_txfm_add_8x16_flipadst_flipadst_1_8bpc_rvv: 866.4 ( 8.92x)  706.6 (10.95x)
inv_txfm_add_8x16_flipadst_identity_0_8bpc_rvv: 621.9 ( 9.28x)  464.6 (12.46x)
inv_txfm_add_8x16_flipadst_identity_1_8bpc_rvv: 621.8 ( 9.28x)  464.6 (12.46x)
inv_txfm_add_8x16_identity_adst_0_8bpc_rvv:     584.9 ( 9.78x)  564.1 (10.12x)
inv_txfm_add_8x16_identity_adst_1_8bpc_rvv:     584.8 ( 9.78x)  563.9 (10.12x)
inv_txfm_add_8x16_identity_dct_0_8bpc_rvv:      495.0 (10.75x)  474.6 (11.13x)
inv_txfm_add_8x16_identity_dct_1_8bpc_rvv:      494.3 (10.75x)  474.7 (11.12x)
inv_txfm_add_8x16_identity_flipadst_0_8bpc_rvv: 588.1 ( 9.76x)  568.1 (10.07x)
inv_txfm_add_8x16_identity_flipadst_1_8bpc_rvv: 588.7 ( 9.74x)  568.0 (10.07x)
inv_txfm_add_8x16_identity_identity_0_8bpc_rvv: 349.5 (10.78x)  328.8 (11.46x)
inv_txfm_add_8x16_identity_identity_1_8bpc_rvv: 349.4 (10.79x)  328.7 (11.46x)
-rw-r--r-- src/riscv/64/itx.S 47
1 files changed, 30 insertions, 17 deletions
diff --git a/src/riscv/64/itx.S b/src/riscv/64/itx.S
index 5ed3c90..c446a90 100644
--- a/src/riscv/64/itx.S
+++ b/src/riscv/64/itx.S
@@ -1866,6 +1866,8 @@ function inv_txfm_\variant\()add_8x16_rvv, export=1, ext=v
vsetivli zero, 8, e16, m1, ta, ma
+ blt a3, a6, 1f
+
vmv.v.x v16, zero
addi t0, a2, 16
vle16.v v0, (t0)
@@ -1903,6 +1905,14 @@ function inv_txfm_\variant\()add_8x16_rvv, export=1, ext=v
vssra.vi v15, v7, 1
.endif
+ j 2f
+
+1:
+.irp i, 8, 9, 10, 11, 12, 13, 14, 15
+ vmv.v.x v\i, zero
+.endr
+
+2:
vmv.v.x v16, zero
vle16.v v0, (a2)
vse16.v v16, (a2)
@@ -2050,12 +2060,15 @@ endfunc
def_fn_816_base identity_
def_fn_816_base
-.macro def_fn_816 w, h, txfm1, txfm2
+.macro def_fn_816 w, h, txfm1, txfm2, eob_half
function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_rvv, export=1
.ifnc \txfm1, identity
la a4, inv_\txfm1\()_e16_x\w\()_rvv
.endif
la a5, inv_\txfm2\()_e16_x\h\()_rvv
+.if \w == 8
+ li a6, \eob_half
+.endif
.ifc \txfm1, identity
j inv_txfm_identity_add_\w\()x\h\()_rvv
.else
@@ -2065,22 +2078,22 @@ endfunc
.endm
.macro def_fns_816 w, h
-def_fn_816 \w, \h, dct, dct
-def_fn_816 \w, \h, identity, identity
-def_fn_816 \w, \h, dct, adst
-def_fn_816 \w, \h, dct, flipadst
-def_fn_816 \w, \h, dct, identity
-def_fn_816 \w, \h, adst, dct
-def_fn_816 \w, \h, adst, adst
-def_fn_816 \w, \h, adst, flipadst
-def_fn_816 \w, \h, flipadst, dct
-def_fn_816 \w, \h, flipadst, adst
-def_fn_816 \w, \h, flipadst, flipadst
-def_fn_816 \w, \h, identity, dct
-def_fn_816 \w, \h, adst, identity
-def_fn_816 \w, \h, flipadst, identity
-def_fn_816 \w, \h, identity, adst
-def_fn_816 \w, \h, identity, flipadst
+def_fn_816 \w, \h, dct, dct, 43
+def_fn_816 \w, \h, identity, identity, 43
+def_fn_816 \w, \h, dct, adst, 43
+def_fn_816 \w, \h, dct, flipadst, 43
+def_fn_816 \w, \h, dct, identity, 8
+def_fn_816 \w, \h, adst, dct, 43
+def_fn_816 \w, \h, adst, adst, 43
+def_fn_816 \w, \h, adst, flipadst, 43
+def_fn_816 \w, \h, flipadst, dct, 43
+def_fn_816 \w, \h, flipadst, adst, 43
+def_fn_816 \w, \h, flipadst, flipadst, 43
+def_fn_816 \w, \h, identity, dct, 64
+def_fn_816 \w, \h, adst, identity, 8
+def_fn_816 \w, \h, flipadst, identity, 8
+def_fn_816 \w, \h, identity, adst, 64
+def_fn_816 \w, \h, identity, flipadst, 64
.endm
def_fns_816 8, 16