diff options
author | Nathan E. Egge <unlord@xiph.org> | 2024-02-24 06:57:29 -0500 |
---|---|---|
committer | Nathan E. Egge <unlord@xiph.org> | 2024-02-27 04:47:36 -0500 |
commit | 701225128a9fab71fb31bb5017c77c2bee7df709 (patch) | |
tree | d38767e690ad4c9fbb9351120f44dbba914bf746 | |
parent | afeeb3cc901a94ded8e20086a06beb45c728fbf0 (diff) | |
download | libdav1d-701225128a9fab71fb31bb5017c77c2bee7df709.tar.gz |
riscv64/itx: Add 8x16 8bpc eob test
Kendryte K230 Before After
inv_txfm_add_8x16_adst_adst_0_8bpc_rvv: 853.9 ( 9.00x) 698.3 (11.03x)
inv_txfm_add_8x16_adst_adst_1_8bpc_rvv: 853.8 ( 9.00x) 698.3 (11.03x)
inv_txfm_add_8x16_adst_dct_0_8bpc_rvv: 763.0 ( 9.55x) 609.2 (12.00x)
inv_txfm_add_8x16_adst_dct_1_8bpc_rvv: 763.1 ( 9.55x) 609.3 (11.94x)
inv_txfm_add_8x16_adst_flipadst_0_8bpc_rvv: 857.1 ( 8.99x) 701.6 (11.00x)
inv_txfm_add_8x16_adst_flipadst_1_8bpc_rvv: 856.8 ( 8.98x) 701.3 (10.97x)
inv_txfm_add_8x16_adst_identity_0_8bpc_rvv: 622.9 ( 9.22x) 468.5 (12.36x)
inv_txfm_add_8x16_adst_identity_1_8bpc_rvv: 622.9 ( 9.23x) 468.6 (12.37x)
inv_txfm_add_8x16_dct_adst_0_8bpc_rvv: 770.1 ( 9.32x) 655.1 (10.93x)
inv_txfm_add_8x16_dct_adst_1_8bpc_rvv: 770.1 ( 9.34x) 655.4 (10.93x)
inv_txfm_add_8x16_dct_dct_0_8bpc_rvv: 679.8 ( 1.23x) 566.1 ( 1.48x)
inv_txfm_add_8x16_dct_dct_1_8bpc_rvv: 679.8 ( 9.98x) 566.5 (11.89x)
inv_txfm_add_8x16_dct_flipadst_0_8bpc_rvv: 771.1 ( 9.34x) 667.4 (10.75x)
inv_txfm_add_8x16_dct_flipadst_1_8bpc_rvv: 771.1 ( 9.34x) 667.3 (10.76x)
inv_txfm_add_8x16_dct_identity_0_8bpc_rvv: 532.3 ( 9.84x) 422.1 (12.42x)
inv_txfm_add_8x16_dct_identity_1_8bpc_rvv: 532.4 ( 9.85x) 422.2 (12.40x)
inv_txfm_add_8x16_flipadst_adst_0_8bpc_rvv: 858.4 ( 8.98x) 699.2 (11.03x)
inv_txfm_add_8x16_flipadst_adst_1_8bpc_rvv: 858.5 ( 8.98x) 699.3 (11.03x)
inv_txfm_add_8x16_flipadst_dct_0_8bpc_rvv: 768.6 ( 9.52x) 609.7 (11.97x)
inv_txfm_add_8x16_flipadst_dct_1_8bpc_rvv: 768.4 ( 9.52x) 609.6 (11.97x)
inv_txfm_add_8x16_flipadst_flipadst_0_8bpc_rvv: 866.5 ( 8.91x) 706.5 (10.92x)
inv_txfm_add_8x16_flipadst_flipadst_1_8bpc_rvv: 866.4 ( 8.92x) 706.6 (10.95x)
inv_txfm_add_8x16_flipadst_identity_0_8bpc_rvv: 621.9 ( 9.28x) 464.6 (12.46x)
inv_txfm_add_8x16_flipadst_identity_1_8bpc_rvv: 621.8 ( 9.28x) 464.6 (12.46x)
inv_txfm_add_8x16_identity_adst_0_8bpc_rvv: 584.9 ( 9.78x) 564.1 (10.12x)
inv_txfm_add_8x16_identity_adst_1_8bpc_rvv: 584.8 ( 9.78x) 563.9 (10.12x)
inv_txfm_add_8x16_identity_dct_0_8bpc_rvv: 495.0 (10.75x) 474.6 (11.13x)
inv_txfm_add_8x16_identity_dct_1_8bpc_rvv: 494.3 (10.75x) 474.7 (11.12x)
inv_txfm_add_8x16_identity_flipadst_0_8bpc_rvv: 588.1 ( 9.76x) 568.1 (10.07x)
inv_txfm_add_8x16_identity_flipadst_1_8bpc_rvv: 588.7 ( 9.74x) 568.0 (10.07x)
inv_txfm_add_8x16_identity_identity_0_8bpc_rvv: 349.5 (10.78x) 328.8 (11.46x)
inv_txfm_add_8x16_identity_identity_1_8bpc_rvv: 349.4 (10.79x) 328.7 (11.46x)
-rw-r--r-- | src/riscv/64/itx.S | 47 |
1 file changed, 30 insertions, 17 deletions
diff --git a/src/riscv/64/itx.S b/src/riscv/64/itx.S
index 5ed3c90..c446a90 100644
--- a/src/riscv/64/itx.S
+++ b/src/riscv/64/itx.S
@@ -1866,6 +1866,8 @@ function inv_txfm_\variant\()add_8x16_rvv, export=1, ext=v
 vsetivli zero, 8, e16, m1, ta, ma
+ blt a3, a6, 1f
+
 vmv.v.x v16, zero
 addi t0, a2, 16
 vle16.v v0, (t0)
@@ -1903,6 +1905,14 @@ function inv_txfm_\variant\()add_8x16_rvv, export=1, ext=v
 vssra.vi v15, v7, 1
 .endif
+ j 2f
+
+1:
+.irp i, 8, 9, 10, 11, 12, 13, 14, 15
+ vmv.v.x v\i, zero
+.endr
+
+2:
 vmv.v.x v16, zero
 vle16.v v0, (a2)
 vse16.v v16, (a2)
@@ -2050,12 +2060,15 @@ endfunc
 def_fn_816_base identity_
 def_fn_816_base
-.macro def_fn_816 w, h, txfm1, txfm2
+.macro def_fn_816 w, h, txfm1, txfm2, eob_half
 function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_rvv, export=1
 .ifnc \txfm1, identity
 la a4, inv_\txfm1\()_e16_x\w\()_rvv
 .endif
 la a5, inv_\txfm2\()_e16_x\h\()_rvv
+.if \w == 8
+ li a6, \eob_half
+.endif
 .ifc \txfm1, identity
 j inv_txfm_identity_add_\w\()x\h\()_rvv
 .else
@@ -2065,22 +2078,22 @@ endfunc
 .endm
 .macro def_fns_816 w, h
-def_fn_816 \w, \h, dct, dct
-def_fn_816 \w, \h, identity, identity
-def_fn_816 \w, \h, dct, adst
-def_fn_816 \w, \h, dct, flipadst
-def_fn_816 \w, \h, dct, identity
-def_fn_816 \w, \h, adst, dct
-def_fn_816 \w, \h, adst, adst
-def_fn_816 \w, \h, adst, flipadst
-def_fn_816 \w, \h, flipadst, dct
-def_fn_816 \w, \h, flipadst, adst
-def_fn_816 \w, \h, flipadst, flipadst
-def_fn_816 \w, \h, identity, dct
-def_fn_816 \w, \h, adst, identity
-def_fn_816 \w, \h, flipadst, identity
-def_fn_816 \w, \h, identity, adst
-def_fn_816 \w, \h, identity, flipadst
+def_fn_816 \w, \h, dct, dct, 43
+def_fn_816 \w, \h, identity, identity, 43
+def_fn_816 \w, \h, dct, adst, 43
+def_fn_816 \w, \h, dct, flipadst, 43
+def_fn_816 \w, \h, dct, identity, 8
+def_fn_816 \w, \h, adst, dct, 43
+def_fn_816 \w, \h, adst, adst, 43
+def_fn_816 \w, \h, adst, flipadst, 43
+def_fn_816 \w, \h, flipadst, dct, 43
+def_fn_816 \w, \h, flipadst, adst, 43
+def_fn_816 \w, \h, flipadst, flipadst, 43
+def_fn_816 \w, \h, identity, dct, 64
+def_fn_816 \w, \h, adst, identity, 8
+def_fn_816 \w, \h, flipadst, identity, 8
+def_fn_816 \w, \h, identity, adst, 64
+def_fn_816 \w, \h, identity, flipadst, 64
 .endm
 def_fns_816 8, 16