mirror of
https://github.com/gcc-mirror/gcc.git
synced 2026-05-06 23:25:24 +02:00
RISC-V: Add patterns for vector-scalar multiply-(subtract-)accumulate [PR119100]
This pattern enables the combine pass (or late-combine, depending on the case) to merge a vec_duplicate into a plus-mult or minus-mult RTL instruction.

Before this patch, we have two instructions, e.g.:
  vfmv.v.f v6,fa0
  vfmacc.vv v2,v6,v4

After, we get only one:
  vfmacc.vf v2,fa0,v4

PR target/119100

gcc/ChangeLog:

* config/riscv/autovec-opt.md (*<optab>_vf_<mode>): Handle both add and acc FMA variants.
* config/riscv/vector.md (*pred_mul_<optab><mode>_scalar_undef): New.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c: Add vfmacc and vfmsac.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f64.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f64.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f64.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f64.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop.h: Add support for acc variants.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop_run.h: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c: Define TEST_OUT.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f64.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f64.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmadd-run-1-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmadd-run-1-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmadd-run-1-f64.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmsub-run-1-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmsub-run-1-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmsub-run-1-f64.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f32.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f64.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f32.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f64.c: New test.
This commit is contained in:
committed by
Jeff Law
parent
5bc92717b8
commit
92e1893e01
@@ -1721,24 +1721,26 @@
|
||||
;; - vfmsub.vf
|
||||
;; - vfnmadd.vf
|
||||
;; - vfnmsub.vf
|
||||
;; - vfmacc.vf
|
||||
;; - vfmsac.vf
|
||||
;; =============================================================================
|
||||
|
||||
;; vfmadd.vf, vfmsub.vf
|
||||
;; vfmadd.vf, vfmsub.vf, vfmacc.vf, vfmsac.vf
|
||||
(define_insn_and_split "*<optab>_vf_<mode>"
|
||||
[(set (match_operand:V_VLSF 0 "register_operand" "=vd")
|
||||
[(set (match_operand:V_VLSF 0 "register_operand")
|
||||
(plus_minus:V_VLSF
|
||||
(mult:V_VLSF
|
||||
(vec_duplicate:V_VLSF
|
||||
(match_operand:<VEL> 1 "register_operand" " f"))
|
||||
(match_operand:V_VLSF 2 "register_operand" " 0"))
|
||||
(match_operand:V_VLSF 3 "register_operand" " vr")))]
|
||||
(match_operand:<VEL> 1 "register_operand"))
|
||||
(match_operand:V_VLSF 2 "register_operand"))
|
||||
(match_operand:V_VLSF 3 "register_operand")))]
|
||||
"TARGET_VECTOR && can_create_pseudo_p ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(const_int 0)]
|
||||
{
|
||||
rtx ops[] = {operands[0], operands[1], operands[2], operands[3],
|
||||
operands[2]};
|
||||
RVV_VUNDEF(<MODE>mode)};
|
||||
riscv_vector::emit_vlmax_insn (code_for_pred_mul_scalar (<CODE>, <MODE>mode),
|
||||
riscv_vector::TERNARY_OP_FRM_DYN, ops);
|
||||
DONE;
|
||||
|
||||
@@ -6599,9 +6599,42 @@
|
||||
(match_operand:<VEL> 2 "register_operand"))
|
||||
(match_operand:V_VLSF 3 "register_operand"))
|
||||
(match_operand:V_VLSF 4 "register_operand"))
|
||||
(match_operand:V_VLSF 5 "register_operand")))]
|
||||
(match_operand:V_VLSF 5 "vector_merge_operand")))]
|
||||
"TARGET_VECTOR"
|
||||
{})
|
||||
{
|
||||
riscv_vector::prepare_ternary_operands (operands);
|
||||
})
|
||||
|
||||
;; Predicated vector-scalar floating-point multiply-add/subtract whose merge
;; operand (operand 2) must satisfy vector_undef_operand.
;;
;; The matched arithmetic is
;;   plus_minus (mult (vec_duplicate (operand 3)), operand 4), operand 5)
;; i.e. the FP scalar in operand 3 is broadcast, multiplied by vector
;; operand 4, and combined with vector operand 5.
;;
;; Four alternatives are provided:
;;   - alternatives 0 and 2 tie the vector multiplicand (operand 4) to the
;;     destination and emit vf<madd_msub>.vf with operand 5 as the other
;;     vector source;
;;   - alternatives 1 and 3 tie the addend (operand 5) to the destination and
;;     emit vf<macc_msac>.vf with operand 4 as the other vector source.
;; Alternatives 0-1 use the "vm" mask constraint with a "vd" destination,
;; alternatives 2-3 use the "Wc1" mask constraint with a "vr" destination.
;;
;; The frm_mode attribute is computed from operand 10.
(define_insn "*pred_mul_<optab><mode>_scalar_undef"
|
||||
[(set (match_operand:V_VLSF 0 "register_operand" "=vd,vd, vr, vr")
|
||||
(if_then_else:V_VLSF
|
||||
(unspec:<VM>
|
||||
[(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
|
||||
(match_operand 6 "vector_length_operand" "rvl,rvl,rvl,rvl")
|
||||
(match_operand 7 "const_int_operand" " i, i, i, i")
|
||||
(match_operand 8 "const_int_operand" " i, i, i, i")
|
||||
(match_operand 9 "const_int_operand" " i, i, i, i")
|
||||
(match_operand 10 "const_int_operand" " i, i, i, i")
|
||||
(reg:SI VL_REGNUM)
|
||||
(reg:SI VTYPE_REGNUM)
|
||||
(reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
|
||||
(plus_minus:V_VLSF
|
||||
(mult:V_VLSF
|
||||
(vec_duplicate:V_VLSF
|
||||
(match_operand:<VEL> 3 "register_operand" " f, f, f, f"))
|
||||
(match_operand:V_VLSF 4 "register_operand" " 0, vr, 0, vr"))
|
||||
(match_operand:V_VLSF 5 "register_operand" " vr, 0, vr, 0"))
|
||||
(match_operand:V_VLSF 2 "vector_undef_operand")))]
|
||||
"TARGET_VECTOR"
|
||||
"@
|
||||
vf<madd_msub>.vf\t%0,%3,%5%p1
|
||||
vf<macc_msac>.vf\t%0,%3,%4%p1
|
||||
vf<madd_msub>.vf\t%0,%3,%5%p1
|
||||
vf<macc_msac>.vf\t%0,%3,%4%p1"
|
||||
[(set_attr "type" "vfmuladd")
|
||||
(set_attr "mode" "<MODE>")
|
||||
(set (attr "frm_mode")
|
||||
(symbol_ref "riscv_vector::get_frm_mode (operands[10])"))])
|
||||
|
||||
(define_insn "*pred_<madd_msub><mode>_scalar"
|
||||
[(set (match_operand:V_VLSF 0 "register_operand" "=vd, vr")
|
||||
|
||||
@@ -7,8 +7,12 @@ DEF_VF_MULOP_CASE_0 (_Float16, +, +, add)
|
||||
DEF_VF_MULOP_CASE_0 (_Float16, -, +, sub)
|
||||
DEF_VF_MULOP_CASE_0 (_Float16, +, -, nadd)
|
||||
DEF_VF_MULOP_CASE_0 (_Float16, -, -, nsub)
|
||||
DEF_VF_MULOP_ACC_CASE_0 (_Float16, +, +, acc)
|
||||
DEF_VF_MULOP_ACC_CASE_0 (_Float16, -, +, sac)
|
||||
|
||||
/* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vfnmadd.vf} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vfnmsub.vf} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vfmacc.vf} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vfmsac.vf} 1 } } */
|
||||
|
||||
@@ -7,8 +7,12 @@ DEF_VF_MULOP_CASE_0 (float, +, +, add)
|
||||
DEF_VF_MULOP_CASE_0 (float, -, +, sub)
|
||||
DEF_VF_MULOP_CASE_0 (float, +, -, nadd)
|
||||
DEF_VF_MULOP_CASE_0 (float, -, -, nsub)
|
||||
DEF_VF_MULOP_ACC_CASE_0 (float, +, +, acc)
|
||||
DEF_VF_MULOP_ACC_CASE_0 (float, -, +, sac)
|
||||
|
||||
/* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vfnmadd.vf} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vfnmsub.vf} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vfmacc.vf} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vfmsac.vf} 1 } } */
|
||||
|
||||
@@ -7,8 +7,12 @@ DEF_VF_MULOP_CASE_0 (double, +, +, add)
|
||||
DEF_VF_MULOP_CASE_0 (double, -, +, sub)
|
||||
DEF_VF_MULOP_CASE_0 (double, +, -, nadd)
|
||||
DEF_VF_MULOP_CASE_0 (double, -, -, nsub)
|
||||
DEF_VF_MULOP_ACC_CASE_0 (double, +, +, acc)
|
||||
DEF_VF_MULOP_ACC_CASE_0 (double, -, +, sac)
|
||||
|
||||
/* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vfnmadd.vf} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vfnmsub.vf} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vfmacc.vf} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vfmsac.vf} 1 } } */
|
||||
|
||||
@@ -1,14 +1,11 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d --param=fpr2vr-cost=1" } */
|
||||
|
||||
#include "vf_mulop.h"
|
||||
|
||||
DEF_VF_MULOP_CASE_0 (_Float16, +, +, add)
|
||||
DEF_VF_MULOP_CASE_0 (_Float16, -, +, sub)
|
||||
DEF_VF_MULOP_CASE_0 (_Float16, +, -, nadd)
|
||||
DEF_VF_MULOP_CASE_0 (_Float16, -, -, nsub)
|
||||
#include "vf-1-f16.c"
|
||||
|
||||
/* { dg-final { scan-assembler-not {vfmadd.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfmsub.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfnmadd.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfnmsub.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfmacc.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfmsac.vf} } } */
|
||||
|
||||
@@ -1,14 +1,11 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=rv64gcv -mabi=lp64d --param=fpr2vr-cost=1" } */
|
||||
|
||||
#include "vf_mulop.h"
|
||||
|
||||
DEF_VF_MULOP_CASE_0 (float, +, +, add)
|
||||
DEF_VF_MULOP_CASE_0 (float, -, +, sub)
|
||||
DEF_VF_MULOP_CASE_0 (float, +, -, nadd)
|
||||
DEF_VF_MULOP_CASE_0 (float, -, -, nsub)
|
||||
#include "vf-1-f32.c"
|
||||
|
||||
/* { dg-final { scan-assembler-not {vfmadd.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfmsub.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfnmadd.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfnmsub.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfmacc.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfmsac.vf} } } */
|
||||
|
||||
@@ -1,14 +1,11 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=rv64gcv -mabi=lp64d --param=fpr2vr-cost=1" } */
|
||||
|
||||
#include "vf_mulop.h"
|
||||
|
||||
DEF_VF_MULOP_CASE_0 (float, +, +, add)
|
||||
DEF_VF_MULOP_CASE_0 (float, -, +, sub)
|
||||
DEF_VF_MULOP_CASE_0 (float, +, -, nadd)
|
||||
DEF_VF_MULOP_CASE_0 (float, -, -, nsub)
|
||||
#include "vf-1-f64.c"
|
||||
|
||||
/* { dg-final { scan-assembler-not {vfmadd.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfmsub.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfnmadd.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfnmsub.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfmacc.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfmsac.vf} } } */
|
||||
|
||||
@@ -7,8 +7,12 @@ DEF_VF_MULOP_CASE_1 (_Float16, +, +, add, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_CASE_1 (_Float16, -, +, sub, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_CASE_1 (_Float16, +, -, nadd, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_CASE_1 (_Float16, -, -, nsub, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_ACC_CASE_1 (_Float16, +, +, acc, VF_MULOP_ACC_BODY_X128)
|
||||
DEF_VF_MULOP_ACC_CASE_1 (_Float16, -, +, sac, VF_MULOP_ACC_BODY_X128)
|
||||
|
||||
/* { dg-final { scan-assembler {vfmadd.vf} } } */
|
||||
/* { dg-final { scan-assembler {vfmsub.vf} } } */
|
||||
/* { dg-final { scan-assembler {vfnmadd.vf} } } */
|
||||
/* { dg-final { scan-assembler {vfnmsub.vf} } } */
|
||||
/* { dg-final { scan-assembler {vfmacc.vf} } } */
|
||||
/* { dg-final { scan-assembler {vfmsac.vf} } } */
|
||||
|
||||
@@ -7,8 +7,12 @@ DEF_VF_MULOP_CASE_1 (float, +, +, add, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_CASE_1 (float, -, +, sub, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_CASE_1 (float, +, -, nadd, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_CASE_1 (float, -, -, nsub, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_ACC_CASE_1 (float, +, +, acc, VF_MULOP_ACC_BODY_X128)
|
||||
DEF_VF_MULOP_ACC_CASE_1 (float, -, +, sac, VF_MULOP_ACC_BODY_X128)
|
||||
|
||||
/* { dg-final { scan-assembler {vfmadd.vf} } } */
|
||||
/* { dg-final { scan-assembler {vfmsub.vf} } } */
|
||||
/* { dg-final { scan-assembler {vfnmadd.vf} } } */
|
||||
/* { dg-final { scan-assembler {vfnmsub.vf} } } */
|
||||
/* { dg-final { scan-assembler {vfmacc.vf} } } */
|
||||
/* { dg-final { scan-assembler {vfmsac.vf} } } */
|
||||
|
||||
@@ -7,8 +7,12 @@ DEF_VF_MULOP_CASE_1 (double, +, +, add, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_CASE_1 (double, -, +, sub, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_CASE_1 (double, +, -, nadd, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_CASE_1 (double, -, -, nsub, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_ACC_CASE_1 (double, +, +, acc, VF_MULOP_ACC_BODY_X128)
|
||||
DEF_VF_MULOP_ACC_CASE_1 (double, -, +, sac, VF_MULOP_ACC_BODY_X128)
|
||||
|
||||
/* { dg-final { scan-assembler {vfmadd.vf} } } */
|
||||
/* { dg-final { scan-assembler {vfmsub.vf} } } */
|
||||
/* { dg-final { scan-assembler {vfnmadd.vf} } } */
|
||||
/* { dg-final { scan-assembler {vfnmsub.vf} } } */
|
||||
/* { dg-final { scan-assembler {vfmacc.vf} } } */
|
||||
/* { dg-final { scan-assembler {vfmsac.vf} } } */
|
||||
|
||||
@@ -1,14 +1,11 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d --param=fpr2vr-cost=4" } */
|
||||
|
||||
#include "vf_mulop.h"
|
||||
|
||||
DEF_VF_MULOP_CASE_1 (_Float16, +, +, add, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_CASE_1 (_Float16, -, +, sub, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_CASE_1 (_Float16, +, -, nadd, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_CASE_1 (_Float16, -, -, nsub, VF_MULOP_BODY_X16)
|
||||
#include "vf-3-f16.c"
|
||||
|
||||
/* { dg-final { scan-assembler-not {vfmadd.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfmsub.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfnmadd.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfnmsub.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfmacc.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfmsac.vf} } } */
|
||||
|
||||
@@ -1,14 +1,11 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=rv64gcv -mabi=lp64d --param=fpr2vr-cost=4" } */
|
||||
|
||||
#include "vf_mulop.h"
|
||||
|
||||
DEF_VF_MULOP_CASE_1 (float, +, +, add, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_CASE_1 (float, -, +, sub, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_CASE_1 (float, +, -, nadd, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_CASE_1 (float, -, -, nsub, VF_MULOP_BODY_X16)
|
||||
#include "vf-3-f32.c"
|
||||
|
||||
/* { dg-final { scan-assembler-not {vfmadd.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfmsub.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfnmadd.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfnmsub.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfmacc.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfmsac.vf} } } */
|
||||
|
||||
@@ -1,14 +1,11 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=rv64gcv -mabi=lp64d --param=fpr2vr-cost=4" } */
|
||||
|
||||
#include "vf_mulop.h"
|
||||
|
||||
DEF_VF_MULOP_CASE_1 (double, +, +, add, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_CASE_1 (double, -, +, sub, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_CASE_1 (double, +, -, nadd, VF_MULOP_BODY_X16)
|
||||
DEF_VF_MULOP_CASE_1 (double, -, -, nsub, VF_MULOP_BODY_X16)
|
||||
#include "vf-3-f64.c"
|
||||
|
||||
/* { dg-final { scan-assembler-not {vfmadd.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfmsub.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfnmadd.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfnmsub.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfmacc.vf} } } */
|
||||
/* { dg-final { scan-assembler-not {vfmsac.vf} } } */
|
||||
|
||||
@@ -17,6 +17,23 @@
|
||||
#define RUN_VF_MULOP_CASE_0_WRAP(T, NAME, out, in, x, n) \
|
||||
RUN_VF_MULOP_CASE_0(T, NAME, out, in, x, n)
|
||||
|
||||
/* Define test_vf_mulop_acc_<NAME>_<T>_case_0 (out, in, f, n).
   For every i < n the generated function stores
     out[i] = NEG (f * in[i] OP out[i])
   so the scalar F multiplies IN and the result is accumulated into OUT.
   It returns in[i - 1], i.e. the last element of IN that was processed.
   NOTE(review): with n == 0 the unsigned index i - 1 wraps around; the
   visible callers presumably always pass n > 0 -- confirm.  */
#define DEF_VF_MULOP_ACC_CASE_0(T, OP, NEG, NAME) \
|
||||
T test_vf_mulop_acc_##NAME##_##T##_case_0 (T *restrict out, T *restrict in, \
|
||||
T f, unsigned n) \
|
||||
{ \
|
||||
unsigned i; \
|
||||
for (i = 0; i < n; i++) \
|
||||
out[i] = NEG (f * in[i] OP out[i]); \
|
||||
/* Ensure that we get acc rather than add by reusing the multiplicand. */ \
|
||||
return in[i - 1]; \
|
||||
}
|
||||
/* Extra expansion level: lets arguments that are themselves macros (such as
   T or NAME) be expanded before DEF_VF_MULOP_ACC_CASE_0 pastes them.  */
#define DEF_VF_MULOP_ACC_CASE_0_WRAP(T, OP, NEG, NAME) \
|
||||
DEF_VF_MULOP_ACC_CASE_0 (T, OP, NEG, NAME)
|
||||
/* Call the function generated by DEF_VF_MULOP_ACC_CASE_0 for T and NAME.  */
#define RUN_VF_MULOP_ACC_CASE_0(T, NAME, out, in, x, n) \
|
||||
test_vf_mulop_acc_##NAME##_##T##_case_0 (out, in, x, n)
|
||||
/* Extra expansion level for RUN_VF_MULOP_ACC_CASE_0, so that macro arguments
   are expanded before token pasting.  */
#define RUN_VF_MULOP_ACC_CASE_0_WRAP(T, NAME, out, in, x, n) \
|
||||
RUN_VF_MULOP_ACC_CASE_0 (T, NAME, out, in, x, n)
|
||||
|
||||
#define VF_MULOP_BODY(op, neg) \
|
||||
out[k + 0] = neg (tmp * out[k + 0] op in[k + 0]); \
|
||||
out[k + 1] = neg (tmp * out[k + 1] op in[k + 1]); \
|
||||
@@ -62,4 +79,53 @@
|
||||
#define DEF_VF_MULOP_CASE_1_WRAP(T, OP, NEG, NAME, BODY) \
|
||||
DEF_VF_MULOP_CASE_1 (T, OP, NEG, NAME, BODY)
|
||||
|
||||
/* Unrolled loop body for the accumulate variants.  Expands to two element
   updates of OUT followed by k += 2; it relies on `tmp', `in', `out' and `k'
   being declared in the scope where it is expanded.
   NOTE(review): the first statement stores to out[k + 0] but adds
   out[k + 1], whereas the second statement uses out[k + 1] for both --
   confirm whether out[k + 0] was intended as the first addend.  */
#define VF_MULOP_ACC_BODY(op, neg) \
|
||||
out[k + 0] = neg (tmp * in[k + 0] op out[k + 1]); \
|
||||
out[k + 1] = neg (tmp * in[k + 1] op out[k + 1]); \
|
||||
k += 2;
|
||||
|
||||
/* Each _XN macro below expands the previous level twice, so _XN contains N
   element updates in total (_X4 is two VF_MULOP_ACC_BODY expansions).  */
#define VF_MULOP_ACC_BODY_X4(op, neg) \
|
||||
VF_MULOP_ACC_BODY (op, neg) \
|
||||
VF_MULOP_ACC_BODY (op, neg)
|
||||
|
||||
#define VF_MULOP_ACC_BODY_X8(op, neg) \
|
||||
VF_MULOP_ACC_BODY_X4 (op, neg) \
|
||||
VF_MULOP_ACC_BODY_X4 (op, neg)
|
||||
|
||||
#define VF_MULOP_ACC_BODY_X16(op, neg) \
|
||||
VF_MULOP_ACC_BODY_X8 (op, neg) \
|
||||
VF_MULOP_ACC_BODY_X8 (op, neg)
|
||||
|
||||
#define VF_MULOP_ACC_BODY_X32(op, neg) \
|
||||
VF_MULOP_ACC_BODY_X16 (op, neg) \
|
||||
VF_MULOP_ACC_BODY_X16 (op, neg)
|
||||
|
||||
#define VF_MULOP_ACC_BODY_X64(op, neg) \
|
||||
VF_MULOP_ACC_BODY_X32 (op, neg) \
|
||||
VF_MULOP_ACC_BODY_X32 (op, neg)
|
||||
|
||||
#define VF_MULOP_ACC_BODY_X128(op, neg) \
|
||||
VF_MULOP_ACC_BODY_X64 (op, neg) \
|
||||
VF_MULOP_ACC_BODY_X64 (op, neg)
|
||||
|
||||
#define VF_MULOP_ACC_BODY_X256(op, neg) \
|
||||
VF_MULOP_ACC_BODY_X128 (op, neg) \
|
||||
VF_MULOP_ACC_BODY_X128 (op, neg)
|
||||
|
||||
/* Define test_vf_mulop_acc_<NAME>_<T>_case_1 (out, in, x, n).
   The generated function starts from tmp = x + 3 and, while k < n,
   multiplies tmp by 0x3f and expands BODY (OP, NEG).  BODY is expected to
   advance k itself (the VF_MULOP_ACC_BODY* macros do so with k += 2);
   otherwise the loop would not terminate.
   NOTE(review): the function is declared to return T but the macro body
   contains no return statement -- confirm that no caller uses the result.  */
#define DEF_VF_MULOP_ACC_CASE_1(T, OP, NEG, NAME, BODY) \
|
||||
T test_vf_mulop_acc_##NAME##_##T##_case_1 (T *restrict out, T *restrict in, \
|
||||
T x, unsigned n) \
|
||||
{ \
|
||||
unsigned k = 0; \
|
||||
T tmp = x + 3; \
|
||||
\
|
||||
while (k < n) \
|
||||
{ \
|
||||
tmp = tmp * 0x3f; \
|
||||
BODY (OP, NEG) \
|
||||
} \
|
||||
}
|
||||
/* Extra expansion level: lets arguments that are themselves macros be
   expanded before DEF_VF_MULOP_ACC_CASE_1 pastes them.  */
#define DEF_VF_MULOP_ACC_CASE_1_WRAP(T, OP, NEG, NAME, BODY) \
|
||||
DEF_VF_MULOP_ACC_CASE_1 (T, OP, NEG, NAME, BODY)
|
||||
|
||||
#endif
|
||||
|
||||
@@ -14,15 +14,15 @@ main ()
|
||||
for (i = 0; i < sizeof (TEST_DATA) / sizeof (TEST_DATA[0]); i++)
|
||||
{
|
||||
T f = TEST_DATA[i][0][0];
|
||||
T *in = TEST_DATA[i][1];
|
||||
T *out = TEST_DATA[i][2];
|
||||
T *b = TEST_DATA[i][1];
|
||||
T *c = TEST_DATA[i][2];
|
||||
T *expect = TEST_DATA[i][3];
|
||||
|
||||
TEST_RUN (T, NAME, out, in, f, N);
|
||||
TEST_RUN (T, NAME, c, b, f, N);
|
||||
|
||||
for (k = 0; k < N; k++)
|
||||
{
|
||||
T diff = expect[k] - out[k];
|
||||
T diff = expect[k] - TEST_OUT[k];
|
||||
if (TYPE_FABS (diff, T) > .01 * TYPE_FABS (expect[k], T))
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
/* { dg-do run { target { riscv_v } } } */
|
||||
/* { dg-additional-options "--param=fpr2vr-cost=0" } */
|
||||
|
||||
#include "vf_mulop.h"
|
||||
#include "vf_mulop_data.h"
|
||||
|
||||
#define T _Float16
|
||||
#define NAME add
|
||||
|
||||
DEF_VF_MULOP_ACC_CASE_0_WRAP (T, +, +, NAME)
|
||||
|
||||
#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME)
|
||||
/* The arrays received as (c, b) are forwarded swapped: b is passed as the
   `out' (accumulated) argument of the acc kernel and c as its `in'
   multiplicand.  TEST_OUT names the array holding the result to check.  */
#define TEST_RUN(T, NAME, c, b, x, n) RUN_VF_MULOP_ACC_CASE_0_WRAP(T, NAME, b, c, x, n)
|
||||
#define TEST_OUT b
|
||||
|
||||
#include "vf_mulop_run.h"
|
||||
@@ -0,0 +1,16 @@
|
||||
/* { dg-do run { target { riscv_v } } } */
|
||||
/* { dg-additional-options "--param=fpr2vr-cost=0" } */
|
||||
|
||||
#include "vf_mulop.h"
|
||||
#include "vf_mulop_data.h"
|
||||
|
||||
#define T float
|
||||
#define NAME add
|
||||
|
||||
DEF_VF_MULOP_ACC_CASE_0_WRAP (T, +, +, NAME)
|
||||
|
||||
#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME)
|
||||
/* The arrays received as (c, b) are forwarded swapped: b is passed as the
   `out' (accumulated) argument of the acc kernel and c as its `in'
   multiplicand.  TEST_OUT names the array holding the result to check.  */
#define TEST_RUN(T, NAME, c, b, x, n) RUN_VF_MULOP_ACC_CASE_0_WRAP(T, NAME, b, c, x, n)
|
||||
#define TEST_OUT b
|
||||
|
||||
#include "vf_mulop_run.h"
|
||||
@@ -0,0 +1,16 @@
|
||||
/* { dg-do run { target { riscv_v } } } */
|
||||
/* { dg-additional-options "--param=fpr2vr-cost=0" } */
|
||||
|
||||
#include "vf_mulop.h"
|
||||
#include "vf_mulop_data.h"
|
||||
|
||||
#define T double
|
||||
#define NAME add
|
||||
|
||||
DEF_VF_MULOP_ACC_CASE_0_WRAP (T, +, +, NAME)
|
||||
|
||||
#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME)
|
||||
/* The arrays received as (c, b) are forwarded swapped: b is passed as the
   `out' (accumulated) argument of the acc kernel and c as its `in'
   multiplicand.  TEST_OUT names the array holding the result to check.  */
#define TEST_RUN(T, NAME, c, b, x, n) RUN_VF_MULOP_ACC_CASE_0_WRAP(T, NAME, b, c, x, n)
|
||||
#define TEST_OUT b
|
||||
|
||||
#include "vf_mulop_run.h"
|
||||
@@ -11,5 +11,6 @@ DEF_VF_MULOP_CASE_0_WRAP (T, +, +, NAME)
|
||||
|
||||
#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME)
|
||||
#define TEST_RUN(T, NAME, out, in, x, n) RUN_VF_MULOP_CASE_0_WRAP(T, NAME, out, in, x, n)
|
||||
#define TEST_OUT c
|
||||
|
||||
#include "vf_mulop_run.h"
|
||||
|
||||
@@ -11,5 +11,6 @@ DEF_VF_MULOP_CASE_0_WRAP (T, +, +, NAME)
|
||||
|
||||
#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME)
|
||||
#define TEST_RUN(T, NAME, out, in, x, n) RUN_VF_MULOP_CASE_0_WRAP(T, NAME, out, in, x, n)
|
||||
#define TEST_OUT c
|
||||
|
||||
#include "vf_mulop_run.h"
|
||||
|
||||
@@ -11,5 +11,6 @@ DEF_VF_MULOP_CASE_0_WRAP (T, +, +, NAME)
|
||||
|
||||
#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME)
|
||||
#define TEST_RUN(T, NAME, out, in, x, n) RUN_VF_MULOP_CASE_0_WRAP(T, NAME, out, in, x, n)
|
||||
#define TEST_OUT c
|
||||
|
||||
#include "vf_mulop_run.h"
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
/* { dg-do run { target { riscv_v } } } */
|
||||
/* { dg-additional-options "--param=fpr2vr-cost=0" } */
|
||||
|
||||
#include "vf_mulop.h"
|
||||
#include "vf_mulop_data.h"
|
||||
|
||||
#define T _Float16
|
||||
#define NAME sub
|
||||
|
||||
DEF_VF_MULOP_ACC_CASE_0_WRAP (T, -, +, NAME)
|
||||
|
||||
#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME)
|
||||
/* The arrays received as (c, b) are forwarded swapped: b is passed as the
   `out' (accumulated) argument of the acc kernel and c as its `in'
   multiplicand.  TEST_OUT names the array holding the result to check.  */
#define TEST_RUN(T, NAME, c, b, x, n) RUN_VF_MULOP_ACC_CASE_0_WRAP(T, NAME, b, c, x, n)
|
||||
#define TEST_OUT b
|
||||
|
||||
#include "vf_mulop_run.h"
|
||||
@@ -0,0 +1,16 @@
|
||||
/* { dg-do run { target { riscv_v } } } */
|
||||
/* { dg-additional-options "--param=fpr2vr-cost=0" } */
|
||||
|
||||
#include "vf_mulop.h"
|
||||
#include "vf_mulop_data.h"
|
||||
|
||||
#define T float
|
||||
#define NAME sub
|
||||
|
||||
DEF_VF_MULOP_ACC_CASE_0_WRAP (T, -, +, NAME)
|
||||
|
||||
#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME)
|
||||
/* The arrays received as (c, b) are forwarded swapped: b is passed as the
   `out' (accumulated) argument of the acc kernel and c as its `in'
   multiplicand.  TEST_OUT names the array holding the result to check.  */
#define TEST_RUN(T, NAME, c, b, x, n) RUN_VF_MULOP_ACC_CASE_0_WRAP(T, NAME, b, c, x, n)
|
||||
#define TEST_OUT b
|
||||
|
||||
#include "vf_mulop_run.h"
|
||||
@@ -0,0 +1,16 @@
|
||||
/* { dg-do run { target { riscv_v } } } */
|
||||
/* { dg-additional-options "--param=fpr2vr-cost=0" } */
|
||||
|
||||
#include "vf_mulop.h"
|
||||
#include "vf_mulop_data.h"
|
||||
|
||||
#define T double
|
||||
#define NAME sub
|
||||
|
||||
DEF_VF_MULOP_ACC_CASE_0_WRAP (T, -, +, NAME)
|
||||
|
||||
#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME)
|
||||
/* The arrays received as (c, b) are forwarded swapped: b is passed as the
   `out' (accumulated) argument of the acc kernel and c as its `in'
   multiplicand.  TEST_OUT names the array holding the result to check.  */
#define TEST_RUN(T, NAME, c, b, x, n) RUN_VF_MULOP_ACC_CASE_0_WRAP(T, NAME, b, c, x, n)
|
||||
#define TEST_OUT b
|
||||
|
||||
#include "vf_mulop_run.h"
|
||||
@@ -11,5 +11,6 @@ DEF_VF_MULOP_CASE_0_WRAP (T, -, +, NAME)
|
||||
|
||||
#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME)
|
||||
#define TEST_RUN(T, NAME, out, in, x, n) RUN_VF_MULOP_CASE_0_WRAP(T, NAME, out, in, x, n)
|
||||
#define TEST_OUT c
|
||||
|
||||
#include "vf_mulop_run.h"
|
||||
|
||||
@@ -11,5 +11,6 @@ DEF_VF_MULOP_CASE_0_WRAP (T, -, +, NAME)
|
||||
|
||||
#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME)
|
||||
#define TEST_RUN(T, NAME, out, in, x, n) RUN_VF_MULOP_CASE_0_WRAP(T, NAME, out, in, x, n)
|
||||
#define TEST_OUT c
|
||||
|
||||
#include "vf_mulop_run.h"
|
||||
|
||||
@@ -11,5 +11,6 @@ DEF_VF_MULOP_CASE_0_WRAP (T, -, +, NAME)
|
||||
|
||||
#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME)
|
||||
#define TEST_RUN(T, NAME, out, in, x, n) RUN_VF_MULOP_CASE_0_WRAP(T, NAME, out, in, x, n)
|
||||
#define TEST_OUT c
|
||||
|
||||
#include "vf_mulop_run.h"
|
||||
|
||||
@@ -11,5 +11,6 @@ DEF_VF_MULOP_CASE_0_WRAP(T, +, -, NAME)
|
||||
|
||||
#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME)
|
||||
#define TEST_RUN(T, NAME, out, in, x, n) RUN_VF_MULOP_CASE_0_WRAP(T, NAME, out, in, x, n)
|
||||
#define TEST_OUT c
|
||||
|
||||
#include "vf_mulop_run.h"
|
||||
|
||||
@@ -11,5 +11,6 @@ DEF_VF_MULOP_CASE_0_WRAP (T, +, -, NAME)
|
||||
|
||||
#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME)
|
||||
#define TEST_RUN(T, NAME, out, in, x, n) RUN_VF_MULOP_CASE_0_WRAP(T, NAME, out, in, x, n)
|
||||
#define TEST_OUT c
|
||||
|
||||
#include "vf_mulop_run.h"
|
||||
|
||||
@@ -11,5 +11,6 @@ DEF_VF_MULOP_CASE_0_WRAP (T, +, -, NAME)
|
||||
|
||||
#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME)
|
||||
#define TEST_RUN(T, NAME, out, in, x, n) RUN_VF_MULOP_CASE_0_WRAP(T, NAME, out, in, x, n)
|
||||
#define TEST_OUT c
|
||||
|
||||
#include "vf_mulop_run.h"
|
||||
|
||||
@@ -11,5 +11,6 @@ DEF_VF_MULOP_CASE_0_WRAP (T, -, -, NAME)
|
||||
|
||||
#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME)
|
||||
#define TEST_RUN(T, NAME, out, in, x, n) RUN_VF_MULOP_CASE_0_WRAP(T, NAME, out, in, x, n)
|
||||
#define TEST_OUT c
|
||||
|
||||
#include "vf_mulop_run.h"
|
||||
|
||||
@@ -11,5 +11,6 @@ DEF_VF_MULOP_CASE_0_WRAP (T, -, -, NAME)
|
||||
|
||||
#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME)
|
||||
#define TEST_RUN(T, NAME, out, in, x, n) RUN_VF_MULOP_CASE_0_WRAP(T, NAME, out, in, x, n)
|
||||
#define TEST_OUT c
|
||||
|
||||
#include "vf_mulop_run.h"
|
||||
|
||||
@@ -11,5 +11,6 @@ DEF_VF_MULOP_CASE_0_WRAP (T, -, -, NAME)
|
||||
|
||||
#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME)
|
||||
#define TEST_RUN(T, NAME, out, in, x, n) RUN_VF_MULOP_CASE_0_WRAP(T, NAME, out, in, x, n)
|
||||
#define TEST_OUT c
|
||||
|
||||
#include "vf_mulop_run.h"
|
||||
|
||||
Reference in New Issue
Block a user