diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index b9172e57df4..f55907a410e 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -1056,51 +1056,6 @@ change_vsetvl_insn (const insn_info *insn, const vector_insn_info &info) change_insn (rinsn, new_pat); } -static void -local_eliminate_vsetvl_insn (const vector_insn_info &dem) -{ - const insn_info *insn = dem.get_insn (); - if (!insn || insn->is_artificial ()) - return; - rtx_insn *rinsn = insn->rtl (); - const bb_info *bb = insn->bb (); - if (vsetvl_insn_p (rinsn)) - { - rtx vl = get_vl (rinsn); - for (insn_info *i = insn->next_nondebug_insn (); - real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ()) - { - if (i->is_call () || i->is_asm () - || find_access (i->defs (), VL_REGNUM) - || find_access (i->defs (), VTYPE_REGNUM)) - return; - - if (has_vtype_op (i->rtl ())) - { - if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl ()))) - return; - rtx avl = get_avl (i->rtl ()); - if (avl != vl) - return; - set_info *def = find_access (i->uses (), REGNO (avl))->def (); - if (def->insn () != insn) - return; - - vector_insn_info new_info; - new_info.parse_insn (i); - if (!new_info.skip_avl_compatible_p (dem)) - return; - - new_info.set_avl_info (dem.get_avl_info ()); - new_info = dem.merge (new_info, LOCAL_MERGE); - change_vsetvl_insn (insn, new_info); - eliminate_insn (PREV_INSN (i->rtl ())); - return; - } - } - } -} - static bool source_equal_p (insn_info *insn1, insn_info *insn2) { @@ -2674,6 +2629,7 @@ private: void pre_vsetvl (void); /* Phase 5. */ + void local_eliminate_vsetvl_insn (const vector_insn_info &) const; void cleanup_insns (void) const; /* Phase 6. */ @@ -3995,6 +3951,62 @@ pass_vsetvl::pre_vsetvl (void) commit_edge_insertions (); } +/* Local user vsetvl optimizaiton: + + Case 1: + vsetvl a5,a4,e8,mf8 + ... + vsetvl zero,a5,e8,mf8 --> Eliminate directly. + + Case 2: + vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2 + ... + vsetvl zero,a5,e32,mf2 --> Eliminate directly. */ +void +pass_vsetvl::local_eliminate_vsetvl_insn (const vector_insn_info &dem) const +{ + const insn_info *insn = dem.get_insn (); + if (!insn || insn->is_artificial ()) + return; + rtx_insn *rinsn = insn->rtl (); + const bb_info *bb = insn->bb (); + if (vsetvl_insn_p (rinsn)) + { + rtx vl = get_vl (rinsn); + for (insn_info *i = insn->next_nondebug_insn (); + real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ()) + { + if (i->is_call () || i->is_asm () + || find_access (i->defs (), VL_REGNUM) + || find_access (i->defs (), VTYPE_REGNUM)) + return; + + if (has_vtype_op (i->rtl ())) + { + if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl ()))) + return; + rtx avl = get_avl (i->rtl ()); + if (avl != vl) + return; + set_info *def = find_access (i->uses (), REGNO (avl))->def (); + if (def->insn () != insn) + return; + + vector_insn_info new_info + = m_vector_manager->vector_insn_infos[i->uid ()]; + if (!new_info.skip_avl_compatible_p (dem)) + return; + + new_info.set_avl_info (dem.get_avl_info ()); + new_info = dem.merge (new_info, LOCAL_MERGE); + change_vsetvl_insn (insn, new_info); + eliminate_insn (PREV_INSN (i->rtl ())); + return; + } + } + } +} + /* Before VSETVL PASS, RVV instructions pattern is depending on AVL operand implicitly. Since we will emit VSETVL instruction and make RVV instructions depending on VL/VTYPE global status registers, we remove the such AVL operand diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109748.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109748.c new file mode 100644 index 00000000000..81c42c5a82a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109748.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */ + +#include "riscv_vector.h" + +int byte_mac_vec(unsigned char *a, unsigned char *b, int len) { + size_t vlmax = __riscv_vsetvlmax_e8m1(); + vint32m4_t vec_s = __riscv_vmv_v_x_i32m4(0, vlmax); + vint32m1_t vec_zero = __riscv_vmv_v_x_i32m1(0, vlmax); + int k = len; + + for (size_t vl; k > 0; k -= vl, a += vl, b += vl) { + vl = __riscv_vsetvl_e8m1(k); + + vuint8m1_t a8s = __riscv_vle8_v_u8m1(a, vl); + vuint8m1_t b8s = __riscv_vle8_v_u8m1(b, vl); + vuint32m4_t a8s_extended = __riscv_vzext_vf4_u32m4(a8s, vl); + vuint32m4_t b8s_extended = __riscv_vzext_vf4_u32m4(a8s, vl); + + vint32m4_t a8s_as_i32 = __riscv_vreinterpret_v_u32m4_i32m4(a8s_extended); + vint32m4_t b8s_as_i32 = __riscv_vreinterpret_v_u32m4_i32m4(b8s_extended); + + vec_s = __riscv_vmacc_vv_i32m4_tu(vec_s, a8s_as_i32, b8s_as_i32, vl); + } + + vint32m1_t vec_sum = __riscv_vredsum_vs_i32m4_i32m1(vec_s, vec_zero, __riscv_vsetvl_e32m4(len)); + int sum = __riscv_vmv_x_s_i32m1_i32(vec_sum); + + return sum; +} + +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m4,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli} 4 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */