Make riscv_vector::legitimize_move adjust SRC in the caller.

2023-09-29  Joern Rennecke  <joern.rennecke@embecosm.com>
	    Juzhe-Zhong  <juzhe.zhong@rivai.ai>

	PR target/111566

gcc/
	* config/riscv/riscv-protos.h (riscv_vector::legitimize_move):
	Change second parameter to rtx *.
	* config/riscv/riscv-v.cc (riscv_vector::legitimize_move): Likewise.
	* config/riscv/vector.md: Change callers of
	riscv_vector::legitimize_move.
	(*mov<mode>_mem_to_mem): Remove.

gcc/testsuite/

	* gcc.target/riscv/rvv/autovec/vls/mov-1.c: Adapt test.
	* gcc.target/riscv/rvv/autovec/vls/mov-10.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/mov-3.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/mov-5.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/mov-7.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/mov-8.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/mov-9.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/mov-2.c: Removed.
	* gcc.target/riscv/rvv/autovec/vls/mov-4.c: Removed.
	* gcc.target/riscv/rvv/autovec/vls/mov-6.c: Removed.
	* gcc.target/riscv/rvv/fortran/pr111566.f90: New test.

Co-Authored-By: Juzhe-Zhong  <juzhe.zhong@rivai.ai>
This commit is contained in:
Joern Rennecke
2023-10-01 06:13:37 +01:00
parent 125781fb2c
commit f416a3fdbe
14 changed files with 43 additions and 296 deletions

View File

@@ -421,7 +421,7 @@ rtx expand_builtin (unsigned int, tree, rtx);
bool check_builtin_call (location_t, vec<location_t>, unsigned int,
tree, unsigned int, tree *);
bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
bool legitimize_move (rtx, rtx);
bool legitimize_move (rtx, rtx *);
void emit_vlmax_vsetvl (machine_mode, rtx);
void emit_hard_vlmax_vsetvl (machine_mode, rtx);
void emit_vlmax_insn (unsigned, unsigned, rtx *);

View File

@@ -1217,10 +1217,12 @@ get_frm_mode (rtx operand)
}
/* Expand a pre-RA RVV data move from SRC to DEST.
It expands move for RVV fractional vector modes. */
It expands move for RVV fractional vector modes.
Return true if the move has already been emitted. */
bool
legitimize_move (rtx dest, rtx src)
legitimize_move (rtx dest, rtx *srcp)
{
rtx src = *srcp;
machine_mode mode = GET_MODE (dest);
if (CONST_VECTOR_P (src))
{
@@ -1238,7 +1240,7 @@ legitimize_move (rtx dest, rtx src)
{
/* Need to force register if mem <- !reg. */
if (MEM_P (dest) && !REG_P (src))
src = force_reg (mode, src);
*srcp = force_reg (mode, src);
return false;
}
@@ -1269,7 +1271,7 @@ legitimize_move (rtx dest, rtx src)
{
/* Need to force register if mem <- !reg. */
if (MEM_P (dest) && !REG_P (src))
src = force_reg (mode, src);
*srcp = force_reg (mode, src);
return false;
}

View File

@@ -1037,7 +1037,7 @@
before spilling. The clobber scratch is used by spilling fractional
registers in IRA/LRA so it's too early. */
if (riscv_vector::legitimize_move (operands[0], operands[1]))
if (riscv_vector::legitimize_move (operands[0], &operands[1]))
DONE;
})
@@ -1093,7 +1093,7 @@
(match_operand:VB 1 "general_operand"))]
"TARGET_VECTOR"
{
if (riscv_vector::legitimize_move (operands[0], operands[1]))
if (riscv_vector::legitimize_move (operands[0], &operands[1]))
DONE;
})
@@ -1218,47 +1218,10 @@
(match_operand:VLS_AVL_IMM 1 "general_operand"))]
"TARGET_VECTOR"
{
if (riscv_vector::legitimize_move (operands[0], operands[1]))
if (riscv_vector::legitimize_move (operands[0], &operands[1]))
DONE;
})
(define_insn_and_split "*mov<mode>_mem_to_mem"
[(set (match_operand:VLS_AVL_IMM 0 "memory_operand")
(match_operand:VLS_AVL_IMM 1 "memory_operand"))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{
if (GET_MODE_BITSIZE (<MODE>mode).to_constant () <= MAX_BITS_PER_WORD)
{
/* Opitmize the following case:
typedef int8_t v2qi __attribute__ ((vector_size (2)));
v2qi v = *(v2qi*)in;
*(v2qi*)out = v;
We prefer scalar load/store instead of vle.v/vse.v when
the VLS modes size is smaller scalar mode. */
machine_mode mode;
unsigned size = GET_MODE_BITSIZE (<MODE>mode).to_constant ();
if (FLOAT_MODE_P (<MODE>mode))
mode = mode_for_size (size, MODE_FLOAT, 0).require ();
else
mode = mode_for_size (size, MODE_INT, 0).require ();
emit_move_insn (gen_lowpart (mode, operands[0]),
gen_lowpart (mode, operands[1]));
}
else
{
operands[1] = force_reg (<MODE>mode, operands[1]);
emit_move_insn (operands[0], operands[1]);
}
DONE;
}
[(set_attr "type" "vmov")]
)
(define_insn_and_split "*mov<mode>"
[(set (match_operand:VLS_AVL_IMM 0 "reg_or_mem_operand" "=vr, m, vr")
(match_operand:VLS_AVL_IMM 1 "reg_or_mem_operand" " m,vr, vr"))]
@@ -1274,7 +1237,7 @@
|| !register_operand (operands[1], <MODE>mode))"
[(const_int 0)]
{
bool ok_p = riscv_vector::legitimize_move (operands[0], operands[1]);
bool ok_p = riscv_vector::legitimize_move (operands[0], &operands[1]);
gcc_assert (ok_p);
DONE;
}
@@ -1286,7 +1249,7 @@
(match_operand:VLS_AVL_REG 1 "general_operand"))]
"TARGET_VECTOR"
{
bool ok_p = riscv_vector::legitimize_move (operands[0], operands[1]);
bool ok_p = riscv_vector::legitimize_move (operands[0], &operands[1]);
gcc_assert (ok_p);
DONE;
})

View File

@@ -4,54 +4,6 @@
#include "def.h"
/*
** mov0:
** lbu\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** sb\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** ret
*/
void mov0 (int8_t *in, int8_t *out)
{
v1qi v = *(v1qi*)in;
*(v1qi*)out = v;
}
/*
** mov1:
** lhu\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** sh\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** ret
*/
void mov1 (int8_t *in, int8_t *out)
{
v2qi v = *(v2qi*)in;
*(v2qi*)out = v;
}
/*
** mov2:
** lw\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** sw\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** ret
*/
void mov2 (int8_t *in, int8_t *out)
{
v4qi v = *(v4qi*)in;
*(v4qi*)out = v;
}
/*
** mov3:
** ld\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** sd\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** ret
*/
void mov3 (int8_t *in, int8_t *out)
{
v8qi v = *(v8qi*)in;
*(v8qi*)out = v;
}
/*
** mov4:
** vsetivli\s+zero,\s*16,\s*e8,\s*mf8,\s*t[au],\s*m[au]

View File

@@ -4,18 +4,6 @@
#include "def.h"
/*
** mov0:
** fld\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** fsd\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** ret
*/
void mov0 (double *in, double *out)
{
v1df v = *(v1df*)in;
*(v1df*)out = v;
}
/*
** mov1:
** vsetivli\s+zero,\s*2,\s*e64,\s*m1,\s*t[au],\s*m[au]

View File

@@ -1,19 +0,0 @@
/* { dg-do compile } */
/* { dg-options "-march=rv32gcv_zvfh_zvl4096b -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "def.h"
/*
** mov:
** lw\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** lw\s+[a-x0-9]+,4\s*\([a-x0-9]+\)
** sw\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** sw\s+[a-x0-9]+,4\s*\([a-x0-9]+\)
** ret
*/
void mov (int8_t *in, int8_t *out)
{
v8qi v = *(v8qi*)in;
*(v8qi*)out = v;
}

View File

@@ -4,42 +4,6 @@
#include "def.h"
/*
** mov0:
** lhu\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** sh\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** ret
*/
void mov0 (int16_t *in, int16_t *out)
{
v1hi v = *(v1hi*)in;
*(v1hi*)out = v;
}
/*
** mov1:
** lw\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** sw\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** ret
*/
void mov1 (int16_t *in, int16_t *out)
{
v2hi v = *(v2hi*)in;
*(v2hi*)out = v;
}
/*
** mov2:
** ld\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** sd\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** ret
*/
void mov2 (int16_t *in, int16_t *out)
{
v4hi v = *(v4hi*)in;
*(v4hi*)out = v;
}
/*
** mov3:
** vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au]

View File

@@ -1,19 +0,0 @@
/* { dg-do compile } */
/* { dg-options "-march=rv32gcv_zvfh_zvl4096b -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "def.h"
/*
** mov:
** lw\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** lw\s+[a-x0-9]+,4\s*\([a-x0-9]+\)
** sw\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** sw\s+[a-x0-9]+,4\s*\([a-x0-9]+\)
** ret
*/
void mov (int16_t *in, int16_t *out)
{
v4hi v = *(v4hi*)in;
*(v4hi*)out = v;
}

View File

@@ -4,30 +4,6 @@
#include "def.h"
/*
** mov0:
** lw\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** sw\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** ret
*/
void mov0 (int32_t *in, int32_t *out)
{
v1si v = *(v1si*)in;
*(v1si*)out = v;
}
/*
** mov1:
** ld\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** sd\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** ret
*/
void mov1 (int32_t *in, int32_t *out)
{
v2si v = *(v2si*)in;
*(v2si*)out = v;
}
/*
** mov2:
** vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au]

View File

@@ -1,19 +0,0 @@
/* { dg-do compile } */
/* { dg-options "-march=rv32gcv_zvfh_zvl4096b -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "def.h"
/*
** mov:
** lw\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** lw\s+[a-x0-9]+,4\s*\([a-x0-9]+\)
** sw\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** sw\s+[a-x0-9]+,4\s*\([a-x0-9]+\)
** ret
*/
void mov (int32_t *in, int32_t *out)
{
v2si v = *(v2si*)in;
*(v2si*)out = v;
}

View File

@@ -4,18 +4,6 @@
#include "def.h"
/*
** mov0:
** ld\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** sd\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** ret
*/
void mov0 (int64_t *in, int64_t *out)
{
v1di v = *(v1di*)in;
*(v1di*)out = v;
}
/*
** mov1:
** vsetivli\s+zero,\s*2,\s*e64,\s*m1,\s*t[au],\s*m[au]

View File

@@ -4,42 +4,6 @@
#include "def.h"
/*
** mov0:
** flh\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** fsh\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** ret
*/
void mov0 (_Float16 *in, _Float16 *out)
{
v1hf v = *(v1hf*)in;
*(v1hf*)out = v;
}
/*
** mov1:
** flw\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** fsw\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** ret
*/
void mov1 (_Float16 *in, _Float16 *out)
{
v2hf v = *(v2hf*)in;
*(v2hf*)out = v;
}
/*
** mov2:
** fld\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** fsd\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** ret
*/
void mov2 (_Float16 *in, _Float16 *out)
{
v4hf v = *(v4hf*)in;
*(v4hf*)out = v;
}
/*
** mov3:
** vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au]

View File

@@ -4,30 +4,6 @@
#include "def.h"
/*
** mov0:
** flw\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** fsw\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** ret
*/
void mov0 (float *in, float *out)
{
v1sf v = *(v1sf*)in;
*(v1sf*)out = v;
}
/*
** mov1:
** fld\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** fsd\s+[a-x0-9]+,0\s*\([a-x0-9]+\)
** ret
*/
void mov1 (float *in, float *out)
{
v2sf v = *(v2sf*)in;
*(v2sf*)out = v;
}
/*
** mov2:
** vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au]

View File

@@ -0,0 +1,31 @@
! { dg-do compile }
! { dg-options "-march=rv64gcv -mabi=lp64d -Ofast -fallow-argument-mismatch -fmax-stack-var-size=65536 -S -std=legacy -w" }
module a
integer,parameter :: SHR_KIND_R8 = selected_real_kind(12)
end module a
module b
use a, c => shr_kind_r8
contains
subroutine d(cg , km, i1, i2)
real (c) ch(i2,km)
real (c) cg(4,i1:i2,km)
real dc(i2,km)
real(c) ci(i2,km)
real(c) cj(i2,km)
do k=2,ck
do i=i1,0
cl = ci(i,k) *ci(i,1) / cj(i,k)+ch(i,1)
cm = cg(1,i,k) - min(e,cg(1,i,co))
dc(i,k) = sign(cm, cl)
enddo
enddo
if ( cq == 0 ) then
do i=i1,i2
if( cr <= cs ) then
cg= sign( min(ct, cg), cg)
endif
enddo
endif
end subroutine d
end module b