avoid-store-forwarding: Reject overlapping stores [PR124476]

The redundant-store tracking introduced in ec5349c37a replaced a
safe bitmap_bit_in_range_p check (which bailed out on any overlap) with
bitmap_all_bits_in_range_p (which only flags fully covered stores as
redundant and lets partial overlaps through).  This broke "last writer
wins" semantics for partially overlapping stores: when an earlier
store's bit-field insertion (BFI) was applied after the base store's
value, it overwrote bytes that should have belonged to the later store.
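
A minimal sketch of the problematic shape (simplified from the PR
testcase; the function name is illustrative): two stores with partially
overlapping byte ranges feed a wider load, and the load must observe
the later store's bytes in the overlap.

    long
    overlap_last_writer_wins (void)
    {
      long x;
      /* Earlier store: bytes 2..7 of x.  */
      __builtin_memset (2 + (char *) &x, 0x55, 6);
      /* Later store: bytes 0..3 of x, overlapping bytes 2..3.  */
      __builtin_memset (&x, 0, 4);
      /* The load of x must see 0x00 in bytes 2..3 (the later store
         wrote last); composing the earlier store's bits on top of
         the later store's value puts 0x55 there instead.  */
      return x;
    }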

Restore the original overlap check from 1d8de1e93e: bail out of the
optimization when any bit in a store's byte range is already claimed
by a later store in program order.  Remove the now-unnecessary
redundant-store tracking (redundant_stores, store_ind_to_remove).
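
The effect of the restored check can be modelled outside of GCC (a
sketch only: the byte array stands in for sbitmap, store_range and
stores_forwardable_p are hypothetical names, and stores are visited
from the last one backwards so that a claimed byte always belongs to a
later store in program order):

    struct store_range { int offset; int size; };

    static int
    stores_forwardable_p (const struct store_range *stores_last_first,
                          int n_stores, int load_size)
    {
      unsigned char forwarded_bytes[64] = { 0 };
      if (load_size > 64)
        return 0;
      for (int s = 0; s < n_stores; s++)
        {
          int off = stores_last_first[s].offset;
          int len = stores_last_first[s].size;
          /* Any byte already claimed belongs to a later store:
             forwarding would break "last writer wins", so bail.  */
          for (int b = off; b < off + len; b++)
            if (forwarded_bytes[b])
              return 0;
          for (int b = off; b < off + len; b++)
            forwarded_bytes[b] = 1;
        }
      return 1;
    }

Under the old bitmap_all_bits_in_range_p check, a store whose range was
only partially claimed was neither skipped as redundant nor rejected,
which is exactly the case the fix turns into a bail-out.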

gcc/ChangeLog:

	PR rtl-optimization/124476
	* avoid-store-forwarding.cc
	(store_forwarding_analyzer::process_store_forwarding): Replace
	bitmap_all_bits_in_range_p with bitmap_any_bit_in_range_p and
	return false on partial overlap.  Remove redundant-store vectors
	and their associated removal, dump, and deletion logic.

gcc/testsuite/ChangeLog:

	PR rtl-optimization/124476
	* gcc.dg/pr124476.c: New test.
Author: Philipp Tomsich
Date:   2026-03-13 11:01:45 +01:00
parent 1f97354f55
commit a48fbdd6a8
2 changed files with 50 additions and 35 deletions

gcc/avoid-store-forwarding.cc

@@ -171,28 +171,23 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
   /* Memory sizes should be constants at this stage.  */
   HOST_WIDE_INT load_size = MEM_SIZE (load_mem).to_constant ();
 
-  /* If the stores cover all the bytes of the load, then we can eliminate
-     the load entirely and use the computed value instead.
-     We can also eliminate stores on addresses that are overwritten
-     by later stores.  */
+  /* If the stores cover all the bytes of the load without overlap then we can
+     eliminate the load entirely and use the computed value instead.
+     Bail out when partially overlapping stores are detected, as the pass
+     cannot correctly handle "last writer wins" semantics for the
+     overlapping byte ranges (see PR124476).  */
 
   auto_sbitmap forwarded_bytes (load_size);
   bitmap_clear (forwarded_bytes);
 
   unsigned int i;
   store_fwd_info* it;
-  auto_vec<store_fwd_info> redundant_stores;
-  auto_vec<int> store_ind_to_remove;
   FOR_EACH_VEC_ELT (stores, i, it)
     {
       HOST_WIDE_INT store_size = MEM_SIZE (it->store_mem).to_constant ();
-      if (bitmap_all_bits_in_range_p (forwarded_bytes, it->offset,
-                                      it->offset + store_size - 1))
-        {
-          redundant_stores.safe_push (*it);
-          store_ind_to_remove.safe_push (i);
-          continue;
-        }
+      if (bitmap_any_bit_in_range_p (forwarded_bytes, it->offset,
+                                     it->offset + store_size - 1))
+        return false;
       bitmap_set_range (forwarded_bytes, it->offset, store_size);
     }
 
@@ -218,15 +213,6 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
         fprintf (dump_file, "(Load elimination candidate)\n");
     }
 
-  /* Remove redundant stores from the vector.  Although this is quadratic,
-     there doesn't seem to be much point optimizing it.  The number of
-     redundant stores is expected to be low and the length of the list is
-     limited by a --param.  The dependence checking that we did earlier is
-     also quadratic in the size of this list.  */
-  store_ind_to_remove.reverse ();
-  for (int i : store_ind_to_remove)
-    stores.ordered_remove (i);
-
   rtx load = single_set (load_insn);
   rtx dest;
 
@@ -413,15 +399,6 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
             }
         }
 
-      if (redundant_stores.length () > 0)
-        {
-          fprintf (dump_file, "\nRedundant stores that have been removed:\n");
-          FOR_EACH_VEC_ELT (redundant_stores, i, it)
-            {
-              fprintf (dump_file, " ");
-              print_rtl_single (dump_file, it->store_insn);
-            }
-        }
     }
 
   stats_sf_avoided++;
@@ -441,10 +418,6 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
           delete_insn (it->store_insn);
         }
 
-      /* Delete redundant stores.  */
-      FOR_EACH_VEC_ELT (redundant_stores, i, it)
-        delete_insn (it->store_insn);
-
       df_insn_rescan (load_insn);
 
      if (load_elim)

gcc/testsuite/gcc.dg/pr124476.c

@@ -0,0 +1,42 @@
+/* PR rtl-optimization/124476 */
+/* { dg-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O1 -favoid-store-forwarding" } */
+
+/* Verify that overlapping stores are handled correctly.
+   The avoid-store-forwarding pass must respect "last writer wins"
+   semantics when stores have partially overlapping byte ranges.
+   The expected value is little-endian specific: the memmove writes
+   bytes 0-3 last (zeroing them), leaving bytes 4-7 as 0x05 from the
+   earlier memset, so f = 0x0505050500000000 on little-endian.  */
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+int main () { return 0; }
+#else
+
+int y;
+
+__attribute__((noipa)) void
+foo (int a, long *ret)
+{
+  long f;
+  char c;
+
+  __builtin_memset (2 + (char *) &f, a, 6);
+  char n = *(char *) __builtin_memset (&c, a, 1);
+  __builtin_memmove (&f, &y, 4);
+  long r = f + a + n;
+  *ret = r;
+}
+
+int
+main ()
+{
+  long x;
+  foo (5, &x);
+
+  if (x != 0x050505050000000aL)
+    __builtin_abort ();
+  return 0;
+}
+#endif
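
For reference, the expected value in the test can be recomputed
independently of the pass (a standalone sketch, assuming little-endian
lp64; the zero source variable is a stand-in for y and this is not part
of the committed test):

    #include <assert.h>
    #include <string.h>

    int
    main (void)
    {
      long f = 0;
      int zero = 0;
      memset (2 + (char *) &f, 5, 6);  /* bytes 2..7 of f become 0x05 */
      memmove (&f, &zero, 4);          /* bytes 0..3 become 0x00: last writer wins */
      assert (f == 0x0505050500000000L);
      /* The test computes r = f + a + n with a == n == 5.  */
      assert (f + 5 + 5 == 0x050505050000000aL);
      return 0;
    }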