mirror of
https://github.com/gcc-mirror/gcc.git
synced 2026-05-06 06:49:09 +02:00
avoid-store-forwarding: Reject overlapping stores [PR124476]
The redundant-store tracking introduced inec5349c37areplaced a safe bitmap_bit_in_range_p check (which bailed on any overlap) with bitmap_all_bits_in_range_p (which only removed fully redundant stores). This broke "last writer wins" semantics for partially overlapping stores: when an earlier store's BFI was applied after the base store's value, it overwrote bytes that should have belonged to the later store. Restore the original overlap check from1d8de1e93e: bail out of the optimization when any bit in a store's byte range is already claimed by a later store in program order. Remove the now-unnecessary redundant-store tracking (redundant_stores, store_ind_to_remove). gcc/ChangeLog: PR rtl-optimization/124476 * avoid-store-forwarding.cc (store_forwarding_analyzer::process_store_forwarding): Replace bitmap_all_bits_in_range_p with bitmap_any_bit_in_range_p and return false on partial overlap. Remove redundant-store vectors and their associated removal, dump, and deletion logic. gcc/testsuite/ChangeLog: PR rtl-optimization/124476 * gcc.dg/pr124476.c: New test.
This commit is contained in:
@@ -171,28 +171,23 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
|
||||
/* Memory sizes should be constants at this stage. */
|
||||
HOST_WIDE_INT load_size = MEM_SIZE (load_mem).to_constant ();
|
||||
|
||||
/* If the stores cover all the bytes of the load, then we can eliminate
|
||||
the load entirely and use the computed value instead.
|
||||
We can also eliminate stores on addresses that are overwritten
|
||||
by later stores. */
|
||||
/* If the stores cover all the bytes of the load without overlap then we can
|
||||
eliminate the load entirely and use the computed value instead.
|
||||
Bail out when partially overlapping stores are detected, as the pass
|
||||
cannot correctly handle "last writer wins" semantics for the
|
||||
overlapping byte ranges (see PR124476). */
|
||||
|
||||
auto_sbitmap forwarded_bytes (load_size);
|
||||
bitmap_clear (forwarded_bytes);
|
||||
|
||||
unsigned int i;
|
||||
store_fwd_info* it;
|
||||
auto_vec<store_fwd_info> redundant_stores;
|
||||
auto_vec<int> store_ind_to_remove;
|
||||
FOR_EACH_VEC_ELT (stores, i, it)
|
||||
{
|
||||
HOST_WIDE_INT store_size = MEM_SIZE (it->store_mem).to_constant ();
|
||||
if (bitmap_all_bits_in_range_p (forwarded_bytes, it->offset,
|
||||
it->offset + store_size - 1))
|
||||
{
|
||||
redundant_stores.safe_push (*it);
|
||||
store_ind_to_remove.safe_push (i);
|
||||
continue;
|
||||
}
|
||||
if (bitmap_any_bit_in_range_p (forwarded_bytes, it->offset,
|
||||
it->offset + store_size - 1))
|
||||
return false;
|
||||
bitmap_set_range (forwarded_bytes, it->offset, store_size);
|
||||
}
|
||||
|
||||
@@ -218,15 +213,6 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
|
||||
fprintf (dump_file, "(Load elimination candidate)\n");
|
||||
}
|
||||
|
||||
/* Remove redundant stores from the vector. Although this is quadratic,
|
||||
there doesn't seem to be much point optimizing it. The number of
|
||||
redundant stores is expected to be low and the length of the list is
|
||||
limited by a --param. The dependence checking that we did earlier is
|
||||
also quadratic in the size of this list. */
|
||||
store_ind_to_remove.reverse ();
|
||||
for (int i : store_ind_to_remove)
|
||||
stores.ordered_remove (i);
|
||||
|
||||
rtx load = single_set (load_insn);
|
||||
rtx dest;
|
||||
|
||||
@@ -413,15 +399,6 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
|
||||
}
|
||||
}
|
||||
|
||||
if (redundant_stores.length () > 0)
|
||||
{
|
||||
fprintf (dump_file, "\nRedundant stores that have been removed:\n");
|
||||
FOR_EACH_VEC_ELT (redundant_stores, i, it)
|
||||
{
|
||||
fprintf (dump_file, " ");
|
||||
print_rtl_single (dump_file, it->store_insn);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stats_sf_avoided++;
|
||||
@@ -441,10 +418,6 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
|
||||
delete_insn (it->store_insn);
|
||||
}
|
||||
|
||||
/* Delete redundant stores. */
|
||||
FOR_EACH_VEC_ELT (redundant_stores, i, it)
|
||||
delete_insn (it->store_insn);
|
||||
|
||||
df_insn_rescan (load_insn);
|
||||
|
||||
if (load_elim)
|
||||
|
||||
42
gcc/testsuite/gcc.dg/pr124476.c
Normal file
42
gcc/testsuite/gcc.dg/pr124476.c
Normal file
@@ -0,0 +1,42 @@
|
||||
/* PR rtl-optimization/124476 */
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target lp64 } */
|
||||
/* { dg-options "-O1 -favoid-store-forwarding" } */
|
||||
|
||||
/* Verify that overlapping stores are handled correctly.
|
||||
The avoid-store-forwarding pass must respect "last writer wins"
|
||||
semantics when stores have partially overlapping byte ranges.
|
||||
|
||||
The expected value is little-endian specific: the memmove writes
|
||||
bytes 0-3 last (zeroing them), leaving bytes 4-7 as 0x05 from the
|
||||
earlier memset, so f = 0x0505050500000000 on little-endian. */
|
||||
|
||||
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
|
||||
int main () { return 0; }
|
||||
#else
|
||||
|
||||
/* Zero-initialized global; the memmove below copies its four zero
   bytes into the low half of f.  */
int y;

/* Core of the PR124476 regression test: the stores into f partially
   overlap (the memset writes bytes 2-7, the later memmove writes bytes
   0-3), so avoid-store-forwarding must honor "last writer wins" for
   the overlapping bytes 2-3 when forwarding to the load of f.
   noipa keeps the exact store/load pattern from being optimized away
   before the RTL pass under test runs.  */
__attribute__((noipa)) void
foo (int a, long *ret)
{
  long f;
  char c;
  /* Bytes 2-7 of f := a; bytes 0-1 are left uninitialized here but
     are fully overwritten by the memmove below.  */
  __builtin_memset (2 + (char *) &f, a, 6);
  /* A second store-forwarding candidate: store to c, immediately load
     it back through the pointer memset returns.  */
  char n = *(char *) __builtin_memset (&c, a, 1);
  /* Bytes 0-3 of f := bytes of y (zero) — overlaps bytes 2-3 already
     written by the memset; the memmove must win there.  */
  __builtin_memmove (&f, &y, 4);
  /* On little-endian with a == 5: f == 0x0505050500000000, so
     r == f + a + n == 0x050505050000000a (checked by main).  */
  long r = f + a + n;
  *ret = r;
}
|
||||
|
||||
/* Drive foo with a == 5 and verify the little-endian expected value:
   0x0505050500000000 (f) + 5 (a) + 5 (n) == 0x050505050000000a.  */
int
main ()
{
  long out;
  foo (5, &out);
  if (out == 0x050505050000000aL)
    return 0;
  __builtin_abort ();
}
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user