diff --git a/gcc/avoid-store-forwarding.cc b/gcc/avoid-store-forwarding.cc
index 75ba30a62eb..260c135c219 100644
--- a/gcc/avoid-store-forwarding.cc
+++ b/gcc/avoid-store-forwarding.cc
@@ -171,28 +171,23 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
   /* Memory sizes should be constants at this stage.  */
   HOST_WIDE_INT load_size = MEM_SIZE (load_mem).to_constant ();
 
-  /* If the stores cover all the bytes of the load, then we can eliminate
-     the load entirely and use the computed value instead.
-     We can also eliminate stores on addresses that are overwritten
-     by later stores.  */
+  /* If the stores cover all the bytes of the load without overlap then we can
+     eliminate the load entirely and use the computed value instead.
+     Bail out when partially overlapping stores are detected, as the pass
+     cannot correctly handle "last writer wins" semantics for the
+     overlapping byte ranges (see PR124476).  */
 
   auto_sbitmap forwarded_bytes (load_size);
   bitmap_clear (forwarded_bytes);
 
   unsigned int i;
   store_fwd_info* it;
-  auto_vec<store_fwd_info> redundant_stores;
-  auto_vec<int> store_ind_to_remove;
   FOR_EACH_VEC_ELT (stores, i, it)
     {
       HOST_WIDE_INT store_size = MEM_SIZE (it->store_mem).to_constant ();
-      if (bitmap_all_bits_in_range_p (forwarded_bytes, it->offset,
-				      it->offset + store_size - 1))
-	{
-	  redundant_stores.safe_push (*it);
-	  store_ind_to_remove.safe_push (i);
-	  continue;
-	}
+      if (bitmap_any_bit_in_range_p (forwarded_bytes, it->offset,
+				     it->offset + store_size - 1))
+	return false;
       bitmap_set_range (forwarded_bytes, it->offset, store_size);
     }
 
@@ -218,15 +213,6 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
 	  fprintf (dump_file, "(Load elimination candidate)\n");
 	}
 
-  /* Remove redundant stores from the vector.  Although this is quadratic,
-     there doesn't seem to be much point optimizing it.  The number of
-     redundant stores is expected to be low and the length of the list is
-     limited by a --param.  The dependence checking that we did earlier is
-     also quadratic in the size of this list.  */
-  store_ind_to_remove.reverse ();
-  for (int i : store_ind_to_remove)
-    stores.ordered_remove (i);
-
   rtx load = single_set (load_insn);
   rtx dest;
 
@@ -413,15 +399,6 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
 	    }
 	}
 
-      if (redundant_stores.length () > 0)
-	{
-	  fprintf (dump_file, "\nRedundant stores that have been removed:\n");
-	  FOR_EACH_VEC_ELT (redundant_stores, i, it)
-	    {
-	      fprintf (dump_file, "  ");
-	      print_rtl_single (dump_file, it->store_insn);
-	    }
-	}
     }
 
   stats_sf_avoided++;
@@ -441,10 +418,6 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
 	delete_insn (it->store_insn);
     }
 
-  /* Delete redundant stores.  */
-  FOR_EACH_VEC_ELT (redundant_stores, i, it)
-    delete_insn (it->store_insn);
-
   df_insn_rescan (load_insn);
 
   if (load_elim)
diff --git a/gcc/testsuite/gcc.dg/pr124476.c b/gcc/testsuite/gcc.dg/pr124476.c
new file mode 100644
index 00000000000..7e0325d54fa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr124476.c
@@ -0,0 +1,42 @@
+/* PR rtl-optimization/124476 */
+/* { dg-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O1 -favoid-store-forwarding" } */
+
+/* Verify that overlapping stores are handled correctly.
+   The avoid-store-forwarding pass must respect "last writer wins"
+   semantics when stores have partially overlapping byte ranges.
+
+   The expected value is little-endian specific: the memmove writes
+   bytes 0-3 last (zeroing them), leaving bytes 4-7 as 0x05 from the
+   earlier memset, so f = 0x0505050500000000 on little-endian.  */
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+int main () { return 0; }
+#else
+
+int y;
+
+__attribute__((noipa)) void
+foo (int a, long *ret)
+{
+  long f;
+  char c;
+  __builtin_memset (2 + (char *) &f, a, 6);
+  char n = *(char *) __builtin_memset (&c, a, 1);
+  __builtin_memmove (&f, &y, 4);
+  long r = f + a + n;
+  *ret = r;
+}
+
+int
+main ()
+{
+  long x;
+  foo (5, &x);
+  if (x != 0x050505050000000aL)
+    __builtin_abort ();
+  return 0;
+}
+
+#endif