mirror of
https://github.com/gcc-mirror/gcc.git
synced 2026-05-06 14:59:39 +02:00
xtensa: Optimize assignment of certain constants to hardware FP registers
This patch introduces an optimization that replaces assignments of signed 12-bit integer values divided by 0th through 15th power of two to hardware FP registers with assignments of that integer values to address (GP) registers followed by negatively-scaled floating-point conversion instructions. For example, 0.12005615234375f is exactly equal to (1967.f / (1 << 14)), so we can emit such as: movi a9, 1967 float.s f0, a9, 14 if such conversion reduces costs. gcc/ChangeLog: * config/xtensa/xtensa.cc (xt_full_rtx_costs): New struct, derived from full_rtx_costs. (FPreg_neg_scaled_simm12b_1, FPreg_neg_scaled_simm12b): New worker functions. (do_largeconst): Add a call to FPreg_neg_scaled_simm12b() to the insn enumeration loop.
This commit is contained in:
committed by
Max Filippov
parent
1eefa6e0c8
commit
4864f24c86
@@ -5730,6 +5730,168 @@ xtensa_md_asm_adjust (vec<rtx> &outputs ATTRIBUTE_UNUSED,
|
||||
namespace
|
||||
{
|
||||
|
||||
/* Cheap full_rtx_costs derivative for concise handling of insn sequence
|
||||
costs. */
|
||||
|
||||
struct xt_full_rtx_costs : public full_rtx_costs
|
||||
{
|
||||
inline xt_full_rtx_costs ()
|
||||
{
|
||||
init_costs_to_zero (this);
|
||||
}
|
||||
|
||||
/* "Less-than" cost comparison. */
|
||||
inline bool operator< (xt_full_rtx_costs &rhs)
|
||||
{
|
||||
return costs_lt_p (this, &rhs, !optimize_size);
|
||||
}
|
||||
|
||||
/* Accumulate the costs of a specified insn. */
|
||||
xt_full_rtx_costs &operator+= (rtx_insn *insn)
|
||||
{
|
||||
speed += xtensa_insn_cost (insn, true);
|
||||
size += xtensa_insn_cost (insn, false);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/* Create a new instance from the specified insn sequence. */
|
||||
explicit xt_full_rtx_costs (rtx_insn *seq)
|
||||
: xt_full_rtx_costs ()
|
||||
{
|
||||
for (; seq; seq = NEXT_INSN (seq))
|
||||
*this += seq;
|
||||
}
|
||||
|
||||
/* superior/inferior parts of the costs. */
|
||||
inline int major ()
|
||||
{
|
||||
return optimize_size ? size : speed;
|
||||
}
|
||||
inline int minor ()
|
||||
{
|
||||
return optimize_size ? speed : size;
|
||||
}
|
||||
};
|
||||
|
||||
/* Optimize assignment of negatively-scaled (up to the minus 15th power
|
||||
of two) signed 12-bit integer immediate values to hardware floating-
|
||||
point registers. For example, 0.12005615234375f is exactly equal to
|
||||
(1967.f / (1 << 14)), so we can emit such as:
|
||||
movi a9, 1967
|
||||
float.s f0, a9, 14
|
||||
if such conversion reduces costs. */
|
||||
|
||||
static bool
|
||||
FPreg_neg_scaled_simm12b_1 (const REAL_VALUE_TYPE *rval,
|
||||
HOST_WIDE_INT &v, int &scale)
|
||||
{
|
||||
REAL_VALUE_TYPE r;
|
||||
int shift;
|
||||
|
||||
/* Non-zero finite values can only be accepted. */
|
||||
if (! real_isfinite (rval) || rval->cl == rvc_zero)
|
||||
return false;
|
||||
|
||||
/* Check whether the value multiplied by 32768 is an exact integer and
|
||||
the result after truncating the trailing '0' bits fits into a signed
|
||||
12-bit. */
|
||||
real_ldexp (&r, rval, 15);
|
||||
if (! real_isinteger (&r, &v)
|
||||
|| ! xtensa_simm12b (v >>= (shift = MIN (ctz_hwi (v), 15))))
|
||||
return false;
|
||||
|
||||
scale = shift - 15;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
FPreg_neg_scaled_simm12b (rtx_insn *insn)
|
||||
{
|
||||
rtx pat, dest, src, pat_1, dest_1, note, dest_2, pat_2;
|
||||
HOST_WIDE_INT v;
|
||||
int scale;
|
||||
rtx_insn *next, *last, *seq;
|
||||
REAL_VALUE_TYPE r;
|
||||
|
||||
/* It matches RTL expressions of the following format:
|
||||
(set (reg:SF gpr) (const_double:SF cst))
|
||||
(set (reg:SF fpr) (reg:SF gpr))
|
||||
REG_DEAD (reg:SF gpr)
|
||||
where cst is a negatively-scaled signed 12-bit integer immediate
|
||||
value. */
|
||||
if (TARGET_HARD_FLOAT && !TARGET_CONST16
|
||||
&& GET_CODE (pat = PATTERN (insn)) == SET
|
||||
&& REG_P (dest = SET_DEST (pat)) && GP_REG_P (REGNO (dest))
|
||||
&& GET_MODE (dest) == SFmode
|
||||
&& CONST_DOUBLE_P (src = avoid_constant_pool_reference (SET_SRC (pat)))
|
||||
&& GET_MODE (src) == SFmode
|
||||
&& FPreg_neg_scaled_simm12b_1 (CONST_DOUBLE_REAL_VALUE (src),
|
||||
v, scale)
|
||||
&& (next = next_nonnote_nondebug_insn (insn))
|
||||
&& NONJUMP_INSN_P (next)
|
||||
&& GET_CODE (pat_1 = PATTERN (next)) == SET
|
||||
&& REG_P (dest_1 = SET_DEST (pat_1)) && FP_REG_P (REGNO (dest_1))
|
||||
&& GET_MODE (dest_1) == SFmode
|
||||
&& rtx_equal_p (SET_SRC (pat_1), dest)
|
||||
&& (note = find_reg_note (next, REG_DEAD, dest)))
|
||||
{
|
||||
/* Estimate the costs of two matching insns. */
|
||||
xt_full_rtx_costs costs;
|
||||
costs += insn, costs += next;
|
||||
|
||||
/* Prepare alternative insns and estimate their costs. */
|
||||
start_sequence ();
|
||||
emit_insn (gen_rtx_SET (dest_2 = gen_rtx_REG (SImode, REGNO (dest)),
|
||||
GEN_INT (v)));
|
||||
pat_2 = gen_rtx_FLOAT (SFmode, dest_2);
|
||||
if (scale < 0)
|
||||
{
|
||||
real_ldexp (&r, &dconst1, scale);
|
||||
pat_2 = gen_rtx_MULT (SFmode, pat_2,
|
||||
const_double_from_real_value (r, SFmode));
|
||||
}
|
||||
last = emit_insn (gen_rtx_SET (dest_1, pat_2));
|
||||
xt_full_rtx_costs costs_1 (seq = end_sequence ());
|
||||
|
||||
/* If the alternative is more cost effective, it replaces the original
|
||||
insns. */
|
||||
if (costs_1 < costs)
|
||||
{
|
||||
if (dump_file)
|
||||
{
|
||||
fputs ("FPreg_neg_scaled_simm12b: ", dump_file);
|
||||
dump_value_slim (dump_file, src, 0);
|
||||
fprintf (dump_file,
|
||||
"f = (" HOST_WIDE_INT_PRINT_DEC ".f/(1<<%d))\n",
|
||||
v, -scale);
|
||||
dump_insn_slim (dump_file, insn);
|
||||
dump_insn_slim (dump_file, next);
|
||||
}
|
||||
remove_reg_equal_equiv_notes (insn);
|
||||
validate_change (insn, &PATTERN (insn),
|
||||
PATTERN (seq), 0);
|
||||
remove_reg_equal_equiv_notes (next);
|
||||
remove_note (next, note);
|
||||
validate_change (next, &PATTERN (next),
|
||||
PATTERN (last), 0);
|
||||
add_reg_note (next, REG_EQUIV, src);
|
||||
add_reg_note (next, REG_DEAD, dest_2);
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file,
|
||||
"FPreg_neg_scaled_simm12b: costs (%d,%d) -> (%d,%d)\n",
|
||||
costs.major (), costs.minor (),
|
||||
costs_1.major (), costs_1.minor ());
|
||||
dump_insn_slim (dump_file, insn);
|
||||
dump_insn_slim (dump_file, next);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Replace the source of [SH]Imode allocation whose value does not fit
|
||||
into signed 12 bits with a reference to litpool entry. */
|
||||
|
||||
@@ -5791,11 +5953,19 @@ static void
|
||||
do_largeconst (void)
|
||||
{
|
||||
bool replacing_required = !TARGET_CONST16 && !TARGET_AUTO_LITPOOLS;
|
||||
bool optimize_enabled = optimize && !optimize_debug;
|
||||
rtx_insn *insn;
|
||||
|
||||
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
|
||||
if (NONJUMP_INSN_P (insn))
|
||||
{
|
||||
/* Optimize assignment of negatively scaled (up to the minus
|
||||
15th power of two) signed 12-bit immediate values to hardware
|
||||
floating-point registers. */
|
||||
if (optimize_enabled
|
||||
&& FPreg_neg_scaled_simm12b (insn))
|
||||
continue;
|
||||
|
||||
/* Replace the source of [SH]Imode allocation whose value does not
|
||||
fit into signed 12 bits with a reference to litpool entry. */
|
||||
if (replacing_required)
|
||||
|
||||
Reference in New Issue
Block a user