mirror of
https://github.com/gcc-mirror/gcc.git
synced 2026-05-06 14:59:39 +02:00
xtensa: Apply further improvement to xtensa_legitimize_address()
The load/store instructions in the Xtensa ISA have an unsigned 8-bit
displacement immediate field that scales with the byte width of the
reference. That is, for a 1-byte reference, the displacement is between
0 and 255, for 2-bytes between 0 and 510, and for 4-bytes between 0 and
1020.
However, xtensa_legitimize_address() has not been able to take advantage
of this fact until now, and has limited the maximum displacement to 255
regardless of the reference byte width.
This patch resolves the above limitation and slightly improves the
efficiency of handling large positive displacements in memory accesses
wider than 1 byte.
/* example */
int test(short a[]) {
return a[32767] + a[16511] + a[1];
}
;; before (-O2)
.literal_position
.literal .LC0, 65534
test:
entry sp, 32
l32r a8, .LC0
addmi a9, a2, 0x100
add.n a8, a2, a8
addmi a9, a9, 0x7f00
l16si a8, a8, 0 ;; 32767 = 65534 / 2
l16si a9, a9, 254 ;; 16511 = (32512 + 256 + 254) / 2
l16si a2, a2, 2
add.n a8, a8, a9
add.n a2, a8, a2
retw.n
;; after (-O2)
test:
entry sp, 32
addmi a9, a2, 0x7f00 ;; CSEd
addmi a8, a9, 0x7f00
l16si a8, a8, 510 ;; 32767 = (32512 + 32512 + 510) / 2
l16si a9, a9, 510 ;; 16511 = (32512 + 510) / 2
l16si a2, a2, 2
add.n a8, a8, a9
add.n a2, a8, a2
retw.n
gcc/ChangeLog:
* config/xtensa/xtensa.cc (xtensa_legitimize_address):
Modify to extend the upper limit of the coverable offset if the
address displacement of the corresponding machine instruction is
greater than 255.
This commit is contained in:
committed by
Max Filippov
parent
234d9acfd2
commit
bba0342a57
@@ -2340,8 +2340,9 @@ xtensa_legitimize_address (rtx x,
|
||||
rtx oldx ATTRIBUTE_UNUSED,
|
||||
machine_mode mode)
|
||||
{
|
||||
rtx plus0, plus1, temp;
|
||||
HOST_WIDE_INT offset, mem_offset, addmi_offset;
|
||||
rtx plus0, plus1, temp0, temp1;
|
||||
HOST_WIDE_INT offset, mem_disp, delta, offset2;
|
||||
int mode_size;
|
||||
|
||||
if (xtensa_tls_symbol_p (x))
|
||||
return xtensa_legitimize_tls_address (x);
|
||||
@@ -2353,33 +2354,57 @@ xtensa_legitimize_address (rtx x,
|
||||
if (! REG_P (plus0) && REG_P (plus1))
|
||||
std::swap (plus0, plus1);
|
||||
|
||||
/* Try to split up the offset to use up to two ADDMI instructions. */
|
||||
if (REG_P (plus0) && CONST_INT_P (plus1)
|
||||
&& ! xtensa_mem_offset (offset = INTVAL (plus1), mode)
|
||||
&& ! xtensa_simm8 (offset)
|
||||
&& xtensa_mem_offset (mem_offset = offset & 0xff, mode))
|
||||
/* Try to split up the offset to use up to two ADDMI instructions;
|
||||
The two ADDMIs are slightly more efficient than "L32R w/litpool + ADD"
|
||||
or "CONST16 pair + ADD", if applicable. */
|
||||
if (! REG_P (plus0) || ! CONST_INT_P (plus1)
|
||||
|| xtensa_mem_offset (offset = INTVAL (plus1), mode)
|
||||
|| xtensa_simm8 (offset)
|
||||
|| ! xtensa_mem_offset (mem_disp = offset & 0xff, mode))
|
||||
return x;
|
||||
|
||||
/* The above assumes that the displacement within the load/store instruc-
|
||||
tion is unsigned 8 bits, regardless of the load/store width. However,
|
||||
in actual 2- or 4-byte width load/store instructions, a displacement
|
||||
shifted by 1 or 2 bits, respectively, is added to the base register.
|
||||
Here, determine the extra displacement range (delta) that these
|
||||
instructions can cover. */
|
||||
delta = (mode_size = GET_MODE_SIZE (mode)) >= 4 ? 768 :
|
||||
mode_size == 2 ? 256 : 0;
|
||||
|
||||
/* The upper limit of the ADDMI instruction's addition is allowed to be
|
||||
widened by the delta amount calculated above, and the excess is later
|
||||
renormalized to the displacement of the load/store instruction. */
|
||||
offset2 = offset & ~0xff, offset = 0;
|
||||
if (! IN_RANGE (offset2, -32768, 32512 + delta))
|
||||
{
|
||||
/* The two ADDMIs are slightly more efficient than
|
||||
"L32R w/litpool + ADD" or "CONST16 pair + ADD", if applicable. */
|
||||
addmi_offset = offset & ~0xff;
|
||||
if (addmi_offset > 32512)
|
||||
offset = 32512, addmi_offset -= 32512;
|
||||
else if (addmi_offset < -32768)
|
||||
offset = -32768, addmi_offset += 32768;
|
||||
else
|
||||
offset = 0;
|
||||
if (offset2 > 32512)
|
||||
offset = 32512, offset2 -= 32512;
|
||||
else if (offset2 < -32768)
|
||||
offset = -32768, offset2 += 32768;
|
||||
|
||||
if (xtensa_simm8x256 (addmi_offset))
|
||||
{
|
||||
emit_insn (gen_addsi3 (temp = gen_reg_rtx (Pmode),
|
||||
plus0, GEN_INT (addmi_offset)));
|
||||
if (offset)
|
||||
emit_insn (gen_addsi3 (temp, temp, GEN_INT (offset)));
|
||||
return gen_rtx_PLUS (Pmode, temp, GEN_INT (mem_offset));
|
||||
}
|
||||
/* If two ADDMIs are not enough, the process will be canceled. */
|
||||
if (! IN_RANGE (offset2, -32768, 32512 + delta))
|
||||
return x;
|
||||
}
|
||||
if (offset2 > 32512)
|
||||
mem_disp += offset2 - 32512, offset2 = 32512;
|
||||
|
||||
return x;
|
||||
/* Emit one or two ADDMI instructions, and then return an address RTX
|
||||
with the remaining offset.
|
||||
By adding the offset with the largest absolute value first via
|
||||
temporary pseudos, the likelihood of those pseudos being consolidated
|
||||
by the CSE increases. */
|
||||
temp0 = gen_reg_rtx (Pmode);
|
||||
if (offset)
|
||||
{
|
||||
emit_insn (gen_addsi3 (temp1 = gen_reg_rtx (Pmode),
|
||||
plus0, GEN_INT (offset)));
|
||||
emit_insn (gen_addsi3 (temp0, temp1, GEN_INT (offset2)));
|
||||
}
|
||||
else
|
||||
emit_insn (gen_addsi3 (temp0, plus0, GEN_INT (offset2)));
|
||||
return gen_rtx_PLUS (Pmode, temp0, GEN_INT (mem_disp));
|
||||
}
|
||||
|
||||
/* Worker function for TARGET_MODE_DEPENDENT_ADDRESS_P.
|
||||
|
||||
Reference in New Issue
Block a user