diff --git a/gcc/config/s390/s390-modes.def b/gcc/config/s390/s390-modes.def index 9277167d481..555be73373e 100644 --- a/gcc/config/s390/s390-modes.def +++ b/gcc/config/s390/s390-modes.def @@ -28,6 +28,8 @@ FLOAT_MODE (TF, 16, ieee_quad_format); /* 128-bit float stored in a FPR pair. */ FLOAT_MODE (FPRX2, 16, ieee_quad_format); +FLOAT_MODE (HF, 2, ieee_half_format); + /* Add any extra modes needed to represent the condition code. */ /* @@ -265,6 +267,10 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI V2TI */ +VECTOR_MODE (FLOAT, HF, 2); /* V2HF */ +VECTOR_MODE (FLOAT, HF, 4); /* V4HF */ +VECTOR_MODE (FLOAT, HF, 8); /* V8HF */ +VECTOR_MODE (FLOAT, HF, 16); /* V16HF */ VECTOR_MODE (FLOAT, SF, 2); /* V2SF */ VECTOR_MODE (FLOAT, SF, 4); /* V4SF */ VECTOR_MODE (FLOAT, SF, 8); /* V8SF */ @@ -277,6 +283,7 @@ VECTOR_MODE (INT, SI, 1); /* V1SI */ VECTOR_MODE (INT, DI, 1); /* V1DI */ VECTOR_MODE (INT, TI, 1); /* V1TI */ +VECTOR_MODE (FLOAT, HF, 1); /* V1HF */ VECTOR_MODE (FLOAT, SF, 1); /* V1SF */ VECTOR_MODE (FLOAT, DF, 1); /* V1DF */ VECTOR_MODE (FLOAT, TF, 1); /* V1TF */ diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index 7b2da25ea64..c214609d3d3 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -1385,6 +1385,9 @@ s390_scalar_mode_supported_p (scalar_mode mode) if (DECIMAL_FLOAT_MODE_P (mode)) return default_decimal_float_supported_p (); + if (TARGET_64BIT && TARGET_Z10 && mode == HFmode) + return true; + return default_scalar_mode_supported_p (mode); } @@ -1408,6 +1411,7 @@ s390_vector_mode_supported_p (machine_mode mode) case E_SImode: case E_DImode: case E_TImode: + case E_HFmode: case E_SFmode: case E_DFmode: case E_TFmode: @@ -3887,8 +3891,12 @@ s390_register_move_cost (machine_mode mode, } /* Without vector extensions it still becomes somewhat faster having - ldgr/lgdr. */ - if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8) + ldgr/lgdr. 
+ + Although a GPR<->FPR load for 16-bit values involves a shift, use lower + costs since otherwise unnecessarily many reloads via memory are emitted. + Limit this quirk to HF mode only. */ + if (TARGET_Z10 && (GET_MODE_SIZE (mode) == 8 || mode == HFmode)) { /* ldgr is single cycle. */ if (reg_classes_intersect_p (from, GENERAL_REGS) @@ -4509,6 +4517,9 @@ s390_legitimate_constant_p (machine_mode mode, rtx op) return 0; } + if (mode == HFmode) + return satisfies_constraint_j00 (op); + /* Accept all non-symbolic constants. */ if (!SYMBOLIC_CONST (op)) return 1; @@ -4923,6 +4934,22 @@ s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, if (reg_classes_intersect_p (CC_REGS, rclass)) return GENERAL_REGS; + /* A 2-byte GPR-to-FPR move requires a scratch register if no vector + extensions are available but instruction ldgr is. */ + if (TARGET_Z10 && !TARGET_VX && GET_MODE_SIZE (mode) == 2 + && ((in_p && true_regnum (x) >= 0 + && reg_classes_intersect_p (rclass, FP_REGS)) + || (!in_p && FP_REGNO_P (true_regnum (x)) + && reg_classes_intersect_p (rclass, GENERAL_REGS)))) + { + sri->icode = CODE_FOR_reload_half_gprtofpr_z10; + return NO_REGS; + } + + if (TARGET_Z10 && !TARGET_VX && GET_MODE_SIZE (mode) == 2 + && MEM_P (x) && reg_classes_intersect_p (rclass, FP_REGS)) + return GENERAL_REGS; + if (TARGET_VX) { /* The vst/vl vector move instructions allow only for short @@ -4989,6 +5016,7 @@ s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, __SECONDARY_RELOAD_CASE (SI, si); __SECONDARY_RELOAD_CASE (DI, di); __SECONDARY_RELOAD_CASE (TI, ti); + __SECONDARY_RELOAD_CASE (HF, hf); __SECONDARY_RELOAD_CASE (SF, sf); __SECONDARY_RELOAD_CASE (DF, df); __SECONDARY_RELOAD_CASE (TF, tf); @@ -5095,6 +5123,16 @@ static bool s390_secondary_memory_needed (machine_mode mode, reg_class_t class1, reg_class_t class2) { + /* A 2-byte GPR<->FPR move is implemented for 64-bit targets and z10 which is + realized via ldgr/lgdr in conjunction with shifts in order to satisfy + alignment 
requirements, or via vector loads. Thus, there is no secondary + memory needed. */ + if (TARGET_64BIT && TARGET_Z10 && GET_MODE_SIZE (mode) == 2 + && ((reg_classes_intersect_p (class1, VEC_REGS) + && reg_classes_intersect_p (class2, GENERAL_REGS)) + || (reg_classes_intersect_p (class2, VEC_REGS) + && reg_classes_intersect_p (class1, GENERAL_REGS)))) + return false; return (((reg_classes_intersect_p (class1, VEC_REGS) && reg_classes_intersect_p (class2, GENERAL_REGS)) || (reg_classes_intersect_p (class1, GENERAL_REGS) @@ -8451,6 +8489,13 @@ s390_mangle_type (const_tree type) } #endif +static bool +s390_libgcc_floating_mode_supported_p (scalar_float_mode mode) +{ + return (TARGET_64BIT && TARGET_Z10 && mode == HFmode) + || default_libgcc_floating_mode_supported_p (mode); +} + /* In the name of slightly smaller debug output, and to cater to general assembler lossage, recognize various UNSPEC sequences and turn them back into a direct symbol reference. */ @@ -9699,7 +9744,7 @@ static machine_mode constant_modes[] = V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode, SFmode, SImode, SDmode, V4QImode, V2HImode, V1SImode, V1SFmode, - HImode, + HImode, HFmode, V2QImode, V1HImode, QImode, V1QImode @@ -11404,6 +11449,7 @@ s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode) && s390_class_max_nregs (VEC_REGS, mode) == 1) || mode == DFmode || (TARGET_VXE && mode == SFmode) + || mode == HFmode || s390_vector_mode_supported_p (mode)); break; case FP_REGS: @@ -13305,7 +13351,8 @@ s390_function_arg_float (machine_mode mode, const_tree type) /* No type info available for some library calls ... 
*/ + if (!type) - return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode; + return mode == HFmode || mode == SFmode || mode == DFmode + || mode == SDmode || mode == DDmode; if (!s390_single_field_struct_p (REAL_TYPE, type, false)) return false; @@ -17688,13 +17735,28 @@ s390_excess_precision (enum excess_precision_type type) float is evaluated to the range and precision of double. */ return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE; case EXCESS_PRECISION_TYPE_FLOAT16: - error ("%<-fexcess-precision=16%> is not supported on this target"); - break; + return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16; default: gcc_unreachable (); } return FLT_EVAL_METHOD_UNPREDICTABLE; } +#else +static enum flt_eval_method +s390_excess_precision (enum excess_precision_type type) +{ + /* At the time of writing, there is no hardware support for _Float16 on + s390. Therefore, operations have to be extended and truncated. In case + of EXCESS_PRECISION_TYPE_FLOAT16, this can happen on tree or rtl level. + The former might lead to cases where _Float16 operations cannot be folded + anymore by tree passes such as FRE due to extends/truncates. Therefore, + return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16 in this case in order to stay in + _Float16 for as long as possible. */ + if (type == EXCESS_PRECISION_TYPE_FLOAT16) + return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16; + + return default_excess_precision (type); +} #endif void @@ -18853,12 +18915,8 @@ s390_bitint_type_info (int n, struct bitint_info *info) #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true -#if ENABLE_S390_EXCESS_FLOAT_PRECISION == 1 -/* This hook is only needed to maintain the historic behavior with glibc - versions that typedef float_t to double. 
*/ #undef TARGET_C_EXCESS_PRECISION #define TARGET_C_EXCESS_PRECISION s390_excess_precision -#endif #undef TARGET_SCHED_ADJUST_PRIORITY #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority @@ -19117,6 +19175,10 @@ s390_bitint_type_info (int n, struct bitint_info *info) #undef TARGET_C_BITINT_TYPE_INFO #define TARGET_C_BITINT_TYPE_INFO s390_bitint_type_info +#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P +#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \ + s390_libgcc_floating_mode_supported_p + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-s390.h" diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 263e2a7defb..7219c9b4a6d 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -120,6 +120,7 @@ ; Test Data Class (TDC) UNSPEC_TDC_INSN + UNSPEC_TDC_SIGNBIT_HF UNSPEC_SIGNBIT ; Byte-wise Population Count @@ -253,6 +254,8 @@ UNSPEC_FMAX UNSPEC_FMIN + + UNSPEC_HALF_GPRTOFPR ]) ;; @@ -532,7 +535,7 @@ (define_attr "type" "none,integer,load,lr,la,larl,lm,stm, cs,vs,store,sem,idiv, imulhi,imulsi,imuldi, - branch,jsr,fsimptf,fsimpdf,fsimpsf,fhex, + branch,jsr,fsimptf,fsimpdf,fsimpsf,fsimphf,fhex, floadtf,floaddf,floadsf,fstoredf,fstoresf, fmultf,fmuldf,fmulsf,fdivtf,fdivdf,fdivsf, ftoi,fsqrttf,fsqrtdf,fsqrtsf, @@ -743,7 +746,7 @@ ;; Iterators -(define_mode_iterator ALL [TI DI SI HI QI TF FPRX2 DF SF TD DD SD V1QI V2QI +(define_mode_iterator ALL [TI DI SI HI QI TF FPRX2 DF SF HF TD DD SD V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF V2SF V4SF V1TI V1DF V2DF V1TF]) @@ -877,7 +880,7 @@ ;; sign bit instructions only handle single source and target fp registers ;; these instructions can only be used for TFmode values if the source and ;; target operand uses the same fp register. 
-(define_mode_attr fT0 [(TF "0") (FPRX2 "0") (DF "f") (SF "f")]) +(define_mode_attr fT0 [(TF "0") (FPRX2 "0") (DF "f") (SF "f") (HF "f")]) ;; This attribute adds b for bfp instructions and t for dfp instructions and is used ;; within instruction mnemonics. @@ -981,12 +984,12 @@ (define_code_iterator ANY_RETURN [return simple_return]) ;; Facilitate dispatching TFmode expanders on z14+. -(define_mode_attr tf_fpr [(TF "_fpr") (FPRX2 "") (DF "") (SF "") (TD "") - (DD "") (SD "")]) +(define_mode_attr tf_fpr [(TF "_fpr") (FPRX2 "") (DF "") (SF "") (HF "") + (TD "") (DD "") (SD "")]) ;; Mode names as seen in type mode_attr values. -(define_mode_attr type [(TF "tf") (FPRX2 "tf") (DF "df") (SF "sf") (TD "td") - (DD "dd") (SD "sd")]) +(define_mode_attr type [(TF "tf") (FPRX2 "tf") (DF "df") (SF "sf") (HF "hf") + (TD "td") (DD "dd") (SD "sd")]) ; Condition code modes generated by vector fp comparisons. These will @@ -3105,6 +3108,67 @@ (set_attr "cpu_facility" "z196,vx,*,vx,*,longdisp,*,longdisp,*,*,z10,*,longdisp,z10,*,longdisp,vx,vx,vx,vx,vx,vx") (set_attr "relative_long" "*,*,*,*,*,*,*,*,*,*,yes,*,*,yes,*,*,*,*,*,*,*,*")]) +; +; movhf instruction pattern +; + +(define_insn "movhf" + [(set (match_operand:HF 0 "nonimmediate_operand") + (match_operand:HF 1 "general_operand"))] + "TARGET_64BIT" + {@ [ cons: =0, 1; attrs: op_type, cpu_facility ] + [ v , v ; VRR , vx ] vlr\t%v0,%v1 + [ v , d ; VRS , vx ] vlvgh\t%v0,%1,0 + [ d , v ; VRS , vx ] vlgvh\t%0,%v1,0 + [ v , R ; VRX , vx ] vleh\t%v0,%1,0 + [ R , v ; VRX , vx ] vsteh\t%v1,%0,0 + [ v , j00 ; VRI , vx ] vzero\t%v0 + [ v , jm1 ; VRI , vx ] vone\t%v0 + [ f , G ; RRE , * ] lzdr\t%0 + [ f , f ; RR , * ] ldr\t%0,%1 + [ d , f ; * , z10 ] # + [ d , d ; RR , * ] lr\t%0,%1 + [ d , R ; RX , * ] lh\t%0,%1 + [ d , T ; RXY , longdisp ] lhy\t%0,%1 + [ d , b ; RIL , z10 ] lhrl\t%0,%1 + [ R , d ; RX , * ] sth\t%1,%0 + [ T , d ; RXY , longdisp ] sthy\t%1,%0 + [ b , d ; RIL , z10 ] sthrl\t%1,%0 + }) + +; exploit instruction lgdr 
+(define_split + [(set (match_operand:HF 0 "register_operand") + (match_operand:HF 1 "register_operand"))] + "TARGET_Z10 && !TARGET_VX && reload_completed && GENERAL_REG_P (operands[0]) && FP_REG_P (operands[1])" + [(const_int 0)] +{ + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[1] = gen_rtx_REG (DImode, REGNO (operands[1])); + emit_insn (gen_rtx_SET (operands[0], operands[1])); + emit_insn (gen_rtx_SET (operands[0], gen_rtx_LSHIFTRT (DImode, operands[0], GEN_INT (48)))); + DONE; +}) + +; exploit instruction ldgr +(define_insn_and_split "reload_half_gprtofpr_z10" + [(set (match_operand 0 "register_operand" "=f") + (unspec [(match_operand 1 "register_operand" "d")] + UNSPEC_HALF_GPRTOFPR)) + (clobber (match_operand:DI 2 "register_operand" "=d"))] + "TARGET_Z10 && !TARGET_VX" + "#" + "&& reload_completed" + [(const_int 0)] +{ + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[1] = gen_rtx_REG (DImode, REGNO (operands[1])); + emit_insn (gen_rtx_SET (operands[2], gen_rtx_ASHIFT (DImode, operands[1], GEN_INT (48)))); + emit_insn (gen_rtx_SET (operands[0], operands[2])); + DONE; +}) + + ; ; movcc instruction pattern ; @@ -3804,8 +3868,26 @@ (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CC_TO_INT))] "TARGET_HARD_DFP") +(define_expand "signbithf2" + [(match_operand:SI 0 "register_operand") + (match_operand:HF 1 "nonimmediate_operand")] + "TARGET_HARD_FLOAT && TARGET_Z10 && TARGET_64BIT" +{ + if (TARGET_VX) + { + rtx tmp = gen_reg_rtx (HImode); + emit_insn (gen_rtx_SET (tmp, simplify_gen_subreg (HImode, operands[1], HFmode, 0))); + emit_insn (gen_rtx_SET (operands[0], simplify_gen_unary (ZERO_EXTEND, SImode, tmp, HImode))); + emit_insn (gen_rtx_SET (operands[0], gen_rtx_LSHIFTRT (SImode, operands[0], GEN_INT (15)))); + } + else + emit_insn (gen_signbithf2_z10 (operands[0], operands[1])); + DONE; +}) + (define_mode_iterator SIGNBIT_SINGLE [(SF "TARGET_HARD_FLOAT") (SD "TARGET_HARD_DFP")]) + (define_expand "signbit<mode>2" [(match_operand:SI 0 
"register_operand") (match_operand:SIGNBIT_SINGLE 1 "nonimmediate_operand")] @@ -3823,16 +3905,20 @@ DONE; }) +(define_mode_iterator SIGNBIT_HALF_SINGLE [(HF "TARGET_HARD_FLOAT") + (SF "TARGET_HARD_FLOAT") + (SD "TARGET_HARD_DFP")]) + (define_insn "signbit<mode>2_z10" [(set (match_operand:SI 0 "register_operand" "=d") - (unspec:SI [(match_operand:SIGNBIT_SINGLE 1 "nonimmediate_operand" "fRT")] + (unspec:SI [(match_operand:SIGNBIT_HALF_SINGLE 1 "nonimmediate_operand" "fRT")] UNSPEC_SIGNBIT))] "TARGET_Z10 && TARGET_64BIT" "#") (define_split [(set (match_operand:SI 0 "register_operand") - (unspec:SI [(match_operand:SIGNBIT_SINGLE 1 "register_operand")] + (unspec:SI [(match_operand:SIGNBIT_HALF_SINGLE 1 "register_operand")] UNSPEC_SIGNBIT))] "TARGET_Z10 && TARGET_64BIT && reload_completed" [(set (match_dup 0) (match_dup 1)) @@ -3844,13 +3930,23 @@ (define_split [(set (match_operand:SI 0 "register_operand") - (unspec:SI [(match_operand:SIGNBIT_SINGLE 1 "memory_operand")] + (unspec:SI [(match_operand:SIGNBIT_HALF_SINGLE 1 "memory_operand")] UNSPEC_SIGNBIT))] "TARGET_Z10 && TARGET_64BIT && reload_completed" [(set (match_dup 0) (match_dup 1)) (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 31)))] { - operands[1] = change_address (operands[1], SImode, 0); + if (<MODE>mode == HFmode) + { + rtx op0_hi = gen_rtx_REG (HImode, REGNO (operands[0])); + rtx op1_hi = change_address (operands[1], HImode, 0); + emit_insn (gen_rtx_SET (op0_hi, op1_hi)); + emit_insn (gen_rtx_SET (operands[0], simplify_gen_unary (ZERO_EXTEND, SImode, op0_hi, HImode))); + emit_insn (gen_rtx_SET (operands[0], gen_rtx_LSHIFTRT (SImode, operands[0], GEN_INT (15)))); + DONE; + } + else + operands[1] = change_address (operands[1], SImode, 0); }) (define_mode_iterator SIGNBIT_DBL_TETRA [(DF "TARGET_HARD_FLOAT") @@ -12489,10 +12585,12 @@ ;;- Copy sign instructions ;; +(define_mode_iterator FP_COPYSIGN [(TF "!TARGET_VXE") (FPRX2 "TARGET_VXE") DF SF HF + (TD "TARGET_HARD_DFP") (DD "TARGET_HARD_DFP")]) (define_insn 
"copysign<mode>3<tf_fpr>" - [(set (match_operand:FP 0 "register_operand" "=f") - (copysign:FP (match_operand:FP 1 "register_operand" "<fT0>") - (match_operand:FP 2 "register_operand" "f")))] + [(set (match_operand:FP_COPYSIGN 0 "register_operand" "=f") + (copysign:FP_COPYSIGN (match_operand:FP_COPYSIGN 1 "register_operand" "<fT0>") + (match_operand:FP_COPYSIGN 2 "register_operand" "f")))] "TARGET_Z196" "cpsdr\t%0,%2,%1" [(set_attr "op_type" "RRF") diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index c43ce60fbf0..4b4957ec1ad 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -20,8 +20,8 @@ ; All vector modes supported in a vector register (define_mode_iterator V - [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF - V2SF V4SF V1DF V2DF]) + [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI + V1HF V2HF V4HF V8HF V1SF V2SF V4SF V1DF V2DF]) (define_mode_iterator VT [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF V2SF V4SF V1DF V2DF V1TF V1TI TI]) @@ -75,7 +75,7 @@ V1DF V2DF (V1TF "TARGET_VXE") (TF "TARGET_VXE")]) -(define_mode_iterator VF [V2SF V4SF V2DF]) +(define_mode_iterator VF [V1HF V2HF V4HF V8HF V2SF V4SF V2DF]) ; All modes present in V_HW1 and VFT. 
(define_mode_iterator V_HW1_FT [V16QI V8HI V4SI V2DI V1TI V1DF @@ -97,12 +97,12 @@ (define_mode_iterator V_8 [V1QI]) -(define_mode_iterator V_16 [V2QI V1HI]) -(define_mode_iterator V_32 [V4QI V2HI V1SI V1SF]) -(define_mode_iterator V_64 [V8QI V4HI V2SI V2SF V1DI V1DF]) -(define_mode_iterator V_128 [V16QI V8HI V4SI V4SF V2DI V2DF V1TI V1TF +(define_mode_iterator V_16 [V2QI V1HI V1HF]) +(define_mode_iterator V_32 [V4QI V2HI V2HF V1SI V1SF]) +(define_mode_iterator V_64 [V8QI V4HI V4HF V2SI V2SF V1DI V1DF]) +(define_mode_iterator V_128 [V16QI V8HI V8HF V4SI V4SF V2DI V2DF V1TI V1TF (TF "TARGET_VXE")]) -(define_mode_iterator V_128_NOSINGLE [V16QI V8HI V4SI V4SF V2DI V2DF]) +(define_mode_iterator V_128_NOSINGLE [V16QI V8HI V8HF V4SI V4SF V2DI V2DF]) ; 32 bit int<->fp vector conversion instructions are available since VXE2 (z15). (define_mode_iterator VX_VEC_CONV_BFP [V2DF (V4SF "TARGET_VXE2")]) @@ -122,7 +122,7 @@ (V1TF "") (TF "")]) ;; Facilitate dispatching TFmode expanders on z14+. -(define_mode_attr tf_vr [(TF "_vr") (V4SF "") (V2DF "") (V1TF "") (V1SF "") +(define_mode_attr tf_vr [(TF "_vr") (V8HF "") (V4SF "") (V2DF "") (V1TF "") (V1SF "") (V2SF "") (V1DF "") (V16QI "") (V8HI "") (V4SI "") (V2DI "") (V1TI "")]) @@ -132,6 +132,7 @@ (V1SI "SI") (V2SI "SI") (V4SI "SI") (V1DI "DI") (V2DI "DI") (V1TI "TI") (TI "TI") + (V1HF "HF") (V2HF "HF") (V4HF "HF") (V8HF "HF") (V1SF "SF") (V2SF "SF") (V4SF "SF") (V1DF "DF") (V2DF "DF") (V1TF "TF") (TF "TF")]) @@ -143,6 +144,7 @@ (V1SI "si") (V2SI "si") (V4SI "si") (V1DI "di") (V2DI "di") (V1TI "ti") (TI "ti") + (V1HF "hf") (V2HF "hf") (V4HF "hf") (V8HF "hf") (V1SF "sf") (V2SF "sf") (V4SF "sf") (V1DF "df") (V2DF "df") (V1TF "tf") (TF "tf")]) @@ -154,6 +156,7 @@ (V1SI "f") (V2SI "f") (V4SI "f") (SI "f") (V1DI "g") (V2DI "g") (DI "g") (V1TI "q") (TI "q") + (V1HF "h") (V2HF "h") (V4HF "h") (V8HF "h") (HF "h") (V1SF "f") (V2SF "f") (V4SF "f") (SF "f") (V1DF "g") (V2DF "g") (DF "g") (V1TF "q") (TF "q")]) @@ -173,6 +176,7 @@ (V1SI "V1SI") 
(V2SI "V2SI") (V4SI "V4SI") (V1DI "V1DI") (V2DI "V2DI") (V1TI "V1TI") (TI "TI") + (V1HF "V1HI") (V2HF "V2HI") (V4HF "V4HI") (V8HF "V8HI") (V1SF "V1SI") (V2SF "V2SI") (V4SF "V4SI") (V1DF "V1DI") (V2DF "V2DI") (V1TF "V1TI") (TF "TI")]) @@ -182,6 +186,7 @@ (V1SI "v1si") (V2SI "v2si") (V4SI "v4si") (V1DI "v1di") (V2DI "v2di") (V1TI "v1ti") (TI "ti") + (V1HF "v1hi") (V2HF "v2hi") (V4HF "v4hi") (V8HF "v8hi") (V1SF "v1si") (V2SF "v2si") (V4SF "v4si") (V1DF "v1di") (V2DF "v2di") (V1TF "v1ti") (TF "ti")]) @@ -517,9 +522,9 @@ ; Iterator for vec_set that does not use special float/vect overlay tricks (define_mode_iterator VEC_SET_NONFLOAT - [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V2SF V4SF]) + [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V2HF V4HF V8HF V2SF V4SF]) ; Iterator for single element float vectors -(define_mode_iterator VEC_SET_SINGLEFLOAT [V1SF V1DF (V1TF "TARGET_VXE")]) +(define_mode_iterator VEC_SET_SINGLEFLOAT [V1HF V1SF V1DF (V1TF "TARGET_VXE")]) ; FIXME: Support also vector mode operands for 1 ; FIXME: A target memory operand seems to be useful otherwise we end diff --git a/gcc/testsuite/g++.target/s390/float16-1.C b/gcc/testsuite/g++.target/s390/float16-1.C new file mode 100644 index 00000000000..172b264696a --- /dev/null +++ b/gcc/testsuite/g++.target/s390/float16-1.C @@ -0,0 +1,9 @@ +// { dg-do compile { target float16 } } +// { dg-options "-std=c++23" } + +// Ensure that macro __STDCPP_FLOAT16_T__ evaluates to 1 since this in turn +// ensures that common tests for float16_t are executed. 
+ +#if __STDCPP_FLOAT16_T__ != 1 +# error "Type float16_t is supported for 64-bit targets starting with z10" +#endif diff --git a/gcc/testsuite/g++.target/s390/float16-2.C b/gcc/testsuite/g++.target/s390/float16-2.C new file mode 100644 index 00000000000..8db99bb996c --- /dev/null +++ b/gcc/testsuite/g++.target/s390/float16-2.C @@ -0,0 +1,11 @@ +// { dg-do compile { target float16 } } + +// Test name mangling + +void f1 (_Float16) {} +void f2 (_Float16 *) {} +void f3 (_Float16 const *) {} + +// { dg-final { scan-assembler "_Z2f1DF16_:" } } +// { dg-final { scan-assembler "_Z2f2PDF16_:" } } +// { dg-final { scan-assembler "_Z2f3PKDF16_:" } } diff --git a/gcc/testsuite/gcc.target/s390/float16-1-2.h b/gcc/testsuite/gcc.target/s390/float16-1-2.h new file mode 100644 index 00000000000..54250e2a72e --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/float16-1-2.h @@ -0,0 +1,36 @@ +unsigned short +fpr_to_gpr (_Float16 x) +{ + unsigned short y; + __builtin_memcpy (&y, &x, 2); + return y; +} + +_Float16 +gpr_to_fpr (unsigned short x) +{ + _Float16 y; + __builtin_memcpy (&y, &x, 2); + return y; +} + +_Float16 +load_into_fpr (_Float16 *x) +{ + return *x; +} + +unsigned short +load_into_gpr (_Float16 *x) +{ + _Float16 xx = *x; + unsigned short y; + __builtin_memcpy (&y, &xx, 2); + return y; +} + +void +store (_Float16 *x, _Float16 y) +{ + *x = y; +} diff --git a/gcc/testsuite/gcc.target/s390/float16-1.c b/gcc/testsuite/gcc.target/s390/float16-1.c new file mode 100644 index 00000000000..ac8c0d85bd9 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/float16-1.c @@ -0,0 +1,42 @@ +/* { dg-do compile { target float16 } } */ +/* { dg-options "-O2 -march=z10" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +/* +** fpr_to_gpr: +** lgdr (%r[0-9]+),%f0 +** srlg (%r[0-9]+),\1,48 +** llghr %r2,\2 +** br %r14 +*/ + +/* +** gpr_to_fpr: +** sllg (%r[0-9]+),%r2,48 +** ldgr %f0,\1 +** br %r14 +*/ + +/* +** load_into_fpr: +** lh (%r[0-9]+),0\(%r2\) +** sllg (%r[0-9]+),\1,48 +** 
ldgr %f0,\2 +** br %r14 +*/ + +/* +** load_into_gpr: +** llgh %r2,0\(%r2\) +** br %r14 +*/ + +/* +** store: +** lgdr (%r[0-9]+),%f0 +** srlg (%r[0-9]+),\1,48 +** sth \2,0\(%r2\) +** br %r14 +*/ + +#include "float16-1-2.h" diff --git a/gcc/testsuite/gcc.target/s390/float16-10.c b/gcc/testsuite/gcc.target/s390/float16-10.c new file mode 100644 index 00000000000..051f83b3580 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/float16-10.c @@ -0,0 +1,30 @@ +/* { dg-do compile { target float16 } } */ +/* { dg-require-effective-target s390_mvx } */ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +/* +** copysign_reg: +** cpsdr %f0,%f2,%f0 +** br %r14 +*/ + +_Float16 +copysign_reg (_Float16 x, _Float16 y) +{ + return __builtin_copysignf16 (x, y); +} + +/* +** copysign_mem: +** vleh %v([0-9]+),0\(%r2\),0 +** vleh %v([0-9]+),0\(%r3\),0 +** cpsdr %f0,%f\2,%f\1 +** br %r14 +*/ + +_Float16 +copysign_mem (_Float16 *x, _Float16 *y) +{ + return __builtin_copysignf16 (*x, *y); +} diff --git a/gcc/testsuite/gcc.target/s390/float16-2.c b/gcc/testsuite/gcc.target/s390/float16-2.c new file mode 100644 index 00000000000..4a2e6ea043d --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/float16-2.c @@ -0,0 +1,36 @@ +/* { dg-do compile { target float16 } } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target s390_mvx } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +/* +** fpr_to_gpr: +** vlgvh %r2,%v0,0 +** br %r14 +*/ + +/* +** gpr_to_fpr: +** vlvgh %v0,%r2,0 +** br %r14 +*/ + +/* +** load_into_fpr: +** vleh %v0,0\(%r2\),0 +** br %r14 +*/ + +/* +** load_into_gpr: +** llgh %r2,0\(%r2\) +** br %r14 +*/ + +/* +** store: +** vsteh %v0,0\(%r2\),0 +** br %r14 +*/ + +#include "float16-1-2.h" diff --git a/gcc/testsuite/gcc.target/s390/float16-3.c b/gcc/testsuite/gcc.target/s390/float16-3.c new file mode 100644 index 00000000000..95820d7c86e --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/float16-3.c @@ -0,0 +1,68 @@ +/* { dg-do 
compile { target float16 } } */ +/* { dg-require-effective-target s390_mvx } */ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +/* Calling Convention Tests */ + +/* Parameters of type _Float16 are passed via FPRs, if possible. Return values + are passed via FPR f0. */ + +/* +** test_arg: +** vlr %v0,%v2 +** br %r14 +*/ + +_Float16 +test_arg (double unused, _Float16 x) +{ + return x; +} + +/* Test passing a struct with a single member. */ + +struct s { _Float16 x; }; + +/* +** test: +** br %r14 +*/ + +_Float16 +test (struct s y) +{ + return y.x; +} + +/* Test _Float16 _Complex which must be returned via reference. */ + +/* +** test_complex_return: +** ... +** ste %[fv][0-9]+,0\(%r2\) +** br %r14 +*/ + +_Float16 _Complex +test_complex_return (_Float16 a, _Float16 b) +{ + _Float16 _Complex x; + __real__ x = a; + __imag__ x = b; + return x; +} + +/* Likewise, an argument of type _Float16 _Complex is passed via reference. */ + +/* +** test_complex_arg: +** vleh %v0,0\(%r2\),0 +** br %r14 +*/ + +_Float16 +test_complex_arg (_Float16 _Complex x) +{ + return __real__ x; +} diff --git a/gcc/testsuite/gcc.target/s390/float16-4.c b/gcc/testsuite/gcc.target/s390/float16-4.c new file mode 100644 index 00000000000..df25777e4ac --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/float16-4.c @@ -0,0 +1,104 @@ +/* { dg-do run { target float16 } } */ +/* { dg-options "-O2 -fsignaling-nans -save-temps" } */ +/* { dg-final { scan-assembler-times "brasl\t%r14,__truncsfhf2@PLT" 1 } } */ +/* { dg-final { scan-assembler-times "brasl\t%r14,__truncdfhf2@PLT" 1 } } */ +/* { dg-final { scan-assembler-times "brasl\t%r14,__trunctfhf2@PLT" 1 } } */ +/* { dg-final { scan-assembler-times "brasl\t%r14,__extendhfsf2@PLT" 1 } } */ +/* { dg-final { scan-assembler-times "brasl\t%r14,__extendhfdf2@PLT" 1 } } */ +/* { dg-final { scan-assembler-times "brasl\t%r14,__extendhftf2@PLT" 1 } } */ + +#define _GNU_SOURCE + +#include <assert.h> +#include <fenv.h> +#include <math.h> + +#pragma STDC 
FENV_ACCESS ON + +#define fn_truncate(mode, type) \ + [[gnu::noipa]] _Float16 \ + truncate##mode##hf (type x) { return x; } + +fn_truncate (sf, float) +fn_truncate (df, double) +fn_truncate (tf, long double) + +#define fn_extend(mode, type) \ + [[gnu::noipa]] type \ + extendhf##mode (_Float16 x) { return x; } + +fn_extend (sf, float) +fn_extend (df, double) +fn_extend (tf, long double) + +int +main (void) +{ + feclearexcept (FE_ALL_EXCEPT); + + /* Don't use isnan() but rather check manually since otherwise values of type + _Float16 would be extended before being passed to isnan() and we really + want to verify type _Float16 here. */ +#define test_truncate_nan(mode, fn) \ + { \ + unsigned short tmp; \ + _Float16 x; \ + x = truncate##mode##hf (__builtin_nans##fn ("42")); \ + assert (fetestexcept (FE_ALL_EXCEPT) == FE_INVALID); \ + __builtin_memcpy (&tmp, &x, 2); \ + assert (tmp == 0x7E00); \ + feclearexcept (FE_ALL_EXCEPT); \ + x = truncate##mode##hf (__builtin_nan##fn ("42")); \ + assert (fetestexcept (FE_ALL_EXCEPT) == 0); \ + __builtin_memcpy (&tmp, &x, 2); \ + assert (tmp == 0x7E00); \ + } + + test_truncate_nan (sf, f) + test_truncate_nan (df, ) + test_truncate_nan (tf, l) + +#define test_truncate_inexact_overflow(mode) \ + { \ + truncate##mode##hf (__FLT_MAX__); \ + assert (fetestexcept (FE_ALL_EXCEPT) == (FE_INEXACT | FE_OVERFLOW)); \ + feclearexcept (FE_ALL_EXCEPT); \ + truncate##mode##hf (42.f); \ + assert (fetestexcept (FE_ALL_EXCEPT) == 0); \ + } + + test_truncate_inexact_overflow (sf) + test_truncate_inexact_overflow (df) + test_truncate_inexact_overflow (tf) + +#define test_truncate_inexact_underflow(mode) \ + { \ + truncate##mode##hf (__FLT_MIN__); \ + assert (fetestexcept (FE_ALL_EXCEPT) == (FE_INEXACT | FE_UNDERFLOW)); \ + feclearexcept (FE_ALL_EXCEPT); \ + truncate##mode##hf (-42.f); \ + assert (fetestexcept (FE_ALL_EXCEPT) == 0); \ + } + + test_truncate_inexact_underflow (sf) + test_truncate_inexact_underflow (df) + test_truncate_inexact_underflow 
(tf) + +#define test_extend(type, mode) \ + { \ + type x; \ + x = extendhf##mode (__builtin_nansf16 ("42")); \ + assert (fetestexcept (FE_ALL_EXCEPT) == FE_INVALID); \ + assert (isnan (x)); \ + feclearexcept (FE_ALL_EXCEPT); \ + x = extendhf##mode (__builtin_nanf16 ("42")); \ + assert (fetestexcept (FE_ALL_EXCEPT) == 0); \ + assert (isnan (x)); \ + } + + test_extend (float, sf) + test_extend (double, df) + test_extend (long double, tf) + + return 0; +} diff --git a/gcc/testsuite/gcc.target/s390/float16-5.c b/gcc/testsuite/gcc.target/s390/float16-5.c new file mode 100644 index 00000000000..1d9b6fe6954 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/float16-5.c @@ -0,0 +1,105 @@ +/* { dg-do run { target float16 } } */ +/* { dg-options "-O2 -fsignaling-nans -save-temps" } */ +/* { dg-final { scan-assembler-times "brasl\t%r14,__dpd_truncsdhf@PLT" 1 } } */ +/* { dg-final { scan-assembler-times "brasl\t%r14,__dpd_truncddhf@PLT" 1 } } */ +/* { dg-final { scan-assembler-times "brasl\t%r14,__dpd_trunctdhf@PLT" 1 } } */ +/* { dg-final { scan-assembler-times "brasl\t%r14,__dpd_extendhfsd@PLT" 1 } } */ +/* { dg-final { scan-assembler-times "brasl\t%r14,__dpd_extendhfdd@PLT" 1 } } */ +/* { dg-final { scan-assembler-times "brasl\t%r14,__dpd_extendhftd@PLT" 1 } } */ + +#define _GNU_SOURCE + +#include <assert.h> +#include <fenv.h> +#include <math.h> + +#pragma STDC FENV_ACCESS ON + +#define fn_truncate(mode, type) \ + [[gnu::noipa]] _Float16 \ + truncate##mode##hf (type x) { return x; } + +fn_truncate (sd, _Decimal32) +fn_truncate (dd, _Decimal64) +fn_truncate (td, _Decimal128) + +#define fn_extend(mode, type) \ + [[gnu::noipa]] type \ + extendhf##mode (_Float16 x) { return x; } + +fn_extend (sd, _Decimal32) +fn_extend (dd, _Decimal64) +fn_extend (td, _Decimal128) + +int +main (void) +{ + feclearexcept (FE_ALL_EXCEPT); + + /* Don't use isnan() but rather check manually since otherwise values of type + _Float16 would be extended before being passed to isnan() and we really + want to verify type _Float16 
here. */ +#define test_truncate_nan(mode, fn) \ + { \ + unsigned short tmp; \ + _Float16 x; \ + x = truncate##mode##hf (__builtin_nans##fn ("42")); \ + assert (fetestexcept (FE_ALL_EXCEPT) == FE_INVALID); \ + __builtin_memcpy (&tmp, &x, 2); \ + __builtin_printf ("%u\n", tmp); \ + assert (tmp == 0x7E00); \ + feclearexcept (FE_ALL_EXCEPT); \ + x = truncate##mode##hf (__builtin_nan##fn ("42")); \ + assert (fetestexcept (FE_ALL_EXCEPT) == 0); \ + __builtin_memcpy (&tmp, &x, 2); \ + assert (tmp == 0x7E00); \ + } + + test_truncate_nan (sd, d32) + test_truncate_nan (dd, d64) + test_truncate_nan (td, d128) + +#define test_truncate_inexact_overflow(mode) \ + { \ + truncate##mode##hf (__FLT_MAX__); \ + assert (fetestexcept (FE_ALL_EXCEPT) == (FE_INEXACT | FE_OVERFLOW)); \ + feclearexcept (FE_ALL_EXCEPT); \ + truncate##mode##hf (42.f); \ + assert (fetestexcept (FE_ALL_EXCEPT) == 0); \ + } + + test_truncate_inexact_overflow (sd) + test_truncate_inexact_overflow (dd) + test_truncate_inexact_overflow (td) + +#define test_truncate_inexact_underflow(mode) \ + { \ + truncate##mode##hf (__FLT_MIN__); \ + assert (fetestexcept (FE_ALL_EXCEPT) == (FE_INEXACT | FE_UNDERFLOW)); \ + feclearexcept (FE_ALL_EXCEPT); \ + truncate##mode##hf (-42.f); \ + assert (fetestexcept (FE_ALL_EXCEPT) == 0); \ + } + + test_truncate_inexact_underflow (sd) + test_truncate_inexact_underflow (dd) + test_truncate_inexact_underflow (td) + +#define test_extend(type, mode) \ + { \ + type x; \ + x = extendhf##mode (__builtin_nansf16 ("42")); \ + assert (fetestexcept (FE_ALL_EXCEPT) == FE_INVALID); \ + assert (isnan (x)); \ + feclearexcept (FE_ALL_EXCEPT); \ + x = extendhf##mode (__builtin_nanf16 ("42")); \ + assert (fetestexcept (FE_ALL_EXCEPT) == 0); \ + assert (isnan (x)); \ + } + + test_extend (_Decimal32, sd) + test_extend (_Decimal64, dd) + test_extend (_Decimal128, td) + + return 0; +} diff --git a/gcc/testsuite/gcc.target/s390/float16-6.c b/gcc/testsuite/gcc.target/s390/float16-6.c new file mode 100644 
index 00000000000..012285d20fa --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/float16-6.c @@ -0,0 +1,71 @@ +/* { dg-do run { target float16 } } */ +/* { dg-options "-O2 -save-temps" } */ +/* { dg-final { scan-assembler-times "brasl\t%r14,__truncsfhf2@PLT" 1 } } */ +/* { dg-final { scan-assembler-times "brasl\t%r14,__truncdfhf2@PLT" 1 } } */ +/* { dg-final { scan-assembler-times "brasl\t%r14,__trunctfhf2@PLT" 1 } } */ + +#include <assert.h> +#include <fenv.h> + +#define fn_truncate(mode, type) \ + [[gnu::noipa]] _Float16 \ + truncate##mode##hf (type x) { return x; } + +fn_truncate (sf, float) +fn_truncate (df, double) +fn_truncate (tf, long double) + +#define fn_test_truncate(mode, type) \ +void \ +test_truncate##mode (type x, unsigned short a, unsigned short b, \ + unsigned short c, unsigned short d) \ +{ \ + _Float16 y; \ + unsigned short z; \ +\ + fesetround (FE_TONEAREST); \ + y = truncate##mode##hf (x); \ + __builtin_memcpy (&z, &y, sizeof (z)); \ + assert (z == a); \ +\ + fesetround (FE_TOWARDZERO); \ + y = truncate##mode##hf (x); \ + __builtin_memcpy (&z, &y, sizeof (z)); \ + assert (z == b); \ +\ + fesetround (FE_DOWNWARD); \ + y = truncate##mode##hf (x); \ + __builtin_memcpy (&z, &y, sizeof (z)); \ + assert (z == c); \ +\ + fesetround (FE_UPWARD); \ + y = truncate##mode##hf (x); \ + __builtin_memcpy (&z, &y, sizeof (z)); \ + assert (z == d); \ +} + +fn_test_truncate (sf, float) +fn_test_truncate (df, double) +fn_test_truncate (tf, long double) + +int +main (void) +{ + test_truncatesf (__FLT_MAX__, 0x7c00, 0x7bff, 0x7bff, 0x7c00); + test_truncatesf (__FLT_MIN__, 0, 0, 0, 1); + test_truncatesf (0.5f, 0x3800, 0x3800, 0x3800, 0x3800); + test_truncatesf (-0.0000001f, 0x8002, 0x8001, 0x8002, 0x8001); + test_truncatesf (0.0000001f, 2, 1, 1, 2); + + test_truncatedf (__DBL_MAX__, 0x7c00, 0x7bff, 0x7bff, 0x7c00); + test_truncatedf (__DBL_MIN__, 0, 0, 0, 1); + test_truncatedf (0.5f, 0x3800, 0x3800, 0x3800, 0x3800); + test_truncatedf (-0.0000001f, 0x8002, 0x8001, 0x8002, 0x8001); + 
test_truncatedf (0.0000001f, 2, 1, 1, 2); + + test_truncatetf (__LDBL_MAX__, 0x7c00, 0x7bff, 0x7bff, 0x7c00); + test_truncatetf (__LDBL_MIN__, 0, 0, 0, 1); + test_truncatetf (0.5f, 0x3800, 0x3800, 0x3800, 0x3800); + test_truncatetf (-0.0000001f, 0x8002, 0x8001, 0x8002, 0x8001); + test_truncatetf (0.0000001f, 2, 1, 1, 2); +} diff --git a/gcc/testsuite/gcc.target/s390/float16-7.c b/gcc/testsuite/gcc.target/s390/float16-7.c new file mode 100644 index 00000000000..f82150ca56a --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/float16-7.c @@ -0,0 +1,194 @@ +/* { dg-do compile { target float16 } } */ +/* { dg-require-effective-target s390_mvx } */ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +/* +** test_asm_constant_zero_via_f: +** vzero %v([0-9]+) +** foo %f\1 +** br %r14 +*/ + +void +test_asm_constant_zero_via_f (void) +{ + __asm__ __volatile__ ("foo\t%0" :: "f" (0.f16)); +} + +/* +** test_asm_constant_zero_via_v: +** vzero %v([0-9]+) +** foo %f\1 +** br %r14 +*/ + +void +test_asm_constant_zero_via_v (void) +{ + __asm__ __volatile__ ("foo\t%0" :: "v" (0.f16)); +} + +/* +** test_asm_constant_via_f: +** larl (%r[0-9]+),\.L[0-9]+ +** vleh %v([0-9]+),\.L[0-9]+-\.L[0-9]+\(\1\),0 +** foo %f\2 +** br %r14 +*/ + +void +test_asm_constant_via_f (void) +{ + __asm__ __volatile__ ("foo\t%0" :: "f" (42.f16)); +} + +/* +** test_asm_constant_via_v: +** larl (%r[0-9]+),\.L[0-9]+ +** vleh %v([0-9]+),\.L[0-9]+-\.L[0-9]+\(\1\),0 +** foo %f\2 +** br %r14 +*/ + +void +test_asm_constant_via_v (void) +{ + __asm__ __volatile__ ("foo\t%0" :: "v" (42.f16)); +} + +/* +** test_asm_in_float16_via_f: +** foo %f0 +** br %r14 +*/ + +void +test_asm_in_float16_via_f (_Float16 x) +{ + __asm__ __volatile__ ("foo\t%0" :: "f" (x)); +} + +/* +** test_asm_in_float16_via_v: +** foo %f0 +** br %r14 +*/ + +void +test_asm_in_float16_via_v (_Float16 x) +{ + __asm__ __volatile__ ("foo\t%0" :: "v" (x)); +} + +/* +** test_asm_in_float16_via_r: +** vlgvh (%r[0-9]+),%v0,0 +** 
foo \1 +** br %r14 +*/ + +void +test_asm_in_float16_via_r (_Float16 x) +{ + __asm__ __volatile__ ("foo\t%0" :: "r" (x)); +} + +/* +** test_asm_in_ushort_via_f: +** vlvgh %v([0-9]+),%r2,0 +** foo %f\1 +** br %r14 +*/ + +void +test_asm_in_ushort_via_f (unsigned short x) +{ + __asm__ __volatile__ ("foo\t%0" :: "f" (x)); +} + +/* +** test_asm_in_ushort_via_v: +** vlvgh %v([0-9]+),%r2,0 +** foo %f\1 +** br %r14 +*/ + +void +test_asm_in_ushort_via_v (unsigned short x) +{ + __asm__ __volatile__ ("foo\t%0" :: "v" (x)); +} + +/* +** test_asm_out_float16_via_f: +** foo %f0 +** br %r14 +*/ + +_Float16 +test_asm_out_float16_via_f (void) +{ + _Float16 x; + __asm__ ("foo\t%0" : "=f" (x)); + return x; +} + +/* +** test_asm_out_float16_via_v: +** foo %f0 +** br %r14 +*/ + +_Float16 +test_asm_out_float16_via_v (void) +{ + _Float16 x; + __asm__ ("foo\t%0" : "=v" (x)); + return x; +} + +/* +** test_asm_out_float16_via_r: +** foo (%r[0-9]+) +** vlvgh %v0,\1,0 +** br %r14 +*/ + +_Float16 +test_asm_out_float16_via_r (void) +{ + _Float16 x; + __asm__ ("foo\t%0" : "=r" (x)); + return x; +} + +/* +** test_asm_out_ushort_via_f: +** foo %f([0-9]+) +** vlgvh %r2,%v\1,0 +** br %r14 +*/ + +unsigned short +test_asm_out_ushort_via_f (void) +{ + unsigned short x; + __asm__ ("foo\t%0" : "=f" (x)); + return x; +} + +/* +** test_asm_out_ushort_via_v: +** foo %f([0-9]+) +** vlgvh %r2,%v\1,0 +** br %r14 +*/ + +unsigned short +test_asm_out_ushort_via_v (void) +{ + unsigned short x; + __asm__ ("foo\t%0" : "=v" (x)); + return x; +} diff --git a/gcc/testsuite/gcc.target/s390/float16-8.c b/gcc/testsuite/gcc.target/s390/float16-8.c new file mode 100644 index 00000000000..ae37b771ec3 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/float16-8.c @@ -0,0 +1,22 @@ +/* { dg-do run { target float16 } } */ +/* { dg-options "-O2 -march=z10 -save-temps" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +/* +** signbit_reg: +** lgdr (%r[0-9]+),%f0 +** srlg (%r[0-9]+),\1,63 +** lgfr %r2,\2 +** br %r14 +*/ 
+ +/* +** signbit_mem: +** lh (%r[0-9]+),0\(%r2\) +** llhr (%r[0-9]+),\1 +** srl \2,15 +** lgfr %r2,\2 +** br %r14 +*/ + +#include "float16-signbit.h" diff --git a/gcc/testsuite/gcc.target/s390/float16-9.c b/gcc/testsuite/gcc.target/s390/float16-9.c new file mode 100644 index 00000000000..1d02a3f5da8 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/float16-9.c @@ -0,0 +1,20 @@ +/* { dg-do run { target float16 } } */ +/* { dg-require-effective-target s390_mvx } */ +/* { dg-options "-O2 -save-temps" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +/* +** signbit_reg: +** vlgvh (%r[0-9]+),%v0,0 +** risbgn %r2,\1,64-1,128\+63,48\+1 +** br %r14 +*/ + +/* +** signbit_mem: +** llh (%r[0-9]+),0\(%r2\) +** risbgn %r2,\1,64-1,128\+63,48\+1 +** br %r14 +*/ + +#include "float16-signbit.h" diff --git a/gcc/testsuite/gcc.target/s390/float16-signbit.h b/gcc/testsuite/gcc.target/s390/float16-signbit.h new file mode 100644 index 00000000000..137bc247383 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/float16-signbit.h @@ -0,0 +1,56 @@ +[[gnu::noipa]] int +signbit_reg (_Float16 x) +{ + return __builtin_signbit (x); +} + +[[gnu::noipa]] int +signbit_mem (_Float16 *x) +{ + return __builtin_signbit (*x); +} + +int +main (void) +{ + _Float16 x; + int res = 0; + + x = __builtin_nanf16 ("42"); + res += signbit_reg (x); + res += signbit_mem (&x); + + x = __builtin_inff16 (); + res += signbit_reg (x); + res += signbit_mem (&x); + + x = 0.f16; + res += signbit_reg (x); + res += signbit_mem (&x); + + x = 42.42f16; + res += signbit_reg (x); + res += signbit_mem (&x); + + if (res != 0) + __builtin_abort (); + + x = -__builtin_nanf16 ("42"); + res += signbit_reg (x); + res += signbit_mem (&x); + + x = -__builtin_inff16 (); + res += signbit_reg (x); + res += signbit_mem (&x); + + x = -0.f16; + res += signbit_reg (x); + res += signbit_mem (&x); + + x = -42.42f16; + res += signbit_reg (x); + res += signbit_mem (&x); + + if (res != 8) + __builtin_abort (); +} diff --git 
a/gcc/testsuite/gcc.target/s390/vector/vec-extract-4.c b/gcc/testsuite/gcc.target/s390/vector/vec-extract-4.c new file mode 100644 index 00000000000..923bcb07c8d --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/vec-extract-4.c @@ -0,0 +1,320 @@ +/* { dg-do compile { target float16 } } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target s390_mvx } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +typedef _Float16 __attribute__ ((vector_size (2))) v1hf; +typedef _Float16 __attribute__ ((vector_size (4))) v2hf; +typedef _Float16 __attribute__ ((vector_size (8))) v4hf; +typedef _Float16 __attribute__ ((vector_size (16))) v8hf; + + + +/************ + * REGISTER * + ************/ + + + +/* +** vec_extract_first_v1hf: +** vlr %v0,%v24 +** br %r14 +*/ + +/* +** vec_extract_first_v2hf: +** vlr %v0,%v24 +** br %r14 +*/ + +/* +** vec_extract_first_v4hf: +** vlr %v0,%v24 +** br %r14 +*/ + +/* +** vec_extract_first_v8hf: +** vlr %v0,%v24 +** br %r14 +*/ + +_Float16 vec_extract_first_v1hf (v1hf x) { return x[0]; } +_Float16 vec_extract_first_v2hf (v2hf x) { return x[0]; } +_Float16 vec_extract_first_v4hf (v4hf x) { return x[0]; } +_Float16 vec_extract_first_v8hf (v8hf x) { return x[0]; } + +/* +** vec_extract_second_v2hf: +** vreph %v0,%v24,1 +** br %r14 +*/ + +/* +** vec_extract_second_v4hf: +** vreph %v0,%v24,1 +** br %r14 +*/ + +/* +** vec_extract_second_v8hf: +** vreph %v0,%v24,1 +** br %r14 +*/ + +_Float16 vec_extract_second_v2hf (v2hf x) { return x[1]; } +_Float16 vec_extract_second_v4hf (v4hf x) { return x[1]; } +_Float16 vec_extract_second_v8hf (v8hf x) { return x[1]; } + +/* +** vec_extract_third_v4hf: +** vreph %v0,%v24,2 +** br %r14 +*/ + +/* +** vec_extract_third_v8hf: +** vreph %v0,%v24,2 +** br %r14 +*/ + +_Float16 vec_extract_third_v4hf (v4hf x) { return x[2]; } +_Float16 vec_extract_third_v8hf (v8hf x) { return x[2]; } + +/* +** vec_extract_fourth_v4hf: +** vreph %v0,%v24,3 +** br %r14 +*/ + +/* +** vec_extract_fourth_v8hf: +** 
vreph %v0,%v24,3 +** br %r14 +*/ + +_Float16 vec_extract_fourth_v4hf (v4hf x) { return x[3]; } +_Float16 vec_extract_fourth_v8hf (v8hf x) { return x[3]; } + +/* +** vec_extract_fifth_v8hf: +** vreph %v0,%v24,4 +** br %r14 +*/ + +_Float16 vec_extract_fifth_v8hf (v8hf x) { return x[4]; } + +/* +** vec_extract_sixth_v8hf: +** vreph %v0,%v24,5 +** br %r14 +*/ + +_Float16 vec_extract_sixth_v8hf (v8hf x) { return x[5]; } + +/* +** vec_extract_seventh_v8hf: +** vreph %v0,%v24,6 +** br %r14 +*/ + +_Float16 vec_extract_seventh_v8hf (v8hf x) { return x[6]; } + +/* +** vec_extract_eighth_v8hf: +** vreph %v0,%v24,7 +** br %r14 +*/ + +_Float16 vec_extract_eighth_v8hf (v8hf x) { return x[7]; } + +/* +** vec_extract_nth_plus_v1hf: +** vlgvh (%r[0-9]+),%v24,3\(%r2\) +** vlvgh %v0,\1,0 +** br %r14 +*/ + +_Float16 vec_extract_nth_plus_v1hf (v8hf x, int n) { return x[n + 3]; } + +/* +** vec_extract_nth_plus_v2hf: +** vlgvh (%r[0-9]+),%v24,3\(%r2\) +** vlvgh %v0,\1,0 +** br %r14 +*/ + +_Float16 vec_extract_nth_plus_v2hf (v8hf x, int n) { return x[n + 3]; } + +/* +** vec_extract_nth_plus_v4hf: +** vlgvh (%r[0-9]+),%v24,3\(%r2\) +** vlvgh %v0,\1,0 +** br %r14 +*/ + +_Float16 vec_extract_nth_plus_v4hf (v8hf x, int n) { return x[n + 3]; } + +/* +** vec_extract_nth_plus_v8hf: +** vlgvh (%r[0-9]+),%v24,3\(%r2\) +** vlvgh %v0,\1,0 +** br %r14 +*/ + +_Float16 vec_extract_nth_plus_v8hf (v8hf x, int n) { return x[n + 3]; } + + + +/********** + * MEMORY * + **********/ + + + +/* +** vec_extract_first_v1hf_mem: +** vsteh %v24,0\(%r2\),0 +** br %r14 +*/ + +/* +** vec_extract_first_v2hf_mem: +** vsteh %v24,0\(%r2\),0 +** br %r14 +*/ + +/* +** vec_extract_first_v4hf_mem: +** vsteh %v24,0\(%r2\),0 +** br %r14 +*/ + +/* +** vec_extract_first_v8hf_mem: +** vsteh %v24,0\(%r2\),0 +** br %r14 +*/ + +void vec_extract_first_v1hf_mem (_Float16 *r, v1hf x) { *r = x[0]; } +void vec_extract_first_v2hf_mem (_Float16 *r, v2hf x) { *r = x[0]; } +void vec_extract_first_v4hf_mem (_Float16 *r, v4hf x) { *r = x[0]; } 
+void vec_extract_first_v8hf_mem (_Float16 *r, v8hf x) { *r = x[0]; } + +/* +** vec_extract_second_v2hf_mem: +** vsteh %v24,0\(%r2\),1 +** br %r14 +*/ + +/* +** vec_extract_second_v4hf_mem: +** vsteh %v24,0\(%r2\),1 +** br %r14 +*/ + +/* +** vec_extract_second_v8hf_mem: +** vsteh %v24,0\(%r2\),1 +** br %r14 +*/ + +void vec_extract_second_v2hf_mem (_Float16 *r, v2hf x) { *r = x[1]; } +void vec_extract_second_v4hf_mem (_Float16 *r, v4hf x) { *r = x[1]; } +void vec_extract_second_v8hf_mem (_Float16 *r, v8hf x) { *r = x[1]; } + +/* +** vec_extract_third_v4hf_mem: +** vsteh %v24,0\(%r2\),2 +** br %r14 +*/ + +/* +** vec_extract_third_v8hf_mem: +** vsteh %v24,0\(%r2\),2 +** br %r14 +*/ + +void vec_extract_third_v4hf_mem (_Float16 *r, v4hf x) { *r = x[2]; } +void vec_extract_third_v8hf_mem (_Float16 *r, v8hf x) { *r = x[2]; } + +/* +** vec_extract_fourth_v4hf_mem: +** vsteh %v24,0\(%r2\),3 +** br %r14 +*/ + +/* +** vec_extract_fourth_v8hf_mem: +** vsteh %v24,0\(%r2\),3 +** br %r14 +*/ + +void vec_extract_fourth_v4hf_mem (_Float16 *r, v4hf x) { *r = x[3]; } +void vec_extract_fourth_v8hf_mem (_Float16 *r, v8hf x) { *r = x[3]; } + +/* +** vec_extract_fifth_v8hf_mem: +** vsteh %v24,0\(%r2\),4 +** br %r14 +*/ + +void vec_extract_fifth_v8hf_mem (_Float16 *r, v8hf x) { *r = x[4]; } + +/* +** vec_extract_sixth_v8hf_mem: +** vsteh %v24,0\(%r2\),5 +** br %r14 +*/ + +void vec_extract_sixth_v8hf_mem (_Float16 *r, v8hf x) { *r = x[5]; } + +/* +** vec_extract_seventh_v8hf_mem: +** vsteh %v24,0\(%r2\),6 +** br %r14 +*/ + +void vec_extract_seventh_v8hf_mem (_Float16 *r, v8hf x) { *r = x[6]; } + +/* +** vec_extract_eighth_v8hf_mem: +** vsteh %v24,0\(%r2\),7 +** br %r14 +*/ + +void vec_extract_eighth_v8hf_mem (_Float16 *r, v8hf x) { *r = x[7]; } + +/* +** vec_extract_nth_plus_v1hf_mem: +** vlgvh (%r[0-9]+),%v24,3\(%r3\) +** sth \1,0\(%r2\) +** br %r14 +*/ + +/* +** vec_extract_nth_plus_v2hf_mem: +** vlgvh (%r[0-9]+),%v24,3\(%r3\) +** sth \1,0\(%r2\) +** br %r14 +*/ + +/* +** 
vec_extract_nth_plus_v4hf_mem: +** vlgvh (%r[0-9]+),%v24,3\(%r3\) +** sth \1,0\(%r2\) +** br %r14 +*/ + +/* +** vec_extract_nth_plus_v8hf_mem: +** vlgvh (%r[0-9]+),%v24,3\(%r3\) +** sth \1,0\(%r2\) +** br %r14 +*/ + +void vec_extract_nth_plus_v1hf_mem (_Float16 *r, v1hf x, int n) { *r = x[n + 3]; } +void vec_extract_nth_plus_v2hf_mem (_Float16 *r, v2hf x, int n) { *r = x[n + 3]; } +void vec_extract_nth_plus_v4hf_mem (_Float16 *r, v4hf x, int n) { *r = x[n + 3]; } +void vec_extract_nth_plus_v8hf_mem (_Float16 *r, v8hf x, int n) { *r = x[n + 3]; } diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-float16-1.c b/gcc/testsuite/gcc.target/s390/vector/vec-float16-1.c new file mode 100644 index 00000000000..c6d890640b7 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/vec-float16-1.c @@ -0,0 +1,371 @@ +/* { dg-do compile { target float16 } } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target s390_mvx } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +/* ABI tests. Arguments and return value are passed via vector registers for + vector sizes less than or equal to 16 bytes. Larger vectors or more than 8 + vectors are passed via reference. 
*/ + +typedef _Float16 __attribute__ ((vector_size (2))) v1hf; +typedef _Float16 __attribute__ ((vector_size (4))) v2hf; +typedef _Float16 __attribute__ ((vector_size (8))) v4hf; +typedef _Float16 __attribute__ ((vector_size (16))) v8hf; +typedef _Float16 __attribute__ ((vector_size (32))) v16hf; + +#define ZERO_v1hf (v1hf){0} +#define ZERO_v2hf (v2hf){0,0} +#define ZERO_v4hf (v4hf){0,0,0,0} +#define ZERO_v8hf (v8hf){0,0,0,0,0,0,0,0} + +#define T(V) \ + V V##_callee_arg_1 (V x1, V x2, V x3, V x4, V x5, V x6, V x7, V x8, V x9) { return x1; } \ + V V##_callee_arg_2 (V x1, V x2, V x3, V x4, V x5, V x6, V x7, V x8, V x9) { return x2; } \ + V V##_callee_arg_3 (V x1, V x2, V x3, V x4, V x5, V x6, V x7, V x8, V x9) { return x3; } \ + V V##_callee_arg_4 (V x1, V x2, V x3, V x4, V x5, V x6, V x7, V x8, V x9) { return x4; } \ + V V##_callee_arg_5 (V x1, V x2, V x3, V x4, V x5, V x6, V x7, V x8, V x9) { return x5; } \ + V V##_callee_arg_6 (V x1, V x2, V x3, V x4, V x5, V x6, V x7, V x8, V x9) { return x6; } \ + V V##_callee_arg_7 (V x1, V x2, V x3, V x4, V x5, V x6, V x7, V x8, V x9) { return x7; } \ + V V##_callee_arg_8 (V x1, V x2, V x3, V x4, V x5, V x6, V x7, V x8, V x9) { return x8; } \ + V V##_callee_arg_9 (V x1, V x2, V x3, V x4, V x5, V x6, V x7, V x8, V x9) { return x9; } \ + void V##_caller_nargs_9 (void) \ + { \ + V x = ZERO_##V; \ + extern void V##_fun_9 (V, V, V, V, V, V, V, V, V); \ + V##_fun_9 (x, x, x, x, x, x, x, x, x); \ + } + + + +/******** + * V1HF * + ********/ + +T (v1hf) + +/* +** v1hf_callee_arg_1: +** br %r14 +*/ + +/* +** v1hf_callee_arg_2: +** vlr %v24,%v26 +** br %r14 +*/ + +/* +** v1hf_callee_arg_3: +** vlr %v24,%v28 +** br %r14 +*/ + +/* +** v1hf_callee_arg_4: +** vlr %v24,%v30 +** br %r14 +*/ + +/* +** v1hf_callee_arg_5: +** vlr %v24,%v25 +** br %r14 +*/ + +/* +** v1hf_callee_arg_6: +** vlr %v24,%v27 +** br %r14 +*/ + +/* +** v1hf_callee_arg_7: +** vlr %v24,%v29 +** br %r14 +*/ + +/* +** v1hf_callee_arg_8: +** vlr %v24,%v31 +** br %r14 +*/ + +/* 
+** v1hf_callee_arg_9: +** vleh %v24,160\(%r15\),0 +** br %r14 +*/ + +/* +** v1hf_caller_nargs_9: +** stmg %r14,%r15,112\(%r15\) +** vzero %v31 +** lay %r15,-168\(%r15\) +** mvhhi 160\(%r15\),0 +** vlr %v29,%v31 +** vlr %v27,%v31 +** vlr %v25,%v31 +** vlr %v30,%v31 +** vlr %v28,%v31 +** vlr %v26,%v31 +** vlr %v24,%v31 +** brasl %r14,v1hf_fun_9@PLT +** lmg %r14,%r15,280\(%r15\) +** br %r14 +*/ + + + +/******** + * V2HF * + ********/ + +T (v2hf) + +/* +** v2hf_callee_arg_1: +** br %r14 +*/ + +/* +** v2hf_callee_arg_2: +** vlr %v24,%v26 +** br %r14 +*/ + +/* +** v2hf_callee_arg_3: +** vlr %v24,%v28 +** br %r14 +*/ + +/* +** v2hf_callee_arg_4: +** vlr %v24,%v30 +** br %r14 +*/ + +/* +** v2hf_callee_arg_5: +** vlr %v24,%v25 +** br %r14 +*/ + +/* +** v2hf_callee_arg_6: +** vlr %v24,%v27 +** br %r14 +*/ + +/* +** v2hf_callee_arg_7: +** vlr %v24,%v29 +** br %r14 +*/ + +/* +** v2hf_callee_arg_8: +** vlr %v24,%v31 +** br %r14 +*/ + +/* +** v2hf_callee_arg_9: +** vlef %v24,160\(%r15\),0 +** br %r14 +*/ + +/* +** v2hf_caller_nargs_9: +** stmg %r14,%r15,112\(%r15\) +** vzero %v31 +** lay %r15,-168\(%r15\) +** mvhi 160\(%r15\),0 +** vlr %v29,%v31 +** vlr %v27,%v31 +** vlr %v25,%v31 +** vlr %v30,%v31 +** vlr %v28,%v31 +** vlr %v26,%v31 +** vlr %v24,%v31 +** brasl %r14,v2hf_fun_9@PLT +** lmg %r14,%r15,280\(%r15\) +** br %r14 +*/ + + + +/******** + * V4HF * + ********/ + +T (v4hf) + +/* +** v4hf_callee_arg_1: +** br %r14 +*/ + +/* +** v4hf_callee_arg_2: +** vlr %v24,%v26 +** br %r14 +*/ + +/* +** v4hf_callee_arg_3: +** vlr %v24,%v28 +** br %r14 +*/ + +/* +** v4hf_callee_arg_4: +** vlr %v24,%v30 +** br %r14 +*/ + +/* +** v4hf_callee_arg_5: +** vlr %v24,%v25 +** br %r14 +*/ + +/* +** v4hf_callee_arg_6: +** vlr %v24,%v27 +** br %r14 +*/ + +/* +** v4hf_callee_arg_7: +** vlr %v24,%v29 +** br %r14 +*/ + +/* +** v4hf_callee_arg_8: +** vlr %v24,%v31 +** br %r14 +*/ + +/* +** v4hf_callee_arg_9: +** vleg %v24,160\(%r15\),0 +** br %r14 +*/ + +/* +** v4hf_caller_nargs_9: +** stmg 
%r14,%r15,112\(%r15\) +** vzero %v31 +** lay %r15,-168\(%r15\) +** mvghi 160\(%r15\),0 +** vlr %v29,%v31 +** vlr %v27,%v31 +** vlr %v25,%v31 +** vlr %v30,%v31 +** vlr %v28,%v31 +** vlr %v26,%v31 +** vlr %v24,%v31 +** brasl %r14,v4hf_fun_9@PLT +** lmg %r14,%r15,280\(%r15\) +** br %r14 +*/ + + + +/******** + * V8HF * + ********/ + +T (v8hf) + +/* +** v8hf_callee_arg_1: +** br %r14 +*/ + +/* +** v8hf_callee_arg_2: +** vlr %v24,%v26 +** br %r14 +*/ + +/* +** v8hf_callee_arg_3: +** vlr %v24,%v28 +** br %r14 +*/ + +/* +** v8hf_callee_arg_4: +** vlr %v24,%v30 +** br %r14 +*/ + +/* +** v8hf_callee_arg_5: +** vlr %v24,%v25 +** br %r14 +*/ + +/* +** v8hf_callee_arg_6: +** vlr %v24,%v27 +** br %r14 +*/ + +/* +** v8hf_callee_arg_7: +** vlr %v24,%v29 +** br %r14 +*/ + +/* +** v8hf_callee_arg_8: +** vlr %v24,%v31 +** br %r14 +*/ + +/* +** v8hf_callee_arg_9: +** vl %v24,160\(%r15\),3 +** br %r14 +*/ + +/* +** v8hf_caller_nargs_9: +** stmg %r14,%r15,112\(%r15\) +** vzero %v31 +** lay %r15,-176\(%r15\) +** vst %v31,160\(%r15\),3 +** vlr %v29,%v31 +** vlr %v27,%v31 +** vlr %v25,%v31 +** vlr %v30,%v31 +** vlr %v28,%v31 +** vlr %v26,%v31 +** vlr %v24,%v31 +** brasl %r14,v8hf_fun_9@PLT +** lmg %r14,%r15,288\(%r15\) +** br %r14 +*/ + + + +/********* + * V16HF * + *********/ + +/* +** v16hf_callee_arg_2: +** mvc 0\(16,%r2\),0\(%r4\) +** mvc 16\(16,%r2\),16\(%r4\) +** br %r14 +*/ + +v16hf +v16hf_callee_arg_2 (v16hf x1, v16hf x2) +{ + return x2; +} diff --git a/libgcc/config.host b/libgcc/config.host index 443b4567a7c..850a37b0555 100644 --- a/libgcc/config.host +++ b/libgcc/config.host @@ -1393,6 +1393,9 @@ s390x-*-linux*) if test "${host_address}" = 32; then tmake_file="${tmake_file} s390/32/t-floattodi" else + if test "$libgcc_cv_s390_float16" = "yes"; then + tmake_file="${tmake_file} s390/t-float16" + fi tmake_file="${tmake_file} s390/t-softfp t-softfp" fi md_unwind_header=s390/linux-unwind.h diff --git a/libgcc/config/s390/_dpd_dd_to_hf.c b/libgcc/config/s390/_dpd_dd_to_hf.c new file 
mode 100644 index 00000000000..d2b7a347847 --- /dev/null +++ b/libgcc/config/s390/_dpd_dd_to_hf.c @@ -0,0 +1,39 @@ +/* Copyright (C) 2001-2025 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +typedef float HFtype __attribute__ ((mode (HF))); +typedef float DFtype __attribute__ ((mode (DF))); + +HFtype __dpd_truncddhf (_Decimal64); + +__attribute__ ((noinline)) +static DFtype +force_convert (_Decimal64 x) +{ return x; } /* _Decimal64 -> DFtype; the caller narrows the result to HFtype. */ + +HFtype +__dpd_truncddhf (_Decimal64 x) +{ + DFtype xdf = force_convert (x); + return xdf; +} diff --git a/libgcc/config/s390/_dpd_hf_to_dd.c b/libgcc/config/s390/_dpd_hf_to_dd.c new file mode 100644 index 00000000000..00d2f3fd8e1 --- /dev/null +++ b/libgcc/config/s390/_dpd_hf_to_dd.c @@ -0,0 +1,39 @@ +/* Copyright (C) 2001-2025 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. 
+ + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +typedef float HFtype __attribute__ ((mode (HF))); +typedef float DFtype __attribute__ ((mode (DF))); + +_Decimal64 __dpd_extendhfdd (HFtype); + +__attribute__ ((noinline)) +static DFtype +force_bfp_extend (HFtype x) +{ return x; } + +_Decimal64 +__dpd_extendhfdd (HFtype x) +{ + DFtype xdf = force_bfp_extend (x); + return xdf; +} diff --git a/libgcc/config/s390/_dpd_hf_to_sd.c b/libgcc/config/s390/_dpd_hf_to_sd.c new file mode 100644 index 00000000000..3e0dede6805 --- /dev/null +++ b/libgcc/config/s390/_dpd_hf_to_sd.c @@ -0,0 +1,39 @@ +/* Copyright (C) 2001-2025 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +typedef float HFtype __attribute__ ((mode (HF))); +typedef float SFtype __attribute__ ((mode (SF))); + +_Decimal32 __dpd_extendhfsd (HFtype); + +__attribute__ ((noinline)) +static SFtype +force_bfp_extend (HFtype x) +{ return x; } + +_Decimal32 +__dpd_extendhfsd (HFtype x) +{ + SFtype xsf = force_bfp_extend (x); + return xsf; +} diff --git a/libgcc/config/s390/_dpd_hf_to_td.c b/libgcc/config/s390/_dpd_hf_to_td.c new file mode 100644 index 00000000000..37eece8667b --- /dev/null +++ b/libgcc/config/s390/_dpd_hf_to_td.c @@ -0,0 +1,39 @@ +/* Copyright (C) 2001-2025 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +typedef float HFtype __attribute__ ((mode (HF))); +typedef float TFtype __attribute__ ((mode (TF))); + +_Decimal128 __dpd_extendhftd (HFtype); + +__attribute__ ((noinline)) +static TFtype +force_bfp_extend (HFtype x) +{ return x; } + +_Decimal128 +__dpd_extendhftd (HFtype x) +{ + TFtype xtf = force_bfp_extend (x); + return xtf; +} diff --git a/libgcc/config/s390/_dpd_sd_to_hf.c b/libgcc/config/s390/_dpd_sd_to_hf.c new file mode 100644 index 00000000000..b93d053d3c9 --- /dev/null +++ b/libgcc/config/s390/_dpd_sd_to_hf.c @@ -0,0 +1,39 @@ +/* Copyright (C) 2001-2025 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +typedef float HFtype __attribute__ ((mode (HF))); +typedef float SFtype __attribute__ ((mode (SF))); + +HFtype __dpd_truncsdhf (_Decimal32); + +__attribute__ ((noinline)) +static SFtype +force_convert (_Decimal32 x) +{ return x; } + +HFtype +__dpd_truncsdhf (_Decimal32 x) +{ + SFtype xsf = force_convert (x); + return xsf; +} diff --git a/libgcc/config/s390/_dpd_td_to_hf.c b/libgcc/config/s390/_dpd_td_to_hf.c new file mode 100644 index 00000000000..8f9e2586849 --- /dev/null +++ b/libgcc/config/s390/_dpd_td_to_hf.c @@ -0,0 +1,39 @@ +/* Copyright (C) 2001-2025 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +typedef float HFtype __attribute__ ((mode (HF))); +typedef float TFtype __attribute__ ((mode (TF))); + +HFtype __dpd_trunctdhf (_Decimal128); + +__attribute__ ((noinline)) +static TFtype +force_convert (_Decimal128 x) +{ return x; } + +HFtype +__dpd_trunctdhf (_Decimal128 x) +{ + TFtype xtf = force_convert (x); + return xtf; +} diff --git a/libgcc/config/s390/libgcc-glibc.ver b/libgcc/config/s390/libgcc-glibc.ver index 4aa07c1bc92..8170506e5cc 100644 --- a/libgcc/config/s390/libgcc-glibc.ver +++ b/libgcc/config/s390/libgcc-glibc.ver @@ -123,8 +123,19 @@ GCC_16.0.0 { __fixsfbitint __fixdfbitint __fixtfbitint + __floatbitinthf __floatbitintsf __floatbitintdf __floatbitinttf + __truncsfhf2 + __truncdfhf2 + __trunctfhf2 + __extendhfsf2 + __extendhfdf2 + __extendhftf2 + __fixhfti + __fixunshfti + __floattihf + __floatuntihf } %endif diff --git a/libgcc/config/s390/t-float16 b/libgcc/config/s390/t-float16 new file mode 100644 index 00000000000..09c6a67c7e5 --- /dev/null +++ b/libgcc/config/s390/t-float16 @@ -0,0 +1,11 @@ +LIB2ADD += $(addprefix $(srcdir)/config/s390/, \ + _dpd_hf_to_sd.c \ + _dpd_hf_to_dd.c \ + _dpd_hf_to_td.c \ + _dpd_sd_to_hf.c \ + _dpd_dd_to_hf.c \ + _dpd_td_to_hf.c) + +softfp_extensions += hfsf hfdf hftf +softfp_truncations += sfhf dfhf tfhf +softfp_extras += fixhfti fixunshfti floattihf floatuntihf floatbitinthf diff --git a/libgcc/config/s390/t-softfp b/libgcc/config/s390/t-softfp index 724b15e83ba..0614fefbd23 100644 --- a/libgcc/config/s390/t-softfp +++ b/libgcc/config/s390/t-softfp @@ -1,2 +1,2 @@ LIB2ADD += $(srcdir)/config/s390/sfp-exceptions.c -softfp_extras := fixtfbitint floatbitinttf +softfp_extras += fixtfbitint floatbitinttf diff --git a/libgcc/configure b/libgcc/configure index 8cdc021f9c1..f2bee6d90fe 100755 --- a/libgcc/configure +++ b/libgcc/configure @@ -5336,6 +5336,32 @@ $as_echo "$libgcc_cv_powerpc_3_1_float128_hw" >&6; } CFLAGS="$saved_CFLAGS" esac +case ${host} in +s390*-*-linux*) + { $as_echo 
"$as_me:${as_lineno-$LINENO}: checking Support float16 on s390" >&5 +$as_echo_n "checking Support float16 on s390... " >&6; } +if ${libgcc_cv_s390_float16+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#if ! __s390x__ || __ARCH__ < 8 + # error "HF not supported" + #endif + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + libgcc_cv_s390_float16=yes +else + libgcc_cv_s390_float16=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgcc_cv_s390_float16" >&5 +$as_echo "$libgcc_cv_s390_float16" >&6; } + ;; +esac + # Collect host-machine-specific information. . ${srcdir}/config.host diff --git a/libgcc/configure.ac b/libgcc/configure.ac index 1ae782c480c..72d23b427d0 100644 --- a/libgcc/configure.ac +++ b/libgcc/configure.ac @@ -479,6 +479,20 @@ powerpc*-*-linux*) CFLAGS="$saved_CFLAGS" esac +case ${host} in +s390*-*-linux*) + AC_CACHE_CHECK([Support float16 on s390], + [libgcc_cv_s390_float16], + [AC_COMPILE_IFELSE( + [AC_LANG_SOURCE([#if ! __s390x__ || __ARCH__ < 8 + # error "HF not supported" + #endif + ])], + [libgcc_cv_s390_float16=yes], + [libgcc_cv_s390_float16=no])]) + ;; +esac + # Collect host-machine-specific information. . 
${srcdir}/config.host diff --git a/libstdc++-v3/config/abi/post/s390x-linux-gnu/baseline_symbols.txt b/libstdc++-v3/config/abi/post/s390x-linux-gnu/baseline_symbols.txt index 8c204af66fc..b29c71a0753 100644 --- a/libstdc++-v3/config/abi/post/s390x-linux-gnu/baseline_symbols.txt +++ b/libstdc++-v3/config/abi/post/s390x-linux-gnu/baseline_symbols.txt @@ -4951,6 +4951,7 @@ OBJECT:15:_ZTSSt8messagesIwE@@GLIBCXX_3.4 OBJECT:15:_ZTSSt8numpunctIcE@@GLIBCXX_3.4 OBJECT:15:_ZTSSt8numpunctIwE@@GLIBCXX_3.4 OBJECT:16:_ZTIDF128_@@CXXABI_1.3.14 +OBJECT:16:_ZTIDF16_@@CXXABI_1.3.14 OBJECT:16:_ZTIDF32_@@CXXABI_1.3.14 OBJECT:16:_ZTIDF32x@@CXXABI_1.3.14 OBJECT:16:_ZTIDF64_@@CXXABI_1.3.14 @@ -5615,6 +5616,7 @@ OBJECT:30:_ZTSSt7codecvtIDsDu11__mbstate_tE@@GLIBCXX_3.4.26 OBJECT:32:_ZNSbIwSt11char_traitsIwESaIwEE4_Rep20_S_empty_rep_storageE@@GLIBCXX_3.4 OBJECT:32:_ZNSs4_Rep20_S_empty_rep_storageE@@GLIBCXX_3.4 OBJECT:32:_ZTIPDF128_@@CXXABI_1.3.14 +OBJECT:32:_ZTIPDF16_@@CXXABI_1.3.14 OBJECT:32:_ZTIPDF32_@@CXXABI_1.3.14 OBJECT:32:_ZTIPDF32x@@CXXABI_1.3.14 OBJECT:32:_ZTIPDF64_@@CXXABI_1.3.14 @@ -5627,6 +5629,7 @@ OBJECT:32:_ZTIPDn@@CXXABI_1.3.5 OBJECT:32:_ZTIPDs@@CXXABI_1.3.3 OBJECT:32:_ZTIPDu@@CXXABI_1.3.12 OBJECT:32:_ZTIPKDF128_@@CXXABI_1.3.14 +OBJECT:32:_ZTIPKDF16_@@CXXABI_1.3.14 OBJECT:32:_ZTIPKDF32_@@CXXABI_1.3.14 OBJECT:32:_ZTIPKDF32x@@CXXABI_1.3.14 OBJECT:32:_ZTIPKDF64_@@CXXABI_1.3.14