From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 25294 invoked by alias); 29 Dec 2007 15:35:40 -0000 Received: (qmail 25286 invoked by uid 22791); 29 Dec 2007 15:35:39 -0000 X-Spam-Check-By: sourceware.org Received: from fg-out-1718.google.com (HELO fg-out-1718.google.com) (72.14.220.152) by sourceware.org (qpsmtpd/0.31) with ESMTP; Sat, 29 Dec 2007 15:35:31 +0000 Received: by fg-out-1718.google.com with SMTP id d23so2053357fga.28 for ; Sat, 29 Dec 2007 07:35:28 -0800 (PST) Received: by 10.86.97.7 with SMTP id u7mr10155716fgb.65.1198942528710; Sat, 29 Dec 2007 07:35:28 -0800 (PST) Received: from ?194.249.3.167? ( [194.249.3.167]) by mx.google.com with ESMTPS id y18sm14547045fkd.17.2007.12.29.07.35.25 (version=TLSv1/SSLv3 cipher=RC4-MD5); Sat, 29 Dec 2007 07:35:27 -0800 (PST) Message-ID: <47766939.2030505@gmail.com> Date: Sat, 29 Dec 2007 16:11:00 -0000 From: Uros Bizjak User-Agent: Thunderbird 1.5.0.7 (X11/20061008) MIME-Version: 1.0 To: GCC CC: GCC Patches , tbp Subject: Re: censored naked SSE reciprocals, -mrecip Content-Type: multipart/mixed; boundary="------------070805080101060402010204" Mailing-List: contact gcc-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-owner@gcc.gnu.org X-SW-Source: 2007-12/txt/msg00701.txt.bz2 This is a multi-part message in MIME format. --------------070805080101060402010204 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Content-length: 1399 Hello! > i lately had some use for -mrecip but it turned out to come with all > sorts of strings attached and apparently no opt-out. Briefly, barring > inline asm, i can't get gcc to emit those ops without a NR fixup. > > Questions: > a) is that really by design? No. Attached patch fixes these problems by using correct shortcuts when generating intrinsic functions. 2007-12-29 Uros Bizjak * config/i386/sse.md ("*divv4sf3"): Rename to "sse_divv4sf3". ("*sse_rsqrtv4sf2"): Export. 
("*sqrtv4sf2"): Rename to "sse_sqrtv4sf2". * config/i386/i386.c (enum ix86_builtins) [IX86_BUILTIN_RSQRTPS_NR, IX86_BUILTIN_SQRTPS_NR]: New constants. (struct builtin_description) [IX86_BUILTIN_DIVPS]: Use CODE_FOR_sse_divv4sf3. [IX86_BUILTIN_SQRTPS]: Use CODE_FOR_sse_sqrtv4sf2. [IX86_BUILTIN_SQRTPS_NR]: New. [IX86_BUILTIN_RSQRTPS_NR]: Ditto. (ix86_init_mmx_sse_builtins): Initialize __builtin_ia32_rsqrtps_nr and __builtin_ia32_sqrtps_nr. (ix86_builtin_vectorized_function): Convert BUILT_IN_SQRTF to IX86_BUILTIN_SQRTPS_NR. (ix86_builtin_reciprocal): Convert IX86_BUILTIN_SQRTPS_NR to IX86_BUILTIN_RSQRTPS_NR. Patch was bootstrapped and regression tested with {,-m32} on x86_64-pc-linux-gnu. Patch is committed to SVN. Thanks a lot for your report, Uros. --------------070805080101060402010204 Content-Type: text/plain; name="r.diff.txt" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="r.diff.txt" Content-length: 6407 Index: sse.md =================================================================== --- sse.md (revision 131218) +++ sse.md (working copy) @@ -490,7 +490,7 @@ } }) -(define_insn "*divv4sf3" +(define_insn "sse_divv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") (div:V4SF (match_operand:V4SF 1 "register_operand" "0") (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] @@ -532,16 +532,7 @@ [(set_attr "type" "sse") (set_attr "mode" "SF")]) -(define_insn "*sse_rsqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))] - "TARGET_SSE" - "rsqrtps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) - -(define_expand "sse_rsqrtv4sf2" +(define_expand "rsqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "") (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))] @@ -556,6 +547,15 @@ } }) +(define_insn "sse_rsqrtv4sf2" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF + [(match_operand:V4SF 
1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))] + "TARGET_SSE" + "rsqrtps\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) + (define_insn "sse_vmrsqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_merge:V4SF @@ -568,14 +568,6 @@ [(set_attr "type" "sse") (set_attr "mode" "SF")]) -(define_insn "*sqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "sqrtps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) - (define_expand "sqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=") (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))] @@ -590,6 +582,14 @@ } }) +(define_insn "sse_sqrtv4sf2" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "sqrtps\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) + (define_insn "sse_vmsqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_merge:V4SF Index: i386.c =================================================================== --- i386.c (revision 131218) +++ i386.c (working copy) @@ -17093,9 +17093,11 @@ enum ix86_builtins IX86_BUILTIN_RCPPS, IX86_BUILTIN_RCPSS, IX86_BUILTIN_RSQRTPS, + IX86_BUILTIN_RSQRTPS_NR, IX86_BUILTIN_RSQRTSS, IX86_BUILTIN_RSQRTF, IX86_BUILTIN_SQRTPS, + IX86_BUILTIN_SQRTPS_NR, IX86_BUILTIN_SQRTSS, IX86_BUILTIN_UNPCKHPS, @@ -17849,7 +17851,7 @@ static const struct builtin_description { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 }, + { OPTION_MASK_ISA_SSE, 
CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 }, @@ -18158,8 +18160,10 @@ static const struct builtin_description { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS_NR, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 }, + { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 }, @@ -19279,12 +19283,14 @@ ix86_init_mmx_sse_builtins (void) def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); + def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS_NR); def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); ftype = build_function_type_list (float_type_node, float_type_node, NULL_TREE); def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", 
ftype, IX86_BUILTIN_RSQRTF); def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); + def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS_NR); def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); @@ -21301,7 +21307,7 @@ ix86_builtin_vectorized_function (unsign case BUILT_IN_SQRTF: if (out_mode == SFmode && out_n == 4 && in_mode == SFmode && in_n == 4) - return ix86_builtins[IX86_BUILTIN_SQRTPS]; + return ix86_builtins[IX86_BUILTIN_SQRTPS_NR]; break; case BUILT_IN_LRINT: @@ -21463,8 +21469,8 @@ ix86_builtin_reciprocal (unsigned int fn switch (fn) { /* Vectorized version of sqrt to rsqrt conversion. */ - case IX86_BUILTIN_SQRTPS: - return ix86_builtins[IX86_BUILTIN_RSQRTPS]; + case IX86_BUILTIN_SQRTPS_NR: + return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR]; default: return NULL_TREE; --------------070805080101060402010204--