From mboxrd@z Thu Jan  1 00:00:00 1970
From: Roger Sayle
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r12-5293] x86_64: Avoid rorx rotation instructions with -Os.
Date: Tue, 16 Nov 2021 08:56:44 +0000 (GMT)
X-Act-Checkin: gcc
X-Git-Author: Roger Sayle
X-Git-Refname: refs/heads/master
X-Git-Oldrev: e69b7c5779863469479698f863ab25e0d9b4586e
X-Git-Newrev: 473b5e87346edf9885abc28b7de68e3cd7059746

https://gcc.gnu.org/g:473b5e87346edf9885abc28b7de68e3cd7059746

commit r12-5293-g473b5e87346edf9885abc28b7de68e3cd7059746
Author: Roger Sayle
Date:   Tue Nov 16 08:55:21 2021 +0000

    x86_64: Avoid rorx rotation instructions with -Os.

    This patch teaches the i386 backend to avoid using BMI2's rorx
    instructions when optimizing for size.  The benefits are shown with
    the following example:

    unsigned int ror1(unsigned int x) { return (x >> 1) | (x << 31); }
    unsigned int ror2(unsigned int x) { return (x >> 2) | (x << 30); }
    unsigned int rol2(unsigned int x) { return (x >> 30) | (x << 2); }
    unsigned int rol1(unsigned int x) { return (x >> 31) | (x << 1); }

    which currently with -Os -march=cascadelake generates:

    ror1:   rorx $1, %edi, %eax     // 6 bytes
            ret
    ror2:   rorx $2, %edi, %eax     // 6 bytes
            ret
    rol2:   rorx $30, %edi, %eax    // 6 bytes
            ret
    rol1:   rorx $31, %edi, %eax    // 6 bytes
            ret

    but with this patch now generates:

    ror1:   movl %edi, %eax         // 2 bytes
            rorl %eax               // 2 bytes
            ret
    ror2:   movl %edi, %eax         // 2 bytes
            rorl $2, %eax           // 3 bytes
            ret
    rol2:   movl %edi, %eax         // 2 bytes
            roll $2, %eax           // 3 bytes
            ret
    rol1:   movl %edi, %eax         // 2 bytes
            roll %eax               // 2 bytes
            ret

    I've confirmed that this patch is a win on the CSiBE benchmark, even
    though rotations are rare: for example, libmspack/test/md5.o shrinks
    from 5824 bytes to 5632 bytes.

    2021-11-16  Roger Sayle

    gcc/ChangeLog

            * config/i386/i386.md (*bmi2_rorx<mode>3_1): Make conditional
            on !optimize_function_for_size_p.
            (*<insn><mode>3_1): Add preferred_for_size attribute.
            (define_splits): Conditionalize on !optimize_function_for_size_p.
            (*bmi2_rorxsi3_1_zext): Likewise.
            (*<insn>si3_1_zext): Add preferred_for_size attribute.
            (define_splits): Conditionalize on !optimize_function_for_size_p.
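For anyone wanting to reproduce the size comparison, here is a minimal
sketch (not part of the patch; the file name and helper function are
invented for illustration).  It uses the same shift-or idiom that GCC
recognizes as a rotate, so compiling it at -O2 versus -Os on a BMI2
target should show the rorx versus mov+ror difference described above:

    /* rotate.c -- hypothetical test file, not from the patch.
       Try:  gcc -O2 -march=cascadelake -S rotate.c   (expect rorx)
             gcc -Os -march=cascadelake -S rotate.c   (expect movl + rorl)  */
    #include <stdint.h>

    /* The paired shifts below are the idiom GCC turns into a single
       rotate; n must stay in 1..31 so neither shift count reaches 32.  */
    static inline uint32_t rotr32(uint32_t x, unsigned n)
    {
        return (x >> n) | (x << (32 - n));
    }

    uint32_t ror7(uint32_t x)
    {
        return rotr32(x, 7);   /* -Os: movl %edi, %eax; rorl $7, %eax */
    }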
Diff:
---
 gcc/config/i386/i386.md | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6eb9de81921..73949063030 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -12775,7 +12775,7 @@
 	(rotatert:SWI48
 	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")
 	  (match_operand:QI 2 "<rorx_immediate_operand>" "<S>")))]
-  "TARGET_BMI2"
+  "TARGET_BMI2 && !optimize_function_for_size_p (cfun)"
   "rorx\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "rotatex")
    (set_attr "mode" "<MODE>")])
@@ -12803,6 +12803,10 @@
 }
   [(set_attr "isa" "*,bmi2")
    (set_attr "type" "rotate,rotatex")
+   (set (attr "preferred_for_size")
+     (cond [(eq_attr "alternative" "0")
+	      (symbol_ref "true")]
+	   (symbol_ref "false")))
    (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "rotate")
@@ -12819,7 +12823,7 @@
 	(rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
 		      (match_operand:QI 2 "const_int_operand")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_BMI2 && reload_completed"
+  "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)"
   [(set (match_dup 0)
 	(rotatert:SWI48 (match_dup 1) (match_dup 2)))]
 {
@@ -12833,7 +12837,7 @@
 	(rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
 			(match_operand:QI 2 "const_int_operand")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_BMI2 && reload_completed"
+  "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)"
   [(set (match_dup 0)
 	(rotatert:SWI48 (match_dup 1) (match_dup 2)))])
 
@@ -12842,7 +12846,7 @@
 	(zero_extend:DI
 	  (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
 		       (match_operand:QI 2 "const_0_to_31_operand" "I"))))]
-  "TARGET_64BIT && TARGET_BMI2"
+  "TARGET_64BIT && TARGET_BMI2 && !optimize_function_for_size_p (cfun)"
   "rorx\t{%2, %1, %k0|%k0, %1, %2}"
   [(set_attr "type" "rotatex")
    (set_attr "mode" "SI")])
@@ -12870,6 +12874,10 @@
 }
   [(set_attr "isa" "*,bmi2")
    (set_attr "type" "rotate,rotatex")
+   (set (attr "preferred_for_size")
+     (cond [(eq_attr "alternative" "0")
+	      (symbol_ref "true")]
+	   (symbol_ref "false")))
    (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "rotate")
@@ -12887,7 +12895,8 @@
 	  (rotate:SI (match_operand:SI 1 "nonimmediate_operand")
 		     (match_operand:QI 2 "const_int_operand"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed
+   && !optimize_function_for_size_p (cfun)"
   [(set (match_dup 0)
 	(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]
 {
@@ -12902,7 +12911,8 @@
 	  (rotatert:SI (match_operand:SI 1 "nonimmediate_operand")
 		       (match_operand:QI 2 "const_int_operand"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed
+   && !optimize_function_for_size_p (cfun)"
   [(set (match_dup 0)
 	(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
 
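The final four hunks cover the SImode rotate wrapped in zero_extend:DI.
As a hedged illustration (the function below is invented, not from the
patch), a 32-bit rotate whose result is returned as a 64-bit value
should exercise *bmi2_rorxsi3_1_zext and its splitter; with -Os the new
!optimize_function_for_size_p conditions steer this case back to the
shorter mov+ror sequence as well:

    /* Hypothetical example exercising the zero-extended SImode
       patterns in the diff above; not taken from the patch.  */
    #include <stdint.h>

    uint64_t ror3_zext(uint32_t x)
    {
        /* At the RTL level this becomes
           (zero_extend:DI (rotatert:SI ...)), which matches
           *bmi2_rorxsi3_1_zext when rorx is permitted.  */
        return (uint64_t)((x >> 3) | (x << 29));
    }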