[gcc r14-4713] middle-end: Fold vec_cond into conditional ternary or binary operation when sharing operand [PR109154]

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

From: Tamar Christina <tnfchris@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r14-4713] middle-end: Fold vec_cond into conditional ternary or binary operation when sharing operand [PR109154]
Date: Wed, 18 Oct 2023 08:54:45 +0000 (GMT)	[thread overview]
Message-ID: <20231018085445.5A2693858CDB@sourceware.org> (raw)

https://gcc.gnu.org/g:4b39aeef594f311e2c1715f15608f1d7ebc2d868

commit r14-4713-g4b39aeef594f311e2c1715f15608f1d7ebc2d868
Author: Tamar Christina <tamar.christina@arm.com>
Date:   Wed Oct 18 09:32:55 2023 +0100

    middle-end: Fold vec_cond into conditional ternary or binary operation when sharing operand [PR109154]
    
    When we have a vector conditional on a masked target which is doing a selection
    on the result of a conditional operation where one of the operands of the
    conditional operation is the other operand of the select, then we can fold the
    vector conditional into the operation.
    
    Concretely this transforms
    
      c = mask1 ? (masked_op mask2 a b) : b
    
    into
    
      c = masked_op (mask1 & mask2) a b
    
    The mask is then propagated upwards by the compiler.  In the SVE case we don't
    end up needing a mask AND here since `mask2` will end up in the instruction
    creating `mask1` which gives us a natural &.
    
    Such transformations are more common now in GCC 13+ as PRE has now started
    unsharing of common code in case it can make one branch fully independent.
    
    e.g. in this case `b` becomes a loop invariant value after PRE.
    
    This transformation removes the extra select for masked architectures but
    doesn't fix the general case.
    
    gcc/ChangeLog:
    
            PR tree-optimization/109154
            * match.pd: Add new cond_op rule.
    
    gcc/testsuite/ChangeLog:
    
            PR tree-optimization/109154
            * gcc.target/aarch64/sve/pre_cond_share_1.c: New test.

Diff:
---
 gcc/match.pd                                       |  24 ++++
 .../gcc.target/aarch64/sve/pre_cond_share_1.c      | 132 +++++++++++++++++++++
 2 files changed, 156 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 067328a2a16b..a56838fb388a 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8970,6 +8970,30 @@ and,
 	    && fold_real_zero_addition_p (type, NULL_TREE, @5, 0)))
     (IFN_COND_LEN_ADD @1 @0 @2 @0 @3 @4)))
 
+/* Detect simplification for vector condition folding where
+
+  c = mask1 ? (masked_op mask2 a b) : b
+
+  into
+
+  c = masked_op (mask1 & mask2) a b
+
+  where the operation can be partially applied to one operand. */
+
+(for cond_op (COND_BINARY)
+ (simplify
+  (vec_cond @0
+   (cond_op:s @1 @2 @3 @4) @3)
+  (cond_op (bit_and @1 @0) @2 @3 @4)))
+
+/* And same for ternary expressions.  */
+
+(for cond_op (COND_TERNARY)
+ (simplify
+  (vec_cond @0
+   (cond_op:s @1 @2 @3 @4 @5) @4)
+  (cond_op (bit_and @1 @0) @2 @3 @4 @5)))
+
 /* For pointers @0 and @2 and nonnegative constant offset @1, look for
    expressions like:
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c
new file mode 100644
index 000000000000..b51d0f298ea1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-optimized" } */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <math.h>
+#include <float.h>
+
+typedef struct __attribute__((__packed__)) _Atom {
+    float x, y, z;
+    int32_t type;
+} Atom;
+
+typedef struct __attribute__((__packed__)) _FFParams {
+    int32_t hbtype;
+    float radius;
+    float hphb;
+    float elsc;
+} FFParams;
+
+#ifndef PPWI
+#define PPWI (64)
+#endif
+
+#ifndef ITERS
+#define ITERS 8
+#endif
+
+#define DIFF_TOLERANCE_PCT 0.025f
+
+#define POSES_SIZE 393216
+#define PROTEIN_SIZE 938
+#define LIGAND_SIZE 26
+#define FORCEFIELD_SIZE 34
+
+#define ZERO 0.0f
+#define QUARTER 0.25f
+#define HALF 0.5f
+#define ONE 1.0f
+#define TWO 2.0f
+#define FOUR 4.0f
+#define CNSTNT 45.0f
+
+// Energy evaluation parameters
+#define HBTYPE_F 70
+#define HBTYPE_E 69
+#define HARDNESS 38.0f
+#define NPNPDIST 5.5f
+#define NPPDIST 1.0f
+
+void
+fasten_main(size_t group, size_t ntypes, size_t nposes, size_t natlig, size_t natpro,        //
+            const Atom *protein, const Atom *ligand,                                         //
+            const float *transforms_0, const float *transforms_1, const float *transforms_2, //
+            const float *transforms_3, const float *transforms_4, const float *transforms_5, //
+            const FFParams *forcefield, float *energies                                      //
+) {
+
+    float etot[PPWI];
+    float lpos_x[PPWI];
+
+    for (int l = 0; l < PPWI; l++) {
+        etot[l] = 0.f;
+        lpos_x[l] = 0.f;
+    }
+
+    // Loop over ligand atoms
+    for (int il = 0; il < natlig; il++) {
+        // Load ligand atom data
+        const Atom l_atom = ligand[il];
+        const FFParams l_params = forcefield[l_atom.type];
+        const int lhphb_ltz = l_params.hphb < 0.f;
+        const int lhphb_gtz = l_params.hphb > 0.f;
+
+        // Transform ligand atom
+
+        // Loop over protein atoms
+        for (int ip = 0; ip < natpro; ip++) {
+            // Load protein atom data
+            const Atom p_atom = protein[ip];
+            const FFParams p_params = forcefield[p_atom.type];
+
+            const float radij = p_params.radius + l_params.radius;
+            const float r_radij = ONE / radij;
+
+            const float elcdst = (p_params.hbtype == HBTYPE_F && l_params.hbtype == HBTYPE_F) ? FOUR
+                                                                                              : TWO;
+            const float elcdst1 = (p_params.hbtype == HBTYPE_F && l_params.hbtype == HBTYPE_F)
+                                  ? QUARTER : HALF;
+            const int type_E = ((p_params.hbtype == HBTYPE_E || l_params.hbtype == HBTYPE_E));
+
+            const int phphb_ltz = p_params.hphb < 0.f;
+            const int phphb_gtz = p_params.hphb > 0.f;
+            const int phphb_nz = p_params.hphb != 0.f;
+            const float p_hphb = p_params.hphb * (phphb_ltz && lhphb_gtz ? -ONE : ONE);
+            const float l_hphb = l_params.hphb * (phphb_gtz && lhphb_ltz ? -ONE : ONE);
+            const float distdslv = (phphb_ltz ? (lhphb_ltz ? NPNPDIST : NPPDIST) : (lhphb_ltz
+                                                                                    ? NPPDIST
+                                                                                    : -FLT_MAX));
+            const float r_distdslv = ONE / distdslv;
+
+            const float chrg_init = l_params.elsc * p_params.elsc;
+            const float dslv_init = p_hphb + l_hphb;
+
+            for (int l = 0; l < PPWI; l++) {
+                // Calculate distance between atoms
+                const float x = lpos_x[l] - p_atom.x;
+                const float distij = (x * x);
+
+                // Calculate the sum of the sphere radii
+                const float distbb = distij - radij;
+
+                const int zone1 = (distbb < ZERO);
+
+                // Calculate formal and dipole charge interactions
+                float chrg_e = chrg_init * ((zone1 ? ONE : (ONE - distbb * elcdst1)) *
+                                            (distbb < elcdst ? ONE : ZERO));
+                float neg_chrg_e = -fabsf(chrg_e);
+                chrg_e = type_E ? neg_chrg_e : chrg_e;
+                etot[l] += chrg_e * CNSTNT;
+            }
+        }
+    }
+
+    // Write result
+    for (int l = 0; l < PPWI; l++) {
+        energies[group * PPWI + l] = etot[l] * HALF;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times {\.COND_MUL} 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times {\.VCOND} 1 "optimized" } } */

                 reply	other threads:[~2023-10-18  8:54 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231018085445.5A2693858CDB@sourceware.org \
    --to=tnfchris@gcc.gnu.org \
    --cc=gcc-cvs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).