public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
From: "rdapp at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug middle-end/111401] Middle-end: Missed optimization of MASK_LEN_FOLD_LEFT_PLUS
Date: Wed, 13 Sep 2023 21:25:51 +0000	[thread overview]
Message-ID: <bug-111401-4-65XwPgHIHw@http.gcc.gnu.org/bugzilla/> (raw)
In-Reply-To: <bug-111401-4@http.gcc.gnu.org/bugzilla/>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111401

--- Comment #3 from Robin Dapp <rdapp at gcc dot gnu.org> ---
Several other things came up, so I'm just going to post the latest status here
without having revised or tested it.  Going to try fixing it and testing
tomorrow.

--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -3672,7 +3672,7 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
 static bool
 fold_left_reduction_fn (code_helper code, internal_fn *reduc_fn)
 {
-  if (code == PLUS_EXPR)
+  if (code == PLUS_EXPR || code == IFN_COND_ADD)
     {
       *reduc_fn = IFN_FOLD_LEFT_PLUS;
       return true;
@@ -4106,8 +4106,13 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
           return NULL;
         }

-      nphi_def_loop_uses++;
-      phi_use_stmt = use_stmt;
+      /* We might have two uses in the same instruction, only count them as
+        one. */
+      if (use_stmt != phi_use_stmt)
+       {
+         nphi_def_loop_uses++;
+         phi_use_stmt = use_stmt;
+       }
     }

   tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
@@ -6861,7 +6866,7 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
                               gimple **vec_stmt, slp_tree slp_node,
                               gimple *reduc_def_stmt,
                               tree_code code, internal_fn reduc_fn,
-                              tree ops[3], tree vectype_in,
+                              tree *ops, int num_ops, tree vectype_in,
                               int reduc_index, vec_loop_masks *masks,
                               vec_loop_lens *lens)
 {
@@ -6883,11 +6888,24 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
     gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out),
                          TYPE_VECTOR_SUBPARTS (vectype_in)));

-  tree op0 = ops[1 - reduc_index];
+  /* The operands either come from a binary operation or a COND_ADD operation.
+     The former is a gimple assign and the latter is a gimple call with four
+     arguments.  */
+  gcc_assert (num_ops == 2 || num_ops == 4);
+  bool is_cond_add = num_ops == 4;
+  tree op0, opmask;
+  if (!is_cond_add)
+    op0 = ops[1 - reduc_index];
+  else
+    {
+      op0 = ops[2];
+      opmask = ops[0];
+      gcc_assert (!slp_node);
+    }
   int group_size = 1;
   stmt_vec_info scalar_dest_def_info;
-  auto_vec<tree> vec_oprnds0;
+  auto_vec<tree> vec_oprnds0, vec_opmask;
   if (slp_node)
     {
       auto_vec<vec<tree> > vec_defs (2);
@@ -6903,9 +6921,18 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
       vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1,
                                     op0, &vec_oprnds0);
       scalar_dest_def_info = stmt_info;
+      if (is_cond_add)
+       {
+         vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1,
+                                        opmask, &vec_opmask);
+         gcc_assert (vec_opmask.length() == 1);
+       }
     }

-  tree scalar_dest = gimple_assign_lhs (scalar_dest_def_info->stmt);
+  gimple *sdef = scalar_dest_def_info->stmt;
+  tree scalar_dest = is_gimple_call (sdef)
+                      ? gimple_call_lhs (sdef)
+                      : gimple_assign_lhs (scalar_dest_def_info->stmt);
   tree scalar_type = TREE_TYPE (scalar_dest);
   tree reduc_var = gimple_phi_result (reduc_def_stmt);

@@ -6945,7 +6972,11 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
                                   i, 1);
          signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
          bias = build_int_cst (intQI_type_node, biasval);
-         mask = build_minus_one_cst (truth_type_for (vectype_in));
+         /* If we have a COND_ADD take its mask.  Otherwise use {-1, ...}.  */
+         if (is_cond_add)
+           mask = vec_opmask[0];
+         else
+           mask = build_minus_one_cst (truth_type_for (vectype_in));
        }

       /* Handle MINUS by adding the negative.  */
@@ -7440,6 +7471,9 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
       if (i == STMT_VINFO_REDUC_IDX (stmt_info))
        continue;

+      if (op.ops[i] == op.ops[STMT_VINFO_REDUC_IDX (stmt_info)])
+       continue;
+
       /* There should be only one cycle def in the stmt, the one
          leading to reduc_def.  */
       if (VECTORIZABLE_CYCLE_DEF (dt))
@@ -8211,8 +8245,21 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
       vec_num = 1;
     }

-  code_helper code = canonicalize_code (op.code, op.type);
-  internal_fn cond_fn = get_conditional_internal_fn (code, op.type);
+  code_helper code (op.code);
+  internal_fn cond_fn;
+
+  if (code.is_internal_fn ())
+    {
+      internal_fn ifn = internal_fn (op.code);
+      code = canonicalize_code (conditional_internal_fn_code (ifn), op.type);
+      cond_fn = ifn;
+    }
+  else
+    {
+      code = canonicalize_code (op.code, op.type);
+      cond_fn = get_conditional_internal_fn (code, op.type);
+    }
+
   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
   vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
   bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in);
@@ -8240,8 +8287,8 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
       gcc_assert (code.is_tree_code ());
       return vectorize_fold_left_reduction
          (loop_vinfo, stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi,
-          tree_code (code), reduc_fn, op.ops, vectype_in, reduc_index, masks,
-          lens);
+          tree_code (code), reduc_fn, op.ops, op.num_ops, vectype_in,
+          reduc_index, masks, lens);
     }

   bool single_defuse_cycle = STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info);

  parent reply	other threads:[~2023-09-13 21:25 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-13  9:31 [Bug c/111401] New: " juzhe.zhong at rivai dot ai
2023-09-13  9:46 ` [Bug c/111401] " rguenth at gcc dot gnu.org
2023-09-13 16:52 ` [Bug middle-end/111401] " rdapp at gcc dot gnu.org
2023-09-13 21:25 ` rdapp at gcc dot gnu.org [this message]
2023-09-14  6:46 ` rguenther at suse dot de
2023-09-14  6:51 ` rguenther at suse dot de
2023-09-14 15:07 ` rdapp at gcc dot gnu.org
2023-09-15  6:42 ` rguenth at gcc dot gnu.org
2023-11-02 10:50 ` cvs-commit at gcc dot gnu.org
2023-11-02 22:40 ` juzhe.zhong at rivai dot ai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-111401-4-65XwPgHIHw@http.gcc.gnu.org/bugzilla/ \
    --to=gcc-bugzilla@gcc.gnu.org \
    --cc=gcc-bugs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).