public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 0/2] Zext/sext elimination using value range
@ 2014-06-24 11:48 Kugan
  2014-06-24 11:51 ` [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED) Kugan
  2014-06-24 11:53 ` [PATCH 2/2] Enable elimination of zext/sext Kugan
  0 siblings, 2 replies; 58+ messages in thread
From: Kugan @ 2014-06-24 11:48 UTC (permalink / raw)
  To: Jakub Jelinek, gcc-patches

Hi,

This patch series (2) implements zext/sext extension elimination using
value ranges stored in SSA. Implementation is what was suggested in the
thread https://gcc.gnu.org/ml/gcc/2014-05/msg00213.html.

I have broken this into:

Patch 1 - Changes to store zero and sign extended promotions
(SPR_SIGNED_AND_UNSIGNED) in SUBREG with SUBREG_PROMOTED_VAR_P.
Patch 2 - Enables Zext/sext extensions by checking the value range.

test-cases that motivated this and the asm difference with the patch are:

1.
short foo(unsigned char c)
{
  c = c & (unsigned char)0x0F;
  if( c > 7 )
    return((short)(c - 5));
  else
    return(( short )c);
}

 	and	r0, r0, #15
 	cmp	r0, #7
 	subhi	r0, r0, #5
-	uxth	r0, r0
-	sxth	r0, r0
 	bx	lr

2.
unsigned short
crc2(unsigned short crc, unsigned char data)
{
   unsigned char i, x16, carry;
   for (i = 0; i < 8; i++)
     {
       x16 = (data ^ crc) & 1;
       data >>= 1;
       if (x16 == 1)
         {
           crc ^= 0x4002;
           carry = 1;
         }
       else
         carry = 0;
      crc >>= 1;
       if (carry)
         crc |= 0x8000;
       else
         crc &= 0x7fff;
     }
   return crc;
}

-	mov	r3, #8
+	mov	r2, #8
 .L3:
-	eor	r2, r1, r0
-	sub	r3, r3, #1
-	tst	r2, #1
+	eor	r3, r1, r0
 	mov	r1, r1, lsr #1
+	tst	r3, #1
 	eorne	r0, r0, #16384
 	moveq	r0, r0, lsr #1
 	eorne	r0, r0, #2
 	movne	r0, r0, lsr #1
 	orrne	r0, r0, #32768
-	ands	r3, r3, #255
+	subs	r2, r2, #1
 	bne	.L3
 	bx	lr

Tested both patches on x86_64-unknown-linux-gnu and
arm-none-linux-gnueabi with no new regressions. Is this OK?

Thanks,
Kugan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED)
  2014-06-24 11:48 [PATCH 0/2] Zext/sext elimination using value range Kugan
@ 2014-06-24 11:51 ` Kugan
  2014-06-24 12:18   ` Jakub Jelinek
  2014-06-24 11:53 ` [PATCH 2/2] Enable elimination of zext/sext Kugan
  1 sibling, 1 reply; 58+ messages in thread
From: Kugan @ 2014-06-24 11:51 UTC (permalink / raw)
  To: Jakub Jelinek, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1688 bytes --]

Changes the the SUBREG flags to be able to set promoted for sign
(SRP_SIGNED), unsigned (SRP_UNSIGNED),  sign and unsigned
(SPR_SIGNED_AND_UNSIGNED) in SUBREG_PROMOTED_VAR_P.

Thanks,
Kugan

gcc/

2014-06-24  Kugan Vivekanandarajah  <kuganv@linaro.org>

	* gcc/calls.c (precompute_arguments): Use new SUBREG_PROMOTED_SET
	instead of SUBREG_PROMOTED_UNSIGNED_SET
	(expand_call) : Likewise.
	* gcc/expr.c (convert_move) : Use new SUBREG_CHECK_PROMOTED_SIGN
	instead of SUBREG_PROMOTED_UNSIGNED_P.
	(convert_modes) : Likewise.
	(store_expr) : Likewise.
	(expand_expr_real_1) : Use new SUBREG_PROMOTED_SET
	instead of SUBREG_PROMOTED_UNSIGNED_SET.
	* gcc/function.c (assign_param_setup_reg) : Use new SUBREG_PROMOTED_SET
	instead of SUBREG_PROMOTED_UNSIGNED_SET.
	* gcc/ifcvt.c (noce_emit_cmove) : Updated to use
	SUBREG_PROMOTED_UNSIGNED_P and SUBREG_PROMOTED_SIGNED_P.
	* gcc/internal-fn.c (ubsan_expand_si_overflow_mul_check) : Use
	SUBREG_PROMOTED_SET instead of SUBREG_PROMOTED_UNSIGNED_SET.
	* gcc/optabs.c (widen_operand): Use new SUBREG_CHECK_PROMOTED_SIGN
	instead of SUBREG_PROMOTED_UNSIGNED_P.
	* gcc/rtl.h (SUBREG_PROMOTED_UNSIGNED_SET) : Remove.
	(SUBREG_PROMOTED_SET) : New define.
	(SUBREG_PROMOTED_GET) : Likewise.
	(SUBREG_PROMOTED_SIGNED_P) : Likewise.
	(SUBREG_CHECK_PROMOTED_SIGN) : Likewise.
	(SUBREG_PROMOTED_UNSIGNED_P) : Updated.
	* gcc/rtlanal.c (simplify_unary_operation_1) : Use new
	SUBREG_PROMOTED_SET instead of SUBREG_PROMOTED_UNSIGNED_SET.
	* gcc/simplify-rtx.c (simplify_unary_operation_1) : Use new
	SUBREG_PROMOTED_SIGNED_P instead of
	!SUBREG_PROMOTED_UNSIGNED_P.
	(simplify_subreg) : Use new SUBREG_PROMOTED_SET instead of
	 SUBREG_PROMOTED_UNSIGNED_SET.

[-- Attachment #2: p1.txt --]
[-- Type: text/plain, Size: 10533 bytes --]

diff --git a/gcc/calls.c b/gcc/calls.c
index 78fe7d8..c1fe3b8 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -1484,8 +1484,7 @@ precompute_arguments (int num_actuals, struct arg_data *args)
 	      args[i].initial_value
 		= gen_lowpart_SUBREG (mode, args[i].value);
 	      SUBREG_PROMOTED_VAR_P (args[i].initial_value) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET (args[i].initial_value,
-					    args[i].unsignedp);
+	      SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);
 	    }
 	}
     }
@@ -3365,7 +3364,8 @@ expand_call (tree exp, rtx target, int ignore)
 
 	  target = gen_rtx_SUBREG (TYPE_MODE (type), target, offset);
 	  SUBREG_PROMOTED_VAR_P (target) = 1;
-	  SUBREG_PROMOTED_UNSIGNED_SET (target, unsignedp);
+	  SUBREG_PROMOTED_SET (target, unsignedp);
+
 	}
 
       /* If size of args is variable or this was a constructor call for a stack
diff --git a/gcc/expr.c b/gcc/expr.c
index 512c024..a8db9f5 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -329,7 +329,7 @@ convert_move (rtx to, rtx from, int unsignedp)
   if (GET_CODE (from) == SUBREG && SUBREG_PROMOTED_VAR_P (from)
       && (GET_MODE_PRECISION (GET_MODE (SUBREG_REG (from)))
 	  >= GET_MODE_PRECISION (to_mode))
-      && SUBREG_PROMOTED_UNSIGNED_P (from) == unsignedp)
+      && (SUBREG_CHECK_PROMOTED_SIGN (from, unsignedp)))
     from = gen_lowpart (to_mode, from), from_mode = to_mode;
 
   gcc_assert (GET_CODE (to) != SUBREG || !SUBREG_PROMOTED_VAR_P (to));
@@ -703,7 +703,7 @@ convert_modes (enum machine_mode mode, enum machine_mode oldmode, rtx x, int uns
 
   if (GET_CODE (x) == SUBREG && SUBREG_PROMOTED_VAR_P (x)
       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))) >= GET_MODE_SIZE (mode)
-      && SUBREG_PROMOTED_UNSIGNED_P (x) == unsignedp)
+      && (SUBREG_CHECK_PROMOTED_SIGN (x, unsignedp)))
     x = gen_lowpart (mode, SUBREG_REG (x));
 
   if (GET_MODE (x) != VOIDmode)
@@ -5202,8 +5202,7 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
 	  && GET_MODE_PRECISION (GET_MODE (target))
 	     == TYPE_PRECISION (TREE_TYPE (exp)))
 	{
-	  if (TYPE_UNSIGNED (TREE_TYPE (exp))
-	      != SUBREG_PROMOTED_UNSIGNED_P (target))
+	  if (!(SUBREG_CHECK_PROMOTED_SIGN (target, TYPE_UNSIGNED (TREE_TYPE (exp)))))
 	    {
 	      /* Some types, e.g. Fortran's logical*4, won't have a signed
 		 version, so use the mode instead.  */
@@ -9513,7 +9512,8 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
 
 	  temp = gen_lowpart_SUBREG (mode, decl_rtl);
 	  SUBREG_PROMOTED_VAR_P (temp) = 1;
-	  SUBREG_PROMOTED_UNSIGNED_SET (temp, unsignedp);
+	  SUBREG_PROMOTED_SET (temp, unsignedp);
+
 	  return temp;
 	}
 
diff --git a/gcc/function.c b/gcc/function.c
index 441289e..9509622 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -3093,7 +3093,7 @@ assign_parm_setup_reg (struct assign_parm_data_all *all, tree parm,
 	  /* The argument is already sign/zero extended, so note it
 	     into the subreg.  */
 	  SUBREG_PROMOTED_VAR_P (tempreg) = 1;
-	  SUBREG_PROMOTED_UNSIGNED_SET (tempreg, unsignedp);
+	  SUBREG_PROMOTED_SET (tempreg, unsignedp);
 	}
 
       /* TREE_USED gets set erroneously during expand_assignment.  */
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 816cdaa..b2a0574 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -1448,8 +1448,11 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
 	  || byte_vtrue != byte_vfalse
 	  || (SUBREG_PROMOTED_VAR_P (vtrue)
 	      != SUBREG_PROMOTED_VAR_P (vfalse))
-	  || (SUBREG_PROMOTED_UNSIGNED_P (vtrue)
-	      != SUBREG_PROMOTED_UNSIGNED_P (vfalse)))
+	  || ((SUBREG_PROMOTED_UNSIGNED_P (vtrue)
+	       != SUBREG_PROMOTED_UNSIGNED_P (vfalse))
+	      && (SUBREG_PROMOTED_SIGNED_P (vtrue)
+		  != SUBREG_PROMOTED_SIGNED_P (vfalse))))
+
 	return NULL_RTX;
 
       promoted_target = gen_reg_rtx (GET_MODE (reg_vtrue));
@@ -1463,7 +1466,7 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
 
       target = gen_rtx_SUBREG (GET_MODE (vtrue), promoted_target, byte_vtrue);
       SUBREG_PROMOTED_VAR_P (target) = SUBREG_PROMOTED_VAR_P (vtrue);
-      SUBREG_PROMOTED_UNSIGNED_SET (target, SUBREG_PROMOTED_UNSIGNED_P (vtrue));
+      SUBREG_PROMOTED_SET (target, SUBREG_PROMOTED_GET (vtrue));
       emit_move_insn (x, target);
       return x;
     }
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 78f59d6..4e0b964 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -584,12 +584,12 @@ ubsan_expand_si_overflow_mul_check (gimple stmt)
 	  if (GET_CODE (lopart0) == SUBREG)
 	    {
 	      SUBREG_PROMOTED_VAR_P (lopart0) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET (lopart0, 0);
+	      SUBREG_PROMOTED_SET (lopart0, 0);
 	    }
 	  if (GET_CODE (lopart1) == SUBREG)
 	    {
 	      SUBREG_PROMOTED_VAR_P (lopart1) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET (lopart1, 0);
+	      SUBREG_PROMOTED_SET (lopart1, 0);
 	    }
 	  tree halfstype = build_nonstandard_integer_type (hprec, 0);
 	  ops.op0 = make_tree (halfstype, lopart0);
diff --git a/gcc/optabs.c b/gcc/optabs.c
index ca1c194..da07afa 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -368,7 +368,7 @@ widen_operand (rtx op, enum machine_mode mode, enum machine_mode oldmode,
      a promoted object differs from our extension.  */
   if (! no_extend
       || (GET_CODE (op) == SUBREG && SUBREG_PROMOTED_VAR_P (op)
-	  && SUBREG_PROMOTED_UNSIGNED_P (op) == unsignedp))
+	  && (SUBREG_CHECK_PROMOTED_SIGN (op, unsignedp))))
     return convert_modes (mode, oldmode, op, unsignedp);
 
   /* If MODE is no wider than a single word, we return a lowpart or paradoxical
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 6ec91a8..e46aa1b 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -1585,29 +1585,69 @@ get_full_set_src_cost (rtx x, struct full_rtx_costs *c)
 #define SUBREG_PROMOTED_VAR_P(RTX)					\
   (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED", (RTX), SUBREG)->in_struct)
 
-#define SUBREG_PROMOTED_UNSIGNED_SET(RTX, VAL)				\
-do {									\
-  rtx const _rtx = RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_SET",	\
-				    (RTX), SUBREG);			\
-  if ((VAL) < 0)							\
-    _rtx->volatil = 1;							\
-  else {								\
-    _rtx->volatil = 0;							\
-    _rtx->unchanging = (VAL);						\
-  }									\
-} while (0)
-
 /* Valid for subregs which are SUBREG_PROMOTED_VAR_P().  In that case
    this gives the necessary extensions:
-   0  - signed
-   1  - normal unsigned
+   0  - signed (SPR_SIGNED)
+   1  - normal unsigned (SPR_UNSIGNED)
+   2  - value is both sign and unsign extended for mode
+	(SPR_SIGNED_AND_UNSIGNED).
    -1 - pointer unsigned, which most often can be handled like unsigned
         extension, except for generating instructions where we need to
-	emit special code (ptr_extend insns) on some architectures.  */
+	emit special code (ptr_extend insns) on some architectures
+	(SPR_POINTER). */
+
+const unsigned int SRP_POINTER  = -1;
+const unsigned int SRP_SIGNED   = 0;
+const unsigned int SRP_UNSIGNED = 1;
+const unsigned int SRP_SIGNED_AND_UNSIGNED = 2;
+
+/* Sets promoted mode for SUBREG_PROMOTED_VAR_P(), */
+#define SUBREG_PROMOTED_SET(RTX, VAL)		                        \
+do {								        \
+  rtx const _rtx = RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_SET",		\
+                                    (RTX), SUBREG);			\
+  switch ((VAL))							\
+  {									\
+    case SRP_POINTER:							\
+      _rtx->volatil = 0;						\
+      _rtx->unchanging = 0;						\
+      break;								\
+    case SRP_SIGNED:							\
+      _rtx->volatil = 0;						\
+      _rtx->unchanging = 1;						\
+      break;								\
+    case SRP_UNSIGNED:							\
+      _rtx->volatil = 1;						\
+      _rtx->unchanging = 0;						\
+      break;								\
+    case SRP_SIGNED_AND_UNSIGNED:					\
+      _rtx->volatil = 1;						\
+      _rtx->unchanging = 1;						\
+      break;								\
+  }									\
+} while (0)
+
+/* Gets promoted mode for SUBREG_PROMOTED_VAR_P(). */
+#define SUBREG_PROMOTED_GET(RTX)	\
+  (2 * ((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_GET", (RTX), SUBREG))->volatil)\
+   + (RTX)->unchanging - 1)
 
+/* Predicate to check if RTX of SUBREG_PROMOTED_VAR_P() is promoted
+   for SIGNED type. */
+#define SUBREG_PROMOTED_SIGNED_P(RTX)	\
+  ((((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_SIGNED_P", (RTX), SUBREG)->volatil)\
+     + (RTX)->unchanging) == 0) ? 0 : ((RTX)->unchanging == 1))
+
+/* Predicate to check if RTX of SUBREG_PROMOTED_VAR_P() is promoted
+   for UNSIGNED type. */
 #define SUBREG_PROMOTED_UNSIGNED_P(RTX)	\
-  ((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil) \
-   ? -1 : (int) (RTX)->unchanging)
+  ((((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil)\
+     + (RTX)->unchanging) == 0) ? -1 : ((RTX)->volatil == 1))
+
+/* Checks if RTX of SUBREG_PROMOTED_VAR_P() is promotd for given SIGN. */
+#define	SUBREG_CHECK_PROMOTED_SIGN(RTX, SIGN) \
+     ((SIGN) ? SUBREG_PROMOTED_UNSIGNED_P((RTX))\
+	     : SUBREG_PROMOTED_SIGNED_P((RTX)))	\
 
 /* True if the subreg was generated by LRA for reload insns.  Such
    subregs are valid only during LRA.  */
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 82cfc1bf..547bdbf 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -4619,7 +4619,7 @@ num_sign_bit_copies1 (const_rtx x, enum machine_mode mode, const_rtx known_x,
 	 and we are looking at it in a wider mode, we know that at least the
 	 high-order bits are known to be sign bit copies.  */
 
-      if (SUBREG_PROMOTED_VAR_P (x) && ! SUBREG_PROMOTED_UNSIGNED_P (x))
+      if (SUBREG_PROMOTED_VAR_P (x) && SUBREG_PROMOTED_SIGNED_P (x))
 	{
 	  num0 = cached_num_sign_bit_copies (SUBREG_REG (x), mode,
 					     known_x, known_mode, known_ret);
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 181b56f..81d196f 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -1352,7 +1352,7 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op)
 	 target mode is the same as the variable's promotion.  */
       if (GET_CODE (op) == SUBREG
 	  && SUBREG_PROMOTED_VAR_P (op)
-	  && ! SUBREG_PROMOTED_UNSIGNED_P (op)
+	  && SUBREG_PROMOTED_SIGNED_P (op)
 	  && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
 	{
 	  temp = rtl_hooks.gen_lowpart_no_emit (mode, op);
@@ -5595,8 +5595,7 @@ simplify_subreg (enum machine_mode outermode, rtx op,
 	      && subreg_lowpart_p (newx))
 	    {
 	      SUBREG_PROMOTED_VAR_P (newx) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET
-		(newx, SUBREG_PROMOTED_UNSIGNED_P (op));
+	      SUBREG_PROMOTED_SET (newx, SUBREG_PROMOTED_GET (op));
 	    }
 	  return newx;
 	}

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 2/2] Enable elimination of zext/sext
  2014-06-24 11:48 [PATCH 0/2] Zext/sext elimination using value range Kugan
  2014-06-24 11:51 ` [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED) Kugan
@ 2014-06-24 11:53 ` Kugan
  2014-06-24 12:21   ` Jakub Jelinek
  1 sibling, 1 reply; 58+ messages in thread
From: Kugan @ 2014-06-24 11:53 UTC (permalink / raw)
  To: Jakub Jelinek, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 582 bytes --]

Sets proper flags on the SUBREG based on value
range info and enables elimination of zext/sext when possible.

Thanks,
Kugan


gcc/
2014-06-24  Kugan Vivekanandarajah  <kuganv@linaro.org>

	* gcc/calls.c (precompute_arguments: Check is_promoted_for_type
	and set the promoted mode.
	(is_promoted_for_type) : New function.
	(expand_expr_real_1) : Check is_promoted_for_type
	and set the promoted mode.
	* gcc/expr.h (is_promoted_for_type) : New function definition.
	* gcc/cfgexpand.c (expand_gimple_stmt_1) : Call emit_move_insn if
	SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.

[-- Attachment #2: p2.txt --]
[-- Type: text/plain, Size: 4620 bytes --]

diff --git a/gcc/calls.c b/gcc/calls.c
index c1fe3b8..4ef9df8 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -1484,7 +1484,10 @@ precompute_arguments (int num_actuals, struct arg_data *args)
 	      args[i].initial_value
 		= gen_lowpart_SUBREG (mode, args[i].value);
 	      SUBREG_PROMOTED_VAR_P (args[i].initial_value) = 1;
-	      SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);
+	      if (is_promoted_for_type (args[i].tree_value, mode, !args[i].unsignedp))
+		SUBREG_PROMOTED_SET (args[i].initial_value, SRP_SIGNED_AND_UNSIGNED);
+	      else
+		SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);
 	    }
 	}
     }
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index e8cd87f..0540b4d 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -3309,7 +3309,13 @@ expand_gimple_stmt_1 (gimple stmt)
 					  GET_MODE (target), temp, unsignedp);
 		  }
 
-		convert_move (SUBREG_REG (target), temp, unsignedp);
+		if ((SUBREG_PROMOTED_GET (target) == SRP_SIGNED_AND_UNSIGNED)
+		    && (GET_CODE (temp) == SUBREG)
+		    && (GET_MODE (target) == GET_MODE (temp))
+		    && (GET_MODE (SUBREG_REG (target)) == GET_MODE (SUBREG_REG (temp))))
+		  emit_move_insn (SUBREG_REG (target), SUBREG_REG (temp));
+		else
+		  convert_move (SUBREG_REG (target), temp, unsignedp);
 	      }
 	    else if (nontemporal && emit_storent_insn (target, temp))
 	      ;
diff --git a/gcc/expr.c b/gcc/expr.c
index a8db9f5..b2c8146 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9209,6 +9209,59 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
 }
 #undef REDUCE_BIT_FIELD
 
+/* Return TRUE if value in SSA is already zero/sign extended for lhs type
+   (type here is the combination of LHS_MODE and LHS_UNS) using value range
+   information stored. Return FALSE otherwise. */
+bool
+is_promoted_for_type (tree ssa, enum machine_mode lhs_mode, bool lhs_uns)
+{
+  wide_int type_min, type_max;
+  wide_int min, max, limit;
+  unsigned int prec;
+  tree lhs_type;
+  bool rhs_uns;
+
+  if (flag_wrapv
+      || (flag_strict_overflow == false)
+      || (ssa == NULL_TREE)
+      || (TREE_CODE (ssa) != SSA_NAME)
+      || !INTEGRAL_TYPE_P (TREE_TYPE (ssa))
+      || POINTER_TYPE_P (TREE_TYPE (ssa)))
+    return false;
+
+  /* Return FALSE if value_range is not recorded for SSA. */
+  if (get_range_info (ssa, &min, &max) != VR_RANGE)
+    return false;
+
+  lhs_type = lang_hooks.types.type_for_mode (lhs_mode, lhs_uns);
+  rhs_uns = TYPE_UNSIGNED (TREE_TYPE (ssa));
+  prec = min.get_precision ();
+
+  /* Signed maximum value.  */
+  limit = wide_int::from (TYPE_MAX_VALUE (TREE_TYPE (ssa)), prec, SIGNED);
+
+  /* Signedness of LHS and RHS differs but values in range.  */
+  if ((rhs_uns != lhs_uns)
+      && ((!lhs_uns && !wi::neg_p (min, TYPE_SIGN (lhs_type)))
+	  || (lhs_uns && (wi::cmp (max, limit, TYPE_SIGN (TREE_TYPE (ssa))) == -1))))
+    lhs_uns = !lhs_uns;
+
+  /* Signedness of LHS and RHS should match.  */
+  if (rhs_uns != lhs_uns)
+    return false;
+
+  type_min = wide_int::from (TYPE_MIN_VALUE (lhs_type), prec, TYPE_SIGN (TREE_TYPE (ssa)));
+  type_max = wide_int::from (TYPE_MAX_VALUE (lhs_type), prec, TYPE_SIGN (TREE_TYPE (ssa)));
+
+  /* Check if values lies in-between the type range.  */
+  if ((wi::neg_p (max, TYPE_SIGN (TREE_TYPE (ssa)))
+       || (wi::cmp (max, type_max, TYPE_SIGN (TREE_TYPE (ssa))) != 1))
+      && (!wi::neg_p (min, TYPE_SIGN (TREE_TYPE (ssa)))
+	  || (wi::cmp (type_min, min, TYPE_SIGN (TREE_TYPE (ssa))) != 1)))
+    return true;
+
+  return false;
+}
 
 /* Return TRUE if expression STMT is suitable for replacement.  
    Never consider memory loads as replaceable, because those don't ever lead 
@@ -9512,7 +9565,10 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
 
 	  temp = gen_lowpart_SUBREG (mode, decl_rtl);
 	  SUBREG_PROMOTED_VAR_P (temp) = 1;
-	  SUBREG_PROMOTED_SET (temp, unsignedp);
+	  if (is_promoted_for_type (ssa_name, mode, !unsignedp))
+	    SUBREG_PROMOTED_SET (temp, SRP_SIGNED_AND_UNSIGNED);
+	  else
+	    SUBREG_PROMOTED_SET (temp, unsignedp);
 
 	  return temp;
 	}
diff --git a/gcc/expr.h b/gcc/expr.h
index 6a1d3ab..e99d000 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -440,6 +440,7 @@ extern rtx expand_expr_real_1 (tree, rtx, enum machine_mode,
 			       enum expand_modifier, rtx *, bool);
 extern rtx expand_expr_real_2 (sepops, rtx, enum machine_mode,
 			       enum expand_modifier);
+extern bool is_promoted_for_type (tree, enum machine_mode, bool);
 
 /* Generate code for computing expression EXP.
    An rtx for the computed value is returned.  The value is never null.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED)
  2014-06-24 11:51 ` [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED) Kugan
@ 2014-06-24 12:18   ` Jakub Jelinek
  2014-06-25  7:21     ` Kugan
  0 siblings, 1 reply; 58+ messages in thread
From: Jakub Jelinek @ 2014-06-24 12:18 UTC (permalink / raw)
  To: Kugan, Richard Henderson; +Cc: gcc-patches

On Tue, Jun 24, 2014 at 09:51:51PM +1000, Kugan wrote:
> Changes the the SUBREG flags to be able to set promoted for sign
> (SRP_SIGNED), unsigned (SRP_UNSIGNED),  sign and unsigned
> (SPR_SIGNED_AND_UNSIGNED) in SUBREG_PROMOTED_VAR_P.

> 2014-06-24  Kugan Vivekanandarajah  <kuganv@linaro.org>
> 
> 	* gcc/calls.c (precompute_arguments): Use new SUBREG_PROMOTED_SET
> 	instead of SUBREG_PROMOTED_UNSIGNED_SET
> 	(expand_call) : Likewise.

gcc/ prefix doesn't belong to gcc/ChangeLog entries (everywhere).

> 	* gcc/expr.c (convert_move) : Use new SUBREG_CHECK_PROMOTED_SIGN

No space before : (everywhere).
> @@ -3365,7 +3364,8 @@ expand_call (tree exp, rtx target, int ignore)
>  
>  	  target = gen_rtx_SUBREG (TYPE_MODE (type), target, offset);
>  	  SUBREG_PROMOTED_VAR_P (target) = 1;
> -	  SUBREG_PROMOTED_UNSIGNED_SET (target, unsignedp);
> +	  SUBREG_PROMOTED_SET (target, unsignedp);
> +
>  	}
>  

Please avoid adding useless blank lines.

> --- a/gcc/expr.c
> +++ b/gcc/expr.c
> @@ -329,7 +329,7 @@ convert_move (rtx to, rtx from, int unsignedp)
>    if (GET_CODE (from) == SUBREG && SUBREG_PROMOTED_VAR_P (from)
>        && (GET_MODE_PRECISION (GET_MODE (SUBREG_REG (from)))
>  	  >= GET_MODE_PRECISION (to_mode))
> -      && SUBREG_PROMOTED_UNSIGNED_P (from) == unsignedp)
> +      && (SUBREG_CHECK_PROMOTED_SIGN (from, unsignedp)))

Please remove the extra ()s, the macro should have ()s around the definition
to make this unnecessary (many times).

> @@ -5202,8 +5202,7 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
>  	  && GET_MODE_PRECISION (GET_MODE (target))
>  	     == TYPE_PRECISION (TREE_TYPE (exp)))
>  	{
> -	  if (TYPE_UNSIGNED (TREE_TYPE (exp))
> -	      != SUBREG_PROMOTED_UNSIGNED_P (target))
> +	  if (!(SUBREG_CHECK_PROMOTED_SIGN (target, TYPE_UNSIGNED (TREE_TYPE (exp)))))

Too long line.

> -#define SUBREG_PROMOTED_UNSIGNED_SET(RTX, VAL)				\
> -do {									\
> -  rtx const _rtx = RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_SET",	\
> -				    (RTX), SUBREG);			\
> -  if ((VAL) < 0)							\
> -    _rtx->volatil = 1;							\
> -  else {								\
> -    _rtx->volatil = 0;							\
> -    _rtx->unchanging = (VAL);						\
> -  }									\
> -} while (0)
> -
>  /* Valid for subregs which are SUBREG_PROMOTED_VAR_P().  In that case
>     this gives the necessary extensions:
> -   0  - signed
> -   1  - normal unsigned
> +   0  - signed (SPR_SIGNED)
> +   1  - normal unsigned (SPR_UNSIGNED)
> +   2  - value is both sign and unsign extended for mode
> +	(SPR_SIGNED_AND_UNSIGNED).
>     -1 - pointer unsigned, which most often can be handled like unsigned
>          extension, except for generating instructions where we need to
> -	emit special code (ptr_extend insns) on some architectures.  */
> +	emit special code (ptr_extend insns) on some architectures
> +	(SPR_POINTER). */
> +
> +const unsigned int SRP_POINTER  = -1;
> +const unsigned int SRP_SIGNED   = 0;
> +const unsigned int SRP_UNSIGNED = 1;
> +const unsigned int SRP_SIGNED_AND_UNSIGNED = 2;

But most importantly, I thought Richard Henderson suggested
to use SRP_POINTER 0, SRP_SIGNED 1, SRP_UNSIGNED 2, SRP_SIGNED_AND_UNSIGNED 3,
that way when checking e.g. SUBREG_PROMOTED_SIGNED_P or
SUBREG_PROMOTED_UNSIGNED_P you can check just the single bit.
Where something tested for SUBREG_PROMOTED_UNSIGNED_P () == -1 just
use SUBREG_PROMOTED_GET.

	Jakub

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-06-24 11:53 ` [PATCH 2/2] Enable elimination of zext/sext Kugan
@ 2014-06-24 12:21   ` Jakub Jelinek
  2014-06-25  8:15     ` Kugan
  0 siblings, 1 reply; 58+ messages in thread
From: Jakub Jelinek @ 2014-06-24 12:21 UTC (permalink / raw)
  To: Kugan; +Cc: gcc-patches

On Tue, Jun 24, 2014 at 09:53:35PM +1000, Kugan wrote:
> 2014-06-24  Kugan Vivekanandarajah  <kuganv@linaro.org>
> 
> 	* gcc/calls.c (precompute_arguments: Check is_promoted_for_type
> 	and set the promoted mode.
> 	(is_promoted_for_type) : New function.
> 	(expand_expr_real_1) : Check is_promoted_for_type
> 	and set the promoted mode.
> 	* gcc/expr.h (is_promoted_for_type) : New function definition.
> 	* gcc/cfgexpand.c (expand_gimple_stmt_1) : Call emit_move_insn if
> 	SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.

Similarly to the other patch, no gcc/ prefix in ChangeLog, no space before
:, watch for too long lines, remove useless ()s around conditions.

> +bool
> +is_promoted_for_type (tree ssa, enum machine_mode lhs_mode, bool lhs_uns)
> +{
> +  wide_int type_min, type_max;
> +  wide_int min, max, limit;
> +  unsigned int prec;
> +  tree lhs_type;
> +  bool rhs_uns;
> +
> +  if (flag_wrapv

Why?

> +      || (flag_strict_overflow == false)

Why?  Also, that would be !flag_strict_overflow instead of
(flag_strict_overflow == false)

> +      || (ssa == NULL_TREE)
> +      || (TREE_CODE (ssa) != SSA_NAME)
> +      || !INTEGRAL_TYPE_P (TREE_TYPE (ssa))
> +      || POINTER_TYPE_P (TREE_TYPE (ssa)))

All pointer types are !INTEGRAL_TYPE_P, so the last condition
doesn't make any sense.

	Jakub

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED)
  2014-06-24 12:18   ` Jakub Jelinek
@ 2014-06-25  7:21     ` Kugan
  2014-06-25  7:50       ` Jakub Jelinek
  0 siblings, 1 reply; 58+ messages in thread
From: Kugan @ 2014-06-25  7:21 UTC (permalink / raw)
  To: Jakub Jelinek, Richard Henderson; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 4731 bytes --]

>> +const unsigned int SRP_POINTER  = -1;
>> +const unsigned int SRP_SIGNED   = 0;
>> +const unsigned int SRP_UNSIGNED = 1;
>> +const unsigned int SRP_SIGNED_AND_UNSIGNED = 2;
> 
> But most importantly, I thought Richard Henderson suggested
> to use SRP_POINTER 0, SRP_SIGNED 1, SRP_UNSIGNED 2, SRP_SIGNED_AND_UNSIGNED 3,
> that way when checking e.g. SUBREG_PROMOTED_SIGNED_P or
> SUBREG_PROMOTED_UNSIGNED_P you can check just the single bit.
> Where something tested for SUBREG_PROMOTED_UNSIGNED_P () == -1 just
> use SUBREG_PROMOTED_GET.

The problem with SRP_POINTER 0, SRP_SIGNED 1, SRP_UNSIGNED 2,
SRP_SIGNED_AND_UNSIGNED 3 (as I understand) is that, it will be
incompatible with TYPE_UNSIGNED (tree) and defines of
POINTER_EXTEND_UNSIGNED values. We will have to then translate while
setting to SRP_* values . Also SUBREG_PROMOTED_SIGNED_P is now checked
in some cases for != 0 (meaning SRP_POINTER or SRP_UNSIGNED) and in some
cases > 0 (meaning SRP_UNSIGNED).

Since our aim is to perform single bit checks, why donÂ’t we just use
this representation internally (i.e.  _rtx->unchanging = 1 if SRP_SIGNED
and _rtx->volatil = 1 if SRP_UNSIGNED). As for SUBREG_PROMOTED_SIGNED_P,
we still have to return -1 or 1 depending on SRP_POINTER or SRP_UNSIGNED.


const unsigned int SRP_POINTER	= -1;
const unsigned int SRP_SIGNED   = 0;
const unsigned int SRP_UNSIGNED = 1;
const unsigned int SRP_SIGNED_AND_UNSIGNED = 2;

/* Sets promoted mode for SUBREG_PROMOTED_VAR_P(), */
#define SUBREG_PROMOTED_SET(RTX, VAL)	                        \
do {							        \
  rtx const _rtx = RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_SET",	\
                                    (RTX), SUBREG);		\
  switch ((VAL))						\
  {								\
    case SRP_POINTER:						\
      _rtx->volatil = 0;					\
      _rtx->unchanging = 0;					\
      break;							\
    case SRP_SIGNED:						\
      _rtx->volatil = 0;					\
      _rtx->unchanging = 1;					\
      break;							\
    case SRP_UNSIGNED:						\
      _rtx->volatil = 1;					\
      _rtx->unchanging = 0;					\
      break;							\
    case SRP_SIGNED_AND_UNSIGNED:				\
      _rtx->volatil = 1;					\
      _rtx->unchanging = 1;					\
      break;							\
  }								\
} while (0)

/* Gets promoted mode for SUBREG_PROMOTED_VAR_P(). */
#define SUBREG_PROMOTED_GET(RTX)	\
  (2 * (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_GET", (RTX), SUBREG)->volatil)\
   + (RTX)->unchanging - 1)

/* Predicate to check if RTX of SUBREG_PROMOTED_VAR_P() is promoted
   for SIGNED type.  */
#define SUBREG_PROMOTED_SIGNED_P(RTX)	\
  (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_SIGNED_P", (RTX),
SUBREG)->unchanging == 1)

/* Predicate to check if RTX of SUBREG_PROMOTED_VAR_P() is promoted
   for UNSIGNED type.  In case of SRP_POINTER, SUBREG_PROMOTED_UNSIGNED_P
   returns -1 as this is in most cases handled like unsigned extension,
   except for generating instructions where special code is emitted for
   (ptr_extend insns) on some architectures.  */
   #define SUBREG_PROMOTED_UNSIGNED_P(RTX)	\
  ((((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX),
SUBREG)->volatil)\
     + (RTX)->unchanging) == 0) ? -1 : ((RTX)->volatil == 1))

Am I missing anything here? Please let me know. I am attaching the patch
based on this with your other review comments addressed.

Thanks,
Kugan

gcc/
2014-06-25  Kugan Vivekanandarajah  <kuganv@linaro.org>

	* calls.c (precompute_arguments): Use new SUBREG_PROMOTED_SET
	instead of SUBREG_PROMOTED_UNSIGNED_SET
	(expand_call): Likewise.
	* expr.c (convert_move): Use new SUBREG_CHECK_PROMOTED_SIGN
	instead of SUBREG_PROMOTED_UNSIGNED_P.
	(convert_modes): Likewise.
	(store_expr): Likewise.
	(expand_expr_real_1): Use new SUBREG_PROMOTED_SET
	instead of SUBREG_PROMOTED_UNSIGNED_SET.
	* function.c (assign_param_setup_reg): Use new SUBREG_PROMOTED_SET
	instead of SUBREG_PROMOTED_UNSIGNED_SET.
	* ifcvt.c (noce_emit_cmove): Updated to use
	SUBREG_PROMOTED_UNSIGNED_P and SUBREG_PROMOTED_SIGNED_P.
	* internal-fn.c (ubsan_expand_si_overflow_mul_check): Use
	SUBREG_PROMOTED_SET instead of SUBREG_PROMOTED_UNSIGNED_SET.
	* optabs.c (widen_operand): Use new SUBREG_CHECK_PROMOTED_SIGN
	instead of SUBREG_PROMOTED_UNSIGNED_P.
	* rtl.h (SUBREG_PROMOTED_UNSIGNED_SET): Remove.
	(SUBREG_PROMOTED_SET): New define.
	(SUBREG_PROMOTED_GET): Likewise.
	(SUBREG_PROMOTED_SIGNED_P): Likewise.
	(SUBREG_CHECK_PROMOTED_SIGN): Likewise.
	(SUBREG_PROMOTED_UNSIGNED_P): Updated.
	* rtlanal.c (simplify_unary_operation_1): Use new
	SUBREG_PROMOTED_SET instead of SUBREG_PROMOTED_UNSIGNED_SET.
	* simplify-rtx.c (simplify_unary_operation_1): Use new
	SUBREG_PROMOTED_SIGNED_P instead of
	!SUBREG_PROMOTED_UNSIGNED_P.
	(simplify_subreg): Use new SUBREG_PROMOTED_SET instead of
	 SUBREG_PROMOTED_UNSIGNED_SET.





[-- Attachment #2: p1.txt --]
[-- Type: text/plain, Size: 10718 bytes --]

diff --git a/gcc/calls.c b/gcc/calls.c
index 78fe7d8..a3e6faa 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -1484,8 +1484,7 @@ precompute_arguments (int num_actuals, struct arg_data *args)
 	      args[i].initial_value
 		= gen_lowpart_SUBREG (mode, args[i].value);
 	      SUBREG_PROMOTED_VAR_P (args[i].initial_value) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET (args[i].initial_value,
-					    args[i].unsignedp);
+	      SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);
 	    }
 	}
     }
@@ -3365,7 +3364,7 @@ expand_call (tree exp, rtx target, int ignore)
 
 	  target = gen_rtx_SUBREG (TYPE_MODE (type), target, offset);
 	  SUBREG_PROMOTED_VAR_P (target) = 1;
-	  SUBREG_PROMOTED_UNSIGNED_SET (target, unsignedp);
+	  SUBREG_PROMOTED_SET (target, unsignedp);
 	}
 
       /* If size of args is variable or this was a constructor call for a stack
diff --git a/gcc/expr.c b/gcc/expr.c
index 512c024..da02a06 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -329,7 +329,7 @@ convert_move (rtx to, rtx from, int unsignedp)
   if (GET_CODE (from) == SUBREG && SUBREG_PROMOTED_VAR_P (from)
       && (GET_MODE_PRECISION (GET_MODE (SUBREG_REG (from)))
 	  >= GET_MODE_PRECISION (to_mode))
-      && SUBREG_PROMOTED_UNSIGNED_P (from) == unsignedp)
+      && SUBREG_CHECK_PROMOTED_SIGN (from, unsignedp))
     from = gen_lowpart (to_mode, from), from_mode = to_mode;
 
   gcc_assert (GET_CODE (to) != SUBREG || !SUBREG_PROMOTED_VAR_P (to));
@@ -703,7 +703,7 @@ convert_modes (enum machine_mode mode, enum machine_mode oldmode, rtx x, int uns
 
   if (GET_CODE (x) == SUBREG && SUBREG_PROMOTED_VAR_P (x)
       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))) >= GET_MODE_SIZE (mode)
-      && SUBREG_PROMOTED_UNSIGNED_P (x) == unsignedp)
+      && SUBREG_CHECK_PROMOTED_SIGN (x, unsignedp))
     x = gen_lowpart (mode, SUBREG_REG (x));
 
   if (GET_MODE (x) != VOIDmode)
@@ -5202,8 +5202,8 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
 	  && GET_MODE_PRECISION (GET_MODE (target))
 	     == TYPE_PRECISION (TREE_TYPE (exp)))
 	{
-	  if (TYPE_UNSIGNED (TREE_TYPE (exp))
-	      != SUBREG_PROMOTED_UNSIGNED_P (target))
+	  if (!SUBREG_CHECK_PROMOTED_SIGN (target,
+					   TYPE_UNSIGNED (TREE_TYPE (exp))))
 	    {
 	      /* Some types, e.g. Fortran's logical*4, won't have a signed
 		 version, so use the mode instead.  */
@@ -9513,7 +9513,7 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
 
 	  temp = gen_lowpart_SUBREG (mode, decl_rtl);
 	  SUBREG_PROMOTED_VAR_P (temp) = 1;
-	  SUBREG_PROMOTED_UNSIGNED_SET (temp, unsignedp);
+	  SUBREG_PROMOTED_SET (temp, unsignedp);
 	  return temp;
 	}
 
diff --git a/gcc/function.c b/gcc/function.c
index 441289e..9509622 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -3093,7 +3093,7 @@ assign_parm_setup_reg (struct assign_parm_data_all *all, tree parm,
 	  /* The argument is already sign/zero extended, so note it
 	     into the subreg.  */
 	  SUBREG_PROMOTED_VAR_P (tempreg) = 1;
-	  SUBREG_PROMOTED_UNSIGNED_SET (tempreg, unsignedp);
+	  SUBREG_PROMOTED_SET (tempreg, unsignedp);
 	}
 
       /* TREE_USED gets set erroneously during expand_assignment.  */
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 816cdaa..b2a0574 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -1448,8 +1448,11 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
 	  || byte_vtrue != byte_vfalse
 	  || (SUBREG_PROMOTED_VAR_P (vtrue)
 	      != SUBREG_PROMOTED_VAR_P (vfalse))
-	  || (SUBREG_PROMOTED_UNSIGNED_P (vtrue)
-	      != SUBREG_PROMOTED_UNSIGNED_P (vfalse)))
+	  || ((SUBREG_PROMOTED_UNSIGNED_P (vtrue)
+	       != SUBREG_PROMOTED_UNSIGNED_P (vfalse))
+	      && (SUBREG_PROMOTED_SIGNED_P (vtrue)
+		  != SUBREG_PROMOTED_SIGNED_P (vfalse))))
+
 	return NULL_RTX;
 
       promoted_target = gen_reg_rtx (GET_MODE (reg_vtrue));
@@ -1463,7 +1466,7 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
 
       target = gen_rtx_SUBREG (GET_MODE (vtrue), promoted_target, byte_vtrue);
       SUBREG_PROMOTED_VAR_P (target) = SUBREG_PROMOTED_VAR_P (vtrue);
-      SUBREG_PROMOTED_UNSIGNED_SET (target, SUBREG_PROMOTED_UNSIGNED_P (vtrue));
+      SUBREG_PROMOTED_SET (target, SUBREG_PROMOTED_GET (vtrue));
       emit_move_insn (x, target);
       return x;
     }
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 78f59d6..4e0b964 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -584,12 +584,12 @@ ubsan_expand_si_overflow_mul_check (gimple stmt)
 	  if (GET_CODE (lopart0) == SUBREG)
 	    {
 	      SUBREG_PROMOTED_VAR_P (lopart0) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET (lopart0, 0);
+	      SUBREG_PROMOTED_SET (lopart0, 0);
 	    }
 	  if (GET_CODE (lopart1) == SUBREG)
 	    {
 	      SUBREG_PROMOTED_VAR_P (lopart1) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET (lopart1, 0);
+	      SUBREG_PROMOTED_SET (lopart1, 0);
 	    }
 	  tree halfstype = build_nonstandard_integer_type (hprec, 0);
 	  ops.op0 = make_tree (halfstype, lopart0);
diff --git a/gcc/optabs.c b/gcc/optabs.c
index ca1c194..a00b383 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -368,7 +368,7 @@ widen_operand (rtx op, enum machine_mode mode, enum machine_mode oldmode,
      a promoted object differs from our extension.  */
   if (! no_extend
       || (GET_CODE (op) == SUBREG && SUBREG_PROMOTED_VAR_P (op)
-	  && SUBREG_PROMOTED_UNSIGNED_P (op) == unsignedp))
+	  && SUBREG_CHECK_PROMOTED_SIGN (op, unsignedp)))
     return convert_modes (mode, oldmode, op, unsignedp);
 
   /* If MODE is no wider than a single word, we return a lowpart or paradoxical
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 6ec91a8..edb449b 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -1585,29 +1585,71 @@ get_full_set_src_cost (rtx x, struct full_rtx_costs *c)
 #define SUBREG_PROMOTED_VAR_P(RTX)					\
   (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED", (RTX), SUBREG)->in_struct)
 
-#define SUBREG_PROMOTED_UNSIGNED_SET(RTX, VAL)				\
-do {									\
-  rtx const _rtx = RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_SET",	\
-				    (RTX), SUBREG);			\
-  if ((VAL) < 0)							\
-    _rtx->volatil = 1;							\
-  else {								\
-    _rtx->volatil = 0;							\
-    _rtx->unchanging = (VAL);						\
-  }									\
-} while (0)
-
 /* Valid for subregs which are SUBREG_PROMOTED_VAR_P().  In that case
    this gives the necessary extensions:
-   0  - signed
-   1  - normal unsigned
+   0  - signed (SPR_SIGNED)
+   1  - normal unsigned (SPR_UNSIGNED)
+   2  - value is both sign and unsign extended for mode
+	(SPR_SIGNED_AND_UNSIGNED).
    -1 - pointer unsigned, which most often can be handled like unsigned
         extension, except for generating instructions where we need to
-	emit special code (ptr_extend insns) on some architectures.  */
+	emit special code (ptr_extend insns) on some architectures
+	(SPR_POINTER). */
+
+const unsigned int SRP_POINTER	= -1;
+const unsigned int SRP_SIGNED   = 0;
+const unsigned int SRP_UNSIGNED = 1;
+const unsigned int SRP_SIGNED_AND_UNSIGNED = 2;
+
+/* Sets promoted mode for SUBREG_PROMOTED_VAR_P().  */
+#define SUBREG_PROMOTED_SET(RTX, VAL)		                        \
+do {								        \
+  rtx const _rtx = RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_SET",		\
+                                    (RTX), SUBREG);			\
+  switch ((VAL))							\
+  {									\
+    case SRP_POINTER:							\
+      _rtx->volatil = 0;						\
+      _rtx->unchanging = 0;						\
+      break;								\
+    case SRP_SIGNED:							\
+      _rtx->volatil = 0;						\
+      _rtx->unchanging = 1;						\
+      break;								\
+    case SRP_UNSIGNED:							\
+      _rtx->volatil = 1;						\
+      _rtx->unchanging = 0;						\
+      break;								\
+    case SRP_SIGNED_AND_UNSIGNED:					\
+      _rtx->volatil = 1;						\
+      _rtx->unchanging = 1;						\
+      break;								\
+  }									\
+} while (0)
 
+/* Gets promoted mode for SUBREG_PROMOTED_VAR_P().  */
+#define SUBREG_PROMOTED_GET(RTX)	\
+  (2 * (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_GET", (RTX), SUBREG)->volatil)\
+   + (RTX)->unchanging - 1)
+
+/* Predicate to check if RTX of SUBREG_PROMOTED_VAR_P() is promoted
+   for SIGNED type.  */
+#define SUBREG_PROMOTED_SIGNED_P(RTX)	\
+  (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_SIGNED_P", (RTX), SUBREG)->unchanging == 1)
+
+/* Predicate to check if RTX of SUBREG_PROMOTED_VAR_P() is promoted
+   for UNSIGNED type.  In case of SRP_POINTER, SUBREG_PROMOTED_UNSIGNED_P
+   returns -1 as this is in most cases handled like unsigned extension,
+   except for generating instructions where special code is emitted for
+   (ptr_extend insns) on some architectures.  */
 #define SUBREG_PROMOTED_UNSIGNED_P(RTX)	\
-  ((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil) \
-   ? -1 : (int) (RTX)->unchanging)
+  ((((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil)\
+     + (RTX)->unchanging) == 0) ? -1 : ((RTX)->volatil == 1))
+
+/* Checks if RTX of SUBREG_PROMOTED_VAR_P() is promotd for given SIGN.  */
+#define	SUBREG_CHECK_PROMOTED_SIGN(RTX, SIGN) \
+     ((SIGN) ? SUBREG_PROMOTED_UNSIGNED_P ((RTX))	\
+	     : SUBREG_PROMOTED_SIGNED_P ((RTX)))	\
 
 /* True if the subreg was generated by LRA for reload insns.  Such
    subregs are valid only during LRA.  */
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 82cfc1bf..547bdbf 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -4619,7 +4619,7 @@ num_sign_bit_copies1 (const_rtx x, enum machine_mode mode, const_rtx known_x,
 	 and we are looking at it in a wider mode, we know that at least the
 	 high-order bits are known to be sign bit copies.  */
 
-      if (SUBREG_PROMOTED_VAR_P (x) && ! SUBREG_PROMOTED_UNSIGNED_P (x))
+      if (SUBREG_PROMOTED_VAR_P (x) && SUBREG_PROMOTED_SIGNED_P (x))
 	{
 	  num0 = cached_num_sign_bit_copies (SUBREG_REG (x), mode,
 					     known_x, known_mode, known_ret);
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 181b56f..81d196f 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -1352,7 +1352,7 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op)
 	 target mode is the same as the variable's promotion.  */
       if (GET_CODE (op) == SUBREG
 	  && SUBREG_PROMOTED_VAR_P (op)
-	  && ! SUBREG_PROMOTED_UNSIGNED_P (op)
+	  && SUBREG_PROMOTED_SIGNED_P (op)
 	  && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
 	{
 	  temp = rtl_hooks.gen_lowpart_no_emit (mode, op);
@@ -5595,8 +5595,7 @@ simplify_subreg (enum machine_mode outermode, rtx op,
 	      && subreg_lowpart_p (newx))
 	    {
 	      SUBREG_PROMOTED_VAR_P (newx) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET
-		(newx, SUBREG_PROMOTED_UNSIGNED_P (op));
+	      SUBREG_PROMOTED_SET (newx, SUBREG_PROMOTED_GET (op));
 	    }
 	  return newx;
 	}

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED)
  2014-06-25  7:21     ` Kugan
@ 2014-06-25  7:50       ` Jakub Jelinek
  2014-06-26  1:06         ` Kugan
  0 siblings, 1 reply; 58+ messages in thread
From: Jakub Jelinek @ 2014-06-25  7:50 UTC (permalink / raw)
  To: Kugan; +Cc: Richard Henderson, gcc-patches

On Wed, Jun 25, 2014 at 05:21:08PM +1000, Kugan wrote:
> The problem with SRP_POINTER 0, SRP_SIGNED 1, SRP_UNSIGNED 2,
> SRP_SIGNED_AND_UNSIGNED 3 (as I understand) is that, it will be
> incompatible with TYPE_UNSIGNED (tree) and defines of
> POINTER_EXTEND_UNSIGNED values. We will have to then translate while
> setting to SRP_* values . Also SUBREG_PROMOTED_SIGNED_P is now checked
> in some cases for != 0 (meaning SRP_POINTER or SRP_UNSIGNED) and in some
> cases > 0 (meaning SRP_UNSIGNED).
> 
> Since our aim is to perform single bit checks, why don’t we just use
> this representation internally (i.e.  _rtx->unchanging = 1 if SRP_SIGNED
> and _rtx->volatil = 1 if SRP_UNSIGNED). As for SUBREG_PROMOTED_SIGNED_P,
> we still have to return -1 or 1 depending on SRP_POINTER or SRP_UNSIGNED.

Why don't you make SUBREG_PROMOTED_UNSIGNED_P just return 0/1 (i.e. the
single bit), and for places where it would like to match both
SRP_UNSIGNED and SRP_POINTER use SUBREG_PROMOTED_GET () & SRP_UNSIGNED
or so?

> --- a/gcc/ifcvt.c
> +++ b/gcc/ifcvt.c
> @@ -1448,8 +1448,11 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
>  	  || byte_vtrue != byte_vfalse
>  	  || (SUBREG_PROMOTED_VAR_P (vtrue)
>  	      != SUBREG_PROMOTED_VAR_P (vfalse))
> -	  || (SUBREG_PROMOTED_UNSIGNED_P (vtrue)
> -	      != SUBREG_PROMOTED_UNSIGNED_P (vfalse)))
> +	  || ((SUBREG_PROMOTED_UNSIGNED_P (vtrue)
> +	       != SUBREG_PROMOTED_UNSIGNED_P (vfalse))
> +	      && (SUBREG_PROMOTED_SIGNED_P (vtrue)
> +		  != SUBREG_PROMOTED_SIGNED_P (vfalse))))

Shouldn't this be SUBREG_PROMOTED_GET (vtrue) != SUBREG_PROMOTED_GET (vfalse) ?

> +const unsigned int SRP_POINTER	= -1;
> +const unsigned int SRP_SIGNED   = 0;

Inconsistent whitespace, just use space instead of multiple spaces and/or
tabs.

> +const unsigned int SRP_UNSIGNED = 1;
> +const unsigned int SRP_SIGNED_AND_UNSIGNED = 2;

> +/* Predicate to check if RTX of SUBREG_PROMOTED_VAR_P() is promoted
> +   for SIGNED type.  */
> +#define SUBREG_PROMOTED_SIGNED_P(RTX)	\
> +  (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_SIGNED_P", (RTX), SUBREG)->unchanging == 1)

Why the " == 1" ?
> +
> +/* Predicate to check if RTX of SUBREG_PROMOTED_VAR_P() is promoted
> +   for UNSIGNED type.  In case of SRP_POINTER, SUBREG_PROMOTED_UNSIGNED_P
> +   returns -1 as this is in most cases handled like unsigned extension,
> +   except for generating instructions where special code is emitted for
> +   (ptr_extend insns) on some architectures.  */
>  #define SUBREG_PROMOTED_UNSIGNED_P(RTX)	\
> -  ((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil) \
> -   ? -1 : (int) (RTX)->unchanging)
> +  ((((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil)\
> +     + (RTX)->unchanging) == 0) ? -1 : ((RTX)->volatil == 1))
> +
> +/* Checks if RTX of SUBREG_PROMOTED_VAR_P() is promotd for given SIGN.  */
> +#define	SUBREG_CHECK_PROMOTED_SIGN(RTX, SIGN) \

Use space rather than tab.  Also, why do we need this macro?
Can't you just use SUBREG_PROMOTED_GET () == sign ?  I mean, sign in that
case is typically just 0 or 1.

	Jakub

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-06-24 12:21   ` Jakub Jelinek
@ 2014-06-25  8:15     ` Kugan
  2014-06-25  8:36       ` Jakub Jelinek
  0 siblings, 1 reply; 58+ messages in thread
From: Kugan @ 2014-06-25  8:15 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2298 bytes --]

On 24/06/14 22:21, Jakub Jelinek wrote:
> On Tue, Jun 24, 2014 at 09:53:35PM +1000, Kugan wrote:
>> 2014-06-24  Kugan Vivekanandarajah  <kuganv@linaro.org>
>>
>> 	* gcc/calls.c (precompute_arguments: Check is_promoted_for_type
>> 	and set the promoted mode.
>> 	(is_promoted_for_type) : New function.
>> 	(expand_expr_real_1) : Check is_promoted_for_type
>> 	and set the promoted mode.
>> 	* gcc/expr.h (is_promoted_for_type) : New function definition.
>> 	* gcc/cfgexpand.c (expand_gimple_stmt_1) : Call emit_move_insn if
>> 	SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.
> 
> Similarly to the other patch, no gcc/ prefix in ChangeLog, no space before
> :, watch for too long lines, remove useless ()s around conditions.

Changed it.

>> +bool
>> +is_promoted_for_type (tree ssa, enum machine_mode lhs_mode, bool lhs_uns)
>> +{
>> +  wide_int type_min, type_max;
>> +  wide_int min, max, limit;
>> +  unsigned int prec;
>> +  tree lhs_type;
>> +  bool rhs_uns;
>> +
>> +  if (flag_wrapv
> 
> Why?
> 
>> +      || (flag_strict_overflow == false)
> 
> Why?  Also, that would be !flag_strict_overflow instead of
> (flag_strict_overflow == false)

For these flags, value ranges generated are not usable for extension
eliminations. Therefore, without this some of the test cases in
regression fails. For example:

short a;
void
foo (void)
{
  for (a = 0; a >= 0; a++)
    ;
}
-Os  -fno-strict-overflow produces the following range for the index
increment and hence goes into infinite loop.
_10: [1, 32768]
_10 = _4 + 1;

> 
>> +      || (ssa == NULL_TREE)
>> +      || (TREE_CODE (ssa) != SSA_NAME)
>> +      || !INTEGRAL_TYPE_P (TREE_TYPE (ssa))
>> +      || POINTER_TYPE_P (TREE_TYPE (ssa)))
> 
> All pointer types are !INTEGRAL_TYPE_P, so the last condition
> doesn't make any sense.

I have changed this. Please see the attached patch.


Thanks,
Kugan

gcc/
2014-06-25  Kugan Vivekanandarajah  <kuganv@linaro.org>

	* calls.c (precompute_arguments): Check is_promoted_for_type
	and set the promoted mode.
	(is_promoted_for_type): New function.
	(expand_expr_real_1): Check is_promoted_for_type
	and set the promoted mode.
	* expr.h (is_promoted_for_type): New function definition.
	* cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
	SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.



[-- Attachment #2: p2.txt --]
[-- Type: text/plain, Size: 4582 bytes --]

diff --git a/gcc/calls.c b/gcc/calls.c
index a3e6faa..eac512f 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -1484,7 +1484,10 @@ precompute_arguments (int num_actuals, struct arg_data *args)
 	      args[i].initial_value
 		= gen_lowpart_SUBREG (mode, args[i].value);
 	      SUBREG_PROMOTED_VAR_P (args[i].initial_value) = 1;
-	      SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);
+	      if (is_promoted_for_type (args[i].tree_value, mode, !args[i].unsignedp))
+		SUBREG_PROMOTED_SET (args[i].initial_value, SRP_SIGNED_AND_UNSIGNED);
+	      else
+		SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);
 	    }
 	}
     }
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index e8cd87f..0540b4d 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -3309,7 +3309,13 @@ expand_gimple_stmt_1 (gimple stmt)
 					  GET_MODE (target), temp, unsignedp);
 		  }
 
-		convert_move (SUBREG_REG (target), temp, unsignedp);
+		if ((SUBREG_PROMOTED_GET (target) == SRP_SIGNED_AND_UNSIGNED)
+		    && (GET_CODE (temp) == SUBREG)
+		    && (GET_MODE (target) == GET_MODE (temp))
+		    && (GET_MODE (SUBREG_REG (target)) == GET_MODE (SUBREG_REG (temp))))
+		  emit_move_insn (SUBREG_REG (target), SUBREG_REG (temp));
+		else
+		  convert_move (SUBREG_REG (target), temp, unsignedp);
 	      }
 	    else if (nontemporal && emit_storent_insn (target, temp))
 	      ;
diff --git a/gcc/expr.c b/gcc/expr.c
index f9103a5..15da092 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9210,6 +9210,59 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
 }
 #undef REDUCE_BIT_FIELD
 
+/* Return TRUE if value in SSA is already zero/sign extended for lhs type
+   (type here is the combination of LHS_MODE and LHS_UNS) using value range
+   information stored.  Return FALSE otherwise.  */
+bool
+is_promoted_for_type (tree ssa, enum machine_mode lhs_mode, bool lhs_uns)
+{
+  wide_int type_min, type_max;
+  wide_int min, max, limit;
+  unsigned int prec;
+  tree lhs_type;
+  bool rhs_uns;
+
+  if (flag_wrapv
+      || !flag_strict_overflow
+      || ssa == NULL_TREE
+      || TREE_CODE (ssa) != SSA_NAME
+      || !INTEGRAL_TYPE_P (TREE_TYPE (ssa)))
+    return false;
+
+  /* Return FALSE if value_range is not recorded for SSA.  */
+  if (get_range_info (ssa, &min, &max) != VR_RANGE)
+    return false;
+
+  lhs_type = lang_hooks.types.type_for_mode (lhs_mode, lhs_uns);
+  rhs_uns = TYPE_UNSIGNED (TREE_TYPE (ssa));
+  prec = min.get_precision ();
+
+  /* Signed maximum value.  */
+  limit = wide_int::from (TYPE_MAX_VALUE (TREE_TYPE (ssa)), prec, SIGNED);
+
+  /* Signedness of LHS and RHS differs but values in range.  */
+  if ((rhs_uns != lhs_uns)
+      && ((!lhs_uns && !wi::neg_p (min, TYPE_SIGN (lhs_type)))
+	  || (lhs_uns && (wi::cmp (max, limit, TYPE_SIGN (TREE_TYPE (ssa))) == -1))))
+    lhs_uns = !lhs_uns;
+
+  /* Signedness of LHS and RHS should match.  */
+  if (rhs_uns != lhs_uns)
+    return false;
+
+  type_min = wide_int::from (TYPE_MIN_VALUE (lhs_type), prec,
+			     TYPE_SIGN (TREE_TYPE (ssa)));
+  type_max = wide_int::from (TYPE_MAX_VALUE (lhs_type), prec,
+			     TYPE_SIGN (TREE_TYPE (ssa)));
+
+  /* Check if values lies in-between the type range.  */
+  if ((wi::neg_p (max, TYPE_SIGN (TREE_TYPE (ssa)))
+       || (wi::cmp (max, type_max, TYPE_SIGN (TREE_TYPE (ssa))) != 1))
+      && (!wi::neg_p (min, TYPE_SIGN (TREE_TYPE (ssa)))
+	  || (wi::cmp (type_min, min, TYPE_SIGN (TREE_TYPE (ssa))) != 1)))
+    return true;
+  return false;
+}
 
 /* Return TRUE if expression STMT is suitable for replacement.  
    Never consider memory loads as replaceable, because those don't ever lead 
@@ -9513,7 +9566,10 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
 
 	  temp = gen_lowpart_SUBREG (mode, decl_rtl);
 	  SUBREG_PROMOTED_VAR_P (temp) = 1;
-	  SUBREG_PROMOTED_SET (temp, unsignedp);
+	  if (is_promoted_for_type (ssa_name, mode, !unsignedp))
+	    SUBREG_PROMOTED_SET (temp, SRP_SIGNED_AND_UNSIGNED);
+	  else
+	    SUBREG_PROMOTED_SET (temp, unsignedp);
 
 	  return temp;
 	}
diff --git a/gcc/expr.h b/gcc/expr.h
index 6a1d3ab..e99d000 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -440,6 +440,7 @@ extern rtx expand_expr_real_1 (tree, rtx, enum machine_mode,
 			       enum expand_modifier, rtx *, bool);
 extern rtx expand_expr_real_2 (sepops, rtx, enum machine_mode,
 			       enum expand_modifier);
+extern bool is_promoted_for_type (tree, enum machine_mode, bool);
 
 /* Generate code for computing expression EXP.
    An rtx for the computed value is returned.  The value is never null.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-06-25  8:15     ` Kugan
@ 2014-06-25  8:36       ` Jakub Jelinek
  2014-07-07  6:55         ` Kugan
  0 siblings, 1 reply; 58+ messages in thread
From: Jakub Jelinek @ 2014-06-25  8:36 UTC (permalink / raw)
  To: Kugan; +Cc: gcc-patches

On Wed, Jun 25, 2014 at 06:14:57PM +1000, Kugan wrote:
> For these flags, value ranges generated are not usable for extension
> eliminations. Therefore, without this some of the test cases in
> regression fails. For example:
> 
> short a;
> void
> foo (void)
> {
>   for (a = 0; a >= 0; a++)
>     ;
> }
> -Os  -fno-strict-overflow produces the following range for the index
> increment and hence goes into infinite loop.
> _10: [1, 32768]
> _10 = _4 + 1;

For -fwrapv I don't see why you'd get into trouble ever, the VRP computation
should be well aware of the -fwrapv semantics and the value ranges should
reflect that.

For -fno-strict-overflow, I have no idea since it is very weirdly defined.

In any case, for your example above, the loop is always well defined,
because for char/short a++ is performed as:
a = (short) ((int) a + 1)
So, if the patch turns it into infinite loop, with -Os -fno-strict-overflow
or -Os, it is simply a problem with the patch.  VR [1, 32768] looks correct,
a++ is performed only if a is >= 0, therefore before addition [0, 32767].
But from VR [1, 32768] you can't optimize away the sign extension, make sure
you don't have there off-by-one?

It would be nice if the patch contained some testcases, it is easy
to construct testcases where you have arbitrary VRs on some SSA_NAMEs,
you just need something to stick the VR on, so you can do something like:
type foo (type a)
{
  if (a < VR_min + 1 || a > VR_max + 1) return; // If VR_min is type minimum or VR_max type maximum this needs to be adjusted of course.
  a = a + 1;
  // now you can try some cast that your optimization would try to optimize
  return a;
}
Or void bar (type a) { a = (a & mask) + bias; (or similarly) }
Make sure to cover the boundary cases, where VR minimum or maximum still
allow optimizing away zero and/or sign extensions, and another case where
they are +- 1 and already don't allow it.

	Jakub

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED)
  2014-06-25  7:50       ` Jakub Jelinek
@ 2014-06-26  1:06         ` Kugan
  2014-06-26  2:48           ` Kugan
  2014-06-26  5:50           ` Jakub Jelinek
  0 siblings, 2 replies; 58+ messages in thread
From: Kugan @ 2014-06-26  1:06 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Henderson, gcc-patches

On 25/06/14 17:50, Jakub Jelinek wrote:
> On Wed, Jun 25, 2014 at 05:21:08PM +1000, Kugan wrote:
>> The problem with SRP_POINTER 0, SRP_SIGNED 1, SRP_UNSIGNED 2,
>> SRP_SIGNED_AND_UNSIGNED 3 (as I understand) is that, it will be
>> incompatible with TYPE_UNSIGNED (tree) and defines of
>> POINTER_EXTEND_UNSIGNED values. We will have to then translate while
>> setting to SRP_* values . Also SUBREG_PROMOTED_SIGNED_P is now checked
>> in some cases for != 0 (meaning SRP_POINTER or SRP_UNSIGNED) and in some
>> cases > 0 (meaning SRP_UNSIGNED).
>>
>> Since our aim is to perform single bit checks, why don’t we just use
>> this representation internally (i.e.  _rtx->unchanging = 1 if SRP_SIGNED
>> and _rtx->volatil = 1 if SRP_UNSIGNED). As for SUBREG_PROMOTED_SIGNED_P,
>> we still have to return -1 or 1 depending on SRP_POINTER or SRP_UNSIGNED.
> 
> Why don't you make SUBREG_PROMOTED_UNSIGNED_P just return 0/1 (i.e. the
> single bit), and for places where it would like to match both
> SRP_UNSIGNED and SRP_POINTER use SUBREG_PROMOTED_GET () & SRP_UNSIGNED
> or so?

If we use SUBREG_PROMOTED_GET () & SRP_UNSIGNED, we will miss
the case SRP_SIGNED_AND_UNSIGNED. Though this is not wrong, we might
miss some optimization opportunities here. We can however use
(SUBREG_PROMOTED_GET () != SRP_SIGNED) if you like this. Other option is
to define another macro that explicilty says some think like
SUBREG_PROMOTED_POINTER_OR_UNSIGNED_P.

>> --- a/gcc/ifcvt.c
>> +++ b/gcc/ifcvt.c
>> @@ -1448,8 +1448,11 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
>>  	  || byte_vtrue != byte_vfalse
>>  	  || (SUBREG_PROMOTED_VAR_P (vtrue)
>>  	      != SUBREG_PROMOTED_VAR_P (vfalse))
>> -	  || (SUBREG_PROMOTED_UNSIGNED_P (vtrue)
>> -	      != SUBREG_PROMOTED_UNSIGNED_P (vfalse)))
>> +	  || ((SUBREG_PROMOTED_UNSIGNED_P (vtrue)
>> +	       != SUBREG_PROMOTED_UNSIGNED_P (vfalse))
>> +	      && (SUBREG_PROMOTED_SIGNED_P (vtrue)
>> +		  != SUBREG_PROMOTED_SIGNED_P (vfalse))))
> 
> Shouldn't this be SUBREG_PROMOTED_GET (vtrue) != SUBREG_PROMOTED_GET (vfalse) ?

The reason why I checked like this to cover one side with
SRP_SIGNED_AND_UNSIGNED and other with  SRP_SIGNED or SRP_UNSIGNED. If
we check SUBREG_PROMOTED_GET (vtrue) != SUBREG_PROMOTED_GET (vfalse) we
will miss that.

>> +
>> +/* Predicate to check if RTX of SUBREG_PROMOTED_VAR_P() is promoted
>> +   for UNSIGNED type.  In case of SRP_POINTER, SUBREG_PROMOTED_UNSIGNED_P
>> +   returns -1 as this is in most cases handled like unsigned extension,
>> +   except for generating instructions where special code is emitted for
>> +   (ptr_extend insns) on some architectures.  */
>>  #define SUBREG_PROMOTED_UNSIGNED_P(RTX)	\
>> -  ((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil) \
>> -   ? -1 : (int) (RTX)->unchanging)
>> +  ((((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil)\
>> +     + (RTX)->unchanging) == 0) ? -1 : ((RTX)->volatil == 1))
>> +
>> +/* Checks if RTX of SUBREG_PROMOTED_VAR_P() is promotd for given SIGN.  */
>> +#define	SUBREG_CHECK_PROMOTED_SIGN(RTX, SIGN) \
> 
> Use space rather than tab.  Also, why do we need this macro?
> Can't you just use SUBREG_PROMOTED_GET () == sign ?  I mean, sign in that
> case is typically just 0 or 1.

Again I wanted to cover SRP_SIGNED_AND_UNSIGNED as well in this case.


Thanks,
Kugan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED)
  2014-06-26  1:06         ` Kugan
@ 2014-06-26  2:48           ` Kugan
  2014-06-26  5:50           ` Jakub Jelinek
  1 sibling, 0 replies; 58+ messages in thread
From: Kugan @ 2014-06-26  2:48 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Henderson, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 5668 bytes --]

On 26/06/14 11:06, Kugan wrote:
> On 25/06/14 17:50, Jakub Jelinek wrote:
>> On Wed, Jun 25, 2014 at 05:21:08PM +1000, Kugan wrote:
>>> The problem with SRP_POINTER 0, SRP_SIGNED 1, SRP_UNSIGNED 2,
>>> SRP_SIGNED_AND_UNSIGNED 3 (as I understand) is that, it will be
>>> incompatible with TYPE_UNSIGNED (tree) and defines of
>>> POINTER_EXTEND_UNSIGNED values. We will have to then translate while
>>> setting to SRP_* values . Also SUBREG_PROMOTED_SIGNED_P is now checked
>>> in some cases for != 0 (meaning SRP_POINTER or SRP_UNSIGNED) and in some
>>> cases > 0 (meaning SRP_UNSIGNED).
>>>
>>> Since our aim is to perform single bit checks, why don’t we just use
>>> this representation internally (i.e.  _rtx->unchanging = 1 if SRP_SIGNED
>>> and _rtx->volatil = 1 if SRP_UNSIGNED). As for SUBREG_PROMOTED_SIGNED_P,
>>> we still have to return -1 or 1 depending on SRP_POINTER or SRP_UNSIGNED.
>>
>> Why don't you make SUBREG_PROMOTED_UNSIGNED_P just return 0/1 (i.e. the
>> single bit), and for places where it would like to match both
>> SRP_UNSIGNED and SRP_POINTER use SUBREG_PROMOTED_GET () & SRP_UNSIGNED
>> or so?
> 
> If we use SUBREG_PROMOTED_GET () & SRP_UNSIGNED, we will miss
> the case SRP_SIGNED_AND_UNSIGNED. Though this is not wrong, we might
> miss some optimization opportunities here. We can however use
> (SUBREG_PROMOTED_GET () != SRP_SIGNED) if you like this. Other option is
> to define another macro that explicilty says some think like
> SUBREG_PROMOTED_POINTER_OR_UNSIGNED_P.
> 
>>> --- a/gcc/ifcvt.c
>>> +++ b/gcc/ifcvt.c
>>> @@ -1448,8 +1448,11 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
>>>  	  || byte_vtrue != byte_vfalse
>>>  	  || (SUBREG_PROMOTED_VAR_P (vtrue)
>>>  	      != SUBREG_PROMOTED_VAR_P (vfalse))
>>> -	  || (SUBREG_PROMOTED_UNSIGNED_P (vtrue)
>>> -	      != SUBREG_PROMOTED_UNSIGNED_P (vfalse)))
>>> +	  || ((SUBREG_PROMOTED_UNSIGNED_P (vtrue)
>>> +	       != SUBREG_PROMOTED_UNSIGNED_P (vfalse))
>>> +	      && (SUBREG_PROMOTED_SIGNED_P (vtrue)
>>> +		  != SUBREG_PROMOTED_SIGNED_P (vfalse))))
>>
>> Shouldn't this be SUBREG_PROMOTED_GET (vtrue) != SUBREG_PROMOTED_GET (vfalse) ?
> 
> The reason why I checked like this to cover one side with
> SRP_SIGNED_AND_UNSIGNED and other with  SRP_SIGNED or SRP_UNSIGNED. If
> we check SUBREG_PROMOTED_GET (vtrue) != SUBREG_PROMOTED_GET (vfalse) we
> will miss that.
> 
>>> +
>>> +/* Predicate to check if RTX of SUBREG_PROMOTED_VAR_P() is promoted
>>> +   for UNSIGNED type.  In case of SRP_POINTER, SUBREG_PROMOTED_UNSIGNED_P
>>> +   returns -1 as this is in most cases handled like unsigned extension,
>>> +   except for generating instructions where special code is emitted for
>>> +   (ptr_extend insns) on some architectures.  */
>>>  #define SUBREG_PROMOTED_UNSIGNED_P(RTX)	\
>>> -  ((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil) \
>>> -   ? -1 : (int) (RTX)->unchanging)
>>> +  ((((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil)\
>>> +     + (RTX)->unchanging) == 0) ? -1 : ((RTX)->volatil == 1))
>>> +
>>> +/* Checks if RTX of SUBREG_PROMOTED_VAR_P() is promotd for given SIGN.  */
>>> +#define	SUBREG_CHECK_PROMOTED_SIGN(RTX, SIGN) \
>>
>> Use space rather than tab.  Also, why do we need this macro?
>> Can't you just use SUBREG_PROMOTED_GET () == sign ?  I mean, sign in that
>> case is typically just 0 or 1.
> 
> Again I wanted to cover SRP_SIGNED_AND_UNSIGNED as well in this case.
> 


I tried SUBREG_PROMOTED_GET () & SRP_UNSIGNED based on what you
suggested (patch attached). the testcases I have for zero/sign extension
are working with this. Bootstrapped and regression tested for
arm-none-linux-gnueabi. If this is OK with you I will do regression
testing on x86_64 and AAcrh64.


Thanks,
Kugan


gcc/
2014-06-26  Kugan Vivekanandarajah  <kuganv@linaro.org>

	* calls.c (precompute_arguments): Use new SUBREG_PROMOTED_SET
	instead of SUBREG_PROMOTED_UNSIGNED_SET
	(expand_call): Likewise.
	* cfgexpand.c (expand_gimple_stmt_1): Use SUBREG_PROMOTED_GET
	& SRP_UNSIGNED to get promoted mode.
	* combine.c (record_promoted_value): Skip > 0 comaprison with
	SUBREG_PROMOTED_UNSIGNED_P as it now returns only 0 or 1.
	* expr.c (convert_move): Use SUBREG_PROMOTED_GET & SRP_UNSIGNED
	instead of SUBREG_PROMOTED_UNSIGNED_P.
	(convert_modes): Likewise.
	(store_expr): Likewise.
	(expand_expr_real_1): Use new SUBREG_PROMOTED_SET
	instead of SUBREG_PROMOTED_UNSIGNED_SET.
	* function.c (assign_parm_setup_reg): Use new SUBREG_PROMOTED_SET
	instead of SUBREG_PROMOTED_UNSIGNED_SET.
	* ifcvt.c (noce_emit_cmove): Updated to use SUBREG_PROMOTED_GET.
	* internal-fn.c (ubsan_expand_si_overflow_mul_check): Use
	SUBREG_PROMOTED_SET instead of SUBREG_PROMOTED_UNSIGNED_SET.
	* optabs.c (widen_operand): Use new SUBREG_PROMOTED_GET
	instead of SUBREG_PROMOTED_UNSIGNED_P.
	* rtl.h (SUBREG_PROMOTED_UNSIGNED_SET): Remove.
	(SUBREG_PROMOTED_SET): New define.
	(SUBREG_PROMOTED_GET): Likewise.
	(SUBREG_PROMOTED_SIGNED_P): Likewise.
	(SUBREG_PROMOTED_UNSIGNED_P): Updated.
	* rtlanal.c (unsigned_reg_p): Use new SUBREG_PROMOTED_GET
	 instead of SUBREG_PROMOTED_UNSIGNED_GET.
	(nonzero_bits1): Skip > 0 comaprison with the results as
	 SUBREG_PROMOTED_UNSIGNED_P now returns only 0 or 1.
	(num_sign_bit_copies1): Use
	SUBREG_PROMOTED_SIGNED_P instead of !SUBREG_PROMOTED_UNSIGNED_P.
	* simplify-rtx.c (simplify_unary_operation_1): Use new
	SUBREG_PROMOTED_SIGNED_P instead of
	!SUBREG_PROMOTED_UNSIGNED_P.
	(simplify_subreg): Use new SUBREG_PROMOTED_SIGNED_P,
	SUBREG_PROMOTED_UNSIGNED_P and SUBREG_PROMOTED_SET instead of
	SUBREG_PROMOTED_UNSIGNED_P and SUBREG_PROMOTED_UNSIGNED_SET.


[-- Attachment #2: p1.txt --]
[-- Type: text/plain, Size: 14032 bytes --]

diff --git a/gcc/calls.c b/gcc/calls.c
index 78fe7d8..a3e6faa 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -1484,8 +1484,7 @@ precompute_arguments (int num_actuals, struct arg_data *args)
 	      args[i].initial_value
 		= gen_lowpart_SUBREG (mode, args[i].value);
 	      SUBREG_PROMOTED_VAR_P (args[i].initial_value) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET (args[i].initial_value,
-					    args[i].unsignedp);
+	      SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);
 	    }
 	}
     }
@@ -3365,7 +3364,7 @@ expand_call (tree exp, rtx target, int ignore)
 
 	  target = gen_rtx_SUBREG (TYPE_MODE (type), target, offset);
 	  SUBREG_PROMOTED_VAR_P (target) = 1;
-	  SUBREG_PROMOTED_UNSIGNED_SET (target, unsignedp);
+	  SUBREG_PROMOTED_SET (target, unsignedp);
 	}
 
       /* If size of args is variable or this was a constructor call for a stack
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index e8cd87f..bab9645 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -3297,7 +3297,7 @@ expand_gimple_stmt_1 (gimple stmt)
 	      ;
 	    else if (promoted)
 	      {
-		int unsignedp = SUBREG_PROMOTED_UNSIGNED_P (target);
+		int unsignedp = SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED;
 		/* If TEMP is a VOIDmode constant, use convert_modes to make
 		   sure that we properly convert it.  */
 		if (CONSTANT_P (temp) && GET_MODE (temp) == VOIDmode)
diff --git a/gcc/combine.c b/gcc/combine.c
index 4e7ef55..15ecf1a 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -12427,7 +12427,7 @@ record_promoted_value (rtx insn, rtx subreg)
       rsp = &reg_stat[regno];
       if (rsp->last_set == insn)
 	{
-	  if (SUBREG_PROMOTED_UNSIGNED_P (subreg) > 0)
+	  if (SUBREG_PROMOTED_UNSIGNED_P (subreg))
 	    rsp->last_set_nonzero_bits &= GET_MODE_MASK (mode);
 	}
 
diff --git a/gcc/expr.c b/gcc/expr.c
index 512c024..4fea105 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -329,7 +329,7 @@ convert_move (rtx to, rtx from, int unsignedp)
   if (GET_CODE (from) == SUBREG && SUBREG_PROMOTED_VAR_P (from)
       && (GET_MODE_PRECISION (GET_MODE (SUBREG_REG (from)))
 	  >= GET_MODE_PRECISION (to_mode))
-      && SUBREG_PROMOTED_UNSIGNED_P (from) == unsignedp)
+      && (SUBREG_PROMOTED_GET (from) & SRP_UNSIGNED) == unsignedp)
     from = gen_lowpart (to_mode, from), from_mode = to_mode;
 
   gcc_assert (GET_CODE (to) != SUBREG || !SUBREG_PROMOTED_VAR_P (to));
@@ -703,7 +703,7 @@ convert_modes (enum machine_mode mode, enum machine_mode oldmode, rtx x, int uns
 
   if (GET_CODE (x) == SUBREG && SUBREG_PROMOTED_VAR_P (x)
       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))) >= GET_MODE_SIZE (mode)
-      && SUBREG_PROMOTED_UNSIGNED_P (x) == unsignedp)
+      && (SUBREG_PROMOTED_GET (x) & SRP_UNSIGNED) == unsignedp)
     x = gen_lowpart (mode, SUBREG_REG (x));
 
   if (GET_MODE (x) != VOIDmode)
@@ -5203,24 +5203,25 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
 	     == TYPE_PRECISION (TREE_TYPE (exp)))
 	{
 	  if (TYPE_UNSIGNED (TREE_TYPE (exp))
-	      != SUBREG_PROMOTED_UNSIGNED_P (target))
+	      != (SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED))
 	    {
 	      /* Some types, e.g. Fortran's logical*4, won't have a signed
 		 version, so use the mode instead.  */
 	      tree ntype
 		= (signed_or_unsigned_type_for
-		   (SUBREG_PROMOTED_UNSIGNED_P (target), TREE_TYPE (exp)));
+		   (SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED,
+		    TREE_TYPE (exp)));
 	      if (ntype == NULL)
 		ntype = lang_hooks.types.type_for_mode
 		  (TYPE_MODE (TREE_TYPE (exp)),
-		   SUBREG_PROMOTED_UNSIGNED_P (target));
+		   SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
 
 	      exp = fold_convert_loc (loc, ntype, exp);
 	    }
 
 	  exp = fold_convert_loc (loc, lang_hooks.types.type_for_mode
 				  (GET_MODE (SUBREG_REG (target)),
-				   SUBREG_PROMOTED_UNSIGNED_P (target)),
+				   SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED),
 				  exp);
 
 	  inner_target = SUBREG_REG (target);
@@ -5234,14 +5235,14 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
       if (CONSTANT_P (temp) && GET_MODE (temp) == VOIDmode)
 	{
 	  temp = convert_modes (GET_MODE (target), TYPE_MODE (TREE_TYPE (exp)),
-				temp, SUBREG_PROMOTED_UNSIGNED_P (target));
+				temp, SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
 	  temp = convert_modes (GET_MODE (SUBREG_REG (target)),
 			        GET_MODE (target), temp,
-			        SUBREG_PROMOTED_UNSIGNED_P (target));
+			        SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
 	}
 
       convert_move (SUBREG_REG (target), temp,
-		    SUBREG_PROMOTED_UNSIGNED_P (target));
+		    SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
 
       return NULL_RTX;
     }
@@ -9513,7 +9514,7 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
 
 	  temp = gen_lowpart_SUBREG (mode, decl_rtl);
 	  SUBREG_PROMOTED_VAR_P (temp) = 1;
-	  SUBREG_PROMOTED_UNSIGNED_SET (temp, unsignedp);
+	  SUBREG_PROMOTED_SET (temp, unsignedp);
 	  return temp;
 	}
 
diff --git a/gcc/function.c b/gcc/function.c
index 441289e..9509622 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -3093,7 +3093,7 @@ assign_parm_setup_reg (struct assign_parm_data_all *all, tree parm,
 	  /* The argument is already sign/zero extended, so note it
 	     into the subreg.  */
 	  SUBREG_PROMOTED_VAR_P (tempreg) = 1;
-	  SUBREG_PROMOTED_UNSIGNED_SET (tempreg, unsignedp);
+	  SUBREG_PROMOTED_SET (tempreg, unsignedp);
 	}
 
       /* TREE_USED gets set erroneously during expand_assignment.  */
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 816cdaa..b3788e2 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -1448,8 +1448,8 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
 	  || byte_vtrue != byte_vfalse
 	  || (SUBREG_PROMOTED_VAR_P (vtrue)
 	      != SUBREG_PROMOTED_VAR_P (vfalse))
-	  || (SUBREG_PROMOTED_UNSIGNED_P (vtrue)
-	      != SUBREG_PROMOTED_UNSIGNED_P (vfalse)))
+	  || ((SUBREG_PROMOTED_GET (vtrue) & SRP_UNSIGNED)
+	      != (SUBREG_PROMOTED_GET (vfalse) & SRP_UNSIGNED)))
 	return NULL_RTX;
 
       promoted_target = gen_reg_rtx (GET_MODE (reg_vtrue));
@@ -1463,7 +1463,7 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
 
       target = gen_rtx_SUBREG (GET_MODE (vtrue), promoted_target, byte_vtrue);
       SUBREG_PROMOTED_VAR_P (target) = SUBREG_PROMOTED_VAR_P (vtrue);
-      SUBREG_PROMOTED_UNSIGNED_SET (target, SUBREG_PROMOTED_UNSIGNED_P (vtrue));
+      SUBREG_PROMOTED_SET (target, SUBREG_PROMOTED_GET (vtrue));
       emit_move_insn (x, target);
       return x;
     }
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 78f59d6..4e0b964 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -584,12 +584,12 @@ ubsan_expand_si_overflow_mul_check (gimple stmt)
 	  if (GET_CODE (lopart0) == SUBREG)
 	    {
 	      SUBREG_PROMOTED_VAR_P (lopart0) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET (lopart0, 0);
+	      SUBREG_PROMOTED_SET (lopart0, 0);
 	    }
 	  if (GET_CODE (lopart1) == SUBREG)
 	    {
 	      SUBREG_PROMOTED_VAR_P (lopart1) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET (lopart1, 0);
+	      SUBREG_PROMOTED_SET (lopart1, 0);
 	    }
 	  tree halfstype = build_nonstandard_integer_type (hprec, 0);
 	  ops.op0 = make_tree (halfstype, lopart0);
diff --git a/gcc/optabs.c b/gcc/optabs.c
index ca1c194..d9c57b2 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -368,7 +368,7 @@ widen_operand (rtx op, enum machine_mode mode, enum machine_mode oldmode,
      a promoted object differs from our extension.  */
   if (! no_extend
       || (GET_CODE (op) == SUBREG && SUBREG_PROMOTED_VAR_P (op)
-	  && SUBREG_PROMOTED_UNSIGNED_P (op) == unsignedp))
+	  && (SUBREG_PROMOTED_GET (op) & SRP_UNSIGNED == unsignedp)))
     return convert_modes (mode, oldmode, op, unsignedp);
 
   /* If MODE is no wider than a single word, we return a lowpart or paradoxical
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 6ec91a8..288a77a 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -1585,29 +1585,62 @@ get_full_set_src_cost (rtx x, struct full_rtx_costs *c)
 #define SUBREG_PROMOTED_VAR_P(RTX)					\
   (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED", (RTX), SUBREG)->in_struct)
 
-#define SUBREG_PROMOTED_UNSIGNED_SET(RTX, VAL)				\
-do {									\
-  rtx const _rtx = RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_SET",	\
-				    (RTX), SUBREG);			\
-  if ((VAL) < 0)							\
-    _rtx->volatil = 1;							\
-  else {								\
-    _rtx->volatil = 0;							\
-    _rtx->unchanging = (VAL);						\
-  }									\
-} while (0)
-
 /* Valid for subregs which are SUBREG_PROMOTED_VAR_P().  In that case
    this gives the necessary extensions:
-   0  - signed
-   1  - normal unsigned
+   0  - signed (SPR_SIGNED)
+   1  - normal unsigned (SPR_UNSIGNED)
+   2  - value is both sign and unsign extended for mode
+	(SPR_SIGNED_AND_UNSIGNED).
    -1 - pointer unsigned, which most often can be handled like unsigned
         extension, except for generating instructions where we need to
-	emit special code (ptr_extend insns) on some architectures.  */
+	emit special code (ptr_extend insns) on some architectures
+	(SPR_POINTER). */
+
+const unsigned int SRP_POINTER = -1;
+const unsigned int SRP_SIGNED = 0;
+const unsigned int SRP_UNSIGNED = 1;
+const unsigned int SRP_SIGNED_AND_UNSIGNED = 2;
+
+/* Sets promoted mode for SUBREG_PROMOTED_VAR_P().  */
+#define SUBREG_PROMOTED_SET(RTX, VAL)		                        \
+do {								        \
+  rtx const _rtx = RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_SET",		\
+                                    (RTX), SUBREG);			\
+  switch ((VAL))							\
+  {									\
+    case SRP_POINTER:							\
+      _rtx->volatil = 0;						\
+      _rtx->unchanging = 0;						\
+      break;								\
+    case SRP_SIGNED:							\
+      _rtx->volatil = 0;						\
+      _rtx->unchanging = 1;						\
+      break;								\
+    case SRP_UNSIGNED:							\
+      _rtx->volatil = 1;						\
+      _rtx->unchanging = 0;						\
+      break;								\
+    case SRP_SIGNED_AND_UNSIGNED:					\
+      _rtx->volatil = 1;						\
+      _rtx->unchanging = 1;						\
+      break;								\
+  }									\
+} while (0)
+
+/* Gets promoted mode for SUBREG_PROMOTED_VAR_P().  */
+#define SUBREG_PROMOTED_GET(RTX)	\
+  (2 * (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_GET", (RTX), SUBREG)->volatil)\
+   + (RTX)->unchanging - 1)
+
+/* Predicate to check if RTX of SUBREG_PROMOTED_VAR_P() is promoted
+   for SIGNED type.  */
+#define SUBREG_PROMOTED_SIGNED_P(RTX)	\
+  (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_SIGNED_P", (RTX), SUBREG)->unchanging)
 
+/* Predicate to check if RTX of SUBREG_PROMOTED_VAR_P() is promoted
+   for UNSIGNED type.  */
 #define SUBREG_PROMOTED_UNSIGNED_P(RTX)	\
-  ((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil) \
-   ? -1 : (int) (RTX)->unchanging)
+  (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil)
 
 /* True if the subreg was generated by LRA for reload insns.  Such
    subregs are valid only during LRA.  */
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 82cfc1bf..5c8132f 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -670,7 +670,7 @@ unsigned_reg_p (rtx op)
     return true;
 
   if (GET_CODE (op) == SUBREG
-      && SUBREG_PROMOTED_UNSIGNED_P (op))
+      && (SUBREG_PROMOTED_GET (op) & SRP_UNSIGNED))
     return true;
 
   return false;
@@ -4309,7 +4309,7 @@ nonzero_bits1 (const_rtx x, enum machine_mode mode, const_rtx known_x,
 	 been zero-extended, we know that at least the high-order bits
 	 are zero, though others might be too.  */
 
-      if (SUBREG_PROMOTED_VAR_P (x) && SUBREG_PROMOTED_UNSIGNED_P (x) > 0)
+      if (SUBREG_PROMOTED_VAR_P (x) && SUBREG_PROMOTED_UNSIGNED_P (x))
 	nonzero = GET_MODE_MASK (GET_MODE (x))
 		  & cached_nonzero_bits (SUBREG_REG (x), GET_MODE (x),
 					 known_x, known_mode, known_ret);
@@ -4619,7 +4619,7 @@ num_sign_bit_copies1 (const_rtx x, enum machine_mode mode, const_rtx known_x,
 	 and we are looking at it in a wider mode, we know that at least the
 	 high-order bits are known to be sign bit copies.  */
 
-      if (SUBREG_PROMOTED_VAR_P (x) && ! SUBREG_PROMOTED_UNSIGNED_P (x))
+      if (SUBREG_PROMOTED_VAR_P (x) && SUBREG_PROMOTED_SIGNED_P (x))
 	{
 	  num0 = cached_num_sign_bit_copies (SUBREG_REG (x), mode,
 					     known_x, known_mode, known_ret);
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 181b56f..d2d6b5b 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -1352,7 +1352,7 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op)
 	 target mode is the same as the variable's promotion.  */
       if (GET_CODE (op) == SUBREG
 	  && SUBREG_PROMOTED_VAR_P (op)
-	  && ! SUBREG_PROMOTED_UNSIGNED_P (op)
+	  && SUBREG_PROMOTED_SIGNED_P (op)
 	  && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
 	{
 	  temp = rtl_hooks.gen_lowpart_no_emit (mode, op);
@@ -1419,7 +1419,7 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op)
 	 target mode is the same as the variable's promotion.  */
       if (GET_CODE (op) == SUBREG
 	  && SUBREG_PROMOTED_VAR_P (op)
-	  && SUBREG_PROMOTED_UNSIGNED_P (op) > 0
+	  && SUBREG_PROMOTED_UNSIGNED_P (op)
 	  && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
 	{
 	  temp = rtl_hooks.gen_lowpart_no_emit (mode, op);
@@ -5587,7 +5587,8 @@ simplify_subreg (enum machine_mode outermode, rtx op,
 	{
 	  newx = gen_rtx_SUBREG (outermode, SUBREG_REG (op), final_offset);
 	  if (SUBREG_PROMOTED_VAR_P (op)
-	      && SUBREG_PROMOTED_UNSIGNED_P (op) >= 0
+	      && (SUBREG_PROMOTED_UNSIGNED_P (op)
+		  || SUBREG_PROMOTED_SIGNED_P (op))
 	      && GET_MODE_CLASS (outermode) == MODE_INT
 	      && IN_RANGE (GET_MODE_SIZE (outermode),
 			   GET_MODE_SIZE (innermode),
@@ -5595,8 +5596,7 @@ simplify_subreg (enum machine_mode outermode, rtx op,
 	      && subreg_lowpart_p (newx))
 	    {
 	      SUBREG_PROMOTED_VAR_P (newx) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET
-		(newx, SUBREG_PROMOTED_UNSIGNED_P (op));
+	      SUBREG_PROMOTED_SET (newx, SUBREG_PROMOTED_GET (op));
 	    }
 	  return newx;
 	}

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED)
  2014-06-26  1:06         ` Kugan
  2014-06-26  2:48           ` Kugan
@ 2014-06-26  5:50           ` Jakub Jelinek
  2014-06-26  9:41             ` Kugan
  1 sibling, 1 reply; 58+ messages in thread
From: Jakub Jelinek @ 2014-06-26  5:50 UTC (permalink / raw)
  To: Kugan; +Cc: Richard Henderson, gcc-patches

On Thu, Jun 26, 2014 at 11:06:26AM +1000, Kugan wrote:
> >> Since our aim is to perform single bit checks, why don’t we just use
> >> this representation internally (i.e.  _rtx->unchanging = 1 if SRP_SIGNED
> >> and _rtx->volatil = 1 if SRP_UNSIGNED). As for SUBREG_PROMOTED_SIGNED_P,
> >> we still have to return -1 or 1 depending on SRP_POINTER or SRP_UNSIGNED.
> > 
> > Why don't you make SUBREG_PROMOTED_UNSIGNED_P just return 0/1 (i.e. the
> > single bit), and for places where it would like to match both
> > SRP_UNSIGNED and SRP_POINTER use SUBREG_PROMOTED_GET () & SRP_UNSIGNED
> > or so?
> 
> If we use SUBREG_PROMOTED_GET () & SRP_UNSIGNED, we will miss
> the case SRP_SIGNED_AND_UNSIGNED. Though this is not wrong, we might
> miss some optimization opportunities here. We can however use
> (SUBREG_PROMOTED_GET () != SRP_SIGNED) if you like this. Other option is
> to define another macro that explicilty says some think like
> SUBREG_PROMOTED_POINTER_OR_UNSIGNED_P.

Ok, sure, if you want to make the test pass for SRP_UNSIGNED, SRP_POINTER
and SRP_UNSIGNED_AND_SIGNED, then != SRP_SIGNED is the right thing.
What I wanted is make SUBREG_PROMOTED_UNSIGNED_P be a 0/1 again.

> >> --- a/gcc/ifcvt.c
> >> +++ b/gcc/ifcvt.c
> >> @@ -1448,8 +1448,11 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
> >>  	  || byte_vtrue != byte_vfalse
> >>  	  || (SUBREG_PROMOTED_VAR_P (vtrue)
> >>  	      != SUBREG_PROMOTED_VAR_P (vfalse))
> >> -	  || (SUBREG_PROMOTED_UNSIGNED_P (vtrue)
> >> -	      != SUBREG_PROMOTED_UNSIGNED_P (vfalse)))
> >> +	  || ((SUBREG_PROMOTED_UNSIGNED_P (vtrue)
> >> +	       != SUBREG_PROMOTED_UNSIGNED_P (vfalse))
> >> +	      && (SUBREG_PROMOTED_SIGNED_P (vtrue)
> >> +		  != SUBREG_PROMOTED_SIGNED_P (vfalse))))
> > 
> > Shouldn't this be SUBREG_PROMOTED_GET (vtrue) != SUBREG_PROMOTED_GET (vfalse) ?
> 
> The reason why I checked like this to cover one side with
> SRP_SIGNED_AND_UNSIGNED and other with  SRP_SIGNED or SRP_UNSIGNED. If
> we check SUBREG_PROMOTED_GET (vtrue) != SUBREG_PROMOTED_GET (vfalse) we
> will miss that.

What you have above is just wrong though.  Either you need to make sure the
flags are the same (i.e. GET != GET), and keep the SET a few lines below as
is, or you would allow (some?) mismatches of the promotion flags,
but in that case you'd need to deal with it in the SET conservatively.
Like, if one is SRP_SIGNED_AND_UNSIGNED and another one is just
SRP_SIGNED or just SRP_UNSIGNED, you'd use the simpler one, if one
is promoted and another one is not, you'd not make the SUBREG promoted at
all, etc.  Not worth it IMHO, at least not for now.

> 
> >> +
> >> +/* Predicate to check if RTX of SUBREG_PROMOTED_VAR_P() is promoted
> >> +   for UNSIGNED type.  In case of SRP_POINTER, SUBREG_PROMOTED_UNSIGNED_P
> >> +   returns -1 as this is in most cases handled like unsigned extension,
> >> +   except for generating instructions where special code is emitted for
> >> +   (ptr_extend insns) on some architectures.  */
> >>  #define SUBREG_PROMOTED_UNSIGNED_P(RTX)	\
> >> -  ((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil) \
> >> -   ? -1 : (int) (RTX)->unchanging)
> >> +  ((((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil)\
> >> +     + (RTX)->unchanging) == 0) ? -1 : ((RTX)->volatil == 1))
> >> +
> >> +/* Checks if RTX of SUBREG_PROMOTED_VAR_P() is promotd for given SIGN.  */
> >> +#define	SUBREG_CHECK_PROMOTED_SIGN(RTX, SIGN) \
> > 
> > Use space rather than tab.  Also, why do we need this macro?
> > Can't you just use SUBREG_PROMOTED_GET () == sign ?  I mean, sign in that
> > case is typically just 0 or 1.
> 
> Again I wanted to cover SRP_SIGNED_AND_UNSIGNED as well in this case.

Ah, ok.  It is fine as is (with the whitespace change).

	Jakub

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED)
  2014-06-26  5:50           ` Jakub Jelinek
@ 2014-06-26  9:41             ` Kugan
  2014-06-26 10:12               ` Jakub Jelinek
  2014-06-26 10:25               ` Andreas Schwab
  0 siblings, 2 replies; 58+ messages in thread
From: Kugan @ 2014-06-26  9:41 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Henderson, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 6082 bytes --]


On 26/06/14 15:50, Jakub Jelinek wrote:
> On Thu, Jun 26, 2014 at 11:06:26AM +1000, Kugan wrote:
>>>> Since our aim is to perform single bit checks, why don’t we just use
>>>> this representation internally (i.e.  _rtx->unchanging = 1 if SRP_SIGNED
>>>> and _rtx->volatil = 1 if SRP_UNSIGNED). As for SUBREG_PROMOTED_SIGNED_P,
>>>> we still have to return -1 or 1 depending on SRP_POINTER or SRP_UNSIGNED.
>>>
>>> Why don't you make SUBREG_PROMOTED_UNSIGNED_P just return 0/1 (i.e. the
>>> single bit), and for places where it would like to match both
>>> SRP_UNSIGNED and SRP_POINTER use SUBREG_PROMOTED_GET () & SRP_UNSIGNED
>>> or so?
>>
>> If we use SUBREG_PROMOTED_GET () & SRP_UNSIGNED, we will miss
>> the case SRP_SIGNED_AND_UNSIGNED. Though this is not wrong, we might
>> miss some optimization opportunities here. We can however use
>> (SUBREG_PROMOTED_GET () != SRP_SIGNED) if you like this. Other option is
>> to define another macro that explicilty says some think like
>> SUBREG_PROMOTED_POINTER_OR_UNSIGNED_P.
> 
> Ok, sure, if you want to make the test pass for SRP_UNSIGNED, SRP_POINTER
> and SRP_UNSIGNED_AND_SIGNED, then != SRP_SIGNED is the right thing.
> What I wanted is make SUBREG_PROMOTED_UNSIGNED_P be a 0/1 again.
> 
>>>> --- a/gcc/ifcvt.c
>>>> +++ b/gcc/ifcvt.c
>>>> @@ -1448,8 +1448,11 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
>>>>  	  || byte_vtrue != byte_vfalse
>>>>  	  || (SUBREG_PROMOTED_VAR_P (vtrue)
>>>>  	      != SUBREG_PROMOTED_VAR_P (vfalse))
>>>> -	  || (SUBREG_PROMOTED_UNSIGNED_P (vtrue)
>>>> -	      != SUBREG_PROMOTED_UNSIGNED_P (vfalse)))
>>>> +	  || ((SUBREG_PROMOTED_UNSIGNED_P (vtrue)
>>>> +	       != SUBREG_PROMOTED_UNSIGNED_P (vfalse))
>>>> +	      && (SUBREG_PROMOTED_SIGNED_P (vtrue)
>>>> +		  != SUBREG_PROMOTED_SIGNED_P (vfalse))))
>>>
>>> Shouldn't this be SUBREG_PROMOTED_GET (vtrue) != SUBREG_PROMOTED_GET (vfalse) ?
>>
>> The reason why I checked like this to cover one side with
>> SRP_SIGNED_AND_UNSIGNED and other with  SRP_SIGNED or SRP_UNSIGNED. If
>> we check SUBREG_PROMOTED_GET (vtrue) != SUBREG_PROMOTED_GET (vfalse) we
>> will miss that.
> 
> What you have above is just wrong though.  Either you need to make sure the
> flags are the same (i.e. GET != GET), and keep the SET a few lines below as
> is, or you would allow (some?) mismatches of the promotion flags,
> but in that case you'd need to deal with it in the SET conservatively.
> Like, if one is SRP_SIGNED_AND_UNSIGNED and another one is just
> SRP_SIGNED or just SRP_UNSIGNED, you'd use the simpler one, if one
> is promoted and another one is not, you'd not make the SUBREG promoted at
> all, etc.  Not worth it IMHO, at least not for now.
> 
>>
>>>> +
>>>> +/* Predicate to check if RTX of SUBREG_PROMOTED_VAR_P() is promoted
>>>> +   for UNSIGNED type.  In case of SRP_POINTER, SUBREG_PROMOTED_UNSIGNED_P
>>>> +   returns -1 as this is in most cases handled like unsigned extension,
>>>> +   except for generating instructions where special code is emitted for
>>>> +   (ptr_extend insns) on some architectures.  */
>>>>  #define SUBREG_PROMOTED_UNSIGNED_P(RTX)	\
>>>> -  ((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil) \
>>>> -   ? -1 : (int) (RTX)->unchanging)
>>>> +  ((((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil)\
>>>> +     + (RTX)->unchanging) == 0) ? -1 : ((RTX)->volatil == 1))
>>>> +
>>>> +/* Checks if RTX of SUBREG_PROMOTED_VAR_P() is promotd for given SIGN.  */
>>>> +#define	SUBREG_CHECK_PROMOTED_SIGN(RTX, SIGN) \
>>>
>>> Use space rather than tab.  Also, why do we need this macro?
>>> Can't you just use SUBREG_PROMOTED_GET () == sign ?  I mean, sign in that
>>> case is typically just 0 or 1.
>>
>> Again I wanted to cover SRP_SIGNED_AND_UNSIGNED as well in this case.
> 
> Ah, ok.  It is fine as is (with the whitespace change).

Thanks for the review. I have now changed it based on the comments. Is
this look OK?

Thanks,
Kugan

gcc/

2014-06-26  Kugan Vivekanandarajah  <kuganv@linaro.org>

	* calls.c (precompute_arguments): Use new SUBREG_PROMOTED_SET
	instead of SUBREG_PROMOTED_UNSIGNED_SET
	(expand_call): Likewise.
	* cfgexpand.c (expand_gimple_stmt_1): Use SUBREG_PROMOTED_GET
	& SRP_UNSIGNED to get promoted mode as SRP_POINTER is treated
	the same way as SRP_UNSIGNED.
	* combine.c (record_promoted_value): Skip > 0 comparison with
	SUBREG_PROMOTED_UNSIGNED_P as it now returns only 0 or 1.
	* expr.c (convert_move): Use SUBREG_CHECK_PROMOTED_SIGN instead
	of SUBREG_PROMOTED_UNSIGNED_P.
	(convert_modes): Likewise.
	(store_expr): Use SUBREG_PROMOTED_GET & SRP_UNSIGNED to get
	promoted mode as SRP_POINTER is treated the same way as
	SRP_UNSIGNED.
	(expand_expr_real_1): Use new SUBREG_PROMOTED_SET instead of
	SUBREG_PROMOTED_UNSIGNED_SET.
	* function.c (assign_parm_setup_reg): Use new SUBREG_PROMOTED_SET
	instead of SUBREG_PROMOTED_UNSIGNED_SET.
	* ifcvt.c (noce_emit_cmove): Updated to use SUBREG_PROMOTED_GET and
	SUBREG_PROMOTED_SET.
	* internal-fn.c (ubsan_expand_si_overflow_mul_check): Use
	SUBREG_PROMOTED_SET instead of SUBREG_PROMOTED_UNSIGNED_SET.
	* optabs.c (widen_operand): Use SUBREG_CHECK_PROMOTED_SIGN instead
	of SUBREG_PROMOTED_UNSIGNED_P.
	* rtl.h (SUBREG_PROMOTED_UNSIGNED_SET): Remove.
	(SUBREG_PROMOTED_SET): New define.
	(SUBREG_PROMOTED_GET): Likewise.
	(SUBREG_PROMOTED_SIGNED_P): Likewise.
	(SUBREG_CHECK_PROMOTED_SIGN): Likewise.
	(SUBREG_PROMOTED_UNSIGNED_P): Updated.
	* rtlanal.c (unsigned_reg_p): Use new SUBREG_PROMOTED_GET
	instead of SUBREG_PROMOTED_UNSIGNED_GET.
	(nonzero_bits1): Skip > 0 comparison with the results as
	SUBREG_PROMOTED_UNSIGNED_P now returns only 0 or 1.
	(num_sign_bit_copies1): Use SUBREG_PROMOTED_SIGNED_P instead
	of !SUBREG_PROMOTED_UNSIGNED_P.
	* simplify-rtx.c (simplify_unary_operation_1): Use new
	SUBREG_PROMOTED_SIGNED_P instead of !SUBREG_PROMOTED_UNSIGNED_P.
	(simplify_subreg): Use new SUBREG_PROMOTED_SIGNED_P,
	SUBREG_PROMOTED_UNSIGNED_P and SUBREG_PROMOTED_SET instead of
	SUBREG_PROMOTED_UNSIGNED_P and SUBREG_PROMOTED_UNSIGNED_SET.


[-- Attachment #2: p1.txt --]
[-- Type: text/plain, Size: 14183 bytes --]

diff --git a/gcc/calls.c b/gcc/calls.c
index 78fe7d8..a3e6faa 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -1484,8 +1484,7 @@ precompute_arguments (int num_actuals, struct arg_data *args)
 	      args[i].initial_value
 		= gen_lowpart_SUBREG (mode, args[i].value);
 	      SUBREG_PROMOTED_VAR_P (args[i].initial_value) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET (args[i].initial_value,
-					    args[i].unsignedp);
+	      SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);
 	    }
 	}
     }
@@ -3365,7 +3364,7 @@ expand_call (tree exp, rtx target, int ignore)
 
 	  target = gen_rtx_SUBREG (TYPE_MODE (type), target, offset);
 	  SUBREG_PROMOTED_VAR_P (target) = 1;
-	  SUBREG_PROMOTED_UNSIGNED_SET (target, unsignedp);
+	  SUBREG_PROMOTED_SET (target, unsignedp);
 	}
 
       /* If size of args is variable or this was a constructor call for a stack
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index e8cd87f..bab9645 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -3297,7 +3297,7 @@ expand_gimple_stmt_1 (gimple stmt)
 	      ;
 	    else if (promoted)
 	      {
-		int unsignedp = SUBREG_PROMOTED_UNSIGNED_P (target);
+		int unsignedp = SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED;
 		/* If TEMP is a VOIDmode constant, use convert_modes to make
 		   sure that we properly convert it.  */
 		if (CONSTANT_P (temp) && GET_MODE (temp) == VOIDmode)
diff --git a/gcc/combine.c b/gcc/combine.c
index 4e7ef55..15ecf1a 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -12427,7 +12427,7 @@ record_promoted_value (rtx insn, rtx subreg)
       rsp = &reg_stat[regno];
       if (rsp->last_set == insn)
 	{
-	  if (SUBREG_PROMOTED_UNSIGNED_P (subreg) > 0)
+	  if (SUBREG_PROMOTED_UNSIGNED_P (subreg))
 	    rsp->last_set_nonzero_bits &= GET_MODE_MASK (mode);
 	}
 
diff --git a/gcc/expr.c b/gcc/expr.c
index 512c024..f875f2d 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -329,7 +329,7 @@ convert_move (rtx to, rtx from, int unsignedp)
   if (GET_CODE (from) == SUBREG && SUBREG_PROMOTED_VAR_P (from)
       && (GET_MODE_PRECISION (GET_MODE (SUBREG_REG (from)))
 	  >= GET_MODE_PRECISION (to_mode))
-      && SUBREG_PROMOTED_UNSIGNED_P (from) == unsignedp)
+      && SUBREG_CHECK_PROMOTED_SIGN (from, unsignedp))
     from = gen_lowpart (to_mode, from), from_mode = to_mode;
 
   gcc_assert (GET_CODE (to) != SUBREG || !SUBREG_PROMOTED_VAR_P (to));
@@ -703,7 +703,7 @@ convert_modes (enum machine_mode mode, enum machine_mode oldmode, rtx x, int uns
 
   if (GET_CODE (x) == SUBREG && SUBREG_PROMOTED_VAR_P (x)
       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))) >= GET_MODE_SIZE (mode)
-      && SUBREG_PROMOTED_UNSIGNED_P (x) == unsignedp)
+      && SUBREG_CHECK_PROMOTED_SIGN (x, unsignedp))
     x = gen_lowpart (mode, SUBREG_REG (x));
 
   if (GET_MODE (x) != VOIDmode)
@@ -5203,24 +5203,25 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
 	     == TYPE_PRECISION (TREE_TYPE (exp)))
 	{
 	  if (TYPE_UNSIGNED (TREE_TYPE (exp))
-	      != SUBREG_PROMOTED_UNSIGNED_P (target))
+	      != SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED)
 	    {
 	      /* Some types, e.g. Fortran's logical*4, won't have a signed
 		 version, so use the mode instead.  */
 	      tree ntype
 		= (signed_or_unsigned_type_for
-		   (SUBREG_PROMOTED_UNSIGNED_P (target), TREE_TYPE (exp)));
+		   (SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED,
+		    TREE_TYPE (exp)));
 	      if (ntype == NULL)
 		ntype = lang_hooks.types.type_for_mode
 		  (TYPE_MODE (TREE_TYPE (exp)),
-		   SUBREG_PROMOTED_UNSIGNED_P (target));
+		   SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
 
 	      exp = fold_convert_loc (loc, ntype, exp);
 	    }
 
 	  exp = fold_convert_loc (loc, lang_hooks.types.type_for_mode
 				  (GET_MODE (SUBREG_REG (target)),
-				   SUBREG_PROMOTED_UNSIGNED_P (target)),
+				   SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED),
 				  exp);
 
 	  inner_target = SUBREG_REG (target);
@@ -5234,14 +5235,14 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
       if (CONSTANT_P (temp) && GET_MODE (temp) == VOIDmode)
 	{
 	  temp = convert_modes (GET_MODE (target), TYPE_MODE (TREE_TYPE (exp)),
-				temp, SUBREG_PROMOTED_UNSIGNED_P (target));
+				temp, SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
 	  temp = convert_modes (GET_MODE (SUBREG_REG (target)),
 			        GET_MODE (target), temp,
-			        SUBREG_PROMOTED_UNSIGNED_P (target));
+			        SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
 	}
 
       convert_move (SUBREG_REG (target), temp,
-		    SUBREG_PROMOTED_UNSIGNED_P (target));
+		    SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
 
       return NULL_RTX;
     }
@@ -9513,7 +9514,7 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
 
 	  temp = gen_lowpart_SUBREG (mode, decl_rtl);
 	  SUBREG_PROMOTED_VAR_P (temp) = 1;
-	  SUBREG_PROMOTED_UNSIGNED_SET (temp, unsignedp);
+	  SUBREG_PROMOTED_SET (temp, unsignedp);
 	  return temp;
 	}
 
diff --git a/gcc/function.c b/gcc/function.c
index 441289e..9509622 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -3093,7 +3093,7 @@ assign_parm_setup_reg (struct assign_parm_data_all *all, tree parm,
 	  /* The argument is already sign/zero extended, so note it
 	     into the subreg.  */
 	  SUBREG_PROMOTED_VAR_P (tempreg) = 1;
-	  SUBREG_PROMOTED_UNSIGNED_SET (tempreg, unsignedp);
+	  SUBREG_PROMOTED_SET (tempreg, unsignedp);
 	}
 
       /* TREE_USED gets set erroneously during expand_assignment.  */
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 816cdaa..56e2f3a 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -1448,8 +1448,8 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
 	  || byte_vtrue != byte_vfalse
 	  || (SUBREG_PROMOTED_VAR_P (vtrue)
 	      != SUBREG_PROMOTED_VAR_P (vfalse))
-	  || (SUBREG_PROMOTED_UNSIGNED_P (vtrue)
-	      != SUBREG_PROMOTED_UNSIGNED_P (vfalse)))
+	  || (SUBREG_PROMOTED_GET (vtrue)
+	      != SUBREG_PROMOTED_GET (vfalse)))
 	return NULL_RTX;
 
       promoted_target = gen_reg_rtx (GET_MODE (reg_vtrue));
@@ -1463,7 +1463,7 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
 
       target = gen_rtx_SUBREG (GET_MODE (vtrue), promoted_target, byte_vtrue);
       SUBREG_PROMOTED_VAR_P (target) = SUBREG_PROMOTED_VAR_P (vtrue);
-      SUBREG_PROMOTED_UNSIGNED_SET (target, SUBREG_PROMOTED_UNSIGNED_P (vtrue));
+      SUBREG_PROMOTED_SET (target, SUBREG_PROMOTED_GET (vtrue));
       emit_move_insn (x, target);
       return x;
     }
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 78f59d6..4e0b964 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -584,12 +584,12 @@ ubsan_expand_si_overflow_mul_check (gimple stmt)
 	  if (GET_CODE (lopart0) == SUBREG)
 	    {
 	      SUBREG_PROMOTED_VAR_P (lopart0) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET (lopart0, 0);
+	      SUBREG_PROMOTED_SET (lopart0, 0);
 	    }
 	  if (GET_CODE (lopart1) == SUBREG)
 	    {
 	      SUBREG_PROMOTED_VAR_P (lopart1) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET (lopart1, 0);
+	      SUBREG_PROMOTED_SET (lopart1, 0);
 	    }
 	  tree halfstype = build_nonstandard_integer_type (hprec, 0);
 	  ops.op0 = make_tree (halfstype, lopart0);
diff --git a/gcc/optabs.c b/gcc/optabs.c
index ca1c194..a00b383 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -368,7 +368,7 @@ widen_operand (rtx op, enum machine_mode mode, enum machine_mode oldmode,
      a promoted object differs from our extension.  */
   if (! no_extend
       || (GET_CODE (op) == SUBREG && SUBREG_PROMOTED_VAR_P (op)
-	  && SUBREG_PROMOTED_UNSIGNED_P (op) == unsignedp))
+	  && SUBREG_CHECK_PROMOTED_SIGN (op, unsignedp)))
     return convert_modes (mode, oldmode, op, unsignedp);
 
   /* If MODE is no wider than a single word, we return a lowpart or paradoxical
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 6ec91a8..11751f5 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -1585,29 +1585,67 @@ get_full_set_src_cost (rtx x, struct full_rtx_costs *c)
 #define SUBREG_PROMOTED_VAR_P(RTX)					\
   (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED", (RTX), SUBREG)->in_struct)
 
-#define SUBREG_PROMOTED_UNSIGNED_SET(RTX, VAL)				\
-do {									\
-  rtx const _rtx = RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_SET",	\
-				    (RTX), SUBREG);			\
-  if ((VAL) < 0)							\
-    _rtx->volatil = 1;							\
-  else {								\
-    _rtx->volatil = 0;							\
-    _rtx->unchanging = (VAL);						\
-  }									\
-} while (0)
-
 /* Valid for subregs which are SUBREG_PROMOTED_VAR_P().  In that case
    this gives the necessary extensions:
-   0  - signed
-   1  - normal unsigned
+   0  - signed (SPR_SIGNED)
+   1  - normal unsigned (SPR_UNSIGNED)
+   2  - value is both sign and unsign extended for mode
+	(SPR_SIGNED_AND_UNSIGNED).
    -1 - pointer unsigned, which most often can be handled like unsigned
         extension, except for generating instructions where we need to
-	emit special code (ptr_extend insns) on some architectures.  */
+	emit special code (ptr_extend insns) on some architectures
+	(SPR_POINTER). */
+
+const unsigned int SRP_POINTER = -1;
+const unsigned int SRP_SIGNED = 0;
+const unsigned int SRP_UNSIGNED = 1;
+const unsigned int SRP_SIGNED_AND_UNSIGNED = 2;
+
+/* Sets promoted mode for SUBREG_PROMOTED_VAR_P().  */
+#define SUBREG_PROMOTED_SET(RTX, VAL)		                        \
+do {								        \
+  rtx const _rtx = RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_SET",		\
+                                    (RTX), SUBREG);			\
+  switch ((VAL))							\
+  {									\
+    case SRP_POINTER:							\
+      _rtx->volatil = 0;						\
+      _rtx->unchanging = 0;						\
+      break;								\
+    case SRP_SIGNED:							\
+      _rtx->volatil = 0;						\
+      _rtx->unchanging = 1;						\
+      break;								\
+    case SRP_UNSIGNED:							\
+      _rtx->volatil = 1;						\
+      _rtx->unchanging = 0;						\
+      break;								\
+    case SRP_SIGNED_AND_UNSIGNED:					\
+      _rtx->volatil = 1;						\
+      _rtx->unchanging = 1;						\
+      break;								\
+  }									\
+} while (0)
+
+/* Gets promoted mode for SUBREG_PROMOTED_VAR_P().  */
+#define SUBREG_PROMOTED_GET(RTX)	\
+  (2 * (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_GET", (RTX), SUBREG)->volatil)\
+   + (RTX)->unchanging - 1)
 
+/* Predicate to check if RTX of SUBREG_PROMOTED_VAR_P() is promoted
+   for SIGNED type.  */
+#define SUBREG_PROMOTED_SIGNED_P(RTX)	\
+  (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_SIGNED_P", (RTX), SUBREG)->unchanging)
+
+/* Predicate to check if RTX of SUBREG_PROMOTED_VAR_P() is promoted
+   for UNSIGNED type.  */
 #define SUBREG_PROMOTED_UNSIGNED_P(RTX)	\
-  ((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil) \
-   ? -1 : (int) (RTX)->unchanging)
+  (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil)
+
+/* Checks if RTX of SUBREG_PROMOTED_VAR_P() is promotd for given SIGN.  */
+#define SUBREG_CHECK_PROMOTED_SIGN(RTX, SIGN)	\
+  ((SIGN) ? SUBREG_PROMOTED_GET ((RTX)) != SRP_SIGNED	\
+   : SUBREG_PROMOTED_SIGNED_P ((RTX)))
 
 /* True if the subreg was generated by LRA for reload insns.  Such
    subregs are valid only during LRA.  */
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 82cfc1bf..fc3f448 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -670,7 +670,7 @@ unsigned_reg_p (rtx op)
     return true;
 
   if (GET_CODE (op) == SUBREG
-      && SUBREG_PROMOTED_UNSIGNED_P (op))
+      && SUBREG_PROMOTED_GET (op) != SRP_SIGNED)
     return true;
 
   return false;
@@ -4309,7 +4309,7 @@ nonzero_bits1 (const_rtx x, enum machine_mode mode, const_rtx known_x,
 	 been zero-extended, we know that at least the high-order bits
 	 are zero, though others might be too.  */
 
-      if (SUBREG_PROMOTED_VAR_P (x) && SUBREG_PROMOTED_UNSIGNED_P (x) > 0)
+      if (SUBREG_PROMOTED_VAR_P (x) && SUBREG_PROMOTED_UNSIGNED_P (x))
 	nonzero = GET_MODE_MASK (GET_MODE (x))
 		  & cached_nonzero_bits (SUBREG_REG (x), GET_MODE (x),
 					 known_x, known_mode, known_ret);
@@ -4619,7 +4619,7 @@ num_sign_bit_copies1 (const_rtx x, enum machine_mode mode, const_rtx known_x,
 	 and we are looking at it in a wider mode, we know that at least the
 	 high-order bits are known to be sign bit copies.  */
 
-      if (SUBREG_PROMOTED_VAR_P (x) && ! SUBREG_PROMOTED_UNSIGNED_P (x))
+      if (SUBREG_PROMOTED_VAR_P (x) && SUBREG_PROMOTED_SIGNED_P (x))
 	{
 	  num0 = cached_num_sign_bit_copies (SUBREG_REG (x), mode,
 					     known_x, known_mode, known_ret);
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 181b56f..5deeb72 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -1352,7 +1352,7 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op)
 	 target mode is the same as the variable's promotion.  */
       if (GET_CODE (op) == SUBREG
 	  && SUBREG_PROMOTED_VAR_P (op)
-	  && ! SUBREG_PROMOTED_UNSIGNED_P (op)
+	  && SUBREG_PROMOTED_SIGNED_P (op)
 	  && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
 	{
 	  temp = rtl_hooks.gen_lowpart_no_emit (mode, op);
@@ -1419,7 +1419,7 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op)
 	 target mode is the same as the variable's promotion.  */
       if (GET_CODE (op) == SUBREG
 	  && SUBREG_PROMOTED_VAR_P (op)
-	  && SUBREG_PROMOTED_UNSIGNED_P (op) > 0
+	  && SUBREG_PROMOTED_UNSIGNED_P (op)
 	  && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
 	{
 	  temp = rtl_hooks.gen_lowpart_no_emit (mode, op);
@@ -5587,7 +5587,8 @@ simplify_subreg (enum machine_mode outermode, rtx op,
 	{
 	  newx = gen_rtx_SUBREG (outermode, SUBREG_REG (op), final_offset);
 	  if (SUBREG_PROMOTED_VAR_P (op)
-	      && SUBREG_PROMOTED_UNSIGNED_P (op) >= 0
+	      && (SUBREG_PROMOTED_UNSIGNED_P (op)
+		  || (SUBREG_PROMOTED_SIGNED_P (op)))
 	      && GET_MODE_CLASS (outermode) == MODE_INT
 	      && IN_RANGE (GET_MODE_SIZE (outermode),
 			   GET_MODE_SIZE (innermode),
@@ -5595,8 +5596,7 @@ simplify_subreg (enum machine_mode outermode, rtx op,
 	      && subreg_lowpart_p (newx))
 	    {
 	      SUBREG_PROMOTED_VAR_P (newx) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET
-		(newx, SUBREG_PROMOTED_UNSIGNED_P (op));
+	      SUBREG_PROMOTED_SET (newx, SUBREG_PROMOTED_GET (op));
 	    }
 	  return newx;
 	}

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED)
  2014-06-26  9:41             ` Kugan
@ 2014-06-26 10:12               ` Jakub Jelinek
  2014-06-26 10:42                 ` Jakub Jelinek
  2014-07-01  8:21                 ` Kugan
  2014-06-26 10:25               ` Andreas Schwab
  1 sibling, 2 replies; 58+ messages in thread
From: Jakub Jelinek @ 2014-06-26 10:12 UTC (permalink / raw)
  To: Kugan; +Cc: Richard Henderson, gcc-patches

On Thu, Jun 26, 2014 at 07:41:22PM +1000, Kugan wrote:
> 2014-06-26  Kugan Vivekanandarajah  <kuganv@linaro.org>
> 
> 	* calls.c (precompute_arguments): Use new SUBREG_PROMOTED_SET
> 	instead of SUBREG_PROMOTED_UNSIGNED_SET

Missing full stop.

> --- a/gcc/cfgexpand.c
> +++ b/gcc/cfgexpand.c
> @@ -3297,7 +3297,7 @@ expand_gimple_stmt_1 (gimple stmt)
>  	      ;
>  	    else if (promoted)
>  	      {
> -		int unsignedp = SUBREG_PROMOTED_UNSIGNED_P (target);
> +		int unsignedp = SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED;

From what I understand, here you want the -1/0/1 value and not 2,
so that is
int unsignedp = SUBREG_PROMOTED_GET (target);
if (unsignedp == SRP_SIGNED_AND_UNSIGNED) unsignedp = SRP_UNSIGNED;
I think.  Do you agree?

BTW, the final patch will probably need to be tested on one of the weirdo
ptr_extend targets (ia64-hpux or x86_64-linux -mx32).

> --- a/gcc/expr.c
> +++ b/gcc/expr.c
> @@ -329,7 +329,7 @@ convert_move (rtx to, rtx from, int unsignedp)
>    if (GET_CODE (from) == SUBREG && SUBREG_PROMOTED_VAR_P (from)
>        && (GET_MODE_PRECISION (GET_MODE (SUBREG_REG (from)))
>  	  >= GET_MODE_PRECISION (to_mode))
> -      && SUBREG_PROMOTED_UNSIGNED_P (from) == unsignedp)
> +      && SUBREG_CHECK_PROMOTED_SIGN (from, unsignedp))

I think unsignedp (misnamed) may be -1/0/1 here, so either
SUBREG_CHECK_PROMOTED_SIGN needs to handle those 3, or you need to use
something else.  If it handles all 3 values, then it would be say
((SIGN) == SRP_POINTER ? SUBREG_PROMOTED_GET (RTX) == SRP_POINTER
 : (SIGN) == SRP_SIGNED ? SUBREG_PROMOTED_SIGNED_P (RTX)
 : SUBREG_PROMOTED_UNSIGNED_P (RTX))
or so.

>      from = gen_lowpart (to_mode, from), from_mode = to_mode;
>  
>    gcc_assert (GET_CODE (to) != SUBREG || !SUBREG_PROMOTED_VAR_P (to));
> @@ -703,7 +703,7 @@ convert_modes (enum machine_mode mode, enum machine_mode oldmode, rtx x, int uns
>  
>    if (GET_CODE (x) == SUBREG && SUBREG_PROMOTED_VAR_P (x)
>        && GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))) >= GET_MODE_SIZE (mode)
> -      && SUBREG_PROMOTED_UNSIGNED_P (x) == unsignedp)
> +      && SUBREG_CHECK_PROMOTED_SIGN (x, unsignedp))
>      x = gen_lowpart (mode, SUBREG_REG (x));

Similarly.

> @@ -5203,24 +5203,25 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
>  	     == TYPE_PRECISION (TREE_TYPE (exp)))
>  	{
>  	  if (TYPE_UNSIGNED (TREE_TYPE (exp))
> -	      != SUBREG_PROMOTED_UNSIGNED_P (target))
> +	      != SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED)

Here TYPE_UNSIGNED is 0 or 1, so if you define SUBREG_PROMOTED_CHECK_SIGN
the way suggested above, this would be SUBREG_PROMOTED_CHECK_SIGN then,
or if (TYPE_UNSIGNED (TREE_TYPE (exp))
       ? SUBREG_PROMOTED_UNSIGNED_P (target)
       : SUBREG_PROMOTED_SIGNED_P (target))

>  	    {
>  	      /* Some types, e.g. Fortran's logical*4, won't have a signed
>  		 version, so use the mode instead.  */
>  	      tree ntype
>  		= (signed_or_unsigned_type_for
> -		   (SUBREG_PROMOTED_UNSIGNED_P (target), TREE_TYPE (exp)));
> +		   (SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED,

I'd just use TYPE_UNSIGNED (TREE_TYPE (exp)) here instead,
no reason to repeat what the guarding condition did.

> +		    TREE_TYPE (exp)));
>  	      if (ntype == NULL)
>  		ntype = lang_hooks.types.type_for_mode
>  		  (TYPE_MODE (TREE_TYPE (exp)),
> -		   SUBREG_PROMOTED_UNSIGNED_P (target));
> +		   SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
>  
>  	      exp = fold_convert_loc (loc, ntype, exp);
>  	    }
>  
>  	  exp = fold_convert_loc (loc, lang_hooks.types.type_for_mode
>  				  (GET_MODE (SUBREG_REG (target)),
> -				   SUBREG_PROMOTED_UNSIGNED_P (target)),
> +				   SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED),
>  				  exp);

I believe fold_convert only considers zero and non-zero, so no idea
what we want here for SRP_POINTER.  Doing what we used to do would
be SUBREG_PROMOTED_GET (target) != SRP_SIGNED.
>  
>  	  inner_target = SUBREG_REG (target);
> @@ -5234,14 +5235,14 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
>        if (CONSTANT_P (temp) && GET_MODE (temp) == VOIDmode)
>  	{
>  	  temp = convert_modes (GET_MODE (target), TYPE_MODE (TREE_TYPE (exp)),
> -				temp, SUBREG_PROMOTED_UNSIGNED_P (target));
> +				temp, SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
>  	  temp = convert_modes (GET_MODE (SUBREG_REG (target)),
>  			        GET_MODE (target), temp,
> -			        SUBREG_PROMOTED_UNSIGNED_P (target));
> +			        SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
>  	}
>  
>        convert_move (SUBREG_REG (target), temp,
> -		    SUBREG_PROMOTED_UNSIGNED_P (target));
> +		    SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);

In all 3 cases here you want -1/0/1 and treat SRP_SIGNED_AND_UNSIGNED as
probably 1, so supposedly you want a macro for that and use it
in the 3 cases here, in expand_gimple_stmt_1 etc.
> --- a/gcc/rtl.h
> +++ b/gcc/rtl.h
> @@ -1585,29 +1585,67 @@ get_full_set_src_cost (rtx x, struct full_rtx_costs *c)
>  #define SUBREG_PROMOTED_VAR_P(RTX)					\
>    (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED", (RTX), SUBREG)->in_struct)
>  
> -#define SUBREG_PROMOTED_UNSIGNED_SET(RTX, VAL)				\
> -do {									\
> -  rtx const _rtx = RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_SET",	\
> -				    (RTX), SUBREG);			\
> -  if ((VAL) < 0)							\
> -    _rtx->volatil = 1;							\
> -  else {								\
> -    _rtx->volatil = 0;							\
> -    _rtx->unchanging = (VAL);						\
> -  }									\
> -} while (0)
> -
>  /* Valid for subregs which are SUBREG_PROMOTED_VAR_P().  In that case
>     this gives the necessary extensions:
> -   0  - signed
> -   1  - normal unsigned
> +   0  - signed (SPR_SIGNED)
> +   1  - normal unsigned (SPR_UNSIGNED)
> +   2  - value is both sign and unsign extended for mode
> +	(SPR_SIGNED_AND_UNSIGNED).
>     -1 - pointer unsigned, which most often can be handled like unsigned
>          extension, except for generating instructions where we need to
> -	emit special code (ptr_extend insns) on some architectures.  */
> +	emit special code (ptr_extend insns) on some architectures
> +	(SPR_POINTER). */
> +
> +const unsigned int SRP_POINTER = -1;
> +const unsigned int SRP_SIGNED = 0;
> +const unsigned int SRP_UNSIGNED = 1;
> +const unsigned int SRP_SIGNED_AND_UNSIGNED = 2;
> +
> +/* Sets promoted mode for SUBREG_PROMOTED_VAR_P().  */
> +#define SUBREG_PROMOTED_SET(RTX, VAL)		                        \
> +do {								        \
> +  rtx const _rtx = RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_SET",		\
> +                                    (RTX), SUBREG);			\
> +  switch ((VAL))							\

Please avoid the extra ()s, switch (VAL) is enough.

> +/* Checks if RTX of SUBREG_PROMOTED_VAR_P() is promotd for given SIGN.  */

promoted, typo.

> +#define SUBREG_CHECK_PROMOTED_SIGN(RTX, SIGN)	\
> +  ((SIGN) ? SUBREG_PROMOTED_GET ((RTX)) != SRP_SIGNED	\
> +   : SUBREG_PROMOTED_SIGNED_P ((RTX)))

See above.  And note the ((RTX)) should have been (RTX) anyway.

> @@ -5587,7 +5587,8 @@ simplify_subreg (enum machine_mode outermode, rtx op,
>  	{
>  	  newx = gen_rtx_SUBREG (outermode, SUBREG_REG (op), final_offset);
>  	  if (SUBREG_PROMOTED_VAR_P (op)
> -	      && SUBREG_PROMOTED_UNSIGNED_P (op) >= 0
> +	      && (SUBREG_PROMOTED_UNSIGNED_P (op)
> +		  || (SUBREG_PROMOTED_SIGNED_P (op)))

SUBREG_PROMOTED_GET (op) != SRP_POINTER ?
Also note the extra ()s.

>  	      && GET_MODE_CLASS (outermode) == MODE_INT
>  	      && IN_RANGE (GET_MODE_SIZE (outermode),
>  			   GET_MODE_SIZE (innermode),
> @@ -5595,8 +5596,7 @@ simplify_subreg (enum machine_mode outermode, rtx op,
>  	      && subreg_lowpart_p (newx))
>  	    {
>  	      SUBREG_PROMOTED_VAR_P (newx) = 1;
> -	      SUBREG_PROMOTED_UNSIGNED_SET
> -		(newx, SUBREG_PROMOTED_UNSIGNED_P (op));
> +	      SUBREG_PROMOTED_SET (newx, SUBREG_PROMOTED_GET (op));
>  	    }
>  	  return newx;
>  	}


	Jakub

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED)
  2014-06-26  9:41             ` Kugan
  2014-06-26 10:12               ` Jakub Jelinek
@ 2014-06-26 10:25               ` Andreas Schwab
  2014-07-01  8:28                 ` Kugan
  1 sibling, 1 reply; 58+ messages in thread
From: Andreas Schwab @ 2014-06-26 10:25 UTC (permalink / raw)
  To: Kugan; +Cc: Jakub Jelinek, Richard Henderson, gcc-patches

Kugan <kugan.vivekanandarajah@linaro.org> writes:

> @@ -5203,24 +5203,25 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
>  	     == TYPE_PRECISION (TREE_TYPE (exp)))
>  	{
>  	  if (TYPE_UNSIGNED (TREE_TYPE (exp))
> -	      != SUBREG_PROMOTED_UNSIGNED_P (target))
> +	      != SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED)

& has lower precedence than !=.  You should have got a warning that
fails bootstrap.

Andreas.

-- 
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED)
  2014-06-26 10:12               ` Jakub Jelinek
@ 2014-06-26 10:42                 ` Jakub Jelinek
  2014-07-01  8:21                 ` Kugan
  1 sibling, 0 replies; 58+ messages in thread
From: Jakub Jelinek @ 2014-06-26 10:42 UTC (permalink / raw)
  To: Kugan; +Cc: Richard Henderson, gcc-patches

On Thu, Jun 26, 2014 at 12:12:03PM +0200, Jakub Jelinek wrote:
> > @@ -5234,14 +5235,14 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
> >        if (CONSTANT_P (temp) && GET_MODE (temp) == VOIDmode)
> >  	{
> >  	  temp = convert_modes (GET_MODE (target), TYPE_MODE (TREE_TYPE (exp)),
> > -				temp, SUBREG_PROMOTED_UNSIGNED_P (target));
> > +				temp, SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
> >  	  temp = convert_modes (GET_MODE (SUBREG_REG (target)),
> >  			        GET_MODE (target), temp,
> > -			        SUBREG_PROMOTED_UNSIGNED_P (target));
> > +			        SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
> >  	}
> >  
> >        convert_move (SUBREG_REG (target), temp,
> > -		    SUBREG_PROMOTED_UNSIGNED_P (target));
> > +		    SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
> 
> In all 3 cases here you want -1/0/1 and treat SRP_SIGNED_AND_UNSIGNED as
> probably 1, so supposedly you want a macro for that and use it
> in the 3 cases here, in expand_gimple_stmt_1 etc.

That macro (not sure about best name for it), which would for
SUBREG_PROMOTED_GET -1, 0, 1, 2 return -1, 0, 1, 1 could be defined e.g. as
  ((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_GET", (RTX), SUBREG)->volatil) ? 1	\
   : (RTX)->unchanging - 1)

	Jakub

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED)
  2014-06-26 10:12               ` Jakub Jelinek
  2014-06-26 10:42                 ` Jakub Jelinek
@ 2014-07-01  8:21                 ` Kugan
  2014-07-07  6:52                   ` Kugan
  1 sibling, 1 reply; 58+ messages in thread
From: Kugan @ 2014-07-01  8:21 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Henderson, gcc-patches

On 26/06/14 20:12, Jakub Jelinek wrote:
> On Thu, Jun 26, 2014 at 07:41:22PM +1000, Kugan wrote:
>> 2014-06-26  Kugan Vivekanandarajah  <kuganv@linaro.org>
>>
>> 	* calls.c (precompute_arguments): Use new SUBREG_PROMOTED_SET
>> 	instead of SUBREG_PROMOTED_UNSIGNED_SET
> 
> Missing full stop.
> 
>> --- a/gcc/cfgexpand.c
>> +++ b/gcc/cfgexpand.c
>> @@ -3297,7 +3297,7 @@ expand_gimple_stmt_1 (gimple stmt)
>>  	      ;
>>  	    else if (promoted)
>>  	      {
>> -		int unsignedp = SUBREG_PROMOTED_UNSIGNED_P (target);
>> +		int unsignedp = SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED;
> 
> From what I understand, here you want the -1/0/1 value and not 2,
> so that is
> int unsignedp = SUBREG_PROMOTED_GET (target);
> if (unsignedp == SRP_SIGNED_AND_UNSIGNED) unsignedp = SRP_UNSIGNED;
> I think.  Do you agree?

I agree.


> BTW, the final patch will probably need to be tested on one of the weirdo
> ptr_extend targets (ia64-hpux or x86_64-linux -mx32).

I am now looking at testing on such targets. I just want to double check
that x86_64-linux -mx32 is OK for this. When I looked at the src, it
looked to me #define POINTERS_EXTEND_UNSIGNED -1 is needed for this to
happen. x86_64-linux -mx32 doesnt seem to fall into thss.

In addition, I will also test AArch64 ILP32 (#define
POINTERS_EXTEND_UNSIGNED 1), ARM and x86_64 before posting the patch.

>> --- a/gcc/expr.c
>> +++ b/gcc/expr.c
>> @@ -329,7 +329,7 @@ convert_move (rtx to, rtx from, int unsignedp)
>>    if (GET_CODE (from) == SUBREG && SUBREG_PROMOTED_VAR_P (from)
>>        && (GET_MODE_PRECISION (GET_MODE (SUBREG_REG (from)))
>>  	  >= GET_MODE_PRECISION (to_mode))
>> -      && SUBREG_PROMOTED_UNSIGNED_P (from) == unsignedp)
>> +      && SUBREG_CHECK_PROMOTED_SIGN (from, unsignedp))
> 
> I think unsignedp (misnamed) may be -1/0/1 here, so either
> SUBREG_CHECK_PROMOTED_SIGN needs to handle those 3, or you need to use
> something else.  If it handles all 3 values, then it would be say
> ((SIGN) == SRP_POINTER ? SUBREG_PROMOTED_GET (RTX) == SRP_POINTER
>  : (SIGN) == SRP_SIGNED ? SUBREG_PROMOTED_SIGNED_P (RTX)
>  : SUBREG_PROMOTED_UNSIGNED_P (RTX))
> or so.

I have changed it. I have defined a macro SUBREG_PROMOTED_SIGN for this.

> 
>>      from = gen_lowpart (to_mode, from), from_mode = to_mode;
>>  
>>    gcc_assert (GET_CODE (to) != SUBREG || !SUBREG_PROMOTED_VAR_P (to));
>> @@ -703,7 +703,7 @@ convert_modes (enum machine_mode mode, enum machine_mode oldmode, rtx x, int uns
>>  
>>    if (GET_CODE (x) == SUBREG && SUBREG_PROMOTED_VAR_P (x)
>>        && GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))) >= GET_MODE_SIZE (mode)
>> -      && SUBREG_PROMOTED_UNSIGNED_P (x) == unsignedp)
>> +      && SUBREG_CHECK_PROMOTED_SIGN (x, unsignedp))
>>      x = gen_lowpart (mode, SUBREG_REG (x));
> 
> Similarly.


I have changed it too.

> 
>> @@ -5203,24 +5203,25 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
>>  	     == TYPE_PRECISION (TREE_TYPE (exp)))
>>  	{
>>  	  if (TYPE_UNSIGNED (TREE_TYPE (exp))
>> -	      != SUBREG_PROMOTED_UNSIGNED_P (target))
>> +	      != SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED)
> 
> Here TYPE_UNSIGNED is 0 or 1, so if you define SUBREG_PROMOTED_CHECK_SIGN
> the way suggested above, this would be SUBREG_PROMOTED_CHECK_SIGN then,
> or if (TYPE_UNSIGNED (TREE_TYPE (exp))
>        ? SUBREG_PROMOTED_UNSIGNED_P (target)
>        : SUBREG_PROMOTED_SIGNED_P (target))
> 
>>  	    {
>>  	      /* Some types, e.g. Fortran's logical*4, won't have a signed
>>  		 version, so use the mode instead.  */
>>  	      tree ntype
>>  		= (signed_or_unsigned_type_for
>> -		   (SUBREG_PROMOTED_UNSIGNED_P (target), TREE_TYPE (exp)));
>> +		   (SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED,
> 
> I'd just use TYPE_UNSIGNED (TREE_TYPE (exp)) here instead,
> no reason to repeat what the guarding condition did.

Did you mean !TYPE_UNSIGNED (TREE_TYPE (exp))?. isnÂ’t it better to use
the macro SUBREG_PROMOTED_SIGN (defined earlier as you suggested) here?
It might be more readable. I am happy to do what you like.

> 
>> +		    TREE_TYPE (exp)));
>>  	      if (ntype == NULL)
>>  		ntype = lang_hooks.types.type_for_mode
>>  		  (TYPE_MODE (TREE_TYPE (exp)),
>> -		   SUBREG_PROMOTED_UNSIGNED_P (target));
>> +		   SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
>>  
>>  	      exp = fold_convert_loc (loc, ntype, exp);
>>  	    }
>>  
>>  	  exp = fold_convert_loc (loc, lang_hooks.types.type_for_mode
>>  				  (GET_MODE (SUBREG_REG (target)),
>> -				   SUBREG_PROMOTED_UNSIGNED_P (target)),
>> +				   SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED),
>>  				  exp);
> 
> I believe fold_convert only considers zero and non-zero, so no idea
> what we want here for SRP_POINTER.  Doing what we used to do would
> be SUBREG_PROMOTED_GET (target) != SRP_SIGNED.
>>  
>>  	  inner_target = SUBREG_REG (target);
>> @@ -5234,14 +5235,14 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
>>        if (CONSTANT_P (temp) && GET_MODE (temp) == VOIDmode)
>>  	{
>>  	  temp = convert_modes (GET_MODE (target), TYPE_MODE (TREE_TYPE (exp)),
>> -				temp, SUBREG_PROMOTED_UNSIGNED_P (target));
>> +				temp, SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
>>  	  temp = convert_modes (GET_MODE (SUBREG_REG (target)),
>>  			        GET_MODE (target), temp,
>> -			        SUBREG_PROMOTED_UNSIGNED_P (target));
>> +			        SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
>>  	}
>>  
>>        convert_move (SUBREG_REG (target), temp,
>> -		    SUBREG_PROMOTED_UNSIGNED_P (target));
>> +		    SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
> 
> In all 3 cases here you want -1/0/1 and treat SRP_SIGNED_AND_UNSIGNED as
> probably 1, so supposedly you want a macro for that and use it
> in the 3 cases here, in expand_gimple_stmt_1 etc.

I have changed this.

>> --- a/gcc/rtl.h
>> +++ b/gcc/rtl.h
>> @@ -1585,29 +1585,67 @@ get_full_set_src_cost (rtx x, struct full_rtx_costs *c)
>>  #define SUBREG_PROMOTED_VAR_P(RTX)					\
>>    (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED", (RTX), SUBREG)->in_struct)
>>  
>> -#define SUBREG_PROMOTED_UNSIGNED_SET(RTX, VAL)				\
>> -do {									\
>> -  rtx const _rtx = RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_SET",	\
>> -				    (RTX), SUBREG);			\
>> -  if ((VAL) < 0)							\
>> -    _rtx->volatil = 1;							\
>> -  else {								\
>> -    _rtx->volatil = 0;							\
>> -    _rtx->unchanging = (VAL);						\
>> -  }									\
>> -} while (0)
>> -
>>  /* Valid for subregs which are SUBREG_PROMOTED_VAR_P().  In that case
>>     this gives the necessary extensions:
>> -   0  - signed
>> -   1  - normal unsigned
>> +   0  - signed (SPR_SIGNED)
>> +   1  - normal unsigned (SPR_UNSIGNED)
>> +   2  - value is both sign and unsign extended for mode
>> +	(SPR_SIGNED_AND_UNSIGNED).
>>     -1 - pointer unsigned, which most often can be handled like unsigned
>>          extension, except for generating instructions where we need to
>> -	emit special code (ptr_extend insns) on some architectures.  */
>> +	emit special code (ptr_extend insns) on some architectures
>> +	(SPR_POINTER). */
>> +
>> +const unsigned int SRP_POINTER = -1;
>> +const unsigned int SRP_SIGNED = 0;
>> +const unsigned int SRP_UNSIGNED = 1;
>> +const unsigned int SRP_SIGNED_AND_UNSIGNED = 2;
>> +
>> +/* Sets promoted mode for SUBREG_PROMOTED_VAR_P().  */
>> +#define SUBREG_PROMOTED_SET(RTX, VAL)		                        \
>> +do {								        \
>> +  rtx const _rtx = RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_SET",		\
>> +                                    (RTX), SUBREG);			\
>> +  switch ((VAL))							\
> 
> Please avoid the extra ()s, switch (VAL) is enough.
> 
>> +/* Checks if RTX of SUBREG_PROMOTED_VAR_P() is promotd for given SIGN.  */
> 
> promoted, typo.
> 
>> +#define SUBREG_CHECK_PROMOTED_SIGN(RTX, SIGN)	\
>> +  ((SIGN) ? SUBREG_PROMOTED_GET ((RTX)) != SRP_SIGNED	\
>> +   : SUBREG_PROMOTED_SIGNED_P ((RTX)))
> 
> See above.  And note the ((RTX)) should have been (RTX) anyway.
> 
>> @@ -5587,7 +5587,8 @@ simplify_subreg (enum machine_mode outermode, rtx op,
>>  	{
>>  	  newx = gen_rtx_SUBREG (outermode, SUBREG_REG (op), final_offset);
>>  	  if (SUBREG_PROMOTED_VAR_P (op)
>> -	      && SUBREG_PROMOTED_UNSIGNED_P (op) >= 0
>> +	      && (SUBREG_PROMOTED_UNSIGNED_P (op)
>> +		  || (SUBREG_PROMOTED_SIGNED_P (op)))
> 
> SUBREG_PROMOTED_GET (op) != SRP_POINTER ?
> Also note the extra ()s.
> 
>>  	      && GET_MODE_CLASS (outermode) == MODE_INT
>>  	      && IN_RANGE (GET_MODE_SIZE (outermode),
>>  			   GET_MODE_SIZE (innermode),
>> @@ -5595,8 +5596,7 @@ simplify_subreg (enum machine_mode outermode, rtx op,
>>  	      && subreg_lowpart_p (newx))
>>  	    {
>>  	      SUBREG_PROMOTED_VAR_P (newx) = 1;
>> -	      SUBREG_PROMOTED_UNSIGNED_SET
>> -		(newx, SUBREG_PROMOTED_UNSIGNED_P (op));
>> +	      SUBREG_PROMOTED_SET (newx, SUBREG_PROMOTED_GET (op));
>>  	    }
>>  	  return newx;
>>  	}
> 

I have changed the above as well. I will post the patch after testing
for all the necessary targets.

Thanks for the help.

Kugan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED)
  2014-06-26 10:25               ` Andreas Schwab
@ 2014-07-01  8:28                 ` Kugan
  0 siblings, 0 replies; 58+ messages in thread
From: Kugan @ 2014-07-01  8:28 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: Jakub Jelinek, Richard Henderson, gcc-patches



On 26/06/14 20:25, Andreas Schwab wrote:
> Kugan <kugan.vivekanandarajah@linaro.org> writes:
> 
>> @@ -5203,24 +5203,25 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
>>  	     == TYPE_PRECISION (TREE_TYPE (exp)))
>>  	{
>>  	  if (TYPE_UNSIGNED (TREE_TYPE (exp))
>> -	      != SUBREG_PROMOTED_UNSIGNED_P (target))
>> +	      != SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED)
> 
> & has lower precedence than !=.  You should have got a warning that
> fails bootstrap.

Thanks for spotting it. I  did a cross bootstrap and it dose not fail on
warning. I should have mentioned clearly that I intend to do the full
testing on all the targets necessary after getting feedback.

Thanks,
Kugan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED)
  2014-07-01  8:21                 ` Kugan
@ 2014-07-07  6:52                   ` Kugan
  2014-07-07  8:06                     ` Jakub Jelinek
  0 siblings, 1 reply; 58+ messages in thread
From: Kugan @ 2014-07-07  6:52 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Henderson, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 11598 bytes --]

On 01/07/14 18:21, Kugan wrote:
> On 26/06/14 20:12, Jakub Jelinek wrote:
>> On Thu, Jun 26, 2014 at 07:41:22PM +1000, Kugan wrote:
>>> 2014-06-26  Kugan Vivekanandarajah  <kuganv@linaro.org>
>>>
>>> 	* calls.c (precompute_arguments): Use new SUBREG_PROMOTED_SET
>>> 	instead of SUBREG_PROMOTED_UNSIGNED_SET
>>
>> Missing full stop.
>>
>>> --- a/gcc/cfgexpand.c
>>> +++ b/gcc/cfgexpand.c
>>> @@ -3297,7 +3297,7 @@ expand_gimple_stmt_1 (gimple stmt)
>>>  	      ;
>>>  	    else if (promoted)
>>>  	      {
>>> -		int unsignedp = SUBREG_PROMOTED_UNSIGNED_P (target);
>>> +		int unsignedp = SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED;
>>
>> From what I understand, here you want the -1/0/1 value and not 2,
>> so that is
>> int unsignedp = SUBREG_PROMOTED_GET (target);
>> if (unsignedp == SRP_SIGNED_AND_UNSIGNED) unsignedp = SRP_UNSIGNED;
>> I think.  Do you agree?
> 
> I agree.
> 
> 
>> BTW, the final patch will probably need to be tested on one of the weirdo
>> ptr_extend targets (ia64-hpux or x86_64-linux -mx32).
> 
> I am now looking at testing on such targets. I just want to double check
> that x86_64-linux -mx32 is OK for this. When I looked at the src, it
> looked to me #define POINTERS_EXTEND_UNSIGNED -1 is needed for this to
> happen. x86_64-linux -mx32 doesnt seem to fall into thss.
> 
> In addition, I will also test AArch64 ILP32 (#define
> POINTERS_EXTEND_UNSIGNED 1), ARM and x86_64 before posting the patch.
> 
>>> --- a/gcc/expr.c
>>> +++ b/gcc/expr.c
>>> @@ -329,7 +329,7 @@ convert_move (rtx to, rtx from, int unsignedp)
>>>    if (GET_CODE (from) == SUBREG && SUBREG_PROMOTED_VAR_P (from)
>>>        && (GET_MODE_PRECISION (GET_MODE (SUBREG_REG (from)))
>>>  	  >= GET_MODE_PRECISION (to_mode))
>>> -      && SUBREG_PROMOTED_UNSIGNED_P (from) == unsignedp)
>>> +      && SUBREG_CHECK_PROMOTED_SIGN (from, unsignedp))
>>
>> I think unsignedp (misnamed) may be -1/0/1 here, so either
>> SUBREG_CHECK_PROMOTED_SIGN needs to handle those 3, or you need to use
>> something else.  If it handles all 3 values, then it would be say
>> ((SIGN) == SRP_POINTER ? SUBREG_PROMOTED_GET (RTX) == SRP_POINTER
>>  : (SIGN) == SRP_SIGNED ? SUBREG_PROMOTED_SIGNED_P (RTX)
>>  : SUBREG_PROMOTED_UNSIGNED_P (RTX))
>> or so.
> 
> I have changed it. I have defined a macro SUBREG_PROMOTED_SIGN for this.
> 
>>
>>>      from = gen_lowpart (to_mode, from), from_mode = to_mode;
>>>  
>>>    gcc_assert (GET_CODE (to) != SUBREG || !SUBREG_PROMOTED_VAR_P (to));
>>> @@ -703,7 +703,7 @@ convert_modes (enum machine_mode mode, enum machine_mode oldmode, rtx x, int uns
>>>  
>>>    if (GET_CODE (x) == SUBREG && SUBREG_PROMOTED_VAR_P (x)
>>>        && GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))) >= GET_MODE_SIZE (mode)
>>> -      && SUBREG_PROMOTED_UNSIGNED_P (x) == unsignedp)
>>> +      && SUBREG_CHECK_PROMOTED_SIGN (x, unsignedp))
>>>      x = gen_lowpart (mode, SUBREG_REG (x));
>>
>> Similarly.
> 
> 
> I have changed it too.
> 
>>
>>> @@ -5203,24 +5203,25 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
>>>  	     == TYPE_PRECISION (TREE_TYPE (exp)))
>>>  	{
>>>  	  if (TYPE_UNSIGNED (TREE_TYPE (exp))
>>> -	      != SUBREG_PROMOTED_UNSIGNED_P (target))
>>> +	      != SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED)
>>
>> Here TYPE_UNSIGNED is 0 or 1, so if you define SUBREG_PROMOTED_CHECK_SIGN
>> the way suggested above, this would be SUBREG_PROMOTED_CHECK_SIGN then,
>> or if (TYPE_UNSIGNED (TREE_TYPE (exp))
>>        ? SUBREG_PROMOTED_UNSIGNED_P (target)
>>        : SUBREG_PROMOTED_SIGNED_P (target))
>>
>>>  	    {
>>>  	      /* Some types, e.g. Fortran's logical*4, won't have a signed
>>>  		 version, so use the mode instead.  */
>>>  	      tree ntype
>>>  		= (signed_or_unsigned_type_for
>>> -		   (SUBREG_PROMOTED_UNSIGNED_P (target), TREE_TYPE (exp)));
>>> +		   (SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED,
>>
>> I'd just use TYPE_UNSIGNED (TREE_TYPE (exp)) here instead,
>> no reason to repeat what the guarding condition did.
> 
> Did you mean !TYPE_UNSIGNED (TREE_TYPE (exp))?. isn’t it better to use
> the macro SUBREG_PROMOTED_SIGN (defined earlier as you suggested) here?
> It might be more readable. I am happy to do what you like.
> 
>>
>>> +		    TREE_TYPE (exp)));
>>>  	      if (ntype == NULL)
>>>  		ntype = lang_hooks.types.type_for_mode
>>>  		  (TYPE_MODE (TREE_TYPE (exp)),
>>> -		   SUBREG_PROMOTED_UNSIGNED_P (target));
>>> +		   SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
>>>  
>>>  	      exp = fold_convert_loc (loc, ntype, exp);
>>>  	    }
>>>  
>>>  	  exp = fold_convert_loc (loc, lang_hooks.types.type_for_mode
>>>  				  (GET_MODE (SUBREG_REG (target)),
>>> -				   SUBREG_PROMOTED_UNSIGNED_P (target)),
>>> +				   SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED),
>>>  				  exp);
>>
>> I believe fold_convert only considers zero and non-zero, so no idea
>> what we want here for SRP_POINTER.  Doing what we used to do would
>> be SUBREG_PROMOTED_GET (target) != SRP_SIGNED.
>>>  
>>>  	  inner_target = SUBREG_REG (target);
>>> @@ -5234,14 +5235,14 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
>>>        if (CONSTANT_P (temp) && GET_MODE (temp) == VOIDmode)
>>>  	{
>>>  	  temp = convert_modes (GET_MODE (target), TYPE_MODE (TREE_TYPE (exp)),
>>> -				temp, SUBREG_PROMOTED_UNSIGNED_P (target));
>>> +				temp, SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
>>>  	  temp = convert_modes (GET_MODE (SUBREG_REG (target)),
>>>  			        GET_MODE (target), temp,
>>> -			        SUBREG_PROMOTED_UNSIGNED_P (target));
>>> +			        SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
>>>  	}
>>>  
>>>        convert_move (SUBREG_REG (target), temp,
>>> -		    SUBREG_PROMOTED_UNSIGNED_P (target));
>>> +		    SUBREG_PROMOTED_GET (target) & SRP_UNSIGNED);
>>
>> In all 3 cases here you want -1/0/1 and treat SRP_SIGNED_AND_UNSIGNED as
>> probably 1, so supposedly you want a macro for that and use it
>> in the 3 cases here, in expand_gimple_stmt_1 etc.
> 
> I have changed this.
> 
>>> --- a/gcc/rtl.h
>>> +++ b/gcc/rtl.h
>>> @@ -1585,29 +1585,67 @@ get_full_set_src_cost (rtx x, struct full_rtx_costs *c)
>>>  #define SUBREG_PROMOTED_VAR_P(RTX)					\
>>>    (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED", (RTX), SUBREG)->in_struct)
>>>  
>>> -#define SUBREG_PROMOTED_UNSIGNED_SET(RTX, VAL)				\
>>> -do {									\
>>> -  rtx const _rtx = RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_SET",	\
>>> -				    (RTX), SUBREG);			\
>>> -  if ((VAL) < 0)							\
>>> -    _rtx->volatil = 1;							\
>>> -  else {								\
>>> -    _rtx->volatil = 0;							\
>>> -    _rtx->unchanging = (VAL);						\
>>> -  }									\
>>> -} while (0)
>>> -
>>>  /* Valid for subregs which are SUBREG_PROMOTED_VAR_P().  In that case
>>>     this gives the necessary extensions:
>>> -   0  - signed
>>> -   1  - normal unsigned
>>> +   0  - signed (SPR_SIGNED)
>>> +   1  - normal unsigned (SPR_UNSIGNED)
>>> +   2  - value is both sign and unsign extended for mode
>>> +	(SPR_SIGNED_AND_UNSIGNED).
>>>     -1 - pointer unsigned, which most often can be handled like unsigned
>>>          extension, except for generating instructions where we need to
>>> -	emit special code (ptr_extend insns) on some architectures.  */
>>> +	emit special code (ptr_extend insns) on some architectures
>>> +	(SPR_POINTER). */
>>> +
>>> +const unsigned int SRP_POINTER = -1;
>>> +const unsigned int SRP_SIGNED = 0;
>>> +const unsigned int SRP_UNSIGNED = 1;
>>> +const unsigned int SRP_SIGNED_AND_UNSIGNED = 2;
>>> +
>>> +/* Sets promoted mode for SUBREG_PROMOTED_VAR_P().  */
>>> +#define SUBREG_PROMOTED_SET(RTX, VAL)		                        \
>>> +do {								        \
>>> +  rtx const _rtx = RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_SET",		\
>>> +                                    (RTX), SUBREG);			\
>>> +  switch ((VAL))							\
>>
>> Please avoid the extra ()s, switch (VAL) is enough.
>>
>>> +/* Checks if RTX of SUBREG_PROMOTED_VAR_P() is promotd for given SIGN.  */
>>
>> promoted, typo.
>>
>>> +#define SUBREG_CHECK_PROMOTED_SIGN(RTX, SIGN)	\
>>> +  ((SIGN) ? SUBREG_PROMOTED_GET ((RTX)) != SRP_SIGNED	\
>>> +   : SUBREG_PROMOTED_SIGNED_P ((RTX)))
>>
>> See above.  And note the ((RTX)) should have been (RTX) anyway.
>>
>>> @@ -5587,7 +5587,8 @@ simplify_subreg (enum machine_mode outermode, rtx op,
>>>  	{
>>>  	  newx = gen_rtx_SUBREG (outermode, SUBREG_REG (op), final_offset);
>>>  	  if (SUBREG_PROMOTED_VAR_P (op)
>>> -	      && SUBREG_PROMOTED_UNSIGNED_P (op) >= 0
>>> +	      && (SUBREG_PROMOTED_UNSIGNED_P (op)
>>> +		  || (SUBREG_PROMOTED_SIGNED_P (op)))
>>
>> SUBREG_PROMOTED_GET (op) != SRP_POINTER ?
>> Also note the extra ()s.
>>
>>>  	      && GET_MODE_CLASS (outermode) == MODE_INT
>>>  	      && IN_RANGE (GET_MODE_SIZE (outermode),
>>>  			   GET_MODE_SIZE (innermode),
>>> @@ -5595,8 +5596,7 @@ simplify_subreg (enum machine_mode outermode, rtx op,
>>>  	      && subreg_lowpart_p (newx))
>>>  	    {
>>>  	      SUBREG_PROMOTED_VAR_P (newx) = 1;
>>> -	      SUBREG_PROMOTED_UNSIGNED_SET
>>> -		(newx, SUBREG_PROMOTED_UNSIGNED_P (op));
>>> +	      SUBREG_PROMOTED_SET (newx, SUBREG_PROMOTED_GET (op));
>>>  	    }
>>>  	  return newx;
>>>  	}
>>
> 
> I have changed the above as well. I will post the patch after testing
> for all the necessary targets.
> 

Hi Jakub,

Please find the patch that addresses these comments.

I have bootstrapped on x86_64-unknown-linux-gnu and regression tested
for x86_64-unknown-linux-gnu, arm-none-linux-gnueabi (using qemu),
aarch64_be-none-elf (Foundation model), aarch64-none-elf
--with-abi=ilp32 (Foundation model) and s390x-ibm-linux (64bit, using
qemu) with no new regression.

Is this OK?

Thanks,
Kugan


gcc/
2014-07-07  Kugan Vivekanandarajah  <kuganv@linaro.org>

	* calls.c (precompute_arguments): Use new SUBREG_PROMOTED_SET
	instead of SUBREG_PROMOTED_UNSIGNED_SET.
	(expand_call): Likewise.
	* cfgexpand.c (expand_gimple_stmt_1): Use SUBREG_PROMOTED_SIGN
	to get promoted mode.
	* combine.c (record_promoted_value): Skip > 0 comparison with
	SUBREG_PROMOTED_UNSIGNED_P as it now returns only 0 or 1.
	* expr.c (convert_move): Use SUBREG_CHECK_PROMOTED_SIGN instead
	of SUBREG_PROMOTED_UNSIGNED_P.
	(convert_modes): Likewise.
	(store_expr): Use SUBREG_PROMOTED_SIGN to get promoted mode.
	Use SUBREG_CHECK_PROMOTED_SIGN instead of SUBREG_PROMOTED_UNSIGNED_P.
	(expand_expr_real_1): Use new SUBREG_PROMOTED_SET instead of
	SUBREG_PROMOTED_UNSIGNED_SET.
	* function.c (assign_parm_setup_reg): Use new SUBREG_PROMOTED_SET
	instead of SUBREG_PROMOTED_UNSIGNED_SET.
	* ifcvt.c (noce_emit_cmove): Updated to use SUBREG_PROMOTED_GET and
	SUBREG_PROMOTED_SET.
	* internal-fn.c (ubsan_expand_si_overflow_mul_check): Use
	SUBREG_PROMOTED_SET instead of SUBREG_PROMOTED_UNSIGNED_SET.
	* optabs.c (widen_operand): Use SUBREG_CHECK_PROMOTED_SIGN instead
	of SUBREG_PROMOTED_UNSIGNED_P.
	* rtl.h (SUBREG_PROMOTED_UNSIGNED_SET): Remove.
	(SUBREG_PROMOTED_SET): New define.
	(SUBREG_PROMOTED_GET): Likewise.
	(SUBREG_PROMOTED_SIGN): Likewise.
	(SUBREG_PROMOTED_SIGNED_P): Likewise.
	(SUBREG_CHECK_PROMOTED_SIGN): Likewise.
	(SUBREG_PROMOTED_UNSIGNED_P): Updated.
	* rtlanal.c (unsigned_reg_p): Use new SUBREG_PROMOTED_SIGN
	instead of SUBREG_PROMOTED_UNSIGNED_GET.
	(nonzero_bits1): Skip > 0 comparison with the results as
	SUBREG_PROMOTED_UNSIGNED_P now returns only 0 or 1.
	(num_sign_bit_copies1): Use SUBREG_PROMOTED_SIGNED_P instead
	of !SUBREG_PROMOTED_UNSIGNED_P.
	* simplify-rtx.c (simplify_unary_operation_1): Use new
	SUBREG_PROMOTED_SIGNED_P instead of !SUBREG_PROMOTED_UNSIGNED_P.
	(simplify_subreg): Use new SUBREG_PROMOTED_SIGNED_P,
	and SUBREG_PROMOTED_SET.



[-- Attachment #2: p1.txt --]
[-- Type: text/plain, Size: 14657 bytes --]

diff --git a/gcc/calls.c b/gcc/calls.c
index 78fe7d8..a3e6faa 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -1484,8 +1484,7 @@ precompute_arguments (int num_actuals, struct arg_data *args)
 	      args[i].initial_value
 		= gen_lowpart_SUBREG (mode, args[i].value);
 	      SUBREG_PROMOTED_VAR_P (args[i].initial_value) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET (args[i].initial_value,
-					    args[i].unsignedp);
+	      SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);
 	    }
 	}
     }
@@ -3365,7 +3364,7 @@ expand_call (tree exp, rtx target, int ignore)
 
 	  target = gen_rtx_SUBREG (TYPE_MODE (type), target, offset);
 	  SUBREG_PROMOTED_VAR_P (target) = 1;
-	  SUBREG_PROMOTED_UNSIGNED_SET (target, unsignedp);
+	  SUBREG_PROMOTED_SET (target, unsignedp);
 	}
 
       /* If size of args is variable or this was a constructor call for a stack
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index e8cd87f..b7a34a2 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -3297,7 +3297,7 @@ expand_gimple_stmt_1 (gimple stmt)
 	      ;
 	    else if (promoted)
 	      {
-		int unsignedp = SUBREG_PROMOTED_UNSIGNED_P (target);
+		int unsignedp = SUBREG_PROMOTED_SIGN (target);
 		/* If TEMP is a VOIDmode constant, use convert_modes to make
 		   sure that we properly convert it.  */
 		if (CONSTANT_P (temp) && GET_MODE (temp) == VOIDmode)
diff --git a/gcc/combine.c b/gcc/combine.c
index 4e7ef55..15ecf1a 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -12427,7 +12427,7 @@ record_promoted_value (rtx insn, rtx subreg)
       rsp = &reg_stat[regno];
       if (rsp->last_set == insn)
 	{
-	  if (SUBREG_PROMOTED_UNSIGNED_P (subreg) > 0)
+	  if (SUBREG_PROMOTED_UNSIGNED_P (subreg))
 	    rsp->last_set_nonzero_bits &= GET_MODE_MASK (mode);
 	}
 
diff --git a/gcc/expr.c b/gcc/expr.c
index 512c024..10f4a96 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -329,7 +329,7 @@ convert_move (rtx to, rtx from, int unsignedp)
   if (GET_CODE (from) == SUBREG && SUBREG_PROMOTED_VAR_P (from)
       && (GET_MODE_PRECISION (GET_MODE (SUBREG_REG (from)))
 	  >= GET_MODE_PRECISION (to_mode))
-      && SUBREG_PROMOTED_UNSIGNED_P (from) == unsignedp)
+      && SUBREG_CHECK_PROMOTED_SIGN (from, unsignedp))
     from = gen_lowpart (to_mode, from), from_mode = to_mode;
 
   gcc_assert (GET_CODE (to) != SUBREG || !SUBREG_PROMOTED_VAR_P (to));
@@ -703,7 +703,7 @@ convert_modes (enum machine_mode mode, enum machine_mode oldmode, rtx x, int uns
 
   if (GET_CODE (x) == SUBREG && SUBREG_PROMOTED_VAR_P (x)
       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))) >= GET_MODE_SIZE (mode)
-      && SUBREG_PROMOTED_UNSIGNED_P (x) == unsignedp)
+      && SUBREG_CHECK_PROMOTED_SIGN (x, unsignedp))
     x = gen_lowpart (mode, SUBREG_REG (x));
 
   if (GET_MODE (x) != VOIDmode)
@@ -5202,25 +5202,25 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
 	  && GET_MODE_PRECISION (GET_MODE (target))
 	     == TYPE_PRECISION (TREE_TYPE (exp)))
 	{
-	  if (TYPE_UNSIGNED (TREE_TYPE (exp))
-	      != SUBREG_PROMOTED_UNSIGNED_P (target))
+	  if (!SUBREG_CHECK_PROMOTED_SIGN (target,
+					  TYPE_UNSIGNED (TREE_TYPE (exp))))
 	    {
 	      /* Some types, e.g. Fortran's logical*4, won't have a signed
 		 version, so use the mode instead.  */
 	      tree ntype
 		= (signed_or_unsigned_type_for
-		   (SUBREG_PROMOTED_UNSIGNED_P (target), TREE_TYPE (exp)));
+		   (SUBREG_PROMOTED_SIGN (target), TREE_TYPE (exp)));
 	      if (ntype == NULL)
 		ntype = lang_hooks.types.type_for_mode
 		  (TYPE_MODE (TREE_TYPE (exp)),
-		   SUBREG_PROMOTED_UNSIGNED_P (target));
+		   SUBREG_PROMOTED_SIGN (target));
 
 	      exp = fold_convert_loc (loc, ntype, exp);
 	    }
 
 	  exp = fold_convert_loc (loc, lang_hooks.types.type_for_mode
 				  (GET_MODE (SUBREG_REG (target)),
-				   SUBREG_PROMOTED_UNSIGNED_P (target)),
+				   SUBREG_PROMOTED_SIGN (target)),
 				  exp);
 
 	  inner_target = SUBREG_REG (target);
@@ -5234,14 +5234,14 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
       if (CONSTANT_P (temp) && GET_MODE (temp) == VOIDmode)
 	{
 	  temp = convert_modes (GET_MODE (target), TYPE_MODE (TREE_TYPE (exp)),
-				temp, SUBREG_PROMOTED_UNSIGNED_P (target));
+				temp, SUBREG_PROMOTED_SIGN (target));
 	  temp = convert_modes (GET_MODE (SUBREG_REG (target)),
 			        GET_MODE (target), temp,
-			        SUBREG_PROMOTED_UNSIGNED_P (target));
+				SUBREG_PROMOTED_SIGN (target));
 	}
 
       convert_move (SUBREG_REG (target), temp,
-		    SUBREG_PROMOTED_UNSIGNED_P (target));
+		    SUBREG_PROMOTED_SIGN (target));
 
       return NULL_RTX;
     }
@@ -9513,7 +9513,7 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
 
 	  temp = gen_lowpart_SUBREG (mode, decl_rtl);
 	  SUBREG_PROMOTED_VAR_P (temp) = 1;
-	  SUBREG_PROMOTED_UNSIGNED_SET (temp, unsignedp);
+	  SUBREG_PROMOTED_SET (temp, unsignedp);
 	  return temp;
 	}
 
diff --git a/gcc/function.c b/gcc/function.c
index 441289e..9509622 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -3093,7 +3093,7 @@ assign_parm_setup_reg (struct assign_parm_data_all *all, tree parm,
 	  /* The argument is already sign/zero extended, so note it
 	     into the subreg.  */
 	  SUBREG_PROMOTED_VAR_P (tempreg) = 1;
-	  SUBREG_PROMOTED_UNSIGNED_SET (tempreg, unsignedp);
+	  SUBREG_PROMOTED_SET (tempreg, unsignedp);
 	}
 
       /* TREE_USED gets set erroneously during expand_assignment.  */
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 816cdaa..56e2f3a 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -1448,8 +1448,8 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
 	  || byte_vtrue != byte_vfalse
 	  || (SUBREG_PROMOTED_VAR_P (vtrue)
 	      != SUBREG_PROMOTED_VAR_P (vfalse))
-	  || (SUBREG_PROMOTED_UNSIGNED_P (vtrue)
-	      != SUBREG_PROMOTED_UNSIGNED_P (vfalse)))
+	  || (SUBREG_PROMOTED_GET (vtrue)
+	      != SUBREG_PROMOTED_GET (vfalse)))
 	return NULL_RTX;
 
       promoted_target = gen_reg_rtx (GET_MODE (reg_vtrue));
@@ -1463,7 +1463,7 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
 
       target = gen_rtx_SUBREG (GET_MODE (vtrue), promoted_target, byte_vtrue);
       SUBREG_PROMOTED_VAR_P (target) = SUBREG_PROMOTED_VAR_P (vtrue);
-      SUBREG_PROMOTED_UNSIGNED_SET (target, SUBREG_PROMOTED_UNSIGNED_P (vtrue));
+      SUBREG_PROMOTED_SET (target, SUBREG_PROMOTED_GET (vtrue));
       emit_move_insn (x, target);
       return x;
     }
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 78f59d6..4e0b964 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -584,12 +584,12 @@ ubsan_expand_si_overflow_mul_check (gimple stmt)
 	  if (GET_CODE (lopart0) == SUBREG)
 	    {
 	      SUBREG_PROMOTED_VAR_P (lopart0) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET (lopart0, 0);
+	      SUBREG_PROMOTED_SET (lopart0, 0);
 	    }
 	  if (GET_CODE (lopart1) == SUBREG)
 	    {
 	      SUBREG_PROMOTED_VAR_P (lopart1) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET (lopart1, 0);
+	      SUBREG_PROMOTED_SET (lopart1, 0);
 	    }
 	  tree halfstype = build_nonstandard_integer_type (hprec, 0);
 	  ops.op0 = make_tree (halfstype, lopart0);
diff --git a/gcc/optabs.c b/gcc/optabs.c
index ca1c194..a00b383 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -368,7 +368,7 @@ widen_operand (rtx op, enum machine_mode mode, enum machine_mode oldmode,
      a promoted object differs from our extension.  */
   if (! no_extend
       || (GET_CODE (op) == SUBREG && SUBREG_PROMOTED_VAR_P (op)
-	  && SUBREG_PROMOTED_UNSIGNED_P (op) == unsignedp))
+	  && SUBREG_CHECK_PROMOTED_SIGN (op, unsignedp)))
     return convert_modes (mode, oldmode, op, unsignedp);
 
   /* If MODE is no wider than a single word, we return a lowpart or paradoxical
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 6ec91a8..38f9f69 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -1585,35 +1585,81 @@ get_full_set_src_cost (rtx x, struct full_rtx_costs *c)
 #define SUBREG_PROMOTED_VAR_P(RTX)					\
   (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED", (RTX), SUBREG)->in_struct)
 
-#define SUBREG_PROMOTED_UNSIGNED_SET(RTX, VAL)				\
-do {									\
-  rtx const _rtx = RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_SET",	\
-				    (RTX), SUBREG);			\
-  if ((VAL) < 0)							\
-    _rtx->volatil = 1;							\
-  else {								\
-    _rtx->volatil = 0;							\
-    _rtx->unchanging = (VAL);						\
-  }									\
-} while (0)
-
 /* Valid for subregs which are SUBREG_PROMOTED_VAR_P().  In that case
    this gives the necessary extensions:
-   0  - signed
-   1  - normal unsigned
+   0  - signed (SPR_SIGNED)
+   1  - normal unsigned (SPR_UNSIGNED)
+   2  - value is both sign and unsign extended for mode
+	(SPR_SIGNED_AND_UNSIGNED).
    -1 - pointer unsigned, which most often can be handled like unsigned
         extension, except for generating instructions where we need to
-	emit special code (ptr_extend insns) on some architectures.  */
+	emit special code (ptr_extend insns) on some architectures
+	(SPR_POINTER). */
+
+const int SRP_POINTER = -1;
+const int SRP_SIGNED = 0;
+const int SRP_UNSIGNED = 1;
+const int SRP_SIGNED_AND_UNSIGNED = 2;
+
+/* Sets promoted mode for SUBREG_PROMOTED_VAR_P().  */
+#define SUBREG_PROMOTED_SET(RTX, VAL)		                        \
+do {								        \
+  rtx const _rtx = RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_SET",		\
+                                    (RTX), SUBREG);			\
+  switch (VAL)								\
+  {									\
+    case SRP_POINTER:							\
+      _rtx->volatil = 0;						\
+      _rtx->unchanging = 0;						\
+      break;								\
+    case SRP_SIGNED:							\
+      _rtx->volatil = 0;						\
+      _rtx->unchanging = 1;						\
+      break;								\
+    case SRP_UNSIGNED:							\
+      _rtx->volatil = 1;						\
+      _rtx->unchanging = 0;						\
+      break;								\
+    case SRP_SIGNED_AND_UNSIGNED:					\
+      _rtx->volatil = 1;						\
+      _rtx->unchanging = 1;						\
+      break;								\
+  }									\
+} while (0)
 
+/* Gets the value stored in promoted mode for SUBREG_PROMOTED_VAR_P(),
+   including SRP_SIGNED_AND_UNSIGNED if promoted for
+   both signed and unsigned.  */
+#define SUBREG_PROMOTED_GET(RTX)	\
+  (2 * (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_GET", (RTX), SUBREG)->volatil)\
+   + (RTX)->unchanging - 1)
+
+/* Predicate to check if RTX of SUBREG_PROMOTED_VAR_P() is promoted
+   for SIGNED type.  */
+#define SUBREG_PROMOTED_SIGNED_P(RTX)	\
+  (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_SIGNED_P", (RTX), SUBREG)->unchanging)
+
+/* Predicate to check if RTX of SUBREG_PROMOTED_VAR_P() is promoted
+   for UNSIGNED type.  */
 #define SUBREG_PROMOTED_UNSIGNED_P(RTX)	\
-  ((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil) \
-   ? -1 : (int) (RTX)->unchanging)
+  (RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil)
+
+/* Checks if RTX of SUBREG_PROMOTED_VAR_P() is promoted for given SIGN.  */
+#define SUBREG_CHECK_PROMOTED_SIGN(RTX, SIGN)	\
+((SIGN) == SRP_POINTER ? SUBREG_PROMOTED_GET (RTX) == SRP_POINTER	\
+ : (SIGN) == SRP_SIGNED ? SUBREG_PROMOTED_SIGNED_P (RTX)		\
+ : SUBREG_PROMOTED_UNSIGNED_P (RTX))
 
 /* True if the subreg was generated by LRA for reload insns.  Such
    subregs are valid only during LRA.  */
 #define LRA_SUBREG_P(RTX)	\
   (RTL_FLAG_CHECK1 ("LRA_SUBREG_P", (RTX), SUBREG)->jump)
 
+/* Returns sign of promoted mode for SUBREG_PROMOTED_VAR_P().  */
+#define SUBREG_PROMOTED_SIGN(RTX)	\
+  ((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_SIGN", (RTX), SUBREG)->volatil) ? 1\
+   : (RTX)->unchanging - 1)
+
 /* Access various components of an ASM_OPERANDS rtx.  */
 
 #define ASM_OPERANDS_TEMPLATE(RTX) XCSTR (RTX, 0, ASM_OPERANDS)
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 82cfc1bf..76e7628 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -670,7 +670,7 @@ unsigned_reg_p (rtx op)
     return true;
 
   if (GET_CODE (op) == SUBREG
-      && SUBREG_PROMOTED_UNSIGNED_P (op))
+      && SUBREG_PROMOTED_SIGN (op))
     return true;
 
   return false;
@@ -4309,7 +4309,7 @@ nonzero_bits1 (const_rtx x, enum machine_mode mode, const_rtx known_x,
 	 been zero-extended, we know that at least the high-order bits
 	 are zero, though others might be too.  */
 
-      if (SUBREG_PROMOTED_VAR_P (x) && SUBREG_PROMOTED_UNSIGNED_P (x) > 0)
+      if (SUBREG_PROMOTED_VAR_P (x) && SUBREG_PROMOTED_UNSIGNED_P (x))
 	nonzero = GET_MODE_MASK (GET_MODE (x))
 		  & cached_nonzero_bits (SUBREG_REG (x), GET_MODE (x),
 					 known_x, known_mode, known_ret);
@@ -4619,7 +4619,7 @@ num_sign_bit_copies1 (const_rtx x, enum machine_mode mode, const_rtx known_x,
 	 and we are looking at it in a wider mode, we know that at least the
 	 high-order bits are known to be sign bit copies.  */
 
-      if (SUBREG_PROMOTED_VAR_P (x) && ! SUBREG_PROMOTED_UNSIGNED_P (x))
+      if (SUBREG_PROMOTED_VAR_P (x) && SUBREG_PROMOTED_SIGNED_P (x))
 	{
 	  num0 = cached_num_sign_bit_copies (SUBREG_REG (x), mode,
 					     known_x, known_mode, known_ret);
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 181b56f..6a35a5f 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -1352,7 +1352,7 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op)
 	 target mode is the same as the variable's promotion.  */
       if (GET_CODE (op) == SUBREG
 	  && SUBREG_PROMOTED_VAR_P (op)
-	  && ! SUBREG_PROMOTED_UNSIGNED_P (op)
+	  && SUBREG_PROMOTED_SIGNED_P (op)
 	  && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
 	{
 	  temp = rtl_hooks.gen_lowpart_no_emit (mode, op);
@@ -1419,7 +1419,7 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op)
 	 target mode is the same as the variable's promotion.  */
       if (GET_CODE (op) == SUBREG
 	  && SUBREG_PROMOTED_VAR_P (op)
-	  && SUBREG_PROMOTED_UNSIGNED_P (op) > 0
+	  && SUBREG_PROMOTED_UNSIGNED_P (op)
 	  && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
 	{
 	  temp = rtl_hooks.gen_lowpart_no_emit (mode, op);
@@ -5587,7 +5587,7 @@ simplify_subreg (enum machine_mode outermode, rtx op,
 	{
 	  newx = gen_rtx_SUBREG (outermode, SUBREG_REG (op), final_offset);
 	  if (SUBREG_PROMOTED_VAR_P (op)
-	      && SUBREG_PROMOTED_UNSIGNED_P (op) >= 0
+	      && SUBREG_PROMOTED_SIGN (op) >= 0
 	      && GET_MODE_CLASS (outermode) == MODE_INT
 	      && IN_RANGE (GET_MODE_SIZE (outermode),
 			   GET_MODE_SIZE (innermode),
@@ -5595,8 +5595,7 @@ simplify_subreg (enum machine_mode outermode, rtx op,
 	      && subreg_lowpart_p (newx))
 	    {
 	      SUBREG_PROMOTED_VAR_P (newx) = 1;
-	      SUBREG_PROMOTED_UNSIGNED_SET
-		(newx, SUBREG_PROMOTED_UNSIGNED_P (op));
+	      SUBREG_PROMOTED_SET (newx, SUBREG_PROMOTED_GET (op));
 	    }
 	  return newx;
 	}

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-06-25  8:36       ` Jakub Jelinek
@ 2014-07-07  6:55         ` Kugan
  2014-07-10 12:15           ` Richard Biener
  0 siblings, 1 reply; 58+ messages in thread
From: Kugan @ 2014-07-07  6:55 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 3228 bytes --]

> For -fwrapv I don't see why you'd get into trouble ever, the VRP computation
> should be well aware of the -fwrapv semantics and the value ranges should
> reflect that.
> 
> For -fno-strict-overflow, I have no idea since it is very weirdly defined.
> 
> In any case, for your example above, the loop is always well defined,
> because for char/short a++ is performed as:
> a = (short) ((int) a + 1)
> So, if the patch turns it into infinite loop, with -Os -fno-strict-overflow
> or -Os, it is simply a problem with the patch.  VR [1, 32768] looks correct,
> a++ is performed only if a is >= 0, therefore before addition [0, 32767].
> But from VR [1, 32768] you can't optimize away the sign extension, make sure
> you don't have there off-by-one?
> 
> It would be nice if the patch contained some testcases, it is easy
> to construct testcases where you have arbitrary VRs on some SSA_NAMEs,
> you just need something to stick the VR on, so you can do something like:
> type foo (type a)
> {
>   if (a < VR_min + 1 || a > VR_max + 1) return; // If VR_min is type minimum or VR_max type maximum this needs to be adjusted of course.
>   a = a + 1;
>   // now you can try some cast that your optimization would try to optimize
>   return a;
> }
> Or void bar (type a) { a = (a & mask) + bias; (or similarly) }
> Make sure to cover the boundary cases, where VR minimum or maximum still
> allow optimizing away zero and/or sign extensions, and another case where
> they are +- 1 and already don't allow it.


Hi Jakub,

For -fwrapv, it is due to how PROMOTE_MODE is defined in arm back-end.
In the test-case, a function (which has signed char return type) returns
-1 in one of the paths. ARM PROMOTE_MODE changes that to 255 and relies
on zero/sign extension generated by RTL again for the correct value. I
saw some other targets also defining similar think. I am therefore
skipping removing zero/sign extension if the ssa variable can be set to
negative integer constants.


As for the -fno-strict-overflow case, if the variables overflows, in VRP
dumps, I see +INF(OVF), but the value range stored in ssa has TYPE_MAX.
We therefore should limit the comparison to (TYPE_MIN < VR_MIN && VR_MAX
< TYPE_MAX) instead of (TYPE_MIN <= VR_MIN && VR_MAX <= TYPE_MAX) when
checking to be sure that this is not the overflowing case. Attached
patch changes this.

I have bootstrapped on x86_64-unknown-linux-gnu and regression tested
for x86_64-unknown-linux-gnu, arm-none-linux-gnueabi (using qemu),
aarch64_be-none-elf (Foundation model), aarch64-none-elf
--with-abi=ilp32 (Foundation model) and s390x-ibm-linux (64bit, using
qemu) with no new regression.

Is this OK?

Thanks,
Kugan

gcc/
2014-07-07  Kugan Vivekanandarajah  <kuganv@linaro.org>

	* calls.c (precompute_arguments): Check is_promoted_for_type
	and set the promoted mode.
	(is_promoted_for_type): New function.
	(expand_expr_real_1): Check is_promoted_for_type
	and set the promoted mode.
	* expr.h (is_promoted_for_type): New function definition.
	* cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
	SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.


gcc/testsuite

2014-07-07  Kugan Vivekanandarajah  <kuganv@linaro.org>

	* gcc.dg/zero_sign_ext_test.c: New test.

[-- Attachment #2: p2.txt --]
[-- Type: text/plain, Size: 10440 bytes --]

diff --git a/gcc/calls.c b/gcc/calls.c
index a3e6faa..eac512f 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -1484,7 +1484,10 @@ precompute_arguments (int num_actuals, struct arg_data *args)
 	      args[i].initial_value
 		= gen_lowpart_SUBREG (mode, args[i].value);
 	      SUBREG_PROMOTED_VAR_P (args[i].initial_value) = 1;
-	      SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);
+	      if (is_promoted_for_type (args[i].tree_value, mode, !args[i].unsignedp))
+		SUBREG_PROMOTED_SET (args[i].initial_value, SRP_SIGNED_AND_UNSIGNED);
+	      else
+		SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);
 	    }
 	}
     }
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index b7a34a2..ac6776d 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -3309,7 +3309,13 @@ expand_gimple_stmt_1 (gimple stmt)
 					  GET_MODE (target), temp, unsignedp);
 		  }
 
-		convert_move (SUBREG_REG (target), temp, unsignedp);
+		if ((SUBREG_PROMOTED_GET (target) == SRP_SIGNED_AND_UNSIGNED)
+		    && (GET_CODE (temp) == SUBREG)
+		    && (GET_MODE (target) == GET_MODE (temp))
+		    && (GET_MODE (SUBREG_REG (target)) == GET_MODE (SUBREG_REG (temp))))
+		  emit_move_insn (SUBREG_REG (target), SUBREG_REG (temp));
+		else
+		  convert_move (SUBREG_REG (target), temp, unsignedp);
 	      }
 	    else if (nontemporal && emit_storent_insn (target, temp))
 	      ;
diff --git a/gcc/expr.c b/gcc/expr.c
index 10f4a96..68708c1 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -68,6 +68,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-address.h"
 #include "cfgexpand.h"
 #include "builtins.h"
+#include "tree-ssa.h"
 
 #ifndef STACK_PUSH_CODE
 #ifdef STACK_GROWS_DOWNWARD
@@ -9210,6 +9211,88 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
 }
 #undef REDUCE_BIT_FIELD
 
+/* Return TRUE if value in SSA is already zero/sign extended for lhs type
+   (type here is the combination of LHS_MODE and LHS_UNS) using value range
+   information stored.  Return FALSE otherwise.  */
+bool
+is_promoted_for_type (tree ssa, enum machine_mode lhs_mode, bool lhs_uns)
+{
+  wide_int type_min, type_max;
+  wide_int min, max, limit;
+  unsigned int prec;
+  tree lhs_type;
+  bool rhs_uns;
+  gimple stmt;
+
+  if (ssa == NULL_TREE
+      || TREE_CODE (ssa) != SSA_NAME
+      || !INTEGRAL_TYPE_P (TREE_TYPE (ssa)))
+    return false;
+
+  /* Return FALSE if value_range is not recorded for SSA.  */
+  if (get_range_info (ssa, &min, &max) != VR_RANGE)
+    return false;
+  stmt = SSA_NAME_DEF_STMT (ssa);
+
+  /* In some architectures, negative integer constants are truncated and
+     sign changed with target defined PROMOTE_MODE macro. This will impact
+     the value range seen here and produce wrong code if zero/sign extensions
+     are eliminated. Therefore, return false if this SSA can have negative
+     integers.  */
+  if (is_gimple_assign (stmt)
+      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_unary))
+    {
+      tree rhs1 = gimple_assign_rhs1 (stmt);
+      if (TREE_CODE (rhs1) == INTEGER_CST
+	  && !TYPE_UNSIGNED (TREE_TYPE (ssa))
+	  && tree_int_cst_compare (rhs1, integer_zero_node) == -1)
+	return false;
+    }
+  else if (gimple_code (stmt) == GIMPLE_PHI)
+    {
+      unsigned int i;
+      for (i = 0; i < gimple_phi_num_args (stmt); ++i)
+	{
+	  tree arg = gimple_phi_arg_def (stmt, i);
+	  if (TREE_CODE (arg) == INTEGER_CST
+	      && !TYPE_UNSIGNED (TREE_TYPE (ssa))
+	      && tree_int_cst_compare (arg, integer_zero_node) == -1)
+	    return false;
+	}
+    }
+
+  lhs_type = lang_hooks.types.type_for_mode (lhs_mode, lhs_uns);
+  rhs_uns = TYPE_UNSIGNED (TREE_TYPE (ssa));
+  prec = min.get_precision ();
+
+  /* Signed maximum value.  */
+  limit = wide_int::from (TYPE_MAX_VALUE (TREE_TYPE (ssa)), prec, SIGNED);
+
+  /* Signedness of LHS and RHS differs but values in range.  */
+  if ((rhs_uns != lhs_uns)
+      && ((!lhs_uns && !wi::neg_p (min, TYPE_SIGN (lhs_type)))
+	  || (lhs_uns && (wi::cmp (max, limit, TYPE_SIGN (TREE_TYPE (ssa))) == -1))))
+    lhs_uns = !lhs_uns;
+
+  /* Signedness of LHS and RHS should match.  */
+  if (rhs_uns != lhs_uns)
+    return false;
+
+  type_min = wide_int::from (TYPE_MIN_VALUE (lhs_type), prec,
+			     TYPE_SIGN (TREE_TYPE (ssa)));
+  type_max = wide_int::from (TYPE_MAX_VALUE (lhs_type), prec,
+			     TYPE_SIGN (TREE_TYPE (ssa)));
+
+  /* Check if values lies in-between the type range.  */
+  if ((wi::neg_p (max, TYPE_SIGN (TREE_TYPE (ssa)))
+       || (wi::cmp (max, type_max, TYPE_SIGN (TREE_TYPE (ssa))) == -1))
+      && (!wi::neg_p (min, TYPE_SIGN (TREE_TYPE (ssa)))
+	  || (wi::cmp (min, 0, TYPE_SIGN (TREE_TYPE (ssa))) == 0)
+	  || (wi::cmp (type_min, min, TYPE_SIGN (TREE_TYPE (ssa))) == -1)))
+    return true;
+
+  return false;
+}
 
 /* Return TRUE if expression STMT is suitable for replacement.  
    Never consider memory loads as replaceable, because those don't ever lead 
@@ -9513,7 +9596,10 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
 
 	  temp = gen_lowpart_SUBREG (mode, decl_rtl);
 	  SUBREG_PROMOTED_VAR_P (temp) = 1;
-	  SUBREG_PROMOTED_SET (temp, unsignedp);
+	  if (is_promoted_for_type (ssa_name, mode, !unsignedp))
+	    SUBREG_PROMOTED_SET (temp, SRP_SIGNED_AND_UNSIGNED);
+	  else
+	    SUBREG_PROMOTED_SET (temp, unsignedp);
 	  return temp;
 	}
 
diff --git a/gcc/expr.h b/gcc/expr.h
index 6a1d3ab..e99d000 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -440,6 +440,7 @@ extern rtx expand_expr_real_1 (tree, rtx, enum machine_mode,
 			       enum expand_modifier, rtx *, bool);
 extern rtx expand_expr_real_2 (sepops, rtx, enum machine_mode,
 			       enum expand_modifier);
+extern bool is_promoted_for_type (tree, enum machine_mode, bool);
 
 /* Generate code for computing expression EXP.
    An rtx for the computed value is returned.  The value is never null.
diff --git a/gcc/testsuite/gcc.dg/zero_sign_ext_test.c b/gcc/testsuite/gcc.dg/zero_sign_ext_test.c
index e69de29..ef37612 100644
--- a/gcc/testsuite/gcc.dg/zero_sign_ext_test.c
+++ b/gcc/testsuite/gcc.dg/zero_sign_ext_test.c
@@ -0,0 +1,135 @@
+extern void abort (void);
+
+/* { dg-options "-O2" } */
+/* { dg-do run } */
+
+#define	TYPE_MAX(type, sign)	\
+  ((!sign) ? ((1 << (sizeof (type) * 8 - 1)) - 1) :	\
+   ((1 << (sizeof (type) * 8)) - 1))
+#define	TYPE_MIN(type, sign)	\
+  ((!sign) ? -(1 << (sizeof (type) * 8 - 1)) : 0)
+
+#define	TEST_FN(NAME, ARG_TYPE, RET_TYPE, CAST_TYPE, VAL, VR_MIN, VR_MAX)\
+  __attribute__((noinline, noclone)) RET_TYPE				\
+      NAME (ARG_TYPE arg){						\
+      RET_TYPE ret = VAL;						\
+      if (arg + 1 < VR_MIN || arg + 1 > VR_MAX) return ret;		\
+      /* Value Range of arg at this point will be  [VR_min, VR_max].  */\
+      arg = arg + VAL;							\
+      ret = (CAST_TYPE)arg;						\
+      return arg;							\
+  }
+
+/* Signed to signed conversion with value in-range. */
+TEST_FN (foo1, short, short, char, 1, TYPE_MIN (char, 0), TYPE_MAX (char, 0));
+TEST_FN (foo2, short, short, char, 1, TYPE_MIN (char, 0) + 1,\
+	TYPE_MAX (char, 0) - 1);
+
+/* Signed to signed conversion with value not in-range. */
+TEST_FN (foo3, short, short, char, -1, TYPE_MIN (short, 0) + 1,  100);
+TEST_FN (foo4, short, short, char, 1, 12, TYPE_MAX (short, 0) + 1);
+
+/* Unsigned to unsigned conversion with value in-range. */
+TEST_FN (foo5, unsigned short, unsigned short, unsigned char, 1,\
+	TYPE_MIN (char, 1) + 1, TYPE_MAX (char, 1) - 1);
+TEST_FN (foo6, unsigned short, unsigned short, unsigned char, 1,\
+	TYPE_MIN (char, 1), TYPE_MAX (char, 1));
+
+/* Unsigned to unsigned conversion with value not in-range. */
+TEST_FN (foo7, unsigned short, unsigned short, unsigned char, 1,\
+	TYPE_MIN (short, 1) + 1, TYPE_MAX (short, 1) - 1);
+TEST_FN (foo8, unsigned short, unsigned short, unsigned char, 1,\
+	TYPE_MIN (short, 1), TYPE_MAX (short, 1));
+
+/* Signed to unsigned conversion with value range positive. */
+TEST_FN (foo9, short, short, unsigned char, -1, 1,\
+	TYPE_MAX (char, 1) - 1);
+TEST_FN (foo10, short, short, unsigned char, 1, 0,\
+	TYPE_MAX (char, 1));
+
+/* Signed to unsigned conversion with value range negative. */
+TEST_FN (foo11, short, short, unsigned char, 1,\
+	TYPE_MIN (char, 0) + 1, TYPE_MAX (char, 0) - 1);
+TEST_FN (foo12, short, short, unsigned char, 1,\
+	TYPE_MIN (char, 0), TYPE_MAX (char, 0));
+
+/* Unsigned to Signed conversion with value range in signed equiv range */
+TEST_FN (foo13, unsigned short, unsigned short, char, 1,\
+	TYPE_MIN (char, 1) + 1, TYPE_MAX (char, 0) - 1);
+TEST_FN (foo14, unsigned short, unsigned short, char, 1,\
+	TYPE_MIN (char, 1), TYPE_MAX (char, 0));
+
+/* Unsigned to Signed conversion with value range not-in signed range */
+TEST_FN (foo15, unsigned short, unsigned short, char, 1,\
+	TYPE_MIN (char, 1) + 1, TYPE_MAX (char, 1) - 1);
+TEST_FN (foo16, unsigned short, unsigned short, char, 1,\
+	TYPE_MIN (char, 1), TYPE_MAX (char, 1));
+
+int main ()
+{
+  /* Signed to signed conversion with value in-range. */
+  /* arg + 1 */
+  if (foo1 (-32) != -31)
+    abort ();
+  /* arg + 1 */
+  if (foo2 (32) != 33)
+    abort ();
+
+  /* Signed to signed conversion with value not in-range. */
+  /* arg - 1 */
+  if (foo3 (-512) != -513)
+    abort ();
+  /* arg + 1 */
+  if (foo4 (512) != 513)
+    abort ();
+
+  /* Unsigned to unsigned conversion with value in-range. */
+  /* arg + 1 */
+  if (foo5 (64) != 65)
+    abort ();
+  /* arg + 1 */
+  if (foo6 (64) != 65)
+    abort ();
+
+  /* Unigned to unsigned conversion with value not in-range. */
+  /* arg + 1 */
+  if (foo7 (512) != 513)
+    abort ();
+  /* arg + 1 */
+  if (foo8 (512) != 513)
+    abort ();
+
+  /* Signed to unsigned conversion with value range positive. */
+  /* arg - 1 */
+  if (foo9 (2) != 1)
+    abort ();
+  /* arg + 1 */
+  if (foo10 (2) != 3)
+    abort ();
+
+  /* Signed to unsigned conversion with value range negative. */
+  /* arg + 1 */
+  if (foo11 (-125) != -124)
+    abort ();
+  /* arg + 1 */
+  if (foo12 (-125) != -124)
+    abort ();
+
+  /* Unsigned to Signed conversion with value range in signed equiv range */
+  /* arg + 1 */
+  if (foo13 (125) != 126)
+    abort ();
+  /* arg + 1 */
+  if (foo14 (125) != 126)
+    abort ();
+
+  /* Unsigned to Signed conversion with value range not-in signed range */
+  /* arg + 1 */
+  if (foo15 (250) != 251)
+    abort ();
+  /* arg + 1 */
+  if (foo16 (250) != 251)
+    abort ();
+
+  return 0;
+}

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED)
  2014-07-07  6:52                   ` Kugan
@ 2014-07-07  8:06                     ` Jakub Jelinek
  0 siblings, 0 replies; 58+ messages in thread
From: Jakub Jelinek @ 2014-07-07  8:06 UTC (permalink / raw)
  To: Kugan; +Cc: Richard Henderson, gcc-patches

On Mon, Jul 07, 2014 at 04:52:39PM +1000, Kugan wrote:

Ok with following nit:

> +/* Checks if RTX of SUBREG_PROMOTED_VAR_P() is promoted for given SIGN.  */
> +#define SUBREG_CHECK_PROMOTED_SIGN(RTX, SIGN)	\
> +((SIGN) == SRP_POINTER ? SUBREG_PROMOTED_GET (RTX) == SRP_POINTER	\
> + : (SIGN) == SRP_SIGNED ? SUBREG_PROMOTED_SIGNED_P (RTX)		\
> + : SUBREG_PROMOTED_UNSIGNED_P (RTX))
>  
>  /* True if the subreg was generated by LRA for reload insns.  Such
>     subregs are valid only during LRA.  */
>  #define LRA_SUBREG_P(RTX)	\
>    (RTL_FLAG_CHECK1 ("LRA_SUBREG_P", (RTX), SUBREG)->jump)
>  
> +/* Returns sign of promoted mode for SUBREG_PROMOTED_VAR_P().  */
> +#define SUBREG_PROMOTED_SIGN(RTX)	\
> +  ((RTL_FLAG_CHECK1 ("SUBREG_PROMOTED_SIGN", (RTX), SUBREG)->volatil) ? 1\
> +   : (RTX)->unchanging - 1)
> +
>  /* Access various components of an ASM_OPERANDS rtx.  */

Can you please move SUBREG_PROMOTED_SIGN definition right below
SUBREG_CHECK_PROMOTED_SIGN, so all the SUBREG promotion macros are next to
each other?

	Jakub

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-07-07  6:55         ` Kugan
@ 2014-07-10 12:15           ` Richard Biener
  2014-07-11 11:52             ` Kugan
  0 siblings, 1 reply; 58+ messages in thread
From: Richard Biener @ 2014-07-10 12:15 UTC (permalink / raw)
  To: Kugan; +Cc: Jakub Jelinek, gcc-patches

On Mon, Jul 7, 2014 at 8:55 AM, Kugan <kugan.vivekanandarajah@linaro.org> wrote:
>> For -fwrapv I don't see why you'd get into trouble ever, the VRP computation
>> should be well aware of the -fwrapv semantics and the value ranges should
>> reflect that.
>>
>> For -fno-strict-overflow, I have no idea since it is very weirdly defined.
>>
>> In any case, for your example above, the loop is always well defined,
>> because for char/short a++ is performed as:
>> a = (short) ((int) a + 1)
>> So, if the patch turns it into infinite loop, with -Os -fno-strict-overflow
>> or -Os, it is simply a problem with the patch.  VR [1, 32768] looks correct,
>> a++ is performed only if a is >= 0, therefore before addition [0, 32767].
>> But from VR [1, 32768] you can't optimize away the sign extension, make sure
>> you don't have there off-by-one?

I have fixed the above bug yesterday.

>> It would be nice if the patch contained some testcases, it is easy
>> to construct testcases where you have arbitrary VRs on some SSA_NAMEs,
>> you just need something to stick the VR on, so you can do something like:
>> type foo (type a)
>> {
>>   if (a < VR_min + 1 || a > VR_max + 1) return; // If VR_min is type minimum or VR_max type maximum this needs to be adjusted of course.
>>   a = a + 1;
>>   // now you can try some cast that your optimization would try to optimize
>>   return a;
>> }
>> Or void bar (type a) { a = (a & mask) + bias; (or similarly) }
>> Make sure to cover the boundary cases, where VR minimum or maximum still
>> allow optimizing away zero and/or sign extensions, and another case where
>> they are +- 1 and already don't allow it.
>
>
> Hi Jakub,
>
> For -fwrapv, it is due to how PROMOTE_MODE is defined in arm back-end.
> In the test-case, a function (which has signed char return type) returns
> -1 in one of the paths. ARM PROMOTE_MODE changes that to 255 and relies
> on zero/sign extension generated by RTL again for the correct value. I
> saw some other targets also defining similar think. I am therefore
> skipping removing zero/sign extension if the ssa variable can be set to
> negative integer constants.

Hm?  I think you should rather check that you are removing a
sign-/zero-extension - PROMOTE_MODE tells you if it will sign- or
zero-extend.  Definitely

+  /* In some architectures, negative integer constants are truncated and
+     sign changed with target defined PROMOTE_MODE macro. This will impact
+     the value range seen here and produce wrong code if zero/sign extensions
+     are eliminated. Therefore, return false if this SSA can have negative
+     integers.  */
+  if (is_gimple_assign (stmt)
+      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_unary))
+    {
+      tree rhs1 = gimple_assign_rhs1 (stmt);
+      if (TREE_CODE (rhs1) == INTEGER_CST
+         && !TYPE_UNSIGNED (TREE_TYPE (ssa))
+         && tree_int_cst_compare (rhs1, integer_zero_node) == -1)
+       return false;

looks completely bogus ... (an unary op with a constant operand?)

instead you want to do sth like

  mode = TYPE_MODE (TREE_TYPE (ssa));
  rhs_uns = TYPE_UNSIGNED (TREE_TYPE (ssa));
  PROMOTE_MODE (mode, rhs_uns, TREE_TYPE (ssa));

instead of initializing rhs_uns from ssas type.  That is, if
PROMOTE_MODE tells you to promote _not_ according to ssas sign then
honor that.

> As for the -fno-strict-overflow case, if the variables overflows, in VRP
> dumps, I see +INF(OVF), but the value range stored in ssa has TYPE_MAX.
> We therefore should limit the comparison to (TYPE_MIN < VR_MIN && VR_MAX
> < TYPE_MAX) instead of (TYPE_MIN <= VR_MIN && VR_MAX <= TYPE_MAX) when
> checking to be sure that this is not the overflowing case. Attached
> patch changes this.

I don't think that's necessary - the overflow cases happen only when
that overflow has undefined behavior, thus any valid program will have
values <= MAX.

Richard.

> I have bootstrapped on x86_64-unknown-linux-gnu and regression tested
> for x86_64-unknown-linux-gnu, arm-none-linux-gnueabi (using qemu),
> aarch64_be-none-elf (Foundation model), aarch64-none-elf
> --with-abi=ilp32 (Foundation model) and s390x-ibm-linux (64bit, using
> qemu) with no new regression.
>
> Is this OK?
>
> Thanks,
> Kugan
>
> gcc/
> 2014-07-07  Kugan Vivekanandarajah  <kuganv@linaro.org>
>
>         * calls.c (precompute_arguments): Check is_promoted_for_type
>         and set the promoted mode.
>         (is_promoted_for_type): New function.
>         (expand_expr_real_1): Check is_promoted_for_type
>         and set the promoted mode.
>         * expr.h (is_promoted_for_type): New function definition.
>         * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
>         SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.
>
>
> gcc/testsuite
>
> 2014-07-07  Kugan Vivekanandarajah  <kuganv@linaro.org>
>
>         * gcc.dg/zero_sign_ext_test.c: New test.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-07-10 12:15           ` Richard Biener
@ 2014-07-11 11:52             ` Kugan
  2014-07-11 12:47               ` Richard Biener
  0 siblings, 1 reply; 58+ messages in thread
From: Kugan @ 2014-07-11 11:52 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jakub Jelinek, gcc-patches

Thanks foe the review and suggestions.

On 10/07/14 22:15, Richard Biener wrote:
> On Mon, Jul 7, 2014 at 8:55 AM, Kugan <kugan.vivekanandarajah@linaro.org> wrote:

[...]

>>
>> For -fwrapv, it is due to how PROMOTE_MODE is defined in arm back-end.
>> In the test-case, a function (which has signed char return type) returns
>> -1 in one of the paths. ARM PROMOTE_MODE changes that to 255 and relies
>> on zero/sign extension generated by RTL again for the correct value. I
>> saw some other targets also defining similar think. I am therefore
>> skipping removing zero/sign extension if the ssa variable can be set to
>> negative integer constants.
> 
> Hm?  I think you should rather check that you are removing a
> sign-/zero-extension - PROMOTE_MODE tells you if it will sign- or
> zero-extend.  Definitely
> 
> +  /* In some architectures, negative integer constants are truncated and
> +     sign changed with target defined PROMOTE_MODE macro. This will impact
> +     the value range seen here and produce wrong code if zero/sign extensions
> +     are eliminated. Therefore, return false if this SSA can have negative
> +     integers.  */
> +  if (is_gimple_assign (stmt)
> +      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_unary))
> +    {
> +      tree rhs1 = gimple_assign_rhs1 (stmt);
> +      if (TREE_CODE (rhs1) == INTEGER_CST
> +         && !TYPE_UNSIGNED (TREE_TYPE (ssa))
> +         && tree_int_cst_compare (rhs1, integer_zero_node) == -1)
> +       return false;
> 
> looks completely bogus ... (an unary op with a constant operand?)
> instead you want to do sth like

I see that unary op with a constant operand is not possible in gimple.
What I wanted to check here is any sort of constant loads; but seems
that will not happen in gimple. Is PHI statements the only possible
statements where we will end up with such constants.

>   mode = TYPE_MODE (TREE_TYPE (ssa));
>   rhs_uns = TYPE_UNSIGNED (TREE_TYPE (ssa));
>   PROMOTE_MODE (mode, rhs_uns, TREE_TYPE (ssa));
> 
> instead of initializing rhs_uns from ssas type.  That is, if
> PROMOTE_MODE tells you to promote _not_ according to ssas sign then
> honor that.

This is triggered in pr43017.c in function foo for arm-none-linux-gnueabi.

where, the gimple statement that cause this looks like:
.....
  # _3 = PHI <_17(7), -1(2)>
bb43:
  return _3;

ARM PROMOTE_MODE changes the sign for integer constants only and hence
looking at the variable with PROMOTE_MODE is not changing the sign in
this case.

#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE)	\
  if (GET_MODE_CLASS (MODE) == MODE_INT		\
      && GET_MODE_SIZE (MODE) < 4)      	\
    {						\
      if (MODE == QImode)			\
	UNSIGNEDP = 1;				\
      else if (MODE == HImode)			\
	UNSIGNEDP = 1;				\
      (MODE) = SImode;				\
    }

>> As for the -fno-strict-overflow case, if the variables overflows, in VRP
>> dumps, I see +INF(OVF), but the value range stored in ssa has TYPE_MAX.
>> We therefore should limit the comparison to (TYPE_MIN < VR_MIN && VR_MAX
>> < TYPE_MAX) instead of (TYPE_MIN <= VR_MIN && VR_MAX <= TYPE_MAX) when
>> checking to be sure that this is not the overflowing case. Attached
>> patch changes this.
> 
> I don't think that's necessary - the overflow cases happen only when
> that overflow has undefined behavior, thus any valid program will have
> values <= MAX.

I see that you have now removed +INF(OVF). I will change it this way.

Thanks again,
Kugan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-07-11 11:52             ` Kugan
@ 2014-07-11 12:47               ` Richard Biener
  2014-07-14  2:58                 ` Kugan
  0 siblings, 1 reply; 58+ messages in thread
From: Richard Biener @ 2014-07-11 12:47 UTC (permalink / raw)
  To: Kugan; +Cc: Jakub Jelinek, gcc-patches

On Fri, Jul 11, 2014 at 1:52 PM, Kugan
<kugan.vivekanandarajah@linaro.org> wrote:
> Thanks foe the review and suggestions.
>
> On 10/07/14 22:15, Richard Biener wrote:
>> On Mon, Jul 7, 2014 at 8:55 AM, Kugan <kugan.vivekanandarajah@linaro.org> wrote:
>
> [...]
>
>>>
>>> For -fwrapv, it is due to how PROMOTE_MODE is defined in arm back-end.
>>> In the test-case, a function (which has signed char return type) returns
>>> -1 in one of the paths. ARM PROMOTE_MODE changes that to 255 and relies
>>> on zero/sign extension generated by RTL again for the correct value. I
>>> saw some other targets also defining similar think. I am therefore
>>> skipping removing zero/sign extension if the ssa variable can be set to
>>> negative integer constants.
>>
>> Hm?  I think you should rather check that you are removing a
>> sign-/zero-extension - PROMOTE_MODE tells you if it will sign- or
>> zero-extend.  Definitely
>>
>> +  /* In some architectures, negative integer constants are truncated and
>> +     sign changed with target defined PROMOTE_MODE macro. This will impact
>> +     the value range seen here and produce wrong code if zero/sign extensions
>> +     are eliminated. Therefore, return false if this SSA can have negative
>> +     integers.  */
>> +  if (is_gimple_assign (stmt)
>> +      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_unary))
>> +    {
>> +      tree rhs1 = gimple_assign_rhs1 (stmt);
>> +      if (TREE_CODE (rhs1) == INTEGER_CST
>> +         && !TYPE_UNSIGNED (TREE_TYPE (ssa))
>> +         && tree_int_cst_compare (rhs1, integer_zero_node) == -1)
>> +       return false;
>>
>> looks completely bogus ... (an unary op with a constant operand?)
>> instead you want to do sth like
>
> I see that unary op with a constant operand is not possible in gimple.
> What I wanted to check here is any sort of constant loads; but seems
> that will not happen in gimple. Is PHI statements the only possible
> statements where we will end up with such constants.

No, in theory you can have

  ssa_1 = -1;

but that's not unary but a GIMPLE_SINGLE_RHS and thus
gimple_assign_rhs_code (stmt) == INTEGER_CST.

>>   mode = TYPE_MODE (TREE_TYPE (ssa));
>>   rhs_uns = TYPE_UNSIGNED (TREE_TYPE (ssa));
>>   PROMOTE_MODE (mode, rhs_uns, TREE_TYPE (ssa));
>>
>> instead of initializing rhs_uns from ssas type.  That is, if
>> PROMOTE_MODE tells you to promote _not_ according to ssas sign then
>> honor that.
>
> This is triggered in pr43017.c in function foo for arm-none-linux-gnueabi.
>
> where, the gimple statement that cause this looks like:
> .....
>   # _3 = PHI <_17(7), -1(2)>
> bb43:
>   return _3;
>
> ARM PROMOTE_MODE changes the sign for integer constants only and hence
> looking at the variable with PROMOTE_MODE is not changing the sign in
> this case.
>
> #define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE)     \
>   if (GET_MODE_CLASS (MODE) == MODE_INT         \
>       && GET_MODE_SIZE (MODE) < 4)              \
>     {                                           \
>       if (MODE == QImode)                       \
>         UNSIGNEDP = 1;                          \
>       else if (MODE == HImode)                  \
>         UNSIGNEDP = 1;                          \
>       (MODE) = SImode;                          \
>     }

Where does it only apply for "constants"?  It applies to all QImode and
HImode entities.

>>> As for the -fno-strict-overflow case, if the variables overflows, in VRP
>>> dumps, I see +INF(OVF), but the value range stored in ssa has TYPE_MAX.
>>> We therefore should limit the comparison to (TYPE_MIN < VR_MIN && VR_MAX
>>> < TYPE_MAX) instead of (TYPE_MIN <= VR_MIN && VR_MAX <= TYPE_MAX) when
>>> checking to be sure that this is not the overflowing case. Attached
>>> patch changes this.
>>
>> I don't think that's necessary - the overflow cases happen only when
>> that overflow has undefined behavior, thus any valid program will have
>> values <= MAX.
>
> I see that you have now removed +INF(OVF). I will change it this way.

I have not removed anything, I just fixed a bug.

Richard.

> Thanks again,
> Kugan
>

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-07-11 12:47               ` Richard Biener
@ 2014-07-14  2:58                 ` Kugan
  2014-07-14 20:11                   ` Bernhard Reutner-Fischer
  2014-07-23 14:22                   ` Richard Biener
  0 siblings, 2 replies; 58+ messages in thread
From: Kugan @ 2014-07-14  2:58 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jakub Jelinek, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 4365 bytes --]

On 11/07/14 22:47, Richard Biener wrote:
> On Fri, Jul 11, 2014 at 1:52 PM, Kugan
> <kugan.vivekanandarajah@linaro.org> wrote:
>> Thanks foe the review and suggestions.
>>
>> On 10/07/14 22:15, Richard Biener wrote:
>>> On Mon, Jul 7, 2014 at 8:55 AM, Kugan <kugan.vivekanandarajah@linaro.org> wrote:
>>
>> [...]
>>
>>>>
>>>> For -fwrapv, it is due to how PROMOTE_MODE is defined in arm back-end.
>>>> In the test-case, a function (which has signed char return type) returns
>>>> -1 in one of the paths. ARM PROMOTE_MODE changes that to 255 and relies
>>>> on zero/sign extension generated by RTL again for the correct value. I
>>>> saw some other targets also defining similar think. I am therefore
>>>> skipping removing zero/sign extension if the ssa variable can be set to
>>>> negative integer constants.
>>>
>>> Hm?  I think you should rather check that you are removing a
>>> sign-/zero-extension - PROMOTE_MODE tells you if it will sign- or
>>> zero-extend.  Definitely
>>>
>>> +  /* In some architectures, negative integer constants are truncated and
>>> +     sign changed with target defined PROMOTE_MODE macro. This will impact
>>> +     the value range seen here and produce wrong code if zero/sign extensions
>>> +     are eliminated. Therefore, return false if this SSA can have negative
>>> +     integers.  */
>>> +  if (is_gimple_assign (stmt)
>>> +      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_unary))
>>> +    {
>>> +      tree rhs1 = gimple_assign_rhs1 (stmt);
>>> +      if (TREE_CODE (rhs1) == INTEGER_CST
>>> +         && !TYPE_UNSIGNED (TREE_TYPE (ssa))
>>> +         && tree_int_cst_compare (rhs1, integer_zero_node) == -1)
>>> +       return false;
>>>
>>> looks completely bogus ... (an unary op with a constant operand?)
>>> instead you want to do sth like
>>
>> I see that unary op with a constant operand is not possible in gimple.
>> What I wanted to check here is any sort of constant loads; but seems
>> that will not happen in gimple. Is PHI statements the only possible
>> statements where we will end up with such constants.
> 
> No, in theory you can have
> 
>   ssa_1 = -1;
> 
> but that's not unary but a GIMPLE_SINGLE_RHS and thus
> gimple_assign_rhs_code (stmt) == INTEGER_CST.
> 
>>>   mode = TYPE_MODE (TREE_TYPE (ssa));
>>>   rhs_uns = TYPE_UNSIGNED (TREE_TYPE (ssa));
>>>   PROMOTE_MODE (mode, rhs_uns, TREE_TYPE (ssa));
>>>
>>> instead of initializing rhs_uns from ssas type.  That is, if
>>> PROMOTE_MODE tells you to promote _not_ according to ssas sign then
>>> honor that.
>>
>> This is triggered in pr43017.c in function foo for arm-none-linux-gnueabi.
>>
>> where, the gimple statement that cause this looks like:
>> .....
>>   # _3 = PHI <_17(7), -1(2)>
>> bb43:
>>   return _3;
>>
>> ARM PROMOTE_MODE changes the sign for integer constants only and hence
>> looking at the variable with PROMOTE_MODE is not changing the sign in
>> this case.
>>
>> #define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE)     \
>>   if (GET_MODE_CLASS (MODE) == MODE_INT         \
>>       && GET_MODE_SIZE (MODE) < 4)              \
>>     {                                           \
>>       if (MODE == QImode)                       \
>>         UNSIGNEDP = 1;                          \
>>       else if (MODE == HImode)                  \
>>         UNSIGNEDP = 1;                          \
>>       (MODE) = SImode;                          \
>>     }
> 
> Where does it only apply for "constants"?  It applies to all QImode and
> HImode entities.

oops, sorry. I don’t know what I was thinking or looking at when I wrote
that :( It indeed fixes my problems. Thanks for that.

Here is the modified patch. Bootstrapped and regression tested for
86_64-unknown-linux-gnu and arm-none-linux-gnueabi with no new regressions.


Is this OK?

Thanks,
Kugan


gcc/

2014-07-14  Kugan Vivekanandarajah  <kuganv@linaro.org>

	* calls.c (precompute_arguments): Check is_promoted_for_type
	and set the promoted mode.
	(is_promoted_for_type): New function.
	(expand_expr_real_1): Check is_promoted_for_type
	and set the promoted mode.
	* expr.h (is_promoted_for_type): New function definition.
	* cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
	SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.


gcc/testsuite
2014-07-14  Kugan Vivekanandarajah  <kuganv@linaro.org>

	* gcc.dg/zero_sign_ext_test.c: New test.

[-- Attachment #2: p2.txt --]
[-- Type: text/plain, Size: 9696 bytes --]

diff --git a/gcc/calls.c b/gcc/calls.c
index a3e6faa..eac512f 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -1484,7 +1484,10 @@ precompute_arguments (int num_actuals, struct arg_data *args)
 	      args[i].initial_value
 		= gen_lowpart_SUBREG (mode, args[i].value);
 	      SUBREG_PROMOTED_VAR_P (args[i].initial_value) = 1;
-	      SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);
+	      if (is_promoted_for_type (args[i].tree_value, mode, !args[i].unsignedp))
+		SUBREG_PROMOTED_SET (args[i].initial_value, SRP_SIGNED_AND_UNSIGNED);
+	      else
+		SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);
 	    }
 	}
     }
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index f98c322..b14626c 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -3309,7 +3309,13 @@ expand_gimple_stmt_1 (gimple stmt)
 					  GET_MODE (target), temp, unsignedp);
 		  }
 
-		convert_move (SUBREG_REG (target), temp, unsignedp);
+		if ((SUBREG_PROMOTED_GET (target) == SRP_SIGNED_AND_UNSIGNED)
+		    && (GET_CODE (temp) == SUBREG)
+		    && (GET_MODE (target) == GET_MODE (temp))
+		    && (GET_MODE (SUBREG_REG (target)) == GET_MODE (SUBREG_REG (temp))))
+		  emit_move_insn (SUBREG_REG (target), SUBREG_REG (temp));
+		else
+		  convert_move (SUBREG_REG (target), temp, unsignedp);
 	      }
 	    else if (nontemporal && emit_storent_insn (target, temp))
 	      ;
diff --git a/gcc/expr.c b/gcc/expr.c
index 7356e76..d25f506 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -68,6 +68,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-address.h"
 #include "cfgexpand.h"
 #include "builtins.h"
+#include "tree-ssa.h"
 
 #ifndef STACK_PUSH_CODE
 #ifdef STACK_GROWS_DOWNWARD
@@ -9224,6 +9225,65 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
 }
 #undef REDUCE_BIT_FIELD
 
+/* Return TRUE if value in SSA is already zero/sign extended for lhs type
+   (type here is the combination of LHS_MODE and LHS_UNS) using value range
+   information stored.  Return FALSE otherwise.  */
+bool
+is_promoted_for_type (tree ssa, enum machine_mode lhs_mode, bool lhs_uns)
+{
+  wide_int type_min, type_max;
+  wide_int min, max, limit;
+  unsigned int prec;
+  tree lhs_type;
+  bool rhs_uns;
+  enum machine_mode mode;
+
+  if (ssa == NULL_TREE
+      || TREE_CODE (ssa) != SSA_NAME
+      || !INTEGRAL_TYPE_P (TREE_TYPE (ssa)))
+    return false;
+
+  /* Return FALSE if value_range is not recorded for SSA.  */
+  if (get_range_info (ssa, &min, &max) != VR_RANGE)
+    return false;
+
+  /* In some architectures, modes are promoted and sign changed with
+     target defined PROMOTE_MODE macro.  If PROMOTE_MODE tells you to
+     promote _not_ according to ssa's sign then honour that.  */
+  mode = TYPE_MODE (TREE_TYPE (ssa));
+  rhs_uns = TYPE_UNSIGNED (TREE_TYPE (ssa));
+  PROMOTE_MODE (mode, rhs_uns, TREE_TYPE (ssa));
+
+  lhs_type = lang_hooks.types.type_for_mode (lhs_mode, lhs_uns);
+  prec = min.get_precision ();
+
+  /* Signed maximum value.  */
+  limit = wide_int::from (TYPE_MAX_VALUE (TREE_TYPE (ssa)), prec, SIGNED);
+
+  /* Signedness of LHS and RHS differs but values in range.  */
+  if ((rhs_uns != lhs_uns)
+      && ((!lhs_uns && !wi::neg_p (min, TYPE_SIGN (lhs_type)))
+	  || (lhs_uns && (wi::cmp (max, limit, TYPE_SIGN (TREE_TYPE (ssa))) == -1))))
+    lhs_uns = !lhs_uns;
+
+  /* Signedness of LHS and RHS should match.  */
+  if (rhs_uns != lhs_uns)
+    return false;
+
+  type_min = wide_int::from (TYPE_MIN_VALUE (lhs_type), prec,
+			     TYPE_SIGN (TREE_TYPE (ssa)));
+  type_max = wide_int::from (TYPE_MAX_VALUE (lhs_type), prec,
+			     TYPE_SIGN (TREE_TYPE (ssa)));
+
+  /* Check if values lies in-between the type range.  */
+  if ((wi::neg_p (max, TYPE_SIGN (TREE_TYPE (ssa)))
+       || (wi::cmp (max, type_max, TYPE_SIGN (TREE_TYPE (ssa))) != 1))
+      && (!wi::neg_p (min, TYPE_SIGN (TREE_TYPE (ssa)))
+	  || (wi::cmp (type_min, min, TYPE_SIGN (TREE_TYPE (ssa))) != 1)))
+    return true;
+
+  return false;
+}
 
 /* Return TRUE if expression STMT is suitable for replacement.  
    Never consider memory loads as replaceable, because those don't ever lead 
@@ -9527,7 +9587,10 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
 
 	  temp = gen_lowpart_SUBREG (mode, decl_rtl);
 	  SUBREG_PROMOTED_VAR_P (temp) = 1;
-	  SUBREG_PROMOTED_SET (temp, unsignedp);
+	  if (is_promoted_for_type (ssa_name, mode, !unsignedp))
+	    SUBREG_PROMOTED_SET (temp, SRP_SIGNED_AND_UNSIGNED);
+	  else
+	    SUBREG_PROMOTED_SET (temp, unsignedp);
 	  return temp;
 	}
 
diff --git a/gcc/expr.h b/gcc/expr.h
index 6a1d3ab..e99d000 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -440,6 +440,7 @@ extern rtx expand_expr_real_1 (tree, rtx, enum machine_mode,
 			       enum expand_modifier, rtx *, bool);
 extern rtx expand_expr_real_2 (sepops, rtx, enum machine_mode,
 			       enum expand_modifier);
+extern bool is_promoted_for_type (tree, enum machine_mode, bool);
 
 /* Generate code for computing expression EXP.
    An rtx for the computed value is returned.  The value is never null.
diff --git a/gcc/testsuite/gcc.dg/zero_sign_ext_test.c b/gcc/testsuite/gcc.dg/zero_sign_ext_test.c
index e69de29..6a52678 100644
--- a/gcc/testsuite/gcc.dg/zero_sign_ext_test.c
+++ b/gcc/testsuite/gcc.dg/zero_sign_ext_test.c
@@ -0,0 +1,136 @@
+extern void abort (void);
+
+/* { dg-options "-O2" } */
+/* { dg-do run } */
+
+#define TYPE_MAX(type, sign)	\
+  ((!sign) ? ((1 << (sizeof (type) * 8 - 1)) - 1) :	\
+   ((1 << (sizeof (type) * 8)) - 1))
+#define TYPE_MIN(type, sign)	\
+  ((!sign) ? -(1 << (sizeof (type) * 8 - 1)) : 0)
+
+#define TEST_FN(NAME, ARG_TYPE, RET_TYPE, CAST_TYPE, VAL, VR_MIN, VR_MAX)\
+  __attribute__((noinline, noclone)) RET_TYPE				\
+      NAME (ARG_TYPE arg){						\
+      RET_TYPE ret = VAL;						\
+      if (arg + 1 < VR_MIN || arg + 1 > VR_MAX) return ret;		\
+      /* Value Range of arg at this point will be  [VR_min, VR_max].  */\
+      arg = arg + VAL;							\
+      ret = (CAST_TYPE)arg;						\
+      return arg;							\
+  }
+
+/* Signed to signed conversion with value in-range.  */
+TEST_FN (foo1, short, short, char, 1, TYPE_MIN (char, 0), TYPE_MAX (char, 0));
+TEST_FN (foo2, short, short, char, 1, TYPE_MIN (char, 0) + 1,\
+	TYPE_MAX (char, 0) - 1);
+
+/* Signed to signed conversion with value not in-range.  */
+TEST_FN (foo3, short, short, char, -1, TYPE_MIN (short, 0) + 1,  100);
+TEST_FN (foo4, short, short, char, 1, 12, TYPE_MAX (short, 0) + 1);
+
+/* Unsigned to unsigned conversion with value in-range.  */
+TEST_FN (foo5, unsigned short, unsigned short, unsigned char, 1,\
+	TYPE_MIN (char, 1) + 1, TYPE_MAX (char, 1) - 1);
+TEST_FN (foo6, unsigned short, unsigned short, unsigned char, 1,\
+	TYPE_MIN (char, 1), TYPE_MAX (char, 1));
+
+/* Unsigned to unsigned conversion with value not in-range.  */
+TEST_FN (foo7, unsigned short, unsigned short, unsigned char, 1,\
+	TYPE_MIN (short, 1) + 1, TYPE_MAX (short, 1) - 1);
+TEST_FN (foo8, unsigned short, unsigned short, unsigned char, 1,\
+	TYPE_MIN (short, 1), TYPE_MAX (short, 1));
+
+/* Signed to unsigned conversion with value range positive.  */
+TEST_FN (foo9, short, short, unsigned char, -1, 1,\
+	TYPE_MAX (char, 1) - 1);
+TEST_FN (foo10, short, short, unsigned char, 1, 0,\
+	TYPE_MAX (char, 1));
+
+/* Signed to unsigned conversion with value range negative.  */
+TEST_FN (foo11, short, short, unsigned char, 1,\
+	TYPE_MIN (char, 0) + 1, TYPE_MAX (char, 0) - 1);
+TEST_FN (foo12, short, short, unsigned char, 1,\
+	TYPE_MIN (char, 0), TYPE_MAX (char, 0));
+
+/* Unsigned to Signed conversion with value range in signed equiv range.  */
+TEST_FN (foo13, unsigned short, unsigned short, char, 1,\
+	TYPE_MIN (char, 1) + 1, TYPE_MAX (char, 0) - 1);
+TEST_FN (foo14, unsigned short, unsigned short, char, 1,\
+	TYPE_MIN (char, 1), TYPE_MAX (char, 0));
+
+/* Unsigned to Signed conversion with value range not-in signed range.  */
+TEST_FN (foo15, unsigned short, unsigned short, char, 1,\
+	TYPE_MIN (char, 1) + 1, TYPE_MAX (char, 1) - 1);
+TEST_FN (foo16, unsigned short, unsigned short, char, 1,\
+	TYPE_MIN (char, 1), TYPE_MAX (char, 1));
+
+int main ()
+{
+  /* Signed to signed conversion with value in-range.  */
+  /* arg + 1.  */
+  if (foo1 (-32) != -31)
+    abort ();
+  /* arg + 1.  */
+  if (foo2 (32) != 33)
+    abort ();
+
+  /* Signed to signed conversion with value not in-range.  */
+  /* arg - 1.  */
+  if (foo3 (-512) != -513)
+    abort ();
+  /* arg + 1.  */
+  if (foo4 (512) != 513)
+    abort ();
+
+  /* Unsigned to unsigned conversion with value in-range.  */
+  /* arg + 1.  */
+  if (foo5 (64) != 65)
+    abort ();
+  /* arg + 1.  */
+  if (foo6 (64) != 65)
+    abort ();
+
+  /* Unsigned to unsigned conversion with value not in-range.  */
+  /* arg + 1.  */
+  if (foo7 (512) != 513)
+    abort ();
+  /* arg + 1.  */
+  if (foo8 (512) != 513)
+    abort ();
+
+  /* Signed to unsigned conversion with value range positive.  */
+  /* arg - 1.  */
+  if (foo9 (2) != 1)
+    abort ();
+  /* arg + 1.  */
+  if (foo10 (2) != 3)
+    abort ();
+
+  /* Signed to unsigned conversion with value range negative.  */
+  /* arg + 1.  */
+  if (foo11 (-125) != -124)
+    abort ();
+  /* arg + 1.  */
+  if (foo12 (-125) != -124)
+    abort ();
+
+  /* Unsigned to Signed conversion with value range in signed equiv range.  */
+  /* arg + 1.  */
+  if (foo13 (125) != 126)
+    abort ();
+  /* arg + 1.  */
+  if (foo14 (125) != 126)
+    abort ();
+
+  /* Unsigned to Signed conversion with value range not-in signed range.  */
+  /* arg + 1.  */
+  if (foo15 (250) != 251)
+    abort ();
+  /* arg + 1.  */
+  if (foo16 (250) != 251)
+    abort ();
+
+  return 0;
+}
+

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-07-14  2:58                 ` Kugan
@ 2014-07-14 20:11                   ` Bernhard Reutner-Fischer
  2014-07-23 14:22                   ` Richard Biener
  1 sibling, 0 replies; 58+ messages in thread
From: Bernhard Reutner-Fischer @ 2014-07-14 20:11 UTC (permalink / raw)
  To: Kugan, Richard Biener; +Cc: Jakub Jelinek, gcc-patches

On 14 July 2014 04:58:17 Kugan <kugan.vivekanandarajah@linaro.org> wrote:

> On 11/07/14 22:47, Richard Biener wrote:
> > On Fri, Jul 11, 2014 at 1:52 PM, Kugan
> > <kugan.vivekanandarajah@linaro.org> wrote:
> >> Thanks foe the review and suggestions.
> >>
> >> On 10/07/14 22:15, Richard Biener wrote:
> >>> On Mon, Jul 7, 2014 at 8:55 AM, Kugan 
> <kugan.vivekanandarajah@linaro.org> wrote:
> >>
> >> [...]
> >>
> >>>>
> >>>> For -fwrapv, it is due to how PROMOTE_MODE is defined in arm back-end.
> >>>> In the test-case, a function (which has signed char return type) returns
> >>>> -1 in one of the paths. ARM PROMOTE_MODE changes that to 255 and relies
> >>>> on zero/sign extension generated by RTL again for the correct value. I
> >>>> saw some other targets also defining similar think. I am therefore
> >>>> skipping removing zero/sign extension if the ssa variable can be set to
> >>>> negative integer constants.
> >>>
> >>> Hm?  I think you should rather check that you are removing a
> >>> sign-/zero-extension - PROMOTE_MODE tells you if it will sign- or
> >>> zero-extend.  Definitely
> >>>
> >>> +  /* In some architectures, negative integer constants are truncated and
> >>> +     sign changed with target defined PROMOTE_MODE macro. This will impact
> >>> +     the value range seen here and produce wrong code if zero/sign 
> extensions
> >>> +     are eliminated. Therefore, return false if this SSA can have negative
> >>> +     integers.  */
> >>> +  if (is_gimple_assign (stmt)
> >>> +      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_unary))
> >>> +    {
> >>> +      tree rhs1 = gimple_assign_rhs1 (stmt);
> >>> +      if (TREE_CODE (rhs1) == INTEGER_CST
> >>> +         && !TYPE_UNSIGNED (TREE_TYPE (ssa))
> >>> +         && tree_int_cst_compare (rhs1, integer_zero_node) == -1)
> >>> +       return false;
> >>>
> >>> looks completely bogus ... (an unary op with a constant operand?)
> >>> instead you want to do sth like
> >>
> >> I see that unary op with a constant operand is not possible in gimple.
> >> What I wanted to check here is any sort of constant loads; but seems
> >> that will not happen in gimple. Is PHI statements the only possible
> >> statements where we will end up with such constants.
> >
> > No, in theory you can have
> >
> >   ssa_1 = -1;
> >
> > but that's not unary but a GIMPLE_SINGLE_RHS and thus
> > gimple_assign_rhs_code (stmt) == INTEGER_CST.
> >
> >>>   mode = TYPE_MODE (TREE_TYPE (ssa));
> >>>   rhs_uns = TYPE_UNSIGNED (TREE_TYPE (ssa));
> >>>   PROMOTE_MODE (mode, rhs_uns, TREE_TYPE (ssa));
> >>>
> >>> instead of initializing rhs_uns from ssas type.  That is, if
> >>> PROMOTE_MODE tells you to promote _not_ according to ssas sign then
> >>> honor that.
> >>
> >> This is triggered in pr43017.c in function foo for arm-none-linux-gnueabi.
> >>
> >> where, the gimple statement that cause this looks like:
> >> .....
> >>   # _3 = PHI <_17(7), -1(2)>
> >> bb43:
> >>   return _3;
> >>
> >> ARM PROMOTE_MODE changes the sign for integer constants only and hence
> >> looking at the variable with PROMOTE_MODE is not changing the sign in
> >> this case.
> >>
> >> #define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE)     \
> >>   if (GET_MODE_CLASS (MODE) == MODE_INT         \
> >>       && GET_MODE_SIZE (MODE) < 4)              \
> >>     {                                           \
> >>       if (MODE == QImode)                       \
> >>         UNSIGNEDP = 1;                          \
> >>       else if (MODE == HImode)                  \
> >>         UNSIGNEDP = 1;                          \
> >>       (MODE) = SImode;                          \
> >>     }
> >
> > Where does it only apply for "constants"?  It applies to all QImode and
> > HImode entities.
>
> oops, sorry. I don’t know what I was thinking or looking at when I wrote
> that :( It indeed fixes my problems. Thanks for that.
>
> Here is the modified patch. Bootstrapped and regression tested for
> 86_64-unknown-linux-gnu and arm-none-linux-gnueabi with no new regressions.
>
>
> Is this OK?
>
> Thanks,
> Kugan
>
>
> gcc/
>
> 2014-07-14  Kugan Vivekanandarajah  <kuganv@linaro.org>
>
> 	* calls.c (precompute_arguments): Check is_promoted_for_type
> 	and set the promoted mode.
> 	(is_promoted_for_type): New function.

Don't we name predicates more like promoted_for_type_p?

Thanks,
> 	(expand_expr_real_1): Check is_promoted_for_type
> 	and set the promoted mode.
> 	* expr.h (is_promoted_for_type): New function definition.
> 	* cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
> 	SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.
>
>
> gcc/testsuite
> 2014-07-14  Kugan Vivekanandarajah  <kuganv@linaro.org>
>
> 	* gcc.dg/zero_sign_ext_test.c: New test.



Sent with AquaMail for Android
http://www.aqua-mail.com


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-07-14  2:58                 ` Kugan
  2014-07-14 20:11                   ` Bernhard Reutner-Fischer
@ 2014-07-23 14:22                   ` Richard Biener
  2014-08-01  4:51                     ` Kugan
  1 sibling, 1 reply; 58+ messages in thread
From: Richard Biener @ 2014-07-23 14:22 UTC (permalink / raw)
  To: Kugan; +Cc: Jakub Jelinek, gcc-patches

On Mon, Jul 14, 2014 at 4:57 AM, Kugan
<kugan.vivekanandarajah@linaro.org> wrote:
> On 11/07/14 22:47, Richard Biener wrote:
>> On Fri, Jul 11, 2014 at 1:52 PM, Kugan
>> <kugan.vivekanandarajah@linaro.org> wrote:
>>> Thanks foe the review and suggestions.
>>>
>>> On 10/07/14 22:15, Richard Biener wrote:
>>>> On Mon, Jul 7, 2014 at 8:55 AM, Kugan <kugan.vivekanandarajah@linaro.org> wrote:
>>>
>>> [...]
>>>
>>>>>
>>>>> For -fwrapv, it is due to how PROMOTE_MODE is defined in arm back-end.
>>>>> In the test-case, a function (which has signed char return type) returns
>>>>> -1 in one of the paths. ARM PROMOTE_MODE changes that to 255 and relies
>>>>> on zero/sign extension generated by RTL again for the correct value. I
>>>>> saw some other targets also defining similar think. I am therefore
>>>>> skipping removing zero/sign extension if the ssa variable can be set to
>>>>> negative integer constants.
>>>>
>>>> Hm?  I think you should rather check that you are removing a
>>>> sign-/zero-extension - PROMOTE_MODE tells you if it will sign- or
>>>> zero-extend.  Definitely
>>>>
>>>> +  /* In some architectures, negative integer constants are truncated and
>>>> +     sign changed with target defined PROMOTE_MODE macro. This will impact
>>>> +     the value range seen here and produce wrong code if zero/sign extensions
>>>> +     are eliminated. Therefore, return false if this SSA can have negative
>>>> +     integers.  */
>>>> +  if (is_gimple_assign (stmt)
>>>> +      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_unary))
>>>> +    {
>>>> +      tree rhs1 = gimple_assign_rhs1 (stmt);
>>>> +      if (TREE_CODE (rhs1) == INTEGER_CST
>>>> +         && !TYPE_UNSIGNED (TREE_TYPE (ssa))
>>>> +         && tree_int_cst_compare (rhs1, integer_zero_node) == -1)
>>>> +       return false;
>>>>
>>>> looks completely bogus ... (an unary op with a constant operand?)
>>>> instead you want to do sth like
>>>
>>> I see that unary op with a constant operand is not possible in gimple.
>>> What I wanted to check here is any sort of constant loads; but seems
>>> that will not happen in gimple. Is PHI statements the only possible
>>> statements where we will end up with such constants.
>>
>> No, in theory you can have
>>
>>   ssa_1 = -1;
>>
>> but that's not unary but a GIMPLE_SINGLE_RHS and thus
>> gimple_assign_rhs_code (stmt) == INTEGER_CST.
>>
>>>>   mode = TYPE_MODE (TREE_TYPE (ssa));
>>>>   rhs_uns = TYPE_UNSIGNED (TREE_TYPE (ssa));
>>>>   PROMOTE_MODE (mode, rhs_uns, TREE_TYPE (ssa));
>>>>
>>>> instead of initializing rhs_uns from ssas type.  That is, if
>>>> PROMOTE_MODE tells you to promote _not_ according to ssas sign then
>>>> honor that.
>>>
>>> This is triggered in pr43017.c in function foo for arm-none-linux-gnueabi.
>>>
>>> where, the gimple statement that cause this looks like:
>>> .....
>>>   # _3 = PHI <_17(7), -1(2)>
>>> bb43:
>>>   return _3;
>>>
>>> ARM PROMOTE_MODE changes the sign for integer constants only and hence
>>> looking at the variable with PROMOTE_MODE is not changing the sign in
>>> this case.
>>>
>>> #define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE)     \
>>>   if (GET_MODE_CLASS (MODE) == MODE_INT         \
>>>       && GET_MODE_SIZE (MODE) < 4)              \
>>>     {                                           \
>>>       if (MODE == QImode)                       \
>>>         UNSIGNEDP = 1;                          \
>>>       else if (MODE == HImode)                  \
>>>         UNSIGNEDP = 1;                          \
>>>       (MODE) = SImode;                          \
>>>     }
>>
>> Where does it only apply for "constants"?  It applies to all QImode and
>> HImode entities.
>
> oops, sorry. I don’t know what I was thinking or looking at when I wrote
> that :( It indeed fixes my problems. Thanks for that.
>
> Here is the modified patch. Bootstrapped and regression tested for
> 86_64-unknown-linux-gnu and arm-none-linux-gnueabi with no new regressions.
>
>
> Is this OK?

+  lhs_type = lang_hooks.types.type_for_mode (lhs_mode, lhs_uns);
...
+      && ((!lhs_uns && !wi::neg_p (min, TYPE_SIGN (lhs_type)))
...
+  type_min = wide_int::from (TYPE_MIN_VALUE (lhs_type), prec,
+                            TYPE_SIGN (TREE_TYPE (ssa)));
+  type_max = wide_int::from (TYPE_MAX_VALUE (lhs_type), prec,
+                            TYPE_SIGN (TREE_TYPE (ssa)));

you shouldn't try getting at lhs_type.  Btw, do you want to constrain
lhs_mode to MODE_INTs somewhere?

For TYPE_SIGN use lhs_uns instead, for the min/max value you
should use wi::min_value () and wi::max_value () instead.

You are still using TYPE_SIGN (TREE_TYPE (ssa)) here and later,
but we computed rhs_uns "properly" using PROMOTE_MODE.
I think  the code with re-setting lhs_uns if rhs_uns != lhs_uns
and later using TYPE_SIGN again is pretty hard to follow.

Btw, it seems you need to conditionalize the call to PROMOTE_MODE
on its availability.

Isn't it simply about choosing a proper range we need to restrict
ssa to?  That is, dependent on rhs_uns computed by PROMOTE_MODE,
simply:

+  mode = TYPE_MODE (TREE_TYPE (ssa));
+  rhs_uns = TYPE_UNSIGNED (TREE_TYPE (ssa));
#ifdef PROMOTE_MODE
+  PROMOTE_MODE (mode, rhs_uns, TREE_TYPE (ssa));
#endif

 if (rhs_uns)
   return wi::ge_p (min, 0);  // if min >= 0 then range contains positive values
 else
   return wi::le_p (max, wi::max_value (TYPE_PRECISION (TREE_TYPE
(ssa)), SIGNED);  // if max <= signed-max-of-type then range doesn't
need sign-extension

?

Thanks,
Richard.

> Thanks,
> Kugan
>
>
> gcc/
>
> 2014-07-14  Kugan Vivekanandarajah  <kuganv@linaro.org>
>
>         * calls.c (precompute_arguments): Check is_promoted_for_type
>         and set the promoted mode.
>         (is_promoted_for_type): New function.
>         (expand_expr_real_1): Check is_promoted_for_type
>         and set the promoted mode.
>         * expr.h (is_promoted_for_type): New function definition.
>         * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
>         SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.
>
>
> gcc/testsuite
> 2014-07-14  Kugan Vivekanandarajah  <kuganv@linaro.org>
>
>         * gcc.dg/zero_sign_ext_test.c: New test.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-07-23 14:22                   ` Richard Biener
@ 2014-08-01  4:51                     ` Kugan
  2014-08-01 11:16                       ` Richard Biener
  0 siblings, 1 reply; 58+ messages in thread
From: Kugan @ 2014-08-01  4:51 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jakub Jelinek, gcc-patches

> +  lhs_type = lang_hooks.types.type_for_mode (lhs_mode, lhs_uns);
> ...
> +      && ((!lhs_uns && !wi::neg_p (min, TYPE_SIGN (lhs_type)))
> ...
> +  type_min = wide_int::from (TYPE_MIN_VALUE (lhs_type), prec,
> +                            TYPE_SIGN (TREE_TYPE (ssa)));
> +  type_max = wide_int::from (TYPE_MAX_VALUE (lhs_type), prec,
> +                            TYPE_SIGN (TREE_TYPE (ssa)));
> 
> you shouldn't try getting at lhs_type.  Btw, do you want to constrain
> lhs_mode to MODE_INTs somewhere?

Is this in addition to !INTEGRAL_TYPE_P (TREE_TYPE (ssa)))? Do you mean
that I should check lhs_mode as well?

> For TYPE_SIGN use lhs_uns instead, for the min/max value you
> should use wi::min_value () and wi::max_value () instead.
> 
> You are still using TYPE_SIGN (TREE_TYPE (ssa)) here and later,
> but we computed rhs_uns "properly" using PROMOTE_MODE.
> I think  the code with re-setting lhs_uns if rhs_uns != lhs_uns
> and later using TYPE_SIGN again is pretty hard to follow.
> 
> Btw, it seems you need to conditionalize the call to PROMOTE_MODE
> on its availability.
> 
> Isn't it simply about choosing a proper range we need to restrict
> ssa to?  That is, dependent on rhs_uns computed by PROMOTE_MODE,
> simply:
> 
> +  mode = TYPE_MODE (TREE_TYPE (ssa));
> +  rhs_uns = TYPE_UNSIGNED (TREE_TYPE (ssa));
> #ifdef PROMOTE_MODE
> +  PROMOTE_MODE (mode, rhs_uns, TREE_TYPE (ssa));
> #endif
> 
>  if (rhs_uns)
>    return wi::ge_p (min, 0);  // if min >= 0 then range contains positive values
>  else
>    return wi::le_p (max, wi::max_value (TYPE_PRECISION (TREE_TYPE
> (ssa)), SIGNED);  // if max <= signed-max-of-type then range doesn't
> need sign-extension

I think we will have to check that ssa has necessary sign/zero extension
when assigned to lhs_type. If PROMOTE_MODE tells us that ssa's type will
be interpreted differently, the value range of ssa also will have
corresponding range.  In this cases, shouldn’t we have to check for
upper and lower limit for both min and max?

How about this?

bool
promoted_for_type_p (tree ssa, enum machine_mode lhs_mode, bool lhs_uns)
{
  wide_int min, max;
  tree lhs_type, rhs_type;
  bool rhs_uns;
  enum machine_mode rhs_mode;
  tree min_tree, max_tree;

  if (ssa == NULL_TREE
      || TREE_CODE (ssa) != SSA_NAME
      || !INTEGRAL_TYPE_P (TREE_TYPE (ssa)))
    return false;

  /* Return FALSE if value_range is not recorded for SSA.  */
  if (get_range_info (ssa, &min, &max) != VR_RANGE)
    return false;

  rhs_uns = TYPE_UNSIGNED (TREE_TYPE (ssa));
  if (rhs_uns != lhs_uns)
    {
      /* Signedness of LHS and RHS differs and values also cannot be
	 represented in LHS range.  */
      unsigned int prec = min.get_precision ();
      if ((lhs_uns && wi::neg_p (min, rhs_uns ? UNSIGNED : SIGNED))
	  || (!lhs_uns && !wi::le_p (max,
				    wi::max_value (prec, SIGNED),
				    rhs_uns ? UNSIGNED : SIGNED)))
	return false;
    }

  /* In some architectures, modes are promoted and sign changed with
     target defined PROMOTE_MODE macro.  If PROMOTE_MODE tells you to
     promote _not_ according to ssa's sign then honour that.  */
  rhs_mode = TYPE_MODE (TREE_TYPE (ssa));
#ifdef PROMOTE_MODE
  PROMOTE_MODE (rhs_mode, rhs_uns, TREE_TYPE (ssa));
#endif

  rhs_type = lang_hooks.types.type_for_mode (rhs_mode, rhs_uns);
  lhs_type = lang_hooks.types.type_for_mode (lhs_mode, lhs_uns);
  min_tree = wide_int_to_tree (rhs_type, min);
  max_tree = wide_int_to_tree (rhs_type, max);

  /* Check if values lies in-between the type range.  */
  if (int_fits_type_p (min_tree, lhs_type)
      && int_fits_type_p (max_tree, lhs_type))
    return true;
  else
    return false;
}


Thanks,
Kugan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-01  4:51                     ` Kugan
@ 2014-08-01 11:16                       ` Richard Biener
  2014-08-01 16:04                         ` Kugan
  0 siblings, 1 reply; 58+ messages in thread
From: Richard Biener @ 2014-08-01 11:16 UTC (permalink / raw)
  To: Kugan; +Cc: Jakub Jelinek, gcc-patches

On Fri, Aug 1, 2014 at 6:51 AM, Kugan <kugan.vivekanandarajah@linaro.org> wrote:
>> +  lhs_type = lang_hooks.types.type_for_mode (lhs_mode, lhs_uns);
>> ...
>> +      && ((!lhs_uns && !wi::neg_p (min, TYPE_SIGN (lhs_type)))
>> ...
>> +  type_min = wide_int::from (TYPE_MIN_VALUE (lhs_type), prec,
>> +                            TYPE_SIGN (TREE_TYPE (ssa)));
>> +  type_max = wide_int::from (TYPE_MAX_VALUE (lhs_type), prec,
>> +                            TYPE_SIGN (TREE_TYPE (ssa)));
>>
>> you shouldn't try getting at lhs_type.  Btw, do you want to constrain
>> lhs_mode to MODE_INTs somewhere?
>
> Is this in addition to !INTEGRAL_TYPE_P (TREE_TYPE (ssa)))? Do you mean
> that I should check lhs_mode as well?

No, that's probably enough.

>> For TYPE_SIGN use lhs_uns instead, for the min/max value you
>> should use wi::min_value () and wi::max_value () instead.
>>
>> You are still using TYPE_SIGN (TREE_TYPE (ssa)) here and later,
>> but we computed rhs_uns "properly" using PROMOTE_MODE.
>> I think  the code with re-setting lhs_uns if rhs_uns != lhs_uns
>> and later using TYPE_SIGN again is pretty hard to follow.
>>
>> Btw, it seems you need to conditionalize the call to PROMOTE_MODE
>> on its availability.
>>
>> Isn't it simply about choosing a proper range we need to restrict
>> ssa to?  That is, dependent on rhs_uns computed by PROMOTE_MODE,
>> simply:
>>
>> +  mode = TYPE_MODE (TREE_TYPE (ssa));
>> +  rhs_uns = TYPE_UNSIGNED (TREE_TYPE (ssa));
>> #ifdef PROMOTE_MODE
>> +  PROMOTE_MODE (mode, rhs_uns, TREE_TYPE (ssa));
>> #endif
>>
>>  if (rhs_uns)
>>    return wi::ge_p (min, 0);  // if min >= 0 then range contains positive values
>>  else
>>    return wi::le_p (max, wi::max_value (TYPE_PRECISION (TREE_TYPE
>> (ssa)), SIGNED);  // if max <= signed-max-of-type then range doesn't
>> need sign-extension
>
> I think we will have to check that ssa has necessary sign/zero extension
> when assigned to lhs_type. If PROMOTE_MODE tells us that ssa's type will
> be interpreted differently, the value range of ssa also will have
> corresponding range.  In this cases, shouldn’t we have to check for
> upper and lower limit for both min and max?

Hmm?  That's exactly what the check is testing...  we know that
min <= max thus if min >= 0 then max >= 0.

zero_extension will never do anything on [0, INF]

If max < MAX-SIGNED then sign-extension will not do anything.  Ok,
sign-extension will do sth for negative values still.  So rather

  if (rhs_uns)
    return wi::geu_p (min, 0);
  else
    return wi::ges_p (min, 0) && wi::les_p (max, wi::max_value
(TYPE_PRECISION (TREE_TYPE (ssa)), SIGNED));

?

I don't like the use of int_fits_type_p you propose.

Richard.

> How about this?
>
> bool
> promoted_for_type_p (tree ssa, enum machine_mode lhs_mode, bool lhs_uns)
> {
>   wide_int min, max;
>   tree lhs_type, rhs_type;
>   bool rhs_uns;
>   enum machine_mode rhs_mode;
>   tree min_tree, max_tree;
>
>   if (ssa == NULL_TREE
>       || TREE_CODE (ssa) != SSA_NAME
>       || !INTEGRAL_TYPE_P (TREE_TYPE (ssa)))
>     return false;
>
>   /* Return FALSE if value_range is not recorded for SSA.  */
>   if (get_range_info (ssa, &min, &max) != VR_RANGE)
>     return false;
>
>   rhs_uns = TYPE_UNSIGNED (TREE_TYPE (ssa));
>   if (rhs_uns != lhs_uns)
>     {
>       /* Signedness of LHS and RHS differs and values also cannot be
>          represented in LHS range.  */
>       unsigned int prec = min.get_precision ();
>       if ((lhs_uns && wi::neg_p (min, rhs_uns ? UNSIGNED : SIGNED))
>           || (!lhs_uns && !wi::le_p (max,
>                                     wi::max_value (prec, SIGNED),
>                                     rhs_uns ? UNSIGNED : SIGNED)))
>         return false;
>     }
>
>   /* In some architectures, modes are promoted and sign changed with
>      target defined PROMOTE_MODE macro.  If PROMOTE_MODE tells you to
>      promote _not_ according to ssa's sign then honour that.  */
>   rhs_mode = TYPE_MODE (TREE_TYPE (ssa));
> #ifdef PROMOTE_MODE
>   PROMOTE_MODE (rhs_mode, rhs_uns, TREE_TYPE (ssa));
> #endif
>
>   rhs_type = lang_hooks.types.type_for_mode (rhs_mode, rhs_uns);
>   lhs_type = lang_hooks.types.type_for_mode (lhs_mode, lhs_uns);
>   min_tree = wide_int_to_tree (rhs_type, min);
>   max_tree = wide_int_to_tree (rhs_type, max);
>
>   /* Check if values lies in-between the type range.  */
>   if (int_fits_type_p (min_tree, lhs_type)
>       && int_fits_type_p (max_tree, lhs_type))
>     return true;
>   else
>     return false;
> }
>
>
> Thanks,
> Kugan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-01 11:16                       ` Richard Biener
@ 2014-08-01 16:04                         ` Kugan
  2014-08-03 23:56                           ` Kugan
  2014-08-05 14:18                           ` Richard Biener
  0 siblings, 2 replies; 58+ messages in thread
From: Kugan @ 2014-08-01 16:04 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jakub Jelinek, gcc-patches

>>>  if (rhs_uns)
>>>    return wi::ge_p (min, 0);  // if min >= 0 then range contains positive values
>>>  else
>>>    return wi::le_p (max, wi::max_value (TYPE_PRECISION (TREE_TYPE
>>> (ssa)), SIGNED);  // if max <= signed-max-of-type then range doesn't
>>> need sign-extension
>>
>> I think we will have to check that ssa has necessary sign/zero extension
>> when assigned to lhs_type. If PROMOTE_MODE tells us that ssa's type will
>> be interpreted differently, the value range of ssa also will have
>> corresponding range.  In this cases, shouldn’t we have to check for
>> upper and lower limit for both min and max?
> 
> Hmm?  That's exactly what the check is testing...  we know that
> min <= max thus if min >= 0 then max >= 0.
> 
> zero_extension will never do anything on [0, INF]
> 
> If max < MAX-SIGNED then sign-extension will not do anything.  Ok,
> sign-extension will do sth for negative values still.  So rather
> 
>   if (rhs_uns)
>     return wi::geu_p (min, 0);
>   else
>     return wi::ges_p (min, 0) && wi::les_p (max, wi::max_value
> (TYPE_PRECISION (TREE_TYPE (ssa)), SIGNED));
> 
> ?

Thanks for the explanation. I agree. Don’t we have to however check this
on lhs_uns as this function is checking if ssa is promoted for lhs_sign
and lhs_mode?

Here is an attempt based on this. I ran regression testing with
arm-none-linux-gnueabi on qemu-arm without any new regressions.

Sine I am not comparing value ranges to see if it can be represented in
lhs_sigh, I can now skip the PROMOTED_MODE check.

I am still using wide_int::from (instead of wi::max_value) to get the
limit as I have to match the precision with min, max precision.
otherwise wide_int comparisons will not work. Is there a better way for
this?

/* Return TRUE if value in SSA is already zero/sign extended for lhs type
   (type here is the combination of LHS_MODE and LHS_UNS) using value range
   information stored.  Return FALSE otherwise.  */
bool
promoted_for_type_p (tree ssa, enum machine_mode lhs_mode, bool lhs_uns)
{
  wide_int min, max, limit;
  tree lhs_type;
  bool rhs_uns;
  signop rhs_signop;

  if (ssa == NULL_TREE
      || TREE_CODE (ssa) != SSA_NAME
      || !INTEGRAL_TYPE_P (TREE_TYPE (ssa)))
    return false;

  /* Return FALSE if value_range is not recorded for SSA.  */
  if (get_range_info (ssa, &min, &max) != VR_RANGE)
    return false;

  rhs_uns = TYPE_UNSIGNED (TREE_TYPE (ssa));
  rhs_signop = rhs_uns ? UNSIGNED : SIGNED;
  lhs_type = lang_hooks.types.type_for_mode (lhs_mode, lhs_uns);
  limit = wide_int::from (TYPE_MAX_VALUE (lhs_type),
			  TYPE_PRECISION (TREE_TYPE (ssa)), SIGNED);

  if (lhs_uns)
    /* If min >= 0 then range contains positive values and doesnt need
       zero-extension.  */
    return wi::ge_p (min, 0, rhs_signop);
  else
    /* If min >= 0 and max <= signed-max-of-type then range doesn't need
       sign-extension.  */
    return wi::ge_p (min, 0, rhs_signop) && wi::le_p (max, limit,
rhs_signop);
}

Thanks,
Kugan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-01 16:04                         ` Kugan
@ 2014-08-03 23:56                           ` Kugan
  2014-08-05 14:18                           ` Richard Biener
  1 sibling, 0 replies; 58+ messages in thread
From: Kugan @ 2014-08-03 23:56 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jakub Jelinek, gcc-patches


On 02/08/14 02:03, Kugan wrote:
>>>>  if (rhs_uns)
>>>>    return wi::ge_p (min, 0);  // if min >= 0 then range contains positive values
>>>>  else
>>>>    return wi::le_p (max, wi::max_value (TYPE_PRECISION (TREE_TYPE
>>>> (ssa)), SIGNED);  // if max <= signed-max-of-type then range doesn't
>>>> need sign-extension
>>>
>>> I think we will have to check that ssa has necessary sign/zero extension
>>> when assigned to lhs_type. If PROMOTE_MODE tells us that ssa's type will
>>> be interpreted differently, the value range of ssa also will have
>>> corresponding range.  In this cases, shouldn’t we have to check for
>>> upper and lower limit for both min and max?
>>
>> Hmm?  That's exactly what the check is testing...  we know that
>> min <= max thus if min >= 0 then max >= 0.
>>
>> zero_extension will never do anything on [0, INF]
>>
>> If max < MAX-SIGNED then sign-extension will not do anything.  Ok,
>> sign-extension will do sth for negative values still.  So rather
>>
>>   if (rhs_uns)
>>     return wi::geu_p (min, 0);
>>   else
>>     return wi::ges_p (min, 0) && wi::les_p (max, wi::max_value
>> (TYPE_PRECISION (TREE_TYPE (ssa)), SIGNED));
>>
>> ?

Looking at your comments again, I think we have to consider three things
here.

To be able assign to LHS (of lhs_uns and lhs_mode) without conversion of
RHS (tree SSA)

* If we ignore the mode changes (i.e. LHS_mode can be different in terms
of precision) and ignore PROMOTE_MODE and consider only the sign of LHS
and RHS
  if (lhs_uns)
   return wi::ge_p (min, 0, rhs_signop);  // if min >= 0 then range
contains positive values
 else
   if (rhs_uns)
     // if max <= signed-max-of-type then range doesn't need sign-extension
     return wi::le_p (max, wi::max_value (TYPE_PRECISION (TREE_TYPE
(ssa)), SIGNED);
   else
     return true;


* However, if we consider the PROMOTE_MODE might change the RHS sign
  if (lhs_uns)
    {
      return wi::ge_p (min, 0, rhs_signop);
    }
  else
    {
      signed_max = wide_int::from (TYPE_MAX_VALUE (lhs_type),
				   TYPE_PRECISION (TREE_TYPE (ssa)), rhs_signop);
      if (rhs_uns)
	/* If PROMOTE_MODE changed an RHS signed to unsigned and
	   SSA contains negative value range, we still have to do sign-extend.  */
	return wi::ge_p (min, 0, TYPE_SIGN (TREE_TYPE (ssa)))
	  && wi::le_p (max, signed_max, rhs_signop);
      else
	/* If PROMOTE_MODE changed an RHS unsigned to signed and SSA contains value
	   range more than signed-max-of-type, we still have to do sign-extend.  */
	return wi::le_p (max, signed_max, TYPE_SIGN (TREE_TYPE (ssa)));
    }

* If we also consider that LHS mode and RHS mode precision can be different
  if (lhs_uns)
    {
      unsigned_max = wide_int::from (TYPE_MAX_VALUE (lhs_type),
				     TYPE_PRECISION (TREE_TYPE (ssa)), rhs_signop);
      /* If min >= 0 then range contains positive values and doesnt need
	 zero-extension.  If max <= unsigned-max-of-type, then value fits type.  */
      return wi::ge_p (min, 0, rhs_signop)
	&& wi::le_p (max, unsigned_max, rhs_signop);
    }
  else
    {
      signed_max = wide_int::from (TYPE_MAX_VALUE (lhs_type),
				   TYPE_PRECISION (TREE_TYPE (ssa)), rhs_signop);
      signed_min = wide_int::from (TYPE_MIN_VALUE (lhs_type),
				   TYPE_PRECISION (TREE_TYPE (ssa)), rhs_signop);
      if (rhs_uns)
	/* If PROMOTE_MODE changed an RHS signed to unsigned and
	   SSA contains negative value range, we still have to do sign-extend.  */
	return wi::ge_p (min, 0, TYPE_SIGN (TREE_TYPE (ssa)))
	  && wi::le_p (max, signed_max, rhs_signop);
      else
	/* If PROMOTE_MODE changed an RHS unsigned to signed and SSA contains value
	   range more than signed-max-of-type, we still have to do sign-extend.  */
	return wi::le_p (max, signed_max, TYPE_SIGN (TREE_TYPE (ssa)))
	  && wi::ge_p (min, signed_min, rhs_signop);
    }
}


Since we can have PROMOTE_MODE changing the sign and LHS mode and RHS
mode precision can be different, the check should be the third one. Does
that make sense or am I still missing it?

Thanks again for your time,
Kugan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-01 16:04                         ` Kugan
  2014-08-03 23:56                           ` Kugan
@ 2014-08-05 14:18                           ` Richard Biener
  2014-08-05 14:21                             ` Jakub Jelinek
  1 sibling, 1 reply; 58+ messages in thread
From: Richard Biener @ 2014-08-05 14:18 UTC (permalink / raw)
  To: Kugan; +Cc: Jakub Jelinek, gcc-patches

On Fri, Aug 1, 2014 at 6:03 PM, Kugan <kugan.vivekanandarajah@linaro.org> wrote:
>>>>  if (rhs_uns)
>>>>    return wi::ge_p (min, 0);  // if min >= 0 then range contains positive values
>>>>  else
>>>>    return wi::le_p (max, wi::max_value (TYPE_PRECISION (TREE_TYPE
>>>> (ssa)), SIGNED);  // if max <= signed-max-of-type then range doesn't
>>>> need sign-extension
>>>
>>> I think we will have to check that ssa has necessary sign/zero extension
>>> when assigned to lhs_type. If PROMOTE_MODE tells us that ssa's type will
>>> be interpreted differently, the value range of ssa also will have
>>> corresponding range.  In this cases, shouldn’t we have to check for
>>> upper and lower limit for both min and max?
>>
>> Hmm?  That's exactly what the check is testing...  we know that
>> min <= max thus if min >= 0 then max >= 0.
>>
>> zero_extension will never do anything on [0, INF]
>>
>> If max < MAX-SIGNED then sign-extension will not do anything.  Ok,
>> sign-extension will do sth for negative values still.  So rather
>>
>>   if (rhs_uns)
>>     return wi::geu_p (min, 0);
>>   else
>>     return wi::ges_p (min, 0) && wi::les_p (max, wi::max_value
>> (TYPE_PRECISION (TREE_TYPE (ssa)), SIGNED));
>>
>> ?
>
> Thanks for the explanation. I agree. Don’t we have to however check this
> on lhs_uns as this function is checking if ssa is promoted for lhs_sign
> and lhs_mode?
>
> Here is an attempt based on this. I ran regression testing with
> arm-none-linux-gnueabi on qemu-arm without any new regressions.
>
> Sine I am not comparing value ranges to see if it can be represented in
> lhs_sigh, I can now skip the PROMOTED_MODE check.

Now I'm lost.  You call this function from two contexts:

diff --git a/gcc/calls.c b/gcc/calls.c
index a3e6faa..eac512f 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -1484,7 +1484,10 @@ precompute_arguments (int num_actuals, struct
arg_data *args)
              args[i].initial_value
                = gen_lowpart_SUBREG (mode, args[i].value);
              SUBREG_PROMOTED_VAR_P (args[i].initial_value) = 1;
-             SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);
+             if (is_promoted_for_type (args[i].tree_value, mode,
!args[i].unsignedp))
+               SUBREG_PROMOTED_SET (args[i].initial_value,
SRP_SIGNED_AND_UNSIGNED);
+             else
+               SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);

and

@@ -9527,7 +9587,10 @@ expand_expr_real_1 (tree exp, rtx target, enum
machine_mode tmode,

          temp = gen_lowpart_SUBREG (mode, decl_rtl);
          SUBREG_PROMOTED_VAR_P (temp) = 1;
-         SUBREG_PROMOTED_SET (temp, unsignedp);
+         if (is_promoted_for_type (ssa_name, mode, !unsignedp))
+           SUBREG_PROMOTED_SET (temp, SRP_SIGNED_AND_UNSIGNED);
+         else
+           SUBREG_PROMOTED_SET (temp, unsignedp);
          return temp;
        }

what's the semantic of setting SRP_SIGNED_AND_UNSIGNED
on the subreg?  That is, for the created (subreg:lhs_mode
(reg:<PROMOTE_MODE of ssa> N))?

it seems that we need to verify that 'ssa', when promoted,
does not have bits set above the target modes MSB when
we know it is zero-extended (according to PROMOTE_MODE)?
Or has all bits set to one and is sign-extended (according to
PROMOTE_MODE)?

Now it seems that the promotion is according to
promote_{function,decl}_mode in expand_expr_real_1
and according to promote_mode in calls.c.

The function comment above promoted_for_type_p needs to be
more elaborate on what invariant it checks.  As you pass in
the subreg mode but you need to verify the larger mode is
properly extended.

> I am still using wide_int::from (instead of wi::max_value) to get the
> limit as I have to match the precision with min, max precision.
> otherwise wide_int comparisons will not work. Is there a better way for
> this?

I don't understand.  wi::max_value takes a precision argument.

>
> /* Return TRUE if value in SSA is already zero/sign extended for lhs type
>    (type here is the combination of LHS_MODE and LHS_UNS) using value range
>    information stored.  Return FALSE otherwise.  */
> bool
> promoted_for_type_p (tree ssa, enum machine_mode lhs_mode, bool lhs_uns)
> {
>   wide_int min, max, limit;
>   tree lhs_type;
>   bool rhs_uns;
>   signop rhs_signop;
>
>   if (ssa == NULL_TREE
>       || TREE_CODE (ssa) != SSA_NAME
>       || !INTEGRAL_TYPE_P (TREE_TYPE (ssa)))
>     return false;
>
>   /* Return FALSE if value_range is not recorded for SSA.  */
>   if (get_range_info (ssa, &min, &max) != VR_RANGE)
>     return false;
>
>   rhs_uns = TYPE_UNSIGNED (TREE_TYPE (ssa));
>   rhs_signop = rhs_uns ? UNSIGNED : SIGNED;
>   lhs_type = lang_hooks.types.type_for_mode (lhs_mode, lhs_uns);
>   limit = wide_int::from (TYPE_MAX_VALUE (lhs_type),
>                           TYPE_PRECISION (TREE_TYPE (ssa)), SIGNED);
>
>   if (lhs_uns)
>     /* If min >= 0 then range contains positive values and doesnt need
>        zero-extension.  */
>     return wi::ge_p (min, 0, rhs_signop);
>   else
>     /* If min >= 0 and max <= signed-max-of-type then range doesn't need
>        sign-extension.  */
>     return wi::ge_p (min, 0, rhs_signop) && wi::le_p (max, limit,
> rhs_signop);
> }
>
> Thanks,
> Kugan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-05 14:18                           ` Richard Biener
@ 2014-08-05 14:21                             ` Jakub Jelinek
  2014-08-06 12:09                               ` Richard Biener
  0 siblings, 1 reply; 58+ messages in thread
From: Jakub Jelinek @ 2014-08-05 14:21 UTC (permalink / raw)
  To: Richard Biener; +Cc: Kugan, gcc-patches

On Tue, Aug 05, 2014 at 04:17:41PM +0200, Richard Biener wrote:
> what's the semantic of setting SRP_SIGNED_AND_UNSIGNED
> on the subreg?  That is, for the created (subreg:lhs_mode
> (reg:<PROMOTE_MODE of ssa> N))?

SRP_SIGNED_AND_UNSIGNED on a subreg should mean that
the subreg is both zero and sign extended, which means
that the topmost bit of the narrower mode is known to be zero,
and all bits above it in the wider mode are known to be zero too.
SRP_SIGNED means that the topmost bit of the narrower mode is
either 0 or 1 and depending on that the above wider mode bits
are either all 0 or all 1.
SRP_UNSIGNED means that regardless of the topmost bit value,
all above wider mode bits are 0.

	Jakub

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-05 14:21                             ` Jakub Jelinek
@ 2014-08-06 12:09                               ` Richard Biener
  2014-08-06 13:22                                 ` Kugan
  0 siblings, 1 reply; 58+ messages in thread
From: Richard Biener @ 2014-08-06 12:09 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Kugan, gcc-patches

On Tue, Aug 5, 2014 at 4:21 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Tue, Aug 05, 2014 at 04:17:41PM +0200, Richard Biener wrote:
>> what's the semantic of setting SRP_SIGNED_AND_UNSIGNED
>> on the subreg?  That is, for the created (subreg:lhs_mode
>> (reg:<PROMOTE_MODE of ssa> N))?
>
> SRP_SIGNED_AND_UNSIGNED on a subreg should mean that
> the subreg is both zero and sign extended, which means
> that the topmost bit of the narrower mode is known to be zero,
> and all bits above it in the wider mode are known to be zero too.
> SRP_SIGNED means that the topmost bit of the narrower mode is
> either 0 or 1 and depending on that the above wider mode bits
> are either all 0 or all 1.
> SRP_UNSIGNED means that regardless of the topmost bit value,
> all above wider mode bits are 0.

Ok, then from the context of the patch we already know that
either SRP_UNSIGNED or SRP_SIGNED is true which means
that the value is sign- or zero-extended.

I suppose inside promoted_for_type_p
TYPE_MODE (TREE_TYPE (ssa)) == lhs_mode, I'm not sure
why you pass !unsignedp as lhs_uns.

Now, from 'ssa' alone we can't tell anything about a larger mode
registers value if that is either zero- or sign-extended.  But we
know that those bits are properly zero-extended if unsignedp
and properly sign-extended if !unsignedp?

So what the predicate tries to prove is that sign- and zero-extending
results in the same larger-mode value.  This is true if the
MSB of the smaller mode is not set.

Let's assume that smaller mode is that of 'ssa' then the test
is just

  return (!tree_int_cst_sign_bit (min) && !tree_int_cst_sign_bit (max));

no?

Thanks,
Richard.

>         Jakub

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-06 12:09                               ` Richard Biener
@ 2014-08-06 13:22                                 ` Kugan
  2014-08-06 13:29                                   ` Richard Biener
  0 siblings, 1 reply; 58+ messages in thread
From: Kugan @ 2014-08-06 13:22 UTC (permalink / raw)
  To: Richard Biener, Jakub Jelinek; +Cc: gcc-patches

On 06/08/14 22:09, Richard Biener wrote:
> On Tue, Aug 5, 2014 at 4:21 PM, Jakub Jelinek <jakub@redhat.com> wrote:
>> On Tue, Aug 05, 2014 at 04:17:41PM +0200, Richard Biener wrote:
>>> what's the semantic of setting SRP_SIGNED_AND_UNSIGNED
>>> on the subreg?  That is, for the created (subreg:lhs_mode
>>> (reg:<PROMOTE_MODE of ssa> N))?
>>
>> SRP_SIGNED_AND_UNSIGNED on a subreg should mean that
>> the subreg is both zero and sign extended, which means
>> that the topmost bit of the narrower mode is known to be zero,
>> and all bits above it in the wider mode are known to be zero too.
>> SRP_SIGNED means that the topmost bit of the narrower mode is
>> either 0 or 1 and depending on that the above wider mode bits
>> are either all 0 or all 1.
>> SRP_UNSIGNED means that regardless of the topmost bit value,
>> all above wider mode bits are 0.
> 
> Ok, then from the context of the patch we already know that
> either SRP_UNSIGNED or SRP_SIGNED is true which means
> that the value is sign- or zero-extended.
> 
> I suppose inside promoted_for_type_p
> TYPE_MODE (TREE_TYPE (ssa)) == lhs_mode, I'm not sure
> why you pass !unsignedp as lhs_uns.

In expand_expr_real_1, it is already known that it is promoted for
unsigned_p and we are setting SUBREG_PROMOTED_SET (temp, unsignedp).

If we can prove that it is also promoted for !unsignedp, we can set
SUBREG_PROMOTED_SET (temp, SRP_SIGNED_AND_UNSIGNED).

promoted_for_type_p should prove this based on the value range info.

> 
> Now, from 'ssa' alone we can't tell anything about a larger mode
> registers value if that is either zero- or sign-extended.  But we
> know that those bits are properly zero-extended if unsignedp
> and properly sign-extended if !unsignedp?
> 
> So what the predicate tries to prove is that sign- and zero-extending
> results in the same larger-mode value.  This is true if the
> MSB of the smaller mode is not set.
> 
> Let's assume that smaller mode is that of 'ssa' then the test
> is just
> 
>   return (!tree_int_cst_sign_bit (min) && !tree_int_cst_sign_bit (max));
> 
> no?

hmm,  is this because we will never have a call to promoted_for_type_p
with same sign (ignoring PROMOTE_MODE) for 'ssa' and the larger mode.
The case with larger mode signed and 'ssa' unsigned will not work.
Therefore larger mode unsigned and 'ssa' signed will be the only case
that we should consider.

However, with PROMOTE_MODE, isnt that we will miss some cases with this.

Thanks,
Kugan


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-06 13:22                                 ` Kugan
@ 2014-08-06 13:29                                   ` Richard Biener
  2014-08-07  5:25                                     ` Kugan
  0 siblings, 1 reply; 58+ messages in thread
From: Richard Biener @ 2014-08-06 13:29 UTC (permalink / raw)
  To: Kugan; +Cc: Jakub Jelinek, gcc-patches

On Wed, Aug 6, 2014 at 3:21 PM, Kugan <kugan.vivekanandarajah@linaro.org> wrote:
> On 06/08/14 22:09, Richard Biener wrote:
>> On Tue, Aug 5, 2014 at 4:21 PM, Jakub Jelinek <jakub@redhat.com> wrote:
>>> On Tue, Aug 05, 2014 at 04:17:41PM +0200, Richard Biener wrote:
>>>> what's the semantic of setting SRP_SIGNED_AND_UNSIGNED
>>>> on the subreg?  That is, for the created (subreg:lhs_mode
>>>> (reg:<PROMOTE_MODE of ssa> N))?
>>>
>>> SRP_SIGNED_AND_UNSIGNED on a subreg should mean that
>>> the subreg is both zero and sign extended, which means
>>> that the topmost bit of the narrower mode is known to be zero,
>>> and all bits above it in the wider mode are known to be zero too.
>>> SRP_SIGNED means that the topmost bit of the narrower mode is
>>> either 0 or 1 and depending on that the above wider mode bits
>>> are either all 0 or all 1.
>>> SRP_UNSIGNED means that regardless of the topmost bit value,
>>> all above wider mode bits are 0.
>>
>> Ok, then from the context of the patch we already know that
>> either SRP_UNSIGNED or SRP_SIGNED is true which means
>> that the value is sign- or zero-extended.
>>
>> I suppose inside promoted_for_type_p
>> TYPE_MODE (TREE_TYPE (ssa)) == lhs_mode, I'm not sure
>> why you pass !unsignedp as lhs_uns.
>
> In expand_expr_real_1, it is already known that it is promoted for
> unsigned_p and we are setting SUBREG_PROMOTED_SET (temp, unsignedp).
>
> If we can prove that it is also promoted for !unsignedp, we can set
> SUBREG_PROMOTED_SET (temp, SRP_SIGNED_AND_UNSIGNED).
>
> promoted_for_type_p should prove this based on the value range info.
>
>>
>> Now, from 'ssa' alone we can't tell anything about a larger mode
>> registers value if that is either zero- or sign-extended.  But we
>> know that those bits are properly zero-extended if unsignedp
>> and properly sign-extended if !unsignedp?
>>
>> So what the predicate tries to prove is that sign- and zero-extending
>> results in the same larger-mode value.  This is true if the
>> MSB of the smaller mode is not set.
>>
>> Let's assume that smaller mode is that of 'ssa' then the test
>> is just
>>
>>   return (!tree_int_cst_sign_bit (min) && !tree_int_cst_sign_bit (max));
>>
>> no?
>
> hmm,  is this because we will never have a call to promoted_for_type_p
> with same sign (ignoring PROMOTE_MODE) for 'ssa' and the larger mode.
> The case with larger mode signed and 'ssa' unsigned will not work.
> Therefore larger mode unsigned and 'ssa' signed will be the only case
> that we should consider.
>
> However, with PROMOTE_MODE, isnt that we will miss some cases with this.

No, PROMOTE_MODE will still either sign- or zero-extend.  If either
results in zeros in the upper bits then PROMOTE_MODE doesn't matter.

Richard.

> Thanks,
> Kugan
>
>

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-06 13:29                                   ` Richard Biener
@ 2014-08-07  5:25                                     ` Kugan
  2014-08-07  8:09                                       ` Richard Biener
  0 siblings, 1 reply; 58+ messages in thread
From: Kugan @ 2014-08-07  5:25 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jakub Jelinek, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 3763 bytes --]

On 06/08/14 23:29, Richard Biener wrote:
> On Wed, Aug 6, 2014 at 3:21 PM, Kugan <kugan.vivekanandarajah@linaro.org> wrote:
>> On 06/08/14 22:09, Richard Biener wrote:
>>> On Tue, Aug 5, 2014 at 4:21 PM, Jakub Jelinek <jakub@redhat.com> wrote:
>>>> On Tue, Aug 05, 2014 at 04:17:41PM +0200, Richard Biener wrote:
>>>>> what's the semantic of setting SRP_SIGNED_AND_UNSIGNED
>>>>> on the subreg?  That is, for the created (subreg:lhs_mode
>>>>> (reg:<PROMOTE_MODE of ssa> N))?
>>>>
>>>> SRP_SIGNED_AND_UNSIGNED on a subreg should mean that
>>>> the subreg is both zero and sign extended, which means
>>>> that the topmost bit of the narrower mode is known to be zero,
>>>> and all bits above it in the wider mode are known to be zero too.
>>>> SRP_SIGNED means that the topmost bit of the narrower mode is
>>>> either 0 or 1 and depending on that the above wider mode bits
>>>> are either all 0 or all 1.
>>>> SRP_UNSIGNED means that regardless of the topmost bit value,
>>>> all above wider mode bits are 0.
>>>
>>> Ok, then from the context of the patch we already know that
>>> either SRP_UNSIGNED or SRP_SIGNED is true which means
>>> that the value is sign- or zero-extended.
>>>
>>> I suppose inside promoted_for_type_p
>>> TYPE_MODE (TREE_TYPE (ssa)) == lhs_mode, I'm not sure
>>> why you pass !unsignedp as lhs_uns.
>>
>> In expand_expr_real_1, it is already known that it is promoted for
>> unsigned_p and we are setting SUBREG_PROMOTED_SET (temp, unsignedp).
>>
>> If we can prove that it is also promoted for !unsignedp, we can set
>> SUBREG_PROMOTED_SET (temp, SRP_SIGNED_AND_UNSIGNED).
>>
>> promoted_for_type_p should prove this based on the value range info.
>>
>>>
>>> Now, from 'ssa' alone we can't tell anything about a larger mode
>>> registers value if that is either zero- or sign-extended.  But we
>>> know that those bits are properly zero-extended if unsignedp
>>> and properly sign-extended if !unsignedp?
>>>
>>> So what the predicate tries to prove is that sign- and zero-extending
>>> results in the same larger-mode value.  This is true if the
>>> MSB of the smaller mode is not set.
>>>
>>> Let's assume that smaller mode is that of 'ssa' then the test
>>> is just
>>>
>>>   return (!tree_int_cst_sign_bit (min) && !tree_int_cst_sign_bit (max));
>>>
>>> no?
>>
>> hmm,  is this because we will never have a call to promoted_for_type_p
>> with same sign (ignoring PROMOTE_MODE) for 'ssa' and the larger mode.
>> The case with larger mode signed and 'ssa' unsigned will not work.
>> Therefore larger mode unsigned and 'ssa' signed will be the only case
>> that we should consider.
>>
>> However, with PROMOTE_MODE, isnt that we will miss some cases with this.
> 
> No, PROMOTE_MODE will still either sign- or zero-extend.  If either
> results in zeros in the upper bits then PROMOTE_MODE doesn't matter.
> 

Thanks for the explanation. Please find the attached patch that
implements this. I have updated the comments and predicate to match this.

Bootstrap tested on x86_64-unknown-linux-gnu and regression tested on
x86_64-unknown-linux-gnu and arm-none-linux-gnueabi with no new
regressions. Is this OK?

Thanks,
Kugan

gcc/
2014-08-07  Kugan Vivekanandarajah  <kuganv@linaro.org>

	* calls.c (precompute_arguments): Check
	 promoted_for_signed_and_unsigned_p and set the promoted mode.
	(promoted_for_signed_and_unsigned_p): New function.
	(expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
	and set the promoted mode.
	* expr.h (promoted_for_signed_and_unsigned_p): New function definition.
	* cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
	SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.


gcc/testsuite
2014-08-07  Kugan Vivekanandarajah  <kuganv@linaro.org>

	* gcc.dg/zero_sign_ext_test.c: New test.



[-- Attachment #2: p2.txt --]
[-- Type: text/plain, Size: 8498 bytes --]

diff --git a/gcc/calls.c b/gcc/calls.c
index 00c5028..4285ec1 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -1484,7 +1484,10 @@ precompute_arguments (int num_actuals, struct arg_data *args)
 	      args[i].initial_value
 		= gen_lowpart_SUBREG (mode, args[i].value);
 	      SUBREG_PROMOTED_VAR_P (args[i].initial_value) = 1;
-	      SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);
+	      if (promoted_for_signed_and_unsigned_p (args[i].tree_value, mode))
+		SUBREG_PROMOTED_SET (args[i].initial_value, SRP_SIGNED_AND_UNSIGNED);
+	      else
+		SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);
 	    }
 	}
     }
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index f98c322..b14626c 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -3309,7 +3309,13 @@ expand_gimple_stmt_1 (gimple stmt)
 					  GET_MODE (target), temp, unsignedp);
 		  }
 
-		convert_move (SUBREG_REG (target), temp, unsignedp);
+		if ((SUBREG_PROMOTED_GET (target) == SRP_SIGNED_AND_UNSIGNED)
+		    && (GET_CODE (temp) == SUBREG)
+		    && (GET_MODE (target) == GET_MODE (temp))
+		    && (GET_MODE (SUBREG_REG (target)) == GET_MODE (SUBREG_REG (temp))))
+		  emit_move_insn (SUBREG_REG (target), SUBREG_REG (temp));
+		else
+		  convert_move (SUBREG_REG (target), temp, unsignedp);
 	      }
 	    else if (nontemporal && emit_storent_insn (target, temp))
 	      ;
diff --git a/gcc/expr.c b/gcc/expr.c
index 1242031..c217b9a 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -68,6 +68,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-address.h"
 #include "cfgexpand.h"
 #include "builtins.h"
+#include "tree-ssa.h"
 
 #ifndef STACK_PUSH_CODE
 #ifdef STACK_GROWS_DOWNWARD
@@ -9224,6 +9225,35 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
 }
 #undef REDUCE_BIT_FIELD
 
+/* Return TRUE if value in SSA is zero and sign extended for wider mode MODE
+   using value range information stored.  Return FALSE otherwise.
+
+   This is used to check if SUBREG is zero and sign extended and to set
+   promoted mode SRP_SIGNED_AND_UNSIGNED to SUBREG.  */
+
+bool
+promoted_for_signed_and_unsigned_p (tree ssa, enum machine_mode mode)
+{
+  wide_int min, max;
+
+  if (ssa == NULL_TREE
+      || TREE_CODE (ssa) != SSA_NAME
+      || !INTEGRAL_TYPE_P (TREE_TYPE (ssa))
+      || (TYPE_PRECISION (TREE_TYPE (ssa)) > GET_MODE_PRECISION (mode)))
+    return false;
+
+  /* Return FALSE if value_range is not recorded for SSA.  */
+  if (get_range_info (ssa, &min, &max) != VR_RANGE)
+    return false;
+
+  /* Return true (to set SRP_SIGNED_AND_UNSIGNED to SUBREG) if MSB of the smaller
+     mode is not set (i.e. MSB of ssa is not set).  */
+  if (!wi::neg_p (min, SIGNED) && !wi::neg_p(max, SIGNED))
+    return true;
+  else
+    return false;
+
+}
 
 /* Return TRUE if expression STMT is suitable for replacement.  
    Never consider memory loads as replaceable, because those don't ever lead 
@@ -9527,7 +9557,10 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
 
 	  temp = gen_lowpart_SUBREG (mode, decl_rtl);
 	  SUBREG_PROMOTED_VAR_P (temp) = 1;
-	  SUBREG_PROMOTED_SET (temp, unsignedp);
+	  if (promoted_for_signed_and_unsigned_p (ssa_name, mode))
+	    SUBREG_PROMOTED_SET (temp, SRP_SIGNED_AND_UNSIGNED);
+	  else
+	    SUBREG_PROMOTED_SET (temp, unsignedp);
 	  return temp;
 	}
 
diff --git a/gcc/expr.h b/gcc/expr.h
index 6a1d3ab..a429509 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -440,6 +440,7 @@ extern rtx expand_expr_real_1 (tree, rtx, enum machine_mode,
 			       enum expand_modifier, rtx *, bool);
 extern rtx expand_expr_real_2 (sepops, rtx, enum machine_mode,
 			       enum expand_modifier);
+extern bool promoted_for_signed_and_unsigned_p (tree, enum machine_mode);
 
 /* Generate code for computing expression EXP.
    An rtx for the computed value is returned.  The value is never null.
diff --git a/gcc/testsuite/gcc.dg/zero_sign_ext_test.c b/gcc/testsuite/gcc.dg/zero_sign_ext_test.c
index e69de29..6a52678 100644
--- a/gcc/testsuite/gcc.dg/zero_sign_ext_test.c
+++ b/gcc/testsuite/gcc.dg/zero_sign_ext_test.c
@@ -0,0 +1,136 @@
+extern void abort (void);
+
+/* { dg-options "-O2" } */
+/* { dg-do run } */
+
+#define TYPE_MAX(type, sign)	\
+  ((!sign) ? ((1 << (sizeof (type) * 8 - 1)) - 1) :	\
+   ((1 << (sizeof (type) * 8)) - 1))
+#define TYPE_MIN(type, sign)	\
+  ((!sign) ? -(1 << (sizeof (type) * 8 - 1)) : 0)
+
+#define TEST_FN(NAME, ARG_TYPE, RET_TYPE, CAST_TYPE, VAL, VR_MIN, VR_MAX)\
+  __attribute__((noinline, noclone)) RET_TYPE				\
+      NAME (ARG_TYPE arg){						\
+      RET_TYPE ret = VAL;						\
+      if (arg + 1 < VR_MIN || arg + 1 > VR_MAX) return ret;		\
+      /* Value Range of arg at this point will be  [VR_min, VR_max].  */\
+      arg = arg + VAL;							\
+      ret = (CAST_TYPE)arg;						\
+      return arg;							\
+  }
+
+/* Signed to signed conversion with value in-range.  */
+TEST_FN (foo1, short, short, char, 1, TYPE_MIN (char, 0), TYPE_MAX (char, 0));
+TEST_FN (foo2, short, short, char, 1, TYPE_MIN (char, 0) + 1,\
+	TYPE_MAX (char, 0) - 1);
+
+/* Signed to signed conversion with value not in-range.  */
+TEST_FN (foo3, short, short, char, -1, TYPE_MIN (short, 0) + 1,  100);
+TEST_FN (foo4, short, short, char, 1, 12, TYPE_MAX (short, 0) + 1);
+
+/* Unsigned to unsigned conversion with value in-range.  */
+TEST_FN (foo5, unsigned short, unsigned short, unsigned char, 1,\
+	TYPE_MIN (char, 1) + 1, TYPE_MAX (char, 1) - 1);
+TEST_FN (foo6, unsigned short, unsigned short, unsigned char, 1,\
+	TYPE_MIN (char, 1), TYPE_MAX (char, 1));
+
+/* Unsigned to unsigned conversion with value not in-range.  */
+TEST_FN (foo7, unsigned short, unsigned short, unsigned char, 1,\
+	TYPE_MIN (short, 1) + 1, TYPE_MAX (short, 1) - 1);
+TEST_FN (foo8, unsigned short, unsigned short, unsigned char, 1,\
+	TYPE_MIN (short, 1), TYPE_MAX (short, 1));
+
+/* Signed to unsigned conversion with value range positive.  */
+TEST_FN (foo9, short, short, unsigned char, -1, 1,\
+	TYPE_MAX (char, 1) - 1);
+TEST_FN (foo10, short, short, unsigned char, 1, 0,\
+	TYPE_MAX (char, 1));
+
+/* Signed to unsigned conversion with value range negative.  */
+TEST_FN (foo11, short, short, unsigned char, 1,\
+	TYPE_MIN (char, 0) + 1, TYPE_MAX (char, 0) - 1);
+TEST_FN (foo12, short, short, unsigned char, 1,\
+	TYPE_MIN (char, 0), TYPE_MAX (char, 0));
+
+/* Unsigned to Signed conversion with value range in signed equiv range.  */
+TEST_FN (foo13, unsigned short, unsigned short, char, 1,\
+	TYPE_MIN (char, 1) + 1, TYPE_MAX (char, 0) - 1);
+TEST_FN (foo14, unsigned short, unsigned short, char, 1,\
+	TYPE_MIN (char, 1), TYPE_MAX (char, 0));
+
+/* Unsigned to Signed conversion with value range not-in signed range.  */
+TEST_FN (foo15, unsigned short, unsigned short, char, 1,\
+	TYPE_MIN (char, 1) + 1, TYPE_MAX (char, 1) - 1);
+TEST_FN (foo16, unsigned short, unsigned short, char, 1,\
+	TYPE_MIN (char, 1), TYPE_MAX (char, 1));
+
+int main ()
+{
+  /* Signed to signed conversion with value in-range.  */
+  /* arg + 1.  */
+  if (foo1 (-32) != -31)
+    abort ();
+  /* arg + 1.  */
+  if (foo2 (32) != 33)
+    abort ();
+
+  /* Signed to signed conversion with value not in-range.  */
+  /* arg - 1.  */
+  if (foo3 (-512) != -513)
+    abort ();
+  /* arg + 1.  */
+  if (foo4 (512) != 513)
+    abort ();
+
+  /* Unsigned to unsigned conversion with value in-range.  */
+  /* arg + 1.  */
+  if (foo5 (64) != 65)
+    abort ();
+  /* arg + 1.  */
+  if (foo6 (64) != 65)
+    abort ();
+
+  /* Unsigned to unsigned conversion with value not in-range.  */
+  /* arg + 1.  */
+  if (foo7 (512) != 513)
+    abort ();
+  /* arg + 1.  */
+  if (foo8 (512) != 513)
+    abort ();
+
+  /* Signed to unsigned conversion with value range positive.  */
+  /* arg - 1.  */
+  if (foo9 (2) != 1)
+    abort ();
+  /* arg + 1.  */
+  if (foo10 (2) != 3)
+    abort ();
+
+  /* Signed to unsigned conversion with value range negative.  */
+  /* arg + 1.  */
+  if (foo11 (-125) != -124)
+    abort ();
+  /* arg + 1.  */
+  if (foo12 (-125) != -124)
+    abort ();
+
+  /* Unsigned to Signed conversion with value range in signed equiv range.  */
+  /* arg + 1.  */
+  if (foo13 (125) != 126)
+    abort ();
+  /* arg + 1.  */
+  if (foo14 (125) != 126)
+    abort ();
+
+  /* Unsigned to Signed conversion with value range not-in signed range.  */
+  /* arg + 1.  */
+  if (foo15 (250) != 251)
+    abort ();
+  /* arg + 1.  */
+  if (foo16 (250) != 251)
+    abort ();
+
+  return 0;
+}
+

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-07  5:25                                     ` Kugan
@ 2014-08-07  8:09                                       ` Richard Biener
  0 siblings, 0 replies; 58+ messages in thread
From: Richard Biener @ 2014-08-07  8:09 UTC (permalink / raw)
  To: Kugan; +Cc: Jakub Jelinek, gcc-patches

On Thu, Aug 7, 2014 at 7:24 AM, Kugan <kugan.vivekanandarajah@linaro.org> wrote:
> On 06/08/14 23:29, Richard Biener wrote:
>> On Wed, Aug 6, 2014 at 3:21 PM, Kugan <kugan.vivekanandarajah@linaro.org> wrote:
>>> On 06/08/14 22:09, Richard Biener wrote:
>>>> On Tue, Aug 5, 2014 at 4:21 PM, Jakub Jelinek <jakub@redhat.com> wrote:
>>>>> On Tue, Aug 05, 2014 at 04:17:41PM +0200, Richard Biener wrote:
>>>>>> what's the semantic of setting SRP_SIGNED_AND_UNSIGNED
>>>>>> on the subreg?  That is, for the created (subreg:lhs_mode
>>>>>> (reg:<PROMOTE_MODE of ssa> N))?
>>>>>
>>>>> SRP_SIGNED_AND_UNSIGNED on a subreg should mean that
>>>>> the subreg is both zero and sign extended, which means
>>>>> that the topmost bit of the narrower mode is known to be zero,
>>>>> and all bits above it in the wider mode are known to be zero too.
>>>>> SRP_SIGNED means that the topmost bit of the narrower mode is
>>>>> either 0 or 1 and depending on that the above wider mode bits
>>>>> are either all 0 or all 1.
>>>>> SRP_UNSIGNED means that regardless of the topmost bit value,
>>>>> all above wider mode bits are 0.
>>>>
>>>> Ok, then from the context of the patch we already know that
>>>> either SRP_UNSIGNED or SRP_SIGNED is true which means
>>>> that the value is sign- or zero-extended.
>>>>
>>>> I suppose inside promoted_for_type_p
>>>> TYPE_MODE (TREE_TYPE (ssa)) == lhs_mode, I'm not sure
>>>> why you pass !unsignedp as lhs_uns.
>>>
>>> In expand_expr_real_1, it is already known that it is promoted for
>>> unsigned_p and we are setting SUBREG_PROMOTED_SET (temp, unsignedp).
>>>
>>> If we can prove that it is also promoted for !unsignedp, we can set
>>> SUBREG_PROMOTED_SET (temp, SRP_SIGNED_AND_UNSIGNED).
>>>
>>> promoted_for_type_p should prove this based on the value range info.
>>>
>>>>
>>>> Now, from 'ssa' alone we can't tell anything about a larger mode
>>>> registers value if that is either zero- or sign-extended.  But we
>>>> know that those bits are properly zero-extended if unsignedp
>>>> and properly sign-extended if !unsignedp?
>>>>
>>>> So what the predicate tries to prove is that sign- and zero-extending
>>>> results in the same larger-mode value.  This is true if the
>>>> MSB of the smaller mode is not set.
>>>>
>>>> Let's assume that smaller mode is that of 'ssa' then the test
>>>> is just
>>>>
>>>>   return (!tree_int_cst_sign_bit (min) && !tree_int_cst_sign_bit (max));
>>>>
>>>> no?
>>>
>>> hmm,  is this because we will never have a call to promoted_for_type_p
>>> with same sign (ignoring PROMOTE_MODE) for 'ssa' and the larger mode.
>>> The case with larger mode signed and 'ssa' unsigned will not work.
>>> Therefore larger mode unsigned and 'ssa' signed will be the only case
>>> that we should consider.
>>>
>>> However, with PROMOTE_MODE, isnt that we will miss some cases with this.
>>
>> No, PROMOTE_MODE will still either sign- or zero-extend.  If either
>> results in zeros in the upper bits then PROMOTE_MODE doesn't matter.
>>
>
> Thanks for the explanation. Please find the attached patch that
> implements this. I have updated the comments and predicate to match this.
>
> Bootstrap tested on x86_64-unknown-linux-gnu and regression tested on
> x86_64-unknown-linux-gnu and arm-none-linux-gnueabi with no new
> regressions. Is this OK?

Ok with changing

+      || (TYPE_PRECISION (TREE_TYPE (ssa)) > GET_MODE_PRECISION (mode)))
+    return false;

to check with != (does that even happen?)

Thanks,
Richard.

> Thanks,
> Kugan
>
> gcc/
> 2014-08-07  Kugan Vivekanandarajah  <kuganv@linaro.org>
>
>         * calls.c (precompute_arguments): Check
>          promoted_for_signed_and_unsigned_p and set the promoted mode.
>         (promoted_for_signed_and_unsigned_p): New function.
>         (expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
>         and set the promoted mode.
>         * expr.h (promoted_for_signed_and_unsigned_p): New function definition.
>         * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
>         SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.
>
>
> gcc/testsuite
> 2014-08-07  Kugan Vivekanandarajah  <kuganv@linaro.org>
>
>         * gcc.dg/zero_sign_ext_test.c: New test.
>
>

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-09-09 10:06                   ` Kugan
@ 2014-09-09 10:28                     ` Richard Biener
  0 siblings, 0 replies; 58+ messages in thread
From: Richard Biener @ 2014-09-09 10:28 UTC (permalink / raw)
  To: Kugan; +Cc: Uros Bizjak, gcc-patches, Jakub Jelinek

On Tue, Sep 9, 2014 at 12:06 PM, Kugan
<kugan.vivekanandarajah@linaro.org> wrote:
>
>
> On 08/09/14 19:48, Richard Biener wrote:
>> On Sun, Sep 7, 2014 at 11:50 AM, Kugan
>> <kugan.vivekanandarajah@linaro.org> wrote:
>>> On 05/09/14 19:50, Richard Biener wrote:
>>>
>>>> Well - the best way would be to expose the target specifics to GIMPLE
>>>> at some point in the optimization pipeline.  My guess would be that it's
>>>> appropriate after loop optimizations (but maybe before induction variable
>>>> optimization).
>>>>
>>>> That is, have a pass that applies register promotion to all SSA names
>>>> in the function, inserting appropriate truncations and extensions.  That
>>>> way you'd never see (set (subreg...) on RTL.  The VRP and DOM
>>>> passes running after that pass would then be able to aggressively
>>>> optimize redundant truncations and extensions.
>>>>
>>>> Effects on debug information are to be considered.  You can change
>>>> the type of SSA names in-place but you don't want to do that for
>>>> user DECLs (and we can't have the SSA name type and its DECL
>>>> type differ - and not sure if we might want to lift that restriction).
>>>
>>> Thanks. I will try to implement this.
>>>
>>> I still would like to keep the VRP based approach as there are some
>>> cases that I think can only be done with range info. For example:
>>>
>>> short foo(unsigned char c)
>>> {
>>>   c = c & (unsigned char)0x0F;
>>>   if( c > 7 )
>>>     return((short)(c - 5));
>>>   else
>>>     return(( short )c);
>>> }
>>>
>>>
>>> So, how about adding and setting the overflow/wrap around flag to
>>> range_info. We now set static_flag for VR_RANG/VR_ANTI_RANGE. If we go
>>> back to the max + 1, min - 1 for VR_ANTI_RANGE, we can use this
>>> static_flag to encode overflow/wrap around. Will that be something
>>> acceptable?
>>
>> You mean tracking in the VRP lattice whether a value wrapped around
>> (or was assumed not to due to undefined behavior)?  I'm not sure this
>> is easy to do correctly (VRP is large).
>>
>> Note that I don't think we'll lose the testcase you quoted if the promotion
>> pass runs before VRP2.   We'd have as input to VRP2 sth like (assuming
>> promote mode would promote to SImode)
>>
>>   SImode tem_2 = (unsigned int)c_1(D);
>>   tem_3 = tem_3 & 0xF;
>>   if (tem_3 > 7)
>>     {
>>       tem_4 = tem_3 - 5;
>>       short _5 = (short)_4;
>>       tem_5 = (unsigned int)_5;
>>      return tem_5;
>>    }
>> else
>>    {
>>      short _6 = (short)_3;
>>      return _6;
>>    }
>>
>> VRP should be able to remove the (unsigned int)(short) sign-extension
>> of tem_4.
>>
>> note that both incoming registers and return registers are "interesting".
>> For simplicity I suggest to not promote them on GIMPLE.
>>
>> What you'd lose in VRP2 is the smaller value-ranges you'd get from
>> (undefined) wrapping.  You could recover the undefinedness by
>> looking at SSA names recorded value-range and transfering that
>> in the promotion pass (but I'm not sure if you want to open the
>> can of latent signed overflow bugs in programs even more for
>> PROMOTE_MODE targets...)
>>
>
> Thanks. In the meantime I would like to revert the patch which is
> enabling zero/sign extension. I have bootstrapped it in x86_64 and
> regression testing is ongoing. Is this OK ?

Ok.

Thanks,
Richard.

> Thanks,
> Kugan
>
> gcc/ChangeLog:
>
> 2014-09-09  Kugan Vivekanandarajah  <kuganv@linaro.org>
>
>         Revert r213751:
>         * calls.c (precompute_arguments): Check
>          promoted_for_signed_and_unsigned_p and set the promoted mode.
>         (promoted_for_signed_and_unsigned_p): New function.
>         (expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
>         and set the promoted mode.
>         * expr.h (promoted_for_signed_and_unsigned_p): New function definition.
>         * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
>         SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-09-08  9:48                 ` Richard Biener
@ 2014-09-09 10:06                   ` Kugan
  2014-09-09 10:28                     ` Richard Biener
  0 siblings, 1 reply; 58+ messages in thread
From: Kugan @ 2014-09-09 10:06 UTC (permalink / raw)
  To: Richard Biener; +Cc: Uros Bizjak, gcc-patches, Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 3667 bytes --]



On 08/09/14 19:48, Richard Biener wrote:
> On Sun, Sep 7, 2014 at 11:50 AM, Kugan
> <kugan.vivekanandarajah@linaro.org> wrote:
>> On 05/09/14 19:50, Richard Biener wrote:
>>
>>> Well - the best way would be to expose the target specifics to GIMPLE
>>> at some point in the optimization pipeline.  My guess would be that it's
>>> appropriate after loop optimizations (but maybe before induction variable
>>> optimization).
>>>
>>> That is, have a pass that applies register promotion to all SSA names
>>> in the function, inserting appropriate truncations and extensions.  That
>>> way you'd never see (set (subreg...) on RTL.  The VRP and DOM
>>> passes running after that pass would then be able to aggressively
>>> optimize redundant truncations and extensions.
>>>
>>> Effects on debug information are to be considered.  You can change
>>> the type of SSA names in-place but you don't want to do that for
>>> user DECLs (and we can't have the SSA name type and its DECL
>>> type differ - and not sure if we might want to lift that restriction).
>>
>> Thanks. I will try to implement this.
>>
>> I still would like to keep the VRP based approach as there are some
>> cases that I think can only be done with range info. For example:
>>
>> short foo(unsigned char c)
>> {
>>   c = c & (unsigned char)0x0F;
>>   if( c > 7 )
>>     return((short)(c - 5));
>>   else
>>     return(( short )c);
>> }
>>
>>
>> So, how about adding and setting the overflow/wrap around flag to
>> range_info. We now set static_flag for VR_RANG/VR_ANTI_RANGE. If we go
>> back to the max + 1, min - 1 for VR_ANTI_RANGE, we can use this
>> static_flag to encode overflow/wrap around. Will that be something
>> acceptable?
> 
> You mean tracking in the VRP lattice whether a value wrapped around
> (or was assumed not to due to undefined behavior)?  I'm not sure this
> is easy to do correctly (VRP is large).
> 
> Note that I don't think we'll lose the testcase you quoted if the promotion
> pass runs before VRP2.   We'd have as input to VRP2 sth like (assuming
> promote mode would promote to SImode)
> 
>   SImode tem_2 = (unsigned int)c_1(D);
>   tem_3 = tem_3 & 0xF;
>   if (tem_3 > 7)
>     {
>       tem_4 = tem_3 - 5;
>       short _5 = (short)_4;
>       tem_5 = (unsigned int)_5;
>      return tem_5;
>    }
> else
>    {
>      short _6 = (short)_3;
>      return _6;
>    }
> 
> VRP should be able to remove the (unsigned int)(short) sign-extension
> of tem_4.
> 
> note that both incoming registers and return registers are "interesting".
> For simplicity I suggest to not promote them on GIMPLE.
> 
> What you'd lose in VRP2 is the smaller value-ranges you'd get from
> (undefined) wrapping.  You could recover the undefinedness by
> looking at SSA names recorded value-range and transfering that
> in the promotion pass (but I'm not sure if you want to open the
> can of latent signed overflow bugs in programs even more for
> PROMOTE_MODE targets...)
> 

Thanks. In the meantime I would like to revert the patch which is
enabling zero/sign extension. I have bootstrapped it in x86_64 and
regression testing is ongoing. Is this OK ?

Thanks,
Kugan

gcc/ChangeLog:

2014-09-09  Kugan Vivekanandarajah  <kuganv@linaro.org>

	Revert r213751:
	* calls.c (precompute_arguments): Check
	 promoted_for_signed_and_unsigned_p and set the promoted mode.
	(promoted_for_signed_and_unsigned_p): New function.
	(expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
	and set the promoted mode.
	* expr.h (promoted_for_signed_and_unsigned_p): New function definition.
	* cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
	SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.

[-- Attachment #2: p.txt --]
[-- Type: text/plain, Size: 3886 bytes --]

diff --git a/gcc/calls.c b/gcc/calls.c
index 03ed9c8..345331f 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -1486,10 +1486,7 @@ precompute_arguments (int num_actuals, struct arg_data *args)
 	      args[i].initial_value
 		= gen_lowpart_SUBREG (mode, args[i].value);
 	      SUBREG_PROMOTED_VAR_P (args[i].initial_value) = 1;
-	      if (promoted_for_signed_and_unsigned_p (args[i].tree_value, mode))
-		SUBREG_PROMOTED_SET (args[i].initial_value, SRP_SIGNED_AND_UNSIGNED);
-	      else
-		SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);
+	      SUBREG_PROMOTED_SET (args[i].initial_value, args[i].unsignedp);
 	    }
 	}
     }
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index db76897..8916305 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -3325,13 +3325,7 @@ expand_gimple_stmt_1 (gimple stmt)
 					  GET_MODE (target), temp, unsignedp);
 		  }
 
-		if ((SUBREG_PROMOTED_GET (target) == SRP_SIGNED_AND_UNSIGNED)
-		    && (GET_CODE (temp) == SUBREG)
-		    && (GET_MODE (target) == GET_MODE (temp))
-		    && (GET_MODE (SUBREG_REG (target)) == GET_MODE (SUBREG_REG (temp))))
-		  emit_move_insn (SUBREG_REG (target), SUBREG_REG (temp));
-		else
-		  convert_move (SUBREG_REG (target), temp, unsignedp);
+		convert_move (SUBREG_REG (target), temp, unsignedp);
 	      }
 	    else if (nontemporal && emit_storent_insn (target, temp))
 	      ;
diff --git a/gcc/expr.c b/gcc/expr.c
index 7e69955..5039734 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -68,7 +68,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-address.h"
 #include "cfgexpand.h"
 #include "builtins.h"
-#include "tree-ssa.h"
 
 #ifndef STACK_PUSH_CODE
 #ifdef STACK_GROWS_DOWNWARD
@@ -9250,35 +9249,6 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
 }
 #undef REDUCE_BIT_FIELD
 
-/* Return TRUE if value in SSA is zero and sign extended for wider mode MODE
-   using value range information stored.  Return FALSE otherwise.
-
-   This is used to check if SUBREG is zero and sign extended and to set
-   promoted mode SRP_SIGNED_AND_UNSIGNED to SUBREG.  */
-
-bool
-promoted_for_signed_and_unsigned_p (tree ssa, enum machine_mode mode)
-{
-  wide_int min, max;
-
-  if (ssa == NULL_TREE
-      || TREE_CODE (ssa) != SSA_NAME
-      || !INTEGRAL_TYPE_P (TREE_TYPE (ssa))
-      || (TYPE_PRECISION (TREE_TYPE (ssa)) != GET_MODE_PRECISION (mode)))
-    return false;
-
-  /* Return FALSE if value_range is not recorded for SSA.  */
-  if (get_range_info (ssa, &min, &max) != VR_RANGE)
-    return false;
-
-  /* Return true (to set SRP_SIGNED_AND_UNSIGNED to SUBREG) if MSB of the
-     smaller mode is not set (i.e.  MSB of ssa is not set).  */
-  if (!wi::neg_p (min, SIGNED) && !wi::neg_p(max, SIGNED))
-    return true;
-  else
-    return false;
-
-}
 
 /* Return TRUE if expression STMT is suitable for replacement.  
    Never consider memory loads as replaceable, because those don't ever lead 
@@ -9582,10 +9552,7 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
 
 	  temp = gen_lowpart_SUBREG (mode, decl_rtl);
 	  SUBREG_PROMOTED_VAR_P (temp) = 1;
-	  if (promoted_for_signed_and_unsigned_p (ssa_name, mode))
-	    SUBREG_PROMOTED_SET (temp, SRP_SIGNED_AND_UNSIGNED);
-	  else
-	    SUBREG_PROMOTED_SET (temp, unsignedp);
+	  SUBREG_PROMOTED_SET (temp, unsignedp);
 	  return temp;
 	}
 
diff --git a/gcc/expr.h b/gcc/expr.h
index 756c894..2e66329 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -440,7 +440,6 @@ extern rtx expand_expr_real_1 (tree, rtx, enum machine_mode,
 			       enum expand_modifier, rtx *, bool);
 extern rtx expand_expr_real_2 (sepops, rtx, enum machine_mode,
 			       enum expand_modifier);
-extern bool promoted_for_signed_and_unsigned_p (tree, enum machine_mode);
 
 /* Generate code for computing expression EXP.
    An rtx for the computed value is returned.  The value is never null.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-09-07  9:51               ` Kugan
@ 2014-09-08  9:48                 ` Richard Biener
  2014-09-09 10:06                   ` Kugan
  0 siblings, 1 reply; 58+ messages in thread
From: Richard Biener @ 2014-09-08  9:48 UTC (permalink / raw)
  To: Kugan; +Cc: Uros Bizjak, gcc-patches, Jakub Jelinek

On Sun, Sep 7, 2014 at 11:50 AM, Kugan
<kugan.vivekanandarajah@linaro.org> wrote:
> On 05/09/14 19:50, Richard Biener wrote:
>
>> Well - the best way would be to expose the target specifics to GIMPLE
>> at some point in the optimization pipeline.  My guess would be that it's
>> appropriate after loop optimizations (but maybe before induction variable
>> optimization).
>>
>> That is, have a pass that applies register promotion to all SSA names
>> in the function, inserting appropriate truncations and extensions.  That
>> way you'd never see (set (subreg...) on RTL.  The VRP and DOM
>> passes running after that pass would then be able to aggressively
>> optimize redundant truncations and extensions.
>>
>> Effects on debug information are to be considered.  You can change
>> the type of SSA names in-place but you don't want to do that for
>> user DECLs (and we can't have the SSA name type and its DECL
>> type differ - and not sure if we might want to lift that restriction).
>
> Thanks. I will try to implement this.
>
> I still would like to keep the VRP based approach as there are some
> cases that I think can only be done with range info. For example:
>
> short foo(unsigned char c)
> {
>   c = c & (unsigned char)0x0F;
>   if( c > 7 )
>     return((short)(c - 5));
>   else
>     return(( short )c);
> }
>
>
> So, how about adding and setting the overflow/wrap around flag to
> range_info. We now set static_flag for VR_RANG/VR_ANTI_RANGE. If we go
> back to the max + 1, min - 1 for VR_ANTI_RANGE, we can use this
> static_flag to encode overflow/wrap around. Will that be something
> acceptable?

You mean tracking in the VRP lattice whether a value wrapped around
(or was assumed not to due to undefined behavior)?  I'm not sure this
is easy to do correctly (VRP is large).

Note that I don't think we'll lose the testcase you quoted if the promotion
pass runs before VRP2.   We'd have as input to VRP2 sth like (assuming
promote mode would promote to SImode)

  SImode tem_2 = (unsigned int)c_1(D);
  tem_3 = tem_3 & 0xF;
  if (tem_3 > 7)
    {
      tem_4 = tem_3 - 5;
      short _5 = (short)_4;
      tem_5 = (unsigned int)_5;
     return tem_5;
   }
else
   {
     short _6 = (short)_3;
     return _6;
   }

VRP should be able to remove the (unsigned int)(short) sign-extension
of tem_4.

note that both incoming registers and return registers are "interesting".
For simplicity I suggest to not promote them on GIMPLE.

What you'd lose in VRP2 is the smaller value-ranges you'd get from
(undefined) wrapping.  You could recover the undefinedness by
looking at SSA names recorded value-range and transfering that
in the promotion pass (but I'm not sure if you want to open the
can of latent signed overflow bugs in programs even more for
PROMOTE_MODE targets...)

Richard.

>
> Thanks again,
> Kugan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-09-05  9:51             ` Richard Biener
@ 2014-09-07  9:51               ` Kugan
  2014-09-08  9:48                 ` Richard Biener
  0 siblings, 1 reply; 58+ messages in thread
From: Kugan @ 2014-09-07  9:51 UTC (permalink / raw)
  To: Richard Biener; +Cc: Uros Bizjak, gcc-patches, Jakub Jelinek

On 05/09/14 19:50, Richard Biener wrote:

> Well - the best way would be to expose the target specifics to GIMPLE
> at some point in the optimization pipeline.  My guess would be that it's
> appropriate after loop optimizations (but maybe before induction variable
> optimization).
> 
> That is, have a pass that applies register promotion to all SSA names
> in the function, inserting appropriate truncations and extensions.  That
> way you'd never see (set (subreg...) on RTL.  The VRP and DOM
> passes running after that pass would then be able to aggressively
> optimize redundant truncations and extensions.
> 
> Effects on debug information are to be considered.  You can change
> the type of SSA names in-place but you don't want to do that for
> user DECLs (and we can't have the SSA name type and its DECL
> type differ - and not sure if we might want to lift that restriction).

Thanks. I will try to implement this.

I still would like to keep the VRP based approach as there are some
cases that I think can only be done with range info. For example:

short foo(unsigned char c)
{
  c = c & (unsigned char)0x0F;
  if( c > 7 )
    return((short)(c - 5));
  else
    return(( short )c);
}


So, how about adding and setting the overflow/wrap around flag to
range_info. We now set static_flag for VR_RANG/VR_ANTI_RANGE. If we go
back to the max + 1, min - 1 for VR_ANTI_RANGE, we can use this
static_flag to encode overflow/wrap around. Will that be something
acceptable?

Thanks again,
Kugan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-09-05  1:33           ` Kugan
@ 2014-09-05  9:51             ` Richard Biener
  2014-09-07  9:51               ` Kugan
  0 siblings, 1 reply; 58+ messages in thread
From: Richard Biener @ 2014-09-05  9:51 UTC (permalink / raw)
  To: Kugan; +Cc: Uros Bizjak, gcc-patches, Jakub Jelinek

On Fri, Sep 5, 2014 at 3:33 AM, Kugan <kugan.vivekanandarajah@linaro.org> wrote:
>>> Here is an attempt to do the value range computation in promoted_mode's
>>> type when it is overflowing. Bootstrapped on x86-84.
>>
>> Err - I think you misunderstood this as a suggestion to do this ;)
>> value-ranges should be computed according to the type not according
>> to the (promoted) mode.  Otherwise we will miss optimization
>> opportunities.
>
> Oops, sorry, I had my doubts about making trees aware of back-end stuff.
>
> Coming back to the original problem, what would be the best approach to
> handle this. Looking at the VRP pass, it seems to me that only MULT_EXPR
> and LSHIFT_EXPR are truncating values this way. All other operation are
> setting it to type_min, type_max. Can we rely on this ?

No, that doesn't sound like a good thing to do.

> Is this error not showing up in PROMOTED_MODE <= word_mode (and
> the mode precision of register from which we SUBREG is <= word_mode
> precision) is just a coincidence. Can we rely on this?

Sounds like a coincidence to me.

> Is there anyway we can fix this?

Well - the best way would be to expose the target specifics to GIMPLE
at some point in the optimization pipeline.  My guess would be that it's
appropriate after loop optimizations (but maybe before induction variable
optimization).

That is, have a pass that applies register promotion to all SSA names
in the function, inserting appropriate truncations and extensions.  That
way you'd never see (set (subreg...) on RTL.  The VRP and DOM
passes running after that pass would then be able to aggressively
optimize redundant truncations and extensions.

Effects on debug information are to be considered.  You can change
the type of SSA names in-place but you don't want to do that for
user DECLs (and we can't have the SSA name type and its DECL
type differ - and not sure if we might want to lift that restriction).

Richard.

> Thanks again,
> Kugan
>
>
>

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-09-04 13:00         ` Richard Biener
@ 2014-09-05  1:33           ` Kugan
  2014-09-05  9:51             ` Richard Biener
  0 siblings, 1 reply; 58+ messages in thread
From: Kugan @ 2014-09-05  1:33 UTC (permalink / raw)
  To: Richard Biener; +Cc: Uros Bizjak, gcc-patches, Jakub Jelinek

>> Here is an attempt to do the value range computation in promoted_mode's
>> type when it is overflowing. Bootstrapped on x86-84.
> 
> Err - I think you misunderstood this as a suggestion to do this ;)
> value-ranges should be computed according to the type not according
> to the (promoted) mode.  Otherwise we will miss optimization
> opportunities.

Oops, sorry, I had my doubts about making trees aware of back-end stuff.

Coming back to the original problem, what would be the best approach to
handle this. Looking at the VRP pass, it seems to me that only MULT_EXPR
and LSHIFT_EXPR are truncating values this way. All other operation are
setting it to type_min, type_max. Can we rely on this ?

Is this error not showing up in PROMOTED_MODE <= word_mode (and
the mode precision of register from which we SUBREG is <= word_mode
precision) is just a coincidence. Can we rely on this?

Is there anyway we can fix this?

Thanks again,
Kugan



^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-09-04  3:41       ` Kugan
@ 2014-09-04 13:00         ` Richard Biener
  2014-09-05  1:33           ` Kugan
  0 siblings, 1 reply; 58+ messages in thread
From: Richard Biener @ 2014-09-04 13:00 UTC (permalink / raw)
  To: Kugan; +Cc: Uros Bizjak, gcc-patches, Jakub Jelinek

On Thu, Sep 4, 2014 at 5:41 AM, Kugan <kugan.vivekanandarajah@linaro.org> wrote:
>>> I added this part of the code (in cfgexpand.c) to handle binary/unary/..
>>> gimple operations and used the LHS value range to infer the assigned
>>> value range. I will revert this part of the code as this is wrong.
>>>
>>> I dont think checking promoted_mode for temp will be necessary here as
>>> convert_move will handle it correctly if promoted_mode is set for temp.
>>>
>>> Thus, I will reimplement setting promoted_mode to temp (in
>>> expand_expr_real_2) based on the gimple statement content on RHS. i.e.
>>> by looking at the RHS operands and its value ranges and by calculating
>>> the resulting value range. Does this sound OK to you.
>>
>> No, this sounds backward again and won't work because those operands
>> again could be just truncated - thus you can't rely on their value-range.
>>
>> What you would need is VRP computing value-ranges in the promoted
>> mode from the start (and it doesn't do that).
>
>
> Hi Richard,
>
> Here is an attempt to do the value range computation in promoted_mode's
> type when it is overflowing. Bootstrapped on x86-84.

Err - I think you misunderstood this as a suggestion to do this ;)
value-ranges should be computed according to the type not according
to the (promoted) mode.  Otherwise we will miss optimization
opportunities.

Richard.

> Based on your feedback, I will do more testing on this.
>
> Thanks for your time,
> Kugan
>
> gcc/ChangeLog:
>
> 2014-09-04  Kugan Vivekanandarajah <kuganv@linaro.org>
>
>         * tree-ssa-ccp.c (ccp_finalize): Adjust the nonzero_bits precision to
>         the type.
>         (evaluate_stmt): Likewise.
>         * tree-ssanames.c (set_range_info): Adjust if the precision of stored
>         value range is different.
>         * tree-vrp.c (normalize_int_cst_precision): New function.
>         (set_value_range): Add assert to check precision.
>         (set_and_canonicalize_value_range): Call normalize_int_cst_precision
>         on min and max.
>         (promoted_type): New function.
>         (promote_unary_vr): Likewise.
>         (promote_binary_vr): Likewise.
>         (extract_range_from_binary_expr_1): Adjust type to match value range.
>         Store value ranges in promoted type if they overflow.
>         (extract_range_from_unary_expr_1): Likewise.
>         (adjust_range_with_scev): Call normalize_int_cst_precision
>         on min and max.
>         (vrp_visit_assignment_or_call): Likewise.
>         (simplify_bit_ops_using_ranges): Adjust the value range precision.
>         (test_for_singularity): Likewise.
>         (simplify_stmt_for_jump_threading): Likewise.
>         (extract_range_from_assert): Likewise.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-28  8:57     ` Richard Biener
@ 2014-09-04  3:41       ` Kugan
  2014-09-04 13:00         ` Richard Biener
  0 siblings, 1 reply; 58+ messages in thread
From: Kugan @ 2014-09-04  3:41 UTC (permalink / raw)
  To: Richard Biener; +Cc: Uros Bizjak, gcc-patches, Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 2196 bytes --]

>> I added this part of the code (in cfgexpand.c) to handle binary/unary/..
>> gimple operations and used the LHS value range to infer the assigned
>> value range. I will revert this part of the code as this is wrong.
>>
>> I dont think checking promoted_mode for temp will be necessary here as
>> convert_move will handle it correctly if promoted_mode is set for temp.
>>
>> Thus, I will reimplement setting promoted_mode to temp (in
>> expand_expr_real_2) based on the gimple statement content on RHS. i.e.
>> by looking at the RHS operands and its value ranges and by calculating
>> the resulting value range. Does this sound OK to you.
> 
> No, this sounds backward again and won't work because those operands
> again could be just truncated - thus you can't rely on their value-range.
> 
> What you would need is VRP computing value-ranges in the promoted
> mode from the start (and it doesn't do that).


Hi Richard,

Here is an attempt to do the value range computation in promoted_mode's
type when it is overflowing. Bootstrapped on x86-84.

Based on your feedback, I will do more testing on this.

Thanks for your time,
Kugan

gcc/ChangeLog:

2014-09-04  Kugan Vivekanandarajah <kuganv@linaro.org>

	* tree-ssa-ccp.c (ccp_finalize): Adjust the nonzero_bits precision to
	the type.
	(evaluate_stmt): Likewise.
	* tree-ssanames.c (set_range_info): Adjust if the precision of stored
	value range is different.
	* tree-vrp.c (normalize_int_cst_precision): New function.
	(set_value_range): Add assert to check precision.
	(set_and_canonicalize_value_range): Call normalize_int_cst_precision
	on min and max.
	(promoted_type): New function.
	(promote_unary_vr): Likewise.
	(promote_binary_vr): Likewise.
	(extract_range_from_binary_expr_1): Adjust type to match value range.
	Store value ranges in promoted type if they overflow.
	(extract_range_from_unary_expr_1): Likewise.
	(adjust_range_with_scev): Call normalize_int_cst_precision
	on min and max.
	(vrp_visit_assignment_or_call): Likewise.
	(simplify_bit_ops_using_ranges): Adjust the value range precision.
	(test_for_singularity): Likewise.
	(simplify_stmt_for_jump_threading): Likewise.
	(extract_range_from_assert): Likewise.

[-- Attachment #2: p.txt --]
[-- Type: text/plain, Size: 13120 bytes --]

diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index a90f708..1733073 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -916,7 +916,11 @@ ccp_finalize (void)
 	  unsigned int precision = TYPE_PRECISION (TREE_TYPE (val->value));
 	  wide_int nonzero_bits = wide_int::from (val->mask, precision,
 						  UNSIGNED) | val->value;
-	  nonzero_bits &= get_nonzero_bits (name);
+	  wide_int nonzero_bits_name = get_nonzero_bits (name);
+	  if (precision != nonzero_bits_name.get_precision ())
+	    nonzero_bits = wi::shwi (*nonzero_bits.get_val (),
+				     nonzero_bits_name.get_precision ());
+	  nonzero_bits &= nonzero_bits_name;
 	  set_nonzero_bits (name, nonzero_bits);
 	}
     }
@@ -1852,6 +1856,8 @@ evaluate_stmt (gimple stmt)
     {
       tree lhs = gimple_get_lhs (stmt);
       wide_int nonzero_bits = get_nonzero_bits (lhs);
+      if (TYPE_PRECISION (TREE_TYPE (lhs)) != nonzero_bits.get_precision ())
+	  nonzero_bits = wide_int_to_tree (TREE_TYPE (lhs), nonzero_bits);
       if (nonzero_bits != -1)
 	{
 	  if (!is_constant)
diff --git a/gcc/tree-ssanames.c b/gcc/tree-ssanames.c
index 3af80a0..459c669 100644
--- a/gcc/tree-ssanames.c
+++ b/gcc/tree-ssanames.c
@@ -192,7 +192,7 @@ set_range_info (tree name, enum value_range_type range_type,
   gcc_assert (!POINTER_TYPE_P (TREE_TYPE (name)));
   gcc_assert (range_type == VR_RANGE || range_type == VR_ANTI_RANGE);
   range_info_def *ri = SSA_NAME_RANGE_INFO (name);
-  unsigned int precision = TYPE_PRECISION (TREE_TYPE (name));
+  unsigned int precision = min.get_precision ();
 
   /* Allocate if not available.  */
   if (ri == NULL)
@@ -204,6 +204,15 @@ set_range_info (tree name, enum value_range_type range_type,
       SSA_NAME_RANGE_INFO (name) = ri;
       ri->set_nonzero_bits (wi::shwi (-1, precision));
     }
+  else if (ri->get_min ().get_precision () != precision)
+    {
+      size_t size = (sizeof (range_info_def)
+		     + trailing_wide_ints <3>::extra_size (precision));
+      ri = static_cast<range_info_def *> (ggc_realloc (ri, size));
+      ri->ints.set_precision (precision);
+      SSA_NAME_RANGE_INFO (name) = ri;
+      ri->set_nonzero_bits (wi::shwi (-1, precision));
+    }
 
   /* Record the range type.  */
   if (SSA_NAME_RANGE_TYPE (name) != range_type)
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index d16fd8a..772676a 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -61,6 +61,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "optabs.h"
 #include "tree-ssa-threadedge.h"
 #include "wide-int.h"
+#include "langhooks.h"
 
 
 
@@ -424,6 +425,23 @@ set_value_range_to_varying (value_range_t *vr)
     bitmap_clear (vr->equiv);
 }
 
+/* Normalize min and max to promoted_type if their precision differs.  */
+
+static void
+normalize_int_cst_precision (tree *min, tree *max)
+{
+  if (TREE_CODE (*min) != INTEGER_CST
+	      || TREE_CODE (*max) != INTEGER_CST)
+    return;
+  if (TYPE_PRECISION (TREE_TYPE (*min)) != TYPE_PRECISION (TREE_TYPE (*max)))
+    {
+      tree type = TREE_TYPE (*min);
+      if (TYPE_PRECISION (TREE_TYPE (*min)) < TYPE_PRECISION (TREE_TYPE (*max)))
+	type = TREE_TYPE (*max);
+      *min = wide_int_to_tree (type, *min);
+      *max = wide_int_to_tree (type, *max);
+    }
+}
 
 /* Set value range VR to {T, MIN, MAX, EQUIV}.  */
 
@@ -438,6 +456,8 @@ set_value_range (value_range_t *vr, enum value_range_type t, tree min,
       int cmp;
 
       gcc_assert (min && max);
+      gcc_assert (TYPE_PRECISION (TREE_TYPE (min))
+		  == TYPE_PRECISION (TREE_TYPE (max)));
 
       gcc_assert ((!TREE_OVERFLOW_P (min) || is_overflow_infinity (min))
 		  && (!TREE_OVERFLOW_P (max) || is_overflow_infinity (max)));
@@ -597,6 +617,8 @@ set_and_canonicalize_value_range (value_range_t *vr, enum value_range_type t,
       return;
     }
 
+  if (min != NULL_TREE && max != NULL_TREE)
+    normalize_int_cst_precision (&min, &max);
   set_value_range (vr, t, min, max, equiv);
 }
 
@@ -951,6 +973,66 @@ usable_range_p (value_range_t *vr, bool *strict_overflow_p)
   return true;
 }
 
+/* Return the promoted type as defined by PROMOTE_MODE of the target.  */
+
+static tree
+promoted_type (tree type)
+{
+#ifdef PROMOTE_MODE
+  tree new_type;
+  if (!POINTER_TYPE_P (type)
+      && (TREE_CODE (type) != ENUMERAL_TYPE)
+      && INTEGRAL_TYPE_P (type))
+    {
+      enum machine_mode mode = TYPE_MODE (type);
+      int uns = TYPE_SIGN (type);
+      PROMOTE_MODE (mode, uns, type);
+      uns = TYPE_SIGN (type);
+      new_type = lang_hooks.types.type_for_mode (mode, uns);
+      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (type))
+	type = new_type;
+    }
+#endif
+  return type;
+}
+
+/* Promote VRO to promoted_type if their precision differ and
+   return the new type.  */
+
+static tree
+promote_unary_vr (tree type, value_range_t *vr0)
+{
+  tree expr_type = type;
+
+  if (!range_int_cst_p (vr0))
+    return expr_type;
+  if ((TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (vr0->min)))
+      || (TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (vr0->max))))
+    {
+      expr_type = promoted_type (type);
+      vr0->min = wide_int_to_tree (expr_type, vr0->min);
+      vr0->max = wide_int_to_tree (expr_type, vr0->max);
+    }
+  return expr_type;
+}
+
+/* Promote VRO and VR1 to promoted_type if their precision differ and
+   return the new type.  */
+
+static tree
+promote_binary_vr (tree type, value_range_t *vr0, value_range_t *vr1)
+{
+  tree expr_type0 = promote_unary_vr (type, vr0);
+  tree expr_type1 = promote_unary_vr (type, vr1);
+
+  if (TYPE_PRECISION (expr_type0) == TYPE_PRECISION (expr_type1))
+    return expr_type0;
+  if (TYPE_PRECISION (expr_type0) < TYPE_PRECISION (expr_type1))
+    return promote_unary_vr (expr_type1, vr0);
+  else
+    return promote_unary_vr (expr_type0, vr1);
+}
+
 
 /* Return true if the result of assignment STMT is know to be non-negative.
    If the return value is based on the assumption that signed overflow is
@@ -1741,6 +1823,7 @@ extract_range_from_assert (value_range_t *vr_p, tree expr)
 		TREE_NO_WARNING (max) = 1;
 	    }
 
+	  normalize_int_cst_precision (&min, &max);
 	  set_value_range (vr_p, VR_RANGE, min, max, vr_p->equiv);
 	}
     }
@@ -1781,6 +1864,7 @@ extract_range_from_assert (value_range_t *vr_p, tree expr)
 		TREE_NO_WARNING (min) = 1;
 	    }
 
+	  normalize_int_cst_precision (&min, &max);
 	  set_value_range (vr_p, VR_RANGE, min, max, vr_p->equiv);
 	}
     }
@@ -2376,6 +2460,9 @@ extract_range_from_binary_expr_1 (value_range_t *vr,
      range and see what we end up with.  */
   if (code == PLUS_EXPR || code == MINUS_EXPR)
     {
+      /* If any of the value range is in promoted type, promote them all
+	 including the type.  */
+      expr_type = promote_binary_vr (expr_type, &vr0, &vr1);
       /* If we have a PLUS_EXPR with two VR_RANGE integer constant
          ranges compute the precise range for such case if possible.  */
       if (range_int_cst_p (&vr0)
@@ -2562,6 +2649,9 @@ extract_range_from_binary_expr_1 (value_range_t *vr,
   else if (code == MIN_EXPR
 	   || code == MAX_EXPR)
     {
+      /* If any of the value range is in promoted type, promote them all
+	 including the type.  */
+      expr_type = promote_binary_vr (expr_type, &vr0, &vr1);
       if (vr0.type == VR_RANGE
 	  && !symbolic_range_p (&vr0))
 	{
@@ -2625,6 +2715,8 @@ extract_range_from_binary_expr_1 (value_range_t *vr,
              <wi::extended_tree <WIDE_INT_MAX_PRECISION * 2> > vrp_int_cst;
 	  vrp_int sizem1 = wi::mask <vrp_int> (prec, false);
 	  vrp_int size = sizem1 + 1;
+	  vrp_int type_min = vrp_int_cst (TYPE_MIN_VALUE (expr_type));
+	  vrp_int type_max = vrp_int_cst (TYPE_MAX_VALUE (expr_type));
 
 	  /* Extend the values using the sign of the result to PREC2.
 	     From here on out, everthing is just signed math no matter
@@ -2697,8 +2789,17 @@ extract_range_from_binary_expr_1 (value_range_t *vr,
 
 	  /* The following should handle the wrapping and selecting
 	     VR_ANTI_RANGE for us.  */
-	  min = wide_int_to_tree (expr_type, prod0);
-	  max = wide_int_to_tree (expr_type, prod3);
+	  if (wi::lts_p (prod0, type_min)
+	      || wi::gts_p (prod3, type_max))
+	    {
+	      min = wide_int_to_tree (promoted_type (expr_type), prod0);
+	      max = wide_int_to_tree (promoted_type (expr_type), prod3);
+	    }
+	  else
+	    {
+	      min = wide_int_to_tree (expr_type, prod0);
+	      max = wide_int_to_tree (expr_type, prod3);
+	    }
 	  set_and_canonicalize_value_range (vr, VR_RANGE, min, max, NULL);
 	  return;
 	}
@@ -2724,6 +2825,8 @@ extract_range_from_binary_expr_1 (value_range_t *vr,
   else if (code == RSHIFT_EXPR
 	   || code == LSHIFT_EXPR)
     {
+      /* If value range is in promoted type, promote the type as well.  */
+      expr_type = promote_unary_vr (expr_type, &vr0);
       /* If we have a RSHIFT_EXPR with any shift values outside [0..prec-1],
 	 then drop to VR_VARYING.  Outside of this range we get undefined
 	 behavior from the shift operation.  We cannot even trust
@@ -2946,6 +3049,9 @@ extract_range_from_binary_expr_1 (value_range_t *vr,
       wide_int may_be_nonzero0, may_be_nonzero1;
       wide_int must_be_nonzero0, must_be_nonzero1;
 
+      /* If any of the value range is in promoted type, promote them all
+	 including the type.  */
+      expr_type = promote_binary_vr (expr_type, &vr0, &vr1);
       int_cst_range0 = zero_nonzero_bits_from_vr (expr_type, &vr0,
 						  &may_be_nonzero0,
 						  &must_be_nonzero0);
@@ -3224,14 +3330,22 @@ extract_range_from_unary_expr_1 (value_range_t *vr,
 	  tree new_min, new_max;
 	  if (is_overflow_infinity (vr0.min))
 	    new_min = negative_overflow_infinity (outer_type);
-	  else
+	  else if (int_fits_type_p (vr0.min, outer_type))
 	    new_min = force_fit_type (outer_type, wi::to_widest (vr0.min),
 				      0, false);
+	  else
+	    new_min = force_fit_type (promoted_type (outer_type),
+				      wi::to_widest (vr0.min),
+				      0, false);
 	  if (is_overflow_infinity (vr0.max))
 	    new_max = positive_overflow_infinity (outer_type);
-	  else
+	  else if (int_fits_type_p (vr0.min, outer_type))
 	    new_max = force_fit_type (outer_type, wi::to_widest (vr0.max),
 				      0, false);
+	  else
+	    new_max = force_fit_type (promoted_type (outer_type),
+				      wi::to_widest (vr0.max),
+				      0, false);
 	  set_and_canonicalize_value_range (vr, vr0.type,
 					    new_min, new_max, NULL);
 	  return;
@@ -3940,6 +4054,8 @@ adjust_range_with_scev (value_range_t *vr, struct loop *loop,
 	  && is_positive_overflow_infinity (max)))
     return;
 
+  if (min != NULL_TREE && max != NULL_TREE)
+    normalize_int_cst_precision (&min, &max);
   set_value_range (vr, VR_RANGE, min, max, vr->equiv);
 }
 
@@ -6668,6 +6784,8 @@ vrp_visit_assignment_or_call (gimple stmt, tree *output_p)
       else
 	extract_range_from_assignment (&new_vr, stmt);
 
+      if (range_int_cst_p (&new_vr))
+	normalize_int_cst_precision (&new_vr.min, &new_vr.max);
       if (update_value_range (lhs, &new_vr))
 	{
 	  *output_p = lhs;
@@ -8399,6 +8517,8 @@ vrp_visit_phi_node (gimple phi)
   /* If the new range is different than the previous value, keep
      iterating.  */
 update_range:
+  if (range_int_cst_p (&vr_result))
+    normalize_int_cst_precision (&vr_result.min, &vr_result.max);
   if (update_value_range (lhs, &vr_result))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
@@ -8655,9 +8775,19 @@ simplify_bit_ops_using_ranges (gimple_stmt_iterator *gsi, gimple stmt)
   if (!zero_nonzero_bits_from_vr (TREE_TYPE (op0), &vr0, &may_be_nonzero0,
 				  &must_be_nonzero0))
     return false;
-  if (!zero_nonzero_bits_from_vr (TREE_TYPE (op1), &vr1, &may_be_nonzero1,
+  if (!zero_nonzero_bits_from_vr (TREE_TYPE (op0), &vr1, &may_be_nonzero1,
 				  &must_be_nonzero1))
     return false;
+  if (TYPE_PRECISION (TREE_TYPE (op0)) != may_be_nonzero0.get_precision ())
+    {
+      may_be_nonzero0 = wide_int_to_tree (TREE_TYPE (op0), may_be_nonzero0);
+      must_be_nonzero0 = wide_int_to_tree (TREE_TYPE (op0), must_be_nonzero0);
+    }
+  if (TYPE_PRECISION (TREE_TYPE (op0)) != may_be_nonzero1.get_precision ())
+    {
+      may_be_nonzero1 = wide_int_to_tree (TREE_TYPE (op1), may_be_nonzero0);
+      must_be_nonzero1 = wide_int_to_tree (TREE_TYPE (op1), must_be_nonzero0);
+    }
 
   switch (gimple_assign_rhs_code (stmt))
     {
@@ -8752,9 +8882,9 @@ test_for_singularity (enum tree_code cond_code, tree op0,
   if (min && max)
     {
       if (compare_values (vr->min, min) == 1)
-	min = vr->min;
+	min = wide_int_to_tree (TREE_TYPE (op0), vr->min);
       if (compare_values (vr->max, max) == -1)
-	max = vr->max;
+	max = wide_int_to_tree (TREE_TYPE (op0), vr->max);
 
       /* If the new min/max values have converged to a single value,
 	 then there is only one value which can satisfy the condition,
@@ -9474,7 +9604,7 @@ simplify_stmt_for_jump_threading (gimple stmt, gimple within_stmt)
 	{
 	  extract_range_from_assignment (&new_vr, stmt);
 	  if (range_int_cst_singleton_p (&new_vr))
-	    return new_vr.min;
+	    return wide_int_to_tree (TREE_TYPE (lhs), new_vr.min);
 	}
     }
 

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-09-01  8:48     ` Jakub Jelinek
@ 2014-09-01  8:54       ` Uros Bizjak
  0 siblings, 0 replies; 58+ messages in thread
From: Uros Bizjak @ 2014-09-01  8:54 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Biener, gcc-patches, Kugan

On Mon, Sep 1, 2014 at 10:47 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Wed, Aug 27, 2014 at 12:25:14PM +0200, Uros Bizjak wrote:
>> Something like following (untested) patch that also fixes the testcase perhaps?
>>
>> -- cut here--
>> Index: cfgexpand.c
>> ===================================================================
>> --- cfgexpand.c (revision 214445)
>> +++ cfgexpand.c (working copy)
>> @@ -3322,6 +3322,7 @@ expand_gimple_stmt_1 (gimple stmt)
>>
>>                 if ((SUBREG_PROMOTED_GET (target) == SRP_SIGNED_AND_UNSIGNED)
>>                     && (GET_CODE (temp) == SUBREG)
>> +                   && SUBREG_PROMOTED_VAR_P (temp)
>>                     && (GET_MODE (target) == GET_MODE (temp))
>>                     && (GET_MODE (SUBREG_REG (target)) == GET_MODE
>> (SUBREG_REG (temp))))
>
> Looks like a wrong order of the predicates in any case, first you should
> check if it is a SUBREG, then SUBREG_PROMOTED_VAR_P and only then
> SUBREG_PROMOTED_GET.  Also, the extra ()s around single line conditions
> are unnecessary.

This comment applies to the original code, not the patched line, I guess.

Uros.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-27 10:32   ` Uros Bizjak
  2014-08-27 10:32     ` Richard Biener
@ 2014-09-01  8:48     ` Jakub Jelinek
  2014-09-01  8:54       ` Uros Bizjak
  1 sibling, 1 reply; 58+ messages in thread
From: Jakub Jelinek @ 2014-09-01  8:48 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: Richard Biener, gcc-patches, Kugan

On Wed, Aug 27, 2014 at 12:25:14PM +0200, Uros Bizjak wrote:
> Something like following (untested) patch that also fixes the testcase perhaps?
> 
> -- cut here--
> Index: cfgexpand.c
> ===================================================================
> --- cfgexpand.c (revision 214445)
> +++ cfgexpand.c (working copy)
> @@ -3322,6 +3322,7 @@ expand_gimple_stmt_1 (gimple stmt)
> 
>                 if ((SUBREG_PROMOTED_GET (target) == SRP_SIGNED_AND_UNSIGNED)
>                     && (GET_CODE (temp) == SUBREG)
> +                   && SUBREG_PROMOTED_VAR_P (temp)
>                     && (GET_MODE (target) == GET_MODE (temp))
>                     && (GET_MODE (SUBREG_REG (target)) == GET_MODE
> (SUBREG_REG (temp))))

Looks like a wrong order of the predicates in any case, first you should
check if it is a SUBREG, then SUBREG_PROMOTED_VAR_P and only then
SUBREG_PROMOTED_GET.  Also, the extra ()s around single line conditions
are unnecessary.

>                   emit_move_insn (SUBREG_REG (target), SUBREG_REG (temp));
> -- cut here
> 
> Uros.

	Jakub

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-28  7:50   ` Kugan
@ 2014-08-28  8:57     ` Richard Biener
  2014-09-04  3:41       ` Kugan
  0 siblings, 1 reply; 58+ messages in thread
From: Richard Biener @ 2014-08-28  8:57 UTC (permalink / raw)
  To: Kugan; +Cc: Uros Bizjak, gcc-patches, Jakub Jelinek

On Thu, Aug 28, 2014 at 9:50 AM, Kugan
<kugan.vivekanandarajah@linaro.org> wrote:
>
>
> On 27/08/14 20:07, Richard Biener wrote:
>> On Wed, Aug 27, 2014 at 12:01 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>> Hello!
>>>
>>>> 2014-08-07  Kugan Vivekanandarajah  <kuganv@linaro.org>
>>>>
>>>> * calls.c (precompute_arguments): Check
>>>> promoted_for_signed_and_unsigned_p and set the promoted mode.
>>>> (promoted_for_signed_and_unsigned_p): New function.
>>>> (expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
>>>> and set the promoted mode.
>>>> * expr.h (promoted_for_signed_and_unsigned_p): New function definition.
>>>> * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
>>>> SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.
>>>
>>> This patch regresses:
>>>
>>> Running target unix
>>> FAIL: libgomp.fortran/simd7.f90   -O2  execution test
>>> FAIL: libgomp.fortran/simd7.f90   -Os  execution test
>>>
>>> on alphaev6-linux-gnu.
>>>
>>> The problem can be illustrated with attached testcase with a
>>> crosscompiler to alphaev68-linux-gnu (-O2 -fopenmp). The problem is in
>>> missing SImode extension after DImode shift of SImode subregs for this
>>> part:
>>>
>>> --cut here--
>>>   # test.23_12 = PHI <0(37), 1(36)>
>>>   _242 = ivtmp.181_73 + 2147483645;
>>>   _240 = _242 * 2;
>>>   _63 = (integer(kind=4)) _240;
>>>   if (ubound.6_99 <= 2)
>>>     goto <bb 39>;
>>>   else
>>>     goto <bb 40>;
>>> ;;    succ:       39
>>> ;;                40
>>>
>>> ;;   basic block 39, loop depth 1
>>> ;;    pred:       38
>>>   pretmp_337 = test.23_12 | l_76;
>>>   goto <bb 45>;
>>> ;;    succ:       45
>>>
>>> ;;   basic block 40, loop depth 1
>>> ;;    pred:       38
>>>   _11 = *c_208[0];
>>>   if (_11 != _63)
>>>     goto <bb 45>;
>>>   else
>>>     goto <bb 42>;
>>> --cut here--
>>>
>>> this expands to:
>>>
>>> (code_label 592 591 593 35 "" [0 uses])
>>>
>>> (note 593 592 0 NOTE_INSN_BASIC_BLOCK)
>>>
>>> ;; _63 = (integer(kind=4)) _240;
>>>
>>> (insn 594 593 595 (set (reg:SI 538)
>>>         (const_int 1073741824 [0x40000000])) -1
>>>      (nil))
>>>
>>> (insn 595 594 596 (set (reg:SI 539)
>>>         (plus:SI (reg:SI 538)
>>>             (const_int 1073741824 [0x40000000]))) -1
>>>      (nil))
>>>
>>> (insn 596 595 597 (set (reg:SI 537)
>>>         (plus:SI (reg:SI 539)
>>>             (const_int -3 [0xfffffffffffffffd]))) -1
>>>      (expr_list:REG_EQUAL (const_int 2147483645 [0x7ffffffd])
>>>         (nil)))
>>>
>>> (insn 597 596 598 (set (reg:SI 536 [ D.1700 ])
>>>         (plus:SI (subreg/s/v/u:SI (reg:DI 144 [ ivtmp.181 ]) 0)
>>>             (reg:SI 537))) -1
>>>      (nil))
>>>
>>> (insn 598 597 599 (set (reg:DI 540)
>>>         (ashift:DI (subreg:DI (reg:SI 536 [ D.1700 ]) 0)
>>>             (const_int 1 [0x1]))) -1
>>>      (nil))
>>>
>>> (insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
>>>         (reg:DI 540)) -1
>>>      (nil))
>>>
>>> ...
>>>
>>> (note 610 609 0 NOTE_INSN_BASIC_BLOCK)
>>>
>>> ;; _11 = *c_208[0];
>>>
>>> (insn 611 610 0 (set (reg:DI 120 [ D.1694 ])
>>>         (sign_extend:DI (mem:SI (reg/v/f:DI 227 [ c ]) [7 *c_208+0 S4
>>> A128]))) simd7.f90:12 -1
>>>      (nil))
>>>
>>> ;; if (_11 != _63)
>>>
>>> (insn 612 611 613 40 (set (reg:DI 545)
>>>         (eq:DI (reg:DI 120 [ D.1694 ])
>>>             (reg:DI 145 [ D.1694 ]))) simd7.f90:12 -1
>>>      (nil))
>>>
>>> (jump_insn 613 612 616 40 (set (pc)
>>>         (if_then_else (eq (reg:DI 545)
>>>                 (const_int 0 [0]))
>>>             (label_ref 0)
>>>             (pc))) simd7.f90:12 -1
>>>      (int_list:REG_BR_PROB 450 (nil)))
>>>
>>> which results in following asm:
>>>
>>> $L35:
>>>     addl $25,$7,$2     # 597    addsi3/1    [length = 4]
>>>     addq $2,$2,$2     # 598    ashldi3/1    [length = 4]     <------ here
>>>     bne $24,$L145     # 601    *bcc_normal    [length = 4]
>>>     lda $4,4($20)     # 627    *adddi_internal/2    [length = 4]
>>>     ldl $8,0($20)     # 611    *extendsidi2_1/2    [length = 4]
>>>     lda $3,3($31)     # 74    *movdi/2    [length = 4]
>>>     cmpeq $8,$2,$2     # 612    *setcc_internal    [length = 4]  <-- compare
>>>     bne $2,$L40     # 613    *bcc_normal    [length = 4]
>>>     br $31,$L88     # 2403    jump    [length = 4]
>>>     .align 4
>>> ...
>>>
>>> Tracking the values with the debugger shows wrong calculation:
>>>
>>>    0x000000012000108c <+1788>:  addl    t10,t12,t1
>>>    0x0000000120001090 <+1792>:  addq    t1,t1,t1
>>>    ...
>>>    0x00000001200010a4 <+1812>:  cmpeq   t6,t1,t1
>>>    0x00000001200010a8 <+1816>:  bne     t1,0x1200010c0 <foo_+1840>
>>>
>>> (gdb) si
>>> 0x000000012000108c      17          l = l .or. any (b /= 7 + i)
>>> (gdb) i r t10 t12
>>> t10            0x7      7
>>> t12            0x7ffffffd       2147483645
>>>
>>> (gdb) si
>>> 0x0000000120001090      17          l = l .or. any (b /= 7 + i)
>>> (gdb) i r t1
>>> t1             0xffffffff80000004       -2147483644
>>>
>>> (gdb) si
>>> 18          l = l .or. any (c /= 8 + 2 * i)
>>> (gdb) i r t1
>>> t1             0xffffffff00000008       -4294967288
>>>
>>> At this point, the calculation should zero-extend SImode value to full
>>> DImode, since compare operates on DImode values. The problematic insn
>>> is (insn 599), which is now a DImode assignment instead of
>>> zero-extend, due to:
>>>
>>> --- a/gcc/cfgexpand.c
>>> +++ b/gcc/cfgexpand.c
>>> @@ -3309,7 +3309,13 @@ expand_gimple_stmt_1 (gimple stmt)
>>>    GET_MODE (target), temp, unsignedp);
>>>    }
>>>
>>> - convert_move (SUBREG_REG (target), temp, unsignedp);
>>> + if ((SUBREG_PROMOTED_GET (target) == SRP_SIGNED_AND_UNSIGNED)
>>> +    && (GET_CODE (temp) == SUBREG)
>>> +    && (GET_MODE (target) == GET_MODE (temp))
>>> +    && (GET_MODE (SUBREG_REG (target)) == GET_MODE (SUBREG_REG (temp))))
>>> +  emit_move_insn (SUBREG_REG (target), SUBREG_REG (temp));
>>> + else
>>> +  convert_move (SUBREG_REG (target), temp, unsignedp);
>>>        }
>>>      else if (nontemporal && emit_storent_insn (target, temp))
>>>        ;
>>>
>>> When compiling this code, we have:
>>>
>>> lhs = _63
>>> target = (subreg/s/v/u:SI (reg:DI 145 [ D.1694 ]) 0)
>>> temp = (subreg:SI (reg:DI 540) 0)
>>>
>>> So, the code assumes that it is possible to copy (reg:DI 540) directly
>>> to (reg:DI 154). However, this is not the case, since we still have
>>> garbage in the top 32bits.
>>>
>>> Reverting the part above fixes the runtime failure, since (insn 599) is now:
>>>
>>> (insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
>>>         (zero_extend:DI (subreg:SI (reg:DI 540) 0))) -1
>>>      (nil))
>>>
>>> It looks to me that we have also to check the temp with SUBREG_PROMOTED_*.
>>
>> Yeah, that makes sense.
>>
>
> Thanks Richard for your comments.
>
> I added this part of the code (in cfgexpand.c) to handle binary/unary/..
> gimple operations and used the LHS value range to infer the assigned
> value range. I will revert this part of the code as this is wrong.
>
> I dont think checking promoted_mode for temp will be necessary here as
> convert_move will handle it correctly if promoted_mode is set for temp.
>
> Thus, I will reimplement setting promoted_mode to temp (in
> expand_expr_real_2) based on the gimple statement content on RHS. i.e.
> by looking at the RHS operands and its value ranges and by calculating
> the resulting value range. Does this sound OK to you.

No, this sounds backward again and won't work because those operands
again could be just truncated - thus you can't rely on their value-range.

What you would need is VRP computing value-ranges in the promoted
mode from the start (and it doesn't do that).

Richard.

> Thanks,
> Kugan
>

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-27 10:07 ` Richard Biener
  2014-08-27 10:32   ` Uros Bizjak
@ 2014-08-28  7:50   ` Kugan
  2014-08-28  8:57     ` Richard Biener
  1 sibling, 1 reply; 58+ messages in thread
From: Kugan @ 2014-08-28  7:50 UTC (permalink / raw)
  To: Richard Biener, Uros Bizjak; +Cc: gcc-patches, Jakub Jelinek



On 27/08/14 20:07, Richard Biener wrote:
> On Wed, Aug 27, 2014 at 12:01 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> Hello!
>>
>>> 2014-08-07  Kugan Vivekanandarajah  <kuganv@linaro.org>
>>>
>>> * calls.c (precompute_arguments): Check
>>> promoted_for_signed_and_unsigned_p and set the promoted mode.
>>> (promoted_for_signed_and_unsigned_p): New function.
>>> (expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
>>> and set the promoted mode.
>>> * expr.h (promoted_for_signed_and_unsigned_p): New function definition.
>>> * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
>>> SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.
>>
>> This patch regresses:
>>
>> Running target unix
>> FAIL: libgomp.fortran/simd7.f90   -O2  execution test
>> FAIL: libgomp.fortran/simd7.f90   -Os  execution test
>>
>> on alphaev6-linux-gnu.
>>
>> The problem can be illustrated with attached testcase with a
>> crosscompiler to alphaev68-linux-gnu (-O2 -fopenmp). The problem is in
>> missing SImode extension after DImode shift of SImode subregs for this
>> part:
>>
>> --cut here--
>>   # test.23_12 = PHI <0(37), 1(36)>
>>   _242 = ivtmp.181_73 + 2147483645;
>>   _240 = _242 * 2;
>>   _63 = (integer(kind=4)) _240;
>>   if (ubound.6_99 <= 2)
>>     goto <bb 39>;
>>   else
>>     goto <bb 40>;
>> ;;    succ:       39
>> ;;                40
>>
>> ;;   basic block 39, loop depth 1
>> ;;    pred:       38
>>   pretmp_337 = test.23_12 | l_76;
>>   goto <bb 45>;
>> ;;    succ:       45
>>
>> ;;   basic block 40, loop depth 1
>> ;;    pred:       38
>>   _11 = *c_208[0];
>>   if (_11 != _63)
>>     goto <bb 45>;
>>   else
>>     goto <bb 42>;
>> --cut here--
>>
>> this expands to:
>>
>> (code_label 592 591 593 35 "" [0 uses])
>>
>> (note 593 592 0 NOTE_INSN_BASIC_BLOCK)
>>
>> ;; _63 = (integer(kind=4)) _240;
>>
>> (insn 594 593 595 (set (reg:SI 538)
>>         (const_int 1073741824 [0x40000000])) -1
>>      (nil))
>>
>> (insn 595 594 596 (set (reg:SI 539)
>>         (plus:SI (reg:SI 538)
>>             (const_int 1073741824 [0x40000000]))) -1
>>      (nil))
>>
>> (insn 596 595 597 (set (reg:SI 537)
>>         (plus:SI (reg:SI 539)
>>             (const_int -3 [0xfffffffffffffffd]))) -1
>>      (expr_list:REG_EQUAL (const_int 2147483645 [0x7ffffffd])
>>         (nil)))
>>
>> (insn 597 596 598 (set (reg:SI 536 [ D.1700 ])
>>         (plus:SI (subreg/s/v/u:SI (reg:DI 144 [ ivtmp.181 ]) 0)
>>             (reg:SI 537))) -1
>>      (nil))
>>
>> (insn 598 597 599 (set (reg:DI 540)
>>         (ashift:DI (subreg:DI (reg:SI 536 [ D.1700 ]) 0)
>>             (const_int 1 [0x1]))) -1
>>      (nil))
>>
>> (insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
>>         (reg:DI 540)) -1
>>      (nil))
>>
>> ...
>>
>> (note 610 609 0 NOTE_INSN_BASIC_BLOCK)
>>
>> ;; _11 = *c_208[0];
>>
>> (insn 611 610 0 (set (reg:DI 120 [ D.1694 ])
>>         (sign_extend:DI (mem:SI (reg/v/f:DI 227 [ c ]) [7 *c_208+0 S4
>> A128]))) simd7.f90:12 -1
>>      (nil))
>>
>> ;; if (_11 != _63)
>>
>> (insn 612 611 613 40 (set (reg:DI 545)
>>         (eq:DI (reg:DI 120 [ D.1694 ])
>>             (reg:DI 145 [ D.1694 ]))) simd7.f90:12 -1
>>      (nil))
>>
>> (jump_insn 613 612 616 40 (set (pc)
>>         (if_then_else (eq (reg:DI 545)
>>                 (const_int 0 [0]))
>>             (label_ref 0)
>>             (pc))) simd7.f90:12 -1
>>      (int_list:REG_BR_PROB 450 (nil)))
>>
>> which results in following asm:
>>
>> $L35:
>>     addl $25,$7,$2     # 597    addsi3/1    [length = 4]
>>     addq $2,$2,$2     # 598    ashldi3/1    [length = 4]     <------ here
>>     bne $24,$L145     # 601    *bcc_normal    [length = 4]
>>     lda $4,4($20)     # 627    *adddi_internal/2    [length = 4]
>>     ldl $8,0($20)     # 611    *extendsidi2_1/2    [length = 4]
>>     lda $3,3($31)     # 74    *movdi/2    [length = 4]
>>     cmpeq $8,$2,$2     # 612    *setcc_internal    [length = 4]  <-- compare
>>     bne $2,$L40     # 613    *bcc_normal    [length = 4]
>>     br $31,$L88     # 2403    jump    [length = 4]
>>     .align 4
>> ...
>>
>> Tracking the values with the debugger shows wrong calculation:
>>
>>    0x000000012000108c <+1788>:  addl    t10,t12,t1
>>    0x0000000120001090 <+1792>:  addq    t1,t1,t1
>>    ...
>>    0x00000001200010a4 <+1812>:  cmpeq   t6,t1,t1
>>    0x00000001200010a8 <+1816>:  bne     t1,0x1200010c0 <foo_+1840>
>>
>> (gdb) si
>> 0x000000012000108c      17          l = l .or. any (b /= 7 + i)
>> (gdb) i r t10 t12
>> t10            0x7      7
>> t12            0x7ffffffd       2147483645
>>
>> (gdb) si
>> 0x0000000120001090      17          l = l .or. any (b /= 7 + i)
>> (gdb) i r t1
>> t1             0xffffffff80000004       -2147483644
>>
>> (gdb) si
>> 18          l = l .or. any (c /= 8 + 2 * i)
>> (gdb) i r t1
>> t1             0xffffffff00000008       -4294967288
>>
>> At this point, the calculation should zero-extend SImode value to full
>> DImode, since compare operates on DImode values. The problematic insn
>> is (insn 599), which is now a DImode assignment instead of
>> zero-extend, due to:
>>
>> --- a/gcc/cfgexpand.c
>> +++ b/gcc/cfgexpand.c
>> @@ -3309,7 +3309,13 @@ expand_gimple_stmt_1 (gimple stmt)
>>    GET_MODE (target), temp, unsignedp);
>>    }
>>
>> - convert_move (SUBREG_REG (target), temp, unsignedp);
>> + if ((SUBREG_PROMOTED_GET (target) == SRP_SIGNED_AND_UNSIGNED)
>> +    && (GET_CODE (temp) == SUBREG)
>> +    && (GET_MODE (target) == GET_MODE (temp))
>> +    && (GET_MODE (SUBREG_REG (target)) == GET_MODE (SUBREG_REG (temp))))
>> +  emit_move_insn (SUBREG_REG (target), SUBREG_REG (temp));
>> + else
>> +  convert_move (SUBREG_REG (target), temp, unsignedp);
>>        }
>>      else if (nontemporal && emit_storent_insn (target, temp))
>>        ;
>>
>> When compiling this code, we have:
>>
>> lhs = _63
>> target = (subreg/s/v/u:SI (reg:DI 145 [ D.1694 ]) 0)
>> temp = (subreg:SI (reg:DI 540) 0)
>>
>> So, the code assumes that it is possible to copy (reg:DI 540) directly
>> to (reg:DI 154). However, this is not the case, since we still have
>> garbage in the top 32bits.
>>
>> Reverting the part above fixes the runtime failure, since (insn 599) is now:
>>
>> (insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
>>         (zero_extend:DI (subreg:SI (reg:DI 540) 0))) -1
>>      (nil))
>>
>> It looks to me that we have also to check the temp with SUBREG_PROMOTED_*.
> 
> Yeah, that makes sense.
> 

Thanks Richard for your comments.

I added this part of the code (in cfgexpand.c) to handle binary/unary/..
gimple operations and used the LHS value range to infer the assigned
value range. I will revert this part of the code as this is wrong.

I dont think checking promoted_mode for temp will be necessary here as
convert_move will handle it correctly if promoted_mode is set for temp.

Thus, I will reimplement setting promoted_mode to temp (in
expand_expr_real_2) based on the gimple statement content on RHS. i.e.
by looking at the RHS operands and its value ranges and by calculating
the resulting value range. Does this sound OK to you.

Thanks,
Kugan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-28  6:44     ` Marc Glisse
@ 2014-08-28  7:29       ` Kugan
  0 siblings, 0 replies; 58+ messages in thread
From: Kugan @ 2014-08-28  7:29 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak, Richard Biener, Jakub Jelinek



On 28/08/14 16:44, Marc Glisse wrote:
> On Thu, 28 Aug 2014, Kugan wrote:
> 
>> On 27/08/14 23:02, Kugan wrote:
>>> On 27/08/14 20:01, Uros Bizjak wrote:
>>>> Hello!
>>>>
>>>>> 2014-08-07  Kugan Vivekanandarajah  <kuganv@linaro.org>
>>>>>
>>>>> * calls.c (precompute_arguments): Check
>>>>> promoted_for_signed_and_unsigned_p and set the promoted mode.
>>>>> (promoted_for_signed_and_unsigned_p): New function.
>>>>> (expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
>>>>> and set the promoted mode.
>>>>> * expr.h (promoted_for_signed_and_unsigned_p): New function
>>>>> definition.
>>>>> * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
>>>>> SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.
>>>>
>>>> This patch regresses:
>>>>
>>>> Running target unix
>>>> FAIL: libgomp.fortran/simd7.f90   -O2  execution test
>>>> FAIL: libgomp.fortran/simd7.f90   -Os  execution test
>>>>
>>>
>>> [snip]
>>>
>>>> When compiling this code, we have:
>>>>
>>>> lhs = _63
>>>> target = (subreg/s/v/u:SI (reg:DI 145 [ D.1694 ]) 0)
>>>> temp = (subreg:SI (reg:DI 540) 0)
>>>>
>>>> So, the code assumes that it is possible to copy (reg:DI 540) directly
>>>> to (reg:DI 154). However, this is not the case, since we still have
>>>> garbage in the top 32bits.
>>>>
>>>> Reverting the part above fixes the runtime failure, since (insn 599)
>>>> is now:
>>>>
>>>> (insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
>>>>         (zero_extend:DI (subreg:SI (reg:DI 540) 0))) -1
>>>>      (nil))
>>>>
>>>> It looks to me that we have also to check the temp with
>>>> SUBREG_PROMOTED_*.
>>>
>>> Sorry for the breakage. I am looking into this now and I can reproduce
>>> it on qemu-alpha.
>>>
>>> I have noticed the following VRP data which is used in deciding this
>>> erroneous removal. It seems suspicious to me.
>>>
>>> _343: [2147483652, 2147483715]
>>> _344: [8, 134]
>>> _345: [8, 134]
>>>
>>> _343 = ivtmp.179_52 + 2147483645;
>>> _344 = _343 * 2;
>>> _345 = (integer(kind=4)) _344;
>>>
>>> Error comes from the third statement.
>>
>> In tree-vrp.c, in extract_range_from_binary_expr_1, there is a loss of
>> precision and the value_range is truncated. For the test-case provided
>> by Uros, it is
>>
>> _344 = _343 * 2;
>> [...,0x100000008], precision = 384
>> [...,0x100000086], precision = 384
>>
>> and it is converted to following when it goes from wide_int to tree.
>> [8, 134]
> 
> Why do you believe that is wrong? Assuming _344 has a 32 bit type with
> wrapping overflow, this is just doing the wrapping modulo 2^32.
> 

Indeed. I missed the TYPE_OVERFLOW_WRAPS check earlier. Thanks for
pointing me to that.

Kugan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-28  3:46   ` Kugan
@ 2014-08-28  6:44     ` Marc Glisse
  2014-08-28  7:29       ` Kugan
  0 siblings, 1 reply; 58+ messages in thread
From: Marc Glisse @ 2014-08-28  6:44 UTC (permalink / raw)
  To: Kugan; +Cc: Uros Bizjak, gcc-patches, Richard Biener, Jakub Jelinek

On Thu, 28 Aug 2014, Kugan wrote:

> On 27/08/14 23:02, Kugan wrote:
>> On 27/08/14 20:01, Uros Bizjak wrote:
>>> Hello!
>>>
>>>> 2014-08-07  Kugan Vivekanandarajah  <kuganv@linaro.org>
>>>>
>>>> * calls.c (precompute_arguments): Check
>>>> promoted_for_signed_and_unsigned_p and set the promoted mode.
>>>> (promoted_for_signed_and_unsigned_p): New function.
>>>> (expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
>>>> and set the promoted mode.
>>>> * expr.h (promoted_for_signed_and_unsigned_p): New function definition.
>>>> * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
>>>> SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.
>>>
>>> This patch regresses:
>>>
>>> Running target unix
>>> FAIL: libgomp.fortran/simd7.f90   -O2  execution test
>>> FAIL: libgomp.fortran/simd7.f90   -Os  execution test
>>>
>>
>> [snip]
>>
>>> When compiling this code, we have:
>>>
>>> lhs = _63
>>> target = (subreg/s/v/u:SI (reg:DI 145 [ D.1694 ]) 0)
>>> temp = (subreg:SI (reg:DI 540) 0)
>>>
>>> So, the code assumes that it is possible to copy (reg:DI 540) directly
>>> to (reg:DI 154). However, this is not the case, since we still have
>>> garbage in the top 32bits.
>>>
>>> Reverting the part above fixes the runtime failure, since (insn 599) is now:
>>>
>>> (insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
>>>         (zero_extend:DI (subreg:SI (reg:DI 540) 0))) -1
>>>      (nil))
>>>
>>> It looks to me that we have also to check the temp with SUBREG_PROMOTED_*.
>>
>> Sorry for the breakage. I am looking into this now and I can reproduce
>> it on qemu-alpha.
>>
>> I have noticed the following VRP data which is used in deciding this
>> erroneous removal. It seems suspicious to me.
>>
>> _343: [2147483652, 2147483715]
>> _344: [8, 134]
>> _345: [8, 134]
>>
>> _343 = ivtmp.179_52 + 2147483645;
>> _344 = _343 * 2;
>> _345 = (integer(kind=4)) _344;
>>
>> Error comes from the third statement.
>
> In tree-vrp.c, in extract_range_from_binary_expr_1, there is a loss of
> precision and the value_range is truncated. For the test-case provided
> by Uros, it is
>
> _344 = _343 * 2;
> [...,0x100000008], precision = 384
> [...,0x100000086], precision = 384
>
> and it is converted to following when it goes from wide_int to tree.
> [8, 134]

Why do you believe that is wrong? Assuming _344 has a 32 bit type with 
wrapping overflow, this is just doing the wrapping modulo 2^32.

-- 
Marc Glisse

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-27 13:02 ` Kugan
@ 2014-08-28  3:46   ` Kugan
  2014-08-28  6:44     ` Marc Glisse
  0 siblings, 1 reply; 58+ messages in thread
From: Kugan @ 2014-08-28  3:46 UTC (permalink / raw)
  To: Uros Bizjak, gcc-patches, Richard Biener; +Cc: Jakub Jelinek


On 27/08/14 23:02, Kugan wrote:
> On 27/08/14 20:01, Uros Bizjak wrote:
>> Hello!
>>
>>> 2014-08-07  Kugan Vivekanandarajah  <kuganv@linaro.org>
>>>
>>> * calls.c (precompute_arguments): Check
>>> promoted_for_signed_and_unsigned_p and set the promoted mode.
>>> (promoted_for_signed_and_unsigned_p): New function.
>>> (expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
>>> and set the promoted mode.
>>> * expr.h (promoted_for_signed_and_unsigned_p): New function definition.
>>> * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
>>> SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.
>>
>> This patch regresses:
>>
>> Running target unix
>> FAIL: libgomp.fortran/simd7.f90   -O2  execution test
>> FAIL: libgomp.fortran/simd7.f90   -Os  execution test
>>
> 
> [snip]
> 
>> When compiling this code, we have:
>>
>> lhs = _63
>> target = (subreg/s/v/u:SI (reg:DI 145 [ D.1694 ]) 0)
>> temp = (subreg:SI (reg:DI 540) 0)
>>
>> So, the code assumes that it is possible to copy (reg:DI 540) directly
>> to (reg:DI 154). However, this is not the case, since we still have
>> garbage in the top 32bits.
>>
>> Reverting the part above fixes the runtime failure, since (insn 599) is now:
>>
>> (insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
>>         (zero_extend:DI (subreg:SI (reg:DI 540) 0))) -1
>>      (nil))
>>
>> It looks to me that we have also to check the temp with SUBREG_PROMOTED_*.
> 
> Sorry for the breakage. I am looking into this now and I can reproduce
> it on qemu-alpha.
> 
> I have noticed the following VRP data which is used in deciding this
> erroneous removal. It seems suspicious to me.
> 
> _343: [2147483652, 2147483715]
> _344: [8, 134]
> _345: [8, 134]
> 
> _343 = ivtmp.179_52 + 2147483645;
> _344 = _343 * 2;
> _345 = (integer(kind=4)) _344;
> 
> Error comes from the third statement.

In tree-vrp.c, in extract_range_from_binary_expr_1, there is a loss of
precision and the value_range is truncated. For the test-case provided
by Uros, it is

_344 = _343 * 2;
[...,0x100000008], precision = 384
[...,0x100000086], precision = 384

and it is converted to following when it goes from wide_int to tree.
[8, 134]

How about doing something like this to fix it.

diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index d16fd8a..c0fb902 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -2625,6 +2625,8 @@ extract_range_from_binary_expr_1 (value_range_t *vr,
              <wi::extended_tree <WIDE_INT_MAX_PRECISION * 2> > vrp_int_cst;
 	  vrp_int sizem1 = wi::mask <vrp_int> (prec, false);
 	  vrp_int size = sizem1 + 1;
+	  vrp_int type_min = vrp_int_cst (TYPE_MIN_VALUE (expr_type));
+	  vrp_int type_max = vrp_int_cst (TYPE_MAX_VALUE (expr_type));

 	  /* Extend the values using the sign of the result to PREC2.
 	     From here on out, everthing is just signed math no matter
@@ -2688,7 +2690,9 @@ extract_range_from_binary_expr_1 (value_range_t *vr,

 	  /* diff = max - min.  */
 	  prod2 = prod3 - prod0;
-	  if (wi::geu_p (prod2, sizem1))
+	  if (wi::geu_p (prod2, sizem1)
+	      || wi::lts_p (prod0, type_min)
+	      || wi::gts_p (prod3, type_max))
 	    {
 	      /* the range covers all values.  */
 	      set_value_range_to_varying (vr);


If this looks reasonable I will do proper testing and post the results
with the Changelog.

Thanks,
Kugan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-27 10:01 Uros Bizjak
  2014-08-27 10:07 ` Richard Biener
@ 2014-08-27 13:02 ` Kugan
  2014-08-28  3:46   ` Kugan
  1 sibling, 1 reply; 58+ messages in thread
From: Kugan @ 2014-08-27 13:02 UTC (permalink / raw)
  To: Uros Bizjak, gcc-patches; +Cc: Jakub Jelinek, Richard Biener

On 27/08/14 20:01, Uros Bizjak wrote:
> Hello!
> 
>> 2014-08-07  Kugan Vivekanandarajah  <kuganv@linaro.org>
>>
>> * calls.c (precompute_arguments): Check
>> promoted_for_signed_and_unsigned_p and set the promoted mode.
>> (promoted_for_signed_and_unsigned_p): New function.
>> (expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
>> and set the promoted mode.
>> * expr.h (promoted_for_signed_and_unsigned_p): New function definition.
>> * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
>> SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.
> 
> This patch regresses:
> 
> Running target unix
> FAIL: libgomp.fortran/simd7.f90   -O2  execution test
> FAIL: libgomp.fortran/simd7.f90   -Os  execution test
> 

[snip]

> When compiling this code, we have:
> 
> lhs = _63
> target = (subreg/s/v/u:SI (reg:DI 145 [ D.1694 ]) 0)
> temp = (subreg:SI (reg:DI 540) 0)
> 
> So, the code assumes that it is possible to copy (reg:DI 540) directly
> to (reg:DI 154). However, this is not the case, since we still have
> garbage in the top 32bits.
> 
> Reverting the part above fixes the runtime failure, since (insn 599) is now:
> 
> (insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
>         (zero_extend:DI (subreg:SI (reg:DI 540) 0))) -1
>      (nil))
> 
> It looks to me that we have also to check the temp with SUBREG_PROMOTED_*.

Sorry for the breakage. I am looking into this now and I can reproduce
it on qemu-alpha.

I have noticed the following VRP data which is used in deciding this
erroneous removal. It seems suspicious to me.

_343: [2147483652, 2147483715]
_344: [8, 134]
_345: [8, 134]

_343 = ivtmp.179_52 + 2147483645;
_344 = _343 * 2;
_345 = (integer(kind=4)) _344;

Error comes from the third statement.

Thanks,
Kugan

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-27 10:07 ` Richard Biener
@ 2014-08-27 10:32   ` Uros Bizjak
  2014-08-27 10:32     ` Richard Biener
  2014-09-01  8:48     ` Jakub Jelinek
  2014-08-28  7:50   ` Kugan
  1 sibling, 2 replies; 58+ messages in thread
From: Uros Bizjak @ 2014-08-27 10:32 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches, Kugan, Jakub Jelinek

On Wed, Aug 27, 2014 at 12:07 PM, Richard Biener
<richard.guenther@gmail.com> wrote:
>> 2014-08-07  Kugan Vivekanandarajah  <kuganv@linaro.org>
>>>
>>> * calls.c (precompute_arguments): Check
>>> promoted_for_signed_and_unsigned_p and set the promoted mode.
>>> (promoted_for_signed_and_unsigned_p): New function.
>>> (expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
>>> and set the promoted mode.
>>> * expr.h (promoted_for_signed_and_unsigned_p): New function definition.
>>> * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
>>> SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.
>>
>> This patch regresses:
>>
>> Running target unix
>> FAIL: libgomp.fortran/simd7.f90   -O2  execution test
>> FAIL: libgomp.fortran/simd7.f90   -Os  execution test
>>
>> on alphaev6-linux-gnu.
>>
>> So, the code assumes that it is possible to copy (reg:DI 540) directly
>> to (reg:DI 154). However, this is not the case, since we still have
>> garbage in the top 32bits.
>>
>> Reverting the part above fixes the runtime failure, since (insn 599) is now:
>>
>> (insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
>>         (zero_extend:DI (subreg:SI (reg:DI 540) 0))) -1
>>      (nil))
>>
>> It looks to me that we have also to check the temp with SUBREG_PROMOTED_*.
>
> Yeah, that makes sense.

Something like following (untested) patch that also fixes the testcase perhaps?

-- cut here--
Index: cfgexpand.c
===================================================================
--- cfgexpand.c (revision 214445)
+++ cfgexpand.c (working copy)
@@ -3322,6 +3322,7 @@ expand_gimple_stmt_1 (gimple stmt)

                if ((SUBREG_PROMOTED_GET (target) == SRP_SIGNED_AND_UNSIGNED)
                    && (GET_CODE (temp) == SUBREG)
+                   && SUBREG_PROMOTED_VAR_P (temp)
                    && (GET_MODE (target) == GET_MODE (temp))
                    && (GET_MODE (SUBREG_REG (target)) == GET_MODE
(SUBREG_REG (temp))))
                  emit_move_insn (SUBREG_REG (target), SUBREG_REG (temp));
-- cut here

Uros.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-27 10:32   ` Uros Bizjak
@ 2014-08-27 10:32     ` Richard Biener
  2014-09-01  8:48     ` Jakub Jelinek
  1 sibling, 0 replies; 58+ messages in thread
From: Richard Biener @ 2014-08-27 10:32 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches, Kugan, Jakub Jelinek

On Wed, Aug 27, 2014 at 12:25 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Wed, Aug 27, 2014 at 12:07 PM, Richard Biener
> <richard.guenther@gmail.com> wrote:
>>> 2014-08-07  Kugan Vivekanandarajah  <kuganv@linaro.org>
>>>>
>>>> * calls.c (precompute_arguments): Check
>>>> promoted_for_signed_and_unsigned_p and set the promoted mode.
>>>> (promoted_for_signed_and_unsigned_p): New function.
>>>> (expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
>>>> and set the promoted mode.
>>>> * expr.h (promoted_for_signed_and_unsigned_p): New function definition.
>>>> * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
>>>> SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.
>>>
>>> This patch regresses:
>>>
>>> Running target unix
>>> FAIL: libgomp.fortran/simd7.f90   -O2  execution test
>>> FAIL: libgomp.fortran/simd7.f90   -Os  execution test
>>>
>>> on alphaev6-linux-gnu.
>>>
>>> So, the code assumes that it is possible to copy (reg:DI 540) directly
>>> to (reg:DI 154). However, this is not the case, since we still have
>>> garbage in the top 32bits.
>>>
>>> Reverting the part above fixes the runtime failure, since (insn 599) is now:
>>>
>>> (insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
>>>         (zero_extend:DI (subreg:SI (reg:DI 540) 0))) -1
>>>      (nil))
>>>
>>> It looks to me that we have also to check the temp with SUBREG_PROMOTED_*.
>>
>> Yeah, that makes sense.
>
> Something like following (untested) patch that also fixes the testcase perhaps?

Yes (though I'm not really familiar with the RTL side here and the
comment before SUBREG_PROMOTED_VAR_P looks odd)

Richard.

> -- cut here--
> Index: cfgexpand.c
> ===================================================================
> --- cfgexpand.c (revision 214445)
> +++ cfgexpand.c (working copy)
> @@ -3322,6 +3322,7 @@ expand_gimple_stmt_1 (gimple stmt)
>
>                 if ((SUBREG_PROMOTED_GET (target) == SRP_SIGNED_AND_UNSIGNED)
>                     && (GET_CODE (temp) == SUBREG)
> +                   && SUBREG_PROMOTED_VAR_P (temp)
>                     && (GET_MODE (target) == GET_MODE (temp))
>                     && (GET_MODE (SUBREG_REG (target)) == GET_MODE
> (SUBREG_REG (temp))))
>                   emit_move_insn (SUBREG_REG (target), SUBREG_REG (temp));
> -- cut here
>
> Uros.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
  2014-08-27 10:01 Uros Bizjak
@ 2014-08-27 10:07 ` Richard Biener
  2014-08-27 10:32   ` Uros Bizjak
  2014-08-28  7:50   ` Kugan
  2014-08-27 13:02 ` Kugan
  1 sibling, 2 replies; 58+ messages in thread
From: Richard Biener @ 2014-08-27 10:07 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches, Kugan, Jakub Jelinek

On Wed, Aug 27, 2014 at 12:01 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> Hello!
>
>> 2014-08-07  Kugan Vivekanandarajah  <kuganv@linaro.org>
>>
>> * calls.c (precompute_arguments): Check
>> promoted_for_signed_and_unsigned_p and set the promoted mode.
>> (promoted_for_signed_and_unsigned_p): New function.
>> (expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
>> and set the promoted mode.
>> * expr.h (promoted_for_signed_and_unsigned_p): New function definition.
>> * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
>> SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.
>
> This patch regresses:
>
> Running target unix
> FAIL: libgomp.fortran/simd7.f90   -O2  execution test
> FAIL: libgomp.fortran/simd7.f90   -Os  execution test
>
> on alphaev6-linux-gnu.
>
> The problem can be illustrated with attached testcase with a
> crosscompiler to alphaev68-linux-gnu (-O2 -fopenmp). The problem is in
> missing SImode extension after DImode shift of SImode subregs for this
> part:
>
> --cut here--
>   # test.23_12 = PHI <0(37), 1(36)>
>   _242 = ivtmp.181_73 + 2147483645;
>   _240 = _242 * 2;
>   _63 = (integer(kind=4)) _240;
>   if (ubound.6_99 <= 2)
>     goto <bb 39>;
>   else
>     goto <bb 40>;
> ;;    succ:       39
> ;;                40
>
> ;;   basic block 39, loop depth 1
> ;;    pred:       38
>   pretmp_337 = test.23_12 | l_76;
>   goto <bb 45>;
> ;;    succ:       45
>
> ;;   basic block 40, loop depth 1
> ;;    pred:       38
>   _11 = *c_208[0];
>   if (_11 != _63)
>     goto <bb 45>;
>   else
>     goto <bb 42>;
> --cut here--
>
> this expands to:
>
> (code_label 592 591 593 35 "" [0 uses])
>
> (note 593 592 0 NOTE_INSN_BASIC_BLOCK)
>
> ;; _63 = (integer(kind=4)) _240;
>
> (insn 594 593 595 (set (reg:SI 538)
>         (const_int 1073741824 [0x40000000])) -1
>      (nil))
>
> (insn 595 594 596 (set (reg:SI 539)
>         (plus:SI (reg:SI 538)
>             (const_int 1073741824 [0x40000000]))) -1
>      (nil))
>
> (insn 596 595 597 (set (reg:SI 537)
>         (plus:SI (reg:SI 539)
>             (const_int -3 [0xfffffffffffffffd]))) -1
>      (expr_list:REG_EQUAL (const_int 2147483645 [0x7ffffffd])
>         (nil)))
>
> (insn 597 596 598 (set (reg:SI 536 [ D.1700 ])
>         (plus:SI (subreg/s/v/u:SI (reg:DI 144 [ ivtmp.181 ]) 0)
>             (reg:SI 537))) -1
>      (nil))
>
> (insn 598 597 599 (set (reg:DI 540)
>         (ashift:DI (subreg:DI (reg:SI 536 [ D.1700 ]) 0)
>             (const_int 1 [0x1]))) -1
>      (nil))
>
> (insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
>         (reg:DI 540)) -1
>      (nil))
>
> ...
>
> (note 610 609 0 NOTE_INSN_BASIC_BLOCK)
>
> ;; _11 = *c_208[0];
>
> (insn 611 610 0 (set (reg:DI 120 [ D.1694 ])
>         (sign_extend:DI (mem:SI (reg/v/f:DI 227 [ c ]) [7 *c_208+0 S4
> A128]))) simd7.f90:12 -1
>      (nil))
>
> ;; if (_11 != _63)
>
> (insn 612 611 613 40 (set (reg:DI 545)
>         (eq:DI (reg:DI 120 [ D.1694 ])
>             (reg:DI 145 [ D.1694 ]))) simd7.f90:12 -1
>      (nil))
>
> (jump_insn 613 612 616 40 (set (pc)
>         (if_then_else (eq (reg:DI 545)
>                 (const_int 0 [0]))
>             (label_ref 0)
>             (pc))) simd7.f90:12 -1
>      (int_list:REG_BR_PROB 450 (nil)))
>
> which results in following asm:
>
> $L35:
>     addl $25,$7,$2     # 597    addsi3/1    [length = 4]
>     addq $2,$2,$2     # 598    ashldi3/1    [length = 4]     <------ here
>     bne $24,$L145     # 601    *bcc_normal    [length = 4]
>     lda $4,4($20)     # 627    *adddi_internal/2    [length = 4]
>     ldl $8,0($20)     # 611    *extendsidi2_1/2    [length = 4]
>     lda $3,3($31)     # 74    *movdi/2    [length = 4]
>     cmpeq $8,$2,$2     # 612    *setcc_internal    [length = 4]  <-- compare
>     bne $2,$L40     # 613    *bcc_normal    [length = 4]
>     br $31,$L88     # 2403    jump    [length = 4]
>     .align 4
> ...
>
> Tracking the values with the debugger shows wrong calculation:
>
>    0x000000012000108c <+1788>:  addl    t10,t12,t1
>    0x0000000120001090 <+1792>:  addq    t1,t1,t1
>    ...
>    0x00000001200010a4 <+1812>:  cmpeq   t6,t1,t1
>    0x00000001200010a8 <+1816>:  bne     t1,0x1200010c0 <foo_+1840>
>
> (gdb) si
> 0x000000012000108c      17          l = l .or. any (b /= 7 + i)
> (gdb) i r t10 t12
> t10            0x7      7
> t12            0x7ffffffd       2147483645
>
> (gdb) si
> 0x0000000120001090      17          l = l .or. any (b /= 7 + i)
> (gdb) i r t1
> t1             0xffffffff80000004       -2147483644
>
> (gdb) si
> 18          l = l .or. any (c /= 8 + 2 * i)
> (gdb) i r t1
> t1             0xffffffff00000008       -4294967288
>
> At this point, the calculation should zero-extend SImode value to full
> DImode, since compare operates on DImode values. The problematic insn
> is (insn 599), which is now a DImode assignment instead of
> zero-extend, due to:
>
> --- a/gcc/cfgexpand.c
> +++ b/gcc/cfgexpand.c
> @@ -3309,7 +3309,13 @@ expand_gimple_stmt_1 (gimple stmt)
>    GET_MODE (target), temp, unsignedp);
>    }
>
> - convert_move (SUBREG_REG (target), temp, unsignedp);
> + if ((SUBREG_PROMOTED_GET (target) == SRP_SIGNED_AND_UNSIGNED)
> +    && (GET_CODE (temp) == SUBREG)
> +    && (GET_MODE (target) == GET_MODE (temp))
> +    && (GET_MODE (SUBREG_REG (target)) == GET_MODE (SUBREG_REG (temp))))
> +  emit_move_insn (SUBREG_REG (target), SUBREG_REG (temp));
> + else
> +  convert_move (SUBREG_REG (target), temp, unsignedp);
>        }
>      else if (nontemporal && emit_storent_insn (target, temp))
>        ;
>
> When compiling this code, we have:
>
> lhs = _63
> target = (subreg/s/v/u:SI (reg:DI 145 [ D.1694 ]) 0)
> temp = (subreg:SI (reg:DI 540) 0)
>
> So, the code assumes that it is possible to copy (reg:DI 540) directly
> to (reg:DI 154). However, this is not the case, since we still have
> garbage in the top 32bits.
>
> Reverting the part above fixes the runtime failure, since (insn 599) is now:
>
> (insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
>         (zero_extend:DI (subreg:SI (reg:DI 540) 0))) -1
>      (nil))
>
> It looks to me that we have also to check the temp with SUBREG_PROMOTED_*.

Yeah, that makes sense.

Richard.

> Uros.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 2/2] Enable elimination of zext/sext
@ 2014-08-27 10:01 Uros Bizjak
  2014-08-27 10:07 ` Richard Biener
  2014-08-27 13:02 ` Kugan
  0 siblings, 2 replies; 58+ messages in thread
From: Uros Bizjak @ 2014-08-27 10:01 UTC (permalink / raw)
  To: gcc-patches; +Cc: Kugan, Jakub Jelinek, Richard Biener

[-- Attachment #1: Type: text/plain, Size: 5703 bytes --]

Hello!

> 2014-08-07  Kugan Vivekanandarajah  <kuganv@linaro.org>
>
> * calls.c (precompute_arguments): Check
> promoted_for_signed_and_unsigned_p and set the promoted mode.
> (promoted_for_signed_and_unsigned_p): New function.
> (expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
> and set the promoted mode.
> * expr.h (promoted_for_signed_and_unsigned_p): New function definition.
> * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
> SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.

This patch regresses:

Running target unix
FAIL: libgomp.fortran/simd7.f90   -O2  execution test
FAIL: libgomp.fortran/simd7.f90   -Os  execution test

on alphaev6-linux-gnu.

The problem can be illustrated with attached testcase with a
crosscompiler to alphaev68-linux-gnu (-O2 -fopenmp). The problem is in
missing SImode extension after DImode shift of SImode subregs for this
part:

--cut here--
  # test.23_12 = PHI <0(37), 1(36)>
  _242 = ivtmp.181_73 + 2147483645;
  _240 = _242 * 2;
  _63 = (integer(kind=4)) _240;
  if (ubound.6_99 <= 2)
    goto <bb 39>;
  else
    goto <bb 40>;
;;    succ:       39
;;                40

;;   basic block 39, loop depth 1
;;    pred:       38
  pretmp_337 = test.23_12 | l_76;
  goto <bb 45>;
;;    succ:       45

;;   basic block 40, loop depth 1
;;    pred:       38
  _11 = *c_208[0];
  if (_11 != _63)
    goto <bb 45>;
  else
    goto <bb 42>;
--cut here--

this expands to:

(code_label 592 591 593 35 "" [0 uses])

(note 593 592 0 NOTE_INSN_BASIC_BLOCK)

;; _63 = (integer(kind=4)) _240;

(insn 594 593 595 (set (reg:SI 538)
        (const_int 1073741824 [0x40000000])) -1
     (nil))

(insn 595 594 596 (set (reg:SI 539)
        (plus:SI (reg:SI 538)
            (const_int 1073741824 [0x40000000]))) -1
     (nil))

(insn 596 595 597 (set (reg:SI 537)
        (plus:SI (reg:SI 539)
            (const_int -3 [0xfffffffffffffffd]))) -1
     (expr_list:REG_EQUAL (const_int 2147483645 [0x7ffffffd])
        (nil)))

(insn 597 596 598 (set (reg:SI 536 [ D.1700 ])
        (plus:SI (subreg/s/v/u:SI (reg:DI 144 [ ivtmp.181 ]) 0)
            (reg:SI 537))) -1
     (nil))

(insn 598 597 599 (set (reg:DI 540)
        (ashift:DI (subreg:DI (reg:SI 536 [ D.1700 ]) 0)
            (const_int 1 [0x1]))) -1
     (nil))

(insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
        (reg:DI 540)) -1
     (nil))

...

(note 610 609 0 NOTE_INSN_BASIC_BLOCK)

;; _11 = *c_208[0];

(insn 611 610 0 (set (reg:DI 120 [ D.1694 ])
        (sign_extend:DI (mem:SI (reg/v/f:DI 227 [ c ]) [7 *c_208+0 S4
A128]))) simd7.f90:12 -1
     (nil))

;; if (_11 != _63)

(insn 612 611 613 40 (set (reg:DI 545)
        (eq:DI (reg:DI 120 [ D.1694 ])
            (reg:DI 145 [ D.1694 ]))) simd7.f90:12 -1
     (nil))

(jump_insn 613 612 616 40 (set (pc)
        (if_then_else (eq (reg:DI 545)
                (const_int 0 [0]))
            (label_ref 0)
            (pc))) simd7.f90:12 -1
     (int_list:REG_BR_PROB 450 (nil)))

which results in following asm:

$L35:
    addl $25,$7,$2     # 597    addsi3/1    [length = 4]
    addq $2,$2,$2     # 598    ashldi3/1    [length = 4]     <------ here
    bne $24,$L145     # 601    *bcc_normal    [length = 4]
    lda $4,4($20)     # 627    *adddi_internal/2    [length = 4]
    ldl $8,0($20)     # 611    *extendsidi2_1/2    [length = 4]
    lda $3,3($31)     # 74    *movdi/2    [length = 4]
    cmpeq $8,$2,$2     # 612    *setcc_internal    [length = 4]  <-- compare
    bne $2,$L40     # 613    *bcc_normal    [length = 4]
    br $31,$L88     # 2403    jump    [length = 4]
    .align 4
...

Tracking the values with the debugger shows wrong calculation:

   0x000000012000108c <+1788>:  addl    t10,t12,t1
   0x0000000120001090 <+1792>:  addq    t1,t1,t1
   ...
   0x00000001200010a4 <+1812>:  cmpeq   t6,t1,t1
   0x00000001200010a8 <+1816>:  bne     t1,0x1200010c0 <foo_+1840>

(gdb) si
0x000000012000108c      17          l = l .or. any (b /= 7 + i)
(gdb) i r t10 t12
t10            0x7      7
t12            0x7ffffffd       2147483645

(gdb) si
0x0000000120001090      17          l = l .or. any (b /= 7 + i)
(gdb) i r t1
t1             0xffffffff80000004       -2147483644

(gdb) si
18          l = l .or. any (c /= 8 + 2 * i)
(gdb) i r t1
t1             0xffffffff00000008       -4294967288

At this point, the calculation should zero-extend SImode value to full
DImode, since compare operates on DImode values. The problematic insn
is (insn 599), which is now a DImode assignment instead of
zero-extend, due to:

--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -3309,7 +3309,13 @@ expand_gimple_stmt_1 (gimple stmt)
   GET_MODE (target), temp, unsignedp);
   }

- convert_move (SUBREG_REG (target), temp, unsignedp);
+ if ((SUBREG_PROMOTED_GET (target) == SRP_SIGNED_AND_UNSIGNED)
+    && (GET_CODE (temp) == SUBREG)
+    && (GET_MODE (target) == GET_MODE (temp))
+    && (GET_MODE (SUBREG_REG (target)) == GET_MODE (SUBREG_REG (temp))))
+  emit_move_insn (SUBREG_REG (target), SUBREG_REG (temp));
+ else
+  convert_move (SUBREG_REG (target), temp, unsignedp);
       }
     else if (nontemporal && emit_storent_insn (target, temp))
       ;

When compiling this code, we have:

lhs = _63
target = (subreg/s/v/u:SI (reg:DI 145 [ D.1694 ]) 0)
temp = (subreg:SI (reg:DI 540) 0)

So, the code assumes that it is possible to copy (reg:DI 540) directly
to (reg:DI 154). However, this is not the case, since we still have
garbage in the top 32bits.

Reverting the part above fixes the runtime failure, since (insn 599) is now:

(insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
        (zero_extend:DI (subreg:SI (reg:DI 540) 0))) -1
     (nil))

It looks to me that we have also to check the temp with SUBREG_PROMOTED_*.

Uros.

[-- Attachment #2: simd7.f90 --]
[-- Type: text/x-csrc, Size: 1050 bytes --]

subroutine foo (d, e, f, g, m, n)
  integer :: i, j, b(2:9), c(3:n), d(:), e(2:n), f(2:,3:), n
  integer, allocatable :: g(:), h(:), k, m
  logical :: l
  l = .false.
  allocate (h(2:7))
  i = 4; j = 4; b = 7; c = 8; d = 9; e = 10; f = 11; g = 12; h = 13; k = 14; m = 15
!$omp simd linear(b)linear(c:2)linear(d:3)linear(e:4)linear(f:5)linear(g:6) &
!$omp & linear(h:7)linear(k:8)linear(m:9) reduction(.or.:l)
  do i = 0, 63 
    l = l .or. any (b /= 7 + i)
    l = l .or. any (c /= 8 + 2 * i)
    b = b + 1; c = c + 2
    d = d + 3; e = e + 4; f = f + 5; g = g + 6
    h = h + 7; k = k + 8; m = m + 9
  end do
  if (l .or. i /= 64) call abort
  if (any (b /= 7 + 64) .or. any (c /= 8 + 2 * 64)) call abort
end subroutine

  interface
    subroutine foo (d, e, f, g, m, n)
      integer :: d(:), e(2:n), f(2:,3:), n
      integer, allocatable :: g(:), m
    end subroutine
  end interface
  integer, parameter :: n = 8
  integer :: d(2:18), e(3:n+1), f(5:6,7:9)
  integer, allocatable :: g(:), m
  allocate (g(7:10))
  call foo (d, e, f, g, m, n)
end

^ permalink raw reply	[flat|nested] 58+ messages in thread

end of thread, other threads:[~2014-09-09 10:28 UTC | newest]

Thread overview: 58+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-06-24 11:48 [PATCH 0/2] Zext/sext elimination using value range Kugan
2014-06-24 11:51 ` [PATCH 1/2] Enable setting sign and unsigned promoted mode (SPR_SIGNED_AND_UNSIGNED) Kugan
2014-06-24 12:18   ` Jakub Jelinek
2014-06-25  7:21     ` Kugan
2014-06-25  7:50       ` Jakub Jelinek
2014-06-26  1:06         ` Kugan
2014-06-26  2:48           ` Kugan
2014-06-26  5:50           ` Jakub Jelinek
2014-06-26  9:41             ` Kugan
2014-06-26 10:12               ` Jakub Jelinek
2014-06-26 10:42                 ` Jakub Jelinek
2014-07-01  8:21                 ` Kugan
2014-07-07  6:52                   ` Kugan
2014-07-07  8:06                     ` Jakub Jelinek
2014-06-26 10:25               ` Andreas Schwab
2014-07-01  8:28                 ` Kugan
2014-06-24 11:53 ` [PATCH 2/2] Enable elimination of zext/sext Kugan
2014-06-24 12:21   ` Jakub Jelinek
2014-06-25  8:15     ` Kugan
2014-06-25  8:36       ` Jakub Jelinek
2014-07-07  6:55         ` Kugan
2014-07-10 12:15           ` Richard Biener
2014-07-11 11:52             ` Kugan
2014-07-11 12:47               ` Richard Biener
2014-07-14  2:58                 ` Kugan
2014-07-14 20:11                   ` Bernhard Reutner-Fischer
2014-07-23 14:22                   ` Richard Biener
2014-08-01  4:51                     ` Kugan
2014-08-01 11:16                       ` Richard Biener
2014-08-01 16:04                         ` Kugan
2014-08-03 23:56                           ` Kugan
2014-08-05 14:18                           ` Richard Biener
2014-08-05 14:21                             ` Jakub Jelinek
2014-08-06 12:09                               ` Richard Biener
2014-08-06 13:22                                 ` Kugan
2014-08-06 13:29                                   ` Richard Biener
2014-08-07  5:25                                     ` Kugan
2014-08-07  8:09                                       ` Richard Biener
2014-08-27 10:01 Uros Bizjak
2014-08-27 10:07 ` Richard Biener
2014-08-27 10:32   ` Uros Bizjak
2014-08-27 10:32     ` Richard Biener
2014-09-01  8:48     ` Jakub Jelinek
2014-09-01  8:54       ` Uros Bizjak
2014-08-28  7:50   ` Kugan
2014-08-28  8:57     ` Richard Biener
2014-09-04  3:41       ` Kugan
2014-09-04 13:00         ` Richard Biener
2014-09-05  1:33           ` Kugan
2014-09-05  9:51             ` Richard Biener
2014-09-07  9:51               ` Kugan
2014-09-08  9:48                 ` Richard Biener
2014-09-09 10:06                   ` Kugan
2014-09-09 10:28                     ` Richard Biener
2014-08-27 13:02 ` Kugan
2014-08-28  3:46   ` Kugan
2014-08-28  6:44     ` Marc Glisse
2014-08-28  7:29       ` Kugan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).