[PATCH 5/7] sparc: basic support for the SPARC M8 cpu

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH 5/7] sparc: basic support for the SPARC M8 cpu
  2017-06-29 11:43 [PATCH 0/7] Support for the SPARC M8 cpu Jose E. Marchesi
                   ` (2 preceding siblings ...)
  2017-06-29 11:43 ` [PATCH 3/7] sparc: introduce insn subtypes Jose E. Marchesi
@ 2017-06-29 11:43 ` Jose E. Marchesi
  2017-06-29 11:43 ` [PATCH 4/7] sparc: reworked M7 DFA based on instruction subtypes Jose E. Marchesi
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Jose E. Marchesi @ 2017-06-29 11:43 UTC (permalink / raw)
  To: gcc-patches

This patch adds the following support for the SPARC M8 cpu, which
implements the Oracle SPARC Architecture 2017:

- Support for -mcpu=m8 and -mtune=m8.
- Definition of cpu target macros and specs in the backend.
- Tuning of backend parameters for the M8.
- Addition of a new cpu type m8 in the machine description.

gcc/ChangeLog:

	* config.gcc: Handle m8 in --with-{cpu,tune} options.
	* config.in: Add HAVE_AS_SPARC6 define.
	* config/sparc/driver-sparc.c (cpu_names): Add entry for the SPARC
	M8.
	* config/sparc/sol2.h (CPP_CPU64_DEFAULT_SPEC): Define for
	TARGET_CPU_m8.
	(ASM_CPU32_DEFAUILT_SPEC): Likewise.
	(CPP_CPU_SPEC): Handle m8.
	(ASM_CPU_SPEC): Likewise.
	* config/sparc/sparc-opts.h (enum processor_type): Add
	PROCESSOR_M8.
	* config/sparc/sparc.c (m8_costs): New struct.
	(sparc_option_override): Handle TARGET_CPU_m8.
	(sparc32_initialize_trampoline): Likewise.
	(sparc64_initialize_trampoline): Likewise.
	(sparc_issue_rate): Likewise.
	(sparc_register_move_cost): Likewise.
	* config/sparc/sparc.h (TARGET_CPU_m8): Define.
	(CPP_CPU64_DEFAULT_SPEC): Define for M8.
	(ASM_CPU64_DEFAULT_SPEC): Likewise.
	(CPP_CPU_SPEC): Handle M8.
	(ASM_CPU_SPEC): Likewise.
	(AS_M8_FLAG): Define.
	* config/sparc/sparc.md: Add m8 to the cpu attribute.
	* config/sparc/sparc.opt: New option -mcpu=m8 for sparc targets.
	* configure.ac (HAVE_AS_SPARC6): Check for assembler support for
	M8 instructions.
	* configure: Regenerate.
	* doc/invoke.texi (SPARC Options): Document -mcpu=m8 and
	-mtune=m8.
---
 gcc/ChangeLog                   | 33 +++++++++++++++++++++
 gcc/config.gcc                  |  2 +-
 gcc/config.in                   |  4 +++
 gcc/config/sparc/driver-sparc.c |  1 +
 gcc/config/sparc/sol2.h         | 14 +++++++--
 gcc/config/sparc/sparc-opts.h   |  1 +
 gcc/config/sparc/sparc.c        | 65 ++++++++++++++++++++++++++++++++++-------
 gcc/config/sparc/sparc.h        | 16 +++++++++-
 gcc/config/sparc/sparc.md       |  3 +-
 gcc/config/sparc/sparc.opt      |  3 ++
 gcc/configure                   | 35 ++++++++++++++++++++++
 gcc/configure.ac                | 12 ++++++++
 gcc/doc/invoke.texi             | 12 ++++----
 13 files changed, 181 insertions(+), 20 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index a97bbc8..090b81f 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4425,7 +4425,7 @@ case "${target}" in
 			| sparclite | f930 | f934 | sparclite86x \
 			| sparclet | tsc701 \
 			| v9 | ultrasparc | ultrasparc3 | niagara | niagara2 \
-			| niagara3 | niagara4 | niagara7)
+			| niagara3 | niagara4 | niagara7 | m8)
 				# OK
 				;;
 			*)
diff --git a/gcc/config.in b/gcc/config.in
index bf2aa7b..bff886a 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -660,6 +660,10 @@
 #undef HAVE_AS_SPARC5_VIS4
 #endif
 
+/* Define if your assembler supports SPARC6 instructions. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_SPARC6
+#endif
 
 /* Define if your assembler and linker support GOTDATA_OP relocs. */
 #ifndef USED_FOR_TARGET
diff --git a/gcc/config/sparc/driver-sparc.c b/gcc/config/sparc/driver-sparc.c
index b96ef47..0c25d6c 100644
--- a/gcc/config/sparc/driver-sparc.c
+++ b/gcc/config/sparc/driver-sparc.c
@@ -79,6 +79,7 @@ static const struct cpu_names {
 #endif
   { "SPARC-M7",		"niagara7" },
   { "SPARC-S7",		"niagara7" },
+  { "SPARC-M8",		"m8" },
   { NULL,	NULL }
   };
 
diff --git a/gcc/config/sparc/sol2.h b/gcc/config/sparc/sol2.h
index 8a50bfe..b8177c0 100644
--- a/gcc/config/sparc/sol2.h
+++ b/gcc/config/sparc/sol2.h
@@ -174,13 +174,22 @@ along with GCC; see the file COPYING3.  If not see
 #define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG AS_NIAGARA7_FLAG
 #endif
 
+#if TARGET_CPU_DEFAULT == TARGET_CPU_m8
+#undef CPP_CPU64_DEFAULT_SPEC
+#define CPP_CPU64_DEFAULT_SPEC ""
+#undef ASM_CPU32_DEFAULT_SPEC
+#define ASM_CPU32_DEFAULT_SPEC AS_SPARC32_FLAG AS_M8_FLAG
+#undef ASM_CPU64_DEFAULT_SPEC
+#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG AS_M8_FLAG
+#endif
+
 #undef CPP_CPU_SPEC
 #define CPP_CPU_SPEC "\
 %{mcpu=sparclet|mcpu=tsc701:-D__sparclet__} \
 %{mcpu=sparclite|mcpu-f930|mcpu=f934:-D__sparclite__} \
 %{mcpu=v8:" DEF_ARCH32_SPEC("-D__sparcv8") "} \
 %{mcpu=supersparc:-D__supersparc__ " DEF_ARCH32_SPEC("-D__sparcv8") "} \
-%{mcpu=v9|mcpu=ultrasparc|mcpu=ultrasparc3|mcpu=niagara|mcpu=niagara2|mcpu=niagara3|mcpu=niagara4|mcpu=niagara7:" DEF_ARCH32_SPEC("-D__sparcv8") "} \
+%{mcpu=v9|mcpu=ultrasparc|mcpu=ultrasparc3|mcpu=niagara|mcpu=niagara2|mcpu=niagara3|mcpu=niagara4|mcpu=niagara7|mcpu=m8:" DEF_ARCH32_SPEC("-D__sparcv8") "} \
 %{!mcpu*:%(cpp_cpu_default)} \
 "
 
@@ -290,7 +299,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 %{mcpu=niagara3:" DEF_ARCH32_SPEC("-xarch=v8plus" AS_NIAGARA3_FLAG) DEF_ARCH64_SPEC("-xarch=v9" AS_NIAGARA3_FLAG) "} \
 %{mcpu=niagara4:" DEF_ARCH32_SPEC(AS_SPARC32_FLAG AS_NIAGARA4_FLAG) DEF_ARCH64_SPEC(AS_SPARC64_FLAG AS_NIAGARA4_FLAG) "} \
 %{mcpu=niagara7:" DEF_ARCH32_SPEC(AS_SPARC32_FLAG AS_NIAGARA7_FLAG) DEF_ARCH64_SPEC(AS_SPARC64_FLAG AS_NIAGARA7_FLAG) "} \
-%{!mcpu=niagara7:%{!mcpu=niagara4:%{!mcpu=niagara3:%{!mcpu=niagara2:%{!mcpu=niagara:%{!mcpu=ultrasparc3:%{!mcpu=ultrasparc:%{!mcpu=v9:%{mcpu*:" DEF_ARCH32_SPEC("-xarch=v8") DEF_ARCH64_SPEC("-xarch=v9") "}}}}}}}}} \
+%{mcpu=m8:" DEF_ARCH32_SPEC(AS_SPARC32_FLAG AS_M8_FLAG) DEF_ARCH64_SPEC(AS_SPARC64_FLAG AS_M8_FLAG) "} \
+%{!mcpu=m8:%{!mcpu=niagara7:%{!mcpu=niagara4:%{!mcpu=niagara3:%{!mcpu=niagara2:%{!mcpu=niagara:%{!mcpu=ultrasparc3:%{!mcpu=ultrasparc:%{!mcpu=v9:%{mcpu*:" DEF_ARCH32_SPEC("-xarch=v8") DEF_ARCH64_SPEC("-xarch=v9") "}}}}}}}}}} \
 %{!mcpu*:%(asm_cpu_default)} \
 "
 
diff --git a/gcc/config/sparc/sparc-opts.h b/gcc/config/sparc/sparc-opts.h
index 6e7c2ac..581e86e 100644
--- a/gcc/config/sparc/sparc-opts.h
+++ b/gcc/config/sparc/sparc-opts.h
@@ -46,6 +46,7 @@ enum processor_type {
   PROCESSOR_NIAGARA3,
   PROCESSOR_NIAGARA4,
   PROCESSOR_NIAGARA7,
+  PROCESSOR_M8,
   PROCESSOR_NATIVE
 };
 
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 790a036..0644ab5 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -448,6 +448,30 @@ struct processor_costs niagara7_costs = {
   0, /* shift penalty */
 };
 
+static const
+struct processor_costs m8_costs = {
+  COSTS_N_INSNS (3), /* int load */
+  COSTS_N_INSNS (3), /* int signed load */
+  COSTS_N_INSNS (3), /* int zeroed load */
+  COSTS_N_INSNS (3), /* float load */
+  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
+  COSTS_N_INSNS (9), /* fadd, fsub */
+  COSTS_N_INSNS (9), /* fcmp */
+  COSTS_N_INSNS (9), /* fmov, fmovr */
+  COSTS_N_INSNS (9), /* fmul */
+  COSTS_N_INSNS (26), /* fdivs */
+  COSTS_N_INSNS (30), /* fdivd */
+  COSTS_N_INSNS (33), /* fsqrts */
+  COSTS_N_INSNS (41), /* fsqrtd */
+  COSTS_N_INSNS (12), /* imul */
+  COSTS_N_INSNS (10), /* imulX */
+  0, /* imul bit factor */
+  COSTS_N_INSNS (57), /* udiv/sdiv */
+  COSTS_N_INSNS (30), /* udivx/sdivx */
+  COSTS_N_INSNS (1), /* movcc/movr */
+  0, /* shift penalty */
+};
+
 static const struct processor_costs *sparc_costs = &cypress_costs;
 
 #ifdef HAVE_AS_RELAX_OPTION
@@ -1286,6 +1310,7 @@ sparc_option_override (void)
     { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
     { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
     { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
+    { TARGET_CPU_m8, PROCESSOR_M8 },
     { -1, PROCESSOR_V7 }
   };
   const struct cpu_default *def;
@@ -1337,6 +1362,9 @@ sparc_option_override (void)
       MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
     /* UltraSPARC M7 */
     { "niagara7",	MASK_ISA,
+      MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
+    /* UltraSPARC M8 */
+    { "m8",		MASK_ISA,
       MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
   };
   const struct cpu_table *cpu;
@@ -1529,7 +1557,8 @@ sparc_option_override (void)
 	  || sparc_cpu == PROCESSOR_NIAGARA3
 	  || sparc_cpu == PROCESSOR_NIAGARA4)
 	align_functions = 32;
-      else if (sparc_cpu == PROCESSOR_NIAGARA7)
+      else if (sparc_cpu == PROCESSOR_NIAGARA7
+	       || sparc_cpu == PROCESSOR_M8)
 	align_functions = 64;
     }
 
@@ -1597,6 +1626,9 @@ sparc_option_override (void)
     case PROCESSOR_NIAGARA7:
       sparc_costs = &niagara7_costs;
       break;
+    case PROCESSOR_M8:
+      sparc_costs = &m8_costs;
+      break;
     case PROCESSOR_NATIVE:
       gcc_unreachable ();
     };
@@ -1659,13 +1691,14 @@ sparc_option_override (void)
 			   || sparc_cpu == PROCESSOR_NIAGARA4)
 			  ? 2
 			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
-			     ? 8 : (sparc_cpu == PROCESSOR_NIAGARA7
+			     ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
+				     || sparc_cpu == PROCESSOR_M8)
 				    ? 32 : 3))),
 			 global_options.x_param_values,
 			 global_options_set.x_param_values);
 
-  /* For PARAM_L1_CACHE_LINE_SIZE we use the default 32 bytes (see
-     params.def), so no maybe_set_param_value is needed.
+  /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
+     bytes.
 
      The Oracle SPARC Architecture (previously the UltraSPARC
      Architecture) specification states that when a PREFETCH[A]
@@ -1681,6 +1714,11 @@ sparc_option_override (void)
      L2 and L3, but only 32B are brought into the L1D$. (Assuming it
      is a read_n prefetch, which is the only type which allocates to
      the L1.)  */
+  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
+			 (sparc_cpu == PROCESSOR_M8
+			  ? 64 : 32),
+			 global_options.x_param_values,
+			 global_options_set.x_param_values);
 
   /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
      Hardvard level-1 caches) in kilobytes.  Both UltraSPARC and
@@ -1692,7 +1730,8 @@ sparc_option_override (void)
 			   || sparc_cpu == PROCESSOR_NIAGARA2
 			   || sparc_cpu == PROCESSOR_NIAGARA3
 			   || sparc_cpu == PROCESSOR_NIAGARA4
-			   || sparc_cpu == PROCESSOR_NIAGARA7)
+			   || sparc_cpu == PROCESSOR_NIAGARA7
+			   || sparc_cpu == PROCESSOR_M8)
 			  ? 16 : 64),
 			 global_options.x_param_values,
 			 global_options_set.x_param_values);
@@ -1701,7 +1740,8 @@ sparc_option_override (void)
   /* PARAM_L2_CACHE_SIZE is the size fo the L2 in kilobytes.  Note
      that 512 is the default in params.def.  */
   maybe_set_param_value (PARAM_L2_CACHE_SIZE,
-			 (sparc_cpu == PROCESSOR_NIAGARA4
+			 ((sparc_cpu == PROCESSOR_NIAGARA4
+			   || sparc_cpu == PROCESSOR_M8)
 			  ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
 				   ? 256 : 512)),
 			 global_options.x_param_values,
@@ -9478,7 +9518,8 @@ sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
       && sparc_cpu != PROCESSOR_NIAGARA2
       && sparc_cpu != PROCESSOR_NIAGARA3
       && sparc_cpu != PROCESSOR_NIAGARA4
-      && sparc_cpu != PROCESSOR_NIAGARA7)
+      && sparc_cpu != PROCESSOR_NIAGARA7
+      && sparc_cpu != PROCESSOR_M8)
     emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
 
   /* Call __enable_execute_stack after writing onto the stack to make sure
@@ -9524,7 +9565,8 @@ sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
       && sparc_cpu != PROCESSOR_NIAGARA2
       && sparc_cpu != PROCESSOR_NIAGARA3
       && sparc_cpu != PROCESSOR_NIAGARA4
-      && sparc_cpu != PROCESSOR_NIAGARA7)
+      && sparc_cpu != PROCESSOR_NIAGARA7
+      && sparc_cpu != PROCESSOR_M8)
     emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
 
   /* Call __enable_execute_stack after writing onto the stack to make sure
@@ -9724,7 +9766,8 @@ sparc_use_sched_lookahead (void)
       || sparc_cpu == PROCESSOR_NIAGARA3)
     return 0;
   if (sparc_cpu == PROCESSOR_NIAGARA4
-      || sparc_cpu == PROCESSOR_NIAGARA7)
+      || sparc_cpu == PROCESSOR_NIAGARA7
+      || sparc_cpu == PROCESSOR_M8)
     return 2;
   if (sparc_cpu == PROCESSOR_ULTRASPARC
       || sparc_cpu == PROCESSOR_ULTRASPARC3)
@@ -9758,6 +9801,7 @@ sparc_issue_rate (void)
       return 2;
     case PROCESSOR_ULTRASPARC:
     case PROCESSOR_ULTRASPARC3:
+    case PROCESSOR_M8:
       return 4;
     }
 }
@@ -11458,7 +11502,8 @@ sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
 	  || sparc_cpu == PROCESSOR_NIAGARA2
 	  || sparc_cpu == PROCESSOR_NIAGARA3
 	  || sparc_cpu == PROCESSOR_NIAGARA4
-	  || sparc_cpu == PROCESSOR_NIAGARA7)
+	  || sparc_cpu == PROCESSOR_NIAGARA7
+	  || sparc_cpu == PROCESSOR_M8)
 	return 12;
 
       return 6;
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index 581774e..2bd667f 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -143,6 +143,7 @@ extern enum cmodel sparc_cmodel;
 #define TARGET_CPU_niagara3	15
 #define TARGET_CPU_niagara4	16
 #define TARGET_CPU_niagara7	19
+#define TARGET_CPU_m8		20
 
 #if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \
  || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc \
@@ -151,7 +152,8 @@ extern enum cmodel sparc_cmodel;
  || TARGET_CPU_DEFAULT == TARGET_CPU_niagara2 \
  || TARGET_CPU_DEFAULT == TARGET_CPU_niagara3 \
  || TARGET_CPU_DEFAULT == TARGET_CPU_niagara4 \
- || TARGET_CPU_DEFAULT == TARGET_CPU_niagara7
+ || TARGET_CPU_DEFAULT == TARGET_CPU_niagara7 \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_m8
 
 #define CPP_CPU32_DEFAULT_SPEC ""
 #define ASM_CPU32_DEFAULT_SPEC ""
@@ -192,6 +194,10 @@ extern enum cmodel sparc_cmodel;
 #define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__"
 #define ASM_CPU64_DEFAULT_SPEC AS_NIAGARA7_FLAG
 #endif
+#if TARGET_CPU_DEFAULT == TARGET_CPU_m8
+#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__"
+#define ASM_CPU64_DEFAULT_SPEC AS_M8_FLAG
+#endif
 
 #else
 
@@ -295,6 +301,7 @@ extern enum cmodel sparc_cmodel;
 %{mcpu=niagara3:-D__sparc_v9__} \
 %{mcpu=niagara4:-D__sparc_v9__} \
 %{mcpu=niagara7:-D__sparc_v9__} \
+%{mcpu=m8:-D__sparc_v9__} \
 %{!mcpu*:%(cpp_cpu_default)} \
 "
 #define CPP_ARCH32_SPEC ""
@@ -347,6 +354,7 @@ extern enum cmodel sparc_cmodel;
 %{mcpu=niagara3:%{!mv8plus:-Av9" AS_NIAGARA3_FLAG "}} \
 %{mcpu=niagara4:%{!mv8plus:" AS_NIAGARA4_FLAG "}} \
 %{mcpu=niagara7:%{!mv8plus:" AS_NIAGARA7_FLAG "}} \
+%{mcpu=m8:%{!mv8plus:" AS_M8_FLAG "}} \
 %{!mcpu*:%(asm_cpu_default)} \
 "
 
@@ -1799,6 +1807,12 @@ extern int sparc_indent_opcode;
 #define AS_NIAGARA7_FLAG AS_NIAGARA4_FLAG
 #endif
 
+#ifdef HAVE_AS_SPARC6
+#define AS_M8_FLAG "-xarch=sparc6"
+#else
+#define AS_M8_FLAG AS_NIAGARA7_FLAG
+#endif
+
 #ifdef HAVE_AS_LEON
 #define AS_LEON_FLAG "-Aleon"
 #define AS_LEONV7_FLAG "-Aleon"
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index b550f037..b666992 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -238,7 +238,8 @@
    niagara2,
    niagara3,
    niagara4,
-   niagara7"
+   niagara7,
+   m8"
   (const (symbol_ref "sparc_cpu_attr")))
 
 ;; Attribute for the instruction set.
diff --git a/gcc/config/sparc/sparc.opt b/gcc/config/sparc/sparc.opt
index 86f85d9..4f30cc4 100644
--- a/gcc/config/sparc/sparc.opt
+++ b/gcc/config/sparc/sparc.opt
@@ -209,6 +209,9 @@ Enum(sparc_processor_type) String(niagara4) Value(PROCESSOR_NIAGARA4)
 EnumValue
 Enum(sparc_processor_type) String(niagara7) Value(PROCESSOR_NIAGARA7)
 
+EnumValue
+Enum(sparc_processor_type) String(m8) Value(PROCESSOR_M8)
+
 mcmodel=
 Target RejectNegative Joined Var(sparc_cmodel_string)
 Use given SPARC-V9 code model.
diff --git a/gcc/configure b/gcc/configure
index 317517c..2a1d87e 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -25279,6 +25279,41 @@ $as_echo "#define HAVE_AS_SPARC5_VIS4 1" >>confdefs.h
 
 fi
 
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for SPARC6 instructions" >&5
+$as_echo_n "checking assembler for SPARC6 instructions... " >&6; }
+if test "${gcc_cv_as_sparc_sparc6+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  gcc_cv_as_sparc_sparc6=no
+  if test x$gcc_cv_as != x; then
+    $as_echo '.text
+       .register %g2, #scratch
+       .register %g3, #scratch
+       .align 4
+       rd %entropy, %g1
+       fpsll64x %f0, %f2, %f4' > conftest.s
+    if { ac_try='$gcc_cv_as $gcc_cv_as_flags -xarch=sparc6 -o conftest.o conftest.s >&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+    then
+	gcc_cv_as_sparc_sparc6=yes
+    else
+      echo "configure: failed program was" >&5
+      cat conftest.s >&5
+    fi
+    rm -f conftest.o conftest.s
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_sparc_sparc6" >&5
+$as_echo "$gcc_cv_as_sparc_sparc6" >&6; }
+if test $gcc_cv_as_sparc_sparc6 = yes; then
+
+$as_echo "#define HAVE_AS_SPARC6 1" >>confdefs.h
+
+fi
 
     { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for LEON instructions" >&5
 $as_echo_n "checking assembler for LEON instructions... " >&6; }
diff --git a/gcc/configure.ac b/gcc/configure.ac
index e1b03a9..987aa0d 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -4003,6 +4003,18 @@ foo:
       [AC_DEFINE(HAVE_AS_SPARC5_VIS4, 1,
                 [Define if your assembler supports SPARC5 and VIS 4.0 instructions.])])
 
+    gcc_GAS_CHECK_FEATURE([SPARC6 instructions],
+      gcc_cv_as_sparc_sparc6,,
+      [-xarch=sparc6],
+      [.text
+       .register %g2, #scratch
+       .register %g3, #scratch
+       .align 4
+       rd %entropy, %g1
+       fpsll64x %f0, %f2, %f4],,
+      [AC_DEFINE(HAVE_AS_SPARC6, 1,
+                [Define if your assembler supports SPARC6 instructions.])])
+
     gcc_GAS_CHECK_FEATURE([LEON instructions],
       gcc_cv_as_sparc_leon,,
       [-Aleon],
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index d1e097b..4e320ec 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -23862,7 +23862,7 @@ for machine type @var{cpu_type}.  Supported values for @var{cpu_type} are
 @samp{leon}, @samp{leon3}, @samp{leon3v7}, @samp{sparclite}, @samp{f930},
 @samp{f934}, @samp{sparclite86x}, @samp{sparclet}, @samp{tsc701}, @samp{v9},
 @samp{ultrasparc}, @samp{ultrasparc3}, @samp{niagara}, @samp{niagara2},
-@samp{niagara3}, @samp{niagara4} and @samp{niagara7}.
+@samp{niagara3}, @samp{niagara4}, @samp{niagara7} and @samp{m8}.
 
 Native Solaris and GNU/Linux toolchains also support the value @samp{native},
 which selects the best architecture option for the host processor.
@@ -23890,7 +23890,8 @@ f930, f934, sparclite86x
 tsc701
 
 @item v9
-ultrasparc, ultrasparc3, niagara, niagara2, niagara3, niagara4, niagara7
+ultrasparc, ultrasparc3, niagara, niagara2, niagara3, niagara4,
+niagara7, m8
 @end table
 
 By default (unless configured otherwise), GCC generates code for the V7
@@ -23934,7 +23935,8 @@ additionally optimizes it for Sun UltraSPARC T2 chips. With
 UltraSPARC T3 chips.  With @option{-mcpu=niagara4}, the compiler
 additionally optimizes it for Sun UltraSPARC T4 chips.  With
 @option{-mcpu=niagara7}, the compiler additionally optimizes it for
-Oracle SPARC M7 chips.
+Oracle SPARC M7 chips.  With @option{-mcpu=m8}, the compiler
+additionally optimizes it for Oracle M8 chips.
 
 @item -mtune=@var{cpu_type}
 @opindex mtune
@@ -23949,8 +23951,8 @@ that select a particular CPU implementation.  Those are
 @samp{leon3}, @samp{leon3v7}, @samp{f930}, @samp{f934},
 @samp{sparclite86x}, @samp{tsc701}, @samp{ultrasparc},
 @samp{ultrasparc3}, @samp{niagara}, @samp{niagara2}, @samp{niagara3},
-@samp{niagara4} and @samp{niagara7}.  With native Solaris and
-GNU/Linux toolchains, @samp{native} can also be used.
+@samp{niagara4}, @samp{niagara7} and @samp{m8}.  With native Solaris
+and GNU/Linux toolchains, @samp{native} can also be used.
 
 @item -mv8plus
 @itemx -mno-v8plus
-- 
2.3.4

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 6/7] sparc: support for VIS4B instructions
  2017-06-29 11:43 [PATCH 0/7] Support for the SPARC M8 cpu Jose E. Marchesi
@ 2017-06-29 11:43 ` Jose E. Marchesi
  2017-06-29 11:43 ` [PATCH 7/7] sparc: M8 DFA scheduler Jose E. Marchesi
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Jose E. Marchesi @ 2017-06-29 11:43 UTC (permalink / raw)
  To: gcc-patches

This patch adds suppport for the following VIS instructions, which are
introduced in the Oracle SPARC Architecture 2017 and implemented by the
SPARC M8 cpu:

- Dictionary unpack.
- Partitioned compare with shifted result.
- Unsigned partitioned compare with shifted result.
- Partitioned dual-equal compare with shifted result.
- Partitioned unsigned range compare with shifted result.

The facilities introduced are:

- A new option -mvis4b.
- Compiler built-ins for the above mentioned instructions.

Tests and documentation are also provided.

gcc/ChangeLog:

	* config/sparc/sparc.opt: New option -mvis4b.
	* config/sparc/sparc.c (dump_target_flag_bits): Handle MASK_VIS4B.
	(sparc_option_override): Handle VIS4B.
	(enum sparc_builtins): Define
	SPARC_BUILTIN_DICTUNPACK{8,16,32},
	SPARC_BUILTIN_FPCMP{LE,GT,EQ,NE}{8,16,32}SHL,
	SPARC_BUILTIN_FPCMPU{LE,GT}{8,16,32}SHL,
	SPARC_BUILTIN_FPCMPDE{8,16,32}SHL and
	SPARC_BUILTIN_FPCMPUR{8,16,32}SHL.
	(check_constant_argument): New function.
	(sparc_vis_init_builtins): Define builtins
	__builtin_vis_dictunpack{8,16,32},
	__builtin_vis_fpcmp{le,gt,eq,ne}{8,16,32}shl,
	__builtin_vis_fpcmpu{le,gt}{8,16,32}shl,
	__builtin_vis_fpcmpde{8,16,32}shl and
	__builtin_vis_fpcmpur{8,16,32}shl.
	(sparc_expand_builtin): Check that the constant operands to
	__builtin_vis_fpcmp*shl and _builtin_vis_dictunpack* are indeed
	constant and in range.
	* config/sparc/sparc-c.c (sparc_target_macros): Handle
	TARGET_VIS4B.
	* config/sparc/sparc.h (SPARC_IMM2_P): Define.
	(SPARC_IMM5_P): Likewise.
	* config/sparc/sparc.md (cpu_feature): Add new feagure "vis4b".
	(enabled): Handle vis4b.
	(UNSPEC_DICTUNPACK): New unspec.
	(UNSPEC_FPCMPSHL): Likewise.
	(UNSPEC_FPUCMPSHL): Likewise.
	(UNSPEC_FPCMPDESHL): Likewise.
	(UNSPEC_FPCMPURSHL): Likewise.
	(cpu_feature): New CPU feature `vis4b'.
	(dictunpack{8,16,32}): New insns.
	(FPCSMODE): New mode iterator.
	(fpcscond): New code iterator.
	(fpcsucond): Likewise.
	(fpcmp{le,gt,eq,ne}{8,16,32}{si,di}shl): New insns.
	(fpcmpu{le,gt}{8,16,32}{si,di}shl): Likewise.
	(fpcmpde{8,16,32}{si,di}shl): Likewise.
	(fpcmpur{8,16,32}{si,di}shl): Likewise.
	* config/sparc/constraints.md: Define constraints `q' for unsigned
	2-bit integer constants and `t' for unsigned 5-bit integer
	constants.
	* config/sparc/predicates.md (imm5_operand_dictunpack8): New
	predicate.
	(imm5_operand_dictunpack16): Likewise.
	(imm5_operand_dictunpack32): Likewise.
	(imm2_operand): Likewise.
	* doc/invoke.texi (SPARC Options): Document -mvis4b.
	* doc/extend.texi (SPARC VIS Built-in Functions): Document the
	ditunpack* and fpcmp*shl builtins.

gcc/testsuite/ChangeLog:

	* gcc.target/sparc/dictunpack.c: New file.
	* gcc.target/sparc/fpcmpdeshl.c: Likewise.
	* gcc.target/sparc/fpcmpshl.c: Likewise.
	* gcc.target/sparc/fpcmpurshl.c: Likewise.
	* gcc.target/sparc/fpcmpushl.c: Likewise.
---
 gcc/ChangeLog                               |  53 ++++++
 gcc/config/sparc/constraints.md             |  12 +-
 gcc/config/sparc/predicates.md              |  27 +++
 gcc/config/sparc/sparc-c.c                  |   7 +-
 gcc/config/sparc/sparc.c                    | 247 +++++++++++++++++++++++++++-
 gcc/config/sparc/sparc.h                    |   4 +
 gcc/config/sparc/sparc.md                   |  73 +++++++-
 gcc/config/sparc/sparc.opt                  |   4 +
 gcc/doc/extend.texi                         |  39 +++++
 gcc/doc/invoke.texi                         |  13 ++
 gcc/testsuite/ChangeLog                     |   8 +
 gcc/testsuite/gcc.target/sparc/dictunpack.c |  25 +++
 gcc/testsuite/gcc.target/sparc/fpcmpdeshl.c |  25 +++
 gcc/testsuite/gcc.target/sparc/fpcmpshl.c   |  81 +++++++++
 gcc/testsuite/gcc.target/sparc/fpcmpurshl.c |  25 +++
 gcc/testsuite/gcc.target/sparc/fpcmpushl.c  |  43 +++++
 16 files changed, 677 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/sparc/dictunpack.c
 create mode 100644 gcc/testsuite/gcc.target/sparc/fpcmpdeshl.c
 create mode 100644 gcc/testsuite/gcc.target/sparc/fpcmpshl.c
 create mode 100644 gcc/testsuite/gcc.target/sparc/fpcmpurshl.c
 create mode 100644 gcc/testsuite/gcc.target/sparc/fpcmpushl.c

diff --git a/gcc/config/sparc/constraints.md b/gcc/config/sparc/constraints.md
index 7c9ef74..cff5a61 100644
--- a/gcc/config/sparc/constraints.md
+++ b/gcc/config/sparc/constraints.md
@@ -19,7 +19,7 @@
 
 ;;; Unused letters:
 ;;;     B
-;;;    a        jkl    q  tuv xyz
+;;;    a        jkl        uv xyz
 
 
 ;; Register constraints
@@ -58,6 +58,16 @@
 
 ;; Integer constant constraints
 
+(define_constraint "q"
+ "Unsigned 2-bit integer constant"
+  (and (match_code "const_int")
+       (match_test "SPARC_IMM2_P (ival)")))
+
+(define_constraint "t"
+ "Unsigned 5-bit integer constant"
+ (and (match_code "const_int")
+      (match_test "SPARC_IMM5_P (ival)")))
+
 (define_constraint "A"
  "Signed 5-bit integer constant"
  (and (match_code "const_int")
diff --git a/gcc/config/sparc/predicates.md b/gcc/config/sparc/predicates.md
index 951933e..3f8526d 100644
--- a/gcc/config/sparc/predicates.md
+++ b/gcc/config/sparc/predicates.md
@@ -328,6 +328,33 @@
        (and (match_code "const_int")
             (match_test "SPARC_SIMM5_P (INTVAL (op))"))))
 
+;; Return true if OP is a constant in the range 0..7.  This is an
+;; acceptable second operand for dictunpack instructions setting a
+;; V8QI mode in the destination register.
+(define_predicate "imm5_operand_dictunpack8"
+  (and (match_code "const_int")
+       (match_test "(INTVAL (op) >= 0 && INTVAL (op) < 8)")))
+
+;; Return true if OP is a constant in the range 7..15.  This is an
+;; acceptable second operand for dictunpack instructions setting a
+;; V4HI mode in the destination register.
+(define_predicate "imm5_operand_dictunpack16"
+  (and (match_code "const_int")
+       (match_test "(INTVAL (op) >= 8 && INTVAL (op) < 16)")))
+
+;; Return true if OP is a constant in the range 15..31.  This is an
+;; acceptable second operand for dictunpack instructions setting a
+;; V2SI mode in the destination register.
+(define_predicate "imm5_operand_dictunpack32"
+  (and (match_code "const_int")
+       (match_test "(INTVAL (op) >= 16 && INTVAL (op) < 32)")))
+
+;; Return true if OP is a constant that is representable by a 2-bit
+;; unsigned field.  This is an acceptable third operand for
+;; fpcmp*shl instructions.
+(define_predicate "imm2_operand"
+  (and (match_code "const_int")
+       (match_test "SPARC_IMM2_P (INTVAL (op))")))
 
 ;; Predicates for miscellaneous instructions.
 
diff --git a/gcc/config/sparc/sparc-c.c b/gcc/config/sparc/sparc-c.c
index 9603173..4aacfff 100644
--- a/gcc/config/sparc/sparc-c.c
+++ b/gcc/config/sparc/sparc-c.c
@@ -40,7 +40,12 @@ sparc_target_macros (void)
       cpp_assert (parse_in, "machine=sparc");
     }
 
-  if (TARGET_VIS4)
+  if (TARGET_VIS4B)
+    {
+      cpp_define (parse_in, "__VIS__=0x410");
+      cpp_define (parse_in, "__VIS=0x410");
+    }
+  else if (TARGET_VIS4)
     {
       cpp_define (parse_in, "__VIS__=0x400");
       cpp_define (parse_in, "__VIS=0x400");
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 0644ab5..dfbf2c3 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -1246,6 +1246,8 @@ dump_target_flag_bits (const int flags)
     fprintf (stderr, "VIS3 ");
   if (flags & MASK_VIS4)
     fprintf (stderr, "VIS4 ");
+  if (flags & MASK_VIS4B)
+    fprintf (stderr, "VIS4B ");
   if (flags & MASK_CBCOND)
     fprintf (stderr, "CBCOND ");
   if (flags & MASK_DEPRECATED_V8_INSNS)
@@ -1365,7 +1367,8 @@ sparc_option_override (void)
       MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
     /* UltraSPARC M8 */
     { "m8",		MASK_ISA,
-      MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
+      MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC
+      |MASK_VIS4B }
   };
   const struct cpu_table *cpu;
   unsigned int i;
@@ -1495,6 +1498,9 @@ sparc_option_override (void)
 #ifndef HAVE_AS_SPARC5_VIS4
 		   & ~(MASK_VIS4 | MASK_SUBXC)
 #endif
+#ifndef HAVE_AS_SPARC6
+		   & ~(MASK_VIS4B)
+#endif
 #ifndef HAVE_AS_LEON
 		   & ~(MASK_LEON | MASK_LEON3)
 #endif
@@ -1513,11 +1519,15 @@ sparc_option_override (void)
   if (TARGET_VIS4)
     target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
 
-  /* Don't allow -mvis, -mvis2, -mvis3, -mvis4 or -mfmaf if FPU is
-     disabled.  */
+  /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
+  if (TARGET_VIS4B)
+    target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
+
+  /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b and -mfmaf if
+     FPU is disabled.  */
   if (! TARGET_FPU)
     target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
-		      | MASK_FMAF);
+		      | MASK_VIS4B | MASK_FMAF);
 
   /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
      are available; -m64 also implies v9.  */
@@ -10384,6 +10394,45 @@ enum sparc_builtins
   SPARC_BUILTIN_FPSUBS8,
   SPARC_BUILTIN_FPSUBUS8,
   SPARC_BUILTIN_FPSUBUS16,
+
+  /* VIS 4.0B builtins.  */
+
+  /* Note that all the DICTUNPACK* entries should be kept
+     contiguous.  */
+  SPARC_BUILTIN_FIRST_DICTUNPACK,
+  SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
+  SPARC_BUILTIN_DICTUNPACK16,
+  SPARC_BUILTIN_DICTUNPACK32,
+  SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
+
+  /* Note that all the FPCMP*SHL entries should be kept
+     contiguous.  */
+  SPARC_BUILTIN_FIRST_FPCMPSHL,
+  SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
+  SPARC_BUILTIN_FPCMPGT8SHL,
+  SPARC_BUILTIN_FPCMPEQ8SHL,
+  SPARC_BUILTIN_FPCMPNE8SHL,
+  SPARC_BUILTIN_FPCMPLE16SHL,
+  SPARC_BUILTIN_FPCMPGT16SHL,
+  SPARC_BUILTIN_FPCMPEQ16SHL,
+  SPARC_BUILTIN_FPCMPNE16SHL,
+  SPARC_BUILTIN_FPCMPLE32SHL,
+  SPARC_BUILTIN_FPCMPGT32SHL,
+  SPARC_BUILTIN_FPCMPEQ32SHL,
+  SPARC_BUILTIN_FPCMPNE32SHL,
+  SPARC_BUILTIN_FPCMPULE8SHL,
+  SPARC_BUILTIN_FPCMPUGT8SHL,
+  SPARC_BUILTIN_FPCMPULE16SHL,
+  SPARC_BUILTIN_FPCMPUGT16SHL,
+  SPARC_BUILTIN_FPCMPULE32SHL,
+  SPARC_BUILTIN_FPCMPUGT32SHL,
+  SPARC_BUILTIN_FPCMPDE8SHL,
+  SPARC_BUILTIN_FPCMPDE16SHL,
+  SPARC_BUILTIN_FPCMPDE32SHL,
+  SPARC_BUILTIN_FPCMPUR8SHL,
+  SPARC_BUILTIN_FPCMPUR16SHL,
+  SPARC_BUILTIN_FPCMPUR32SHL,
+  SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
   
   SPARC_BUILTIN_MAX
 };
@@ -10391,6 +10440,27 @@ enum sparc_builtins
 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
 
+/* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
+   The instruction should require a constant operand of some sort.  The
+   function prints an error if OPVAL is not valid.  */
+
+static int
+check_constant_argument (enum insn_code icode, int opnum, rtx opval)
+{
+  if (GET_CODE (opval) != CONST_INT)
+    {
+      error ("%qs expects a constant argument", insn_data[icode].name);
+      return false;
+    }
+
+  if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
+    {
+      error ("constant argument out of range for %qs", insn_data[icode].name);
+      return false;
+    }
+  return true;
+}
+
 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE.  Return the
    function decl or NULL_TREE if the builtin was not added.  */
 
@@ -10484,6 +10554,12 @@ sparc_vis_init_builtins (void)
 						      v8qi, v8qi, 0);
   tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
 						      v8qi, v8qi, 0);
+  tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
+						    intSI_type_node, 0);
+  tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
+						    intSI_type_node, 0);
+  tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
+						    intDI_type_node, 0);
   tree di_ftype_di_di = build_function_type_list (intDI_type_node,
 						  intDI_type_node,
 						  intDI_type_node, 0);
@@ -10938,6 +11014,156 @@ sparc_vis_init_builtins (void)
       def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
 			 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
     }
+
+  if (TARGET_VIS4B)
+    {
+      def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
+			 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
+      def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
+			 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
+      def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
+			 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
+
+      if (TARGET_ARCH64)
+	{
+	  tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
+								 v8qi, v8qi,
+								 intSI_type_node, 0);
+	  tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
+								 v4hi, v4hi,
+								 intSI_type_node, 0);
+	  tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
+								 v2si, v2si,
+								 intSI_type_node, 0);
+	  
+	  def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
+			     SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
+			     SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
+			     SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
+			     SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
+
+	  def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
+			     SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
+			     SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
+			     SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
+			     SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
+
+	  def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
+			     SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
+	  def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
+			     SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
+	  def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
+			     SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
+	  def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
+			     SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
+
+
+	  def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
+			     SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
+			     SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
+
+	  def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
+			     SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
+			     SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
+
+	  def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
+			     SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
+	  def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
+			     SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
+
+	  def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
+			     SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
+			     SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
+			     SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
+
+	  def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
+			     SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
+			     SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
+			     SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
+
+	}
+      else
+	{
+	  tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
+								 v8qi, v8qi,
+								 intSI_type_node, 0);
+	  tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
+								 v4hi, v4hi,
+								 intSI_type_node, 0);
+	  tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
+								 v2si, v2si,
+								 intSI_type_node, 0);
+	  
+	  def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
+			     SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
+			     SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
+			     SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
+			     SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
+
+	  def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
+			     SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
+			     SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
+			     SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
+			     SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
+
+	  def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
+			     SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v2si_v2si_si);
+	  def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
+			     SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
+	  def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
+			     SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
+	  def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
+			     SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
+
+
+	  def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
+			     SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
+			     SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
+
+	  def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
+			     SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
+			     SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
+
+	  def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
+			     SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
+	  def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
+			     SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
+
+	  def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
+			     SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v4hi_v4hi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
+			     SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
+			     SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
+
+	  def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
+			     SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
+			     SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
+	  def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
+			     SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
+	}
+    }
 }
 
 /* Implement TARGET_BUILTIN_DECL hook.  */
@@ -10992,6 +11218,19 @@ sparc_expand_builtin (tree exp, rtx target,
       insn_op = &insn_data[icode].operand[idx];
       op[arg_count] = expand_normal (arg);
 
+      /* Some of the builtins require constant arguments.  We check
+	 for this here.  */
+      if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
+	   && code <= SPARC_BUILTIN_LAST_FPCMPSHL
+	   && arg_count == 3)
+	  || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
+	      && code <= SPARC_BUILTIN_LAST_DICTUNPACK
+	      && arg_count == 2))
+	{
+	  if (!check_constant_argument (icode, idx, op[arg_count]))
+	    return const0_rtx;
+	}
+
       if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
 	{
 	  if (!address_operand (op[arg_count], SImode))
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index 2bd667f..d7c617e 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -1047,6 +1047,10 @@ extern char leaf_reg_remap[];
 /* Local macro to handle the two v9 classes of FP regs.  */
 #define FP_REG_CLASS_P(CLASS) ((CLASS) == FP_REGS || (CLASS) == EXTRA_FP_REGS)
 
+/* Predicate for 2-bit and 5-bit unsigned constants.  */
+#define SPARC_IMM2_P(X) (((unsigned HOST_WIDE_INT) (X) & ~0x3) == 0)
+#define SPARC_IMM5_P(X) (((unsigned HOST_WIDE_INT) (X) & ~0x1F)	== 0)
+
 /* Predicates for 5-bit, 10-bit, 11-bit and 13-bit signed constants.  */
 #define SPARC_SIMM5_P(X)  ((unsigned HOST_WIDE_INT) (X) + 0x10 < 0x20)
 #define SPARC_SIMM10_P(X) ((unsigned HOST_WIDE_INT) (X) + 0x200 < 0x400)
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index b666992..407544b 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -94,6 +94,12 @@
   UNSPEC_ADDV
   UNSPEC_SUBV
   UNSPEC_NEGV
+
+  UNSPEC_DICTUNPACK
+  UNSPEC_FPCMPSHL
+  UNSPEC_FPUCMPSHL
+  UNSPEC_FPCMPDESHL
+  UNSPEC_FPCMPURSHL
 ])
 
 (define_c_enum "unspecv" [
@@ -252,7 +258,7 @@
 	 (symbol_ref "TARGET_SPARCLET") (const_string "sparclet")]
 	(const_string "v7"))))
 
-(define_attr "cpu_feature" "none,fpu,fpunotv9,v9,vis,vis3,vis4"
+(define_attr "cpu_feature" "none,fpu,fpunotv9,v9,vis,vis3,vis4,vis4b"
   (const_string "none"))
 
 (define_attr "lra" "disabled,enabled"
@@ -266,7 +272,8 @@
          (eq_attr "cpu_feature" "v9") (symbol_ref "TARGET_V9")
          (eq_attr "cpu_feature" "vis") (symbol_ref "TARGET_VIS")
          (eq_attr "cpu_feature" "vis3") (symbol_ref "TARGET_VIS3")
-         (eq_attr "cpu_feature" "vis4") (symbol_ref "TARGET_VIS4")]
+         (eq_attr "cpu_feature" "vis4") (symbol_ref "TARGET_VIS4")
+         (eq_attr "cpu_feature" "vis4b") (symbol_ref "TARGET_VIS4B")]
         (const_int 0)))
 
 ;; The SPARC instructions used by the backend are organized into a
@@ -329,7 +336,7 @@
 ;; fga/maxmin: FP{MAX,MIN}[U]{8,16,32}
 ;; fga/cmask: CMASK{8,16,32}
 ;; fga/other: BSHUFFLE FALIGNDATAg FP{ADD,SUB}[S]{8,16,32}
-;;            FP{ADD,SUB}US{8,16}
+;;            FP{ADD,SUB}US{8,16} DICTUNPACK
 ;; gsr/reg: RDGSR WRGSR
 ;; gsr/alignaddr: ALIGNADDRESS[_LITTLE]
 ;; vismv/double:  FSRC2d
@@ -343,6 +350,8 @@
 ;; visl/double: FONEd FZEROd FNOT1d F{OR,AND,XOR}d F{NOR,NAND,XNOR}d
 ;;              F{OR,AND}NOT1d F{OR,AND}NOT2d
 ;; viscmp: FPCMP{LE,GT,NE,EQ}{8,16,32} FPCMPU{LE,GT,NE,EQ}{8,16,32}
+;;         FPCMP{LE,GT,EQ,NE}{8,16,32}SHL FPCMPU{LE,GT,EQ,NE}{8,16,32}SHL
+;;         FPCMPDE{8,16,32}SHL FPCMPUR{8,16,32}SHL
 ;; fgm_pack: FPACKFIX FPACK{8,16,32}
 ;; fgm_mul: FMUL8SUx16 FMUL8ULx16 FMUL8x16 FMUL8x16AL
 ;;          FMUL8x16AU FMULD8SUx16 FMULD8ULx16
@@ -9656,4 +9665,62 @@ visl")
   [(set_attr "type" "fp")
    (set_attr "fptype" "double")])
 
+;; VIS4B instructions.
+
+(define_mode_iterator DUMODE [V2SI V4HI V8QI])
+
+(define_insn "dictunpack<DUMODE:vbits>"
+  [(set (match_operand:DUMODE 0 "register_operand" "=e")
+        (unspec:DUMODE [(match_operand:DF 1 "register_operand" "e")
+                        (match_operand:SI 2 "imm5_operand_dictunpack<DUMODE:vbits>" "t")]
+         UNSPEC_DICTUNPACK))]
+  "TARGET_VIS4B"
+  "dictunpack\t%1, %2, %0"
+  [(set_attr "type" "fga")
+   (set_attr "subtype" "other")])
+
+(define_mode_iterator FPCSMODE [V2SI V4HI V8QI])
+(define_code_iterator fpcscond [le gt eq ne])
+(define_code_iterator fpcsucond [le gt])
+
+(define_insn "fpcmp<fpcscond:code><FPCSMODE:vbits><P:mode>shl"
+  [(set (match_operand:P 0 "register_operand" "=r")
+        (unspec:P [(fpcscond:FPCSMODE (match_operand:FPCSMODE 1 "register_operand" "e")
+                                      (match_operand:FPCSMODE 2 "register_operand" "e"))
+                   (match_operand:SI 3 "imm2_operand" "q")]
+         UNSPEC_FPCMPSHL))]
+   "TARGET_VIS4B"
+   "fpcmp<fpcscond:code><FPCSMODE:vbits>shl\t%1, %2, %3, %0"
+   [(set_attr "type" "viscmp")])
+
+(define_insn "fpcmpu<fpcsucond:code><FPCSMODE:vbits><P:mode>shl"
+  [(set (match_operand:P 0 "register_operand" "=r")
+        (unspec:P [(fpcsucond:FPCSMODE (match_operand:FPCSMODE 1 "register_operand" "e")
+                                       (match_operand:FPCSMODE 2 "register_operand" "e"))
+                   (match_operand:SI 3 "imm2_operand" "q")]
+         UNSPEC_FPUCMPSHL))]
+   "TARGET_VIS4B"
+   "fpcmpu<fpcsucond:code><FPCSMODE:vbits>shl\t%1, %2, %3, %0"
+   [(set_attr "type" "viscmp")])
+
+(define_insn "fpcmpde<FPCSMODE:vbits><P:mode>shl"
+  [(set (match_operand:P 0 "register_operand" "=r")
+        (unspec:P [(match_operand:FPCSMODE 1 "register_operand" "e")
+                   (match_operand:FPCSMODE 2 "register_operand" "e")
+                   (match_operand:SI 3 "imm2_operand" "q")]
+         UNSPEC_FPCMPDESHL))]
+   "TARGET_VIS4B"
+   "fpcmpde<FPCSMODE:vbits>shl\t%1, %2, %3, %0"
+   [(set_attr "type" "viscmp")])
+
+(define_insn "fpcmpur<FPCSMODE:vbits><P:mode>shl"
+  [(set (match_operand:P 0 "register_operand" "=r")
+        (unspec:P [(match_operand:FPCSMODE 1 "register_operand" "e")
+                   (match_operand:FPCSMODE 2 "register_operand" "e")
+                   (match_operand:SI 3 "imm2_operand" "q")]
+         UNSPEC_FPCMPURSHL))]
+   "TARGET_VIS4B"
+   "fpcmpur<FPCSMODE:vbits>shl\t%1, %2, %3, %0"
+   [(set_attr "type" "viscmp")])
+
 (include "sync.md")
diff --git a/gcc/config/sparc/sparc.opt b/gcc/config/sparc/sparc.opt
index 4f30cc4..cc51bd4 100644
--- a/gcc/config/sparc/sparc.opt
+++ b/gcc/config/sparc/sparc.opt
@@ -81,6 +81,10 @@ mvis4
 Target Report Mask(VIS4)
 Use UltraSPARC Visual Instruction Set version 4.0 extensions.
 
+mvis4b
+Target Report Mask(VIS4B)
+Use additional VIS instructions introduced in OSA2017.
+
 mcbcond
 Target Report Mask(CBCOND)
 Use UltraSPARC Compare-and-Branch extensions.
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 7f5191d..9c2631d 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -19212,6 +19212,45 @@ v4hi __builtin_vis_fpminu16 (v4hi, v4hi);
 v2si __builtin_vis_fpminu32 (v2si, v2si);
 @end smallexample
 
+When you use the @option{-mvis4b} switch, the VIS version 4.0B
+built-in functions also become available:
+
+@smallexample
+v8qi __builtin_vis_dictunpack8 (double, int);
+v4hi __builtin_vis_dictunpack16 (double, int);
+v2si __builtin_vis_dictunpack32 (double, int);
+
+long __builtin_vis_fpcmple8shl (v8qi, v8qi, int);
+long __builtin_vis_fpcmpgt8shl (v8qi, v8qi, int);
+long __builtin_vis_fpcmpeq8shl (v8qi, v8qi, int);
+long __builtin_vis_fpcmpne8shl (v8qi, v8qi, int);
+
+long __builtin_vis_fpcmple16shl (v4hi, v4hi, int);
+long __builtin_vis_fpcmpgt16shl (v4hi, v4hi, int);
+long __builtin_vis_fpcmpeq16shl (v4hi, v4hi, int);
+long __builtin_vis_fpcmpne16shl (v4hi, v4hi, int);
+
+long __builtin_vis_fpcmple32shl (v2si, v2si, int);
+long __builtin_vis_fpcmpgt32shl (v2si, v2si, int);
+long __builtin_vis_fpcmpeq32shl (v2si, v2si, int);
+long __builtin_vis_fpcmpne32shl (v2si, v2si, int);
+
+long __builtin_vis_fpcmpule8shl (v8qi, v8qi, int);
+long __builtin_vis_fpcmpugt8shl (v8qi, v8qi, int);
+long __builtin_vis_fpcmpule16shl (v4hi, v4hi, int);
+long __builtin_vis_fpcmpugt16shl (v4hi, v4hi, int);
+long __builtin_vis_fpcmpule32shl (v2si, v2si, int);
+long __builtin_vis_fpcmpugt32shl (v2si, v2si, int);
+
+long __builtin_vis_fpcmpde8shl (v8qi, v8qi, int);
+long __builtin_vis_fpcmpde16shl (v4hi, v4hi, int);
+long __builtin_vis_fpcmpde32shl (v2si, v2si, int);
+
+long __builtin_vis_fpcmpur8shl (v8qi, v8qi, int);
+long __builtin_vis_fpcmpur16shl (v4hi, v4hi, int);
+long __builtin_vis_fpcmpur32shl (v2si, v2si, int);
+@end smallexample
+
 @node SPU Built-in Functions
 @subsection SPU Built-in Functions
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 4e320ec..434c6a1 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1125,6 +1125,7 @@ See RS/6000 and PowerPC Options.
 -muser-mode  -mno-user-mode @gol
 -mv8plus  -mno-v8plus  -mvis  -mno-vis @gol
 -mvis2  -mno-vis2  -mvis3  -mno-vis3 @gol
+-mvis4 -mno-vis4 -mvis4b -mno-vis4b @gol
 -mcbcond  -mno-cbcond  -mfmaf  -mno-fmaf  @gol
 -mpopc  -mno-popc  -msubxc  -mno-subxc@gol
 -mfix-at697f  -mfix-ut699 @gol
@@ -24000,6 +24001,18 @@ default is @option{-mvis4} when targeting a cpu that supports such
 instructions, such as niagara-7 and later.  Setting @option{-mvis4}
 also sets @option{-mvis3}, @option{-mvis2} and @option{-mvis}.
 
+@item -mvis4b
+@itemx -mno-vis4b
+@opindex mvis4b
+@opindex mno-vis4b
+With @option{-mvis4b}, GCC generates code that takes advantage of
+version 4.0 of the UltraSPARC Visual Instruction Set extensions, plus
+the additional VIS instructions introduced in the Oracle SPARC
+Architecture 2017.  The default is @option{-mvis4b} when targeting a
+cpu that supports such instructions, such as m8 and later.  Setting
+@option{-mvis4b} also sets @option{-mvis4}, @option{-mvis3},
+@option{-mvis2} and @option{-mvis}.
+
 @item -mcbcond
 @itemx -mno-cbcond
 @opindex mcbcond
diff --git a/gcc/testsuite/gcc.target/sparc/dictunpack.c b/gcc/testsuite/gcc.target/sparc/dictunpack.c
new file mode 100644
index 0000000..4334dee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/sparc/dictunpack.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mvis4b" } */
+
+typedef unsigned char vec8 __attribute__((vector_size(8)));
+typedef short vec16 __attribute__((vector_size(8)));
+typedef int vec32 __attribute__((vector_size(8)));
+
+vec8 test_dictunpack8 (double a)
+{
+  return __builtin_vis_dictunpack8 (a, 6);
+}
+
+vec16 test_dictunpack16 (double a)
+{
+  return __builtin_vis_dictunpack16 (a, 14);
+}
+
+vec32 test_dictunpack32 (double a)
+{
+  return __builtin_vis_dictunpack32 (a, 30);
+}
+
+/* { dg-final { scan-assembler "dictunpack\t%" } } */
+/* { dg-final { scan-assembler "dictunpack\t%" } } */
+/* { dg-final { scan-assembler "dictunpack\t%" } } */
diff --git a/gcc/testsuite/gcc.target/sparc/fpcmpdeshl.c b/gcc/testsuite/gcc.target/sparc/fpcmpdeshl.c
new file mode 100644
index 0000000..3e3daa6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/sparc/fpcmpdeshl.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mvis4b" } */
+
+typedef unsigned char vec8 __attribute__((vector_size(8)));
+typedef short vec16 __attribute__((vector_size(8)));
+typedef int vec32 __attribute__((vector_size(8)));
+
+long test_fpcmpde8shl (vec8 a, vec8 b)
+{
+  return __builtin_vis_fpcmpde8shl (a, b, 2);
+}
+
+long test_fpcmpde16shl (vec16 a, vec16 b)
+{
+  return __builtin_vis_fpcmpde16shl (a, b, 2);
+}
+
+long test_fpcmpde32shl (vec32 a, vec32 b)
+{
+  return __builtin_vis_fpcmpde32shl (a, b, 2);
+}
+
+/* { dg-final { scan-assembler "fpcmpde8shl\t%" } } */
+/* { dg-final { scan-assembler "fpcmpde16shl\t%" } } */
+/* { dg-final { scan-assembler "fpcmpde32shl\t%" } } */
diff --git a/gcc/testsuite/gcc.target/sparc/fpcmpshl.c b/gcc/testsuite/gcc.target/sparc/fpcmpshl.c
new file mode 100644
index 0000000..0985251
--- /dev/null
+++ b/gcc/testsuite/gcc.target/sparc/fpcmpshl.c
@@ -0,0 +1,81 @@
+/* { dg-do compile } */
+/* { dg-options "-mvis4b" } */
+
+typedef unsigned char vec8 __attribute__((vector_size(8)));
+typedef short vec16 __attribute__((vector_size(8)));
+typedef int vec32 __attribute__((vector_size(8)));
+
+long test_fpcmple8shl (vec8 a, vec8 b)
+{
+  return __builtin_vis_fpcmple8shl (a, b, 2);
+}
+
+long test_fpcmpgt8shl (vec8 a, vec8 b)
+{
+  return __builtin_vis_fpcmpgt8shl (a, b, 2);
+}
+
+long test_fpcmpeq8shl (vec8 a, vec8 b)
+{
+  return __builtin_vis_fpcmpeq8shl (a, b, 2);
+}
+
+long test_fpcmpne8shl (vec8 a, vec8 b)
+{
+  return __builtin_vis_fpcmpne8shl (a, b, 2);
+}
+
+long test_fpcmple16shl (vec16 a, vec16 b)
+{
+  return __builtin_vis_fpcmple16shl (a, b, 2);
+}
+
+long test_fpcmpgt16shl (vec16 a, vec16 b)
+{
+  return __builtin_vis_fpcmpgt16shl (a, b, 2);
+}
+
+long test_fpcmpeq16shl (vec16 a, vec16 b)
+{
+  return __builtin_vis_fpcmpeq16shl (a, b, 2);
+}
+
+long test_fpcmpne16shl (vec16 a, vec16 b)
+{
+  return __builtin_vis_fpcmpne16shl (a, b, 2);
+}
+
+long test_fpcmple32shl (vec32 a, vec32 b)
+{
+  return __builtin_vis_fpcmple32shl (a, b, 2);
+}
+
+long test_fpcmpgt32shl (vec32 a, vec32 b)
+{
+  return __builtin_vis_fpcmpgt32shl (a, b, 2);
+}
+
+long test_fpcmpeq32shl (vec32 a, vec32 b)
+{
+  return __builtin_vis_fpcmpeq32shl (a, b, 2);
+}
+
+long test_fpcmpne32shl (vec32 a, vec32 b)
+{
+  return __builtin_vis_fpcmpne32shl (a, b, 2);
+}
+
+/* { dg-final { scan-assembler "fpcmple8shl\t%" } } */
+/* { dg-final { scan-assembler "fpcmpgt8shl\t%" } } */
+/* { dg-final { scan-assembler "fpcmpeq8shl\t%" } } */
+/* { dg-final { scan-assembler "fpcmpne8shl\t%" } } */
+
+/* { dg-final { scan-assembler "fpcmple16shl\t%" } } */
+/* { dg-final { scan-assembler "fpcmpgt16shl\t%" } } */
+/* { dg-final { scan-assembler "fpcmpeq16shl\t%" } } */
+/* { dg-final { scan-assembler "fpcmpne16shl\t%" } } */
+
+/* { dg-final { scan-assembler "fpcmple32shl\t%" } } */
+/* { dg-final { scan-assembler "fpcmpgt32shl\t%" } } */
+/* { dg-final { scan-assembler "fpcmpeq32shl\t%" } } */
+/* { dg-final { scan-assembler "fpcmpne32shl\t%" } } */
diff --git a/gcc/testsuite/gcc.target/sparc/fpcmpurshl.c b/gcc/testsuite/gcc.target/sparc/fpcmpurshl.c
new file mode 100644
index 0000000..db74e01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/sparc/fpcmpurshl.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mvis4b" } */
+
+typedef unsigned char vec8 __attribute__((vector_size(8)));
+typedef short vec16 __attribute__((vector_size(8)));
+typedef int vec32 __attribute__((vector_size(8)));
+
+long test_fpcmpur8shl (vec8 a, vec8 b)
+{
+  return __builtin_vis_fpcmpur8shl (a, b, 2);
+}
+
+long test_fpcmpur16shl (vec16 a, vec16 b)
+{
+  return __builtin_vis_fpcmpur16shl (a, b, 2);
+}
+
+long test_fpcmpur32shl (vec32 a, vec32 b)
+{
+  return __builtin_vis_fpcmpur32shl (a, b, 2);
+}
+
+/* { dg-final { scan-assembler "fpcmpur8shl\t%" } } */
+/* { dg-final { scan-assembler "fpcmpur16shl\t%" } } */
+/* { dg-final { scan-assembler "fpcmpur32shl\t%" } } */
diff --git a/gcc/testsuite/gcc.target/sparc/fpcmpushl.c b/gcc/testsuite/gcc.target/sparc/fpcmpushl.c
new file mode 100644
index 0000000..fc58dedd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/sparc/fpcmpushl.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-mvis4b" } */
+
+typedef unsigned char vec8 __attribute__((vector_size(8)));
+typedef short vec16 __attribute__((vector_size(8)));
+typedef int vec32 __attribute__((vector_size(8)));
+
+long test_fpcmpule8shl (vec8 a, vec8 b)
+{
+  return __builtin_vis_fpcmpule8shl (a, b, 2);
+}
+
+long test_fpcmpugt8shl (vec8 a, vec8 b)
+{
+  return __builtin_vis_fpcmpugt8shl (a, b, 2);
+}
+
+long test_fpcmpule16shl (vec16 a, vec16 b)
+{
+  return __builtin_vis_fpcmpule16shl (a, b, 2);
+}
+
+long test_fpcmpugt16shl (vec16 a, vec16 b)
+{
+  return __builtin_vis_fpcmpugt16shl (a, b, 2);
+}
+
+long test_fpcmpule32shl (vec32 a, vec32 b)
+{
+  return __builtin_vis_fpcmpule32shl (a, b, 2);
+}
+
+long test_fpcmpugt32shl (vec32 a, vec32 b)
+{
+  return __builtin_vis_fpcmpugt32shl (a, b, 2);
+}
+
+/* { dg-final { scan-assembler "fpcmpule8shl\t%" } } */
+/* { dg-final { scan-assembler "fpcmpugt8shl\t%" } } */
+/* { dg-final { scan-assembler "fpcmpule16shl\t%" } } */
+/* { dg-final { scan-assembler "fpcmpugt16shl\t%" } } */
+/* { dg-final { scan-assembler "fpcmpule32shl\t%" } } */
+/* { dg-final { scan-assembler "fpcmpugt32shl\t%" } } */
-- 
2.3.4

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/7] sparc: put bmask* instructions in it's own insn type and adjust DFAs
  2017-06-29 11:43 [PATCH 0/7] Support for the SPARC M8 cpu Jose E. Marchesi
                   ` (5 preceding siblings ...)
  2017-06-29 11:43 ` [PATCH 2/7] sparc: put VIS compare instructions in it's own insn type and adjust DFAs Jose E. Marchesi
@ 2017-06-29 11:43 ` Jose E. Marchesi
  2017-07-04  8:46 ` [PATCH 0/7] Support for the SPARC M8 cpu Rainer Orth
  7 siblings, 0 replies; 12+ messages in thread
From: Jose E. Marchesi @ 2017-06-29 11:43 UTC (permalink / raw)
  To: gcc-patches

This patch introduces a new value for the insn type attribute bmask.
bmask instructions, which were previously typed as `array', are
adapted to use it, and finally the several DFA schedulers are updated
accordingly.

gcc/ChangeLog:

	* config/sparc/sparc.md: New instruction type `bmask'.
	(bmaskdi_vis): Use the `bmask' type.
	(bmasksi_vis): Likewise.
	* config/sparc/ultra3.md (us3_array): Likewise.
	* config/sparc/niagara7.md (n7_array): Likewise.
	* config/sparc/niagara4.md (n4_array): Likewise.
	* config/sparc/niagara2.md (niag2_vis): Likewise.
	(niag3_vis): Likewise.
	* config/sparc/niagara.md (niag_vis): Likewise.
---
 gcc/ChangeLog                | 12 ++++++++++++
 gcc/config/sparc/niagara.md  |  2 +-
 gcc/config/sparc/niagara2.md |  4 ++--
 gcc/config/sparc/niagara4.md |  2 +-
 gcc/config/sparc/niagara7.md |  4 ++--
 gcc/config/sparc/sparc.md    |  6 +++---
 gcc/config/sparc/ultra3.md   |  2 +-
 7 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/gcc/config/sparc/niagara.md b/gcc/config/sparc/niagara.md
index f79771f..f9a1f6d 100644
--- a/gcc/config/sparc/niagara.md
+++ b/gcc/config/sparc/niagara.md
@@ -114,5 +114,5 @@
  */
 (define_insn_reservation "niag_vis" 8
   (and (eq_attr "cpu" "niagara")
-    (eq_attr "type" "fga,visl,vismv,fgm_pack,fgm_mul,pdist,edge,edgen,gsr,array"))
+    (eq_attr "type" "fga,visl,vismv,fgm_pack,fgm_mul,pdist,edge,edgen,gsr,array,bmask"))
   "niag_pipe*8")
diff --git a/gcc/config/sparc/niagara2.md b/gcc/config/sparc/niagara2.md
index 9bcdd06..34ee630 100644
--- a/gcc/config/sparc/niagara2.md
+++ b/gcc/config/sparc/niagara2.md
@@ -111,10 +111,10 @@
 
 (define_insn_reservation "niag2_vis" 6
   (and (eq_attr "cpu" "niagara2")
-    (eq_attr "type" "fga,vismv,visl,fgm_pack,fgm_mul,pdist,edge,edgen,array,gsr"))
+    (eq_attr "type" "fga,vismv,visl,fgm_pack,fgm_mul,pdist,edge,edgen,array,bmask,gsr"))
   "niag2_pipe*6")
 
 (define_insn_reservation "niag3_vis" 9
   (and (eq_attr "cpu" "niagara3")
-    (eq_attr "type" "fga,vismv,visl,fgm_pack,fgm_mul,pdist,pdistn,edge,edgen,array,gsr"))
+    (eq_attr "type" "fga,vismv,visl,fgm_pack,fgm_mul,pdist,pdistn,edge,edgen,array,bmask,gsr"))
   "niag2_pipe*9")
diff --git a/gcc/config/sparc/niagara4.md b/gcc/config/sparc/niagara4.md
index ad0a04b..cc1bb75 100644
--- a/gcc/config/sparc/niagara4.md
+++ b/gcc/config/sparc/niagara4.md
@@ -66,7 +66,7 @@
 
 (define_insn_reservation "n4_array" 12
   (and (eq_attr "cpu" "niagara4")
-    (eq_attr "type" "array,edge,edgen"))
+    (eq_attr "type" "array,bmask,edge,edgen"))
   "n4_slot1, nothing*11")
 
 (define_insn_reservation "n4_vis_move_1cycle" 1
diff --git a/gcc/config/sparc/niagara7.md b/gcc/config/sparc/niagara7.md
index 12d6ab0..3dc8f9e 100644
--- a/gcc/config/sparc/niagara7.md
+++ b/gcc/config/sparc/niagara7.md
@@ -71,7 +71,7 @@
 
 (define_insn_reservation "n7_array" 12
   (and (eq_attr "cpu" "niagara7")
-    (eq_attr "type" "array,edge,edgen"))
+    (eq_attr "type" "array,bmask,edge,edgen"))
   "n7_slot1, nothing*11")
 
 (define_insn_reservation "n7_fpdivs" 24
@@ -133,4 +133,4 @@
       (eq_attr "v3pipe" "false")))
   "n7_slot1, nothing*10")
 
-(define_bypass 3 "*_v3pipe" "*_v3pipe")
+(define_bypass 3 "n7*_v3pipe" "n7_*_v3pipe")
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 5c5096b..da23060 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -281,7 +281,7 @@
    fpcmp,
    fpmul,fpdivs,fpdivd,
    fpsqrts,fpsqrtd,
-   fga,visl,vismv,fgm_pack,fgm_mul,pdist,pdistn,edge,edgen,gsr,array,
+   fga,visl,vismv,fgm_pack,fgm_mul,pdist,pdistn,edge,edgen,gsr,array,bmask,
    cmove,
    ialuX,
    multi,savew,flushw,iflush,trap,lzd"
@@ -9134,7 +9134,7 @@
         (plus:DI (match_dup 1) (match_dup 2)))]
   "TARGET_VIS2 && TARGET_ARCH64"
   "bmask\t%r1, %r2, %0"
-  [(set_attr "type" "array")
+  [(set_attr "type" "bmask")
    (set_attr "v3pipe" "true")])
 
 (define_insn "bmasksi_vis"
@@ -9145,7 +9145,7 @@
         (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
   "TARGET_VIS2"
   "bmask\t%r1, %r2, %0"
-  [(set_attr "type" "array")
+  [(set_attr "type" "bmask")
    (set_attr "v3pipe" "true")])
 
 (define_insn "bshuffle<VM64:mode>_vis"
diff --git a/gcc/config/sparc/ultra3.md b/gcc/config/sparc/ultra3.md
index 6296b38..f5b81d6 100644
--- a/gcc/config/sparc/ultra3.md
+++ b/gcc/config/sparc/ultra3.md
@@ -56,7 +56,7 @@
 
 (define_insn_reservation "us3_array" 2
   (and (eq_attr "cpu" "ultrasparc3")
-    (eq_attr "type" "array,edgen"))
+    (eq_attr "type" "array,edgen,bmask"))
   "us3_ms + us3_slotany, nothing")
 
 ;; ??? Not entirely accurate.
-- 
2.3.4

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 4/7] sparc: reworked M7 DFA based on instruction subtypes
  2017-06-29 11:43 [PATCH 0/7] Support for the SPARC M8 cpu Jose E. Marchesi
                   ` (3 preceding siblings ...)
  2017-06-29 11:43 ` [PATCH 5/7] sparc: basic support for the SPARC M8 cpu Jose E. Marchesi
@ 2017-06-29 11:43 ` Jose E. Marchesi
  2017-06-29 11:43 ` [PATCH 2/7] sparc: put VIS compare instructions in it's own insn type and adjust DFAs Jose E. Marchesi
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Jose E. Marchesi @ 2017-06-29 11:43 UTC (permalink / raw)
  To: gcc-patches

This patch reworks the M7 DFA scheduler to use instruction subtypes.  It
also removes the v3pipe insn attribute from sparc.md, as it is no longer
needed.

gcc/ChangeLog:

	* config/sparc/niagara7.md: Rework the DFA scheduler to use insn
	subtypes.
	* config/sparc/sparc.md: Remove the `v3pipe' insn attribute.
	("*movdi_insn_sp32"): Likewise.
	("*movsi_insn"): Likewise.
	("*movdi_insn_sp64"): Likewise.
	("*movsf_insn"): Likewise.
	("*movdf_insn_sp32"): Likewise.
	("*movdf_insn_sp64"): Likewise.
	("*zero_extendsidi2_insn_sp64"): Likewise.
	("*sign_extendsidi2_insn"): Likewise.
	("*mov<VM32:mode>_insn"): Likewise.
	("*mov<VM64:mode>_insn_sp64"): Likewise.
	("*mov<VM64:mode>_insn_sp32"): Likewise.
	("<plusminus_insn><VADDSUB:mode>3"): Likewise.
	("<vlop:code><VL:mode>3"): Likewise.
	("*not_<vlop:code><VL:mode>3"): Likewise.
	("*nand<VL:mode>_vis"): Likewise.
	("*<vlnotop:code>_not1<VL:mode>_vis"): Likewise.
	("*<vlnotop:code>_not2<VL:mode>_vis"): Likewise.
	("one_cmpl<VL:mode>2"): Likewise.
	("faligndata<VM64:mode>_vis"): Likewise.
	("alignaddrsi_vis"): Likewise.
	("alignaddrdi_vis"): Likweise.
	("alignaddrlsi_vis"): Likewise.
	("alignaddrldi_vis"): Likewise.
	("fcmp<gcond:code><GCM:gcm_name><P:mode>_vis"): Likewise.
	("bmaskdi_vis"): Likewise.
	("bmasksi_vis"): Likewise.
	("bshuffle<VM64:mode>_vis"): Likewise.
	("cmask8<P:mode>_vis"): Likewise.
	("cmask16<P:mode>_vis"): Likewise.
	("cmask32<P:mode>_vis"): Likewise.
	("pdistn<P:mode>_vis"): Likewise.
	("<vis3_addsub_ss_patname><VASS:mode>3"): Likewise.
---
 gcc/ChangeLog                |  38 +++++++++
 gcc/config/sparc/niagara7.md | 181 ++++++++++++++++++++++++++++++-------------
 gcc/config/sparc/sparc.md    |  93 +++++++---------------
 3 files changed, 192 insertions(+), 120 deletions(-)

diff --git a/gcc/config/sparc/niagara7.md b/gcc/config/sparc/niagara7.md
index 3f46198..23b6707 100644
--- a/gcc/config/sparc/niagara7.md
+++ b/gcc/config/sparc/niagara7.md
@@ -19,64 +19,120 @@
 
 (define_automaton "niagara7_0")
 
-(define_cpu_unit "n7_slot0,n7_slot1,n7_slot2" "niagara7_0")
-(define_reservation "n7_single_issue" "n7_slot0 + n7_slot1 + n7_slot2")
+;; The S4 core has a dual-issue queue.  This queue is divided into two
+;; slots.  One instruction can be issued each cycle to each slot, and
+;; up to 2 instructions are committed each cycle.  Each slot serves
+;; several execution units, as depicted below:
+;;
+;;
+;;                 m7_slot0 - Integer unit.
+;;                          - Load/Store unit.
+;; === QUEUE ==>
+;;
+;;                 m7_slot1 - Integer unit.
+;;                          - Branch unit.
+;;                          - Floating-point and graphics unit.
+;;                          - 3-cycles crypto unit.
 
-(define_cpu_unit "n7_load_store" "niagara7_0")
+(define_cpu_unit "n7_slot0,n7_slot1" "niagara7_0")
+
+;; Some instructions stall the pipeline and avoid any other
+;; instruction to be issued in the same cycle.  We assume the same for
+;; multi-instruction insns.
+
+(define_reservation "n7_single_issue" "n7_slot0 + n7_slot1")
 
 (define_insn_reservation "n7_single" 1
   (and (eq_attr "cpu" "niagara7")
     (eq_attr "type" "multi,savew,flushw,trap"))
   "n7_single_issue")
 
-(define_insn_reservation "n7_iflush" 27
-  (and (eq_attr "cpu" "niagara7")
-       (eq_attr "type" "iflush"))
-  "(n7_slot0 | n7_slot1), nothing*26")
+;; Most of the instructions executing in the integer unit have a
+;; latency of 1.
 
 (define_insn_reservation "n7_integer" 1
   (and (eq_attr "cpu" "niagara7")
     (eq_attr "type" "ialu,ialuX,shift,cmove,compare"))
   "(n7_slot0 | n7_slot1)")
 
+;; Flushing the instruction memory takes 27 cycles.
+
+(define_insn_reservation "n7_iflush" 27
+  (and (eq_attr "cpu" "niagara7")
+       (eq_attr "type" "iflush"))
+  "(n7_slot0 | n7_slot1), nothing*26")
+
+;; The integer multiplication instructions have a latency of 12 cycles
+;; and execute in the integer unit.
+;;
+;; Likewise for array*, edge* and pdistn instructions.
+
 (define_insn_reservation "n7_imul" 12
   (and (eq_attr "cpu" "niagara7")
-    (eq_attr "type" "imul"))
-  "n7_slot1, nothing*11")
+    (eq_attr "type" "imul,array,edge,edgen,pdistn"))
+  "(n7_slot0 | n7_slot1), nothing*11")
+
+;; The integer division instructions have a latency of 35 cycles and
+;; execute in the integer unit.
 
 (define_insn_reservation "n7_idiv" 35
   (and (eq_attr "cpu" "niagara7")
     (eq_attr "type" "idiv"))
-  "n7_slot1, nothing*34")
+  "(n7_slot0 | n7_slot1), nothing*34")
+
+;; Both integer and floating-point load instructions have a latency of
+;; 5 cycles, and execute in the slot0.
+;;
+;; The prefetch instruction also executes in the load/store unit, but
+;; its latency is only 1 cycle.
 
 (define_insn_reservation "n7_load" 5
   (and (eq_attr "cpu" "niagara7")
-    (eq_attr "type" "load,fpload,sload"))
-  "(n7_slot0 + n7_load_store), nothing*4")
+       (ior (eq_attr "type" "fpload,sload")
+            (and (eq_attr "type" "load")
+                 (eq_attr "subtype" "regular"))))
+  "n7_slot0, nothing*4")
+
+(define_insn_reservation "n7_prefetch" 1
+  (and (eq_attr "cpu" "niagara7")
+       (eq_attr "type" "load")
+       (eq_attr "subtype" "prefetch"))
+  "n7_slot0")
+
+;; Both integer and floating-point store instructions have a latency
+;; of 1 cycle, and execute in the load/store unit in slot0.
 
 (define_insn_reservation "n7_store" 1
   (and (eq_attr "cpu" "niagara7")
     (eq_attr "type" "store,fpstore"))
-  "(n7_slot0 | n7_slot2) + n7_load_store")
+  "n7_slot0")
+
+;; Control-transfer instructions execute in the Branch Unit in the
+;; slot1.
 
 (define_insn_reservation "n7_cti" 1
   (and (eq_attr "cpu" "niagara7")
     (eq_attr "type" "cbcond,uncond_cbcond,branch,call,sibcall,call_no_delay_slot,uncond_branch,return"))
   "n7_slot1")
 
+;; Many instructions executing in the Floating-point and Graphics unit
+;; in the slot1 feature a latency of 11 cycles.
+
 (define_insn_reservation "n7_fp" 11
   (and (eq_attr "cpu" "niagara7")
-    (eq_attr "type" "fpmove,fpcmove,fpcrmove,fp,fpcmp,fpmul"))
+       (ior (eq_attr "type" "fpmove,fpcmove,fpcrmove,fp,fpcmp,fpmul,fgm_pack,fgm_mul,pdist")
+            (and (eq_attr "type" "fga")
+                 (eq_attr "subtype" "fpu,maxmin"))))
   "n7_slot1, nothing*10")
 
-(define_insn_reservation "n7_array" 12
-  (and (eq_attr "cpu" "niagara7")
-    (eq_attr "type" "array,bmask,edge,edgen"))
-  "n7_slot1, nothing*11")
+;; Floating-point division and floating-point square-root instructions
+;; have high latencies.  They execute in the floating-point and
+;; graphics unit in the slot1.
+
 
 (define_insn_reservation "n7_fpdivs" 24
   (and (eq_attr "cpu" "niagara7")
-    (eq_attr "type" "fpdivs,fpsqrts"))
+       (eq_attr "type" "fpdivs,fpsqrts"))
   "n7_slot1, nothing*23")
 
 (define_insn_reservation "n7_fpdivd" 37
@@ -84,53 +140,66 @@
     (eq_attr "type" "fpdivd,fpsqrtd"))
   "n7_slot1, nothing*36")
 
-(define_insn_reservation "n7_lzd" 12
-  (and (eq_attr "cpu" "niagara7")
-       (eq_attr "type" "lzd"))
-  "(n7_slot0 | n7_slot1), nothing*11")
-
-;; There is an internal unit called the "V3 pipe", that was originally
-;; intended to process some of the short cryptographic instructions.
-;; However, as soon as in the T4 several of the VIS instructions
-;; (notably non-FP instructions) have been moved to the V3 pipe.
-;; Consequently, these instructions feature a latency of 3 instead of
-;; 11 or 12 cycles, provided their consumers also execute in the V3
-;; pipe.
+;; SIMD VIS instructions executing in the Floating-point and graphics
+;; unit (FPG) in slot1 usually have a latency of either 11 or 12
+;; cycles.
 ;;
-;; This is modelled here with a bypass.
+;; However, the latency for many instructions is only 3 cycles if the
+;; consumer can also be executed in 3 cycles.  We model this with a
+;; bypass.  In these cases the instructions are executed in the
+;; 3-cycle crypto unit which also serves slot1.
+
+(define_insn_reservation "n7_vis_11cycles" 11
+  (and (eq_attr "cpu" "niagara7")
+       (ior (and (eq_attr "type" "fga")
+                 (eq_attr "subtype" "addsub64,other"))
+            (and (eq_attr "type" "vismv")
+                 (eq_attr "subtype" "double,single"))
+            (and (eq_attr "type" "visl")
+                 (eq_attr "subtype" "double,single"))))
+  "n7_slot1, nothing*10")
 
-(define_insn_reservation "n7_vis_fga" 11
+(define_insn_reservation "n7_vis_12cycles" 12
   (and (eq_attr "cpu" "niagara7")
-    (eq_attr "type" "fga,gsr"))
-  "n7_slot1, nothing*10")
+       (ior (eq_attr "type" "bmask,viscmp")
+            (and (eq_attr "type" "fga")
+                 (eq_attr "subtype" "cmask"))
+            (and (eq_attr "type" "vismv")
+                 (eq_attr "subtype" "movstouw"))))
+  "n7_slot1, nothing*11")
+
+(define_bypass 3 "n7_vis_*" "n7_vis_*")
+
+;; Some other VIS instructions have a latency of 12 cycles, and won't
+;; be executed in the 3-cycle crypto pipe.
 
-(define_insn_reservation "n7_vis_fgm" 11
+(define_insn_reservation "n7_lzd" 12
   (and (eq_attr "cpu" "niagara7")
-    (eq_attr "type" "fgm_pack,fgm_mul,pdist"))
-  "n7_slot1, nothing*10")
+       (ior (eq_attr "type" "lzd,")
+            (and (eq_attr "type" "gsr")
+                 (eq_attr "subtype" "alignaddr"))))
+  "n7_slot1, nothing*11")
 
-(define_insn_reservation "n7_vis_move_v3pipe" 11
+;; A couple of VIS instructions feature very low latencies in the M7.
+
+(define_insn_reservation "n7_single_vis" 1
   (and (eq_attr "cpu" "niagara7")
-    (and (eq_attr "type" "vismv")
-         (eq_attr "v3pipe" "true")))
+       (eq_attr "type" "vismv")
+       (eq_attr "subtype" "movxtod"))
   "n7_slot1")
 
-(define_insn_reservation "n7_vis_move_11cycle" 11
+(define_insn_reservation "n7_double_vis" 2
   (and (eq_attr "cpu" "niagara7")
-    (and (eq_attr "type" "vismv")
-         (eq_attr "v3pipe" "false")))
-  "n7_slot1, nothing*10")
+       (eq_attr "type" "vismv")
+       (eq_attr "subtype" "movdtox"))
+  "n7_slot1, nothing")
 
-(define_insn_reservation "n7_vis_logical_v3pipe" 11
-  (and (eq_attr "cpu" "niagara7")
-    (and (eq_attr "type" "visl,viscmp,pdistn")
-         (eq_attr "v3pipe" "true")))
-  "n7_slot1, nothing*2")
+;; Reading and writing to the gsr register takes a high number of
+;; cycles that is not documented in the PRM.  Let's use the same value
+;; than the M8.
 
-(define_insn_reservation "n7_vis_logical_11cycle" 11
+(define_insn_reservation "n7_gsr_reg" 70
   (and (eq_attr "cpu" "niagara7")
-    (and (eq_attr "type" "visl,viscmp")
-      (eq_attr "v3pipe" "false")))
-  "n7_slot1, nothing*10")
-
-(define_bypass 3 "n7*_v3pipe" "n7_*_v3pipe")
+       (eq_attr "type" "gsr")
+       (eq_attr "subtype" "reg"))
+  "n7_slot1, nothing*70")
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index d1bf6a7..b550f037 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -574,9 +574,6 @@
 	   (const_string "true")
 	] (const_string "false")))
 
-;; True if the instruction executes in the V3 pipeline, in M7 and later processors.
-(define_attr "v3pipe" "false,true" (const_string "false"))
-
 (define_delay (eq_attr "type" "call")
   [(eq_attr "in_call_delay" "true") (nil) (nil)])
 
@@ -1656,8 +1653,7 @@
    fones\t%0"
   [(set_attr "type" "*,*,load,store,vismv,vismv,fpmove,fpload,fpstore,visl,visl")
    (set_attr "subtype" "*,*,regular,*,movstouw,single,*,*,*,single,single")
-   (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")
-   (set_attr "v3pipe" "*,*,*,*,true,true,*,*,*,true,true")])
+   (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")])
 
 (define_insn "*movsi_lo_sum"
   [(set (match_operand:SI 0 "register_operand" "=r")
@@ -1830,7 +1826,6 @@ visl")
    (set_attr "length" "*,2,*,*,*,*,2,2,*,*,2,2,*,2,2,2,*,*,*,*")
    (set_attr "fptype" "*,*,*,*,*,*,*,*,*,*,*,*,double,*,*,*,*,*,double,double")
    (set_attr "cpu_feature" "v9,*,*,*,*,*,*,*,fpu,fpu,fpu,fpu,v9,fpunotv9,vis3,vis3,fpu,fpu,vis,vis")
-   (set_attr "v3pipe" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,true,true")
    (set_attr "lra" "*,*,disabled,disabled,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
 
 (define_insn "*movdi_insn_sp64"
@@ -1854,8 +1849,7 @@ visl")
   [(set_attr "type" "*,*,load,store,vismv,vismv,fpmove,fpload,fpstore,visl,visl")
    (set_attr "subtype" "*,*,regular,*,movdtox,movxtod,*,*,*,double,double")
    (set_attr "fptype" "*,*,*,*,*,*,double,*,*,double,double")
-   (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")
-   (set_attr "v3pipe" "*,*,*,*,*,*,*,*,*,true,true")])
+   (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")])
 
 (define_expand "movdi_pic_label_ref"
   [(set (match_dup 3) (high:DI
@@ -2385,8 +2379,7 @@ visl")
 }
   [(set_attr "type" "visl,visl,fpmove,*,*,*,vismv,vismv,fpload,load,fpstore,store")
    (set_attr "subtype" "single,single,*,*,*,*,movstouw,single,*,regular,*,*")
-   (set_attr "cpu_feature" "vis,vis,fpu,*,*,*,vis3,vis3,fpu,*,fpu,*")
-   (set_attr "v3pipe" "true,true,*,*,*,*,true,true,*,*,*,*")])
+   (set_attr "cpu_feature" "vis,vis,fpu,*,*,*,vis3,vis3,fpu,*,fpu,*")])
 
 ;; The following 3 patterns build SFmode constants in integer registers.
 
@@ -2462,7 +2455,6 @@ visl")
    (set_attr "length" "*,2,*,*,*,2,2,2,*,*,*,*,2,2,2,*,*")
    (set_attr "fptype" "*,*,double,double,double,*,*,*,*,*,*,*,*,*,*,*,*")
    (set_attr "cpu_feature" "v9,*,vis,vis,v9,fpunotv9,vis3,vis3,fpu,fpu,*,*,fpu,fpu,*,*,*")
-   (set_attr "v3pipe" "*,*,true,true,*,*,*,*,*,*,*,*,*,*,*,*,*")
    (set_attr "lra" "*,*,*,*,*,*,*,*,*,*,disabled,disabled,*,*,*,*,*")])
 
 (define_insn "*movdf_insn_sp64"
@@ -2487,8 +2479,7 @@ visl")
    (set_attr "subtype" "double,double,*,movdtox,movxtod,regular,*,*,regular,*,*")
    (set_attr "length" "*,*,*,*,*,*,*,*,*,*,2")
    (set_attr "fptype" "double,double,double,double,double,*,*,*,*,*,*")
-   (set_attr "cpu_feature" "vis,vis,fpu,vis3,vis3,fpu,fpu,*,*,*,*")
-   (set_attr "v3pipe" "true,true,*,*,*,*,*,*,*,*,*")])
+   (set_attr "cpu_feature" "vis,vis,fpu,vis3,vis3,fpu,fpu,*,*,*,*")])
 
 ;; This pattern builds DFmode constants in integer registers.
 (define_split
@@ -3119,8 +3110,7 @@ visl")
    movstouw\t%1, %0"
   [(set_attr "type" "shift,load,vismv")
    (set_attr "subtype" "*,regular,movstouw")
-   (set_attr "cpu_feature" "*,*,vis3")
-   (set_attr "v3pipe" "*,*,true")])
+   (set_attr "cpu_feature" "*,*,vis3")])
 
 (define_insn_and_split "*zero_extendsidi2_insn_sp32"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -3435,8 +3425,7 @@ visl")
   movstosw\t%1, %0"
   [(set_attr "type" "shift,sload,vismv")
    (set_attr "us3load_type" "*,3cycle,*")
-   (set_attr "cpu_feature" "*,*,vis3")
-   (set_attr "v3pipe" "*,*,true")])
+   (set_attr "cpu_feature" "*,*,vis3")])
 
 
 ;; Special pattern for optimizing bit-field compares.  This is needed
@@ -8645,8 +8634,7 @@ visl")
   movwtos\t%1, %0"
   [(set_attr "type" "visl,visl,vismv,fpload,fpstore,store,load,store,*,vismv,vismv")
    (set_attr "subtype" "single,single,single,*,*,*,regular,*,*,movstouw,single")
-   (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,*,vis3,vis3")
-   (set_attr "v3pipe" "true,true,true,*,*,*,*,*,*,true,true")])
+   (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,*,vis3,vis3")])
 
 (define_insn "*mov<VM64:mode>_insn_sp64"
   [(set (match_operand:VM64 0 "nonimmediate_operand" "=e,e,e,e,W,m,*r, m,*r, e,*r")
@@ -8669,8 +8657,7 @@ visl")
   mov\t%1, %0"
   [(set_attr "type" "visl,visl,vismv,fpload,fpstore,store,load,store,vismv,vismv,*")
    (set_attr "subtype" "double,double,double,*,*,*,regular,*,movdtox,movxtod,*")
-   (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,vis3,vis3,*")
-   (set_attr "v3pipe" "true,true,true,*,*,*,*,*,*,*,*")])
+   (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,vis3,vis3,*")])
 
 (define_insn "*mov<VM64:mode>_insn_sp32"
   [(set (match_operand:VM64 0 "nonimmediate_operand"
@@ -8702,7 +8689,6 @@ visl")
    (set_attr "subtype" "*,*,double,double,double,*,*,*,*,regular,*,*,*,*,regular,*")
    (set_attr "length" "*,2,*,*,*,2,2,*,*,*,*,2,2,2,*,*")
    (set_attr "cpu_feature" "*,*,vis,vis,vis,vis3,vis3,*,*,*,*,*,*,*,*,*")
-   (set_attr "v3pipe" "*,*,true,true,true,*,*,*,*,*,*,*,*,*,*,*")
    (set_attr "lra" "*,*,*,*,*,*,*,*,*,disabled,disabled,*,*,*,*,*")])
 
 (define_split
@@ -8781,8 +8767,7 @@ visl")
   "fp<plusminus_insn><vbits>\t%1, %2, %0"
   [(set_attr "type" "fga")
    (set_attr "subtype" "other")
-   (set_attr "fptype" "<vfptype>")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "<vfptype>")])
 
 (define_mode_iterator VL [V1SI V2HI V4QI V1DI V2SI V4HI V8QI])
 (define_mode_attr vlsuf [(V1SI "s") (V2HI "s") (V4QI "s")
@@ -8798,8 +8783,7 @@ visl")
   "TARGET_VIS"
   "f<vlinsn><vlsuf>\t%1, %2, %0"
   [(set_attr "type" "visl")
-   (set_attr "fptype" "<vfptype>")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "<vfptype>")])
 
 (define_insn "*not_<vlop:code><VL:mode>3"
   [(set (match_operand:VL 0 "register_operand" "=<vconstr>")
@@ -8808,8 +8792,7 @@ visl")
   "TARGET_VIS"
   "f<vlninsn><vlsuf>\t%1, %2, %0"
   [(set_attr "type" "visl")
-   (set_attr "fptype" "<vfptype>")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "<vfptype>")])
 
 ;; (ior (not (op1)) (not (op2))) is the canonical form of NAND.
 (define_insn "*nand<VL:mode>_vis"
@@ -8819,8 +8802,7 @@ visl")
   "TARGET_VIS"
   "fnand<vlsuf>\t%1, %2, %0"
   [(set_attr "type" "visl")
-   (set_attr "fptype" "<vfptype>")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "<vfptype>")])
 
 (define_code_iterator vlnotop [ior and])
 
@@ -8831,8 +8813,7 @@ visl")
   "TARGET_VIS"
   "f<vlinsn>not1<vlsuf>\t%1, %2, %0"
   [(set_attr "type" "visl")
-   (set_attr "fptype" "<vfptype>")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "<vfptype>")])
 
 (define_insn "*<vlnotop:code>_not2<VL:mode>_vis"
   [(set (match_operand:VL 0 "register_operand" "=<vconstr>")
@@ -8841,8 +8822,7 @@ visl")
   "TARGET_VIS"
   "f<vlinsn>not2<vlsuf>\t%1, %2, %0"
   [(set_attr "type" "visl")
-   (set_attr "fptype" "<vfptype>")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "<vfptype>")])
 
 (define_insn "one_cmpl<VL:mode>2"
   [(set (match_operand:VL 0 "register_operand" "=<vconstr>")
@@ -8850,8 +8830,7 @@ visl")
   "TARGET_VIS"
   "fnot1<vlsuf>\t%1, %0"
   [(set_attr "type" "visl")
-   (set_attr "fptype" "<vfptype>")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "<vfptype>")])
 
 ;; Hard to generate VIS instructions.  We have builtins for these.
 
@@ -9054,8 +9033,7 @@ visl")
   "faligndata\t%1, %2, %0"
   [(set_attr "type" "fga")
    (set_attr "subtype" "other")
-   (set_attr "fptype" "double")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "double")])
 
 (define_insn "alignaddrsi_vis"
   [(set (match_operand:SI 0 "register_operand" "=r")
@@ -9066,8 +9044,7 @@ visl")
   "TARGET_VIS"
   "alignaddr\t%r1, %r2, %0"
   [(set_attr "type" "gsr")
-   (set_attr "subtype" "alignaddr")
-   (set_attr "v3pipe" "true")])
+   (set_attr "subtype" "alignaddr")])
 
 (define_insn "alignaddrdi_vis"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -9078,8 +9055,7 @@ visl")
   "TARGET_VIS"
   "alignaddr\t%r1, %r2, %0"
   [(set_attr "type" "gsr")
-   (set_attr "subtype" "alignaddr")
-   (set_attr "v3pipe" "true")])
+   (set_attr "subtype" "alignaddr")])
 
 (define_insn "alignaddrlsi_vis"
   [(set (match_operand:SI 0 "register_operand" "=r")
@@ -9091,8 +9067,7 @@ visl")
   "TARGET_VIS"
   "alignaddrl\t%r1, %r2, %0"
   [(set_attr "type" "gsr")
-   (set_attr "subtype" "alignaddr")
-   (set_attr "v3pipe" "true")])
+   (set_attr "subtype" "alignaddr")])
 
 (define_insn "alignaddrldi_vis"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -9104,8 +9079,7 @@ visl")
   "TARGET_VIS"
   "alignaddrl\t%r1, %r2, %0"
   [(set_attr "type" "gsr")
-   (set_attr "subtype" "alignaddr")
-   (set_attr "v3pipe" "true")])
+   (set_attr "subtype" "alignaddr")])
 
 (define_insn "pdist_vis"
   [(set (match_operand:DI 0 "register_operand" "=e")
@@ -9197,8 +9171,7 @@ visl")
 	 UNSPEC_FCMP))]
   "TARGET_VIS"
   "fcmp<gcond:code><GCM:gcm_name>\t%1, %2, %0"
-  [(set_attr "type" "viscmp")
-   (set_attr "v3pipe" "true")])
+  [(set_attr "type" "viscmp")])
 
 (define_insn "fpcmp<gcond:code>8<P:mode>_vis"
   [(set (match_operand:P 0 "register_operand" "=r")
@@ -9270,8 +9243,7 @@ visl")
         (plus:DI (match_dup 1) (match_dup 2)))]
   "TARGET_VIS2 && TARGET_ARCH64"
   "bmask\t%r1, %r2, %0"
-  [(set_attr "type" "bmask")
-   (set_attr "v3pipe" "true")])
+  [(set_attr "type" "bmask")])
 
 (define_insn "bmasksi_vis"
   [(set (match_operand:SI 0 "register_operand" "=r")
@@ -9281,8 +9253,7 @@ visl")
         (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
   "TARGET_VIS2"
   "bmask\t%r1, %r2, %0"
-  [(set_attr "type" "bmask")
-   (set_attr "v3pipe" "true")])
+  [(set_attr "type" "bmask")])
 
 (define_insn "bshuffle<VM64:mode>_vis"
   [(set (match_operand:VM64 0 "register_operand" "=e")
@@ -9294,8 +9265,7 @@ visl")
   "bshuffle\t%1, %2, %0"
   [(set_attr "type" "fga")
    (set_attr "subtype" "other")
-   (set_attr "fptype" "double")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "double")])
 
 ;; The rtl expanders will happily convert constant permutations on other
 ;; modes down to V8QI.  Rely on this to avoid the complexity of the byte
@@ -9398,8 +9368,7 @@ visl")
   "TARGET_VIS3"
   "cmask8\t%r0"
   [(set_attr "type" "fga")
-   (set_attr "subtype" "cmask")
-   (set_attr "v3pipe" "true")])
+   (set_attr "subtype" "cmask")])
 
 (define_insn "cmask16<P:mode>_vis"
   [(set (reg:DI GSR_REG)
@@ -9409,8 +9378,7 @@ visl")
   "TARGET_VIS3"
   "cmask16\t%r0"
   [(set_attr "type" "fga")
-   (set_attr "subtype" "cmask")
-   (set_attr "v3pipe" "true")])
+   (set_attr "subtype" "cmask")])
 
 (define_insn "cmask32<P:mode>_vis"
   [(set (reg:DI GSR_REG)
@@ -9420,8 +9388,7 @@ visl")
   "TARGET_VIS3"
   "cmask32\t%r0"
   [(set_attr "type" "fga")
-   (set_attr "subtype" "cmask")
-   (set_attr "v3pipe" "true")])
+   (set_attr "subtype" "cmask")])
 
 (define_insn "fchksm16_vis"
   [(set (match_operand:V4HI 0 "register_operand" "=e")
@@ -9456,8 +9423,7 @@ visl")
   "TARGET_VIS3"
   "pdistn\t%1, %2, %0"
   [(set_attr "type" "pdistn")
-   (set_attr "fptype" "double")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "double")])
 
 (define_insn "fmean16_vis"
   [(set (match_operand:V4HI 0 "register_operand" "=e")
@@ -9509,8 +9475,7 @@ visl")
   "TARGET_VIS3"
   "<vis3_addsub_ss_insn><vbits>\t%1, %2, %0"
   [(set_attr "type" "fga")
-   (set_attr "subtype" "other")
-   (set_attr "v3pipe" "true")])
+   (set_attr "subtype" "other")])
 
 (define_mode_iterator VMMAX [V8QI V4HI V2SI])
 (define_code_iterator vis4_minmax [smin smax])
-- 
2.3.4

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 3/7] sparc: introduce insn subtypes
  2017-06-29 11:43 [PATCH 0/7] Support for the SPARC M8 cpu Jose E. Marchesi
  2017-06-29 11:43 ` [PATCH 6/7] sparc: support for VIS4B instructions Jose E. Marchesi
  2017-06-29 11:43 ` [PATCH 7/7] sparc: M8 DFA scheduler Jose E. Marchesi
@ 2017-06-29 11:43 ` Jose E. Marchesi
  2017-06-29 11:43 ` [PATCH 5/7] sparc: basic support for the SPARC M8 cpu Jose E. Marchesi
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Jose E. Marchesi @ 2017-06-29 11:43 UTC (permalink / raw)
  To: gcc-patches

This patch introduces a new insn attribute `subtype', and marks
existing insns appropriately.  The resulting instruction hierarchy is
documented in a comment.

gcc/ChangeLog:

	* config/sparc/sparc.md ("subtype"): New insn attribute.
	("*wrgsr_sp64"): Set insn subtype.
	("*rdgsr_sp64"): Likewise.
	("alignaddrsi_vis"): Likewise.
	("alignaddrdi_vis"): Likewise.
	("alignaddrlsi_vis"): Likewise.
	("alignaddrldi_vis"): Likewise.
	("<plusminus_insn><VADDSUB:mode>3"): Likewise.
	("fexpand_vis"): Likewise.
	("fpmerge_vis"): Likewise.
	("faligndata<VM64:mode>_vis"): Likewise.
	("bshuffle<VM64:mode>_vis"): Likewise.
	("cmask8<P:mode>_vis"): Likewise.
	("cmask16<P:mode>_vis"): Likewise.
	("cmask32<P:mode>_vis"): Likewise.
	("fchksm16_vis"): Likewise.
	("v<vis3_shift_patname><GCM:mode>3"): Likewise.
	("fmean16_vis"): Likewise.
	("fp<plusminus_insn>64_vis"): Likewise.
	("<plusminus_insn>v8qi3"): Likewise.
	("<vis3_addsub_ss_patname><VASS:mode>3"): Likewise.
	("<vis4_minmax_patname><VMMAX:mode>3"): Likewise.
	("<vis4_uminmax_patname><VMMAX:mode>3"): Likewise.
	("<vis3_addsub_ss_patname>v8qi3"): Likewise.
	("<vis4_addsub_us_patname><VAUS:mode>3"): Likewise.
	("*movqi_insn"): Likewise.
	("*movhi_insn"): Likewise.
	("*movsi_insn"): Likewise.
	("movsi_pic_gotdata_op"): Likewise.
	("*movdi_insn_sp32"): Likewise.
	("*movdi_insn_sp64"): Likewise.
	("movdi_pic_gotdata_op"): Likewise.
	("*movsf_insn"): Likewise.
	("*movdf_insn_sp32"): Likewise.
	("*movdf_insn_sp64"): Likewise.
	("*zero_extendhisi2_insn"): Likewise.
	("*zero_extendqihi2_insn"): Likewise.
	("*zero_extendqisi2_insn"): Likewise.
	("*zero_extendqidi2_insn"): Likewise.
	("*zero_extendhidi2_insn"): Likewise.
	("*zero_extendsidi2_insn_sp64"): Likewise.
	("ldfsr"): Likewise.
	("prefetch_64"): Likewise.
	("prefetch_32"): Likewise.
	("tie_ld32"): Likewise.
	("tie_ld64"): Likewise.
	("*tldo_ldub_sp32"): Likewise.
	("*tldo_ldub1_sp32"): Likewise.
	("*tldo_ldub2_sp32"): Likewise.
	("*tldo_ldub_sp64"): Likewise.
	("*tldo_ldub1_sp64"): Likewise.
	("*tldo_ldub2_sp64"): Likewise.
	("*tldo_ldub3_sp64"): Likewise.
	("*tldo_lduh_sp32"): Likewise.
	("*tldo_lduh1_sp32"): Likewise.
	("*tldo_lduh_sp64"): Likewise.
	("*tldo_lduh1_sp64"): Likewise.
	("*tldo_lduh2_sp64"): Likewise.
	("*tldo_lduw_sp32"): Likewise.
	("*tldo_lduw_sp64"): Likewise.
	("*tldo_lduw1_sp64"): Likewise.
	("*tldo_ldx_sp64"): Likewise.
	("*mov<VM32:mode>_insn"): Likewise.
	("*mov<VM64:mode>_insn_sp64"): Likewise.
	("*mov<VM64:mode>_insn_sp32"): Likewise.
---
 gcc/ChangeLog             |  68 ++++++++++++++++
 gcc/config/sparc/sparc.md | 199 ++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 243 insertions(+), 24 deletions(-)

diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 04da8ae..d1bf6a7 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -268,7 +268,86 @@
          (eq_attr "cpu_feature" "vis4") (symbol_ref "TARGET_VIS4")]
         (const_int 0)))
 
-;; Insn type.
+;; The SPARC instructions used by the backend are organized into a
+;; hierarchy using the insn attributes "type" and "subtype".
+;;
+;; The mnemonics used in the list below are the architectural names
+;; used in the Oracle SPARC Architecture specs.  A / character
+;; separates the type from the subtype where appropriate.  For
+;; brevity, text enclosed in {} denotes alternatives, while text
+;; enclosed in [] is optional.
+;;
+;; Please keep this list updated.  It is of great help for keeping the
+;; correctness and coherence of the DFA schedulers.
+;;
+;; ialu:  <empty>
+;; ialuX: ADD[X]C SUB[X]C
+;; shift: SLL[X] SRL[X] SRA[X]
+;; cmove: MOV{A,N,NE,E,G,LE,GE,L,GU,LEU,CC,CS,POS,NEG,VC,VS}
+;;        MOVF{A,N,U,G,UG,L,UL,LG,NE,E,UE,GE,UGE,LE,ULE,O}
+;;        MOVR{Z,LEZ,LZ,NZ,GZ,GEZ}
+;; compare: ADDcc ADDCcc ANDcc ORcc SUBcc SUBCcc XORcc XNORcc
+;; imul: MULX SMUL[cc] UMUL UMULXHI XMULX XMULXHI
+;; idiv: UDIVX SDIVX
+;; flush: FLUSH
+;; load/regular: LD{UB,UH,UW} LDFSR
+;; load/prefetch: PREFETCH
+;; fpload: LDF LDDF LDQF
+;; sload: LD{SB,SH,SW}
+;; store: ST{B,H,W,X} STFSR
+;; fpstore: STF STDF STQF
+;; cbcond: CWB{NE,E,G,LE,GE,L,GU,LEU,CC,CS,POS,NEG,VC,VS}
+;;         CXB{NE,E,G,LE,GE,L,GU,LEU,CC,CS,POS,NEG,VC,VS}
+;; uncond_branch: BA BPA JMPL
+;; branch: B{NE,E,G,LE,GE,L,GU,LEU,CC,CS,POS,NEG,VC,VS}
+;;         BP{NE,E,G,LE,GE,L,GU,LEU,CC,CS,POS,NEG,VC,VS}
+;;         FB{U,G,UG,L,UL,LG,NE,BE,UE,GE,UGE,LE,ULE,O}
+;; call: CALL
+;; return: RESTORE RETURN
+;; fpmove: FABS{s,d,q} FMOV{s,d,q} FNEG{s,d,q}
+;; fpcmove: FMOV{S,D,Q}{icc,xcc,fcc}
+;; fpcrmove: FMOVR{s,d,q}{Z,LEZ,LZ,NZ,GZ,GEZ}
+;; fp: FADD{s,d,q} FSUB{s,d,q} FHSUB{s,d} FNHADD{s,d} FNADD{s,d}
+;;     FiTO{s,d,q} FsTO{i,x,d,q} FdTO{i,x,s,q} FxTO{d,s,q} FqTO{i,x,s,d}
+;; fpcmp: FCMP{s,d,q} FCMPE{s,d,q}
+;; fpmul: FMADD{s,d}  FMSUB{s,d} FMUL{s,d,q} FNMADD{s,d}
+;;        FNMSUB{s,d} FNMUL{s,d} FNsMULd FsMULd
+;;        FdMULq
+;; array: ARRAY{8,16,32}
+;; bmask: BMASK
+;; edge: EDGE{8,16,32}[L]cc
+;; edgen: EDGE{8,16,32}[L]n
+;; fpdivs: FDIV{s,q}
+;; fpsqrts: FSQRT{s,q}
+;; fpdivd: FDIVd
+;; fpsqrtd: FSQRTd
+;; lzd: LZCNT
+;; fga/addsub64: FP{ADD,SUB}64
+;; fga/fpu: FCHKSM16 FEXPANd FMEAN16 FPMERGE
+;;          FS{LL,RA,RL}{16,32}
+;; fga/maxmin: FP{MAX,MIN}[U]{8,16,32}
+;; fga/cmask: CMASK{8,16,32}
+;; fga/other: BSHUFFLE FALIGNDATAg FP{ADD,SUB}[S]{8,16,32}
+;;            FP{ADD,SUB}US{8,16}
+;; gsr/reg: RDGSR WRGSR
+;; gsr/alignaddr: ALIGNADDRESS[_LITTLE]
+;; vismv/double:  FSRC2d
+;; vismv/single:  MOVwTOs FSRC2s
+;; vismv/movstouw: MOVsTOuw
+;; vismv/movxtod: MOVxTOd
+;; vismv/movdtox: MOVdTOx
+;; visl/single: F{AND,NAND,NOR,OR,NOT1}s
+;;              F{AND,OR}NOT{1,2}s
+;;              FONEs F{ZERO,XNOR,XOR}s FNOT2s
+;; visl/double: FONEd FZEROd FNOT1d F{OR,AND,XOR}d F{NOR,NAND,XNOR}d
+;;              F{OR,AND}NOT1d F{OR,AND}NOT2d
+;; viscmp: FPCMP{LE,GT,NE,EQ}{8,16,32} FPCMPU{LE,GT,NE,EQ}{8,16,32}
+;; fgm_pack: FPACKFIX FPACK{8,16,32}
+;; fgm_mul: FMUL8SUx16 FMUL8ULx16 FMUL8x16 FMUL8x16AL
+;;          FMUL8x16AU FMULD8SUx16 FMULD8ULx16
+;; pdist: PDIST
+;; pdistn: PDISTN
+
 (define_attr "type"
   "ialu,compare,shift,
    load,sload,store,
@@ -288,6 +367,13 @@
    multi,savew,flushw,iflush,trap,lzd"
   (const_string "ialu"))
 
+(define_attr "subtype"
+  "single,double,movstouw,movxtod,movdtox,
+   addsub64,cmask,fpu,maxmin,other,
+   reg,alignaddr,
+   prefetch,regular"
+  (const_string "single"))
+
 ;; True if branch/call has empty delay slot and will emit a nop in it
 (define_attr "empty_delay_slot" "false,true"
   (symbol_ref "(empty_delay_slot (insn)
@@ -1508,6 +1594,7 @@
    ldub\t%1, %0
    stb\t%r1, %0"
   [(set_attr "type" "*,load,store")
+   (set_attr "subtype" "*,regular,*")
    (set_attr "us3load_type" "*,3cycle,*")])
 
 (define_expand "movhi"
@@ -1530,6 +1617,7 @@
    lduh\t%1, %0
    sth\t%r1, %0"
   [(set_attr "type" "*,*,load,store")
+   (set_attr "subtype" "*,*,regular,*")
    (set_attr "us3load_type" "*,*,3cycle,*")])
 
 ;; We always work with constants here.
@@ -1567,6 +1655,7 @@
    fzeros\t%0
    fones\t%0"
   [(set_attr "type" "*,*,load,store,vismv,vismv,fpmove,fpload,fpstore,visl,visl")
+   (set_attr "subtype" "*,*,regular,*,movstouw,single,*,*,*,single,single")
    (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")
    (set_attr "v3pipe" "*,*,*,*,true,true,*,*,*,true,true")])
 
@@ -1625,7 +1714,8 @@
   return "ld\t[%1 + %2], %0";
 #endif
 }
-  [(set_attr "type" "load")])
+  [(set_attr "type" "load")
+   (set_attr "subtype" "regular")])
 
 (define_expand "movsi_pic_label_ref"
   [(set (match_dup 3) (high:SI
@@ -1734,7 +1824,9 @@
    std\t%1, %0
    fzero\t%0
    fone\t%0"
-  [(set_attr "type" "store,*,load,store,load,store,*,*,fpload,fpstore,*,*,fpmove,*,*,*,fpload,fpstore,visl,visl")
+  [(set_attr "type" "store,*,load,store,load,store,*,*,fpload,fpstore,*,*,fpmove,*,*,*,fpload,fpstore,visl,
+visl")
+   (set_attr "subtype" "*,*,regular,*,regular,*,*,*,*,*,*,*,*,*,*,*,*,*,double,double")
    (set_attr "length" "*,2,*,*,*,*,2,2,*,*,2,2,*,2,2,2,*,*,*,*")
    (set_attr "fptype" "*,*,*,*,*,*,*,*,*,*,*,*,double,*,*,*,*,*,double,double")
    (set_attr "cpu_feature" "v9,*,*,*,*,*,*,*,fpu,fpu,fpu,fpu,v9,fpunotv9,vis3,vis3,fpu,fpu,vis,vis")
@@ -1760,6 +1852,7 @@
    fzero\t%0
    fone\t%0"
   [(set_attr "type" "*,*,load,store,vismv,vismv,fpmove,fpload,fpstore,visl,visl")
+   (set_attr "subtype" "*,*,regular,*,movdtox,movxtod,*,*,*,double,double")
    (set_attr "fptype" "*,*,*,*,*,*,double,*,*,double,double")
    (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")
    (set_attr "v3pipe" "*,*,*,*,*,*,*,*,*,true,true")])
@@ -1848,7 +1941,8 @@
   return "ldx\t[%1 + %2], %0";
 #endif
 }
-  [(set_attr "type" "load")])
+  [(set_attr "type" "load")
+   (set_attr "subtype" "regular")])
 
 (define_insn "*sethi_di_medlow_embmedany_pic"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -2290,6 +2384,7 @@
     }
 }
   [(set_attr "type" "visl,visl,fpmove,*,*,*,vismv,vismv,fpload,load,fpstore,store")
+   (set_attr "subtype" "single,single,*,*,*,*,movstouw,single,*,regular,*,*")
    (set_attr "cpu_feature" "vis,vis,fpu,*,*,*,vis3,vis3,fpu,*,fpu,*")
    (set_attr "v3pipe" "true,true,*,*,*,*,true,true,*,*,*,*")])
 
@@ -2363,6 +2458,7 @@
   ldd\t%1, %0
   std\t%1, %0"
   [(set_attr "type" "store,*,visl,visl,fpmove,*,*,*,fpload,fpstore,load,store,*,*,*,load,store")
+   (set_attr "subtype" "*,*,double,double,*,*,*,*,*,*,regular,*,*,*,*,regular,*")
    (set_attr "length" "*,2,*,*,*,2,2,2,*,*,*,*,2,2,2,*,*")
    (set_attr "fptype" "*,*,double,double,double,*,*,*,*,*,*,*,*,*,*,*,*")
    (set_attr "cpu_feature" "v9,*,vis,vis,v9,fpunotv9,vis3,vis3,fpu,fpu,*,*,fpu,fpu,*,*,*")
@@ -2388,6 +2484,7 @@
   stx\t%r1, %0
   #"
   [(set_attr "type" "visl,visl,fpmove,vismv,vismv,load,store,*,load,store,*")
+   (set_attr "subtype" "double,double,*,movdtox,movxtod,regular,*,*,regular,*,*")
    (set_attr "length" "*,*,*,*,*,*,*,*,*,*,2")
    (set_attr "fptype" "double,double,double,double,double,*,*,*,*,*,*")
    (set_attr "cpu_feature" "vis,vis,fpu,vis3,vis3,fpu,fpu,*,*,*,*")
@@ -2917,6 +3014,7 @@
   ""
   "lduh\t%1, %0"
   [(set_attr "type" "load")
+   (set_attr "subtype" "regular")
    (set_attr "us3load_type" "3cycle")])
 
 (define_expand "zero_extendqihi2"
@@ -2933,6 +3031,7 @@
    and\t%1, 0xff, %0
    ldub\t%1, %0"
   [(set_attr "type" "*,load")
+   (set_attr "subtype" "*,regular")
    (set_attr "us3load_type" "*,3cycle")])
 
 (define_expand "zero_extendqisi2"
@@ -2949,6 +3048,7 @@
    and\t%1, 0xff, %0
    ldub\t%1, %0"
   [(set_attr "type" "*,load")
+   (set_attr "subtype" "*,regular")
    (set_attr "us3load_type" "*,3cycle")])
 
 (define_expand "zero_extendqidi2"
@@ -2965,6 +3065,7 @@
    and\t%1, 0xff, %0
    ldub\t%1, %0"
   [(set_attr "type" "*,load")
+   (set_attr "subtype" "*,regular")
    (set_attr "us3load_type" "*,3cycle")])
 
 (define_expand "zero_extendhidi2"
@@ -2996,6 +3097,7 @@
   "TARGET_ARCH64"
   "lduh\t%1, %0"
   [(set_attr "type" "load")
+   (set_attr "subtype" "regular")
    (set_attr "us3load_type" "3cycle")])
 
 ;; ??? Write truncdisi pattern using sra?
@@ -3016,6 +3118,7 @@
    lduw\t%1, %0
    movstouw\t%1, %0"
   [(set_attr "type" "shift,load,vismv")
+   (set_attr "subtype" "*,regular,movstouw")
    (set_attr "cpu_feature" "*,*,vis3")
    (set_attr "v3pipe" "*,*,true")])
 
@@ -7357,7 +7460,8 @@
   [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] UNSPECV_LDFSR)]
   "TARGET_FPU"
   "ld\t%0, %%fsr"
-  [(set_attr "type" "load")])
+  [(set_attr "type" "load")
+   (set_attr "subtype" "regular")])
 
 (define_insn "stfsr"
   [(set (match_operand:SI 0 "memory_operand" "=m")
@@ -7721,7 +7825,8 @@
   gcc_assert (locality >= 0 && locality < 4);
   return prefetch_instr [read_or_write][locality == 0 ? 0 : 1];
 }
-  [(set_attr "type" "load")])
+  [(set_attr "type" "load")
+   (set_attr "subtype" "prefetch")])
 
 (define_insn "prefetch_32"
   [(prefetch (match_operand:SI 0 "address_operand" "p")
@@ -7746,7 +7851,8 @@
   gcc_assert (locality >= 0 && locality < 4);
   return prefetch_instr [read_or_write][locality == 0 ? 0 : 1];
 }
-  [(set_attr "type" "load")])
+  [(set_attr "type" "load")
+   (set_attr "subtype" "prefetch")])
 
 
 ;; Trap instructions.
@@ -7967,7 +8073,8 @@
 		   UNSPEC_TLSIE))]
   "TARGET_TLS && TARGET_ARCH32"
   "ld\\t[%1 + %2], %0, %%tie_ld(%a3)"
-  [(set_attr "type" "load")])
+  [(set_attr "type" "load")
+   (set_attr "subtype" "regular")])
 
 (define_insn "tie_ld64"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -7977,7 +8084,8 @@
 		   UNSPEC_TLSIE))]
   "TARGET_TLS && TARGET_ARCH64"
   "ldx\\t[%1 + %2], %0, %%tie_ldx(%a3)"
-  [(set_attr "type" "load")])
+  [(set_attr "type" "load")
+   (set_attr "subtype" "regular")])
 
 (define_insn "tie_add32"
   [(set (match_operand:SI 0 "register_operand" "=r")
@@ -8037,6 +8145,7 @@
   "TARGET_TLS && TARGET_ARCH32"
   "ldub\t[%1 + %2], %0, %%tldo_add(%3)"
   [(set_attr "type" "load")
+   (set_attr "subtype" "regular")
    (set_attr "us3load_type" "3cycle")])
 
 (define_insn "*tldo_ldub1_sp32"
@@ -8049,6 +8158,7 @@
   "TARGET_TLS && TARGET_ARCH32"
   "ldub\t[%1 + %2], %0, %%tldo_add(%3)"
   [(set_attr "type" "load")
+   (set_attr "subtype" "regular")
    (set_attr "us3load_type" "3cycle")])
 
 (define_insn "*tldo_ldub2_sp32"
@@ -8061,6 +8171,7 @@
   "TARGET_TLS && TARGET_ARCH32"
   "ldub\t[%1 + %2], %0, %%tldo_add(%3)"
   [(set_attr "type" "load")
+   (set_attr "subtype" "regular")
    (set_attr "us3load_type" "3cycle")])
 
 (define_insn "*tldo_ldsb1_sp32"
@@ -8096,6 +8207,7 @@
   "TARGET_TLS && TARGET_ARCH64"
   "ldub\t[%1 + %2], %0, %%tldo_add(%3)"
   [(set_attr "type" "load")
+   (set_attr "subtype" "regular")
    (set_attr "us3load_type" "3cycle")])
 
 (define_insn "*tldo_ldub1_sp64"
@@ -8108,6 +8220,7 @@
   "TARGET_TLS && TARGET_ARCH64"
   "ldub\t[%1 + %2], %0, %%tldo_add(%3)"
   [(set_attr "type" "load")
+   (set_attr "subtype" "regular")
    (set_attr "us3load_type" "3cycle")])
 
 (define_insn "*tldo_ldub2_sp64"
@@ -8120,6 +8233,7 @@
   "TARGET_TLS && TARGET_ARCH64"
   "ldub\t[%1 + %2], %0, %%tldo_add(%3)"
   [(set_attr "type" "load")
+   (set_attr "subtype" "regular")
    (set_attr "us3load_type" "3cycle")])
 
 (define_insn "*tldo_ldub3_sp64"
@@ -8132,6 +8246,7 @@
   "TARGET_TLS && TARGET_ARCH64"
   "ldub\t[%1 + %2], %0, %%tldo_add(%3)"
   [(set_attr "type" "load")
+   (set_attr "subtype" "regular")
    (set_attr "us3load_type" "3cycle")])
 
 (define_insn "*tldo_ldsb1_sp64"
@@ -8179,6 +8294,7 @@
   "TARGET_TLS && TARGET_ARCH32"
   "lduh\t[%1 + %2], %0, %%tldo_add(%3)"
   [(set_attr "type" "load")
+   (set_attr "subtype" "regular")
    (set_attr "us3load_type" "3cycle")])
 
 (define_insn "*tldo_lduh1_sp32"
@@ -8191,6 +8307,7 @@
   "TARGET_TLS && TARGET_ARCH32"
   "lduh\t[%1 + %2], %0, %%tldo_add(%3)"
   [(set_attr "type" "load")
+   (set_attr "subtype" "regular")
    (set_attr "us3load_type" "3cycle")])
 
 (define_insn "*tldo_ldsh1_sp32"
@@ -8214,6 +8331,7 @@
   "TARGET_TLS && TARGET_ARCH64"
   "lduh\t[%1 + %2], %0, %%tldo_add(%3)"
   [(set_attr "type" "load")
+   (set_attr "subtype" "regular")
    (set_attr "us3load_type" "3cycle")])
 
 (define_insn "*tldo_lduh1_sp64"
@@ -8226,6 +8344,7 @@
   "TARGET_TLS && TARGET_ARCH64"
   "lduh\t[%1 + %2], %0, %%tldo_add(%3)"
   [(set_attr "type" "load")
+   (set_attr "subtype" "regular")
    (set_attr "us3load_type" "3cycle")])
 
 (define_insn "*tldo_lduh2_sp64"
@@ -8238,6 +8357,7 @@
   "TARGET_TLS && TARGET_ARCH64"
   "lduh\t[%1 + %2], %0, %%tldo_add(%3)"
   [(set_attr "type" "load")
+   (set_attr "subtype" "regular")
    (set_attr "us3load_type" "3cycle")])
 
 (define_insn "*tldo_ldsh1_sp64"
@@ -8272,7 +8392,8 @@
 			 (match_operand:SI 1 "register_operand" "r"))))]
   "TARGET_TLS && TARGET_ARCH32"
   "ld\t[%1 + %2], %0, %%tldo_add(%3)"
-  [(set_attr "type" "load")])
+  [(set_attr "type" "load")
+   (set_attr "subtype" "regular")])
 
 (define_insn "*tldo_lduw_sp64"
   [(set (match_operand:SI 0 "register_operand" "=r")
@@ -8282,7 +8403,8 @@
 			 (match_operand:DI 1 "register_operand" "r"))))]
   "TARGET_TLS && TARGET_ARCH64"
   "lduw\t[%1 + %2], %0, %%tldo_add(%3)"
-  [(set_attr "type" "load")])
+  [(set_attr "type" "load")
+   (set_attr "subtype" "regular")])
 
 (define_insn "*tldo_lduw1_sp64"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -8293,7 +8415,8 @@
 			   (match_operand:DI 1 "register_operand" "r")))))]
   "TARGET_TLS && TARGET_ARCH64"
   "lduw\t[%1 + %2], %0, %%tldo_add(%3)"
-  [(set_attr "type" "load")])
+  [(set_attr "type" "load")
+   (set_attr "subtype" "regular")])
 
 (define_insn "*tldo_ldsw1_sp64"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -8315,7 +8438,8 @@
 			 (match_operand:DI 1 "register_operand" "r"))))]
   "TARGET_TLS && TARGET_ARCH64"
   "ldx\t[%1 + %2], %0, %%tldo_add(%3)"
-  [(set_attr "type" "load")])
+  [(set_attr "type" "load")
+   (set_attr "subtype" "regular")])
 
 (define_insn "*tldo_stb_sp32"
   [(set (mem:QI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r")
@@ -8520,6 +8644,7 @@
   movstouw\t%1, %0
   movwtos\t%1, %0"
   [(set_attr "type" "visl,visl,vismv,fpload,fpstore,store,load,store,*,vismv,vismv")
+   (set_attr "subtype" "single,single,single,*,*,*,regular,*,*,movstouw,single")
    (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,*,vis3,vis3")
    (set_attr "v3pipe" "true,true,true,*,*,*,*,*,*,true,true")])
 
@@ -8543,6 +8668,7 @@
   movxtod\t%1, %0
   mov\t%1, %0"
   [(set_attr "type" "visl,visl,vismv,fpload,fpstore,store,load,store,vismv,vismv,*")
+   (set_attr "subtype" "double,double,double,*,*,*,regular,*,movdtox,movxtod,*")
    (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,vis3,vis3,*")
    (set_attr "v3pipe" "true,true,true,*,*,*,*,*,*,*,*")])
 
@@ -8573,6 +8699,7 @@
   ldd\t%1, %0
   std\t%1, %0"
   [(set_attr "type" "store,*,visl,visl,vismv,*,*,fpload,fpstore,load,store,*,*,*,load,store")
+   (set_attr "subtype" "*,*,double,double,double,*,*,*,*,regular,*,*,*,*,regular,*")
    (set_attr "length" "*,2,*,*,*,2,2,*,*,*,*,2,2,2,*,*")
    (set_attr "cpu_feature" "*,*,vis,vis,vis,vis3,vis3,*,*,*,*,*,*,*,*,*")
    (set_attr "v3pipe" "*,*,true,true,true,*,*,*,*,*,*,*,*,*,*,*")
@@ -8653,6 +8780,7 @@
   "TARGET_VIS"
   "fp<plusminus_insn><vbits>\t%1, %2, %0"
   [(set_attr "type" "fga")
+   (set_attr "subtype" "other")
    (set_attr "fptype" "<vfptype>")
    (set_attr "v3pipe" "true")])
 
@@ -8765,6 +8893,7 @@
  "TARGET_VIS"
  "fexpand\t%1, %0"
  [(set_attr "type" "fga")
+  (set_attr "subtype" "fpu")
   (set_attr "fptype" "double")])
 
 (define_insn "fpmerge_vis"
@@ -8779,6 +8908,7 @@
  "TARGET_VIS"
  "fpmerge\t%1, %2, %0"
  [(set_attr "type" "fga")
+  (set_attr "subtype" "fpu")
   (set_attr "fptype" "double")])
 
 ;; Partitioned multiply instructions
@@ -8867,7 +8997,8 @@
   [(set (reg:DI GSR_REG) (match_operand:DI 0 "arith_operand" "rI"))]
   "TARGET_VIS && TARGET_ARCH64"
   "wr\t%%g0, %0, %%gsr"
-  [(set_attr "type" "gsr")])
+  [(set_attr "type" "gsr")
+   (set_attr "subtype" "reg")])
 
 (define_insn "wrgsr_v8plus"
   [(set (reg:DI GSR_REG) (match_operand:DI 0 "arith_operand" "I,r"))
@@ -8898,7 +9029,8 @@
   [(set (match_operand:DI 0 "register_operand" "=r") (reg:DI GSR_REG))]
   "TARGET_VIS && TARGET_ARCH64"
   "rd\t%%gsr, %0"
-  [(set_attr "type" "gsr")])
+  [(set_attr "type" "gsr")
+   (set_attr "subtype" "reg")])
 
 (define_insn "rdgsr_v8plus"
   [(set (match_operand:DI 0 "register_operand" "=r") (reg:DI GSR_REG))
@@ -8921,6 +9053,7 @@
   "TARGET_VIS"
   "faligndata\t%1, %2, %0"
   [(set_attr "type" "fga")
+   (set_attr "subtype" "other")
    (set_attr "fptype" "double")
    (set_attr "v3pipe" "true")])
 
@@ -8933,6 +9066,7 @@
   "TARGET_VIS"
   "alignaddr\t%r1, %r2, %0"
   [(set_attr "type" "gsr")
+   (set_attr "subtype" "alignaddr")
    (set_attr "v3pipe" "true")])
 
 (define_insn "alignaddrdi_vis"
@@ -8944,6 +9078,7 @@
   "TARGET_VIS"
   "alignaddr\t%r1, %r2, %0"
   [(set_attr "type" "gsr")
+   (set_attr "subtype" "alignaddr")
    (set_attr "v3pipe" "true")])
 
 (define_insn "alignaddrlsi_vis"
@@ -8956,6 +9091,7 @@
   "TARGET_VIS"
   "alignaddrl\t%r1, %r2, %0"
   [(set_attr "type" "gsr")
+   (set_attr "subtype" "alignaddr")
    (set_attr "v3pipe" "true")])
 
 (define_insn "alignaddrldi_vis"
@@ -8968,6 +9104,7 @@
   "TARGET_VIS"
   "alignaddrl\t%r1, %r2, %0"
   [(set_attr "type" "gsr")
+   (set_attr "subtype" "alignaddr")
    (set_attr "v3pipe" "true")])
 
 (define_insn "pdist_vis"
@@ -9156,6 +9293,7 @@
   "TARGET_VIS2"
   "bshuffle\t%1, %2, %0"
   [(set_attr "type" "fga")
+   (set_attr "subtype" "other")
    (set_attr "fptype" "double")
    (set_attr "v3pipe" "true")])
 
@@ -9260,6 +9398,7 @@
   "TARGET_VIS3"
   "cmask8\t%r0"
   [(set_attr "type" "fga")
+   (set_attr "subtype" "cmask")
    (set_attr "v3pipe" "true")])
 
 (define_insn "cmask16<P:mode>_vis"
@@ -9270,6 +9409,7 @@
   "TARGET_VIS3"
   "cmask16\t%r0"
   [(set_attr "type" "fga")
+   (set_attr "subtype" "cmask")
    (set_attr "v3pipe" "true")])
 
 (define_insn "cmask32<P:mode>_vis"
@@ -9280,6 +9420,7 @@
   "TARGET_VIS3"
   "cmask32\t%r0"
   [(set_attr "type" "fga")
+   (set_attr "subtype" "cmask")
    (set_attr "v3pipe" "true")])
 
 (define_insn "fchksm16_vis"
@@ -9289,7 +9430,8 @@
                      UNSPEC_FCHKSM16))]
   "TARGET_VIS3"
   "fchksm16\t%1, %2, %0"
-  [(set_attr "type" "fga")])
+  [(set_attr "type" "fga")
+   (set_attr "subtype" "fpu")])
 
 (define_code_iterator vis3_shift [ashift ss_ashift lshiftrt ashiftrt])
 (define_code_attr vis3_shift_insn
@@ -9303,7 +9445,8 @@
 			(match_operand:GCM 2 "register_operand" "<vconstr>")))]
   "TARGET_VIS3"
   "<vis3_shift_insn><vbits>\t%1, %2, %0"
-  [(set_attr "type" "fga")])
+  [(set_attr "type" "fga")
+   (set_attr "subtype" "fpu")])
 
 (define_insn "pdistn<P:mode>_vis"
   [(set (match_operand:P 0 "register_operand" "=r")
@@ -9331,7 +9474,8 @@
           (const_int 1))))]
   "TARGET_VIS3"
   "fmean16\t%1, %2, %0"
-  [(set_attr "type" "fga")])
+  [(set_attr "type" "fga")
+   (set_attr "subtype" "fpu")])
 
 (define_insn "fp<plusminus_insn>64_vis"
   [(set (match_operand:V1DI 0 "register_operand" "=e")
@@ -9339,7 +9483,8 @@
 			(match_operand:V1DI 2 "register_operand" "e")))]
   "TARGET_VIS3"
   "fp<plusminus_insn>64\t%1, %2, %0"
-  [(set_attr "type" "fga")])
+  [(set_attr "type" "fga")
+   (set_attr "subtype" "addsub64")])
 
 (define_insn "<plusminus_insn>v8qi3"
   [(set (match_operand:V8QI 0 "register_operand" "=e")
@@ -9347,7 +9492,8 @@
                         (match_operand:V8QI 2 "register_operand" "e")))]
   "TARGET_VIS4"
   "fp<plusminus_insn>8\t%1, %2, %0"
-  [(set_attr "type" "fga")])
+  [(set_attr "type" "fga")
+   (set_attr "subtype" "other")])
 
 (define_mode_iterator VASS [V4HI V2SI V2HI V1SI])
 (define_code_iterator vis3_addsub_ss [ss_plus ss_minus])
@@ -9363,6 +9509,7 @@
   "TARGET_VIS3"
   "<vis3_addsub_ss_insn><vbits>\t%1, %2, %0"
   [(set_attr "type" "fga")
+   (set_attr "subtype" "other")
    (set_attr "v3pipe" "true")])
 
 (define_mode_iterator VMMAX [V8QI V4HI V2SI])
@@ -9378,7 +9525,8 @@
                            (match_operand:VMMAX 2 "register_operand" "<vconstr>")))]
   "TARGET_VIS4"
   "<vis4_minmax_insn><vbits>\t%1, %2, %0"
-  [(set_attr "type" "fga")])
+  [(set_attr "type" "fga")
+   (set_attr "subtype" "maxmin")])
 
 (define_code_iterator vis4_uminmax [umin umax])
 (define_code_attr vis4_uminmax_insn
@@ -9392,7 +9540,8 @@
                             (match_operand:VMMAX 2 "register_operand" "<vconstr>")))]
   "TARGET_VIS4"
   "<vis4_uminmax_insn><vbits>\t%1, %2, %0"
-  [(set_attr "type" "fga")])
+  [(set_attr "type" "fga")
+   (set_attr "subtype" "maxmin")])
 
 ;; The use of vis3_addsub_ss_patname in the VIS4 instruction below is
 ;; intended.
@@ -9402,7 +9551,8 @@
                              (match_operand:V8QI 2 "register_operand" "e")))]
   "TARGET_VIS4"
   "<vis3_addsub_ss_insn>8\t%1, %2, %0"
-  [(set_attr "type" "fga")])
+  [(set_attr "type" "fga")
+   (set_attr "subtype" "other")])
 
 (define_mode_iterator VAUS [V4HI V8QI])
 (define_code_iterator vis4_addsub_us [us_plus us_minus])
@@ -9417,7 +9567,8 @@
                             (match_operand:VAUS 2 "register_operand" "<vconstr>")))]
  "TARGET_VIS4"
  "<vis4_addsub_us_insn><vbits>\t%1, %2, %0"
- [(set_attr "type" "fga")])
+ [(set_attr "type" "fga")
+  (set_attr "subtype" "other")])
 
 (define_insn "fucmp<gcond:code>8<P:mode>_vis"
   [(set (match_operand:P 0 "register_operand" "=r")
-- 
2.3.4

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 7/7] sparc: M8 DFA scheduler
  2017-06-29 11:43 [PATCH 0/7] Support for the SPARC M8 cpu Jose E. Marchesi
  2017-06-29 11:43 ` [PATCH 6/7] sparc: support for VIS4B instructions Jose E. Marchesi
@ 2017-06-29 11:43 ` Jose E. Marchesi
  2017-06-29 11:43 ` [PATCH 3/7] sparc: introduce insn subtypes Jose E. Marchesi
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Jose E. Marchesi @ 2017-06-29 11:43 UTC (permalink / raw)
  To: gcc-patches

This patch adds a DFA scheduler modelling the core S5 in the SPARC M8
processors.

gcc/ChangeLog:

	* config/sparc/m8.md: New file.
	* config/sparc/sparc.md: Include m8.md.
---
 gcc/ChangeLog             |   5 +
 gcc/config/sparc/m8.md    | 242 ++++++++++++++++++++++++++++++++++++++++++++++
 gcc/config/sparc/sparc.md |   1 +
 3 files changed, 248 insertions(+)
 create mode 100644 gcc/config/sparc/m8.md

diff --git a/gcc/config/sparc/m8.md b/gcc/config/sparc/m8.md
new file mode 100644
index 0000000..f0fe1b2
--- /dev/null
+++ b/gcc/config/sparc/m8.md
@@ -0,0 +1,242 @@
+;; Scheduling description for the SPARC M8.
+;;   Copyright (C) 2017 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Thigs to improve:
+;;
+;; - Store instructions are implemented by micro-ops, one of which
+;;   generates the store address and is executed in the store address
+;;   generation unit in the slot0.  We need to model that.
+;;
+;; - There are two V3 pipes connected to different slots.  The current
+;;   implementation assumes that all the instructions executing in a
+;;   V3 pipe are issued to the unit in slot3.
+;;
+;; - Single-issue ALU operations incur an additional cycle of latency to
+;;   slot 0 and slot 1 instructions.  This is not currently reflected
+;;   in the DFA.
+
+(define_automaton "m8_0")
+
+;; The S5 core has two dual-issue queues, PQLS and PQEX.  Each queue
+;; is divided into two slots: PQLS corresponds to slots 0 and 1, and
+;; PQEX corresponds to slots 2 and 3.  The core can issue 4
+;; instructions per-cycle, and up to 4 instructions are committed each
+;; cycle.
+;;
+;;                            
+;;                   m8_slot0  - Load Unit.
+;;                             - Store address gen. Unit.
+;;                                                       
+;;                            
+;;   === PQLS ==>    m8_slot1  - Store data unit.
+;;                             - Branch unit.
+;;                                            
+;;                             
+;;   === PQEX ==>    m8_slot2  - Integer Unit (EXU2).                     
+;;                             - 3-cycles Crypto Unit (SPU2).
+;;                                                     
+;;                   m8_slot3  - Integer Unit (EXU3).
+;;                             - 3-cycles Crypto Unit (SPU3).
+;;                             - Floating-point and graphics unit (FPG).
+;;                             - Long-latency Crypto Unit.
+;;                             - Oracle Numbers Unit (ONU).
+
+(define_cpu_unit "m8_slot0,m8_slot1,m8_slot2,m8_slot3" "m8_0")
+
+;; Some instructions stall the pipeline and avoid any other
+;; instruction to be issued in the same cycle.  We assume the same for
+;; multi-instruction insns.
+
+(define_reservation "m8_single_issue" "m8_slot0 + m8_slot1 + m8_slot2 + m8_slot3")
+
+(define_insn_reservation "m8_single" 1
+  (and (eq_attr "cpu" "m8")
+       (eq_attr "type" "multi,savew,flushw,trap,bmask"))
+  "m8_single_issue")
+
+;; Most of the instructions executing in the integer units have a
+;; latency of 1.
+
+(define_insn_reservation "m8_integer" 1
+  (and (eq_attr "cpu" "m8")
+       (eq_attr "type" "ialu,ialuX,shift,cmove,compare,bmask"))
+  "(m8_slot2 | m8_slot3)")
+
+;; Flushing the instruction memory takes 27 cycles.
+
+
+(define_insn_reservation "m8_iflush" 27
+  (and (eq_attr "cpu" "m8")
+       (eq_attr "type" "iflush"))
+  "(m8_slot2 | m8_slot3), nothing*26")
+
+;; The integer multiplication instructions have a latency of 10 cycles
+;; and execute in integer units.
+;;
+;; Likewise for array*, edge* and pdistn instructions.
+;;
+;; However, the latency is only 9 cycles if the consumer of the
+;; operation is also capable of 9 cycles latency.  We model this with
+;; a bypass.
+
+(define_insn_reservation "m8_imul" 10
+  (and (eq_attr "cpu" "m8")
+       (eq_attr "type" "imul,array,edge,edgen,pdistn"))
+  "(m8_slot2 | m8_slot3), nothing*12")
+
+(define_bypass 9 "m8_imul" "m8_imul")
+
+;; The integer division instructions `sdiv' and `udivx' have a latency
+;; of 30 cycles and execute in integer units.
+
+(define_insn_reservation "m8_idiv" 30
+  (and (eq_attr "cpu" "m8")
+       (eq_attr "type" "idiv"))
+  "(m8_slot2 | m8_slot3), nothing*29")
+
+;; Both integer and floating-point load instructions have a latency of
+;; only 3 cycles,and execute in the slot0.
+;;
+;; Misaligned load instructions feature a latency of 11 cycles.
+;;
+;; The prefetch instruction also executes in the load unit, but it's
+;; latency is only 1 cycle.
+
+(define_insn_reservation "m8_load" 3
+  (and (eq_attr "cpu" "m8")
+       (ior (eq_attr "type" "fpload,sload")
+            (and (eq_attr "type" "load")
+                 (eq_attr "subtype" "regular"))))
+  "m8_slot0, nothing*2")
+
+;; (define_insn_reservation "m8_load_misalign" 11
+;;  (and (eq_attr "cpu" "m8")
+;;       (eq_attr "type" "load_mis,fpload_mis"))
+;;  "m8_slot0, nothing*10")
+
+(define_insn_reservation "m8_prefetch" 1
+  (and (eq_attr "cpu" "m8")
+       (eq_attr "type" "load")
+       (eq_attr "subtype" "prefetch"))
+  "m8_slot0")
+
+;; Both integer and floating-point store instructions have a latency
+;; of 1 cycle, and execute in the store data unit in slot1.
+;;
+;; However, misaligned store instructions feature a latency of 3
+;; cycles.
+
+(define_insn_reservation "m8_store" 1
+  (and (eq_attr "cpu" "m8")
+       (eq_attr "type" "store,fpstore"))
+  "m8_slot1")
+
+;; (define_insn_reservation "m8_store_misalign" 3
+;;   (and (eq_attr "cpu" "m8")
+;;        (eq_attr "type" "store_mis,fpstore_mis"))
+;;   "m8_slot1, nothing*2")
+
+;; Control-transfer instructions execute in the Branch Unit in the
+;; slot1.
+
+(define_insn_reservation "m8_cti" 1
+  (and (eq_attr "cpu" "m8")
+       (eq_attr "type" "cbcond,uncond_cbcond,branch,call,sibcall,call_no_delay_slot,uncond_branch,return"))
+  "m8_slot1")
+
+;; Many instructions executing in the Floating-point and Graphics Unit
+;; (FGU) serving slot3 feature a default latency of 9 cycles.
+
+(define_insn_reservation "m8_fp" 9
+  (and (eq_attr "cpu" "m8")
+       (ior (eq_attr "type" "fpmove,fpcmove,fpcrmove,fp,fpcmp,fpmul,fgm_pack,fgm_mul,pdist")
+            (and (eq_attr "type" "fga")
+                 (eq_attr "subtype" "fpu"))))
+  "m8_slot3, nothing*8")
+
+;; Floating-point division and floating-point square-root instructions
+;; have high latencies.  They execute in the FGU.
+
+(define_insn_reservation "m8_fpdivs" 26
+  (and (eq_attr "cpu" "m8")
+       (eq_attr "type" "fpdivs"))
+  "m8_slot3, nothing*25")
+
+(define_insn_reservation "m8_fpsqrts" 33
+  (and (eq_attr "cpu" "m8")
+       (eq_attr "type" "fpsqrts"))
+  "m8_slot3, nothing*32")
+
+(define_insn_reservation "m8_fpdivd" 30
+  (and (eq_attr "cpu" "m8")
+       (eq_attr "type" "fpdivd"))
+  "m8_slot3, nothing*29")
+
+(define_insn_reservation "m8_fpsqrtd" 41
+  (and (eq_attr "cpu" "m8")
+       (eq_attr "type" "fpsqrtd"))
+  "m8_slot3, nothing*40")
+
+;; SIMD VIS instructions executing in the Floating-point and graphics
+;; unit (FPG) in slot3 usually have a latency of 5 cycles.
+;;
+;; However, the latency for many instructions is only 3 cycles if the
+;; consumer can also be executed in 3 cycles.  We model this with a
+;; bypass.  In these cases the instructions are executed in one of the
+;; two 3-cycle crypto units (SPU, also known as "v3-pipes") in slots 2
+;; and 3.
+
+(define_insn_reservation "m8_vis" 5
+  (and (eq_attr "cpu" "m8")
+       (ior (eq_attr "type" "viscmp,lzd")
+            (and (eq_attr "type" "fga")
+                 (eq_attr "subtype" "maxmin,cmask,other"))
+            (and (eq_attr "type" "vismv")
+                 (eq_attr "subtype" "single,movstouw"))
+            (and (eq_attr "type" "visl")
+                 (eq_attr "subtype" "single"))))
+  "m8_slot3, nothing*4")
+
+(define_bypass 3 "m8_vis" "m8_vis")
+
+(define_insn_reservation "m8_gsr" 5
+  (and (eq_attr "cpu" "m8")
+       (eq_attr "type" "gsr")
+       (eq_attr "subtype" "alignaddr"))
+  "m8_slot3, nothing*4")
+
+;; A few VIS instructions have a latency of 1.
+
+(define_insn_reservation "m8_vis_1cycle" 1
+  (and (eq_attr "cpu" "m8")
+       (ior (and (eq_attr "type" "vismv")
+                 (eq_attr "subtype" "double,movxtod,movdtox"))
+            (and (eq_attr "type" "visl")
+                 (eq_attr "subtype" "double"))
+            (and (eq_attr "type" "fga")
+                 (eq_attr "subtype" "addsub64"))))
+  "m8_slot3")
+
+;; Reading and writing to the gsr register takes more than 70 cycles.
+
+(define_insn_reservation "m8_gsr_reg" 70
+  (and (eq_attr "cpu" "m8")
+       (eq_attr "type" "gsr")
+       (eq_attr "subtype" "reg"))
+  "m8_slot3, nothing*69")
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 407544b..cac1bd9 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -613,6 +613,7 @@
 (include "niagara2.md")
 (include "niagara4.md")
 (include "niagara7.md")
+(include "m8.md")
 
 
 ;; Operand and operator predicates and constraints
-- 
2.3.4

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 2/7] sparc: put VIS compare instructions in it's own insn type and adjust DFAs
  2017-06-29 11:43 [PATCH 0/7] Support for the SPARC M8 cpu Jose E. Marchesi
                   ` (4 preceding siblings ...)
  2017-06-29 11:43 ` [PATCH 4/7] sparc: reworked M7 DFA based on instruction subtypes Jose E. Marchesi
@ 2017-06-29 11:43 ` Jose E. Marchesi
  2017-06-29 11:43 ` [PATCH 1/7] sparc: put bmask* " Jose E. Marchesi
  2017-07-04  8:46 ` [PATCH 0/7] Support for the SPARC M8 cpu Rainer Orth
  7 siblings, 0 replies; 12+ messages in thread
From: Jose E. Marchesi @ 2017-06-29 11:43 UTC (permalink / raw)
  To: gcc-patches

This patch introduces a new value for the insn type attribute viscmp.
VIS comparison insn are adapted to use it, and finally the DFA
schedulers are updated accordingly.

gcc/ChangeLog:

	* config/sparc/sparc.md ("type"): New insn type viscmp.
	("fcmp<gcond:code><GCM:gcm_name><P:mode>_vis"): Set insn type to
	viscmp.
	("fpcmp<gcond:code>8<P:mode>_vis"): Likewise.
	("fucmp<gcond:code>8<P:mode>_vis"): Likewise.
	("fpcmpu<gcond:code><GCM:gcm_name><P:mode>_vis"): Likewise.
	* config/sparc/niagara7.md ("n7_vis_logical_v3pipe"): Handle
	viscmp.
	("n7_vis_logical_11cycle"): Likewise.
	* config/sparc/niagara4.md ("n4_vis_logical"): Likewise.
	* config/sparc/niagara2.md ("niag3_vis": Likewise.
	* config/sparc/niagara.md ("niag_vis"): Likewise.
	* config/sparc/ultra3.md ("us3_fga"): Likewise.
	* config/sparc/ultra1_2.md ("us1_fga_double"): Likewise.
---
 gcc/ChangeLog                | 17 +++++++++++++++++
 gcc/config/sparc/niagara.md  |  2 +-
 gcc/config/sparc/niagara2.md |  4 ++--
 gcc/config/sparc/niagara4.md |  5 +++--
 gcc/config/sparc/niagara7.md |  4 ++--
 gcc/config/sparc/sparc.md    | 15 ++++++---------
 gcc/config/sparc/ultra1_2.md |  8 ++++----
 gcc/config/sparc/ultra3.md   |  2 +-
 8 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/gcc/config/sparc/niagara.md b/gcc/config/sparc/niagara.md
index f9a1f6d..a8e23b8 100644
--- a/gcc/config/sparc/niagara.md
+++ b/gcc/config/sparc/niagara.md
@@ -114,5 +114,5 @@
  */
 (define_insn_reservation "niag_vis" 8
   (and (eq_attr "cpu" "niagara")
-    (eq_attr "type" "fga,visl,vismv,fgm_pack,fgm_mul,pdist,edge,edgen,gsr,array,bmask"))
+    (eq_attr "type" "fga,visl,viscmp,vismv,fgm_pack,fgm_mul,pdist,edge,edgen,gsr,array,bmask"))
   "niag_pipe*8")
diff --git a/gcc/config/sparc/niagara2.md b/gcc/config/sparc/niagara2.md
index 34ee630..3190d55 100644
--- a/gcc/config/sparc/niagara2.md
+++ b/gcc/config/sparc/niagara2.md
@@ -111,10 +111,10 @@
 
 (define_insn_reservation "niag2_vis" 6
   (and (eq_attr "cpu" "niagara2")
-    (eq_attr "type" "fga,vismv,visl,fgm_pack,fgm_mul,pdist,edge,edgen,array,bmask,gsr"))
+    (eq_attr "type" "fga,vismv,visl,viscmp,fgm_pack,fgm_mul,pdist,edge,edgen,array,bmask,gsr"))
   "niag2_pipe*6")
 
 (define_insn_reservation "niag3_vis" 9
   (and (eq_attr "cpu" "niagara3")
-    (eq_attr "type" "fga,vismv,visl,fgm_pack,fgm_mul,pdist,pdistn,edge,edgen,array,bmask,gsr"))
+    (eq_attr "type" "fga,vismv,visl,viscmp,fgm_pack,fgm_mul,pdist,pdistn,edge,edgen,array,bmask,gsr"))
   "niag2_pipe*9")
diff --git a/gcc/config/sparc/niagara4.md b/gcc/config/sparc/niagara4.md
index cc1bb75..a3417d2 100644
--- a/gcc/config/sparc/niagara4.md
+++ b/gcc/config/sparc/niagara4.md
@@ -90,8 +90,9 @@
 
 (define_insn_reservation "n4_vis_logical" 3
   (and (eq_attr "cpu" "niagara4")
-    (and (eq_attr "type" "visl,pdistn")
-      (eq_attr "fptype" "double")))
+       (ior (and (eq_attr "type" "visl,pdistn")
+                 (eq_attr "fptype" "double"))
+            (eq_attr "type" "viscmp")))
   "n4_slot1, nothing*2")
 
 (define_insn_reservation "n4_vis_logical_11cycle" 11
diff --git a/gcc/config/sparc/niagara7.md b/gcc/config/sparc/niagara7.md
index 3dc8f9e..3f46198 100644
--- a/gcc/config/sparc/niagara7.md
+++ b/gcc/config/sparc/niagara7.md
@@ -123,13 +123,13 @@
 
 (define_insn_reservation "n7_vis_logical_v3pipe" 11
   (and (eq_attr "cpu" "niagara7")
-    (and (eq_attr "type" "visl,pdistn")
+    (and (eq_attr "type" "visl,viscmp,pdistn")
          (eq_attr "v3pipe" "true")))
   "n7_slot1, nothing*2")
 
 (define_insn_reservation "n7_vis_logical_11cycle" 11
   (and (eq_attr "cpu" "niagara7")
-    (and (eq_attr "type" "visl")
+    (and (eq_attr "type" "visl,viscmp")
       (eq_attr "v3pipe" "false")))
   "n7_slot1, nothing*10")
 
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index da23060..04da8ae 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -281,7 +281,8 @@
    fpcmp,
    fpmul,fpdivs,fpdivd,
    fpsqrts,fpsqrtd,
-   fga,visl,vismv,fgm_pack,fgm_mul,pdist,pdistn,edge,edgen,gsr,array,bmask,
+   fga,visl,vismv,viscmp,
+   fgm_pack,fgm_mul,pdist,pdistn,edge,edgen,gsr,array,bmask,
    cmove,
    ialuX,
    multi,savew,flushw,iflush,trap,lzd"
@@ -9059,8 +9060,7 @@
 	 UNSPEC_FCMP))]
   "TARGET_VIS"
   "fcmp<gcond:code><GCM:gcm_name>\t%1, %2, %0"
-  [(set_attr "type" "visl")
-   (set_attr "fptype" "double")
+  [(set_attr "type" "viscmp")
    (set_attr "v3pipe" "true")])
 
 (define_insn "fpcmp<gcond:code>8<P:mode>_vis"
@@ -9070,8 +9070,7 @@
 	 UNSPEC_FCMP))]
   "TARGET_VIS4"
   "fpcmp<gcond:code>8\t%1, %2, %0"
-  [(set_attr "type" "visl")
-   (set_attr "fptype" "double")])
+  [(set_attr "type" "viscmp")])
 
 (define_expand "vcond<GCM:mode><GCM:mode>"
   [(match_operand:GCM 0 "register_operand" "")
@@ -9427,8 +9426,7 @@
 	 UNSPEC_FUCMP))]
   "TARGET_VIS3"
   "fucmp<gcond:code>8\t%1, %2, %0"
-  [(set_attr "type" "visl")
-   (set_attr "v3pipe" "true")])
+  [(set_attr "type" "viscmp")])
 
 (define_insn "fpcmpu<gcond:code><GCM:gcm_name><P:mode>_vis"
   [(set (match_operand:P 0 "register_operand" "=r")
@@ -9437,8 +9435,7 @@
 	 UNSPEC_FUCMP))]
   "TARGET_VIS4"
   "fpcmpu<gcond:code><GCM:gcm_name>\t%1, %2, %0"
-  [(set_attr "type" "visl")
-   (set_attr "fptype" "double")])
+  [(set_attr "type" "viscmp")])
 
 (define_insn "*naddsf3"
   [(set (match_operand:SF 0 "register_operand" "=f")
diff --git a/gcc/config/sparc/ultra1_2.md b/gcc/config/sparc/ultra1_2.md
index 6af2859..a4fb883 100644
--- a/gcc/config/sparc/ultra1_2.md
+++ b/gcc/config/sparc/ultra1_2.md
@@ -263,10 +263,10 @@
 
 (define_insn_reservation "us1_fga_double"
   2
-  (and (and
-         (eq_attr "cpu" "ultrasparc")
-         (eq_attr "type" "fga,visl,vismv"))
-       (eq_attr "fptype" "double"))
+  (and (eq_attr "cpu" "ultrasparc")
+       (ior (and (eq_attr "type" "fga,visl,vismv")
+                 (eq_attr "fptype" "double"))
+            (eq_attr "type" "viscmp")))
   "us1_fpa + us1_fp_double + us1_slotany, nothing")
 
 (define_bypass 1 "us1_fga_double" "us1_fga_double")
diff --git a/gcc/config/sparc/ultra3.md b/gcc/config/sparc/ultra3.md
index f5b81d6..db20cd9 100644
--- a/gcc/config/sparc/ultra3.md
+++ b/gcc/config/sparc/ultra3.md
@@ -176,7 +176,7 @@
 (define_insn_reservation "us3_fga"
   3
   (and (eq_attr "cpu" "ultrasparc3")
-       (eq_attr "type" "fga,visl,vismv"))
+       (eq_attr "type" "fga,visl,viscmp,vismv"))
   "us3_fpa + us3_slotany, nothing*2")
 
 (define_insn_reservation "us3_fgm"
-- 
2.3.4

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 0/7] Support for the SPARC M8 cpu
@ 2017-06-29 11:43 Jose E. Marchesi
  2017-06-29 11:43 ` [PATCH 6/7] sparc: support for VIS4B instructions Jose E. Marchesi
                   ` (7 more replies)
  0 siblings, 8 replies; 12+ messages in thread
From: Jose E. Marchesi @ 2017-06-29 11:43 UTC (permalink / raw)
  To: gcc-patches

This patch serie adds support for the SPARC M8 processor to GCC.
The SPARC M8 processor implements the Oracle SPARC Architecture 2017.

The first four patches are preparatory work:

- bmask* instructions are put in their own instruction type.  It makes
  little sense to have them in the same category than array
  instructions.

- Similarly, VIS compare instructions are put in their own instruction
  type.  This is to better accommodate subtypes, which are not quite
  the same than the subtypes of `visl' instructions.

- The introduction of a new `subtype' insn attribute in sparc.md
  avoids the need for adjusting the instruction scheduler DFAs for
  previous cpu models every time a new cpu is introduced.

- The full set of SPARC instructions used in sparc.md, and their
  position in the type/subtype hierarchy, is documented in a comment.
  This eases the modification of the DFA schedulers, and the addition
  of new cpus.

- The M7 DFA scheduler is reworked:

  + To use the new type/subtype hierarchy.
  + The v3pipe insn attribute is no longer needed.
  + More accurate latencies for instructions.
  + The C4 core pipeline is documented in a comment in niagara7.md.

The next three patches introduce M8 support proper:

- Support for -mcpu=m8 (we are thus suggesting to abandon the niagaraN
  denomination for M8 and later processors.)

- Support for a new VIS level, VIS4B, covering the new VIS
  instructions introduced in OSA2017 and implemented in the M8.  Also
  built-ins.

  Note that no new VIS level was formally introduced in OSA2017, even
  if many new VIS instructions were added to the spec.  We introduced
  VIS4B for coherence (like availability of builtins and visintrin.h
  depending on the value of __VIS__) and avoided using VIS5 in case it
  is introduced in future versions of the Oracle SPARC Architecture.

- A M8 DFA scheduler:

  + Also based on the new type/subtype hierarchy.
  + The functional units in the C5 core are explicitly documented in a
    comment in m8.md.

See the individual patch descriptions for more information and
associated ChangeLog entries.

After this serie gets integrated upstream we will be contributing more
support for M8 capabilities, such as support for using the new
misaligned load/store instructions for memory accesses known to be
misaligned at compile-time.

Note that full binutils support for M8 was upstreamed in May 19.
Bootstrapped and tested in sparc64-linux-gnu.  No regressions.


Jose E. Marchesi (7):
  sparc: put bmask* instructions in it's own insn type and adjust DFAs
  sparc: put VIS compare instructions in it's own insn type and adjust
    DFAs
  sparc: introduce insn subtypes
  sparc: reworked M7 DFA based on instruction subtypes
  sparc: basic support for the SPARC M8 cpu
  sparc: support for VIS4B instructions
  sparc: M8 DFA scheduler

 gcc/ChangeLog                               | 226 +++++++++++++++++
 gcc/config.gcc                              |   2 +-
 gcc/config.in                               |   4 +
 gcc/config/sparc/constraints.md             |  12 +-
 gcc/config/sparc/driver-sparc.c             |   1 +
 gcc/config/sparc/m8.md                      | 242 ++++++++++++++++++
 gcc/config/sparc/niagara.md                 |   2 +-
 gcc/config/sparc/niagara2.md                |   4 +-
 gcc/config/sparc/niagara4.md                |   7 +-
 gcc/config/sparc/niagara7.md                | 181 +++++++++-----
 gcc/config/sparc/predicates.md              |  27 +++
 gcc/config/sparc/sol2.h                     |  14 +-
 gcc/config/sparc/sparc-c.c                  |   7 +-
 gcc/config/sparc/sparc-opts.h               |   1 +
 gcc/config/sparc/sparc.c                    | 312 ++++++++++++++++++++++--
 gcc/config/sparc/sparc.h                    |  20 +-
 gcc/config/sparc/sparc.md                   | 364 +++++++++++++++++++++-------
 gcc/config/sparc/sparc.opt                  |   7 +
 gcc/config/sparc/ultra1_2.md                |   8 +-
 gcc/config/sparc/ultra3.md                  |   4 +-
 gcc/configure                               |  35 +++
 gcc/configure.ac                            |  12 +
 gcc/doc/extend.texi                         |  39 +++
 gcc/doc/invoke.texi                         |  25 +-
 gcc/testsuite/ChangeLog                     |   8 +
 gcc/testsuite/gcc.target/sparc/dictunpack.c |  25 ++
 gcc/testsuite/gcc.target/sparc/fpcmpdeshl.c |  25 ++
 gcc/testsuite/gcc.target/sparc/fpcmpshl.c   |  81 +++++++
 gcc/testsuite/gcc.target/sparc/fpcmpurshl.c |  25 ++
 gcc/testsuite/gcc.target/sparc/fpcmpushl.c  |  43 ++++
 30 files changed, 1579 insertions(+), 184 deletions(-)
 create mode 100644 gcc/config/sparc/m8.md
 create mode 100644 gcc/testsuite/gcc.target/sparc/dictunpack.c
 create mode 100644 gcc/testsuite/gcc.target/sparc/fpcmpdeshl.c
 create mode 100644 gcc/testsuite/gcc.target/sparc/fpcmpshl.c
 create mode 100644 gcc/testsuite/gcc.target/sparc/fpcmpurshl.c
 create mode 100644 gcc/testsuite/gcc.target/sparc/fpcmpushl.c

-- 
2.3.4

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/7] Support for the SPARC M8 cpu
  2017-06-29 11:43 [PATCH 0/7] Support for the SPARC M8 cpu Jose E. Marchesi
                   ` (6 preceding siblings ...)
  2017-06-29 11:43 ` [PATCH 1/7] sparc: put bmask* " Jose E. Marchesi
@ 2017-07-04  8:46 ` Rainer Orth
  2017-07-05  8:04   ` Jose E. Marchesi
  7 siblings, 1 reply; 12+ messages in thread
From: Rainer Orth @ 2017-07-04  8:46 UTC (permalink / raw)
  To: Jose E. Marchesi; +Cc: gcc-patches

Hi Jose,

> This patch serie adds support for the SPARC M8 processor to GCC.
> The SPARC M8 processor implements the Oracle SPARC Architecture 2017.
[...]
> Note that full binutils support for M8 was upstreamed in May 19.
> Bootstrapped and tested in sparc64-linux-gnu.  No regressions.

since the patch is touching Solaris-specific files, too, please also
regtest it on Solaris (S12 with Studio 12.6 fbe would be best, I
suppose).

Thanks.
        Rainer

-- 
-----------------------------------------------------------------------------
Rainer Orth, Center for Biotechnology, Bielefeld University

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/7] Support for the SPARC M8 cpu
  2017-07-04  8:46 ` [PATCH 0/7] Support for the SPARC M8 cpu Rainer Orth
@ 2017-07-05  8:04   ` Jose E. Marchesi
  2017-07-05  8:40     ` Rainer Orth
  0 siblings, 1 reply; 12+ messages in thread
From: Jose E. Marchesi @ 2017-07-05  8:04 UTC (permalink / raw)
  To: Rainer Orth; +Cc: gcc-patches


Hi Rainer.
    
    > This patch serie adds support for the SPARC M8 processor to GCC.
    > The SPARC M8 processor implements the Oracle SPARC Architecture 2017.
    [...]
    > Note that full binutils support for M8 was upstreamed in May 19.
    > Bootstrapped and tested in sparc64-linux-gnu.  No regressions.
    
    since the patch is touching Solaris-specific files, too, please also
    regtest it on Solaris (S12 with Studio 12.6 fbe would be best, I
    suppose).

I tested in sparc-sun-solaris2.12 and it indeed catched a couple of
typos in the VIS4B built-ins registration in 32-bit mode.  There were no
regressions.

I am preparing a second version of the patch serie that I will be
submitting today.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/7] Support for the SPARC M8 cpu
  2017-07-05  8:04   ` Jose E. Marchesi
@ 2017-07-05  8:40     ` Rainer Orth
  2017-07-05 11:38       ` Jose E. Marchesi
  0 siblings, 1 reply; 12+ messages in thread
From: Rainer Orth @ 2017-07-05  8:40 UTC (permalink / raw)
  To: Jose E. Marchesi; +Cc: gcc-patches

Hi Jose,

>     > This patch serie adds support for the SPARC M8 processor to GCC.
>     > The SPARC M8 processor implements the Oracle SPARC Architecture 2017.
>     [...]
>     > Note that full binutils support for M8 was upstreamed in May 19.
>     > Bootstrapped and tested in sparc64-linux-gnu.  No regressions.
>     
>     since the patch is touching Solaris-specific files, too, please also
>     regtest it on Solaris (S12 with Studio 12.6 fbe would be best, I
>     suppose).
>
> I tested in sparc-sun-solaris2.12 and it indeed catched a couple of
> typos in the VIS4B built-ins registration in 32-bit mode.  There were no
> regressions.

fine, thanks.

> I am preparing a second version of the patch serie that I will be
> submitting today.

Ok.  I was a bit astonished lately to find that a couple of SPARC
patches to binutils and gdb had only been tested (or even only
supported) Linux/SPARC, not Solaris/SPARC.  However, it's great to see
Oracle finally engage in the community here.

	Rainer

-- 
-----------------------------------------------------------------------------
Rainer Orth, Center for Biotechnology, Bielefeld University

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/7] Support for the SPARC M8 cpu
  2017-07-05  8:40     ` Rainer Orth
@ 2017-07-05 11:38       ` Jose E. Marchesi
  0 siblings, 0 replies; 12+ messages in thread
From: Jose E. Marchesi @ 2017-07-05 11:38 UTC (permalink / raw)
  To: Rainer Orth; +Cc: gcc-patches

    
    > I am preparing a second version of the patch serie that I will be
    > submitting today.
    
    Ok.  I was a bit astonished lately to find that a couple of SPARC
    patches to binutils and gdb had only been tested (or even only
    supported) Linux/SPARC, not Solaris/SPARC.

For binutils, I always try to test all our patches with
--enable-targets=all, plus:

For changes in sparc code:

SPARC_TARGETS="sparc-aout sparc-linux sparc-vxworks sparc64-linux
               sparc-sun-solaris2.12"

For changes in common code, the above plus:

X86_TARGETS="i386-darwin i386-lynxos i586-linux i686-nacl i686-pc-beos
             i686-pc-elf i686-pe i686-vxworks x86_64-linux
             x86_64-w64-mingw32 x86_64-nacl "

MIPS_TARGETS="mips-linux mips-vxworks mips64-linux mipsel-linux-gnu
              mipsisa32el-linux mips64-openbsd mipstx39-elf"

For big extensive changes in common code, the above plus around 80 more
targets covering powerpc, arm, alpha, arm, arc, vax, etc.

That said, we should probably do more native testing on Solaris/SPARC to
avoid breakage in that platform... we will look into improving that :)

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2017-07-05 11:38 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-29 11:43 [PATCH 0/7] Support for the SPARC M8 cpu Jose E. Marchesi
2017-06-29 11:43 ` [PATCH 6/7] sparc: support for VIS4B instructions Jose E. Marchesi
2017-06-29 11:43 ` [PATCH 7/7] sparc: M8 DFA scheduler Jose E. Marchesi
2017-06-29 11:43 ` [PATCH 3/7] sparc: introduce insn subtypes Jose E. Marchesi
2017-06-29 11:43 ` [PATCH 5/7] sparc: basic support for the SPARC M8 cpu Jose E. Marchesi
2017-06-29 11:43 ` [PATCH 4/7] sparc: reworked M7 DFA based on instruction subtypes Jose E. Marchesi
2017-06-29 11:43 ` [PATCH 2/7] sparc: put VIS compare instructions in it's own insn type and adjust DFAs Jose E. Marchesi
2017-06-29 11:43 ` [PATCH 1/7] sparc: put bmask* " Jose E. Marchesi
2017-07-04  8:46 ` [PATCH 0/7] Support for the SPARC M8 cpu Rainer Orth
2017-07-05  8:04   ` Jose E. Marchesi
2017-07-05  8:40     ` Rainer Orth
2017-07-05 11:38       ` Jose E. Marchesi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).