public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 2/6] Andes nds32: machine description of nds32 porting (2).
@ 2013-07-08 10:57 Chung-Ju Wu
  2013-07-09 23:44 ` Joseph S. Myers
  0 siblings, 1 reply; 18+ messages in thread
From: Chung-Ju Wu @ 2013-07-08 10:57 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 397 bytes --]

Hi,

This patch consists of back-end machine descriptions of nds32 port.
We follow the GNU coding standards and GCC coding convention when
writing programs.  Here are the md file and common target hook
implementation.


gcc/
2013-07-08  Chung-Ju Wu  <jasonwucj@gmail.com>
	    Shiva Chen  <shiva0217@gmail.com>

	* config/nds32/nds32.md: New file.
	* common/config/nds32: New directory and files.

[-- Attachment #2: 2-nds32-backend-md-part2.patch --]
[-- Type: application/octet-stream, Size: 90372 bytes --]

diff --git gcc/common/config/nds32/nds32-common.c gcc/common/config/nds32/nds32-common.c
new file mode 100644
index 0000000..618ee0a
--- /dev/null
+++ gcc/common/config/nds32/nds32-common.c
@@ -0,0 +1,157 @@
+/* Common hooks of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic-core.h"
+#include "tm.h"
+#include "common/common-target.h"
+#include "common/common-target-def.h"
+#include "opts.h"
+#include "flags.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* This is used to identify used ISA when doing code generation.
+   Initialize it with macro TARGET_DEFAULT_ISA,
+   which is defined in nds32-isa-xxx.h file.
+   User can specify it by using '-misa=X' option.  */
+enum nds32_isa nds32_selected_isa = TARGET_DEFAULT_ISA;
+
+/* ------------------------------------------------------------------------ */
+
+/* ======================================================================== */
+/* 17.3 Run-time Target Specification */
+/* ======================================================================== */
+
+static bool nds32_handle_option (struct gcc_options *, struct gcc_options *,
+				 const struct cl_decoded_option *, location_t);
+
+/* Per-optimization-level option defaults for nds32, registered below
+   via TARGET_OPTION_OPTIMIZATION_TABLE.  */
+static const struct default_options nds32_option_optimization_table[] =
+{
+  /* Enable -fomit-frame-pointer by default at -O1 or higher.  */
+  { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+  /* Enable -mv3push by default at -Os, but it is useless under V2 ISA.  */
+  { OPT_LEVELS_SIZE,   OPT_mv3push,             NULL, 1 },
+
+  /* Terminator entry marking the end of the table.  */
+  { OPT_LEVELS_NONE,   0,                       NULL, 0 }
+};
+
+/* ------------------------------------------------------------------------ */
+
+/* ======================================================================== */
+/* 17.3 Run-time Target Specification */
+/* ======================================================================== */
+
+/* Implement TARGET_HANDLE_OPTION.
+
+   Validate the nds32-specific command line options.  Return true when
+   the option is valid (or is not handled here), false to reject it.
+   DECODED carries the option index, textual argument and integral
+   value; LOC lets diagnostics point at the offending option.  */
+static bool
+nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
+		     struct gcc_options *opts_set ATTRIBUTE_UNUSED,
+		     const struct cl_decoded_option *decoded,
+		     location_t loc)
+{
+  size_t     code  = decoded->opt_index;
+  const char *arg  = decoded->arg;
+  int        value = decoded->value;
+
+  switch (code)
+    {
+    case OPT_misr_vector_size_:
+      /* Check the valid vector size: 4 or 16.  */
+      if (value != 4 && value != 16)
+	{
+	  /* Follow GCC diagnostics conventions: start in lower case,
+	     no trailing period, quote option names with %<...%>, and
+	     use warning_at so the location is reported.  */
+	  warning_at (loc, 0, "for the option %<-misr-vector-size=X%>, "
+			      "the valid X must be: 4 or 16");
+	  return false;
+	}
+
+      return true;
+
+    case OPT_mcache_block_size_:
+      /* Check valid value: 4 8 16 32 64 128 256 512,
+         i.e. exact_log2 (value) must lie within [2, 9].  */
+      if (exact_log2 (value) < 2 || exact_log2 (value) > 9)
+	{
+	  warning_at (loc, 0, "for the option %<-mcache-block-size=X%>, "
+			      "the valid X must be: 4, 8, 16, 32, 64, 128, "
+			      "256, or 512");
+	  return false;
+	}
+
+      return true;
+
+    case OPT_misa_:
+      /* Check valid ISA: v2 v3 v3m.  */
+      if (strcmp (arg, "v2") == 0)
+	nds32_selected_isa = NDS32_ISA_V2;
+      else if (strcmp (arg, "v3") == 0)
+	nds32_selected_isa = NDS32_ISA_V3;
+      else if (strcmp (arg, "v3m") == 0)
+	nds32_selected_isa = NDS32_ISA_V3M;
+      else
+	{
+	  warning_at (loc, 0, "for the option %<-misa=X%>, "
+			      "the valid X must be: v2, v3, or v3m");
+	  return false;
+	}
+
+      return true;
+
+    default:
+      /* Options not listed above need no extra validation here.  */
+      return true;
+    }
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* ======================================================================== */
+/* 17.3 Run-time Target Specification */
+/* ======================================================================== */
+
+/* The value of TARGET_CPU_DEFAULT is set by
+   the process of 'configure' and 'make' stage.
+   Please check gcc/config.gcc for more implementation detail.  */
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_CPU_DEFAULT)
+
+/* Route the nds32-specific option validation (-misr-vector-size=,
+   -mcache-block-size=, -misa=) through the handler defined above.  */
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION nds32_handle_option
+
+/* Register the per-optimization-level defaults table defined above.  */
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE nds32_option_optimization_table
+
+/* ======================================================================== */
+/* 17.21 Defining the Output Assembler Language */
+/* ======================================================================== */
+
+/* 17.21.9 Assembler Commands for Exception Regions */
+
+/* Use setjmp/longjmp based exception unwinding.
+   NOTE(review): confirm sjlj (rather than DWARF2 unwind info) is the
+   intended long-term choice for this port and not a stopgap.  */
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+/* ------------------------------------------------------------------------ */
+
+/* Instantiate the common target structure using the hooks above.  */
+struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER;
+
+/* ------------------------------------------------------------------------ */
diff --git gcc/config/nds32/nds32.md gcc/config/nds32/nds32.md
new file mode 100644
index 0000000..6dc9143
--- /dev/null
+++ gcc/config/nds32/nds32.md
@@ -0,0 +1,2833 @@
+;; Machine description of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2013 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; See file "rtl.def" for documentation on define_insn, match_*, et. al.
+
+;; Include predicates definition.
+(include "predicates.md")
+
+;; Include constraints definition.
+(include "constraints.md")
+
+;; Include iterators definition.
+(include "iterators.md")
+
+;; Include pipelines definition.
+(include "pipelines.md")
+
+
+;; Include constants definition.
+(include "constants.md")
+
+
+;; Include intrinsic functions definition.
+(include "nds32.intrinsic.md")
+
+;; Include block move for nds32 multiple load/store behavior.
+(include "nds32.multiple.md")
+
+;; Include DImode/DFmode operations.
+(include "nds32.doubleword.md")
+
+;; Include peephole patterns.
+(include "nds32.peephole2.md")
+
+
+;; Insn type, it is used to default other attribute values.
+(define_attr "type"
+  "unknown,move,load,store,alu,compare,branch,call,misc"
+  (const_string "unknown"))
+
+
+;; Length, in bytes, default is 4-bytes.
+(define_attr "length" "" (const_int 4))
+
+
+;; Enabled, which is used to enable/disable insn alternatives.
+;; Note that we use length and TARGET_16_BIT here as criteria.
+;; If the instruction pattern already check TARGET_16_BIT to
+;; determine the length by itself, its enabled attribute should be
+;; always 1 to avoid the conflict with the settings here.
+(define_attr "enabled" ""
+  (cond [(and (eq_attr "length" "2")
+	      (match_test "!TARGET_16_BIT"))
+	 (const_int 0)]
+	(const_int 1)))
+
+
+;; ----------------------------------------------------------------------------
+
+
+;; Move instructions.
+
+;; Move expanders for QImode/HImode/SImode.
+;; A "mem <- non-reg" move is legitimized here by first forcing the
+;; source into a register; every other combination is matched directly
+;; by the load/store/mov insn patterns that follow.
+
+(define_expand "movqi"
+  [(set (match_operand:QI 0 "general_operand" "")
+	(match_operand:QI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (GET_CODE (operands[0]) == MEM && !REG_P (operands[1]))
+    operands[1] = force_reg (QImode, operands[1]);
+})
+
+(define_expand "movhi"
+  [(set (match_operand:HI 0 "general_operand" "")
+	(match_operand:HI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (GET_CODE (operands[0]) == MEM && !REG_P (operands[1]))
+    operands[1] = force_reg (HImode, operands[1]);
+})
+
+(define_expand "movsi"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(match_operand:SI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (GET_CODE (operands[0]) == MEM && !REG_P (operands[1]))
+    operands[1] = force_reg (SImode, operands[1]);
+})
+
+;; 32-bit store.  NOTE(review): this port numbers the operands in the
+;; reverse of the usual convention -- operand 1 is the destination
+;; memory and operand 0 the source register.  It is used consistently
+;; across the load/store patterns here, but worth double-checking.
+;; Alternatives 0..3 (constraints U45/U33/U37, presumably the 16-bit
+;; addressing forms from constraints.md -- confirm there) emit 2-byte
+;; encodings; the catch-all "m" alternative emits the 4-byte encoding.
+(define_insn "*store_si"
+  [(set (match_operand:SI 1 "memory_operand"   "=U45, U33, U37, U45, m")
+	(match_operand:SI 0 "register_operand" "   l,   l,   l,   d, r"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+    case 2:
+    case 3:
+      return nds32_output_16bit_store (operands, 4);
+
+    default:
+      return nds32_output_32bit_store (operands, 4);
+    }
+}
+  [(set_attr "type"   "store,store,store,store,store")
+   (set_attr "length" "    2,    2,    2,    2,    4")])
+
+(define_insn "*store_<mode>"
+  [(set (match_operand:QIHI 1 "memory_operand"   "=U33, m")
+	(match_operand:QIHI 0 "register_operand" "   l, r"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return nds32_output_16bit_store (operands, <byte>);
+
+    default:
+      return nds32_output_32bit_store (operands, <byte>);
+    }
+}
+  [(set_attr "type"   "store,store")
+   (set_attr "length" "    2,    4")])
+
+(define_insn "*load_si"
+  [(set (match_operand:SI 1 "register_operand" "=  l,   l,   l,   d, r")
+	(match_operand:SI 0 "memory_operand"   " U45, U33, U37, U45, m"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+    case 2:
+    case 3:
+      return nds32_output_16bit_load (operands, 4);
+
+    default:
+      return nds32_output_32bit_load (operands, 4);
+    }
+}
+  [(set_attr "type"   "load,load,load,load,load")
+   (set_attr "length" "   2,   2,   2,   2,   4")])
+
+(define_insn "*load_<mode>"
+  [(set (match_operand:QIHI 1 "register_operand" "=  l, r")
+	(match_operand:QIHI 0 "memory_operand"   " U33, m"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return nds32_output_16bit_load (operands, <byte>);
+
+    default:
+      return nds32_output_32bit_load (operands, <byte>);
+    }
+}
+  [(set_attr "type"   "load,load")
+   (set_attr "length" "   2,   4")])
+
+(define_insn "*mov<mode>"
+  [(set (match_operand:QIHISI 1 "register_operand" "=r, m, r")
+	(match_operand:QIHISI 0 "register_operand" " r, r, m"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      if (get_attr_length (insn) == 2)
+	return "mov55\t%1, %0";
+      else
+	return "ori\t%1, %0, 0";
+    case 1:
+      return nds32_output_32bit_store (operands, <byte>);
+    case 2:
+      return nds32_output_32bit_load (operands, <byte>);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "alu,store,load")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 1
+       (const_int 4)
+       ;; Alternative 2
+       (const_int 4)
+     ])])
+
+;; For QImode and HImode, the immediate value can be fit in imm20s.
+;; So there is no need to support QI and HI in the split patterns.
+;; Also, we use const_int_operand to limit that only CONST_INT
+;; is able to match such instruction template.
+;;
+;; Besides, in the split condition, we ask big-constant split to be
+;; performed after reload phase. So that the mov2add optimization
+;; in postreload have chance to optimize the code.
+
+;; Load-constant pattern with a post-reload split.
+;; NOTE(review): the pattern is named "*movsi_const" but its iterator
+;; is QIHISI, so it also matches QImode/HImode constants -- the name is
+;; misleading; consider "*mov<mode>_const".
+;; Alternatives 0-3 are single instructions (movpi45/movi55/movi/sethi).
+;; Alternatives 4 and 5 need a two-instruction sequence: alternative 4
+;; (constraint 't') emits "sethi + ori" directly, while alternative 5
+;; returns "#" and is split after reload into "set high 20 bits" +
+;; "add low 12 bits", so that postreload's mov2add still gets a chance
+;; to optimize the result (see the comment block above this pattern).
+(define_insn_and_split "*movsi_const"
+  [(set (match_operand:QIHISI 0 "register_operand"  "=   d,    r,    r,    r,    t,    r")
+	(match_operand:QIHISI 1 "const_int_operand" " Ip05, Is05, Is20, Ihig, Ispl, Ispl"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      if (get_attr_length (insn) == 2)
+	return "movpi45\t%0, %1";
+      /* else fall through.  */
+    case 1:
+      if (get_attr_length (insn) == 2)
+	return "movi55\t%0, %1";
+      /* else fall through.  */
+    case 2:
+      return "movi\t%0, %1";
+    case 3:
+      return "sethi\t%0, hi20(%1)";
+    case 4:
+      /* Use $r15, if the value is NOT in the range of Is20,
+         we must output "sethi + ori" directly since
+         we may already passed the split stage.  */
+      return "sethi\t%0, hi20(%1)\;ori\t%0, %0, lo12(%1)";
+
+    default:
+      return "#";
+    }
+}
+   "reload_completed
+    && satisfies_constraint_Ispl (operands[1])
+    && !satisfies_constraint_Is20 (operands[1])"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 0) (plus:QIHISI (match_dup 0) (match_dup 2)))]
+{
+  /* Low 12 bits go into the add; the remainder (value rounded down to
+     a multiple of 4096) becomes the first set.  The two parts always
+     recombine to the original value in two's complement.  */
+  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xfff);
+  operands[1] = GEN_INT ((INTVAL (operands[1]) >> 12) << 12);
+}
+  [(set_attr "type" "alu,alu,alu,alu,alu,alu")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 1
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 2
+       (const_int 4)
+       ;; Alternative 3
+       (const_int 4)
+       ;; Alternative 4
+       (const_int 8)
+       ;; Alternative 5
+       (const_int 8)
+     ])])
+
+;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF
+;; are able to match such instruction template.
+(define_insn "*move_addr"
+  [(set (match_operand:SI 0 "register_operand"       "=l, r")
+	(match_operand:SI 1 "nds32_symbolic_operand" " i, i"))]
+  ""
+  "la\t%0, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "8")])
+
+
+(define_insn "*sethi"
+  [(set (match_operand:SI 0 "register_operand"           "=r")
+	(high:SI (match_operand:SI 1 "immediate_operand" " i")))]
+  ""
+{
+  return "sethi\t%0, hi20(%1)";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+(define_insn "*lo_sum"
+  [(set (match_operand:SI 0 "register_operand"             "=r")
+	(lo_sum:SI (match_operand:SI 1 "register_operand"  " 0")
+		   (match_operand:SI 2 "immediate_operand" " i")))]
+  ""
+  "ori\t%0, %1, lo12(%2)"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Zero extension instructions.
+
+(define_expand "zero_extend<mode>si2"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(zero_extend:SI (match_operand:QIHI 1 "general_operand" "")))]
+  ""
+{
+  rtx tmp_reg;
+
+  /* We need to make sure operands[1] is a register.  */
+  if (!REG_P (operands[1]))
+    operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
+
+  /* If the pattern is "(mem X) <- (zero_extend (reg Y))",
+     we create two rtx patterns:
+       (reg:SI K) <- (zero_extend:SI (reg Y))
+       (mem:SI X) <- (reg:SI K)
+     The first rtx will be matched by '*zero_extend<mode>si2_reg' template,
+     and the second rtx will be matched by mov naming pattern.  */
+  if (MEM_P (operands[0]))
+    {
+      tmp_reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_zero_extend<mode>si2 (tmp_reg, operands[1]));
+      emit_insn (gen_movsi (operands[0], tmp_reg));
+
+      DONE;
+    }
+})
+
+(define_insn "*zero_extend<mode>si2_reg"
+  [(set (match_operand:SI 0 "register_operand"                   "=w, r")
+	(zero_extend:SI (match_operand:QIHI 1 "register_operand" " w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "ze<size>33\t%0, %1";
+    case 1:
+      return "ze<size>\t%0, %1";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+(define_insn "*zero_extend<mode>si2_load"
+  [(set (match_operand:SI 1 "register_operand"                 "=  l, *r")
+	(zero_extend:SI (match_operand:QIHI 0 "memory_operand" " U33,  m")))]
+  ""
+{
+  if (which_alternative == 0)
+    return nds32_output_16bit_load (operands, <byte>);
+  else
+    return nds32_output_32bit_load (operands, <byte>);
+}
+  [(set_attr "length" "2, 4")
+   (set_attr "type" "load,load")])
+
+;; Sign extension instructions.
+
+(define_expand "extend<mode>si2"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(sign_extend:SI (match_operand:QIHI 1 "general_operand" "")))]
+  ""
+{
+  rtx tmp_reg;
+
+  /* We need to make sure operands[1] is a register.  */
+  if (!REG_P (operands[1]))
+    operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
+
+  /* If the pattern is "(mem X) <- (sign_extend (reg Y))",
+     we create two rtx patterns:
+       (reg:SI K) <- (sign_extend:SI (reg Y))
+       (mem:SI X) <- (reg:SI K)
+     The first rtx will be matched by '*extend<mode>si2_reg' template,
+     and the second rtx will be matched by mov naming pattern.  */
+  if (MEM_P (operands[0]))
+    {
+      tmp_reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_extend<mode>si2 (tmp_reg, operands[1]));
+      emit_insn (gen_movsi (operands[0], tmp_reg));
+
+      DONE;
+    }
+})
+
+(define_insn "*extend<mode>si2_reg"
+  [(set (match_operand:SI 0 "register_operand"                   "=w, r")
+	(sign_extend:SI (match_operand:QIHI 1 "register_operand" " w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "se<size>33\t%0, %1";
+    case 1:
+      return "se<size>\t%0, %1";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+(define_insn "*extend<mode>si2_load"
+  [(set (match_operand:SI 0 "register_operand"                 "=r")
+	(sign_extend:SI (match_operand:QIHI 1 "memory_operand" " m")))]
+  ""
+{
+  rtx mem_addr_op;
+  rtx op0;
+  rtx op1;
+
+  /* Retrieve rtx X from (mem (X ...)).  */
+  mem_addr_op = XEXP (operands[1], 0);
+
+  switch (GET_CODE (mem_addr_op))
+    {
+    case SYMBOL_REF:
+    case CONST:
+      /* (mem (symbol_ref X))
+         (mem (const (...)))
+         => access global variables,
+            use "lbsi.gp / lhsi.gp" */
+      return "l<size>si.gp\t%0, %1";
+
+    case REG:
+      /* (mem (reg X))
+         => access location by using register,
+            use "lbsi / lhsi" */
+      return "l<size>si\t%0, %1";
+
+    case PLUS:
+      /* get operands first */
+      op0 = XEXP (mem_addr_op, 0);
+      op1 = XEXP (mem_addr_op, 1);
+
+      /* (mem (plus reg reg))
+         => access location by adding two registers,
+            use "lbs / lhs" */
+      if (REG_P (op0) && REG_P (op1))
+	return "l<size>s\t%0, %1";
+
+      /* (mem (plus reg const_int))
+         => access location by adding one register with const_int,
+            use "lbsi / lhsi" */
+      if (REG_P (op0) && CONST_INT_P (op1))
+	return "l<size>si\t%0, %1";
+
+      /* (mem (plus (mult reg const_int) reg))
+         => access location by adding one register with
+            multiplication of register and const_int,
+            use "lbs / lhs" */
+      if (GET_CODE (op0) == MULT && REG_P (op1)
+	  &&       REG_P (XEXP (op0, 0))
+	  && CONST_INT_P (XEXP (op0, 1)))
+	return "l<size>s\t%0, %1";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case POST_MODIFY:
+      /* Get operands first.  */
+      op0 = XEXP (mem_addr_op, 0);
+      op1 = XEXP (mem_addr_op, 1);
+
+      /* (mem (post_modify (reg)
+                           (plus (reg) (reg))))
+         => access location by using register which will be
+            post modified with reg,
+            use "lbs.bi/ lhs.bi / lws.bi" */
+      if (REG_P (op0) && GET_CODE (op1) == PLUS
+	  && REG_P (XEXP (op1, 1)))
+	return "l<size>s.bi\t%0, %1";
+
+      /* (mem (post_modify (reg)
+                           (plus (reg) (const_int))))
+         => access location by using register which will be
+            post modified with const_int,
+            use "lbsi.bi/ lhsi.bi / lwsi.bi" */
+      if (REG_P (op0) && GET_CODE (op1) == PLUS
+	  && CONST_INT_P (XEXP (op1, 1)))
+	return "l<size>si.bi\t%0, %1";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case POST_INC:
+      /* (mem (post_inc reg))
+         => access location by using register which will be
+            post increment,
+            use "lbsi.bi / lhsi.bi" */
+      if (REG_P (XEXP (mem_addr_op, 0)))
+	return "l<size>si.bi\t%0, %1, <byte>";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case POST_DEC:
+      /* (mem (post_dec reg))
+         => access location by using register which will be
+            post decrement,
+            use "lbsi.bi / lhsi.bi" */
+      if (REG_P (XEXP (mem_addr_op, 0)))
+	return "l<size>si.bi\t%0, %1, -<byte>";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case LO_SUM:
+      operands[2] = XEXP (mem_addr_op, 1);
+      operands[1] = XEXP (mem_addr_op, 0);
+      return "l<size>si\t%0, [%1 + lo12(%2)]";
+
+    default:
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+    }
+
+other_cases:
+  gcc_unreachable ();
+}
+  [(set_attr "type" "load")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Arithmetic instructions.
+
+(define_expand "addsi3"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(plus:SI (match_operand:SI 1 "register_operand" "")
+		 (match_operand:SI 2 "nds32_nonmemory_nonsymbol_operand" "")))]
+  ""
+{
+  if (GET_CODE (operands[2]) == CONST_INT)
+    operands[2] = gen_int_mode (INTVAL (operands[2]), SImode);
+})
+
+(define_insn "*add<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                      "=   d,    l,  d, l,    k,    l,    r, r")
+	(plus:QIHISI (match_operand:QIHISI 1 "register_operand"         "    0,    l, %0, l,    0,    k,    r, r")
+		     (match_operand:QIHISI 2 "nds32_reg_or_int_operand" " Iu05, Iu03,  r, l, Is10, Iu06, Is15, r")))]
+  ""
+  "@
+  addi45\t%0, %2
+  addi333\t%0, %1, %2
+  add45\t%0, %2
+  add333\t%0, %1, %2
+  addi10.sp\t%2
+  addri36.sp\t%0, %2
+  addi\t%0, %1, %2
+  add\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  2,  2,  2,  2,  4,  4")])
+
+(define_expand "subsi3"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(minus:SI (match_operand:SI 1 "nds32_rimm15s_operand" "")
+		 (match_operand:SI 2 "nds32_rimm15s_operand" "")))]
+  ""
+  ""
+)
+
+(define_insn "*sub<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                    "=   d,    l, d, l,    r, r")
+	(minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" "    0,    l, 0, l, Is15, r")
+		      (match_operand:QIHISI 2 "nds32_rimm15s_operand" " Iu05, Iu03, r, l,    r, r")))]
+  ""
+  "@
+  subi45\t%0, %2
+  subi333\t%0, %1, %2
+  sub45\t%0, %2
+  sub333\t%0, %1, %2
+  subri\t%0, %2, %1
+  sub\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  2,  2,  4,  4")])
+
+
+;; GCC intends to simplify (plus (ashift ...) (reg))
+;; into (plus (mult ...) (reg)), so our matching pattern takes 'mult'
+;; and needs to ensure it is exact_log2 value.
+(define_insn "*add_slli"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand"  " r")
+			  (match_operand:SI 2 "immediate_operand" " i"))
+		 (match_operand:SI 3 "register_operand"           " r")))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[2])) != -1)
+   && (exact_log2 (INTVAL (operands[2])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'add_slli' instruction.  */
+  operands[2] = GEN_INT (floor_log2 (INTVAL (operands[2])));
+
+  return "add_slli\t%0, %3, %1, %2";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*add_srli"
+  [(set (match_operand:SI 0 "register_operand"                        "=   r")
+	(plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			      (match_operand:SI 2 "immediate_operand" " Iu05"))
+		 (match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "add_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; GCC intends to simplify (minus (reg) (ashift ...))
+;; into (minus (reg) (mult ...)), so our matching pattern takes 'mult'
+;; and needs to ensure it is exact_log2 value.
+(define_insn "*sub_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=r")
+	(minus:SI (match_operand:SI 1 "register_operand"           " r")
+		  (mult:SI (match_operand:SI 2 "register_operand"  " r")
+			   (match_operand:SI 3 "immediate_operand" " i"))))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[3])) != -1)
+   && (exact_log2 (INTVAL (operands[3])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'sub_slli' instruction.  */
+  operands[3] = GEN_INT (floor_log2 (INTVAL (operands[3])));
+
+  return "sub_slli\t%0, %1, %2, %3";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*sub_srli"
+  [(set (match_operand:SI 0 "register_operand"                         "=   r")
+	(minus:SI (match_operand:SI 1 "register_operand"               "    r")
+		  (lshiftrt:SI (match_operand:SI 2 "register_operand"  "    r")
+			       (match_operand:SI 3 "immediate_operand" " Iu05"))))]
+  "TARGET_ISA_V3"
+  "sub_srli\t%0, %1, %2, %3"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; Multiplication instructions.
+
+;; NOTE(review): "mul33" appears to be a 16-bit encoding (the "33"
+;; suffix is used by the 16-bit forms elsewhere in this file), yet
+;; alternative 0 declares length 4.  The global "enabled" attribute
+;; only disables length-2 alternatives when !TARGET_16_BIT, so as
+;; written mul33 can be selected even without 16-bit support --
+;; confirm whether the length of alternative 0 should be 2.
+(define_insn "mulsi3"
+  [(set (match_operand:SI 0 "register_operand"          "= w, ldh")
+	(mult:SI (match_operand:SI 1 "register_operand" " %0, ldh")
+		 (match_operand:SI 2 "register_operand" "  w, ldh")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mul33\t%0, %2";
+    case 1:
+      return "mul\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  4,  4")])
+
+(define_insn "mulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V3"
+  "mulsr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "umulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V3"
+  "mulr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Multiply-accumulate instructions.
+
+(define_insn "*maddr32_0"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (match_operand:SI 3 "register_operand"          " 0")
+                 (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "*maddr32_1"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))
+                 (match_operand:SI 3 "register_operand"          " 0")))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "*msubr32"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (minus:SI (match_operand:SI 3 "register_operand"          " 0")
+                  (mult:SI (match_operand:SI 1 "register_operand" " r")
+                           (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "msubr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Div Instructions.
+
+(define_insn "divmodsi4"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+        (div:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand" " r")))
+   (set (match_operand:SI 3 "register_operand"         "=r")
+        (mod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divsr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "udivmodsi4"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+        (udiv:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand"  " r")))
+   (set (match_operand:SI 3 "register_operand"          "=r")
+        (umod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Boolean instructions.
+;; Note: We define the DImode versions in nds32.doubleword.md.
+
+;; ----------------------------------------------------------------------------
+;; 'AND' operation
+;; ----------------------------------------------------------------------------
+
+(define_insn "bitc"
+  [(set (match_operand:SI 0 "register_operand"                 "=r")
+	(and:SI (not:SI (match_operand:SI 1 "register_operand" " r"))
+		(match_operand:SI 2 "register_operand"         " r")))]
+  "TARGET_ISA_V3"
+  "bitc\t%0, %2, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+;; AND expander: recognize constant masks that can be implemented by
+;; the cheaper zero-extension instructions (zeb/zeh) instead of a
+;; general AND; everything else falls through to the *andsi3 pattern.
+(define_expand "andsi3"
+  [(set (match_operand:SI         0 "register_operand" "")
+	(and:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
+  ""
+{
+  /* If operands[2] is const_int,
+     we might be able to use other more efficient instructions.  */
+  if (CONST_INT_P (operands[2]))
+    {
+      /* Use HOST_WIDE_INT (the type INTVAL returns) instead of int,
+         avoiding truncation and matching the *andsi3 pattern below.  */
+      HOST_WIDE_INT mask = INTVAL (operands[2]);
+
+      if (mask == 0xff)
+	{
+	  /* ($r0 & 0xff)  ==>  (zeb $r0, $r0) */
+	  operands[1] = convert_to_mode (QImode, operands[1], 1);
+	  emit_insn (gen_zero_extendqisi2 (operands[0], operands[1]));
+	  DONE;
+	}
+      else if (mask == 0xffff)
+	{
+	  /* ($r0 & 0xffff)  ==>  (zeh $r0, $r0) */
+	  operands[1] = convert_to_mode (HImode, operands[1], 1);
+	  emit_insn (gen_zero_extendhisi2 (operands[0], operands[1]));
+	  DONE;
+	}
+    }
+})
+
+;; AND recognizer: selects among 16-bit (and33/zeb33/xlsb33/x11b33/
+;; bmski33/fexti33) and 32-bit (and/andi/bitci/bclr) encodings,
+;; driven by the constraint alternatives.
+(define_insn "*andsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    l,    l,    l,    l,    l,    r,    r,    r")
+	(and:SI (match_operand:SI 1 "register_operand" " %0, r,    l,    l,    l,    0,    0,    r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Izeb, Ixls, Ix11, Ibms, Ifex, Iu15, Ii15, Ic15")))]
+  ""
+{
+  HOST_WIDE_INT mask = INTVAL (operands[2]);
+  int zero_position;
+
+  /* 16-bit andi instructions:
+     andi Rt3,Ra3,0xff  -> zeb33  Rt3,Ra3
+     andi Rt3,Ra3,0x01  -> xlsb33 Rt3,Ra3
+     andi Rt3,Ra3,0x7ff -> x11b33 Rt3,Ra3
+     andi Rt3,Rt3,2^imm3u          -> bmski33 Rt3,imm3u
+     andi Rt3,Rt3,(2^(imm3u+1))-1  -> fexti33 Rt3,imm3u.  */
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "and33\t%0, %2";
+    case 1:
+      return "and\t%0, %1, %2";
+    case 2:
+      return "zeb33\t%0, %1";
+    case 3:
+      return "xlsb33\t%0, %1";
+    case 4:
+      return "x11b33\t%0, %1";
+    case 5:
+      operands[2] = GEN_INT (floor_log2 (mask));
+      return "bmski33\t%0, %2";
+    case 6:
+      operands[2] = GEN_INT (floor_log2 (mask + 1) - 1);
+      return "fexti33\t%0, %2";
+    case 7:
+      return "andi\t%0, %1, %2";
+    case 8:
+      operands[2] = GEN_INT (~mask);
+      return "bitci\t%0, %1, %2";
+    case 9:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bclr_p() test,
+         so that we can guarantee there is only one 0-bit
+         within the immediate value.  */
+      for (zero_position = 31; zero_position >= 0; zero_position--)
+	{
+	  /* Shift in HOST_WIDE_INT: shifting 'int' 1 left by 31 would
+	     touch the sign bit, which is undefined behavior in C.  */
+	  if ((INTVAL (operands[2]) & ((HOST_WIDE_INT) 1 << zero_position)) == 0)
+	    {
+	      /* Found the 0-bit position.  */
+	      operands[2] = GEN_INT (zero_position);
+	      break;
+	    }
+	}
+      return "bclr\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  2,  2,  2,  2,  2,  4,  4,  4")])
+
+;; Fused "shift-left then AND": (Ra << imm5) & Rb in one V3 instruction.
+(define_insn "*and_slli"
+  [(set (match_operand:SI 0 "register_operand"                      "=   r")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand"   "    r")
+			    (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"              "    r")))]
+  "TARGET_ISA_V3"
+  "and_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Fused "logical-shift-right then AND": (Ra >> imm5) & Rb, V3 only.
+(define_insn "*and_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "and_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'OR' operation
+;; ----------------------------------------------------------------------------
+
+;; For iorsi3 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; simply set different instruction length according to ISA.
+;; Trivial expander; all instruction selection happens in the
+;; anonymous "*iorsi3" pattern below.
+(define_expand "iorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(ior:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
+  ""
+  ""
+)
+
+;; This is the iorsi3 pattern for V3/V3M ISA,
+;; which DOES HAVE 'or33' instruction.
+;; So we can identify 'or Rt3,Ra3,Rb3' case and set its length to be 2.
+;; This is the iorsi3 pattern for V3/V3M ISA,
+;; which DOES HAVE 'or33' instruction.
+;; So we can identify 'or Rt3,Ra3,Rb3' case and set its length to be 2.
+(define_insn "*iorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(ior:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, Ie15")))]
+  ""
+{
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "or33\t%0, %2";
+    case 1:
+      return "or\t%0, %1, %2";
+    case 2:
+      return "ori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bset_p() test,
+         so that we can guarantee there is only one 1-bit
+         within the immediate value.  */
+      for (one_position = 31; one_position >= 0; one_position--)
+	{
+	  /* Shift in HOST_WIDE_INT: shifting 'int' 1 left by 31 would
+	     touch the sign bit, which is undefined behavior in C.  */
+	  if ((INTVAL (operands[2]) & ((HOST_WIDE_INT) 1 << one_position)) != 0)
+	    {
+	      /* Found the 1-bit position.  */
+	      operands[2] = GEN_INT (one_position);
+	      break;
+	    }
+	}
+      return "bset\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+;; Fused "shift-left then OR": (Ra << imm5) | Rb in one V3 instruction.
+(define_insn "*or_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(ior:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "or_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Fused "logical-shift-right then OR": (Ra >> imm5) | Rb, V3 only.
+(define_insn "*or_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "or_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'XOR' operation
+;; ----------------------------------------------------------------------------
+
+;; For xorsi3 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; simply set different instruction length according to ISA.
+;; Trivial expander; all instruction selection happens in the
+;; anonymous "*xorsi3" pattern below.
+(define_expand "xorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(xor:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
+  ""
+  ""
+)
+
+;; XOR recognizer: 16-bit xor33, 32-bit xor/xori, or a single-bit
+;; toggle (btgl) when the constant has exactly one 1-bit.
+(define_insn "*xorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(xor:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, It15")))]
+  ""
+{
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "xor33\t%0, %2";
+    case 1:
+      return "xor\t%0, %1, %2";
+    case 2:
+      return "xori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_btgl_p() test,
+         so that we can guarantee there is only one 1-bit
+         within the immediate value.  */
+      for (one_position = 31; one_position >= 0; one_position--)
+	{
+	  /* Shift in HOST_WIDE_INT: shifting 'int' 1 left by 31 would
+	     touch the sign bit, which is undefined behavior in C.  */
+	  if ((INTVAL (operands[2]) & ((HOST_WIDE_INT) 1 << one_position)) != 0)
+	    {
+	      /* Found the 1-bit position.  */
+	      operands[2] = GEN_INT (one_position);
+	      break;
+	    }
+	}
+      return "btgl\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+;; Fused "shift-left then XOR": (Ra << imm5) ^ Rb in one V3 instruction.
+(define_insn "*xor_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(xor:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "xor_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Fused "logical-shift-right then XOR": (Ra >> imm5) ^ Rb, V3 only.
+(define_insn "*xor_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "xor_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Rotate Right Instructions.
+
+;; Rotate right by an immediate (rotri) or by a register amount (rotr).
+(define_insn "rotrsi3"
+  [(set (match_operand:SI 0 "register_operand"                 "=   r, r")
+	  (rotatert:SI (match_operand:SI 1 "register_operand"  "    r, r")
+		       (match_operand:SI 2 "nonmemory_operand" " Iu05, r")))]
+  ""
+  "@
+  rotri\t%0, %1, %2
+  rotr\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'NEG' operation
+;; ----------------------------------------------------------------------------
+
+;; For negsi2 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; output assembly code according to ISA.
+;; Trivial expander; the "*negsi2" pattern below picks the encoding.
+(define_expand "negsi2"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(neg:SI (match_operand:SI 1 "register_operand" "")))]
+  ""
+  ""
+)
+
+;; Note that there is NO 'neg33' instruction for V2 ISA.
+;; So 'subri A,B,0' (its semantic is 'A = 0 - B')
+;; is the only option for V2 ISA.
+;; Note that there is NO 'neg33' instruction for V2 ISA.
+;; So 'subri A,B,0' (its semantic is 'A = 0 - B')
+;; is the only option for V2 ISA.
+(define_insn "*negsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(neg:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+  "@
+   neg33\t%0, %1
+   subri\t%0, %1, 0"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'ONE_COMPLEMENT' operation
+;; ----------------------------------------------------------------------------
+
+;; For one_cmplsi2 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; output assembly code according to ISA.
+;; Trivial expander; the "*one_cmplsi2" pattern below picks the encoding.
+(define_expand "one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(not:SI (match_operand:SI 1 "register_operand" "")))]
+  ""
+  ""
+)
+
+;; This is the one_cmplsi2 pattern
+;; Bitwise NOT: 16-bit not33 when operands allow,
+;; otherwise synthesized as 'nor Rt, Ra, Ra'.
+(define_insn "*one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(not:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+  "@
+   not33\t%0, %1
+   nor\t%0, %1, %1"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+;; ----------------------------------------------------------------------------
+
+;; Shift instructions.
+
+;; Arithmetic/logical shift left: 16-bit slli333 (3-bit immediate),
+;; 32-bit slli (5-bit immediate), or sll for a register shift amount.
+(define_insn "ashlsi3"
+  [(set (match_operand:SI 0 "register_operand"            "=   l,    r, r")
+	(ashift:SI (match_operand:SI 1 "register_operand" "    l,    r, r")
+		   (match_operand:SI 2 "general_operand"  " Iu03, Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "slli333\t%0, %1, %2";
+    case 1:
+      return "slli\t%0, %1, %2";
+    case 2:
+      return "sll\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+;; Arithmetic shift right: 16-bit srai45 (destructive, matches '0'
+;; constraint), 32-bit srai, or sra for a register shift amount.
+(define_insn "ashrsi3"
+  [(set (match_operand:SI 0 "register_operand"              "=   d,    r, r")
+	(ashiftrt:SI (match_operand:SI 1 "register_operand" "    0,    r, r")
+		     (match_operand:SI 2 "general_operand"  " Iu05, Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "srai45\t%0, %2";
+    case 1:
+      return "srai\t%0, %1, %2";
+    case 2:
+      return "sra\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+;; Logical shift right: 16-bit srli45 (destructive, matches '0'
+;; constraint), 32-bit srli, or srl for a register shift amount.
+(define_insn "lshrsi3"
+  [(set (match_operand:SI 0 "register_operand"              "=   d,    r, r")
+	(lshiftrt:SI (match_operand:SI 1 "register_operand" "    0,    r, r")
+		     (match_operand:SI 2 "general_operand"  " Iu05, Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "srli45\t%0, %2";
+    case 1:
+      return "srli\t%0, %1, %2";
+    case 2:
+      return "srl\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; ----------------------------------------------------------------------------
+;; Conditional Move patterns
+;; ----------------------------------------------------------------------------
+
+;; Conditional move expander: normalizes any comparison into an
+;; (eq X 0) / (ne X 0) test on a temporary slt/slts result so that
+;; the cmovz/cmovn patterns can match later.
+(define_expand "movsicc"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(if_then_else:SI (match_operand 1 "comparison_operator" "")
+			 (match_operand:SI 2 "register_operand" "")
+			 (match_operand:SI 3 "register_operand" "")))]
+  "TARGET_CMOV"
+{
+  if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+      && GET_MODE (XEXP (operands[1], 0)) == SImode
+      && XEXP (operands[1], 1) == const0_rtx)
+    {
+      /* If the operands[1] rtx is already (eq X 0) or (ne X 0),
+         we have gcc generate original template rtx.  */
+      goto create_template;
+    }
+  else
+    {
+      /* Since there is only 'slt'(Set when Less Than) instruction for
+         comparison in Andes ISA, the major strategy we use here is to
+         convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination.
+         We design constraints properly so that the reload phase will assist
+         to make one source operand to use same register as result operand.
+         Then we can use cmovz/cmovn to catch the other source operand
+         which has different register.  */
+      enum rtx_code code = GET_CODE (operands[1]);
+      enum rtx_code new_code = code;
+      rtx cmp_op0 = XEXP (operands[1], 0);
+      rtx cmp_op1 = XEXP (operands[1], 1);
+      rtx tmp;
+      int reverse = 0;
+
+      /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part
+         Strategy : Reverse condition and swap comparison operands
+
+         For example:
+
+             a <= b ? P : Q   (LE or LEU)
+         --> a >  b ? Q : P   (reverse condition)
+         --> b <  a ? Q : P   (swap comparison operands to achieve 'LT/LTU')
+
+             a >= b ? P : Q   (GE or GEU)
+         --> a <  b ? Q : P   (reverse condition to achieve 'LT/LTU')
+
+             a <  b ? P : Q   (LT or LTU)
+         --> (NO NEED TO CHANGE, it is already 'LT/LTU')
+
+             a >  b ? P : Q   (GT or GTU)
+         --> b <  a ? P : Q   (swap comparison operands to achieve 'LT/LTU') */
+      switch (code)
+	{
+	case NE:
+	  /*   (a != b ? P : Q)
+	     can be expressed as
+	       (a == b ? Q : P)
+	     so, fall through to reverse condition */
+	case GE: case GEU: case LE: case LEU:
+	  new_code = reverse_condition (code);
+	  reverse = 1;
+	  break;
+	case EQ: case GT: case GTU: case LT: case LTU:
+	  /* no need to reverse condition */
+	  break;
+	default:
+	  FAIL;
+	}
+
+      /* For '>' comparison operator, we swap operands
+         so that we can have 'LT/LTU' operator.  */
+      if (new_code == GT || new_code == GTU)
+	{
+	  tmp     = cmp_op0;
+	  cmp_op0 = cmp_op1;
+	  cmp_op1 = tmp;
+
+	  new_code = swap_condition (new_code);
+	}
+
+      /* Use a temporary register to store slt/slts result.  */
+      tmp = gen_reg_rtx (SImode);
+
+      /* Split EQ and NE because we don't have a directly-comparable form
+         of EQ and NE.  If we don't do this, the conditional move
+         transformation will fail
+	 when produce (SET A (EQ B C)) or (SET A (NE B C)).  */
+      if (new_code == EQ)
+	{
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, tmp, GEN_INT (1)));
+	}
+      else if (new_code == NE)
+	{
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, GEN_INT (0), tmp));
+        }
+      else
+	/* This emit_insn will create corresponding 'slt/slts' instruction.  */
+	emit_insn (gen_rtx_SET (VOIDmode, tmp,
+				gen_rtx_fmt_ee (new_code, SImode,
+						cmp_op0, cmp_op1)));
+
+      /* Change comparison semantic into (eq X 0) or (ne X 0) behavior
+         so that cmovz or cmovn will be matched later.
+
+         For reverse condition cases, we want to create a semantic that:
+           (eq X 0) --> pick up "else" part
+         For normal cases, we want to create a semantic that:
+           (ne X 0) --> pick up "then" part
+
+         The reason we do not physically change their rtx position is that
+         gcc will also do optimization by reverse condition,
+         which may break up our transformation semantic
+         if we physically change rtx right now.
+         So we just pick up the corresponding comparison operator
+         based on the reverse status, leaving the "swap position" job
+         after reload phase by using define_insn_and_split strategy.  */
+      operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE,
+				    VOIDmode, tmp, const0_rtx);
+    }
+
+create_template:
+  do {} while(0); /* dummy line */
+})
+
+;; QImode/HImode conditional move: widen all data operands to SImode
+;; subregs and delegate to the SImode movsicc expander.
+(define_expand "mov<mode>cc"
+  [(set (match_operand:QIHI 0 "register_operand" "")
+	(if_then_else:QIHI (match_operand 1 "comparison_operator" "")
+			   (match_operand:QIHI 2 "register_operand" "")
+			   (match_operand:QIHI 3 "register_operand" "")))]
+  "TARGET_CMOV"
+{
+  rtx insn;
+
+  /* For QImode and HImode conditional move,
+     make them to be SImode behavior.  */
+  operands[0] = simplify_gen_subreg (SImode, operands[0], <MODE>mode, 0);
+  operands[2] = simplify_gen_subreg (SImode, operands[2], <MODE>mode, 0);
+  operands[3] = simplify_gen_subreg (SImode, operands[3], <MODE>mode, 0);
+
+  insn = gen_movsicc (operands[0], operands[1], operands[2], operands[3]);
+
+  /* gen_movsicc may fail (FAIL in its body); propagate that.  */
+  if (!insn)
+    FAIL;
+
+  emit_insn (insn);
+  DONE;
+})
+
+;; cmovz: move when the condition register is zero.  The two
+;; alternatives let either source share the destination register.
+(define_insn "cmovz"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+        (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovz\t%0, %2, %1
+   cmovz\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+;; cmovn: move when the condition register is non-zero.  The two
+;; alternatives let either source share the destination register.
+(define_insn "cmovn"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+	(if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovn\t%0, %2, %1
+   cmovn\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+;; Post-reload splitter that finishes the movsicc transformation:
+;; physically swap then/else for EQ, then emit cmovz or cmovn
+;; depending on which source reload tied to the destination.
+(define_insn_and_split "*movsicc"
+  [(set (match_operand:SI 0 "register_operand"                     "=r, r")
+	(if_then_else:SI (match_operator 1 "nds32_equality_comparison_operator"
+			   [(match_operand:SI 2 "register_operand" " r, r")
+			    (const_int 0)])
+			 (match_operand:SI 3 "register_operand"    " 0, r")
+			 (match_operand:SI 4 "register_operand"    " r, 0")))]
+  "TARGET_CMOV"
+  "#"
+  "reload_completed"
+  [(pc)]
+{
+  enum rtx_code code = GET_CODE (operands[1]);
+  rtx then_op = operands[3];
+  rtx else_op = operands[4];
+  rtx tmp;
+
+  /* According to the implementation in "movsicc" naming pattern,
+     if we make transformation in which the comparison code is EQ,
+     the desired target is at "else" part position semantically.
+     Now it is the time (after reload_completed) to physically
+     swap it to "then" part position.  */
+  if (code == EQ)
+    {
+      tmp     = then_op;
+      then_op = else_op;
+      else_op = tmp;
+    }
+
+  /* Choosing cmovz or cmovn is based on reload phase result.
+     After reload phase, one source operand will use
+     the same register as result operand.
+     We can use cmovz/cmovn to catch the other source operand
+     which has different register.
+     So we check register number to determine using cmovz or cmovn.  */
+  if (REGNO (then_op) == REGNO (operands[0]))
+    emit_insn (gen_cmovz (operands[0], operands[2], else_op, operands[0]));
+  else if (REGNO (else_op) == REGNO (operands[0]))
+    emit_insn (gen_cmovn (operands[0], operands[2], then_op, operands[0]));
+  else
+    gcc_unreachable ();
+
+  DONE;
+})
+
+
+;; ----------------------------------------------------------------------------
+;; Conditional Branch patterns
+;; ----------------------------------------------------------------------------
+
+;; Conditional branch expander: zero comparisons (except unsigned ones)
+;; and EQ/NE go straight through; every other comparison is reduced to
+;; an slt/slts into $ta followed by a recursive EQ/NE-against-zero branch.
+(define_expand "cbranchsi4"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* If operands[2] is (const_int 0),
+     we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions.
+     So we have gcc generate original template rtx.  */
+  if (GET_CODE (operands[2]) == CONST_INT)
+    if (INTVAL (operands[2]) == 0)
+      if ((code != GTU)
+	  && (code != GEU)
+	  && (code != LTU)
+	  && (code != LEU))
+	goto create_template;
+
+  /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than)
+     behavior for the comparison, we might need to generate other
+     rtx patterns to achieve same semantic.  */
+  switch (code)
+    {
+    case GT:
+    case GTU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* GT  reg_A, const_int  =>  !(LT  reg_A, const_int + 1) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* We want to plus 1 into the integer value
+	     of operands[2] to create 'slt' instruction.
+	     This calculation is performed on the host machine,
+	     which may be 64-bit integer.
+	     So the meaning of calculation result may be
+	     different from the 32-bit nds32 target.
+
+	     For example:
+	       0x7fffffff + 0x1 -> 0x80000000,
+	       this value is POSITIVE on 64-bit machine,
+	       but the expected value on 32-bit nds32 target
+	       should be NEGATIVE value.
+
+	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
+	     explicitly create SImode constant rtx.  */
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* GT  reg_A, reg_B  =>  LT  reg_B, reg_A */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case GE:
+    case GEU:
+      /* GE  reg_A, reg_B      =>  !(LT  reg_A, reg_B) */
+      /* GE  reg_A, const_int  =>  !(LT  reg_A, const_int) */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == GE)
+	{
+	  /* GE, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* GEU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], EQ);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LT:
+    case LTU:
+      /* LT  reg_A, reg_B      =>  LT  reg_A, reg_B */
+      /* LT  reg_A, const_int  =>  LT  reg_A, const_int */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], NE);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* LE  reg_A, const_int  =>  LT  reg_A, const_int + 1 */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* We want to plus 1 into the integer value
+	     of operands[2] to create 'slt' instruction.
+	     This calculation is performed on the host machine,
+	     which may be 64-bit integer.
+	     So the meaning of calculation result may be
+	     different from the 32-bit nds32 target.
+
+	     For example:
+	       0x7fffffff + 0x1 -> 0x80000000,
+	       this value is POSITIVE on 64-bit machine,
+	       but the expected value on 32-bit nds32 target
+	       should be NEGATIVE value.
+
+	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
+	     explicitly create SImode constant rtx.  */
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* LE  reg_A, reg_B  =>  !(LT  reg_B, reg_A) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case EQ:
+    case NE:
+      /* NDS32 ISA has various form for eq/ne behavior no matter
+         what kind of the operand is.
+         So just generate original template rtx.  */
+      goto create_template;
+
+    default:
+      FAIL;
+    }
+
+create_template:
+  do {} while(0); /* dummy line */
+})
+
+
+;; Branch on (reg == 0) / (reg != 0).  Output selection depends on the
+;; computed insn length: 2 = 16-bit short branch, 4 = 32-bit branch,
+;; 6/8 = inverted short/long branch plus an unconditional 'j'.
+(define_insn "*cbranchsi4_equality_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"  "t, l, r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-comparison conditional branch has two forms:
+       32-bit instruction =>          beqz/bnez           imm16s << 1
+       16-bit instruction => beqzs8/bnezs8/beqz38/bnez38  imm8s << 1
+
+     For 32-bit case,
+     we assume it is always reachable. (but check range -65500 ~ 65500)
+
+     For 16-bit case,
+     it must satisfy { 255 >= (label - pc) >= -256 } condition.
+     However, since the $pc for nds32 is at the beginning of the instruction,
+     we should leave some length space for current insn.
+     So we use range -250 ~ 250.  */
+
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  return (code == EQ) ? "beqzs8\t%2" : "bnezs8\t%2";
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  return (code == EQ) ? "beqz38\t%1, %2" : "bnez38\t%1, %2";
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* For which_alternative==2, it should not be here.  */
+	  gcc_unreachable ();
+	}
+    case 4:
+      /* including constraints: t, l, and r */
+      return (code == EQ) ? "beqz\t%1, %2" : "bnez\t%1, %2";
+    case 6:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  if (code == EQ)
+	    {
+	      /*    beqzs8  .L0
+	       *  =>
+	       *    bnezs8  .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bnezs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnezs8  .L0
+	       *  =>
+	       *    beqzs8  .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beqzs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  if (code == EQ)
+	    {
+	      /*    beqz38  $r0, .L0
+	       *  =>
+	       *    bnez38  $r0, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bnez38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnez38  $r0, .L0
+	       *  =>
+	       *    beqz38  $r0, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beqz38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* For which_alternative==2, it should not be here.  */
+	  gcc_unreachable ();
+	}
+    case 8:
+      /* constraint: t, l, r.  */
+      if (code == EQ)
+	{
+	  /*    beqz  $r8, .L0
+	   *  =>
+	   *    bnez  $r8, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "bnez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+      else
+	{
+	  /*    bnez  $r8, .L0
+	   *  =>
+	   *    beqz  $r8, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "beqz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 2
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			  (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+;; This pattern is dedicated to non-V3 ISAs,
+;; because they HAVE NO beqc/bnec instruction.
+;; This pattern is dedicated to non-V3 ISAs,
+;; because they HAVE NO beqc/bnec instruction.
+;; Length 4 emits beq/bne directly; length 8 emits the inverted
+;; branch over an unconditional 'j' for out-of-range labels.
+(define_insn "*cbranchsi4_equality_reg"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "!TARGET_ISA_V3"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+
+     For 32-bit case,
+     we assume it is always reachable. (but check range -16350 ~ 16350).  */
+
+  switch (code)
+    {
+    case EQ:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "beq\t%1, %2, %3";
+	case 8:
+	  /*    beq  $r0, $r1, .L0
+	   *  =>
+	   *    bne  $r0, $r1, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    case NE:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "bne\t%1, %2, %3";
+	case 8:
+	  /*    bne  $r0, $r1, .L0
+	   *  =>
+	   *    beq  $r0, $r1, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			   (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+;; This pattern is dedicated to V3,
+;; because V3 DOES HAVE beqc/bnec instruction.
+(define_insn "*cbranchsi4_equality_reg_or_const_int"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r,    r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r, Is11")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "TARGET_ISA_V3"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+       32-bit instruction =>         beqc/bnec          imm8s << 1
+
+     For 32-bit case, we assume it is always reachable.
+     (but check range -16350 ~ 16350 and -250 ~ 250).  */
+
+  switch (code)
+    {
+    case EQ:
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beq\t%1, %2, %3";
+	    case 8:
+	      /*    beq  $r0, $r1, .L0
+	       *  =>
+	       *    bne  $r0, $r1, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beqc\t%1, %2, %3";
+	    case 8:
+	      /*    beqc  $r0, constant, .L0
+	       *  =>
+	       *    bnec  $r0, constant, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bnec\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    case NE:
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bne\t%1, %2, %3";
+	    case 8:
+	      /*    bne  $r0, $r1, .L0
+	       *  =>
+	       *    beq  $r0, $r1, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bnec\t%1, %2, %3";
+	    case 8:
+	      /*    bnec  $r0, constant, .L0
+	       *  =>
+	       *    beqc  $r0, constant, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beqc\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			  (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		     (const_int 4)
+		     (const_int 8))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+			  (le (minus (match_dup 3) (pc)) (const_int  250)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+(define_insn "*cbranchsi4_greater_less_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_greater_less_comparison_operator"
+			[(match_operand:SI 1 "register_operand" "r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-greater-less-comparison conditional branch has one form:
+       32-bit instruction =>      bgtz/bgez/bltz/blez     imm16s << 1
+
+     For 32-bit case, we assume it is always reachable.
+     (but check range -65500 ~ 65500).  */
+
+  if (get_attr_length (insn) == 8)
+    {
+      /* The branch target is too far to simply use one
+         bgtz/bgez/bltz/blez instruction.
+         We need to reverse condition and use 'j' to jump to the target.  */
+      switch (code)
+	{
+	case GT:
+	  /*   bgtz  $r8, .L0
+	   * =>
+	   *   blez  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "blez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case GE:
+	  /*   bgez  $r8, .L0
+	   * =>
+	   *   bltz  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "bltz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LT:
+	  /*   bltz  $r8, .L0
+	   * =>
+	   *   bgez  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "bgez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LE:
+	  /*   blez  $r8, .L0
+	   * =>
+	   *   bgtz  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "bgtz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+    }
+
+  switch (code)
+    {
+    case GT:
+      return "bgtz\t%1, %2";
+    case GE:
+      return "bgez\t%1, %2";
+    case LT:
+      return "bltz\t%1, %2";
+    case LE:
+      return "blez\t%1, %2";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+        (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			   (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+(define_expand "cstoresi4"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operator:SI 1 "comparison_operator"
+	  [(match_operand:SI 2 "register_operand" "")
+	   (match_operand:SI 3 "nonmemory_operand" "")]))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[1]);
+
+  switch (code)
+    {
+    case EQ:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A == const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A == reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+
+    case NE:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A != const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A != reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+
+    case GT:
+    case GTU:
+      /* reg_R = (reg_A > reg_B)       --> slt reg_R, reg_B, reg_A */
+      /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */
+      if (code == GT)
+	{
+	  /* GT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[3], operands[2]));
+	}
+      else
+	{
+	  /* GTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[3], operands[2]));
+	}
+
+      DONE;
+
+    case GE:
+    case GEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A >= const_int_B)
+	     --> movi reg_C, const_int_B - 1
+	         slt  reg_R, reg_C, reg_A */
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) - 1,
+					      SImode)));
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], tmp_reg, operands[2]));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A >= reg_B)
+	     --> slt  reg_R, reg_A, reg_B
+	         xori reg_R, reg_R, const_int_1 */
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[2], operands[3]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[2], operands[3]));
+	    }
+
+	  /* perform 'not' behavior */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+    case LT:
+    case LTU:
+      /* reg_R = (reg_A < reg_B)       --> slt reg_R, reg_A, reg_B */
+      /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[2], operands[3]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[2], operands[3]));
+	}
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A <= const_int_B)
+	     --> movi reg_C, const_int_B + 1
+	         slt  reg_R, reg_A, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) + 1,
+						      SImode)));
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], operands[2], tmp_reg));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A <= reg_B) --> slt  reg_R, reg_B, reg_A
+	                                  xori reg_R, reg_R, const_int_1 */
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[3], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[3], operands[2]));
+	    }
+
+	  /* perform 'not' behavior */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+
+    default:
+      gcc_unreachable ();
+    }
+})
+
+
+(define_insn "slts_compare"
+  [(set (match_operand:SI 0 "register_operand"         "=t,    t, r,    r")
+	(lt:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+	       (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slts45\t%1, %2
+   sltsi45\t%1, %2
+   slts\t%0, %1, %2
+   sltsi\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+(define_insn "slt_compare"
+  [(set (match_operand:SI 0 "register_operand"          "=t,    t, r,    r")
+	(ltu:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+		(match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slt45\t%1, %2
+   slti45\t%1, %2
+   slt\t%0, %1, %2
+   slti\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Unconditional and other jump instructions.
+
+(define_insn "jump"
+  [(set (pc) (label_ref (match_operand 0 "" "")))]
+  ""
+{
+  /* This unconditional jump has two forms:
+       32-bit instruction => j   imm24s << 1
+       16-bit instruction => j8  imm8s << 1
+
+     For 32-bit case,
+     we assume it is always reachable.
+     For 16-bit case,
+     it must satisfy { 255 >= (label - pc) >= -256 } condition.
+     However, since the $pc for nds32 is at the beginning of the instruction,
+     we should leave some length space for current insn.
+     So we use range -250 ~ 250.  */
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      return "j8\t%0";
+    case 4:
+      return "j\t%0";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
+			   (le (minus (match_dup 0) (pc)) (const_int  250)))
+		      (if_then_else (match_test "TARGET_16_BIT")
+				    (const_int 2)
+				    (const_int 4))
+		      (const_int 4)))])
+
+(define_insn "indirect_jump"
+  [(set (pc) (match_operand:SI 0 "register_operand" "r"))]
+  ""
+{
+  if (TARGET_16_BIT)
+    return "jr5\t%0";
+  else
+    return "jr\t%0";
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+;; Subroutine call instruction returning no value.
+;;   operands[0]: It should be a mem RTX whose address is
+;;                the address of the function.
+;;   operands[1]: It is the number of bytes of arguments pushed as a const_int.
+;;   operands[2]: It is the number of registers used as operands.
+
+(define_expand "call"
+  [(parallel [(call (match_operand 0 "memory_operand" "")
+		    (match_operand 1 "general_operand" ""))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+(define_insn "*call_register"
+  [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r"))
+		    (match_operand 1 "" ""))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+{
+  if (TARGET_16_BIT)
+    return "jral5\t%0";
+  else
+    return "jral\t%0";
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+        (if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+(define_insn "*call_immediate"
+  [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i"))
+		    (match_operand 1 "" ""))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%0"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; Subroutine call instruction returning a value.
+;;   operands[0]: It is the hard register in which the value is returned.
+;;   The remaining three operands are the same as the
+;;   three operands of the 'call' instruction.
+;;   (but with numbers increased by one)
+
+(define_expand "call_value"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (match_operand 1 "memory_operand" "")
+		         (match_operand 2 "general_operand" "")))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+(define_insn "*call_value_register"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (mem (match_operand:SI 1 "register_operand" "r"))
+		         (match_operand 2 "" "")))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+{
+  if (TARGET_16_BIT)
+    return "jral5\t%1";
+  else
+    return "jral\t%1";
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+(define_insn "*call_value_immediate"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (mem (match_operand:SI 1 "immediate_operand" "i"))
+			 (match_operand 2 "" "")))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%1"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; prologue and epilogue.
+
+(define_expand "prologue" [(const_int 0)]
+  ""
+{
+  /* Note that only under V3/V3M ISA, we could use v3push prologue.  */
+  if (TARGET_V3PUSH)
+    nds32_expand_prologue_v3push();
+  else
+    nds32_expand_prologue();
+  DONE;
+})
+
+(define_expand "epilogue" [(const_int 0)]
+  ""
+{
+  /* Note that only under V3/V3M ISA, we could use v3pop epilogue.  */
+  if (TARGET_V3PUSH)
+    nds32_expand_epilogue_v3pop();
+  else
+    nds32_expand_epilogue();
+  DONE;
+})
+
+
+;; nop instruction.
+
+(define_insn "nop"
+  [(const_int 0)]
+  ""
+  "nop16"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+
+;; ----------------------------------------------------------------------------
+;; unspec operation patterns
+;; ----------------------------------------------------------------------------
+
+;; stack push/pop multiple
+
+(define_insn "*stack_push_multiple"
+  [(match_parallel 3 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand" "r")
+		   (match_operand:SI 1 "register_operand" "r")
+		   (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_STACK_PUSH_MULTIPLE)
+     ])]
+  ""
+{
+  /* Because we need to return a string buffer used by output_asm_insn(),
+     we use a static character array to store desired assembly output.  */
+  static char str_buffer[100];
+
+  /* A string to present 'push.s' instruction.  */
+  const char *push_str = NULL;
+  /* A string to present Rb and Re operands.  */
+  const char *RbRe_str = NULL;
+  /* The En4 encoding string of the instruction is
+     in the bitwise fashion of following: "fp gp lp sp".  */
+  const char *En4_str[16] =
+    {
+      /* 0:  0  0  0  0 */
+      "",
+      /* 1:  0  0  0  1 */
+      "{ $sp }",
+      /* 2:  0  0  1  0 */
+      "{ $lp }",
+      /* 3:  0  0  1  1 */
+      "{ $lp $sp }",
+      /* 4:  0  1  0  0 */
+      "{ $gp }",
+      /* 5:  0  1  0  1 */
+      "{ $gp $sp }",
+      /* 6:  0  1  1  0 */
+      "{ $gp $lp }",
+      /* 7:  0  1  1  1 */
+      "{ $gp $lp $sp }",
+      /* 8:  1  0  0  0 */
+      "{ $fp }",
+      /* 9:  1  0  0  1 */
+      "{ $fp $sp }",
+      /* 10: 1  0  1  0 */
+      "{ $fp $lp }",
+      /* 11: 1  0  1  1 */
+      "{ $fp $lp $sp }",
+      /* 12: 1  1  0  0 */
+      "{ $fp $gp }",
+      /* 13: 1  1  0  1 */
+      "{ $fp $gp $sp }",
+      /* 14: 1  1  1  0 */
+      "{ $fp $gp $lp }",
+      /* 15: 1  1  1  1 */
+      "{ $fp $gp $lp $sp }"
+    };
+
+  /* Create push_str string.
+     Note that 'smw.adm $sp,[$sp],$sp,0' means push nothing.  */
+  if (INTVAL (operands[2]) == 0
+      && REGNO (operands[0]) == SP_REGNUM
+      && REGNO (operands[1]) == SP_REGNUM)
+    return "";
+  else
+    push_str = "push.s\t";
+
+  /* Create RbRe_str string.
+     Note that we need to output ',' character if there exists En4 field.  */
+  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
+      RbRe_str = (INTVAL (operands[2]) != 0) ? "%0, %1, " : "%0, %1";
+  else
+      RbRe_str = "";
+
+  /* Create complete assembly code string.  */
+  sprintf (str_buffer, "%s%s%s",
+		       push_str,
+		       RbRe_str,
+		       En4_str[INTVAL (operands[2])]);
+
+  /* Return formatted string.  */
+  return str_buffer;
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")])
+
+(define_insn "*stack_pop_multiple"
+  [(match_parallel 3 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand" "r")
+		   (match_operand:SI 1 "register_operand" "r")
+		   (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_STACK_POP_MULTIPLE)
+     ])]
+  ""
+{
+  /* Because we need to return a string buffer used by output_asm_insn(),
+     we use a static character array to store desired assembly output.  */
+  static char str_buffer[100];
+
+  /* A string to present 'pop.s' instruction.  */
+  const char *pop_str = NULL;
+  /* A string to present Rb and Re operands.  */
+  const char *RbRe_str = NULL;
+  /* The En4 encoding string of the instruction is
+     in the bitwise fashion of following: "fp gp lp sp".  */
+  const char *En4_str[16] =
+    {
+      /* 0:  0  0  0  0 */
+      "",
+      /* 1:  0  0  0  1 */
+      "{ $sp }",
+      /* 2:  0  0  1  0 */
+      "{ $lp }",
+      /* 3:  0  0  1  1 */
+      "{ $lp $sp }",
+      /* 4:  0  1  0  0 */
+      "{ $gp }",
+      /* 5:  0  1  0  1 */
+      "{ $gp $sp }",
+      /* 6:  0  1  1  0 */
+      "{ $gp $lp }",
+      /* 7:  0  1  1  1 */
+      "{ $gp $lp $sp }",
+      /* 8:  1  0  0  0 */
+      "{ $fp }",
+      /* 9:  1  0  0  1 */
+      "{ $fp $sp }",
+      /* 10: 1  0  1  0 */
+      "{ $fp $lp }",
+      /* 11: 1  0  1  1 */
+      "{ $fp $lp $sp }",
+      /* 12: 1  1  0  0 */
+      "{ $fp $gp }",
+      /* 13: 1  1  0  1 */
+      "{ $fp $gp $sp }",
+      /* 14: 1  1  1  0 */
+      "{ $fp $gp $lp }",
+      /* 15: 1  1  1  1 */
+      "{ $fp $gp $lp $sp }"
+    };
+
+  /* Create pop_str string.
+     Note that 'lmw.bim $sp,[$sp],$sp,0' means pop nothing.  */
+  if (INTVAL (operands[2]) == 0
+      && REGNO (operands[0]) == SP_REGNUM
+      && REGNO (operands[1]) == SP_REGNUM)
+    return "";
+  else
+    pop_str = "pop.s\t";
+
+  /* Create RbRe_str string.
+     Note that we need to output ',' character if there exists En4 field.  */
+  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
+      RbRe_str = (INTVAL (operands[2]) != 0) ? "%0, %1, " : "%0, %1";
+  else
+      RbRe_str = "";
+
+  /* Create complete assembly code string.  */
+  sprintf (str_buffer, "%s%s%s",
+		       pop_str,
+		       RbRe_str,
+		       En4_str[INTVAL (operands[2])]);
+
+  /* Return formatted string.  */
+  return str_buffer;
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")])
+
+;; stack v3push/v3pop
+
+(define_insn "*stack_v3push"
+  [(match_parallel 2 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand"  "r")
+		   (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_STACK_V3PUSH)
+     ])]
+  ""
+{
+  return "v3push\t%0, %1";
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+(define_insn "*stack_v3pop"
+  [(match_parallel 2 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand"  "r")
+		   (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_STACK_V3POP)
+     ])]
+  ""
+{
+  return "v3pop\t%0, %1";
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+
+;; In nds32 target, the 'ret5' instruction is actually 'jr5 $lp'.
+;; This pattern is designed to distinguish function return
+;; from general indirect_jump pattern so that we can directly
+;; generate 'ret5' for readability.
+
+(define_insn "unspec_func_return"
+  [(set (pc)
+	(unspec:SI [(reg:SI LP_REGNUM)] UNSPEC_FUNC_RETURN))]
+  ""
+  "ret5"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+;; ----------------------------------------------------------------------------
+;; Jump Table patterns
+;; ----------------------------------------------------------------------------
+;; Need to implement ASM_OUTPUT_ADDR_VEC_ELT (for normal jump table)
+;; or ASM_OUTPUT_ADDR_DIFF_ELT (for pc relative jump table) as well.
+;;
+;; operands[0]: The index to dispatch on.
+;; operands[1]: The lower bound for indices in the table.
+;; operands[2]: The total range of indices in the table.
+;;              i.e. The largest index minus the smallest one.
+;; operands[3]: A label that precedes the table itself.
+;; operands[4]: A label to jump to if the index has a value outside the bounds.
+;;
+;; We need to create following sequences for jump table code generation:
+;;   A) k <-- (plus (operands[0]) (-operands[1]))
+;;   B) if (gtu k operands[2]) then goto operands[4]
+;;   C) t <-- operands[3]
+;;   D) z <-- (mem (plus (k << 0 or 1 or 2) t))
+;;   E) z <-- t + z (NOTE: This is only required for pc relative jump table.)
+;;   F) jump to target with register t or z
+;;
+;; The steps C, D, E, and F are performed by casesi_internal pattern.
+(define_expand "casesi"
+  [(match_operand:SI 0 "register_operand"  "r") ; index to jump on
+   (match_operand:SI 1 "immediate_operand" "i") ; lower bound
+   (match_operand:SI 2 "immediate_operand" "i") ; total range
+   (match_operand:SI 3 "" "")                   ; table label
+   (match_operand:SI 4 "" "")]                  ; Out of range label
+  ""
+{
+  rtx reg, test;
+
+  /* Step A: "k <-- (plus (operands[0]) (-operands[1]))".  */
+  if (operands[1] != const0_rtx)
+    {
+      reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_addsi3 (reg, operands[0],
+			     GEN_INT (-INTVAL (operands[1]))));
+      operands[0] = reg;
+    }
+
+  /* Step B: "if (gtu k operands[2]) then goto operands[4]".  */
+  test = gen_rtx_GTU (VOIDmode, operands[0], operands[2]);
+  emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2],
+				  operands[4]));
+
+  operands[5] = gen_reg_rtx (SImode);
+  /* Step C, D, E, and F, using another temporary register operands[5].  */
+  emit_jump_insn (gen_casesi_internal (operands[0],
+				       operands[3],
+				       operands[5]));
+  DONE;
+})
+
+;; We are receiving operands from casesi pattern:
+;;
+;; operands[0]: The index that has been subtracted by the lower bound.
+;; operands[1]: A label that precedes the table itself.
+;; operands[2]: A temporary register to retrieve value in table.
+;;
+;; We need to perform steps C, D, E, and F:
+;;
+;;   C) t <-- operands[1]
+;;   D) z <-- (mem (plus (operands[0] << m) t))
+;;            m is 2 for normal jump table.
+;;            m is 0, 1, or 2 for pc relative jump table based on diff size.
+;;   E) t <-- z + t (NOTE: This is only required for pc relative jump table.)
+;;   F) Jump to target with register t or z.
+;;
+;; The USE in this pattern is needed to tell flow analysis that this is
+;; a CASESI insn.  It has no other purpose.
+(define_insn "casesi_internal"
+  [(parallel [(set (pc)
+		   (mem:SI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "r")
+					     (const_int 4))
+				    (label_ref (match_operand 1 "" "")))))
+	      (use (label_ref (match_dup 1)))
+	      (clobber (match_operand:SI 2 "register_operand" ""))
+	      (clobber (reg:SI TA_REGNUM))])]
+  ""
+{
+  if (CASE_VECTOR_PC_RELATIVE)
+    return nds32_output_casesi_pc_relative (operands);
+  else
+    return nds32_output_casesi (operands);
+}
+  [(set_attr "length" "20")
+   (set_attr "type" "alu")])
+
+;; ----------------------------------------------------------------------------
+
+;; Performance Extension
+
+(define_insn "clzsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+	(clz:SI (match_operand:SI 1 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "clz\t%0, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "smaxsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smax:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "max\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "sminsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smin:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "min\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/6] Andes nds32: machine description of nds32 porting (2).
  2013-07-08 10:57 [PATCH 2/6] Andes nds32: machine description of nds32 porting (2) Chung-Ju Wu
@ 2013-07-09 23:44 ` Joseph S. Myers
  2013-07-24 15:52   ` Chung-Ju Wu
  0 siblings, 1 reply; 18+ messages in thread
From: Joseph S. Myers @ 2013-07-09 23:44 UTC (permalink / raw)
  To: Chung-Ju Wu; +Cc: gcc-patches

On Mon, 8 Jul 2013, Chung-Ju Wu wrote:

> +/* This is used to identify used ISA when doing code generation.
> +   Initialize it with macro TARGET_DEFAULT_ISA,
> +   which is defined in nds32-isa-xxx.h file.
> +   User can specify it by using '-misa=X' option.  */
> +enum nds32_isa nds32_selected_isa = TARGET_DEFAULT_ISA;

Rather than using global state, put this in the gcc_options structure 
using a Variable entry in your .opt file.

> +	  warning (0, "For the option -misr-vector-size=X, the valid X "
> +		      "must be: 4 or 16");

The diagnostics in this function should all not start with a capital 
letter.  Invalid arguments to an option should be errors, not warnings.  
Since you have a location passed to this function, use error_at rather 
than the legacy functions that implicitly use input_location.

> +    case OPT_misa_:
> +      /* Check valid ISA: v2 v3 v3m.  */
> +      if (strcmp (arg, "v2") == 0)

Use Enum in the .opt file and get all the conversion from strings to 
integer values, and errors for unknown values, done automatically.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/6] Andes nds32: machine description of nds32 porting (2).
  2013-07-09 23:44 ` Joseph S. Myers
@ 2013-07-24 15:52   ` Chung-Ju Wu
  2013-07-25  9:42     ` Chung-Ju Wu
  0 siblings, 1 reply; 18+ messages in thread
From: Chung-Ju Wu @ 2013-07-24 15:52 UTC (permalink / raw)
  To: Joseph S. Myers; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2111 bytes --]

Hi, Joseph,

Sorry for the late revised patch.
We have completed all of it based on your review comments.

On 7/10/13 7:27 AM, Joseph S. Myers wrote:
> On Mon, 8 Jul 2013, Chung-Ju Wu wrote:
>
>> +/* This is used to identify used ISA when doing code generation.
>> +   Initialize it with macro TARGET_DEFAULT_ISA,
>> +   which is defined in nds32-isa-xxx.h file.
>> +   User can specify it by using '-misa=X' option.  */
>> +enum nds32_isa nds32_selected_isa = TARGET_DEFAULT_ISA;
>
> Rather than using global state, put this in the gcc_options structure
> using a Variable entry in your .opt file.
>

Thanks for the suggestion.  We removed global state and
added handling in nds32.opt to achieve the same purpose of
setting instruction set architecture.

Also, after discussing with other toolchain engineers,
we replaced -misa option with -march option.

>> +	  warning (0, "For the option -misr-vector-size=X, the valid X "
>> +		      "must be: 4 or 16");
>
> The diagnostics in this function should all not start with a capital
> letter.  Invalid arguments to an option should be errors, not warnings.
> Since you have a location passed to this function, use error_at rather
> than the legacy functions that implicitly use input_location.
>

Fixed it accordingly.

>> +    case OPT_misa_:
>> +      /* Check valid ISA: v2 v3 v3m.  */
>> +      if (strcmp (arg, "v2") == 0)
>
> Use Enum in the .opt file and get all the conversion from strings to
> integer values, and errors for unknown values, done automatically.
>

Thanks for the suggestion.  You guided us to a better approach
to deal with it automatically.  We modified it accordingly.


Thanks for the review comments.
A revised patch is provided and here is a summary:

   1. Use error_at () for diagnostics statement and start with lowercase.
   2. Some process can be done in nds32.opt.
      Remove unnecessary parts from nds32-common.c file.


gcc/
2013-07-24  Chung-Ju Wu  <jasonwucj@gmail.com>
	    Shiva Chen  <shiva0217@gmail.com>

	* config/nds32/nds32.md: New file.
	* common/config/nds32: New directory and files.


Best regards,
jasonwucj


[-- Attachment #2: 2-nds32-backend-md-part2.v2.patch --]
[-- Type: text/plain, Size: 89221 bytes --]

diff --git gcc/common/config/nds32/nds32-common.c gcc/common/config/nds32/nds32-common.c
new file mode 100644
index 0000000..0b4321e
--- /dev/null
+++ gcc/common/config/nds32/nds32-common.c
@@ -0,0 +1,122 @@
+/* Common hooks of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic-core.h"
+#include "tm.h"
+#include "common/common-target.h"
+#include "common/common-target-def.h"
+#include "opts.h"
+#include "flags.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_HANDLE_OPTION.
+
+   Validate the argument of an option at option-processing time.
+   Return true when the option (and its argument) is acceptable;
+   return false, after issuing a diagnostic via error_at (), when
+   the argument is rejected.  Options that need no special checking
+   fall through to the default case and are accepted as-is.  */
+static bool
+nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
+		     struct gcc_options *opts_set ATTRIBUTE_UNUSED,
+		     const struct cl_decoded_option *decoded,
+		     location_t loc)
+{
+  size_t     code  = decoded->opt_index;
+  int        value = decoded->value;
+
+  switch (code)
+    {
+    case OPT_misr_vector_size_:
+      /* Check the valid vector size: 4 or 16.  */
+      if (value != 4 && value != 16)
+	{
+	  error_at (loc, "for the option -misr-vector-size=X, the valid X "
+			 "must be: 4 or 16");
+	  return false;
+	}
+
+      return true;
+
+    case OPT_mcache_block_size_:
+      /* Check valid value: 4 8 16 32 64 128 256 512.
+	 These are exactly the powers of two whose exponent lies in
+	 [2, 9], so exact_log2 () performs the whole check at once;
+	 it returns -1 for any value that is not a power of two.  */
+      if (exact_log2 (value) < 2 || exact_log2 (value) > 9)
+	{
+	  error_at (loc, "for the option -mcache-block-size=X, the valid X "
+			 "must be: 4, 8, 16, 32, 64, 128, 256, or 512");
+	  return false;
+	}
+
+      return true;
+
+    default:
+      return true;
+    }
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE.
+   Per-optimization-level default option settings for this target.  */
+static const struct default_options nds32_option_optimization_table[] =
+{
+  /* Enable -fomit-frame-pointer by default at -O1 or higher.  */
+  { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+  /* Enable -mv3push by default at -Os, but it is useless under V2 ISA.  */
+  { OPT_LEVELS_SIZE,   OPT_mv3push,             NULL, 1 },
+
+  /* The table must end with this zero-filled terminator entry.  */
+  { OPT_LEVELS_NONE,   0,                       NULL, 0 }
+};
+
+/* ------------------------------------------------------------------------ */
+
+/* ======================================================================== */
+/* Run-time Target Specification.  */
+/* ======================================================================== */
+
+/* Default enable
+     TARGET_GP_DIRECT: Generate gp-imply instruction.
+     TARGET_16_BIT   : Generate 16/32 bit mixed length instruction.
+     TARGET_PERF_EXT : Generate performance extension instruction.
+     TARGET_CMOV     : Generate conditional move instruction.  */
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS		\
+  (MASK_GP_DIRECT				\
+   | MASK_16_BIT				\
+   | MASK_PERF_EXT				\
+   | MASK_CMOV)
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION nds32_handle_option
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE nds32_option_optimization_table
+
+/* ======================================================================== */
+/* Defining the Output Assembler Language.  */
+/* ======================================================================== */
+
+/* Assembler Commands for Exception Regions.  */
+
+/* Use setjmp/longjmp-based exception unwinding for this port.  */
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+/* ------------------------------------------------------------------------ */
+
+/* Instantiate the common target hook vector with the macros above.  */
+struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER;
+
+/* ------------------------------------------------------------------------ */
diff --git gcc/config/nds32/nds32.md gcc/config/nds32/nds32.md
new file mode 100644
index 0000000..66f4e00
--- /dev/null
+++ gcc/config/nds32/nds32.md
@@ -0,0 +1,2835 @@
+;; Machine description of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2013 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+;; Include predicates definition.
+(include "predicates.md")
+
+;; Include constraints definition.
+(include "constraints.md")
+
+;; Include iterators definition.
+(include "iterators.md")
+
+;; Include pipelines definition.
+(include "pipelines.md")
+
+
+;; Include constants definition.
+(include "constants.md")
+
+
+;; Include intrinsic functions definition.
+(include "nds32.intrinsic.md")
+
+;; Include block move for nds32 multiple load/store behavior.
+(include "nds32.multiple.md")
+
+;; Include DImode/DFmode operations.
+(include "nds32.doubleword.md")
+
+;; Include peephole patterns.
+(include "nds32.peephole2.md")
+
+
+;; Insn type, it is used to default other attribute values.
+(define_attr "type"
+  "unknown,move,load,store,alu,compare,branch,call,misc"
+  (const_string "unknown"))
+
+
+;; Length, in bytes, default is 4-bytes.
+(define_attr "length" "" (const_int 4))
+
+
+;; Enabled, which is used to enable/disable insn alternatives.
+;; Note that we use length and TARGET_16_BIT here as criteria.
+;; If the instruction pattern already check TARGET_16_BIT to
+;; determine the length by itself, its enabled attribute should be
+;; always 1 to avoid the conflict with the settings here.
+;; In other words: a 2-byte alternative is only available when the
+;; 16-bit instruction set is enabled; all others remain enabled.
+(define_attr "enabled" ""
+  (cond [(and (eq_attr "length" "2")
+	      (match_test "!TARGET_16_BIT"))
+	 (const_int 0)]
+	(const_int 1)))
+
+
+;; ----------------------------------------------------------------------------
+
+
+;; Move instructions.
+
+;; The three mov<mode> expanders below share one legalization step:
+;; a direct store of a non-register source into memory is not
+;; supported, so the source is forced into a register first.
+
+(define_expand "movqi"
+  [(set (match_operand:QI 0 "general_operand" "")
+	(match_operand:QI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (GET_CODE (operands[0]) == MEM && !REG_P (operands[1]))
+    operands[1] = force_reg (QImode, operands[1]);
+})
+
+(define_expand "movhi"
+  [(set (match_operand:HI 0 "general_operand" "")
+	(match_operand:HI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (GET_CODE (operands[0]) == MEM && !REG_P (operands[1]))
+    operands[1] = force_reg (HImode, operands[1]);
+})
+
+(define_expand "movsi"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(match_operand:SI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (GET_CODE (operands[0]) == MEM && !REG_P (operands[1]))
+    operands[1] = force_reg (SImode, operands[1]);
+})
+
+;; NOTE(review): in the load/store patterns below, operand 1 is the
+;; SET destination and operand 0 is the source -- the reverse of the
+;; usual numbering convention.  The nds32_output_16bit_*/32bit_*
+;; helpers presumably rely on this ordering; confirm before renumbering.
+
+;; SImode store: alternatives 0-3 use 2-byte 16-bit store forms
+;; (U45/U33/U37 address constraints), the last is the generic 4-byte store.
+(define_insn "*store_si"
+  [(set (match_operand:SI 1 "memory_operand"   "=U45, U33, U37, U45, m")
+	(match_operand:SI 0 "register_operand" "   l,   l,   l,   d, r"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+    case 2:
+    case 3:
+      return nds32_output_16bit_store (operands, 4);
+
+    default:
+      return nds32_output_32bit_store (operands, 4);
+    }
+}
+  [(set_attr "type"   "store,store,store,store,store")
+   (set_attr "length" "    2,    2,    2,    2,    4")])
+
+;; QImode/HImode store; <byte> is the access size taken from the iterator.
+(define_insn "*store_<mode>"
+  [(set (match_operand:QIHI 1 "memory_operand"   "=U33, m")
+	(match_operand:QIHI 0 "register_operand" "   l, r"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return nds32_output_16bit_store (operands, <byte>);
+
+    default:
+      return nds32_output_32bit_store (operands, <byte>);
+    }
+}
+  [(set_attr "type"   "store,store")
+   (set_attr "length" "    2,    4")])
+
+;; SImode load, mirror of *store_si above.
+(define_insn "*load_si"
+  [(set (match_operand:SI 1 "register_operand" "=  l,   l,   l,   d, r")
+	(match_operand:SI 0 "memory_operand"   " U45, U33, U37, U45, m"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+    case 2:
+    case 3:
+      return nds32_output_16bit_load (operands, 4);
+
+    default:
+      return nds32_output_32bit_load (operands, 4);
+    }
+}
+  [(set_attr "type"   "load,load,load,load,load")
+   (set_attr "length" "   2,   2,   2,   2,   4")])
+
+;; QImode/HImode load, mirror of *store_<mode> above.
+(define_insn "*load_<mode>"
+  [(set (match_operand:QIHI 1 "register_operand" "=  l, r")
+	(match_operand:QIHI 0 "memory_operand"   " U33, m"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return nds32_output_16bit_load (operands, <byte>);
+
+    default:
+      return nds32_output_32bit_load (operands, <byte>);
+    }
+}
+  [(set_attr "type"   "load,load")
+   (set_attr "length" "   2,   4")])
+
+;; Register move plus reg<->mem fallbacks.  Alternative 0 emits the
+;; 2-byte mov55 when 16-bit insns are enabled, otherwise a 4-byte ori.
+(define_insn "*mov<mode>"
+  [(set (match_operand:QIHISI 1 "register_operand" "=r, m, r")
+	(match_operand:QIHISI 0 "register_operand" " r, r, m"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      if (get_attr_length (insn) == 2)
+	return "mov55\t%1, %0";
+      else
+	return "ori\t%1, %0, 0";
+    case 1:
+      return nds32_output_32bit_store (operands, <byte>);
+    case 2:
+      return nds32_output_32bit_load (operands, <byte>);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "alu,store,load")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 1
+       (const_int 4)
+       ;; Alternative 2
+       (const_int 4)
+     ])])
+
+;; For QImode and HImode, the immediate value can be fit in imm20s.
+;; So there is no need to support QI and HI in the split patterns.
+;; Also, we use const_int_operand to limit that only CONST_INT
+;; is able to match such instruction template.
+;;
+;; Besides, in the split condition, we ask big-constant split to be
+;; performed after reload phase. So that the mov2add optimization
+;; in postreload have chance to optimize the code.
+
+;; NOTE(review): despite the "*movsi_const" name, this pattern uses the
+;; QIHISI iterator and thus matches QI/HI/SI constant loads alike.
+;; Big constants (Ispl, not Is20) are split into sethi + ori/addi after
+;; reload by the split part below.
+(define_insn_and_split "*movsi_const"
+  [(set (match_operand:QIHISI 0 "register_operand"  "=   d,    r,    r,    r,    t,    r")
+	(match_operand:QIHISI 1 "const_int_operand" " Ip05, Is05, Is20, Ihig, Ispl, Ispl"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      if (get_attr_length (insn) == 2)
+	return "movpi45\t%0, %1";
+      /* else fall through.  */
+    case 1:
+      if (get_attr_length (insn) == 2)
+	return "movi55\t%0, %1";
+      /* else fall through.  */
+    case 2:
+      return "movi\t%0, %1";
+    case 3:
+      return "sethi\t%0, hi20(%1)";
+    case 4:
+      /* Use $r15, if the value is NOT in the range of Is20,
+         we must output "sethi + ori" directly since
+         we may already passed the split stage.  */
+      return "sethi\t%0, hi20(%1)\;ori\t%0, %0, lo12(%1)";
+
+    default:
+      return "#";
+    }
+}
+   "reload_completed
+    && satisfies_constraint_Ispl (operands[1])
+    && !satisfies_constraint_Is20 (operands[1])"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 0) (plus:QIHISI (match_dup 0) (match_dup 2)))]
+{
+  /* Split the constant into a high part (low 12 bits cleared) and
+     the low 12 bits which are added back in the second insn.  */
+  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xfff);
+  operands[1] = GEN_INT ((INTVAL (operands[1]) >> 12) << 12);
+}
+  [(set_attr "type" "alu,alu,alu,alu,alu,alu")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 1
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 2
+       (const_int 4)
+       ;; Alternative 3
+       (const_int 4)
+       ;; Alternative 4
+       (const_int 8)
+       ;; Alternative 5
+       (const_int 8)
+     ])])
+
+;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF
+;; are able to match such instruction template.
+;; The "la" pseudo expands to an 8-byte sequence (see the length attr).
+(define_insn "*move_addr"
+  [(set (match_operand:SI 0 "register_operand"       "=l, r")
+	(match_operand:SI 1 "nds32_symbolic_operand" " i, i"))]
+  ""
+  "la\t%0, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "8")])
+
+
+;; Load the upper 20 bits of an immediate into a register.
+(define_insn "*sethi"
+  [(set (match_operand:SI 0 "register_operand"           "=r")
+	(high:SI (match_operand:SI 1 "immediate_operand" " i")))]
+  ""
+{
+  return "sethi\t%0, hi20(%1)";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; OR in the low 12 bits; pairs with *sethi to build a full address.
+(define_insn "*lo_sum"
+  [(set (match_operand:SI 0 "register_operand"             "=r")
+	(lo_sum:SI (match_operand:SI 1 "register_operand"  " 0")
+		   (match_operand:SI 2 "immediate_operand" " i")))]
+  ""
+  "ori\t%0, %1, lo12(%2)"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Zero extension instructions.
+
+(define_expand "zero_extend<mode>si2"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(zero_extend:SI (match_operand:QIHI 1 "general_operand" "")))]
+  ""
+{
+  rtx tmp_reg;
+
+  /* We need to make sure operands[1] is a register.  */
+  if (!REG_P (operands[1]))
+    operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
+
+  /* If the pattern is "(mem X) <- (zero_extend (reg Y))",
+     we create two rtx patterns:
+       (reg:SI K) <- (zero_extend:SI (reg Y))
+       (mem:SI X) <- (reg:SI K)
+     The first rtx will be matched by '*zero_extend<mode>si2_reg' template,
+     and the second rtx will be matched by mov naming pattern.  */
+  if (MEM_P (operands[0]))
+    {
+      tmp_reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_zero_extend<mode>si2 (tmp_reg, operands[1]));
+      emit_insn (gen_movsi (operands[0], tmp_reg));
+
+      DONE;
+    }
+})
+
+;; Register-to-register zero extension: 2-byte ze<size>33 for low
+;; registers, 4-byte ze<size> otherwise.
+(define_insn "*zero_extend<mode>si2_reg"
+  [(set (match_operand:SI 0 "register_operand"                   "=w, r")
+	(zero_extend:SI (match_operand:QIHI 1 "register_operand" " w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "ze<size>33\t%0, %1";
+    case 1:
+      return "ze<size>\t%0, %1";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+;; Zero extension folded into a load.
+;; NOTE(review): operand 1 is the destination and operand 0 the source
+;; memory here -- reversed numbering, matching the *store_si family.
+(define_insn "*zero_extend<mode>si2_load"
+  [(set (match_operand:SI 1 "register_operand"                 "=  l, *r")
+	(zero_extend:SI (match_operand:QIHI 0 "memory_operand" " U33,  m")))]
+  ""
+{
+  if (which_alternative == 0)
+    return nds32_output_16bit_load (operands, <byte>);
+  else
+    return nds32_output_32bit_load (operands, <byte>);
+}
+  [(set_attr "length" "2, 4")
+   (set_attr "type" "load,load")])
+
+;; Sign extension instructions.
+
+(define_expand "extend<mode>si2"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(sign_extend:SI (match_operand:QIHI 1 "general_operand" "")))]
+  ""
+{
+  rtx tmp_reg;
+
+  /* We need to make sure operands[1] is a register.  */
+  if (!REG_P (operands[1]))
+    operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
+
+  /* If the pattern is "(mem X) <- (sign_extend (reg Y))",
+     we create two rtx patterns:
+       (reg:SI K) <- (sign_extend:SI (reg Y))
+       (mem:SI X) <- (reg:SI K)
+     The first rtx will be matched by '*extend<mode>si2_reg' template,
+     and the second rtx will be matched by mov naming pattern.  */
+  if (MEM_P (operands[0]))
+    {
+      tmp_reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_extend<mode>si2 (tmp_reg, operands[1]));
+      emit_insn (gen_movsi (operands[0], tmp_reg));
+
+      DONE;
+    }
+})
+
+;; Register-to-register sign extension: 2-byte se<size>33 for low
+;; registers, 4-byte se<size> otherwise.
+(define_insn "*extend<mode>si2_reg"
+  [(set (match_operand:SI 0 "register_operand"                   "=w, r")
+	(sign_extend:SI (match_operand:QIHI 1 "register_operand" " w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "se<size>33\t%0, %1";
+    case 1:
+      return "se<size>\t%0, %1";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+;; Sign extension folded into a load.  The output template inspects the
+;; form of the memory address RTX and picks the matching load-signed
+;; mnemonic; any unsupported address form hits gcc_unreachable ().
+(define_insn "*extend<mode>si2_load"
+  [(set (match_operand:SI 0 "register_operand"                 "=r")
+	(sign_extend:SI (match_operand:QIHI 1 "memory_operand" " m")))]
+  ""
+{
+  rtx mem_addr_op;
+  rtx op0;
+  rtx op1;
+
+  /* Retrieve rtx X from (mem (X ...)).  */
+  mem_addr_op = XEXP (operands[1], 0);
+
+  switch (GET_CODE (mem_addr_op))
+    {
+    case SYMBOL_REF:
+    case CONST:
+      /* (mem (symbol_ref X))
+         (mem (const (...)))
+         => access global variables,
+            use "lbsi.gp / lhsi.gp" */
+      return "l<size>si.gp\t%0, %1";
+
+    case REG:
+      /* (mem (reg X))
+         => access location by using register,
+            use "lbsi / lhsi" */
+      return "l<size>si\t%0, %1";
+
+    case PLUS:
+      /* get operands first */
+      op0 = XEXP (mem_addr_op, 0);
+      op1 = XEXP (mem_addr_op, 1);
+
+      /* (mem (plus reg reg))
+         => access location by adding two registers,
+            use "lbs / lhs" */
+      if (REG_P (op0) && REG_P (op1))
+	return "l<size>s\t%0, %1";
+
+      /* (mem (plus reg const_int))
+         => access location by adding one register with const_int,
+            use "lbsi / lhsi" */
+      if (REG_P (op0) && CONST_INT_P (op1))
+	return "l<size>si\t%0, %1";
+
+      /* (mem (plus (mult reg const_int) reg))
+         => access location by adding one register with
+            multiplication of register and const_int,
+            use "lbs / lhs" */
+      if (GET_CODE (op0) == MULT && REG_P (op1)
+	  &&       REG_P (XEXP (op0, 0))
+	  && CONST_INT_P (XEXP (op0, 1)))
+	return "l<size>s\t%0, %1";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case POST_MODIFY:
+      /* Get operands first.  */
+      op0 = XEXP (mem_addr_op, 0);
+      op1 = XEXP (mem_addr_op, 1);
+
+      /* (mem (post_modify (reg)
+                           (plus (reg) (reg))))
+         => access location by using register which will be
+            post modified with reg,
+            use "lbs.bi/ lhs.bi / lws.bi" */
+      if (REG_P (op0) && GET_CODE (op1) == PLUS
+	  && REG_P (XEXP (op1, 1)))
+	return "l<size>s.bi\t%0, %1";
+
+      /* (mem (post_modify (reg)
+                           (plus (reg) (const_int))))
+         => access location by using register which will be
+            post modified with const_int,
+            use "lbsi.bi/ lhsi.bi / lwsi.bi" */
+      if (REG_P (op0) && GET_CODE (op1) == PLUS
+	  && CONST_INT_P (XEXP (op1, 1)))
+	return "l<size>si.bi\t%0, %1";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case POST_INC:
+      /* (mem (post_inc reg))
+         => access location by using register which will be
+            post increment,
+            use "lbsi.bi / lhsi.bi" */
+      if (REG_P (XEXP (mem_addr_op, 0)))
+	return "l<size>si.bi\t%0, %1, <byte>";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case POST_DEC:
+      /* (mem (post_dec reg))
+         => access location by using register which will be
+            post decrement,
+            use "lbsi.bi / lhsi.bi" */
+      if (REG_P (XEXP (mem_addr_op, 0)))
+	return "l<size>si.bi\t%0, %1, -<byte>";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case LO_SUM:
+      /* Rewrite operands so %1/%2 print the lo_sum base and offset.  */
+      operands[2] = XEXP (mem_addr_op, 1);
+      operands[1] = XEXP (mem_addr_op, 0);
+      return "l<size>si\t%0, [%1 + lo12(%2)]";
+
+    default:
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+    }
+
+other_cases:
+  gcc_unreachable ();
+}
+  [(set_attr "type" "load")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Arithmetic instructions.
+
+(define_expand "addsi3"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(plus:SI (match_operand:SI 1 "register_operand" "")
+		 (match_operand:SI 2 "nds32_nonmemory_nonsymbol_operand" "")))]
+  ""
+{
+  /* Canonicalize a CONST_INT addend into SImode range.  */
+  if (GET_CODE (operands[2]) == CONST_INT)
+    operands[2] = gen_int_mode (INTVAL (operands[2]), SImode);
+})
+
+;; Addition: six 2-byte 16-bit forms (including the stack-pointer
+;; variants addi10.sp / addri36.sp) plus the generic 4-byte addi/add.
+(define_insn "*add<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                      "=   d,    l,  d, l,    k,    l,    r, r")
+	(plus:QIHISI (match_operand:QIHISI 1 "register_operand"         "    0,    l, %0, l,    0,    k,    r, r")
+		     (match_operand:QIHISI 2 "nds32_reg_or_int_operand" " Iu05, Iu03,  r, l, Is10, Iu06, Is15, r")))]
+  ""
+  "@
+  addi45\t%0, %2
+  addi333\t%0, %1, %2
+  add45\t%0, %2
+  add333\t%0, %1, %2
+  addi10.sp\t%2
+  addri36.sp\t%0, %2
+  addi\t%0, %1, %2
+  add\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  2,  2,  2,  2,  4,  4")])
+
+(define_expand "subsi3"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(minus:SI (match_operand:SI 1 "nds32_rimm15s_operand" "")
+		 (match_operand:SI 2 "nds32_rimm15s_operand" "")))]
+  ""
+  ""
+)
+
+;; Subtraction; the Is15 alternative maps "imm - reg" onto subri.
+(define_insn "*sub<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                    "=   d,    l, d, l,    r, r")
+	(minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" "    0,    l, 0, l, Is15, r")
+		      (match_operand:QIHISI 2 "nds32_rimm15s_operand" " Iu05, Iu03, r, l,    r, r")))]
+  ""
+  "@
+  subi45\t%0, %2
+  subi333\t%0, %1, %2
+  sub45\t%0, %2
+  sub333\t%0, %1, %2
+  subri\t%0, %2, %1
+  sub\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  2,  2,  4,  4")])
+
+
+;; GCC intends to simplify (plus (ashift ...) (reg))
+;; into (plus (mult ...) (reg)), so our matching pattern takes 'mult'
+;; and needs to ensure it is exact_log2 value.
+(define_insn "*add_slli"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand"  " r")
+			  (match_operand:SI 2 "immediate_operand" " i"))
+		 (match_operand:SI 3 "register_operand"           " r")))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[2])) != -1)
+   && (exact_log2 (INTVAL (operands[2])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'add_slli' instruction.  */
+  operands[2] = GEN_INT (floor_log2 (INTVAL (operands[2])));
+
+  return "add_slli\t%0, %3, %1, %2";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Add with a logically right-shifted operand (V3 only).
+(define_insn "*add_srli"
+  [(set (match_operand:SI 0 "register_operand"                        "=   r")
+	(plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			      (match_operand:SI 2 "immediate_operand" " Iu05"))
+		 (match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "add_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; GCC intends to simplify (minus (reg) (ashift ...))
+;; into (minus (reg) (mult ...)), so our matching pattern takes 'mult'
+;; and needs to ensure it is exact_log2 value.
+(define_insn "*sub_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=r")
+	(minus:SI (match_operand:SI 1 "register_operand"           " r")
+		  (mult:SI (match_operand:SI 2 "register_operand"  " r")
+			   (match_operand:SI 3 "immediate_operand" " i"))))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[3])) != -1)
+   && (exact_log2 (INTVAL (operands[3])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'sub_slli' instruction.  */
+  operands[3] = GEN_INT (floor_log2 (INTVAL (operands[3])));
+
+  return "sub_slli\t%0, %1, %2, %3";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Subtract with a logically right-shifted operand (V3 only).
+(define_insn "*sub_srli"
+  [(set (match_operand:SI 0 "register_operand"                         "=   r")
+	(minus:SI (match_operand:SI 1 "register_operand"               "    r")
+		  (lshiftrt:SI (match_operand:SI 2 "register_operand"  "    r")
+			       (match_operand:SI 3 "immediate_operand" " Iu05"))))]
+  "TARGET_ISA_V3"
+  "sub_srli\t%0, %1, %2, %3"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; Multiplication instructions.
+
+;; 32x32 -> 32 multiply: 2-byte mul33 when both operands fit low
+;; registers, otherwise the 4-byte mul.
+(define_insn "mulsi3"
+  [(set (match_operand:SI 0 "register_operand"          "= w, r")
+	(mult:SI (match_operand:SI 1 "register_operand" " %0, r")
+		 (match_operand:SI 2 "register_operand" "  w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mul33\t%0, %2";
+    case 1:
+      return "mul\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+;; Signed 32x32 -> 64 widening multiply (V3 only).
+(define_insn "mulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V3"
+  "mulsr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; Unsigned 32x32 -> 64 widening multiply (V3 only).
+(define_insn "umulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V3"
+  "mulr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Multiply-accumulate instructions.
+;; The _0/_1 variants only differ in the commutative order of the
+;; plus operands, so combine can match either canonical form.
+
+(define_insn "*maddr32_0"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (match_operand:SI 3 "register_operand"          " 0")
+                 (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "*maddr32_1"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))
+                 (match_operand:SI 3 "register_operand"          " 0")))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; Multiply-subtract: acc - a * b, accumulator tied to the output.
+(define_insn "*msubr32"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (minus:SI (match_operand:SI 3 "register_operand"          " 0")
+                  (mult:SI (match_operand:SI 1 "register_operand" " r")
+                           (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "msubr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Div Instructions.
+;; divsr/divr produce quotient and remainder in one instruction.
+
+(define_insn "divmodsi4"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+        (div:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand" " r")))
+   (set (match_operand:SI 3 "register_operand"         "=r")
+        (mod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divsr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "udivmodsi4"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+        (udiv:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand"  " r")))
+   (set (match_operand:SI 3 "register_operand"          "=r")
+        (umod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Boolean instructions.
+;; Note: We define the DImode versions in nds32.doubleword.md.
+
+;; ----------------------------------------------------------------------------
+;; 'AND' operation
+;; ----------------------------------------------------------------------------
+
+(define_insn "bitc"
+  [(set (match_operand:SI 0 "register_operand"                 "=r")
+	(and:SI (not:SI (match_operand:SI 1 "register_operand" " r"))
+		(match_operand:SI 2 "register_operand"         " r")))]
+  "TARGET_ISA_V3"
+  "bitc\t%0, %2, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+(define_expand "andsi3"
+  [(set (match_operand:SI         0 "register_operand" "")
+	(and:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
+  ""
+{
+  /* If operands[2] is const_int,
+     we might be able to use other more efficient instructions.  */
+  if (GET_CODE (operands[2]) == CONST_INT)
+    {
+      int mask = INTVAL (operands[2]);
+
+      if (mask == 255)
+	{
+	  /* ($r0 & 0xff)  ==>  (zeb $r0, $r0) */
+	  operands[1] = convert_to_mode (QImode, operands[1], 1);
+	  emit_insn (gen_zero_extendqisi2 (operands[0], operands[1]));
+	  DONE;
+	}
+      else if (mask == 65535)
+	{
+	  /* ($r0 & 0xffff)  ==>  (zeh $r0, $r0) */
+	  operands[1] = convert_to_mode (HImode, operands[1], 1);
+	  emit_insn (gen_zero_extendhisi2 (operands[0], operands[1]));
+	  DONE;
+	}
+    }
+})
+
+(define_insn "*andsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    l,    l,    l,    l,    l,    r,    r,    r")
+	(and:SI (match_operand:SI 1 "register_operand" " %0, r,    l,    l,    l,    0,    0,    r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Izeb, Ixls, Ix11, Ibms, Ifex, Iu15, Ii15, Ic15")))]
+  ""
+{
+  HOST_WIDE_INT mask = INTVAL (operands[2]);
+  int zero_position;
+
+  /* 16-bit andi instructions:
+     andi Rt3,Ra3,0xff  -> zeb33  Rt3,Ra3
+     andi Rt3,Ra3,0x01  -> xlsb33 Rt3,Ra3
+     andi Rt3,Ra3,0x7ff -> x11b33 Rt3,Ra3
+     andi Rt3,Rt3,2^imm3u          -> bmski33 Rt3,imm3u
+     andi Rt3,Rt3,(2^(imm3u+1))-1  -> fexti33 Rt3,imm3u.  */
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "and33\t%0, %2";
+    case 1:
+      return "and\t%0, %1, %2";
+    case 2:
+      return "zeb33\t%0, %1";
+    case 3:
+      return "xlsb33\t%0, %1";
+    case 4:
+      return "x11b33\t%0, %1";
+    case 5:
+      operands[2] = GEN_INT(floor_log2 (mask));
+      return "bmski33\t%0, %2";
+    case 6:
+      operands[2] = GEN_INT(floor_log2 (mask + 1) - 1);
+      return "fexti33\t%0, %2";
+    case 7:
+      return "andi\t%0, %1, %2";
+    case 8:
+      operands[2] = GEN_INT(~mask);
+      return "bitci\t%0, %1, %2";
+    case 9:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bclr_p() test,
+         so that we can guarantee there is only one 0-bit
+         within the immediate value.  */
+      for (zero_position = 31; zero_position >= 0; zero_position--)
+	{
+	  if ((INTVAL (operands[2]) & (1 << zero_position)) == 0)
+	    {
+	      /* Found the 0-bit position.  */
+	      operands[2] = GEN_INT (zero_position);
+	      break;
+	    }
+	}
+      return "bclr\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  2,  2,  2,  2,  2,  4,  4,  4")])
+
+;; Combine a left shift with AND in a single instruction:
+;; matches (operands[1] << operands[2]) & operands[3] and emits the
+;; V3-only 4-byte 'and_slli' instruction (shifted source goes last).
+(define_insn "*and_slli"
+  [(set (match_operand:SI 0 "register_operand"                      "=   r")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand"   "    r")
+			    (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"              "    r")))]
+  "TARGET_ISA_V3"
+  "and_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Combine a logical right shift with AND in a single instruction:
+;; matches (operands[1] >> operands[2]) & operands[3] and emits the
+;; V3-only 4-byte 'and_srli' instruction (shifted source goes last).
+(define_insn "*and_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "and_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'OR' operation
+;; ----------------------------------------------------------------------------
+
+;; For iorsi3 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; simply set different instruction length according to ISA.
+;; No preparation code is needed here; recognition is deferred to the
+;; anonymous "*iorsi3" pattern below.
+(define_expand "iorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(ior:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
+  ""
+  ""
+)
+
+;; This is the iorsi3 pattern for V3/V3M ISA,
+;; which DOES HAVE 'or33' instruction.
+;; So we can identify 'or Rt3,Ra3,Rb3' case and set its length to be 2.
+;; Alternatives: 0 = or33 (16-bit), 1 = or, 2 = ori (Iu15 immediate),
+;; 3 = bset (Ie15 immediate: exactly one 1-bit set).
+(define_insn "*iorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(ior:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, Ie15")))]
+  ""
+{
+  /* Bit position of the single 1-bit for the bset alternative.  */
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "or33\t%0, %2";
+    case 1:
+      return "or\t%0, %1, %2";
+    case 2:
+      return "ori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bset_p() test,
+         so that we can guarantee there is only one 1-bit
+         within the immediate value.  */
+      for (one_position = 31; one_position >= 0; one_position--)
+	{
+	  if ((INTVAL (operands[2]) & (1 << one_position)) != 0)
+	    {
+	      /* Found the 1-bit position.  */
+	      operands[2] = GEN_INT (one_position);
+	      break;
+	    }
+	}
+      return "bset\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+;; Combine a left shift with OR in a single instruction:
+;; (operands[1] << operands[2]) | operands[3], V3-only 'or_slli'.
+(define_insn "*or_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(ior:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "or_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Combine a logical right shift with OR in a single instruction:
+;; (operands[1] >> operands[2]) | operands[3], V3-only 'or_srli'.
+(define_insn "*or_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "or_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'XOR' operation
+;; ----------------------------------------------------------------------------
+
+;; For xorsi3 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; simply set different instruction length according to ISA.
+;; No preparation code is needed here; recognition is deferred to the
+;; anonymous "*xorsi3" pattern below.
+(define_expand "xorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(xor:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
+  ""
+  ""
+)
+
+;; XOR pattern.  Alternatives: 0 = xor33 (16-bit), 1 = xor,
+;; 2 = xori (Iu15 immediate), 3 = btgl (It15 immediate: one 1-bit set).
+(define_insn "*xorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(xor:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, It15")))]
+  ""
+{
+  /* Bit position of the single 1-bit for the btgl alternative.  */
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "xor33\t%0, %2";
+    case 1:
+      return "xor\t%0, %1, %2";
+    case 2:
+      return "xori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_btgl_p() test,
+         so that we can guarantee there is only one 1-bit
+         within the immediate value.  */
+      for (one_position = 31; one_position >= 0; one_position--)
+	{
+	  if ((INTVAL (operands[2]) & (1 << one_position)) != 0)
+	    {
+	      /* Found the 1-bit position.  */
+	      operands[2] = GEN_INT (one_position);
+	      break;
+	    }
+	}
+      return "btgl\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+;; Combine a left shift with XOR in a single instruction:
+;; (operands[1] << operands[2]) ^ operands[3], V3-only 'xor_slli'.
+(define_insn "*xor_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(xor:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "xor_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Combine a logical right shift with XOR in a single instruction:
+;; (operands[1] >> operands[2]) ^ operands[3], V3-only 'xor_srli'.
+(define_insn "*xor_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "xor_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Rotate Right Instructions.
+
+;; Alternative 0 uses 'rotri' for a 5-bit unsigned immediate amount;
+;; alternative 1 uses 'rotr' for a register amount.  Both are 4 bytes.
+(define_insn "rotrsi3"
+  [(set (match_operand:SI 0 "register_operand"                 "=   r, r")
+	  (rotatert:SI (match_operand:SI 1 "register_operand"  "    r, r")
+		       (match_operand:SI 2 "nonmemory_operand" " Iu05, r")))]
+  ""
+  "@
+  rotri\t%0, %1, %2
+  rotr\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'NEG' operation
+;; ----------------------------------------------------------------------------
+
+;; For negsi2 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; output assembly code according to ISA.
+;; No preparation code is needed here; recognition is deferred to the
+;; anonymous "*negsi2" pattern below.
+(define_expand "negsi2"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(neg:SI (match_operand:SI 1 "register_operand" "")))]
+  ""
+  ""
+)
+
+;; Note that there is NO 'neg33' instruction for V2 ISA.
+;; So 'subri A,B,0' (its semantic is 'A = 0 - B')
+;; is the only option for V2 ISA.
+;; Alternative 0 is the 16-bit 'neg33' form; alternative 1 is the
+;; 4-byte reverse-subtract-from-zero form usable on every ISA.
+(define_insn "*negsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(neg:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+  "@
+   neg33\t%0, %1
+   subri\t%0, %1, 0"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'ONE_COMPLEMENT' operation
+;; ----------------------------------------------------------------------------
+
+;; For one_cmplsi2 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; output assembly code according to ISA.
+;; No preparation code is needed here; recognition is deferred to the
+;; anonymous "*one_cmplsi2" pattern below.
+(define_expand "one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(not:SI (match_operand:SI 1 "register_operand" "")))]
+  ""
+  ""
+)
+
+;; This is the one_cmplsi2 pattern.
+;; Alternative 0 is the 16-bit 'not33' form; alternative 1 synthesizes
+;; bitwise NOT as 'nor Rt,Ra,Ra', available on every ISA.
+(define_insn "*one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(not:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+  "@
+   not33\t%0, %1
+   nor\t%0, %1, %1"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+;; ----------------------------------------------------------------------------
+
+;; Shift instructions.
+
+;; Arithmetic/logical left shift.  Alternatives:
+;;   0 = slli333 (16-bit, low registers, 3-bit immediate)
+;;   1 = slli    (32-bit, 5-bit immediate)
+;;   2 = sll     (32-bit, register amount)
+(define_insn "ashlsi3"
+  [(set (match_operand:SI 0 "register_operand"            "=   l,    r, r")
+	(ashift:SI (match_operand:SI 1 "register_operand" "    l,    r, r")
+		   (match_operand:SI 2 "general_operand"  " Iu03, Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "slli333\t%0, %1, %2";
+    case 1:
+      return "slli\t%0, %1, %2";
+    case 2:
+      return "sll\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+;; Arithmetic right shift.  Alternatives:
+;;   0 = srai45 (16-bit, destination must equal source, "0" tie)
+;;   1 = srai   (32-bit, 5-bit immediate)
+;;   2 = sra    (32-bit, register amount)
+(define_insn "ashrsi3"
+  [(set (match_operand:SI 0 "register_operand"              "=   d,    r, r")
+	(ashiftrt:SI (match_operand:SI 1 "register_operand" "    0,    r, r")
+		     (match_operand:SI 2 "general_operand"  " Iu05, Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "srai45\t%0, %2";
+    case 1:
+      return "srai\t%0, %1, %2";
+    case 2:
+      return "sra\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+;; Logical right shift.  Alternatives:
+;;   0 = srli45 (16-bit, destination must equal source, "0" tie)
+;;   1 = srli   (32-bit, 5-bit immediate)
+;;   2 = srl    (32-bit, register amount)
+(define_insn "lshrsi3"
+  [(set (match_operand:SI 0 "register_operand"              "=   d,    r, r")
+	(lshiftrt:SI (match_operand:SI 1 "register_operand" "    0,    r, r")
+		     (match_operand:SI 2 "general_operand"  " Iu05, Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "srli45\t%0, %2";
+    case 1:
+      return "srli\t%0, %1, %2";
+    case 2:
+      return "srl\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; ----------------------------------------------------------------------------
+;; Conditional Move patterns
+;; ----------------------------------------------------------------------------
+
+;; Conditional move expander.  Rewrites a general comparison into an
+;; (eq X 0) / (ne X 0) form so the cmovz/cmovn patterns can match.
+(define_expand "movsicc"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(if_then_else:SI (match_operand 1 "comparison_operator" "")
+			 (match_operand:SI 2 "register_operand" "")
+			 (match_operand:SI 3 "register_operand" "")))]
+  "TARGET_CMOV"
+{
+  if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+      && GET_MODE (XEXP (operands[1], 0)) == SImode
+      && XEXP (operands[1], 1) == const0_rtx)
+    {
+      /* If the operands[1] rtx is already (eq X 0) or (ne X 0),
+         we have gcc generate original template rtx.  */
+      goto create_template;
+    }
+  else
+    {
+      /* Since there is only 'slt'(Set when Less Than) instruction for
+         comparison in Andes ISA, the major strategy we use here is to
+         convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination.
+         We design constraints properly so that the reload phase will assist
+         to make one source operand to use same register as result operand.
+         Then we can use cmovz/cmovn to catch the other source operand
+         which has different register.  */
+      enum rtx_code code = GET_CODE (operands[1]);
+      enum rtx_code new_code = code;
+      rtx cmp_op0 = XEXP (operands[1], 0);
+      rtx cmp_op1 = XEXP (operands[1], 1);
+      rtx tmp;
+      int reverse = 0;
+
+      /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part
+         Strategy : Reverse condition and swap comparison operands
+
+         For example:
+
+             a <= b ? P : Q   (LE or LEU)
+         --> a >  b ? Q : P   (reverse condition)
+         --> b <  a ? Q : P   (swap comparison operands to achieve 'LT/LTU')
+
+             a >= b ? P : Q   (GE or GEU)
+         --> a <  b ? Q : P   (reverse condition to achieve 'LT/LTU')
+
+             a <  b ? P : Q   (LT or LTU)
+         --> (NO NEED TO CHANGE, it is already 'LT/LTU')
+
+             a >  b ? P : Q   (GT or GTU)
+         --> b <  a ? P : Q   (swap comparison operands to achieve 'LT/LTU') */
+      switch (code)
+	{
+	case NE:
+	  /*   (a != b ? P : Q)
+	     can be expressed as
+	       (a == b ? Q : P)
+	     so, fall through to reverse condition */
+	case GE: case GEU: case LE: case LEU:
+	  new_code = reverse_condition (code);
+	  reverse = 1;
+	  break;
+	case EQ: case GT: case GTU: case LT: case LTU:
+	  /* no need to reverse condition */
+	  break;
+	default:
+	  FAIL;
+	}
+
+      /* For '>' comparison operator, we swap operands
+         so that we can have 'LT/LTU' operator.  */
+      if (new_code == GT || new_code == GTU)
+	{
+	  tmp     = cmp_op0;
+	  cmp_op0 = cmp_op1;
+	  cmp_op1 = tmp;
+
+	  new_code = swap_condition (new_code);
+	}
+
+      /* Use a temporary register to store slt/slts result.  */
+      tmp = gen_reg_rtx (SImode);
+
+      /* Split EQ and NE because we do not have a direct compare for EQ/NE.
+         If we do not do this, the conditional move transformation will fail
+	 when it produces (SET A (EQ B C)) or (SET A (NE B C)).  */
+      if (new_code == EQ)
+	{
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, tmp, GEN_INT (1)));
+	}
+      else if (new_code == NE)
+	{
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, GEN_INT (0), tmp));
+	}
+      else
+	/* This emit_insn will create corresponding 'slt/slts' instruction.  */
+	emit_insn (gen_rtx_SET (VOIDmode, tmp,
+				gen_rtx_fmt_ee (new_code, SImode,
+						cmp_op0, cmp_op1)));
+
+      /* Change comparison semantic into (eq X 0) or (ne X 0) behavior
+         so that cmovz or cmovn will be matched later.
+
+         For reverse condition cases, we want to create a semantic that:
+           (eq X 0) --> pick up "else" part
+         For normal cases, we want to create a semantic that:
+           (ne X 0) --> pick up "then" part
+
+         The reason we do not physically change their rtx position is that
+         gcc will also do optimization by reverse condition,
+         which may break up our transformation semantic
+         if we physically change rtx right now.
+         So we just pick up the corresponding comparison operator
+         based on the reverse status, leaving the "swap position" job
+         after reload phase by using define_insn_and_split strategy.  */
+      operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE,
+				    VOIDmode, tmp, const0_rtx);
+    }
+
+create_template:
+  do {} while (0); /* dummy line */
+})
+
+;; QImode/HImode conditional move: widen every operand to SImode via
+;; paradoxical subregs and reuse the SImode movsicc expander.
+(define_expand "mov<mode>cc"
+  [(set (match_operand:QIHI 0 "register_operand" "")
+	(if_then_else:QIHI (match_operand 1 "comparison_operator" "")
+			   (match_operand:QIHI 2 "register_operand" "")
+			   (match_operand:QIHI 3 "register_operand" "")))]
+  "TARGET_CMOV"
+{
+  rtx insn;
+
+  /* For QImode and HImode conditional move,
+     make them to be SImode behavior.  */
+  operands[0] = simplify_gen_subreg (SImode, operands[0], <MODE>mode, 0);
+  operands[2] = simplify_gen_subreg (SImode, operands[2], <MODE>mode, 0);
+  operands[3] = simplify_gen_subreg (SImode, operands[3], <MODE>mode, 0);
+
+  /* gen_movsicc may return NULL if its own expander FAILs; propagate.  */
+  insn = gen_movsicc (operands[0], operands[1], operands[2], operands[3]);
+
+  if (!insn)
+    FAIL;
+
+  emit_insn (insn);
+  DONE;
+})
+
+;; Conditional move when operand 1 == 0.  The two alternatives let reload
+;; tie either source to the destination; the register that differs from
+;; the destination is the one actually moved by cmovz.
+(define_insn "cmovz"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+        (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovz\t%0, %2, %1
+   cmovz\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+;; Conditional move when operand 1 != 0.  Mirror image of cmovz above:
+;; one source is tied to the destination, the other is moved by cmovn.
+(define_insn "cmovn"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+	(if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovn\t%0, %2, %1
+   cmovn\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+;; Split the generic (eq/ne X 0) conditional move into cmovz/cmovn after
+;; reload, once we know which source shares the destination register.
+(define_insn_and_split "*movsicc"
+  [(set (match_operand:SI 0 "register_operand"                     "=r, r")
+	(if_then_else:SI (match_operator 1 "nds32_equality_comparison_operator"
+			   [(match_operand:SI 2 "register_operand" " r, r")
+			    (const_int 0)])
+			 (match_operand:SI 3 "register_operand"    " 0, r")
+			 (match_operand:SI 4 "register_operand"    " r, 0")))]
+  "TARGET_CMOV"
+  "#"
+  "reload_completed"
+  [(pc)]
+{
+  enum rtx_code code = GET_CODE (operands[1]);
+  rtx then_op = operands[3];
+  rtx else_op = operands[4];
+  rtx tmp;
+
+  /* According to the implementation in "movsicc" naming pattern,
+     if we make transformation in which the comparison code is EQ,
+     the desired target is at "else" part position semantically.
+     Now it is the time (after reload_completed) to physically
+     swap it to "then" part position.  */
+  if (code == EQ)
+    {
+      tmp     = then_op;
+      then_op = else_op;
+      else_op = tmp;
+    }
+
+  /* Choosing cmovz or cmovn is based on reload phase result.
+     After reload phase, one source operand will use
+     the same register as result operand.
+     We can use cmovz/cmovn to catch the other source operand
+     which has different register.
+     So we check register number to determine using cmovz or cmovn.  */
+  if (REGNO (then_op) == REGNO (operands[0]))
+    emit_insn (gen_cmovz (operands[0], operands[2], else_op, operands[0]));
+  else if (REGNO (else_op) == REGNO (operands[0]))
+    emit_insn (gen_cmovn (operands[0], operands[2], then_op, operands[0]));
+  else
+    gcc_unreachable ();
+
+  DONE;
+})
+
+
+;; ----------------------------------------------------------------------------
+;; Conditional Branch patterns
+;; ----------------------------------------------------------------------------
+
+;; Conditional branch expander.  NDS32 only has set-on-less-than style
+;; comparison, so every ordered comparison is lowered to slt/slts into
+;; $ta followed by a beqz/bnez branch on that result.
+(define_expand "cbranchsi4"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* If operands[2] is (const_int 0),
+     we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions.
+     So we have gcc generate original template rtx.  */
+  if (GET_CODE (operands[2]) == CONST_INT)
+    if (INTVAL (operands[2]) == 0)
+      if ((code != GTU)
+	  && (code != GEU)
+	  && (code != LTU)
+	  && (code != LEU))
+	goto create_template;
+
+  /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than)
+     behavior for the comparison, we might need to generate other
+     rtx patterns to achieve same semantic.  */
+  switch (code)
+    {
+    case GT:
+    case GTU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* GT  reg_A, const_int  =>  !(LT  reg_A, const_int + 1) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* We want to add 1 to the integer value
+	     of operands[2] to create 'slt' instruction.
+	     This calculation is performed on the host machine,
+	     which may be 64-bit integer.
+	     So the meaning of calculation result may be
+	     different from the 32-bit nds32 target.
+
+	     For example:
+	       0x7fffffff + 0x1 -> 0x80000000,
+	       this value is POSITIVE on 64-bit machine,
+	       but the expected value on 32-bit nds32 target
+	       should be NEGATIVE value.
+
+	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
+	     explicitly create SImode constant rtx.
+
+	     NOTE(review): if operands[2] can ever be the type's maximum
+	     value, the +1 wraps around; confirm the operand predicate
+	     excludes that case.  */
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* GT  reg_A, reg_B  =>  LT  reg_B, reg_A */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case GE:
+    case GEU:
+      /* GE  reg_A, reg_B      =>  !(LT  reg_A, reg_B) */
+      /* GE  reg_A, const_int  =>  !(LT  reg_A, const_int) */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == GE)
+	{
+	  /* GE, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* GEU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], EQ);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LT:
+    case LTU:
+      /* LT  reg_A, reg_B      =>  LT  reg_A, reg_B */
+      /* LT  reg_A, const_int  =>  LT  reg_A, const_int */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], NE);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* LE  reg_A, const_int  =>  LT  reg_A, const_int + 1 */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* We want to add 1 to the integer value
+	     of operands[2] to create 'slt' instruction.
+	     This calculation is performed on the host machine,
+	     which may be 64-bit integer.
+	     So the meaning of calculation result may be
+	     different from the 32-bit nds32 target.
+
+	     For example:
+	       0x7fffffff + 0x1 -> 0x80000000,
+	       this value is POSITIVE on 64-bit machine,
+	       but the expected value on 32-bit nds32 target
+	       should be NEGATIVE value.
+
+	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
+	     explicitly create SImode constant rtx.  */
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* LE  reg_A, reg_B  =>  !(LT  reg_B, reg_A) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case EQ:
+    case NE:
+      /* NDS32 ISA has various form for eq/ne behavior no matter
+         what kind of the operand is.
+         So just generate original template rtx.  */
+      goto create_template;
+
+    default:
+      FAIL;
+    }
+
+create_template:
+  do {} while (0); /* dummy line */
+})
+
+
+;; Branch on (eq/ne reg 0).  The output depends on the length attribute
+;; computed below: 2 = 16-bit short branch, 4 = 32-bit branch,
+;; 6/8 = inverted short/long branch around an unconditional 'j'.
+(define_insn "*cbranchsi4_equality_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"  "t, l, r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-comparison conditional branch has two forms:
+       32-bit instruction =>          beqz/bnez           imm16s << 1
+       16-bit instruction => beqzs8/bnezs8/beqz38/bnez38  imm8s << 1
+
+     For 32-bit case,
+     we assume it is always reachable. (but check range -65500 ~ 65500)
+
+     For 16-bit case,
+     it must satisfy { 255 >= (label - pc) >= -256 } condition.
+     However, since the $pc for nds32 is at the beginning of the instruction,
+     we should leave some length space for current insn.
+     So we use range -250 ~ 250.  */
+
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  return (code == EQ) ? "beqzs8\t%2" : "bnezs8\t%2";
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  return (code == EQ) ? "beqz38\t%1, %2" : "bnez38\t%1, %2";
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* For which_alternative==2, it should not be here.  */
+	  gcc_unreachable ();
+	}
+    case 4:
+      /* including constraints: t, l, and r */
+      return (code == EQ) ? "beqz\t%1, %2" : "bnez\t%1, %2";
+    case 6:
+      /* Target too far for a short branch: emit the inverted 16-bit
+         branch over an unconditional jump to the real label.  */
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  if (code == EQ)
+	    {
+	      /*    beqzs8  .L0
+	       *  =>
+	       *    bnezs8  .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bnezs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnezs8  .L0
+	       *  =>
+	       *    beqzs8  .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beqzs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  if (code == EQ)
+	    {
+	      /*    beqz38  $r0, .L0
+	       *  =>
+	       *    bnez38  $r0, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bnez38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnez38  $r0, .L0
+	       *  =>
+	       *    beqz38  $r0, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beqz38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* For which_alternative==2, it should not be here.  */
+	  gcc_unreachable ();
+	}
+    case 8:
+      /* constraint: t, l, r.  */
+      if (code == EQ)
+	{
+	  /*    beqz  $r8, .L0
+	   *  =>
+	   *    bnez  $r8, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "bnez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+      else
+	{
+	  /*    bnez  $r8, .L0
+	   *  =>
+	   *    beqz  $r8, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "beqz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 2
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			  (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+;; This pattern is dedicated to non-V3 ISAs,
+;; because they HAVE NO beqc/bnec instruction.
+;; Length 4 = direct beq/bne; length 8 = inverted branch around 'j'
+;; when the label is out of the imm14s range.
+(define_insn "*cbranchsi4_equality_reg"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "!TARGET_ISA_V3"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+
+     For 32-bit case,
+     we assume it is always reachable. (but check range -16350 ~ 16350).  */
+
+  switch (code)
+    {
+    case EQ:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "beq\t%1, %2, %3";
+	case 8:
+	  /*    beq  $r0, $r1, .L0
+	   *  =>
+	   *    bne  $r0, $r1, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    case NE:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "bne\t%1, %2, %3";
+	case 8:
+	  /*    bne  $r0, $r1, .L0
+	   *  =>
+	   *    beq  $r0, $r1, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			   (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+;; This pattern is dedicated to V3,
+;; because V3 DOES HAVE beqc/bnec instruction.
+;; Alternative 0 compares against a register (beq/bne, imm14s range);
+;; alternative 1 compares against an Is11 constant (beqc/bnec, imm8s
+;; range).  Length 8 means the inverted-branch-plus-jump expansion.
+(define_insn "*cbranchsi4_equality_reg_or_const_int"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r,    r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r, Is11")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "TARGET_ISA_V3"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+       32-bit instruction =>         beqc/bnec          imm8s << 1
+
+     For 32-bit case, we assume it is always reachable.
+     (but check range -16350 ~ 16350 and -250 ~ 250).  */
+
+  switch (code)
+    {
+    case EQ:
+      /* Every path below returns, so there is no fall-through to NE.  */
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beq\t%1, %2, %3";
+	    case 8:
+	      /*    beq  $r0, $r1, .L0
+	       *  =>
+	       *    bne  $r0, $r1, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beqc\t%1, %2, %3";
+	    case 8:
+	      /*    beqc  $r0, constant, .L0
+	       *  =>
+	       *    bnec  $r0, constant, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bnec\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    case NE:
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bne\t%1, %2, %3";
+	    case 8:
+	      /*    bne  $r0, $r1, .L0
+	       *  =>
+	       *    beq  $r0, $r1, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bnec\t%1, %2, %3";
+	    case 8:
+	      /*    bnec  $r0, constant, .L0
+	       *  =>
+	       *    beqc  $r0, constant, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beqc\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			  (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		     (const_int 4)
+		     (const_int 8))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+			  (le (minus (match_dup 3) (pc)) (const_int  250)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+(define_insn "*cbranchsi4_greater_less_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_greater_less_comparison_operator"
+			[(match_operand:SI 1 "register_operand" "r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-greater-less-comparison conditional branch has one form:
+       32-bit instruction =>      bgtz/bgez/bltz/blez     imm16s << 1
+
+     For 32-bit case, we assume it is always reachable.
+     (but check range -65500 ~ 65500).  */
+
+  if (get_attr_length (insn) == 8)
+    {
+      /* The branch target is too far to simply use one
+         bgtz/bgez/bltz/blez instruction.
+         We need to reverse condition and use 'j' to jump to the target.  */
+      switch (code)
+	{
+	case GT:
+	  /*   bgtz  $r8, .L0
+	   * =>
+	   *   blez  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "blez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case GE:
+	  /*   bgez  $r8, .L0
+	   * =>
+	   *   bltz  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "bltz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LT:
+	  /*   bltz  $r8, .L0
+	   * =>
+	   *   bgez  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "bgez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LE:
+	  /*   blez  $r8, .L0
+	   * =>
+	   *   bgtz  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "bgtz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+    }
+
+  switch (code)
+    {
+    case GT:
+      return "bgtz\t%1, %2";
+    case GE:
+      return "bgez\t%1, %2";
+    case LT:
+      return "bltz\t%1, %2";
+    case LE:
+      return "blez\t%1, %2";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+        (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			   (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+(define_expand "cstoresi4"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operator:SI 1 "comparison_operator"
+	  [(match_operand:SI 2 "register_operand" "")
+	   (match_operand:SI 3 "nonmemory_operand" "")]))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[1]);
+
+  switch (code)
+    {
+    case EQ:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A == const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A == reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+
+    case NE:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A != const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A != reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+
+    case GT:
+    case GTU:
+      /* reg_R = (reg_A > reg_B)       --> slt reg_R, reg_B, reg_A */
+      /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */
+      if (code == GT)
+	{
+	  /* GT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[3], operands[2]));
+	}
+      else
+	{
+	  /* GTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[3], operands[2]));
+	}
+
+      DONE;
+
+    case GE:
+    case GEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A >= const_int_B)
+	     --> movi reg_C, const_int_B - 1
+	         slt  reg_R, reg_C, reg_A */
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) - 1,
+					      SImode)));
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], tmp_reg, operands[2]));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A >= reg_B)
+	     --> slt  reg_R, reg_A, reg_B
+	         xori reg_R, reg_R, const_int_1 */
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[2], operands[3]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[2], operands[3]));
+	    }
+
+	  /* perform 'not' behavior */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+    case LT:
+    case LTU:
+      /* reg_R = (reg_A < reg_B)       --> slt reg_R, reg_A, reg_B */
+      /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[2], operands[3]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[2], operands[3]));
+	}
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A <= const_int_B)
+	     --> movi reg_C, const_int_B + 1
+	         slt  reg_R, reg_A, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) + 1,
+						      SImode)));
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], operands[2], tmp_reg));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A <= reg_B) --> slt  reg_R, reg_B, reg_A
+	                                  xori reg_R, reg_R, const_int_1 */
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[3], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[3], operands[2]));
+	    }
+
+	  /* perform 'not' behavior */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+
+    default:
+      gcc_unreachable ();
+    }
+})
+
+
+(define_insn "slts_compare"
+  [(set (match_operand:SI 0 "register_operand"         "=t,    t, r,    r")
+	(lt:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+	       (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slts45\t%1, %2
+   sltsi45\t%1, %2
+   slts\t%0, %1, %2
+   sltsi\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+(define_insn "slt_compare"
+  [(set (match_operand:SI 0 "register_operand"          "=t,    t, r,    r")
+	(ltu:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+		(match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slt45\t%1, %2
+   slti45\t%1, %2
+   slt\t%0, %1, %2
+   slti\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Unconditional and other jump instructions.
+
+(define_insn "jump"
+  [(set (pc) (label_ref (match_operand 0 "" "")))]
+  ""
+{
+  /* This unconditional jump has two forms:
+       32-bit instruction => j   imm24s << 1
+       16-bit instruction => j8  imm8s << 1
+
+     For 32-bit case,
+     we assume it is always reachable.
+     For 16-bit case,
+     it must satisfy { 255 >= (label - pc) >= -256 } condition.
+     However, since the $pc for nds32 is at the beginning of the instruction,
+     we should leave some length space for current insn.
+     So we use range -250 ~ 250.  */
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      return "j8\t%0";
+    case 4:
+      return "j\t%0";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
+			   (le (minus (match_dup 0) (pc)) (const_int  250)))
+		      (if_then_else (match_test "TARGET_16_BIT")
+				    (const_int 2)
+				    (const_int 4))
+		      (const_int 4)))])
+
+(define_insn "indirect_jump"
+  [(set (pc) (match_operand:SI 0 "register_operand" "r"))]
+  ""
+{
+  if (TARGET_16_BIT)
+    return "jr5\t%0";
+  else
+    return "jr\t%0";
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+;; Subroutine call instruction returning no value.
+;;   operands[0]: It should be a mem RTX whose address is
+;;                the address of the function.
+;;   operands[1]: It is the number of bytes of arguments pushed as a const_int.
+;;   operands[2]: It is the number of registers used as operands.
+
+(define_expand "call"
+  [(parallel [(call (match_operand 0 "memory_operand" "")
+		    (match_operand 1 "general_operand" ""))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+(define_insn "*call_register"
+  [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r"))
+		    (match_operand 1 "" ""))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+{
+  if (TARGET_16_BIT)
+    return "jral5\t%0";
+  else
+    return "jral\t%0";
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+        (if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+(define_insn "*call_immediate"
+  [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i"))
+		    (match_operand 1 "" ""))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%0"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; Subroutine call instruction returning a value.
+;;   operands[0]: It is the hard register in which the value is returned.
+;;   The rest three operands are the same as the
+;;   three operands of the 'call' instruction.
+;;   (but with numbers increased by one)
+
+(define_expand "call_value"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (match_operand 1 "memory_operand" "")
+		         (match_operand 2 "general_operand" "")))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+(define_insn "*call_value_register"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (mem (match_operand:SI 1 "register_operand" "r"))
+		         (match_operand 2 "" "")))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+{
+  if (TARGET_16_BIT)
+    return "jral5\t%1";
+  else
+    return "jral\t%1";
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+(define_insn "*call_value_immediate"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (mem (match_operand:SI 1 "immediate_operand" "i"))
+			 (match_operand 2 "" "")))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%1"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; prologue and epilogue.
+
+(define_expand "prologue" [(const_int 0)]
+  ""
+{
+  /* Note that only under V3/V3M ISA, we could use v3push prologue.  */
+  if (TARGET_V3PUSH)
+    nds32_expand_prologue_v3push();
+  else
+    nds32_expand_prologue();
+  DONE;
+})
+
+(define_expand "epilogue" [(const_int 0)]
+  ""
+{
+  /* Note that only under V3/V3M ISA, we could use v3pop epilogue.  */
+  if (TARGET_V3PUSH)
+    nds32_expand_epilogue_v3pop();
+  else
+    nds32_expand_epilogue();
+  DONE;
+})
+
+
+;; nop instruction.
+
+(define_insn "nop"
+  [(const_int 0)]
+  ""
+  "nop16"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+
+;; ----------------------------------------------------------------------------
+;; unspec operation patterns
+;; ----------------------------------------------------------------------------
+
+;; stack push/pop multiple
+
+(define_insn "*stack_push_multiple"
+  [(match_parallel 3 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand" "r")
+		   (match_operand:SI 1 "register_operand" "r")
+		   (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_STACK_PUSH_MULTIPLE)
+     ])]
+  ""
+{
+  /* Because we need to return a string buffer used by output_asm_insn(),
+     we use a static character array to store desired assembly output.  */
+  static char str_buffer[100];
+
+  /* A string representing the 'push.s' instruction.  */
+  const char *push_str = NULL;
+  /* A string representing the Rb and Re operands.  */
+  const char *RbRe_str = NULL;
+  /* The En4 encoding string of the instruction is
+     in the bitwise fashion of following: "fp gp lp sp".  */
+  const char *En4_str[16] =
+    {
+      /* 0:  0  0  0  0 */
+      "",
+      /* 1:  0  0  0  1 */
+      "{ $sp }",
+      /* 2:  0  0  1  0 */
+      "{ $lp }",
+      /* 3:  0  0  1  1 */
+      "{ $lp $sp }",
+      /* 4:  0  1  0  0 */
+      "{ $gp }",
+      /* 5:  0  1  0  1 */
+      "{ $gp $sp }",
+      /* 6:  0  1  1  0 */
+      "{ $gp $lp }",
+      /* 7:  0  1  1  1 */
+      "{ $gp $lp $sp }",
+      /* 8:  1  0  0  0 */
+      "{ $fp }",
+      /* 9:  1  0  0  1 */
+      "{ $fp $sp }",
+      /* 10: 1  0  1  0 */
+      "{ $fp $lp }",
+      /* 11: 1  0  1  1 */
+      "{ $fp $lp $sp }",
+      /* 12: 1  1  0  0 */
+      "{ $fp $gp }",
+      /* 13: 1  1  0  1 */
+      "{ $fp $gp $sp }",
+      /* 14: 1  1  1  0 */
+      "{ $fp $gp $lp }",
+      /* 15: 1  1  1  1 */
+      "{ $fp $gp $lp $sp }"
+    };
+
+  /* Create push_str string.
+     Note that 'smw.adm $sp,[$sp],$sp,0' means push nothing.  */
+  if (INTVAL (operands[2]) == 0
+      && REGNO (operands[0]) == SP_REGNUM
+      && REGNO (operands[1]) == SP_REGNUM)
+    return "";
+  else
+    push_str = "push.s\t";
+
+  /* Create RbRe_str string.
+     Note that we need to output ',' character if there exists En4 field.  */
+  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
+      RbRe_str = (INTVAL (operands[2]) != 0) ? "%0, %1, " : "%0, %1";
+  else
+      RbRe_str = "";
+
+  /* Create complete assembly code string.  */
+  snprintf (str_buffer, sizeof (str_buffer),
+			"%s%s%s",
+			push_str,
+			RbRe_str,
+			En4_str[INTVAL (operands[2])]);
+
+  /* Return formatted string.  */
+  return str_buffer;
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")])
+
+(define_insn "*stack_pop_multiple"
+  [(match_parallel 3 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand" "r")
+		   (match_operand:SI 1 "register_operand" "r")
+		   (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_STACK_POP_MULTIPLE)
+     ])]
+  ""
+{
+  /* Because we need to return a string buffer used by output_asm_insn(),
+     we use a static character array to store desired assembly output.  */
+  static char str_buffer[100];
+
+  /* A string representing the 'pop.s' instruction.  */
+  const char *pop_str = NULL;
+  /* A string representing the Rb and Re operands.  */
+  const char *RbRe_str = NULL;
+  /* The En4 encoding string of the instruction is
+     in the bitwise fashion of following: "fp gp lp sp".  */
+  const char *En4_str[16] =
+    {
+      /* 0:  0  0  0  0 */
+      "",
+      /* 1:  0  0  0  1 */
+      "{ $sp }",
+      /* 2:  0  0  1  0 */
+      "{ $lp }",
+      /* 3:  0  0  1  1 */
+      "{ $lp $sp }",
+      /* 4:  0  1  0  0 */
+      "{ $gp }",
+      /* 5:  0  1  0  1 */
+      "{ $gp $sp }",
+      /* 6:  0  1  1  0 */
+      "{ $gp $lp }",
+      /* 7:  0  1  1  1 */
+      "{ $gp $lp $sp }",
+      /* 8:  1  0  0  0 */
+      "{ $fp }",
+      /* 9:  1  0  0  1 */
+      "{ $fp $sp }",
+      /* 10: 1  0  1  0 */
+      "{ $fp $lp }",
+      /* 11: 1  0  1  1 */
+      "{ $fp $lp $sp }",
+      /* 12: 1  1  0  0 */
+      "{ $fp $gp }",
+      /* 13: 1  1  0  1 */
+      "{ $fp $gp $sp }",
+      /* 14: 1  1  1  0 */
+      "{ $fp $gp $lp }",
+      /* 15: 1  1  1  1 */
+      "{ $fp $gp $lp $sp }"
+    };
+
+  /* Create pop_str string.
+     Note that 'lmw.bim $sp,[$sp],$sp,0' means pop nothing.  */
+  if (INTVAL (operands[2]) == 0
+      && REGNO (operands[0]) == SP_REGNUM
+      && REGNO (operands[1]) == SP_REGNUM)
+    return "";
+  else
+    pop_str = "pop.s\t";
+
+  /* Create RbRe_str string.
+     Note that we need to output ',' character if there exists En4 field.  */
+  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
+      RbRe_str = (INTVAL (operands[2]) != 0) ? "%0, %1, " : "%0, %1";
+  else
+      RbRe_str = "";
+
+  /* Create complete assembly code string.  */
+  snprintf (str_buffer, sizeof (str_buffer),
+			"%s%s%s",
+			pop_str,
+			RbRe_str,
+			En4_str[INTVAL (operands[2])]);
+
+  /* Return formatted string.  */
+  return str_buffer;
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")])
+
+;; stack v3push/v3pop
+
+(define_insn "*stack_v3push"
+  [(match_parallel 2 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand"  "r")
+		   (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_STACK_V3PUSH)
+     ])]
+  ""
+{
+  return "v3push\t%0, %1";
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+(define_insn "*stack_v3pop"
+  [(match_parallel 2 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand"  "r")
+		   (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_STACK_V3POP)
+     ])]
+  ""
+{
+  return "v3pop\t%0, %1";
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+
+;; In nds32 target, the 'ret5' instruction is actually 'jr5 $lp'.
+;; This pattern is designed to distinguish function return
+;; from general indirect_jump pattern so that we can directly
+;; generate 'ret5' for readability.
+
+(define_insn "unspec_func_return"
+  [(set (pc)
+	(unspec:SI [(reg:SI LP_REGNUM)] UNSPEC_FUNC_RETURN))]
+  ""
+  "ret5"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+;; ----------------------------------------------------------------------------
+;; Jump Table patterns
+;; ----------------------------------------------------------------------------
+;; Need to implement ASM_OUTPUT_ADDR_VEC_ELT (for normal jump table)
+;; or ASM_OUTPUT_ADDR_DIFF_ELT (for pc relative jump table) as well.
+;;
+;; operands[0]: The index to dispatch on.
+;; operands[1]: The lower bound for indices in the table.
+;; operands[2]: The total range of indices in the table.
+;;              i.e. The largest index minus the smallest one.
+;; operands[3]: A label that precedes the table itself.
+;; operands[4]: A label to jump to if the index has a value outside the bounds.
+;;
+;; We need to create following sequences for jump table code generation:
+;;   A) k <-- (plus (operands[0]) (-operands[1]))
+;;   B) if (gtu k operands[2]) then goto operands[4]
+;;   C) t <-- operands[3]
+;;   D) z <-- (mem (plus (k << 0 or 1 or 2) t))
+;;   E) z <-- t + z (NOTE: This is only required for pc relative jump table.)
+;;   F) jump to target with register t or z
+;;
+;; The steps C, D, E, and F are performed by casesi_internal pattern.
+(define_expand "casesi"
+  [(match_operand:SI 0 "register_operand"  "r") ; index to jump on
+   (match_operand:SI 1 "immediate_operand" "i") ; lower bound
+   (match_operand:SI 2 "immediate_operand" "i") ; total range
+   (match_operand:SI 3 "" "")                   ; table label
+   (match_operand:SI 4 "" "")]                  ; Out of range label
+  ""
+{
+  rtx reg, test;
+
+  /* Step A: "k <-- (plus (operands[0]) (-operands[1]))".  */
+  if (operands[1] != const0_rtx)
+    {
+      reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_addsi3 (reg, operands[0],
+			     GEN_INT (-INTVAL (operands[1]))));
+      operands[0] = reg;
+    }
+
+  /* Step B: "if (gtu k operands[2]) then goto operands[4]".  */
+  test = gen_rtx_GTU (VOIDmode, operands[0], operands[2]);
+  emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2],
+				  operands[4]));
+
+  operands[5] = gen_reg_rtx (SImode);
+  /* Step C, D, E, and F, using another temporary register operands[5].  */
+  emit_jump_insn (gen_casesi_internal (operands[0],
+				       operands[3],
+				       operands[5]));
+  DONE;
+})
+
+;; We are receiving operands from casesi pattern:
+;;
+;; operands[0]: The index that has been subtracted by the lower bound.
+;; operands[1]: A label that precedes the table itself.
+;; operands[2]: A temporary register to retrieve value in table.
+;;
+;; We need to perform steps C, D, E, and F:
+;;
+;;   C) t <-- operands[1]
+;;   D) z <-- (mem (plus (operands[0] << m) t))
+;;            m is 2 for normal jump table.
+;;            m is 0, 1, or 2 for pc relative jump table based on diff size.
+;;   E) t <-- z + t (NOTE: This is only required for pc relative jump table.)
+;;   F) Jump to target with register t or z.
+;;
+;; The USE in this pattern is needed to tell flow analysis that this is
+;; a CASESI insn.  It has no other purpose.
+(define_insn "casesi_internal"
+  [(parallel [(set (pc)
+		   (mem:SI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "r")
+					     (const_int 4))
+				    (label_ref (match_operand 1 "" "")))))
+	      (use (label_ref (match_dup 1)))
+	      (clobber (match_operand:SI 2 "register_operand" ""))
+	      (clobber (reg:SI TA_REGNUM))])]
+  ""
+{
+  if (CASE_VECTOR_PC_RELATIVE)
+    return nds32_output_casesi_pc_relative (operands);
+  else
+    return nds32_output_casesi (operands);
+}
+  [(set_attr "length" "20")
+   (set_attr "type" "alu")])
+
+;; ----------------------------------------------------------------------------
+
+;; Performance Extension
+
+(define_insn "clzsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+	(clz:SI (match_operand:SI 1 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "clz\t%0, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "smaxsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smax:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "max\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "sminsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smin:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "min\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/6] Andes nds32: machine description of nds32 porting (2).
  2013-07-24 15:52   ` Chung-Ju Wu
@ 2013-07-25  9:42     ` Chung-Ju Wu
  2013-09-08 16:17       ` Chung-Ju Wu
  0 siblings, 1 reply; 18+ messages in thread
From: Chung-Ju Wu @ 2013-07-25  9:42 UTC (permalink / raw)
  To: Joseph S. Myers; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1196 bytes --]

On 7/24/13 11:50 PM, Chung-Ju Wu wrote:
>
> Thanks for the review comments.
> A revised patch is provided and here is a summary:
>
>    1. Use error_at () for diagnostics statement and start with lowercase.
>    2. Some process can be done in nds32.opt.
>       Remove unnecessary parts from nds32-common.c file.
>
>
> gcc/
> 2013-07-24  Chung-Ju Wu  <jasonwucj@gmail.com>
>          Shiva Chen  <shiva0217@gmail.com>
>
>      * config/nds32/nds32.md: New file.
>      * common/config/nds32: New directory and files.
>
>
> Best regards,
> jasonwucj
>

According to the summary of the revised patch on:
   http://gcc.gnu.org/ml/gcc-patches/2013-07/msg01138.html

we used form-feeds (Control-L character) to separate logical sections
and we need to modify it accordingly in this patch.

So we created another revised patch and here is a summary.
The new modification is listed as item 3:

   1. Use error_at () for diagnostics statement and start with lowercase.
   2. Some process can be done in nds32.opt.
      Remove unnecessary parts from nds32-common.c file.
   3. Use form-feeds (Control-L character) to separate logical sections.

Thanks again for your review.


Best regards,
jasonwucj




[-- Attachment #2: 2-nds32-backend-md-part2.v2.patch --]
[-- Type: text/plain, Size: 88853 bytes --]

diff --git gcc/common/config/nds32/nds32-common.c gcc/common/config/nds32/nds32-common.c
new file mode 100644
index 0000000..f82f725
--- /dev/null
+++ gcc/common/config/nds32/nds32-common.c
@@ -0,0 +1,117 @@
+/* Common hooks of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic-core.h"
+#include "tm.h"
+#include "common/common-target.h"
+#include "common/common-target-def.h"
+#include "opts.h"
+#include "flags.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_HANDLE_OPTION.  */
+static bool
+nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
+		     struct gcc_options *opts_set ATTRIBUTE_UNUSED,
+		     const struct cl_decoded_option *decoded,
+		     location_t loc)
+{
+  size_t     code  = decoded->opt_index;
+  int        value = decoded->value;
+
+  switch (code)
+    {
+    case OPT_misr_vector_size_:
+      /* Check the valid vector size: 4 or 16.  */
+      if (value != 4 && value != 16)
+	{
+	  error_at (loc, "for the option -misr-vector-size=X, the valid X "
+			 "must be: 4 or 16");
+	  return false;
+	}
+
+      return true;
+
+    case OPT_mcache_block_size_:
+      /* Check valid value: 4 8 16 32 64 128 256 512.  */
+      if (exact_log2 (value) < 2 || exact_log2 (value) > 9)
+	{
+	  error_at (loc, "for the option -mcache-block-size=X, the valid X "
+			 "must be: 4, 8, 16, 32, 64, 128, 256, or 512");
+	  return false;
+	}
+
+      return true;
+
+    default:
+      return true;
+    }
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
+static const struct default_options nds32_option_optimization_table[] =
+{
+  /* Enable -fomit-frame-pointer by default at -O1 or higher.  */
+  { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+  /* Enable -mv3push by default at -Os, but it is useless under V2 ISA.  */
+  { OPT_LEVELS_SIZE,   OPT_mv3push,             NULL, 1 },
+
+  { OPT_LEVELS_NONE,   0,                       NULL, 0 }
+};
+
+/* ------------------------------------------------------------------------ */
+\f
+/* Run-time Target Specification.  */
+
+/* Default enable
+     TARGET_GP_DIRECT: Generate gp-imply instruction.
+     TARGET_16_BIT   : Generate 16/32 bit mixed length instruction.
+     TARGET_PERF_EXT : Generate performance extension instruction.
+     TARGET_CMOV     : Generate conditional move instruction.  */
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS		\
+  (MASK_GP_DIRECT				\
+   | MASK_16_BIT				\
+   | MASK_PERF_EXT				\
+   | MASK_CMOV)
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION nds32_handle_option
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE nds32_option_optimization_table
+
+\f
+/* Defining the Output Assembler Language.  */
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+/* ------------------------------------------------------------------------ */
+
+struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER;
+
+/* ------------------------------------------------------------------------ */
diff --git gcc/config/nds32/nds32.md gcc/config/nds32/nds32.md
new file mode 100644
index 0000000..66f4e00
--- /dev/null
+++ gcc/config/nds32/nds32.md
@@ -0,0 +1,2835 @@
+;; Machine description of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2013 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; See file "rtl.def" for documentation on define_insn, match_*, et. al.
+
+;; Include predicates definition.
+(include "predicates.md")
+
+;; Include constraints definition.
+(include "constraints.md")
+
+;; Include iterators definition.
+(include "iterators.md")
+
+;; Include pipelines definition.
+(include "pipelines.md")
+
+
+;; Include constants definition.
+(include "constants.md")
+
+
+;; Include intrinsic functions definition.
+(include "nds32.intrinsic.md")
+
+;; Include block move for nds32 multiple load/store behavior.
+(include "nds32.multiple.md")
+
+;; Include DImode/DFmode operations.
+(include "nds32.doubleword.md")
+
+;; Include peephole patterns.
+(include "nds32.peephole2.md")
+
+
+;; Insn type, it is used to default other attribute values.
+(define_attr "type"
+  "unknown,move,load,store,alu,compare,branch,call,misc"
+  (const_string "unknown"))
+
+
+;; Length, in bytes, default is 4-bytes.
+(define_attr "length" "" (const_int 4))
+
+
+;; Enabled, which is used to enable/disable insn alternatives.
+;; Note that we use length and TARGET_16_BIT here as criteria.
+;; If the instruction pattern already check TARGET_16_BIT to
+;; determine the length by itself, its enabled attribute should be
+;; always 1 to avoid the conflict with the settings here.
+;; In short: a 2-byte (16-bit encoding) alternative is only usable
+;; when TARGET_16_BIT is on; all other alternatives are always usable.
+(define_attr "enabled" ""
+  (cond [(and (eq_attr "length" "2")
+	      (match_test "!TARGET_16_BIT"))
+	 (const_int 0)]
+	(const_int 1)))
+
+
+;; ----------------------------------------------------------------------------
+
+
+;; Move instructions.
+
+;; The mov<mode> expanders only need to legalize the one case the insn
+;; patterns cannot handle: a store whose source is not a register
+;; (there is no mem <- imm or mem <- mem instruction), so force the
+;; source into a register first.
+;; Use MEM_P for consistency with the rest of this file.
+
+(define_expand "movqi"
+  [(set (match_operand:QI 0 "general_operand" "")
+	(match_operand:QI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (QImode, operands[1]);
+})
+
+(define_expand "movhi"
+  [(set (match_operand:HI 0 "general_operand" "")
+	(match_operand:HI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (HImode, operands[1]);
+})
+
+(define_expand "movsi"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(match_operand:SI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (SImode, operands[1]);
+})
+
+;; Store patterns.
+;; NOTE(review): operands are numbered 1 = memory destination,
+;; 0 = register source (reversed from the usual convention) -- this
+;; matches the operand order expected by nds32_output_16bit_store /
+;; nds32_output_32bit_store; confirm those helpers before renumbering.
+;; Alternatives using 16-bit address constraints (U45/U33/U37) with low
+;; registers emit 2-byte encodings; the generic m/r form is 4 bytes.
+(define_insn "*store_si"
+  [(set (match_operand:SI 1 "memory_operand"   "=U45, U33, U37, U45, m")
+	(match_operand:SI 0 "register_operand" "   l,   l,   l,   d, r"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+    case 2:
+    case 3:
+      return nds32_output_16bit_store (operands, 4);
+
+    default:
+      return nds32_output_32bit_store (operands, 4);
+    }
+}
+  [(set_attr "type"   "store,store,store,store,store")
+   (set_attr "length" "    2,    2,    2,    2,    4")])
+
+;; QImode/HImode store; <byte> is the access size (1 or 2) from the
+;; mode iterator.
+(define_insn "*store_<mode>"
+  [(set (match_operand:QIHI 1 "memory_operand"   "=U33, m")
+	(match_operand:QIHI 0 "register_operand" "   l, r"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return nds32_output_16bit_store (operands, <byte>);
+
+    default:
+      return nds32_output_32bit_store (operands, <byte>);
+    }
+}
+  [(set_attr "type"   "store,store")
+   (set_attr "length" "    2,    4")])
+
+;; Load patterns.
+;; NOTE(review): like the store patterns above, operands are numbered
+;; 1 = register destination, 0 = memory source to match the order
+;; expected by nds32_output_16bit_load / nds32_output_32bit_load.
+(define_insn "*load_si"
+  [(set (match_operand:SI 1 "register_operand" "=  l,   l,   l,   d, r")
+	(match_operand:SI 0 "memory_operand"   " U45, U33, U37, U45, m"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+    case 2:
+    case 3:
+      return nds32_output_16bit_load (operands, 4);
+
+    default:
+      return nds32_output_32bit_load (operands, 4);
+    }
+}
+  [(set_attr "type"   "load,load,load,load,load")
+   (set_attr "length" "   2,   2,   2,   2,   4")])
+
+;; QImode/HImode load; <byte> is the access size (1 or 2).
+(define_insn "*load_<mode>"
+  [(set (match_operand:QIHI 1 "register_operand" "=  l, r")
+	(match_operand:QIHI 0 "memory_operand"   " U33, m"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return nds32_output_16bit_load (operands, <byte>);
+
+    default:
+      return nds32_output_32bit_load (operands, <byte>);
+    }
+}
+  [(set_attr "type"   "load,load")
+   (set_attr "length" "   2,   4")])
+
+;; General reg <-> reg/mem move for QI/HI/SI.
+;; Alternative 0: reg <- reg, 2-byte mov55 when TARGET_16_BIT,
+;; otherwise a 4-byte "ori rD, rS, 0".
+;; Alternatives 1/2: 32-bit store/load handled by the output helpers.
+;; NOTE(review): operand 1 is the destination and operand 0 the source,
+;; and alternatives 1/2 use memory constraints ('m') under a
+;; register_operand predicate -- this looks inconsistent; confirm
+;; whether the predicates should be nonimmediate_operand.
+(define_insn "*mov<mode>"
+  [(set (match_operand:QIHISI 1 "register_operand" "=r, m, r")
+	(match_operand:QIHISI 0 "register_operand" " r, r, m"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      if (get_attr_length (insn) == 2)
+	return "mov55\t%1, %0";
+      else
+	return "ori\t%1, %0, 0";
+    case 1:
+      return nds32_output_32bit_store (operands, <byte>);
+    case 2:
+      return nds32_output_32bit_load (operands, <byte>);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "alu,store,load")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 1
+       (const_int 4)
+       ;; Alternative 2
+       (const_int 4)
+     ])])
+
+;; For QImode and HImode, the immediate value can be fit in imm20s.
+;; So there is no need to support QI and HI in the split patterns.
+;; Also, we use const_int_operand to limit that only CONST_INT
+;; is able to match such instruction template.
+;;
+;; Besides, in the split condition, we ask big-constant split to be
+;; performed after reload phase. So that the mov2add optimization
+;; in postreload have chance to optimize the code.
+
+;; NOTE(review): the pattern name says "movsi" but the iterator is
+;; QIHISI (see the comment above for why that is safe for QI/HI).
+;; Alternative 4 ($r15, 't') must emit sethi+ori inline because the
+;; split below rewrites operand 0 and cannot be used for a fixed
+;; temporary; alternative 5 is the splittable form ("#").
+(define_insn_and_split "*movsi_const"
+  [(set (match_operand:QIHISI 0 "register_operand"  "=   d,    r,    r,    r,    t,    r")
+	(match_operand:QIHISI 1 "const_int_operand" " Ip05, Is05, Is20, Ihig, Ispl, Ispl"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      if (get_attr_length (insn) == 2)
+	return "movpi45\t%0, %1";
+      /* else fall through.  */
+    case 1:
+      if (get_attr_length (insn) == 2)
+	return "movi55\t%0, %1";
+      /* else fall through.  */
+    case 2:
+      return "movi\t%0, %1";
+    case 3:
+      return "sethi\t%0, hi20(%1)";
+    case 4:
+      /* Use $r15, if the value is NOT in the range of Is20,
+         we must output "sethi + ori" directly since
+         we may already passed the split stage.  */
+      return "sethi\t%0, hi20(%1)\;ori\t%0, %0, lo12(%1)";
+
+    default:
+      return "#";
+    }
+}
+   "reload_completed
+    && satisfies_constraint_Ispl (operands[1])
+    && !satisfies_constraint_Is20 (operands[1])"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 0) (plus:QIHISI (match_dup 0) (match_dup 2)))]
+{
+  /* Split into "sethi-equivalent" high part plus low 12-bit addend.  */
+  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xfff);
+  operands[1] = GEN_INT ((INTVAL (operands[1]) >> 12) << 12);
+}
+  [(set_attr "type" "alu,alu,alu,alu,alu,alu")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 1
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 2
+       (const_int 4)
+       ;; Alternative 3
+       (const_int 4)
+       ;; Alternative 4
+       (const_int 8)
+       ;; Alternative 5
+       (const_int 8)
+     ])])
+
+;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF
+;; are able to match such instruction template.
+;; "la" is a pseudo that the assembler expands (hence length 8).
+(define_insn "*move_addr"
+  [(set (match_operand:SI 0 "register_operand"       "=l, r")
+	(match_operand:SI 1 "nds32_symbolic_operand" " i, i"))]
+  ""
+  "la\t%0, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "8")])
+
+
+;; Load the high 20 bits of an immediate/symbolic value.
+;; A plain output template suffices here -- no C code needed.
+(define_insn "*sethi"
+  [(set (match_operand:SI 0 "register_operand"           "=r")
+	(high:SI (match_operand:SI 1 "immediate_operand" " i")))]
+  ""
+  "sethi\t%0, hi20(%1)"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; OR in the low 12 bits; pairs with *sethi above.  Constraint "0"
+;; ties operand 1 to the destination register.
+(define_insn "*lo_sum"
+  [(set (match_operand:SI 0 "register_operand"             "=r")
+	(lo_sum:SI (match_operand:SI 1 "register_operand"  " 0")
+		   (match_operand:SI 2 "immediate_operand" " i")))]
+  ""
+  "ori\t%0, %1, lo12(%2)"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Zero extension instructions.
+
+(define_expand "zero_extend<mode>si2"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(zero_extend:SI (match_operand:QIHI 1 "general_operand" "")))]
+  ""
+{
+  rtx tmp_reg;
+
+  /* We need to make sure operands[1] is a register.  */
+  if (!REG_P (operands[1]))
+    operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
+
+  /* If the pattern is "(mem X) <- (zero_extend (reg Y))",
+     we create two rtx patterns:
+       (reg:SI K) <- (zero_extend:SI (reg Y))
+       (mem:SI X) <- (reg:SI K)
+     The first rtx will be matched by '*zero_extend<mode>si2_reg' template,
+     and the second rtx will be matched by mov naming pattern.  */
+  if (MEM_P (operands[0]))
+    {
+      tmp_reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_zero_extend<mode>si2 (tmp_reg, operands[1]));
+      emit_insn (gen_movsi (operands[0], tmp_reg));
+
+      DONE;
+    }
+})
+
+;; Register form: zeb33/zeh33 (2-byte, low regs) or zeb/zeh (4-byte).
+(define_insn "*zero_extend<mode>si2_reg"
+  [(set (match_operand:SI 0 "register_operand"                   "=w, r")
+	(zero_extend:SI (match_operand:QIHI 1 "register_operand" " w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "ze<size>33\t%0, %1";
+    case 1:
+      return "ze<size>\t%0, %1";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+;; Zero-extending load.  NOTE(review): operands are numbered
+;; 1 = destination, 0 = memory source, matching the order the
+;; nds32_output_* helpers expect (same as the plain load patterns).
+(define_insn "*zero_extend<mode>si2_load"
+  [(set (match_operand:SI 1 "register_operand"                 "=  l, *r")
+	(zero_extend:SI (match_operand:QIHI 0 "memory_operand" " U33,  m")))]
+  ""
+{
+  if (which_alternative == 0)
+    return nds32_output_16bit_load (operands, <byte>);
+  else
+    return nds32_output_32bit_load (operands, <byte>);
+}
+  [(set_attr "length" "2, 4")
+   (set_attr "type" "load,load")])
+
+;; Sign extension instructions.
+
+(define_expand "extend<mode>si2"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(sign_extend:SI (match_operand:QIHI 1 "general_operand" "")))]
+  ""
+{
+  rtx tmp_reg;
+
+  /* We need to make sure operands[1] is a register.  */
+  if (!REG_P (operands[1]))
+    operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
+
+  /* If the pattern is "(mem X) <- (sign_extend (reg Y))",
+     we create two rtx patterns:
+       (reg:SI K) <- (sign_extend:SI (reg Y))
+       (mem:SI X) <- (reg:SI K)
+     The first rtx will be matched by '*extend<mode>si2_reg' template,
+     and the second rtx will be matched by mov naming pattern.  */
+  if (MEM_P (operands[0]))
+    {
+      tmp_reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_extend<mode>si2 (tmp_reg, operands[1]));
+      emit_insn (gen_movsi (operands[0], tmp_reg));
+
+      DONE;
+    }
+})
+
+;; Register form: seb33/seh33 (2-byte, low regs) or seb/seh (4-byte).
+(define_insn "*extend<mode>si2_reg"
+  [(set (match_operand:SI 0 "register_operand"                   "=w, r")
+	(sign_extend:SI (match_operand:QIHI 1 "register_operand" " w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "se<size>33\t%0, %1";
+    case 1:
+      return "se<size>\t%0, %1";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+;; Sign-extending load.  There is no single helper covering every
+;; addressing form for signed loads, so the C fragment dispatches on
+;; the shape of the address RTX and picks the matching lbsi/lhsi-family
+;; mnemonic.  Any unrecognized address form aborts via gcc_unreachable.
+(define_insn "*extend<mode>si2_load"
+  [(set (match_operand:SI 0 "register_operand"                 "=r")
+	(sign_extend:SI (match_operand:QIHI 1 "memory_operand" " m")))]
+  ""
+{
+  rtx mem_addr_op;
+  rtx op0;
+  rtx op1;
+
+  /* Retrieve rtx X from (mem (X ...)).  */
+  mem_addr_op = XEXP (operands[1], 0);
+
+  switch (GET_CODE (mem_addr_op))
+    {
+    case SYMBOL_REF:
+    case CONST:
+      /* (mem (symbol_ref X))
+         (mem (const (...)))
+         => access global variables,
+            use "lbsi.gp / lhsi.gp" */
+      return "l<size>si.gp\t%0, %1";
+
+    case REG:
+      /* (mem (reg X))
+         => access location by using register,
+            use "lbsi / lhsi" */
+      return "l<size>si\t%0, %1";
+
+    case PLUS:
+      /* get operands first */
+      op0 = XEXP (mem_addr_op, 0);
+      op1 = XEXP (mem_addr_op, 1);
+
+      /* (mem (plus reg reg))
+         => access location by adding two registers,
+            use "lbs / lhs" */
+      if (REG_P (op0) && REG_P (op1))
+	return "l<size>s\t%0, %1";
+
+      /* (mem (plus reg const_int))
+         => access location by adding one register with const_int,
+            use "lbsi / lhsi" */
+      if (REG_P (op0) && CONST_INT_P (op1))
+	return "l<size>si\t%0, %1";
+
+      /* (mem (plus (mult reg const_int) reg))
+         => access location by adding one register with
+            multiplication of register and const_int,
+            use "lbs / lhs" */
+      if (GET_CODE (op0) == MULT && REG_P (op1)
+	  &&       REG_P (XEXP (op0, 0))
+	  && CONST_INT_P (XEXP (op0, 1)))
+	return "l<size>s\t%0, %1";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case POST_MODIFY:
+      /* Get operands first.  */
+      op0 = XEXP (mem_addr_op, 0);
+      op1 = XEXP (mem_addr_op, 1);
+
+      /* (mem (post_modify (reg)
+                           (plus (reg) (reg))))
+         => access location by using register which will be
+            post modified with reg,
+            use "lbs.bi/ lhs.bi / lws.bi" */
+      if (REG_P (op0) && GET_CODE (op1) == PLUS
+	  && REG_P (XEXP (op1, 1)))
+	return "l<size>s.bi\t%0, %1";
+
+      /* (mem (post_modify (reg)
+                           (plus (reg) (const_int))))
+         => access location by using register which will be
+            post modified with const_int,
+            use "lbsi.bi/ lhsi.bi / lwsi.bi" */
+      if (REG_P (op0) && GET_CODE (op1) == PLUS
+	  && CONST_INT_P (XEXP (op1, 1)))
+	return "l<size>si.bi\t%0, %1";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case POST_INC:
+      /* (mem (post_inc reg))
+         => access location by using register which will be
+            post increment,
+            use "lbsi.bi / lhsi.bi" */
+      if (REG_P (XEXP (mem_addr_op, 0)))
+	return "l<size>si.bi\t%0, %1, <byte>";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case POST_DEC:
+      /* (mem (post_dec reg))
+         => access location by using register which will be
+            post decrement,
+            use "lbsi.bi / lhsi.bi" */
+      if (REG_P (XEXP (mem_addr_op, 0)))
+	return "l<size>si.bi\t%0, %1, -<byte>";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case LO_SUM:
+      /* Rewrite operands so %1 is the base reg and %2 the low part.  */
+      operands[2] = XEXP (mem_addr_op, 1);
+      operands[1] = XEXP (mem_addr_op, 0);
+      return "l<size>si\t%0, [%1 + lo12(%2)]";
+
+    default:
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+    }
+
+other_cases:
+  gcc_unreachable ();
+}
+  [(set_attr "type" "load")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Arithmetic instructions.
+
+;; Canonicalize a CONST_INT addend into SImode range so the insn
+;; constraints (Iu05/Is10/Is15/...) can judge it correctly.
+;; Use CONST_INT_P for consistency with the rest of this file.
+(define_expand "addsi3"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(plus:SI (match_operand:SI 1 "register_operand" "")
+		 (match_operand:SI 2 "nds32_nonmemory_nonsymbol_operand" "")))]
+  ""
+{
+  if (CONST_INT_P (operands[2]))
+    operands[2] = gen_int_mode (INTVAL (operands[2]), SImode);
+})
+
+;; Addition.  Alternatives 0-5 are 2-byte encodings (addi45/addi333/
+;; add45/add333 plus the stack-pointer forms addi10.sp/addri36.sp);
+;; alternatives 6-7 are the generic 4-byte addi/add.
+(define_insn "*add<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                      "=   d,    l,  d, l,    k,    l,    r, r")
+	(plus:QIHISI (match_operand:QIHISI 1 "register_operand"         "    0,    l, %0, l,    0,    k,    r, r")
+		     (match_operand:QIHISI 2 "nds32_reg_or_int_operand" " Iu05, Iu03,  r, l, Is10, Iu06, Is15, r")))]
+  ""
+  "@
+  addi45\t%0, %2
+  addi333\t%0, %1, %2
+  add45\t%0, %2
+  add333\t%0, %1, %2
+  addi10.sp\t%2
+  addri36.sp\t%0, %2
+  addi\t%0, %1, %2
+  add\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  2,  2,  2,  2,  4,  4")])
+
+;; Subtraction.  The expander accepts a register-or-imm15s minuend so
+;; the subri (reverse-subtract-immediate) alternative below can match.
+(define_expand "subsi3"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(minus:SI (match_operand:SI 1 "nds32_rimm15s_operand" "")
+		 (match_operand:SI 2 "nds32_rimm15s_operand" "")))]
+  ""
+  ""
+)
+
+(define_insn "*sub<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                    "=   d,    l, d, l,    r, r")
+	(minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" "    0,    l, 0, l, Is15, r")
+		      (match_operand:QIHISI 2 "nds32_rimm15s_operand" " Iu05, Iu03, r, l,    r, r")))]
+  ""
+  "@
+  subi45\t%0, %2
+  subi333\t%0, %1, %2
+  sub45\t%0, %2
+  sub333\t%0, %1, %2
+  subri\t%0, %2, %1
+  sub\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  2,  2,  4,  4")])
+
+
+;; GCC intends to simplify (plus (ashift ...) (reg))
+;; into (plus (mult ...) (reg)), so our matching pattern takes 'mult'
+;; and needs to ensure it is exact_log2 value.
+(define_insn "*add_slli"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand"  " r")
+			  (match_operand:SI 2 "immediate_operand" " i"))
+		 (match_operand:SI 3 "register_operand"           " r")))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[2])) != -1)
+   && (exact_log2 (INTVAL (operands[2])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'add_slli' instruction.  */
+  operands[2] = GEN_INT (floor_log2 (INTVAL (operands[2])));
+
+  return "add_slli\t%0, %3, %1, %2";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; add with shifted-right operand: rD = r3 + (r1 >> imm5).
+(define_insn "*add_srli"
+  [(set (match_operand:SI 0 "register_operand"                        "=   r")
+	(plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			      (match_operand:SI 2 "immediate_operand" " Iu05"))
+		 (match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "add_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; GCC intends to simplify (minus (reg) (ashift ...))
+;; into (minus (reg) (mult ...)), so our matching pattern takes 'mult'
+;; and needs to ensure it is exact_log2 value.
+(define_insn "*sub_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=r")
+	(minus:SI (match_operand:SI 1 "register_operand"           " r")
+		  (mult:SI (match_operand:SI 2 "register_operand"  " r")
+			   (match_operand:SI 3 "immediate_operand" " i"))))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[3])) != -1)
+   && (exact_log2 (INTVAL (operands[3])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'sub_slli' instruction.  */
+  operands[3] = GEN_INT (floor_log2 (INTVAL (operands[3])));
+
+  return "sub_slli\t%0, %1, %2, %3";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; subtract a shifted-right operand: rD = r1 - (r2 >> imm5).
+(define_insn "*sub_srli"
+  [(set (match_operand:SI 0 "register_operand"                         "=   r")
+	(minus:SI (match_operand:SI 1 "register_operand"               "    r")
+		  (lshiftrt:SI (match_operand:SI 2 "register_operand"  "    r")
+			       (match_operand:SI 3 "immediate_operand" " Iu05"))))]
+  "TARGET_ISA_V3"
+  "sub_srli\t%0, %1, %2, %3"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; Multiplication instructions.
+
+;; 32x32 -> 32 multiply: 2-byte mul33 (low regs, op1 tied to dest)
+;; or 4-byte three-operand mul.
+(define_insn "mulsi3"
+  [(set (match_operand:SI 0 "register_operand"          "= w, r")
+	(mult:SI (match_operand:SI 1 "register_operand" " %0, r")
+		 (match_operand:SI 2 "register_operand" "  w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mul33\t%0, %2";
+    case 1:
+      return "mul\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+;; Widening 32x32 -> 64 multiplies (V3 only): signed mulsr64 and
+;; unsigned mulr64.
+(define_insn "mulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V3"
+  "mulsr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "umulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V3"
+  "mulr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Multiply-accumulate instructions.
+
+;; maddr32: rD = rAcc + rA * rB.  Two patterns cover both operand
+;; orders of the commutative plus.
+(define_insn "*maddr32_0"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (match_operand:SI 3 "register_operand"          " 0")
+                 (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "*maddr32_1"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))
+                 (match_operand:SI 3 "register_operand"          " 0")))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; msubr32: rD = rAcc - rA * rB.
+(define_insn "*msubr32"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (minus:SI (match_operand:SI 3 "register_operand"          " 0")
+                  (mult:SI (match_operand:SI 1 "register_operand" " r")
+                           (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "msubr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Div Instructions.
+
+;; Combined divide+modulo: divsr/divr produce quotient (%0) and
+;; remainder (%3) in one instruction.
+(define_insn "divmodsi4"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+        (div:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand" " r")))
+   (set (match_operand:SI 3 "register_operand"         "=r")
+        (mod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divsr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "udivmodsi4"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+        (udiv:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand"  " r")))
+   (set (match_operand:SI 3 "register_operand"          "=r")
+        (umod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Boolean instructions.
+;; Note: We define the DImode versions in nds32.doubleword.md.
+
+;; ----------------------------------------------------------------------------
+;; 'AND' operation
+;; ----------------------------------------------------------------------------
+
+;; bitc (V3): rD = ~rA & rB (bit clear).  Note the swapped operand
+;; order in the output template vs. the RTL.
+(define_insn "bitc"
+  [(set (match_operand:SI 0 "register_operand"                 "=r")
+	(and:SI (not:SI (match_operand:SI 1 "register_operand" " r"))
+		(match_operand:SI 2 "register_operand"         " r")))]
+  "TARGET_ISA_V3"
+  "bitc\t%0, %2, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+;; AND expander: rewrite the 0xff/0xffff masks as zero-extensions so
+;; the cheaper zeb/zeh patterns match.
+;; Use CONST_INT_P and HOST_WIDE_INT for consistency with *andsi3.
+(define_expand "andsi3"
+  [(set (match_operand:SI         0 "register_operand" "")
+	(and:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
+  ""
+{
+  /* If operands[2] is const_int,
+     we might be able to use other more efficient instructions.  */
+  if (CONST_INT_P (operands[2]))
+    {
+      HOST_WIDE_INT mask = INTVAL (operands[2]);
+
+      if (mask == 255)
+	{
+	  /* ($r0 & 0xff)  ==>  (zeb $r0, $r0) */
+	  operands[1] = convert_to_mode (QImode, operands[1], 1);
+	  emit_insn (gen_zero_extendqisi2 (operands[0], operands[1]));
+	  DONE;
+	}
+      else if (mask == 65535)
+	{
+	  /* ($r0 & 0xffff)  ==>  (zeh $r0, $r0) */
+	  operands[1] = convert_to_mode (HImode, operands[1], 1);
+	  emit_insn (gen_zero_extendhisi2 (operands[0], operands[1]));
+	  DONE;
+	}
+    }
+})
+
+;; AND insn.  Picks the cheapest encoding by constraint: 16-bit and33,
+;; the special 16-bit mask forms (zeb33/xlsb33/x11b33/bmski33/fexti33),
+;; or 32-bit and/andi/bitci/bclr.
+;; Fixes vs. previous revision: GNU-style space before GEN_INT's paren,
+;; and the bit test uses a HOST_WIDE_INT shift -- "1 << 31" is signed
+;; int overflow (undefined behavior) and bit 31 is reachable here.
+(define_insn "*andsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    l,    l,    l,    l,    l,    r,    r,    r")
+	(and:SI (match_operand:SI 1 "register_operand" " %0, r,    l,    l,    l,    0,    0,    r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Izeb, Ixls, Ix11, Ibms, Ifex, Iu15, Ii15, Ic15")))]
+  ""
+{
+  HOST_WIDE_INT mask = INTVAL (operands[2]);
+  int zero_position;
+
+  /* 16-bit andi instructions:
+     andi Rt3,Ra3,0xff  -> zeb33  Rt3,Ra3
+     andi Rt3,Ra3,0x01  -> xlsb33 Rt3,Ra3
+     andi Rt3,Ra3,0x7ff -> x11b33 Rt3,Ra3
+     andi Rt3,Rt3,2^imm3u          -> bmski33 Rt3,imm3u
+     andi Rt3,Rt3,(2^(imm3u+1))-1  -> fexti33 Rt3,imm3u.  */
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "and33\t%0, %2";
+    case 1:
+      return "and\t%0, %1, %2";
+    case 2:
+      return "zeb33\t%0, %1";
+    case 3:
+      return "xlsb33\t%0, %1";
+    case 4:
+      return "x11b33\t%0, %1";
+    case 5:
+      operands[2] = GEN_INT (floor_log2 (mask));
+      return "bmski33\t%0, %2";
+    case 6:
+      operands[2] = GEN_INT (floor_log2 (mask + 1) - 1);
+      return "fexti33\t%0, %2";
+    case 7:
+      return "andi\t%0, %1, %2";
+    case 8:
+      operands[2] = GEN_INT (~mask);
+      return "bitci\t%0, %1, %2";
+    case 9:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bclr_p() test,
+         so that we can guarantee there is only one 0-bit
+         within the immediate value.  */
+      for (zero_position = 31; zero_position >= 0; zero_position--)
+	{
+	  if ((INTVAL (operands[2]) & ((HOST_WIDE_INT) 1 << zero_position)) == 0)
+	    {
+	      /* Found the 0-bit position.  */
+	      operands[2] = GEN_INT (zero_position);
+	      break;
+	    }
+	}
+      return "bclr\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  2,  2,  2,  2,  2,  4,  4,  4")])
+
+;; AND with a pre-shifted operand (V3): rD = r3 & (r1 << imm5).
+(define_insn "*and_slli"
+  [(set (match_operand:SI 0 "register_operand"                      "=   r")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand"   "    r")
+			    (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"              "    r")))]
+  "TARGET_ISA_V3"
+  "and_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; AND with a pre-shifted operand (V3): rD = r3 & (r1 >> imm5).
+(define_insn "*and_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "and_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'OR' operation
+;; ----------------------------------------------------------------------------
+
+;; For iorsi3 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; simply set different instruction length according to ISA.
+(define_expand "iorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(ior:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
+  ""
+  ""
+)
+
+;; This is the iorsi3 pattern for V3/V3M ISA,
+;; which DOES HAVE 'or33' instruction.
+;; So we can identify 'or Rt3,Ra3,Rb3' case and set its length to be 2.
+;; NOTE(review): the insn condition is currently "" -- confirm whether
+;; this should be gated on the V3/V3M ISA as the comment suggests.
+;; Fix vs. previous revision: the bit search shifts a HOST_WIDE_INT --
+;; "1 << 31" is signed int overflow (undefined behavior) and bit 31 is
+;; reachable for a bset immediate.
+(define_insn "*iorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(ior:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, Ie15")))]
+  ""
+{
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "or33\t%0, %2";
+    case 1:
+      return "or\t%0, %1, %2";
+    case 2:
+      return "ori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bset_p() test,
+         so that we can guarantee there is only one 1-bit
+         within the immediate value.  */
+      for (one_position = 31; one_position >= 0; one_position--)
+	{
+	  if ((INTVAL (operands[2]) & ((HOST_WIDE_INT) 1 << one_position)) != 0)
+	    {
+	      /* Found the 1-bit position.  */
+	      operands[2] = GEN_INT (one_position);
+	      break;
+	    }
+	}
+      return "bset\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+;; OR with a pre-shifted operand (V3): rD = r3 | (r1 << imm5).
+(define_insn "*or_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(ior:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "or_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; OR with a pre-shifted operand (V3): rD = r3 | (r1 >> imm5).
+(define_insn "*or_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "or_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'XOR' operation
+;; ----------------------------------------------------------------------------
+
+;; For xorsi3 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; simply set different instruction length according to ISA.
+(define_expand "xorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(xor:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
+  ""
+  ""
+)
+
+;; XOR insn: 2-byte xor33, 32-bit xor/xori, or single-bit toggle btgl
+;; for an immediate with exactly one 1-bit.
+;; Fix vs. previous revision: the bit search shifts a HOST_WIDE_INT --
+;; "1 << 31" is signed int overflow (undefined behavior) and bit 31 is
+;; reachable for a btgl immediate.
+(define_insn "*xorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(xor:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, It15")))]
+  ""
+{
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "xor33\t%0, %2";
+    case 1:
+      return "xor\t%0, %1, %2";
+    case 2:
+      return "xori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_btgl_p() test,
+         so that we can guarantee there is only one 1-bit
+         within the immediate value.  */
+      for (one_position = 31; one_position >= 0; one_position--)
+	{
+	  if ((INTVAL (operands[2]) & ((HOST_WIDE_INT) 1 << one_position)) != 0)
+	    {
+	      /* Found the 1-bit position.  */
+	      operands[2] = GEN_INT (one_position);
+	      break;
+	    }
+	}
+      return "btgl\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+;; V3 fused XOR-with-left-shift:
+;;   %0 = %3 ^ (%1 << %2), where %2 is an unsigned 5-bit immediate (Iu05).
+(define_insn "*xor_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(xor:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "xor_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; V3 fused XOR-with-logical-right-shift:
+;;   %0 = %3 ^ (%1 >> %2), where %2 is an unsigned 5-bit immediate (Iu05).
+(define_insn "*xor_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "xor_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Rotate Right Instructions.
+
+;; Rotate right by an unsigned 5-bit immediate ('rotri') or by a
+;; register amount ('rotr').  Both forms are 4-byte instructions.
+(define_insn "rotrsi3"
+  [(set (match_operand:SI 0 "register_operand"                 "=   r, r")
+	  (rotatert:SI (match_operand:SI 1 "register_operand"  "    r, r")
+		       (match_operand:SI 2 "nonmemory_operand" " Iu05, r")))]
+  ""
+  "@
+  rotri\t%0, %1, %2
+  rotr\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'NEG' operation
+;; ----------------------------------------------------------------------------
+
+;; For negsi2 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; output assembly code according to ISA.
+;; Real code generation happens in the anonymous "*negsi2" pattern below.
+(define_expand "negsi2"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(neg:SI (match_operand:SI 1 "register_operand" "")))]
+  ""
+  ""
+)
+
+;; Note that there is NO 'neg33' instruction for V2 ISA.
+;; So 'subri A,B,0' (its semantic is 'A = 0 - B')
+;; is the only option for V2 ISA.
+;; Alternative 0 emits the 16-bit 'neg33'; alternative 1 emits the
+;; 32-bit 'subri %0, %1, 0' form usable on every ISA.
+(define_insn "*negsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(neg:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+  "@
+   neg33\t%0, %1
+   subri\t%0, %1, 0"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'ONE_COMPLEMENT' operation
+;; ----------------------------------------------------------------------------
+
+;; For one_cmplsi2 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; output assembly code according to ISA.
+;; Real code generation happens in the anonymous "*one_cmplsi2"
+;; pattern below.
+(define_expand "one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(not:SI (match_operand:SI 1 "register_operand" "")))]
+  ""
+  ""
+)
+
+;; Bitwise complement: 16-bit 'not33', or the 32-bit 'nor' with the
+;; source register repeated in both operand positions.
+(define_insn "*one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(not:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+  "@
+   not33\t%0, %1
+   nor\t%0, %1, %1"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+;; ----------------------------------------------------------------------------
+
+;; Shift instructions.
+
+;; Shift left.  Alternatives: 16-bit 'slli333' (3-bit immediate, Iu03),
+;; 32-bit 'slli' (5-bit immediate, Iu05), and register-amount 'sll'.
+(define_insn "ashlsi3"
+  [(set (match_operand:SI 0 "register_operand"            "=   l,    r, r")
+	(ashift:SI (match_operand:SI 1 "register_operand" "    l,    r, r")
+		   (match_operand:SI 2 "general_operand"  " Iu03, Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "slli333\t%0, %1, %2";
+    case 1:
+      return "slli\t%0, %1, %2";
+    case 2:
+      return "sll\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+;; Arithmetic shift right.  Alternative 0 is the 16-bit two-operand
+;; 'srai45' (destination tied to operand 1 via the "0" constraint);
+;; alternatives 1 and 2 are the 32-bit 'srai'/'sra' forms.
+(define_insn "ashrsi3"
+  [(set (match_operand:SI 0 "register_operand"              "=   d,    r, r")
+	(ashiftrt:SI (match_operand:SI 1 "register_operand" "    0,    r, r")
+		     (match_operand:SI 2 "general_operand"  " Iu05, Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "srai45\t%0, %2";
+    case 1:
+      return "srai\t%0, %1, %2";
+    case 2:
+      return "sra\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+;; Logical shift right.  Alternative 0 is the 16-bit two-operand
+;; 'srli45' (destination tied to operand 1 via the "0" constraint);
+;; alternatives 1 and 2 are the 32-bit 'srli'/'srl' forms.
+(define_insn "lshrsi3"
+  [(set (match_operand:SI 0 "register_operand"              "=   d,    r, r")
+	(lshiftrt:SI (match_operand:SI 1 "register_operand" "    0,    r, r")
+		     (match_operand:SI 2 "general_operand"  " Iu05, Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "srli45\t%0, %2";
+    case 1:
+      return "srli\t%0, %1, %2";
+    case 2:
+      return "srl\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; ----------------------------------------------------------------------------
+;; Conditional Move patterns
+;; ----------------------------------------------------------------------------
+
+;; Expand SImode conditional move.  Since NDS32 only provides
+;; slt/slts for comparison, every condition is first reduced to a
+;; set-on-less-than result compared against zero, which cmovz/cmovn
+;; can then match.
+(define_expand "movsicc"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(if_then_else:SI (match_operand 1 "comparison_operator" "")
+			 (match_operand:SI 2 "register_operand" "")
+			 (match_operand:SI 3 "register_operand" "")))]
+  "TARGET_CMOV"
+{
+  if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+      && GET_MODE (XEXP (operands[1], 0)) == SImode
+      && XEXP (operands[1], 1) == const0_rtx)
+    {
+      /* If the operands[1] rtx is already (eq X 0) or (ne X 0),
+         we have gcc generate original template rtx.  */
+      goto create_template;
+    }
+  else
+    {
+      /* Since there is only 'slt'(Set when Less Than) instruction for
+         comparison in Andes ISA, the major strategy we use here is to
+         convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination.
+         We design constraints properly so that the reload phase will assist
+         to make one source operand to use same register as result operand.
+         Then we can use cmovz/cmovn to catch the other source operand
+         which has different register.  */
+      enum rtx_code code = GET_CODE (operands[1]);
+      enum rtx_code new_code = code;
+      rtx cmp_op0 = XEXP (operands[1], 0);
+      rtx cmp_op1 = XEXP (operands[1], 1);
+      rtx tmp;
+      int reverse = 0;
+
+      /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part
+         Strategy : Reverse condition and swap comparison operands
+
+         For example:
+
+             a <= b ? P : Q   (LE or LEU)
+         --> a >  b ? Q : P   (reverse condition)
+         --> b <  a ? Q : P   (swap comparison operands to achieve 'LT/LTU')
+
+             a >= b ? P : Q   (GE or GEU)
+         --> a <  b ? Q : P   (reverse condition to achieve 'LT/LTU')
+
+             a <  b ? P : Q   (LT or LTU)
+         --> (NO NEED TO CHANGE, it is already 'LT/LTU')
+
+             a >  b ? P : Q   (GT or GTU)
+         --> b <  a ? P : Q   (swap comparison operands to achieve 'LT/LTU') */
+      switch (code)
+	{
+	case NE:
+	  /*   (a != b ? P : Q)
+	     can be expressed as
+	       (a == b ? Q : P)
+	     so, fall through to reverse condition */
+	case GE: case GEU: case LE: case LEU:
+	  new_code = reverse_condition (code);
+	  reverse = 1;
+	  break;
+	case EQ: case GT: case GTU: case LT: case LTU:
+	  /* no need to reverse condition */
+	  break;
+	default:
+	  FAIL;
+	}
+
+      /* For '>' comparison operator, we swap operands
+         so that we can have 'LT/LTU' operator.  */
+      if (new_code == GT || new_code == GTU)
+	{
+	  tmp     = cmp_op0;
+	  cmp_op0 = cmp_op1;
+	  cmp_op1 = tmp;
+
+	  new_code = swap_condition (new_code);
+	}
+
+      /* Use a temporary register to store slt/slts result.  */
+      tmp = gen_reg_rtx (SImode);
+
+      /* Split EQ and NE because we do not have a direct comparison
+         for EQ and NE.  If we don't do this, the conditional move
+	 transformation will fail
+	 when producing (SET A (EQ B C)) or (SET A (NE B C)).  */
+      if (new_code == EQ)
+	{
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, tmp, GEN_INT (1)));
+	}
+      else if (new_code == NE)
+	{
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, GEN_INT (0), tmp));
+        }
+      else
+	/* This emit_insn will create corresponding 'slt/slts' instruction.  */
+	emit_insn (gen_rtx_SET (VOIDmode, tmp,
+				gen_rtx_fmt_ee (new_code, SImode,
+						cmp_op0, cmp_op1)));
+
+      /* Change comparison semantic into (eq X 0) or (ne X 0) behavior
+         so that cmovz or cmovn will be matched later.
+
+         For reverse condition cases, we want to create a semantic that:
+           (eq X 0) --> pick up "else" part
+         For normal cases, we want to create a semantic that:
+           (ne X 0) --> pick up "then" part
+
+         The reason we do not physically change their rtx position is that
+         gcc will also do optimization by reversing the condition,
+         which may break up our transformation semantic
+         if we physically change rtx right now.
+         So we just pick up the corresponding comparison operator
+         based on the reverse status, leaving the "swap position" job
+         after reload phase by using define_insn_and_split strategy.  */
+      operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE,
+				    VOIDmode, tmp, const0_rtx);
+    }
+
+create_template:
+  do {} while (0); /* dummy line */
+})
+
+;; QImode/HImode conditional move: widen all three data operands to
+;; SImode subregs and delegate to the "movsicc" expander above.
+(define_expand "mov<mode>cc"
+  [(set (match_operand:QIHI 0 "register_operand" "")
+	(if_then_else:QIHI (match_operand 1 "comparison_operator" "")
+			   (match_operand:QIHI 2 "register_operand" "")
+			   (match_operand:QIHI 3 "register_operand" "")))]
+  "TARGET_CMOV"
+{
+  rtx insn;
+
+  /* For QImode and HImode conditional move,
+     make them to be SImode behavior.  */
+  operands[0] = simplify_gen_subreg (SImode, operands[0], <MODE>mode, 0);
+  operands[2] = simplify_gen_subreg (SImode, operands[2], <MODE>mode, 0);
+  operands[3] = simplify_gen_subreg (SImode, operands[3], <MODE>mode, 0);
+
+  insn = gen_movsicc (operands[0], operands[1], operands[2], operands[3]);
+
+  /* gen_movsicc may return NULL when its expander FAILs.  */
+  if (!insn)
+    FAIL;
+
+  emit_insn (insn);
+  DONE;
+})
+
+;; cmovz: %0 = (%1 == 0) ? %2 : %3.  The two alternatives let reload
+;; tie either source operand to the destination register, so the insn
+;; only has to move the other source.
+(define_insn "cmovz"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+        (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovz\t%0, %2, %1
+   cmovz\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+;; cmovn: %0 = (%1 != 0) ? %2 : %3.  Mirror image of "cmovz" above;
+;; the alternatives tie either source to the destination register.
+(define_insn "cmovn"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+	(if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovn\t%0, %2, %1
+   cmovn\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+;; Split the generic (eq/ne X 0) conditional move into cmovz/cmovn
+;; after reload, once we know which source shares the destination
+;; register.
+(define_insn_and_split "*movsicc"
+  [(set (match_operand:SI 0 "register_operand"                     "=r, r")
+	(if_then_else:SI (match_operator 1 "nds32_equality_comparison_operator"
+			   [(match_operand:SI 2 "register_operand" " r, r")
+			    (const_int 0)])
+			 (match_operand:SI 3 "register_operand"    " 0, r")
+			 (match_operand:SI 4 "register_operand"    " r, 0")))]
+  "TARGET_CMOV"
+  "#"
+  "reload_completed"
+  [(pc)]
+{
+  enum rtx_code code = GET_CODE (operands[1]);
+  rtx then_op = operands[3];
+  rtx else_op = operands[4];
+  rtx tmp;
+
+  /* According to the implementation in "movsicc" naming pattern,
+     if we make transformation in which the comparison code is EQ,
+     the desired target is at "else" part position semantically.
+     Now it is the time (after reload_completed) to physically
+     swap it to "then" part position.  */
+  if (code == EQ)
+    {
+      tmp     = then_op;
+      then_op = else_op;
+      else_op = tmp;
+    }
+
+  /* Choosing cmovz or cmovn is based on reload phase result.
+     After reload phase, one source operand will use
+     the same register as result operand.
+     We can use cmovz/cmovn to catch the other source operand
+     which has different register.
+     So we check the register number to determine using cmovz or cmovn.  */
+  if (REGNO (then_op) == REGNO (operands[0]))
+    emit_insn (gen_cmovz (operands[0], operands[2], else_op, operands[0]))
+  else if (REGNO (else_op) == REGNO (operands[0]))
+    emit_insn (gen_cmovn (operands[0], operands[2], then_op, operands[0]));
+  else
+    gcc_unreachable ();
+
+  DONE;
+})
+
+
+;; ----------------------------------------------------------------------------
+;; Conditional Branch patterns
+;; ----------------------------------------------------------------------------
+
+;; Expand SImode conditional branch.  Comparisons that cannot be done
+;; directly (everything except eq/ne and signed compares against zero)
+;; are reduced to slt/slts into the TA_REGNUM temporary register,
+;; then re-expanded as an eq/ne-against-zero branch.
+(define_expand "cbranchsi4"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* If operands[2] is (const_int 0),
+     we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions.
+     So we have gcc generate original template rtx.  */
+  if (GET_CODE (operands[2]) == CONST_INT)
+    if (INTVAL (operands[2]) == 0)
+      if ((code != GTU)
+	  && (code != GEU)
+	  && (code != LTU)
+	  && (code != LEU))
+	goto create_template;
+
+  /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than)
+     behavior for the comparison, we might need to generate other
+     rtx patterns to achieve same semantic.  */
+  switch (code)
+    {
+    case GT:
+    case GTU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* GT  reg_A, const_int  =>  !(LT  reg_A, const_int + 1) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* We want to add 1 to the integer value
+	     of operands[2] to create 'slt' instruction.
+	     This calculation is performed on the host machine,
+	     which may be 64-bit integer.
+	     So the meaning of the calculation result may be
+	     different from the 32-bit nds32 target.
+
+	     For example:
+	       0x7fffffff + 0x1 -> 0x80000000,
+	       this value is POSITIVE on 64-bit machine,
+	       but the expected value on 32-bit nds32 target
+	       should be NEGATIVE value.
+
+	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
+	     explicitly create SImode constant rtx.  */
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* GT  reg_A, reg_B  =>  LT  reg_B, reg_A */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case GE:
+    case GEU:
+      /* GE  reg_A, reg_B      =>  !(LT  reg_A, reg_B) */
+      /* GE  reg_A, const_int  =>  !(LT  reg_A, const_int) */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == GE)
+	{
+	  /* GE, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* GEU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], EQ);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LT:
+    case LTU:
+      /* LT  reg_A, reg_B      =>  LT  reg_A, reg_B */
+      /* LT  reg_A, const_int  =>  LT  reg_A, const_int */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], NE);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* LE  reg_A, const_int  =>  LT  reg_A, const_int + 1 */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* We want to add 1 to the integer value
+	     of operands[2] to create 'slt' instruction.
+	     This calculation is performed on the host machine,
+	     which may be 64-bit integer.
+	     So the meaning of the calculation result may be
+	     different from the 32-bit nds32 target.
+
+	     For example:
+	       0x7fffffff + 0x1 -> 0x80000000,
+	       this value is POSITIVE on 64-bit machine,
+	       but the expected value on 32-bit nds32 target
+	       should be NEGATIVE value.
+
+	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
+	     explicitly create SImode constant rtx.  */
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* LE  reg_A, reg_B  =>  !(LT  reg_B, reg_A) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case EQ:
+    case NE:
+      /* NDS32 ISA has various form for eq/ne behavior no matter
+         what kind of the operand is.
+         So just generate original template rtx.  */
+      goto create_template;
+
+    default:
+      FAIL;
+    }
+
+create_template:
+  do {} while (0); /* dummy line */
+})
+
+
+;; Zero-comparison equality branch (beqz/bnez family).  The 16-bit
+;; encodings are beqzs8/bnezs8 (alternative 0, implicit register) and
+;; beqz38/bnez38 (alternative 1); alternative 2 only has the 32-bit
+;; beqz/bnez.  Lengths 6 and 8 emit a reversed short branch around an
+;; unconditional 'j' when the label is out of direct range.
+(define_insn "*cbranchsi4_equality_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"  "t, l, r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-comparison conditional branch has two forms:
+       32-bit instruction =>          beqz/bnez           imm16s << 1
+       16-bit instruction => beqzs8/bnezs8/beqz38/bnez38  imm8s << 1
+
+     For 32-bit case,
+     we assume it is always reachable. (but check range -65500 ~ 65500)
+
+     For 16-bit case,
+     it must satisfy { 255 >= (label - pc) >= -256 } condition.
+     However, since the $pc for nds32 is at the beginning of the instruction,
+     we should leave some length space for current insn.
+     So we use range -250 ~ 250.  */
+
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  return (code == EQ) ? "beqzs8\t%2" : "bnezs8\t%2";
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  return (code == EQ) ? "beqz38\t%1, %2" : "bnez38\t%1, %2";
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* For which_alternative==2, it should not be here.  */
+	  gcc_unreachable ();
+	}
+    case 4:
+      /* including constraints: t, l, and r */
+      return (code == EQ) ? "beqz\t%1, %2" : "bnez\t%1, %2";
+    case 6:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  if (code == EQ)
+	    {
+	      /*    beqzs8  .L0
+	       *  =>
+	       *    bnezs8  .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bnezs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnezs8  .L0
+	       *  =>
+	       *    beqzs8  .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beqzs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  if (code == EQ)
+	    {
+	      /*    beqz38  $r0, .L0
+	       *  =>
+	       *    bnez38  $r0, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bnez38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnez38  $r0, .L0
+	       *  =>
+	       *    beqz38  $r0, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beqz38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* For which_alternative==2, it should not be here.  */
+	  gcc_unreachable ();
+	}
+    case 8:
+      /* constraint: t, l, r.  */
+      if (code == EQ)
+	{
+	  /*    beqz  $r8, .L0
+	   *  =>
+	   *    bnez  $r8, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "bnez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+      else
+	{
+	  /*    bnez  $r8, .L0
+	   *  =>
+	   *    beqz  $r8, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "beqz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 2
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			  (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+;; This pattern is dedicated to non-V3 ISAs,
+;; because they HAVE NO beqc/bnec instruction.
+;; Register-register beq/bne.  A branch distance within +-16350 uses
+;; the 4-byte direct form; otherwise a reversed branch around an
+;; unconditional 'j' (8 bytes) is emitted.
+(define_insn "*cbranchsi4_equality_reg"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "!TARGET_ISA_V3"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+
+     For 32-bit case,
+     we assume it is always reachable. (but check range -16350 ~ 16350).  */
+
+  switch (code)
+    {
+    case EQ:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "beq\t%1, %2, %3";
+	case 8:
+	  /*    beq  $r0, $r1, .L0
+	   *  =>
+	   *    bne  $r0, $r1, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    case NE:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "bne\t%1, %2, %3";
+	case 8:
+	  /*    bne  $r0, $r1, .L0
+	   *  =>
+	   *    beq  $r0, $r1, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			   (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+;; This pattern is dedicated to V3,
+;; because V3 DOES HAVE beqc/bnec instruction.
+;; V3 equality branch: reg-reg beq/bne (alternative 0, +-16350 range)
+;; or reg vs. signed 11-bit constant beqc/bnec (alternative 1,
+;; +-250 range).  Out-of-range labels use the reversed-branch + 'j'
+;; 8-byte sequence.
+(define_insn "*cbranchsi4_equality_reg_or_const_int"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r,    r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r, Is11")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "TARGET_ISA_V3"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+       32-bit instruction =>         beqc/bnec          imm8s << 1
+
+     For 32-bit case, we assume it is always reachable.
+     (but check range -16350 ~ 16350 and -250 ~ 250).  */
+
+  switch (code)
+    {
+    case EQ:
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beq\t%1, %2, %3";
+	    case 8:
+	      /*    beq  $r0, $r1, .L0
+	       *  =>
+	       *    bne  $r0, $r1, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beqc\t%1, %2, %3";
+	    case 8:
+	      /*    beqc  $r0, constant, .L0
+	       *  =>
+	       *    bnec  $r0, constant, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bnec\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    case NE:
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bne\t%1, %2, %3";
+	    case 8:
+	      /*    bne  $r0, $r1, .L0
+	       *  =>
+	       *    beq  $r0, $r1, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bnec\t%1, %2, %3";
+	    case 8:
+	      /*    bnec  $r0, constant, .L0
+	       *  =>
+	       *    beqc  $r0, constant, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beqc\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			  (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		     (const_int 4)
+		     (const_int 8))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+			  (le (minus (match_dup 3) (pc)) (const_int  250)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+;; Signed compare-against-zero branch: bgtz/bgez/bltz/blez.  If the
+;; label is outside the +-65500 range, the condition is reversed and
+;; an unconditional 'j' jump is used (8-byte sequence).
+(define_insn "*cbranchsi4_greater_less_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_greater_less_comparison_operator"
+			[(match_operand:SI 1 "register_operand" "r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-greater-less-comparison conditional branch has one form:
+       32-bit instruction =>      bgtz/bgez/bltz/blez     imm16s << 1
+
+     For 32-bit case, we assume it is always reachable.
+     (but check range -65500 ~ 65500).  */
+
+  if (get_attr_length (insn) == 8)
+    {
+      /* The branch target is too far to simply use one
+         bgtz/bgez/bltz/blez instruction.
+         We need to reverse condition and use 'j' to jump to the target.  */
+      switch (code)
+	{
+	case GT:
+	  /*   bgtz  $r8, .L0
+	   * =>
+	   *   blez  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "blez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case GE:
+	  /*   bgez  $r8, .L0
+	   * =>
+	   *   bltz  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "bltz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LT:
+	  /*   bltz  $r8, .L0
+	   * =>
+	   *   bgez  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "bgez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LE:
+	  /*   blez  $r8, .L0
+	   * =>
+	   *   bgtz  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "bgtz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+    }
+
+  /* In-range case: emit the single direct branch.  */
+  switch (code)
+    {
+    case GT:
+      return "bgtz\t%1, %2";
+    case GE:
+      return "bgez\t%1, %2";
+    case LT:
+      return "bltz\t%1, %2";
+    case LE:
+      return "blez\t%1, %2";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+        (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			   (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+(define_expand "cstoresi4"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operator:SI 1 "comparison_operator"
+	  [(match_operand:SI 2 "register_operand" "")
+	   (match_operand:SI 3 "nonmemory_operand" "")]))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[1]);
+
+  switch (code)
+    {
+    case EQ:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A == const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A == reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+
+    case NE:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A != const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A != reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+
+    case GT:
+    case GTU:
+      /* reg_R = (reg_A > reg_B)       --> slt reg_R, reg_B, reg_A */
+      /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */
+      if (code == GT)
+	{
+	  /* GT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[3], operands[2]));
+	}
+      else
+	{
+	  /* GTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[3], operands[2]));
+	}
+
+      DONE;
+
+    case GE:
+    case GEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A >= const_int_B)
+	     --> movi reg_C, const_int_B - 1
+	         slt  reg_R, reg_C, reg_A */
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) - 1,
+					      SImode)));
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], tmp_reg, operands[2]));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A >= reg_B)
+	     --> slt  reg_R, reg_A, reg_B
+	         xori reg_R, reg_R, const_int_1 */
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[2], operands[3]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[2], operands[3]));
+	    }
+
+	  /* perform 'not' behavior */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+    case LT:
+    case LTU:
+      /* reg_R = (reg_A < reg_B)       --> slt reg_R, reg_A, reg_B */
+      /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[2], operands[3]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[2], operands[3]));
+	}
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A <= const_int_B)
+	     --> movi reg_C, const_int_B + 1
+	         slt  reg_R, reg_A, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) + 1,
+						      SImode)));
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], operands[2], tmp_reg));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A <= reg_B) --> slt  reg_R, reg_B, reg_A
+	                                  xori reg_R, reg_R, const_int_1 */
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[3], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[3], operands[2]));
+	    }
+
+	  /* perform 'not' behavior */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+
+    default:
+      gcc_unreachable ();
+    }
+})
+
+
+(define_insn "slts_compare"
+  [(set (match_operand:SI 0 "register_operand"         "=t,    t, r,    r")
+	(lt:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+	       (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slts45\t%1, %2
+   sltsi45\t%1, %2
+   slts\t%0, %1, %2
+   sltsi\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+(define_insn "slt_compare"
+  [(set (match_operand:SI 0 "register_operand"          "=t,    t, r,    r")
+	(ltu:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+		(match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slt45\t%1, %2
+   slti45\t%1, %2
+   slt\t%0, %1, %2
+   slti\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Unconditional and other jump instructions.
+
+(define_insn "jump"
+  [(set (pc) (label_ref (match_operand 0 "" "")))]
+  ""
+{
+  /* This unconditional jump has two forms:
+       32-bit instruction => j   imm24s << 1
+       16-bit instruction => j8  imm8s << 1
+
+     For 32-bit case,
+     we assume it is always reachable.
+     For 16-bit case,
+     it must satisfy { 255 >= (label - pc) >= -256 } condition.
+     However, since the $pc for nds32 is at the beginning of the instruction,
+     we should leave some length space for current insn.
+     So we use range -250 ~ 250.  */
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      return "j8\t%0";
+    case 4:
+      return "j\t%0";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
+			   (le (minus (match_dup 0) (pc)) (const_int  250)))
+		      (if_then_else (match_test "TARGET_16_BIT")
+				    (const_int 2)
+				    (const_int 4))
+		      (const_int 4)))])
+
+(define_insn "indirect_jump"
+  [(set (pc) (match_operand:SI 0 "register_operand" "r"))]
+  ""
+{
+  if (TARGET_16_BIT)
+    return "jr5\t%0";
+  else
+    return "jr\t%0";
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+;; Subroutine call instruction returning no value.
+;;   operands[0]: It should be a mem RTX whose address is
+;;                the address of the function.
+;;   operands[1]: It is the number of bytes of arguments pushed as a const_int.
+;;   operands[2]: It is the number of registers used as operands.
+
+(define_expand "call"
+  [(parallel [(call (match_operand 0 "memory_operand" "")
+		    (match_operand 1 "general_operand" ""))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+(define_insn "*call_register"
+  [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r"))
+		    (match_operand 1 "" ""))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+{
+  if (TARGET_16_BIT)
+    return "jral5\t%0";
+  else
+    return "jral\t%0";
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+        (if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+(define_insn "*call_immediate"
+  [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i"))
+		    (match_operand 1 "" ""))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%0"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; Subroutine call instruction returning a value.
+;;   operands[0]: It is the hard register in which the value is returned.
+;;   The remaining three operands are the same as the
+;;   three operands of the 'call' instruction.
+;;   (but with numbers increased by one)
+
+(define_expand "call_value"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (match_operand 1 "memory_operand" "")
+		         (match_operand 2 "general_operand" "")))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+(define_insn "*call_value_register"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (mem (match_operand:SI 1 "register_operand" "r"))
+		         (match_operand 2 "" "")))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+{
+  if (TARGET_16_BIT)
+    return "jral5\t%1";
+  else
+    return "jral\t%1";
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+(define_insn "*call_value_immediate"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (mem (match_operand:SI 1 "immediate_operand" "i"))
+			 (match_operand 2 "" "")))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%1"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; prologue and epilogue.
+
+(define_expand "prologue" [(const_int 0)]
+  ""
+{
+  /* Note that only under V3/V3M ISA, we could use v3push prologue.  */
+  if (TARGET_V3PUSH)
+    nds32_expand_prologue_v3push();
+  else
+    nds32_expand_prologue();
+  DONE;
+})
+
+(define_expand "epilogue" [(const_int 0)]
+  ""
+{
+  /* Note that only under V3/V3M ISA, we could use v3pop epilogue.  */
+  if (TARGET_V3PUSH)
+    nds32_expand_epilogue_v3pop();
+  else
+    nds32_expand_epilogue();
+  DONE;
+})
+
+
+;; nop instruction.
+
+(define_insn "nop"
+  [(const_int 0)]
+  ""
+  "nop16"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+
+;; ----------------------------------------------------------------------------
+;; unspec operation patterns
+;; ----------------------------------------------------------------------------
+
+;; stack push/pop multiple
+
+(define_insn "*stack_push_multiple"
+  [(match_parallel 3 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand" "r")
+		   (match_operand:SI 1 "register_operand" "r")
+		   (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_STACK_PUSH_MULTIPLE)
+     ])]
+  ""
+{
+  /* Because we need to return a string buffer used by output_asm_insn(),
+     we use a static character array to store desired assembly output.  */
+  static char str_buffer[100];
+
+  /* A string to present 'push.s' instruction.  */
+  const char *push_str = NULL;
+  /* A string to present Rb and Re operands.  */
+  const char *RbRe_str = NULL;
+  /* The En4 encoding string of the instruction is
+     in the bitwise fashion of following: "fp gp lp sp".  */
+  const char *En4_str[16] =
+    {
+      /* 0:  0  0  0  0 */
+      "",
+      /* 1:  0  0  0  1 */
+      "{ $sp }",
+      /* 2:  0  0  1  0 */
+      "{ $lp }",
+      /* 3:  0  0  1  1 */
+      "{ $lp $sp }",
+      /* 4:  0  1  0  0 */
+      "{ $gp }",
+      /* 5:  0  1  0  1 */
+      "{ $gp $sp }",
+      /* 6:  0  1  1  0 */
+      "{ $gp $lp }",
+      /* 7:  0  1  1  1 */
+      "{ $gp $lp $sp }",
+      /* 8:  1  0  0  0 */
+      "{ $fp }",
+      /* 9:  1  0  0  1 */
+      "{ $fp $sp }",
+      /* 10: 1  0  1  0 */
+      "{ $fp $lp }",
+      /* 11: 1  0  1  1 */
+      "{ $fp $lp $sp }",
+      /* 12: 1  1  0  0 */
+      "{ $fp $gp }",
+      /* 13: 1  1  0  1 */
+      "{ $fp $gp $sp }",
+      /* 14: 1  1  1  0 */
+      "{ $fp $gp $lp }",
+      /* 15: 1  1  1  1 */
+      "{ $fp $gp $lp $sp }"
+    };
+
+  /* Create push_str string.
+     Note that 'smw.adm $sp,[$sp],$sp,0' means push nothing.  */
+  if (INTVAL (operands[2]) == 0
+      && REGNO (operands[0]) == SP_REGNUM
+      && REGNO (operands[1]) == SP_REGNUM)
+    return "";
+  else
+    push_str = "push.s\t";
+
+  /* Create RbRe_str string.
+     Note that we need to output ',' character if there exists En4 field.  */
+  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
+      RbRe_str = (INTVAL (operands[2]) != 0) ? "%0, %1, " : "%0, %1";
+  else
+      RbRe_str = "";
+
+  /* Create complete assembly code string.  */
+  snprintf (str_buffer, sizeof (str_buffer),
+			"%s%s%s",
+			push_str,
+			RbRe_str,
+			En4_str[INTVAL (operands[2])]);
+
+  /* Return formatted string.  */
+  return str_buffer;
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")])
+
+(define_insn "*stack_pop_multiple"
+  [(match_parallel 3 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand" "r")
+		   (match_operand:SI 1 "register_operand" "r")
+		   (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_STACK_POP_MULTIPLE)
+     ])]
+  ""
+{
+  /* Because we need to return a string buffer used by output_asm_insn(),
+     we use a static character array to store desired assembly output.  */
+  static char str_buffer[100];
+
+  /* A string to present 'pop.s' instruction.  */
+  const char *pop_str = NULL;
+  /* A string to present Rb and Re operands.  */
+  const char *RbRe_str = NULL;
+  /* The En4 encoding string of the instruction is
+     in the bitwise fashion of following: "fp gp lp sp".  */
+  const char *En4_str[16] =
+    {
+      /* 0:  0  0  0  0 */
+      "",
+      /* 1:  0  0  0  1 */
+      "{ $sp }",
+      /* 2:  0  0  1  0 */
+      "{ $lp }",
+      /* 3:  0  0  1  1 */
+      "{ $lp $sp }",
+      /* 4:  0  1  0  0 */
+      "{ $gp }",
+      /* 5:  0  1  0  1 */
+      "{ $gp $sp }",
+      /* 6:  0  1  1  0 */
+      "{ $gp $lp }",
+      /* 7:  0  1  1  1 */
+      "{ $gp $lp $sp }",
+      /* 8:  1  0  0  0 */
+      "{ $fp }",
+      /* 9:  1  0  0  1 */
+      "{ $fp $sp }",
+      /* 10: 1  0  1  0 */
+      "{ $fp $lp }",
+      /* 11: 1  0  1  1 */
+      "{ $fp $lp $sp }",
+      /* 12: 1  1  0  0 */
+      "{ $fp $gp }",
+      /* 13: 1  1  0  1 */
+      "{ $fp $gp $sp }",
+      /* 14: 1  1  1  0 */
+      "{ $fp $gp $lp }",
+      /* 15: 1  1  1  1 */
+      "{ $fp $gp $lp $sp }"
+    };
+
+  /* Create pop_str string.
+     Note that 'lmw.bim $sp,[$sp],$sp,0' means pop nothing.  */
+  if (INTVAL (operands[2]) == 0
+      && REGNO (operands[0]) == SP_REGNUM
+      && REGNO (operands[1]) == SP_REGNUM)
+    return "";
+  else
+    pop_str = "pop.s\t";
+
+  /* Create RbRe_str string.
+     Note that we need to output ',' character if there exists En4 field.  */
+  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
+      RbRe_str = (INTVAL (operands[2]) != 0) ? "%0, %1, " : "%0, %1";
+  else
+      RbRe_str = "";
+
+  /* Create complete assembly code string.  */
+  snprintf (str_buffer, sizeof (str_buffer),
+			"%s%s%s",
+			pop_str,
+			RbRe_str,
+			En4_str[INTVAL (operands[2])]);
+
+  /* Return formatted string.  */
+  return str_buffer;
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")])
+
+;; stack v3push/v3pop
+
+(define_insn "*stack_v3push"
+  [(match_parallel 2 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand"  "r")
+		   (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_STACK_V3PUSH)
+     ])]
+  ""
+{
+  return "v3push\t%0, %1";
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+(define_insn "*stack_v3pop"
+  [(match_parallel 2 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand"  "r")
+		   (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_STACK_V3POP)
+     ])]
+  ""
+{
+  return "v3pop\t%0, %1";
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+
+;; In nds32 target, the 'ret5' instruction is actually 'jr5 $lp'.
+;; This pattern is designed to distinguish function return
+;; from general indirect_jump pattern so that we can directly
+;; generate 'ret5' for readability.
+
+(define_insn "unspec_func_return"
+  [(set (pc)
+	(unspec:SI [(reg:SI LP_REGNUM)] UNSPEC_FUNC_RETURN))]
+  ""
+  "ret5"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+;; ----------------------------------------------------------------------------
+;; Jump Table patterns
+;; ----------------------------------------------------------------------------
+;; Need to implement ASM_OUTPUT_ADDR_VEC_ELT (for normal jump table)
+;; or ASM_OUTPUT_ADDR_DIFF_ELT (for pc relative jump table) as well.
+;;
+;; operands[0]: The index to dispatch on.
+;; operands[1]: The lower bound for indices in the table.
+;; operands[2]: The total range of indices in the table.
+;;              i.e. The largest index minus the smallest one.
+;; operands[3]: A label that precedes the table itself.
+;; operands[4]: A label to jump to if the index has a value outside the bounds.
+;;
+;; We need to create following sequences for jump table code generation:
+;;   A) k <-- (plus (operands[0]) (-operands[1]))
+;;   B) if (gtu k operands[2]) then goto operands[4]
+;;   C) t <-- operands[3]
+;;   D) z <-- (mem (plus (k << 0 or 1 or 2) t))
+;;   E) z <-- t + z (NOTE: This is only required for pc relative jump table.)
+;;   F) jump to target with register t or z
+;;
+;; The steps C, D, E, and F are performed by casesi_internal pattern.
+(define_expand "casesi"
+  [(match_operand:SI 0 "register_operand"  "r") ; index to jump on
+   (match_operand:SI 1 "immediate_operand" "i") ; lower bound
+   (match_operand:SI 2 "immediate_operand" "i") ; total range
+   (match_operand:SI 3 "" "")                   ; table label
+   (match_operand:SI 4 "" "")]                  ; Out of range label
+  ""
+{
+  rtx reg, test;
+
+  /* Step A: "k <-- (plus (operands[0]) (-operands[1]))".  */
+  if (operands[1] != const0_rtx)
+    {
+      reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_addsi3 (reg, operands[0],
+			     GEN_INT (-INTVAL (operands[1]))));
+      operands[0] = reg;
+    }
+
+  /* Step B: "if (gtu k operands[2]) then goto operands[4]".  */
+  test = gen_rtx_GTU (VOIDmode, operands[0], operands[2]);
+  emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2],
+				  operands[4]));
+
+  operands[5] = gen_reg_rtx (SImode);
+  /* Step C, D, E, and F, using another temporary register operands[5].  */
+  emit_jump_insn (gen_casesi_internal (operands[0],
+				       operands[3],
+				       operands[5]));
+  DONE;
+})
+
+;; We are receiving operands from casesi pattern:
+;;
+;; operands[0]: The index that has been subtracted by the lower bound.
+;; operands[1]: A label that precedes the table itself.
+;; operands[2]: A temporary register to retrieve value in table.
+;;
+;; We need to perform steps C, D, E, and F:
+;;
+;;   C) t <-- operands[1]
+;;   D) z <-- (mem (plus (operands[0] << m) t))
+;;            m is 2 for normal jump table.
+;;            m is 0, 1, or 2 for pc relative jump table based on diff size.
+;;   E) t <-- z + t (NOTE: This is only required for pc relative jump table.)
+;;   F) Jump to target with register t or z.
+;;
+;; The USE in this pattern is needed to tell flow analysis that this is
+;; a CASESI insn.  It has no other purpose.
+(define_insn "casesi_internal"
+  [(parallel [(set (pc)
+		   (mem:SI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "r")
+					     (const_int 4))
+				    (label_ref (match_operand 1 "" "")))))
+	      (use (label_ref (match_dup 1)))
+	      (clobber (match_operand:SI 2 "register_operand" ""))
+	      (clobber (reg:SI TA_REGNUM))])]
+  ""
+{
+  if (CASE_VECTOR_PC_RELATIVE)
+    return nds32_output_casesi_pc_relative (operands);
+  else
+    return nds32_output_casesi (operands);
+}
+  [(set_attr "length" "20")
+   (set_attr "type" "alu")])
+
+;; ----------------------------------------------------------------------------
+
+;; Performance Extension
+
+(define_insn "clzsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+	(clz:SI (match_operand:SI 1 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "clz\t%0, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "smaxsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smax:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "max\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "sminsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smin:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "min\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/6] Andes nds32: machine description of nds32 porting (2).
  2013-07-25  9:42     ` Chung-Ju Wu
@ 2013-09-08 16:17       ` Chung-Ju Wu
  2013-09-14 15:15         ` Richard Sandiford
  0 siblings, 1 reply; 18+ messages in thread
From: Chung-Ju Wu @ 2013-09-08 16:17 UTC (permalink / raw)
  Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 606 bytes --]

On 7/25/13 5:42 PM, Chung-Ju Wu wrote:
> On 7/24/13 11:50 PM, Chung-Ju Wu wrote:
> So we created another revised patch and here is a summary.
> The new modification is listed as item 3:
> 
>   1. Use error_at () for diagnostics statement and start with lowercase.
>   2. Some process can be done in nds32.opt.
>      Remove unnecessary parts from nds32-common.c file.
>   3. Use form-feeds (Control-L character) to separate logical sections.
> 

It has been a while since the last v2 patch.
I created a new v3 patch to fix some typos and indentation.

Is it OK to apply on the trunk?


Best regards,
jasonwucj



[-- Attachment #2: 2-nds32-backend-md-part2.v3.patch --]
[-- Type: text/plain, Size: 88913 bytes --]

diff --git gcc/common/config/nds32/nds32-common.c gcc/common/config/nds32/nds32-common.c
new file mode 100644
index 0000000..f82f725
--- /dev/null
+++ gcc/common/config/nds32/nds32-common.c
@@ -0,0 +1,117 @@
+/* Common hooks of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic-core.h"
+#include "tm.h"
+#include "common/common-target.h"
+#include "common/common-target-def.h"
+#include "opts.h"
+#include "flags.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_HANDLE_OPTION.  */
+static bool
+nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
+		     struct gcc_options *opts_set ATTRIBUTE_UNUSED,
+		     const struct cl_decoded_option *decoded,
+		     location_t loc)
+{
+  size_t     code  = decoded->opt_index;
+  int        value = decoded->value;
+
+  switch (code)
+    {
+    case OPT_misr_vector_size_:
+      /* Check the valid vector size: 4 or 16.  */
+      if (value != 4 && value != 16)
+	{
+	  error_at (loc, "for the option -misr-vector-size=X, the valid X "
+			 "must be: 4 or 16");
+	  return false;
+	}
+
+      return true;
+
+    case OPT_mcache_block_size_:
+      /* Check valid value: 4 8 16 32 64 128 256 512.  */
+      if (exact_log2 (value) < 2 || exact_log2 (value) > 9)
+	{
+	  error_at (loc, "for the option -mcache-block-size=X, the valid X "
+			 "must be: 4, 8, 16, 32, 64, 128, 256, or 512");
+	  return false;
+	}
+
+      return true;
+
+    default:
+      return true;
+    }
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
+static const struct default_options nds32_option_optimization_table[] =
+{
+  /* Enable -fomit-frame-pointer by default at -O1 or higher.  */
+  { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+  /* Enable -mv3push by default at -Os, but it is useless under V2 ISA.  */
+  { OPT_LEVELS_SIZE,   OPT_mv3push,             NULL, 1 },
+
+  { OPT_LEVELS_NONE,   0,                       NULL, 0 }
+};
+
+/* ------------------------------------------------------------------------ */
+\f
+/* Run-time Target Specification.  */
+
+/* Default enable
+     TARGET_GP_DIRECT: Generate gp-imply instruction.
+     TARGET_16_BIT   : Generate 16/32 bit mixed length instruction.
+     TARGET_PERF_EXT : Generate performance extension instruction.
+     TARGET_CMOV     : Generate conditional move instruction.  */
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS		\
+  (MASK_GP_DIRECT				\
+   | MASK_16_BIT				\
+   | MASK_PERF_EXT				\
+   | MASK_CMOV)
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION nds32_handle_option
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE nds32_option_optimization_table
+
+\f
+/* Defining the Output Assembler Language.  */
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+/* ------------------------------------------------------------------------ */
+
+struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER;
+
+/* ------------------------------------------------------------------------ */
diff --git gcc/config/nds32/nds32.md gcc/config/nds32/nds32.md
new file mode 100644
index 0000000..869726e
--- /dev/null
+++ gcc/config/nds32/nds32.md
@@ -0,0 +1,2835 @@
+;; Machine description of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2013 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; See file "rtl.def" for documentation on define_insn, match_*, et. al.
+
+;; Include predicates definition.
+(include "predicates.md")
+
+;; Include constraints definition.
+(include "constraints.md")
+
+;; Include iterators definition.
+(include "iterators.md")
+
+;; Include pipelines definition.
+(include "pipelines.md")
+
+
+;; Include constants definition.
+(include "constants.md")
+
+
+;; Include intrinsic functions definition.
+(include "nds32.intrinsic.md")
+
+;; Include block move for nds32 multiple load/store behavior.
+(include "nds32.multiple.md")
+
+;; Include DImode/DFmode operations.
+(include "nds32.doubleword.md")
+
+;; Include peephole patterns.
+(include "nds32.peephole2.md")
+
+
+;; Insn type, it is used to default other attribute values.
+(define_attr "type"
+  "unknown,move,load,store,alu,compare,branch,call,misc"
+  (const_string "unknown"))
+
+
+;; Length, in bytes, default is 4-bytes.
+(define_attr "length" "" (const_int 4))
+
+
+;; Enabled, which is used to enable/disable insn alternatives.
+;; Note that we use length and TARGET_16_BIT here as criteria.
+;; If the instruction pattern already check TARGET_16_BIT to
+;; determine the length by itself, its enabled attribute should be
+;; always 1 to avoid the conflict with the settings here.
+(define_attr "enabled" ""
+  (cond [(and (eq_attr "length" "2")
+	      (match_test "!TARGET_16_BIT"))
+	 (const_int 0)]
+	(const_int 1)))
+
+
+;; ----------------------------------------------------------------------------
+
+
+;; Move instructions.
+
+(define_expand "movqi"
+  [(set (match_operand:QI 0 "general_operand" "")
+	(match_operand:QI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (GET_CODE (operands[0]) == MEM && !REG_P (operands[1]))
+    operands[1] = force_reg (QImode, operands[1]);
+})
+
+(define_expand "movhi"
+  [(set (match_operand:HI 0 "general_operand" "")
+	(match_operand:HI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (GET_CODE (operands[0]) == MEM && !REG_P (operands[1]))
+    operands[1] = force_reg (HImode, operands[1]);
+})
+
+(define_expand "movsi"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(match_operand:SI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (GET_CODE (operands[0]) == MEM && !REG_P (operands[1]))
+    operands[1] = force_reg (SImode, operands[1]);
+})
+
+(define_insn "*store_si"
+  [(set (match_operand:SI 0 "memory_operand"   "=U45, U33, U37, U45, m")
+	(match_operand:SI 1 "register_operand" "   l,   l,   l,   d, r"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+    case 2:
+    case 3:
+      return nds32_output_16bit_store (operands, 4);
+
+    default:
+      return nds32_output_32bit_store (operands, 4);
+    }
+}
+  [(set_attr "type"   "store,store,store,store,store")
+   (set_attr "length" "    2,    2,    2,    2,    4")])
+
+(define_insn "*store_<mode>"
+  [(set (match_operand:QIHI 0 "memory_operand"   "=U33, m")
+	(match_operand:QIHI 1 "register_operand" "   l, r"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return nds32_output_16bit_store (operands, <byte>);
+
+    default:
+      return nds32_output_32bit_store (operands, <byte>);
+    }
+}
+  [(set_attr "type"   "store,store")
+   (set_attr "length" "    2,    4")])
+
+(define_insn "*load_si"
+  [(set (match_operand:SI 0 "register_operand" "=  l,   l,   l,   d, r")
+	(match_operand:SI 1 "memory_operand"   " U45, U33, U37, U45, m"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+    case 2:
+    case 3:
+      return nds32_output_16bit_load (operands, 4);
+
+    default:
+      return nds32_output_32bit_load (operands, 4);
+    }
+}
+  [(set_attr "type"   "load,load,load,load,load")
+   (set_attr "length" "   2,   2,   2,   2,   4")])
+
+(define_insn "*load_<mode>"
+  [(set (match_operand:QIHI 0 "register_operand" "=  l, r")
+	(match_operand:QIHI 1 "memory_operand"   " U33, m"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return nds32_output_16bit_load (operands, <byte>);
+
+    default:
+      return nds32_output_32bit_load (operands, <byte>);
+    }
+}
+  [(set_attr "type"   "load,load")
+   (set_attr "length" "   2,   4")])
+
+(define_insn "*mov<mode>"
+  [(set (match_operand:QIHISI 0 "register_operand" "=r, m, r")
+	(match_operand:QIHISI 1 "register_operand" " r, r, m"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      if (get_attr_length (insn) == 2)
+	return "mov55\t%0, %1";
+      else
+	return "ori\t%0, %1, 0";
+    case 1:
+      return nds32_output_32bit_store (operands, <byte>);
+    case 2:
+      return nds32_output_32bit_load (operands, <byte>);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "alu,store,load")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 1
+       (const_int 4)
+       ;; Alternative 2
+       (const_int 4)
+     ])])
+
+;; For QImode and HImode, the immediate value can be fit in imm20s.
+;; So there is no need to support QI and HI in the split patterns.
+;; Also, we use const_int_operand to limit that only CONST_INT
+;; is able to match such instruction template.
+;;
+;; Besides, in the split condition, we ask the big-constant split to be
+;; performed after the reload phase, so that the mov2add optimization
+;; in postreload has a chance to optimize the code.
+
+;; Load a constant integer into a register.  The small-constant
+;; alternatives use 2-byte encodings when available; alternative 4
+;; (hard register $r15, "t") must emit the full "sethi + ori" pair
+;; inline because splitting may no longer be possible; alternative 5
+;; returns "#" so the define_split below breaks it into
+;; "sethi/movi + addi" after reload.
+;; NOTE(review): the name says "movsi" but the iterator is QIHISI;
+;; per the comment above, QI/HI constants always fit in imm20s, so the
+;; split alternatives should only trigger for SImode -- confirm the
+;; name/iterator combination is intentional.
+(define_insn_and_split "*movsi_const"
+  [(set (match_operand:QIHISI 0 "register_operand"  "=   d,    r,    r,    r,    t,    r")
+	(match_operand:QIHISI 1 "const_int_operand" " Ip05, Is05, Is20, Ihig, Ispl, Ispl"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      if (get_attr_length (insn) == 2)
+	return "movpi45\t%0, %1";
+      /* else fall through.  */
+    case 1:
+      if (get_attr_length (insn) == 2)
+	return "movi55\t%0, %1";
+      /* else fall through.  */
+    case 2:
+      return "movi\t%0, %1";
+    case 3:
+      return "sethi\t%0, hi20(%1)";
+    case 4:
+      /* Use $r15, if the value is NOT in the range of Is20,
+         we must output "sethi + ori" directly since
+         we may already passed the split stage.  */
+      return "sethi\t%0, hi20(%1)\;ori\t%0, %0, lo12(%1)";
+
+    default:
+      return "#";
+    }
+}
+  "reload_completed
+   && satisfies_constraint_Ispl (operands[1])
+   && !satisfies_constraint_Is20 (operands[1])"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 0) (plus:QIHISI (match_dup 0) (match_dup 2)))]
+{
+  /* Split C = (C & ~0xfff) + (C & 0xfff): first set the high 20 bits,
+     then add the low 12 bits.  */
+  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xfff);
+  operands[1] = GEN_INT ((INTVAL (operands[1]) >> 12) << 12);
+}
+  [(set_attr "type" "alu,alu,alu,alu,alu,alu")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 1
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 2
+       (const_int 4)
+       ;; Alternative 3
+       (const_int 4)
+       ;; Alternative 4
+       (const_int 8)
+       ;; Alternative 5
+       (const_int 8)
+     ])])
+
+;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF
+;; are able to match such instruction template.
+;; Load a symbolic address (CONST/SYMBOL_REF/LABEL_REF) into a register
+;; with the "la" pseudo instruction; the assembler expands it, so the
+;; conservative length is 8 bytes.
+(define_insn "*move_addr"
+  [(set (match_operand:SI 0 "register_operand"       "=l, r")
+	(match_operand:SI 1 "nds32_symbolic_operand" " i, i"))]
+  ""
+  "la\t%0, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "8")])
+
+
+;; Set the high 20 bits of a register from an immediate (HIGH rtx).
+(define_insn "*sethi"
+  [(set (match_operand:SI 0 "register_operand"           "=r")
+	(high:SI (match_operand:SI 1 "immediate_operand" " i")))]
+  ""
+{
+  return "sethi\t%0, hi20(%1)";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; Complete a sethi/lo_sum pair: OR in the low 12 bits of an immediate.
+;; Operand 1 is tied to operand 0 ("0") because ori overwrites in place.
+(define_insn "*lo_sum"
+  [(set (match_operand:SI 0 "register_operand"             "=r")
+	(lo_sum:SI (match_operand:SI 1 "register_operand"  " 0")
+		   (match_operand:SI 2 "immediate_operand" " i")))]
+  ""
+  "ori\t%0, %1, lo12(%2)"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Zero extension instructions.
+
+;; Expander for zero extension QI/HI -> SI.  Forces the source into a
+;; register, and when the destination is memory, routes the extension
+;; through a fresh SImode pseudo followed by a plain SImode store.
+(define_expand "zero_extend<mode>si2"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(zero_extend:SI (match_operand:QIHI 1 "general_operand" "")))]
+  ""
+{
+  rtx tmp_reg;
+
+  /* We need to make sure operands[1] is a register.  */
+  if (!REG_P (operands[1]))
+    operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
+
+  /* If the pattern is "(mem X) <- (zero_extend (reg Y))",
+     we create two rtx patterns:
+       (reg:SI K) <- (zero_extend:SI (reg Y))
+       (mem:SI X) <- (reg:SI K)
+     The first rtx will be matched by '*zero_extend<mode>si2_reg' template,
+     and the second rtx will be matched by mov naming pattern.  */
+  if (MEM_P (operands[0]))
+    {
+      tmp_reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_zero_extend<mode>si2 (tmp_reg, operands[1]));
+      emit_insn (gen_movsi (operands[0], tmp_reg));
+
+      DONE;
+    }
+})
+
+;; Register-to-register zero extension: 2-byte "zeb33/zeh33" for low
+;; registers ("w"), 4-byte "zeb/zeh" otherwise.  <size> expands to the
+;; b/h suffix for the current mode.
+(define_insn "*zero_extend<mode>si2_reg"
+  [(set (match_operand:SI 0 "register_operand"                   "=w, r")
+	(zero_extend:SI (match_operand:QIHI 1 "register_operand" " w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "ze<size>33\t%0, %1";
+    case 1:
+      return "ze<size>\t%0, %1";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+;; Zero-extending load: the unsigned lbi/lhi family already zero-extends,
+;; so this is just a 16-bit or 32-bit load of <byte> bytes.
+(define_insn "*zero_extend<mode>si2_load"
+  [(set (match_operand:SI 0 "register_operand"                 "=  l, *r")
+	(zero_extend:SI (match_operand:QIHI 1 "memory_operand" " U33,  m")))]
+  ""
+{
+  if (which_alternative == 0)
+    return nds32_output_16bit_load (operands, <byte>);
+  else
+    return nds32_output_32bit_load (operands, <byte>);
+}
+  [(set_attr "length" "2, 4")
+   (set_attr "type" "load,load")])
+
+;; Sign extension instructions.
+
+;; Expander for sign extension QI/HI -> SI.  Mirrors zero_extend<mode>si2:
+;; force the source into a register, and split a memory destination into
+;; extend-to-pseudo plus SImode store.
+(define_expand "extend<mode>si2"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(sign_extend:SI (match_operand:QIHI 1 "general_operand" "")))]
+  ""
+{
+  rtx tmp_reg;
+
+  /* We need to make sure operands[1] is a register.  */
+  if (!REG_P (operands[1]))
+    operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
+
+  /* If the pattern is "(mem X) <- (sign_extend (reg Y))",
+     we create two rtx patterns:
+       (reg:SI K) <- (sign_extend:SI (reg Y))
+       (mem:SI X) <- (reg:SI K)
+     The first rtx will be matched by '*extend<mode>si2_reg' template,
+     and the second rtx will be matched by mov naming pattern.  */
+  if (MEM_P (operands[0]))
+    {
+      tmp_reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_extend<mode>si2 (tmp_reg, operands[1]));
+      emit_insn (gen_movsi (operands[0], tmp_reg));
+
+      DONE;
+    }
+})
+
+;; Register-to-register sign extension: 2-byte "seb33/seh33" for low
+;; registers, 4-byte "seb/seh" otherwise.
+(define_insn "*extend<mode>si2_reg"
+  [(set (match_operand:SI 0 "register_operand"                   "=w, r")
+	(sign_extend:SI (match_operand:QIHI 1 "register_operand" " w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "se<size>33\t%0, %1";
+    case 1:
+      return "se<size>\t%0, %1";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+;; Sign-extending load.  There is no single assembly mnemonic covering all
+;; address forms, so this template inspects the address rtx and picks the
+;; matching lbs*/lhs* variant; any unrecognized form is a bug and aborts.
+;; NOTE(review): no "length" attribute is set here, so the default insn
+;; length applies -- confirm it matches the 4-byte encodings emitted.
+(define_insn "*extend<mode>si2_load"
+  [(set (match_operand:SI 0 "register_operand"                 "=r")
+	(sign_extend:SI (match_operand:QIHI 1 "memory_operand" " m")))]
+  ""
+{
+  rtx mem_addr_op;
+  rtx op0;
+  rtx op1;
+
+  /* Retrieve rtx X from (mem (X ...)).  */
+  mem_addr_op = XEXP (operands[1], 0);
+
+  switch (GET_CODE (mem_addr_op))
+    {
+    case SYMBOL_REF:
+    case CONST:
+      /* (mem (symbol_ref X))
+         (mem (const (...)))
+         => access global variables,
+            use "lbsi.gp / lhsi.gp" */
+      return "l<size>si.gp\t%0, %1";
+
+    case REG:
+      /* (mem (reg X))
+         => access location by using register,
+            use "lbsi / lhsi" */
+      return "l<size>si\t%0, %1";
+
+    case PLUS:
+      /* get operands first */
+      op0 = XEXP (mem_addr_op, 0);
+      op1 = XEXP (mem_addr_op, 1);
+
+      /* (mem (plus reg reg))
+         => access location by adding two registers,
+            use "lbs / lhs" */
+      if (REG_P (op0) && REG_P (op1))
+	return "l<size>s\t%0, %1";
+
+      /* (mem (plus reg const_int))
+         => access location by adding one register with const_int,
+            use "lbsi / lhsi" */
+      if (REG_P (op0) && CONST_INT_P (op1))
+	return "l<size>si\t%0, %1";
+
+      /* (mem (plus (mult reg const_int) reg))
+         => access location by adding one register with
+            multiplication of register and const_int,
+            use "lbs / lhs" */
+      if (GET_CODE (op0) == MULT && REG_P (op1)
+	  &&       REG_P (XEXP (op0, 0))
+	  && CONST_INT_P (XEXP (op0, 1)))
+	return "l<size>s\t%0, %1";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case POST_MODIFY:
+      /* Get operands first.  */
+      op0 = XEXP (mem_addr_op, 0);
+      op1 = XEXP (mem_addr_op, 1);
+
+      /* (mem (post_modify (reg)
+                           (plus (reg) (reg))))
+         => access location by using register which will be
+            post modified with reg,
+            use "lbs.bi/ lhs.bi / lws.bi" */
+      if (REG_P (op0) && GET_CODE (op1) == PLUS
+	  && REG_P (XEXP (op1, 1)))
+	return "l<size>s.bi\t%0, %1";
+
+      /* (mem (post_modify (reg)
+                           (plus (reg) (const_int))))
+         => access location by using register which will be
+            post modified with const_int,
+            use "lbsi.bi/ lhsi.bi / lwsi.bi" */
+      if (REG_P (op0) && GET_CODE (op1) == PLUS
+	  && CONST_INT_P (XEXP (op1, 1)))
+	return "l<size>si.bi\t%0, %1";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case POST_INC:
+      /* (mem (post_inc reg))
+         => access location by using register which will be
+            post increment,
+            use "lbsi.bi / lhsi.bi" */
+      if (REG_P (XEXP (mem_addr_op, 0)))
+	return "l<size>si.bi\t%0, %1, <byte>";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case POST_DEC:
+      /* (mem (post_dec reg))
+         => access location by using register which will be
+            post decrement,
+            use "lbsi.bi / lhsi.bi" */
+      if (REG_P (XEXP (mem_addr_op, 0)))
+	return "l<size>si.bi\t%0, %1, -<byte>";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case LO_SUM:
+      /* Rewrite the operands so %1 prints the base register and %2 the
+         low-part immediate.  Note operands[2] must be set from the old
+         operands[1] BEFORE operands[1] is overwritten.  */
+      operands[2] = XEXP (mem_addr_op, 1);
+      operands[1] = XEXP (mem_addr_op, 0);
+      return "l<size>si\t%0, [%1 + lo12(%2)]";
+
+    default:
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+    }
+
+other_cases:
+  gcc_unreachable ();
+}
+  [(set_attr "type" "load")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Arithmetic instructions.
+
+;; Standard addsi3 expander: canonicalize a CONST_INT addend into SImode
+;; before the anonymous *add<mode>3 template matches it.
+(define_expand "addsi3"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(plus:SI (match_operand:SI 1 "register_operand" "")
+		 (match_operand:SI 2 "nds32_nonmemory_nonsymbol_operand" "")))]
+  ""
+{
+  if (GET_CODE (operands[2]) == CONST_INT)
+    operands[2] = gen_int_mode (INTVAL (operands[2]), SImode);
+})
+
+;; Addition for QI/HI/SI.  Alternatives 0-5 are 2-byte encodings
+;; (including the stack-pointer forms addi10.sp / addri36.sp via the
+;; "k" constraint); alternatives 6-7 are the generic 4-byte addi/add.
+(define_insn "*add<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                      "=   d,    l,  d, l,    k,    l,    r, r")
+	(plus:QIHISI (match_operand:QIHISI 1 "register_operand"         "    0,    l, %0, l,    0,    k,    r, r")
+		     (match_operand:QIHISI 2 "nds32_reg_or_int_operand" " Iu05, Iu03,  r, l, Is10, Iu06, Is15, r")))]
+  ""
+  "@
+  addi45\t%0, %2
+  addi333\t%0, %1, %2
+  add45\t%0, %2
+  add333\t%0, %1, %2
+  addi10.sp\t%2
+  addri36.sp\t%0, %2
+  addi\t%0, %1, %2
+  add\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  2,  2,  2,  2,  4,  4")])
+
+;; Standard subsi3 expander; the anonymous *sub<mode>3 template below
+;; does the actual instruction selection.
+(define_expand "subsi3"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(minus:SI (match_operand:SI 1 "nds32_rimm15s_operand" "")
+		  (match_operand:SI 2 "nds32_rimm15s_operand" "")))]
+  ""
+  ""
+)
+
+;; Subtraction for QI/HI/SI.  Alternative 4 handles a constant minuend
+;; via the reverse-subtract "subri" (operand order is swapped in the
+;; template: subri rD, rS, imm computes imm - rS).
+(define_insn "*sub<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                    "=   d,    l, d, l,    r, r")
+	(minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" "    0,    l, 0, l, Is15, r")
+		      (match_operand:QIHISI 2 "nds32_rimm15s_operand" " Iu05, Iu03, r, l,    r, r")))]
+  ""
+  "@
+  subi45\t%0, %2
+  subi333\t%0, %1, %2
+  sub45\t%0, %2
+  sub333\t%0, %1, %2
+  subri\t%0, %2, %1
+  sub\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  2,  2,  4,  4")])
+
+
+;; GCC intends to simplify (plus (ashift ...) (reg))
+;; into (plus (mult ...) (reg)), so our matching pattern takes 'mult'
+;; and needs to ensure it is exact_log2 value.
+;; NOTE(review): operand 2 uses "immediate_operand" yet the insn
+;; condition applies INTVAL to it unconditionally; a symbolic immediate
+;; would not be a CONST_INT there -- consider "const_int_operand".
+(define_insn "*add_slli"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand"  " r")
+			  (match_operand:SI 2 "immediate_operand" " i"))
+		 (match_operand:SI 3 "register_operand"           " r")))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[2])) != -1)
+   && (exact_log2 (INTVAL (operands[2])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'add_slli' instruction.  */
+  operands[2] = GEN_INT (floor_log2 (INTVAL (operands[2])));
+
+  return "add_slli\t%0, %3, %1, %2";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Add a register to another register shifted right by an immediate.
+(define_insn "*add_srli"
+  [(set (match_operand:SI 0 "register_operand"                        "=   r")
+	(plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			      (match_operand:SI 2 "immediate_operand" " Iu05"))
+		 (match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "add_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; GCC intends to simplify (minus (reg) (ashift ...))
+;; into (minus (reg) (mult ...)), so our matching pattern takes 'mult'
+;; and needs to ensure it is exact_log2 value.
+;; NOTE(review): as with *add_slli, operand 3 is "immediate_operand"
+;; but INTVAL is applied to it in the condition -- consider
+;; "const_int_operand".
+(define_insn "*sub_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=r")
+	(minus:SI (match_operand:SI 1 "register_operand"           " r")
+		  (mult:SI (match_operand:SI 2 "register_operand"  " r")
+			   (match_operand:SI 3 "immediate_operand" " i"))))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[3])) != -1)
+   && (exact_log2 (INTVAL (operands[3])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'sub_slli' instruction.  */
+  operands[3] = GEN_INT (floor_log2 (INTVAL (operands[3])));
+
+  return "sub_slli\t%0, %1, %2, %3";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Subtract a right-shifted register from a register.
+(define_insn "*sub_srli"
+  [(set (match_operand:SI 0 "register_operand"                         "=   r")
+	(minus:SI (match_operand:SI 1 "register_operand"               "    r")
+		  (lshiftrt:SI (match_operand:SI 2 "register_operand"  "    r")
+			       (match_operand:SI 3 "immediate_operand" " Iu05"))))]
+  "TARGET_ISA_V3"
+  "sub_srli\t%0, %1, %2, %3"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; Multiplication instructions.
+
+;; 32x32 -> 32 multiply.  Alternative 0 is the 2-byte "mul33" for low
+;; registers with the destination tied to operand 1 (commutative "%0").
+(define_insn "mulsi3"
+  [(set (match_operand:SI 0 "register_operand"          "= w, r")
+	(mult:SI (match_operand:SI 1 "register_operand" " %0, r")
+		 (match_operand:SI 2 "register_operand" "  w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mul33\t%0, %2";
+    case 1:
+      return "mul\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+;; Signed 32x32 -> 64 widening multiply (V2/V3 ISA only).
+(define_insn "mulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V2 || TARGET_ISA_V3"
+  "mulsr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; Unsigned 32x32 -> 64 widening multiply (V2/V3 ISA only).
+(define_insn "umulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V2 || TARGET_ISA_V3"
+  "mulr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Multiply-accumulate instructions.
+
+;; Multiply-add: %0 = %3 + %1 * %2 with %3 tied to the destination.
+;; Two templates cover both canonical orderings of the PLUS operands.
+(define_insn "*maddr32_0"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (match_operand:SI 3 "register_operand"          " 0")
+                 (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; Same as *maddr32_0 with the mult as the first PLUS operand.
+(define_insn "*maddr32_1"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))
+                 (match_operand:SI 3 "register_operand"          " 0")))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; Multiply-subtract: %0 = %3 - %1 * %2 with %3 tied to the destination.
+(define_insn "*msubr32"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (minus:SI (match_operand:SI 3 "register_operand"          " 0")
+                  (mult:SI (match_operand:SI 1 "register_operand" " r")
+                           (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "msubr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Div Instructions.
+
+;; Signed divide: divsr produces quotient (%0) and remainder (%3) in one
+;; instruction, so GCC's combined divmod pattern maps directly onto it.
+(define_insn "divmodsi4"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+        (div:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand" " r")))
+   (set (match_operand:SI 3 "register_operand"         "=r")
+        (mod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divsr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; Unsigned divide producing quotient and remainder together.
+(define_insn "udivmodsi4"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+        (udiv:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand"  " r")))
+   (set (match_operand:SI 3 "register_operand"          "=r")
+        (umod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Boolean instructions.
+;; Note: We define the DImode versions in nds32.doubleword.md.
+
+;; ----------------------------------------------------------------------------
+;; 'AND' operation
+;; ----------------------------------------------------------------------------
+
+;; Bit-clear: %0 = %2 & ~%1 (note the operand swap in the template --
+;; bitc's second assembly operand is the non-inverted source).
+(define_insn "bitc"
+  [(set (match_operand:SI 0 "register_operand"                 "=r")
+	(and:SI (not:SI (match_operand:SI 1 "register_operand" " r"))
+		(match_operand:SI 2 "register_operand"         " r")))]
+  "TARGET_ISA_V3"
+  "bitc\t%0, %2, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+;; andsi3 expander.  Masks of 0xff / 0xffff are strength-reduced to the
+;; zero-extension patterns (zeb/zeh); everything else falls through to
+;; the anonymous *andsi3 template.
+(define_expand "andsi3"
+  [(set (match_operand:SI         0 "register_operand" "")
+	(and:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
+  ""
+{
+  /* If operands[2] is const_int,
+     we might be able to use other more efficient instructions.  */
+  if (GET_CODE (operands[2]) == CONST_INT)
+    {
+      /* INTVAL returns HOST_WIDE_INT; keep the full width instead of
+         truncating to 'int' so no high bits are silently dropped.  */
+      HOST_WIDE_INT mask = INTVAL (operands[2]);
+
+      if (mask == 255)
+	{
+	  /* ($r0 & 0xff)  ==>  (zeb $r0, $r0) */
+	  operands[1] = convert_to_mode (QImode, operands[1], 1);
+	  emit_insn (gen_zero_extendqisi2 (operands[0], operands[1]));
+	  DONE;
+	}
+      else if (mask == 65535)
+	{
+	  /* ($r0 & 0xffff)  ==>  (zeh $r0, $r0) */
+	  operands[1] = convert_to_mode (HImode, operands[1], 1);
+	  emit_insn (gen_zero_extendhisi2 (operands[0], operands[1]));
+	  DONE;
+	}
+    }
+})
+
+;; AND instruction selection.  Alternatives 0-1 take a register source;
+;; alternatives 2-9 take constants matched by special constraints and
+;; are rewritten into the cheapest available encoding.
+(define_insn "*andsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    l,    l,    l,    l,    l,    r,    r,    r")
+	(and:SI (match_operand:SI 1 "register_operand" " %0, r,    l,    l,    l,    0,    0,    r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Izeb, Ixls, Ix11, Ibms, Ifex, Iu15, Ii15, Ic15")))]
+  ""
+{
+  int zero_position;
+
+  /* 16-bit andi instructions:
+     andi Rt3,Ra3,0xff  -> zeb33  Rt3,Ra3
+     andi Rt3,Ra3,0x01  -> xlsb33 Rt3,Ra3
+     andi Rt3,Ra3,0x7ff -> x11b33 Rt3,Ra3
+     andi Rt3,Rt3,2^imm3u          -> bmski33 Rt3,imm3u
+     andi Rt3,Rt3,(2^(imm3u+1))-1  -> fexti33 Rt3,imm3u.  */
+
+  /* Only apply INTVAL in the alternatives where operands[2] is known
+     to be a CONST_INT; in alternatives 0 and 1 it is a register, for
+     which INTVAL is invalid rtl access (it aborts when GCC is built
+     with rtl checking enabled).  */
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "and33\t%0, %2";
+    case 1:
+      return "and\t%0, %1, %2";
+    case 2:
+      return "zeb33\t%0, %1";
+    case 3:
+      return "xlsb33\t%0, %1";
+    case 4:
+      return "x11b33\t%0, %1";
+    case 5:
+      operands[2] = GEN_INT (floor_log2 (INTVAL (operands[2])));
+      return "bmski33\t%0, %2";
+    case 6:
+      operands[2] = GEN_INT (floor_log2 (INTVAL (operands[2]) + 1) - 1);
+      return "fexti33\t%0, %2";
+    case 7:
+      return "andi\t%0, %1, %2";
+    case 8:
+      operands[2] = GEN_INT (~INTVAL (operands[2]));
+      return "bitci\t%0, %1, %2";
+    case 9:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bclr_p() test,
+         so that we can guarantee there is only one 0-bit
+         within the immediate value.  */
+      for (zero_position = 31; zero_position >= 0; zero_position--)
+	{
+	  /* Use a HOST_WIDE_INT shift: '1 << 31' would overflow a
+	     plain int, which is undefined behavior.  */
+	  if ((INTVAL (operands[2]) & ((HOST_WIDE_INT) 1 << zero_position)) == 0)
+	    {
+	      /* Found the 0-bit position.  */
+	      operands[2] = GEN_INT (zero_position);
+	      break;
+	    }
+	}
+      return "bclr\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  2,  2,  2,  2,  2,  4,  4,  4")])
+
+;; AND with a left-shifted register operand (V3 fused shift-and).
+(define_insn "*and_slli"
+  [(set (match_operand:SI 0 "register_operand"                      "=   r")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand"   "    r")
+			    (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"              "    r")))]
+  "TARGET_ISA_V3"
+  "and_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; AND with a right-shifted register operand (V3 fused shift-and).
+(define_insn "*and_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "and_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'OR' operation
+;; ----------------------------------------------------------------------------
+
+;; For iorsi3 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; simply set different instruction length according to ISA.
+;; iorsi3 expander; instruction selection is in the anonymous *iorsi3.
+(define_expand "iorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(ior:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
+  ""
+  ""
+)
+
+;; This is the iorsi3 pattern for V3/V3M ISA,
+;; which DOES HAVE 'or33' instruction.
+;; So we can identify 'or Rt3,Ra3,Rb3' case and set its length to be 2.
+(define_insn "*iorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(ior:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, Ie15")))]
+  ""
+{
+  /* NOTE(review): the comment above describes this as the V3/V3M
+     pattern, but the insn condition is empty -- confirm whether the
+     "or33" alternative should be gated on the ISA.  */
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "or33\t%0, %2";
+    case 1:
+      return "or\t%0, %1, %2";
+    case 2:
+      return "ori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bset_p() test,
+         so that we can guarantee there is only one 1-bit
+         within the immediate value.  */
+      for (one_position = 31; one_position >= 0; one_position--)
+	{
+	  /* Use a HOST_WIDE_INT shift: '1 << 31' would overflow a
+	     plain int, which is undefined behavior.  */
+	  if ((INTVAL (operands[2]) & ((HOST_WIDE_INT) 1 << one_position)) != 0)
+	    {
+	      /* Found the 1-bit position.  */
+	      operands[2] = GEN_INT (one_position);
+	      break;
+	    }
+	}
+      return "bset\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+;; OR with a left-shifted register operand (V3 fused shift-or).
+(define_insn "*or_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(ior:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "or_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; OR with a right-shifted register operand (V3 fused shift-or).
+(define_insn "*or_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "or_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'XOR' operation
+;; ----------------------------------------------------------------------------
+
+;; For xorsi3 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; simply set different instruction length according to ISA.
+;; xorsi3 expander; instruction selection is in the anonymous *xorsi3.
+(define_expand "xorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(xor:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
+  ""
+  ""
+)
+
+;; XOR instruction selection.  A single-set-bit constant (It15) is
+;; rewritten into a bit-toggle "btgl" of that bit position.
+(define_insn "*xorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(xor:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, It15")))]
+  ""
+{
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "xor33\t%0, %2";
+    case 1:
+      return "xor\t%0, %1, %2";
+    case 2:
+      return "xori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_btgl_p() test,
+         so that we can guarantee there is only one 1-bit
+         within the immediate value.  */
+      for (one_position = 31; one_position >= 0; one_position--)
+	{
+	  /* Use a HOST_WIDE_INT shift: '1 << 31' would overflow a
+	     plain int, which is undefined behavior.  */
+	  if ((INTVAL (operands[2]) & ((HOST_WIDE_INT) 1 << one_position)) != 0)
+	    {
+	      /* Found the 1-bit position.  */
+	      operands[2] = GEN_INT (one_position);
+	      break;
+	    }
+	}
+      return "btgl\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+;; XOR with a left-shifted register operand (V3 fused shift-xor).
+(define_insn "*xor_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(xor:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "xor_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; XOR with a right-shifted register operand (V3 fused shift-xor).
+(define_insn "*xor_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "xor_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Rotate Right Instructions.
+
+;; Rotate right by an immediate (rotri) or by a register (rotr).
+(define_insn "rotrsi3"
+  [(set (match_operand:SI 0 "register_operand"                 "=   r, r")
+	  (rotatert:SI (match_operand:SI 1 "register_operand"  "    r, r")
+		       (match_operand:SI 2 "nonmemory_operand" " Iu05, r")))]
+  ""
+  "@
+  rotri\t%0, %1, %2
+  rotr\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'NEG' operation
+;; ----------------------------------------------------------------------------
+
+;; For negsi2 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; output assembly code according to ISA.
+;; negsi2 expander; selection happens in the anonymous *negsi2 below.
+(define_expand "negsi2"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(neg:SI (match_operand:SI 1 "register_operand" "")))]
+  ""
+  ""
+)
+
+;; Note that there is NO 'neg33' instruction for V2 ISA.
+;; So 'subri A,B,0' (its semantic is 'A = 0 - B')
+;; is the only option for V2 ISA.
+(define_insn "*negsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(neg:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+  "@
+   neg33\t%0, %1
+   subri\t%0, %1, 0"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'ONE_COMPLEMENT' operation
+;; ----------------------------------------------------------------------------
+
+;; For one_cmplsi2 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; output assembly code according to ISA.
+;; one_cmplsi2 expander; selection happens in the anonymous insn below.
+(define_expand "one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(not:SI (match_operand:SI 1 "register_operand" "")))]
+  ""
+  ""
+)
+
+;; Bitwise NOT: 2-byte "not33" for low registers, otherwise synthesized
+;; as "nor rD, rS, rS".
+(define_insn "*one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(not:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+  "@
+   not33\t%0, %1
+   nor\t%0, %1, %1"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+;; ----------------------------------------------------------------------------
+
+;; Shift instructions.
+
+;; Left shift: 2-byte slli333 for small low-register shifts, 4-byte
+;; slli for immediate counts, sll for a register count.
+(define_insn "ashlsi3"
+  [(set (match_operand:SI 0 "register_operand"            "=   l,    r, r")
+	(ashift:SI (match_operand:SI 1 "register_operand" "    l,    r, r")
+		   (match_operand:SI 2 "general_operand"  " Iu03, Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "slli333\t%0, %1, %2";
+    case 1:
+      return "slli\t%0, %1, %2";
+    case 2:
+      return "sll\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+;; Arithmetic right shift; the 2-byte srai45 operates in place ("0" tie).
+(define_insn "ashrsi3"
+  [(set (match_operand:SI 0 "register_operand"              "=   d,    r, r")
+	(ashiftrt:SI (match_operand:SI 1 "register_operand" "    0,    r, r")
+		     (match_operand:SI 2 "general_operand"  " Iu05, Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "srai45\t%0, %2";
+    case 1:
+      return "srai\t%0, %1, %2";
+    case 2:
+      return "sra\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+;; Logical right shift; the 2-byte srli45 operates in place ("0" tie).
+(define_insn "lshrsi3"
+  [(set (match_operand:SI 0 "register_operand"              "=   d,    r, r")
+	(lshiftrt:SI (match_operand:SI 1 "register_operand" "    0,    r, r")
+		     (match_operand:SI 2 "general_operand"  " Iu05, Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "srli45\t%0, %2";
+    case 1:
+      return "srli\t%0, %1, %2";
+    case 2:
+      return "srl\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; ----------------------------------------------------------------------------
+;; Conditional Move patterns
+;; ----------------------------------------------------------------------------
+
+;; Expand SImode conditional move.  All comparisons are reduced to
+;; slt/slts plus an (eq X 0)/(ne X 0) test so that cmovz/cmovn match later.
+(define_expand "movsicc"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(if_then_else:SI (match_operand 1 "comparison_operator" "")
+			 (match_operand:SI 2 "register_operand" "")
+			 (match_operand:SI 3 "register_operand" "")))]
+  "TARGET_CMOV"
+{
+  if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+      && GET_MODE (XEXP (operands[1], 0)) == SImode
+      && XEXP (operands[1], 1) == const0_rtx)
+    {
+      /* If the operands[1] rtx is already (eq X 0) or (ne X 0),
+         we have gcc generate original template rtx.  */
+      goto create_template;
+    }
+  else
+    {
+      /* Since there is only 'slt'(Set when Less Than) instruction for
+         comparison in Andes ISA, the major strategy we use here is to
+         convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination.
+         We design constraints properly so that the reload phase will assist
+         to make one source operand to use same register as result operand.
+         Then we can use cmovz/cmovn to catch the other source operand
+         which has different register.  */
+      enum rtx_code code = GET_CODE (operands[1]);
+      enum rtx_code new_code = code;
+      rtx cmp_op0 = XEXP (operands[1], 0);
+      rtx cmp_op1 = XEXP (operands[1], 1);
+      rtx tmp;
+      int reverse = 0;
+
+      /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part
+         Strategy : Reverse condition and swap comparison operands
+
+         For example:
+
+             a <= b ? P : Q   (LE or LEU)
+         --> a >  b ? Q : P   (reverse condition)
+         --> b <  a ? Q : P   (swap comparison operands to achieve 'LT/LTU')
+
+             a >= b ? P : Q   (GE or GEU)
+         --> a <  b ? Q : P   (reverse condition to achieve 'LT/LTU')
+
+             a <  b ? P : Q   (LT or LTU)
+         --> (NO NEED TO CHANGE, it is already 'LT/LTU')
+
+             a >  b ? P : Q   (GT or GTU)
+         --> b <  a ? P : Q   (swap comparison operands to achieve 'LT/LTU') */
+      switch (code)
+	{
+	case NE:
+	  /*   (a != b ? P : Q)
+	     can be expressed as
+	       (a == b ? Q : P)
+	     so, fall through to reverse condition */
+	case GE: case GEU: case LE: case LEU:
+	  new_code = reverse_condition (code);
+	  reverse = 1;
+	  break;
+	case EQ: case GT: case GTU: case LT: case LTU:
+	  /* no need to reverse condition */
+	  break;
+	default:
+	  FAIL;
+	}
+
+      /* For '>' comparison operator, we swap operands
+         so that we can have 'LT/LTU' operator.  */
+      if (new_code == GT || new_code == GTU)
+	{
+	  tmp     = cmp_op0;
+	  cmp_op0 = cmp_op1;
+	  cmp_op1 = tmp;
+
+	  new_code = swap_condition (new_code);
+	}
+
+      /* Use a temporary register to store slt/slts result.  */
+      tmp = gen_reg_rtx (SImode);
+
+      /* Split EQ and NE because we don't have direct comparison of EQ and NE.
+         If we don't split it, the conditional move transformation will fail
+         when producing (SET A (EQ B C)) or (SET A (NE B C)).  */
+      if (new_code == EQ)
+	{
+	  /* (a == b)  -->  ((a ^ b) <u 1)  */
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, tmp, GEN_INT (1)));
+	}
+      else if (new_code == NE)
+	{
+	  /* (a != b)  -->  (0 <u (a ^ b))  */
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, GEN_INT (0), tmp));
+        }
+      else
+	/* This emit_insn will create corresponding 'slt/slts' instruction.  */
+	emit_insn (gen_rtx_SET (VOIDmode, tmp,
+				gen_rtx_fmt_ee (new_code, SImode,
+						cmp_op0, cmp_op1)));
+
+      /* Change comparison semantic into (eq X 0) or (ne X 0) behavior
+         so that cmovz or cmovn will be matched later.
+
+         For reverse condition cases, we want to create a semantic that:
+           (eq X 0) --> pick up "else" part
+         For normal cases, we want to create a semantic that:
+           (ne X 0) --> pick up "then" part
+
+         The reason we do not physically change their rtx position is that
+         gcc will also do optimization by reverse condition,
+         which may break up our transformation semantic
+         if we physically change rtx right now.
+         So we just pick up the corresponding comparison operator
+         based on the reverse status, leaving the "swap position" job
+         after reload phase by using define_insn_and_split strategy.  */
+      operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE,
+				    VOIDmode, tmp, const0_rtx);
+    }
+
+create_template:
+  do {} while(0); /* dummy line */
+})
+
+;; QImode/HImode conditional move: lower every operand to an SImode subreg
+;; and reuse the movsicc expander above.
+(define_expand "mov<mode>cc"
+  [(set (match_operand:QIHI 0 "register_operand" "")
+	(if_then_else:QIHI (match_operand 1 "comparison_operator" "")
+			   (match_operand:QIHI 2 "register_operand" "")
+			   (match_operand:QIHI 3 "register_operand" "")))]
+  "TARGET_CMOV"
+{
+  rtx insn;
+
+  /* For QImode and HImode conditional move,
+     make them to be SImode behavior.  */
+  operands[0] = simplify_gen_subreg (SImode, operands[0], <MODE>mode, 0);
+  operands[2] = simplify_gen_subreg (SImode, operands[2], <MODE>mode, 0);
+  operands[3] = simplify_gen_subreg (SImode, operands[3], <MODE>mode, 0);
+
+  insn = gen_movsicc (operands[0], operands[1], operands[2], operands[3]);
+
+  /* If the SImode expander could not produce a sequence, fail this
+     expander as well.  */
+  if (!insn)
+    FAIL;
+
+  emit_insn (insn);
+  DONE;
+})
+
+;; cmovz: %0 = (%1 == 0) ? %2 : %3.  The two alternatives let reload tie
+;; either source operand to the result register; the instruction then only
+;; needs to move the source that lives in a different register.
+(define_insn "cmovz"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+        (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovz\t%0, %2, %1
+   cmovz\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+;; cmovn: %0 = (%1 != 0) ? %2 : %3.  Mirror image of cmovz above; the two
+;; alternatives let reload tie either source operand to the result register.
+(define_insn "cmovn"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+	(if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovn\t%0, %2, %1
+   cmovn\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+;; Split the generic conditional move (produced by the movsicc expander)
+;; into a concrete cmovz/cmovn after reload has assigned registers.
+(define_insn_and_split "*movsicc"
+  [(set (match_operand:SI 0 "register_operand"                     "=r, r")
+	(if_then_else:SI (match_operator 1 "nds32_equality_comparison_operator"
+			   [(match_operand:SI 2 "register_operand" " r, r")
+			    (const_int 0)])
+			 (match_operand:SI 3 "register_operand"    " 0, r")
+			 (match_operand:SI 4 "register_operand"    " r, 0")))]
+  "TARGET_CMOV"
+  "#"
+  "reload_completed"
+  [(pc)]
+{
+  enum rtx_code code = GET_CODE (operands[1]);
+  rtx then_op = operands[3];
+  rtx else_op = operands[4];
+  rtx tmp;
+
+  /* According to the implementation in "movsicc" naming pattern,
+     if we make transformation in which the comparison code is EQ,
+     the desired target is at "else" part position semantically.
+     Now it is the time (after reload_completed) to physically
+     swap it to "then" part position.  */
+  if (code == EQ)
+    {
+      tmp     = then_op;
+      then_op = else_op;
+      else_op = tmp;
+    }
+
+  /* Choosing cmovz or cmovn is based on reload phase result.
+     After reload phase, one source operand will use
+     the same register as result operand.
+     We can use cmovz/cmovn to catch the other source operand
+     which has different register.
+     So we check register number to determine using cmovz or cmovn.  */
+  if (REGNO(then_op) == REGNO(operands[0]))
+    emit_insn (gen_cmovz (operands[0], operands[2], else_op, operands[0]))
;
+  else if (REGNO(else_op) == REGNO(operands[0]))
+    emit_insn (gen_cmovn (operands[0], operands[2], then_op, operands[0]));
+  else
+    gcc_unreachable ();
+
+  DONE;
+})
+
+
+;; ----------------------------------------------------------------------------
+;; Conditional Branch patterns
+;; ----------------------------------------------------------------------------
+
+;; Expand SImode conditional branch.  Zero comparisons (except unsigned
+;; ones) map directly onto beqz/bnez/bgtz/bgez/bltz/blez; everything else
+;; is reduced to slt/slts into $ta followed by an EQ/NE branch against 0.
+(define_expand "cbranchsi4"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* If operands[2] is (const_int 0),
+     we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions.
+     So we have gcc generate original template rtx.  */
+  if (GET_CODE (operands[2]) == CONST_INT)
+    if (INTVAL (operands[2]) == 0)
+      if ((code != GTU)
+	  && (code != GEU)
+	  && (code != LTU)
+	  && (code != LEU))
+	goto create_template;
+
+  /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than)
+     behavior for the comparison, we might need to generate other
+     rtx patterns to achieve same semantic.  */
+  switch (code)
+    {
+    case GT:
+    case GTU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* GT  reg_A, const_int  =>  !(LT  reg_A, const_int + 1) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* We want to plus 1 into the integer value
+	     of operands[2] to create 'slt' instruction.
+	     This calculation is performed on the host machine,
+	     which may be 64-bit integer.
+	     So the meaning of calculation result may be
+	     different from the 32-bit nds32 target.
+
+	     For example:
+	       0x7fffffff + 0x1 -> 0x80000000,
+	       this value is POSITIVE on 64-bit machine,
+	       but the expected value on 32-bit nds32 target
+	       should be NEGATIVE value.
+
+	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
+	     explicitly create SImode constant rtx.  */
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* GT  reg_A, reg_B  =>  LT  reg_B, reg_A */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case GE:
+    case GEU:
+      /* GE  reg_A, reg_B      =>  !(LT  reg_A, reg_B) */
+      /* GE  reg_A, const_int  =>  !(LT  reg_A, const_int) */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == GE)
+	{
+	  /* GE, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* GEU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], EQ);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LT:
+    case LTU:
+      /* LT  reg_A, reg_B      =>  LT  reg_A, reg_B */
+      /* LT  reg_A, const_int  =>  LT  reg_A, const_int */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], NE);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* LE  reg_A, const_int  =>  LT  reg_A, const_int + 1 */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* We want to plus 1 into the integer value
+	     of operands[2] to create 'slt' instruction.
+	     This calculation is performed on the host machine,
+	     which may be 64-bit integer.
+	     So the meaning of calculation result may be
+	     different from the 32-bit nds32 target.
+
+	     For example:
+	       0x7fffffff + 0x1 -> 0x80000000,
+	       this value is POSITIVE on 64-bit machine,
+	       but the expected value on 32-bit nds32 target
+	       should be NEGATIVE value.
+
+	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
+	     explicitly create SImode constant rtx.  */
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* LE  reg_A, reg_B  =>  !(LT  reg_B, reg_A) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case EQ:
+    case NE:
+      /* NDS32 ISA has various form for eq/ne behavior no matter
+         what kind of the operand is.
+         So just generate original template rtx.  */
+      goto create_template;
+
+    default:
+      FAIL;
+    }
+
+create_template:
+  do {} while(0); /* dummy line */
+})
+
+
+;; beqz/bnez branch against zero.  The output template and the
+;; per-alternative length computation must agree: length 2 selects the
+;; 16-bit forms, 4 the 32-bit forms, 6/8 the reversed-branch + 'j' pairs.
+(define_insn "*cbranchsi4_equality_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"  "t, l, r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-comparison conditional branch has two forms:
+       32-bit instruction =>          beqz/bnez           imm16s << 1
+       16-bit instruction => beqzs8/bnezs8/beqz38/bnez38  imm8s << 1
+
+     For 32-bit case,
+     we assume it is always reachable. (but check range -65500 ~ 65500)
+
+     For 16-bit case,
+     it must satisfy { 255 >= (label - pc) >= -256 } condition.
+     However, since the $pc for nds32 is at the beginning of the instruction,
+     we should leave some length space for current insn.
+     So we use range -250 ~ 250.  */
+
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  return (code == EQ) ? "beqzs8\t%2" : "bnezs8\t%2";
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  return (code == EQ) ? "beqz38\t%1, %2" : "bnez38\t%1, %2";
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* For which_alternative==2, it should not be here.  */
+	  gcc_unreachable ();
+	}
+    case 4:
+      /* including constraints: t, l, and r */
+      return (code == EQ) ? "beqz\t%1, %2" : "bnez\t%1, %2";
+    case 6:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  if (code == EQ)
+	    {
+	      /*    beqzs8  .L0
+	       *  =>
+	       *    bnezs8  .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bnezs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnezs8  .L0
+	       *  =>
+	       *    beqzs8  .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beqzs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  if (code == EQ)
+	    {
+	      /*    beqz38  $r0, .L0
+	       *  =>
+	       *    bnez38  $r0, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bnez38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnez38  $r0, .L0
+	       *  =>
+	       *    beqz38  $r0, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beqz38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* For which_alternative==2, it should not be here.  */
+	  gcc_unreachable ();
+	}
+    case 8:
+      /* constraint: t, l, r.  */
+      if (code == EQ)
+	{
+	  /*    beqz  $r8, .L0
+	   *  =>
+	   *    bnez  $r8, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "bnez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+      else
+	{
+	  /*    bnez  $r8, .L0
+	   *  =>
+	   *    beqz  $r8, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "beqz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 2
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			  (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+;; This pattern is dedicated to non-V3 ISAs,
+;; because they HAVE NO beqc/bnec instruction.
+(define_insn "*cbranchsi4_equality_reg"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "TARGET_ISA_V2 || TARGET_ISA_V3M"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+
+     For 32-bit case,
+     we assume it is always reachable. (but check range -16350 ~ 16350).  */
+
+  switch (code)
+    {
+    case EQ:
+      /* r, r */
+      /* Every sub-case returns, so there is no fall-through to NE.  */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "beq\t%1, %2, %3";
+	case 8:
+	  /*    beq  $r0, $r1, .L0
+	   *  =>
+	   *    bne  $r0, $r1, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    case NE:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "bne\t%1, %2, %3";
+	case 8:
+	  /*    bne  $r0, $r1, .L0
+	   *  =>
+	   *    beq  $r0, $r1, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			   (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+;; This pattern is dedicated to V3,
+;; because V3 DOES HAVE beqc/bnec instruction.
+(define_insn "*cbranchsi4_equality_reg_or_const_int"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r,    r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r, Is11")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "TARGET_ISA_V3"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+       32-bit instruction =>         beqc/bnec          imm8s << 1
+
+     For 32-bit case, we assume it is always reachable.
+     (but check range -16350 ~ 16350 and -250 ~ 250).  */
+
+  switch (code)
+    {
+    case EQ:
+      /* Every sub-case below returns (or is unreachable),
+         so there is no fall-through into the NE case.  */
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beq\t%1, %2, %3";
+	    case 8:
+	      /*    beq  $r0, $r1, .L0
+	       *  =>
+	       *    bne  $r0, $r1, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beqc\t%1, %2, %3";
+	    case 8:
+	      /*    beqc  $r0, constant, .L0
+	       *  =>
+	       *    bnec  $r0, constant, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bnec\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    case NE:
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bne\t%1, %2, %3";
+	    case 8:
+	      /*    bne  $r0, $r1, .L0
+	       *  =>
+	       *    beq  $r0, $r1, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bnec\t%1, %2, %3";
+	    case 8:
+	      /*    bnec  $r0, constant, .L0
+	       *  =>
+	       *    beqc  $r0, constant, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beqc\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			  (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		     (const_int 4)
+		     (const_int 8))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+			  (le (minus (match_dup 3) (pc)) (const_int  250)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+;; Signed compare-against-zero branches (bgtz/bgez/bltz/blez).  When the
+;; target is out of the imm16s range the condition is reversed and paired
+;; with an unconditional 'j' (length 8).
+(define_insn "*cbranchsi4_greater_less_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_greater_less_comparison_operator"
+			[(match_operand:SI 1 "register_operand" "r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-greater-less-comparison conditional branch has one form:
+       32-bit instruction =>      bgtz/bgez/bltz/blez     imm16s << 1
+
+     For 32-bit case, we assume it is always reachable.
+     (but check range -65500 ~ 65500).  */
+
+  if (get_attr_length (insn) == 8)
+    {
+      /* The branch target is too far to simply use one
+         bgtz/bgez/bltz/blez instruction.
+         We need to reverse condition and use 'j' to jump to the target.  */
+      switch (code)
+	{
+	case GT:
+	  /*   bgtz  $r8, .L0
+	   * =>
+	   *   blez  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "blez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case GE:
+	  /*   bgez  $r8, .L0
+	   * =>
+	   *   bltz  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "bltz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LT:
+	  /*   bltz  $r8, .L0
+	   * =>
+	   *   bgez  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "bgez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LE:
+	  /*   blez  $r8, .L0
+	   * =>
+	   *   bgtz  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "bgtz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+    }
+
+  switch (code)
+    {
+    case GT:
+      return "bgtz\t%1, %2";
+    case GE:
+      return "bgez\t%1, %2";
+    case LT:
+      return "bltz\t%1, %2";
+    case LE:
+      return "blez\t%1, %2";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+        (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			   (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+;; Expand "store flag": operands[0] = (operands[2] <op> operands[3]).
+;; Every comparison is synthesized from slt/slts (set-on-less-than),
+;; plus addi/xor to reduce EQ/NE to a zero test and xori to negate.
+(define_expand "cstoresi4"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operator:SI 1 "comparison_operator"
+	  [(match_operand:SI 2 "register_operand" "")
+	   (match_operand:SI 3 "nonmemory_operand" "")]))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[1]);
+
+  switch (code)
+    {
+    case EQ:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A == const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  /* Unsigned (x <u 1) is 1 exactly when x == 0.  */
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx))
;
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A == reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+
+    case NE:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A != const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  /* Unsigned (0 <u x) is 1 exactly when x != 0.  */
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A != reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+
+    case GT:
+    case GTU:
+      /* reg_R = (reg_A > reg_B)       --> slt reg_R, reg_B, reg_A */
+      /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */
+      if (code == GT)
+	{
+	  /* GT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[3], operands[2]));
+	}
+      else
+	{
+	  /* GTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[3], operands[2]));
+	}
+
+      DONE;
+
+    case GE:
+    case GEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A >= const_int_B)
+	     --> movi reg_C, const_int_B - 1
+	         slt  reg_R, reg_C, reg_A */
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) - 1,
+					      SImode)));
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], tmp_reg, operands[2]));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A >= reg_B)
+	     --> slt  reg_R, reg_A, reg_B
+	         xori reg_R, reg_R, const_int_1 */
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[2], operands[3]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[2], operands[3]));
+	    }
+
+	  /* perform 'not' behavior */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+    case LT:
+    case LTU:
+      /* reg_R = (reg_A < reg_B)       --> slt reg_R, reg_A, reg_B */
+      /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[2], operands[3]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[2], operands[3]));
+	}
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A <= const_int_B)
+	     --> movi reg_C, const_int_B + 1
+	         slt  reg_R, reg_A, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) + 1,
+						      SImode)));
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], operands[2], tmp_reg));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A <= reg_B) --> slt  reg_R, reg_B, reg_A
+	                                  xori reg_R, reg_R, const_int_1 */
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[3], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[3], operands[2]));
+	    }
+
+	  /* perform 'not' behavior */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+
+    default:
+      gcc_unreachable ();
+    }
+})
+
+
+;; Signed set-on-less-than: %0 = (%1 <s %2).  16-bit forms (slts45/sltsi45)
+;; write the implicit $r15 result register (constraint 't').
+(define_insn "slts_compare"
+  [(set (match_operand:SI 0 "register_operand"         "=t,    t, r,    r")
+	(lt:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+	       (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slts45\t%1, %2
+   sltsi45\t%1, %2
+   slts\t%0, %1, %2
+   sltsi\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+;; Unsigned set-on-less-than: %0 = (%1 <u %2).  Mirrors slts_compare above
+;; but matches LTU and emits the slt/slti family.
+(define_insn "slt_compare"
+  [(set (match_operand:SI 0 "register_operand"          "=t,    t, r,    r")
+	(ltu:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+		(match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slt45\t%1, %2
+   slti45\t%1, %2
+   slt\t%0, %1, %2
+   slti\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Unconditional and other jump instructions.
+
+;; Unconditional jump.  The length attribute below selects the 16-bit 'j8'
+;; (when TARGET_16_BIT and the displacement fits in -250..250) or the
+;; 32-bit 'j'; the C body must agree with that computed length.
+(define_insn "jump"
+  [(set (pc) (label_ref (match_operand 0 "" "")))]
+  ""
+{
+  /* This unconditional jump has two forms:
+       32-bit instruction => j   imm24s << 1
+       16-bit instruction => j8  imm8s << 1
+
+     For 32-bit case,
+     we assume it is always reachable.
+     For 16-bit case,
+     it must satisfy { 255 >= (label - pc) >= -256 } condition.
+     However, since the $pc for nds32 is at the beginning of the instruction,
+     we should leave some length space for current insn.
+     So we use range -250 ~ 250.  */
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      return "j8\t%0";
+    case 4:
+      return "j\t%0";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
+			   (le (minus (match_dup 0) (pc)) (const_int  250)))
+		      (if_then_else (match_test "TARGET_16_BIT")
+				    (const_int 2)
+				    (const_int 4))
+		      (const_int 4)))])
+
+;; Indirect jump through a register.  Prefer the 2-byte 'jr5' encoding
+;; whenever the 16-bit ISA is available; otherwise fall back to the
+;; 4-byte 'jr' encoding.  The 'length' attribute mirrors that choice.
+(define_insn "indirect_jump"
+  [(set (pc) (match_operand:SI 0 "register_operand" "r"))]
+  ""
+{
+  return TARGET_16_BIT ? "jr5\t%0" : "jr\t%0";
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+;; Subroutine call instruction returning no value.
+;;   operands[0]: It should be a mem RTX whose address is
+;;                the address of the function.
+;;   operands[1]: It is the number of bytes of arguments pushed as a const_int.
+;;   operands[2]: It is the number of registers used as operands.
+
+;; The parallel clobbers LP_REGNUM because the emitted jump-and-link
+;; instruction ('jral'/'jal') writes the return address into $lp.
+(define_expand "call"
+  [(parallel [(call (match_operand 0 "memory_operand" "")
+		    (match_operand 1 "general_operand" ""))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+;; Indirect call through a register: 2-byte 'jral5' when the 16-bit ISA
+;; is available, 4-byte 'jral' otherwise.  $lp is clobbered with the
+;; return address.
+(define_insn "*call_register"
+  [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r"))
+		    (match_operand 1 "" ""))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+{
+  return TARGET_16_BIT ? "jral5\t%0" : "jral\t%0";
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+        (if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+;; Direct call: the callee address is an immediate, so only the
+;; 4-byte 'jal' form applies.
+(define_insn "*call_immediate"
+  [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i"))
+		    (match_operand 1 "" ""))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%0"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; Subroutine call instruction returning a value.
+;;   operands[0]: It is the hard register in which the value is returned.
+;;   The rest three operands are the same as the
+;;   three operands of the 'call' instruction.
+;;   (but with numbers increased by one)
+
+;; Like 'call', but the result is stored into operand 0.  LP_REGNUM is
+;; clobbered because the jump-and-link writes the return address there.
+(define_expand "call_value"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (match_operand 1 "memory_operand" "")
+		         (match_operand 2 "general_operand" "")))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+;; Indirect value-returning call through a register: 2-byte 'jral5'
+;; when the 16-bit ISA is available, 4-byte 'jral' otherwise.
+(define_insn "*call_value_register"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (mem (match_operand:SI 1 "register_operand" "r"))
+		         (match_operand 2 "" "")))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+{
+  return TARGET_16_BIT ? "jral5\t%1" : "jral\t%1";
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+;; Direct value-returning call to an immediate address; 'jal' is the
+;; only (4-byte) form.
+(define_insn "*call_value_immediate"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (mem (match_operand:SI 1 "immediate_operand" "i"))
+			 (match_operand 2 "" "")))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%1"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; prologue and epilogue.
+
+;; Expand the function prologue.  The RTL vector is a dummy
+;; (const_int 0); all insns are emitted by the C helpers.
+;; (GNU Coding Standards: a space goes before the open parenthesis
+;; of a function call.)
+(define_expand "prologue" [(const_int 0)]
+  ""
+{
+  /* Note that only under V3/V3M ISA, we could use v3push prologue.  */
+  if (TARGET_V3PUSH)
+    nds32_expand_prologue_v3push ();
+  else
+    nds32_expand_prologue ();
+  DONE;
+})
+
+;; Expand the function epilogue; mirrors the prologue expander.
+;; (GNU Coding Standards: a space goes before the open parenthesis
+;; of a function call.)
+(define_expand "epilogue" [(const_int 0)]
+  ""
+{
+  /* Note that only under V3/V3M ISA, we could use v3pop epilogue.  */
+  if (TARGET_V3PUSH)
+    nds32_expand_epilogue_v3pop ();
+  else
+    nds32_expand_epilogue ();
+  DONE;
+})
+
+
+;; nop instruction.
+
+;; No-operation.
+;; NOTE(review): this unconditionally emits the 2-byte 'nop16' encoding
+;; even when TARGET_16_BIT is off -- confirm 16-bit encodings are always
+;; available here, or add a 4-byte 'nop' alternative.
+(define_insn "nop"
+  [(const_int 0)]
+  ""
+  "nop16"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+
+;; ----------------------------------------------------------------------------
+;; unspec operation patterns
+;; ----------------------------------------------------------------------------
+
+;; stack push/pop multiple
+
+;; Emit a 'push.s' multiple-store.  Operands 0/1 are the Rb/Re register
+;; range; operand 2 is the En4 bitmask selecting $fp/$gp/$lp/$sp.
+(define_insn "*stack_push_multiple"
+  [(match_parallel 3 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand" "r")
+		   (match_operand:SI 1 "register_operand" "r")
+		   (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_STACK_PUSH_MULTIPLE)
+     ])]
+  ""
+{
+  /* Because we need to return a string buffer used by output_asm_insn(),
+     we use a static character array to store desired assembly output.  */
+  static char str_buffer[100];
+
+  /* A string to present 'push.s' instruction.  */
+  const char *push_str = NULL;
+  /* A string to present Rb and Re operands.  */
+  const char *RbRe_str = NULL;
+  /* The En4 encoding string of the instruction is
+     in the bitwise fashion of following: "fp gp lp sp".
+     Declared 'static const ... *const' so the table lives in read-only
+     data and is not re-initialized on every invocation.  */
+  static const char *const En4_str[16] =
+    {
+      /* 0:  0  0  0  0 */
+      "",
+      /* 1:  0  0  0  1 */
+      "{ $sp }",
+      /* 2:  0  0  1  0 */
+      "{ $lp }",
+      /* 3:  0  0  1  1 */
+      "{ $lp $sp }",
+      /* 4:  0  1  0  0 */
+      "{ $gp }",
+      /* 5:  0  1  0  1 */
+      "{ $gp $sp }",
+      /* 6:  0  1  1  0 */
+      "{ $gp $lp }",
+      /* 7:  0  1  1  1 */
+      "{ $gp $lp $sp }",
+      /* 8:  1  0  0  0 */
+      "{ $fp }",
+      /* 9:  1  0  0  1 */
+      "{ $fp $sp }",
+      /* 10: 1  0  1  0 */
+      "{ $fp $lp }",
+      /* 11: 1  0  1  1 */
+      "{ $fp $lp $sp }",
+      /* 12: 1  1  0  0 */
+      "{ $fp $gp }",
+      /* 13: 1  1  0  1 */
+      "{ $fp $gp $sp }",
+      /* 14: 1  1  1  0 */
+      "{ $fp $gp $lp }",
+      /* 15: 1  1  1  1 */
+      "{ $fp $gp $lp $sp }"
+    };
+
+  /* Create push_str string.
+     Note that 'smw.adm $sp,[$sp],$sp,0' means push nothing.  */
+  if (INTVAL (operands[2]) == 0
+      && REGNO (operands[0]) == SP_REGNUM
+      && REGNO (operands[1]) == SP_REGNUM)
+    return "";
+  else
+    push_str = "push.s\t";
+
+  /* Create RbRe_str string.
+     Note that we need to output ',' character if there exists En4 field.  */
+  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
+      RbRe_str = (INTVAL (operands[2]) != 0) ? "%0, %1, " : "%0, %1";
+  else
+      RbRe_str = "";
+
+  /* Create complete assembly code string.  */
+  snprintf (str_buffer, sizeof (str_buffer),
+			"%s%s%s",
+			push_str,
+			RbRe_str,
+			En4_str[INTVAL (operands[2])]);
+
+  /* Return formatted string.  */
+  return str_buffer;
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")])
+
+;; Emit a 'pop.s' multiple-load; mirrors *stack_push_multiple.
+;; Operands 0/1 are the Rb/Re register range; operand 2 is the En4
+;; bitmask selecting $fp/$gp/$lp/$sp.
+(define_insn "*stack_pop_multiple"
+  [(match_parallel 3 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand" "r")
+		   (match_operand:SI 1 "register_operand" "r")
+		   (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_STACK_POP_MULTIPLE)
+     ])]
+  ""
+{
+  /* Because we need to return a string buffer used by output_asm_insn(),
+     we use a static character array to store desired assembly output.  */
+  static char str_buffer[100];
+
+  /* A string to present 'pop.s' instruction.  */
+  const char *pop_str = NULL;
+  /* A string to present Rb and Re operands.  */
+  const char *RbRe_str = NULL;
+  /* The En4 encoding string of the instruction is
+     in the bitwise fashion of following: "fp gp lp sp".
+     Declared 'static const ... *const' so the table lives in read-only
+     data and is not re-initialized on every invocation.  */
+  static const char *const En4_str[16] =
+    {
+      /* 0:  0  0  0  0 */
+      "",
+      /* 1:  0  0  0  1 */
+      "{ $sp }",
+      /* 2:  0  0  1  0 */
+      "{ $lp }",
+      /* 3:  0  0  1  1 */
+      "{ $lp $sp }",
+      /* 4:  0  1  0  0 */
+      "{ $gp }",
+      /* 5:  0  1  0  1 */
+      "{ $gp $sp }",
+      /* 6:  0  1  1  0 */
+      "{ $gp $lp }",
+      /* 7:  0  1  1  1 */
+      "{ $gp $lp $sp }",
+      /* 8:  1  0  0  0 */
+      "{ $fp }",
+      /* 9:  1  0  0  1 */
+      "{ $fp $sp }",
+      /* 10: 1  0  1  0 */
+      "{ $fp $lp }",
+      /* 11: 1  0  1  1 */
+      "{ $fp $lp $sp }",
+      /* 12: 1  1  0  0 */
+      "{ $fp $gp }",
+      /* 13: 1  1  0  1 */
+      "{ $fp $gp $sp }",
+      /* 14: 1  1  1  0 */
+      "{ $fp $gp $lp }",
+      /* 15: 1  1  1  1 */
+      "{ $fp $gp $lp $sp }"
+    };
+
+  /* Create pop_str string.
+     Note that 'lmw.bim $sp,[$sp],$sp,0' means pop nothing.  */
+  if (INTVAL (operands[2]) == 0
+      && REGNO (operands[0]) == SP_REGNUM
+      && REGNO (operands[1]) == SP_REGNUM)
+    return "";
+  else
+    pop_str = "pop.s\t";
+
+  /* Create RbRe_str string.
+     Note that we need to output ',' character if there exists En4 field.  */
+  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
+      RbRe_str = (INTVAL (operands[2]) != 0) ? "%0, %1, " : "%0, %1";
+  else
+      RbRe_str = "";
+
+  /* Create complete assembly code string.  */
+  snprintf (str_buffer, sizeof (str_buffer),
+			"%s%s%s",
+			pop_str,
+			RbRe_str,
+			En4_str[INTVAL (operands[2])]);
+
+  /* Return formatted string.  */
+  return str_buffer;
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")])
+
+;; stack v3push/v3pop
+
+;; V3/V3M 'v3push': operand 0 is the Re register, operand 1 the
+;; immediate stack adjustment.  The output template is a constant, so
+;; use a plain template string instead of a C block that merely
+;; returns it.
+(define_insn "*stack_v3push"
+  [(match_parallel 2 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand"  "r")
+		   (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_STACK_V3PUSH)
+     ])]
+  ""
+  "v3push\t%0, %1"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+;; V3/V3M 'v3pop'; mirrors *stack_v3push.  Constant output template,
+;; so no C block is needed.
+(define_insn "*stack_v3pop"
+  [(match_parallel 2 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand"  "r")
+		   (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_STACK_V3POP)
+     ])]
+  ""
+  "v3pop\t%0, %1"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+
+;; In nds32 target, the 'ret5' instruction is actually 'jr5 $lp'.
+;; This pattern is designed to distinguish function return
+;; from general indirect_jump pattern so that we can directly
+;; generate 'ret5' for readability.
+
+;; Function return via the 2-byte 'ret5' (an alias of 'jr5 $lp').
+;; A distinct unspec (instead of an indirect_jump on $lp) keeps the
+;; return recognizable as a function return at output time.
+(define_insn "unspec_func_return"
+  [(set (pc)
+	(unspec:SI [(reg:SI LP_REGNUM)] UNSPEC_FUNC_RETURN))]
+  ""
+  "ret5"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+;; ----------------------------------------------------------------------------
+;; Jump Table patterns
+;; ----------------------------------------------------------------------------
+;; Need to implement ASM_OUTPUT_ADDR_VEC_ELT (for normal jump table)
+;; or ASM_OUTPUT_ADDR_DIFF_ELT (for pc relative jump table) as well.
+;;
+;; operands[0]: The index to dispatch on.
+;; operands[1]: The lower bound for indices in the table.
+;; operands[2]: The total range of indices in the table.
+;;              i.e. The largest index minus the smallest one.
+;; operands[3]: A label that precedes the table itself.
+;; operands[4]: A label to jump to if the index has a value outside the bounds.
+;;
+;; We need to create following sequences for jump table code generation:
+;;   A) k <-- (plus (operands[0]) (-operands[1]))
+;;   B) if (gtu k operands[2]) then goto operands[4]
+;;   C) t <-- operands[3]
+;;   D) z <-- (mem (plus (k << 0 or 1 or 2) t))
+;;   E) z <-- t + z (NOTE: This is only required for pc relative jump table.)
+;;   F) jump to target with register t or z
+;;
+;; The steps C, D, E, and F are performed by casesi_internal pattern.
+;; Note: operands[5] below is an extra scratch pseudo created by this
+;; expander; it is not part of the standard five-operand casesi
+;; interface.  The bound check (step B) goes through cbranchsi4.
+(define_expand "casesi"
+  [(match_operand:SI 0 "register_operand"  "r") ; index to jump on
+   (match_operand:SI 1 "immediate_operand" "i") ; lower bound
+   (match_operand:SI 2 "immediate_operand" "i") ; total range
+   (match_operand:SI 3 "" "")                   ; table label
+   (match_operand:SI 4 "" "")]                  ; Out of range label
+  ""
+{
+  rtx reg, test;
+
+  /* Step A: "k <-- (plus (operands[0]) (-operands[1]))".  */
+  if (operands[1] != const0_rtx)
+    {
+      reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_addsi3 (reg, operands[0],
+			     GEN_INT (-INTVAL (operands[1]))));
+      operands[0] = reg;
+    }
+
+  /* Step B: "if (gtu k operands[2]) then goto operands[4]".  */
+  test = gen_rtx_GTU (VOIDmode, operands[0], operands[2]);
+  emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2],
+				  operands[4]));
+
+  operands[5] = gen_reg_rtx (SImode);
+  /* Step C, D, E, and F, using another temporary register operands[5].  */
+  emit_jump_insn (gen_casesi_internal (operands[0],
+				       operands[3],
+				       operands[5]));
+  DONE;
+})
+
+;; We are receiving operands from casesi pattern:
+;;
+;; operands[0]: The index that has been subtracted by the lower bound.
+;; operands[1]: A label that precedes the table itself.
+;; operands[2]: A temporary register to retrieve value in table.
+;;
+;; We need to perform steps C, D, E, and F:
+;;
+;;   C) t <-- operands[1]
+;;   D) z <-- (mem (plus (operands[0] << m) t))
+;;            m is 2 for normal jump table.
+;;            m is 0, 1, or 2 for pc relative jump table based on diff size.
+;;   E) t <-- z + t (NOTE: This is only required for pc relative jump table.)
+;;   F) Jump to target with register t or z.
+;;
+;; The USE in this pattern is needed to tell flow analysis that this is
+;; a CASESI insn.  It has no other purpose.
+;; NOTE(review): the 'length' of 20 is presumably the worst-case size
+;; of the sequence emitted by nds32_output_casesi_pc_relative /
+;; nds32_output_casesi -- confirm against those helpers.
+(define_insn "casesi_internal"
+  [(parallel [(set (pc)
+		   (mem:SI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "r")
+					     (const_int 4))
+				    (label_ref (match_operand 1 "" "")))))
+	      (use (label_ref (match_dup 1)))
+	      (clobber (match_operand:SI 2 "register_operand" ""))
+	      (clobber (reg:SI TA_REGNUM))])]
+  ""
+{
+  if (CASE_VECTOR_PC_RELATIVE)
+    return nds32_output_casesi_pc_relative (operands);
+  else
+    return nds32_output_casesi (operands);
+}
+  [(set_attr "length" "20")
+   (set_attr "type" "alu")])
+
+;; ----------------------------------------------------------------------------
+
+;; Performance Extension
+
+;; Count leading zeros; only available with the performance extension.
+(define_insn "clzsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+	(clz:SI (match_operand:SI 1 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "clz\t%0, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Signed maximum ('max'); only available with the performance extension.
+(define_insn "smaxsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smax:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "max\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Signed minimum ('min'); only available with the performance extension.
+(define_insn "sminsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smin:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "min\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/6] Andes nds32: machine description of nds32 porting (2).
  2013-09-08 16:17       ` Chung-Ju Wu
@ 2013-09-14 15:15         ` Richard Sandiford
  2013-09-27 18:38           ` Chung-Ju Wu
  0 siblings, 1 reply; 18+ messages in thread
From: Richard Sandiford @ 2013-09-14 15:15 UTC (permalink / raw)
  To: Chung-Ju Wu; +Cc: gcc-patches

Some comments for part 2.

Chung-Ju Wu <jasonwucj@gmail.com> writes:
> +;; Include intrinsic functions definition.
> +(include "nds32.intrinsic.md")
> +
> +;; Include block move for nds32 multiple load/store behavior.
> +(include "nds32.multiple.md")
> +
> +;; Include DImode/DFmode operations.
> +(include "nds32.doubleword.md")
> +
> +;; Include peephole patterns.
> +(include "nds32.peephole2.md")

Usual gcc style is to use "-" rather than "." as a word separator in
filenames.

> +(define_insn "*store_si"
> +  [(set (match_operand:SI 0 "memory_operand"   "=U45, U33, U37, U45, m")
> +	(match_operand:SI 1 "register_operand" "   l,   l,   l,   d, r"))]
> +  ""

Loads, stores, register moves and constant moves should normally be in
the same pattern, so that anything operating on constraints can see all
the alternatives at once.  This might not be as important for LRA as it
was for reload, but it still seems like good practice.

> +(define_insn "*mov<mode>"
> +  [(set (match_operand:QIHISI 0 "register_operand" "=r, m, r")
> +	(match_operand:QIHISI 1 "register_operand" " r, r, m"))]
> +  ""
> +{
> +  switch (which_alternative)
> +    {
> +    case 0:
> +      if (get_attr_length (insn) == 2)
> +	return "mov55\t%0, %1";
> +      else
> +	return "ori\t%0, %1, 0";
> +    case 1:
> +      return nds32_output_32bit_store (operands, <byte>);
> +    case 2:
> +      return nds32_output_32bit_load (operands, <byte>);
> +
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +  [(set_attr "type" "alu,store,load")
> +   (set_attr "enabled" "1")
> +   (set_attr_alternative "length"
> +     [
> +       ;; Alternative 0
> +       (if_then_else (match_test "TARGET_16_BIT")
> +		     (const_int 2)
> +		     (const_int 4))
> +       ;; Alternative 1
> +       (const_int 4)
> +       ;; Alternative 2
> +       (const_int 4)
> +     ])])

The style used in the load and store patterns was:

(define_insn "*mov<mode>"
  [(set (match_operand:QIHISI 0 "register_operand" "=r, r, m, r")
	(match_operand:QIHISI 1 "register_operand" " r, r, r, m"))]
  ""
{
  switch (which_alternative)
    {
    case 0:
      return "mov55\t%0, %1";
    case 1:
      return "ori\t%0, %1, 0";
    case 2:
      return nds32_output_32bit_store (operands, <byte>);
    case 3:
      return nds32_output_32bit_load (operands, <byte>);
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "alu,alu,store,load")
   (set_attr "length" "2,4,4,4")])

which seems neater.  Did you try that but find that it didn't work here?

Same comment for other instructions where:

       (if_then_else (match_test "TARGET_16_BIT")
		     (const_int 2)
		     (const_int 4))

occurs (except for the special case of relaxable branch instructions,
where using the if_then_else is good).

> +;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF
> +;; are able to match such instruction template.
> +(define_insn "*move_addr"
> +  [(set (match_operand:SI 0 "register_operand"       "=l, r")
> +	(match_operand:SI 1 "nds32_symbolic_operand" " i, i"))]
> +  ""
> +  "la\t%0, %1"
> +  [(set_attr "type" "move")
> +   (set_attr "length"  "8")])
> +
> +
> +(define_insn "*sethi"
> +  [(set (match_operand:SI 0 "register_operand"           "=r")
> +	(high:SI (match_operand:SI 1 "immediate_operand" " i")))]
> +  ""
> +{
> +  return "sethi\t%0, hi20(%1)";
> +}
> +  [(set_attr "type" "alu")
> +   (set_attr "length" "4")])
> +
> +
> +(define_insn "*lo_sum"
> +  [(set (match_operand:SI 0 "register_operand"             "=r")
> +	(lo_sum:SI (match_operand:SI 1 "register_operand"  " 0")
> +		   (match_operand:SI 2 "immediate_operand" " i")))]
> +  ""
> +  "ori\t%0, %1, lo12(%2)"
> +  [(set_attr "type" "alu")
> +   (set_attr "length" "4")])

Very minor, but "nds32_symbolic_operand" seems like a better choice for
*sethi and *lo_sum too, since (high ...) and (lo_sum ...) shouldn't be
used for const_ints.

Any pass would be in its rights to fuse a *sethi and *lo_sum pair back
into a single *move_addr.  Is that something you want to allow?
(That's a genuine question rather than a review comment btw.)

Is the "0" constraint on the *lo_sum really necessary?  It looks from
the later OR patterns as though this form of ORI allows the source and
destination registers to be different.

> +;; Zero extension instructions.
> +
> +(define_expand "zero_extend<mode>si2"
> +  [(set (match_operand:SI 0 "general_operand" "")
> +	(zero_extend:SI (match_operand:QIHI 1 "general_operand" "")))]
> +  ""
> +{
> +  rtx tmp_reg;
> +
> +  /* We need to make sure operands[1] is a register.  */
> +  if (!REG_P (operands[1]))
> +    operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);

Why do you need this?  It looks like the architecture has zero-extending loads.

> +
> +  /* If the pattern is "(mem X) <- (zero_extend (reg Y))",
> +     we create two rtx patterns:
> +       (reg:SI K) <- (zero_extend:SI (reg Y))
> +       (mem:SI X) <- (reg:SI K)
> +     The first rtx will be matched by '*zero_extend<mode>si2_reg' template,
> +     and the second rtx will be matched by mov naming pattern.  */
> +  if (MEM_P (operands[0]))
> +    {
> +      tmp_reg = gen_reg_rtx (SImode);
> +
> +      emit_insn (gen_zero_extend<mode>si2 (tmp_reg, operands[1]));
> +      emit_insn (gen_movsi (operands[0], tmp_reg));
> +
> +      DONE;
> +    }
> +})
> +
> +(define_insn "*zero_extend<mode>si2_reg"
> +  [(set (match_operand:SI 0 "register_operand"                   "=w, r")
> +	(zero_extend:SI (match_operand:QIHI 1 "register_operand" " w, r")))]
> +  ""
> +{
> +  switch (which_alternative)
> +    {
> +    case 0:
> +      return "ze<size>33\t%0, %1";
> +    case 1:
> +      return "ze<size>\t%0, %1";
> +
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +  [(set_attr "type"   "alu,alu")
> +   (set_attr "length" "  2,  4")])
> +
> +(define_insn "*zero_extend<mode>si2_load"
> +  [(set (match_operand:SI 0 "register_operand"                 "=  l, *r")
> +	(zero_extend:SI (match_operand:QIHI 1 "memory_operand" " U33,  m")))]
> +  ""
> +{
> +  if (which_alternative == 0)
> +    return nds32_output_16bit_load (operands, <byte>);
> +  else
> +    return nds32_output_32bit_load (operands, <byte>);
> +}
> +  [(set_attr "length" "2, 4")
> +   (set_attr "type" "load,load")])

Here too it's better to have a single pattern with both the register
and memory alternatives.  It ought to be possible to define
"zero_extend<mode>si2" directly as a define_insn rather than a
define_expand:

(define_insn "zero_extend<mode>si2"
  [(set (match_operand:SI 0 "register_operand" "w,w,r,*r")
	(zero_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" "w,r,U33,m")))]
  ...

The target-independent code will then handle memory destinations.

Same comments for "extend<mode>si2".

> +;; Arithmetic instructions.
> +
> +(define_expand "addsi3"
> +  [(set (match_operand:SI 0 "register_operand" "")
> +	(plus:SI (match_operand:SI 1 "register_operand" "")
> +		 (match_operand:SI 2 "nds32_nonmemory_nonsymbol_operand" "")))]
> +  ""
> +{
> +  if (GET_CODE (operands[2]) == CONST_INT)
> +    operands[2] = gen_int_mode (INTVAL (operands[2]), SImode);
> +})

This looks like it's papering over a bug elsewhere.  Any CONST_INT passed
into to gen_addsi3 must already be correct for SImode.  If you find callers
where that isn't true, we need to fix them.  Also, any incorrect constants
are usually filtered out by the predicate.

> +(define_insn "*add<mode>3"
> +  [(set (match_operand:QIHISI 0 "register_operand"                      "=   d,    l,  d, l,    k,    l,    r, r")
> +	(plus:QIHISI (match_operand:QIHISI 1 "register_operand"         "    0,    l, %0, l,    0,    k,    r, r")
> +		     (match_operand:QIHISI 2 "nds32_reg_or_int_operand" " Iu05, Iu03,  r, l, Is10, Iu06, Is15, r")))]
> +  ""
> +  "@
> +  addi45\t%0, %2
> +  addi333\t%0, %1, %2
> +  add45\t%0, %2
> +  add333\t%0, %1, %2
> +  addi10.sp\t%2
> +  addri36.sp\t%0, %2
> +  addi\t%0, %1, %2
> +  add\t%0, %1, %2"
> +  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu")
> +   (set_attr "length" "  2,  2,  2,  2,  2,  2,  4,  4")])

The predicates in the define_expand and define_insn are different.
They should usually be the same.

Without the gen_int_mode, this too could be defined directly as
a define_insn, without a separate define_expand.

> +(define_expand "subsi3"
> +  [(set (match_operand:SI 0 "register_operand" "")
> +	(minus:SI (match_operand:SI 1 "nds32_rimm15s_operand" "")
> +		 (match_operand:SI 2 "nds32_rimm15s_operand" "")))]
> +  ""
> +  ""
> +)

Operand 2 shouldn't allow immediates.  They should all go via the
add optab instead.

> +
> +(define_insn "*sub<mode>3"
> +  [(set (match_operand:QIHISI 0 "register_operand"                    "=   d,    l, d, l,    r, r")
> +	(minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" "    0,    l, 0, l, Is15, r")
> +		      (match_operand:QIHISI 2 "nds32_rimm15s_operand" " Iu05, Iu03, r, l,    r, r")))]
> +  ""
> +  "@
> +  subi45\t%0, %2
> +  subi333\t%0, %1, %2
> +  sub45\t%0, %2
> +  sub333\t%0, %1, %2
> +  subri\t%0, %2, %1
> +  sub\t%0, %1, %2"
> +  [(set_attr "type"   "alu,alu,alu,alu,alu,alu")
> +   (set_attr "length" "  2,  2,  2,  2,  4,  4")])

Here too a direct define_insn seems better than a define_expand/define_insn
pair.

> +(define_expand "andsi3"
> +  [(set (match_operand:SI         0 "register_operand" "")
> +	(and:SI (match_operand:SI 1 "register_operand" "")
> +		(match_operand:SI 2 "general_operand"  "")))]
> +  ""
> +{
> +  /* If operands[2] is const_int,
> +     we might be able to use other more efficient instructions.  */
> +  if (GET_CODE (operands[2]) == CONST_INT)
> +    {
> +      int mask = INTVAL (operands[2]);
> +
> +      if (mask == 255)
> +	{
> +	  /* ($r0 & 0xff)  ==>  (zeb $r0, $r0) */
> +	  operands[1] = convert_to_mode (QImode, operands[1], 1);
> +	  emit_insn (gen_zero_extendqisi2 (operands[0], operands[1]));
> +	  DONE;
> +	}
> +      else if (mask == 65535)
> +	{
> +	  /* ($r0 & 0xffff)  ==>  (zeh $r0, $r0) */
> +	  operands[1] = convert_to_mode (HImode, operands[1], 1);
> +	  emit_insn (gen_zero_extendhisi2 (operands[0], operands[1]));
> +	  DONE;
> +	}
> +    }
> +})

It looks like the associated "*andsi3" insn also has a case for zeb.
That's usually the better approach.  Please consider adding a zeh case
to the "*andsi3" alternatives too and removing the code above.

With that change, the define_expand and define_insn could be fused.

> +;; For iorsi3 naming pattern, we have to use define_expand first,
> +;; and then design different anonymous patterns so that it can
> +;; simply set different instruction length according to ISA.
> +(define_expand "iorsi3"
> +  [(set (match_operand:SI 0 "register_operand"         "")
> +	(ior:SI (match_operand:SI 1 "register_operand" "")
> +		(match_operand:SI 2 "general_operand"  "")))]
> +  ""
> +  ""
> +)
> +
> +;; This is the iorsi3 pattern for V3/V3M ISA,
> +;; which DOES HAVE 'or33' instruction.
> +;; So we can identify 'or Rt3,Ra3,Rb3' case and set its length to be 2.
> +(define_insn "*iorsi3"
> +  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
> +	(ior:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
> +		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, Ie15")))]
> +  ""
> +{
> +  int one_position;
> +
> +  switch (which_alternative)
> +    {
> +    case 0:
> +      return "or33\t%0, %2";
> +    case 1:
> +      return "or\t%0, %1, %2";
> +    case 2:
> +      return "ori\t%0, %1, %2";
> +    case 3:
> +      /* If we reach this alternative,
> +         it must pass the nds32_can_use_bset_p() test,
> +         so that we can guarantee there is only one 1-bit
> +         within the immediate value.  */
> +      for (one_position = 31; one_position >= 0; one_position--)
> +	{
> +	  if ((INTVAL (operands[2]) & (1 << one_position)) != 0)
> +	    {
> +	      /* Found the 1-bit position.  */
> +	      operands[2] = GEN_INT (one_position);
> +	      break;
> +	    }
> +	}
> +      return "bset\t%0, %1, %2";
> +
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +  [(set_attr "type"   "alu,alu,alu,alu")
> +   (set_attr "length" "  2,  4,  4,  4")])

I don't understand the comment above the define_expand, sorry.
This too looks like a case where "iorsi3" should just be a define_insn,
with no define_expand.

Case 3 could use exact_log2.

Same comments for xor.  (You might be able to use code iterators
and have a single set of patterns for or and xor, including the
shifting variants.)

> +;; For negsi2 naming pattern, we have to use define_expand first,
> +;; and then design different anonymous patterns so that it can
> +;; output assembly code according to ISA.
> +(define_expand "negsi2"
> +  [(set (match_operand:SI 0 "register_operand"         "")
> +	(neg:SI (match_operand:SI 1 "register_operand" "")))]
> +  ""
> +  ""
> +)
> +
> +;; Note that there is NO 'neg33' instruction for V2 ISA.
> +;; So 'subri A,B,0' (its semantic is 'A = 0 - B')
> +;; is the only option for V2 ISA.
> +(define_insn "*negsi2"
> +  [(set (match_operand:SI 0 "register_operand"         "=w, r")
> +	(neg:SI (match_operand:SI 1 "register_operand" " w, r")))]
> +  ""
> +  "@
> +   neg33\t%0, %1
> +   subri\t%0, %1, 0"
> +  [(set_attr "type"   "alu,alu")
> +   (set_attr "length" "  2,  4")])

Here too the define_expand seems redundant.  Same for one_cmplsi2.

(Looks like you already define the shift instructions directly though,
thanks.)

> +;; Shift instructions.
> +
> +(define_insn "ashlsi3"
> +  [(set (match_operand:SI 0 "register_operand"            "=   l,    r, r")
> +	(ashift:SI (match_operand:SI 1 "register_operand" "    l,    r, r")
> +		   (match_operand:SI 2 "general_operand"  " Iu03, Iu05, r")))]

Operand 2 doesn't allow memory, so nonmemory_operand would be better
than general_operand.  Both are correct, but nonmemory_operand is
tighter and so forces the pre-RA optimisers to treat the load as
a separate instruction.

Same for the other shift instructions.

> +(define_expand "mov<mode>cc"
> +  [(set (match_operand:QIHI 0 "register_operand" "")
> +	(if_then_else:QIHI (match_operand 1 "comparison_operator" "")
> +			   (match_operand:QIHI 2 "register_operand" "")
> +			   (match_operand:QIHI 3 "register_operand" "")))]
> +  "TARGET_CMOV"
> +{
> +  rtx insn;
> +
> +  /* For QImode and HImode conditional move,
> +     make them to be SImode behavior.  */
> +  operands[0] = simplify_gen_subreg (SImode, operands[0], <MODE>mode, 0);
> +  operands[2] = simplify_gen_subreg (SImode, operands[2], <MODE>mode, 0);
> +  operands[3] = simplify_gen_subreg (SImode, operands[3], <MODE>mode, 0);
> +
> +  insn = gen_movsicc (operands[0], operands[1], operands[2], operands[3]);
> +
> +  if (!insn)
> +    FAIL;
> +
> +  emit_insn (insn);
> +  DONE;
> +})

It'd be better to handle QI, HI and SI using a single template if possible.
Subregs are harder to optimise than plain registers.

> +
> +(define_insn "cmovz"
> +  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
> +        (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
> +			     (const_int 0))
> +			 (match_operand:SI 2 "register_operand"     " r, 0")
> +			 (match_operand:SI 3 "register_operand"     " 0, r")))]
> +  "TARGET_CMOV"
> +  "@
> +   cmovz\t%0, %2, %1
> +   cmovz\t%0, %3, %1"
> +  [(set_attr "type" "move")
> +   (set_attr "length"  "4")])
> +
> +(define_insn "cmovn"
> +  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
> +	(if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r")
> +			     (const_int 0))
> +			 (match_operand:SI 2 "register_operand"     " r, 0")
> +			 (match_operand:SI 3 "register_operand"     " 0, r")))]
> +  "TARGET_CMOV"
> +  "@
> +   cmovn\t%0, %2, %1
> +   cmovn\t%0, %3, %1"
> +  [(set_attr "type" "move")
> +   (set_attr "length"  "4")])
> +
> +(define_insn_and_split "*movsicc"
> +  [(set (match_operand:SI 0 "register_operand"                     "=r, r")
> +	(if_then_else:SI (match_operator 1 "nds32_equality_comparison_operator"
> +			   [(match_operand:SI 2 "register_operand" " r, r")
> +			    (const_int 0)])
> +			 (match_operand:SI 3 "register_operand"    " 0, r")
> +			 (match_operand:SI 4 "register_operand"    " r, 0")))]
> +  "TARGET_CMOV"
> +  "#"
> +  "reload_completed"
> +  [(pc)]
> +{
> +  enum rtx_code code = GET_CODE (operands[1]);
> +  rtx then_op = operands[3];
> +  rtx else_op = operands[4];
> +  rtx tmp;
> +
> +  /* According to the implementation in "movsicc" naming pattern,
> +     if we make transformation in which the comparison code is EQ,
> +     the desired target is at "else" part position semantically.
> +     Now it is the time (after reload_completed) to physically
> +     swap it to "then" part position.  */
> +  if (code == EQ)
> +    {
> +      tmp     = then_op;
> +      then_op = else_op;
> +      else_op = tmp;
> +    }
> +
> +  /* Choosing cmovz or cmovn is based on reload phase result.
> +     After reload phase, one source operand will use
> +     the same register as result operand.
> +     We can use cmovz/cmovn to catch the other source operand
> +     which has different register.
> +     So we check register number to determine using cmovz or cmovn.  */
> +  if (REGNO(then_op) == REGNO(operands[0]))
> +    emit_insn (gen_cmovz (operands[0], operands[2], else_op, operands[0]));
> +  else if (REGNO(else_op) == REGNO(operands[0]))
> +    emit_insn (gen_cmovn (operands[0], operands[2], then_op, operands[0]));
> +  else
> +    gcc_unreachable ();
> +
> +  DONE;
> +})

I don't really see off-hand how the third instruction would match in its
define_insn form, since the earlier instructions ought to match first.
And it looks from first glance like the splitter is working around
a bug in the first two instructions.  E.g. shouldn't the first pattern be:

(define_insn "cmovz"
  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
        (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
			     (const_int 0))
			 (match_operand:SI 2 "register_operand"     " r, 0")
			 (match_operand:SI 3 "register_operand"     " 0, r")))]
  "TARGET_CMOV"
  "@
   cmovz\t%0, %2, %1
   cmovn\t%0, %3, %1"
  [(set_attr "type" "move")
   (set_attr "length"  "4")])

with the second alternative being "cmovn" rather than "cmovz"?
With a similar change to the "cmovn" pattern, the define_insn_and_split
ought to be unnecessary.

> +	  /* We want to plus 1 into the integer value
> +	     of operands[2] to create 'slt' instruction.
> +	     This calculation is performed on the host machine,
> +	     which may be 64-bit integer.
> +	     So the meaning of calculation result may be
> +	     different from the 32-bit nds32 target.
> +
> +	     For example:
> +	       0x7fffffff + 0x1 -> 0x80000000,
> +	       this value is POSITIVE on 64-bit machine,
> +	       but the expected value on 32-bit nds32 target
> +	       should be NEGATIVE value.
> +
> +	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
> +	     explicitly create SImode constant rtx.  */
> +	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);

The comment seems unnecessary.  gen_int_mode is better than GEN_INT
wherever it can be used.  It's new uses of GEN_INT that deserve
comments :-)

> +	  /* We want to plus 1 into the integer value
> +	     of operands[2] to create 'slt' instruction.
> +	     This calculation is performed on the host machine,
> +	     which may be 64-bit integer.
> +	     So the meaning of calculation result may be
> +	     different from the 32-bit nds32 target.
> +
> +	     For example:
> +	       0x7fffffff + 0x1 -> 0x80000000,
> +	       this value is POSITIVE on 64-bit machine,
> +	       but the expected value on 32-bit nds32 target
> +	       should be NEGATIVE value.
> +
> +	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
> +	     explicitly create SImode constant rtx.  */
> +	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
> +
> +	  if (code == LE)
> +	    {
> +	      /* LE, use slts instruction */
> +	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
> +	    }
> +	  else
> +	    {
> +	      /* LEU, use slt instruction */
> +	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
> +	    }

Same here, but (le:SI X INT_MAX) isn't the same as (lt:SI X INT_MIN).
I'm not sure we're guaranteed to have optimised away all those cases
by this point, but at least an assert would be good.

> +;; Subroutine call instruction returning no value.
> +;;   operands[0]: It should be a mem RTX whose address is
> +;;                the address of the function.
> +;;   operands[1]: It is the number of bytes of arguments pushed as a const_int.
> +;;   operands[2]: It is the number of registers used as operands.
> +
> +(define_expand "call"
> +  [(parallel [(call (match_operand 0 "memory_operand" "")
> +		    (match_operand 1 "general_operand" ""))

Might as well just use (match_operand 1 "" ""), like the define_insns do.

FWIW you can drop trailing ""s from match_operands.

> +(define_expand "prologue" [(const_int 0)]
> +  ""
> +{
> +  /* Note that only under V3/V3M ISA, we could use v3push prologue.  */
> +  if (TARGET_V3PUSH)
> +    nds32_expand_prologue_v3push();
> +  else
> +    nds32_expand_prologue();
> +  DONE;
> +})

Space before "()".  Same for the epilogue pattern.

> +;; nop instruction.
> +
> +(define_insn "nop"
> +  [(const_int 0)]
> +  ""
> +  "nop16"
> +  [(set_attr "type"   "misc")
> +   (set_attr "length"    "2")])

What happens for !TARGET_16BIT?  Won't the alternative be disabled there?

> +  /* The En4 encoding string of the instruction is
> +     in the bitwise fashion of following: "fp gp lp sp".  */
> +  const char *En4_str[16] =

The optimisers would probably pick this up anyway, but:

  static const char *const En4_str[16] =

explicitly says that this is an array that should live in read-only storage.

> +  /* Create RbRe_str string.
> +     Note that we need to output ',' character if there exists En4 field.  */
> +  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
> +      RbRe_str = (INTVAL (operands[2]) != 0) ? "%0, %1, " : "%0, %1";
> +  else
> +      RbRe_str = "";

Excess indentation.  GNU style is not to wrap "INTVAL (operands[2]) != 0"
in brackets.

Same comments for pop.

Overall it looks good to me FWIW.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/6] Andes nds32: machine description of nds32 porting (2).
  2013-09-14 15:15         ` Richard Sandiford
@ 2013-09-27 18:38           ` Chung-Ju Wu
  2013-10-01 17:31             ` Richard Sandiford
  0 siblings, 1 reply; 18+ messages in thread
From: Chung-Ju Wu @ 2013-09-27 18:38 UTC (permalink / raw)
  To: gcc-patches, rdsandiford

[-- Attachment #1: Type: text/plain, Size: 28538 bytes --]

On 9/14/13 7:40 PM, Richard Sandiford wrote:
> Some comments for part 2.
> 

The followings are our revisions for part 2.

> Chung-Ju Wu <jasonwucj@gmail.com> writes:
>> +;; Include intrinsic functions definition.
>> +(include "nds32.intrinsic.md")
>> +
>> +;; Include block move for nds32 multiple load/store behavior.
>> +(include "nds32.multiple.md")
>> +
>> +;; Include DImode/DFmode operations.
>> +(include "nds32.doubleword.md")
>> +
>> +;; Include peephole patterns.
>> +(include "nds32.peephole2.md")
> 
> Usual gcc style is to use "-" rather than "." as a word separator in
> filenames.
> 

Fix it accordingly.

>> +(define_insn "*store_si"
>> +  [(set (match_operand:SI 0 "memory_operand"   "=U45, U33, U37, U45, m")
>> +	(match_operand:SI 1 "register_operand" "   l,   l,   l,   d, r"))]
>> +  ""
> 
> Loads, stores, register moves and constant moves should normally be in
> the same pattern, so that anything operating on constraints can see all
> the alternatives at once.  This might not be as important for LRA as it
> was for reload, but it still seems like good practice.
> 

Thanks for the suggestion.  Now we combine all the loads, stores,
register move, and constant move into a single pattern.
It works perfectly during the regression test.

>> +(define_insn "*mov<mode>"
>> +  [(set (match_operand:QIHISI 0 "register_operand" "=r, m, r")
>> +	(match_operand:QIHISI 1 "register_operand" " r, r, m"))]
>> +  ""
>> +{
>> +  switch (which_alternative)
>> +    {
>> +    case 0:
>> +      if (get_attr_length (insn) == 2)
>> +	return "mov55\t%0, %1";
>> +      else
>> +	return "ori\t%0, %1, 0";
>> +    case 1:
>> +      return nds32_output_32bit_store (operands, <byte>);
>> +    case 2:
>> +      return nds32_output_32bit_load (operands, <byte>);
>> +
>> +    default:
>> +      gcc_unreachable ();
>> +    }
>> +}
>> +  [(set_attr "type" "alu,store,load")
>> +   (set_attr "enabled" "1")
>> +   (set_attr_alternative "length"
>> +     [
>> +       ;; Alternative 0
>> +       (if_then_else (match_test "TARGET_16_BIT")
>> +		     (const_int 2)
>> +		     (const_int 4))
>> +       ;; Alternative 1
>> +       (const_int 4)
>> +       ;; Alternative 2
>> +       (const_int 4)
>> +     ])])
> 
> The style used in the load and store patterns was:
> 
> (define_insn "*mov<mode>"
>   [(set (match_operand:QIHISI 0 "register_operand" "=r, r, m, r")
> 	(match_operand:QIHISI 1 "register_operand" " r, r, r, m"))]
>   ""
> {
>   switch (which_alternative)
>     {
>     case 0:
>       return "mov55\t%0, %1";
>     case 1:
>       return "ori\t%0, %1, 0";
>     case 2:
>       return nds32_output_32bit_store (operands, <byte>);
>     case 3:
>       return nds32_output_32bit_load (operands, <byte>);
>     default:
>       gcc_unreachable ();
>     }
> }
>   [(set_attr "type" "alu,alu,store,load")
>    (set_attr "length" "2,4,4,4")])
> 
> which seems neater.  Did you try that but find that it didn't work here?
> 
> Same comment for other instructions where:
> 
>        (if_then_else (match_test "TARGET_16_BIT")
> 		     (const_int 2)
> 		     (const_int 4))
> 
> occurs (except for the special case of relaxable branch instructions,
> where using the if_then_else is good).
> 

Thanks for the comment.
I simplify the patterns as you suggested.

>> +;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF
>> +;; are able to match such instruction template.
>> +(define_insn "*move_addr"
>> +  [(set (match_operand:SI 0 "register_operand"       "=l, r")
>> +	(match_operand:SI 1 "nds32_symbolic_operand" " i, i"))]
>> +  ""
>> +  "la\t%0, %1"
>> +  [(set_attr "type" "move")
>> +   (set_attr "length"  "8")])
>> +
>> +
>> +(define_insn "*sethi"
>> +  [(set (match_operand:SI 0 "register_operand"           "=r")
>> +	(high:SI (match_operand:SI 1 "immediate_operand" " i")))]
>> +  ""
>> +{
>> +  return "sethi\t%0, hi20(%1)";
>> +}
>> +  [(set_attr "type" "alu")
>> +   (set_attr "length" "4")])
>> +
>> +
>> +(define_insn "*lo_sum"
>> +  [(set (match_operand:SI 0 "register_operand"             "=r")
>> +	(lo_sum:SI (match_operand:SI 1 "register_operand"  " 0")
>> +		   (match_operand:SI 2 "immediate_operand" " i")))]
>> +  ""
>> +  "ori\t%0, %1, lo12(%2)"
>> +  [(set_attr "type" "alu")
>> +   (set_attr "length" "4")])
> 
> Very minor, but "nds32_symbolic_operand" seems like a better choice for
> *sethi and *lo_sum too, since (high ...) and (lo_sum ...) shouldn't be
> used for const_ints.
> 

Modify it accordingly.

> Any pass would be in its rights to fuse a *sethi and *lo_sum pair back
> into a single *move_addr.  Is that something you want to allow?
> (That's a genuine question rather than a review comment btw.)
> 

In most cases, when we want to load address into a register,
it would match *move_addr pattern to generate "la" pseudo instruction,
which is going to be expanded into "sethi + ori" instructions by assembler.

But we also need to have *sethi and *lo_sum patterns so that we can
construct full addressing mode data load/store like:

  (set (reg_a) (high symbol))
  (set (reg_t) (mem (lo_sum reg_a symbol)))
or
  (set (reg_a) (high symbol))
  (set (mem (lo_sum reg_a symbol) (reg_t)))

To my experiences, it seems such construction is performed
during reload phase.  For example, given such code fragment:

  int a;
  void foo () { a = 10; }

Compile such sample with '-O1 -fdump-rtl-all', we can have:

[a.c.213r.ira]
(insn 7 6 0 2 (set (mem/c:SI (symbol_ref:SI ("a")  <var_decl 0xb75200b8 a>) [0 a+0 S4 A32])
        (const_int 10 [0xa])) a.c:2 27 {*movsi}
     (nil))

[a.c.214r.reload]
(insn 11 6 13 2 (set (reg:SI 0 $r0 [42])
        (high:SI (symbol_ref:SI ("a")  <var_decl 0xb75200b8 a>))) a.c:2 29 {*sethi}
     (nil))
(insn 13 11 12 2 (set (reg:SI 0 $r0 [42])
        (reg:SI 0 $r0 [42])) a.c:2 27 {*movsi}
     (expr_list:REG_DEAD (reg:SI 0 $r0 [42])
        (nil)))
(insn 12 13 14 2 (set (reg:SI 1 $r1 [43])
        (const_int 10 [0xa])) a.c:2 27 {*movsi}
     (nil))
(insn 14 12 7 2 (set (reg:SI 1 $r1 [43])
        (reg:SI 1 $r1 [43])) a.c:2 27 {*movsi}
     (expr_list:REG_DEAD (reg:SI 1 $r1 [43])
        (nil)))
(insn 7 14 10 2 (set (mem/c:SI (lo_sum:SI (reg:SI 0 $r0 [42])
                (symbol_ref:SI ("a")  <var_decl 0xb75200b8 a>)) [0 a+0 S4 A32])
        (reg:SI 1 $r1 [43])) a.c:2 27 {*movsi}
     (expr_list:REG_DEAD (reg:SI 1 $r1 [43])
        (expr_list:REG_DEAD (reg:SI 0 $r0 [42])
            (nil))))

> Is the "0" constraint on the *lo_sum really necessary?  It looks from
> the later OR patterns as though this form of ORI allows the source and
> destination registers to be different.
> 

Fix it accordingly.

>> +;; Zero extension instructions.
>> +
>> +(define_expand "zero_extend<mode>si2"
>> +  [(set (match_operand:SI 0 "general_operand" "")
>> +	(zero_extend:SI (match_operand:QIHI 1 "general_operand" "")))]
>> +  ""
>> +{
>> +  rtx tmp_reg;
>> +
>> +  /* We need to make sure operands[1] is a register.  */
>> +  if (!REG_P (operands[1]))
>> +    operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
> 
> Why do you need this?  It looks like the architecture has zero-extending loads.
> 

You are right.  Such behavior is unnecessary, so I...

>> +
>> +  /* If the pattern is "(mem X) <- (zero_extend (reg Y))",
>> +     we create two rtx patterns:
>> +       (reg:SI K) <- (zero_extend:SI (reg Y))
>> +       (mem:SI X) <- (reg:SI K)
>> +     The first rtx will be matched by '*zero_extend<mode>si2_reg' template,
>> +     and the second rtx will be matched by mov naming pattern.  */
>> +  if (MEM_P (operands[0]))
>> +    {
>> +      tmp_reg = gen_reg_rtx (SImode);
>> +
>> +      emit_insn (gen_zero_extend<mode>si2 (tmp_reg, operands[1]));
>> +      emit_insn (gen_movsi (operands[0], tmp_reg));
>> +
>> +      DONE;
>> +    }
>> +})
>> +
>> +(define_insn "*zero_extend<mode>si2_reg"
>> +  [(set (match_operand:SI 0 "register_operand"                   "=w, r")
>> +	(zero_extend:SI (match_operand:QIHI 1 "register_operand" " w, r")))]
>> +  ""
>> +{
>> +  switch (which_alternative)
>> +    {
>> +    case 0:
>> +      return "ze<size>33\t%0, %1";
>> +    case 1:
>> +      return "ze<size>\t%0, %1";
>> +
>> +    default:
>> +      gcc_unreachable ();
>> +    }
>> +}
>> +  [(set_attr "type"   "alu,alu")
>> +   (set_attr "length" "  2,  4")])
>> +
>> +(define_insn "*zero_extend<mode>si2_load"
>> +  [(set (match_operand:SI 0 "register_operand"                 "=  l, *r")
>> +	(zero_extend:SI (match_operand:QIHI 1 "memory_operand" " U33,  m")))]
>> +  ""
>> +{
>> +  if (which_alternative == 0)
>> +    return nds32_output_16bit_load (operands, <byte>);
>> +  else
>> +    return nds32_output_32bit_load (operands, <byte>);
>> +}
>> +  [(set_attr "length" "2, 4")
>> +   (set_attr "type" "load,load")])
> 
> Here too it's better to have a single pattern with both the register
> and memory alternatives.  It ought to be possible to define
> "zero_extend<mode>si2" directly as a define_insn rather than a
> define_expand:
> 
> (define_insn "zero_extend<mode>si2"
>   [(set (match_operand:SI 0 "register_operand" "w,w,r,*r")
> 	(zero_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" "w,r,U33,m")))]
>   ...
> 
> The target-independent code will then handle memory destinations.
> 
> Same comments for "extend<mode>si2".
> 

... follow your suggestion to have a single pattern for zero_extend pattern.
I also make similar changes for extend<mode>si2 pattern.

>> +;; Arithmetic instructions.
>> +
>> +(define_expand "addsi3"
>> +  [(set (match_operand:SI 0 "register_operand" "")
>> +	(plus:SI (match_operand:SI 1 "register_operand" "")
>> +		 (match_operand:SI 2 "nds32_nonmemory_nonsymbol_operand" "")))]
>> +  ""
>> +{
>> +  if (GET_CODE (operands[2]) == CONST_INT)
>> +    operands[2] = gen_int_mode (INTVAL (operands[2]), SImode);
>> +})
> 
> This looks like it's papering over a bug elsewhere.  Any CONST_INT passed
> into to gen_addsi3 must already be correct for SImode.  If you find callers
> where that isn't true, we need to fix them.  Also, any incorrect constants
> are usually filtered out by the predicate.
> 

Indeed.  After doing some investigation, it was designed to cover previous
bug that we use gen_addsi3 directly but passed incorrect const_int.
It should not happen in the current implementation.  So I remove it.

>> +(define_insn "*add<mode>3"
>> +  [(set (match_operand:QIHISI 0 "register_operand"                      "=   d,    l,  d, l,    k,    l,    r, r")
>> +	(plus:QIHISI (match_operand:QIHISI 1 "register_operand"         "    0,    l, %0, l,    0,    k,    r, r")
>> +		     (match_operand:QIHISI 2 "nds32_reg_or_int_operand" " Iu05, Iu03,  r, l, Is10, Iu06, Is15, r")))]
>> +  ""
>> +  "@
>> +  addi45\t%0, %2
>> +  addi333\t%0, %1, %2
>> +  add45\t%0, %2
>> +  add333\t%0, %1, %2
>> +  addi10.sp\t%2
>> +  addri36.sp\t%0, %2
>> +  addi\t%0, %1, %2
>> +  add\t%0, %1, %2"
>> +  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu")
>> +   (set_attr "length" "  2,  2,  2,  2,  2,  2,  4,  4")])
> 
> The predicates in the define_expand and define_insn are different.
> They should usually be the same.
> 
> Without the gen_int_mode, this too could be defined directly as
> a define_insn, without a separate define_expand.
> 

Thanks for the suggestion.
Now I define single "add<mode>3" pattern for these operations.

>> +(define_expand "subsi3"
>> +  [(set (match_operand:SI 0 "register_operand" "")
>> +	(minus:SI (match_operand:SI 1 "nds32_rimm15s_operand" "")
>> +		 (match_operand:SI 2 "nds32_rimm15s_operand" "")))]
>> +  ""
>> +  ""
>> +)
> 
> Operand 2 shouldn't allow immediates.  They should all go via the
> add optab instead.
> 

Thanks for the comment.
Now I define sub<mode>3 pattern without Iu05/Iu03 immediate:

(define_insn "sub<mode>3"
  [(set (match_operand:QIHISI 0 "register_operand"                    "=d, l,    r, r")
        (minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" " 0, l, Is15, r")
                      (match_operand:QIHISI 2 "register_operand"      " r, l,    r, r")))]
  ...

And I also create two new constraints used in add<mode>3 pattern:

(define_insn "add<mode>3"
  [(set (match_operand:QIHISI 0 "register_operand"                   "=   d,    l, ...")
        (plus:QIHISI (match_operand:QIHISI 1 "register_operand"      "    0,    l, ...")
                     (match_operand:QIHISI 2 "nds32_rimm15s_operand" " In05, In03, ...")))]
  ...
{
  ...
    case 0:
      operands[2] = gen_int_mode (-INTVAL (operands[2]), SImode);
      return "subi45\t%0, %2";
    case 1:
      operands[2] = gen_int_mode (-INTVAL (operands[2]), SImode);
      return "subi333\t%0, %1, %2";
  ...
}

where In05 is in range of -31 ~ 0, and In03 is in range of -7 ~ 0.

>> +
>> +(define_insn "*sub<mode>3"
>> +  [(set (match_operand:QIHISI 0 "register_operand"                    "=   d,    l, d, l,    r, r")
>> +	(minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" "    0,    l, 0, l, Is15, r")
>> +		      (match_operand:QIHISI 2 "nds32_rimm15s_operand" " Iu05, Iu03, r, l,    r, r")))]
>> +  ""
>> +  "@
>> +  subi45\t%0, %2
>> +  subi333\t%0, %1, %2
>> +  sub45\t%0, %2
>> +  sub333\t%0, %1, %2
>> +  subri\t%0, %2, %1
>> +  sub\t%0, %1, %2"
>> +  [(set_attr "type"   "alu,alu,alu,alu,alu,alu")
>> +   (set_attr "length" "  2,  2,  2,  2,  4,  4")])
> 
> Here too a direct define_insn seems better than a define_expand/define_insn
> pair.
> 

Modify it accordingly.

>> +(define_expand "andsi3"
>> +  [(set (match_operand:SI         0 "register_operand" "")
>> +	(and:SI (match_operand:SI 1 "register_operand" "")
>> +		(match_operand:SI 2 "general_operand"  "")))]
>> +  ""
>> +{
>> +  /* If operands[2] is const_int,
>> +     we might be able to use other more efficient instructions.  */
>> +  if (GET_CODE (operands[2]) == CONST_INT)
>> +    {
>> +      int mask = INTVAL (operands[2]);
>> +
>> +      if (mask == 255)
>> +	{
>> +	  /* ($r0 & 0xff)  ==>  (zeb $r0, $r0) */
>> +	  operands[1] = convert_to_mode (QImode, operands[1], 1);
>> +	  emit_insn (gen_zero_extendqisi2 (operands[0], operands[1]));
>> +	  DONE;
>> +	}
>> +      else if (mask == 65535)
>> +	{
>> +	  /* ($r0 & 0xffff)  ==>  (zeh $r0, $r0) */
>> +	  operands[1] = convert_to_mode (HImode, operands[1], 1);
>> +	  emit_insn (gen_zero_extendhisi2 (operands[0], operands[1]));
>> +	  DONE;
>> +	}
>> +    }
>> +})
> 
> It looks like the associated "*andsi3" insn also has a case for zeb.
> That's usually the better approach.  Please consider adding a zeh case
> to the "*andsi3" alternatives too and removing the code above.
> 
> With that change, the define_expand and define_insn could be fused.
> 

Thanks for the comment.  Modify it accordingly.

>> +;; For iorsi3 naming pattern, we have to use define_expand first,
>> +;; and then design different anonymous patterns so that it can
>> +;; simply set different instruction length according to ISA.
>> +(define_expand "iorsi3"
>> +  [(set (match_operand:SI 0 "register_operand"         "")
>> +	(ior:SI (match_operand:SI 1 "register_operand" "")
>> +		(match_operand:SI 2 "general_operand"  "")))]
>> +  ""
>> +  ""
>> +)
>> +
>> +;; This is the iorsi3 pattern for V3/V3M ISA,
>> +;; which DOES HAVE 'or33' instruction.
>> +;; So we can identify 'or Rt3,Ra3,Rb3' case and set its length to be 2.
>> +(define_insn "*iorsi3"
>> +  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
>> +	(ior:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
>> +		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, Ie15")))]
>> +  ""
>> +{
>> +  int one_position;
>> +
>> +  switch (which_alternative)
>> +    {
>> +    case 0:
>> +      return "or33\t%0, %2";
>> +    case 1:
>> +      return "or\t%0, %1, %2";
>> +    case 2:
>> +      return "ori\t%0, %1, %2";
>> +    case 3:
>> +      /* If we reach this alternative,
>> +         it must pass the nds32_can_use_bset_p() test,
>> +         so that we can guarantee there is only one 1-bit
>> +         within the immediate value.  */
>> +      for (one_position = 31; one_position >= 0; one_position--)
>> +	{
>> +	  if ((INTVAL (operands[2]) & (1 << one_position)) != 0)
>> +	    {
>> +	      /* Found the 1-bit position.  */
>> +	      operands[2] = GEN_INT (one_position);
>> +	      break;
>> +	    }
>> +	}
>> +      return "bset\t%0, %1, %2";
>> +
>> +    default:
>> +      gcc_unreachable ();
>> +    }
>> +}
>> +  [(set_attr "type"   "alu,alu,alu,alu")
>> +   (set_attr "length" "  2,  4,  4,  4")])
> 
> I don't understand the comment above the define_expand, sorry.
> This too looks like a case where "iorsi3" should just be a define_insn,
> with no define_expand.
> 

The define_expand is a legacy code.
It should not be necessary in current design.
Thanks for the catch.  Fix it accordingly.

> Case 3 could use exact_log2.
> 

Modify it accordingly.

> Same comments for xor. 

Modify it accordingly.

> (You might be able to use code iterators
> and have a single set of patterns for or and xor, including the
> shifting variants.)
> 

Thanks for the suggestion.  But I prefer not to use code iterators
because we may add different variants for 'or' and 'xor' separately.

>> +;; For negsi2 naming pattern, we have to use define_expand first,
>> +;; and then design different anonymous patterns so that it can
>> +;; output assembly code according to ISA.
>> +(define_expand "negsi2"
>> +  [(set (match_operand:SI 0 "register_operand"         "")
>> +	(neg:SI (match_operand:SI 1 "register_operand" "")))]
>> +  ""
>> +  ""
>> +)
>> +
>> +;; Note that there is NO 'neg33' instruction for V2 ISA.
>> +;; So 'subri A,B,0' (its semantic is 'A = 0 - B')
>> +;; is the only option for V2 ISA.
>> +(define_insn "*negsi2"
>> +  [(set (match_operand:SI 0 "register_operand"         "=w, r")
>> +	(neg:SI (match_operand:SI 1 "register_operand" " w, r")))]
>> +  ""
>> +  "@
>> +   neg33\t%0, %1
>> +   subri\t%0, %1, 0"
>> +  [(set_attr "type"   "alu,alu")
>> +   (set_attr "length" "  2,  4")])
> 
> Here too the define_expand seems redundant.  Same for one_cmplsi2.
> 

Thanks for the catch.
I modify negsi2 and one_cmplsi2 patterns accordingly.

> (Looks like you already define the shift instructions directly though,
> thanks.)
> 
>> +;; Shift instructions.
>> +
>> +(define_insn "ashlsi3"
>> +  [(set (match_operand:SI 0 "register_operand"            "=   l,    r, r")
>> +	(ashift:SI (match_operand:SI 1 "register_operand" "    l,    r, r")
>> +		   (match_operand:SI 2 "general_operand"  " Iu03, Iu05, r")))]
> 
> Operand 2 doesn't allow memory, so nonmemory_operand would be better
> than general_operand.  Both are correct, but nonmemory_operand is
> tighter and so forces the pre-RA optimisers to treat the load as
> a separate instruction.
> 
> Same for the other shift instructions.
> 

Thanks for the suggestion.
It does help optimization for some cases.
Modify it accordingly.

>> +(define_expand "mov<mode>cc"
>> +  [(set (match_operand:QIHI 0 "register_operand" "")
>> +	(if_then_else:QIHI (match_operand 1 "comparison_operator" "")
>> +			   (match_operand:QIHI 2 "register_operand" "")
>> +			   (match_operand:QIHI 3 "register_operand" "")))]
>> +  "TARGET_CMOV"
>> +{
>> +  rtx insn;
>> +
>> +  /* For QImode and HImode conditional move,
>> +     make them to be SImode behavior.  */
>> +  operands[0] = simplify_gen_subreg (SImode, operands[0], <MODE>mode, 0);
>> +  operands[2] = simplify_gen_subreg (SImode, operands[2], <MODE>mode, 0);
>> +  operands[3] = simplify_gen_subreg (SImode, operands[3], <MODE>mode, 0);
>> +
>> +  insn = gen_movsicc (operands[0], operands[1], operands[2], operands[3]);
>> +
>> +  if (!insn)
>> +    FAIL;
>> +
>> +  emit_insn (insn);
>> +  DONE;
>> +})
> 
> It'd be better to handle QI, HI and SI using a single template if possible.
> Subregs are harder to optimise than plain registers.
> 

Thanks for clarifying it.
I decide to remove QI, HI pattern and just leave movsicc pattern in place.

>> +
>> +(define_insn "cmovz"
>> +  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
>> +        (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
>> +			     (const_int 0))
>> +			 (match_operand:SI 2 "register_operand"     " r, 0")
>> +			 (match_operand:SI 3 "register_operand"     " 0, r")))]
>> +  "TARGET_CMOV"
>> +  "@
>> +   cmovz\t%0, %2, %1
>> +   cmovz\t%0, %3, %1"
>> +  [(set_attr "type" "move")
>> +   (set_attr "length"  "4")])
>> +
>> +(define_insn "cmovn"
>> +  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
>> +	(if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r")
>> +			     (const_int 0))
>> +			 (match_operand:SI 2 "register_operand"     " r, 0")
>> +			 (match_operand:SI 3 "register_operand"     " 0, r")))]
>> +  "TARGET_CMOV"
>> +  "@
>> +   cmovn\t%0, %2, %1
>> +   cmovn\t%0, %3, %1"
>> +  [(set_attr "type" "move")
>> +   (set_attr "length"  "4")])
>> +
>> +(define_insn_and_split "*movsicc"
>> +  [(set (match_operand:SI 0 "register_operand"                     "=r, r")
>> +	(if_then_else:SI (match_operator 1 "nds32_equality_comparison_operator"
>> +			   [(match_operand:SI 2 "register_operand" " r, r")
>> +			    (const_int 0)])
>> +			 (match_operand:SI 3 "register_operand"    " 0, r")
>> +			 (match_operand:SI 4 "register_operand"    " r, 0")))]
>> +  "TARGET_CMOV"
>> +  "#"
>> +  "reload_completed"
>> +  [(pc)]
>> +{
>> +  enum rtx_code code = GET_CODE (operands[1]);
>> +  rtx then_op = operands[3];
>> +  rtx else_op = operands[4];
>> +  rtx tmp;
>> +
>> +  /* According to the implementation in "movsicc" naming pattern,
>> +     if we make transformation in which the comparison code is EQ,
>> +     the desired target is at "else" part position semantically.
>> +     Now it is the time (after reload_completed) to physically
>> +     swap it to "then" part position.  */
>> +  if (code == EQ)
>> +    {
>> +      tmp     = then_op;
>> +      then_op = else_op;
>> +      else_op = tmp;
>> +    }
>> +
>> +  /* Choosing cmovz or cmovn is based on reload phase result.
>> +     After reload phase, one source operand will use
>> +     the same register as result operand.
>> +     We can use cmovz/cmovn to catch the other source operand
>> +     which has different register.
>> +     So we check register number to determine using cmovz or cmovn.  */
>> +  if (REGNO(then_op) == REGNO(operands[0]))
>> +    emit_insn (gen_cmovz (operands[0], operands[2], else_op, operands[0]));
>> +  else if (REGNO(else_op) == REGNO(operands[0]))
>> +    emit_insn (gen_cmovn (operands[0], operands[2], then_op, operands[0]));
>> +  else
>> +    gcc_unreachable ();
>> +
>> +  DONE;
>> +})
> 
> I don't really see off-hand how the third instruction would match in its
> define_insn form, since the earlier instructions ought to match first.
> And it looks from first glance like the splitter is working around
> a bug in the first two instructions.  E.g. shouldn't the first pattern be:
> 
> (define_insn "cmovz"
>   [(set (match_operand:SI 0 "register_operand"                      "=r, r")
>         (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
> 			     (const_int 0))
> 			 (match_operand:SI 2 "register_operand"     " r, 0")
> 			 (match_operand:SI 3 "register_operand"     " 0, r")))]
>   "TARGET_CMOV"
>   "@
>    cmovz\t%0, %2, %1
>    cmovn\t%0, %3, %1"
>   [(set_attr "type" "move")
>    (set_attr "length"  "4")])
> 
> with the second alternative being "cmovn" rather than "cmovz"?
> With a similar change to the "cmovn" pattern, the define_insn_and_split
> ought to be unnecessary.
> 

I would say you are ABSOLUTELY right!
We follow your suggestion to modify "cmovz" and "cmovn" patterns.
With that change we can happily discard the define_insn_and_split part!
Thank you very much!! :)

>> +	  /* We want to plus 1 into the integer value
>> +	     of operands[2] to create 'slt' instruction.
>> +	     This calculation is performed on the host machine,
>> +	     which may be 64-bit integer.
>> +	     So the meaning of calculation result may be
>> +	     different from the 32-bit nds32 target.
>> +
>> +	     For example:
>> +	       0x7fffffff + 0x1 -> 0x80000000,
>> +	       this value is POSITIVE on 64-bit machine,
>> +	       but the expected value on 32-bit nds32 target
>> +	       should be NEGATIVE value.
>> +
>> +	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
>> +	     explicitly create SImode constant rtx.  */
>> +	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
> 
> The comment seems unnecessary.  gen_int_mode is better than GEN_INT
> wherever it can be used.  It's new uses of GEN_INT that deserve
> comments :-)
> 

May I keep this comment for other engineers? :p

Some people may not be able to tell the difference yet,
and I wish I can keep this comment so that others are
able to refer a sample of why gen_int_mode() is better
than GEN_INT() wherever it can be used. :)

>> +	  /* We want to plus 1 into the integer value
>> +	     of operands[2] to create 'slt' instruction.
>> +	     This calculation is performed on the host machine,
>> +	     which may be 64-bit integer.
>> +	     So the meaning of calculation result may be
>> +	     different from the 32-bit nds32 target.
>> +
>> +	     For example:
>> +	       0x7fffffff + 0x1 -> 0x80000000,
>> +	       this value is POSITIVE on 64-bit machine,
>> +	       but the expected value on 32-bit nds32 target
>> +	       should be NEGATIVE value.
>> +
>> +	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
>> +	     explicitly create SImode constant rtx.  */
>> +	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
>> +
>> +	  if (code == LE)
>> +	    {
>> +	      /* LE, use slts instruction */
>> +	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
>> +	    }
>> +	  else
>> +	    {
>> +	      /* LEU, use slt instruction */
>> +	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
>> +	    }
> 
> Same here, but (le:SI X INT_MAX) isn't the same as (lt:SI X INT_MIN).
> I'm not sure we're guaranteed to have optimised away all those cases
> by this point, but at least an assert would be good.
> 

Thanks.  I add an assert in case the code is LE and
original operands[2] is INT_MAX.

>> +;; Subroutine call instruction returning no value.
>> +;;   operands[0]: It should be a mem RTX whose address is
>> +;;                the address of the function.
>> +;;   operands[1]: It is the number of bytes of arguments pushed as a const_int.
>> +;;   operands[2]: It is the number of registers used as operands.
>> +
>> +(define_expand "call"
>> +  [(parallel [(call (match_operand 0 "memory_operand" "")
>> +		    (match_operand 1 "general_operand" ""))
> 
> Might as well just use (match_operand 1 "" ""), like the define_insns do.
> 
> FWIW you can drop trailing ""s from match_operands.
> 

Modify it accordingly.

>> +(define_expand "prologue" [(const_int 0)]
>> +  ""
>> +{
>> +  /* Note that only under V3/V3M ISA, we could use v3push prologue.  */
>> +  if (TARGET_V3PUSH)
>> +    nds32_expand_prologue_v3push();
>> +  else
>> +    nds32_expand_prologue();
>> +  DONE;
>> +})
> 
> Space before "()".  Same for the epilogue pattern.
> 

Fix it accordingly.

>> +;; nop instruction.
>> +
>> +(define_insn "nop"
>> +  [(const_int 0)]
>> +  ""
>> +  "nop16"
>> +  [(set_attr "type"   "misc")
>> +   (set_attr "length"    "2")])
> 
> What happens for !TARGET_16BIT?  Won't the alternative be disabled there?
> 

Oh my god~ Thank you for the catch about such latent bug!
Fix it in the revised patch. :)

>> +  /* The En4 encoding string of the instruction is
>> +     in the bitwise fashion of following: "fp gp lp sp".  */
>> +  const char *En4_str[16] =
> 
> The optimisers would probably pick this up anyway, but:
> 
>   static const char *const En4_str[16] =
> 
> explicitly says that this is an array that should live in read-only storage.
> 

Fix it accordingly.

>> +  /* Create RbRe_str string.
>> +     Note that we need to output ',' character if there exists En4 field.  */
>> +  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
>> +      RbRe_str = (INTVAL (operands[2]) != 0) ? "%0, %1, " : "%0, %1";
>> +  else
>> +      RbRe_str = "";
> 
> Excess indentation.  GNU style is not to wrap "INTVAL (operands[2]) != 0"
> in brackets.
> 
> Same comments for pop.
> 

Fix it accordingly.


> Overall it looks good to me FWIW.
> 
> Thanks,
> Richard
> 

A revised patch for nds32.md is attached.
Thanks for all of your review comments! :)

Best regards,
jasonwucj



[-- Attachment #2: 2-nds32-backend-md-part2.v3.revised.patch --]
[-- Type: text/plain, Size: 77136 bytes --]

diff --git gcc/common/config/nds32/nds32-common.c gcc/common/config/nds32/nds32-common.c
new file mode 100644
index 0000000..f82f725
--- /dev/null
+++ gcc/common/config/nds32/nds32-common.c
@@ -0,0 +1,117 @@
+/* Common hooks of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic-core.h"
+#include "tm.h"
+#include "common/common-target.h"
+#include "common/common-target-def.h"
+#include "opts.h"
+#include "flags.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_HANDLE_OPTION.  */
+static bool
+nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
+		     struct gcc_options *opts_set ATTRIBUTE_UNUSED,
+		     const struct cl_decoded_option *decoded,
+		     location_t loc)
+{
+  size_t     code  = decoded->opt_index;
+  int        value = decoded->value;
+
+  switch (code)
+    {
+    case OPT_misr_vector_size_:
+      /* Check the valid vector size: 4 or 16.  */
+      if (value != 4 && value != 16)
+	{
+	  error_at (loc, "for the option -misr-vector-size=X, the valid X "
+			 "must be: 4 or 16");
+	  return false;
+	}
+
+      return true;
+
+    case OPT_mcache_block_size_:
+      /* Check valid value: 4 8 16 32 64 128 256 512.  */
+      if (exact_log2 (value) < 2 || exact_log2 (value) > 9)
+	{
+	  error_at (loc, "for the option -mcache-block-size=X, the valid X "
+			 "must be: 4, 8, 16, 32, 64, 128, 256, or 512");
+	  return false;
+	}
+
+      return true;
+
+    default:
+      return true;
+    }
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
+static const struct default_options nds32_option_optimization_table[] =
+{
+  /* Enable -fomit-frame-pointer by default at -O1 or higher.  */
+  { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+  /* Enable -mv3push by default at -Os, but it is useless under V2 ISA.  */
+  { OPT_LEVELS_SIZE,   OPT_mv3push,             NULL, 1 },
+
+  { OPT_LEVELS_NONE,   0,                       NULL, 0 }
+};
+
+/* ------------------------------------------------------------------------ */
+\f
+/* Run-time Target Specification.  */
+
+/* Default enable
+     TARGET_GP_DIRECT: Generate gp-imply instruction.
+     TARGET_16_BIT   : Generate 16/32 bit mixed length instruction.
+     TARGET_PERF_EXT : Generate performance extension instruction.
+     TARGET_CMOV     : Generate conditional move instruction.  */
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS		\
+  (MASK_GP_DIRECT				\
+   | MASK_16_BIT				\
+   | MASK_PERF_EXT				\
+   | MASK_CMOV)
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION nds32_handle_option
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE nds32_option_optimization_table
+
+\f
+/* Defining the Output Assembler Language.  */
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+/* ------------------------------------------------------------------------ */
+
+struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER;
+
+/* ------------------------------------------------------------------------ */
diff --git gcc/config/nds32/nds32.md gcc/config/nds32/nds32.md
new file mode 100644
index 0000000..e50b1b3
--- /dev/null
+++ gcc/config/nds32/nds32.md
@@ -0,0 +1,2425 @@
+;; Machine description of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2013 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; See file "rtl.def" for documentation on define_insn, match_*, et. al.
+
+;; Include predicates definition.
+(include "predicates.md")
+
+;; Include constraints definition.
+(include "constraints.md")
+
+;; Include iterators definition.
+(include "iterators.md")
+
+;; Include pipelines definition.
+(include "pipelines.md")
+
+
+;; Include constants definition.
+(include "constants.md")
+
+
+;; Include intrinsic functions definition.
+(include "nds32-intrinsic.md")
+
+;; Include block move for nds32 multiple load/store behavior.
+(include "nds32-multiple.md")
+
+;; Include DImode/DFmode operations.
+(include "nds32-doubleword.md")
+
+;; Include peephole patterns.
+(include "nds32-peephole2.md")
+
+
+;; Insn type, it is used to default other attribute values.
+(define_attr "type"
+  "unknown,move,load,store,alu,compare,branch,call,misc"
+  (const_string "unknown"))
+
+
+;; Length, in bytes, default is 4-bytes.
+(define_attr "length" "" (const_int 4))
+
+
+;; Enabled, which is used to enable/disable insn alternatives.
+;; Note that we use length and TARGET_16_BIT here as criteria.
+;; If the instruction pattern already checks TARGET_16_BIT to
+;; determine the length by itself, its enabled attribute should be
+;; always 1 to avoid the conflict with the settings here.
+(define_attr "enabled" ""
+  (cond [(and (eq_attr "length" "2")
+	      (match_test "!TARGET_16_BIT"))
+	 (const_int 0)]
+	(const_int 1)))
+
+
+;; ----------------------------------------------------------------------------
+
+
+;; Move instructions.
+
+(define_expand "movqi"
+  [(set (match_operand:QI 0 "general_operand" "")
+	(match_operand:QI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (GET_CODE (operands[0]) == MEM && !REG_P (operands[1]))
+    operands[1] = force_reg (QImode, operands[1]);
+})
+
+(define_expand "movhi"
+  [(set (match_operand:HI 0 "general_operand" "")
+	(match_operand:HI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (GET_CODE (operands[0]) == MEM && !REG_P (operands[1]))
+    operands[1] = force_reg (HImode, operands[1]);
+})
+
+(define_expand "movsi"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(match_operand:SI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (GET_CODE (operands[0]) == MEM && !REG_P (operands[1]))
+    operands[1] = force_reg (SImode, operands[1]);
+})
+
+
+;; For QImode and HImode, the immediate value can be fit in imm20s.
+;; So there is no need to support QI and HI in the split patterns.
+;; Besides, in the split condition, we ask big-constant split to be
+;; performed after the reload phase, so that the mov2add optimization
+;; in postreload has a chance to optimize the code.
+
+(define_insn_and_split "*mov<mode>"
+  [(set (match_operand:QIHISI 0 "nonimmediate_operand"      "=r, r, U45, U33, U37, U45, m,   l,   l,   l,   d, r,    d,    r,    r,    r,    t,    r")
+	(match_operand:QIHISI 1 "nds32_nonsymbolic_operand" " r, r,   l,   l,   l,   d, r, U45, U33, U37, U45, m, Ip05, Is05, Is20, Ihig, Ispl, Ispl"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mov55\t%0, %1";
+    case 1:
+      return "ori\t%0, %1, 0";
+    case 2:
+    case 3:
+    case 4:
+    case 5:
+      return nds32_output_16bit_store (operands, <byte>);
+    case 6:
+      return nds32_output_32bit_store (operands, <byte>);
+    case 7:
+    case 8:
+    case 9:
+    case 10:
+      return nds32_output_16bit_load (operands, <byte>);
+    case 11:
+      return nds32_output_32bit_load (operands, <byte>);
+    case 12:
+      return "movpi45\t%0, %1";
+    case 13:
+      return "movi55\t%0, %1";
+    case 14:
+      return "movi\t%0, %1";
+    case 15:
+      return "sethi\t%0, hi20(%1)";
+    case 16:
+      /* Use $r15, if the value is NOT in the range of Is20,
+         we must output "sethi + ori" directly since
+         we may have already passed the split stage.  */
+      return "sethi\t%0, hi20(%1)\;ori\t%0, %0, lo12(%1)";
+    case 17:
+      return "#";
+    default:
+      gcc_unreachable ();
+    }
+}
+  "reload_completed
+   && CONST_INT_P (operands[1])
+   && satisfies_constraint_Ispl (operands[1])
+   && !satisfies_constraint_Is20 (operands[1])"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 0) (plus:QIHISI (match_dup 0) (match_dup 2)))]
+{
+  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xfff);
+  operands[1] = GEN_INT ((INTVAL (operands[1]) >> 12) << 12);
+}
+  [(set_attr "type"   "alu,alu,store,store,store,store,store,load,load,load,load,load,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,    2,    2,    2,    2,    4,   2,   2,   2,   2,   4,  2,  2,  4,  4,  8,  8")])
+
+
+;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF
+;; are able to match such instruction template.
+(define_insn "*move_addr"
+  [(set (match_operand:SI 0 "register_operand"       "=l, r")
+	(match_operand:SI 1 "nds32_symbolic_operand" " i, i"))]
+  ""
+  "la\t%0, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "8")])
+
+
+(define_insn "*sethi"
+  [(set (match_operand:SI 0 "register_operand"                "=r")
+	(high:SI (match_operand:SI 1 "nds32_symbolic_operand" " i")))]
+  ""
+{
+  return "sethi\t%0, hi20(%1)";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+(define_insn "*lo_sum"
+  [(set (match_operand:SI 0 "register_operand"                  "=r")
+	(lo_sum:SI (match_operand:SI 1 "register_operand"       " r")
+		   (match_operand:SI 2 "nds32_symbolic_operand" " i")))]
+  ""
+  "ori\t%0, %1, lo12(%2)"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Zero extension instructions.
+
+(define_insn "zero_extend<mode>si2"
+  [(set (match_operand:SI 0 "register_operand"                       "=l, r,   l, *r")
+	(zero_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" " l, r, U33,  m")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "ze<size>33\t%0, %1";
+    case 1:
+      return "ze<size>\t%0, %1";
+    case 2:
+      return nds32_output_16bit_load (operands, <byte>);
+    case 3:
+      return nds32_output_32bit_load (operands, <byte>);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,load,load")
+   (set_attr "length" "  2,  4,   2,   4")])
+
+
+;; Sign extension instructions.
+
+(define_insn "extend<mode>si2"
+  [(set (match_operand:SI 0 "register_operand"                       "=l, r, r")
+	(sign_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" " l, r, m")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "se<size>33\t%0, %1";
+    case 1:
+      return "se<size>\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load_s (operands, <byte>);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,load")
+   (set_attr "length" "  2,  4,   4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Arithmetic instructions.
+
+(define_insn "add<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                   "=   d,    l,    d,    l,  d, l,    k,    l,    r, r")
+	(plus:QIHISI (match_operand:QIHISI 1 "register_operand"      "    0,    l,    0,    l, %0, l,    0,    k,    r, r")
+		     (match_operand:QIHISI 2 "nds32_rimm15s_operand" " In05, In03, Iu05, Iu03,  r, l, Is10, Iu06, Is15, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      /* addi Rt4,Rt4,-x  ==>  subi45 Rt4,x
+         where 0 <= x <= 31 */
+      operands[2] = gen_int_mode (-INTVAL (operands[2]), SImode);
+      return "subi45\t%0, %2";
+    case 1:
+      /* addi Rt3,Ra3,-x  ==>  subi333 Rt3,Ra3,x
+         where 0 <= x <= 7 */
+      operands[2] = gen_int_mode (-INTVAL (operands[2]), SImode);
+      return "subi333\t%0, %1, %2";
+    case 2:
+      return "addi45\t%0, %2";
+    case 3:
+      return "addi333\t%0, %1, %2";
+    case 4:
+      return "add45\t%0, %2";
+    case 5:
+      return "add333\t%0, %1, %2";
+    case 6:
+      return "addi10.sp\t%2";
+    case 7:
+      return "addri36.sp\t%0, %2";
+    case 8:
+      return "addi\t%0, %1, %2";
+    case 9:
+      return "add\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  2,  2,  2,  2,  2,  2,  4,  4")])
+
+(define_insn "sub<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                    "=d, l,    r, r")
+	(minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" " 0, l, Is15, r")
+		      (match_operand:QIHISI 2 "register_operand"      " r, l,    r, r")))]
+  ""
+  "@
+  sub45\t%0, %2
+  sub333\t%0, %1, %2
+  subri\t%0, %2, %1
+  sub\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  4,  4")])
+
+
+;; GCC intends to simplify (plus (ashift ...) (reg))
+;; into (plus (mult ...) (reg)), so our matching pattern takes 'mult'
+;; and needs to ensure it is exact_log2 value.
+(define_insn "*add_slli"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand"  " r")
+			  (match_operand:SI 2 "immediate_operand" " i"))
+		 (match_operand:SI 3 "register_operand"           " r")))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[2])) != -1)
+   && (exact_log2 (INTVAL (operands[2])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'add_slli' instruction.  */
+  operands[2] = GEN_INT (floor_log2 (INTVAL (operands[2])));
+
+  return "add_slli\t%0, %3, %1, %2";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*add_srli"
+  [(set (match_operand:SI 0 "register_operand"                        "=   r")
+	(plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			      (match_operand:SI 2 "immediate_operand" " Iu05"))
+		 (match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "add_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; GCC intends to simplify (minus (reg) (ashift ...))
+;; into (minus (reg) (mult ...)), so our matching pattern takes 'mult'
+;; and needs to ensure it is exact_log2 value.
+(define_insn "*sub_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=r")
+	(minus:SI (match_operand:SI 1 "register_operand"           " r")
+		  (mult:SI (match_operand:SI 2 "register_operand"  " r")
+			   (match_operand:SI 3 "immediate_operand" " i"))))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[3])) != -1)
+   && (exact_log2 (INTVAL (operands[3])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'sub_slli' instruction.  */
+  operands[3] = GEN_INT (floor_log2 (INTVAL (operands[3])));
+
+  return "sub_slli\t%0, %1, %2, %3";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*sub_srli"
+  [(set (match_operand:SI 0 "register_operand"                         "=   r")
+	(minus:SI (match_operand:SI 1 "register_operand"               "    r")
+		  (lshiftrt:SI (match_operand:SI 2 "register_operand"  "    r")
+			       (match_operand:SI 3 "immediate_operand" " Iu05"))))]
+  "TARGET_ISA_V3"
+  "sub_srli\t%0, %1, %2, %3"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; Multiplication instructions.
+
+(define_insn "mulsi3"
+  [(set (match_operand:SI 0 "register_operand"          "= w, r")
+	(mult:SI (match_operand:SI 1 "register_operand" " %0, r")
+		 (match_operand:SI 2 "register_operand" "  w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mul33\t%0, %2";
+    case 1:
+      return "mul\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+(define_insn "mulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V2 || TARGET_ISA_V3"
+  "mulsr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "umulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V2 || TARGET_ISA_V3"
+  "mulr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Multiply-accumulate instructions.
+
+(define_insn "*maddr32_0"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (match_operand:SI 3 "register_operand"          " 0")
+                 (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "*maddr32_1"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))
+                 (match_operand:SI 3 "register_operand"          " 0")))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "*msubr32"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (minus:SI (match_operand:SI 3 "register_operand"          " 0")
+                  (mult:SI (match_operand:SI 1 "register_operand" " r")
+                           (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "msubr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Div Instructions.
+
+(define_insn "divmodsi4"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+        (div:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand" " r")))
+   (set (match_operand:SI 3 "register_operand"         "=r")
+        (mod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divsr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "udivmodsi4"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+        (udiv:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand"  " r")))
+   (set (match_operand:SI 3 "register_operand"          "=r")
+        (umod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Boolean instructions.
+;; Note: We define the DImode versions in nds32-doubleword.md.
+
+;; ----------------------------------------------------------------------------
+;; 'AND' operation
+;; ----------------------------------------------------------------------------
+
+(define_insn "bitc"
+  [(set (match_operand:SI 0 "register_operand"                 "=r")
+	(and:SI (not:SI (match_operand:SI 1 "register_operand" " r"))
+		(match_operand:SI 2 "register_operand"         " r")))]
+  "TARGET_ISA_V3"
+  "bitc\t%0, %2, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+(define_insn "andsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    l,    l,    l,    l,    l,    l,    r,   r,     r,    r,    r")
+	(and:SI (match_operand:SI 1 "register_operand" " %0, r,    l,    l,    l,    l,    0,    0,    r,   r,     r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Izeb, Izeh, Ixls, Ix11, Ibms, Ifex, Izeb, Izeh, Iu15, Ii15, Ic15")))]
+  ""
+{
+  HOST_WIDE_INT mask = INTVAL (operands[2]);
+  int zero_position;
+
+  /* 16-bit andi instructions:
+     andi Rt3,Ra3,0xff   -> zeb33  Rt3,Ra3
+     andi Rt3,Ra3,0xffff -> zeh33  Rt3,Ra3
+     andi Rt3,Ra3,0x01   -> xlsb33 Rt3,Ra3
+     andi Rt3,Ra3,0x7ff  -> x11b33 Rt3,Ra3
+     andi Rt3,Rt3,2^imm3u          -> bmski33 Rt3,imm3u
+     andi Rt3,Rt3,(2^(imm3u+1))-1  -> fexti33 Rt3,imm3u.  */
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "and33\t%0, %2";
+    case 1:
+      return "and\t%0, %1, %2";
+    case 2:
+      return "zeb33\t%0, %1";
+    case 3:
+      return "zeh33\t%0, %1";
+    case 4:
+      return "xlsb33\t%0, %1";
+    case 5:
+      return "x11b33\t%0, %1";
+    case 6:
+      operands[2] = GEN_INT (floor_log2 (mask));
+      return "bmski33\t%0, %2";
+    case 7:
+      operands[2] = GEN_INT (floor_log2 (mask + 1) - 1);
+      return "fexti33\t%0, %2";
+    case 8:
+      return "zeb\t%0, %1";
+    case 9:
+      return "zeh\t%0, %1";
+    case 10:
+      return "andi\t%0, %1, %2";
+    case 11:
+      operands[2] = GEN_INT (~mask);
+      return "bitci\t%0, %1, %2";
+    case 12:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bclr_p() test,
+         so that we can guarantee there is only one 0-bit
+         within the immediate value.  */
+      for (zero_position = 31; zero_position >= 0; zero_position--)
+	{
+	  if ((INTVAL (operands[2]) & (1 << zero_position)) == 0)
+	    {
+	      /* Found the 0-bit position.  */
+	      operands[2] = GEN_INT (zero_position);
+	      break;
+	    }
+	}
+      return "bclr\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  2,  2,  2,  2,  2,  2,  4,  4,  4,  4,  4")])
+
+(define_insn "*and_slli"
+  [(set (match_operand:SI 0 "register_operand"                      "=   r")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand"   "    r")
+			    (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"              "    r")))]
+  "TARGET_ISA_V3"
+  "and_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*and_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "and_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'OR' operation
+;; ----------------------------------------------------------------------------
+
+;; For V3/V3M ISA, we have 'or33' instruction.
+;; So we can identify 'or Rt3,Rt3,Ra3' case and set its length to be 2.
+(define_insn "iorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(ior:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, Ie15")))]
+  ""
+{
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "or33\t%0, %2";
+    case 1:
+      return "or\t%0, %1, %2";
+    case 2:
+      return "ori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bset_p() test,
+         so that we can guarantee there is only one 1-bit
+         within the immediate value.  */
+      /* Use exact_log2() to search the 1-bit position.  */
+      one_position = exact_log2 (INTVAL (operands[2]));
+      operands[2] = GEN_INT (one_position);
+      return "bset\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+(define_insn "*or_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(ior:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "or_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*or_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "or_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'XOR' operation
+;; ----------------------------------------------------------------------------
+
+;; For V3/V3M ISA, we have 'xor33' instruction.
+;; So we can identify 'xor Rt3,Rt3,Ra3' case and set its length to be 2.
+(define_insn "xorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(xor:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, It15")))]
+  ""
+{
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "xor33\t%0, %2";
+    case 1:
+      return "xor\t%0, %1, %2";
+    case 2:
+      return "xori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_btgl_p() test,
+         so that we can guarantee there is only one 1-bit
+         within the immediate value.  */
+      /* Use exact_log2() to search the 1-bit position.  */
+      one_position = exact_log2 (INTVAL (operands[2]));
+      operands[2] = GEN_INT (one_position);
+      return "btgl\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+(define_insn "*xor_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(xor:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "xor_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*xor_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "xor_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Rotate Right Instructions.
+
+(define_insn "rotrsi3"
+  [(set (match_operand:SI 0 "register_operand"                 "=   r, r")
+	  (rotatert:SI (match_operand:SI 1 "register_operand"  "    r, r")
+		       (match_operand:SI 2 "nonmemory_operand" " Iu05, r")))]
+  ""
+  "@
+  rotri\t%0, %1, %2
+  rotr\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'NEG' operation
+;; ----------------------------------------------------------------------------
+
+;; For V3/V3M ISA, we have 'neg33' instruction.
+;; So we can identify 'neg Rt3,Ra3' case and set its length to be 2.
+;; And for V2 ISA, there is NO 'neg33' instruction.
+;; The only option is to use 'subri A,B,0' (its semantic is 'A = 0 - B').
+(define_insn "negsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(neg:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+  "@
+   neg33\t%0, %1
+   subri\t%0, %1, 0"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+
+;; ----------------------------------------------------------------------------
;; 'ONE_COMPLEMENT' operation
+;; ----------------------------------------------------------------------------
+
+;; For V3/V3M ISA, we have 'not33' instruction.
+;; So we can identify 'not Rt3,Ra3' case and set its length to be 2.
+(define_insn "one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(not:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+  "@
+   not33\t%0, %1
+   nor\t%0, %1, %1"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Shift instructions.
+
+(define_insn "ashlsi3"
+  [(set (match_operand:SI 0 "register_operand"             "=   l,    r, r")
+	(ashift:SI (match_operand:SI 1 "register_operand"  "    l,    r, r")
+		   (match_operand:SI 2 "nonmemory_operand" " Iu03, Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "slli333\t%0, %1, %2";
+    case 1:
+      return "slli\t%0, %1, %2";
+    case 2:
+      return "sll\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+(define_insn "ashrsi3"
+  [(set (match_operand:SI 0 "register_operand"               "=   d,    r, r")
+	(ashiftrt:SI (match_operand:SI 1 "register_operand"  "    0,    r, r")
+		     (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "srai45\t%0, %2";
+    case 1:
+      return "srai\t%0, %1, %2";
+    case 2:
+      return "sra\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+(define_insn "lshrsi3"
+  [(set (match_operand:SI 0 "register_operand"               "=   d,    r, r")
+	(lshiftrt:SI (match_operand:SI 1 "register_operand"  "    0,    r, r")
+		     (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "srli45\t%0, %2";
+    case 1:
+      return "srli\t%0, %1, %2";
+    case 2:
+      return "srl\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; ----------------------------------------------------------------------------
+;; Conditional Move patterns
+;; ----------------------------------------------------------------------------
+
+;; Expand conditional move:
+;;   operands[0] = operands[1] ? operands[2] : operands[3]
+;; The condition is normalized into (eq X 0) or (ne X 0) so that the
+;; cmovz/cmovn insn patterns below can match it.
+(define_expand "movsicc"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(if_then_else:SI (match_operand 1 "comparison_operator" "")
+			 (match_operand:SI 2 "register_operand" "")
+			 (match_operand:SI 3 "register_operand" "")))]
+  "TARGET_CMOV"
+{
+  if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+      && GET_MODE (XEXP (operands[1], 0)) == SImode
+      && XEXP (operands[1], 1) == const0_rtx)
+    {
+      /* If the operands[1] rtx is already (eq X 0) or (ne X 0),
+         we have gcc generate original template rtx.  */
+      goto create_template;
+    }
+  else
+    {
+      /* Since there is only 'slt'(Set when Less Than) instruction for
+         comparison in Andes ISA, the major strategy we use here is to
+         convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination.
+         We design constraints properly so that the reload phase will assist
+         to make one source operand to use same register as result operand.
+         Then we can use cmovz/cmovn to catch the other source operand
+         which has different register.  */
+      enum rtx_code code = GET_CODE (operands[1]);
+      enum rtx_code new_code = code;
+      rtx cmp_op0 = XEXP (operands[1], 0);
+      rtx cmp_op1 = XEXP (operands[1], 1);
+      rtx tmp;
+      int reverse = 0;
+
+      /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part
+         Strategy : Reverse condition and swap comparison operands
+
+         For example:
+
+             a <= b ? P : Q   (LE or LEU)
+         --> a >  b ? Q : P   (reverse condition)
+         --> b <  a ? Q : P   (swap comparison operands to achieve 'LT/LTU')
+
+             a >= b ? P : Q   (GE or GEU)
+         --> a <  b ? Q : P   (reverse condition to achieve 'LT/LTU')
+
+             a <  b ? P : Q   (LT or LTU)
+         --> (NO NEED TO CHANGE, it is already 'LT/LTU')
+
+             a >  b ? P : Q   (GT or GTU)
+         --> b <  a ? P : Q   (swap comparison operands to achieve 'LT/LTU') */
+      switch (code)
+	{
+	case NE:
+	  /*   (a != b ? P : Q)
+	     can be expressed as
+	       (a == b ? Q : P)
+	     so, fall through to reverse condition */
+	case GE: case GEU: case LE: case LEU:
+	  new_code = reverse_condition (code);
+	  reverse = 1;
+	  break;
+	case EQ: case GT: case GTU: case LT: case LTU:
+	  /* no need to reverse condition */
+	  break;
+	default:
+	  FAIL;
+	}
+
+      /* For '>' comparison operator, we swap operands
+         so that we can have 'LT/LTU' operator.  */
+      if (new_code == GT || new_code == GTU)
+	{
+	  tmp     = cmp_op0;
+	  cmp_op0 = cmp_op1;
+	  cmp_op1 = tmp;
+
+	  new_code = swap_condition (new_code);
+	}
+
+      /* Use a temporary register to store slt/slts result.  */
+      tmp = gen_reg_rtx (SImode);
+
+      /* Split EQ and NE because we don't have direct comparison of EQ and NE.
+         If we don't split it, the conditional move transformation will fail
+         when producing (SET A (EQ B C)) or (SET A (NE B C)).  */
+      if (new_code == EQ)
+	{
+	  /* (a == b)  =>  ((a ^ b) < 1), computed unsigned.  */
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, tmp, GEN_INT (1)));
+	}
+      else if (new_code == NE)
+	{
+	  /* (a != b)  =>  (0 < (a ^ b)), computed unsigned.  */
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, GEN_INT (0), tmp));
+        }
+      else
+	/* This emit_insn will create corresponding 'slt/slts' instruction.  */
+	emit_insn (gen_rtx_SET (VOIDmode, tmp,
+				gen_rtx_fmt_ee (new_code, SImode,
+						cmp_op0, cmp_op1)));
+
+      /* Change comparison semantic into (eq X 0) or (ne X 0) behavior
+         so that cmovz or cmovn will be matched later.
+
+         For reverse condition cases, we want to create a semantic that:
+           (eq X 0) --> pick up "else" part
+         For normal cases, we want to create a semantic that:
+           (ne X 0) --> pick up "then" part
+
+         Later we will have cmovz/cmovn instruction pattern to
+         match corresponding behavior and output instruction.  */
+      operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE,
+				    VOIDmode, tmp, const0_rtx);
+    }
+
+create_template:
+  do {} while(0); /* dummy line */
+})
+
+;; Conditional move when the test register equals zero.
+;; Alternative 1 handles the case where the "then" source shares the
+;; destination register, by emitting the inverted cmovn instead.
+(define_insn "cmovz"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+        (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovz\t%0, %2, %1
+   cmovn\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+;; Conditional move when the test register is non-zero.
+;; Alternative 1 handles the case where the "then" source shares the
+;; destination register, by emitting the inverted cmovz instead.
+(define_insn "cmovn"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+	(if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovn\t%0, %2, %1
+   cmovz\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; Conditional Branch patterns
+;; ----------------------------------------------------------------------------
+
+;; Expand conditional branch.  Since the ISA only provides slt/slts for
+;; magnitude comparison, GT/GE/LT/LE (and unsigned variants) are rewritten
+;; as an slt/slts into $ta followed by an EQ/NE-against-zero branch;
+;; EQ/NE and comparisons against zero keep the original template.
+(define_expand "cbranchsi4"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* If operands[2] is (const_int 0),
+     we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions.
+     So we have gcc generate original template rtx.  */
+  if (GET_CODE (operands[2]) == CONST_INT)
+    if (INTVAL (operands[2]) == 0)
+      if ((code != GTU)
+	  && (code != GEU)
+	  && (code != LTU)
+	  && (code != LEU))
+	goto create_template;
+
+  /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than)
+     behavior for the comparison, we might need to generate other
+     rtx patterns to achieve same semantic.  */
+  switch (code)
+    {
+    case GT:
+    case GTU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* GT  reg_A, const_int  =>  !(LT  reg_A, const_int + 1) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* We want to plus 1 into the integer value
+	     of operands[2] to create 'slt' instruction.
+	     This calculation is performed on the host machine,
+	     which may be 64-bit integer.
+	     So the meaning of calculation result may be
+	     different from the 32-bit nds32 target.
+
+	     For example:
+	       0x7fffffff + 0x1 -> 0x80000000,
+	       this value is POSITIVE on 64-bit machine,
+	       but the expected value on 32-bit nds32 target
+	       should be NEGATIVE value.
+
+	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
+	     explicitly create SImode constant rtx.  */
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* GT  reg_A, reg_B  =>  LT  reg_B, reg_A */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case GE:
+    case GEU:
+      /* GE  reg_A, reg_B      =>  !(LT  reg_A, reg_B) */
+      /* GE  reg_A, const_int  =>  !(LT  reg_A, const_int) */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == GE)
+	{
+	  /* GE, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* GEU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], EQ);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LT:
+    case LTU:
+      /* LT  reg_A, reg_B      =>  LT  reg_A, reg_B */
+      /* LT  reg_A, const_int  =>  LT  reg_A, const_int */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], NE);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* LE  reg_A, const_int  =>  LT  reg_A, const_int + 1 */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* Note that (le:SI X INT_MAX) is not the same as (lt:SI X INT_MIN).
+	     We better have an assert here in case GCC does not properly
+	     optimize it away.  */
+	  gcc_assert (code != LE || INTVAL (operands[2]) != INT_MAX);
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* LE  reg_A, reg_B  =>  !(LT  reg_B, reg_A) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case EQ:
+    case NE:
+      /* NDS32 ISA has various form for eq/ne behavior no matter
+         what kind of the operand is.
+         So just generate original template rtx.  */
+      goto create_template;
+
+    default:
+      FAIL;
+    }
+
+create_template:
+  do {} while(0); /* dummy line */
+})
+
+
+;; Branch on comparison of a register against zero.  The alternatives
+;; select among 16-bit (beqzs8/bnezs8 for "t", beqz38/bnez38 for "l")
+;; and 32-bit (beqz/bnez) encodings; when the target is out of direct
+;; range, an inverted short branch over an unconditional 'j' is emitted.
+(define_insn "*cbranchsi4_equality_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"  "t, l, r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-comparison conditional branch has two forms:
+       32-bit instruction =>          beqz/bnez           imm16s << 1
+       16-bit instruction => beqzs8/bnezs8/beqz38/bnez38  imm8s << 1
+
+     For 32-bit case,
+     we assume it is always reachable. (but check range -65500 ~ 65500)
+
+     For 16-bit case,
+     it must satisfy { 255 >= (label - pc) >= -256 } condition.
+     However, since the $pc for nds32 is at the beginning of the instruction,
+     we should leave some length space for current insn.
+     So we use range -250 ~ 250.  */
+
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  return (code == EQ) ? "beqzs8\t%2" : "bnezs8\t%2";
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  return (code == EQ) ? "beqz38\t%1, %2" : "bnez38\t%1, %2";
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* For which_alternative==2, it should not be here.  */
+	  gcc_unreachable ();
+	}
+    case 4:
+      /* including constraints: t, l, and r */
+      return (code == EQ) ? "beqz\t%1, %2" : "bnez\t%1, %2";
+    case 6:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  if (code == EQ)
+	    {
+	      /*    beqzs8  .L0
+	          =>
+	            bnezs8  .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "bnezs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnezs8  .L0
+	          =>
+	            beqzs8  .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "beqzs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  if (code == EQ)
+	    {
+	      /*    beqz38  $r0, .L0
+	          =>
+	            bnez38  $r0, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "bnez38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnez38  $r0, .L0
+	          =>
+	            beqz38  $r0, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "beqz38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* For which_alternative==2, it should not be here.  */
+	  gcc_unreachable ();
+	}
+    case 8:
+      /* constraint: t, l, r.  */
+      if (code == EQ)
+	{
+	  /*    beqz  $r8, .L0
+	      =>
+	        bnez  $r8, .LCB0
+	        j  .L0
+	      .LCB0:
+	   */
+	  return "bnez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+      else
+	{
+	  /*    bnez  $r8, .L0
+	      =>
+	        beqz  $r8, .LCB0
+	        j  .L0
+	      .LCB0:
+	   */
+	  return "beqz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 2
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			  (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+;; This pattern is dedicated to non-V3 ISAs,
+;; because they HAVE NO beqc/bnec instruction.
+;; Register-register equality branch using 32-bit beq/bne, with an
+;; inverted-branch-plus-'j' sequence when the target is out of range.
+(define_insn "*cbranchsi4_equality_reg"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "TARGET_ISA_V2 || TARGET_ISA_V3M"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+
+     For 32-bit case,
+     we assume it is always reachable. (but check range -16350 ~ 16350).  */
+
+  switch (code)
+    {
+    case EQ:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "beq\t%1, %2, %3";
+	case 8:
+	  /*    beq  $r0, $r1, .L0
+	      =>
+	        bne  $r0, $r1, .LCB0
+	        j  .L0
+	      .LCB0:
+	   */
+	  return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    case NE:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "bne\t%1, %2, %3";
+	case 8:
+	  /*    bne  $r0, $r1, .L0
+	      =>
+	        beq  $r0, $r1, .LCB0
+	        j  .L0
+	      .LCB0:
+	   */
+	  return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			   (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+;; This pattern is dedicated to V3,
+;; because V3 DOES HAVE beqc/bnec instruction.
+;; Alternative 0 compares two registers (beq/bne); alternative 1 compares
+;; against an Is11 immediate (beqc/bnec).  Out-of-range targets use the
+;; inverted-branch-plus-'j' sequence.
+(define_insn "*cbranchsi4_equality_reg_or_const_int"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r,    r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r, Is11")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "TARGET_ISA_V3"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+       32-bit instruction =>         beqc/bnec          imm8s << 1
+
+     For 32-bit case, we assume it is always reachable.
+     (but check range -16350 ~ 16350 and -250 ~ 250).  */
+
+  switch (code)
+    {
+    case EQ:
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beq\t%1, %2, %3";
+	    case 8:
+	      /*    beq  $r0, $r1, .L0
+	          =>
+	            bne  $r0, $r1, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beqc\t%1, %2, %3";
+	    case 8:
+	      /*    beqc  $r0, constant, .L0
+	          =>
+	            bnec  $r0, constant, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "bnec\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    case NE:
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bne\t%1, %2, %3";
+	    case 8:
+	      /*    bne  $r0, $r1, .L0
+	          =>
+	            beq  $r0, $r1, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bnec\t%1, %2, %3";
+	    case 8:
+	      /*    bnec  $r0, constant, .L0
+	          =>
+	            beqc  $r0, constant, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "beqc\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			  (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		     (const_int 4)
+		     (const_int 8))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+			  (le (minus (match_dup 3) (pc)) (const_int  250)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+;; Signed comparison of a register against zero using bgtz/bgez/bltz/blez.
+;; Out-of-range targets use the inverted-branch-plus-'j' sequence.
+(define_insn "*cbranchsi4_greater_less_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_greater_less_comparison_operator"
+			[(match_operand:SI 1 "register_operand" "r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-greater-less-comparison conditional branch has one form:
+       32-bit instruction =>      bgtz/bgez/bltz/blez     imm16s << 1
+
+     For 32-bit case, we assume it is always reachable.
+     (but check range -65500 ~ 65500).  */
+
+  if (get_attr_length (insn) == 8)
+    {
+      /* The branch target is too far to simply use one
+         bgtz/bgez/bltz/blez instruction.
+         We need to reverse condition and use 'j' to jump to the target.  */
+      switch (code)
+	{
+	case GT:
+	  /*   bgtz  $r8, .L0
+	     =>
+	       blez  $r8, .LCB0
+	       j  .L0
+	     .LCB0:
+	   */
+	  return "blez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case GE:
+	  /*   bgez  $r8, .L0
+	     =>
+	       bltz  $r8, .LCB0
+	       j  .L0
+	     .LCB0:
+	   */
+	  return "bltz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LT:
+	  /*   bltz  $r8, .L0
+	     =>
+	       bgez  $r8, .LCB0
+	       j  .L0
+	     .LCB0:
+	   */
+	  return "bgez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LE:
+	  /*   blez  $r8, .L0
+	     =>
+	       bgtz  $r8, .LCB0
+	       j  .L0
+	     .LCB0:
+	   */
+	  return "bgtz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+    }
+
+  switch (code)
+    {
+    case GT:
+      return "bgtz\t%1, %2";
+    case GE:
+      return "bgez\t%1, %2";
+    case LT:
+      return "bltz\t%1, %2";
+    case LE:
+      return "blez\t%1, %2";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+        (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			   (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+;; Expand cstore: operands[0] = (operands[2] <cond> operands[3]) as 0/1.
+;; All condition codes are synthesized from slt/slts (set-on-less-than),
+;; combined with addi/xor/movi/xori helper instructions as needed.
+(define_expand "cstoresi4"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operator:SI 1 "comparison_operator"
+	  [(match_operand:SI 2 "register_operand" "")
+	   (match_operand:SI 3 "nonmemory_operand" "")]))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[1]);
+
+  switch (code)
+    {
+    case EQ:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A == const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  /* If the integer value is not in the range of imm15s,
+	     we need to force register first because our addsi3 pattern
+	     only accept nds32_rimm15s_operand predicate.  */
+	  if (!satisfies_constraint_Is15 (operands[3]))
+	    operands[3] = force_reg (SImode, operands[3]);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A == reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+
+    case NE:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A != const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  /* If the integer value is not in the range of imm15s,
+	     we need to force register first because our addsi3 pattern
+	     only accept nds32_rimm15s_operand predicate.  */
+	  if (!satisfies_constraint_Is15 (operands[3]))
+	    operands[3] = force_reg (SImode, operands[3]);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A != reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+
+    case GT:
+    case GTU:
+      /* reg_R = (reg_A > reg_B)       --> slt reg_R, reg_B, reg_A */
+      /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */
+      if (code == GT)
+	{
+	  /* GT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[3], operands[2]));
+	}
+      else
+	{
+	  /* GTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[3], operands[2]));
+	}
+
+      DONE;
+
+    case GE:
+    case GEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A >= const_int_B)
+	     --> movi reg_C, const_int_B - 1
+	         slt  reg_R, reg_C, reg_A */
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) - 1,
+					      SImode)));
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], tmp_reg, operands[2]));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A >= reg_B)
+	     --> slt  reg_R, reg_A, reg_B
+	         xori reg_R, reg_R, const_int_1 */
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[2], operands[3]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[2], operands[3]));
+	    }
+
+	  /* perform 'not' behavior */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+    case LT:
+    case LTU:
+      /* reg_R = (reg_A < reg_B)       --> slt reg_R, reg_A, reg_B */
+      /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[2], operands[3]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[2], operands[3]));
+	}
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A <= const_int_B)
+	     --> movi reg_C, const_int_B + 1
+	         slt  reg_R, reg_A, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) + 1,
+						      SImode)));
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], operands[2], tmp_reg));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A <= reg_B) --> slt  reg_R, reg_B, reg_A
+	                                  xori reg_R, reg_R, const_int_1 */
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[3], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[3], operands[2]));
+	    }
+
+	  /* perform 'not' behavior */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+
+    default:
+      gcc_unreachable ();
+    }
+})
+
+
+;; Signed set-on-less-than: operands[0] = (operands[1] < operands[2]).
+;; 16-bit forms slts45/sltsi45 write an implicit destination; 32-bit
+;; forms slts/sltsi take an explicit one.
+(define_insn "slts_compare"
+  [(set (match_operand:SI 0 "register_operand"         "=t,    t, r,    r")
+	(lt:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+	       (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slts45\t%1, %2
+   sltsi45\t%1, %2
+   slts\t%0, %1, %2
+   sltsi\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+;; Unsigned set-on-less-than: operands[0] = (operands[1] <u operands[2]).
+;; 16-bit forms slt45/slti45 write an implicit destination; 32-bit
+;; forms slt/slti take an explicit one.
+(define_insn "slt_compare"
+  [(set (match_operand:SI 0 "register_operand"          "=t,    t, r,    r")
+	(ltu:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+		(match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slt45\t%1, %2
+   slti45\t%1, %2
+   slt\t%0, %1, %2
+   slti\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Unconditional and other jump instructions.
+
+;; Unconditional jump; selects the 16-bit j8 or 32-bit j form according
+;; to the computed branch distance (see the length attribute below).
+(define_insn "jump"
+  [(set (pc) (label_ref (match_operand 0 "" "")))]
+  ""
+{
+  /* This unconditional jump has two forms:
+       32-bit instruction => j   imm24s << 1
+       16-bit instruction => j8  imm8s << 1
+
+     For 32-bit case,
+     we assume it is always reachable.
+     For 16-bit case,
+     it must satisfy { 255 >= (label - pc) >= -256 } condition.
+     However, since the $pc for nds32 is at the beginning of the instruction,
+     we should leave some length space for current insn.
+     So we use range -250 ~ 250.  */
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      return "j8\t%0";
+    case 4:
+      return "j\t%0";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
+			   (le (minus (match_dup 0) (pc)) (const_int  250)))
+		      (if_then_else (match_test "TARGET_16_BIT")
+				    (const_int 2)
+				    (const_int 4))
+		      (const_int 4)))])
+
+;; Jump through a register: 16-bit jr5 (alternative 0) or 32-bit jr
+;; (alternative 1).
+(define_insn "indirect_jump"
+  [(set (pc) (match_operand:SI 0 "register_operand" "r, r"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "jr5\t%0";
+    case 1:
+      return "jr\t%0";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch,branch")
+   (set_attr "length" "     2,     4")])
+
+;; Subroutine call instruction returning no value.
+;;   operands[0]: It should be a mem RTX whose address is
+;;                the address of the function.
+;;   operands[1]: It is the number of bytes of arguments pushed as a const_int.
+;;   operands[2]: It is the number of registers used as operands.
+
+;; Expand a call; the parallel clobbers the link-pointer register and is
+;; matched later by *call_register or *call_immediate.
+(define_expand "call"
+  [(parallel [(call (match_operand 0 "memory_operand" "")
+		    (match_operand 1))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+;; Call through a register: 16-bit jral5 (alternative 0) or 32-bit jral
+;; (alternative 1); both clobber the link-pointer register.
+(define_insn "*call_register"
+  [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r, r"))
+		    (match_operand 1))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "jral5\t%0";
+    case 1:
+      return "jral\t%0";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch,branch")
+   (set_attr "length" "     2,     4")])
+
+;; Direct call to an immediate address via the 32-bit jal instruction.
+(define_insn "*call_immediate"
+  [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i"))
+		    (match_operand 1))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%0"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; Subroutine call instruction returning a value.
+;;   operands[0]: It is the hard register in which the value is returned.
+;;   The rest three operands are the same as the
+;;   three operands of the 'call' instruction.
+;;   (but with numbers increased by one)
+
+;; Expand a value-returning call; matched later by *call_value_register
+;; or *call_value_immediate.
+(define_expand "call_value"
+  [(parallel [(set (match_operand 0)
+		   (call (match_operand 1 "memory_operand" "")
+		         (match_operand 2)))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+;; Value-returning call through a register: 16-bit jral5 or 32-bit jral.
+(define_insn "*call_value_register"
+  [(parallel [(set (match_operand 0)
+		   (call (mem (match_operand:SI 1 "register_operand" "r, r"))
+		         (match_operand 2)))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "jral5\t%1";
+    case 1:
+      return "jral\t%1";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch,branch")
+   (set_attr "length" "     2,     4")])
+
+(define_insn "*call_value_immediate"
+  [(parallel [(set (match_operand 0)
+		   (call (mem (match_operand:SI 1 "immediate_operand" "i"))
+			 (match_operand 2)))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%1"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; prologue and epilogue.
+
+(define_expand "prologue" [(const_int 0)]
+  ""
+{
+  /* Note that only under V3/V3M ISA, we could use v3push prologue.  */
+  if (TARGET_V3PUSH)
+    nds32_expand_prologue_v3push ();
+  else
+    nds32_expand_prologue ();
+  DONE;
+})
+
+(define_expand "epilogue" [(const_int 0)]
+  ""
+{
+  /* Note that only under V3/V3M ISA, we could use v3pop epilogue.  */
+  if (TARGET_V3PUSH)
+    nds32_expand_epilogue_v3pop ();
+  else
+    nds32_expand_epilogue ();
+  DONE;
+})
+
+
+;; nop instruction.
+
+(define_insn "nop"
+  [(const_int 0)]
+  ""
+{
+  if (TARGET_16_BIT)
+    return "nop16";
+  else
+    return "nop";
+}
+  [(set_attr "type" "misc")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+
+;; ----------------------------------------------------------------------------
+;; unspec operation patterns
+;; ----------------------------------------------------------------------------
+
+;; stack push/pop multiple
+
+(define_insn "*stack_push_multiple"
+  [(match_parallel 3 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand" "r")
+		   (match_operand:SI 1 "register_operand" "r")
+		   (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_STACK_PUSH_MULTIPLE)
+     ])]
+  ""
+{
+  /* Because we need to return a string buffer used by output_asm_insn(),
+     we use a static character array to store desired assembly output.  */
+  static char str_buffer[100];
+
+  /* A string to present 'push.s' instruction.  */
+  const char *push_str = NULL;
+  /* A string to present Rb and Re operands.  */
+  const char *RbRe_str = NULL;
+  /* The En4 encoding string of the instruction is
+     in the bitwise fashion of following: "fp gp lp sp".  */
+  static const char * const En4_str[16] =
+    {
+      /* 0:  0  0  0  0 */
+      "",
+      /* 1:  0  0  0  1 */
+      "{ $sp }",
+      /* 2:  0  0  1  0 */
+      "{ $lp }",
+      /* 3:  0  0  1  1 */
+      "{ $lp $sp }",
+      /* 4:  0  1  0  0 */
+      "{ $gp }",
+      /* 5:  0  1  0  1 */
+      "{ $gp $sp }",
+      /* 6:  0  1  1  0 */
+      "{ $gp $lp }",
+      /* 7:  0  1  1  1 */
+      "{ $gp $lp $sp }",
+      /* 8:  1  0  0  0 */
+      "{ $fp }",
+      /* 9:  1  0  0  1 */
+      "{ $fp $sp }",
+      /* 10: 1  0  1  0 */
+      "{ $fp $lp }",
+      /* 11: 1  0  1  1 */
+      "{ $fp $lp $sp }",
+      /* 12: 1  1  0  0 */
+      "{ $fp $gp }",
+      /* 13: 1  1  0  1 */
+      "{ $fp $gp $sp }",
+      /* 14: 1  1  1  0 */
+      "{ $fp $gp $lp }",
+      /* 15: 1  1  1  1 */
+      "{ $fp $gp $lp $sp }"
+    };
+
+  /* Create push_str string.
+     Note that 'smw.adm $sp,[$sp],$sp,0' means push nothing.  */
+  if (INTVAL (operands[2]) == 0
+      && REGNO (operands[0]) == SP_REGNUM
+      && REGNO (operands[1]) == SP_REGNUM)
+    return "";
+  else
+    push_str = "push.s\t";
+
+  /* Create RbRe_str string.
+     Note that we need to output ',' character if there exists En4 field.  */
+  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
+      RbRe_str = INTVAL (operands[2]) != 0 ? "%0, %1, " : "%0, %1";
+  else
+      RbRe_str = "";
+
+  /* Create complete assembly code string.  */
+  snprintf (str_buffer, sizeof (str_buffer),
+			"%s%s%s",
+			push_str,
+			RbRe_str,
+			En4_str[INTVAL (operands[2])]);
+
+  /* Return formatted string.  */
+  return str_buffer;
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")])
+
+(define_insn "*stack_pop_multiple"
+  [(match_parallel 3 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand" "r")
+		   (match_operand:SI 1 "register_operand" "r")
+		   (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_STACK_POP_MULTIPLE)
+     ])]
+  ""
+{
+  /* Because we need to return a string buffer used by output_asm_insn(),
+     we use a static character array to store desired assembly output.  */
+  static char str_buffer[100];
+
+  /* A string to present 'pop.s' instruction.  */
+  const char *pop_str = NULL;
+  /* A string to present Rb and Re operands.  */
+  const char *RbRe_str = NULL;
+  /* The En4 encoding string of the instruction is
+     in the bitwise fashion of following: "fp gp lp sp".  */
+  static const char * const En4_str[16] =
+    {
+      /* 0:  0  0  0  0 */
+      "",
+      /* 1:  0  0  0  1 */
+      "{ $sp }",
+      /* 2:  0  0  1  0 */
+      "{ $lp }",
+      /* 3:  0  0  1  1 */
+      "{ $lp $sp }",
+      /* 4:  0  1  0  0 */
+      "{ $gp }",
+      /* 5:  0  1  0  1 */
+      "{ $gp $sp }",
+      /* 6:  0  1  1  0 */
+      "{ $gp $lp }",
+      /* 7:  0  1  1  1 */
+      "{ $gp $lp $sp }",
+      /* 8:  1  0  0  0 */
+      "{ $fp }",
+      /* 9:  1  0  0  1 */
+      "{ $fp $sp }",
+      /* 10: 1  0  1  0 */
+      "{ $fp $lp }",
+      /* 11: 1  0  1  1 */
+      "{ $fp $lp $sp }",
+      /* 12: 1  1  0  0 */
+      "{ $fp $gp }",
+      /* 13: 1  1  0  1 */
+      "{ $fp $gp $sp }",
+      /* 14: 1  1  1  0 */
+      "{ $fp $gp $lp }",
+      /* 15: 1  1  1  1 */
+      "{ $fp $gp $lp $sp }"
+    };
+
+  /* Create pop_str string.
+     Note that 'lmw.bim $sp,[$sp],$sp,0' means pop nothing.  */
+  if (INTVAL (operands[2]) == 0
+      && REGNO (operands[0]) == SP_REGNUM
+      && REGNO (operands[1]) == SP_REGNUM)
+    return "";
+  else
+    pop_str = "pop.s\t";
+
+  /* Create RbRe_str string.
+     Note that we need to output ',' character if there exists En4 field.  */
+  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
+      RbRe_str = INTVAL (operands[2]) != 0 ? "%0, %1, " : "%0, %1";
+  else
+      RbRe_str = "";
+
+  /* Create complete assembly code string.  */
+  snprintf (str_buffer, sizeof (str_buffer),
+			"%s%s%s",
+			pop_str,
+			RbRe_str,
+			En4_str[INTVAL (operands[2])]);
+
+  /* Return formated string.  */
+  return str_buffer;
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")])
+
+;; stack v3push/v3pop
+
+(define_insn "*stack_v3push"
+  [(match_parallel 2 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand"  "r")
+		   (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_STACK_V3PUSH)
+     ])]
+  ""
+{
+  return "push25\t%0, %1";
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+(define_insn "*stack_v3pop"
+  [(match_parallel 2 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand"  "r")
+		   (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_STACK_V3POP)
+     ])]
+  ""
+{
+  return "pop25\t%0, %1";
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+
+;; In nds32 target, the 'ret5' instruction is actually 'jr5 $lp'.
+;; This pattern is designed to distinguish function return
+;; from general indirect_jump pattern so that we can directly
+;; generate 'ret5' for readability.
+
+(define_insn "unspec_func_return"
+  [(set (pc)
+	(unspec:SI [(reg:SI LP_REGNUM)] UNSPEC_FUNC_RETURN))]
+  ""
+  "ret5"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+;; ----------------------------------------------------------------------------
+;; Jump Table patterns
+;; ----------------------------------------------------------------------------
+;; Need to implement ASM_OUTPUT_ADDR_VEC_ELT (for normal jump table)
+;; or ASM_OUTPUT_ADDR_DIFF_ELT (for pc relative jump table) as well.
+;;
+;; operands[0]: The index to dispatch on.
+;; operands[1]: The lower bound for indices in the table.
+;; operands[2]: The total range of indices in the table.
+;;              i.e. The largest index minus the smallest one.
+;; operands[3]: A label that precedes the table itself.
+;; operands[4]: A label to jump to if the index has a value outside the bounds.
+;;
+;; We need to create following sequences for jump table code generation:
+;;   A) k <-- (plus (operands[0]) (-operands[1]))
+;;   B) if (gtu k operands[2]) then goto operands[4]
+;;   C) t <-- operands[3]
+;;   D) z <-- (mem (plus (k << 0 or 1 or 2) t))
+;;   E) z <-- t + z (NOTE: This is only required for pc relative jump table.)
+;;   F) jump to target with register t or z
+;;
+;; The steps C, D, E, and F are performed by casesi_internal pattern.
+(define_expand "casesi"
+  [(match_operand:SI 0 "register_operand"  "r") ; index to jump on
+   (match_operand:SI 1 "immediate_operand" "i") ; lower bound
+   (match_operand:SI 2 "immediate_operand" "i") ; total range
+   (match_operand:SI 3 "" "")                   ; table label
+   (match_operand:SI 4 "" "")]                  ; Out of range label
+  ""
+{
+  rtx add_tmp;
+  rtx reg, test;
+
+  /* Step A: "k <-- (plus (operands[0]) (-operands[1]))".  */
+  if (operands[1] != const0_rtx)
+    {
+      reg = gen_reg_rtx (SImode);
+      add_tmp = gen_int_mode (-INTVAL (operands[1]), SImode);
+
+      /* If the integer value is not in the range of imm15s,
+         we need to force register first because our addsi3 pattern
+         only accept nds32_rimm15s_operand predicate.  */
+      add_tmp = force_reg (SImode, add_tmp);
+
+      emit_insn (gen_addsi3 (reg, operands[0], add_tmp));
+      operands[0] = reg;
+    }
+
+  /* Step B: "if (gtu k operands[2]) then goto operands[4]".  */
+  test = gen_rtx_GTU (VOIDmode, operands[0], operands[2]);
+  emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2],
+				  operands[4]));
+
+  operands[5] = gen_reg_rtx (SImode);
+  /* Step C, D, E, and F, using another temporary register operands[5].  */
+  emit_jump_insn (gen_casesi_internal (operands[0],
+				       operands[3],
+				       operands[5]));
+  DONE;
+})
+
+;; We are receiving operands from casesi pattern:
+;;
+;; operands[0]: The index from which the lower bound has been subtracted.
+;; operands[1]: A label that precedes the table itself.
+;; operands[2]: A temporary register to retrieve value in table.
+;;
+;; We need to perform steps C, D, E, and F:
+;;
+;;   C) t <-- operands[1]
+;;   D) z <-- (mem (plus (operands[0] << m) t))
+;;            m is 2 for normal jump table.
+;;            m is 0, 1, or 2 for pc relative jump table based on diff size.
+;;   E) t <-- z + t (NOTE: This is only required for pc relative jump table.)
+;;   F) Jump to target with register t or z.
+;;
+;; The USE in this pattern is needed to tell flow analysis that this is
+;; a CASESI insn.  It has no other purpose.
+(define_insn "casesi_internal"
+  [(parallel [(set (pc)
+		   (mem:SI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "r")
+					     (const_int 4))
+				    (label_ref (match_operand 1 "" "")))))
+	      (use (label_ref (match_dup 1)))
+	      (clobber (match_operand:SI 2 "register_operand" ""))
+	      (clobber (reg:SI TA_REGNUM))])]
+  ""
+{
+  if (CASE_VECTOR_PC_RELATIVE)
+    return nds32_output_casesi_pc_relative (operands);
+  else
+    return nds32_output_casesi (operands);
+}
+  [(set_attr "length" "20")
+   (set_attr "type" "alu")])
+
+;; ----------------------------------------------------------------------------
+
+;; Performance Extension
+
+(define_insn "clzsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+	(clz:SI (match_operand:SI 1 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "clz\t%0, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "smaxsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smax:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "max\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "sminsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smin:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "min\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*btst"
+  [(set (match_operand:SI 0 "register_operand"                   "=   r")
+	(zero_extract:SI (match_operand:SI 1 "register_operand"  "    r")
+			 (const_int 1)
+			 (match_operand:SI 2 "immediate_operand" " Iu05")))]
+  "TARGET_PERF_EXT"
+  "btst\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; ----------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/6] Andes nds32: machine description of nds32 porting (2).
  2013-09-27 18:38           ` Chung-Ju Wu
@ 2013-10-01 17:31             ` Richard Sandiford
  2013-10-05 18:21               ` Chung-Ju Wu
  0 siblings, 1 reply; 18+ messages in thread
From: Richard Sandiford @ 2013-10-01 17:31 UTC (permalink / raw)
  To: Chung-Ju Wu; +Cc: gcc-patches

Chung-Ju Wu <jasonwucj@gmail.com> writes:
> +      /* Use $r15, if the value is NOT in the range of Is20,
> +         we must output "sethi + ori" directly since
> +         we may already passed the split stage.  */
> +      return "sethi\t%0, hi20(%1)\;ori\t%0, %0, lo12(%1)";
> +    case 17:
> +      return "#";

I don't really understand the comment for case 16.  Returning "#"
(like for case 17) forces a split even at the output stage.

In this case it might not be worth forcing a split though, so I don't
see any need to change the code.  I think the comment should be changed
to give a different reason though.

> +	  /* Note that (le:SI X INT_MAX) is not the same as (lt:SI X INT_MIN).
> +	     We better have an assert here in case GCC does not properly
> +	     optimize it away.  */
> +	  gcc_assert (code != LE || INTVAL (operands[2]) != INT_MAX);

Sorry, I was being lazy when I said INT_MAX.  I really meant INT_MAX on
the target (assuming SImode == int), whereas INT_MAX here is a host thing.
0x7fffffff would be OK.

> +  /* Create RbRe_str string.
> +     Note that we need to output ',' character if there exists En4 field.  */
> +  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
> +      RbRe_str = INTVAL (operands[2]) != 0 ? "%0, %1, " : "%0, %1";
> +  else
> +      RbRe_str = "";

The "RbRe_str =" lines should only be indented by 2 extra spaces, not 4.
Same for pop.

Looks good otherwise, thanks.

Richard

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/6] Andes nds32: machine description of nds32 porting (2).
  2013-10-01 17:31             ` Richard Sandiford
@ 2013-10-05 18:21               ` Chung-Ju Wu
  2013-10-06  9:57                 ` Richard Sandiford
  0 siblings, 1 reply; 18+ messages in thread
From: Chung-Ju Wu @ 2013-10-05 18:21 UTC (permalink / raw)
  To: gcc-patches, rdsandiford

[-- Attachment #1: Type: text/plain, Size: 3199 bytes --]

On 10/2/13 1:31 AM, Richard Sandiford wrote:
> Chung-Ju Wu <jasonwucj@gmail.com> writes:
>> +      /* Use $r15, if the value is NOT in the range of Is20,
>> +         we must output "sethi + ori" directly since
>> +         we may already passed the split stage.  */
>> +      return "sethi\t%0, hi20(%1)\;ori\t%0, %0, lo12(%1)";
>> +    case 17:
>> +      return "#";
> 
> I don't really understand the comment for case 16.  Returning "#"
> (like for case 17) forces a split even at the output stage.
> 
> In this case it might not be worth forcing a split though, so I don't
> see any need to change the code.  I think the comment should be changed
> to give a different reason though.
> 

Sorry for the misleading comment.

For case 17, we were trying to split large constant into two individual
rtx patterns into "sethi" + "addi" so that we can have chance to match
"addi" pattern with 16-bit instruction.

But case 16 is different.
This case is only produced at prologue/epilogue phase, using a temporary
register $r15 to hold a large constant for adjusting stack pointer. 
Since prologue/epilogue is after split1/split2 phase, we can only
output "sethi" + "ori" directly.
(The "addi" instruction with $r15 is a 32-bit instruction.)   

Perhaps I should have comment like this:

    case 16:
      /* This case is produced via prologue/epilogue phase.
         When we adjust stack pointer in prologue/epilogue, we may have a
         large constant that can not be fit in a single "addi" instruction.
         Therefore, we use $r15 to hold the large constant and then use
         "add" instruction.  Unfortunately, the prologue/epilogue is
         performed after split stage and we have no choice but only
         use "sethi + ori" directly.  */
      return "sethi\t%0, hi20(%1)\;ori\t%0, %0, lo12(%1)";
    case 17:
      /* We would like to split large constant into two
         individual rtl patterns: "sethi + addi".
         So that we can have chance to use 16-bit "addi" instruction.  */
      return "#";


>> +	  /* Note that (le:SI X INT_MAX) is not the same as (lt:SI X INT_MIN).
>> +	     We better have an assert here in case GCC does not properly
>> +	     optimize it away.  */
>> +	  gcc_assert (code != LE || INTVAL (operands[2]) != INT_MAX);
> 
> Sorry, I was being lazy when I said INT_MAX.  I really meant INT_MAX on
> the target (assuming SImode == int), whereas INT_MAX here is a host thing.
> 0x7fffffff would be OK.
> 

Fix accordingly.

>> +  /* Create RbRe_str string.
>> +     Note that we need to output ',' character if there exists En4 field.  */
>> +  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
>> +      RbRe_str = INTVAL (operands[2]) != 0 ? "%0, %1, " : "%0, %1";
>> +  else
>> +      RbRe_str = "";
> 
> The "RbRe_str =" lines should only be indented by 2 extra spaces, not 4.
> Same for pop.
> 

Since we have refined stack push/pop implementation, we implement
nds32_output_stack_push () and nds32_output_stack_pop () in
the nds32.c and follow 2-space indent rule.


> Looks good otherwise, thanks.
> 
> Richard
> 

A revised-2 patch for nds32.md is attached.
Thanks for the comments! :)

Best regards,
jasonwucj





[-- Attachment #2: 2-nds32-backend-md-part2.v3.revised-2.patch --]
[-- Type: text/plain, Size: 72595 bytes --]

diff --git gcc/common/config/nds32/nds32-common.c gcc/common/config/nds32/nds32-common.c
new file mode 100644
index 0000000..f82f725
--- /dev/null
+++ gcc/common/config/nds32/nds32-common.c
@@ -0,0 +1,117 @@
+/* Common hooks of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic-core.h"
+#include "tm.h"
+#include "common/common-target.h"
+#include "common/common-target-def.h"
+#include "opts.h"
+#include "flags.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_HANDLE_OPTION.  */
+static bool
+nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
+		     struct gcc_options *opts_set ATTRIBUTE_UNUSED,
+		     const struct cl_decoded_option *decoded,
+		     location_t loc)
+{
+  size_t     code  = decoded->opt_index;
+  int        value = decoded->value;
+
+  switch (code)
+    {
+    case OPT_misr_vector_size_:
+      /* Check the valid vector size: 4 or 16.  */
+      if (value != 4 && value != 16)
+	{
+	  error_at (loc, "for the option -misr-vector-size=X, the valid X "
+			 "must be: 4 or 16");
+	  return false;
+	}
+
+      return true;
+
+    case OPT_mcache_block_size_:
+      /* Check valid value: 4 8 16 32 64 128 256 512.  */
+      if (exact_log2 (value) < 2 || exact_log2 (value) > 9)
+	{
+	  error_at (loc, "for the option -mcache-block-size=X, the valid X "
+			 "must be: 4, 8, 16, 32, 64, 128, 256, or 512");
+	  return false;
+	}
+
+      return true;
+
+    default:
+      return true;
+    }
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
+static const struct default_options nds32_option_optimization_table[] =
+{
+  /* Enable -fomit-frame-pointer by default at -O1 or higher.  */
+  { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+  /* Enable -mv3push by default at -Os, but it is useless under V2 ISA.  */
+  { OPT_LEVELS_SIZE,   OPT_mv3push,             NULL, 1 },
+
+  { OPT_LEVELS_NONE,   0,                       NULL, 0 }
+};
+
+/* ------------------------------------------------------------------------ */
+\f
+/* Run-time Target Specification.  */
+
+/* Default enable
+     TARGET_GP_DIRECT: Generate gp-imply instruction.
+     TARGET_16_BIT   : Generate 16/32 bit mixed length instruction.
+     TARGET_PERF_EXT : Generate performance extension instruction.
+     TARGET_CMOV     : Generate conditional move instruction.  */
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS		\
+  (MASK_GP_DIRECT				\
+   | MASK_16_BIT				\
+   | MASK_PERF_EXT				\
+   | MASK_CMOV)
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION nds32_handle_option
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE nds32_option_optimization_table
+
+\f
+/* Defining the Output Assembler Language.  */
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+/* ------------------------------------------------------------------------ */
+
+struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER;
+
+/* ------------------------------------------------------------------------ */
diff --git gcc/config/nds32/nds32.md gcc/config/nds32/nds32.md
new file mode 100644
index 0000000..1a5bbb0
--- /dev/null
+++ gcc/config/nds32/nds32.md
@@ -0,0 +1,2226 @@
+;; Machine description of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2013 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; See file "rtl.def" for documentation on define_insn, match_*, et. al.
+
+;; Include predicates definition.
+(include "predicates.md")
+
+;; Include constraints definition.
+(include "constraints.md")
+
+;; Include iterators definition.
+(include "iterators.md")
+
+;; Include pipelines definition.
+(include "pipelines.md")
+
+
+;; Include constants definition.
+(include "constants.md")
+
+
+;; Include intrinsic functions definition.
+(include "nds32-intrinsic.md")
+
+;; Include block move for nds32 multiple load/store behavior.
+(include "nds32-multiple.md")
+
+;; Include DImode/DFmode operations.
+(include "nds32-doubleword.md")
+
+;; Include peephole patterns.
+(include "nds32-peephole2.md")
+
+
+;; Insn type, it is used to default other attribute values.
+(define_attr "type"
+  "unknown,move,load,store,alu,compare,branch,call,misc"
+  (const_string "unknown"))
+
+
+;; Length, in bytes, default is 4-bytes.
+(define_attr "length" "" (const_int 4))
+
+
+;; Enabled, which is used to enable/disable insn alternatives.
+;; Note that we use length and TARGET_16_BIT here as criteria.
+;; If the instruction pattern already check TARGET_16_BIT to
+;; determine the length by itself, its enabled attribute should be
+;; always 1 to avoid the conflict with the settings here.
+(define_attr "enabled" ""
+  (cond [(and (eq_attr "length" "2")
+	      (match_test "!TARGET_16_BIT"))
+	 (const_int 0)]
+	(const_int 1)))
+
+
+;; ----------------------------------------------------------------------------
+
+
+;; Move instructions.
+
+(define_expand "movqi"
+  [(set (match_operand:QI 0 "general_operand" "")
+	(match_operand:QI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (QImode, operands[1]);
+})
+
+(define_expand "movhi"
+  [(set (match_operand:HI 0 "general_operand" "")
+	(match_operand:HI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (HImode, operands[1]);
+})
+
+(define_expand "movsi"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(match_operand:SI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (SImode, operands[1]);
+})
+
+
+;; For QImode and HImode, the immediate value can be fit in imm20s.
+;; So there is no need to support QI and HI in the split patterns.
+;; Besides, in the split condition, we ask big-constant split to be
+;; performed after reload phase.  So that the mov2add optimization
+;; in postreload have chance to optimize the code.
+
+(define_insn_and_split "*mov<mode>"
+  [(set (match_operand:QIHISI 0 "nonimmediate_operand"      "=r, r, U45, U33, U37, U45, m,   l,   l,   l,   d, r,    d,    r,    r,    r,    t,    r")
+	(match_operand:QIHISI 1 "nds32_nonsymbolic_operand" " r, r,   l,   l,   l,   d, r, U45, U33, U37, U45, m, Ip05, Is05, Is20, Ihig, Ispl, Ispl"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mov55\t%0, %1";
+    case 1:
+      return "ori\t%0, %1, 0";
+    case 2:
+    case 3:
+    case 4:
+    case 5:
+      return nds32_output_16bit_store (operands, <byte>);
+    case 6:
+      return nds32_output_32bit_store (operands, <byte>);
+    case 7:
+    case 8:
+    case 9:
+    case 10:
+      return nds32_output_16bit_load (operands, <byte>);
+    case 11:
+      return nds32_output_32bit_load (operands, <byte>);
+    case 12:
+      return "movpi45\t%0, %1";
+    case 13:
+      return "movi55\t%0, %1";
+    case 14:
+      return "movi\t%0, %1";
+    case 15:
+      return "sethi\t%0, hi20(%1)";
+    case 16:
+      /* This case is produced via prologue/epilogue phase.
+         When we adjust stack pointer in prologue/epilogue, we may have a
+         large constant that can not be fit in a single "addi" instruction.
+         Therefore, we use $r15 to hold the large constant and then use
+         "add" instruction.  Unfortunately, the prologue/epilogue is
+         performed after split stage and we have no choice but only
+         use "sethi + ori" directly.  */
+      return "sethi\t%0, hi20(%1)\;ori\t%0, %0, lo12(%1)";
+    case 17:
+      /* We would like to split large constant into two
+         individual rtl patterns: "sethi + addi".
+         So that we can have chance to use 16-bit "addi" instruction.  */
+      return "#";
+    default:
+      gcc_unreachable ();
+    }
+}
+  "reload_completed
+   && CONST_INT_P (operands[1])
+   && satisfies_constraint_Ispl (operands[1])
+   && !satisfies_constraint_Is20 (operands[1])"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 0) (plus:QIHISI (match_dup 0) (match_dup 2)))]
+{
+  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xfff);
+  operands[1] = GEN_INT ((INTVAL (operands[1]) >> 12) << 12);
+}
+  [(set_attr "type"   "alu,alu,store,store,store,store,store,load,load,load,load,load,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,    2,    2,    2,    2,    4,   2,   2,   2,   2,   4,  2,  2,  4,  4,  8,  8")])
+
+
+;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF
+;; are able to match such instruction template.
+(define_insn "*move_addr"
+  [(set (match_operand:SI 0 "register_operand"       "=l, r")
+	(match_operand:SI 1 "nds32_symbolic_operand" " i, i"))]
+  ""
+  "la\t%0, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "8")])
+
+
+(define_insn "*sethi"
+  [(set (match_operand:SI 0 "register_operand"                "=r")
+	(high:SI (match_operand:SI 1 "nds32_symbolic_operand" " i")))]
+  ""
+  "sethi\t%0, hi20(%1)"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+(define_insn "*lo_sum"
+  [(set (match_operand:SI 0 "register_operand"                  "=r")
+	(lo_sum:SI (match_operand:SI 1 "register_operand"       " r")
+		   (match_operand:SI 2 "nds32_symbolic_operand" " i")))]
+  ""
+  "ori\t%0, %1, lo12(%2)"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Zero extension instructions.
+
+(define_insn "zero_extend<mode>si2"
+  [(set (match_operand:SI 0 "register_operand"                       "=l, r,   l, *r")
+	(zero_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" " l, r, U33,  m")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "ze<size>33\t%0, %1";
+    case 1:
+      return "ze<size>\t%0, %1";
+    case 2:
+      return nds32_output_16bit_load (operands, <byte>);
+    case 3:
+      return nds32_output_32bit_load (operands, <byte>);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,load,load")
+   (set_attr "length" "  2,  4,   2,   4")])
+
+
+;; Sign extension instructions.
+
+(define_insn "extend<mode>si2"
+  [(set (match_operand:SI 0 "register_operand"                       "=l, r, r")
+	(sign_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" " l, r, m")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "se<size>33\t%0, %1";
+    case 1:
+      return "se<size>\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load_s (operands, <byte>);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,load")
+   (set_attr "length" "  2,  4,   4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Arithmetic instructions.
+
+;; Add (QI/HI/SI).  Alternatives 0/1 match small NEGATIVE immediates
+;; (In05/In03) and are emitted as 16-bit SUBTRACT forms carrying the
+;; negated constant; alternatives 6/7 are the stack-pointer-relative
+;; 16-bit forms; alternatives 8/9 are the generic 32-bit addi/add.
+(define_insn "add<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                   "=   d,    l,    d,    l,  d, l,    k,    l,    r, r")
+	(plus:QIHISI (match_operand:QIHISI 1 "register_operand"      "    0,    l,    0,    l, %0, l,    0,    k,    r, r")
+		     (match_operand:QIHISI 2 "nds32_rimm15s_operand" " In05, In03, Iu05, Iu03,  r, l, Is10, Iu06, Is15, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      /* addi Rt4,Rt4,-x  ==>  subi45 Rt4,x
+         where 0 <= x <= 31 */
+      operands[2] = gen_int_mode (-INTVAL (operands[2]), SImode);
+      return "subi45\t%0, %2";
+    case 1:
+      /* addi Rt3,Ra3,-x  ==>  subi333 Rt3,Ra3,x
+         where 0 <= x <= 7 */
+      operands[2] = gen_int_mode (-INTVAL (operands[2]), SImode);
+      return "subi333\t%0, %1, %2";
+    case 2:
+      return "addi45\t%0, %2";
+    case 3:
+      return "addi333\t%0, %1, %2";
+    case 4:
+      return "add45\t%0, %2";
+    case 5:
+      return "add333\t%0, %1, %2";
+    case 6:
+      /* Stack-pointer adjustment form; destination is tied to $sp.  */
+      return "addi10.sp\t%2";
+    case 7:
+      /* Stack-pointer-relative address computation form.  */
+      return "addri36.sp\t%0, %2";
+    case 8:
+      return "addi\t%0, %1, %2";
+    case 9:
+      return "add\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  2,  2,  2,  2,  2,  2,  4,  4")])
+
+;; Subtract (QI/HI/SI).  Alternative 2 matches a constant minuend and
+;; is emitted as the reverse-subtract 'subri'.
+(define_insn "sub<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                    "=d, l,    r, r")
+	(minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" " 0, l, Is15, r")
+		      (match_operand:QIHISI 2 "register_operand"      " r, l,    r, r")))]
+  ""
+{
+  /* Select the mnemonic by matched alternative, mirroring the style
+     used for the add<mode>3 pattern above.  */
+  switch (which_alternative)
+    {
+    case 0:
+      return "sub45\t%0, %2";
+    case 1:
+      return "sub333\t%0, %1, %2";
+    case 2:
+      return "subri\t%0, %2, %1";
+    case 3:
+      return "sub\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  4,  4")])
+
+
+;; GCC intends to simplify (plus (ashift ...) (reg))
+;; into (plus (mult ...) (reg)), so our matching pattern takes 'mult'
+;; and needs to ensure it is exact_log2 value.
+;; add_slli: %0 = %3 + (%1 << log2 (%2)); V3 only.  The insn condition
+;; restricts the multiplier to an exact power of two so the shift
+;; amount is well defined.
+(define_insn "*add_slli"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand"  " r")
+			  (match_operand:SI 2 "immediate_operand" " i"))
+		 (match_operand:SI 3 "register_operand"           " r")))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[2])) != -1)
+   && (exact_log2 (INTVAL (operands[2])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'add_slli' instruction.  */
+  operands[2] = GEN_INT (floor_log2 (INTVAL (operands[2])));
+
+  return "add_slli\t%0, %3, %1, %2";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; add_srli: %0 = %3 + (%1 >> %2) (logical shift); V3 only.
+(define_insn "*add_srli"
+  [(set (match_operand:SI 0 "register_operand"                        "=   r")
+	(plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			      (match_operand:SI 2 "immediate_operand" " Iu05"))
+		 (match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "add_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; GCC intends to simplify (minus (reg) (ashift ...))
+;; into (minus (reg) (mult ...)), so our matching pattern takes 'mult'
+;; and needs to ensure it is exact_log2 value.
+;; sub_slli: %0 = %1 - (%2 << log2 (%3)); V3 only.  The insn condition
+;; restricts the multiplier to an exact power of two.
+(define_insn "*sub_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=r")
+	(minus:SI (match_operand:SI 1 "register_operand"           " r")
+		  (mult:SI (match_operand:SI 2 "register_operand"  " r")
+			   (match_operand:SI 3 "immediate_operand" " i"))))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[3])) != -1)
+   && (exact_log2 (INTVAL (operands[3])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'sub_slli' instruction.  */
+  operands[3] = GEN_INT (floor_log2 (INTVAL (operands[3])));
+
+  return "sub_slli\t%0, %1, %2, %3";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; sub_srli: %0 = %1 - (%2 >> %3) (logical shift); V3 only.
+(define_insn "*sub_srli"
+  [(set (match_operand:SI 0 "register_operand"                         "=   r")
+	(minus:SI (match_operand:SI 1 "register_operand"               "    r")
+		  (lshiftrt:SI (match_operand:SI 2 "register_operand"  "    r")
+			       (match_operand:SI 3 "immediate_operand" " Iu05"))))]
+  "TARGET_ISA_V3"
+  "sub_srli\t%0, %1, %2, %3"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; Multiplication instructions.
+
+;; 32-bit multiply.  The first alternative ties the destination to
+;; operand 1 so the 16-bit mul33 encoding can be used.
+(define_insn "mulsi3"
+  [(set (match_operand:SI 0 "register_operand"          "= w, r")
+	(mult:SI (match_operand:SI 1 "register_operand" " %0, r")
+		 (match_operand:SI 2 "register_operand" "  w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mul33\t%0, %2";
+    case 1:
+      return "mul\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+;; Widening signed 32x32->64 multiply; only available on V2/V3 ISA.
+(define_insn "mulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V2 || TARGET_ISA_V3"
+  "mulsr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; Widening unsigned 32x32->64 multiply; only available on V2/V3 ISA.
+(define_insn "umulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V2 || TARGET_ISA_V3"
+  "mulr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Multiply-accumulate instructions.
+
+;; Multiply-add: %0 = %3 + %1 * %2.  The accumulator (operand 3) is
+;; tied to the destination via the "0" constraint.
+(define_insn "*maddr32_0"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (match_operand:SI 3 "register_operand"          " 0")
+                 (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; Multiply-add, commuted operand order of *maddr32_0:
+;; %0 = %1 * %2 + %3, accumulator tied to the destination.
+(define_insn "*maddr32_1"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))
+                 (match_operand:SI 3 "register_operand"          " 0")))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; Multiply-subtract: %0 = %3 - %1 * %2, accumulator tied to the
+;; destination via the "0" constraint.
+(define_insn "*msubr32"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (minus:SI (match_operand:SI 3 "register_operand"          " 0")
+                  (mult:SI (match_operand:SI 1 "register_operand" " r")
+                           (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "msubr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Div Instructions.
+
+;; Signed divide+modulo in one insn: divsr writes the quotient to %0
+;; and the remainder to %3.
+(define_insn "divmodsi4"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+        (div:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand" " r")))
+   (set (match_operand:SI 3 "register_operand"         "=r")
+        (mod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divsr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; Unsigned divide+modulo in one insn: divr writes the quotient to %0
+;; and the remainder to %3.
+(define_insn "udivmodsi4"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+        (udiv:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand"  " r")))
+   (set (match_operand:SI 3 "register_operand"          "=r")
+        (umod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Boolean instructions.
+;; Note: We define the DImode versions in nds32-doubleword.md.
+
+;; ----------------------------------------------------------------------------
+;; 'AND' operation
+;; ----------------------------------------------------------------------------
+
+;; Bit-clear: %0 = %2 & ~%1; V3 only.  Note the assembler operand order
+;; is swapped relative to the RTL (the complemented source comes last).
+(define_insn "bitc"
+  [(set (match_operand:SI 0 "register_operand"                 "=r")
+	(and:SI (not:SI (match_operand:SI 1 "register_operand" " r"))
+		(match_operand:SI 2 "register_operand"         " r")))]
+  "TARGET_ISA_V3"
+  "bitc\t%0, %2, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+;; AND.  Besides the generic and33/and/andi forms, several mask-shaped
+;; immediates are matched by dedicated constraints (Izeb/Izeh/Ixls/
+;; Ix11/Ibms/Ifex/Ii15/Ic15) and emitted as the corresponding
+;; extract/clear-style instructions listed in the body.
+(define_insn "andsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    l,    l,    l,    l,    l,    l,    r,   r,     r,    r,    r")
+	(and:SI (match_operand:SI 1 "register_operand" " %0, r,    l,    l,    l,    l,    0,    0,    r,   r,     r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Izeb, Izeh, Ixls, Ix11, Ibms, Ifex, Izeb, Izeh, Iu15, Ii15, Ic15")))]
+  ""
+{
+  HOST_WIDE_INT mask = INTVAL (operands[2]);
+  int zero_position;
+
+  /* 16-bit andi instructions:
+     andi Rt3,Ra3,0xff   -> zeb33  Rt3,Ra3
+     andi Rt3,Ra3,0xffff -> zeh33  Rt3,Ra3
+     andi Rt3,Ra3,0x01   -> xlsb33 Rt3,Ra3
+     andi Rt3,Ra3,0x7ff  -> x11b33 Rt3,Ra3
+     andi Rt3,Rt3,2^imm3u          -> bmski33 Rt3,imm3u
+     andi Rt3,Rt3,(2^(imm3u+1))-1  -> fexti33 Rt3,imm3u.  */
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "and33\t%0, %2";
+    case 1:
+      return "and\t%0, %1, %2";
+    case 2:
+      return "zeb33\t%0, %1";
+    case 3:
+      return "zeh33\t%0, %1";
+    case 4:
+      return "xlsb33\t%0, %1";
+    case 5:
+      return "x11b33\t%0, %1";
+    case 6:
+      /* Single-bit mask: emit bmski33 with the bit index.  */
+      operands[2] = GEN_INT (floor_log2 (mask));
+      return "bmski33\t%0, %2";
+    case 7:
+      /* Low-bits mask 2^(n+1)-1: emit fexti33 with n.  */
+      operands[2] = GEN_INT (floor_log2 (mask + 1) - 1);
+      return "fexti33\t%0, %2";
+    case 8:
+      return "zeb\t%0, %1";
+    case 9:
+      return "zeh\t%0, %1";
+    case 10:
+      return "andi\t%0, %1, %2";
+    case 11:
+      /* Emit bitci with the complemented mask.
+         NOTE(review): bitci appears to be a V3 instruction; presumably
+         the Ii15 constraint only matches when it is available and
+         ~mask fits the immediate field -- confirm.  */
+      operands[2] = GEN_INT (~mask);
+      return "bitci\t%0, %1, %2";
+    case 12:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bclr_p() test,
+         so that we can guarantee there is only one 0-bit
+         within the immediate value.  */
+      for (zero_position = 31; zero_position >= 0; zero_position--)
+	{
+	  if ((INTVAL (operands[2]) & (1 << zero_position)) == 0)
+	    {
+	      /* Found the 0-bit position.  */
+	      operands[2] = GEN_INT (zero_position);
+	      break;
+	    }
+	}
+      return "bclr\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  2,  2,  2,  2,  2,  2,  4,  4,  4,  4,  4")])
+
+;; and_slli: %0 = %3 & (%1 << %2); V3 only.
+(define_insn "*and_slli"
+  [(set (match_operand:SI 0 "register_operand"                      "=   r")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand"   "    r")
+			    (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"              "    r")))]
+  "TARGET_ISA_V3"
+  "and_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; and_srli: %0 = %3 & (%1 >> %2) (logical shift); V3 only.
+(define_insn "*and_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "and_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'OR' operation
+;; ----------------------------------------------------------------------------
+
+;; For V3/V3M ISA, we have 'or33' instruction.
+;; So we can identify 'or Rt3,Rt3,Ra3' case and set its length to be 2.
+;; Inclusive OR.  The Ie15 constraint matches an immediate with exactly
+;; one set bit, which is emitted as the 'bset' bit-set instruction.
+(define_insn "iorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(ior:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, Ie15")))]
+  ""
+{
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "or33\t%0, %2";
+    case 1:
+      return "or\t%0, %1, %2";
+    case 2:
+      return "ori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bset_p() test,
+         so that we can guarantee there is only one 1-bit
+         within the immediate value.  */
+      /* Use exact_log2() to search the 1-bit position.  */
+      one_position = exact_log2 (INTVAL (operands[2]));
+      operands[2] = GEN_INT (one_position);
+      return "bset\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+;; or_slli: %0 = %3 | (%1 << %2); V3 only.
+(define_insn "*or_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(ior:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "or_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; or_srli: %0 = %3 | (%1 >> %2) (logical shift); V3 only.
+(define_insn "*or_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "or_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'XOR' operation
+;; ----------------------------------------------------------------------------
+
+;; For V3/V3M ISA, we have 'xor33' instruction.
+;; So we can identify 'xor Rt3,Rt3,Ra3' case and set its length to be 2.
+;; Exclusive OR.  The It15 constraint matches an immediate with exactly
+;; one set bit, which is emitted as the 'btgl' bit-toggle instruction.
+(define_insn "xorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(xor:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, It15")))]
+  ""
+{
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "xor33\t%0, %2";
+    case 1:
+      return "xor\t%0, %1, %2";
+    case 2:
+      return "xori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_btgl_p() test,
+         so that we can guarantee there is only one 1-bit
+         within the immediate value.  */
+      /* Use exact_log2() to search the 1-bit position.  */
+      one_position = exact_log2 (INTVAL (operands[2]));
+      operands[2] = GEN_INT (one_position);
+      return "btgl\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+;; xor_slli: %0 = %3 ^ (%1 << %2); V3 only.
+(define_insn "*xor_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(xor:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "xor_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; xor_srli: %0 = %3 ^ (%1 >> %2) (logical shift); V3 only.
+(define_insn "*xor_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "xor_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Rotate Right Instructions.
+
+;; Rotate right, by immediate (rotri) or by register amount (rotr).
+(define_insn "rotrsi3"
+  [(set (match_operand:SI 0 "register_operand"                 "=   r, r")
+	  (rotatert:SI (match_operand:SI 1 "register_operand"  "    r, r")
+		       (match_operand:SI 2 "nonmemory_operand" " Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "rotri\t%0, %1, %2";
+    case 1:
+      return "rotr\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'NEG' operation
+;; ----------------------------------------------------------------------------
+
+;; For V3/V3M ISA, we have 'neg33' instruction.
+;; So we can identify 'neg Rt3,Ra3' case and set its length to be 2.
+;; And for V2 ISA, there is NO 'neg33' instruction.
+;; The only option is to use 'subri A,B,0' (its semantic is 'A = 0 - B').
+;; Negate.  V3/V3M provides the 16-bit neg33; otherwise synthesize the
+;; negation as a reverse subtract from zero: 'subri %0, %1, 0'.
+(define_insn "negsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(neg:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "neg33\t%0, %1";
+    case 1:
+      return "subri\t%0, %1, 0";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'ONE_COMPLEMENT' operation
+;; ----------------------------------------------------------------------------
+
+;; For V3/V3M ISA, we have 'not33' instruction.
+;; So we can identify 'not Rt3,Ra3' case and set its length to be 2.
+;; Bitwise NOT.  V3/V3M provides the 16-bit not33; otherwise the
+;; complement is synthesized as 'nor %0, %1, %1'.
+(define_insn "one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(not:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "not33\t%0, %1";
+    case 1:
+      return "nor\t%0, %1, %1";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Shift instructions.
+
+;; Shift left logical: 16-bit slli333 (low regs, 3-bit amount),
+;; 32-bit slli (5-bit immediate amount), or sll (register amount).
+(define_insn "ashlsi3"
+  [(set (match_operand:SI 0 "register_operand"             "=   l,    r, r")
+	(ashift:SI (match_operand:SI 1 "register_operand"  "    l,    r, r")
+		   (match_operand:SI 2 "nonmemory_operand" " Iu03, Iu05, r")))]
+  ""
+  "@
+  slli333\t%0, %1, %2
+  slli\t%0, %1, %2
+  sll\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+;; Arithmetic shift right: 16-bit srai45 requires the destination tied
+;; to the source; 32-bit srai (immediate) or sra (register amount).
+(define_insn "ashrsi3"
+  [(set (match_operand:SI 0 "register_operand"               "=   d,    r, r")
+	(ashiftrt:SI (match_operand:SI 1 "register_operand"  "    0,    r, r")
+		     (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))]
+  ""
+  "@
+  srai45\t%0, %2
+  srai\t%0, %1, %2
+  sra\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+;; Logical shift right: 16-bit srli45 requires the destination tied
+;; to the source; 32-bit srli (immediate) or srl (register amount).
+(define_insn "lshrsi3"
+  [(set (match_operand:SI 0 "register_operand"               "=   d,    r, r")
+	(lshiftrt:SI (match_operand:SI 1 "register_operand"  "    0,    r, r")
+		     (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))]
+  ""
+  "@
+  srli45\t%0, %2
+  srli\t%0, %1, %2
+  srl\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; ----------------------------------------------------------------------------
+;; Conditional Move patterns
+;; ----------------------------------------------------------------------------
+
+;; Expand SImode conditional move (TARGET_CMOV).  Comparisons other
+;; than (eq/ne X 0) are lowered to an slt/slts result in a scratch
+;; register -- reversing and/or swapping operands as needed -- so
+;; that the cmovz/cmovn insns below can match afterwards.
+(define_expand "movsicc"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(if_then_else:SI (match_operand 1 "comparison_operator" "")
+			 (match_operand:SI 2 "register_operand" "")
+			 (match_operand:SI 3 "register_operand" "")))]
+  "TARGET_CMOV"
+{
+  if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+      && GET_MODE (XEXP (operands[1], 0)) == SImode
+      && XEXP (operands[1], 1) == const0_rtx)
+    {
+      /* If the operands[1] rtx is already (eq X 0) or (ne X 0),
+         we have gcc generate original template rtx.  */
+      goto create_template;
+    }
+  else
+    {
+      /* Since there is only 'slt'(Set when Less Than) instruction for
+         comparison in Andes ISA, the major strategy we use here is to
+         convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination.
+         We design constraints properly so that the reload phase will assist
+         to make one source operand to use same register as result operand.
+         Then we can use cmovz/cmovn to catch the other source operand
+         which has different register.  */
+      enum rtx_code code = GET_CODE (operands[1]);
+      enum rtx_code new_code = code;
+      rtx cmp_op0 = XEXP (operands[1], 0);
+      rtx cmp_op1 = XEXP (operands[1], 1);
+      rtx tmp;
+      int reverse = 0;
+
+      /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part
+         Strategy : Reverse condition and swap comparison operands
+
+         For example:
+
+             a <= b ? P : Q   (LE or LEU)
+         --> a >  b ? Q : P   (reverse condition)
+         --> b <  a ? Q : P   (swap comparison operands to achieve 'LT/LTU')
+
+             a >= b ? P : Q   (GE or GEU)
+         --> a <  b ? Q : P   (reverse condition to achieve 'LT/LTU')
+
+             a <  b ? P : Q   (LT or LTU)
+         --> (NO NEED TO CHANGE, it is already 'LT/LTU')
+
+             a >  b ? P : Q   (GT or GTU)
+         --> b <  a ? P : Q   (swap comparison operands to achieve 'LT/LTU') */
+      switch (code)
+	{
+	case NE:
+	  /*   (a != b ? P : Q)
+	     can be expressed as
+	       (a == b ? Q : P)
+	     so, fall through to reverse condition */
+	case GE: case GEU: case LE: case LEU:
+	  new_code = reverse_condition (code);
+	  reverse = 1;
+	  break;
+	case EQ: case GT: case GTU: case LT: case LTU:
+	  /* no need to reverse condition */
+	  break;
+	default:
+	  FAIL;
+	}
+
+      /* For '>' comparison operator, we swap operands
+         so that we can have 'LT/LTU' operator.  */
+      if (new_code == GT || new_code == GTU)
+	{
+	  tmp     = cmp_op0;
+	  cmp_op0 = cmp_op1;
+	  cmp_op1 = tmp;
+
+	  new_code = swap_condition (new_code);
+	}
+
+      /* Use a temporary register to store slt/slts result.  */
+      tmp = gen_reg_rtx (SImode);
+
+      /* Split EQ and NE because we don't have direct comparison of EQ and NE.
+         If we don't split it, the conditional move transformation will fail
+         when producing (SET A (EQ B C)) or (SET A (NE B C)).  */
+      if (new_code == EQ)
+	{
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, tmp, GEN_INT (1)));
+	}
+      else if (new_code == NE)
+	{
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, GEN_INT (0), tmp));
+        }
+      else
+	/* This emit_insn will create corresponding 'slt/slts' instruction.  */
+	emit_insn (gen_rtx_SET (VOIDmode, tmp,
+				gen_rtx_fmt_ee (new_code, SImode,
+						cmp_op0, cmp_op1)));
+
+      /* Change comparison semantic into (eq X 0) or (ne X 0) behavior
+         so that cmovz or cmovn will be matched later.
+
+         For reverse condition cases, we want to create a semantic that:
+           (eq X 0) --> pick up "else" part
+         For normal cases, we want to create a semantic that:
+           (ne X 0) --> pick up "then" part
+
+         Later we will have cmovz/cmovn instruction pattern to
+         match corresponding behavior and output instruction.  */
+      operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE,
+				    VOIDmode, tmp, const0_rtx);
+    }
+
+create_template:
+  do {} while(0); /* dummy line */
+})
+
+;; Conditional move when operand 1 == 0.  Alternative 0 ties the "else"
+;; arm to the destination and moves the "then" arm with cmovz;
+;; alternative 1 ties the "then" arm and uses the inverted cmovn to
+;; move the "else" arm instead.
+(define_insn "cmovz"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+        (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovz\t%0, %2, %1
+   cmovn\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+;; Conditional move when operand 1 != 0.  Mirror image of cmovz above:
+;; whichever source arm is tied to the destination determines whether
+;; the direct cmovn or the inverted cmovz is emitted.
+(define_insn "cmovn"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+	(if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovn\t%0, %2, %1
+   cmovz\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; Conditional Branch patterns
+;; ----------------------------------------------------------------------------
+
+;; Expand SImode conditional branch.  Only (eq/ne X 0) branches and
+;; slt/slts comparisons exist in hardware, so every other code is
+;; lowered to an slt/slts result in the TA register followed by a
+;; branch on (eq/ne TA 0).
+(define_expand "cbranchsi4"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* If operands[2] is (const_int 0),
+     we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions.
+     So we have gcc generate original template rtx.  */
+  if (GET_CODE (operands[2]) == CONST_INT)
+    if (INTVAL (operands[2]) == 0)
+      if ((code != GTU)
+	  && (code != GEU)
+	  && (code != LTU)
+	  && (code != LEU))
+	goto create_template;
+
+  /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than)
+     behavior for the comparison, we might need to generate other
+     rtx patterns to achieve same semantic.  */
+  switch (code)
+    {
+    case GT:
+    case GTU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* GT  reg_A, const_int  =>  !(LT  reg_A, const_int + 1) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* We want to plus 1 into the integer value
+	     of operands[2] to create 'slt' instruction.
+	     This calculation is performed on the host machine,
+	     which may be 64-bit integer.
+	     So the meaning of calculation result may be
+	     different from the 32-bit nds32 target.
+
+	     For example:
+	       0x7fffffff + 0x1 -> 0x80000000,
+	       this value is POSITIVE on 64-bit machine,
+	       but the expected value on 32-bit nds32 target
+	       should be NEGATIVE value.
+
+	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
+	     explicitly create SImode constant rtx.  */
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* GT  reg_A, reg_B  =>  LT  reg_B, reg_A */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case GE:
+    case GEU:
+      /* GE  reg_A, reg_B      =>  !(LT  reg_A, reg_B) */
+      /* GE  reg_A, const_int  =>  !(LT  reg_A, const_int) */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == GE)
+	{
+	  /* GE, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* GEU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], EQ);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LT:
+    case LTU:
+      /* LT  reg_A, reg_B      =>  LT  reg_A, reg_B */
+      /* LT  reg_A, const_int  =>  LT  reg_A, const_int */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], NE);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* LE  reg_A, const_int  =>  LT  reg_A, const_int + 1 */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* Note that (le:SI X INT_MAX) is not the same as (lt:SI X INT_MIN).
+	     We better have an assert here in case GCC does not properly
+	     optimize it away.  The INT_MAX here is 0x7fffffff for target.  */
+	  gcc_assert (code != LE || INTVAL (operands[2]) != 0x7fffffff);
+	  /* NOTE(review): the LEU analogue (operands[2] == 0xffffffff,
+	     i.e. INTVAL == -1 after sign-extension) would wrap to 0
+	     below; presumably the operand predicate rules that value
+	     out -- confirm against nds32_reg_constant_operand.  */
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* LE  reg_A, reg_B  =>  !(LT  reg_B, reg_A) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case EQ:
+    case NE:
+      /* NDS32 ISA has various form for eq/ne behavior no matter
+         what kind of the operand is.
+         So just generate original template rtx.  */
+      goto create_template;
+
+    default:
+      FAIL;
+    }
+
+create_template:
+  do {} while(0); /* dummy line */
+})
+
+
+;; Branch on (eq/ne reg 0).  The assembler form and encoded length
+;; depend on which register constraint matched (t/l/r) and on the
+;; branch distance; out-of-range targets are synthesized as an
+;; inverted short branch around an unconditional 'j' (lengths 6/8,
+;; computed by the set_attr_alternative below).
+(define_insn "*cbranchsi4_equality_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"  "t, l, r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-comparison conditional branch has two forms:
+       32-bit instruction =>          beqz/bnez           imm16s << 1
+       16-bit instruction => beqzs8/bnezs8/beqz38/bnez38  imm8s << 1
+
+     For 32-bit case,
+     we assume it is always reachable. (but check range -65500 ~ 65500)
+
+     For 16-bit case,
+     it must satisfy { 255 >= (label - pc) >= -256 } condition.
+     However, since the $pc for nds32 is at the beginning of the instruction,
+     we should leave some length space for current insn.
+     So we use range -250 ~ 250.  */
+
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  return (code == EQ) ? "beqzs8\t%2" : "bnezs8\t%2";
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  return (code == EQ) ? "beqz38\t%1, %2" : "bnez38\t%1, %2";
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* For which_alternative==2, it should not be here.  */
+	  gcc_unreachable ();
+	}
+    case 4:
+      /* including constraints: t, l, and r */
+      return (code == EQ) ? "beqz\t%1, %2" : "bnez\t%1, %2";
+    case 6:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  if (code == EQ)
+	    {
+	      /*    beqzs8  .L0
+	          =>
+	            bnezs8  .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "bnezs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnezs8  .L0
+	          =>
+	            beqzs8  .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "beqzs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  if (code == EQ)
+	    {
+	      /*    beqz38  $r0, .L0
+	          =>
+	            bnez38  $r0, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "bnez38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnez38  $r0, .L0
+	          =>
+	            beqz38  $r0, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "beqz38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* For which_alternative==2, it should not be here.  */
+	  gcc_unreachable ();
+	}
+    case 8:
+      /* constraint: t, l, r.  */
+      if (code == EQ)
+	{
+	  /*    beqz  $r8, .L0
+	      =>
+	        bnez  $r8, .LCB0
+	        j  .L0
+	      .LCB0:
+	   */
+	  return "bnez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+      else
+	{
+	  /*    bnez  $r8, .L0
+	      =>
+	        beqz  $r8, .LCB0
+	        j  .L0
+	      .LCB0:
+	   */
+	  return "beqz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 2
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			  (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+;; This pattern is dedicated to non-V3 ISAs,
+;; because they HAVE NO beqc/bnec instruction.
+(define_insn "*cbranchsi4_equality_reg"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "TARGET_ISA_V2 || TARGET_ISA_V3M"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+
+     For 32-bit case,
+     we assume it is always reachable. (but check range -16350 ~ 16350).  */
+
+  switch (code)
+    {
+    case EQ:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "beq\t%1, %2, %3";
+	case 8:
+	  /*    beq  $r0, $r1, .L0
+	      =>
+	        bne  $r0, $r1, .LCB0
+	        j  .L0
+	      .LCB0:
+	   */
+	  return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    case NE:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "bne\t%1, %2, %3";
+	case 8:
+	  /*    bne  $r0, $r1, .L0
+	      =>
+	        beq  $r0, $r1, .LCB0
+	        j  .L0
+	      .LCB0:
+	   */
+	  return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			   (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+;; This pattern is dedicated to V3,
+;; because V3 DOES HAVE beqc/bnec instruction.
+(define_insn "*cbranchsi4_equality_reg_or_const_int"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r,    r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r, Is11")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "TARGET_ISA_V3"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+       32-bit instruction =>         beqc/bnec          imm8s << 1
+
+     For 32-bit case, we assume it is always reachable.
+     (but check range -16350 ~ 16350 and -250 ~ 250).  */
+
+  switch (code)
+    {
+    case EQ:
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beq\t%1, %2, %3";
+	    case 8:
+	      /*    beq  $r0, $r1, .L0
+	          =>
+	            bne  $r0, $r1, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beqc\t%1, %2, %3";
+	    case 8:
+	      /*    beqc  $r0, constant, .L0
+	          =>
+	            bnec  $r0, constant, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "bnec\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    case NE:
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bne\t%1, %2, %3";
+	    case 8:
+	      /*    bne  $r0, $r1, .L0
+	          =>
+	            beq  $r0, $r1, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bnec\t%1, %2, %3";
+	    case 8:
+	      /*    bnec  $r0, constant, .L0
+	          =>
+	            beqc  $r0, constant, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "beqc\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			  (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		     (const_int 4)
+		     (const_int 8))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+			  (le (minus (match_dup 3) (pc)) (const_int  250)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+(define_insn "*cbranchsi4_greater_less_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_greater_less_comparison_operator"
+			[(match_operand:SI 1 "register_operand" "r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-greater-less-comparison conditional branch has one form:
+       32-bit instruction =>      bgtz/bgez/bltz/blez     imm16s << 1
+
+     For 32-bit case, we assume it is always reachable.
+     (but check range -65500 ~ 65500).  */
+
+  if (get_attr_length (insn) == 8)
+    {
+      /* The branch target is too far to simply use one
+         bgtz/bgez/bltz/blez instruction.
+         We need to reverse condition and use 'j' to jump to the target.  */
+      switch (code)
+	{
+	case GT:
+	  /*   bgtz  $r8, .L0
+	     =>
+	       blez  $r8, .LCB0
+	       j  .L0
+	     .LCB0:
+	   */
+	  return "blez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case GE:
+	  /*   bgez  $r8, .L0
+	     =>
+	       bltz  $r8, .LCB0
+	       j  .L0
+	     .LCB0:
+	   */
+	  return "bltz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LT:
+	  /*   bltz  $r8, .L0
+	     =>
+	       bgez  $r8, .LCB0
+	       j  .L0
+	     .LCB0:
+	   */
+	  return "bgez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LE:
+	  /*   blez  $r8, .L0
+	     =>
+	       bgtz  $r8, .LCB0
+	       j  .L0
+	     .LCB0:
+	   */
+	  return "bgtz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+    }
+
+  switch (code)
+    {
+    case GT:
+      return "bgtz\t%1, %2";
+    case GE:
+      return "bgez\t%1, %2";
+    case LT:
+      return "bltz\t%1, %2";
+    case LE:
+      return "blez\t%1, %2";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+        (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			   (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+(define_expand "cstoresi4"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operator:SI 1 "comparison_operator"
+	  [(match_operand:SI 2 "register_operand" "")
+	   (match_operand:SI 3 "nonmemory_operand" "")]))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[1]);
+
+  switch (code)
+    {
+    case EQ:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A == const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  /* If the integer value is not in the range of imm15s,
+	     we need to force register first because our addsi3 pattern
+	     only accepts the nds32_rimm15s_operand predicate.  */
+	  if (!satisfies_constraint_Is15 (operands[3]))
+	    operands[3] = force_reg (SImode, operands[3]);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A == reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+
+    case NE:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A != const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  /* If the integer value is not in the range of imm15s,
+	     we need to force register first because our addsi3 pattern
+	     only accepts the nds32_rimm15s_operand predicate.  */
+	  if (!satisfies_constraint_Is15 (operands[3]))
+	    operands[3] = force_reg (SImode, operands[3]);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A != reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+
+    case GT:
+    case GTU:
+      /* reg_R = (reg_A > reg_B)       --> slt reg_R, reg_B, reg_A */
+      /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */
+      if (code == GT)
+	{
+	  /* GT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[3], operands[2]));
+	}
+      else
+	{
+	  /* GTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[3], operands[2]));
+	}
+
+      DONE;
+
+    case GE:
+    case GEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A >= const_int_B)
+	     --> movi reg_C, const_int_B - 1
+	         slt  reg_R, reg_C, reg_A */
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) - 1,
+					      SImode)));
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], tmp_reg, operands[2]));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A >= reg_B)
+	     --> slt  reg_R, reg_A, reg_B
+	         xori reg_R, reg_R, const_int_1 */
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[2], operands[3]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[2], operands[3]));
+	    }
+
+	  /* perform 'not' behavior */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+    case LT:
+    case LTU:
+      /* reg_R = (reg_A < reg_B)       --> slt reg_R, reg_A, reg_B */
+      /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[2], operands[3]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[2], operands[3]));
+	}
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A <= const_int_B)
+	     --> movi reg_C, const_int_B + 1
+	         slt  reg_R, reg_A, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) + 1,
+						      SImode)));
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], operands[2], tmp_reg));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A <= reg_B) --> slt  reg_R, reg_B, reg_A
+	                                  xori reg_R, reg_R, const_int_1 */
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[3], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[3], operands[2]));
+	    }
+
+	  /* perform 'not' behavior */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+
+    default:
+      gcc_unreachable ();
+    }
+})
+
+
+(define_insn "slts_compare"
+  [(set (match_operand:SI 0 "register_operand"         "=t,    t, r,    r")
+	(lt:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+	       (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slts45\t%1, %2
+   sltsi45\t%1, %2
+   slts\t%0, %1, %2
+   sltsi\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+(define_insn "slt_compare"
+  [(set (match_operand:SI 0 "register_operand"          "=t,    t, r,    r")
+	(ltu:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+		(match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slt45\t%1, %2
+   slti45\t%1, %2
+   slt\t%0, %1, %2
+   slti\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Unconditional and other jump instructions.
+
+(define_insn "jump"
+  [(set (pc) (label_ref (match_operand 0 "" "")))]
+  ""
+{
+  /* This unconditional jump has two forms:
+       32-bit instruction => j   imm24s << 1
+       16-bit instruction => j8  imm8s << 1
+
+     For 32-bit case,
+     we assume it is always reachable.
+     For 16-bit case,
+     it must satisfy { 255 >= (label - pc) >= -256 } condition.
+     However, since the $pc for nds32 is at the beginning of the instruction,
+     we should leave some length space for current insn.
+     So we use range -250 ~ 250.  */
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      return "j8\t%0";
+    case 4:
+      return "j\t%0";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
+			   (le (minus (match_dup 0) (pc)) (const_int  250)))
+		      (if_then_else (match_test "TARGET_16_BIT")
+				    (const_int 2)
+				    (const_int 4))
+		      (const_int 4)))])
+
+(define_insn "indirect_jump"
+  [(set (pc) (match_operand:SI 0 "register_operand" "r, r"))]
+  ""
+  "@
+  jr5\t%0
+  jr\t%0"
+  [(set_attr "type"   "branch,branch")
+   (set_attr "length" "     2,     4")])
+
+;; Subroutine call instruction returning no value.
+;;   operands[0]: It should be a mem RTX whose address is
+;;                the address of the function.
+;;   operands[1]: It is the number of bytes of arguments pushed as a const_int.
+;;   operands[2]: It is the number of registers used as operands.
+
+(define_expand "call"
+  [(parallel [(call (match_operand 0 "memory_operand" "")
+		    (match_operand 1))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+(define_insn "*call_register"
+  [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r, r"))
+		    (match_operand 1))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "@
+  jral5\t%0
+  jral\t%0"
+  [(set_attr "type"   "branch,branch")
+   (set_attr "length" "     2,     4")])
+
+(define_insn "*call_immediate"
+  [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i"))
+		    (match_operand 1))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%0"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; Subroutine call instruction returning a value.
+;;   operands[0]: It is the hard register in which the value is returned.
+;;   The remaining three operands are the same as the
+;;   three operands of the 'call' instruction.
+;;   (but with numbers increased by one)
+
+(define_expand "call_value"
+  [(parallel [(set (match_operand 0)
+		   (call (match_operand 1 "memory_operand" "")
+		         (match_operand 2)))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+(define_insn "*call_value_register"
+  [(parallel [(set (match_operand 0)
+		   (call (mem (match_operand:SI 1 "register_operand" "r, r"))
+		         (match_operand 2)))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "@
+  jral5\t%1
+  jral\t%1"
+  [(set_attr "type"   "branch,branch")
+   (set_attr "length" "     2,     4")])
+
+(define_insn "*call_value_immediate"
+  [(parallel [(set (match_operand 0)
+		   (call (mem (match_operand:SI 1 "immediate_operand" "i"))
+			 (match_operand 2)))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%1"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; prologue and epilogue.
+
+(define_expand "prologue" [(const_int 0)]
+  ""
+{
+  /* Note that only under V3/V3M ISA, we could use v3push prologue.  */
+  if (TARGET_V3PUSH)
+    nds32_expand_prologue_v3push ();
+  else
+    nds32_expand_prologue ();
+  DONE;
+})
+
+(define_expand "epilogue" [(const_int 0)]
+  ""
+{
+  /* Note that only under V3/V3M ISA, we could use v3pop epilogue.  */
+  if (TARGET_V3PUSH)
+    nds32_expand_epilogue_v3pop ();
+  else
+    nds32_expand_epilogue ();
+  DONE;
+})
+
+
+;; nop instruction.
+
+(define_insn "nop"
+  [(const_int 0)]
+  ""
+{
+  if (TARGET_16_BIT)
+    return "nop16";
+  else
+    return "nop";
+}
+  [(set_attr "type" "misc")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+
+;; ----------------------------------------------------------------------------
+;; Stack push/pop operations
+;; ----------------------------------------------------------------------------
+
+;; The pattern for stack push.
+;; Both stack_push_multiple and stack_v3push use the following pattern.
+;; So we need to use TARGET_V3PUSH to determine the instruction length.
+(define_insn "*stack_push"
+  [(match_parallel 0 "nds32_stack_push_operation"
+     [(set (mem:SI (plus:SI (reg:SI SP_REGNUM) (const_int -4)))
+	   (match_operand:SI 1 "register_operand" ""))
+     ])]
+  ""
+{
+  return nds32_output_stack_push ();
+}
+  [(set_attr "type" "misc")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_V3PUSH")
+		      (const_int 2)
+		      (const_int 4)))])
+
+
+;; The pattern for stack pop.
+;; Both stack_pop_multiple and stack_v3pop use the following pattern.
+;; So we need to use TARGET_V3PUSH to determine the instruction length.
+(define_insn "*stack_pop"
+  [(match_parallel 0 "nds32_stack_pop_operation"
+     [(set (match_operand:SI 1 "register_operand" "")
+	   (mem:SI (reg:SI SP_REGNUM)))
+     ])]
+  ""
+{
+  return nds32_output_stack_pop ();
+}
+  [(set_attr "type" "misc")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_V3PUSH")
+		      (const_int 2)
+		      (const_int 4)))])
+
+
+;; ----------------------------------------------------------------------------
+;; unspec operation patterns
+;; ----------------------------------------------------------------------------
+
+;; In the nds32 target, the 'ret5' instruction is actually 'jr5 $lp'.
+;; This pattern is designed to distinguish function return
+;; from general indirect_jump pattern so that we can directly
+;; generate 'ret5' for readability.
+
+(define_insn "unspec_volatile_func_return"
+  [(set (pc)
+	(unspec_volatile:SI [(reg:SI LP_REGNUM)] UNSPEC_VOLATILE_FUNC_RETURN))]
+  ""
+{
+  if (TARGET_16_BIT)
+    return "ret5";
+  else
+    return "ret";
+}
+  [(set_attr "type" "misc")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+
+;; ----------------------------------------------------------------------------
+;; Jump Table patterns
+;; ----------------------------------------------------------------------------
+;; Need to implement ASM_OUTPUT_ADDR_VEC_ELT (for normal jump table)
+;; or ASM_OUTPUT_ADDR_DIFF_ELT (for pc relative jump table) as well.
+;;
+;; operands[0]: The index to dispatch on.
+;; operands[1]: The lower bound for indices in the table.
+;; operands[2]: The total range of indices in the table.
+;;              i.e. The largest index minus the smallest one.
+;; operands[3]: A label that precedes the table itself.
+;; operands[4]: A label to jump to if the index has a value outside the bounds.
+;;
+;; We need to create following sequences for jump table code generation:
+;;   A) k <-- (plus (operands[0]) (-operands[1]))
+;;   B) if (gtu k operands[2]) then goto operands[4]
+;;   C) t <-- operands[3]
+;;   D) z <-- (mem (plus (k << 0 or 1 or 2) t))
+;;   E) z <-- t + z (NOTE: This is only required for pc relative jump table.)
+;;   F) jump to target with register t or z
+;;
+;; The steps C, D, E, and F are performed by casesi_internal pattern.
+(define_expand "casesi"
+  [(match_operand:SI 0 "register_operand"  "r") ; index to jump on
+   (match_operand:SI 1 "immediate_operand" "i") ; lower bound
+   (match_operand:SI 2 "immediate_operand" "i") ; total range
+   (match_operand:SI 3 "" "")                   ; table label
+   (match_operand:SI 4 "" "")]                  ; Out of range label
+  ""
+{
+  rtx add_tmp;
+  rtx reg, test;
+
+  /* Step A: "k <-- (plus (operands[0]) (-operands[1]))".  */
+  if (operands[1] != const0_rtx)
+    {
+      reg = gen_reg_rtx (SImode);
+      add_tmp = gen_int_mode (-INTVAL (operands[1]), SImode);
+
+      /* If the integer value is not in the range of imm15s,
+         we need to force register first because our addsi3 pattern
+         only accepts the nds32_rimm15s_operand predicate.  */
+      add_tmp = force_reg (SImode, add_tmp);
+
+      emit_insn (gen_addsi3 (reg, operands[0], add_tmp));
+      operands[0] = reg;
+    }
+
+  /* Step B: "if (gtu k operands[2]) then goto operands[4]".  */
+  test = gen_rtx_GTU (VOIDmode, operands[0], operands[2]);
+  emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2],
+				  operands[4]));
+
+  operands[5] = gen_reg_rtx (SImode);
+  /* Step C, D, E, and F, using another temporary register operands[5].  */
+  emit_jump_insn (gen_casesi_internal (operands[0],
+				       operands[3],
+				       operands[5]));
+  DONE;
+})
+
+;; We are receiving operands from casesi pattern:
+;;
+;; operands[0]: The index that has been subtracted by the lower bound.
+;; operands[1]: A label that precedes the table itself.
+;; operands[2]: A temporary register to retrieve value in table.
+;;
+;; We need to perform steps C, D, E, and F:
+;;
+;;   C) t <-- operands[1]
+;;   D) z <-- (mem (plus (operands[0] << m) t))
+;;            m is 2 for normal jump table.
+;;            m is 0, 1, or 2 for pc relative jump table based on diff size.
+;;   E) t <-- z + t (NOTE: This is only required for pc relative jump table.)
+;;   F) Jump to target with register t or z.
+;;
+;; The USE in this pattern is needed to tell flow analysis that this is
+;; a CASESI insn.  It has no other purpose.
+(define_insn "casesi_internal"
+  [(parallel [(set (pc)
+		   (mem:SI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "r")
+					     (const_int 4))
+				    (label_ref (match_operand 1 "" "")))))
+	      (use (label_ref (match_dup 1)))
+	      (clobber (match_operand:SI 2 "register_operand" ""))
+	      (clobber (reg:SI TA_REGNUM))])]
+  ""
+{
+  if (CASE_VECTOR_PC_RELATIVE)
+    return nds32_output_casesi_pc_relative (operands);
+  else
+    return nds32_output_casesi (operands);
+}
+  [(set_attr "length" "20")
+   (set_attr "type" "alu")])
+
+;; ----------------------------------------------------------------------------
+
+;; Performance Extension
+
+(define_insn "clzsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+	(clz:SI (match_operand:SI 1 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "clz\t%0, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "smaxsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smax:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "max\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "sminsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smin:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "min\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*btst"
+  [(set (match_operand:SI 0 "register_operand"                   "=   r")
+	(zero_extract:SI (match_operand:SI 1 "register_operand"  "    r")
+			 (const_int 1)
+			 (match_operand:SI 2 "immediate_operand" " Iu05")))]
+  "TARGET_PERF_EXT"
+  "btst\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; ----------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/6] Andes nds32: machine description of nds32 porting (2).
  2013-10-05 18:21               ` Chung-Ju Wu
@ 2013-10-06  9:57                 ` Richard Sandiford
  2013-10-06 10:28                   ` Chung-Lin Tang
                                     ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Richard Sandiford @ 2013-10-06  9:57 UTC (permalink / raw)
  To: Chung-Ju Wu; +Cc: gcc-patches

Chung-Ju Wu <jasonwucj@gmail.com> writes:
> On 10/2/13 1:31 AM, Richard Sandiford wrote:
>> Chung-Ju Wu <jasonwucj@gmail.com> writes:
>>> +      /* Use $r15, if the value is NOT in the range of Is20,
>>> +         we must output "sethi + ori" directly since
>>> +         we may already passed the split stage.  */
>>> +      return "sethi\t%0, hi20(%1)\;ori\t%0, %0, lo12(%1)";
>>> +    case 17:
>>> +      return "#";
>> 
>> I don't really understand the comment for case 16.  Returning "#"
>> (like for case 17) forces a split even at the output stage.
>> 
>> In this case it might not be worth forcing a split though, so I don't
>> see any need to change the code.  I think the comment should be changed
>> to give a different reason though.
>> 
>
> Sorry for the misleading comment.
>
> For case 17, we were trying to split large constant into two individual
> rtx patterns into "sethi" + "addi" so that we can have chance to match
> "addi" pattern with 16-bit instruction.
>
> But case 16 is different.
> This case is only produced at prologue/epilogue phase, using a temporary
> register $r15 to hold a large constant for adjusting stack pointer. 
> Since prologue/epilogue is after split1/split2 phase, we can only
> output "sethi" + "ori" directly.
> (The "addi" instruction with $r15 is a 32-bit instruction.)

But this code is in the output template of the define_insn.  That code
is only executed during final, after all passes have been run.  If the
template returns "#", final will split the instruction itself, which is
possible even at that late stage.  "#" doesn't have any effect on the
passes themselves.

(FWIW, there's also a split3 pass that runs after prologue/epilogue
generation but before sched2.)

However, ISTR there is/was a rule that prologue instructions shouldn't
be split, since they'd lose their RTX_FRAME_RELATED_P bit or something.
Maybe you hit an ICE because of that?

Another way to handle this would be to have the movsi expander split
large constant moves.  When can_create_pseudo_p (), the intermediate
results can be stored in new registers, otherwise they should reuse
operands[0].  Two advantages to doing it that way are that high parts
can be shared before RA, and that calls to emit_move_insn from the
prologue code will split the move automatically.  I think many ports
do it that way (including MIPS FWIW).

Thanks,
Richard

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/6] Andes nds32: machine description of nds32 porting (2).
  2013-10-06  9:57                 ` Richard Sandiford
@ 2013-10-06 10:28                   ` Chung-Lin Tang
  2013-10-06 10:33                     ` Richard Sandiford
  2013-10-06 13:24                   ` Chung-Ju Wu
  2013-10-13 13:29                   ` Chung-Ju Wu
  2 siblings, 1 reply; 18+ messages in thread
From: Chung-Lin Tang @ 2013-10-06 10:28 UTC (permalink / raw)
  To: Chung-Ju Wu, gcc-patches, rdsandiford

On 2013/10/6 05:57 PM, Richard Sandiford wrote:
>> > But case 16 is different.
>> > This case is only produced at prologue/epilogue phase, using a temporary
>> > register $r15 to hold a large constant for adjusting stack pointer. 
>> > Since prologue/epilogue is after split1/split2 phase, we can only
>> > output "sethi" + "ori" directly.
>> > (The "addi" instruction with $r15 is a 32-bit instruction.)
> But this code is in the output template of the define_insn.  That code
> is only executed during final, after all passes have been run.  If the
> template returns "#", final will split the instruction itself, which is
> possible even at that late stage.  "#" doesn't have any effect on the
> passes themselves.
> 
> (FWIW, there's also a split3 pass that runs after prologue/epilogue
> generation but before sched2.)
> 
> However, ISTR there is/was a rule that prologue instructions shouldn't
> be split, since they'd lose their RTX_FRAME_RELATED_P bit or something.
> Maybe you hit an ICE because of that?
> 
> Another way to handle this would be to have the movsi expander split
> large constant moves.  When can_create_pseudo_p (), the intermediate
> results can be stored in new registers, otherwise they should reuse
> operands[0].  Two advantages to doing it that way are that high parts
> can be shared before RA, and that calls to emit_move_insn from the
> prologue code will split the move automatically.  I think many ports
> do it that way (including MIPS FWIW).

FWIW, most ports usually just handle such "large adjustment" cases in
the prologue/epilogue code manually; either multiple SP-adjustments, or
use of a temp register (better control of RTX_FRAME_RELATED_P anyways).
You might be able to get it to work, but trying to rely on the splitter
does not seem like best practice...

Chung-Lin

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/6] Andes nds32: machine description of nds32 porting (2).
  2013-10-06 10:28                   ` Chung-Lin Tang
@ 2013-10-06 10:33                     ` Richard Sandiford
  2013-10-06 11:27                       ` Chung-Lin Tang
  0 siblings, 1 reply; 18+ messages in thread
From: Richard Sandiford @ 2013-10-06 10:33 UTC (permalink / raw)
  To: Chung-Lin Tang; +Cc: Chung-Ju Wu, gcc-patches

Chung-Lin Tang <cltang@codesourcery.com> writes:
> On 2013/10/6 05:57 PM, Richard Sandiford wrote:
>>> > But case 16 is different.
>>> > This case is only produced at prologue/epilogue phase, using a temporary
>>> > register $r15 to hold a large constant for adjusting stack pointer. 
>>> > Since prologue/epilogue is after split1/split2 phase, we can only
>>> > output "sethi" + "ori" directly.
>>> > (The "addi" instruction with $r15 is a 32-bit instruction.)
>> But this code is in the output template of the define_insn.  That code
>> is only executed during final, after all passes have been run.  If the
>> template returns "#", final will split the instruction itself, which is
>> possible even at that late stage.  "#" doesn't have any effect on the
>> passes themselves.
>> 
>> (FWIW, there's also a split3 pass that runs after prologue/epilogue
>> generation but before sched2.)
>> 
>> However, ISTR there is/was a rule that prologue instructions shouldn't
>> be split, since they'd lose their RTX_FRAME_RELATED_P bit or something.
>> Maybe you hit an ICE because of that?
>> 
>> Another way to handle this would be to have the movsi expander split
>> large constant moves.  When can_create_pseudo_p (), the intermediate
>> results can be stored in new registers, otherwise they should reuse
>> operands[0].  Two advantages to doing it that way are that high parts
>> can be shared before RA, and that calls to emit_move_insn from the
>> prologue code will split the move automatically.  I think many ports
>> do it that way (including MIPS FWIW).
>
> FWIW, most ports usually just handle such "large adjustment" cases in
> the prologue/epilogue code manually; either multiple SP-adjustments, or
> use of a temp register (better control of RTX_FRAME_RELATED_P anyways).
> You might be able to get it to work, but trying to rely on the splitter
> does not seem like best practice...

To be clear, I wasn't talking about relying on the splitter in the
define_split sense.  I was saying that the move expanders could
split large constants.

MIPS prologue code does use emit_move_insn to move large constants,
which automatically produces a split form from the outset.  I don't
really agree that it's bad practice.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/6] Andes nds32: machine description of nds32 porting (2).
  2013-10-06 10:33                     ` Richard Sandiford
@ 2013-10-06 11:27                       ` Chung-Lin Tang
  2013-10-06 13:39                         ` Chung-Ju Wu
  0 siblings, 1 reply; 18+ messages in thread
From: Chung-Lin Tang @ 2013-10-06 11:27 UTC (permalink / raw)
  To: Chung-Ju Wu, gcc-patches, rdsandiford

On 2013/10/6 下午 06:33, Richard Sandiford wrote:
> Chung-Lin Tang <cltang@codesourcery.com> writes:
>> On 2013/10/6 05:57 PM, Richard Sandiford wrote:
>>>>> But case 16 is different.
>>>>> This case is only produced at prologue/epilogue phase, using a temporary
>>>>> register $r15 to hold a large constant for adjusting stack pointer. 
>>>>> Since prologue/epilogue is after split1/split2 phase, we can only
>>>>> output "sethi" + "ori" directly.
>>>>> (The "addi" instruction with $r15 is a 32-bit instruction.)
>>> But this code is in the output template of the define_insn.  That code
>>> is only executed during final, after all passes have been run.  If the
>>> template returns "#", final will split the instruction itself, which is
>>> possible even at that late stage.  "#" doesn't have any effect on the
>>> passes themselves.
>>>
>>> (FWIW, there's also a split3 pass that runs after prologue/epilogue
>>> generation but before sched2.)
>>>
>>> However, ISTR there is/was a rule that prologue instructions shouldn't
>>> be split, since they'd lose their RTX_FRAME_RELATED_P bit or something.
>>> Maybe you hit an ICE because of that?
>>>
>>> Another way to handle this would be to have the movsi expander split
>>> large constant moves.  When can_create_pseudo_p (), the intermediate
>>> results can be stored in new registers, otherwise they should reuse
>>> operands[0].  Two advantages to doing it that way are that high parts
>>> can be shared before RA, and that calls to emit_move_insn from the
>>> prologue code will split the move automatically.  I think many ports
>>> do it that way (including MIPS FWIW).
>>
>> FWIW, most ports usually just handle such "large adjustment" cases in
>> the prologue/epilogue code manually; either multiple SP-adjustments, or
>> use of a temp register (better control of RTX_FRAME_RELATED_P anyways).
>> You might be able to get it to work, but trying to rely on the splitter
>> does not seem like best practice...
> 
> To be clear, I wasn't talking about relying on the splitter in the
> define_split sense.  I was saying that the move expanders could
> split large constants.

Okay, I sort of missed the context.

> MIPS prologue code does use emit_move_insn to move large constants,
> which automatically produces a split form from the outset.  I don't
> really agree that it's bad practice.

I think that's mostly the same as what I meant by "manually"; it seems
that there's lots of MIPS backend machinery starting from
mips_legitimize_move(), so it's not really "automatic" ;)

Chung-Lin

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/6] Andes nds32: machine description of nds32 porting (2).
  2013-10-06  9:57                 ` Richard Sandiford
  2013-10-06 10:28                   ` Chung-Lin Tang
@ 2013-10-06 13:24                   ` Chung-Ju Wu
  2013-10-13 13:29                   ` Chung-Ju Wu
  2 siblings, 0 replies; 18+ messages in thread
From: Chung-Ju Wu @ 2013-10-06 13:24 UTC (permalink / raw)
  To: gcc-patches, rdsandiford

2013/10/6 Richard Sandiford <rdsandiford@googlemail.com>:
> Chung-Ju Wu <jasonwucj@gmail.com> writes:
>> On 10/2/13 1:31 AM, Richard Sandiford wrote:
>>> Chung-Ju Wu <jasonwucj@gmail.com> writes:
>>>> +      /* Use $r15, if the value is NOT in the range of Is20,
>>>> +         we must output "sethi + ori" directly since
>>>> +         we may already passed the split stage.  */
>>>> +      return "sethi\t%0, hi20(%1)\;ori\t%0, %0, lo12(%1)";
>>>> +    case 17:
>>>> +      return "#";
>>>
>>> I don't really understand the comment for case 16.  Returning "#"
>>> (like for case 17) forces a split even at the output stage.
>>>
>>> In this case it might not be worth forcing a split though, so I don't
>>> see any need to change the code.  I think the comment should be changed
>>> to give a different reason though.
>>>
>>
>> Sorry for the misleading comment.
>>
>> For case 17, we were trying to split large constant into two individual
>> rtx patterns into "sethi" + "addi" so that we can have chance to match
>> "addi" pattern with 16-bit instruction.
>>
>> But case 16 is different.
>> This case is only produced at prologue/epilogue phase, using a temporary
>> register $r15 to hold a large constant for adjusting stack pointer.
>> Since prologue/epilogue is after split1/split2 phase, we can only
>> output "sethi" + "ori" directly.
>> (The "addi" instruction with $r15 is a 32-bit instruction.)
>
> But this code is in the output template of the define_insn.  That code
> is only executed during final, after all passes have been run.  If the
> template returns "#", final will split the instruction itself, which is
> possible even at that late stage.  "#" doesn't have any effect on the
> passes themselves.
>
> (FWIW, there's also a split3 pass that runs after prologue/epilogue
> generation but before sched2.)
>
> However, ISTR there is/was a rule that prologue instructions shouldn't
> be split, since they'd lose their RTX_FRAME_RELATED_P bit or something.
> Maybe you hit an ICE because of that?
>

Ah... yes, you are right.  In the nds32_force_addi_stack_int(),
I move a large constant to a temp register for stack pointer adjustment:

+      /* $r15 is going to be temporary register to hold the value.  */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      /* Create one more instruction to move value
+         into the temporary register.  */
+      value_move_insn = emit_move_insn (tmp_reg, GEN_INT (full_value));
+
+      /* At prologue, we need to tell GCC that this is frame related insn,
+         so that we can consider this instruction to output debug information.
+         If full_value is NEGATIVE, it means this function
+         is invoked by expand_prologue.  */
+      if (full_value < 0)
+        RTX_FRAME_RELATED_P (value_move_insn) = 1;
+
+      /* Create new 'add' rtx.  */
+      sp_adjust_insn = gen_addsi3 (stack_pointer_rtx,
+                                   stack_pointer_rtx,
+                                   tmp_reg);
+      /* Emit rtx into insn list and receive its transformed insn rtx.  */
+      sp_adjust_insn = emit_insn (sp_adjust_insn);
+
+      /* At prologue, we need to tell GCC that this is frame related insn,
+         so that we can consider this instruction to output debug information.
+         If full_value is NEGATIVE, it means this function
+         is invoked by expand_prologue.  */
+      if (full_value < 0)
+        RTX_FRAME_RELATED_P (sp_adjust_insn) = 1;

If there is a rule to avoid spliting instructions with RTX_FRAME_RELATED_P,
I think it is the case why I hit an ICE of unrecognized insn for
'value_move_insn'.

It seems that my comment to case 16 is incorrect.
Thanks for clarifying it.

> Another way to handle this would be to have the movsi expander split
> large constant moves.  When can_create_pseudo_p (), the intermediate
> results can be stored in new registers, otherwise they should reuse
> operands[0].  Two advantages to doing it that way are that high parts
> can be shared before RA, and that calls to emit_move_insn from the
> prologue code will split the move automatically.  I think many ports
> do it that way (including MIPS FWIW).
>

Do you mean that I should split large constant by myself in movsi
(or starting from movsi) for both case 16 and case 17?

Thanks for the suggestion.  I'll try to implement it. :)


Best regards,
jasonwucj

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/6] Andes nds32: machine description of nds32 porting (2).
  2013-10-06 11:27                       ` Chung-Lin Tang
@ 2013-10-06 13:39                         ` Chung-Ju Wu
  0 siblings, 0 replies; 18+ messages in thread
From: Chung-Ju Wu @ 2013-10-06 13:39 UTC (permalink / raw)
  To: Chung-Lin Tang; +Cc: gcc-patches, rdsandiford

2013/10/6 Chung-Lin Tang <cltang@codesourcery.com>:
> On 2013/10/6 下午 06:33, Richard Sandiford wrote:
>> Chung-Lin Tang <cltang@codesourcery.com> writes:
>>> On 2013/10/6 05:57 PM, Richard Sandiford wrote:
>>>> Another way to handle this would be to have the movsi expander split
>>>> large constant moves.  When can_create_pseudo_p (), the intermediate
>>>> results can be stored in new registers, otherwise they should reuse
>>>> operands[0].  Two advantages to doing it that way are that high parts
>>>> can be shared before RA, and that calls to emit_move_insn from the
>>>> prologue code will split the move automatically.  I think many ports
>>>> do it that way (including MIPS FWIW).
>>>
>>> FWIW, most ports usually just handle such "large adjustment" cases in
>>> the prologue/epilogue code manually; either multiple SP-adjustments, or
>>> use of a temp register (better control of RTX_FRAME_RELATED_P anyways).
>>> You might be able to get it to work, but trying to rely on the splitter
>>> does not seem like best practice...
>>
>> To be clear, I wasn't talking about relying on the splitter in the
>> define_split sense.  I was saying that the move expanders could
>> split large constants.
>
>> MIPS prologue code does use emit_move_insn to move large constants,
>> which automatically produces a split form from the outset.  I don't
>> really agree that it's bad practice.
>
> I think that's mostly the same as what I meant by "manually"; it seems
> that there's lots of MIPS backend machinery starting from
> mips_legitimize_move(), so it's not really "automatic" ;)
>
> Chung-Lin
>

Hi, Chung-Lin,

Thanks for the hint. ^_^

I will follow Richard and your suggestion to split large constant
via movsi manually.  So that it will be automatically split whenever
emit_move_insn() is used. :)


Best regards,
jasonwucj

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/6] Andes nds32: machine description of nds32 porting (2).
  2013-10-06  9:57                 ` Richard Sandiford
  2013-10-06 10:28                   ` Chung-Lin Tang
  2013-10-06 13:24                   ` Chung-Ju Wu
@ 2013-10-13 13:29                   ` Chung-Ju Wu
  2013-10-13 16:43                     ` Richard Sandiford
  2 siblings, 1 reply; 18+ messages in thread
From: Chung-Ju Wu @ 2013-10-13 13:29 UTC (permalink / raw)
  To: gcc-patches, rdsandiford; +Cc: cltang

[-- Attachment #1: Type: text/plain, Size: 3952 bytes --]

Hi, Richard,

Sorry for the late response due to company projects.
The followings are my discovery and modifications.


On 10/6/13 5:57 PM, Richard Sandiford wrote:
> Chung-Ju Wu <jasonwucj@gmail.com> writes:
>> On 10/2/13 1:31 AM, Richard Sandiford wrote:
>>> Chung-Ju Wu <jasonwucj@gmail.com> writes:
>>>> +      /* Use $r15, if the value is NOT in the range of Is20,
>>>> +         we must output "sethi + ori" directly since
>>>> +         we may already passed the split stage.  */
>>>> +      return "sethi\t%0, hi20(%1)\;ori\t%0, %0, lo12(%1)";
>>>> +    case 17:
>>>> +      return "#";
>>>
>>> I don't really understand the comment for case 16.  Returning "#"
>>> (like for case 17) forces a split even at the output stage.
>>>
>>> In this case it might not be worth forcing a split though, so I don't
>>> see any need to change the code.  I think the comment should be changed
>>> to give a different reason though.
>>>
>>
>> Sorry for the misleading comment.
>>
>> For case 17, we were trying to split large constant into two individual
>> rtx patterns into "sethi" + "addi" so that we can have chance to match
>> "addi" pattern with 16-bit instruction.
>>
>> But case 16 is different.
>> This case is only produced at prologue/epilogue phase, using a temporary
>> register $r15 to hold a large constant for adjusting stack pointer. 
>> Since prologue/epilogue is after split1/split2 phase, we can only
>> output "sethi" + "ori" directly.
>> (The "addi" instruction with $r15 is a 32-bit instruction.)
> 
> But this code is in the output template of the define_insn.  That code
> is only executed during final, after all passes have been run.  If the
> template returns "#", final will split the instruction itself, which is
> possible even at that late stage.  "#" doesn't have any effect on the
> passes themselves.
> 
> (FWIW, there's also a split3 pass that runs after prologue/epilogue
> generation but before sched2.)
> 
> However, ISTR there is/was a rule that prologue instructions shouldn't
> be split, since they'd lose their RTX_FRAME_RELATED_P bit or something.
> Maybe you hit an ICE because of that?
> 

There is a statement in the try_split() in gcc/emit-rtl.c:

3458|   /* We're not good at redistributing frame information.  */
3459|   if (RTX_FRAME_RELATED_P (trial))
3460|     return trial;

You are correct, this is the statement that prevent the
instruction from splitting and then caused ICE in my porting.


> Another way to handle this would be to have the movsi expander split
> large constant moves.  When can_create_pseudo_p (), the intermediate
> results can be stored in new registers, otherwise they should reuse
> operands[0].  Two advantages to doing it that way are that high parts
> can be shared before RA, and that calls to emit_move_insn from the
> prologue code will split the move automatically.  I think many ports
> do it that way (including MIPS FWIW).
> 

Thanks for pointing out a direction.

I follow your suggestion to split large constant moves in the
movsi expander, using another predicate "nds32_move_operand"
to prevent large constant from being matched by define_insn:

+(define_insn "*mov<mode>"
+  [(set (match_operand:QIHISI 0 "nonimmediate_operand" "...")
+        (match_operand:QIHISI 1 "nds32_move_operand"   "..."))]
+ ...

+(define_predicate "nds32_move_operand"
+  (and (match_operand 0 "general_operand")
+       (not (match_code "high,const,symbol_ref,label_ref")))
+{
+  /* If the constant op satisfies neither Is20 nor Ihig,
+     we cannot perform the move with a single instruction.  */
+  if (CONST_INT_P (op)
+      && !satisfies_constraint_Is20 (op)
+      && !satisfies_constraint_Ihig (op))
+    return false;
+
+  return true;
+})

Now the case 16 and case 17 can be removed from output template
of the define_insn.  Thank you very much!


> Thanks,
> Richard
> 

A revised-3 patch for nds32.md is attached.
Thanks for the comments! :)

Best regards,
jasonwucj




[-- Attachment #2: 2-nds32-backend-md-part2.v3.revised-3.patch --]
[-- Type: text/plain, Size: 71968 bytes --]

diff --git gcc/common/config/nds32/nds32-common.c gcc/common/config/nds32/nds32-common.c
new file mode 100644
index 0000000..f82f725
--- /dev/null
+++ gcc/common/config/nds32/nds32-common.c
@@ -0,0 +1,117 @@
+/* Common hooks of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic-core.h"
+#include "tm.h"
+#include "common/common-target.h"
+#include "common/common-target-def.h"
+#include "opts.h"
+#include "flags.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_HANDLE_OPTION.  */
+static bool
+nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
+		     struct gcc_options *opts_set ATTRIBUTE_UNUSED,
+		     const struct cl_decoded_option *decoded,
+		     location_t loc)
+{
+  size_t     code  = decoded->opt_index;
+  int        value = decoded->value;
+
+  switch (code)
+    {
+    case OPT_misr_vector_size_:
+      /* Check the valid vector size: 4 or 16.  */
+      if (value != 4 && value != 16)
+	{
+	  error_at (loc, "for the option -misr-vector-size=X, the valid X "
+			 "must be: 4 or 16");
+	  return false;
+	}
+
+      return true;
+
+    case OPT_mcache_block_size_:
+      /* Check valid value: 4 8 16 32 64 128 256 512.  */
+      if (exact_log2 (value) < 2 || exact_log2 (value) > 9)
+	{
+	  error_at (loc, "for the option -mcache-block-size=X, the valid X "
+			 "must be: 4, 8, 16, 32, 64, 128, 256, or 512");
+	  return false;
+	}
+
+      return true;
+
+    default:
+      return true;
+    }
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
+static const struct default_options nds32_option_optimization_table[] =
+{
+  /* Enable -fomit-frame-pointer by default at -O1 or higher.  */
+  { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+  /* Enable -mv3push by default at -Os, but it is useless under V2 ISA.  */
+  { OPT_LEVELS_SIZE,   OPT_mv3push,             NULL, 1 },
+
+  { OPT_LEVELS_NONE,   0,                       NULL, 0 }
+};
+
+/* ------------------------------------------------------------------------ */
+\f
+/* Run-time Target Specification.  */
+
+/* Default enable
+     TARGET_GP_DIRECT: Generate gp-imply instruction.
+     TARGET_16_BIT   : Generate 16/32 bit mixed length instruction.
+     TARGET_PERF_EXT : Generate performance extension instruction.
+     TARGET_CMOV     : Generate conditional move instruction.  */
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS		\
+  (MASK_GP_DIRECT				\
+   | MASK_16_BIT				\
+   | MASK_PERF_EXT				\
+   | MASK_CMOV)
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION nds32_handle_option
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE nds32_option_optimization_table
+
+\f
+/* Defining the Output Assembler Language.  */
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+/* ------------------------------------------------------------------------ */
+
+struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER;
+
+/* ------------------------------------------------------------------------ */
diff --git gcc/config/nds32/nds32.md gcc/config/nds32/nds32.md
new file mode 100644
index 0000000..ef03a53
--- /dev/null
+++ gcc/config/nds32/nds32.md
@@ -0,0 +1,2221 @@
+;; Machine description of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2013 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; See file "rtl.def" for documentation on define_insn, match_*, et. al.
+
+;; Include predicates definition.
+(include "predicates.md")
+
+;; Include constraints definition.
+(include "constraints.md")
+
+;; Include iterators definition.
+(include "iterators.md")
+
+;; Include pipelines definition.
+(include "pipelines.md")
+
+
+;; Include constants definition.
+(include "constants.md")
+
+
+;; Include intrinsic functions definition.
+(include "nds32-intrinsic.md")
+
+;; Include block move for nds32 multiple load/store behavior.
+(include "nds32-multiple.md")
+
+;; Include DImode/DFmode operations.
+(include "nds32-doubleword.md")
+
+;; Include peephole patterns.
+(include "nds32-peephole2.md")
+
+
+;; Insn type, it is used to default other attribute values.
+(define_attr "type"
+  "unknown,move,load,store,alu,compare,branch,call,misc"
+  (const_string "unknown"))
+
+
+;; Length, in bytes, default is 4-bytes.
+(define_attr "length" "" (const_int 4))
+
+
+;; Enabled, which is used to enable/disable insn alternatives.
+;; Note that we use length and TARGET_16_BIT here as criteria.
+;; If the instruction pattern already check TARGET_16_BIT to
+;; determine the length by itself, its enabled attribute should be
+;; always 1 to avoid the conflict with the settings here.
+(define_attr "enabled" ""
+  (cond [(and (eq_attr "length" "2")
+	      (match_test "!TARGET_16_BIT"))
+	 (const_int 0)]
+	(const_int 1)))
+
+
+;; ----------------------------------------------------------------------------
+
+
+;; Move instructions.
+
+;; For QImode and HImode, the immediate value can be fit in imm20s.
+;; So there is no need to split rtx for QI and HI patterns.
+
+(define_expand "movqi"
+  [(set (match_operand:QI 0 "general_operand" "")
+	(match_operand:QI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (QImode, operands[1]);
+})
+
+(define_expand "movhi"
+  [(set (match_operand:HI 0 "general_operand" "")
+	(match_operand:HI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (HImode, operands[1]);
+})
+
+(define_expand "movsi"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(match_operand:SI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (SImode, operands[1]);
+
+  /* If operands[1] is a large constant that cannot be moved
+     with a single instruction, we need to split it.  */
+  if (CONST_INT_P (operands[1])
+      && !satisfies_constraint_Is20 (operands[1])
+      && !satisfies_constraint_Ihig (operands[1]))
+    {
+      rtx high20_rtx;
+      rtx low12_rtx;
+      rtx tmp_rtx;
+
+      tmp_rtx = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];
+
+      high20_rtx = GEN_INT ((INTVAL (operands[1]) >> 12) << 12);
+      low12_rtx = GEN_INT (INTVAL (operands[1]) & 0xfff);
+
+      emit_move_insn (tmp_rtx, high20_rtx);
+      emit_move_insn (operands[0], plus_constant (SImode,
+						  tmp_rtx,
+						  INTVAL (low12_rtx)));
+      DONE;
+    }
+})
+
+(define_insn "*mov<mode>"
+  [(set (match_operand:QIHISI 0 "nonimmediate_operand" "=r, r, U45, U33, U37, U45, m,   l,   l,   l,   d, r,    d,    r,    r,    r")
+	(match_operand:QIHISI 1 "nds32_move_operand"   " r, r,   l,   l,   l,   d, r, U45, U33, U37, U45, m, Ip05, Is05, Is20, Ihig"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mov55\t%0, %1";
+    case 1:
+      return "ori\t%0, %1, 0";
+    case 2:
+    case 3:
+    case 4:
+    case 5:
+      return nds32_output_16bit_store (operands, <byte>);
+    case 6:
+      return nds32_output_32bit_store (operands, <byte>);
+    case 7:
+    case 8:
+    case 9:
+    case 10:
+      return nds32_output_16bit_load (operands, <byte>);
+    case 11:
+      return nds32_output_32bit_load (operands, <byte>);
+    case 12:
+      return "movpi45\t%0, %1";
+    case 13:
+      return "movi55\t%0, %1";
+    case 14:
+      return "movi\t%0, %1";
+    case 15:
+      return "sethi\t%0, hi20(%1)";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,store,store,store,store,store,load,load,load,load,load,alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,    2,    2,    2,    2,    4,   2,   2,   2,   2,   4,  2,  2,  4,  4")])
+
+
+;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF
+;; are able to match such instruction template.
+(define_insn "*move_addr"
+  [(set (match_operand:SI 0 "register_operand"       "=l, r")
+	(match_operand:SI 1 "nds32_symbolic_operand" " i, i"))]
+  ""
+  "la\t%0, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "8")])
+
+
+(define_insn "*sethi"
+  [(set (match_operand:SI 0 "register_operand"                "=r")
+	(high:SI (match_operand:SI 1 "nds32_symbolic_operand" " i")))]
+  ""
+  "sethi\t%0, hi20(%1)"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+(define_insn "*lo_sum"
+  [(set (match_operand:SI 0 "register_operand"                  "=r")
+	(lo_sum:SI (match_operand:SI 1 "register_operand"       " r")
+		   (match_operand:SI 2 "nds32_symbolic_operand" " i")))]
+  ""
+  "ori\t%0, %1, lo12(%2)"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Zero extension instructions.
+
+(define_insn "zero_extend<mode>si2"
+  [(set (match_operand:SI 0 "register_operand"                       "=l, r,   l, *r")
+	(zero_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" " l, r, U33,  m")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "ze<size>33\t%0, %1";
+    case 1:
+      return "ze<size>\t%0, %1";
+    case 2:
+      return nds32_output_16bit_load (operands, <byte>);
+    case 3:
+      return nds32_output_32bit_load (operands, <byte>);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,load,load")
+   (set_attr "length" "  2,  4,   2,   4")])
+
+
+;; Sign extension instructions.
+
+(define_insn "extend<mode>si2"
+  [(set (match_operand:SI 0 "register_operand"                       "=l, r, r")
+	(sign_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" " l, r, m")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "se<size>33\t%0, %1";
+    case 1:
+      return "se<size>\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load_s (operands, <byte>);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,load")
+   (set_attr "length" "  2,  4,   4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Arithmetic instructions.
+
+(define_insn "add<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                   "=   d,    l,    d,    l,  d, l,    k,    l,    r, r")
+	(plus:QIHISI (match_operand:QIHISI 1 "register_operand"      "    0,    l,    0,    l, %0, l,    0,    k,    r, r")
+		     (match_operand:QIHISI 2 "nds32_rimm15s_operand" " In05, In03, Iu05, Iu03,  r, l, Is10, Iu06, Is15, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      /* addi Rt4,Rt4,-x  ==>  subi45 Rt4,x
+         where 0 <= x <= 31 */
+      operands[2] = gen_int_mode (-INTVAL (operands[2]), SImode);
+      return "subi45\t%0, %2";
+    case 1:
+      /* addi Rt3,Ra3,-x  ==>  subi333 Rt3,Ra3,x
+         where 0 <= x <= 7 */
+      operands[2] = gen_int_mode (-INTVAL (operands[2]), SImode);
+      return "subi333\t%0, %1, %2";
+    case 2:
+      return "addi45\t%0, %2";
+    case 3:
+      return "addi333\t%0, %1, %2";
+    case 4:
+      return "add45\t%0, %2";
+    case 5:
+      return "add333\t%0, %1, %2";
+    case 6:
+      return "addi10.sp\t%2";
+    case 7:
+      return "addri36.sp\t%0, %2";
+    case 8:
+      return "addi\t%0, %1, %2";
+    case 9:
+      return "add\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  2,  2,  2,  2,  2,  2,  4,  4")])
+
+(define_insn "sub<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                    "=d, l,    r, r")
+	(minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" " 0, l, Is15, r")
+		      (match_operand:QIHISI 2 "register_operand"      " r, l,    r, r")))]
+  ""
+  "@
+  sub45\t%0, %2
+  sub333\t%0, %1, %2
+  subri\t%0, %2, %1
+  sub\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  4,  4")])
+
+
+;; GCC intends to simplify (plus (ashift ...) (reg))
+;; into (plus (mult ...) (reg)), so our matching pattern takes 'mult'
+;; and needs to ensure it is exact_log2 value.
+;; Shift-then-add (V3 only): Rt = Rb + (Ra << log2(imm)).
+;; The insn condition requires operands[2] to be a power of 2 in the
+;; 1..2^31 range so floor_log2 below yields a valid 0..31 shift amount.
+(define_insn "*add_slli"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand"  " r")
+			  (match_operand:SI 2 "immediate_operand" " i"))
+		 (match_operand:SI 3 "register_operand"           " r")))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[2])) != -1)
+   && (exact_log2 (INTVAL (operands[2])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'add_slli' instruction.  */
+  operands[2] = GEN_INT (floor_log2 (INTVAL (operands[2])));
+
+  return "add_slli\t%0, %3, %1, %2";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Logical-shift-right-then-add (V3 only): Rt = Rb + (Ra >> imm5).
+(define_insn "*add_srli"
+  [(set (match_operand:SI 0 "register_operand"                        "=   r")
+	(plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			      (match_operand:SI 2 "immediate_operand" " Iu05"))
+		 (match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "add_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; GCC canonicalizes (minus (reg) (ashift ...))
+;; into (minus (reg) (mult ...)), so our matching pattern takes 'mult'
+;; and needs to ensure the multiplier is an exact power of 2.
+;; Shift-then-subtract (V3 only): Rt = Ra - (Rb << log2(imm)).
+;; As with *add_slli, the condition guarantees operands[3] is a power
+;; of 2 so floor_log2 recovers the shift amount.
+(define_insn "*sub_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=r")
+	(minus:SI (match_operand:SI 1 "register_operand"           " r")
+		  (mult:SI (match_operand:SI 2 "register_operand"  " r")
+			   (match_operand:SI 3 "immediate_operand" " i"))))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[3])) != -1)
+   && (exact_log2 (INTVAL (operands[3])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'sub_slli' instruction.  */
+  operands[3] = GEN_INT (floor_log2 (INTVAL (operands[3])));
+
+  return "sub_slli\t%0, %1, %2, %3";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Logical-shift-right-then-subtract (V3 only): Rt = Ra - (Rb >> imm5).
+(define_insn "*sub_srli"
+  [(set (match_operand:SI 0 "register_operand"                         "=   r")
+	(minus:SI (match_operand:SI 1 "register_operand"               "    r")
+		  (lshiftrt:SI (match_operand:SI 2 "register_operand"  "    r")
+			       (match_operand:SI 3 "immediate_operand" " Iu05"))))]
+  "TARGET_ISA_V3"
+  "sub_srli\t%0, %1, %2, %3"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; Multiplication instructions.
+
+;; 32x32->32 multiply.  Alternative 0 is the 16-bit 'mul33' (commutative
+;; operands, destination tied to operand 1); alternative 1 is 32-bit 'mul'.
+(define_insn "mulsi3"
+  [(set (match_operand:SI 0 "register_operand"          "= w, r")
+	(mult:SI (match_operand:SI 1 "register_operand" " %0, r")
+		 (match_operand:SI 2 "register_operand" "  w, r")))]
+  ""
+  "@
+  mul33\t%0, %2
+  mul\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+;; Widening signed 32x32->64 multiply; only V2/V3 provide 'mulsr64'.
+(define_insn "mulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V2 || TARGET_ISA_V3"
+  "mulsr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; Widening unsigned 32x32->64 multiply; only V2/V3 provide 'mulr64'.
+(define_insn "umulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V2 || TARGET_ISA_V3"
+  "mulr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Multiply-accumulate instructions.
+
+;; Multiply-add: Rt = Rt + Ra * Rb.  The accumulator (operand 3) is
+;; tied to the destination via the "0" constraint.  Two patterns cover
+;; both canonical orders of the plus operands.
+(define_insn "*maddr32_0"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (match_operand:SI 3 "register_operand"          " 0")
+                 (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "*maddr32_1"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))
+                 (match_operand:SI 3 "register_operand"          " 0")))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; Multiply-subtract: Rt = Rt - Ra * Rb, accumulator tied to destination.
+(define_insn "*msubr32"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (minus:SI (match_operand:SI 3 "register_operand"          " 0")
+                  (mult:SI (match_operand:SI 1 "register_operand" " r")
+                           (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "msubr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Div Instructions.
+
+;; Combined division/modulo: divsr/divr produce the quotient (operand 0)
+;; and the remainder (operand 3) in a single instruction.
+(define_insn "divmodsi4"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+        (div:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand" " r")))
+   (set (match_operand:SI 3 "register_operand"         "=r")
+        (mod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divsr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "udivmodsi4"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+        (udiv:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand"  " r")))
+   (set (match_operand:SI 3 "register_operand"          "=r")
+        (umod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Boolean instructions.
+;; Note: We define the DImode versions in nds32-doubleword.md.
+
+;; ----------------------------------------------------------------------------
+;; 'AND' operation
+;; ----------------------------------------------------------------------------
+
+;; AND with complement (V3 only): Rt = Rb & ~Ra.
+;; Note the output template swaps the operands: bitc computes
+;; %0 = %2 & ~%1, matching the RTL (and (not op1) op2).
+(define_insn "bitc"
+  [(set (match_operand:SI 0 "register_operand"                 "=r")
+	(and:SI (not:SI (match_operand:SI 1 "register_operand" " r"))
+		(match_operand:SI 2 "register_operand"         " r")))]
+  "TARGET_ISA_V3"
+  "bitc\t%0, %2, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+;; Bitwise AND.  Besides plain register forms (and33/and), several
+;; 16-bit instructions implement AND with special masks
+;; (zeb33/zeh33/xlsb33/x11b33/bmski33/fexti33), and 32-bit forms cover
+;; zero extension (zeb/zeh), 15-bit immediates (andi), complemented
+;; immediates (bitci) and single-0-bit masks (bclr).
+(define_insn "andsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    l,    l,    l,    l,    l,    l,    r,   r,     r,    r,    r")
+	(and:SI (match_operand:SI 1 "register_operand" " %0, r,    l,    l,    l,    l,    0,    0,    r,   r,     r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Izeb, Izeh, Ixls, Ix11, Ibms, Ifex, Izeb, Izeh, Iu15, Ii15, Ic15")))]
+  ""
+{
+  /* Alternatives 0 and 1 take a REG for operands[2]; INTVAL is only
+     valid on a CONST_INT, so guard the access (an unconditional
+     INTVAL would be flagged by RTL checking).  */
+  HOST_WIDE_INT mask = CONST_INT_P (operands[2]) ? INTVAL (operands[2]) : 0;
+  int zero_position;
+
+  /* 16-bit andi instructions:
+     andi Rt3,Ra3,0xff   -> zeb33  Rt3,Ra3
+     andi Rt3,Ra3,0xffff -> zeh33  Rt3,Ra3
+     andi Rt3,Ra3,0x01   -> xlsb33 Rt3,Ra3
+     andi Rt3,Ra3,0x7ff  -> x11b33 Rt3,Ra3
+     andi Rt3,Rt3,2^imm3u          -> bmski33 Rt3,imm3u
+     andi Rt3,Rt3,(2^(imm3u+1))-1  -> fexti33 Rt3,imm3u.  */
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "and33\t%0, %2";
+    case 1:
+      return "and\t%0, %1, %2";
+    case 2:
+      return "zeb33\t%0, %1";
+    case 3:
+      return "zeh33\t%0, %1";
+    case 4:
+      return "xlsb33\t%0, %1";
+    case 5:
+      return "x11b33\t%0, %1";
+    case 6:
+      operands[2] = GEN_INT (floor_log2 (mask));
+      return "bmski33\t%0, %2";
+    case 7:
+      operands[2] = GEN_INT (floor_log2 (mask + 1) - 1);
+      return "fexti33\t%0, %2";
+    case 8:
+      return "zeb\t%0, %1";
+    case 9:
+      return "zeh\t%0, %1";
+    case 10:
+      return "andi\t%0, %1, %2";
+    case 11:
+      operands[2] = GEN_INT (~mask);
+      return "bitci\t%0, %1, %2";
+    case 12:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bclr_p() test,
+         so that we can guarantee there is only one 0-bit
+         within the immediate value.  */
+      for (zero_position = 31; zero_position >= 0; zero_position--)
+	{
+	  /* Shift a HOST_WIDE_INT constant: '1 << 31' on a 32-bit
+	     signed int would be undefined behavior.  */
+	  if ((mask & ((HOST_WIDE_INT) 1 << zero_position)) == 0)
+	    {
+	      /* Found the 0-bit position.  */
+	      operands[2] = GEN_INT (zero_position);
+	      break;
+	    }
+	}
+      return "bclr\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  2,  2,  2,  2,  2,  2,  4,  4,  4,  4,  4")])
+
+;; AND with shifted operand (V3 only): Rt = Rb & (Ra << imm5) and
+;; Rt = Rb & (Ra >> imm5).
+(define_insn "*and_slli"
+  [(set (match_operand:SI 0 "register_operand"                      "=   r")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand"   "    r")
+			    (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"              "    r")))]
+  "TARGET_ISA_V3"
+  "and_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*and_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "and_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'OR' operation
+;; ----------------------------------------------------------------------------
+
+;; For V3/V3M ISA, we have 'or33' instruction.
+;; So we can identify 'or Rt3,Rt3,Ra3' case and set its length to be 2.
+;; Bitwise OR.  Alternatives: 16-bit 'or33', 32-bit 'or', 15-bit
+;; immediate 'ori', and 'bset' for immediates with exactly one 1-bit
+;; (constraint Ie15, vetted by nds32_can_use_bset_p).
+(define_insn "iorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(ior:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, Ie15")))]
+  ""
+{
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "or33\t%0, %2";
+    case 1:
+      return "or\t%0, %1, %2";
+    case 2:
+      return "ori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bset_p() test,
+         so that we can guarantee there is only one 1-bit
+         within the immediate value.  */
+      /* Use exact_log2() to search the 1-bit position.  */
+      one_position = exact_log2 (INTVAL (operands[2]));
+      operands[2] = GEN_INT (one_position);
+      return "bset\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+;; OR with shifted operand (V3 only): Rt = Rb | (Ra << imm5) and
+;; Rt = Rb | (Ra >> imm5).
+(define_insn "*or_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(ior:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "or_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*or_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "or_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'XOR' operation
+;; ----------------------------------------------------------------------------
+
+;; For V3/V3M ISA, we have 'xor33' instruction.
+;; So we can identify 'xor Rt3,Rt3,Ra3' case and set its length to be 2.
+;; Bitwise XOR.  Alternatives: 16-bit 'xor33', 32-bit 'xor', 15-bit
+;; immediate 'xori', and 'btgl' (bit toggle) for immediates with
+;; exactly one 1-bit (constraint It15, vetted by nds32_can_use_btgl_p).
+(define_insn "xorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(xor:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, It15")))]
+  ""
+{
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "xor33\t%0, %2";
+    case 1:
+      return "xor\t%0, %1, %2";
+    case 2:
+      return "xori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_btgl_p() test,
+         so that we can guarantee there is only one 1-bit
+         within the immediate value.  */
+      /* Use exact_log2() to search the 1-bit position.  */
+      one_position = exact_log2 (INTVAL (operands[2]));
+      operands[2] = GEN_INT (one_position);
+      return "btgl\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+;; XOR with shifted operand (V3 only): Rt = Rb ^ (Ra << imm5) and
+;; Rt = Rb ^ (Ra >> imm5).
+(define_insn "*xor_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(xor:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "xor_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*xor_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "xor_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Rotate Right Instructions.
+
+;; Rotate right by a 5-bit immediate (rotri) or a register amount (rotr).
+(define_insn "rotrsi3"
+  [(set (match_operand:SI 0 "register_operand"                 "=   r, r")
+	  (rotatert:SI (match_operand:SI 1 "register_operand"  "    r, r")
+		       (match_operand:SI 2 "nonmemory_operand" " Iu05, r")))]
+  ""
+  "@
+  rotri\t%0, %1, %2
+  rotr\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'NEG' operation
+;; ----------------------------------------------------------------------------
+
+;; For V3/V3M ISA, we have 'neg33' instruction.
+;; So we can identify 'neg33 Rt3,Ra3' case and set its length to be 2.
+;; And for V2 ISA, there is NO 'neg33' instruction.
+;; The only option is to use 'subri A,B,0' (its semantic is 'A = 0 - B').
+;; Negate: 16-bit 'neg33' when both operands are in low registers,
+;; otherwise 'subri Rt,Ra,0' computes Rt = 0 - Ra.
+(define_insn "negsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(neg:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+  "@
+   neg33\t%0, %1
+   subri\t%0, %1, 0"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'ONE_COMPLEMENT' operation
+;; ----------------------------------------------------------------------------
+
+;; For V3/V3M ISA, we have 'not33' instruction.
+;; So we can identify 'not Rt3,Ra3' case and set its length to be 2.
+;; One's complement: 16-bit 'not33' when both operands are in low
+;; registers, otherwise synthesized as 'nor Rt,Ra,Ra'.
+(define_insn "one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(not:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+  "@
+   not33\t%0, %1
+   nor\t%0, %1, %1"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Shift instructions.
+
+;; Shift left: 16-bit 'slli333' (3-bit shift count, low registers),
+;; 32-bit immediate 'slli', or register-amount 'sll'.
+(define_insn "ashlsi3"
+  [(set (match_operand:SI 0 "register_operand"             "=   l,    r, r")
+	(ashift:SI (match_operand:SI 1 "register_operand"  "    l,    r, r")
+		   (match_operand:SI 2 "nonmemory_operand" " Iu03, Iu05, r")))]
+  ""
+  "@
+  slli333\t%0, %1, %2
+  slli\t%0, %1, %2
+  sll\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+;; Arithmetic shift right: 16-bit 'srai45' (destination tied to
+;; operand 1), 32-bit immediate 'srai', or register-amount 'sra'.
+(define_insn "ashrsi3"
+  [(set (match_operand:SI 0 "register_operand"               "=   d,    r, r")
+	(ashiftrt:SI (match_operand:SI 1 "register_operand"  "    0,    r, r")
+		     (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))]
+  ""
+  "@
+  srai45\t%0, %2
+  srai\t%0, %1, %2
+  sra\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+;; Logical shift right: 16-bit 'srli45' (destination tied to
+;; operand 1), 32-bit immediate 'srli', or register-amount 'srl'.
+(define_insn "lshrsi3"
+  [(set (match_operand:SI 0 "register_operand"               "=   d,    r, r")
+	(lshiftrt:SI (match_operand:SI 1 "register_operand"  "    0,    r, r")
+		     (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))]
+  ""
+  "@
+  srli45\t%0, %2
+  srli\t%0, %1, %2
+  srl\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; ----------------------------------------------------------------------------
+;; Conditional Move patterns
+;; ----------------------------------------------------------------------------
+
+;; Expand conditional move.  Only (eq X 0)/(ne X 0) conditions can be
+;; matched directly by the cmovz/cmovn insns below, so every other
+;; comparison is first reduced to an slt/slts result that is then
+;; compared against zero.
+(define_expand "movsicc"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(if_then_else:SI (match_operand 1 "comparison_operator" "")
+			 (match_operand:SI 2 "register_operand" "")
+			 (match_operand:SI 3 "register_operand" "")))]
+  "TARGET_CMOV"
+{
+  if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+      && GET_MODE (XEXP (operands[1], 0)) == SImode
+      && XEXP (operands[1], 1) == const0_rtx)
+    {
+      /* If the operands[1] rtx is already (eq X 0) or (ne X 0),
+         we have gcc generate original template rtx.  */
+      goto create_template;
+    }
+  else
+    {
+      /* Since there is only 'slt'(Set when Less Than) instruction for
+         comparison in Andes ISA, the major strategy we use here is to
+         convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination.
+         We design constraints properly so that the reload phase will assist
+         to make one source operand to use same register as result operand.
+         Then we can use cmovz/cmovn to catch the other source operand
+         which has different register.  */
+      enum rtx_code code = GET_CODE (operands[1]);
+      enum rtx_code new_code = code;
+      rtx cmp_op0 = XEXP (operands[1], 0);
+      rtx cmp_op1 = XEXP (operands[1], 1);
+      rtx tmp;
+      int reverse = 0;
+
+      /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part
+         Strategy : Reverse condition and swap comparison operands
+
+         For example:
+
+             a <= b ? P : Q   (LE or LEU)
+         --> a >  b ? Q : P   (reverse condition)
+         --> b <  a ? Q : P   (swap comparison operands to achieve 'LT/LTU')
+
+             a >= b ? P : Q   (GE or GEU)
+         --> a <  b ? Q : P   (reverse condition to achieve 'LT/LTU')
+
+             a <  b ? P : Q   (LT or LTU)
+         --> (NO NEED TO CHANGE, it is already 'LT/LTU')
+
+             a >  b ? P : Q   (GT or GTU)
+         --> b <  a ? P : Q   (swap comparison operands to achieve 'LT/LTU') */
+      switch (code)
+	{
+	case NE:
+	  /*   (a != b ? P : Q)
+	     can be expressed as
+	       (a == b ? Q : P)
+	     so, fall through to reverse condition */
+	case GE: case GEU: case LE: case LEU:
+	  new_code = reverse_condition (code);
+	  reverse = 1;
+	  break;
+	case EQ: case GT: case GTU: case LT: case LTU:
+	  /* no need to reverse condition */
+	  break;
+	default:
+	  FAIL;
+	}
+
+      /* For '>' comparison operator, we swap operands
+         so that we can have 'LT/LTU' operator.  */
+      if (new_code == GT || new_code == GTU)
+	{
+	  tmp     = cmp_op0;
+	  cmp_op0 = cmp_op1;
+	  cmp_op1 = tmp;
+
+	  new_code = swap_condition (new_code);
+	}
+
+      /* Use a temporary register to store slt/slts result.  */
+      tmp = gen_reg_rtx (SImode);
+
+      /* Split EQ and NE because we don't have direct comparison of EQ and NE.
+         If we don't split it, the conditional move transformation will fail
+         when producing (SET A (EQ B C)) or (SET A (NE B C)).  */
+      if (new_code == EQ)
+	{
+	  /* (a == b) is true iff (a ^ b) < 1 as an unsigned value.  */
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, tmp, GEN_INT (1)));
+	}
+      else if (new_code == NE)
+	{
+	  /* (a != b) is true iff 0 < (a ^ b) as an unsigned value.  */
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, GEN_INT (0), tmp));
+        }
+      else
+	/* This emit_insn will create corresponding 'slt/slts' instruction.  */
+	emit_insn (gen_rtx_SET (VOIDmode, tmp,
+				gen_rtx_fmt_ee (new_code, SImode,
+						cmp_op0, cmp_op1)));
+
+      /* Change comparison semantic into (eq X 0) or (ne X 0) behavior
+         so that cmovz or cmovn will be matched later.
+
+         For reverse condition cases, we want to create a semantic that:
+           (eq X 0) --> pick up "else" part
+         For normal cases, we want to create a semantic that:
+           (ne X 0) --> pick up "then" part
+
+         Later we will have cmovz/cmovn instruction pattern to
+         match corresponding behavior and output instruction.  */
+      operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE,
+				    VOIDmode, tmp, const0_rtx);
+    }
+
+create_template:
+  do {} while(0); /* dummy line */
+})
+
+;; Conditional move on zero / non-zero.  Each pattern has two
+;; alternatives so that either source may be tied to the destination;
+;; when the "else" source is tied, the inverse cmov instruction is
+;; emitted instead.
+(define_insn "cmovz"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+        (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovz\t%0, %2, %1
+   cmovn\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+(define_insn "cmovn"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+	(if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovn\t%0, %2, %1
+   cmovz\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; Conditional Branch patterns
+;; ----------------------------------------------------------------------------
+
+;; Expand SImode conditional branch.  NDS32 only has slt-style
+;; comparisons, so any condition other than eq/ne (or a signed compare
+;; against zero) is rewritten into an slt/slts result in $ta that is
+;; then tested against zero with beqz/bnez.
+(define_expand "cbranchsi4"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* If operands[2] is (const_int 0),
+     we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions.
+     So we have gcc generate original template rtx.  */
+  if (GET_CODE (operands[2]) == CONST_INT)
+    if (INTVAL (operands[2]) == 0)
+      if ((code != GTU)
+	  && (code != GEU)
+	  && (code != LTU)
+	  && (code != LEU))
+	goto create_template;
+
+  /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than)
+     behavior for the comparison, we might need to generate other
+     rtx patterns to achieve same semantic.  */
+  switch (code)
+    {
+    case GT:
+    case GTU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* GT  reg_A, const_int  =>  !(LT  reg_A, const_int + 1) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* We want to plus 1 into the integer value
+	     of operands[2] to create 'slt' instruction.
+	     This calculation is performed on the host machine,
+	     which may be 64-bit integer.
+	     So the meaning of calculation result may be
+	     different from the 32-bit nds32 target.
+
+	     For example:
+	       0x7fffffff + 0x1 -> 0x80000000,
+	       this value is POSITIVE on 64-bit machine,
+	       but the expected value on 32-bit nds32 target
+	       should be NEGATIVE value.
+
+	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
+	     explicitly create SImode constant rtx.  */
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* GT  reg_A, reg_B  =>  LT  reg_B, reg_A */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case GE:
+    case GEU:
+      /* GE  reg_A, reg_B      =>  !(LT  reg_A, reg_B) */
+      /* GE  reg_A, const_int  =>  !(LT  reg_A, const_int) */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == GE)
+	{
+	  /* GE, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* GEU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], EQ);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LT:
+    case LTU:
+      /* LT  reg_A, reg_B      =>  LT  reg_A, reg_B */
+      /* LT  reg_A, const_int  =>  LT  reg_A, const_int */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], NE);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* LE  reg_A, const_int  =>  LT  reg_A, const_int + 1 */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* Note that (le:SI X INT_MAX) is not the same as (lt:SI X INT_MIN).
+	     We better have an assert here in case GCC does not properly
+	     optimize it away.  The INT_MAX here is 0x7fffffff for target.  */
+	  gcc_assert (code != LE || INTVAL (operands[2]) != 0x7fffffff);
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* LE  reg_A, reg_B  =>  !(LT  reg_B, reg_A) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case EQ:
+    case NE:
+      /* NDS32 ISA has various form for eq/ne behavior no matter
+         what kind of the operand is.
+         So just generate original template rtx.  */
+      goto create_template;
+
+    default:
+      FAIL;
+    }
+
+create_template:
+  do {} while(0); /* dummy line */
+})
+
+
+;; Branch on (eq/ne reg 0).  The chosen mnemonic and encoded length
+;; depend on the resolved branch distance (see set_attr_alternative
+;; below); out-of-range cases are synthesized by inverting the branch
+;; around an unconditional 'j'.
+(define_insn "*cbranchsi4_equality_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"  "t, l, r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-comparison conditional branch has two forms:
+       32-bit instruction =>          beqz/bnez           imm16s << 1
+       16-bit instruction => beqzs8/bnezs8/beqz38/bnez38  imm8s << 1
+
+     For 32-bit case,
+     we assume it is always reachable. (but check range -65500 ~ 65500)
+
+     For 16-bit case,
+     it must satisfy { 255 >= (label - pc) >= -256 } condition.
+     However, since the $pc for nds32 is at the beginning of the instruction,
+     we should leave some length space for current insn.
+     So we use range -250 ~ 250.  */
+
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  return (code == EQ) ? "beqzs8\t%2" : "bnezs8\t%2";
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  return (code == EQ) ? "beqz38\t%1, %2" : "bnez38\t%1, %2";
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* For which_alternative==2, it should not be here.  */
+	  gcc_unreachable ();
+	}
+    case 4:
+      /* including constraints: t, l, and r */
+      return (code == EQ) ? "beqz\t%1, %2" : "bnez\t%1, %2";
+    case 6:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  if (code == EQ)
+	    {
+	      /*    beqzs8  .L0
+	          =>
+	            bnezs8  .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "bnezs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnezs8  .L0
+	          =>
+	            beqzs8  .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "beqzs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  if (code == EQ)
+	    {
+	      /*    beqz38  $r0, .L0
+	          =>
+	            bnez38  $r0, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "bnez38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnez38  $r0, .L0
+	          =>
+	            beqz38  $r0, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "beqz38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* For which_alternative==2, it should not be here.  */
+	  gcc_unreachable ();
+	}
+    case 8:
+      /* constraint: t, l, r.  */
+      if (code == EQ)
+	{
+	  /*    beqz  $r8, .L0
+	      =>
+	        bnez  $r8, .LCB0
+	        j  .L0
+	      .LCB0:
+	   */
+	  return "bnez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+      else
+	{
+	  /*    bnez  $r8, .L0
+	      =>
+	        beqz  $r8, .LCB0
+	        j  .L0
+	      .LCB0:
+	   */
+	  return "beqz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 2
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			  (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+;; This pattern is dedicated to V2 ISA,
+;; because V2 DOES NOT HAVE beqc/bnec instruction.
+;;
+;; Conditional branch on register-register (in)equality.
+;; When the label is within the imm14s range (decided by the "length"
+;; attribute below), emit a single beq/bne.  Otherwise reverse the
+;; condition and use a 32-bit 'j' jump to reach the distant label.
+(define_insn "*cbranchsi4_equality_reg"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "TARGET_ISA_V2"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+
+     For 32-bit case,
+     we assume it is always reachable. (but check range -16350 ~ 16350).  */
+
+  switch (code)
+    {
+    case EQ:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "beq\t%1, %2, %3";
+	case 8:
+	  /*    beq  $r0, $r1, .L0
+	      =>
+	        bne  $r0, $r1, .LCB0
+	        j  .L0
+	      .LCB0:
+	   */
+	  return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    case NE:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "bne\t%1, %2, %3";
+	case 8:
+	  /*    bne  $r0, $r1, .L0
+	      =>
+	        beq  $r0, $r1, .LCB0
+	        j  .L0
+	      .LCB0:
+	   */
+	  return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			   (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+;; This pattern is dedicated to V3/V3M,
+;; because V3/V3M DO HAVE beqc/bnec instruction.
+;;
+;; Conditional branch on (in)equality of a register against a register
+;; (beq/bne, imm14s branch range) or an Is11 constant (beqc/bnec, imm8s
+;; branch range).  Out-of-range labels are reached by reversing the
+;; condition and emitting a 32-bit 'j' jump.  Every case of the outer
+;; switch returns from within its inner switch, so there is no
+;; fall-through between the EQ and NE cases.
+(define_insn "*cbranchsi4_equality_reg_or_const_int"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r,    r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r, Is11")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "TARGET_ISA_V3 || TARGET_ISA_V3M"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+       32-bit instruction =>         beqc/bnec          imm8s << 1
+
+     For 32-bit case, we assume it is always reachable.
+     (but check range -16350 ~ 16350 and -250 ~ 250).  */
+
+  switch (code)
+    {
+    case EQ:
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beq\t%1, %2, %3";
+	    case 8:
+	      /*    beq  $r0, $r1, .L0
+	          =>
+	            bne  $r0, $r1, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beqc\t%1, %2, %3";
+	    case 8:
+	      /*    beqc  $r0, constant, .L0
+	          =>
+	            bnec  $r0, constant, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "bnec\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    case NE:
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bne\t%1, %2, %3";
+	    case 8:
+	      /*    bne  $r0, $r1, .L0
+	          =>
+	            beq  $r0, $r1, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bnec\t%1, %2, %3";
+	    case 8:
+	      /*    bnec  $r0, constant, .L0
+	          =>
+	            beqc  $r0, constant, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "beqc\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0: beq/bne reach imm14s (range check -16350 ~ 16350).
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			  (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		     (const_int 4)
+		     (const_int 8))
+       ;; Alternative 1: beqc/bnec reach imm8s (range check -250 ~ 250).
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+			  (le (minus (match_dup 3) (pc)) (const_int  250)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+;; Conditional branch comparing a register against zero with one of
+;; bgtz/bgez/bltz/blez.  If the label is outside the range checked by
+;; the "length" attribute, the condition is reversed and a 32-bit 'j'
+;; jump reaches the target instead.
+(define_insn "*cbranchsi4_greater_less_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_greater_less_comparison_operator"
+			[(match_operand:SI 1 "register_operand" "r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-greater-less-comparison conditional branch has one form:
+       32-bit instruction =>      bgtz/bgez/bltz/blez     imm16s << 1
+
+     For 32-bit case, we assume it is always reachable.
+     (but check range -65500 ~ 65500).  */
+
+  if (get_attr_length (insn) == 8)
+    {
+      /* The branch target is too far to simply use one
+         bgtz/bgez/bltz/blez instruction.
+         We need to reverse condition and use 'j' to jump to the target.  */
+      switch (code)
+	{
+	case GT:
+	  /*   bgtz  $r8, .L0
+	     =>
+	       blez  $r8, .LCB0
+	       j  .L0
+	     .LCB0:
+	   */
+	  return "blez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case GE:
+	  /*   bgez  $r8, .L0
+	     =>
+	       bltz  $r8, .LCB0
+	       j  .L0
+	     .LCB0:
+	   */
+	  return "bltz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LT:
+	  /*   bltz  $r8, .L0
+	     =>
+	       bgez  $r8, .LCB0
+	       j  .L0
+	     .LCB0:
+	   */
+	  return "bgez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LE:
+	  /*   blez  $r8, .L0
+	     =>
+	       bgtz  $r8, .LCB0
+	       j  .L0
+	     .LCB0:
+	   */
+	  return "bgtz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+    }
+
+  /* Short-range case: a single conditional branch suffices.  */
+  switch (code)
+    {
+    case GT:
+      return "bgtz\t%1, %2";
+    case GE:
+      return "bgez\t%1, %2";
+    case LT:
+      return "bltz\t%1, %2";
+    case LE:
+      return "blez\t%1, %2";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+        (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			   (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+;; Expand "store condition": operands[0] = (operands[2] <op1> operands[3]).
+;; All comparison codes are lowered to set-on-less-than sequences
+;; (slts_compare for signed, slt_compare for unsigned), possibly with an
+;; addi/xor pre-step or an xori post-step; no condition-code register
+;; is involved.
+(define_expand "cstoresi4"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operator:SI 1 "comparison_operator"
+	  [(match_operand:SI 2 "register_operand" "")
+	   (match_operand:SI 3 "nonmemory_operand" "")]))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[1]);
+
+  switch (code)
+    {
+    case EQ:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A == const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  /* If the integer value is not in the range of imm15s,
+	     we need to force register first because our addsi3 pattern
+	     only accepts nds32_rimm15s_operand predicate.  */
+	  if (!satisfies_constraint_Is15 (operands[3]))
+	    operands[3] = force_reg (SImode, operands[3]);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A == reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+
+    case NE:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A != const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  /* If the integer value is not in the range of imm15s,
+	     we need to force register first because our addsi3 pattern
+	     only accepts nds32_rimm15s_operand predicate.  */
+	  if (!satisfies_constraint_Is15 (operands[3]))
+	    operands[3] = force_reg (SImode, operands[3]);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A != reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+
+    case GT:
+    case GTU:
+      /* reg_R = (reg_A > reg_B)       --> slt reg_R, reg_B, reg_A */
+      /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */
+      if (code == GT)
+	{
+	  /* GT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[3], operands[2]));
+	}
+      else
+	{
+	  /* GTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[3], operands[2]));
+	}
+
+      DONE;
+
+    case GE:
+    case GEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A >= const_int_B)
+	     --> movi reg_C, const_int_B - 1
+	         slt  reg_R, reg_C, reg_A */
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) - 1,
+					      SImode)));
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], tmp_reg, operands[2]));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A >= reg_B)
+	     --> slt  reg_R, reg_A, reg_B
+	         xori reg_R, reg_R, const_int_1 */
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[2], operands[3]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[2], operands[3]));
+	    }
+
+	  /* perform 'not' behavior */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+    case LT:
+    case LTU:
+      /* reg_R = (reg_A < reg_B)       --> slt reg_R, reg_A, reg_B */
+      /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[2], operands[3]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[2], operands[3]));
+	}
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A <= const_int_B)
+	     --> movi reg_C, const_int_B + 1
+	         slt  reg_R, reg_A, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) + 1,
+						      SImode)));
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], operands[2], tmp_reg));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A <= reg_B) --> slt  reg_R, reg_B, reg_A
+	                                  xori reg_R, reg_R, const_int_1 */
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[3], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[3], operands[2]));
+	    }
+
+	  /* perform 'not' behavior */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+
+    default:
+      gcc_unreachable ();
+    }
+})
+
+
+;; Signed set-on-less-than: operands[0] = (operands[1] < operands[2]).
+;; Used by the cstoresi4 expander above for signed comparisons.
+(define_insn "slts_compare"
+  [(set (match_operand:SI 0 "register_operand"         "=t,    t, r,    r")
+	(lt:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+	       (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slts45\t%1, %2
+   sltsi45\t%1, %2
+   slts\t%0, %1, %2
+   sltsi\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+;; Unsigned set-on-less-than: operands[0] = (operands[1] <u operands[2]).
+;; Note the RTL code is ltu despite the name: "slt_compare" is the
+;; UNSIGNED variant; "slts_compare" above is the signed one.
+(define_insn "slt_compare"
+  [(set (match_operand:SI 0 "register_operand"          "=t,    t, r,    r")
+	(ltu:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+		(match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slt45\t%1, %2
+   slti45\t%1, %2
+   slt\t%0, %1, %2
+   slti\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Unconditional and other jump instructions.
+
+;; Unconditional jump: 16-bit 'j8' when TARGET_16_BIT and the target is
+;; within +/-250 bytes, otherwise the 32-bit 'j'.
+(define_insn "jump"
+  [(set (pc) (label_ref (match_operand 0 "" "")))]
+  ""
+{
+  /* This unconditional jump has two forms:
+       32-bit instruction => j   imm24s << 1
+       16-bit instruction => j8  imm8s << 1
+
+     For 32-bit case,
+     we assume it is always reachable.
+     For 16-bit case,
+     it must satisfy { 255 >= (label - pc) >= -256 } condition.
+     However, since the $pc for nds32 is at the beginning of the instruction,
+     we should leave some length space for current insn.
+     So we use range -250 ~ 250.  */
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      return "j8\t%0";
+    case 4:
+      return "j\t%0";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
+			   (le (minus (match_dup 0) (pc)) (const_int  250)))
+		      (if_then_else (match_test "TARGET_16_BIT")
+				    (const_int 2)
+				    (const_int 4))
+		      (const_int 4)))])
+
+;; Indirect jump through a register: 2-byte jr5 or 4-byte jr.
+;; Alternative selection relies on the file-wide default of the
+;; "enabled" attribute (see its definition at the top of this file).
+(define_insn "indirect_jump"
+  [(set (pc) (match_operand:SI 0 "register_operand" "r, r"))]
+  ""
+  "@
+  jr5\t%0
+  jr\t%0"
+  [(set_attr "type"   "branch,branch")
+   (set_attr "length" "     2,     4")])
+
+;; Subroutine call instruction returning no value.
+;;   operands[0]: It should be a mem RTX whose address is
+;;                the address of the function.
+;;   operands[1]: It is the number of bytes of arguments pushed as a const_int.
+;;   operands[2]: It is the number of registers used as operands.
+
+;; Expander for calls with no return value; the parallel clobbers the
+;; link pointer register.  Matched by *call_register / *call_immediate.
+(define_expand "call"
+  [(parallel [(call (match_operand 0 "memory_operand" "")
+		    (match_operand 1))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+;; Call through a function address held in a register:
+;; 2-byte jral5 or 4-byte jral; $lp is clobbered with the return address.
+(define_insn "*call_register"
+  [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r, r"))
+		    (match_operand 1))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "@
+  jral5\t%0
+  jral\t%0"
+  [(set_attr "type"   "branch,branch")
+   (set_attr "length" "     2,     4")])
+
+;; Direct call to an immediate (symbolic) address via jal.
+(define_insn "*call_immediate"
+  [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i"))
+		    (match_operand 1))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%0"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; Subroutine call instruction returning a value.
+;;   operands[0]: It is the hard register in which the value is returned.
+;;   The remaining three operands are the same as the
+;;   three operands of the 'call' instruction.
+;;   (but with numbers increased by one)
+
+;; Expander for calls returning a value in operand 0; otherwise the
+;; same shape as the "call" expander, clobbering $lp.
+(define_expand "call_value"
+  [(parallel [(set (match_operand 0)
+		   (call (match_operand 1 "memory_operand" "")
+		         (match_operand 2)))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+;; Value-returning call through a register: jral5 (2 bytes) / jral (4 bytes).
+(define_insn "*call_value_register"
+  [(parallel [(set (match_operand 0)
+		   (call (mem (match_operand:SI 1 "register_operand" "r, r"))
+		         (match_operand 2)))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "@
+  jral5\t%1
+  jral\t%1"
+  [(set_attr "type"   "branch,branch")
+   (set_attr "length" "     2,     4")])
+
+;; Value-returning direct call to an immediate (symbolic) address via jal.
+(define_insn "*call_value_immediate"
+  [(parallel [(set (match_operand 0)
+		   (call (mem (match_operand:SI 1 "immediate_operand" "i"))
+			 (match_operand 2)))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%1"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; prologue and epilogue.
+
+;; Expand the function prologue entirely in C code; no RTL template is
+;; matched (the (const_int 0) body is a placeholder).
+(define_expand "prologue" [(const_int 0)]
+  ""
+{
+  /* Note that only under V3/V3M ISA, we could use v3push prologue.  */
+  if (TARGET_V3PUSH)
+    nds32_expand_prologue_v3push ();
+  else
+    nds32_expand_prologue ();
+  DONE;
+})
+
+;; Expand the function epilogue; mirror image of the "prologue" expander.
+(define_expand "epilogue" [(const_int 0)]
+  ""
+{
+  /* Note that only under V3/V3M ISA, we could use v3pop epilogue.  */
+  if (TARGET_V3PUSH)
+    nds32_expand_epilogue_v3pop ();
+  else
+    nds32_expand_epilogue ();
+  DONE;
+})
+
+
+;; nop instruction.
+
+;; No-operation: 2-byte nop16 under TARGET_16_BIT, 4-byte nop otherwise.
+;; The "length" attribute must agree with the template chosen in the C body.
+(define_insn "nop"
+  [(const_int 0)]
+  ""
+{
+  if (TARGET_16_BIT)
+    return "nop16";
+  else
+    return "nop";
+}
+  [(set_attr "type" "misc")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+
+;; ----------------------------------------------------------------------------
+;; Stack push/pop operations
+;; ----------------------------------------------------------------------------
+
+;; The pattern for stack push.
+;; Both stack_push_multiple and stack_v3push use the following pattern.
+;; So we need to use TARGET_V3PUSH to determine the instruction length.
+;; Store one or more registers to the stack; the exact assembly text is
+;; produced by nds32_output_stack_push ().  The 2-byte length is chosen
+;; under TARGET_V3PUSH (v3push encoding); otherwise 4 bytes.
+(define_insn "*stack_push"
+  [(match_parallel 0 "nds32_stack_push_operation"
+     [(set (mem:SI (plus:SI (reg:SI SP_REGNUM)
+			    (match_operand:SI 1 "const_int_operand" "")))
+	   (match_operand:SI 2 "register_operand" ""))
+     ])]
+  ""
+{
+  return nds32_output_stack_push ();
+}
+  [(set_attr "type" "misc")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_V3PUSH")
+		      (const_int 2)
+		      (const_int 4)))])
+
+
+;; The pattern for stack pop.
+;; Both stack_pop_multiple and stack_v3pop use the following pattern.
+;; So we need to use TARGET_V3PUSH to determine the instruction length.
+;; Load one or more registers back from the stack; assembly text comes
+;; from nds32_output_stack_pop ().  Length mirrors *stack_push above.
+(define_insn "*stack_pop"
+  [(match_parallel 0 "nds32_stack_pop_operation"
+     [(set (match_operand:SI 1 "register_operand" "")
+	   (mem:SI (reg:SI SP_REGNUM)))
+     ])]
+  ""
+{
+  return nds32_output_stack_pop ();
+}
+  [(set_attr "type" "misc")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_V3PUSH")
+		      (const_int 2)
+		      (const_int 4)))])
+
+
+;; ----------------------------------------------------------------------------
+;; unspec operation patterns
+;; ----------------------------------------------------------------------------
+
+;; In nds32 target, the 'ret5' instruction is actually 'jr5 $lp'.
+;; This pattern is designed to distinguish function return
+;; from general indirect_jump pattern so that we can directly
+;; generate 'ret5' for readability.
+
+;; Function return through $lp, printed as 'ret5'/'ret' for readability
+;; (see the comment block above this pattern).
+(define_insn "unspec_volatile_func_return"
+  [(set (pc)
+	(unspec_volatile:SI [(reg:SI LP_REGNUM)] UNSPEC_VOLATILE_FUNC_RETURN))]
+  ""
+{
+  if (TARGET_16_BIT)
+    return "ret5";
+  else
+    return "ret";
+}
+  [(set_attr "type" "misc")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+
+;; ----------------------------------------------------------------------------
+;; Jump Table patterns
+;; ----------------------------------------------------------------------------
+;; Need to implement ASM_OUTPUT_ADDR_VEC_ELT (for normal jump table)
+;; or ASM_OUTPUT_ADDR_DIFF_ELT (for pc relative jump table) as well.
+;;
+;; operands[0]: The index to dispatch on.
+;; operands[1]: The lower bound for indices in the table.
+;; operands[2]: The total range of indices in the table.
+;;              i.e. The largest index minus the smallest one.
+;; operands[3]: A label that precedes the table itself.
+;; operands[4]: A label to jump to if the index has a value outside the bounds.
+;;
+;; We need to create following sequences for jump table code generation:
+;;   A) k <-- (plus (operands[0]) (-operands[1]))
+;;   B) if (gtu k operands[2]) then goto operands[4]
+;;   C) t <-- operands[3]
+;;   D) z <-- (mem (plus (k << 0 or 1 or 2) t))
+;;   E) z <-- t + z (NOTE: This is only required for pc relative jump table.)
+;;   F) jump to target with register t or z
+;;
+;; The steps C, D, E, and F are performed by casesi_internal pattern.
+;; Jump-table dispatch expander; see the comment block above for the
+;; meaning of operands 0-4 and steps A-F.  Steps A and B are emitted
+;; here; steps C-F are delegated to casesi_internal.
+(define_expand "casesi"
+  [(match_operand:SI 0 "register_operand"  "r") ; index to jump on
+   (match_operand:SI 1 "immediate_operand" "i") ; lower bound
+   (match_operand:SI 2 "immediate_operand" "i") ; total range
+   (match_operand:SI 3 "" "")                   ; table label
+   (match_operand:SI 4 "" "")]                  ; Out of range label
+  ""
+{
+  rtx add_tmp;
+  rtx reg, test;
+
+  /* Step A: "k <-- (plus (operands[0]) (-operands[1]))".  */
+  if (operands[1] != const0_rtx)
+    {
+      reg = gen_reg_rtx (SImode);
+      add_tmp = gen_int_mode (-INTVAL (operands[1]), SImode);
+
+      /* If the integer value is not in the range of imm15s,
+         we need to force register first because our addsi3 pattern
+         only accepts nds32_rimm15s_operand predicate.  */
+      add_tmp = force_reg (SImode, add_tmp);
+
+      emit_insn (gen_addsi3 (reg, operands[0], add_tmp));
+      operands[0] = reg;
+    }
+
+  /* Step B: "if (gtu k operands[2]) then goto operands[4]".  */
+  test = gen_rtx_GTU (VOIDmode, operands[0], operands[2]);
+  emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2],
+				  operands[4]));
+
+  operands[5] = gen_reg_rtx (SImode);
+  /* Step C, D, E, and F, using another temporary register operands[5].  */
+  emit_jump_insn (gen_casesi_internal (operands[0],
+				       operands[3],
+				       operands[5]));
+  DONE;
+})
+
+;; We are receiving operands from casesi pattern:
+;;
+;; operands[0]: The index that has been subtracted with lower bound.
+;; operands[1]: A label that precedes the table itself.
+;; operands[2]: A temporary register to retrieve value in table.
+;;
+;; We need to perform steps C, D, E, and F:
+;;
+;;   C) t <-- operands[1]
+;;   D) z <-- (mem (plus (operands[0] << m) t))
+;;            m is 2 for normal jump table.
+;;            m is 0, 1, or 2 for pc relative jump table based on diff size.
+;;   E) t <-- z + t (NOTE: This is only required for pc relative jump table.)
+;;   F) Jump to target with register t or z.
+;;
+;; The USE in this pattern is needed to tell flow analysis that this is
+;; a CASESI insn.  It has no other purpose.
+;; Steps C-F of the casesi expansion (see the comment block above).
+;; NOTE(review): the fixed length of 20 presumably covers the worst-case
+;; output of nds32_output_casesi_pc_relative / nds32_output_casesi —
+;; confirm against those helpers, which are defined elsewhere.
+(define_insn "casesi_internal"
+  [(parallel [(set (pc)
+		   (mem:SI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "r")
+					     (const_int 4))
+				    (label_ref (match_operand 1 "" "")))))
+	      (use (label_ref (match_dup 1)))
+	      (clobber (match_operand:SI 2 "register_operand" ""))
+	      (clobber (reg:SI TA_REGNUM))])]
+  ""
+{
+  if (CASE_VECTOR_PC_RELATIVE)
+    return nds32_output_casesi_pc_relative (operands);
+  else
+    return nds32_output_casesi (operands);
+}
+  [(set_attr "length" "20")
+   (set_attr "type" "alu")])
+
+;; ----------------------------------------------------------------------------
+
+;; Performance Extension
+
+;; Count leading zeros; available only with the performance extension.
+(define_insn "clzsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+	(clz:SI (match_operand:SI 1 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "clz\t%0, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Signed maximum; performance-extension 'max' instruction.
+(define_insn "smaxsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smax:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "max\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Signed minimum; performance-extension 'min' instruction.
+(define_insn "sminsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smin:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "min\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Extract a single bit: operands[0] = bit operands[2] (a 5-bit unsigned
+;; immediate) of operands[1]; performance-extension 'btst' instruction.
+(define_insn "*btst"
+  [(set (match_operand:SI 0 "register_operand"                   "=   r")
+	(zero_extract:SI (match_operand:SI 1 "register_operand"  "    r")
+			 (const_int 1)
+			 (match_operand:SI 2 "immediate_operand" " Iu05")))]
+  "TARGET_PERF_EXT"
+  "btst\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; ----------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/6] Andes nds32: machine description of nds32 porting (2).
  2013-10-13 13:29                   ` Chung-Ju Wu
@ 2013-10-13 16:43                     ` Richard Sandiford
  2013-10-16 12:56                       ` Chung-Ju Wu
  0 siblings, 1 reply; 18+ messages in thread
From: Richard Sandiford @ 2013-10-13 16:43 UTC (permalink / raw)
  To: Chung-Ju Wu; +Cc: gcc-patches, cltang

Chung-Ju Wu <jasonwucj@gmail.com> writes:
> +  /* If operands[1] is a large constant and cannot be performed
> +     move behavior with single instruction, we need to split it.  */

Suggest "...cannot be performed by a single instruction..."

> +      high20_rtx = GEN_INT ((INTVAL (operands[1]) >> 12) << 12);

Better to use gen_int_mode (..., SImode).  Although GEN_INT will be OK
if the compiler uses an arithmetic shift for >>, that isn't guaranteed
(although I doubt any modern compiler would use anything else).

> +      low12_rtx = GEN_INT (INTVAL (operands[1]) & 0xfff);
> +
> +      emit_move_insn (tmp_rtx, high20_rtx);
> +      emit_move_insn (operands[0], plus_constant (SImode,
> +						  tmp_rtx,
> +						  INTVAL (low12_rtx)));

There's no need to create an rtx and then use INTVAL on it.
low12 can just be a HOST_WIDE_INT.

Looks good to me with those changes, thanks.

Richard

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/6] Andes nds32: machine description of nds32 porting (2).
  2013-10-13 16:43                     ` Richard Sandiford
@ 2013-10-16 12:56                       ` Chung-Ju Wu
  0 siblings, 0 replies; 18+ messages in thread
From: Chung-Ju Wu @ 2013-10-16 12:56 UTC (permalink / raw)
  To: Chung-Ju Wu, gcc-patches, Chung-Lin Tang, rdsandiford

[-- Attachment #1: Type: text/plain, Size: 1212 bytes --]

2013/10/14 Richard Sandiford <rdsandiford@googlemail.com>:
> Chung-Ju Wu <jasonwucj@gmail.com> writes:
>> +  /* If operands[1] is a large constant and cannot be performed
>> +     move behavior with single instruction, we need to split it.  */
>
> Suggest "...cannot be performed by a single instruction..."
>
>> +      high20_rtx = GEN_INT ((INTVAL (operands[1]) >> 12) << 12);
>
> Better to use gen_int_mode (..., SImode).  Although GEN_INT will be OK
> if the compiler uses an arithmetic shift for >>, that isn't guaranteed
> (although I doubt any modern compiler would use anything else).
>

Modify it accordingly.

>> +      low12_rtx = GEN_INT (INTVAL (operands[1]) & 0xfff);
>> +
>> +      emit_move_insn (tmp_rtx, high20_rtx);
>> +      emit_move_insn (operands[0], plus_constant (SImode,
>> +                                               tmp_rtx,
>> +                                               INTVAL (low12_rtx)));
>
> There's no need to create an rtx and then use INTVAL on it.
> low12 can just be a HOST_WIDE_INT.
>

Modify it accordingly.

> Looks good to me with those changes, thanks.
>
> Richard

Thank you very much.
A revised-4 patch for nds32.md is attached. :)


Best regards,
jasonwucj

[-- Attachment #2: 2-nds32-backend-md-part2.v3.revised-4.patch --]
[-- Type: application/octet-stream, Size: 71958 bytes --]

diff --git gcc/common/config/nds32/nds32-common.c gcc/common/config/nds32/nds32-common.c
new file mode 100644
index 0000000..f82f725
--- /dev/null
+++ gcc/common/config/nds32/nds32-common.c
@@ -0,0 +1,117 @@
+/* Common hooks of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic-core.h"
+#include "tm.h"
+#include "common/common-target.h"
+#include "common/common-target-def.h"
+#include "opts.h"
+#include "flags.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_HANDLE_OPTION.
+
+   Validate the values given to nds32-specific command-line options.
+   Returns true when the option is acceptable (including options this
+   hook does not care about), false after diagnosing an invalid value.  */
+static bool
+nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
+		     struct gcc_options *opts_set ATTRIBUTE_UNUSED,
+		     const struct cl_decoded_option *decoded,
+		     location_t loc)
+{
+  const size_t opt_code = decoded->opt_index;
+  const int opt_value = decoded->value;
+
+  if (opt_code == OPT_misr_vector_size_)
+    {
+      /* Only an interrupt vector size of 4 or 16 is acceptable.  */
+      if (opt_value != 4 && opt_value != 16)
+	{
+	  error_at (loc, "for the option -misr-vector-size=X, the valid X "
+			 "must be: 4 or 16");
+	  return false;
+	}
+      return true;
+    }
+
+  if (opt_code == OPT_mcache_block_size_)
+    {
+      /* The cache block size must be a power of two in [4, 512],
+	 i.e. exact_log2 of the value must lie in [2, 9].  */
+      if (exact_log2 (opt_value) < 2 || exact_log2 (opt_value) > 9)
+	{
+	  error_at (loc, "for the option -mcache-block-size=X, the valid X "
+			 "must be: 4, 8, 16, 32, 64, 128, 256, or 512");
+	  return false;
+	}
+      return true;
+    }
+
+  /* Any other option is accepted without further checking.  */
+  return true;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE.
+   Per-target default option adjustments applied at given -O levels;
+   the table is terminated by the OPT_LEVELS_NONE sentinel entry.  */
+static const struct default_options nds32_option_optimization_table[] =
+{
+  /* Enable -fomit-frame-pointer by default at -O1 or higher.  */
+  { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+  /* Enable -mv3push by default at -Os, but it is useless under V2 ISA.  */
+  { OPT_LEVELS_SIZE,   OPT_mv3push,             NULL, 1 },
+
+  { OPT_LEVELS_NONE,   0,                       NULL, 0 }
+};
+
+/* ------------------------------------------------------------------------ */
+\f
+/* Run-time Target Specification.  */
+
+/* Default enable
+     TARGET_GP_DIRECT: Generate gp-imply instruction.
+     TARGET_16_BIT   : Generate 16/32 bit mixed length instruction.
+     TARGET_PERF_EXT : Generate performance extension instruction.
+     TARGET_CMOV     : Generate conditional move instruction.  */
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS		\
+  (MASK_GP_DIRECT				\
+   | MASK_16_BIT				\
+   | MASK_PERF_EXT				\
+   | MASK_CMOV)
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION nds32_handle_option
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE nds32_option_optimization_table
+
+\f
+/* Defining the Output Assembler Language.  */
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+/* ------------------------------------------------------------------------ */
+
+struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER;
+
+/* ------------------------------------------------------------------------ */
diff --git gcc/config/nds32/nds32.md gcc/config/nds32/nds32.md
new file mode 100644
index 0000000..4a832ea
--- /dev/null
+++ gcc/config/nds32/nds32.md
@@ -0,0 +1,2221 @@
+;; Machine description of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2013 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+;; Include predicates definition.
+(include "predicates.md")
+
+;; Include constraints definition.
+(include "constraints.md")
+
+;; Include iterators definition.
+(include "iterators.md")
+
+;; Include pipelines definition.
+(include "pipelines.md")
+
+
+;; Include constants definition.
+(include "constants.md")
+
+
+;; Include intrinsic functions definition.
+(include "nds32-intrinsic.md")
+
+;; Include block move for nds32 multiple load/store behavior.
+(include "nds32-multiple.md")
+
+;; Include DImode/DFmode operations.
+(include "nds32-doubleword.md")
+
+;; Include peephole patterns.
+(include "nds32-peephole2.md")
+
+
+;; Insn type, it is used to default other attribute values.
+(define_attr "type"
+  "unknown,move,load,store,alu,compare,branch,call,misc"
+  (const_string "unknown"))
+
+
+;; Length, in bytes, default is 4-bytes.
+(define_attr "length" "" (const_int 4))
+
+
+;; Enabled, which is used to enable/disable insn alternatives.
+;; Note that we use length and TARGET_16_BIT here as criteria.
+;; If the instruction pattern already checks TARGET_16_BIT to
+;; determine the length by itself, its enabled attribute should be
+;; always 1 to avoid the conflict with the settings here.
+;; In other words: any 2-byte alternative is disabled whenever the
+;; 16-bit instruction set is not available.
+(define_attr "enabled" ""
+  (cond [(and (eq_attr "length" "2")
+	      (match_test "!TARGET_16_BIT"))
+	 (const_int 0)]
+	(const_int 1)))
+
+;; ----------------------------------------------------------------------------
+
+
+;; Move instructions.
+
+;; For QImode and HImode, the immediate value can be fit in imm20s.
+;; So there is no need to split rtx for QI and HI patterns.
+
+(define_expand "movqi"
+  [(set (match_operand:QI 0 "general_operand" "")
+	(match_operand:QI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (QImode, operands[1]);
+})
+
+(define_expand "movhi"
+  [(set (match_operand:HI 0 "general_operand" "")
+	(match_operand:HI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (HImode, operands[1]);
+})
+
+(define_expand "movsi"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(match_operand:SI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (SImode, operands[1]);
+
+  /* If operands[1] is a large constant and cannot be performed
+     by a single instruction, we need to split it.  */
+  if (CONST_INT_P (operands[1])
+      && !satisfies_constraint_Is20 (operands[1])
+      && !satisfies_constraint_Ihig (operands[1]))
+    {
+      rtx high20_rtx;
+      HOST_WIDE_INT low12_int;
+      rtx tmp_rtx;
+
+      /* Without a pseudo we must reuse the destination as scratch.  */
+      tmp_rtx = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];
+
+      /* Split into high 20 bits (moved first) plus low 12 bits
+         (added afterwards via plus_constant).  */
+      high20_rtx = gen_int_mode ((INTVAL (operands[1]) >> 12) << 12, SImode);
+      low12_int = INTVAL (operands[1]) & 0xfff;
+
+      emit_move_insn (tmp_rtx, high20_rtx);
+      emit_move_insn (operands[0], plus_constant (SImode,
+						  tmp_rtx,
+						  low12_int));
+      DONE;
+    }
+})
+
+;; Alternatives: 0-1 register moves, 2-5 16-bit stores, 6 32-bit store,
+;; 7-10 16-bit loads, 11 32-bit load, 12-15 immediate moves.
+(define_insn "*mov<mode>"
+  [(set (match_operand:QIHISI 0 "nonimmediate_operand" "=r, r, U45, U33, U37, U45, m,   l,   l,   l,   d, r,    d,    r,    r,    r")
+	(match_operand:QIHISI 1 "nds32_move_operand"   " r, r,   l,   l,   l,   d, r, U45, U33, U37, U45, m, Ip05, Is05, Is20, Ihig"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mov55\t%0, %1";
+    case 1:
+      return "ori\t%0, %1, 0";
+    case 2:
+    case 3:
+    case 4:
+    case 5:
+      return nds32_output_16bit_store (operands, <byte>);
+    case 6:
+      return nds32_output_32bit_store (operands, <byte>);
+    case 7:
+    case 8:
+    case 9:
+    case 10:
+      return nds32_output_16bit_load (operands, <byte>);
+    case 11:
+      return nds32_output_32bit_load (operands, <byte>);
+    case 12:
+      return "movpi45\t%0, %1";
+    case 13:
+      return "movi55\t%0, %1";
+    case 14:
+      return "movi\t%0, %1";
+    case 15:
+      return "sethi\t%0, hi20(%1)";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,store,store,store,store,store,load,load,load,load,load,alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,    2,    2,    2,    2,    4,   2,   2,   2,   2,   4,  2,  2,  4,  4")])
+
+
+;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF
+;; are able to match such instruction template.
+;; 'la' is a pseudo instruction expanding to 8 bytes (sethi + ori).
+(define_insn "*move_addr"
+  [(set (match_operand:SI 0 "register_operand"       "=l, r")
+	(match_operand:SI 1 "nds32_symbolic_operand" " i, i"))]
+  ""
+  "la\t%0, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "8")])
+
+
+;; Load the high 20 bits of a symbolic address.
+(define_insn "*sethi"
+  [(set (match_operand:SI 0 "register_operand"                "=r")
+	(high:SI (match_operand:SI 1 "nds32_symbolic_operand" " i")))]
+  ""
+  "sethi\t%0, hi20(%1)"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; OR in the low 12 bits, completing a sethi/ori address pair.
+(define_insn "*lo_sum"
+  [(set (match_operand:SI 0 "register_operand"                  "=r")
+	(lo_sum:SI (match_operand:SI 1 "register_operand"       " r")
+		   (match_operand:SI 2 "nds32_symbolic_operand" " i")))]
+  ""
+  "ori\t%0, %1, lo12(%2)"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Zero extension instructions.
+
+;; Alternatives 2/3 fold the zero extension into a load instruction.
+(define_insn "zero_extend<mode>si2"
+  [(set (match_operand:SI 0 "register_operand"                       "=l, r,   l, *r")
+	(zero_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" " l, r, U33,  m")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "ze<size>33\t%0, %1";
+    case 1:
+      return "ze<size>\t%0, %1";
+    case 2:
+      return nds32_output_16bit_load (operands, <byte>);
+    case 3:
+      return nds32_output_32bit_load (operands, <byte>);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,load,load")
+   (set_attr "length" "  2,  4,   2,   4")])
+
+
+;; Sign extension instructions.
+;; Alternative 2 folds the sign extension into a signed load.
+(define_insn "extend<mode>si2"
+  [(set (match_operand:SI 0 "register_operand"                       "=l, r, r")
+	(sign_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" " l, r, m")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "se<size>33\t%0, %1";
+    case 1:
+      return "se<size>\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load_s (operands, <byte>);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,load")
+   (set_attr "length" "  2,  4,   4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Arithmetic instructions.
+
+(define_insn "add<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                   "=   d,    l,    d,    l,  d, l,    k,    l,    r, r")
+	(plus:QIHISI (match_operand:QIHISI 1 "register_operand"      "    0,    l,    0,    l, %0, l,    0,    k,    r, r")
+		     (match_operand:QIHISI 2 "nds32_rimm15s_operand" " In05, In03, Iu05, Iu03,  r, l, Is10, Iu06, Is15, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      /* addi Rt4,Rt4,-x  ==>  subi45 Rt4,x
+         where 0 <= x <= 31 */
+      operands[2] = gen_int_mode (-INTVAL (operands[2]), SImode);
+      return "subi45\t%0, %2";
+    case 1:
+      /* addi Rt3,Ra3,-x  ==>  subi333 Rt3,Ra3,x
+         where 0 <= x <= 7 */
+      operands[2] = gen_int_mode (-INTVAL (operands[2]), SImode);
+      return "subi333\t%0, %1, %2";
+    case 2:
+      return "addi45\t%0, %2";
+    case 3:
+      return "addi333\t%0, %1, %2";
+    case 4:
+      return "add45\t%0, %2";
+    case 5:
+      return "add333\t%0, %1, %2";
+    case 6:
+      /* NOTE(review): 'k' constraint presumably selects the stack
+         pointer register class -- confirm against constraints.md.  */
+      return "addi10.sp\t%2";
+    case 7:
+      return "addri36.sp\t%0, %2";
+    case 8:
+      return "addi\t%0, %1, %2";
+    case 9:
+      return "add\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  2,  2,  2,  2,  2,  2,  4,  4")])
+
+;; 'subri' (alternative 2) handles the immediate-minus-register case:
+;; operands[1] is an Is15 immediate, so emit "subri %0, %2, %1".
+(define_insn "sub<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                    "=d, l,    r, r")
+	(minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" " 0, l, Is15, r")
+		      (match_operand:QIHISI 2 "register_operand"      " r, l,    r, r")))]
+  ""
+  "@
+  sub45\t%0, %2
+  sub333\t%0, %1, %2
+  subri\t%0, %2, %1
+  sub\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  4,  4")])
+
+
+;; GCC intends to simplify (plus (ashift ...) (reg))
+;; into (plus (mult ...) (reg)), so our matching pattern takes 'mult'
+;; and needs to ensure it is exact_log2 value.
+(define_insn "*add_slli"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand"  " r")
+			  (match_operand:SI 2 "immediate_operand" " i"))
+		 (match_operand:SI 3 "register_operand"           " r")))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[2])) != -1)
+   && (exact_log2 (INTVAL (operands[2])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'add_slli' instruction.  */
+  operands[2] = GEN_INT (floor_log2 (INTVAL (operands[2])));
+
+  return "add_slli\t%0, %3, %1, %2";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Add with a logically right-shifted operand (shift count in Iu05).
+(define_insn "*add_srli"
+  [(set (match_operand:SI 0 "register_operand"                        "=   r")
+	(plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			      (match_operand:SI 2 "immediate_operand" " Iu05"))
+		 (match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "add_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; GCC intends to simplify (minus (reg) (ashift ...))
+;; into (minus (reg) (mult ...)), so our matching pattern takes 'mult'
+;; and needs to ensure it is exact_log2 value.
+(define_insn "*sub_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=r")
+	(minus:SI (match_operand:SI 1 "register_operand"           " r")
+		  (mult:SI (match_operand:SI 2 "register_operand"  " r")
+			   (match_operand:SI 3 "immediate_operand" " i"))))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[3])) != -1)
+   && (exact_log2 (INTVAL (operands[3])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'sub_slli' instruction.  */
+  operands[3] = GEN_INT (floor_log2 (INTVAL (operands[3])));
+
+  return "sub_slli\t%0, %1, %2, %3";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Subtract a logically right-shifted operand (shift count in Iu05).
+(define_insn "*sub_srli"
+  [(set (match_operand:SI 0 "register_operand"                         "=   r")
+	(minus:SI (match_operand:SI 1 "register_operand"               "    r")
+		  (lshiftrt:SI (match_operand:SI 2 "register_operand"  "    r")
+			       (match_operand:SI 3 "immediate_operand" " Iu05"))))]
+  "TARGET_ISA_V3"
+  "sub_srli\t%0, %1, %2, %3"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; Multiplication instructions.
+
+(define_insn "mulsi3"
+  [(set (match_operand:SI 0 "register_operand"          "= w, r")
+	(mult:SI (match_operand:SI 1 "register_operand" " %0, r")
+		 (match_operand:SI 2 "register_operand" "  w, r")))]
+  ""
+  "@
+  mul33\t%0, %2
+  mul\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+;; 32x32 -> 64-bit signed widening multiply.
+(define_insn "mulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V2 || TARGET_ISA_V3"
+  "mulsr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; 32x32 -> 64-bit unsigned widening multiply.
+(define_insn "umulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V2 || TARGET_ISA_V3"
+  "mulr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Multiply-accumulate instructions.
+;; The two maddr32 patterns are commutative variants of the same
+;; operation: accumulator first vs. product first in the RTL.
+
+(define_insn "*maddr32_0"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (match_operand:SI 3 "register_operand"          " 0")
+                 (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "*maddr32_1"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))
+                 (match_operand:SI 3 "register_operand"          " 0")))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "*msubr32"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (minus:SI (match_operand:SI 3 "register_operand"          " 0")
+                  (mult:SI (match_operand:SI 1 "register_operand" " r")
+                           (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "msubr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Div Instructions.
+;; Both patterns produce quotient (operand 0) and remainder (operand 3)
+;; in a single instruction.
+
+(define_insn "divmodsi4"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+        (div:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand" " r")))
+   (set (match_operand:SI 3 "register_operand"         "=r")
+        (mod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divsr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "udivmodsi4"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+        (udiv:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand"  " r")))
+   (set (match_operand:SI 3 "register_operand"          "=r")
+        (umod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Boolean instructions.
+;; Note: We define the DImode versions in nds32-doubleword.md.
+
+;; ----------------------------------------------------------------------------
+;; 'AND' operation
+;; ----------------------------------------------------------------------------
+
+;; AND with one operand complemented: %0 = %2 & ~%1.
+(define_insn "bitc"
+  [(set (match_operand:SI 0 "register_operand"                 "=r")
+	(and:SI (not:SI (match_operand:SI 1 "register_operand" " r"))
+		(match_operand:SI 2 "register_operand"         " r")))]
+  "TARGET_ISA_V3"
+  "bitc\t%0, %2, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+(define_insn "andsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    l,    l,    l,    l,    l,    l,    r,   r,     r,    r,    r")
+	(and:SI (match_operand:SI 1 "register_operand" " %0, r,    l,    l,    l,    l,    0,    0,    r,   r,     r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Izeb, Izeh, Ixls, Ix11, Ibms, Ifex, Izeb, Izeh, Iu15, Ii15, Ic15")))]
+  ""
+{
+  HOST_WIDE_INT mask = INTVAL (operands[2]);
+  int zero_position;
+
+  /* 16-bit andi instructions:
+     andi Rt3,Ra3,0xff   -> zeb33  Rt3,Ra3
+     andi Rt3,Ra3,0xffff -> zeh33  Rt3,Ra3
+     andi Rt3,Ra3,0x01   -> xlsb33 Rt3,Ra3
+     andi Rt3,Ra3,0x7ff  -> x11b33 Rt3,Ra3
+     andi Rt3,Rt3,2^imm3u          -> bmski33 Rt3,imm3u
+     andi Rt3,Rt3,(2^(imm3u+1))-1  -> fexti33 Rt3,imm3u.  */
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "and33\t%0, %2";
+    case 1:
+      return "and\t%0, %1, %2";
+    case 2:
+      return "zeb33\t%0, %1";
+    case 3:
+      return "zeh33\t%0, %1";
+    case 4:
+      return "xlsb33\t%0, %1";
+    case 5:
+      return "x11b33\t%0, %1";
+    case 6:
+      operands[2] = GEN_INT (floor_log2 (mask));
+      return "bmski33\t%0, %2";
+    case 7:
+      operands[2] = GEN_INT (floor_log2 (mask + 1) - 1);
+      return "fexti33\t%0, %2";
+    case 8:
+      return "zeb\t%0, %1";
+    case 9:
+      return "zeh\t%0, %1";
+    case 10:
+      return "andi\t%0, %1, %2";
+    case 11:
+      /* bitci performs AND with the bitwise complement of the
+         immediate, so invert the mask before printing.  */
+      operands[2] = GEN_INT (~mask);
+      return "bitci\t%0, %1, %2";
+    case 12:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bclr_p() test,
+         so that we can guarantee there is only one 0-bit
+         within the immediate value.  */
+      for (zero_position = 31; zero_position >= 0; zero_position--)
+	{
+	  if ((INTVAL (operands[2]) & (1 << zero_position)) == 0)
+	    {
+	      /* Found the 0-bit position.  */
+	      operands[2] = GEN_INT (zero_position);
+	      break;
+	    }
+	}
+      return "bclr\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  2,  2,  2,  2,  2,  2,  4,  4,  4,  4,  4")])
+
+;; AND with a left-shifted operand (shift count in Iu05).
+(define_insn "*and_slli"
+  [(set (match_operand:SI 0 "register_operand"                      "=   r")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand"   "    r")
+			    (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"              "    r")))]
+  "TARGET_ISA_V3"
+  "and_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; AND with a logically right-shifted operand (shift count in Iu05).
+(define_insn "*and_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "and_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'OR' operation
+;; ----------------------------------------------------------------------------
+
+;; For V3/V3M ISA, we have 'or33' instruction.
+;; So we can identify 'or Rt3,Rt3,Ra3' case and set its length to be 2.
+(define_insn "iorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(ior:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, Ie15")))]
+  ""
+{
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "or33\t%0, %2";
+    case 1:
+      return "or\t%0, %1, %2";
+    case 2:
+      return "ori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bset_p() test,
+         so that we can guarantee there is only one 1-bit
+         within the immediate value.  */
+      /* Use exact_log2() to search the 1-bit position.  */
+      one_position = exact_log2 (INTVAL (operands[2]));
+      operands[2] = GEN_INT (one_position);
+      return "bset\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+;; OR with a left-shifted operand (shift count in Iu05).
+(define_insn "*or_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(ior:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "or_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; OR with a logically right-shifted operand (shift count in Iu05).
+(define_insn "*or_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "or_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'XOR' operation
+;; ----------------------------------------------------------------------------
+
+;; For V3/V3M ISA, we have 'xor33' instruction.
+;; So we can identify 'xor Rt3,Rt3,Ra3' case and set its length to be 2.
+(define_insn "xorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(xor:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, It15")))]
+  ""
+{
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "xor33\t%0, %2";
+    case 1:
+      return "xor\t%0, %1, %2";
+    case 2:
+      return "xori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_btgl_p() test,
+         so that we can guarantee there is only one 1-bit
+         within the immediate value.  */
+      /* Use exact_log2() to search the 1-bit position,
+         then emit a bit-toggle instruction.  */
+      one_position = exact_log2 (INTVAL (operands[2]));
+      operands[2] = GEN_INT (one_position);
+      return "btgl\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+;; XOR with a left-shifted operand (shift count in Iu05).
+(define_insn "*xor_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(xor:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "xor_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; XOR with a logically right-shifted operand (shift count in Iu05).
+(define_insn "*xor_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "xor_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Rotate Right Instructions.
+
+(define_insn "rotrsi3"
+  [(set (match_operand:SI 0 "register_operand"                 "=   r, r")
+	  (rotatert:SI (match_operand:SI 1 "register_operand"  "    r, r")
+		       (match_operand:SI 2 "nonmemory_operand" " Iu05, r")))]
+  ""
+  "@
+  rotri\t%0, %1, %2
+  rotr\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'NEG' operation
+;; ----------------------------------------------------------------------------
+
+;; For V3/V3M ISA, we have 'neg33' instruction.
+;; So we can identify 'neg Rt3,Ra3' case and set its length to be 2.
+;; And for V2 ISA, there is NO 'neg33' instruction.
+;; The only option is to use 'subri A,B,0' (its semantic is 'A = 0 - B').
+(define_insn "negsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(neg:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+  "@
+   neg33\t%0, %1
+   subri\t%0, %1, 0"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'ONE_COMPLEMENT' operation
+;; ----------------------------------------------------------------------------
+
+;; For V3/V3M ISA, we have 'not33' instruction.
+;; So we can identify 'not Rt3,Ra3' case and set its length to be 2.
+;; Otherwise fall back to 'nor A,B,B' which computes bitwise NOT.
+(define_insn "one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(not:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+  "@
+   not33\t%0, %1
+   nor\t%0, %1, %1"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Shift instructions.
+
+(define_insn "ashlsi3"
+  [(set (match_operand:SI 0 "register_operand"             "=   l,    r, r")
+	(ashift:SI (match_operand:SI 1 "register_operand"  "    l,    r, r")
+		   (match_operand:SI 2 "nonmemory_operand" " Iu03, Iu05, r")))]
+  ""
+  "@
+  slli333\t%0, %1, %2
+  slli\t%0, %1, %2
+  sll\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+(define_insn "ashrsi3"
+  [(set (match_operand:SI 0 "register_operand"               "=   d,    r, r")
+	(ashiftrt:SI (match_operand:SI 1 "register_operand"  "    0,    r, r")
+		     (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))]
+  ""
+  "@
+  srai45\t%0, %2
+  srai\t%0, %1, %2
+  sra\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+(define_insn "lshrsi3"
+  [(set (match_operand:SI 0 "register_operand"               "=   d,    r, r")
+	(lshiftrt:SI (match_operand:SI 1 "register_operand"  "    0,    r, r")
+		     (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))]
+  ""
+  "@
+  srli45\t%0, %2
+  srli\t%0, %1, %2
+  srl\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; ----------------------------------------------------------------------------
+;; Conditional Move patterns
+;; ----------------------------------------------------------------------------
+
+(define_expand "movsicc"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(if_then_else:SI (match_operand 1 "comparison_operator" "")
+			 (match_operand:SI 2 "register_operand" "")
+			 (match_operand:SI 3 "register_operand" "")))]
+  "TARGET_CMOV"
+{
+  if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+      && GET_MODE (XEXP (operands[1], 0)) == SImode
+      && XEXP (operands[1], 1) == const0_rtx)
+    {
+      /* If the operands[1] rtx is already (eq X 0) or (ne X 0),
+         we have gcc generate original template rtx.  */
+      goto create_template;
+    }
+  else
+    {
+      /* Since there is only 'slt'(Set when Less Than) instruction for
+         comparison in Andes ISA, the major strategy we use here is to
+         convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination.
+         We design constraints properly so that the reload phase will assist
+         to make one source operand to use same register as result operand.
+         Then we can use cmovz/cmovn to catch the other source operand
+         which has different register.  */
+      enum rtx_code code = GET_CODE (operands[1]);
+      enum rtx_code new_code = code;
+      rtx cmp_op0 = XEXP (operands[1], 0);
+      rtx cmp_op1 = XEXP (operands[1], 1);
+      rtx tmp;
+      int reverse = 0;
+
+      /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part
+         Strategy : Reverse condition and swap comparison operands
+
+         For example:
+
+             a <= b ? P : Q   (LE or LEU)
+         --> a >  b ? Q : P   (reverse condition)
+         --> b <  a ? Q : P   (swap comparison operands to achieve 'LT/LTU')
+
+             a >= b ? P : Q   (GE or GEU)
+         --> a <  b ? Q : P   (reverse condition to achieve 'LT/LTU')
+
+             a <  b ? P : Q   (LT or LTU)
+         --> (NO NEED TO CHANGE, it is already 'LT/LTU')
+
+             a >  b ? P : Q   (GT or GTU)
+         --> b <  a ? P : Q   (swap comparison operands to achieve 'LT/LTU') */
+      switch (code)
+	{
+	case NE:
+	  /*   (a != b ? P : Q)
+	     can be expressed as
+	       (a == b ? Q : P)
+	     so, fall through to reverse condition */
+	case GE: case GEU: case LE: case LEU:
+	  new_code = reverse_condition (code);
+	  reverse = 1;
+	  break;
+	case EQ: case GT: case GTU: case LT: case LTU:
+	  /* no need to reverse condition */
+	  break;
+	default:
+	  FAIL;
+	}
+
+      /* For '>' comparison operator, we swap operands
+         so that we can have 'LT/LTU' operator.  */
+      if (new_code == GT || new_code == GTU)
+	{
+	  tmp     = cmp_op0;
+	  cmp_op0 = cmp_op1;
+	  cmp_op1 = tmp;
+
+	  new_code = swap_condition (new_code);
+	}
+
+      /* Use a temporary register to store slt/slts result.  */
+      tmp = gen_reg_rtx (SImode);
+
+      /* Split EQ and NE because we don't have direct comparison of EQ and NE.
+         If we don't split it, the conditional move transformation will fail
+         when producing (SET A (EQ B C)) or (SET A (NE B C)).  */
+      if (new_code == EQ)
+	{
+	  /* (a == b) <=> ((a ^ b) unsigned-< 1).  */
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, tmp, GEN_INT (1)));
+	}
+      else if (new_code == NE)
+	{
+	  /* (a != b) <=> (0 unsigned-< (a ^ b)).  */
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, GEN_INT (0), tmp));
+        }
+      else
+	/* This emit_insn will create corresponding 'slt/slts' instruction.  */
+	emit_insn (gen_rtx_SET (VOIDmode, tmp,
+				gen_rtx_fmt_ee (new_code, SImode,
+						cmp_op0, cmp_op1)));
+
+      /* Change comparison semantic into (eq X 0) or (ne X 0) behavior
+         so that cmovz or cmovn will be matched later.
+
+         For reverse condition cases, we want to create a semantic that:
+           (eq X 0) --> pick up "else" part
+         For normal cases, we want to create a semantic that:
+           (ne X 0) --> pick up "then" part
+
+         Later we will have cmovz/cmovn instruction pattern to
+         match corresponding behavior and output instruction.  */
+      operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE,
+				    VOIDmode, tmp, const0_rtx);
+    }
+
+create_template:
+  do {} while(0); /* dummy line */
+})
+
+;; Second alternative ties the "else" operand to the destination, so the
+;; inverse instruction (cmovn) selecting the "then" operand is emitted.
+(define_insn "cmovz"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+        (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovz\t%0, %2, %1
+   cmovn\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+;; Mirror image of cmovz for the (ne X 0) condition.
+(define_insn "cmovn"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+	(if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovn\t%0, %2, %1
+   cmovz\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; Conditional Branch patterns
+;; ----------------------------------------------------------------------------
+
+(define_expand "cbranchsi4"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* If operands[2] is (const_int 0),
+     we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions.
+     So we have gcc generate original template rtx.  */
+  if (GET_CODE (operands[2]) == CONST_INT)
+    if (INTVAL (operands[2]) == 0)
+      if ((code != GTU)
+	  && (code != GEU)
+	  && (code != LTU)
+	  && (code != LEU))
+	goto create_template;
+
+  /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than)
+     behavior for the comparison, we might need to generate other
+     rtx patterns to achieve same semantic.  */
+  switch (code)
+    {
+    case GT:
+    case GTU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* GT  reg_A, const_int  =>  !(LT  reg_A, const_int + 1) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* Note that (gt:SI X INT_MAX) and (gtu:SI X UINT_MAX) are
+	     always false and the "plus 1" conversion below would wrap
+	     around, producing an always-true branch instead.
+	     We better have asserts here in case GCC does not properly
+	     optimize such degenerate comparisons away, mirroring the
+	     assertion in the LE/LEU case.
+	     (UINT_MAX appears as INTVAL -1 since SImode const_int
+	     values are kept sign-extended.)  */
+	  gcc_assert (code != GT || INTVAL (operands[2]) != 0x7fffffff);
+	  gcc_assert (code != GTU || INTVAL (operands[2]) != -1);
+
+	  /* We want to plus 1 into the integer value
+	     of operands[2] to create 'slt' instruction.
+	     This calculation is performed on the host machine,
+	     which may be 64-bit integer.
+	     So the meaning of calculation result may be
+	     different from the 32-bit nds32 target.
+
+	     For example:
+	       0x7fffffff + 0x1 -> 0x80000000,
+	       this value is POSITIVE on 64-bit machine,
+	       but the expected value on 32-bit nds32 target
+	       should be NEGATIVE value.
+
+	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
+	     explicitly create SImode constant rtx.  */
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* GT  reg_A, reg_B  =>  LT  reg_B, reg_A */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case GE:
+    case GEU:
+      /* GE  reg_A, reg_B      =>  !(LT  reg_A, reg_B) */
+      /* GE  reg_A, const_int  =>  !(LT  reg_A, const_int) */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == GE)
+	{
+	  /* GE, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* GEU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], EQ);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LT:
+    case LTU:
+      /* LT  reg_A, reg_B      =>  LT  reg_A, reg_B */
+      /* LT  reg_A, const_int  =>  LT  reg_A, const_int */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], NE);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* LE  reg_A, const_int  =>  LT  reg_A, const_int + 1 */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* Note that (le:SI X INT_MAX) is not the same as (lt:SI X INT_MIN).
+	     We better have an assert here in case GCC does not properly
+	     optimize it away.  The INT_MAX here is 0x7fffffff for target.  */
+	  gcc_assert (code != LE || INTVAL (operands[2]) != 0x7fffffff);
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* LE  reg_A, reg_B  =>  !(LT  reg_B, reg_A) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case EQ:
+    case NE:
+      /* NDS32 ISA has various form for eq/ne behavior no matter
+         what kind of the operand is.
+         So just generate original template rtx.  */
+      goto create_template;
+
+    default:
+      FAIL;
+    }
+
+create_template:
+  do {} while(0); /* dummy line */
+})
+
+
+;; Branch on (eq/ne reg 0).  Three constraint alternatives ("t", "l",
+;; "r") select between 16-bit beqzs8/bnezs8, 16-bit beqz38/bnez38 and
+;; 32-bit beqz/bnez encodings; the "length" attribute computed below
+;; decides which textual form the C body emits, including the
+;; reversed-condition + "j" long form when the target is out of range.
+(define_insn "*cbranchsi4_equality_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"  "t, l, r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-comparison conditional branch has two forms:
+       32-bit instruction =>          beqz/bnez           imm16s << 1
+       16-bit instruction => beqzs8/bnezs8/beqz38/bnez38  imm8s << 1
+
+     For 32-bit case,
+     we assume it is always reachable. (but check range -65500 ~ 65500)
+
+     For 16-bit case,
+     it must satisfy { 255 >= (label - pc) >= -256 } condition.
+     However, since the $pc for nds32 is at the beginning of the instruction,
+     we should leave some length space for current insn.
+     So we use range -250 ~ 250.  */
+
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  return (code == EQ) ? "beqzs8\t%2" : "bnezs8\t%2";
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  return (code == EQ) ? "beqz38\t%1, %2" : "bnez38\t%1, %2";
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* For which_alternative==2, it should not be here.  */
+	  gcc_unreachable ();
+	}
+    case 4:
+      /* including constraints: t, l, and r */
+      return (code == EQ) ? "beqz\t%1, %2" : "bnez\t%1, %2";
+    case 6:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  if (code == EQ)
+	    {
+	      /*    beqzs8  .L0
+	          =>
+	            bnezs8  .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "bnezs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnezs8  .L0
+	          =>
+	            beqzs8  .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "beqzs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  if (code == EQ)
+	    {
+	      /*    beqz38  $r0, .L0
+	          =>
+	            bnez38  $r0, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "bnez38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnez38  $r0, .L0
+	          =>
+	            beqz38  $r0, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "beqz38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* For which_alternative==2, it should not be here.  */
+	  gcc_unreachable ();
+	}
+    case 8:
+      /* constraint: t, l, r.  */
+      if (code == EQ)
+	{
+	  /*    beqz  $r8, .L0
+	      =>
+	        bnez  $r8, .LCB0
+	        j  .L0
+	      .LCB0:
+	   */
+	  return "bnez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+      else
+	{
+	  /*    bnez  $r8, .L0
+	      =>
+	        beqz  $r8, .LCB0
+	        j  .L0
+	      .LCB0:
+	   */
+	  return "beqz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 2
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			  (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+;; This pattern is dedicated to V2 ISA,
+;; because V2 DOES NOT HAVE beqc/bnec instruction.
+;; Branch on (eq/ne reg reg).  Only the 32-bit beq/bne form exists on
+;; V2; when the target is out of the imm14s range (checked via the
+;; "length" attribute below) a reversed branch around a "j" is emitted.
+(define_insn "*cbranchsi4_equality_reg"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "TARGET_ISA_V2"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+
+     For 32-bit case,
+     we assume it is always reachable. (but check range -16350 ~ 16350).  */
+
+  switch (code)
+    {
+    case EQ:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "beq\t%1, %2, %3";
+	case 8:
+	  /*    beq  $r0, $r1, .L0
+	      =>
+	        bne  $r0, $r1, .LCB0
+	        j  .L0
+	      .LCB0:
+	   */
+	  return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    case NE:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "bne\t%1, %2, %3";
+	case 8:
+	  /*    bne  $r0, $r1, .L0
+	      =>
+	        beq  $r0, $r1, .LCB0
+	        j  .L0
+	      .LCB0:
+	   */
+	  return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			   (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+;; This pattern is dedicated to V3/V3M,
+;; because V3/V3M DO HAVE beqc/bnec instruction.
+;; Branch on (eq/ne reg reg) or (eq/ne reg imm) for V3/V3M, which add
+;; the beqc/bnec immediate (Is11) forms.  Every path in the EQ arm
+;; below ends in a return or gcc_unreachable, so there is no
+;; fall-through into the NE arm.
+(define_insn "*cbranchsi4_equality_reg_or_const_int"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r,    r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r, Is11")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "TARGET_ISA_V3 || TARGET_ISA_V3M"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+       32-bit instruction =>         beqc/bnec          imm8s << 1
+
+     For 32-bit case, we assume it is always reachable.
+     (but check range -16350 ~ 16350 and -250 ~ 250).  */
+
+  switch (code)
+    {
+    case EQ:
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beq\t%1, %2, %3";
+	    case 8:
+	      /*    beq  $r0, $r1, .L0
+	          =>
+	            bne  $r0, $r1, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beqc\t%1, %2, %3";
+	    case 8:
+	      /*    beqc  $r0, constant, .L0
+	          =>
+	            bnec  $r0, constant, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "bnec\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    case NE:
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bne\t%1, %2, %3";
+	    case 8:
+	      /*    bne  $r0, $r1, .L0
+	          =>
+	            beq  $r0, $r1, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bnec\t%1, %2, %3";
+	    case 8:
+	      /*    bnec  $r0, constant, .L0
+	          =>
+	            beqc  $r0, constant, .LCB0
+	            j  .L0
+	          .LCB0:
+	       */
+	      return "beqc\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			  (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		     (const_int 4)
+		     (const_int 8))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+			  (le (minus (match_dup 3) (pc)) (const_int  250)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+;; Branch on signed comparison of a register against zero using
+;; bgtz/bgez/bltz/blez; when the target exceeds the imm16s range
+;; (length attribute = 8) the condition is reversed around a "j".
+(define_insn "*cbranchsi4_greater_less_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_greater_less_comparison_operator"
+			[(match_operand:SI 1 "register_operand" "r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-greater-less-comparison conditional branch has one form:
+       32-bit instruction =>      bgtz/bgez/bltz/blez     imm16s << 1
+
+     For 32-bit case, we assume it is always reachable.
+     (but check range -65500 ~ 65500).  */
+
+  if (get_attr_length (insn) == 8)
+    {
+      /* The branch target is too far to simply use one
+         bgtz/bgez/bltz/blez instruction.
+         We need to reverse condition and use 'j' to jump to the target.  */
+      switch (code)
+	{
+	case GT:
+	  /*   bgtz  $r8, .L0
+	     =>
+	       blez  $r8, .LCB0
+	       j  .L0
+	     .LCB0:
+	   */
+	  return "blez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case GE:
+	  /*   bgez  $r8, .L0
+	     =>
+	       bltz  $r8, .LCB0
+	       j  .L0
+	     .LCB0:
+	   */
+	  return "bltz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LT:
+	  /*   bltz  $r8, .L0
+	     =>
+	       bgez  $r8, .LCB0
+	       j  .L0
+	     .LCB0:
+	   */
+	  return "bgez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LE:
+	  /*   blez  $r8, .L0
+	     =>
+	       bgtz  $r8, .LCB0
+	       j  .L0
+	     .LCB0:
+	   */
+	  return "bgtz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+    }
+
+  switch (code)
+    {
+    case GT:
+      return "bgtz\t%1, %2";
+    case GE:
+      return "bgez\t%1, %2";
+    case LT:
+      return "bltz\t%1, %2";
+    case LE:
+      return "blez\t%1, %2";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+        (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			   (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+(define_expand "cstoresi4"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operator:SI 1 "comparison_operator"
+	  [(match_operand:SI 2 "register_operand" "")
+	   (match_operand:SI 3 "nonmemory_operand" "")]))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[1]);
+
+  switch (code)
+    {
+    case EQ:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A == const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  /* If the integer value is not in the range of imm15s,
+	     we need to force register first because our addsi3 pattern
+	     only accept nds32_rimm15s_operand predicate.  */
+	  if (!satisfies_constraint_Is15 (operands[3]))
+	    operands[3] = force_reg (SImode, operands[3]);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A == reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+
+    case NE:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A != const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  /* If the integer value is not in the range of imm15s,
+	     we need to force register first because our addsi3 pattern
+	     only accept nds32_rimm15s_operand predicate.  */
+	  if (!satisfies_constraint_Is15 (operands[3]))
+	    operands[3] = force_reg (SImode, operands[3]);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A != reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+
+    case GT:
+    case GTU:
+      /* reg_R = (reg_A > reg_B)       --> slt reg_R, reg_B, reg_A */
+      /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */
+      if (code == GT)
+	{
+	  /* GT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[3], operands[2]));
+	}
+      else
+	{
+	  /* GTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[3], operands[2]));
+	}
+
+      DONE;
+
+    case GE:
+    case GEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A >= const_int_B)
+	     --> movi reg_C, const_int_B - 1
+	         slt  reg_R, reg_C, reg_A */
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  /* Note that (ge:SI X INT_MIN) and (geu:SI X 0) are always
+	     true, and "const_int_B - 1" below would wrap around so
+	     that the transformed comparison is always false instead.
+	     We better have asserts here in case GCC does not properly
+	     optimize such degenerate comparisons away, mirroring the
+	     assertion in the cbranchsi4 LE case.  */
+	  gcc_assert (code != GE || INTVAL (operands[3]) != (-2147483647 - 1));
+	  gcc_assert (code != GEU || INTVAL (operands[3]) != 0);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) - 1,
+					      SImode)));
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], tmp_reg, operands[2]));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A >= reg_B)
+	     --> slt  reg_R, reg_A, reg_B
+	         xori reg_R, reg_R, const_int_1 */
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[2], operands[3]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[2], operands[3]));
+	    }
+
+	  /* perform 'not' behavior */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+    case LT:
+    case LTU:
+      /* reg_R = (reg_A < reg_B)       --> slt reg_R, reg_A, reg_B */
+      /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[2], operands[3]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[2], operands[3]));
+	}
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A <= const_int_B)
+	     --> movi reg_C, const_int_B + 1
+	         slt  reg_R, reg_A, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  /* Note that (le:SI X INT_MAX) and (leu:SI X UINT_MAX) are
+	     always true, and "const_int_B + 1" below would wrap around.
+	     (UINT_MAX appears as INTVAL -1 since SImode const_int
+	     values are kept sign-extended.)  */
+	  gcc_assert (code != LE || INTVAL (operands[3]) != 0x7fffffff);
+	  gcc_assert (code != LEU || INTVAL (operands[3]) != -1);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) + 1,
+					      SImode)));
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], operands[2], tmp_reg));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A <= reg_B) --> slt  reg_R, reg_B, reg_A
+	                                  xori reg_R, reg_R, const_int_1 */
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[3], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[3], operands[2]));
+	    }
+
+	  /* perform 'not' behavior */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+    default:
+      gcc_unreachable ();
+    }
+})
+
+
+;; Set-on-less-than, signed comparison (lt).  The first two
+;; alternatives are the 16-bit slts45/sltsi45 forms (length 2, "t"
+;; output / "d" input constraints); the last two are the general
+;; 32-bit slts/sltsi forms with register or Is15 immediate operand.
+(define_insn "slts_compare"
+  [(set (match_operand:SI 0 "register_operand"         "=t,    t, r,    r")
+	(lt:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+	       (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slts45\t%1, %2
+   sltsi45\t%1, %2
+   slts\t%0, %1, %2
+   sltsi\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+;; Set-on-less-than, unsigned comparison (ltu).  Note the mnemonics:
+;; the plain slt/slti forms emitted here are the UNSIGNED variants;
+;; the "s"-suffixed mnemonics in "slts_compare" are the signed ones.
+(define_insn "slt_compare"
+  [(set (match_operand:SI 0 "register_operand"          "=t,    t, r,    r")
+	(ltu:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+		(match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slt45\t%1, %2
+   slti45\t%1, %2
+   slt\t%0, %1, %2
+   slti\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Unconditional and other jump instructions.
+
+(define_insn "jump"
+  [(set (pc) (label_ref (match_operand 0 "" "")))]
+  ""
+{
+  /* Two encodings exist for an unconditional jump:
+       32-bit:  j   imm24s << 1   (assumed always reachable)
+       16-bit:  j8  imm8s  << 1   (nominal range -256 ~ 255)
+
+     Because $pc for nds32 points at the beginning of the current
+     instruction, some slack must be left for the insn itself, so the
+     16-bit form is only selected for targets within -250 ~ 250; the
+     "length" attribute below encodes that decision.  */
+  int len = get_attr_length (insn);
+
+  if (len == 2)
+    return "j8\t%0";
+  if (len == 4)
+    return "j\t%0";
+  gcc_unreachable ();
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
+			   (le (minus (match_dup 0) (pc)) (const_int  250)))
+		      (if_then_else (match_test "TARGET_16_BIT")
+				    (const_int 2)
+				    (const_int 4))
+		      (const_int 4)))])
+
+;; Indirect jump through a register.
+;; NOTE(review): the first alternative emits the 16-bit "jr5" form
+;; (length 2) but, unlike the "jump" pattern above, it is not gated on
+;; TARGET_16_BIT -- confirm a 16-bit encoding cannot be chosen when
+;; 16-bit instructions are disabled.
+(define_insn "indirect_jump"
+  [(set (pc) (match_operand:SI 0 "register_operand" "r, r"))]
+  ""
+  "@
+  jr5\t%0
+  jr\t%0"
+  [(set_attr "type"   "branch,branch")
+   (set_attr "length" "     2,     4")])
+
+;; Subroutine call instruction returning no value.
+;;   operands[0]: It should be a mem RTX whose address is
+;;                the address of the function.
+;;   operands[1]: It is the number of bytes of arguments pushed as a const_int.
+;;   operands[2]: It is the number of registers used as operands.
+
+;; No preparation statements are needed; the parallel is matched
+;; directly by the *call_register / *call_immediate insns below.
+;; $lp is clobbered -- presumably it receives the return address
+;; written by jral/jal (standard link-register behavior; confirm
+;; against the ISA manual).
+(define_expand "call"
+  [(parallel [(call (match_operand 0 "memory_operand" "")
+		    (match_operand 1))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+;; Call through a function pointer held in a register.
+;; NOTE(review): the 16-bit "jral5" alternative (length 2) is not
+;; gated on TARGET_16_BIT -- confirm this is safe when 16-bit
+;; instructions are disabled.
+(define_insn "*call_register"
+  [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r, r"))
+		    (match_operand 1))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "@
+  jral5\t%0
+  jral\t%0"
+  [(set_attr "type"   "branch,branch")
+   (set_attr "length" "     2,     4")])
+
+;; Direct call to an immediate (symbolic) address via "jal".
+(define_insn "*call_immediate"
+  [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i"))
+		    (match_operand 1))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%0"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; Subroutine call instruction returning a value.
+;;   operands[0]: It is the hard register in which the value is returned.
+;;   The rest three operands are the same as the
+;;   three operands of the 'call' instruction.
+;;   (but with numbers increased by one)
+
+;; No preparation statements are needed; the parallel is matched
+;; directly by the *call_value_register / *call_value_immediate
+;; insns below.
+(define_expand "call_value"
+  [(parallel [(set (match_operand 0)
+		   (call (match_operand 1 "memory_operand" "")
+		         (match_operand 2)))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+;; Value-returning call through a function pointer in a register.
+;; NOTE(review): as with *call_register, the 16-bit "jral5"
+;; alternative is not gated on TARGET_16_BIT -- confirm.
+(define_insn "*call_value_register"
+  [(parallel [(set (match_operand 0)
+		   (call (mem (match_operand:SI 1 "register_operand" "r, r"))
+		         (match_operand 2)))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "@
+  jral5\t%1
+  jral\t%1"
+  [(set_attr "type"   "branch,branch")
+   (set_attr "length" "     2,     4")])
+
+;; Value-returning direct call to an immediate address via "jal".
+(define_insn "*call_value_immediate"
+  [(parallel [(set (match_operand 0)
+		   (call (mem (match_operand:SI 1 "immediate_operand" "i"))
+			 (match_operand 2)))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%1"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; prologue and epilogue.
+
+;; Expand the function prologue entirely in C code; DONE consumes the
+;; pattern so the (const_int 0) placeholder is never emitted as RTL.
+(define_expand "prologue" [(const_int 0)]
+  ""
+{
+  /* Note that only under V3/V3M ISA, we could use v3push prologue.  */
+  if (TARGET_V3PUSH)
+    nds32_expand_prologue_v3push ();
+  else
+    nds32_expand_prologue ();
+  DONE;
+})
+
+;; Expand the function epilogue entirely in C code; DONE consumes the
+;; pattern so the (const_int 0) placeholder is never emitted as RTL.
+(define_expand "epilogue" [(const_int 0)]
+  ""
+{
+  /* Note that only under V3/V3M ISA, we could use v3pop epilogue.  */
+  if (TARGET_V3PUSH)
+    nds32_expand_epilogue_v3pop ();
+  else
+    nds32_expand_epilogue ();
+  DONE;
+})
+
+
+;; nop instruction.
+
+(define_insn "nop"
+  [(const_int 0)]
+  ""
+{
+  if (TARGET_16_BIT)
+    return "nop16";
+  else
+    return "nop";
+}
+  [(set_attr "type" "misc")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+
+;; ----------------------------------------------------------------------------
+;; Stack push/pop operations
+;; ----------------------------------------------------------------------------
+
+;; The pattern for stack push.
+;; Both stack_push_multiple and stack_v3push use the following pattern.
+;; So we need to use TARGET_V3PUSH to determine the instruction length.
+;; Only one representative element (a store at $sp + const offset) is
+;; spelled out here; the full parallel is validated by the
+;; nds32_stack_push_operation predicate, and the assembly text comes
+;; from nds32_output_stack_push ().
+(define_insn "*stack_push"
+  [(match_parallel 0 "nds32_stack_push_operation"
+     [(set (mem:SI (plus:SI (reg:SI SP_REGNUM)
+			    (match_operand:SI 1 "const_int_operand" "")))
+	   (match_operand:SI 2 "register_operand" ""))
+     ])]
+  ""
+{
+  return nds32_output_stack_push ();
+}
+  [(set_attr "type" "misc")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_V3PUSH")
+		      (const_int 2)
+		      (const_int 4)))])
+
+
+;; The pattern for stack pop.
+;; Both stack_pop_multiple and stack_v3pop use the following pattern.
+;; So we need to use TARGET_V3PUSH to determine the instruction length.
+;; Only one representative element (a load from $sp) is spelled out
+;; here; the full parallel is validated by the
+;; nds32_stack_pop_operation predicate, and the assembly text comes
+;; from nds32_output_stack_pop ().
+(define_insn "*stack_pop"
+  [(match_parallel 0 "nds32_stack_pop_operation"
+     [(set (match_operand:SI 1 "register_operand" "")
+	   (mem:SI (reg:SI SP_REGNUM)))
+     ])]
+  ""
+{
+  return nds32_output_stack_pop ();
+}
+  [(set_attr "type" "misc")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_V3PUSH")
+		      (const_int 2)
+		      (const_int 4)))])
+
+
+;; ----------------------------------------------------------------------------
+;; unspec operation patterns
+;; ----------------------------------------------------------------------------
+
+;; In nds32 target, the 'ret5' instruction is actually 'jr5 $lp'.
+;; This pattern is designed to distinguish function return
+;; from general indirect_jump pattern so that we can directly
+;; generate 'ret5' for readability.
+
+(define_insn "unspec_volatile_func_return"
+  [(set (pc)
+	(unspec_volatile:SI [(reg:SI LP_REGNUM)] UNSPEC_VOLATILE_FUNC_RETURN))]
+  ""
+{
+  /* Function return through $lp: the 16-bit "ret5" encoding when
+     16-bit instructions are available, the 32-bit "ret" otherwise.  */
+  return TARGET_16_BIT ? "ret5" : "ret";
+}
+  [(set_attr "type" "misc")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+
+;; ----------------------------------------------------------------------------
+;; Jump Table patterns
+;; ----------------------------------------------------------------------------
+;; Need to implement ASM_OUTPUT_ADDR_VEC_ELT (for normal jump table)
+;; or ASM_OUTPUT_ADDR_DIFF_ELT (for pc relative jump table) as well.
+;;
+;; operands[0]: The index to dispatch on.
+;; operands[1]: The lower bound for indices in the table.
+;; operands[2]: The total range of indices in the table.
+;;              i.e. The largest index minus the smallest one.
+;; operands[3]: A label that precedes the table itself.
+;; operands[4]: A label to jump to if the index has a value outside the bounds.
+;;
+;; We need to create following sequences for jump table code generation:
+;;   A) k <-- (plus (operands[0]) (-operands[1]))
+;;   B) if (gtu k operands[2]) then goto operands[4]
+;;   C) t <-- operands[3]
+;;   D) z <-- (mem (plus (k << 0 or 1 or 2) t))
+;;   E) z <-- t + z (NOTE: This is only required for pc relative jump table.)
+;;   F) jump to target with register t or z
+;;
+;; The steps C, D, E, and F are performed by casesi_internal pattern.
+(define_expand "casesi"
+  [(match_operand:SI 0 "register_operand"  "r") ; index to jump on
+   (match_operand:SI 1 "immediate_operand" "i") ; lower bound
+   (match_operand:SI 2 "immediate_operand" "i") ; total range
+   (match_operand:SI 3 "" "")                   ; table label
+   (match_operand:SI 4 "" "")]                  ; Out of range label
+  ""
+{
+  rtx add_tmp;
+  rtx reg, test;
+
+  /* Step A: "k <-- (plus (operands[0]) (-operands[1]))".  */
+  if (operands[1] != const0_rtx)
+    {
+      reg = gen_reg_rtx (SImode);
+      add_tmp = gen_int_mode (-INTVAL (operands[1]), SImode);
+
+      /* If the integer value is not in the range of imm15s,
+         we need to force register first because our addsi3 pattern
+         only accept nds32_rimm15s_operand predicate.  */
+      add_tmp = force_reg (SImode, add_tmp);
+
+      /* NOTE(review): the value is forced into a register
+         unconditionally here, even when it would fit imm15s --
+         harmless, but the comment above suggests a range check
+         was intended.  */
+      emit_insn (gen_addsi3 (reg, operands[0], add_tmp));
+      operands[0] = reg;
+    }
+
+  /* Step B: "if (gtu k operands[2]) then goto operands[4]".  */
+  test = gen_rtx_GTU (VOIDmode, operands[0], operands[2]);
+  emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2],
+				  operands[4]));
+
+  /* Scratch register for the table entry retrieved by casesi_internal.  */
+  operands[5] = gen_reg_rtx (SImode);
+  /* Step C, D, E, and F, using another temporary register operands[5].  */
+  emit_jump_insn (gen_casesi_internal (operands[0],
+				       operands[3],
+				       operands[5]));
+  DONE;
+})
+
+;; We are receiving operands from casesi pattern:
+;;
+;; operands[0]: The index that has been subtracted by the lower bound.
+;; operands[1]: A label that precedes the table itself.
+;; operands[2]: A temporary register to retrieve value in table.
+;;
+;; We need to perform steps C, D, E, and F:
+;;
+;;   C) t <-- operands[1]
+;;   D) z <-- (mem (plus (operands[0] << m) t))
+;;            m is 2 for normal jump table.
+;;            m is 0, 1, or 2 for pc relative jump table based on diff size.
+;;   E) t <-- z + t (NOTE: This is only required for pc relative jump table.)
+;;   F) Jump to target with register t or z.
+;;
+;; The USE in this pattern is needed to tell flow analysis that this is
+;; a CASESI insn.  It has no other purpose.
+;; Single insn covering the whole table-dispatch sequence (steps C-F);
+;; the actual assembly is produced by the C output routines, which choose
+;; between the absolute and the pc-relative table form.
+(define_insn "casesi_internal"
+  [(parallel [(set (pc)
+		   (mem:SI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "r")
+					     (const_int 4))
+				    (label_ref (match_operand 1 "" "")))))
+	      (use (label_ref (match_dup 1)))
+	      (clobber (match_operand:SI 2 "register_operand" ""))
+	      (clobber (reg:SI TA_REGNUM))])]
+  ""
+{
+  /* The two helpers emit multi-instruction sequences; which one is used
+     depends on whether the jump table holds pc-relative differences.  */
+  if (CASE_VECTOR_PC_RELATIVE)
+    return nds32_output_casesi_pc_relative (operands);
+  else
+    return nds32_output_casesi (operands);
+}
+  ;; NOTE(review): length 20 looks like a worst-case bound for the emitted
+  ;; sequence (presumably up to five 4-byte instructions) — confirm against
+  ;; the output routines in nds32.c.
+  [(set_attr "length" "20")
+   (set_attr "type" "alu")])
+
+;; ----------------------------------------------------------------------------
+
+;; Performance Extension
+
+;; Count leading zeros, mapped to the performance-extension 'clz'
+;; instruction; matches the standard 'clzsi2' pattern name so the
+;; middle-end can expand __builtin_clz directly.
+(define_insn "clzsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+	(clz:SI (match_operand:SI 1 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "clz\t%0, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Signed maximum via the performance-extension 'max' instruction
+;; (standard 'smaxsi3' pattern name).
+(define_insn "smaxsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smax:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "max\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Signed minimum via the performance-extension 'min' instruction
+;; (standard 'sminsi3' pattern name).
+(define_insn "sminsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smin:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "min\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Single-bit test: extract one bit of operand 1 at the immediate
+;; position given by operand 2, using the performance-extension 'btst'
+;; instruction.  Anonymous pattern — matched by combine, never called
+;; by name.  NOTE(review): the Iu05 constraint presumably limits the
+;; bit position to an unsigned 5-bit immediate (0..31) — confirm in
+;; constraints.md.
+(define_insn "*btst"
+  [(set (match_operand:SI 0 "register_operand"                   "=   r")
+	(zero_extract:SI (match_operand:SI 1 "register_operand"  "    r")
+			 (const_int 1)
+			 (match_operand:SI 2 "immediate_operand" " Iu05")))]
+  "TARGET_PERF_EXT"
+  "btst\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; ----------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2013-10-16 12:44 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-08 10:57 [PATCH 2/6] Andes nds32: machine description of nds32 porting (2) Chung-Ju Wu
2013-07-09 23:44 ` Joseph S. Myers
2013-07-24 15:52   ` Chung-Ju Wu
2013-07-25  9:42     ` Chung-Ju Wu
2013-09-08 16:17       ` Chung-Ju Wu
2013-09-14 15:15         ` Richard Sandiford
2013-09-27 18:38           ` Chung-Ju Wu
2013-10-01 17:31             ` Richard Sandiford
2013-10-05 18:21               ` Chung-Ju Wu
2013-10-06  9:57                 ` Richard Sandiford
2013-10-06 10:28                   ` Chung-Lin Tang
2013-10-06 10:33                     ` Richard Sandiford
2013-10-06 11:27                       ` Chung-Lin Tang
2013-10-06 13:39                         ` Chung-Ju Wu
2013-10-06 13:24                   ` Chung-Ju Wu
2013-10-13 13:29                   ` Chung-Ju Wu
2013-10-13 16:43                     ` Richard Sandiford
2013-10-16 12:56                       ` Chung-Ju Wu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).