public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-1949] New compact syntax for insn and insn_split in Machine Descriptions.
@ 2023-06-19 14:57 Tamar Christina
  0 siblings, 0 replies; only message in thread
From: Tamar Christina @ 2023-06-19 14:57 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:957ae90406591739b68e95ad49a0232faeb74217

commit r14-1949-g957ae90406591739b68e95ad49a0232faeb74217
Author: Tamar Christina <tamar.christina@arm.com>
Date:   Mon Jun 19 15:47:46 2023 +0100

    New compact syntax for insn and insn_split in Machine Descriptions.
    
    This patch adds support for a compact syntax for specifying constraints in
    instruction patterns. Credit for the idea goes to Richard Earnshaw.
    
    With this new syntax we want a clean break from the current limitations to make
    something that is hopefully easier to use and maintain.
    
    The idea behind this compact syntax is that often times it's quite hard to
    correlate the entries in the constraints list, attributes and instruction lists.
    
    One has to count and this often is tedious.  Additionally when changing a single
    line in the insn multiple lines in a diff change, making it harder to see what's
    going on.
    
    This new syntax takes into account many of the common things that are done in MD
    files.   It's also worth saying that this version is intended to deal with the
    common case of a string based alternatives.   For C chunks we have some ideas
    but those are not intended to be addressed here.
    
    It's easiest to explain with an example:
    
    normal syntax:
    
    (define_insn_and_split "*movsi_aarch64"
      [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m,  r,  r,  r, w,r,w, w")
            (match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
      "(register_operand (operands[0], SImode)
        || aarch64_reg_or_zero (operands[1], SImode))"
      "@
       mov\\t%w0, %w1
       mov\\t%w0, %w1
       mov\\t%w0, %w1
       mov\\t%w0, %1
       #
       * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
       ldr\\t%w0, %1
       ldr\\t%s0, %1
       str\\t%w1, %0
       str\\t%s1, %0
       adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]
       adr\\t%x0, %c1
       adrp\\t%x0, %A1
       fmov\\t%s0, %w1
       fmov\\t%w0, %s1
       fmov\\t%s0, %s1
       * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
      "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
        && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
       [(const_int 0)]
       "{
           aarch64_expand_mov_immediate (operands[0], operands[1]);
           DONE;
        }"
      ;; The "mov_imm" type for CNT is just a placeholder.
      [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,
                        load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
       (set_attr "arch"   "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
       (set_attr "length" "4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")
    ]
    )
    
    New syntax:
    
    (define_insn_and_split "*movsi_aarch64"
      [(set (match_operand:SI 0 "nonimmediate_operand")
            (match_operand:SI 1 "aarch64_mov_operand"))]
      "(register_operand (operands[0], SImode)
        || aarch64_reg_or_zero (operands[1], SImode))"
      {@ [cons: =0, 1; attrs: type, arch, length]
         [r , r  ; mov_reg  , *   , 4] mov\t%w0, %w1
         [k , r  ; mov_reg  , *   , 4] ^
         [r , k  ; mov_reg  , *   , 4] ^
         [r , M  ; mov_imm  , *   , 4] mov\t%w0, %1
         [r , n  ; mov_imm  , *   ,16] #
         /* The "mov_imm" type for CNT is just a placeholder.  */
         [r , Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
         [r , m  ; load_4   , *   , 4] ldr\t%w0, %1
         [w , m  ; load_4   , fp  , 4] ldr\t%s0, %1
         [m , rZ ; store_4  , *   , 4] str\t%w1, %0
         [m , w  ; store_4  , fp  , 4] str\t%s1, %0
         [r , Usw; load_4   , *   , 8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1]
         [r , Usa; adr      , *   , 4] adr\t%x0, %c1
         [r , Ush; adr      , *   , 4] adrp\t%x0, %A1
         [w , rZ ; f_mcr    , fp  , 4] fmov\t%s0, %w1
         [r , w  ; f_mrc    , fp  , 4] fmov\t%w0, %s1
         [w , w  ; fmov     , fp  , 4] fmov\t%s0, %s1
         [w , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
      }
      "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
        && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
      [(const_int 0)]
      {
        aarch64_expand_mov_immediate (operands[0], operands[1]);
        DONE;
      }
    )
    
    The main syntax rules are as follows (See docs for full rules):
      - Template must start with "{@" and end with "}" to use the new syntax.
      - "{@" is followed by a layout in square brackets which is "cons:"
        followed by a list of match_operand/match_scratch IDs, then a
        semicolon, then the same for attributes ("attrs:"). Both sections are
        optional (so you can use only cons, or only attrs, or both), and cons
        must come before attrs if present.
      - Each alternative begins with any amount of whitespace.
      - Following the whitespace is a comma-separated list of constraints and/or
        attributes within brackets [], with sections separated by a semicolon.
      - Following the closing ']' is any amount of whitespace, and then the actual
        asm output.
      - Spaces are allowed in the list (they will simply be removed).
      - All alternatives should be specified: a blank list should be
        "[,,]", "[,,;,]" etc., not "[]" or "" (however genattr may segfault if
        you leave certain attributes empty, I have found).
      - The actual constraint string in the match_operand or match_scratch, and
        the attribute string in the set_attr, must be blank or an empty string
        (you can't combine the old and new syntaxes).
      - The common idiom "* return" can be shortened by using "<<".
      - Any unexpanded iterators left during processing will result in an error at
        compile time.   If for some reason <> is needed in the output then these
        must be escaped using \.
      - Within an {@ block both multiline and singleline C comments are allowed, but
        when used outside of a C block they must be the only non-whitespace blocks on
        the line.
      - Inside an {@ block any unexpanded iterators will result in a compile time
        fault instead of incorrect assembly being generated at runtime.  If the
        literal <> is needed in the output this needs to be escaped with \<\>.
      - This check is not performed inside C blocks (lines starting with *).
      - Instead of copying the previous instruction again in the next pattern, one
        can use ^ to refer to the previous asm string.
    
    This patch works by blindly transforming the new syntax into the old syntax,
    so it doesn't do extensive checking. However, it does verify that:
            - The correct number of constraints/attributes are specified.
            - You haven't mixed old and new syntax.
            - The specified operand IDs/attribute names actually exist.
            - You don't have duplicate cons
    
    If something goes wrong, it may write invalid constraints/attributes/template
    back into the rtx. But this shouldn't matter because error_at will cause the
    program to fail on exit anyway.
    
    Because this transformation occurs as early as possible (before patterns are
    queued), the rest of the compiler can completely ignore the new syntax and
    assume that the old syntax will always be used.
    
    This doesn't seem to have any measurable effect on the runtime of gen*
    programs.
    
    gcc/ChangeLog:
    
            * gensupport.cc (class conlist, add_constraints, add_attributes,
            skip_spaces, expect_char, preprocess_compact_syntax,
            parse_section_layout, parse_section, convert_syntax): New.
            (process_rtx): Check for conversion.
            * genoutput.cc (process_template): Check for unresolved iterators.
            (class data): Add compact_syntax_p.
            (gen_insn): Use it.
            * gensupport.h (compact_syntax): New.
            (hash-set.h): Include.
            * doc/md.texi: Document it.
    
    Co-Authored-By: Omar Tahir <Omar.Tahir2@arm.com>

Diff:
---
 gcc/doc/md.texi   | 163 ++++++++++++++++++
 gcc/genoutput.cc  |  48 +++++-
 gcc/gensupport.cc | 498 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 gcc/gensupport.h  |   3 +
 4 files changed, 709 insertions(+), 3 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index b08637bc854..052375b1a31 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -27,6 +27,7 @@ See the next chapter for information on the C header file.
                         from such an insn.
 * Output Statement::    For more generality, write C code to output
                         the assembler code.
+* Compact Syntax::      Compact syntax for writing machine descriptions.
 * Predicates::          Controlling what kinds of operands can be used
                         for an insn.
 * Constraints::         Fine-tuning operand selection.
@@ -713,6 +714,168 @@ you can use @samp{*} inside of a @samp{@@} multi-alternative template:
 @end group
 @end smallexample
 
+@node Compact Syntax
+@section Compact Syntax
+@cindex compact syntax
+
+When a @code{define_insn} or @code{define_insn_and_split} has multiple
+alternatives it may be beneficial to use the compact syntax when specifying
+alternatives.
+
+This syntax puts the constraints and attributes on the same horizontal line as
+the instruction assembly template.
+
+As an example
+
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r")
+	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,Usv"))]
+  ""
+  "@@
+   mov\\t%w0, %w1
+   mov\\t%w0, %w1
+   mov\\t%w0, %w1
+   mov\\t%w0, %1
+   #
+   * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);"
+  "&& true"
+   [(const_int 0)]
+  @{
+     aarch64_expand_mov_immediate (operands[0], operands[1]);
+     DONE;
+  @}
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm")
+   (set_attr "arch"   "*,*,*,*,*,sve")
+   (set_attr "length" "4,4,4,4,*,  4")
+]
+)
+@end group
+@end smallexample
+
+can be better expressed as:
+
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(match_operand:SI 1 "aarch64_mov_operand"))]
+  ""
+  @{@@ [cons: =0, 1; attrs: type, arch, length]
+     [r , r  ; mov_reg  , *   , 4] mov\t%w0, %w1
+     [k , r  ; mov_reg  , *   , 4] ^
+     [r , k  ; mov_reg  , *   , 4] ^
+     [r , M  ; mov_imm  , *   , 4] mov\t%w0, %1
+     [r , n  ; mov_imm  , *   , *] #
+     [r , Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
+  @}
+  "&& true"
+  [(const_int 0)]
+  @{
+    aarch64_expand_mov_immediate (operands[0], operands[1]);
+    DONE;
+  @}
+)
+@end group
+@end smallexample
+
+The syntax rules are as follows:
+@itemize @bullet
+@item
+Templates must start with @samp{@{@@} to use the new syntax.
+
+@item
+@samp{@{@@} is followed by a layout in square brackets which is @samp{cons:}
+followed by a comma-separated list of @code{match_operand}/@code{match_scratch}
+operand numbers, then a semicolon, followed by the same for attributes
+(@samp{attrs:}).  Operand modifiers like @code{=} and @code{+} can be placed
+before an operand number.
+Both sections are optional (so you can use only @samp{cons}, or only
+@samp{attrs}, or both), and @samp{cons} must come before @samp{attrs} if
+present.
+
+@item
+Each alternative begins with any amount of whitespace.
+
+@item
+Following the whitespace is a comma-separated list of "constraints" and/or
+"attributes" within brackets @code{[]}, with sections separated by a semicolon.
+
+@item
+Should you want to copy the previous asm line, the symbol @code{^} can be used.
+This allows less copy pasting between alternatives and reduces the number of
+lines to update on changes.
+
+@item
+When using C functions for output, the idiom @samp{* return @var{function};}
+can be replaced with the shorthand @samp{<< @var{function};}.
+
+@item
+Following the closing @samp{]} is any amount of whitespace, and then the actual
+asm output.
+
+@item
+Spaces are allowed in the list (they will simply be removed).
+
+@item
+All constraint alternatives should be specified.  For example, a list of
+three blank alternatives should be written @samp{[,,]} rather than
+@samp{[]}.
+
+@item
+All attribute alternatives should be non-empty, with @samp{*}
+representing the default attribute value.  For example, a list of three
+default attribute values should be written @samp{[*,*,*]} rather than
+@samp{[]}.
+
+@item
+Within an @samp{@{@@} block both multiline and singleline C comments are
+allowed, but when used outside of a C block they must be the only non-whitespace
+blocks on the line.
+
+@item
+Within an @samp{@{@@} block, any iterators that do not get expanded will result
+in an error.  If for some reason it is required to have @code{<} or @code{>} in
+the output then these must be escaped using @backslashchar{}.
+
+@item
+It is possible to use the @samp{attrs} list to specify some attributes and to
+use the normal @code{set_attr} syntax to specify other attributes.  There must
+not be any overlap between the two lists.
+
+In other words, the following is valid:
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(match_operand:SI 1 "aarch64_mov_operand"))]
+  ""
+  @{@@ [cons: 0, 1; attrs: type, arch, length]@}
+  @dots{} 
+  [(set_attr "foo" "mov_imm")]
+)
+@end group
+@end smallexample
+
+but this is not valid:
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(match_operand:SI 1 "aarch64_mov_operand"))]
+  ""
+  @{@@ [cons: 0, 1; attrs: type, arch, length]@}
+  @dots{} 
+  [(set_attr "arch" "bar")
+   (set_attr "foo" "mov_imm")]
+)
+@end group
+@end smallexample
+
+because it specifies @code{arch} twice.
+@end itemize
+
 @node Predicates
 @section Predicates
 @cindex predicates
diff --git a/gcc/genoutput.cc b/gcc/genoutput.cc
index 163e8dfef4c..3fbdde70df3 100644
--- a/gcc/genoutput.cc
+++ b/gcc/genoutput.cc
@@ -157,6 +157,7 @@ public:
   int n_alternatives;		/* Number of alternatives in each constraint */
   int operand_number;		/* Operand index in the big array.  */
   int output_format;		/* INSN_OUTPUT_FORMAT_*.  */
+  bool compact_syntax_p;
   struct operand_data operand[MAX_MAX_OPERANDS];
 };
 
@@ -700,12 +701,51 @@ process_template (class data *d, const char *template_code)
 	  if (sp != ep)
 	    message_at (d->loc, "trailing whitespace in output template");
 
-	  while (cp < sp)
+	  /* Check for any unexpanded iterators.  */
+	  if (bp[0] != '*' && d->compact_syntax_p)
 	    {
-	      putchar (*cp);
-	      cp++;
+	      const char *p = cp;
+	      const char *last_bracket = nullptr;
+	      while (p < sp)
+		{
+		  if (*p == '\\' && p + 1 < sp)
+		    {
+		      putchar (*p);
+		      putchar (*(p+1));
+		      p += 2;
+		      continue;
+		    }
+
+		  if (*p == '>' && last_bracket && *last_bracket == '<')
+		    {
+		      int len = p - last_bracket;
+		      fatal_at (d->loc, "unresolved iterator '%.*s' in '%s'",
+				len - 1, last_bracket + 1, cp);
+		    }
+		  else if (*p == '<' || *p == '>')
+		    last_bracket = p;
+
+		  putchar (*p);
+		  p += 1;
+		}
+
+	      if (last_bracket)
+		{
+		  char *nl = strchr (const_cast<char*> (cp), '\n');
+		  if (nl)
+		    *nl = '\0';
+		  fatal_at (d->loc, "unmatched angle brackets, likely an "
+			    "error in iterator syntax in %s", cp);
+		}
+	    }
+	  else
+	    {
+	      while (cp < sp)
+		putchar (*(cp++));
 	    }
 
+	  cp = sp;
+
 	  if (!found_star)
 	    puts ("\",");
 	  else if (*bp != '*')
@@ -881,6 +921,8 @@ gen_insn (md_rtx_info *info)
   else
     d->name = 0;
 
+  d->compact_syntax_p = compact_syntax.contains (insn);
+
   /* Build up the list in the same order as the insns are seen
      in the machine description.  */
   d->next = 0;
diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
index f9efc6eb757..980b49cd481 100644
--- a/gcc/gensupport.cc
+++ b/gcc/gensupport.cc
@@ -18,6 +18,8 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include "bconfig.h"
+#define INCLUDE_STRING
+#define INCLUDE_VECTOR
 #include "system.h"
 #include "coretypes.h"
 #include "tm.h"
@@ -33,6 +35,8 @@
 static rtx operand_data[MAX_OPERANDS];
 static rtx match_operand_entries_in_pattern[MAX_OPERANDS];
 static char used_operands_numbers[MAX_OPERANDS];
+/* List of entries which are part of the new syntax.  */
+hash_set<rtx> compact_syntax;
 
 
 /* In case some macros used by files we include need it, define this here.  */
@@ -545,6 +549,497 @@ gen_rewrite_sequence (rtvec vec)
   return new_vec;
 }
 
+/* The following is for handling the compact syntax for constraints and
+   attributes.
+
+   The normal syntax looks like this:
+
+       ...
+       (match_operand: 0 "s_register_operand" "r,I,k")
+       (match_operand: 2 "s_register_operand" "r,k,I")
+       ...
+       "@
+	<asm>
+	<asm>
+	<asm>"
+       ...
+       (set_attr "length" "4,8,8")
+
+   The compact syntax looks like this:
+
+       ...
+       (match_operand: 0 "s_register_operand")
+       (match_operand: 2 "s_register_operand")
+       ...
+       {@ [cons: 0, 2; attrs: length]
+	[r,r; 4] <asm>
+	[I,k; 8] <asm>
+	[k,I; 8] <asm>
+       }
+       ...
+       [<other attributes>]
+
+   This is the only place where this syntax needs to be handled.  Relevant
+   patterns are transformed from compact to the normal syntax before they are
+   queued, so none of the gen* programs need to know about this syntax at all.
+
+   Conversion process (convert_syntax):
+
+   0) Check that pattern actually uses new syntax (check for {@ ... }).
+
+   1) Get the "layout", i.e. the "[cons: 0, 2; attrs: length]" from the above
+      example.  cons must come first; both are optional. Set up two vecs,
+      convec and attrvec, for holding the results of the transformation.
+
+   2) For each alternative: parse the list of constraints and/or attributes,
+      and enqueue them in the relevant lists in convec and attrvec.  By the end
+      of this process, convec[N].con and attrvec[N].con should contain regular
+      syntax constraint/attribute lists like "r,I,k".  Copy the asm to a string
+      as we go.
+
+   3) Search the rtx and write the constraint and attribute lists into the
+      correct places. Write the asm back into the template.  */
+
+/* Helper class for shuffling constraints/attributes in convert_syntax and
+   add_constraints/add_attributes.  This includes commas but not whitespace.  */
+
+class conlist {
+private:
+  std::string con;
+
+public:
+  std::string name;
+  int idx = -1;
+
+  conlist () = default;
+
+  /* [ns..ns + len) should be a string with the id of the rtx to match
+     i.e. if rtx is the relevant match_operand or match_scratch then
+     [ns..ns + len) should equal itoa (XINT (rtx, 0)), and if set_attr then
+     [ns..ns + len) should equal XSTR (rtx, 0).  */
+  conlist (const char *ns, unsigned int len, bool numeric)
+  {
+    /* Trim leading whitespaces, never stepping outside [ns..ns + len).  */
+    while (len > 0 && ISBLANK (*ns))
+      {
+	ns++;
+	len--;
+      }
+
+    /* Trim trailing whitespace.  */
+    for (int i = len - 1; i >= 0; i--, len--)
+      if (!ISBLANK (ns[i]))
+	break;
+
+    /* Parse off any modifiers; they become the start of the string.  */
+    while (len > 0 && !ISALNUM (*ns))
+      {
+	con += *(ns++);
+	len--;
+      }
+
+    name.assign (ns, len);
+    if (numeric)
+      idx = std::stoi (name);
+  }
+
+  /* Adds a character to the end of the string.  */
+  void add (char c)
+  {
+    con += c;
+  }
+
+  /* Output the string, minus its trailing comma, in the form of a brand-new
+     char *, then clear the internal string.  */
+  char *out ()
+  {
+    /* Final character is always a trailing comma, so strip it out.  */
+    char *q = xstrndup (con.c_str (), con.empty () ? 0 : con.size () - 1);
+    con.clear ();
+    return q;
+  }
+};
+
+typedef std::vector<conlist> vec_conlist;
+
+/* Add the constraints in CONS to the match_operand/match_scratch rtxes in
+   PART (cf. remove_constraints).  Errors at LOC if one already has some.  */
+
+static void
+add_constraints (rtx part, file_location loc, vec_conlist &cons)
+{
+  const char *format_ptr;
+
+  if (part == NULL_RTX)
+    return;
+
+  /* For a match_operand or match_scratch, check whether CONS still has an
+     entry for its operand number and, if so, install the constraint list.  */
+  if (GET_CODE (part) == MATCH_OPERAND || GET_CODE (part) == MATCH_SCRATCH)
+    {
+      int field = GET_CODE (part) == MATCH_OPERAND ? 2 : 1;
+      unsigned id = XINT (part, 0);
+
+      if (id >= cons.size () || cons[id].idx == -1)
+	return;
+
+      if (XSTR (part, field)[0] != '\0')
+	{
+	  error_at (loc, "can't mix normal and compact constraint syntax");
+	  return;
+	}
+      XSTR (part, field) = cons[id].out ();
+      cons[id].idx = -1;
+    }
+
+  format_ptr = GET_RTX_FORMAT (GET_CODE (part));
+
+  /* Recursively search the sub-rtxes and rtx vectors of PART.  */
+  for (int i = 0; i < GET_RTX_LENGTH (GET_CODE (part)); i++)
+    switch (*format_ptr++)
+      {
+      case 'e':
+      case 'u':
+	add_constraints (XEXP (part, i), loc, cons);
+	break;
+      case 'E':
+	if (XVEC (part, i) != NULL)
+	  for (int j = 0; j < XVECLEN (part, i); j++)
+	    add_constraints (XVECEXP (part, i, j), loc, cons);
+	break;
+      default:
+	continue;
+      }
+}
+
+/* Prepend a set_attr for each entry of ATTRS to X's attribute list.  */
+
+static void
+add_attributes (rtx x, vec_conlist &attrs)
+{
+  unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3;
+  rtvec orig = XVEC (x, attr_index);
+  if (orig)
+    {
+      size_t n_curr = XVECLEN (x, attr_index);
+      rtvec copy = rtvec_alloc (n_curr + attrs.size ());
+
+      /* Shallow-copy the existing entries after the slots for the new ones.  */
+      memcpy (&copy->elem[attrs.size ()], &orig->elem[0],
+	      sizeof (rtx) * n_curr);
+      XVEC (x, attr_index) = copy;
+    }
+   else
+    XVEC (x, attr_index) = rtvec_alloc (attrs.size ());
+
+  /* Create the new elements in the leading slots.  */
+  for (unsigned i = 0; i < attrs.size (); i++)
+    {
+      rtx attr = rtx_alloc (SET_ATTR);
+      XSTR (attr, 0) = xstrdup (attrs[i].name.c_str ());
+      XSTR (attr, 1) = attrs[i].out ();
+      XVECEXP (x, attr_index, i) = attr;
+    }
+}
+
+/* Advance *STR past any run of blanks (spaces and tabs).  */
+
+static inline void
+skip_spaces (const char **str)
+{
+  while (ISBLANK (**str))
+    (*str)++;
+}
+
+/* If **STR is C, step *STR past it and return true; else return false.  */
+
+static inline bool
+expect_char (const char **str, char c)
+{
+  if (**str != c)
+    return false;
+  (*str)++;
+  return true;
+}
+
+/* Parses the section layout that follows a "{@" if using new syntax. Builds
+   a vector for a single section. E.g. if we have "attrs: length, arch]..."
+   then list will have two elements, the first for "length" and the second
+   for "arch".  */
+
+static void
+parse_section_layout (file_location loc, const char **templ, const char *label,
+		      vec_conlist &list, bool numeric)
+{
+  size_t label_len = strlen (label);
+  if (strncmp (label, *templ, label_len) == 0)
+    {
+      *templ += label_len;
+
+      /* Gather the names.  */
+      while (**templ != ';' && **templ != ']')
+	{
+	  skip_spaces (templ);
+	  const char *name_start = *templ;
+	  int len = 0;
+	  char val = (*templ)[len];
+	  while (val != ',' && val != ';' && val != ']')
+	    {
+	      if (val == 0 || val == '\n')
+		fatal_at (loc, "missing ']'");
+	      val = (*templ)[++len];
+	    }
+	  /* An empty entry (e.g. "0,,1" or a trailing ',') has no name.  */
+	  if (len == 0)
+	    fatal_at (loc, "empty entry in '%s' section list", label);
+	  *templ += len + (val == ',');
+	  list.push_back (conlist (name_start, len, numeric));
+	}
+    }
+}
+
+/* Parse one section of alternative ALT_NO, i.e. a comma separated list, e.g.
+
+   a, b, c
+
+   appending each entry to the matching element of LIST.  */
+
+static void
+parse_section (const char **templ, unsigned int n_elems, unsigned int alt_no,
+	       vec_conlist &list, file_location loc, const char *name)
+{
+  unsigned int i;
+
+  /* Go through the list, one character at a time, adding each non-blank
+     character (separating commas included) to the correct string.  */
+  for (i = 0; **templ != ']' && **templ != ';'; (*templ)++)
+    if (!ISBLANK (**templ))
+      {
+	if (**templ == 0 || **templ == '\n')
+	  fatal_at (loc, "missing ']'");
+	list[i].add (**templ);
+	if (**templ == ',')
+	  {
+	    ++i;
+	    if (i == n_elems)
+	      fatal_at (loc, "too many %ss in alternative %d: expected %d",
+			name, alt_no, n_elems);
+	  }
+      }
+
+  if (i + 1 < n_elems)
+    fatal_at (loc, "too few %ss in alternative %d: expected %d, got %d",
+	      name, alt_no, n_elems, i + 1);
+
+  list[i].add (',');
+}
+
+/* The compact syntax has more convenience syntaxes.  As such we pre-process
+   the lines to get them back to something the normal syntax understands.
+   LINE is rewritten in place; LAST_LINE is the previous processed line.  */
+
+static void
+preprocess_compact_syntax (file_location loc, int alt_no, std::string &line,
+			   std::string &last_line)
+{
+  /* Check if we're copying the last statement.  */
+  if (line.find ("^") == 0 && line.size () == 1)
+    {
+      if (last_line.empty ())
+	fatal_at (loc, "found instruction to copy previous line (^) in "
+		       "alternative %d but no previous line to copy", alt_no);
+      line = last_line;
+      return;
+    }
+
+  std::string result;
+  /* Check if we have << which means return c statement.  */
+  if (line.find ("<<") == 0)
+    {
+      result.append ("* return ");
+      const char *chunk = line.c_str () + 2;
+      skip_spaces (&chunk);
+      result.append (chunk);
+    }
+  else
+    result.append (line);
+
+  line = result;
+  return;
+}
+
+/* Converts an rtx from compact syntax to normal syntax if possible.  X is the
+   define_insn or define_cond_exec being processed and LOC is its location,
+   used in diagnostics.  Returns without touching X unless its template uses
+   the compact syntax.  */
+
+static void
+convert_syntax (rtx x, file_location loc)
+{
+  int alt_no;
+  unsigned int templ_index;
+  const char *templ;
+  vec_conlist tconvec, convec, attrvec;
+
+  templ_index = GET_CODE (x) == DEFINE_INSN ? 3 : 2;
+
+  templ = XTMPL (x, templ_index);
+
+  /* Templates with constraints start with "{@".  */
+  if (strncmp ("*{@", templ, 3))
+    return;
+
+  /* Get the layout for the template.  */
+  templ += 3;
+  skip_spaces (&templ);
+
+  if (!expect_char (&templ, '['))
+    fatal_at (loc, "expecting `[' to begin section list");
+
+  parse_section_layout (loc, &templ, "cons:", tconvec, true);
+
+  if (*templ != ']')
+    {
+      if (*templ == ';')
+	skip_spaces (&(++templ));
+      parse_section_layout (loc, &templ, "attrs:", attrvec, false);
+    }
+
+  if (!expect_char (&templ, ']'))
+    fatal_at (loc, "expecting `]` to end section list - section list must have "
+		   "cons first, attrs second");
+
+  /* We will write the un-constrainified template into new_templ.  */
+  std::string new_templ;
+  new_templ.append ("@");
+
+  /* Skip to the first proper line.  */
+  skip_spaces (&templ);
+  if (*templ == 0)
+    fatal_at (loc, "'{@...}' blocks must have at least one alternative");
+  if (*templ != '\n')
+    fatal_at (loc, "unexpected character '%c' after ']'", *templ);
+  templ++;
+
+  alt_no = 0;
+  std::string last_line;
+
+  /* Process the alternatives.  */
+  while (*(templ - 1) != '\0')
+    {
+      /* Skip leading whitespace.  */
+      skip_spaces (&templ);
+
+      /* Check if we're at the end.  */
+      if (templ[0] == '}' && templ[1] == '\0')
+	break;
+
+      if (expect_char (&templ, '['))
+	{
+	  new_templ += '\n';
+	  /* Parse the constraint list, then the attribute list.  */
+	  if (tconvec.size () > 0)
+	    parse_section (&templ, tconvec.size (), alt_no, tconvec,
+			   loc, "constraint");
+
+	  if (attrvec.size () > 0)
+	    {
+	      if (tconvec.size () > 0 && !expect_char (&templ, ';'))
+		fatal_at (loc, "expected `;' to separate constraints "
+			       "and attributes in alternative %d", alt_no);
+
+	      parse_section (&templ, attrvec.size (), alt_no,
+			     attrvec, loc, "attribute");
+	    }
+
+	  if (!expect_char (&templ, ']'))
+	    fatal_at (loc, "expected end of constraint/attribute list but "
+			   "missing an ending `]' in alternative %d", alt_no);
+	}
+      else if (templ[0] == '/' && templ[1] == '/')
+	{
+	  templ += 2;
+	  /* Glob till newline or end of string.  */
+	  while (*templ != '\n' && *templ != '\0')
+	    templ++;
+
+	  /* Skip any newlines or whitespaces needed.  */
+	  while (ISSPACE(*templ))
+	    templ++;
+	  continue;
+	}
+      else if (templ[0] == '/' && templ[1] == '*')
+	{
+	  templ += 2;
+	  /* Glob till the end of the multiline comment.  */
+	  while (templ[0] != '*' || templ[1] != '/')
+	    {
+	      if (templ[0] == 0)
+		fatal_at (loc, "unterminated '/*'");
+	      templ++;
+	    }
+	  templ += 2;
+
+	  /* Skip any newlines or whitespaces needed.  */
+	  while (ISSPACE(*templ))
+	    templ++;
+	  continue;
+	}
+      else
+	fatal_at (loc, "expected constraint/attribute list at beginning of "
+		       "alternative %d but missing a starting `['", alt_no);
+
+      /* Skip whitespace between list and asm.  */
+      skip_spaces (&templ);
+
+      /* Copy asm to new template.  */
+      std::string line;
+      while (*templ != '\n' && *templ != '\0')
+	line += *templ++;
+
+      /* Apply any pre-processing needed to the line.  */
+      preprocess_compact_syntax (loc, alt_no, line, last_line);
+      new_templ.append (line);
+      last_line = line;
+
+      /* Normal "@..." syntax expects the closing quote to be on the final
+	 line of asm, whereas we allow the closing "}" to be on its own line.
+	 Postpone copying the '\n' until we know that there is another
+	 alternative in the list.  */
+      while (ISSPACE (*templ))
+	templ++;
+      ++alt_no;
+    }
+
+  /* The alternatives list constraints in the order given by "cons:", so they
+     were collected in tconvec in that order.  Now index them by operand
+     number in convec, checking for duplicate cons entries as we go.  */
+  for (auto e : tconvec)
+    {
+      unsigned idx = e.idx;
+      if (idx >= convec.size ())
+	convec.resize (idx + 1);
+
+      if (convec[idx].idx >= 0)
+	fatal_at (loc, "duplicate cons number found: %d", idx);
+      convec[idx] = e;
+    }
+  tconvec.clear ();
+
+  /* Write the constraints and attributes into their proper places.  */
+  if (convec.size () > 0)
+    add_constraints (x, loc, convec);
+
+  if (attrvec.size () > 0)
+    add_attributes (x, attrvec);
+
+  /* Copy over the new un-constrainified template.  */
+  XTMPL (x, templ_index) = xstrdup (new_templ.c_str ());
+
+  /* Register for later checks during iterator expansions.  */
+  compact_syntax.add (x);
+}
+
 /* Process a top level rtx in some way, queuing as appropriate.  */
 
 static void
@@ -553,10 +1048,12 @@ process_rtx (rtx desc, file_location loc)
   switch (GET_CODE (desc))
     {
     case DEFINE_INSN:
+      convert_syntax (desc, loc);
       queue_pattern (desc, &define_insn_tail, loc);
       break;
 
     case DEFINE_COND_EXEC:
+      convert_syntax (desc, loc);
       queue_pattern (desc, &define_cond_exec_tail, loc);
       break;
 
@@ -631,6 +1128,7 @@ process_rtx (rtx desc, file_location loc)
 	attr = XVEC (desc, split_code + 1);
 	PUT_CODE (desc, DEFINE_INSN);
 	XVEC (desc, 4) = attr;
+	convert_syntax (desc, loc);
 
 	/* Queue them.  */
 	insn_elem = queue_pattern (desc, &define_insn_tail, loc);
diff --git a/gcc/gensupport.h b/gcc/gensupport.h
index a1edfbd7190..7925e22ed41 100644
--- a/gcc/gensupport.h
+++ b/gcc/gensupport.h
@@ -20,6 +20,7 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef GCC_GENSUPPORT_H
 #define GCC_GENSUPPORT_H
 
+#include "hash-set.h"
 #include "read-md.h"
 
 struct obstack;
@@ -218,6 +219,8 @@ struct pattern_stats
   int num_operand_vars;
 };
 
+extern hash_set<rtx> compact_syntax;
+
 extern void get_pattern_stats (struct pattern_stats *ranges, rtvec vec);
 extern void compute_test_codes (rtx, file_location, char *);
 extern file_location get_file_location (rtx);

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-06-19 14:57 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-19 14:57 [gcc r14-1949] New compact syntax for insn and insn_split in Machine Descriptions Tamar Christina

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).