public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-5246] VAX: Add the `setmemhi' instruction
@ 2021-11-14 21:07 Maciej W. Rozycki
  0 siblings, 0 replies; only message in thread
From: Maciej W. Rozycki @ 2021-11-14 21:07 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:3057f1ab737582a9fb37a3fb967ed8bf3659f2f4

commit r12-5246-g3057f1ab737582a9fb37a3fb967ed8bf3659f2f4
Author: Maciej W. Rozycki <macro@embecosm.com>
Date:   Sun Nov 14 21:01:51 2021 +0000

    VAX: Add the `setmemhi' instruction
    
    The MOVC5 machine instruction has `memset' semantics if encoded with a
    zero source length[1]:
    
    "4. MOVC5 with a zero source length operand is the preferred way
        to fill a block of memory with the fill character."
    
    Use that instruction to implement the `setmemhi' instruction then.  Use
    the AP register in the register deferred mode for the source address to
    yield the shortest possible encoding of the otherwise unused operand,
    observing that the address is never dereferenced if the source length is
    zero.
    
    The use of this instruction yields steadily better performance, at least
    with the Mariah VAX implementation, for a variable-length `memset' call
    expanded inline as a single MOVC5 operation compared to an equivalent
    libcall invocation:
    
    Length:   1, time elapsed:  0.971789 (builtin),  2.847303 (libcall)
    Length:   2, time elapsed:  0.907904 (builtin),  2.728259 (libcall)
    Length:   3, time elapsed:  1.038311 (builtin),  2.917245 (libcall)
    Length:   4, time elapsed:  0.775305 (builtin),  2.686088 (libcall)
    Length:   7, time elapsed:  1.112331 (builtin),  2.992968 (libcall)
    Length:   8, time elapsed:  0.856882 (builtin),  2.764885 (libcall)
    Length:  15, time elapsed:  1.256086 (builtin),  3.096660 (libcall)
    Length:  16, time elapsed:  1.001962 (builtin),  2.888131 (libcall)
    Length:  31, time elapsed:  1.590456 (builtin),  3.774164 (libcall)
    Length:  32, time elapsed:  1.288909 (builtin),  3.629622 (libcall)
    Length:  63, time elapsed:  3.430285 (builtin),  5.269789 (libcall)
    Length:  64, time elapsed:  3.265147 (builtin),  5.113156 (libcall)
    Length: 127, time elapsed:  6.438772 (builtin),  8.268305 (libcall)
    Length: 128, time elapsed:  6.268991 (builtin),  8.114557 (libcall)
    Length: 255, time elapsed: 12.417338 (builtin), 14.259678 (libcall)
    
    (times given in seconds per 1000000 `memset' invocations for the given
    length made in a loop).  It is clear from these figures that the
    hardware coalesces data for consecutive bytes rather than naively
    copying them one by one, as for lengths that are powers of 2 the
    figures are consistently lower than those for their respective next
    lower lengths.
    
    The use of MOVC5 also takes at least 4 bytes less of machine code, as
    it avoids encoding the address of `memset' needed for the CALLS
    instruction used to make a libcall, as well as the extra PUSHL
    instructions needed to pass arguments to the call, since those can be
    encoded directly as the respective operands of the MOVC5 instruction.
    
    It is perhaps also worth noting that for constant lengths we prefer to
    emit up to 5 individual MOVx instructions rather than a single MOVC5
    instruction to clear memory, and for consistency we copy this behavior
    here for filling memory with another value too, even though there may
    be a performance advantage with a string copy in comparison to a
    piecemeal copy, e.g.:
    
    Length:  40, time elapsed:  2.183192 (string),   2.638878 (piecemeal)
    
    But this is something for another change as it will have to be carefully
    evaluated.
    
    [1] DEC STD 032-0 "VAX Architecture Standard", Digital Equipment
        Corporation, A-DS-EL-00032-00-0 Rev J, December 15, 1989, Section
        3.10 "Character-String Instructions", p. 3-163
    
            gcc/
            * config/vax/vax.h (SET_RATIO): New macro.
            * config/vax/vax.md (UNSPEC_SETMEM_FILL): New constant.
            (setmemhi): New expander.
            (setmemhi1): New insn and splitter.
            (*setmemhi1): New insn.
    
            gcc/testsuite/
            * gcc.target/vax/setmem.c: New test.

Diff:
---
 gcc/config/vax/vax.h                  |  1 +
 gcc/config/vax/vax.md                 | 64 +++++++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/vax/setmem.c | 22 ++++++++++++
 3 files changed, 87 insertions(+)

diff --git a/gcc/config/vax/vax.h b/gcc/config/vax/vax.h
index b67d668ad99..006c0ab492d 100644
--- a/gcc/config/vax/vax.h
+++ b/gcc/config/vax/vax.h
@@ -433,6 +433,7 @@ enum reg_class { NO_REGS, ALL_REGS, LIM_REG_CLASSES };
    move-instruction pairs, we will do a cpymem or libcall instead.  */
 #define MOVE_RATIO(speed) ((speed) ? 6 : 3)
 #define CLEAR_RATIO(speed) ((speed) ? 6 : 2)
+#define SET_RATIO(speed) ((speed) ? 6 : 2)
 
 /* Nonzero if access to memory by bytes is slow and undesirable.  */
 #define SLOW_BYTE_ACCESS 0
diff --git a/gcc/config/vax/vax.md b/gcc/config/vax/vax.md
index 5b1b392b867..977dc809d5f 100644
--- a/gcc/config/vax/vax.md
+++ b/gcc/config/vax/vax.md
@@ -32,6 +32,12 @@
   VUNSPEC_PEM		    ; 'procedure_entry_mask' insn.
 ])
 
+;; UNSPEC usage:
+
+(define_c_enum "unspec" [
+  UNSPEC_SETMEM_FILL	    ; 'fill' operand to 'setmem' insn.
+])
+
 (define_constants
   [(VAX_AP_REGNUM 12)	    ; Register 12 contains the argument pointer
    (VAX_FP_REGNUM 13)	    ; Register 13 contains the frame pointer
@@ -438,6 +444,64 @@
    (clobber (reg:CC VAX_PSL_REGNUM))]
   "reload_completed"
   "movc3 %2,%1,%0")
+
+;; This is here to accept 4 arguments and pass the first 3 along
+;; to the setmemhi1 pattern that really does the work.
+(define_expand "setmemhi"
+  [(set (match_operand:BLK 0 "memory_operand" "")
+	(match_operand:QI 2 "general_operand" ""))
+   (use (match_operand:HI 1 "general_operand" ""))
+   (match_operand 3 "" "")]
+  ""
+  "
+{
+  emit_insn (gen_setmemhi1 (operands[0], operands[1], operands[2]));
+  DONE;
+}")
+
+;; The srcaddr operand of MOVC5 is not dereferenced if srclen is zero, so we
+;; set it to (%ap) somewhat arbitrarily chosen for the shortest encoding.
+(define_insn_and_split "setmemhi1"
+  [(set (match_operand:BLK 0 "memory_operand" "=o")
+	(unspec:BLK [(use (match_operand:QI 2 "general_operand" "g"))]
+		    UNSPEC_SETMEM_FILL))
+   (use (match_operand:HI 1 "general_operand" "g"))
+   (clobber (reg:SI 0))
+   (clobber (reg:SI 1))
+   (clobber (reg:SI 2))
+   (clobber (reg:SI 3))
+   (clobber (reg:SI 4))
+   (clobber (reg:SI 5))]
+  ""
+  "#"
+  "reload_completed"
+  [(parallel
+     [(set (match_dup 0)
+	   (unspec:BLK [(use (match_dup 2))] UNSPEC_SETMEM_FILL))
+      (use (match_dup 1))
+      (clobber (reg:SI 0))
+      (clobber (reg:SI 1))
+      (clobber (reg:SI 2))
+      (clobber (reg:SI 3))
+      (clobber (reg:SI 4))
+      (clobber (reg:SI 5))
+      (clobber (reg:CC VAX_PSL_REGNUM))])]
+  "")
+
+(define_insn "*setmemhi1"
+  [(set (match_operand:BLK 0 "memory_operand" "=o")
+	(unspec:BLK [(use (match_operand:QI 2 "general_operand" "g"))]
+		    UNSPEC_SETMEM_FILL))
+   (use (match_operand:HI 1 "general_operand" "g"))
+   (clobber (reg:SI 0))
+   (clobber (reg:SI 1))
+   (clobber (reg:SI 2))
+   (clobber (reg:SI 3))
+   (clobber (reg:SI 4))
+   (clobber (reg:SI 5))
+   (clobber (reg:CC VAX_PSL_REGNUM))]
+  "reload_completed"
+  "movc5 $0,(%%ap),%2,%1,%0")
 \f
 ;; Extension and truncation insns.
 
diff --git a/gcc/testsuite/gcc.target/vax/setmem.c b/gcc/testsuite/gcc.target/vax/setmem.c
new file mode 100644
index 00000000000..6c60cfcd555
--- /dev/null
+++ b/gcc/testsuite/gcc.target/vax/setmem.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
+
+#include <stddef.h>
+
+void *
+memset8 (void *block, int c, size_t size)
+{
+  unsigned char s8 = size;
+  return __builtin_memset (block, c, s8);
+}
+
+/* Expect assembly like:
+
+	movl 4(%ap),%r6
+	movzbl 12(%ap),%r7
+	movc5 $0,(%ap),8(%ap),%r7,(%r6)
+	movl %r6,%r0
+
+ */
+
+/* { dg-final { scan-assembler "\tmovc5 \\\$0,\\\(%ap\\\)," } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-11-14 21:07 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-14 21:07 [gcc r12-5246] VAX: Add the `setmemhi' instruction Maciej W. Rozycki

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).