public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [committed] d: Merge upstream dmd 46133f761, druntime 0fd4364c
@ 2021-02-04 22:14 Iain Buclaw
  0 siblings, 0 replies; only message in thread
From: Iain Buclaw @ 2021-02-04 22:14 UTC (permalink / raw)
  To: gcc-patches

Hi,

This patch merges the D front-end implementation with upstream dmd
46133f761, and the D runtime library with upstream druntime 0fd4364c.

It also backports built-in function handling from upstream and adds a
new intrinsic, `byteswap(ushort)`.

Intrinsic modules have been updated accordingly in the runtime library,
and a few more platform-specific fixes have been downstreamed as well.

Bootstrapped and regression tested on x86_64-linux-gnu/-m32/-mx32, and
committed to mainline.

Regards,
Iain.

---
gcc/d/ChangeLog:

	* dmd/MERGE: Merge upstream dmd 46133f761.
	* d-builtins.cc (d_build_builtins_module): Set builtins as BUILTINgcc.
	(maybe_set_builtin_1): Likewise.
	* d-frontend.cc (eval_builtin): Adjust condition for early return.
	* intrinsics.cc (maybe_set_intrinsic): Set intrinsics as BUILTINgcc.
	(maybe_expand_intrinsic): Add case for INTRINSIC_BSWAP16.
	* intrinsics.def (INTRINSIC_BT): Update signature.
	(INTRINSIC_BT64): Likewise.
	(INTRINSIC_BSWAP16): New intrinsic.
	(INTRINSIC_VLOAD8): Update module.
	(INTRINSIC_VLOAD16): Likewise.
	(INTRINSIC_VLOAD32): Likewise.
	(INTRINSIC_VLOAD64): Likewise.
	(INTRINSIC_VSTORE8): Likewise.
	(INTRINSIC_VSTORE16): Likewise.
	(INTRINSIC_VSTORE32): Likewise.
	(INTRINSIC_VSTORE64): Likewise.
	(INTRINSIC_ADDS): Update signature.
	(INTRINSIC_ADDSL): Likewise.
	(INTRINSIC_ADDU): Likewise.
	(INTRINSIC_ADDUL): Likewise.
	(INTRINSIC_SUBS): Likewise.
	(INTRINSIC_SUBSL): Likewise.
	(INTRINSIC_SUBU): Likewise.
	(INTRINSIC_SUBUL): Likewise.
	(INTRINSIC_MULS): Likewise.
	(INTRINSIC_MULSL): Likewise.
	(INTRINSIC_MULU): Likewise.
	(INTRINSIC_MULUI): Likewise.
	(INTRINSIC_MULUL): Likewise.
	(INTRINSIC_NEGS): Likewise.
	(INTRINSIC_NEGSL): Likewise.


libphobos/ChangeLog:

	PR d/98910
	* libdruntime/MERGE: Merge upstream druntime 0fd4364c.
	* libdruntime/Makefile.am (DRUNTIME_DSOURCES): Add core/volatile.d.
	* libdruntime/Makefile.in: Regenerate.
	* testsuite/libphobos.allocations/tls_gc_integration.d: Update test.

gcc/testsuite/ChangeLog:

	* gdc.dg/intrinsics.d: Update test.
---
 gcc/d/d-builtins.cc                           |    4 +-
 gcc/d/d-frontend.cc                           |    2 +-
 gcc/d/dmd/MERGE                               |    2 +-
 gcc/d/dmd/declaration.h                       |   40 +-
 gcc/d/dmd/dinterpret.c                        |    2 +-
 gcc/d/dmd/idgen.c                             |   38 +
 gcc/d/dmd/root/ctfloat.h                      |   18 +
 gcc/d/intrinsics.cc                           |    7 +-
 gcc/d/intrinsics.def                          |   53 +-
 gcc/testsuite/gdc.dg/intrinsics.d             |    3 +
 libphobos/libdruntime/MERGE                   |    2 +-
 libphobos/libdruntime/Makefile.am             |   31 +-
 libphobos/libdruntime/Makefile.in             |   48 +-
 libphobos/libdruntime/core/bitop.d            |  145 ++-
 libphobos/libdruntime/core/checkedint.d       |   48 +-
 libphobos/libdruntime/core/cpuid.d            |   69 +-
 libphobos/libdruntime/core/internal/traits.d  |  152 ++-
 libphobos/libdruntime/core/simd.d             | 1096 ++++++++++-------
 libphobos/libdruntime/core/stdc/stdio.d       |    2 +-
 libphobos/libdruntime/core/sys/posix/locale.d |   28 +
 libphobos/libdruntime/core/thread/osthread.d  |    8 -
 libphobos/libdruntime/core/vararg.d           |  122 ++
 libphobos/libdruntime/core/volatile.d         |   67 +
 libphobos/libdruntime/rt/lifetime.d           |  109 +-
 .../tls_gc_integration.d                      |    2 +-
 25 files changed, 1329 insertions(+), 769 deletions(-)
 create mode 100644 libphobos/libdruntime/core/volatile.d

diff --git a/gcc/d/d-builtins.cc b/gcc/d/d-builtins.cc
index 3f1533b592f..c45edc2fa3f 100644
--- a/gcc/d/d-builtins.cc
+++ b/gcc/d/d-builtins.cc
@@ -566,7 +566,7 @@ d_build_builtins_module (Module *m)
 				   STCextern, tf);
       DECL_LANG_SPECIFIC (decl) = build_lang_decl (func);
       func->csym = decl;
-      func->builtin = BUILTINyes;
+      func->builtin = BUILTINgcc;
 
       members->push (func);
     }
@@ -706,7 +706,7 @@ maybe_set_builtin_1 (Dsymbol *d)
 	  /* Found a match, tell the frontend this is a builtin.  */
 	  DECL_LANG_SPECIFIC (t) = build_lang_decl (fd);
 	  fd->csym = t;
-	  fd->builtin = BUILTINyes;
+	  fd->builtin = BUILTINgcc;
 	  return;
 	}
     }
diff --git a/gcc/d/d-frontend.cc b/gcc/d/d-frontend.cc
index 32550ecfd64..84c70f8ee6a 100644
--- a/gcc/d/d-frontend.cc
+++ b/gcc/d/d-frontend.cc
@@ -158,7 +158,7 @@ isBuiltin (FuncDeclaration *fd)
 Expression *
 eval_builtin (Loc loc, FuncDeclaration *fd, Expressions *arguments)
 {
-  if (fd->builtin != BUILTINyes)
+  if (fd->builtin == BUILTINunimp)
     return NULL;
 
   tree decl = get_symbol_decl (fd);
diff --git a/gcc/d/dmd/MERGE b/gcc/d/dmd/MERGE
index 342871f9a1a..89397c836e2 100644
--- a/gcc/d/dmd/MERGE
+++ b/gcc/d/dmd/MERGE
@@ -1,4 +1,4 @@
-5e2a81d9cbcd653d9eed52344d664e72ba1355bc
+46133f76172c26c89e2ebf9cd058cd1f1e8807ed
 
 The first line of this file holds the git revision number of the last
 merge done from the dlang/dmd repository.
diff --git a/gcc/d/dmd/declaration.h b/gcc/d/dmd/declaration.h
index a464f9beb71..6a3ec9e7d5c 100644
--- a/gcc/d/dmd/declaration.h
+++ b/gcc/d/dmd/declaration.h
@@ -503,9 +503,43 @@ enum ILS
 
 enum BUILTIN
 {
-    BUILTINunknown = -1,        // not known if this is a builtin
-    BUILTINno,                  // this is not a builtin
-    BUILTINyes                  // this is a builtin
+    BUILTINunknown = 255,   /// not known if this is a builtin
+    BUILTINunimp = 0,       /// this is not a builtin
+    BUILTINgcc,             /// this is a GCC builtin
+    BUILTINllvm,            /// this is an LLVM builtin
+    BUILTINsin,
+    BUILTINcos,
+    BUILTINtan,
+    BUILTINsqrt,
+    BUILTINfabs,
+    BUILTINldexp,
+    BUILTINlog,
+    BUILTINlog2,
+    BUILTINlog10,
+    BUILTINexp,
+    BUILTINexpm1,
+    BUILTINexp2,
+    BUILTINround,
+    BUILTINfloor,
+    BUILTINceil,
+    BUILTINtrunc,
+    BUILTINcopysign,
+    BUILTINpow,
+    BUILTINfmin,
+    BUILTINfmax,
+    BUILTINfma,
+    BUILTINisnan,
+    BUILTINisinfinity,
+    BUILTINisfinite,
+    BUILTINbsf,
+    BUILTINbsr,
+    BUILTINbswap,
+    BUILTINpopcnt,
+    BUILTINyl2x,
+    BUILTINyl2xp1,
+    BUILTINtoPrecFloat,
+    BUILTINtoPrecDouble,
+    BUILTINtoPrecReal
 };
 
 Expression *eval_builtin(Loc loc, FuncDeclaration *fd, Expressions *arguments);
diff --git a/gcc/d/dmd/dinterpret.c b/gcc/d/dmd/dinterpret.c
index 74c5b40741f..f868790f9e7 100644
--- a/gcc/d/dmd/dinterpret.c
+++ b/gcc/d/dmd/dinterpret.c
@@ -6801,7 +6801,7 @@ Expression *evaluateIfBuiltin(UnionExp *pue, InterState *istate, Loc loc,
     size_t nargs = arguments ? arguments->length : 0;
     if (!pthis)
     {
-        if (isBuiltin(fd) == BUILTINyes)
+        if (isBuiltin(fd) != BUILTINunimp)
         {
             Expressions args;
             args.setDim(nargs);
diff --git a/gcc/d/dmd/idgen.c b/gcc/d/dmd/idgen.c
index 09855a05688..322a293cf09 100644
--- a/gcc/d/dmd/idgen.c
+++ b/gcc/d/dmd/idgen.c
@@ -291,6 +291,8 @@ Msgtable msgtable[] =
     { "entrypoint", "__entrypoint" },
 
     // varargs implementation
+    { "stdc", NULL },
+    { "stdarg", NULL },
     { "va_start", NULL },
 
     // Builtin functions
@@ -304,16 +306,52 @@ Msgtable msgtable[] =
     { "_sqrt", "sqrt" },
     { "_pow", "pow" },
     { "atan2", NULL },
+    { "rint", NULL },
+    { "ldexp", NULL },
     { "rndtol", NULL },
+    { "exp", NULL },
     { "expm1", NULL },
     { "exp2", NULL },
     { "yl2x", NULL },
     { "yl2xp1", NULL },
+    { "log", NULL },
+    { "log2", NULL },
+    { "log10", NULL },
+    { "round", NULL },
+    { "floor", NULL },
+    { "trunc", NULL },
+    { "fmax", NULL },
+    { "fmin", NULL },
+    { "fma", NULL },
+    { "isnan", NULL },
+    { "isInfinity", NULL },
+    { "isfinite", NULL },
+    { "ceil", NULL },
+    { "copysign", NULL },
     { "fabs", NULL },
+    { "toPrec", NULL },
+    { "simd", NULL },
+    { "__prefetch", NULL },
+    { "__simd_sto", NULL },
+    { "__simd", NULL },
+    { "__simd_ib", NULL },
     { "bitop", NULL },
     { "bsf", NULL },
     { "bsr", NULL },
+    { "btc", NULL },
+    { "btr", NULL },
+    { "bts", NULL },
     { "bswap", NULL },
+    { "_volatile", "volatile" },
+    { "volatileLoad", NULL },
+    { "volatileStore", NULL },
+    { "_popcnt", NULL },
+    { "inp", NULL },
+    { "inpl", NULL },
+    { "inpw", NULL },
+    { "outp", NULL },
+    { "outpl", NULL },
+    { "outpw", NULL },
 
     // Traits
     { "isAbstractClass", NULL },
diff --git a/gcc/d/dmd/root/ctfloat.h b/gcc/d/dmd/root/ctfloat.h
index c72b8fc2062..4cdf362e598 100644
--- a/gcc/d/dmd/root/ctfloat.h
+++ b/gcc/d/dmd/root/ctfloat.h
@@ -29,6 +29,24 @@ struct CTFloat
     static real_t fabs(real_t x);
     static real_t ldexp(real_t n, int exp);
 
+    static real_t round(real_t x);
+    static real_t floor(real_t x);
+    static real_t ceil(real_t x);
+    static real_t trunc(real_t x);
+    static real_t log(real_t x);
+    static real_t log2(real_t x);
+    static real_t log10(real_t x);
+    static real_t pow(real_t x, real_t y);
+    static real_t exp(real_t x);
+    static real_t expm1(real_t x);
+    static real_t exp2(real_t x);
+
+    static real_t fmin(real_t x, real_t y);
+    static real_t fmax(real_t x, real_t y);
+    static real_t copysign(real_t x, real_t s);
+
+    static real_t fma(real_t x, real_t y, real_t z);
+
     static bool isIdentical(real_t a, real_t b);
     static bool isNaN(real_t r);
     static bool isSNaN(real_t r);
diff --git a/gcc/d/intrinsics.cc b/gcc/d/intrinsics.cc
index 70c98500085..7f97c1d1fa8 100644
--- a/gcc/d/intrinsics.cc
+++ b/gcc/d/intrinsics.cc
@@ -81,7 +81,7 @@ maybe_set_intrinsic (FuncDeclaration *decl)
 
   /* The builtin flag is updated only if we can evaluate the intrinsic
      at compile-time.  Such as the math or bitop intrinsics.  */
-  decl->builtin = BUILTINno;
+  decl->builtin = BUILTINunimp;
 
   /* Check if it's a compiler intrinsic.  We only require that any
      internally recognised intrinsics are declared in a module with
@@ -177,12 +177,12 @@ maybe_set_intrinsic (FuncDeclaration *decl)
 		 built-in function.  It could be `int pow(int, int)'.  */
 	      tree rettype = TREE_TYPE (TREE_TYPE (decl->csym));
 	      if (mathfn_built_in (rettype, BUILT_IN_POW) != NULL_TREE)
-		decl->builtin = BUILTINyes;
+		decl->builtin = BUILTINgcc;
 	      break;
 	    }
 
 	    default:
-	      decl->builtin = BUILTINyes;
+	      decl->builtin = BUILTINgcc;
 	      break;
 	    }
 
@@ -809,6 +809,7 @@ maybe_expand_intrinsic (tree callexp)
     case INTRINSIC_ROR_TIARG:
       return expand_intrinsic_rotate (intrinsic, callexp);
 
+    case INTRINSIC_BSWAP16:
     case INTRINSIC_BSWAP32:
     case INTRINSIC_BSWAP64:
     case INTRINSIC_CEIL:
diff --git a/gcc/d/intrinsics.def b/gcc/d/intrinsics.def
index e44843e77ff..dc6b104f6d5 100644
--- a/gcc/d/intrinsics.def
+++ b/gcc/d/intrinsics.def
@@ -42,17 +42,18 @@ DEF_D_BUILTIN (NONE, NONE, 0, 0, 0)
 
 DEF_D_BUILTIN (BSF, NONE, "bsf", "core.bitop", "FNaNbNiNfkZi")
 DEF_D_BUILTIN (BSR, NONE, "bsr", "core.bitop", "FNaNbNiNfkZi")
-DEF_D_BUILTIN (BT, NONE, "bt", "core.bitop", "FNaNbNixPkkZi")
+DEF_D_BUILTIN (BT, NONE, "bt", "core.bitop", "FNaNbNiMxPkkZi")
 DEF_D_BUILTIN (BTC, NONE, "btc", "core.bitop", "FNaNbNiPkkZi")
 DEF_D_BUILTIN (BTR, NONE, "btr", "core.bitop", "FNaNbNiPkkZi")
 DEF_D_BUILTIN (BTS, NONE, "bts", "core.bitop", "FNaNbNiPkkZi")
 DEF_D_BUILTIN (BSF64, NONE, "bsf", "core.bitop", "FNaNbNiNfmZi")
 DEF_D_BUILTIN (BSR64, NONE, "bsr", "core.bitop", "FNaNbNiNfmZi")
-DEF_D_BUILTIN (BT64, NONE, "bt", "core.bitop", "FNaNbNixPmmZi")
+DEF_D_BUILTIN (BT64, NONE, "bt", "core.bitop", "FNaNbNiMxPmmZi")
 DEF_D_BUILTIN (BTC64, NONE, "btc", "core.bitop", "FNaNbNiPmmZi")
 DEF_D_BUILTIN (BTR64, NONE, "btr", "core.bitop", "FNaNbNiPmmZi")
 DEF_D_BUILTIN (BTS64, NONE, "bts", "core.bitop", "FNaNbNiPmmZi")
 
+DEF_D_BUILTIN (BSWAP16, BSWAP16, "byteswap", "core.bitop", "FNaNbNiNftZt")
 DEF_D_BUILTIN (BSWAP32, BSWAP32, "bswap", "core.bitop", "FNaNbNiNfkZk")
 DEF_D_BUILTIN (BSWAP64, BSWAP64, "bswap", "core.bitop", "FNaNbNiNfmZm")
 
@@ -64,32 +65,34 @@ DEF_D_BUILTIN (ROL_TIARG, NONE, "rol", "core.bitop", "FNaI1TZI1T")
 DEF_D_BUILTIN (ROR, NONE, "ror", "core.bitop", "FNaI1TkZI1T")
 DEF_D_BUILTIN (ROR_TIARG, NONE, "ror", "core.bitop", "FNaI1TZI1T")
 
-DEF_D_BUILTIN (VLOAD8, NONE, "volatileLoad", "core.bitop", "FNbNiNfPhZh")
-DEF_D_BUILTIN (VLOAD16, NONE, "volatileLoad", "core.bitop", "FNbNiNfPtZt")
-DEF_D_BUILTIN (VLOAD32, NONE, "volatileLoad", "core.bitop", "FNbNiNfPkZk")
-DEF_D_BUILTIN (VLOAD64, NONE, "volatileLoad", "core.bitop", "FNbNiNfPmZm")
-DEF_D_BUILTIN (VSTORE8, NONE, "volatileStore", "core.bitop", "FNbNiNfPhhZv")
-DEF_D_BUILTIN (VSTORE16, NONE, "volatileStore", "core.bitop", "FNbNiNfPttZv")
-DEF_D_BUILTIN (VSTORE32, NONE, "volatileStore", "core.bitop", "FNbNiNfPkkZv")
-DEF_D_BUILTIN (VSTORE64, NONE, "volatileStore", "core.bitop", "FNbNiNfPmmZv")
+/* core.volatile intrinsics.  */
+
+DEF_D_BUILTIN (VLOAD8, NONE, "volatileLoad", "core.volatile", "FNbNiNfPhZh")
+DEF_D_BUILTIN (VLOAD16, NONE, "volatileLoad", "core.volatile", "FNbNiNfPtZt")
+DEF_D_BUILTIN (VLOAD32, NONE, "volatileLoad", "core.volatile", "FNbNiNfPkZk")
+DEF_D_BUILTIN (VLOAD64, NONE, "volatileLoad", "core.volatile", "FNbNiNfPmZm")
+DEF_D_BUILTIN (VSTORE8, NONE, "volatileStore", "core.volatile", "FNbNiNfPhhZv")
+DEF_D_BUILTIN (VSTORE16, NONE, "volatileStore", "core.volatile", "FNbNiNfPttZv")
+DEF_D_BUILTIN (VSTORE32, NONE, "volatileStore", "core.volatile", "FNbNiNfPkkZv")
+DEF_D_BUILTIN (VSTORE64, NONE, "volatileStore", "core.volatile", "FNbNiNfPmmZv")
 
 /* core.checkedint intrinsics.  */
 
-DEF_D_BUILTIN (ADDS, NONE, "adds", "core.checkedint", "FNaNbNiNfiiKbZi")
-DEF_D_BUILTIN (ADDSL, NONE, "adds", "core.checkedint", "FNaNbNiNfllKbZl")
-DEF_D_BUILTIN (ADDU, NONE, "addu", "core.checkedint", "FNaNbNiNfkkKbZk")
-DEF_D_BUILTIN (ADDUL, NONE, "addu", "core.checkedint", "FNaNbNiNfmmKbZm")
-DEF_D_BUILTIN (SUBS, NONE, "subs", "core.checkedint", "FNaNbNiNfiiKbZi")
-DEF_D_BUILTIN (SUBSL, NONE, "subs", "core.checkedint", "FNaNbNiNfllKbZl")
-DEF_D_BUILTIN (SUBU, NONE, "subu", "core.checkedint", "FNaNbNiNfkkKbZk")
-DEF_D_BUILTIN (SUBUL, NONE, "subu", "core.checkedint", "FNaNbNiNfmmKbZm")
-DEF_D_BUILTIN (MULS, NONE, "muls", "core.checkedint", "FNaNbNiNfiiKbZi")
-DEF_D_BUILTIN (MULSL, NONE, "muls", "core.checkedint", "FNaNbNiNfllKbZl")
-DEF_D_BUILTIN (MULU, NONE, "mulu", "core.checkedint", "FNaNbNiNfkkKbZk")
-DEF_D_BUILTIN (MULUI, NONE, "mulu", "core.checkedint", "FNaNbNiNfmkKbZm")
-DEF_D_BUILTIN (MULUL, NONE, "mulu", "core.checkedint", "FNaNbNiNfmmKbZm")
-DEF_D_BUILTIN (NEGS, NONE, "negs", "core.checkedint", "FNaNbNiNfiKbZi")
-DEF_D_BUILTIN (NEGSL, NONE, "negs", "core.checkedint", "FNaNbNiNflKbZl")
+DEF_D_BUILTIN (ADDS, NONE, "adds", "core.checkedint", "FiiKbZi")
+DEF_D_BUILTIN (ADDSL, NONE, "adds", "core.checkedint", "FllKbZl")
+DEF_D_BUILTIN (ADDU, NONE, "addu", "core.checkedint", "FkkKbZk")
+DEF_D_BUILTIN (ADDUL, NONE, "addu", "core.checkedint", "FmmKbZm")
+DEF_D_BUILTIN (SUBS, NONE, "subs", "core.checkedint", "FiiKbZi")
+DEF_D_BUILTIN (SUBSL, NONE, "subs", "core.checkedint", "FllKbZl")
+DEF_D_BUILTIN (SUBU, NONE, "subu", "core.checkedint", "FkkKbZk")
+DEF_D_BUILTIN (SUBUL, NONE, "subu", "core.checkedint", "FmmKbZm")
+DEF_D_BUILTIN (MULS, NONE, "muls", "core.checkedint", "FiiKbZi")
+DEF_D_BUILTIN (MULSL, NONE, "muls", "core.checkedint", "FllKbZl")
+DEF_D_BUILTIN (MULU, NONE, "mulu", "core.checkedint", "FkkKbZk")
+DEF_D_BUILTIN (MULUI, NONE, "mulu", "core.checkedint", "FmkKbZm")
+DEF_D_BUILTIN (MULUL, NONE, "mulu", "core.checkedint", "FmmKbZm")
+DEF_D_BUILTIN (NEGS, NONE, "negs", "core.checkedint", "FiKbZi")
+DEF_D_BUILTIN (NEGSL, NONE, "negs", "core.checkedint", "FlKbZl")
 
 /* core.math intrinsics.  */
 
diff --git a/gcc/testsuite/gdc.dg/intrinsics.d b/gcc/testsuite/gdc.dg/intrinsics.d
index a7752370582..d9ccc0ec5ce 100644
--- a/gcc/testsuite/gdc.dg/intrinsics.d
+++ b/gcc/testsuite/gdc.dg/intrinsics.d
@@ -3,6 +3,7 @@
 import core.bitop;
 import core.checkedint;
 import core.math;
+import core.volatile;
 import core.stdc.stdarg;
 
 //////////////////////////////////////////////////////
@@ -24,6 +25,8 @@ int test_btc(size_t *a, size_t b) { return btc(a, b); }
 int test_btr(size_t *a, size_t b) { return btr(a, b); }
 // { dg-final { scan-tree-dump-not " <retval> = bts " "original" } }
 int test_bts(size_t *a, size_t b) { return bts(a, b); }
+// { dg-final { scan-tree-dump " __builtin_bswap16 " "original" } }
+ushort test_byteswap(ushort a) { return byteswap(a); }
 // { dg-final { scan-tree-dump " __builtin_bswap32 " "original" } }
 uint test_bswap(uint a) { return bswap(a); }
 // { dg-final { scan-tree-dump " __builtin_bswap64 " "original" } }
diff --git a/libphobos/libdruntime/MERGE b/libphobos/libdruntime/MERGE
index 3485bde1200..a4a9a940baf 100644
--- a/libphobos/libdruntime/MERGE
+++ b/libphobos/libdruntime/MERGE
@@ -1,4 +1,4 @@
-9d0c8364450064d0b6e68da4384f8acd19eb454f
+0fd4364c4a4eb2ce0ebb8f613092c5bed7a63bf9
 
 The first line of this file holds the git revision number of the last
 merge done from the dlang/druntime repository.
diff --git a/libphobos/libdruntime/Makefile.am b/libphobos/libdruntime/Makefile.am
index df2c06c3dab..945271e028f 100644
--- a/libphobos/libdruntime/Makefile.am
+++ b/libphobos/libdruntime/Makefile.am
@@ -182,21 +182,22 @@ DRUNTIME_DSOURCES = core/atomic.d core/attribute.d core/bitop.d \
 	core/sync/rwmutex.d core/sync/semaphore.d core/thread/context.d \
 	core/thread/fiber.d core/thread/osthread.d core/thread/package.d \
 	core/thread/threadbase.d core/thread/threadgroup.d core/thread/types.d \
-	core/time.d core/vararg.d gc/bits.d gc/config.d gc/gcinterface.d \
-	gc/impl/conservative/gc.d gc/impl/manual/gc.d gc/os.d gc/pooltable.d \
-	gc/proxy.d gcc/attribute.d gcc/backtrace.d gcc/builtins.d gcc/deh.d \
-	gcc/emutls.d gcc/gthread.d gcc/sections/android.d \
-	gcc/sections/elf_shared.d gcc/sections/osx.d gcc/sections/package.d \
-	gcc/sections/win32.d gcc/sections/win64.d gcc/unwind/arm.d \
-	gcc/unwind/arm_common.d gcc/unwind/c6x.d gcc/unwind/generic.d \
-	gcc/unwind/package.d gcc/unwind/pe.d object.d rt/aApply.d rt/aApplyR.d \
-	rt/aaA.d rt/adi.d rt/arrayassign.d rt/arraycast.d rt/arraycat.d \
-	rt/cast_.d rt/config.d rt/critical_.d rt/deh.d rt/dmain2.d \
-	rt/invariant.d rt/lifetime.d rt/memory.d rt/minfo.d rt/monitor_.d \
-	rt/obj.d rt/qsort.d rt/sections.d rt/switch_.d rt/tlsgc.d \
-	rt/util/array.d rt/util/container/array.d rt/util/container/common.d \
-	rt/util/container/hashtab.d rt/util/container/treap.d rt/util/random.d \
-	rt/util/typeinfo.d rt/util/utf.d
+	core/time.d core/vararg.d core/volatile.d gc/bits.d gc/config.d \
+	gc/gcinterface.d gc/impl/conservative/gc.d gc/impl/manual/gc.d gc/os.d \
+	gc/pooltable.d gc/proxy.d gcc/attribute.d gcc/backtrace.d \
+	gcc/builtins.d gcc/deh.d gcc/emutls.d gcc/gthread.d \
+	gcc/sections/android.d gcc/sections/elf_shared.d gcc/sections/osx.d \
+	gcc/sections/package.d gcc/sections/win32.d gcc/sections/win64.d \
+	gcc/unwind/arm.d gcc/unwind/arm_common.d gcc/unwind/c6x.d \
+	gcc/unwind/generic.d gcc/unwind/package.d gcc/unwind/pe.d object.d \
+	rt/aApply.d rt/aApplyR.d rt/aaA.d rt/adi.d rt/arrayassign.d \
+	rt/arraycast.d rt/arraycat.d rt/cast_.d rt/config.d rt/critical_.d \
+	rt/deh.d rt/dmain2.d rt/invariant.d rt/lifetime.d rt/memory.d \
+	rt/minfo.d rt/monitor_.d rt/obj.d rt/qsort.d rt/sections.d \
+	rt/switch_.d rt/tlsgc.d rt/util/array.d rt/util/container/array.d \
+	rt/util/container/common.d rt/util/container/hashtab.d \
+	rt/util/container/treap.d rt/util/random.d rt/util/typeinfo.d \
+	rt/util/utf.d
 
 DRUNTIME_DSOURCES_STDCXX = core/stdcpp/exception.d \
 	core/stdcpp/typeinfo.d
diff --git a/libphobos/libdruntime/Makefile.in b/libphobos/libdruntime/Makefile.in
index aadc6f3ede0..06c02961c86 100644
--- a/libphobos/libdruntime/Makefile.in
+++ b/libphobos/libdruntime/Makefile.in
@@ -206,14 +206,14 @@ am__objects_1 = core/atomic.lo core/attribute.lo core/bitop.lo \
 	core/thread/context.lo core/thread/fiber.lo \
 	core/thread/osthread.lo core/thread/package.lo \
 	core/thread/threadbase.lo core/thread/threadgroup.lo \
-	core/thread/types.lo core/time.lo core/vararg.lo gc/bits.lo \
-	gc/config.lo gc/gcinterface.lo gc/impl/conservative/gc.lo \
-	gc/impl/manual/gc.lo gc/os.lo gc/pooltable.lo gc/proxy.lo \
-	gcc/attribute.lo gcc/backtrace.lo gcc/builtins.lo gcc/deh.lo \
-	gcc/emutls.lo gcc/gthread.lo gcc/sections/android.lo \
-	gcc/sections/elf_shared.lo gcc/sections/osx.lo \
-	gcc/sections/package.lo gcc/sections/win32.lo \
-	gcc/sections/win64.lo gcc/unwind/arm.lo \
+	core/thread/types.lo core/time.lo core/vararg.lo \
+	core/volatile.lo gc/bits.lo gc/config.lo gc/gcinterface.lo \
+	gc/impl/conservative/gc.lo gc/impl/manual/gc.lo gc/os.lo \
+	gc/pooltable.lo gc/proxy.lo gcc/attribute.lo gcc/backtrace.lo \
+	gcc/builtins.lo gcc/deh.lo gcc/emutls.lo gcc/gthread.lo \
+	gcc/sections/android.lo gcc/sections/elf_shared.lo \
+	gcc/sections/osx.lo gcc/sections/package.lo \
+	gcc/sections/win32.lo gcc/sections/win64.lo gcc/unwind/arm.lo \
 	gcc/unwind/arm_common.lo gcc/unwind/c6x.lo \
 	gcc/unwind/generic.lo gcc/unwind/package.lo gcc/unwind/pe.lo \
 	object.lo rt/aApply.lo rt/aApplyR.lo rt/aaA.lo rt/adi.lo \
@@ -808,21 +808,22 @@ DRUNTIME_DSOURCES = core/atomic.d core/attribute.d core/bitop.d \
 	core/sync/rwmutex.d core/sync/semaphore.d core/thread/context.d \
 	core/thread/fiber.d core/thread/osthread.d core/thread/package.d \
 	core/thread/threadbase.d core/thread/threadgroup.d core/thread/types.d \
-	core/time.d core/vararg.d gc/bits.d gc/config.d gc/gcinterface.d \
-	gc/impl/conservative/gc.d gc/impl/manual/gc.d gc/os.d gc/pooltable.d \
-	gc/proxy.d gcc/attribute.d gcc/backtrace.d gcc/builtins.d gcc/deh.d \
-	gcc/emutls.d gcc/gthread.d gcc/sections/android.d \
-	gcc/sections/elf_shared.d gcc/sections/osx.d gcc/sections/package.d \
-	gcc/sections/win32.d gcc/sections/win64.d gcc/unwind/arm.d \
-	gcc/unwind/arm_common.d gcc/unwind/c6x.d gcc/unwind/generic.d \
-	gcc/unwind/package.d gcc/unwind/pe.d object.d rt/aApply.d rt/aApplyR.d \
-	rt/aaA.d rt/adi.d rt/arrayassign.d rt/arraycast.d rt/arraycat.d \
-	rt/cast_.d rt/config.d rt/critical_.d rt/deh.d rt/dmain2.d \
-	rt/invariant.d rt/lifetime.d rt/memory.d rt/minfo.d rt/monitor_.d \
-	rt/obj.d rt/qsort.d rt/sections.d rt/switch_.d rt/tlsgc.d \
-	rt/util/array.d rt/util/container/array.d rt/util/container/common.d \
-	rt/util/container/hashtab.d rt/util/container/treap.d rt/util/random.d \
-	rt/util/typeinfo.d rt/util/utf.d
+	core/time.d core/vararg.d core/volatile.d gc/bits.d gc/config.d \
+	gc/gcinterface.d gc/impl/conservative/gc.d gc/impl/manual/gc.d gc/os.d \
+	gc/pooltable.d gc/proxy.d gcc/attribute.d gcc/backtrace.d \
+	gcc/builtins.d gcc/deh.d gcc/emutls.d gcc/gthread.d \
+	gcc/sections/android.d gcc/sections/elf_shared.d gcc/sections/osx.d \
+	gcc/sections/package.d gcc/sections/win32.d gcc/sections/win64.d \
+	gcc/unwind/arm.d gcc/unwind/arm_common.d gcc/unwind/c6x.d \
+	gcc/unwind/generic.d gcc/unwind/package.d gcc/unwind/pe.d object.d \
+	rt/aApply.d rt/aApplyR.d rt/aaA.d rt/adi.d rt/arrayassign.d \
+	rt/arraycast.d rt/arraycat.d rt/cast_.d rt/config.d rt/critical_.d \
+	rt/deh.d rt/dmain2.d rt/invariant.d rt/lifetime.d rt/memory.d \
+	rt/minfo.d rt/monitor_.d rt/obj.d rt/qsort.d rt/sections.d \
+	rt/switch_.d rt/tlsgc.d rt/util/array.d rt/util/container/array.d \
+	rt/util/container/common.d rt/util/container/hashtab.d \
+	rt/util/container/treap.d rt/util/random.d rt/util/typeinfo.d \
+	rt/util/utf.d
 
 DRUNTIME_DSOURCES_STDCXX = core/stdcpp/exception.d \
 	core/stdcpp/typeinfo.d
@@ -1178,6 +1179,7 @@ core/thread/threadgroup.lo: core/thread/$(am__dirstamp)
 core/thread/types.lo: core/thread/$(am__dirstamp)
 core/time.lo: core/$(am__dirstamp)
 core/vararg.lo: core/$(am__dirstamp)
+core/volatile.lo: core/$(am__dirstamp)
 gc/$(am__dirstamp):
 	@$(MKDIR_P) gc
 	@: > gc/$(am__dirstamp)
diff --git a/libphobos/libdruntime/core/bitop.d b/libphobos/libdruntime/core/bitop.d
index 0daee55c7e9..25b5cd515b2 100644
--- a/libphobos/libdruntime/core/bitop.d
+++ b/libphobos/libdruntime/core/bitop.d
@@ -267,7 +267,7 @@ unittest
  * (No longer an intrisic - the compiler recognizes the patterns
  * in the body.)
  */
-int bt(in size_t* p, size_t bitnum) pure @system
+int bt(const scope size_t* p, size_t bitnum) pure @system
 {
     static if (size_t.sizeof == 8)
         return ((p[bitnum >> 6] & (1L << (bitnum & 63)))) != 0;
@@ -494,6 +494,34 @@ struct BitRange
     testIt(100, 6, 45, 89, 92, 99);
 }
 
+/**
+ * Swaps bytes in a 2 byte ushort.
+ * Params:
+ *      x = value
+ * Returns:
+ *      `x` with bytes swapped
+ */
+pragma(inline, false)
+ushort byteswap(ushort x) pure
+{
+    /* Calling it bswap(ushort) would break existing code that calls bswap(uint).
+     *
+     * This pattern is meant to be recognized by the dmd code generator.
+     * Don't change it without checking that an XCH instruction is still
+     * used to implement it.
+     * Inlining may also throw it off.
+     */
+    return cast(ushort) (((x >> 8) & 0xFF) | ((x << 8) & 0xFF00u));
+}
+
+///
+unittest
+{
+    assert(byteswap(cast(ushort)0xF234) == 0x34F2);
+    static ushort xx = 0xF234;
+    assert(byteswap(xx) == 0x34F2);
+}
+
 /**
  * Swaps bytes in a 4 byte uint end-to-end, i.e. byte 0 becomes
  * byte 3, byte 1 becomes byte 2, byte 2 becomes byte 1, byte 3
@@ -501,19 +529,27 @@ struct BitRange
  */
 uint bswap(uint v) pure;
 
+///
+unittest
+{
+    assert(bswap(0x01020304u) == 0x04030201u);
+    static uint xx = 0x10203040u;
+    assert(bswap(xx) == 0x40302010u);
+}
+
 /**
  * Swaps bytes in an 8 byte ulong end-to-end, i.e. byte 0 becomes
  * byte 7, byte 1 becomes byte 6, etc.
+ * This is meant to be recognized by the compiler as an intrinsic.
  */
-ulong bswap(ulong v) pure
-{
-    auto sv = Split64(v);
-
-    const temp = sv.lo;
-    sv.lo = bswap(sv.hi);
-    sv.hi = bswap(temp);
+ulong bswap(ulong v) pure;
 
-    return (cast(ulong) sv.hi << 32) | sv.lo;
+///
+unittest
+{
+    assert(bswap(0x01020304_05060708uL) == 0x08070605_04030201uL);
+    static ulong xx = 0x10203040_50607080uL;
+    assert(bswap(xx) == 0x80706050_40302010uL);
 }
 
 version (DigitalMars) version (AnyX86) @system // not pure
@@ -722,57 +758,14 @@ version (DigitalMars) version (AnyX86)
 }
 
 
-/*************************************
- * Read/write value from/to the memory location indicated by ptr.
- *
- * These functions are recognized by the compiler, and calls to them are guaranteed
- * to not be removed (as dead assignment elimination or presumed to have no effect)
- * or reordered in the same thread.
- *
- * These reordering guarantees are only made with regards to other
- * operations done through these functions; the compiler is free to reorder regular
- * loads/stores with regards to loads/stores done through these functions.
- *
- * This is useful when dealing with memory-mapped I/O (MMIO) where a store can
- * have an effect other than just writing a value, or where sequential loads
- * with no intervening stores can retrieve
- * different values from the same location due to external stores to the location.
- *
- * These functions will, when possible, do the load/store as a single operation. In
- * general, this is possible when the size of the operation is less than or equal to
- * $(D (void*).sizeof), although some targets may support larger operations. If the
- * load/store cannot be done as a single operation, multiple smaller operations will be used.
- *
- * These are not to be conflated with atomic operations. They do not guarantee any
- * atomicity. This may be provided by coincidence as a result of the instructions
- * used on the target, but this should not be relied on for portable programs.
- * Further, no memory fences are implied by these functions.
- * They should not be used for communication between threads.
- * They may be used to guarantee a write or read cycle occurs at a specified address.
- */
-
-ubyte  volatileLoad(ubyte * ptr);
-ushort volatileLoad(ushort* ptr);  /// ditto
-uint   volatileLoad(uint  * ptr);  /// ditto
-ulong  volatileLoad(ulong * ptr);  /// ditto
-
-void volatileStore(ubyte * ptr, ubyte  value);   /// ditto
-void volatileStore(ushort* ptr, ushort value);   /// ditto
-void volatileStore(uint  * ptr, uint   value);   /// ditto
-void volatileStore(ulong * ptr, ulong  value);   /// ditto
-
-@system unittest
+deprecated("volatileLoad has been moved to core.volatile. Use core.volatile.volatileLoad instead.")
 {
-    alias TT(T...) = T;
+    public import core.volatile : volatileLoad;
+}
 
-    foreach (T; TT!(ubyte, ushort, uint, ulong))
-    {
-        T u;
-        T* p = &u;
-        volatileStore(p, 1);
-        T r = volatileLoad(p);
-        assert(r == u);
-    }
+deprecated("volatileStore has been moved to core.volatile. Use core.volatile.volatileStore instead.")
+{
+    public import core.volatile : volatileStore;
 }
 
 
@@ -954,51 +947,51 @@ version (D_InlineAsm_X86_64)
  *  Bitwise rotate `value` left (`rol`) or right (`ror`) by
  *  `count` bit positions.
  */
-pure T rol(T)(in T value, in uint count)
+pure T rol(T)(const T value, const uint count)
     if (__traits(isIntegral, T) && __traits(isUnsigned, T))
 {
     assert(count < 8 * T.sizeof);
-    return cast(T) ((value << count) | (value >> (-count & (T.sizeof * 8 - 1))));
+    return cast(T) ((value << count) | (value >> (T.sizeof * 8 - count)));
 }
 /// ditto
-pure T ror(T)(in T value, in uint count)
+pure T ror(T)(const T value, const uint count)
     if (__traits(isIntegral, T) && __traits(isUnsigned, T))
 {
     assert(count < 8 * T.sizeof);
-    return cast(T) ((value >> count) | (value << (-count & (T.sizeof * 8 - 1))));
+    return cast(T) ((value >> count) | (value << (T.sizeof * 8 - count)));
 }
 /// ditto
-pure T rol(uint count, T)(in T value)
+pure T rol(uint count, T)(const T value)
     if (__traits(isIntegral, T) && __traits(isUnsigned, T))
 {
     static assert(count < 8 * T.sizeof);
-    return cast(T) ((value << count) | (value >> (-count & (T.sizeof * 8 - 1))));
+    return cast(T) ((value << count) | (value >> (T.sizeof * 8 - count)));
 }
 /// ditto
-pure T ror(uint count, T)(in T value)
+pure T ror(uint count, T)(const T value)
     if (__traits(isIntegral, T) && __traits(isUnsigned, T))
 {
     static assert(count < 8 * T.sizeof);
-    return cast(T) ((value >> count) | (value << (-count & (T.sizeof * 8 - 1))));
+    return cast(T) ((value >> count) | (value << (T.sizeof * 8 - count)));
 }
 
 ///
 unittest
 {
-    ubyte a = 0b10101010U;
-    ulong b = ulong.max;
+    ubyte a = 0b11110000U;
+    ulong b = ~1UL;
 
-    assert(rol(a, 1) == 0b01010101);
-    assert(ror(a, 1) == 0b01010101);
-    assert(rol(a, 3) == 0b01010101);
-    assert(ror(a, 3) == 0b01010101);
+    assert(rol(a, 1) == 0b11100001);
+    assert(ror(a, 1) == 0b01111000);
+    assert(rol(a, 3) == 0b10000111);
+    assert(ror(a, 3) == 0b00011110);
 
     assert(rol(a, 0) == a);
     assert(ror(a, 0) == a);
 
-    assert(rol(b, 63) == ulong.max);
-    assert(ror(b, 63) == ulong.max);
+    assert(rol(b, 63) == ~(1UL << 63));
+    assert(ror(b, 63) == ~2UL);
 
-    assert(rol!3(a) == 0b01010101);
-    assert(ror!3(a) == 0b01010101);
+    assert(rol!3(a) == 0b10000111);
+    assert(ror!3(a) == 0b00011110);
 }
diff --git a/libphobos/libdruntime/core/checkedint.d b/libphobos/libdruntime/core/checkedint.d
index 237c8e412cc..57209adcbeb 100644
--- a/libphobos/libdruntime/core/checkedint.d
+++ b/libphobos/libdruntime/core/checkedint.d
@@ -47,7 +47,7 @@ pure:
  */
 
 pragma(inline, true)
-int adds(int x, int y, ref bool overflow)
+int adds()(int x, int y, ref bool overflow)
 {
     long r = cast(long)x + cast(long)y;
     if (r < int.min || r > int.max)
@@ -75,7 +75,7 @@ unittest
 
 /// ditto
 pragma(inline, true)
-long adds(long x, long y, ref bool overflow)
+long adds()(long x, long y, ref bool overflow)
 {
     long r = cast(ulong)x + cast(ulong)y;
     if (x <  0 && y <  0 && r >= 0 ||
@@ -106,7 +106,7 @@ static if (is(cent))
 {
 /// ditto
 pragma(inline, true)
-cent adds(cent x, cent y, ref bool overflow)
+cent adds()(cent x, cent y, ref bool overflow)
 {
     cent r = cast(ucent)x + cast(ucent)y;
     if (x <  0 && y <  0 && r >= 0 ||
@@ -149,7 +149,7 @@ unittest
  */
 
 pragma(inline, true)
-uint addu(uint x, uint y, ref bool overflow)
+uint addu()(uint x, uint y, ref bool overflow)
 {
     immutable uint r = x + y;
     if (r < x || r < y)
@@ -177,7 +177,7 @@ unittest
 
 /// ditto
 pragma(inline, true)
-ulong addu(ulong x, ulong y, ref bool overflow)
+ulong addu()(ulong x, ulong y, ref bool overflow)
 {
     immutable ulong r = x + y;
     if (r < x || r < y)
@@ -207,7 +207,7 @@ static if (is(ucent))
 {
 /// ditto
 pragma(inline, true)
-ucent addu(ucent x, ucent y, ref bool overflow)
+ucent addu()(ucent x, ucent y, ref bool overflow)
 {
     immutable ucent r = x + y;
     if (r < x || r < y)
@@ -249,7 +249,7 @@ unittest
  */
 
 pragma(inline, true)
-int subs(int x, int y, ref bool overflow)
+int subs()(int x, int y, ref bool overflow)
 {
     immutable long r = cast(long)x - cast(long)y;
     if (r < int.min || r > int.max)
@@ -277,7 +277,7 @@ unittest
 
 /// ditto
 pragma(inline, true)
-long subs(long x, long y, ref bool overflow)
+long subs()(long x, long y, ref bool overflow)
 {
     immutable long r = cast(ulong)x - cast(ulong)y;
     if (x <  0 && y >= 0 && r >= 0 ||
@@ -310,7 +310,7 @@ static if (is(cent))
 {
 /// ditto
 pragma(inline, true)
-cent subs(cent x, cent y, ref bool overflow)
+cent subs()(cent x, cent y, ref bool overflow)
 {
     immutable cent r = cast(ucent)x - cast(ucent)y;
     if (x <  0 && y >= 0 && r >= 0 ||
@@ -355,7 +355,7 @@ unittest
  */
 
 pragma(inline, true)
-uint subu(uint x, uint y, ref bool overflow)
+uint subu()(uint x, uint y, ref bool overflow)
 {
     if (x < y)
         overflow = true;
@@ -383,7 +383,7 @@ unittest
 
 /// ditto
 pragma(inline, true)
-ulong subu(ulong x, ulong y, ref bool overflow)
+ulong subu()(ulong x, ulong y, ref bool overflow)
 {
     if (x < y)
         overflow = true;
@@ -412,7 +412,7 @@ static if (is(ucent))
 {
 /// ditto
 pragma(inline, true)
-ucent subu(ucent x, ucent y, ref bool overflow)
+ucent subu()(ucent x, ucent y, ref bool overflow)
 {
     if (x < y)
         overflow = true;
@@ -450,7 +450,7 @@ unittest
  */
 
 pragma(inline, true)
-int negs(int x, ref bool overflow)
+int negs()(int x, ref bool overflow)
 {
     if (x == int.min)
         overflow = true;
@@ -474,7 +474,7 @@ unittest
 
 /// ditto
 pragma(inline, true)
-long negs(long x, ref bool overflow)
+long negs()(long x, ref bool overflow)
 {
     if (x == long.min)
         overflow = true;
@@ -500,7 +500,7 @@ static if (is(cent))
 {
 /// ditto
 pragma(inline, true)
-cent negs(cent x, ref bool overflow)
+cent negs()(cent x, ref bool overflow)
 {
     if (x == cent.min)
         overflow = true;
@@ -538,7 +538,7 @@ unittest
  */
 
 pragma(inline, true)
-int muls(int x, int y, ref bool overflow)
+int muls()(int x, int y, ref bool overflow)
 {
     long r = cast(long)x * cast(long)y;
     if (r < int.min || r > int.max)
@@ -568,11 +568,13 @@ unittest
 
 /// ditto
 pragma(inline, true)
-long muls(long x, long y, ref bool overflow)
+long muls()(long x, long y, ref bool overflow) // returns the wrapped product of x and y and sets overflow when the check below fires; the empty () makes this overload a function template
 {
     immutable long r = cast(ulong)x * cast(ulong)y;
     enum not0or1 = ~1L;
-    if ((x & not0or1) && ((r == y)? r : (r / x) != y))
+    if ((x & not0or1) && // x is neither 0 nor 1
+        ((r == y) ? r != 0 // product equals y although x != 1: flagged unless both are 0
+                  : (r == 0x8000_0000_0000_0000 && x == -1L) || ((r / x) != y))) // first test short-circuits, so r / x is not evaluated for that pair
         overflow = true;
     return r;
 }
@@ -604,7 +606,7 @@ static if (is(cent))
 {
 /// ditto
 pragma(inline, true)
-cent muls(cent x, cent y, ref bool overflow)
+cent muls()(cent x, cent y, ref bool overflow)
 {
     immutable cent r = cast(ucent)x * cast(ucent)y;
     enum not0or1 = ~1L;
@@ -652,7 +654,7 @@ unittest
  */
 
 pragma(inline, true)
-uint mulu(uint x, uint y, ref bool overflow)
+uint mulu()(uint x, uint y, ref bool overflow)
 {
     immutable ulong r = ulong(x) * ulong(y);
     if (r >> 32)
@@ -682,7 +684,7 @@ unittest
 
 /// ditto
 pragma(inline, true)
-ulong mulu(ulong x, uint y, ref bool overflow)
+ulong mulu()(ulong x, uint y, ref bool overflow)
 {
     ulong r = x * y;
     if (x >> 32 &&
@@ -693,7 +695,7 @@ ulong mulu(ulong x, uint y, ref bool overflow)
 
 /// ditto
 pragma(inline, true)
-ulong mulu(ulong x, ulong y, ref bool overflow)
+ulong mulu()(ulong x, ulong y, ref bool overflow)
 {
     immutable ulong r = x * y;
     if ((x | y) >> 32 &&
@@ -751,7 +753,7 @@ static if (is(ucent))
 {
 /// ditto
 pragma(inline, true)
-ucent mulu(ucent x, ucent y, ref bool overflow)
+ucent mulu()(ucent x, ucent y, ref bool overflow)
 {
     immutable ucent r = x * y;
     if (x && (r / x) != y)
diff --git a/libphobos/libdruntime/core/cpuid.d b/libphobos/libdruntime/core/cpuid.d
index 2ba13b55bf1..e31f776d7ee 100644
--- a/libphobos/libdruntime/core/cpuid.d
+++ b/libphobos/libdruntime/core/cpuid.d
@@ -56,6 +56,9 @@
 
 module core.cpuid;
 
+version (GNU) version = GNU_OR_LDC;
+version (LDC) version = GNU_OR_LDC;
+
 @trusted:
 nothrow:
 @nogc:
@@ -318,10 +321,10 @@ private:
 struct CpuFeatures
 {
     bool probablyIntel; // true = _probably_ an Intel processor, might be faking
-    bool probablyAMD; // true = _probably_ an AMD processor
+    bool probablyAMD; // true = _probably_ an AMD or Hygon processor
     string processorName;
-    char [12] vendorID;
-    char [48] processorNameBuffer;
+    char [12] vendorID = 0;
+    char [48] processorNameBuffer = 0;
     uint features = 0;     // mmx, sse, sse2, hyperthreading, etc
     uint miscfeatures = 0; // sse3, etc.
     uint extfeatures = 0;  // HLE, AVX2, RTM, etc.
@@ -426,7 +429,7 @@ CpuFeatures* getCpuFeatures() @nogc nothrow
     }
 
 
-version (GNU) {
+version (GNU_OR_LDC) {
     version (X86)
         enum supportedX86 = true;
     else version (X86_64)
@@ -509,12 +512,12 @@ void getcacheinfoCPUID2()
     // for old single-core CPUs.
     uint numinfos = 1;
     do {
-        version (GNU) asm pure nothrow @nogc {
+        version (GNU_OR_LDC) asm pure nothrow @nogc {
             "cpuid" : "=a" (a[0]), "=b" (a[1]), "=c" (a[2]), "=d" (a[3]) : "a" (2);
         } else asm pure nothrow @nogc {
             mov EAX, 2;
             cpuid;
-            mov a, EAX;
+            mov a+0, EAX;
             mov a+4, EBX;
             mov a+8, ECX;
             mov a+12, EDX;
@@ -553,7 +556,7 @@ void getcacheinfoCPUID4()
     int cachenum = 0;
     for (;;) {
         uint a, b, number_of_sets;
-        version (GNU) asm pure nothrow @nogc {
+        version (GNU_OR_LDC) asm pure nothrow @nogc {
             "cpuid" : "=a" (a), "=b" (b), "=c" (number_of_sets) : "a" (4), "c" (cachenum) : "edx";
         } else asm pure nothrow @nogc {
             mov EAX, 4;
@@ -593,7 +596,7 @@ void getcacheinfoCPUID4()
 void getAMDcacheinfo()
 {
     uint dummy, c5, c6, d6;
-    version (GNU) asm pure nothrow @nogc {
+    version (GNU_OR_LDC) asm pure nothrow @nogc {
         "cpuid" : "=a" (dummy), "=c" (c5) : "a" (0x8000_0005) : "ebx", "edx";
     } else asm pure nothrow @nogc {
         mov EAX, 0x8000_0005; // L1 cache
@@ -612,7 +615,7 @@ void getAMDcacheinfo()
         // AMD K6-III or K6-2+ or later.
         ubyte numcores = 1;
         if (max_extended_cpuid >= 0x8000_0008) {
-            version (GNU) asm pure nothrow @nogc {
+            version (GNU_OR_LDC) asm pure nothrow @nogc {
                 "cpuid" : "=a" (dummy), "=c" (numcores) : "a" (0x8000_0008) : "ebx", "edx";
             } else asm pure nothrow @nogc {
                 mov EAX, 0x8000_0008;
@@ -623,7 +626,7 @@ void getAMDcacheinfo()
             if (numcores>cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
         }
 
-        version (GNU) asm pure nothrow @nogc {
+        version (GNU_OR_LDC) asm pure nothrow @nogc {
             "cpuid" : "=a" (dummy), "=c" (c6), "=d" (d6) : "a" (0x8000_0006) : "ebx";
         } else asm pure nothrow @nogc {
             mov EAX, 0x8000_0006; // L2/L3 cache
@@ -652,7 +655,7 @@ void getCpuInfo0B()
     int threadsPerCore;
     uint a, b, c, d;
     do {
-        version (GNU) asm pure nothrow @nogc {
+        version (GNU_OR_LDC) asm pure nothrow @nogc {
             "cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (0x0B), "c" (level);
         } else asm pure nothrow @nogc {
             mov EAX, 0x0B;
@@ -684,7 +687,7 @@ void cpuidX86()
 
     uint a, b, c, d;
     uint* venptr = cast(uint*)cf.vendorID.ptr;
-    version (GNU)
+    version (GNU_OR_LDC)
     {
         asm pure nothrow @nogc {
             "cpuid" : "=a" (max_cpuid), "=b" (venptr[0]), "=d" (venptr[1]), "=c" (venptr[2]) : "a" (0);
@@ -729,9 +732,9 @@ void cpuidX86()
 
 
     cf.probablyIntel = cf.vendorID == "GenuineIntel";
-    cf.probablyAMD = cf.vendorID == "AuthenticAMD";
+    cf.probablyAMD = (cf.vendorID == "AuthenticAMD" || cf.vendorID == "HygonGenuine");
     uint apic = 0; // brand index, apic id
-    version (GNU) asm pure nothrow @nogc {
+    version (GNU_OR_LDC) asm pure nothrow @nogc {
         "cpuid" : "=a" (a), "=b" (apic), "=c" (cf.miscfeatures), "=d" (cf.features) : "a" (1);
     } else {
         asm pure nothrow @nogc {
@@ -754,7 +757,7 @@ void cpuidX86()
 
     if (max_cpuid >= 7)
     {
-        version (GNU) asm pure nothrow @nogc {
+        version (GNU_OR_LDC) asm pure nothrow @nogc {
             "cpuid" : "=a" (a), "=b" (cf.extfeatures), "=c" (c) : "a" (7), "c" (0) : "edx";
         } else {
             uint ext;
@@ -770,8 +773,11 @@ void cpuidX86()
 
     if (cf.miscfeatures & OSXSAVE_BIT)
     {
-        version (GNU) asm pure nothrow @nogc {
-            "xgetbv" : "=a" (a), "=d" (d) : "c" (0);
+        version (GNU_OR_LDC) asm pure nothrow @nogc {
+            /* Old assemblers do not recognize xgetbv, and there is no easy way
+             * to conditionally compile based on the assembler used, so use the
+             * raw .byte sequence instead.  */
+            ".byte 0x0f, 0x01, 0xd0" : "=a" (a), "=d" (d) : "c" (0);
         } else asm pure nothrow @nogc {
             mov ECX, 0;
             xgetbv;
@@ -784,7 +790,7 @@ void cpuidX86()
     cf.amdfeatures = 0;
     cf.amdmiscfeatures = 0;
     if (max_extended_cpuid >= 0x8000_0001) {
-        version (GNU) asm pure nothrow @nogc {
+        version (GNU_OR_LDC) asm pure nothrow @nogc {
             "cpuid" : "=a" (a), "=c" (cf.amdmiscfeatures), "=d" (cf.amdfeatures) : "a" (0x8000_0001) : "ebx";
         } else {
             asm pure nothrow @nogc {
@@ -805,7 +811,7 @@ void cpuidX86()
         cf.maxCores = 1;
         if (hyperThreadingBit) {
             // determine max number of cores for AMD
-            version (GNU) asm pure nothrow @nogc {
+            version (GNU_OR_LDC) asm pure nothrow @nogc {
                 "cpuid" : "=a" (a), "=c" (c) : "a" (0x8000_0008) : "ebx", "edx";
             } else asm pure nothrow @nogc {
                 mov EAX, 0x8000_0008;
@@ -818,7 +824,7 @@ void cpuidX86()
 
     if (max_extended_cpuid >= 0x8000_0004) {
         uint* pnb = cast(uint*)cf.processorNameBuffer.ptr;
-        version (GNU)
+        version (GNU_OR_LDC)
         {
             asm pure nothrow @nogc {
                 "cpuid" : "=a" (pnb[0]), "=b" (pnb[1]), "=c" (pnb[ 2]), "=d" (pnb[ 3]) : "a" (0x8000_0002);
@@ -950,7 +956,7 @@ void cpuidX86()
         else cf.maxThreads = cf.maxCores;
 
         if (cf.probablyAMD && max_extended_cpuid >= 0x8000_001E) {
-            version (GNU) asm pure nothrow @nogc {
+            version (GNU_OR_LDC) asm pure nothrow @nogc {
                 "cpuid" : "=a" (a), "=b" (b) : "a" (0x8000_001E) : "ecx", "edx";
             } else {
                 asm pure nothrow @nogc {
@@ -974,21 +980,18 @@ bool hasCPUID()
     else
     {
         uint flags;
-        version (GNU)
+        version (GNU_OR_LDC)
         {
             // http://wiki.osdev.org/CPUID#Checking_CPUID_availability
-            // ASM template supports both AT&T and Intel syntax.
             asm nothrow @nogc { "
-                pushf{l|d}                 # Save EFLAGS
-                pushf{l|d}                 # Store EFLAGS
-                xor{l $0x00200000, (%%esp)| dword ptr [esp], 0x00200000}
-                                           # Invert the ID bit in stored EFLAGS
-                popf{l|d}                  # Load stored EFLAGS (with ID bit inverted)
-                pushf{l|d}                 # Store EFLAGS again (ID bit may or may not be inverted)
-                pop {%%}eax                # eax = modified EFLAGS (ID bit may or may not be inverted)
-                xor {(%%esp), %%eax|eax, [esp]}
-                                           # eax = whichever bits were changed
-                popf{l|d}                  # Restore original EFLAGS
+                pushfl                    # Save EFLAGS
+                pushfl                    # Store EFLAGS
+                xorl $0x00200000, (%%esp) # Invert the ID bit in stored EFLAGS
+                popfl                     # Load stored EFLAGS (with ID bit inverted)
+                pushfl                    # Store EFLAGS again (ID bit may or may not be inverted)
+                popl %%eax                # eax = modified EFLAGS (ID bit may or may not be inverted)
+                xorl (%%esp), %%eax       # eax = whichever bits were changed
+                popfl                     # Restore original EFLAGS
                 " : "=a" (flags);
             }
         }
diff --git a/libphobos/libdruntime/core/internal/traits.d b/libphobos/libdruntime/core/internal/traits.d
index e56f016c355..9f79dd014b8 100644
--- a/libphobos/libdruntime/core/internal/traits.d
+++ b/libphobos/libdruntime/core/internal/traits.d
@@ -8,10 +8,16 @@
  */
 module core.internal.traits;
 
-/// taken from std.typetuple.TypeTuple
-template TypeTuple(TList...)
+alias AliasSeq(TList...) = TList; // compile-time sequence made of the template arguments themselves
+
+template Fields(T) // field types of T as a sequence; for a non-aggregate T the sequence is just (T)
 {
-    alias TypeTuple = TList;
+    static if (is(T == struct) || is(T == union))
+        alias Fields = typeof(T.tupleof[0 .. $ - __traits(isNested, T)]); // slice drops the last tupleof entry when T is nested
+    else static if (is(T == class))
+        alias Fields = typeof(T.tupleof);
+    else
+        alias Fields = AliasSeq!T; // any other T: one-element sequence holding T
 }
 
 T trustedCast(T, U)(auto ref U u) @trusted pure nothrow
@@ -109,17 +115,17 @@ template staticIota(int beg, int end)
     {
         static if (beg >= end)
         {
-            alias staticIota = TypeTuple!();
+            alias staticIota = AliasSeq!();
         }
         else
         {
-            alias staticIota = TypeTuple!(+beg);
+            alias staticIota = AliasSeq!(+beg);
         }
     }
     else
     {
         enum mid = beg + (end - beg) / 2;
-        alias staticIota = TypeTuple!(staticIota!(beg, mid), staticIota!(mid, end));
+        alias staticIota = AliasSeq!(staticIota!(beg, mid), staticIota!(mid, end));
     }
 }
 
@@ -235,24 +241,150 @@ template hasElaborateCopyConstructor(T...)
         enum bool hasElaborateCopyConstructor = false;
 }
 
+template hasUnsharedIndirections(T) // compile-time test for indirections in T that are neither shared nor immutable; delegates and associative arrays always count (see TODO)
+{
+    static if (is(T == immutable))
+        enum hasUnsharedIndirections = false;
+    else static if (is(T == struct) || is(T == union))
+        enum hasUnsharedIndirections = anySatisfy!(.hasUnsharedIndirections, Fields!T); // true if any field type qualifies
+    else static if (is(T : E[N], E, size_t N))
+        enum hasUnsharedIndirections = is(E == void) ? false : hasUnsharedIndirections!E; // static array: decided by the element type, void elements give false
+    else static if (isFunctionPointer!T)
+        enum hasUnsharedIndirections = false;
+    else static if (isPointer!T)
+        enum hasUnsharedIndirections = !is(T : shared(U)*, U) && !is(T : immutable(U)*, U); // T converts to neither shared(U)* nor immutable(U)*
+    else static if (isDynamicArray!T)
+        enum hasUnsharedIndirections = !is(T : shared(V)[], V) && !is(T : immutable(V)[], V); // T converts to neither shared(V)[] nor immutable(V)[]
+    else static if (is(T == class) || is(T == interface))
+        enum hasUnsharedIndirections = !is(T : shared(W), W);
+    else
+        enum hasUnsharedIndirections = isDelegate!T || __traits(isAssociativeArray, T); // TODO: how to handle these?
+}
+
+unittest
+{
+    static struct Foo { shared(int)* val; } // its only field points at shared data
+
+    static assert(!hasUnsharedIndirections!(immutable(char)*));
+    static assert(!hasUnsharedIndirections!(string));
+
+    static assert(!hasUnsharedIndirections!(Foo));
+    static assert( hasUnsharedIndirections!(Foo*)); // pointer to a Foo that is itself neither shared nor immutable
+    static assert(!hasUnsharedIndirections!(shared(Foo)*));
+    static assert(!hasUnsharedIndirections!(immutable(Foo)*));
+}
+
+enum bool isAggregateType(T) = is(T == struct) || is(T == union) ||
+                               is(T == class) || is(T == interface); // true for struct, union, class and interface types
+
+enum bool isPointer(T) = is(T == U*, U) && !isAggregateType!T; // true when T is exactly some pointer type U* and not an aggregate
+
+enum bool isDynamicArray(T) = is(DynamicArrayTypeOf!T) && !isAggregateType!T; // true when DynamicArrayTypeOf!T instantiates and T is not an aggregate
+
+template OriginalType(T) // replaces an enum type by its base type, recursively; other types pass through Impl unchanged
+{
+    template Impl(T)
+    {
+        static if (is(T U == enum)) alias Impl = OriginalType!U; // T is an enum with base type U: recurse on U
+        else                        alias Impl =              T;
+    }
+
+    alias OriginalType = ModifyTypePreservingTQ!(Impl, T); // NOTE(review): presumably applies Impl beneath T's type qualifiers; helper is defined outside this hunk
+}
+
+template DynamicArrayTypeOf(T) // yields the dynamic array type found for T; otherwise instantiation fails on the static assert
+{
+    static if (is(AliasThisTypeOf!T AT) && !is(AT[] == AT))
+        alias X = DynamicArrayTypeOf!AT; // T has an alias this type AT: recurse into it
+    else
+        alias X = OriginalType!T;
+
+    static if (is(Unqual!X : E[], E) && !is(typeof({ enum n = X.length; }))) // converts to some E[] and X.length is not usable as an enum initializer
+        alias DynamicArrayTypeOf = X;
+    else
+        static assert(0, T.stringof ~ " is not a dynamic array");
+}
+
+private template AliasThisTypeOf(T) // type of T's alias this member; instantiation fails on the static assert when there is not exactly one
+    if (isAggregateType!T)
+{
+    alias members = __traits(getAliasThis, T);
+
+    static if (members.length == 1)
+        alias AliasThisTypeOf = typeof(__traits(getMember, T.init, members[0])); // type of that member, looked up on T.init
+    else
+        static assert(0, T.stringof~" does not have alias this type");
+}
+
+template isFunctionPointer(T...) // true when the single argument is, or has the type of, a pointer to a function
+    if (T.length == 1)
+{
+    static if (is(T[0] U) || is(typeof(T[0]) U)) // U = the argument if it is a type, else the type of the argument
+    {
+        static if (is(U F : F*) && is(F == function)) // U is F* with F a function type
+            enum bool isFunctionPointer = true;
+        else
+            enum bool isFunctionPointer = false;
+    }
+    else
+        enum bool isFunctionPointer = false;
+}
+
+template isDelegate(T...) // true when the single argument is a delegate type, has delegate type, or is a symbol whose address is a delegate
+    if (T.length == 1)
+{
+    static if (is(typeof(& T[0]) U : U*) && is(typeof(& T[0]) U == delegate))
+    {
+        // T is a (nested) function symbol.
+        enum bool isDelegate = true;
+    }
+    else static if (is(T[0] W) || is(typeof(T[0]) W))
+    {
+        // T is an expression or a type.  Take the type of it and examine.
+        enum bool isDelegate = is(W == delegate);
+    }
+    else
+        enum bool isDelegate = false;
+}
+
 // std.meta.Filter
 template Filter(alias pred, TList...)
 {
     static if (TList.length == 0)
     {
-        alias Filter = TypeTuple!();
+        alias Filter = AliasSeq!();
     }
     else static if (TList.length == 1)
     {
         static if (pred!(TList[0]))
-            alias Filter = TypeTuple!(TList[0]);
+            alias Filter = AliasSeq!(TList[0]);
         else
-            alias Filter = TypeTuple!();
+            alias Filter = AliasSeq!();
+    }
+    /* The next case speeds up compilation by reducing
+     * the number of Filter instantiations
+     */
+    else static if (TList.length == 2)
+    {
+        static if (pred!(TList[0]))
+        {
+            static if (pred!(TList[1]))
+                alias Filter = AliasSeq!(TList[0], TList[1]);
+            else
+                alias Filter = AliasSeq!(TList[0]);
+        }
+        else
+        {
+            static if (pred!(TList[1]))
+                alias Filter = AliasSeq!(TList[1]);
+            else
+                alias Filter = AliasSeq!();
+        }
     }
     else
     {
         alias Filter =
-            TypeTuple!(
+            AliasSeq!(
                 Filter!(pred, TList[ 0  .. $/2]),
                 Filter!(pred, TList[$/2 ..  $ ]));
     }
diff --git a/libphobos/libdruntime/core/simd.d b/libphobos/libdruntime/core/simd.d
index 32e2aaf5cfd..11a47118319 100644
--- a/libphobos/libdruntime/core/simd.d
+++ b/libphobos/libdruntime/core/simd.d
@@ -5,9 +5,10 @@
  *
  * Source: $(DRUNTIMESRC core/_simd.d)
  *
- * Copyright: Copyright Digital Mars 2012.
- * License:   $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
- * Authors:   $(WEB digitalmars.com, Walter Bright),
+ * Copyright: Copyright Digital Mars 2012-2020
+ * License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
+ * Authors:   $(HTTP digitalmars.com, Walter Bright),
+ * Source:    $(DRUNTIMESRC core/_simd.d)
  */
 
 module core.simd;
@@ -38,470 +39,523 @@ template Vector(T)
 
 /* Handy aliases
  */
-static if (is(Vector!(void[8])))    alias Vector!(void[8])  void8;          ///
-static if (is(Vector!(double[1])))  alias Vector!(double[1]) double1;       ///
-static if (is(Vector!(float[2])))   alias Vector!(float[2])  float2;        ///
-static if (is(Vector!(byte[8])))    alias Vector!(byte[8])  byte8;          ///
-static if (is(Vector!(ubyte[8])))   alias Vector!(ubyte[8]) ubyte8;         ///
-static if (is(Vector!(short[4])))   alias Vector!(short[4])  short4;        ///
-static if (is(Vector!(ushort[4])))  alias Vector!(ushort[4]) ushort4;       ///
-static if (is(Vector!(int[2])))     alias Vector!(int[2])    int2;          ///
-static if (is(Vector!(uint[2])))    alias Vector!(uint[2])   uint2;         ///
-static if (is(Vector!(long[1])))    alias Vector!(long[1])   long1;         ///
-static if (is(Vector!(ulong[1])))   alias Vector!(ulong[1])  ulong1;        ///
-
-static if (is(Vector!(void[16])))   alias Vector!(void[16])  void16;        ///
-static if (is(Vector!(double[2])))  alias Vector!(double[2]) double2;       ///
-static if (is(Vector!(float[4])))   alias Vector!(float[4])  float4;        ///
-static if (is(Vector!(byte[16])))   alias Vector!(byte[16])  byte16;        ///
-static if (is(Vector!(ubyte[16])))  alias Vector!(ubyte[16]) ubyte16;       ///
-static if (is(Vector!(short[8])))   alias Vector!(short[8])  short8;        ///
-static if (is(Vector!(ushort[8])))  alias Vector!(ushort[8]) ushort8;       ///
-static if (is(Vector!(int[4])))     alias Vector!(int[4])    int4;          ///
-static if (is(Vector!(uint[4])))    alias Vector!(uint[4])   uint4;         ///
-static if (is(Vector!(long[2])))    alias Vector!(long[2])   long2;         ///
-static if (is(Vector!(ulong[2])))   alias Vector!(ulong[2])  ulong2;        ///
-
-static if (is(Vector!(void[32])))   alias Vector!(void[32])   void32;        ///
-static if (is(Vector!(double[4])))  alias Vector!(double[4])  double4;       ///
-static if (is(Vector!(float[8])))   alias Vector!(float[8])   float8;        ///
-static if (is(Vector!(byte[32])))   alias Vector!(byte[32])   byte32;        ///
-static if (is(Vector!(ubyte[32])))  alias Vector!(ubyte[32])  ubyte32;       ///
-static if (is(Vector!(short[16])))  alias Vector!(short[16])  short16;       ///
-static if (is(Vector!(ushort[16]))) alias Vector!(ushort[16]) ushort16;      ///
-static if (is(Vector!(int[8])))     alias Vector!(int[8])     int8;          ///
-static if (is(Vector!(uint[8])))    alias Vector!(uint[8])    uint8;         ///
-static if (is(Vector!(long[4])))    alias Vector!(long[4])    long4;         ///
-static if (is(Vector!(ulong[4])))   alias Vector!(ulong[4])   ulong4;        ///
+static if (is(Vector!(void[8])))    alias Vector!(void[8])    void8;        ///
+static if (is(Vector!(double[1])))  alias Vector!(double[1])  double1;      ///
+static if (is(Vector!(float[2])))   alias Vector!(float[2])   float2;       ///
+static if (is(Vector!(byte[8])))    alias Vector!(byte[8])    byte8;        ///
+static if (is(Vector!(ubyte[8])))   alias Vector!(ubyte[8])   ubyte8;       ///
+static if (is(Vector!(short[4])))   alias Vector!(short[4])   short4;       ///
+static if (is(Vector!(ushort[4])))  alias Vector!(ushort[4])  ushort4;      ///
+static if (is(Vector!(int[2])))     alias Vector!(int[2])     int2;         ///
+static if (is(Vector!(uint[2])))    alias Vector!(uint[2])    uint2;        ///
+static if (is(Vector!(long[1])))    alias Vector!(long[1])    long1;        ///
+static if (is(Vector!(ulong[1])))   alias Vector!(ulong[1])   ulong1;       ///
+
+static if (is(Vector!(void[16])))   alias Vector!(void[16])   void16;       ///
+static if (is(Vector!(double[2])))  alias Vector!(double[2])  double2;      ///
+static if (is(Vector!(float[4])))   alias Vector!(float[4])   float4;       ///
+static if (is(Vector!(byte[16])))   alias Vector!(byte[16])   byte16;       ///
+static if (is(Vector!(ubyte[16])))  alias Vector!(ubyte[16])  ubyte16;      ///
+static if (is(Vector!(short[8])))   alias Vector!(short[8])   short8;       ///
+static if (is(Vector!(ushort[8])))  alias Vector!(ushort[8])  ushort8;      ///
+static if (is(Vector!(int[4])))     alias Vector!(int[4])     int4;         ///
+static if (is(Vector!(uint[4])))    alias Vector!(uint[4])    uint4;        ///
+static if (is(Vector!(long[2])))    alias Vector!(long[2])    long2;        ///
+static if (is(Vector!(ulong[2])))   alias Vector!(ulong[2])   ulong2;       ///
+
+static if (is(Vector!(void[32])))   alias Vector!(void[32])   void32;       ///
+static if (is(Vector!(double[4])))  alias Vector!(double[4])  double4;      ///
+static if (is(Vector!(float[8])))   alias Vector!(float[8])   float8;       ///
+static if (is(Vector!(byte[32])))   alias Vector!(byte[32])   byte32;       ///
+static if (is(Vector!(ubyte[32])))  alias Vector!(ubyte[32])  ubyte32;      ///
+static if (is(Vector!(short[16])))  alias Vector!(short[16])  short16;      ///
+static if (is(Vector!(ushort[16]))) alias Vector!(ushort[16]) ushort16;     ///
+static if (is(Vector!(int[8])))     alias Vector!(int[8])     int8;         ///
+static if (is(Vector!(uint[8])))    alias Vector!(uint[8])    uint8;        ///
+static if (is(Vector!(long[4])))    alias Vector!(long[4])    long4;        ///
+static if (is(Vector!(ulong[4])))   alias Vector!(ulong[4])   ulong4;       ///
+
+static if (is(Vector!(void[64])))   alias Vector!(void[64])   void64;       ///
+static if (is(Vector!(double[8])))  alias Vector!(double[8])  double8;      ///
+static if (is(Vector!(float[16])))  alias Vector!(float[16])  float16;      ///
+static if (is(Vector!(byte[64])))   alias Vector!(byte[64])   byte64;       ///
+static if (is(Vector!(ubyte[64])))  alias Vector!(ubyte[64])  ubyte64;      ///
+static if (is(Vector!(short[32])))  alias Vector!(short[32])  short32;      ///
+static if (is(Vector!(ushort[32]))) alias Vector!(ushort[32]) ushort32;     ///
+static if (is(Vector!(int[16])))    alias Vector!(int[16])    int16;        ///
+static if (is(Vector!(uint[16])))   alias Vector!(uint[16])   uint16;       ///
+static if (is(Vector!(long[8])))    alias Vector!(long[8])    long8;        ///
+static if (is(Vector!(ulong[8])))   alias Vector!(ulong[8])   ulong8;       ///
 
 version (D_SIMD)
 {
-  /** XMM opcodes that conform to the following:
-   *
-   *  opcode xmm1,xmm2/mem
-   *
-   * and do not have side effects (i.e. do not write to memory).
-   */
-  enum XMM
-  {
-    ADDSS = 0xF30F58,
-    ADDSD = 0xF20F58,
-    ADDPS = 0x000F58,
-    ADDPD = 0x660F58,
-    PADDB = 0x660FFC,
-    PADDW = 0x660FFD,
-    PADDD = 0x660FFE,
-    PADDQ = 0x660FD4,
-
-    SUBSS = 0xF30F5C,
-    SUBSD = 0xF20F5C,
-    SUBPS = 0x000F5C,
-    SUBPD = 0x660F5C,
-    PSUBB = 0x660FF8,
-    PSUBW = 0x660FF9,
-    PSUBD = 0x660FFA,
-    PSUBQ = 0x660FFB,
-
-    MULSS = 0xF30F59,
-    MULSD = 0xF20F59,
-    MULPS = 0x000F59,
-    MULPD = 0x660F59,
-    PMULLW = 0x660FD5,
-
-    DIVSS = 0xF30F5E,
-    DIVSD = 0xF20F5E,
-    DIVPS = 0x000F5E,
-    DIVPD = 0x660F5E,
-
-    PAND  = 0x660FDB,
-    POR   = 0x660FEB,
-
-    UCOMISS = 0x000F2E,
-    UCOMISD = 0x660F2E,
-
-    XORPS = 0x000F57,
-    XORPD = 0x660F57,
-
-    // Use STO and LOD instead of MOV to distinguish the direction
-    STOSS  = 0xF30F11,
-    STOSD  = 0xF20F11,
-    STOAPS = 0x000F29,
-    STOAPD = 0x660F29,
-    STODQA = 0x660F7F,
-    STOD   = 0x660F7E,        // MOVD reg/mem64, xmm   66 0F 7E /r
-    STOQ   = 0x660FD6,
-
-    LODSS  = 0xF30F10,
-    LODSD  = 0xF20F10,
-    LODAPS = 0x000F28,
-    LODAPD = 0x660F28,
-    LODDQA = 0x660F6F,
-    LODD   = 0x660F6E,        // MOVD xmm, reg/mem64   66 0F 6E /r
-    LODQ   = 0xF30F7E,
-
-    LODDQU   = 0xF30F6F,      // MOVDQU xmm1, xmm2/mem128  F3 0F 6F /r
-    STODQU   = 0xF30F7F,      // MOVDQU xmm1/mem128, xmm2  F3 0F 7F /r
-    MOVDQ2Q  = 0xF20FD6,      // MOVDQ2Q mmx, xmm          F2 0F D6 /r
-    MOVHLPS  = 0x0F12,        // MOVHLPS xmm1, xmm2        0F 12 /r
-    LODHPD   = 0x660F16,
-    STOHPD   = 0x660F17,      // MOVHPD mem64, xmm         66 0F 17 /r
-    LODHPS   = 0x0F16,
-    STOHPS   = 0x0F17,
-    MOVLHPS  = 0x0F16,
-    LODLPD   = 0x660F12,
-    STOLPD   = 0x660F13,
-    LODLPS   = 0x0F12,
-    STOLPS   = 0x0F13,
-    MOVMSKPD = 0x660F50,
-    MOVMSKPS = 0x0F50,
-    MOVNTDQ  = 0x660FE7,
-    MOVNTI   = 0x0FC3,
-    MOVNTPD  = 0x660F2B,
-    MOVNTPS  = 0x0F2B,
-    MOVNTQ   = 0x0FE7,
-    MOVQ2DQ  = 0xF30FD6,
-    LODUPD   = 0x660F10,
-    STOUPD   = 0x660F11,
-    LODUPS   = 0x0F10,
-    STOUPS   = 0x0F11,
-
-    PACKSSDW = 0x660F6B,
-    PACKSSWB = 0x660F63,
-    PACKUSWB = 0x660F67,
-    PADDSB = 0x660FEC,
-    PADDSW = 0x660FED,
-    PADDUSB = 0x660FDC,
-    PADDUSW = 0x660FDD,
-    PANDN = 0x660FDF,
-    PCMPEQB = 0x660F74,
-    PCMPEQD = 0x660F76,
-    PCMPEQW = 0x660F75,
-    PCMPGTB = 0x660F64,
-    PCMPGTD = 0x660F66,
-    PCMPGTW = 0x660F65,
-    PMADDWD = 0x660FF5,
-    PSLLW = 0x660FF1,
-    PSLLD = 0x660FF2,
-    PSLLQ = 0x660FF3,
-    PSRAW = 0x660FE1,
-    PSRAD = 0x660FE2,
-    PSRLW = 0x660FD1,
-    PSRLD = 0x660FD2,
-    PSRLQ = 0x660FD3,
-    PSUBSB = 0x660FE8,
-    PSUBSW = 0x660FE9,
-    PSUBUSB = 0x660FD8,
-    PSUBUSW = 0x660FD9,
-    PUNPCKHBW = 0x660F68,
-    PUNPCKHDQ = 0x660F6A,
-    PUNPCKHWD = 0x660F69,
-    PUNPCKLBW = 0x660F60,
-    PUNPCKLDQ = 0x660F62,
-    PUNPCKLWD = 0x660F61,
-    PXOR = 0x660FEF,
-    ANDPD = 0x660F54,
-    ANDPS = 0x0F54,
-    ANDNPD = 0x660F55,
-    ANDNPS = 0x0F55,
-    CMPPS = 0x0FC2,
-    CMPPD = 0x660FC2,
-    CMPSD = 0xF20FC2,
-    CMPSS = 0xF30FC2,
-    COMISD = 0x660F2F,
-    COMISS = 0x0F2F,
-    CVTDQ2PD = 0xF30FE6,
-    CVTDQ2PS = 0x0F5B,
-    CVTPD2DQ = 0xF20FE6,
-    CVTPD2PI = 0x660F2D,
-    CVTPD2PS = 0x660F5A,
-    CVTPI2PD = 0x660F2A,
-    CVTPI2PS = 0x0F2A,
-    CVTPS2DQ = 0x660F5B,
-    CVTPS2PD = 0x0F5A,
-    CVTPS2PI = 0x0F2D,
-    CVTSD2SI = 0xF20F2D,
-    CVTSD2SS = 0xF20F5A,
-    CVTSI2SD = 0xF20F2A,
-    CVTSI2SS = 0xF30F2A,
-    CVTSS2SD = 0xF30F5A,
-    CVTSS2SI = 0xF30F2D,
-    CVTTPD2PI = 0x660F2C,
-    CVTTPD2DQ = 0x660FE6,
-    CVTTPS2DQ = 0xF30F5B,
-    CVTTPS2PI = 0x0F2C,
-    CVTTSD2SI = 0xF20F2C,
-    CVTTSS2SI = 0xF30F2C,
-    MASKMOVDQU = 0x660FF7,
-    MASKMOVQ = 0x0FF7,
-    MAXPD = 0x660F5F,
-    MAXPS = 0x0F5F,
-    MAXSD = 0xF20F5F,
-    MAXSS = 0xF30F5F,
-    MINPD = 0x660F5D,
-    MINPS = 0x0F5D,
-    MINSD = 0xF20F5D,
-    MINSS = 0xF30F5D,
-    ORPD = 0x660F56,
-    ORPS = 0x0F56,
-    PAVGB = 0x660FE0,
-    PAVGW = 0x660FE3,
-    PMAXSW = 0x660FEE,
-    //PINSRW = 0x660FC4,
-    PMAXUB = 0x660FDE,
-    PMINSW = 0x660FEA,
-    PMINUB = 0x660FDA,
-    //PMOVMSKB = 0x660FD7,
-    PMULHUW = 0x660FE4,
-    PMULHW = 0x660FE5,
-    PMULUDQ = 0x660FF4,
-    PSADBW = 0x660FF6,
-    PUNPCKHQDQ = 0x660F6D,
-    PUNPCKLQDQ = 0x660F6C,
-    RCPPS = 0x0F53,
-    RCPSS = 0xF30F53,
-    RSQRTPS = 0x0F52,
-    RSQRTSS = 0xF30F52,
-    SQRTPD = 0x660F51,
-    SHUFPD = 0x660FC6,
-    SHUFPS = 0x0FC6,
-    SQRTPS = 0x0F51,
-    SQRTSD = 0xF20F51,
-    SQRTSS = 0xF30F51,
-    UNPCKHPD = 0x660F15,
-    UNPCKHPS = 0x0F15,
-    UNPCKLPD = 0x660F14,
-    UNPCKLPS = 0x0F14,
-
-    PSHUFD = 0x660F70,
-    PSHUFHW = 0xF30F70,
-    PSHUFLW = 0xF20F70,
-    PSHUFW = 0x0F70,
-    PSLLDQ = 0x07660F73,
-    PSRLDQ = 0x03660F73,
-
-    //PREFETCH = 0x0F18,
-
-// SSE3 Pentium 4 (Prescott)
-
-    ADDSUBPD = 0x660FD0,
-    ADDSUBPS = 0xF20FD0,
-    HADDPD   = 0x660F7C,
-    HADDPS   = 0xF20F7C,
-    HSUBPD   = 0x660F7D,
-    HSUBPS   = 0xF20F7D,
-    MOVDDUP  = 0xF20F12,
-    MOVSHDUP = 0xF30F16,
-    MOVSLDUP = 0xF30F12,
-    LDDQU    = 0xF20FF0,
-    MONITOR  = 0x0F01C8,
-    MWAIT    = 0x0F01C9,
-
-// SSSE3
-    PALIGNR = 0x660F3A0F,
-    PHADDD = 0x660F3802,
-    PHADDW = 0x660F3801,
-    PHADDSW = 0x660F3803,
-    PABSB = 0x660F381C,
-    PABSD = 0x660F381E,
-    PABSW = 0x660F381D,
-    PSIGNB = 0x660F3808,
-    PSIGND = 0x660F380A,
-    PSIGNW = 0x660F3809,
-    PSHUFB = 0x660F3800,
-    PMADDUBSW = 0x660F3804,
-    PMULHRSW = 0x660F380B,
-    PHSUBD = 0x660F3806,
-    PHSUBW = 0x660F3805,
-    PHSUBSW = 0x660F3807,
-
-// SSE4.1
-
-    BLENDPD   = 0x660F3A0D,
-    BLENDPS   = 0x660F3A0C,
-    BLENDVPD  = 0x660F3815,
-    BLENDVPS  = 0x660F3814,
-    DPPD      = 0x660F3A41,
-    DPPS      = 0x660F3A40,
-    EXTRACTPS = 0x660F3A17,
-    INSERTPS  = 0x660F3A21,
-    MPSADBW   = 0x660F3A42,
-    PBLENDVB  = 0x660F3810,
-    PBLENDW   = 0x660F3A0E,
-    PEXTRD    = 0x660F3A16,
-    PEXTRQ    = 0x660F3A16,
-    PINSRB    = 0x660F3A20,
-    PINSRD    = 0x660F3A22,
-    PINSRQ    = 0x660F3A22,
-
-    MOVNTDQA = 0x660F382A,
-    PACKUSDW = 0x660F382B,
-    PCMPEQQ = 0x660F3829,
-    PEXTRB = 0x660F3A14,
-    PHMINPOSUW = 0x660F3841,
-    PMAXSB = 0x660F383C,
-    PMAXSD = 0x660F383D,
-    PMAXUD = 0x660F383F,
-    PMAXUW = 0x660F383E,
-    PMINSB = 0x660F3838,
-    PMINSD = 0x660F3839,
-    PMINUD = 0x660F383B,
-    PMINUW = 0x660F383A,
-    PMOVSXBW = 0x660F3820,
-    PMOVSXBD = 0x660F3821,
-    PMOVSXBQ = 0x660F3822,
-    PMOVSXWD = 0x660F3823,
-    PMOVSXWQ = 0x660F3824,
-    PMOVSXDQ = 0x660F3825,
-    PMOVZXBW = 0x660F3830,
-    PMOVZXBD = 0x660F3831,
-    PMOVZXBQ = 0x660F3832,
-    PMOVZXWD = 0x660F3833,
-    PMOVZXWQ = 0x660F3834,
-    PMOVZXDQ = 0x660F3835,
-    PMULDQ   = 0x660F3828,
-    PMULLD   = 0x660F3840,
-    PTEST    = 0x660F3817,
-
-    ROUNDPD = 0x660F3A09,
-    ROUNDPS = 0x660F3A08,
-    ROUNDSD = 0x660F3A0B,
-    ROUNDSS = 0x660F3A0A,
-
-// SSE4.2
-    PCMPESTRI  = 0x660F3A61,
-    PCMPESTRM  = 0x660F3A60,
-    PCMPISTRI  = 0x660F3A63,
-    PCMPISTRM  = 0x660F3A62,
-    PCMPGTQ    = 0x660F3837,
-    //CRC32
-
-// SSE4a (AMD only)
-    // EXTRQ,INSERTQ,MOVNTSD,MOVNTSS
-
-// POPCNT and LZCNT (have their own CPUID bits)
-    POPCNT     = 0xF30FB8,
-    // LZCNT
-  }
-
-  /**
-   * Generate two operand instruction with XMM 128 bit operands.
-   *
-   * This is a compiler magic function - it doesn't behave like
-   * regular D functions.
-   *
-   * Parameters:
-   *      opcode  any of the XMM opcodes; it must be a compile time constant
-   *      op1     first operand
-   *      op2     second operand
-   * Returns:
-   *      result of opcode
-   */
-  pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2);
-
-  /**
-   * Unary SIMD instructions.
-   */
-  pure @safe void16 __simd(XMM opcode, void16 op1);
-  pure @safe void16 __simd(XMM opcode, double d);       ///
-  pure @safe void16 __simd(XMM opcode, float f);        ///
-
-  /****
-   * For instructions:
-   * CMPPD, CMPSS, CMPSD, CMPPS,
-   * PSHUFD, PSHUFHW, PSHUFLW,
-   * BLENDPD, BLENDPS, DPPD, DPPS,
-   * MPSADBW, PBLENDW,
-   * ROUNDPD, ROUNDPS, ROUNDSD, ROUNDSS
-   * Parameters:
-   *      opcode  any of the above XMM opcodes; it must be a compile time constant
-   *      op1     first operand
-   *      op2     second operand
-   *      imm8    third operand; must be a compile time constant
-   * Returns:
-   *      result of opcode
-   */
-  pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2, ubyte imm8);
-
-  /***
-   * For instructions with the imm8 version:
-   * PSLLD, PSLLQ, PSLLW, PSRAD, PSRAW, PSRLD, PSRLQ, PSRLW,
-   * PSRLDQ, PSLLDQ
-   * Parameters:
-   *      opcode  any of the XMM opcodes; it must be a compile time constant
-   *      op1     first operand
-   *      imm8    second operand; must be a compile time constant
-   * Returns:
-   *      result of opcode
-   */
-  pure @safe void16 __simd_ib(XMM opcode, void16 op1, ubyte imm8);
-
-  /*****
-   * For "store" operations of the form:
-   *    op1 op= op2
-   * Returns:
-   *    op2
-   * These cannot be marked as pure, as semantic() doesn't check them.
-   */
-  @safe void16 __simd_sto(XMM opcode, void16 op1, void16 op2);
-  @safe void16 __simd_sto(XMM opcode, double op1, void16 op2); ///
-  @safe void16 __simd_sto(XMM opcode, float op1, void16 op2);  ///
-
-  /* The following use overloading to ensure correct typing.
-   * Compile with inlining on for best performance.
-   */
-
-  pure @safe short8 pcmpeq()(short8 v1, short8 v2)
-  {
-      return __simd(XMM.PCMPEQW, v1, v2);
-  }
-
-  pure @safe ushort8 pcmpeq()(ushort8 v1, ushort8 v2)
-  {
-      return __simd(XMM.PCMPEQW, v1, v2);
-  }
-
-  /*********************
-   * Emit prefetch instruction.
-   * Params:
-   *    address = address to be prefetched
-   *    writeFetch = true for write fetch, false for read fetch
-   *    locality = 0..3 (0 meaning least local, 3 meaning most local)
-   * Note:
-   *    The Intel mappings are:
-   *    $(TABLE
-   *    $(THEAD writeFetch, locality, Instruction)
-   *    $(TROW false, 0, prefetchnta)
-   *    $(TROW false, 1, prefetch2)
-   *    $(TROW false, 2, prefetch1)
-   *    $(TROW false, 3, prefetch0)
-   *    $(TROW false, 0, prefetchw)
-   *    $(TROW false, 1, prefetchw)
-   *    $(TROW false, 2, prefetchw)
-   *    $(TROW false, 3, prefetchw)
-   *    )
-   */
-  void prefetch(bool writeFetch, ubyte locality)(const(void)* address)
-  {
+    /** XMM opcodes that conform to the following:
+    *
+    *  opcode xmm1,xmm2/mem
+    *
+    * and do not have side effects (i.e. do not write to memory).
+    */
+    enum XMM
+    {
+        ADDSS = 0xF30F58,
+        ADDSD = 0xF20F58,
+        ADDPS = 0x000F58,
+        ADDPD = 0x660F58,
+        PADDB = 0x660FFC,
+        PADDW = 0x660FFD,
+        PADDD = 0x660FFE,
+        PADDQ = 0x660FD4,
+
+        SUBSS = 0xF30F5C,
+        SUBSD = 0xF20F5C,
+        SUBPS = 0x000F5C,
+        SUBPD = 0x660F5C,
+        PSUBB = 0x660FF8,
+        PSUBW = 0x660FF9,
+        PSUBD = 0x660FFA,
+        PSUBQ = 0x660FFB,
+
+        MULSS = 0xF30F59,
+        MULSD = 0xF20F59,
+        MULPS = 0x000F59,
+        MULPD = 0x660F59,
+        PMULLW = 0x660FD5,
+
+        DIVSS = 0xF30F5E,
+        DIVSD = 0xF20F5E,
+        DIVPS = 0x000F5E,
+        DIVPD = 0x660F5E,
+
+        PAND  = 0x660FDB,
+        POR   = 0x660FEB,
+
+        UCOMISS = 0x000F2E,
+        UCOMISD = 0x660F2E,
+
+        XORPS = 0x000F57,
+        XORPD = 0x660F57,
+
+        // Use STO and LOD instead of MOV to distinguish the direction
+        // (Destination is first operand, Source is second operand)
+        STOSS  = 0xF30F11,        /// MOVSS xmm1/m32, xmm2
+        STOSD  = 0xF20F11,        /// MOVSD xmm1/m64, xmm2
+        STOAPS = 0x000F29,        /// MOVAPS xmm2/m128, xmm1
+        STOAPD = 0x660F29,        /// MOVAPD xmm2/m128, xmm1
+        STODQA = 0x660F7F,        /// MOVDQA xmm2/m128, xmm1
+        STOD   = 0x660F7E,        /// MOVD reg/mem64, xmm   66 0F 7E /r
+        STOQ   = 0x660FD6,        /// MOVQ xmm2/m64, xmm1
+
+        LODSS  = 0xF30F10,        /// MOVSS xmm1, xmm2/m32
+        LODSD  = 0xF20F10,        /// MOVSD xmm1, xmm2/m64
+        LODAPS = 0x000F28,        /// MOVAPS xmm1, xmm2/m128
+        LODAPD = 0x660F28,        /// MOVAPD xmm1, xmm2/m128
+        LODDQA = 0x660F6F,        /// MOVDQA xmm1, xmm2/m128
+        LODD   = 0x660F6E,        /// MOVD xmm, reg/mem64   66 0F 6E /r
+        LODQ   = 0xF30F7E,        /// MOVQ xmm1, xmm2/m64
+
+        LODDQU   = 0xF30F6F,      /// MOVDQU xmm1, xmm2/mem128  F3 0F 6F /r
+        STODQU   = 0xF30F7F,      /// MOVDQU xmm1/mem128, xmm2  F3 0F 7F /r
+        MOVDQ2Q  = 0xF20FD6,      /// MOVDQ2Q mmx, xmm          F2 0F D6 /r
+        MOVHLPS  = 0x0F12,        /// MOVHLPS xmm1, xmm2        0F 12 /r
+        LODHPD   = 0x660F16,      /// MOVHPD xmm1, m64
+        STOHPD   = 0x660F17,      /// MOVHPD mem64, xmm1        66 0F 17 /r
+        LODHPS   = 0x0F16,        /// MOVHPS xmm1, m64
+        STOHPS   = 0x0F17,        /// MOVHPS m64, xmm1
+        MOVLHPS  = 0x0F16,        /// MOVLHPS xmm1, xmm2
+        LODLPD   = 0x660F12,      /// MOVLPD xmm1, m64
+        STOLPD   = 0x660F13,      /// MOVLPD m64, xmm1
+        LODLPS   = 0x0F12,        /// MOVLPS xmm1, m64
+        STOLPS   = 0x0F13,        /// MOVLPS m64, xmm1
+        MOVMSKPD = 0x660F50,      /// MOVMSKPD reg, xmm
+        MOVMSKPS = 0x0F50,        /// MOVMSKPS reg, xmm
+        MOVNTDQ  = 0x660FE7,      /// MOVNTDQ m128, xmm1
+        MOVNTI   = 0x0FC3,        /// MOVNTI m32, r32
+        MOVNTPD  = 0x660F2B,      /// MOVNTPD m128, xmm1
+        MOVNTPS  = 0x0F2B,        /// MOVNTPS m128, xmm1
+        MOVNTQ   = 0x0FE7,        /// MOVNTQ m64, mm
+        MOVQ2DQ  = 0xF30FD6,      /// MOVQ2DQ
+        LODUPD   = 0x660F10,      /// MOVUPD xmm1, xmm2/m128
+        STOUPD   = 0x660F11,      /// MOVUPD xmm2/m128, xmm1
+        LODUPS   = 0x0F10,        /// MOVUPS xmm1, xmm2/m128
+        STOUPS   = 0x0F11,        /// MOVUPS xmm2/m128, xmm1
+
+        PACKSSDW = 0x660F6B,
+        PACKSSWB = 0x660F63,
+        PACKUSWB = 0x660F67,
+        PADDSB = 0x660FEC,
+        PADDSW = 0x660FED,
+        PADDUSB = 0x660FDC,
+        PADDUSW = 0x660FDD,
+        PANDN = 0x660FDF,
+        PCMPEQB = 0x660F74,
+        PCMPEQD = 0x660F76,
+        PCMPEQW = 0x660F75,
+        PCMPGTB = 0x660F64,
+        PCMPGTD = 0x660F66,
+        PCMPGTW = 0x660F65,
+        PMADDWD = 0x660FF5,
+        PSLLW = 0x660FF1,
+        PSLLD = 0x660FF2,
+        PSLLQ = 0x660FF3,
+        PSRAW = 0x660FE1,
+        PSRAD = 0x660FE2,
+        PSRLW = 0x660FD1,
+        PSRLD = 0x660FD2,
+        PSRLQ = 0x660FD3,
+        PSUBSB = 0x660FE8,
+        PSUBSW = 0x660FE9,
+        PSUBUSB = 0x660FD8,
+        PSUBUSW = 0x660FD9,
+        PUNPCKHBW = 0x660F68,
+        PUNPCKHDQ = 0x660F6A,
+        PUNPCKHWD = 0x660F69,
+        PUNPCKLBW = 0x660F60,
+        PUNPCKLDQ = 0x660F62,
+        PUNPCKLWD = 0x660F61,
+        PXOR = 0x660FEF,
+        ANDPD = 0x660F54,
+        ANDPS = 0x0F54,
+        ANDNPD = 0x660F55,
+        ANDNPS = 0x0F55,
+        CMPPS = 0x0FC2,
+        CMPPD = 0x660FC2,
+        CMPSD = 0xF20FC2,
+        CMPSS = 0xF30FC2,
+        COMISD = 0x660F2F,
+        COMISS = 0x0F2F,
+        CVTDQ2PD = 0xF30FE6,
+        CVTDQ2PS = 0x0F5B,
+        CVTPD2DQ = 0xF20FE6,
+        CVTPD2PI = 0x660F2D,
+        CVTPD2PS = 0x660F5A,
+        CVTPI2PD = 0x660F2A,
+        CVTPI2PS = 0x0F2A,
+        CVTPS2DQ = 0x660F5B,
+        CVTPS2PD = 0x0F5A,
+        CVTPS2PI = 0x0F2D,
+        CVTSD2SI = 0xF20F2D,
+        CVTSD2SS = 0xF20F5A,
+        CVTSI2SD = 0xF20F2A,
+        CVTSI2SS = 0xF30F2A,
+        CVTSS2SD = 0xF30F5A,
+        CVTSS2SI = 0xF30F2D,
+        CVTTPD2PI = 0x660F2C,
+        CVTTPD2DQ = 0x660FE6,
+        CVTTPS2DQ = 0xF30F5B,
+        CVTTPS2PI = 0x0F2C,
+        CVTTSD2SI = 0xF20F2C,
+        CVTTSS2SI = 0xF30F2C,
+        MASKMOVDQU = 0x660FF7,
+        MASKMOVQ = 0x0FF7,
+        MAXPD = 0x660F5F,
+        MAXPS = 0x0F5F,
+        MAXSD = 0xF20F5F,
+        MAXSS = 0xF30F5F,
+        MINPD = 0x660F5D,
+        MINPS = 0x0F5D,
+        MINSD = 0xF20F5D,
+        MINSS = 0xF30F5D,
+        ORPD = 0x660F56,
+        ORPS = 0x0F56,
+        PAVGB = 0x660FE0,
+        PAVGW = 0x660FE3,
+        PMAXSW = 0x660FEE,
+        //PINSRW = 0x660FC4,
+        PMAXUB = 0x660FDE,
+        PMINSW = 0x660FEA,
+        PMINUB = 0x660FDA,
+        //PMOVMSKB = 0x660FD7,
+        PMULHUW = 0x660FE4,
+        PMULHW = 0x660FE5,
+        PMULUDQ = 0x660FF4,
+        PSADBW = 0x660FF6,
+        PUNPCKHQDQ = 0x660F6D,
+        PUNPCKLQDQ = 0x660F6C,
+        RCPPS = 0x0F53,
+        RCPSS = 0xF30F53,
+        RSQRTPS = 0x0F52,
+        RSQRTSS = 0xF30F52,
+        SQRTPD = 0x660F51,
+        SHUFPD = 0x660FC6,
+        SHUFPS = 0x0FC6,
+        SQRTPS = 0x0F51,
+        SQRTSD = 0xF20F51,
+        SQRTSS = 0xF30F51,
+        UNPCKHPD = 0x660F15,
+        UNPCKHPS = 0x0F15,
+        UNPCKLPD = 0x660F14,
+        UNPCKLPS = 0x0F14,
+
+        PSHUFD = 0x660F70,
+        PSHUFHW = 0xF30F70,
+        PSHUFLW = 0xF20F70,
+        PSHUFW = 0x0F70,
+        PSLLDQ = 0x07660F73,
+        PSRLDQ = 0x03660F73,
+
+        //PREFETCH = 0x0F18,
+
+        // SSE3 Pentium 4 (Prescott)
+
+        ADDSUBPD = 0x660FD0,
+        ADDSUBPS = 0xF20FD0,
+        HADDPD   = 0x660F7C,
+        HADDPS   = 0xF20F7C,
+        HSUBPD   = 0x660F7D,
+        HSUBPS   = 0xF20F7D,
+        MOVDDUP  = 0xF20F12,
+        MOVSHDUP = 0xF30F16,
+        MOVSLDUP = 0xF30F12,
+        LDDQU    = 0xF20FF0,
+        MONITOR  = 0x0F01C8,
+        MWAIT    = 0x0F01C9,
+
+        // SSSE3
+        PALIGNR = 0x660F3A0F,
+        PHADDD = 0x660F3802,
+        PHADDW = 0x660F3801,
+        PHADDSW = 0x660F3803,
+        PABSB = 0x660F381C,
+        PABSD = 0x660F381E,
+        PABSW = 0x660F381D,
+        PSIGNB = 0x660F3808,
+        PSIGND = 0x660F380A,
+        PSIGNW = 0x660F3809,
+        PSHUFB = 0x660F3800,
+        PMADDUBSW = 0x660F3804,
+        PMULHRSW = 0x660F380B,
+        PHSUBD = 0x660F3806,
+        PHSUBW = 0x660F3805,
+        PHSUBSW = 0x660F3807,
+
+        // SSE4.1
+
+        BLENDPD   = 0x660F3A0D,
+        BLENDPS   = 0x660F3A0C,
+        BLENDVPD  = 0x660F3815,
+        BLENDVPS  = 0x660F3814,
+        DPPD      = 0x660F3A41,
+        DPPS      = 0x660F3A40,
+        EXTRACTPS = 0x660F3A17,
+        INSERTPS  = 0x660F3A21,
+        MPSADBW   = 0x660F3A42,
+        PBLENDVB  = 0x660F3810,
+        PBLENDW   = 0x660F3A0E,
+        PEXTRD    = 0x660F3A16,
+        PEXTRQ    = 0x660F3A16,
+        PINSRB    = 0x660F3A20,
+        PINSRD    = 0x660F3A22,
+        PINSRQ    = 0x660F3A22,
+
+        MOVNTDQA = 0x660F382A,
+        PACKUSDW = 0x660F382B,
+        PCMPEQQ = 0x660F3829,
+        PEXTRB = 0x660F3A14,
+        PHMINPOSUW = 0x660F3841,
+        PMAXSB = 0x660F383C,
+        PMAXSD = 0x660F383D,
+        PMAXUD = 0x660F383F,
+        PMAXUW = 0x660F383E,
+        PMINSB = 0x660F3838,
+        PMINSD = 0x660F3839,
+        PMINUD = 0x660F383B,
+        PMINUW = 0x660F383A,
+        PMOVSXBW = 0x660F3820,
+        PMOVSXBD = 0x660F3821,
+        PMOVSXBQ = 0x660F3822,
+        PMOVSXWD = 0x660F3823,
+        PMOVSXWQ = 0x660F3824,
+        PMOVSXDQ = 0x660F3825,
+        PMOVZXBW = 0x660F3830,
+        PMOVZXBD = 0x660F3831,
+        PMOVZXBQ = 0x660F3832,
+        PMOVZXWD = 0x660F3833,
+        PMOVZXWQ = 0x660F3834,
+        PMOVZXDQ = 0x660F3835,
+        PMULDQ   = 0x660F3828,
+        PMULLD   = 0x660F3840,
+        PTEST    = 0x660F3817,
+
+        ROUNDPD = 0x660F3A09,
+        ROUNDPS = 0x660F3A08,
+        ROUNDSD = 0x660F3A0B,
+        ROUNDSS = 0x660F3A0A,
+
+        // SSE4.2
+        PCMPESTRI  = 0x660F3A61,
+        PCMPESTRM  = 0x660F3A60,
+        PCMPISTRI  = 0x660F3A63,
+        PCMPISTRM  = 0x660F3A62,
+        PCMPGTQ    = 0x660F3837,
+        //CRC32
+
+        // SSE4a (AMD only)
+        // EXTRQ,INSERTQ,MOVNTSD,MOVNTSS
+
+        // POPCNT and LZCNT (have their own CPUID bits)
+        POPCNT     = 0xF30FB8,
+        // LZCNT
+    }
+
+    /**
+    * Generate two operand instruction with XMM 128 bit operands.
+    *
+    * This is a compiler magic function - it doesn't behave like
+    * regular D functions.
+    *
+    * Params:
+    *      opcode = any of the XMM opcodes; it must be a compile time constant
+    *      op1    = first operand
+    *      op2    = second operand
+    * Returns:
+    *      result of opcode
+    */
+    pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2);
+
+    ///
+    unittest
+    {
+        float4 a;
+        a = cast(float4)__simd(XMM.PXOR, a, a);
+    }
+
+    /**
+    * Unary SIMD instructions.
+    */
+    pure @safe void16 __simd(XMM opcode, void16 op1);
+    pure @safe void16 __simd(XMM opcode, double d);   ///
+    pure @safe void16 __simd(XMM opcode, float f);    ///
+
+    ///
+    unittest
+    {
+        float4 a;
+        a = cast(float4)__simd(XMM.LODSS, a);
+    }
+
+    /****
+    * For instructions:
+    * CMPPD, CMPSS, CMPSD, CMPPS,
+    * PSHUFD, PSHUFHW, PSHUFLW,
+    * BLENDPD, BLENDPS, DPPD, DPPS,
+    * MPSADBW, PBLENDW,
+    * ROUNDPD, ROUNDPS, ROUNDSD, ROUNDSS
+    * Params:
+    *      opcode = any of the above XMM opcodes; it must be a compile time constant
+    *      op1    = first operand
+    *      op2    = second operand
+    *      imm8   = third operand; must be a compile time constant
+    * Returns:
+    *      result of opcode
+    */
+    pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2, ubyte imm8);
+
+    ///
+    unittest
+    {
+        float4 a;
+        a = cast(float4)__simd(XMM.CMPPD, a, a, 0x7A);
+    }
+
+    /***
+    * For instructions with the imm8 version:
+    * PSLLD, PSLLQ, PSLLW, PSRAD, PSRAW, PSRLD, PSRLQ, PSRLW,
+    * PSRLDQ, PSLLDQ
+    * Params:
+    *      opcode = any of the XMM opcodes; it must be a compile time constant
+    *      op1    = first operand
+    *      imm8   = second operand; must be a compile time constant
+    * Returns:
+    *      result of opcode
+    */
+    pure @safe void16 __simd_ib(XMM opcode, void16 op1, ubyte imm8);
+
+    ///
+    unittest
+    {
+        float4 a;
+        a = cast(float4) __simd_ib(XMM.PSRLQ, a, 0x7A);
+    }
+
+    /*****
+    * For "store" operations of the form:
+    *    op1 op= op2
+    * Returns:
+    *    op2
+    * These cannot be marked as pure, as semantic() doesn't check them.
+    */
+    @safe void16 __simd_sto(XMM opcode, void16 op1, void16 op2);
+    @safe void16 __simd_sto(XMM opcode, double op1, void16 op2); ///
+    @safe void16 __simd_sto(XMM opcode, float op1, void16 op2);  ///
+
+    ///
+    unittest
+    {
+        void16 a;
+        float f = 1;
+        double d = 1;
+
+        cast(void)__simd_sto(XMM.STOUPS, a, a);
+        cast(void)__simd_sto(XMM.STOUPS, f, a);
+        cast(void)__simd_sto(XMM.STOUPS, d, a);
+    }
+
+    /* The following use overloading to ensure correct typing.
+    * Compile with inlining on for best performance.
+    */
+
+    pure @safe short8 pcmpeq()(short8 v1, short8 v2)
+    {
+        return cast(short8)__simd(XMM.PCMPEQW, v1, v2);
+    }
+
+    pure @safe ushort8 pcmpeq()(ushort8 v1, ushort8 v2)
+    {
+        return cast(ushort8)__simd(XMM.PCMPEQW, v1, v2);
+    }
+
+    /*********************
+    * Emit prefetch instruction.
+    * Params:
+    *    address = address to be prefetched
+    *    writeFetch = true for write fetch, false for read fetch
+    *    locality = 0..3 (0 meaning least local, 3 meaning most local)
+    * Note:
+    *    The Intel mappings are:
+    *    $(TABLE
+    *    $(THEAD writeFetch, locality, Instruction)
+    *    $(TROW false, 0, prefetchnta)
+    *    $(TROW false, 1, prefetcht2)
+    *    $(TROW false, 2, prefetcht1)
+    *    $(TROW false, 3, prefetcht0)
+    *    $(TROW true, 0, prefetchw)
+    *    $(TROW true, 1, prefetchw)
+    *    $(TROW true, 2, prefetchw)
+    *    $(TROW true, 3, prefetchw)
+    *    )
+    */
+    void prefetch(bool writeFetch, ubyte locality)(const(void)* address)
+    {
         static if (writeFetch)
             __prefetch(address, 4);
         else static if (locality < 4)
             __prefetch(address, 3 - locality);
         else
             static assert(0, "0..3 expected for locality");
-  }
+    }
 
-  private void __prefetch(const(void*) address, ubyte encoding);
+    private void __prefetch(const(void*) address, ubyte encoding);
 
-  /*************************************
-   * Load unaligned vector from address.
-   * This is a compiler intrinsic.
-   * Params:
-   *    p = pointer to vector
-   * Returns:
-   *    vector
-   */
+    /*************************************
+    * Load unaligned vector from address.
+    * This is a compiler intrinsic.
+    * Params:
+    *    p = pointer to vector
+    * Returns:
+    *    vector
+    */
 
-  V loadUnaligned(V)(const V* p)
+    V loadUnaligned(V)(const V* p)
         if (is(V == void16) ||
             is(V == byte16) ||
             is(V == ubyte16) ||
@@ -510,8 +564,10 @@ version (D_SIMD)
             is(V == int4) ||
             is(V == uint4) ||
             is(V == long2) ||
-            is(V == ulong2))
-  {
+            is(V == ulong2) ||
+            is(V == double2) ||
+            is(V == float4))
+    {
         pragma(inline, true);
         static if (is(V == double2))
             return cast(V)__simd(XMM.LODUPD, *cast(const void16*)p);
@@ -519,19 +575,63 @@ version (D_SIMD)
             return cast(V)__simd(XMM.LODUPS, *cast(const void16*)p);
         else
             return cast(V)__simd(XMM.LODDQU, *cast(const void16*)p);
-  }
-
-  /*************************************
-   * Store vector to unaligned address.
-   * This is a compiler intrinsic.
-   * Params:
-   *    p = pointer to vector
-   *    value = value to store
-   * Returns:
-   *    value
-   */
-
-  V storeUnaligned(V)(V* p, V value)
+    }
+
+    @system
+    unittest
+    {
+        // Memory to load into the vector:
+        // Should have enough data to test all 16-byte alignments, and still
+        // have room for a 16-byte vector
+        ubyte[32] data;
+        foreach (i; 0..data.length)
+        {
+            data[i] = cast(ubyte)i;
+        }
+
+        // to test all alignments from 1 ~ 16
+        foreach (i; 0..16)
+        {
+            ubyte* d = &data[i];
+
+            void test(T)()
+            {
+                // load the data
+                T v = loadUnaligned(cast(T*)d);
+
+                // check that the data was loaded correctly
+                ubyte* ptrToV = cast(ubyte*)&v;
+                foreach (j; 0..T.sizeof)
+                {
+                    assert(ptrToV[j] == d[j]);
+                }
+            }
+
+            test!void16();
+            test!byte16();
+            test!ubyte16();
+            test!short8();
+            test!ushort8();
+            test!int4();
+            test!uint4();
+            test!long2();
+            test!ulong2();
+            test!double2();
+            test!float4();
+        }
+    }
+
+    /*************************************
+    * Store vector to unaligned address.
+    * This is a compiler intrinsic.
+    * Params:
+    *    p = pointer to vector
+    *    value = value to store
+    * Returns:
+    *    value
+    */
+
+    V storeUnaligned(V)(V* p, V value)
         if (is(V == void16) ||
             is(V == byte16) ||
             is(V == ubyte16) ||
@@ -540,8 +640,10 @@ version (D_SIMD)
             is(V == int4) ||
             is(V == uint4) ||
             is(V == long2) ||
-            is(V == ulong2))
-  {
+            is(V == ulong2) ||
+            is(V == double2) ||
+            is(V == float4))
+    {
         pragma(inline, true);
         static if (is(V == double2))
             return cast(V)__simd_sto(XMM.STOUPD, *cast(void16*)p, value);
@@ -549,5 +651,53 @@ version (D_SIMD)
             return cast(V)__simd_sto(XMM.STOUPS, *cast(void16*)p, value);
         else
             return cast(V)__simd_sto(XMM.STODQU, *cast(void16*)p, value);
-  }
+    }
+
+    @system
+    unittest
+    {
+        // Memory to store the vector to:
+        // Should have enough data to test all 16-byte alignments, and still
+        // have room for a 16-byte vector
+        ubyte[32] data;
+
+        // to test all alignments from 1 ~ 16
+        foreach (i; 0..16)
+        {
+            ubyte* d = &data[i];
+
+            void test(T)()
+            {
+                T v;
+
+                // populate `v` with data
+                ubyte* ptrToV = cast(ubyte*)&v;
+                foreach (j; 0..T.sizeof)
+                {
+                    ptrToV[j] = cast(ubyte)j;
+                }
+
+                // store `v` to location pointed to by `d`
+                storeUnaligned(cast(T*)d, v);
+
+                // check that the data was stored correctly
+                foreach (j; 0..T.sizeof)
+                {
+                    assert(ptrToV[j] == d[j]);
+                }
+            }
+
+            test!void16();
+            test!byte16();
+            test!ubyte16();
+            test!short8();
+            test!ushort8();
+            test!int4();
+            test!uint4();
+            test!long2();
+            test!ulong2();
+            test!double2();
+            test!float4();
+        }
+    }
 }
diff --git a/libphobos/libdruntime/core/stdc/stdio.d b/libphobos/libdruntime/core/stdc/stdio.d
index 00efe885289..67011596b1c 100644
--- a/libphobos/libdruntime/core/stdc/stdio.d
+++ b/libphobos/libdruntime/core/stdc/stdio.d
@@ -1527,7 +1527,7 @@ else version (OpenBSD)
     {
         void __sclearerr()(FILE* p)
         {
-            p._flags &= ~(__SERR|__SEOF);
+            p._flags = p._flags & ~(__SERR|__SEOF);
         }
 
         int __sfeof()(FILE* p)
diff --git a/libphobos/libdruntime/core/sys/posix/locale.d b/libphobos/libdruntime/core/sys/posix/locale.d
index 19c7e209ac7..18558a2696a 100644
--- a/libphobos/libdruntime/core/sys/posix/locale.d
+++ b/libphobos/libdruntime/core/sys/posix/locale.d
@@ -415,10 +415,38 @@ else version (Solaris)
         LC_ALL      = 6,
     }
 
+    ///
+    enum
+    {
+        LC_CTYPE_MASK    = (1 << LC_CTYPE),
+        LC_NUMERIC_MASK  = (1 << LC_NUMERIC),
+        LC_TIME_MASK     = (1 << LC_TIME),
+        LC_COLLATE_MASK  = (1 << LC_COLLATE),
+        LC_MONETARY_MASK = (1 << LC_MONETARY),
+        LC_MESSAGES_MASK = (1 << LC_MESSAGES),
+        LC_ALL_MASK      = 0x3f,
+    }
+
+    private struct _LC_locale_t;
+
+    ///
+    alias locale_t = _LC_locale_t**;
+
+    ///
+    enum LC_GLOBAL_LOCALE = (cast(locale_t)-1);
+
+    /// Duplicate existing locale
+    locale_t duplocale(locale_t locale);
+    /// Free an allocated locale
+    void     freelocale(locale_t locale);
     /// Natural language formatting for C
     lconv*   localeconv();
+    /// Create a new locale
+    locale_t newlocale(int mask, const char* locale, locale_t base);
     /// Set the C library's notion of natural language formatting style
     char*    setlocale(int category, const char* locale);
+    /// Set the per-thread locale
+    locale_t uselocale (locale_t locale);
 }
 else
     static assert(false, "unimplemented platform");
diff --git a/libphobos/libdruntime/core/thread/osthread.d b/libphobos/libdruntime/core/thread/osthread.d
index d81e0aa0607..defdc9586f1 100644
--- a/libphobos/libdruntime/core/thread/osthread.d
+++ b/libphobos/libdruntime/core/thread/osthread.d
@@ -1461,14 +1461,6 @@ in (fn)
     fn(sp);
 }
 
-version (Solaris)
-{
-    import core.sys.solaris.sys.priocntl;
-    import core.sys.solaris.sys.types;
-    import core.sys.posix.sys.wait : idtype_t;
-}
-
-
 version (Windows)
 private extern (D) void scanWindowsOnly(scope ScanAllThreadsTypeFn scan, ThreadBase _t) nothrow
 {
diff --git a/libphobos/libdruntime/core/vararg.d b/libphobos/libdruntime/core/vararg.d
index a02ffeaea23..935b2bdb287 100644
--- a/libphobos/libdruntime/core/vararg.d
+++ b/libphobos/libdruntime/core/vararg.d
@@ -17,3 +17,125 @@
 module core.vararg;
 
 public import core.stdc.stdarg;
+
+
+version (GNU) { /* TypeInfo-based va_arg overload unsupported */ }
+else:
+
+version (ARM)     version = ARM_Any;
+version (AArch64) version = ARM_Any;
+version (MIPS32)  version = MIPS_Any;
+version (MIPS64)  version = MIPS_Any;
+version (PPC)     version = PPC_Any;
+version (PPC64)   version = PPC_Any;
+
+version (ARM_Any)
+{
+    // Darwin uses a simpler varargs implementation
+    version (OSX) {}
+    else version (iOS) {}
+    else version (TVOS) {}
+    else version (WatchOS) {}
+    else:
+
+    version (ARM)     version = AAPCS32;
+    version (AArch64) version = AAPCS64;
+}
+
+
+///
+alias va_arg = core.stdc.stdarg.va_arg;
+
+
+/**
+ * Retrieve and store through parmn the next value that is of TypeInfo ti.
+ * Used when the static type is not known.
+ */
+void va_arg()(ref va_list ap, TypeInfo ti, void* parmn)
+{
+    version (X86)
+    {
+        // Wait until everyone updates to get TypeInfo.talign
+        //auto talign = ti.talign;
+        //auto p = cast(void*)(cast(size_t)ap + talign - 1) & ~(talign - 1);
+        auto p = ap;
+        auto tsize = ti.tsize;
+        ap = cast(va_list) (p + tsize.alignUp);
+        parmn[0..tsize] = p[0..tsize];
+    }
+    else version (Win64)
+    {
+        version (LDC) enum isLDC = true;
+        else          enum isLDC = false;
+
+        // Wait until everyone updates to get TypeInfo.talign
+        //auto talign = ti.talign;
+        //auto p = cast(void*)(cast(size_t)ap + talign - 1) & ~(talign - 1);
+        auto p = ap;
+        auto tsize = ti.tsize;
+        void* q;
+        if (isLDC && tsize == 16 && cast(TypeInfo_Array) ti)
+        {
+            q = p;
+            ap = cast(va_list) (p + tsize);
+        }
+        else
+        {
+            q = (tsize > size_t.sizeof || (tsize & (tsize - 1)) != 0) ? *cast(void**) p : p;
+            ap = cast(va_list) (p + size_t.sizeof);
+        }
+        parmn[0..tsize] = q[0..tsize];
+    }
+    else version (X86_64)
+    {
+        static import core.internal.vararg.sysv_x64;
+        core.internal.vararg.sysv_x64.va_arg(ap, ti, parmn);
+    }
+    else version (AAPCS32)
+    {
+        const tsize = ti.tsize;
+        if (ti.talign >= 8)
+            ap.__ap = ap.__ap.alignUp!8;
+        auto p = ap.__ap;
+        version (BigEndian)
+            p = adjustForBigEndian(p, tsize);
+        ap.__ap += tsize.alignUp;
+        parmn[0..tsize] = p[0..tsize];
+    }
+    else version (AAPCS64)
+    {
+        static import core.internal.vararg.aarch64;
+        core.internal.vararg.aarch64.va_arg(ap, ti, parmn);
+    }
+    else version (ARM_Any)
+    {
+        const tsize = ti.tsize;
+        auto p = cast(void*) ap;
+        version (BigEndian)
+            p = adjustForBigEndian(p, tsize);
+        ap += tsize.alignUp;
+        parmn[0..tsize] = p[0..tsize];
+    }
+    else version (PPC_Any)
+    {
+        if (ti.talign >= 8)
+            ap = ap.alignUp!8;
+        const tsize = ti.tsize;
+        auto p = cast(void*) ap;
+        version (BigEndian)
+            p = adjustForBigEndian(p, tsize);
+        ap += tsize.alignUp;
+        parmn[0..tsize] = p[0..tsize];
+    }
+    else version (MIPS_Any)
+    {
+        const tsize = ti.tsize;
+        auto p = cast(void*) ap;
+        version (BigEndian)
+            p = adjustForBigEndian(p, tsize);
+        ap += tsize.alignUp;
+        parmn[0..tsize] = p[0..tsize];
+    }
+    else
+        static assert(0, "Unsupported platform");
+}
diff --git a/libphobos/libdruntime/core/volatile.d b/libphobos/libdruntime/core/volatile.d
new file mode 100644
index 00000000000..1703450c65f
--- /dev/null
+++ b/libphobos/libdruntime/core/volatile.d
@@ -0,0 +1,67 @@
+/**
+ * This module declares intrinsics for volatile operations.
+ *
+ * Copyright: Copyright © 2019, The D Language Foundation
+ * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
+ * Authors:   Walter Bright, Ernesto Castellotti
+ * Source:    $(DRUNTIMESRC core/volatile.d)
+ */
+
+module core.volatile;
+
+nothrow:
+@safe:
+@nogc:
+
+/*************************************
+ * Read/write value from/to the memory location indicated by ptr.
+ *
+ * These functions are recognized by the compiler, and calls to them are guaranteed
+ * not to be removed (by dead assignment elimination or by being presumed to have
+ * no effect) or reordered in the same thread.
+ *
+ * These reordering guarantees are only made with regard to other
+ * operations done through these functions; the compiler is free to reorder regular
+ * loads/stores with regard to loads/stores done through these functions.
+ *
+ * This is useful when dealing with memory-mapped I/O (MMIO) where a store can
+ * have an effect other than just writing a value, or where sequential loads
+ * with no intervening stores can retrieve
+ * different values from the same location due to external stores to the location.
+ *
+ * These functions will, when possible, do the load/store as a single operation. In
+ * general, this is possible when the size of the operation is less than or equal to
+ * $(D (void*).sizeof), although some targets may support larger operations. If the
+ * load/store cannot be done as a single operation, multiple smaller operations will be used.
+ *
+ * These are not to be conflated with atomic operations. They do not guarantee any
+ * atomicity. This may be provided by coincidence as a result of the instructions
+ * used on the target, but this should not be relied on for portable programs.
+ * Further, no memory fences are implied by these functions.
+ * They should not be used for communication between threads.
+ * They may be used to guarantee a write or read cycle occurs at a specified address.
+ */
+
+ubyte  volatileLoad(ubyte * ptr);  // declared without a body; the comment above says the compiler recognizes these
+ushort volatileLoad(ushort* ptr);  /// ditto
+uint   volatileLoad(uint  * ptr);  /// ditto
+ulong  volatileLoad(ulong * ptr);  /// ditto
+
+void volatileStore(ubyte * ptr, ubyte  value);   /// ditto
+void volatileStore(ushort* ptr, ushort value);   /// ditto
+void volatileStore(uint  * ptr, uint   value);   /// ditto
+void volatileStore(ulong * ptr, ulong  value);   /// ditto
+
+@system unittest // store-then-load round trip through volatileStore/volatileLoad for each operand type
+{
+    alias TT(T...) = T; // local alias yielding its template arguments as a sequence
+
+    foreach (T; TT!(ubyte, ushort, uint, ulong)) // one pass per overloaded operand type
+    {
+        T u;
+        T* p = &u;
+        volatileStore(p, 1);
+        T r = volatileLoad(p);
+        assert(r == u); // NOTE(review): compares with u, not the literal 1, so a store with no effect would still pass
+    }
+}
diff --git a/libphobos/libdruntime/rt/lifetime.d b/libphobos/libdruntime/rt/lifetime.d
index 95f0ff57f42..6a6eb50eefa 100644
--- a/libphobos/libdruntime/rt/lifetime.d
+++ b/libphobos/libdruntime/rt/lifetime.d
@@ -44,17 +44,9 @@ private
     }
 }
 
-private immutable bool callStructDtorsDuringGC;
-
 extern (C) void lifetime_init()
 {
     // this is run before static ctors, so it is safe to modify immutables
-    import rt.config;
-    string s = rt_configOption("callStructDtorsDuringGC");
-    if (s != null)
-        cast() callStructDtorsDuringGC = s[0] == '1' || s[0] == 'y' || s[0] == 'Y';
-    else
-        cast() callStructDtorsDuringGC = true;
 }
 
 /**
@@ -214,9 +206,6 @@ inout(TypeInfo) unqualify(inout(TypeInfo) cti) pure nothrow @nogc
 // size used to store the TypeInfo at the end of an allocation for structs that have a destructor
 size_t structTypeInfoSize(const TypeInfo ti) pure nothrow @nogc
 {
-    if (!callStructDtorsDuringGC)
-        return 0;
-
     if (ti && typeid(ti) is typeid(TypeInfo_Struct)) // avoid a complete dynamic type cast
     {
         auto sti = cast(TypeInfo_Struct)cast(void*)ti;
@@ -975,7 +964,7 @@ extern (C) void[] _d_newarrayT(const TypeInfo ti, size_t length) pure nothrow
  */
 extern (C) void[] _d_newarrayiT(const TypeInfo ti, size_t length) pure nothrow
 {
-    import core.internal.traits : TypeTuple;
+    import core.internal.traits : AliasSeq;
 
     void[] result = _d_newarrayU(ti, length);
     auto tinext = unqualify(ti.next);
@@ -985,7 +974,7 @@ extern (C) void[] _d_newarrayiT(const TypeInfo ti, size_t length) pure nothrow
 
     switch (init.length)
     {
-    foreach (T; TypeTuple!(ubyte, ushort, uint, ulong))
+    foreach (T; AliasSeq!(ubyte, ushort, uint, ulong))
     {
     case T.sizeof:
         (cast(T*)result.ptr)[0 .. size * length / T.sizeof] = *cast(T*)init.ptr;
@@ -2539,33 +2528,30 @@ unittest
     delete arr1;
     assert(dtorCount == 7);
 
-    if (callStructDtorsDuringGC)
-    {
-        dtorCount = 0;
-        S1* s2 = new S1;
-        GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]);
-        assert(dtorCount == 1);
-        GC.free(s2);
+    dtorCount = 0;
+    S1* s2 = new S1;
+    GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]);
+    assert(dtorCount == 1);
+    GC.free(s2);
 
-        dtorCount = 0;
-        const(S1)* s3 = new const(S1);
-        GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]);
-        assert(dtorCount == 1);
-        GC.free(cast(void*)s3);
+    dtorCount = 0;
+    const(S1)* s3 = new const(S1);
+    GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]);
+    assert(dtorCount == 1);
+    GC.free(cast(void*)s3);
 
-        dtorCount = 0;
-        shared(S1)* s4 = new shared(S1);
-        GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]);
-        assert(dtorCount == 1);
-        GC.free(cast(void*)s4);
+    dtorCount = 0;
+    shared(S1)* s4 = new shared(S1);
+    GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]);
+    assert(dtorCount == 1);
+    GC.free(cast(void*)s4);
 
-        dtorCount = 0;
-        const(S1)[] carr1 = new const(S1)[5];
-        BlkInfo blkinf1 = GC.query(carr1.ptr);
-        GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]);
-        assert(dtorCount == 5);
-        GC.free(blkinf1.base);
-    }
+    dtorCount = 0;
+    const(S1)[] carr1 = new const(S1)[5];
+    BlkInfo blkinf1 = GC.query(carr1.ptr);
+    GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]);
+    assert(dtorCount == 5);
+    GC.free(blkinf1.base);
 
     dtorCount = 0;
     S1[] arr2 = new S1[10];
@@ -2573,14 +2559,11 @@ unittest
     arr2.assumeSafeAppend;
     assert(dtorCount == 4); // destructors run explicitely?
 
-    if (callStructDtorsDuringGC)
-    {
-        dtorCount = 0;
-        BlkInfo blkinf = GC.query(arr2.ptr);
-        GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]);
-        assert(dtorCount == 6);
-        GC.free(blkinf.base);
-    }
+    dtorCount = 0;
+    BlkInfo blkinf = GC.query(arr2.ptr);
+    GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]);
+    assert(dtorCount == 6);
+    GC.free(blkinf.base);
 
     // associative arrays
     import rt.aaA : entryDtor;
@@ -2590,36 +2573,27 @@ unittest
     S1[int] aa1;
     aa1[0] = S1(0);
     aa1[1] = S1(1);
-    if (callStructDtorsDuringGC)
-    {
-        dtorCount = 0;
-        aa1 = null;
-        GC.runFinalizers((cast(char*)(&entryDtor))[0..1]);
-        assert(dtorCount == 2);
-    }
+    dtorCount = 0;
+    aa1 = null;
+    GC.runFinalizers((cast(char*)(&entryDtor))[0..1]);
+    assert(dtorCount == 2);
 
     int[S1] aa2;
     aa2[S1(0)] = 0;
     aa2[S1(1)] = 1;
     aa2[S1(2)] = 2;
-    if (callStructDtorsDuringGC)
-    {
-        dtorCount = 0;
-        aa2 = null;
-        GC.runFinalizers((cast(char*)(&entryDtor))[0..1]);
-        assert(dtorCount == 3);
-    }
+    dtorCount = 0;
+    aa2 = null;
+    GC.runFinalizers((cast(char*)(&entryDtor))[0..1]);
+    assert(dtorCount == 3);
 
     S1[2][int] aa3;
     aa3[0] = [S1(0),S1(2)];
     aa3[1] = [S1(1),S1(3)];
-    if (callStructDtorsDuringGC)
-    {
-        dtorCount = 0;
-        aa3 = null;
-        GC.runFinalizers((cast(char*)(&entryDtor))[0..1]);
-        assert(dtorCount == 4);
-    }
+    dtorCount = 0;
+    aa3 = null;
+    GC.runFinalizers((cast(char*)(&entryDtor))[0..1]);
+    assert(dtorCount == 4);
 }
 
 // test class finalizers exception handling
@@ -2661,9 +2635,6 @@ unittest
 debug(SENTINEL) {} else
 unittest
 {
-    if (!callStructDtorsDuringGC)
-        return;
-
     bool test(E)()
     {
         import core.exception;
diff --git a/libphobos/testsuite/libphobos.allocations/tls_gc_integration.d b/libphobos/testsuite/libphobos.allocations/tls_gc_integration.d
index 44eb40c366d..7c084abcaf1 100644
--- a/libphobos/testsuite/libphobos.allocations/tls_gc_integration.d
+++ b/libphobos/testsuite/libphobos.allocations/tls_gc_integration.d
@@ -1,4 +1,4 @@
-import core.memory, core.thread, core.bitop;
+import core.memory, core.thread, core.volatile;
 
 /*
  * This test repeatedly performs operations on GC-allocated objects which
-- 
2.27.0


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-02-04 22:14 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-04 22:14 [committed] d: Merge upstream dmd 46133f761, druntime 0fd4364c Iain Buclaw

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).