public inbox for newlib@sourceware.org
 help / color / mirror / Atom feed
From: Alexey Lapshin <alexey.lapshin@espressif.com>
To: "newlib@sourceware.org" <newlib@sourceware.org>
Cc: Alexey Gerenkov <alexey.gerenkov@espressif.com>,
	"jcmvbkbc@gmail.com" <jcmvbkbc@gmail.com>,
	Ivan Grokhotkov <ivan@espressif.com>
Subject: [PATCH, RFC 8/8] libc: fix xtensa PSRAM cache bug
Date: Thu, 11 May 2023 06:25:03 +0000	[thread overview]
Message-ID: <1f413c4816a78712571dff71af533091b1b4b206.camel@espressif.com> (raw)
In-Reply-To: <1cb9e9f8c8e4e1df52b439184333d628b3532f65.camel@espressif.com>

newlib:
        * libc/machine/xtensa/Makefile.am: Add PSRAM_FIX flag to
        AM_CCASFLAGS.
        * libc/machine/xtensa/Makefile.in: Likewise.
        * libc/machine/xtensa/memcpy.S: Add PSRAM_FIX workaround.
        * libc/machine/xtensa/memset.S: Likewise.
        * libc/machine/xtensa/strcpy.S: Likewise.
        * libc/machine/xtensa/strncpy.S: Likewise.

---
 newlib/libc/machine/xtensa/Makefile.am |  3 +-
 newlib/libc/machine/xtensa/Makefile.in |  3 +-
 newlib/libc/machine/xtensa/memcpy.S    | 54 ++++++++++++++++++++++++++
 newlib/libc/machine/xtensa/memset.S    | 23 +++++++++++
 newlib/libc/machine/xtensa/strcpy.S    | 39 +++++++++++++++++++
 newlib/libc/machine/xtensa/strncpy.S   | 20 +++++++++-
 6 files changed, 138 insertions(+), 4 deletions(-)

diff --git a/newlib/libc/machine/xtensa/Makefile.am b/newlib/libc/machine/xtensa/Makefile.am
index 9307cd871..16f593523 100644
--- a/newlib/libc/machine/xtensa/Makefile.am
+++ b/newlib/libc/machine/xtensa/Makefile.am
@@ -3,8 +3,9 @@
 AUTOMAKE_OPTIONS = cygnus
 
 INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS)
+PSRAM_FLAG = $(if $(filter -mfix-esp32-psram-cache-issue,$(CCAS) $(INCLUDES)),-DPSRAM_FIX=1,)
 
-AM_CCASFLAGS = $(INCLUDES)
+AM_CCASFLAGS = $(INCLUDES) $(PSRAM_FLAG)
 
 noinst_LIBRARIES = lib.a
 
diff --git a/newlib/libc/machine/xtensa/Makefile.in b/newlib/libc/machine/xtensa/Makefile.in
index ef546441b..4bf06cb3c 100644
--- a/newlib/libc/machine/xtensa/Makefile.in
+++ b/newlib/libc/machine/xtensa/Makefile.in
@@ -173,7 +173,8 @@ top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
 AUTOMAKE_OPTIONS = cygnus
 INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS)
-AM_CCASFLAGS = $(INCLUDES)
+PSRAM_FLAG = $(if $(filter -mfix-esp32-psram-cache-issue,$(CCAS) $(INCLUDES)),-DPSRAM_FIX=1,)
+AM_CCASFLAGS = $(INCLUDES) $(PSRAM_FLAG)
 noinst_LIBRARIES = lib.a
 lib_a_SOURCES = setjmp.S memcpy.S memset.S strcmp.S strcpy.S strncpy.S strlen.S
 lib_a_CCASFLAGS = $(AM_CCASFLAGS)
diff --git a/newlib/libc/machine/xtensa/memcpy.S b/newlib/libc/machine/xtensa/memcpy.S
index 7cc8ed0fb..eefa64324 100644
--- a/newlib/libc/machine/xtensa/memcpy.S
+++ b/newlib/libc/machine/xtensa/memcpy.S
@@ -72,8 +72,16 @@ __memcpy_aux:
 #endif
 1:	l8ui	a6, a3, 0
 	addi	a3, a3, 1
+#ifdef PSRAM_FIX
+	nop
+	nop
+	nop
+#endif
 	s8i	a6, a5, 0
 	addi	a5, a5, 1
+#ifdef PSRAM_FIX
+	memw
+#endif
 #if !XCHAL_HAVE_LOOPS
 	bltu	a3, a7, 1b
 #endif
@@ -93,6 +101,9 @@ __memcpy_aux:
 	addi	a3, a3, 1
 	addi	a4, a4, -1
 	s8i	a6, a5, 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 	addi	a5, a5, 1
 
 	/* Return to main algorithm if dst is now aligned.  */
@@ -110,6 +121,9 @@ __memcpy_aux:
 	addi	a4, a4, -2
 	s8i	a6, a5, 0
 	s8i	a7, a5, 1
+#ifdef PSRAM_FIX
+	memw
+#endif
 	addi	a5, a5, 2
 
 	/* dst is now aligned; return to main algorithm.  */
@@ -143,6 +157,9 @@ memcpy:
 	slli	a8, a7, 4
 	add	a8, a8, a3	// a8 = end of last 16B source chunk
 #endif
+
+#ifndef PSRAM_FIX
+
 1:	l32i	a6, a3, 0
 	l32i	a7, a3, 4
 	s32i	a6, a5, 0
@@ -153,6 +170,25 @@ memcpy:
 	addi	a3, a3, 16
 	s32i	a7, a5, 12
 	addi	a5, a5, 16
+
+#else
+1:	l32i	a6, a3, 0
+	l32i	a7, a3, 4
+	s32i	a6, a5, 0
+	s32i	a7, a5, 4
+	memw
+	l32i	a6, a3, 8
+	l32i	a7, a3, 12
+	s32i	a6, a5, 8
+	s32i	a7, a5, 12
+	memw
+
+	addi	a3, a3, 16
+	addi	a5, a5, 16
+
+#endif
+
+
 #if !XCHAL_HAVE_LOOPS
 	bltu	a3, a8, 1b
 #endif
@@ -171,6 +207,9 @@ memcpy:
 3:	bbsi.l	a4, 2, 4f
 	bbsi.l	a4, 1, 5f
 	bbsi.l	a4, 0, 6f
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 	.align 4
@@ -181,6 +220,9 @@ memcpy:
 	addi	a5, a5, 4
 	bbsi.l	a4, 1, 5f
 	bbsi.l	a4, 0, 6f
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 	/* Copy 2 bytes.  */
@@ -189,6 +231,9 @@ memcpy:
 	s16i	a6, a5, 0
 	addi	a5, a5, 2
 	bbsi.l	a4, 0, 6f
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 	/* Copy 1 byte.  */
@@ -196,6 +241,9 @@ memcpy:
 	s8i	a6, a5, 0
 
 .Ldone:
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 
@@ -277,11 +325,17 @@ memcpy:
 	s8i	a7, a5, 1
 	addi	a5, a5, 2
 	bbsi.l	a4, 0, 6f
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 	/* Copy 1 byte.  */
 6:	l8ui	a6, a3, 0
 	s8i	a6, a5, 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 	.end schedule
diff --git a/newlib/libc/machine/xtensa/memset.S b/newlib/libc/machine/xtensa/memset.S
index dbfbe1a0c..a4480d5e7 100644
--- a/newlib/libc/machine/xtensa/memset.S
+++ b/newlib/libc/machine/xtensa/memset.S
@@ -59,6 +59,9 @@ __memset_aux:
 	add	a6, a5, a4	// a6 = ending address
 #endif
 1:	s8i	a3, a5, 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 	addi	a5, a5, 1
 #if !XCHAL_HAVE_LOOPS
 	bltu	a5, a6, 1b
@@ -79,6 +82,9 @@ __memset_aux:
 	s8i	a3, a5, 0
 	addi	a5, a5, 1
 	addi	a4, a4, -1
+#ifdef PSRAM_FIX
+	memw
+#endif
 
 	/* Now retest if dst is aligned.  */
 	_bbci.l	a5, 1, .Ldstaligned
@@ -92,6 +98,9 @@ __memset_aux:
 	s16i	a3, a5, 0
 	addi	a5, a5, 2
 	addi	a4, a4, -2
+#ifdef PSRAM_FIX
+	memw
+#endif
 
 	/* dst is now aligned; return to main algorithm */
 	j	.Ldstaligned
@@ -121,6 +130,14 @@ memset:
 	/* Get number of loop iterations with 16B per iteration.  */
 	srli	a7, a4, 4
 
+#ifdef PSRAM_FIX
+	//do not do this if we have less than one iteration to do
+	beqz	a7, 2f
+	//this seems to work to prefetch the cache line
+	s32i	a3, a5, 0
+	nop
+#endif
+
 	/* Destination is word-aligned.  */
 #if XCHAL_HAVE_LOOPS
 	loopnez	a7, 2f
@@ -158,11 +175,17 @@ memset:
 	/* Set 2 bytes.  */
 	s16i	a3, a5, 0
 	addi	a5, a5, 2
+#ifdef PSRAM_FIX
+	memw
+#endif
 
 5:	bbci.l	a4, 0, 6f
 
 	/* Set 1 byte.  */
 	s8i	a3, a5, 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 6:	leaf_return
 
 	.end schedule
diff --git a/newlib/libc/machine/xtensa/strcpy.S b/newlib/libc/machine/xtensa/strcpy.S
index 167aa9e08..a4e07e3ac 100644
--- a/newlib/libc/machine/xtensa/strcpy.S
+++ b/newlib/libc/machine/xtensa/strcpy.S
@@ -52,6 +52,9 @@ strcpy:
 	l8ui	a8, a3, 0	// get byte 0
 	addi	a3, a3, 1	// advance src pointer
 	s8i	a8, a10, 0	// store byte 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 	beqz	a8, 1f		// if byte 0 is zero
 	addi	a10, a10, 1	// advance dst pointer
 	bbci.l	a3, 1, .Lsrcaligned // if src is now word-aligned
@@ -60,11 +63,17 @@ strcpy:
 	l8ui	a8, a3, 0	// get byte 0
 	/* 1-cycle interlock */
 	s8i	a8, a10, 0	// store byte 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 	beqz	a8, 1f		// if byte 0 is zero
 	l8ui	a8, a3, 1	// get byte 0
 	addi	a3, a3, 2	// advance src pointer
 	s8i	a8, a10, 1	// store byte 0
 	addi	a10, a10, 2	// advance dst pointer
+#ifdef PSRAM_FIX
+	memw
+#endif
 	bnez	a8, .Lsrcaligned
 1:	leaf_return
 
@@ -93,6 +102,10 @@ strcpy:
 	bnone	a8, a5, .Lz1	// if byte 1 is zero
 	bnone	a8, a6, .Lz2	// if byte 2 is zero
 	s32i	a8, a10, 0	// store word to dst
+#ifdef PSRAM_FIX
+	l32i	a8, a10, 0
+	s32i	a8, a10, 0
+#endif
 	bnone	a8, a7, .Lz3	// if byte 3 is zero
 	addi	a10, a10, 4	// advance dst pointer
 
@@ -106,6 +119,11 @@ strcpy:
 	bnone	a8, a5, .Lz1	// if byte 1 is zero
 	bnone	a8, a6, .Lz2	// if byte 2 is zero
 	s32i	a8, a10, 0	// store word to dst
+#ifdef PSRAM_FIX
+	l32i	a8, a10, 0
+	s32i	a8, a10, 0
+#endif
+
 	bany	a8, a7, 1b	// if byte 3 is zero
 #endif /* !XCHAL_HAVE_LOOPS */
 
@@ -117,6 +135,9 @@ strcpy:
 	movi	a8, 0
 #endif
 	s8i	a8, a10, 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 .Lz1:	/* Byte 1 is zero.  */
@@ -124,6 +145,9 @@ strcpy:
         extui   a8, a8, 16, 16
 #endif
 	s16i	a8, a10, 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 .Lz2:	/* Byte 2 is zero.  */
@@ -133,6 +157,9 @@ strcpy:
 	s16i	a8, a10, 0
 	movi	a8, 0
 	s8i	a8, a10, 2
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 #if 1
@@ -162,6 +189,9 @@ strcpy:
 	addi	a3, a3, 1
 	s8i	a8, a10, 0
 	addi	a10, a10, 1
+#ifdef PSRAM_FIX
+	memw
+#endif
 #if XCHAL_HAVE_LOOPS
 	beqz	a8, 2f
 #else
@@ -214,6 +244,9 @@ strcpy:
 	movi	a8, 0
 #endif
 	s8i	a8, a10, 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 .Lu1:	/* Byte 1 is zero.  */
@@ -221,12 +254,18 @@ strcpy:
         extui   a8, a8, 16, 16
 #endif
 	s16i	a8, a10, 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 .Lu2:	/* Byte 2 is zero.  */
 	s16i	a8, a10, 0
 	movi	a8, 0
 	s8i	a8, a10, 2
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 #endif /* 0 */
diff --git a/newlib/libc/machine/xtensa/strncpy.S b/newlib/libc/machine/xtensa/strncpy.S
index dc9363c2a..55f57d267 100644
--- a/newlib/libc/machine/xtensa/strncpy.S
+++ b/newlib/libc/machine/xtensa/strncpy.S
@@ -56,6 +56,9 @@ __strncpy_aux:
 	j	.Lfill
 
 .Lret:
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 
@@ -122,7 +125,11 @@ strncpy:
 	addi	a10, a10, 1
 	bnez    a4, .Lfillcleanup
 
-2:	leaf_return
+2:
+#ifdef PSRAM_FIX
+	memw
+#endif
+	leaf_return
 
 .Lfill1mod2: // dst address is odd
 	s8i	a9, a10, 0	// store byte 0
@@ -241,6 +248,11 @@ strncpy:
 #endif
 1:	l8ui	a8, a3, 0
 	addi	a3, a3, 1
+#ifdef PSRAM_FIX
+	nop
+	nop
+	nop
+#endif
 	s8i	a8, a10, 0
 	addi	a4, a4, -1
 	beqz	a4, 3f
@@ -252,7 +264,11 @@ strncpy:
 #endif
 2:	j	.Lfill
 
-3:	leaf_return
+3:
+#ifdef PSRAM_FIX
+	memw
+#endif
+	leaf_return
 .end schedule
 
 	.size	strncpy, . - strncpy
-- 
2.34.1


  parent reply	other threads:[~2023-05-11  6:25 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-11  6:03 [PATCH, RFC 0/8] add xtensa port Alexey Lapshin
2023-05-11  6:05 ` [PATCH, RFC 1/8] newlib: add system headers from include/$(sys_dir)/*/*.h Alexey Lapshin
2023-05-11  6:12 ` [PATCH, RFC 2/8] libc: sys: add xtensa port Alexey Lapshin
2023-05-11  6:18 ` [PATCH, RFC 3/8] libm: " Alexey Lapshin
2023-05-11  6:20 ` [PATCH, RFC 4/8] libc: " Alexey Lapshin
2023-05-11  6:21 ` [PATCH, RFC 5/8] libm: add attribute weak for __ieee754_sqrtf Alexey Lapshin
2023-05-11  6:22 ` [PATCH, RFC 6/8] libgloss: libnosys: add xtensa port Alexey Lapshin
2023-05-11  6:23 ` [PATCH, RFC 7/8] libgloss: " Alexey Lapshin
2023-05-11  6:25 ` Alexey Lapshin [this message]
2023-05-12  9:18 ` [PATCH, RFC 0/8] " Max Filippov
2023-05-15 13:45   ` [PATCH, RFC v2 " Alexey Lapshin
2023-05-15 13:47     ` [PATCH, RFC v2 1/8] newlib: add system headers from include/$(sys_dir)/*/*.h Alexey Lapshin
2023-05-15 13:48     ` [PATCH, RFC v2 2/8] libc: sys: add xtensa port Alexey Lapshin
2023-05-15 13:49     ` [PATCH, RFC v2 3/8] libm: " Alexey Lapshin
2023-05-15 13:50     ` [PATCH, RFC v2 4/8] libc: " Alexey Lapshin
2023-05-15 13:51     ` [PATCH, RFC v2 5/8] libm: add attribute weak for __ieee754_sqrtf Alexey Lapshin
2023-05-15 13:53     ` [PATCH, RFC v2 6/8] libgloss: libnosys: add xtensa port Alexey Lapshin
2023-05-15 13:54     ` [PATCH, RFC v2 7/8] libgloss: " Alexey Lapshin
2023-05-15 13:55     ` [PATCH, RFC v2 8/8] libc: xtensa: fix PSRAM cache bug Alexey Lapshin
2023-05-30 19:58     ` [PATCH, RFC v2 0/8] add xtensa port Jeff Johnston
2023-05-31  7:53       ` Alexey Lapshin
2023-08-09 20:00     ` [PATCH, RFC v3 0/3] " Alexey Lapshin
2023-08-09 20:02       ` [PATCH, RFC v3 1/3] libc: fix nested sys-include dirs install Alexey Lapshin
2023-08-09 20:03       ` [PATCH, RFC v3 2/3] newlib: add Xtensa port Alexey Lapshin
2023-08-09 20:04       ` [PATCH, RFC v3 3/3] libgloss: " Alexey Lapshin
2023-08-10 18:20       ` [PATCH, RFC v3 0/3] add xtensa port Jeff Johnston
2023-08-10 19:50         ` Alexey Lapshin
2023-08-10 22:12           ` Jeff Johnston
2023-08-10 22:15             ` Alexey Lapshin
2023-08-10 22:18               ` Jeff Johnston
2023-08-15  7:20                 ` Alexey Lapshin
2023-08-15 14:48                   ` Jeff Johnston
2023-08-15 15:07                     ` Alexey Lapshin
2023-08-15 17:53                       ` Jeff Johnston
2023-08-15 19:20                         ` Alexey Lapshin
2023-08-15 22:09                           ` Jeff Johnston
2023-08-16  7:46                             ` Alexey Lapshin
2023-08-16 18:55                               ` Jeff Johnston
2023-08-17 20:38                                 ` Alexey Lapshin
2023-08-17 22:25                                   ` Jeff Johnston
2023-08-18 10:13                                     ` Alexey Lapshin
2023-08-23 20:57                                       ` Jeff Johnston
2023-08-23 21:28                                         ` Alexey Lapshin
2023-08-24 15:01                                           ` Jeff Johnston
2023-08-24 15:45                                             ` Alexey Lapshin
2023-09-11 12:07 ` [PATCH, RFC 0/8] " Sebastian Huber
2023-09-11 12:15   ` Alexey Lapshin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1f413c4816a78712571dff71af533091b1b4b206.camel@espressif.com \
    --to=alexey.lapshin@espressif.com \
    --cc=alexey.gerenkov@espressif.com \
    --cc=ivan@espressif.com \
    --cc=jcmvbkbc@gmail.com \
    --cc=newlib@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).