public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Re: [PATCH 2/2][ARC] Add support for ARCv2 CPUs
       [not found] <098ECE41A0A6114BB2A07F1EC238DE896616641A@de02wembxa.internal.synopsys.com>
@ 2015-10-23 21:13 ` Joern Wolfgang Rennecke
  2015-10-30 11:29   ` Claudiu Zissulescu
  0 siblings, 1 reply; 5+ messages in thread
From: Joern Wolfgang Rennecke @ 2015-10-23 21:13 UTC (permalink / raw)
  To: Claudiu Zissulescu, gcc-patches; +Cc: Francois Bedard, jeremy.bennett

  * config/arc/gmon/profil.S (__profil, __profil_irq): Don't profile

Shouldn't profil return -1 on failure?

config/arc/lib1funcs.S (__udivmodsi4):

@@ -393,7 +393,12 @@ SYM(__udivmodsi4):
         lsr_s r1,r1
         cmp_s r0,r1
         xor.f r2,lp_count,31
+#if !defined (__EM__) && !defined (__HS__)
         mov_s lp_count,r2
+#else
+       mov lp_count,r2
+       nop_s
+#endif /* !__EM__ && !__HS__ */
  #endif /* !__ARC_NORM__ */
         sub.cc r0,r0,r1
         mov_s r3,3

This is in ! __ARC_NORM__ code - so this should never happen for __HS__ .
It can happen for __EM__ - and then we have another problem.  AFAIK
__EM__ also has the LP_COUNT interlock that slows down every reference
to LP_COUNT by an ordinary instruction.  So this ARC600 code will run
rather slowly on it.  OTOH, we can use the better handling of branch
instructions inside a zero overhead loop to our advantage here, like this:

#else /* ! __ARC_NORM__ */
         lsr_s r2,r0
         brhs.d r1,r2,.Lret0_3

#ifdef __EM__
         mov lp_count,-1
         asl_s r1,r1             ; den <<= 1
         lp 1f
.Loop1:
         brhi r1,r2,1f
         asl.ls r1,r1
1:
         sub_s r0,r0,r1
         lsr_s r1,r1
         cmp r0,r1
         not r2,lp_count
         mov.f lp_count,r2
#else /*  !__EM__  */
         mov_s r3,0
.Lloop1:
         asl_s r1,r1             ; den <<= 1
         brls.d r1,r2,@.Lloop1
         sub_s r3,r3,1
         sub_s r0,r0,r1
         lsr_s r1,r1
         cmp_s r0,r1
         not_s r2,r3
         mov.f lp_count,r3
#endif /* !__EM__ */
#endif /* !__ARC_NORM__

Hmm, somehow I lost the explicit inner-loop lp_count reference anyway, but
the version with lp is still one instruction shorter inside the loop,
although the other version is shorter statically.
OTOH, maybe a not.f / mov_s pair is faster when followed by sub.cc; that
depends on what the ARC600 pipeline is like today.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH 2/2][ARC] Add support for ARCv2 CPUs
  2015-10-23 21:13 ` [PATCH 2/2][ARC] Add support for ARCv2 CPUs Joern Wolfgang Rennecke
@ 2015-10-30 11:29   ` Claudiu Zissulescu
  2015-11-10 14:01     ` Joern Wolfgang Rennecke
  0 siblings, 1 reply; 5+ messages in thread
From: Claudiu Zissulescu @ 2015-10-30 11:29 UTC (permalink / raw)
  To: Joern Wolfgang Rennecke, gcc-patches; +Cc: Francois Bedard, jeremy.bennett

[-- Attachment #1: Type: text/plain, Size: 141 bytes --]

Hi,

Please find the updated patch.  Both ARC patches were tested using dg.exp. The ChangeLog entry is unchanged. 

Thank you,
Claudiu

[-- Attachment #2: 02-arcv2Updated.patch --]
[-- Type: application/octet-stream, Size: 12088 bytes --]

From 981b97246cd65908fa2560b8a346b02440b1a450 Mon Sep 17 00:00:00 2001
From: Claudiu Zissulescu <claziss@synopsys.com>
Date: Wed, 30 Sep 2015 12:27:49 +0200
Subject: [PATCH] Add ARCv2 basic support (Updated)

---
 libgcc/config/arc/dp-hack.h                |    2 +-
 libgcc/config/arc/gmon/dcache_linesz.S     |    2 +
 libgcc/config/arc/gmon/profil.S            |   11 ++++++++
 libgcc/config/arc/ieee-754/arc-ieee-754.h  |    7 +++++
 libgcc/config/arc/ieee-754/divdf3.S        |   37 +++++++++++++++++++++-------
 libgcc/config/arc/ieee-754/divsf3-stdmul.S |   14 +++++-----
 libgcc/config/arc/ieee-754/muldf3.S        |    8 +++---
 libgcc/config/arc/ieee-754/mulsf3.S        |    6 ++--
 libgcc/config/arc/lib1funcs.S              |   15 +++++++----
 libgcc/config/arc/t-arc700-uClibc          |    4 +-
 10 files changed, 75 insertions(+), 31 deletions(-)

diff --git a/libgcc/config/arc/dp-hack.h b/libgcc/config/arc/dp-hack.h
index c1ab9b2..a212e3b 100644
--- a/libgcc/config/arc/dp-hack.h
+++ b/libgcc/config/arc/dp-hack.h
@@ -48,7 +48,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define L_mul_df
 #define L_div_df
 #elif (!defined (__ARC700__) && !defined (__ARC_MUL64__) \
-       && !defined(__ARC_MUL32BY16__))
+       && !defined (__ARC_MUL32BY16__) && !defined (__HS__))
 #define L_mul_df
 #define L_div_df
 #undef QUIET_NAN
diff --git a/libgcc/config/arc/gmon/dcache_linesz.S b/libgcc/config/arc/gmon/dcache_linesz.S
index 8cf6442..972a587 100644
--- a/libgcc/config/arc/gmon/dcache_linesz.S
+++ b/libgcc/config/arc/gmon/dcache_linesz.S
@@ -38,6 +38,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 	.global	__dcache_linesz
 	.balign	4
 __dcache_linesz:
+#if !defined (__EM__) && !defined (__HS__)
 	lr	r12,[D_CACHE_BUILD]
 	extb_s	r0,r12
 	breq_s	r0,0,.Lsz_nocache
@@ -51,5 +52,6 @@ __dcache_linesz:
 	asl_s	r0,r0,r12
 	j_s	[blink]
 .Lsz_nocache:
+#endif /* !__EM__  && !__HS__ */
 	mov_s	r0,1
 	j_s	[blink]
diff --git a/libgcc/config/arc/gmon/profil.S b/libgcc/config/arc/gmon/profil.S
index 3be2869..df10dbd 100644
--- a/libgcc/config/arc/gmon/profil.S
+++ b/libgcc/config/arc/gmon/profil.S
@@ -45,6 +45,7 @@ __profil_offset:
 	.global	__dcache_linesz
 	.global __profil
 	FUNC(__profil)
+#if !defined (__EM__) && !defined (__HS__)
 .Lstop_profiling:
 	sr	r0,[CONTROL0]
 	j_s	[blink]
@@ -107,6 +108,12 @@ nocache:
 	j_s	[blink]
 	.balign	4
 1:	j	__profil_irq
+#else
+__profil:
+	.balign	4
+	mov_s	r0,-1
+	j_s	[blink]
+#endif /* !__EM__ && !__HS__ */
 	ENDFUNC(__profil)
 
 	FUNC(__profil_irq)
@@ -114,6 +121,7 @@ nocache:
 	.balign 32,0,12	; make sure the code spans no more that two cache lines
 	nop_s
 __profil_irq:
+#if !defined (__EM__) && !defined (__HS__)
 	push_s	r0
 	ld	r0,[__profil_offset]
 	push_s	r1
@@ -128,6 +136,9 @@ __profil_irq:
 nostore:ld.ab	r2,[sp,8]
 	pop_s	r0
 	j.f	[ilink1]
+#else
+	rtie
+#endif /* !__EM__  && !__HS__ */
 	ENDFUNC(__profil_irq)
 
 ; could save one cycle if the counters were allocated at link time and
diff --git a/libgcc/config/arc/ieee-754/arc-ieee-754.h b/libgcc/config/arc/ieee-754/arc-ieee-754.h
index 08a14a6..f1ac98e 100644
--- a/libgcc/config/arc/ieee-754/arc-ieee-754.h
+++ b/libgcc/config/arc/ieee-754/arc-ieee-754.h
@@ -54,3 +54,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define bmsk_l bmsk
 #define bxor_l bxor
 #define bcs_s blo_s
+#if defined (__HS__) || defined (__EM__)
+#define MPYHU   mpymu
+#define MPYH    mpym
+#else
+#define MPYHU   mpyhu
+#define MPYH    mpyh
+#endif
diff --git a/libgcc/config/arc/ieee-754/divdf3.S b/libgcc/config/arc/ieee-754/divdf3.S
index 2d000e4..27705ed 100644
--- a/libgcc/config/arc/ieee-754/divdf3.S
+++ b/libgcc/config/arc/ieee-754/divdf3.S
@@ -118,7 +118,7 @@ __divdf3_support: /* This label makes debugger output saner.  */
 	sub r11,r11,11
 	asl DBL1L,DBL1L,r11
 	sub r11,r11,1
-	mpyhu r5,r4,r8
+	MPYHU r5,r4,r8
 	sub r7,r7,r11
 	asl r4,r4,12
 	b.d .Lpast_denorm_dbl1
@@ -189,25 +189,33 @@ __divdf3:
 	asl r8,DBL1H,12
 	lsr r12,DBL1L,20
 	lsr r4,r8,26
+#ifdef __HS__
+	add3 r10,pcl,60 ; (.Ldivtab-.) >> 3
+#else
 	add3 r10,pcl,59 ; (.Ldivtab-.) >> 3
+#endif
 	ld.as r4,[r10,r4]
+#ifdef __HS__
+	ld.as r9,[pcl,182]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
+#else
 	ld.as r9,[pcl,180]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
+#endif
 	or r8,r8,r12
-	mpyhu r5,r4,r8
+	MPYHU r5,r4,r8
 	and.f r7,DBL1H,r9
 	asl r4,r4,12 ; having the asl here is a concession to the XMAC pipeline.
 	beq.d .Ldenorm_dbl1
 	and r6,DBL0H,r9
 .Lpast_denorm_dbl1: ; wb stall
 	sub r4,r4,r5
-	mpyhu r5,r4,r4
+	MPYHU r5,r4,r4
 	breq.d r6,0,.Ldenorm_dbl0
 	lsr r8,r8,1
 	asl r12,DBL0H,11
 	lsr r10,DBL0L,21
 .Lpast_denorm_dbl0: ; wb stall
 	bset r8,r8,31
-	mpyhu r11,r5,r8
+	MPYHU r11,r5,r8
 	add_s r12,r12,r10
 	bset r5,r12,31
 	cmp r5,r8
@@ -215,7 +223,7 @@ __divdf3:
 	; wb stall
 	lsr.cc r5,r5,1
 	sub r4,r4,r11 ; u1.31 inverse, about 30 bit
-	mpyhu r11,r5,r4 ; result fraction highpart
+	MPYHU r11,r5,r4 ; result fraction highpart
 	breq r7,r9,.Linf_nan_dbl1
 	lsr r8,r8,2 ; u3.29
 	add r5,r6, /* wait for immediate /  XMAC wb stall */ \
@@ -226,7 +234,7 @@ __divdf3:
 	asl_s DBL1L,DBL1L,9 ; u-29.23:9
 	sbc r6,r5,r7
 	; resource conflict (not for XMAC)
-	mpyhu r5,r11,DBL1L ; u-28.23:9
+	MPYHU r5,r11,DBL1L ; u-28.23:9
 	add.cs DBL0L,DBL0L,DBL0L
 	asl_s DBL0L,DBL0L,6 ; u-26.25:7
 	asl r10,r11,23
@@ -234,7 +242,7 @@ __divdf3:
 	; wb stall (before 'and' for XMAC)
 	lsr r7,r11,9
 	sub r5,DBL0L,r5 ; rest msw ; u-26.31:0
-	mpyh r12,r5,r4 ; result fraction lowpart
+	MPYH r12,r5,r4 ; result fraction lowpart
 	xor.f 0,DBL0H,DBL1H
 	and DBL0H,r6,r9
 	add_s DBL0H,DBL0H,r7 ; (XMAC wb stall)
@@ -261,7 +269,7 @@ __divdf3:
 	sub.cs DBL0H,DBL0H,1
 	sub.f r12,r12,2
 	; resource conflict (not for XMAC)
-	mpyhu r7,r12,DBL1L ; u-51.32
+	MPYHU r7,r12,DBL1L ; u-51.32
 	asl r5,r5,25 ; s-51.7:25
 	lsr r10,r10,7 ; u-51.30:2
 	; resource conflict (not for XMAC)
@@ -291,10 +299,21 @@ __divdf3:
 	rsub r7,r6,5
 	asr r10,r12,28
 	bmsk r4,r12,27
+#ifdef __HS__
+	min  r7, r7, 31
+	asr  DBL0L, r4, r7
+#else
 	asrs DBL0L,r4,r7
+#endif
 	add DBL1H,r11,r10
+#ifdef __HS__
+	abs.f r10, r4
+	sub.mi r10, r10, 1
+#endif
 	add.f r7,r6,32-5
+#ifdef __ARC700__
 	abss r10,r4
+#endif
 	asl r4,r4,r7
 	mov.mi r4,r10
 	add.f r10,r6,23
@@ -319,7 +338,7 @@ __divdf3:
 	and r9,DBL0L,1 ; tie-breaker: round to even
 	lsr r11,r11,7 ; u-51.30:2
 	; resource conflict (not for XMAC)
-	mpyhu r8,r12,DBL1L ; u-51.32
+	MPYHU r8,r12,DBL1L ; u-51.32
 	sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L
 	add_s DBL1H,DBL1H,r11
 	; resource conflict (not for XMAC)
diff --git a/libgcc/config/arc/ieee-754/divsf3-stdmul.S b/libgcc/config/arc/ieee-754/divsf3-stdmul.S
index 09861d3..f13944a 100644
--- a/libgcc/config/arc/ieee-754/divsf3-stdmul.S
+++ b/libgcc/config/arc/ieee-754/divsf3-stdmul.S
@@ -144,7 +144,7 @@ __divsf3_support: /* This label makes debugger output saner.  */
 	ld.as r5,[r3,r5]
 	add r4,r6,r6
 	; load latency
-	mpyhu r7,r5,r4
+	MPYHU r7,r5,r4
 	bic.ne.f 0, \
 		0x60000000,r0 ; large number / denorm -> Inf
 	beq_s .Linf_NaN
@@ -152,7 +152,7 @@ __divsf3_support: /* This label makes debugger output saner.  */
 	; wb stall
 	; slow track
 	sub r7,r5,r7
-	mpyhu r8,r7,r6
+	MPYHU r8,r7,r6
 	asl_s r12,r12,23
 	and.f r2,r0,r9
 	add r2,r2,r12
@@ -160,7 +160,7 @@ __divsf3_support: /* This label makes debugger output saner.  */
 	; wb stall
 	bne.d .Lpast_denorm_fp1
 .Ldenorm_fp0:
-	mpyhu r8,r8,r7
+	MPYHU r8,r8,r7
 	bclr r12,r12,31
 	norm.f r3,r12 ; flag for 0/x -> 0 check
 	bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
@@ -209,7 +209,7 @@ __divsf3:
 	ld.as r5,[r3,r2]
 	asl r4,r1,9
 	ld.as r9,[pcl,-114]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
-	mpyhu r7,r5,r4
+	MPYHU r7,r5,r4
 	asl r6,r1,8
 	and.f r11,r1,r9
 	bset r6,r6,31
@@ -217,14 +217,14 @@ __divsf3:
 	; wb stall
 	beq .Ldenorm_fp1
 	sub r7,r5,r7
-	mpyhu r8,r7,r6
+	MPYHU r8,r7,r6
 	breq.d r11,r9,.Linf_nan_fp1
 	and.f r2,r0,r9
 	beq.d .Ldenorm_fp0
 	asl r12,r0,8
 	; wb stall
 	breq r2,r9,.Linf_nan_fp0
-	mpyhu r8,r8,r7
+	MPYHU r8,r8,r7
 .Lpast_denorm_fp1:
 	bset r3,r12,31
 .Lpast_denorm_fp0:
@@ -234,7 +234,7 @@ __divsf3:
 	/* wb stall */ \
 		0x3f000000
 	sub r7,r7,r8 ; u1.31 inverse, about 30 bit
-	mpyhu r3,r3,r7
+	MPYHU r3,r3,r7
 	sbc r2,r2,r11
 	xor.f 0,r0,r1
 	and r0,r2,r9
diff --git a/libgcc/config/arc/ieee-754/muldf3.S b/libgcc/config/arc/ieee-754/muldf3.S
index 805db5c..5f562e2 100644
--- a/libgcc/config/arc/ieee-754/muldf3.S
+++ b/libgcc/config/arc/ieee-754/muldf3.S
@@ -132,19 +132,19 @@ __muldf3_support: /* This label makes debugger output saner.  */
 	.balign 4
 __muldf3:
 	ld.as r9,[pcl,0x4b] ; ((.L7ff00000-.+2)/4)]
-	mpyhu r4,DBL0L,DBL1L
+	MPYHU r4,DBL0L,DBL1L
 	bmsk r6,DBL0H,19
 	bset r6,r6,20
 	mpyu r7,r6,DBL1L
 	and r11,DBL0H,r9
 	breq r11,0,.Ldenorm_dbl0
-	mpyhu r8,r6,DBL1L
+	MPYHU r8,r6,DBL1L
 	bmsk r10,DBL1H,19
 	bset r10,r10,20
-	mpyhu r5,r10,DBL0L
+	MPYHU r5,r10,DBL0L
 	add.f r4,r4,r7
 	and r12,DBL1H,r9
-	mpyhu r7,r6,r10
+	MPYHU r7,r6,r10
 	breq r12,0,.Ldenorm_dbl1
 	adc.f r5,r5,r8
 	mpyu r8,r10,DBL0L
diff --git a/libgcc/config/arc/ieee-754/mulsf3.S b/libgcc/config/arc/ieee-754/mulsf3.S
index 7a6c791..df2660a 100644
--- a/libgcc/config/arc/ieee-754/mulsf3.S
+++ b/libgcc/config/arc/ieee-754/mulsf3.S
@@ -64,7 +64,7 @@ __mulsf3:
 	bset	r2,r0,23
 	asl_s	r2,r2,8
 	bset	r3,r4,23
-	mpyhu	r6,r2,r3
+	MPYHU	r6,r2,r3
 	and	r11,r0,r9
 	breq	r11,0,.Ldenorm_dbl0
 	mpyu	r7,r2,r3
@@ -144,7 +144,7 @@ __mulsf3:
 	add_s	r2,r2,r2
 	asl	r2,r2,r4
 	asl	r4,r4,23
-	mpyhu	r6,r2,r3
+	MPYHU	r6,r2,r3
 	breq	r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
 	sub.ne.f r12,r12,r4
 	mpyu	r7,r2,r3
@@ -163,7 +163,7 @@ __mulsf3:
 	asl	r4,r4,r3
 	sub_s	r3,r3,1
 	asl_s	r3,r3,23
-	mpyhu	r6,r2,r4
+	MPYHU	r6,r2,r4
 	sub.ne.f r11,r11,r3
 	bmsk	r8,r0,30
 	mpyu	r7,r2,r4
diff --git a/libgcc/config/arc/lib1funcs.S b/libgcc/config/arc/lib1funcs.S
index e59340a..022a2ea 100644
--- a/libgcc/config/arc/lib1funcs.S
+++ b/libgcc/config/arc/lib1funcs.S
@@ -79,7 +79,7 @@ SYM(__mulsi3):
 	j_s.d [blink]
 	mov_s r0,mlo
 	ENDFUNC(__mulsi3)
-#elif defined (__ARC700__)
+#elif defined (__ARC700__) || defined (__HS__)
 	HIDDEN_FUNC(__mulsi3)
 	mpyu	r0,r0,r1
 	nop_s
@@ -393,7 +393,12 @@ SYM(__udivmodsi4):
 	lsr_s r1,r1
 	cmp_s r0,r1
 	xor.f r2,lp_count,31
+#if !defined (__EM__)
 	mov_s lp_count,r2
+#else
+	mov lp_count,r2
+	nop_s
+#endif /* !__EM__ */
 #endif /* !__ARC_NORM__ */
 	sub.cc r0,r0,r1
 	mov_s r3,3
@@ -1260,7 +1265,7 @@ SYM(__ld_r13_to_r14_ret):
 #endif
 
 #ifdef  L_muldf3
-#ifdef __ARC700__
+#if defined (__ARC700__) || defined (__HS__)
 #include "ieee-754/muldf3.S"
 #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
 #include "ieee-754/arc600-mul64/muldf3.S"
@@ -1276,7 +1281,7 @@ SYM(__ld_r13_to_r14_ret):
 #endif
 
 #ifdef  L_mulsf3
-#ifdef __ARC700__
+#if defined (__ARC700__) || defined (__HS__)
 #include "ieee-754/mulsf3.S"
 #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
 #include "ieee-754/arc600-mul64/mulsf3.S"
@@ -1288,7 +1293,7 @@ SYM(__ld_r13_to_r14_ret):
 #endif
 
 #ifdef  L_divdf3
-#ifdef __ARC700__
+#if defined (__ARC700__) || defined (__HS__)
 #include "ieee-754/divdf3.S"
 #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
 #include "ieee-754/arc600-mul64/divdf3.S"
@@ -1298,7 +1303,7 @@ SYM(__ld_r13_to_r14_ret):
 #endif
 
 #ifdef  L_divsf3
-#ifdef __ARC700__
+#if defined (__ARC700__) || defined (__HS__)
 #include "ieee-754/divsf3-stdmul.S"
 #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
 #include "ieee-754/arc600-mul64/divsf3.S"
diff --git a/libgcc/config/arc/t-arc700-uClibc b/libgcc/config/arc/t-arc700-uClibc
index 651c3de..ff57039 100644
--- a/libgcc/config/arc/t-arc700-uClibc
+++ b/libgcc/config/arc/t-arc700-uClibc
@@ -28,10 +28,10 @@
 CRTSTUFF_T_CFLAGS += -mno-sdata
 
 # Compile crtbeginS.o and crtendS.o with pic.
-CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -mA7 -fPIC
+CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -fPIC
 
 # Compile libgcc2.a with pic.
-TARGET_LIBGCC2_CFLAGS = -mA7 -fPIC
+TARGET_LIBGCC2_CFLAGS = -fPIC
 
 PROFILE_OSDEP = prof-freq.o
 
-- 
1.7.0.4


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2][ARC] Add support for ARCv2 CPUs
  2015-10-30 11:29   ` Claudiu Zissulescu
@ 2015-11-10 14:01     ` Joern Wolfgang Rennecke
  2015-11-11 12:33       ` Claudiu Zissulescu
  0 siblings, 1 reply; 5+ messages in thread
From: Joern Wolfgang Rennecke @ 2015-11-10 14:01 UTC (permalink / raw)
  To: Claudiu Zissulescu, gcc-patches; +Cc: Francois Bedard, jeremy.bennett



On 30/10/15 11:22, Claudiu Zissulescu wrote:
> Hi,
>
> Please find the updated patch.  Both ARC patches were tested using dg.exp. The ChangeLog entry is unchanged.

This is OK.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH 2/2][ARC] Add support for ARCv2 CPUs
  2015-11-10 14:01     ` Joern Wolfgang Rennecke
@ 2015-11-11 12:33       ` Claudiu Zissulescu
  0 siblings, 0 replies; 5+ messages in thread
From: Claudiu Zissulescu @ 2015-11-11 12:33 UTC (permalink / raw)
  To: Joern Wolfgang Rennecke, gcc-patches; +Cc: Francois Bedard, jeremy.bennett

This patch is committed.

Thanks Joern,
Claudiu

> -----Original Message-----
> From: Joern Wolfgang Rennecke [mailto:gnu@amylaar.uk]
> Sent: Tuesday, November 10, 2015 3:02 PM
> To: Claudiu Zissulescu; gcc-patches@gcc.gnu.org
> Cc: Francois Bedard; jeremy.bennett@embecosm.com
> Subject: Re: [PATCH 2/2][ARC] Add support for ARCv2 CPUs
> 
> 
> 
> On 30/10/15 11:22, Claudiu Zissulescu wrote:
> > Hi,
> >
> > Please find the updated patch.  Both ARC patches were tested using
> dg.exp. The ChangeLog entry is unchanged.
> 
> This is OK.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 2/2][ARC] Add support for ARCv2 CPUs
@ 2015-10-05 12:32 Claudiu Zissulescu
  0 siblings, 0 replies; 5+ messages in thread
From: Claudiu Zissulescu @ 2015-10-05 12:32 UTC (permalink / raw)
  To: gcc-patches, gnu; +Cc: Francois Bedard, jeremy.bennett

[-- Attachment #1: Type: text/plain, Size: 1036 bytes --]

Just realized this patch hasn't gone through to the mailing list. Reposted.

This patch adds basic support (libgcc) for Synopsys' ARCv2 CPUs. 

Can this be committed?

Thanks,
Claudiu

ChangeLog:
2015-08-28  Claudiu Zissulescu  <claziss@synopsys.com>

                * config/arc/dp-hack.h: Add support for ARCHS.
                * config/arc/ieee-754/divdf3.S: Likewise.
                * config/arc/ieee-754/divsf3-stdmul.S: Likewise.
                * config/arc/ieee-754/muldf3.S: Likewise.
                * config/arc/ieee-754/mulsf3.S: Likewise.
                * config/arc/lib1funcs.S: Likewise.
                * config/arc/gmon/dcache_linesz.S: Don't read the build register
                for ARCv2 cores.
                * config/arc/gmon/profil.S (__profil, __profil_irq): Don't profile
                for ARCv2 cores.
                * config/arc/ieee-754/arc-ieee-754.h (MPYHU, MPYH): Define.
                * config/arc/t-arc700-uClibc: Remove hard selection for ARC 700
                cores.

[-- Attachment #2: 02-arcv2.patch --]
[-- Type: application/octet-stream, Size: 12154 bytes --]

From 2c6d2b12d883e9f2cab9325110a1b5d2886ec864 Mon Sep 17 00:00:00 2001
From: Claudiu Zissulescu <claziss@synopsys.com>
Date: Wed, 30 Sep 2015 12:27:49 +0200
Subject: [PATCH] Add ARCv2 basic support

---
 libgcc/config/arc/dp-hack.h                |    2 +-
 libgcc/config/arc/gmon/dcache_linesz.S     |    2 +
 libgcc/config/arc/gmon/profil.S            |    9 +++++++
 libgcc/config/arc/ieee-754/arc-ieee-754.h  |    7 +++++
 libgcc/config/arc/ieee-754/divdf3.S        |   37 +++++++++++++++++++++-------
 libgcc/config/arc/ieee-754/divsf3-stdmul.S |   14 +++++-----
 libgcc/config/arc/ieee-754/muldf3.S        |    8 +++---
 libgcc/config/arc/ieee-754/mulsf3.S        |    6 ++--
 libgcc/config/arc/lib1funcs.S              |   15 +++++++----
 libgcc/config/arc/t-arc700-uClibc          |    4 +-
 10 files changed, 73 insertions(+), 31 deletions(-)

diff --git a/libgcc/config/arc/dp-hack.h b/libgcc/config/arc/dp-hack.h
index c1ab9b2..a212e3b 100644
--- a/libgcc/config/arc/dp-hack.h
+++ b/libgcc/config/arc/dp-hack.h
@@ -48,7 +48,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define L_mul_df
 #define L_div_df
 #elif (!defined (__ARC700__) && !defined (__ARC_MUL64__) \
-       && !defined(__ARC_MUL32BY16__))
+       && !defined (__ARC_MUL32BY16__) && !defined (__HS__))
 #define L_mul_df
 #define L_div_df
 #undef QUIET_NAN
diff --git a/libgcc/config/arc/gmon/dcache_linesz.S b/libgcc/config/arc/gmon/dcache_linesz.S
index 8cf6442..972a587 100644
--- a/libgcc/config/arc/gmon/dcache_linesz.S
+++ b/libgcc/config/arc/gmon/dcache_linesz.S
@@ -38,6 +38,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 	.global	__dcache_linesz
 	.balign	4
 __dcache_linesz:
+#if !defined (__EM__) && !defined (__HS__)
 	lr	r12,[D_CACHE_BUILD]
 	extb_s	r0,r12
 	breq_s	r0,0,.Lsz_nocache
@@ -51,5 +52,6 @@ __dcache_linesz:
 	asl_s	r0,r0,r12
 	j_s	[blink]
 .Lsz_nocache:
+#endif /* !__EM__  && !__HS__ */
 	mov_s	r0,1
 	j_s	[blink]
diff --git a/libgcc/config/arc/gmon/profil.S b/libgcc/config/arc/gmon/profil.S
index 3be2869..ac3a7eb 100644
--- a/libgcc/config/arc/gmon/profil.S
+++ b/libgcc/config/arc/gmon/profil.S
@@ -45,6 +45,7 @@ __profil_offset:
 	.global	__dcache_linesz
 	.global __profil
 	FUNC(__profil)
+#if !defined (__EM__) && !defined (__HS__)
 .Lstop_profiling:
 	sr	r0,[CONTROL0]
 	j_s	[blink]
@@ -107,11 +108,18 @@ nocache:
 	j_s	[blink]
 	.balign	4
 1:	j	__profil_irq
+#else
+__profil:
+	.balign	4
+	mov_s	r0,0
+	j_s	[blink]
+#endif /* !__EM__ && !__HS__ */
 	ENDFUNC(__profil)
 
 	FUNC(__profil_irq)
 	.balign 4	; make final jump unaligned to avoid delay penalty
 	.balign 32,0,12	; make sure the code spans no more that two cache lines
+#if !defined (__EM__) && !defined (__HS__)
 	nop_s
 __profil_irq:
 	push_s	r0
@@ -129,6 +137,7 @@ nostore:ld.ab	r2,[sp,8]
 	pop_s	r0
 	j.f	[ilink1]
 	ENDFUNC(__profil_irq)
+#endif /* !__EM__  && !__HS__ */
 
 ; could save one cycle if the counters were allocated at link time and
 ; the contents of __profil_offset were pre-computed at link time, like this:
diff --git a/libgcc/config/arc/ieee-754/arc-ieee-754.h b/libgcc/config/arc/ieee-754/arc-ieee-754.h
index 08a14a6..f1ac98e 100644
--- a/libgcc/config/arc/ieee-754/arc-ieee-754.h
+++ b/libgcc/config/arc/ieee-754/arc-ieee-754.h
@@ -54,3 +54,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define bmsk_l bmsk
 #define bxor_l bxor
 #define bcs_s blo_s
+#if defined (__HS__) || defined (__EM__)
+#define MPYHU   mpymu
+#define MPYH    mpym
+#else
+#define MPYHU   mpyhu
+#define MPYH    mpyh
+#endif
diff --git a/libgcc/config/arc/ieee-754/divdf3.S b/libgcc/config/arc/ieee-754/divdf3.S
index 2d000e4..27705ed 100644
--- a/libgcc/config/arc/ieee-754/divdf3.S
+++ b/libgcc/config/arc/ieee-754/divdf3.S
@@ -118,7 +118,7 @@ __divdf3_support: /* This label makes debugger output saner.  */
 	sub r11,r11,11
 	asl DBL1L,DBL1L,r11
 	sub r11,r11,1
-	mpyhu r5,r4,r8
+	MPYHU r5,r4,r8
 	sub r7,r7,r11
 	asl r4,r4,12
 	b.d .Lpast_denorm_dbl1
@@ -189,25 +189,33 @@ __divdf3:
 	asl r8,DBL1H,12
 	lsr r12,DBL1L,20
 	lsr r4,r8,26
+#ifdef __HS__
+	add3 r10,pcl,60 ; (.Ldivtab-.) >> 3
+#else
 	add3 r10,pcl,59 ; (.Ldivtab-.) >> 3
+#endif
 	ld.as r4,[r10,r4]
+#ifdef __HS__
+	ld.as r9,[pcl,182]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
+#else
 	ld.as r9,[pcl,180]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
+#endif
 	or r8,r8,r12
-	mpyhu r5,r4,r8
+	MPYHU r5,r4,r8
 	and.f r7,DBL1H,r9
 	asl r4,r4,12 ; having the asl here is a concession to the XMAC pipeline.
 	beq.d .Ldenorm_dbl1
 	and r6,DBL0H,r9
 .Lpast_denorm_dbl1: ; wb stall
 	sub r4,r4,r5
-	mpyhu r5,r4,r4
+	MPYHU r5,r4,r4
 	breq.d r6,0,.Ldenorm_dbl0
 	lsr r8,r8,1
 	asl r12,DBL0H,11
 	lsr r10,DBL0L,21
 .Lpast_denorm_dbl0: ; wb stall
 	bset r8,r8,31
-	mpyhu r11,r5,r8
+	MPYHU r11,r5,r8
 	add_s r12,r12,r10
 	bset r5,r12,31
 	cmp r5,r8
@@ -215,7 +223,7 @@ __divdf3:
 	; wb stall
 	lsr.cc r5,r5,1
 	sub r4,r4,r11 ; u1.31 inverse, about 30 bit
-	mpyhu r11,r5,r4 ; result fraction highpart
+	MPYHU r11,r5,r4 ; result fraction highpart
 	breq r7,r9,.Linf_nan_dbl1
 	lsr r8,r8,2 ; u3.29
 	add r5,r6, /* wait for immediate /  XMAC wb stall */ \
@@ -226,7 +234,7 @@ __divdf3:
 	asl_s DBL1L,DBL1L,9 ; u-29.23:9
 	sbc r6,r5,r7
 	; resource conflict (not for XMAC)
-	mpyhu r5,r11,DBL1L ; u-28.23:9
+	MPYHU r5,r11,DBL1L ; u-28.23:9
 	add.cs DBL0L,DBL0L,DBL0L
 	asl_s DBL0L,DBL0L,6 ; u-26.25:7
 	asl r10,r11,23
@@ -234,7 +242,7 @@ __divdf3:
 	; wb stall (before 'and' for XMAC)
 	lsr r7,r11,9
 	sub r5,DBL0L,r5 ; rest msw ; u-26.31:0
-	mpyh r12,r5,r4 ; result fraction lowpart
+	MPYH r12,r5,r4 ; result fraction lowpart
 	xor.f 0,DBL0H,DBL1H
 	and DBL0H,r6,r9
 	add_s DBL0H,DBL0H,r7 ; (XMAC wb stall)
@@ -261,7 +269,7 @@ __divdf3:
 	sub.cs DBL0H,DBL0H,1
 	sub.f r12,r12,2
 	; resource conflict (not for XMAC)
-	mpyhu r7,r12,DBL1L ; u-51.32
+	MPYHU r7,r12,DBL1L ; u-51.32
 	asl r5,r5,25 ; s-51.7:25
 	lsr r10,r10,7 ; u-51.30:2
 	; resource conflict (not for XMAC)
@@ -291,10 +299,21 @@ __divdf3:
 	rsub r7,r6,5
 	asr r10,r12,28
 	bmsk r4,r12,27
+#ifdef __HS__
+	min  r7, r7, 31
+	asr  DBL0L, r4, r7
+#else
 	asrs DBL0L,r4,r7
+#endif
 	add DBL1H,r11,r10
+#ifdef __HS__
+	abs.f r10, r4
+	sub.mi r10, r10, 1
+#endif
 	add.f r7,r6,32-5
+#ifdef __ARC700__
 	abss r10,r4
+#endif
 	asl r4,r4,r7
 	mov.mi r4,r10
 	add.f r10,r6,23
@@ -319,7 +338,7 @@ __divdf3:
 	and r9,DBL0L,1 ; tie-breaker: round to even
 	lsr r11,r11,7 ; u-51.30:2
 	; resource conflict (not for XMAC)
-	mpyhu r8,r12,DBL1L ; u-51.32
+	MPYHU r8,r12,DBL1L ; u-51.32
 	sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L
 	add_s DBL1H,DBL1H,r11
 	; resource conflict (not for XMAC)
diff --git a/libgcc/config/arc/ieee-754/divsf3-stdmul.S b/libgcc/config/arc/ieee-754/divsf3-stdmul.S
index 09861d3..f13944a 100644
--- a/libgcc/config/arc/ieee-754/divsf3-stdmul.S
+++ b/libgcc/config/arc/ieee-754/divsf3-stdmul.S
@@ -144,7 +144,7 @@ __divsf3_support: /* This label makes debugger output saner.  */
 	ld.as r5,[r3,r5]
 	add r4,r6,r6
 	; load latency
-	mpyhu r7,r5,r4
+	MPYHU r7,r5,r4
 	bic.ne.f 0, \
 		0x60000000,r0 ; large number / denorm -> Inf
 	beq_s .Linf_NaN
@@ -152,7 +152,7 @@ __divsf3_support: /* This label makes debugger output saner.  */
 	; wb stall
 	; slow track
 	sub r7,r5,r7
-	mpyhu r8,r7,r6
+	MPYHU r8,r7,r6
 	asl_s r12,r12,23
 	and.f r2,r0,r9
 	add r2,r2,r12
@@ -160,7 +160,7 @@ __divsf3_support: /* This label makes debugger output saner.  */
 	; wb stall
 	bne.d .Lpast_denorm_fp1
 .Ldenorm_fp0:
-	mpyhu r8,r8,r7
+	MPYHU r8,r8,r7
 	bclr r12,r12,31
 	norm.f r3,r12 ; flag for 0/x -> 0 check
 	bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
@@ -209,7 +209,7 @@ __divsf3:
 	ld.as r5,[r3,r2]
 	asl r4,r1,9
 	ld.as r9,[pcl,-114]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
-	mpyhu r7,r5,r4
+	MPYHU r7,r5,r4
 	asl r6,r1,8
 	and.f r11,r1,r9
 	bset r6,r6,31
@@ -217,14 +217,14 @@ __divsf3:
 	; wb stall
 	beq .Ldenorm_fp1
 	sub r7,r5,r7
-	mpyhu r8,r7,r6
+	MPYHU r8,r7,r6
 	breq.d r11,r9,.Linf_nan_fp1
 	and.f r2,r0,r9
 	beq.d .Ldenorm_fp0
 	asl r12,r0,8
 	; wb stall
 	breq r2,r9,.Linf_nan_fp0
-	mpyhu r8,r8,r7
+	MPYHU r8,r8,r7
 .Lpast_denorm_fp1:
 	bset r3,r12,31
 .Lpast_denorm_fp0:
@@ -234,7 +234,7 @@ __divsf3:
 	/* wb stall */ \
 		0x3f000000
 	sub r7,r7,r8 ; u1.31 inverse, about 30 bit
-	mpyhu r3,r3,r7
+	MPYHU r3,r3,r7
 	sbc r2,r2,r11
 	xor.f 0,r0,r1
 	and r0,r2,r9
diff --git a/libgcc/config/arc/ieee-754/muldf3.S b/libgcc/config/arc/ieee-754/muldf3.S
index 805db5c..5f562e2 100644
--- a/libgcc/config/arc/ieee-754/muldf3.S
+++ b/libgcc/config/arc/ieee-754/muldf3.S
@@ -132,19 +132,19 @@ __muldf3_support: /* This label makes debugger output saner.  */
 	.balign 4
 __muldf3:
 	ld.as r9,[pcl,0x4b] ; ((.L7ff00000-.+2)/4)]
-	mpyhu r4,DBL0L,DBL1L
+	MPYHU r4,DBL0L,DBL1L
 	bmsk r6,DBL0H,19
 	bset r6,r6,20
 	mpyu r7,r6,DBL1L
 	and r11,DBL0H,r9
 	breq r11,0,.Ldenorm_dbl0
-	mpyhu r8,r6,DBL1L
+	MPYHU r8,r6,DBL1L
 	bmsk r10,DBL1H,19
 	bset r10,r10,20
-	mpyhu r5,r10,DBL0L
+	MPYHU r5,r10,DBL0L
 	add.f r4,r4,r7
 	and r12,DBL1H,r9
-	mpyhu r7,r6,r10
+	MPYHU r7,r6,r10
 	breq r12,0,.Ldenorm_dbl1
 	adc.f r5,r5,r8
 	mpyu r8,r10,DBL0L
diff --git a/libgcc/config/arc/ieee-754/mulsf3.S b/libgcc/config/arc/ieee-754/mulsf3.S
index 7a6c791..df2660a 100644
--- a/libgcc/config/arc/ieee-754/mulsf3.S
+++ b/libgcc/config/arc/ieee-754/mulsf3.S
@@ -64,7 +64,7 @@ __mulsf3:
 	bset	r2,r0,23
 	asl_s	r2,r2,8
 	bset	r3,r4,23
-	mpyhu	r6,r2,r3
+	MPYHU	r6,r2,r3
 	and	r11,r0,r9
 	breq	r11,0,.Ldenorm_dbl0
 	mpyu	r7,r2,r3
@@ -144,7 +144,7 @@ __mulsf3:
 	add_s	r2,r2,r2
 	asl	r2,r2,r4
 	asl	r4,r4,23
-	mpyhu	r6,r2,r3
+	MPYHU	r6,r2,r3
 	breq	r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
 	sub.ne.f r12,r12,r4
 	mpyu	r7,r2,r3
@@ -163,7 +163,7 @@ __mulsf3:
 	asl	r4,r4,r3
 	sub_s	r3,r3,1
 	asl_s	r3,r3,23
-	mpyhu	r6,r2,r4
+	MPYHU	r6,r2,r4
 	sub.ne.f r11,r11,r3
 	bmsk	r8,r0,30
 	mpyu	r7,r2,r4
diff --git a/libgcc/config/arc/lib1funcs.S b/libgcc/config/arc/lib1funcs.S
index e59340a..e7317a3 100644
--- a/libgcc/config/arc/lib1funcs.S
+++ b/libgcc/config/arc/lib1funcs.S
@@ -79,7 +79,7 @@ SYM(__mulsi3):
 	j_s.d [blink]
 	mov_s r0,mlo
 	ENDFUNC(__mulsi3)
-#elif defined (__ARC700__)
+#elif defined (__ARC700__) || defined (__HS__)
 	HIDDEN_FUNC(__mulsi3)
 	mpyu	r0,r0,r1
 	nop_s
@@ -393,7 +393,12 @@ SYM(__udivmodsi4):
 	lsr_s r1,r1
 	cmp_s r0,r1
 	xor.f r2,lp_count,31
+#if !defined (__EM__) && !defined (__HS__)
 	mov_s lp_count,r2
+#else
+	mov lp_count,r2
+	nop_s
+#endif /* !__EM__ && !__HS__ */
 #endif /* !__ARC_NORM__ */
 	sub.cc r0,r0,r1
 	mov_s r3,3
@@ -1260,7 +1265,7 @@ SYM(__ld_r13_to_r14_ret):
 #endif
 
 #ifdef  L_muldf3
-#ifdef __ARC700__
+#if defined (__ARC700__) || defined (__HS__)
 #include "ieee-754/muldf3.S"
 #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
 #include "ieee-754/arc600-mul64/muldf3.S"
@@ -1276,7 +1281,7 @@ SYM(__ld_r13_to_r14_ret):
 #endif
 
 #ifdef  L_mulsf3
-#ifdef __ARC700__
+#if defined (__ARC700__) || defined (__HS__)
 #include "ieee-754/mulsf3.S"
 #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
 #include "ieee-754/arc600-mul64/mulsf3.S"
@@ -1288,7 +1293,7 @@ SYM(__ld_r13_to_r14_ret):
 #endif
 
 #ifdef  L_divdf3
-#ifdef __ARC700__
+#if defined (__ARC700__) || defined (__HS__)
 #include "ieee-754/divdf3.S"
 #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
 #include "ieee-754/arc600-mul64/divdf3.S"
@@ -1298,7 +1303,7 @@ SYM(__ld_r13_to_r14_ret):
 #endif
 
 #ifdef  L_divsf3
-#ifdef __ARC700__
+#if defined (__ARC700__) || defined (__HS__)
 #include "ieee-754/divsf3-stdmul.S"
 #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
 #include "ieee-754/arc600-mul64/divsf3.S"
diff --git a/libgcc/config/arc/t-arc700-uClibc b/libgcc/config/arc/t-arc700-uClibc
index 651c3de..ff57039 100644
--- a/libgcc/config/arc/t-arc700-uClibc
+++ b/libgcc/config/arc/t-arc700-uClibc
@@ -28,10 +28,10 @@
 CRTSTUFF_T_CFLAGS += -mno-sdata
 
 # Compile crtbeginS.o and crtendS.o with pic.
-CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -mA7 -fPIC
+CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -fPIC
 
 # Compile libgcc2.a with pic.
-TARGET_LIBGCC2_CFLAGS = -mA7 -fPIC
+TARGET_LIBGCC2_CFLAGS = -fPIC
 
 PROFILE_OSDEP = prof-freq.o
 
-- 
1.7.0.4


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-11-11 12:33 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <098ECE41A0A6114BB2A07F1EC238DE896616641A@de02wembxa.internal.synopsys.com>
2015-10-23 21:13 ` [PATCH 2/2][ARC] Add support for ARCv2 CPUs Joern Wolfgang Rennecke
2015-10-30 11:29   ` Claudiu Zissulescu
2015-11-10 14:01     ` Joern Wolfgang Rennecke
2015-11-11 12:33       ` Claudiu Zissulescu
2015-10-05 12:32 Claudiu Zissulescu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).