* Re: [PATCH 2/2][ARC] Add support for ARCv2 CPUs
[not found] <098ECE41A0A6114BB2A07F1EC238DE896616641A@de02wembxa.internal.synopsys.com>
@ 2015-10-23 21:13 ` Joern Wolfgang Rennecke
2015-10-30 11:29 ` Claudiu Zissulescu
0 siblings, 1 reply; 5+ messages in thread
From: Joern Wolfgang Rennecke @ 2015-10-23 21:13 UTC (permalink / raw)
To: Claudiu Zissulescu, gcc-patches; +Cc: Francois Bedard, jeremy.bennett
* config/arc/gmon/profil.S (__profil, __profil_irq): Don't profile
Shouldn't profil return -1 on failure?
config/arc/lib1funcs.S (__udivmodsi4):
@@ -393,7 +393,12 @@ SYM(__udivmodsi4):
lsr_s r1,r1
cmp_s r0,r1
xor.f r2,lp_count,31
+#if !defined (__EM__) && !defined (__HS__)
mov_s lp_count,r2
+#else
+ mov lp_count,r2
+ nop_s
+#endif /* !__EM__ && !__HS__ */
#endif /* !__ARC_NORM__ */
sub.cc r0,r0,r1
mov_s r3,3
This is in ! __ARC_NORM__ code - so this should never happen for __HS__ .
It can happen for __EM__ - and then we have another problem. AFAIK
__EM__ also has the LP_COUNT interlock that slows down every reference
to LP_COUNT by an ordinary instruction. So this ARC600 code will run
rather slowly on it. OTOH, we can use the better handling of branch
instructions inside a zero overhead loop to our advantage here, like this:
#else /* ! __ARC_NORM__ */
lsr_s r2,r0
brhs.d r1,r2,.Lret0_3
#ifdef __EM__
mov lp_count,-1
asl_s r1,r1 ; den <<= 1
lp 1f
.Loop1:
brhi r1,r2,1f
asl.ls r1,r1
1:
sub_s r0,r0,r1
lsr_s r1,r1
cmp r0,r1
not r2,lp_count
mov.f lp_count,r2
#else /* !__EM__ */
mov_s r3,0
.Lloop1:
asl_s r1,r1 ; den <<= 1
brls.d r1,r2,@.Lloop1
sub_s r3,r3,1
sub_s r0,r0,r1
lsr_s r1,r1
cmp_s r0,r1
not_s r2,r3
mov.f lp_count,r3
#endif /* !__EM__ */
#endif /* !__ARC_NORM__ */
Hmm, somehow I lost the inner-loop explicit lp_count reference anyway, but
the version with lp is still one instruction shorter inside the loop.
Although the other is shorter statically.
OTOH, maybe not.f / mov_s is faster when followed by sub.cc ; depends on
what the ARC600 pipeline is like today.
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH 2/2][ARC] Add support for ARCv2 CPUs
2015-10-23 21:13 ` [PATCH 2/2][ARC] Add support for ARCv2 CPUs Joern Wolfgang Rennecke
@ 2015-10-30 11:29 ` Claudiu Zissulescu
2015-11-10 14:01 ` Joern Wolfgang Rennecke
0 siblings, 1 reply; 5+ messages in thread
From: Claudiu Zissulescu @ 2015-10-30 11:29 UTC (permalink / raw)
To: Joern Wolfgang Rennecke, gcc-patches; +Cc: Francois Bedard, jeremy.bennett
[-- Attachment #1: Type: text/plain, Size: 141 bytes --]
Hi,
Please find the updated patch. Both ARC patches were tested using dg.exp. The ChangeLog entry is unchanged.
Thank you,
Claudiu
[-- Attachment #2: 02-arcv2Updated.patch --]
[-- Type: application/octet-stream, Size: 12088 bytes --]
From 981b97246cd65908fa2560b8a346b02440b1a450 Mon Sep 17 00:00:00 2001
From: Claudiu Zissulescu <claziss@synopsys.com>
Date: Wed, 30 Sep 2015 12:27:49 +0200
Subject: [PATCH] Add ARCv2 basic support (Updated)
---
libgcc/config/arc/dp-hack.h | 2 +-
libgcc/config/arc/gmon/dcache_linesz.S | 2 +
libgcc/config/arc/gmon/profil.S | 11 ++++++++
libgcc/config/arc/ieee-754/arc-ieee-754.h | 7 +++++
libgcc/config/arc/ieee-754/divdf3.S | 37 +++++++++++++++++++++-------
libgcc/config/arc/ieee-754/divsf3-stdmul.S | 14 +++++-----
libgcc/config/arc/ieee-754/muldf3.S | 8 +++---
libgcc/config/arc/ieee-754/mulsf3.S | 6 ++--
libgcc/config/arc/lib1funcs.S | 15 +++++++----
libgcc/config/arc/t-arc700-uClibc | 4 +-
10 files changed, 75 insertions(+), 31 deletions(-)
diff --git a/libgcc/config/arc/dp-hack.h b/libgcc/config/arc/dp-hack.h
index c1ab9b2..a212e3b 100644
--- a/libgcc/config/arc/dp-hack.h
+++ b/libgcc/config/arc/dp-hack.h
@@ -48,7 +48,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define L_mul_df
#define L_div_df
#elif (!defined (__ARC700__) && !defined (__ARC_MUL64__) \
- && !defined(__ARC_MUL32BY16__))
+ && !defined (__ARC_MUL32BY16__) && !defined (__HS__))
#define L_mul_df
#define L_div_df
#undef QUIET_NAN
diff --git a/libgcc/config/arc/gmon/dcache_linesz.S b/libgcc/config/arc/gmon/dcache_linesz.S
index 8cf6442..972a587 100644
--- a/libgcc/config/arc/gmon/dcache_linesz.S
+++ b/libgcc/config/arc/gmon/dcache_linesz.S
@@ -38,6 +38,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
.global __dcache_linesz
.balign 4
__dcache_linesz:
+#if !defined (__EM__) && !defined (__HS__)
lr r12,[D_CACHE_BUILD]
extb_s r0,r12
breq_s r0,0,.Lsz_nocache
@@ -51,5 +52,6 @@ __dcache_linesz:
asl_s r0,r0,r12
j_s [blink]
.Lsz_nocache:
+#endif /* !__EM__ && !__HS__ */
mov_s r0,1
j_s [blink]
diff --git a/libgcc/config/arc/gmon/profil.S b/libgcc/config/arc/gmon/profil.S
index 3be2869..df10dbd 100644
--- a/libgcc/config/arc/gmon/profil.S
+++ b/libgcc/config/arc/gmon/profil.S
@@ -45,6 +45,7 @@ __profil_offset:
.global __dcache_linesz
.global __profil
FUNC(__profil)
+#if !defined (__EM__) && !defined (__HS__)
.Lstop_profiling:
sr r0,[CONTROL0]
j_s [blink]
@@ -107,6 +108,12 @@ nocache:
j_s [blink]
.balign 4
1: j __profil_irq
+#else
+__profil:
+ .balign 4
+ mov_s r0,-1
+ j_s [blink]
+#endif /* !__EM__ && !__HS__ */
ENDFUNC(__profil)
FUNC(__profil_irq)
@@ -114,6 +121,7 @@ nocache:
.balign 32,0,12 ; make sure the code spans no more that two cache lines
nop_s
__profil_irq:
+#if !defined (__EM__) && !defined (__HS__)
push_s r0
ld r0,[__profil_offset]
push_s r1
@@ -128,6 +136,9 @@ __profil_irq:
nostore:ld.ab r2,[sp,8]
pop_s r0
j.f [ilink1]
+#else
+ rtie
+#endif /* !__EM__ && !__HS__ */
ENDFUNC(__profil_irq)
; could save one cycle if the counters were allocated at link time and
diff --git a/libgcc/config/arc/ieee-754/arc-ieee-754.h b/libgcc/config/arc/ieee-754/arc-ieee-754.h
index 08a14a6..f1ac98e 100644
--- a/libgcc/config/arc/ieee-754/arc-ieee-754.h
+++ b/libgcc/config/arc/ieee-754/arc-ieee-754.h
@@ -54,3 +54,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define bmsk_l bmsk
#define bxor_l bxor
#define bcs_s blo_s
+#if defined (__HS__) || defined (__EM__)
+#define MPYHU mpymu
+#define MPYH mpym
+#else
+#define MPYHU mpyhu
+#define MPYH mpyh
+#endif
diff --git a/libgcc/config/arc/ieee-754/divdf3.S b/libgcc/config/arc/ieee-754/divdf3.S
index 2d000e4..27705ed 100644
--- a/libgcc/config/arc/ieee-754/divdf3.S
+++ b/libgcc/config/arc/ieee-754/divdf3.S
@@ -118,7 +118,7 @@ __divdf3_support: /* This label makes debugger output saner. */
sub r11,r11,11
asl DBL1L,DBL1L,r11
sub r11,r11,1
- mpyhu r5,r4,r8
+ MPYHU r5,r4,r8
sub r7,r7,r11
asl r4,r4,12
b.d .Lpast_denorm_dbl1
@@ -189,25 +189,33 @@ __divdf3:
asl r8,DBL1H,12
lsr r12,DBL1L,20
lsr r4,r8,26
+#ifdef __HS__
+ add3 r10,pcl,60 ; (.Ldivtab-.) >> 3
+#else
add3 r10,pcl,59 ; (.Ldivtab-.) >> 3
+#endif
ld.as r4,[r10,r4]
+#ifdef __HS__
+ ld.as r9,[pcl,182]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
+#else
ld.as r9,[pcl,180]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
+#endif
or r8,r8,r12
- mpyhu r5,r4,r8
+ MPYHU r5,r4,r8
and.f r7,DBL1H,r9
asl r4,r4,12 ; having the asl here is a concession to the XMAC pipeline.
beq.d .Ldenorm_dbl1
and r6,DBL0H,r9
.Lpast_denorm_dbl1: ; wb stall
sub r4,r4,r5
- mpyhu r5,r4,r4
+ MPYHU r5,r4,r4
breq.d r6,0,.Ldenorm_dbl0
lsr r8,r8,1
asl r12,DBL0H,11
lsr r10,DBL0L,21
.Lpast_denorm_dbl0: ; wb stall
bset r8,r8,31
- mpyhu r11,r5,r8
+ MPYHU r11,r5,r8
add_s r12,r12,r10
bset r5,r12,31
cmp r5,r8
@@ -215,7 +223,7 @@ __divdf3:
; wb stall
lsr.cc r5,r5,1
sub r4,r4,r11 ; u1.31 inverse, about 30 bit
- mpyhu r11,r5,r4 ; result fraction highpart
+ MPYHU r11,r5,r4 ; result fraction highpart
breq r7,r9,.Linf_nan_dbl1
lsr r8,r8,2 ; u3.29
add r5,r6, /* wait for immediate / XMAC wb stall */ \
@@ -226,7 +234,7 @@ __divdf3:
asl_s DBL1L,DBL1L,9 ; u-29.23:9
sbc r6,r5,r7
; resource conflict (not for XMAC)
- mpyhu r5,r11,DBL1L ; u-28.23:9
+ MPYHU r5,r11,DBL1L ; u-28.23:9
add.cs DBL0L,DBL0L,DBL0L
asl_s DBL0L,DBL0L,6 ; u-26.25:7
asl r10,r11,23
@@ -234,7 +242,7 @@ __divdf3:
; wb stall (before 'and' for XMAC)
lsr r7,r11,9
sub r5,DBL0L,r5 ; rest msw ; u-26.31:0
- mpyh r12,r5,r4 ; result fraction lowpart
+ MPYH r12,r5,r4 ; result fraction lowpart
xor.f 0,DBL0H,DBL1H
and DBL0H,r6,r9
add_s DBL0H,DBL0H,r7 ; (XMAC wb stall)
@@ -261,7 +269,7 @@ __divdf3:
sub.cs DBL0H,DBL0H,1
sub.f r12,r12,2
; resource conflict (not for XMAC)
- mpyhu r7,r12,DBL1L ; u-51.32
+ MPYHU r7,r12,DBL1L ; u-51.32
asl r5,r5,25 ; s-51.7:25
lsr r10,r10,7 ; u-51.30:2
; resource conflict (not for XMAC)
@@ -291,10 +299,21 @@ __divdf3:
rsub r7,r6,5
asr r10,r12,28
bmsk r4,r12,27
+#ifdef __HS__
+ min r7, r7, 31
+ asr DBL0L, r4, r7
+#else
asrs DBL0L,r4,r7
+#endif
add DBL1H,r11,r10
+#ifdef __HS__
+ abs.f r10, r4
+ sub.mi r10, r10, 1
+#endif
add.f r7,r6,32-5
+#ifdef __ARC700__
abss r10,r4
+#endif
asl r4,r4,r7
mov.mi r4,r10
add.f r10,r6,23
@@ -319,7 +338,7 @@ __divdf3:
and r9,DBL0L,1 ; tie-breaker: round to even
lsr r11,r11,7 ; u-51.30:2
; resource conflict (not for XMAC)
- mpyhu r8,r12,DBL1L ; u-51.32
+ MPYHU r8,r12,DBL1L ; u-51.32
sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L
add_s DBL1H,DBL1H,r11
; resource conflict (not for XMAC)
diff --git a/libgcc/config/arc/ieee-754/divsf3-stdmul.S b/libgcc/config/arc/ieee-754/divsf3-stdmul.S
index 09861d3..f13944a 100644
--- a/libgcc/config/arc/ieee-754/divsf3-stdmul.S
+++ b/libgcc/config/arc/ieee-754/divsf3-stdmul.S
@@ -144,7 +144,7 @@ __divsf3_support: /* This label makes debugger output saner. */
ld.as r5,[r3,r5]
add r4,r6,r6
; load latency
- mpyhu r7,r5,r4
+ MPYHU r7,r5,r4
bic.ne.f 0, \
0x60000000,r0 ; large number / denorm -> Inf
beq_s .Linf_NaN
@@ -152,7 +152,7 @@ __divsf3_support: /* This label makes debugger output saner. */
; wb stall
; slow track
sub r7,r5,r7
- mpyhu r8,r7,r6
+ MPYHU r8,r7,r6
asl_s r12,r12,23
and.f r2,r0,r9
add r2,r2,r12
@@ -160,7 +160,7 @@ __divsf3_support: /* This label makes debugger output saner. */
; wb stall
bne.d .Lpast_denorm_fp1
.Ldenorm_fp0:
- mpyhu r8,r8,r7
+ MPYHU r8,r8,r7
bclr r12,r12,31
norm.f r3,r12 ; flag for 0/x -> 0 check
bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
@@ -209,7 +209,7 @@ __divsf3:
ld.as r5,[r3,r2]
asl r4,r1,9
ld.as r9,[pcl,-114]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
- mpyhu r7,r5,r4
+ MPYHU r7,r5,r4
asl r6,r1,8
and.f r11,r1,r9
bset r6,r6,31
@@ -217,14 +217,14 @@ __divsf3:
; wb stall
beq .Ldenorm_fp1
sub r7,r5,r7
- mpyhu r8,r7,r6
+ MPYHU r8,r7,r6
breq.d r11,r9,.Linf_nan_fp1
and.f r2,r0,r9
beq.d .Ldenorm_fp0
asl r12,r0,8
; wb stall
breq r2,r9,.Linf_nan_fp0
- mpyhu r8,r8,r7
+ MPYHU r8,r8,r7
.Lpast_denorm_fp1:
bset r3,r12,31
.Lpast_denorm_fp0:
@@ -234,7 +234,7 @@ __divsf3:
/* wb stall */ \
0x3f000000
sub r7,r7,r8 ; u1.31 inverse, about 30 bit
- mpyhu r3,r3,r7
+ MPYHU r3,r3,r7
sbc r2,r2,r11
xor.f 0,r0,r1
and r0,r2,r9
diff --git a/libgcc/config/arc/ieee-754/muldf3.S b/libgcc/config/arc/ieee-754/muldf3.S
index 805db5c..5f562e2 100644
--- a/libgcc/config/arc/ieee-754/muldf3.S
+++ b/libgcc/config/arc/ieee-754/muldf3.S
@@ -132,19 +132,19 @@ __muldf3_support: /* This label makes debugger output saner. */
.balign 4
__muldf3:
ld.as r9,[pcl,0x4b] ; ((.L7ff00000-.+2)/4)]
- mpyhu r4,DBL0L,DBL1L
+ MPYHU r4,DBL0L,DBL1L
bmsk r6,DBL0H,19
bset r6,r6,20
mpyu r7,r6,DBL1L
and r11,DBL0H,r9
breq r11,0,.Ldenorm_dbl0
- mpyhu r8,r6,DBL1L
+ MPYHU r8,r6,DBL1L
bmsk r10,DBL1H,19
bset r10,r10,20
- mpyhu r5,r10,DBL0L
+ MPYHU r5,r10,DBL0L
add.f r4,r4,r7
and r12,DBL1H,r9
- mpyhu r7,r6,r10
+ MPYHU r7,r6,r10
breq r12,0,.Ldenorm_dbl1
adc.f r5,r5,r8
mpyu r8,r10,DBL0L
diff --git a/libgcc/config/arc/ieee-754/mulsf3.S b/libgcc/config/arc/ieee-754/mulsf3.S
index 7a6c791..df2660a 100644
--- a/libgcc/config/arc/ieee-754/mulsf3.S
+++ b/libgcc/config/arc/ieee-754/mulsf3.S
@@ -64,7 +64,7 @@ __mulsf3:
bset r2,r0,23
asl_s r2,r2,8
bset r3,r4,23
- mpyhu r6,r2,r3
+ MPYHU r6,r2,r3
and r11,r0,r9
breq r11,0,.Ldenorm_dbl0
mpyu r7,r2,r3
@@ -144,7 +144,7 @@ __mulsf3:
add_s r2,r2,r2
asl r2,r2,r4
asl r4,r4,23
- mpyhu r6,r2,r3
+ MPYHU r6,r2,r3
breq r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
sub.ne.f r12,r12,r4
mpyu r7,r2,r3
@@ -163,7 +163,7 @@ __mulsf3:
asl r4,r4,r3
sub_s r3,r3,1
asl_s r3,r3,23
- mpyhu r6,r2,r4
+ MPYHU r6,r2,r4
sub.ne.f r11,r11,r3
bmsk r8,r0,30
mpyu r7,r2,r4
diff --git a/libgcc/config/arc/lib1funcs.S b/libgcc/config/arc/lib1funcs.S
index e59340a..022a2ea 100644
--- a/libgcc/config/arc/lib1funcs.S
+++ b/libgcc/config/arc/lib1funcs.S
@@ -79,7 +79,7 @@ SYM(__mulsi3):
j_s.d [blink]
mov_s r0,mlo
ENDFUNC(__mulsi3)
-#elif defined (__ARC700__)
+#elif defined (__ARC700__) || defined (__HS__)
HIDDEN_FUNC(__mulsi3)
mpyu r0,r0,r1
nop_s
@@ -393,7 +393,12 @@ SYM(__udivmodsi4):
lsr_s r1,r1
cmp_s r0,r1
xor.f r2,lp_count,31
+#if !defined (__EM__)
mov_s lp_count,r2
+#else
+ mov lp_count,r2
+ nop_s
+#endif /* !__EM__ */
#endif /* !__ARC_NORM__ */
sub.cc r0,r0,r1
mov_s r3,3
@@ -1260,7 +1265,7 @@ SYM(__ld_r13_to_r14_ret):
#endif
#ifdef L_muldf3
-#ifdef __ARC700__
+#if defined (__ARC700__) || defined (__HS__)
#include "ieee-754/muldf3.S"
#elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
#include "ieee-754/arc600-mul64/muldf3.S"
@@ -1276,7 +1281,7 @@ SYM(__ld_r13_to_r14_ret):
#endif
#ifdef L_mulsf3
-#ifdef __ARC700__
+#if defined (__ARC700__) || defined (__HS__)
#include "ieee-754/mulsf3.S"
#elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
#include "ieee-754/arc600-mul64/mulsf3.S"
@@ -1288,7 +1293,7 @@ SYM(__ld_r13_to_r14_ret):
#endif
#ifdef L_divdf3
-#ifdef __ARC700__
+#if defined (__ARC700__) || defined (__HS__)
#include "ieee-754/divdf3.S"
#elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
#include "ieee-754/arc600-mul64/divdf3.S"
@@ -1298,7 +1303,7 @@ SYM(__ld_r13_to_r14_ret):
#endif
#ifdef L_divsf3
-#ifdef __ARC700__
+#if defined (__ARC700__) || defined (__HS__)
#include "ieee-754/divsf3-stdmul.S"
#elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
#include "ieee-754/arc600-mul64/divsf3.S"
diff --git a/libgcc/config/arc/t-arc700-uClibc b/libgcc/config/arc/t-arc700-uClibc
index 651c3de..ff57039 100644
--- a/libgcc/config/arc/t-arc700-uClibc
+++ b/libgcc/config/arc/t-arc700-uClibc
@@ -28,10 +28,10 @@
CRTSTUFF_T_CFLAGS += -mno-sdata
# Compile crtbeginS.o and crtendS.o with pic.
-CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -mA7 -fPIC
+CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -fPIC
# Compile libgcc2.a with pic.
-TARGET_LIBGCC2_CFLAGS = -mA7 -fPIC
+TARGET_LIBGCC2_CFLAGS = -fPIC
PROFILE_OSDEP = prof-freq.o
--
1.7.0.4
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 2/2][ARC] Add support for ARCv2 CPUs
2015-10-30 11:29 ` Claudiu Zissulescu
@ 2015-11-10 14:01 ` Joern Wolfgang Rennecke
2015-11-11 12:33 ` Claudiu Zissulescu
0 siblings, 1 reply; 5+ messages in thread
From: Joern Wolfgang Rennecke @ 2015-11-10 14:01 UTC (permalink / raw)
To: Claudiu Zissulescu, gcc-patches; +Cc: Francois Bedard, jeremy.bennett
On 30/10/15 11:22, Claudiu Zissulescu wrote:
> Hi,
>
> Please find the updated patch. Both ARC patches were tested using dg.exp. The ChangeLog entry is unchanged.
This is OK.
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH 2/2][ARC] Add support for ARCv2 CPUs
2015-11-10 14:01 ` Joern Wolfgang Rennecke
@ 2015-11-11 12:33 ` Claudiu Zissulescu
0 siblings, 0 replies; 5+ messages in thread
From: Claudiu Zissulescu @ 2015-11-11 12:33 UTC (permalink / raw)
To: Joern Wolfgang Rennecke, gcc-patches; +Cc: Francois Bedard, jeremy.bennett
This patch is committed.
Thanks Joern,
Claudiu
> -----Original Message-----
> From: Joern Wolfgang Rennecke [mailto:gnu@amylaar.uk]
> Sent: Tuesday, November 10, 2015 3:02 PM
> To: Claudiu Zissulescu; gcc-patches@gcc.gnu.org
> Cc: Francois Bedard; jeremy.bennett@embecosm.com
> Subject: Re: [PATCH 2/2][ARC] Add support for ARCv2 CPUs
>
>
>
> On 30/10/15 11:22, Claudiu Zissulescu wrote:
> > Hi,
> >
> > Please find the updated patch. Both ARC patches were tested using
> dg.exp. The ChangeLog entry is unchanged.
>
> This is OK.
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 2/2][ARC] Add support for ARCv2 CPUs
@ 2015-10-05 12:32 Claudiu Zissulescu
0 siblings, 0 replies; 5+ messages in thread
From: Claudiu Zissulescu @ 2015-10-05 12:32 UTC (permalink / raw)
To: gcc-patches, gnu; +Cc: Francois Bedard, jeremy.bennett
[-- Attachment #1: Type: text/plain, Size: 1036 bytes --]
Just realized this patch hasn't gone through to the mailing list. Reposted.
This patch adds basic support (libgcc) for Synopsys' ARCv2 CPUs.
Can this be committed?
Thanks,
Claudiu
ChangeLog:
2015-08-28 Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/dp-hack.h: Add support for ARCHS.
* config/arc/ieee-754/divdf3.S: Likewise.
* config/arc/ieee-754/divsf3-stdmul.S: Likewise.
* config/arc/ieee-754/muldf3.S: Likewise.
* config/arc/ieee-754/mulsf3.S: Likewise.
* config/arc/lib1funcs.S: Likewise.
* config/arc/gmon/dcache_linesz.S: Don't read the build register
for ARCv2 cores.
* config/arc/gmon/profil.S (__profil, __profil_irq): Don't profile
for ARCv2 cores.
* config/arc/ieee-754/arc-ieee-754.h (MPYHU, MPYH): Define.
* config/arc/t-arc700-uClibc: Remove hard selection for ARC 700
cores.
[-- Attachment #2: 02-arcv2.patch --]
[-- Type: application/octet-stream, Size: 12154 bytes --]
From 2c6d2b12d883e9f2cab9325110a1b5d2886ec864 Mon Sep 17 00:00:00 2001
From: Claudiu Zissulescu <claziss@synopsys.com>
Date: Wed, 30 Sep 2015 12:27:49 +0200
Subject: [PATCH] Add ARCv2 basic support
---
libgcc/config/arc/dp-hack.h | 2 +-
libgcc/config/arc/gmon/dcache_linesz.S | 2 +
libgcc/config/arc/gmon/profil.S | 9 +++++++
libgcc/config/arc/ieee-754/arc-ieee-754.h | 7 +++++
libgcc/config/arc/ieee-754/divdf3.S | 37 +++++++++++++++++++++-------
libgcc/config/arc/ieee-754/divsf3-stdmul.S | 14 +++++-----
libgcc/config/arc/ieee-754/muldf3.S | 8 +++---
libgcc/config/arc/ieee-754/mulsf3.S | 6 ++--
libgcc/config/arc/lib1funcs.S | 15 +++++++----
libgcc/config/arc/t-arc700-uClibc | 4 +-
10 files changed, 73 insertions(+), 31 deletions(-)
diff --git a/libgcc/config/arc/dp-hack.h b/libgcc/config/arc/dp-hack.h
index c1ab9b2..a212e3b 100644
--- a/libgcc/config/arc/dp-hack.h
+++ b/libgcc/config/arc/dp-hack.h
@@ -48,7 +48,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define L_mul_df
#define L_div_df
#elif (!defined (__ARC700__) && !defined (__ARC_MUL64__) \
- && !defined(__ARC_MUL32BY16__))
+ && !defined (__ARC_MUL32BY16__) && !defined (__HS__))
#define L_mul_df
#define L_div_df
#undef QUIET_NAN
diff --git a/libgcc/config/arc/gmon/dcache_linesz.S b/libgcc/config/arc/gmon/dcache_linesz.S
index 8cf6442..972a587 100644
--- a/libgcc/config/arc/gmon/dcache_linesz.S
+++ b/libgcc/config/arc/gmon/dcache_linesz.S
@@ -38,6 +38,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
.global __dcache_linesz
.balign 4
__dcache_linesz:
+#if !defined (__EM__) && !defined (__HS__)
lr r12,[D_CACHE_BUILD]
extb_s r0,r12
breq_s r0,0,.Lsz_nocache
@@ -51,5 +52,6 @@ __dcache_linesz:
asl_s r0,r0,r12
j_s [blink]
.Lsz_nocache:
+#endif /* !__EM__ && !__HS__ */
mov_s r0,1
j_s [blink]
diff --git a/libgcc/config/arc/gmon/profil.S b/libgcc/config/arc/gmon/profil.S
index 3be2869..ac3a7eb 100644
--- a/libgcc/config/arc/gmon/profil.S
+++ b/libgcc/config/arc/gmon/profil.S
@@ -45,6 +45,7 @@ __profil_offset:
.global __dcache_linesz
.global __profil
FUNC(__profil)
+#if !defined (__EM__) && !defined (__HS__)
.Lstop_profiling:
sr r0,[CONTROL0]
j_s [blink]
@@ -107,11 +108,18 @@ nocache:
j_s [blink]
.balign 4
1: j __profil_irq
+#else
+__profil:
+ .balign 4
+ mov_s r0,0
+ j_s [blink]
+#endif /* !__EM__ && !__HS__ */
ENDFUNC(__profil)
FUNC(__profil_irq)
.balign 4 ; make final jump unaligned to avoid delay penalty
.balign 32,0,12 ; make sure the code spans no more that two cache lines
+#if !defined (__EM__) && !defined (__HS__)
nop_s
__profil_irq:
push_s r0
@@ -129,6 +137,7 @@ nostore:ld.ab r2,[sp,8]
pop_s r0
j.f [ilink1]
ENDFUNC(__profil_irq)
+#endif /* !__EM__ && !__HS__ */
; could save one cycle if the counters were allocated at link time and
; the contents of __profil_offset were pre-computed at link time, like this:
diff --git a/libgcc/config/arc/ieee-754/arc-ieee-754.h b/libgcc/config/arc/ieee-754/arc-ieee-754.h
index 08a14a6..f1ac98e 100644
--- a/libgcc/config/arc/ieee-754/arc-ieee-754.h
+++ b/libgcc/config/arc/ieee-754/arc-ieee-754.h
@@ -54,3 +54,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define bmsk_l bmsk
#define bxor_l bxor
#define bcs_s blo_s
+#if defined (__HS__) || defined (__EM__)
+#define MPYHU mpymu
+#define MPYH mpym
+#else
+#define MPYHU mpyhu
+#define MPYH mpyh
+#endif
diff --git a/libgcc/config/arc/ieee-754/divdf3.S b/libgcc/config/arc/ieee-754/divdf3.S
index 2d000e4..27705ed 100644
--- a/libgcc/config/arc/ieee-754/divdf3.S
+++ b/libgcc/config/arc/ieee-754/divdf3.S
@@ -118,7 +118,7 @@ __divdf3_support: /* This label makes debugger output saner. */
sub r11,r11,11
asl DBL1L,DBL1L,r11
sub r11,r11,1
- mpyhu r5,r4,r8
+ MPYHU r5,r4,r8
sub r7,r7,r11
asl r4,r4,12
b.d .Lpast_denorm_dbl1
@@ -189,25 +189,33 @@ __divdf3:
asl r8,DBL1H,12
lsr r12,DBL1L,20
lsr r4,r8,26
+#ifdef __HS__
+ add3 r10,pcl,60 ; (.Ldivtab-.) >> 3
+#else
add3 r10,pcl,59 ; (.Ldivtab-.) >> 3
+#endif
ld.as r4,[r10,r4]
+#ifdef __HS__
+ ld.as r9,[pcl,182]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
+#else
ld.as r9,[pcl,180]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
+#endif
or r8,r8,r12
- mpyhu r5,r4,r8
+ MPYHU r5,r4,r8
and.f r7,DBL1H,r9
asl r4,r4,12 ; having the asl here is a concession to the XMAC pipeline.
beq.d .Ldenorm_dbl1
and r6,DBL0H,r9
.Lpast_denorm_dbl1: ; wb stall
sub r4,r4,r5
- mpyhu r5,r4,r4
+ MPYHU r5,r4,r4
breq.d r6,0,.Ldenorm_dbl0
lsr r8,r8,1
asl r12,DBL0H,11
lsr r10,DBL0L,21
.Lpast_denorm_dbl0: ; wb stall
bset r8,r8,31
- mpyhu r11,r5,r8
+ MPYHU r11,r5,r8
add_s r12,r12,r10
bset r5,r12,31
cmp r5,r8
@@ -215,7 +223,7 @@ __divdf3:
; wb stall
lsr.cc r5,r5,1
sub r4,r4,r11 ; u1.31 inverse, about 30 bit
- mpyhu r11,r5,r4 ; result fraction highpart
+ MPYHU r11,r5,r4 ; result fraction highpart
breq r7,r9,.Linf_nan_dbl1
lsr r8,r8,2 ; u3.29
add r5,r6, /* wait for immediate / XMAC wb stall */ \
@@ -226,7 +234,7 @@ __divdf3:
asl_s DBL1L,DBL1L,9 ; u-29.23:9
sbc r6,r5,r7
; resource conflict (not for XMAC)
- mpyhu r5,r11,DBL1L ; u-28.23:9
+ MPYHU r5,r11,DBL1L ; u-28.23:9
add.cs DBL0L,DBL0L,DBL0L
asl_s DBL0L,DBL0L,6 ; u-26.25:7
asl r10,r11,23
@@ -234,7 +242,7 @@ __divdf3:
; wb stall (before 'and' for XMAC)
lsr r7,r11,9
sub r5,DBL0L,r5 ; rest msw ; u-26.31:0
- mpyh r12,r5,r4 ; result fraction lowpart
+ MPYH r12,r5,r4 ; result fraction lowpart
xor.f 0,DBL0H,DBL1H
and DBL0H,r6,r9
add_s DBL0H,DBL0H,r7 ; (XMAC wb stall)
@@ -261,7 +269,7 @@ __divdf3:
sub.cs DBL0H,DBL0H,1
sub.f r12,r12,2
; resource conflict (not for XMAC)
- mpyhu r7,r12,DBL1L ; u-51.32
+ MPYHU r7,r12,DBL1L ; u-51.32
asl r5,r5,25 ; s-51.7:25
lsr r10,r10,7 ; u-51.30:2
; resource conflict (not for XMAC)
@@ -291,10 +299,21 @@ __divdf3:
rsub r7,r6,5
asr r10,r12,28
bmsk r4,r12,27
+#ifdef __HS__
+ min r7, r7, 31
+ asr DBL0L, r4, r7
+#else
asrs DBL0L,r4,r7
+#endif
add DBL1H,r11,r10
+#ifdef __HS__
+ abs.f r10, r4
+ sub.mi r10, r10, 1
+#endif
add.f r7,r6,32-5
+#ifdef __ARC700__
abss r10,r4
+#endif
asl r4,r4,r7
mov.mi r4,r10
add.f r10,r6,23
@@ -319,7 +338,7 @@ __divdf3:
and r9,DBL0L,1 ; tie-breaker: round to even
lsr r11,r11,7 ; u-51.30:2
; resource conflict (not for XMAC)
- mpyhu r8,r12,DBL1L ; u-51.32
+ MPYHU r8,r12,DBL1L ; u-51.32
sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L
add_s DBL1H,DBL1H,r11
; resource conflict (not for XMAC)
diff --git a/libgcc/config/arc/ieee-754/divsf3-stdmul.S b/libgcc/config/arc/ieee-754/divsf3-stdmul.S
index 09861d3..f13944a 100644
--- a/libgcc/config/arc/ieee-754/divsf3-stdmul.S
+++ b/libgcc/config/arc/ieee-754/divsf3-stdmul.S
@@ -144,7 +144,7 @@ __divsf3_support: /* This label makes debugger output saner. */
ld.as r5,[r3,r5]
add r4,r6,r6
; load latency
- mpyhu r7,r5,r4
+ MPYHU r7,r5,r4
bic.ne.f 0, \
0x60000000,r0 ; large number / denorm -> Inf
beq_s .Linf_NaN
@@ -152,7 +152,7 @@ __divsf3_support: /* This label makes debugger output saner. */
; wb stall
; slow track
sub r7,r5,r7
- mpyhu r8,r7,r6
+ MPYHU r8,r7,r6
asl_s r12,r12,23
and.f r2,r0,r9
add r2,r2,r12
@@ -160,7 +160,7 @@ __divsf3_support: /* This label makes debugger output saner. */
; wb stall
bne.d .Lpast_denorm_fp1
.Ldenorm_fp0:
- mpyhu r8,r8,r7
+ MPYHU r8,r8,r7
bclr r12,r12,31
norm.f r3,r12 ; flag for 0/x -> 0 check
bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
@@ -209,7 +209,7 @@ __divsf3:
ld.as r5,[r3,r2]
asl r4,r1,9
ld.as r9,[pcl,-114]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
- mpyhu r7,r5,r4
+ MPYHU r7,r5,r4
asl r6,r1,8
and.f r11,r1,r9
bset r6,r6,31
@@ -217,14 +217,14 @@ __divsf3:
; wb stall
beq .Ldenorm_fp1
sub r7,r5,r7
- mpyhu r8,r7,r6
+ MPYHU r8,r7,r6
breq.d r11,r9,.Linf_nan_fp1
and.f r2,r0,r9
beq.d .Ldenorm_fp0
asl r12,r0,8
; wb stall
breq r2,r9,.Linf_nan_fp0
- mpyhu r8,r8,r7
+ MPYHU r8,r8,r7
.Lpast_denorm_fp1:
bset r3,r12,31
.Lpast_denorm_fp0:
@@ -234,7 +234,7 @@ __divsf3:
/* wb stall */ \
0x3f000000
sub r7,r7,r8 ; u1.31 inverse, about 30 bit
- mpyhu r3,r3,r7
+ MPYHU r3,r3,r7
sbc r2,r2,r11
xor.f 0,r0,r1
and r0,r2,r9
diff --git a/libgcc/config/arc/ieee-754/muldf3.S b/libgcc/config/arc/ieee-754/muldf3.S
index 805db5c..5f562e2 100644
--- a/libgcc/config/arc/ieee-754/muldf3.S
+++ b/libgcc/config/arc/ieee-754/muldf3.S
@@ -132,19 +132,19 @@ __muldf3_support: /* This label makes debugger output saner. */
.balign 4
__muldf3:
ld.as r9,[pcl,0x4b] ; ((.L7ff00000-.+2)/4)]
- mpyhu r4,DBL0L,DBL1L
+ MPYHU r4,DBL0L,DBL1L
bmsk r6,DBL0H,19
bset r6,r6,20
mpyu r7,r6,DBL1L
and r11,DBL0H,r9
breq r11,0,.Ldenorm_dbl0
- mpyhu r8,r6,DBL1L
+ MPYHU r8,r6,DBL1L
bmsk r10,DBL1H,19
bset r10,r10,20
- mpyhu r5,r10,DBL0L
+ MPYHU r5,r10,DBL0L
add.f r4,r4,r7
and r12,DBL1H,r9
- mpyhu r7,r6,r10
+ MPYHU r7,r6,r10
breq r12,0,.Ldenorm_dbl1
adc.f r5,r5,r8
mpyu r8,r10,DBL0L
diff --git a/libgcc/config/arc/ieee-754/mulsf3.S b/libgcc/config/arc/ieee-754/mulsf3.S
index 7a6c791..df2660a 100644
--- a/libgcc/config/arc/ieee-754/mulsf3.S
+++ b/libgcc/config/arc/ieee-754/mulsf3.S
@@ -64,7 +64,7 @@ __mulsf3:
bset r2,r0,23
asl_s r2,r2,8
bset r3,r4,23
- mpyhu r6,r2,r3
+ MPYHU r6,r2,r3
and r11,r0,r9
breq r11,0,.Ldenorm_dbl0
mpyu r7,r2,r3
@@ -144,7 +144,7 @@ __mulsf3:
add_s r2,r2,r2
asl r2,r2,r4
asl r4,r4,23
- mpyhu r6,r2,r3
+ MPYHU r6,r2,r3
breq r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
sub.ne.f r12,r12,r4
mpyu r7,r2,r3
@@ -163,7 +163,7 @@ __mulsf3:
asl r4,r4,r3
sub_s r3,r3,1
asl_s r3,r3,23
- mpyhu r6,r2,r4
+ MPYHU r6,r2,r4
sub.ne.f r11,r11,r3
bmsk r8,r0,30
mpyu r7,r2,r4
diff --git a/libgcc/config/arc/lib1funcs.S b/libgcc/config/arc/lib1funcs.S
index e59340a..e7317a3 100644
--- a/libgcc/config/arc/lib1funcs.S
+++ b/libgcc/config/arc/lib1funcs.S
@@ -79,7 +79,7 @@ SYM(__mulsi3):
j_s.d [blink]
mov_s r0,mlo
ENDFUNC(__mulsi3)
-#elif defined (__ARC700__)
+#elif defined (__ARC700__) || defined (__HS__)
HIDDEN_FUNC(__mulsi3)
mpyu r0,r0,r1
nop_s
@@ -393,7 +393,12 @@ SYM(__udivmodsi4):
lsr_s r1,r1
cmp_s r0,r1
xor.f r2,lp_count,31
+#if !defined (__EM__) && !defined (__HS__)
mov_s lp_count,r2
+#else
+ mov lp_count,r2
+ nop_s
+#endif /* !__EM__ && !__HS__ */
#endif /* !__ARC_NORM__ */
sub.cc r0,r0,r1
mov_s r3,3
@@ -1260,7 +1265,7 @@ SYM(__ld_r13_to_r14_ret):
#endif
#ifdef L_muldf3
-#ifdef __ARC700__
+#if defined (__ARC700__) || defined (__HS__)
#include "ieee-754/muldf3.S"
#elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
#include "ieee-754/arc600-mul64/muldf3.S"
@@ -1276,7 +1281,7 @@ SYM(__ld_r13_to_r14_ret):
#endif
#ifdef L_mulsf3
-#ifdef __ARC700__
+#if defined (__ARC700__) || defined (__HS__)
#include "ieee-754/mulsf3.S"
#elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
#include "ieee-754/arc600-mul64/mulsf3.S"
@@ -1288,7 +1293,7 @@ SYM(__ld_r13_to_r14_ret):
#endif
#ifdef L_divdf3
-#ifdef __ARC700__
+#if defined (__ARC700__) || defined (__HS__)
#include "ieee-754/divdf3.S"
#elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
#include "ieee-754/arc600-mul64/divdf3.S"
@@ -1298,7 +1303,7 @@ SYM(__ld_r13_to_r14_ret):
#endif
#ifdef L_divsf3
-#ifdef __ARC700__
+#if defined (__ARC700__) || defined (__HS__)
#include "ieee-754/divsf3-stdmul.S"
#elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
#include "ieee-754/arc600-mul64/divsf3.S"
diff --git a/libgcc/config/arc/t-arc700-uClibc b/libgcc/config/arc/t-arc700-uClibc
index 651c3de..ff57039 100644
--- a/libgcc/config/arc/t-arc700-uClibc
+++ b/libgcc/config/arc/t-arc700-uClibc
@@ -28,10 +28,10 @@
CRTSTUFF_T_CFLAGS += -mno-sdata
# Compile crtbeginS.o and crtendS.o with pic.
-CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -mA7 -fPIC
+CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -fPIC
# Compile libgcc2.a with pic.
-TARGET_LIBGCC2_CFLAGS = -mA7 -fPIC
+TARGET_LIBGCC2_CFLAGS = -fPIC
PROFILE_OSDEP = prof-freq.o
--
1.7.0.4
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2015-11-11 12:33 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <098ECE41A0A6114BB2A07F1EC238DE896616641A@de02wembxa.internal.synopsys.com>
2015-10-23 21:13 ` [PATCH 2/2][ARC] Add support for ARCv2 CPUs Joern Wolfgang Rennecke
2015-10-30 11:29 ` Claudiu Zissulescu
2015-11-10 14:01 ` Joern Wolfgang Rennecke
2015-11-11 12:33 ` Claudiu Zissulescu
2015-10-05 12:32 Claudiu Zissulescu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).