public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Re: [PATCH] amdgcn: Add gfx1036 target
       [not found] <66015f94.050a0220.c1dc7.3f4cSMTPIN_ADDED_MISSING@mx.google.com>
@ 2024-03-25 14:51 ` Andrew Stubbs
  0 siblings, 0 replies; 5+ messages in thread
From: Andrew Stubbs @ 2024-03-25 14:51 UTC (permalink / raw)
  To: Richard Biener, gcc-patches

On 25/03/2024 11:27, Richard Biener wrote:
> Add support for the gfx1036 RDNA2 APU integrated graphics devices.  The ROCm
> documentation warns that these may not be supported, but it seems to work
> at least partially.
> 
> x86 host bootstrap/regtest running, target-libgomp testing for the
> offload produces results comparable to those of gfx1030.  The nice
> thing is that gfx1036 is inside every Zen4 desktop CPU (Ryzen 7xxx)
> and testing on that doesn't interfere with a separate GPU used for
> your desktop (where I experienced crashes when using the GPU for both
> offload and graphics).
> 
> I'll note that while gfx1030 works with llvm14 gfx1036 needs llvm15
> as minimum version for the assembler.
> 
> OK for trunk?

OK.

> 
> I'll follow up with the libgomp testing test summary for archival
> purposes.  I still see linker errors for testcases using -g
> (the ld: ^[[0;31merror: ^[[0mincompatible mach:
> /tmp/ccr0oDpD.mkoffload.dbg.o^M kind)

This is caused by the --with-arch=gfx1036 not being picked up by 
mkoffload. It works fine if you use the default configuration or specify 
the -march explicitly. Either way, the bug is not in your patch.

For now, please test like this:

    RUNTESTFLAGS=--target_board=unix/-foffload=-march=gfx1036

Andrew

> Thanks,
> Richard.
> 
> gcc/ChangeLog:
> 
> 	* config.gcc (amdgcn): Add gfx1036 entries.
> 	* config/gcn/gcn-hsa.h (NO_XNACK): Likewise.
> 	(gcn_local_sym_hash): Likewise.
> 	* config/gcn/gcn-opts.h (enum processor_type): Likewise.
> 	(TARGET_GFX1036): New macro.
> 	* config/gcn/gcn.cc (gcn_option_override): Handle gfx1036.
> 	(gcn_omp_device_kind_arch_isa): Likewise.
> 	(output_file_start): Likewise.
> 	* config/gcn/gcn.h (TARGET_CPU_CPP_BUILTINS): Add __gfx1036__.
> 	(TARGET_CPU_CPP_BUILTINS): Rename __gfx1030 to __gfx1030__.
> 	* config/gcn/gcn.opt: Add gfx1036.
> 	* config/gcn/mkoffload.cc (EF_AMDGPU_MACH_AMDGCN_GFX1036): New.
> 	(main): Handle gfx1036.
> 	* config/gcn/t-omp-device: Add gfx1036 isa.
> 	* doc/install.texi (amdgcn): Add gfx1036.
> 	* doc/invoke.texi (-march): Likewise.
> 
> libgomp/ChangeLog:
> 
> 	* plugin/plugin-gcn.c (EF_AMDGPU_MACH): Add
> 	EF_AMDGPU_MACH_AMDGCN_GFX1036.
> 	(gcn_gfx1036_s): New.
> 	(isa_hsa_name): Handle gfx1036.
> 	(isa_code): Likewise.
> 	(max_isa_vgprs): Likewise.
> ---
>   gcc/config.gcc              |  4 ++--
>   gcc/config/gcn/gcn-hsa.h    |  6 +++---
>   gcc/config/gcn/gcn-opts.h   |  2 ++
>   gcc/config/gcn/gcn.cc       | 10 ++++++++++
>   gcc/config/gcn/gcn.h        |  4 +++-
>   gcc/config/gcn/gcn.opt      |  3 +++
>   gcc/config/gcn/mkoffload.cc |  5 +++++
>   gcc/config/gcn/t-omp-device |  2 +-
>   gcc/doc/install.texi        |  3 ++-
>   gcc/doc/invoke.texi         |  3 +++
>   libgomp/plugin/plugin-gcn.c |  8 ++++++++
>   11 files changed, 42 insertions(+), 8 deletions(-)
> 
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index 87a5c92b6e3..17873ac2103 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -4560,7 +4560,7 @@ case "${target}" in
>   		for which in arch tune; do
>   			eval "val=\$with_$which"
>   			case ${val} in
> -			"" | fiji | gfx900 | gfx906 | gfx908 | gfx90a | gfx1030 | gfx1100 | gfx1103)
> +			"" | fiji | gfx900 | gfx906 | gfx908 | gfx90a | gfx1030 | gfx1036 | gfx1100 | gfx1103)
>   				# OK
>   				;;
>   			*)
> @@ -4576,7 +4576,7 @@ case "${target}" in
>   			TM_MULTILIB_CONFIG=
>   			;;
>   		xdefault | xyes)
> -			TM_MULTILIB_CONFIG=`echo "gfx900,gfx906,gfx908,gfx90a,gfx1030,gfx1100,gfx1103" | sed "s/${with_arch},\?//;s/,$//"`
> +			TM_MULTILIB_CONFIG=`echo "gfx900,gfx906,gfx908,gfx90a,gfx1030,gfx1036,gfx1100,gfx1103" | sed "s/${with_arch},\?//;s/,$//"`
>   			;;
>   		*)
>   			TM_MULTILIB_CONFIG="${with_multilib_list}"
> diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
> index ac32b8a328f..7d6e3141cea 100644
> --- a/gcc/config/gcn/gcn-hsa.h
> +++ b/gcc/config/gcn/gcn-hsa.h
> @@ -90,7 +90,7 @@ extern unsigned int gcn_local_sym_hash (const char *name);
>      the ELF flags (e_flags) of that generated file must be identical to those
>      generated by the compiler.  */
>   
> -#define NO_XNACK "march=fiji:;march=gfx1030:;march=gfx1100:;march=gfx1103:;" \
> +#define NO_XNACK "march=fiji:;march=gfx1030:;march=gfx1036:;march=gfx1100:;march=gfx1103:;" \
>       /* These match the defaults set in gcn.cc.  */ \
>       "!mxnack*|mxnack=default:%{march=gfx900|march=gfx906|march=gfx908:-mattr=-xnack};"
>   #define NO_SRAM_ECC "!march=*:;march=fiji:;march=gfx900:;march=gfx906:;"
> @@ -106,8 +106,8 @@ extern unsigned int gcn_local_sym_hash (const char *name);
>   		  "%{" ABI_VERSION_SPEC "} " \
>   		  "%{" NO_XNACK XNACKOPT "} " \
>   		  "%{" NO_SRAM_ECC SRAMOPT "} " \
> -		  "%{march=gfx1030|march=gfx1100|march=gfx1103:-mattr=+wavefrontsize64} " \
> -		  "%{march=gfx1030|march=gfx1100|march=gfx1103:-mattr=+cumode} " \
> +		  "%{march=gfx1030|march=gfx1036|march=gfx1100|march=gfx1103:-mattr=+wavefrontsize64} " \
> +		  "%{march=gfx1030|march=gfx1036|march=gfx1100|march=gfx1103:-mattr=+cumode} " \
>   		  "-filetype=obj"
>   #define LINK_SPEC "--pie --export-dynamic"
>   #define LIB_SPEC  "-lc"
> diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
> index 285746f7f4d..49099bad7e7 100644
> --- a/gcc/config/gcn/gcn-opts.h
> +++ b/gcc/config/gcn/gcn-opts.h
> @@ -26,6 +26,7 @@ enum processor_type
>     PROCESSOR_GFX908,
>     PROCESSOR_GFX90a,
>     PROCESSOR_GFX1030,
> +  PROCESSOR_GFX1036,
>     PROCESSOR_GFX1100,
>     PROCESSOR_GFX1103
>   };
> @@ -36,6 +37,7 @@ enum processor_type
>   #define TARGET_GFX908 (gcn_arch == PROCESSOR_GFX908)
>   #define TARGET_GFX90a (gcn_arch == PROCESSOR_GFX90a)
>   #define TARGET_GFX1030 (gcn_arch == PROCESSOR_GFX1030)
> +#define TARGET_GFX1036 (gcn_arch == PROCESSOR_GFX1036)
>   #define TARGET_GFX1100 (gcn_arch == PROCESSOR_GFX1100)
>   #define TARGET_GFX1103 (gcn_arch == PROCESSOR_GFX1103)
>   
> diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
> index efb73af50c4..700e554855e 100644
> --- a/gcc/config/gcn/gcn.cc
> +++ b/gcc/config/gcn/gcn.cc
> @@ -139,6 +139,7 @@ gcn_option_override (void)
>         : gcn_arch == PROCESSOR_GFX908 ? ISA_CDNA1
>         : gcn_arch == PROCESSOR_GFX90a ? ISA_CDNA2
>         : gcn_arch == PROCESSOR_GFX1030 ? ISA_RDNA2
> +      : gcn_arch == PROCESSOR_GFX1036 ? ISA_RDNA2
>         : gcn_arch == PROCESSOR_GFX1100 ? ISA_RDNA3
>         : gcn_arch == PROCESSOR_GFX1103 ? ISA_RDNA3
>         : ISA_UNKNOWN);
> @@ -165,6 +166,7 @@ gcn_option_override (void)
>     /* gfx803 "Fiji", gfx1030 and gfx1100 do not support XNACK.  */
>     if (gcn_arch == PROCESSOR_FIJI
>         || gcn_arch == PROCESSOR_GFX1030
> +      || gcn_arch == PROCESSOR_GFX1036
>         || gcn_arch == PROCESSOR_GFX1100
>         || gcn_arch == PROCESSOR_GFX1103)
>       {
> @@ -172,6 +174,7 @@ gcn_option_override (void)
>   	error ("%<-mxnack=on%> is incompatible with %<-march=%s%>",
>   	       (gcn_arch == PROCESSOR_FIJI ? "fiji"
>   		: gcn_arch == PROCESSOR_GFX1030 ? "gfx1030"
> +		: gcn_arch == PROCESSOR_GFX1036 ? "gfx1036"
>   		: gcn_arch == PROCESSOR_GFX1100 ? "gfx1100"
>   		: gcn_arch == PROCESSOR_GFX1103 ? "gfx1103"
>   		: NULL));
> @@ -3049,6 +3052,8 @@ gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait,
>   	return gcn_arch == PROCESSOR_GFX90a;
>         if (strcmp (name, "gfx1030") == 0)
>   	return gcn_arch == PROCESSOR_GFX1030;
> +      if (strcmp (name, "gfx1036") == 0)
> +	return gcn_arch == PROCESSOR_GFX1036;
>         if (strcmp (name, "gfx1100") == 0)
>   	return gcn_arch == PROCESSOR_GFX1100;
>         if (strcmp (name, "gfx1103") == 0)
> @@ -6584,6 +6589,11 @@ output_file_start (void)
>         xnack = "";
>         sram_ecc = "";
>         break;
> +    case PROCESSOR_GFX1036:
> +      cpu = "gfx1036";
> +      xnack = "";
> +      sram_ecc = "";
> +      break;
>       case PROCESSOR_GFX1100:
>         cpu = "gfx1100";
>         xnack = "";
> diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
> index b003db6cd26..4148ceaf582 100644
> --- a/gcc/config/gcn/gcn.h
> +++ b/gcc/config/gcn/gcn.h
> @@ -48,7 +48,9 @@
>         else if (TARGET_GFX90a)                                                  \
>   	builtin_define ("__gfx90a__");                                         \
>         else if (TARGET_GFX1030)                                                 \
> -	builtin_define ("__gfx1030");                                          \
> +	builtin_define ("__gfx1030__");                                        \
> +      else if (TARGET_GFX1036)                                                 \
> +	builtin_define ("__gfx1036__");                                        \
>         else if (TARGET_GFX1100)                                                 \
>   	builtin_define ("__gfx1100__");                                        \
>         else if (TARGET_GFX1103)                                                 \
> diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt
> index e06a2849d90..1067b45f294 100644
> --- a/gcc/config/gcn/gcn.opt
> +++ b/gcc/config/gcn/gcn.opt
> @@ -43,6 +43,9 @@ Enum(gpu_type) String(gfx90a) Value(PROCESSOR_GFX90a)
>   EnumValue
>   Enum(gpu_type) String(gfx1030) Value(PROCESSOR_GFX1030)
>   
> +EnumValue
> +Enum(gpu_type) String(gfx1036) Value(PROCESSOR_GFX1036)
> +
>   EnumValue
>   Enum(gpu_type) String(gfx1100) Value(PROCESSOR_GFX1100)
>   
> diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
> index 58eeada8295..04356b86195 100644
> --- a/gcc/config/gcn/mkoffload.cc
> +++ b/gcc/config/gcn/mkoffload.cc
> @@ -59,6 +59,8 @@
>   #define EF_AMDGPU_MACH_AMDGCN_GFX90a 0x3f
>   #undef  EF_AMDGPU_MACH_AMDGCN_GFX1030
>   #define EF_AMDGPU_MACH_AMDGCN_GFX1030 0x36
> +#undef  EF_AMDGPU_MACH_AMDGCN_GFX1036
> +#define EF_AMDGPU_MACH_AMDGCN_GFX1036 0x45
>   #undef  EF_AMDGPU_MACH_AMDGCN_GFX1100
>   #define EF_AMDGPU_MACH_AMDGCN_GFX1100 0x41
>   #undef  EF_AMDGPU_MACH_AMDGCN_GFX1103
> @@ -971,6 +973,8 @@ main (int argc, char **argv)
>   	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX90a;
>         else if (strcmp (argv[i], "-march=gfx1030") == 0)
>   	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX1030;
> +      else if (strcmp (argv[i], "-march=gfx1036") == 0)
> +	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX1036;
>         else if (strcmp (argv[i], "-march=gfx1100") == 0)
>   	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX1100;
>         else if (strcmp (argv[i], "-march=gfx1103") == 0)
> @@ -1016,6 +1020,7 @@ main (int argc, char **argv)
>       {
>       case EF_AMDGPU_MACH_AMDGCN_GFX803:
>       case EF_AMDGPU_MACH_AMDGCN_GFX1030:
> +    case EF_AMDGPU_MACH_AMDGCN_GFX1036:
>       case EF_AMDGPU_MACH_AMDGCN_GFX1100:
>       case EF_AMDGPU_MACH_AMDGCN_GFX1103:
>         SET_XNACK_UNSET (elf_flags);
> diff --git a/gcc/config/gcn/t-omp-device b/gcc/config/gcn/t-omp-device
> index 037df2657cc..7bcf910cbd0 100644
> --- a/gcc/config/gcn/t-omp-device
> +++ b/gcc/config/gcn/t-omp-device
> @@ -1,4 +1,4 @@
>   omp-device-properties-gcn: $(srcdir)/config/gcn/gcn.cc
>   	echo kind: gpu > $@
>   	echo arch: amdgcn gcn >> $@
> -	echo isa: fiji gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100 gfx1103 >> $@
> +	echo isa: fiji gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1036 gfx1100 gfx1103 >> $@
> diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
> index a9872e86bee..269fe7ec870 100644
> --- a/gcc/doc/install.texi
> +++ b/gcc/doc/install.texi
> @@ -1266,7 +1266,8 @@ default set of libraries is selected based on the value of
>   @item amdgcn*-*-*
>   @var{list} is a comma separated list of ISA names (allowed values: @code{fiji},
>   @code{gfx900}, @code{gfx906}, @code{gfx908}, @code{gfx90a}, @code{gfx1030},
> -@code{gfx1100}, @code{gfx1103}).  It ought not include the name of the default
> +@code{gfx1036}, @code{gfx1100}, @code{gfx1103}).
> +It ought not include the name of the default
>   ISA, specified via @option{--with-arch}.  If @var{list} is empty, then there
>   will be no multilibs and only the default run-time library will be built.  If
>   @var{list} is @code{default} or @option{--with-multilib-list=} is not
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index e7b96212354..d09074e13de 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -21792,6 +21792,9 @@ Compile for CDNA2 Instinct MI200 series devices (gfx90a).
>   @item gfx1030
>   Compile for RDNA2 gfx1030 devices (GFX10 series).
>   
> +@item gfx1036
> +Compile for RDNA2 gfx1036 devices (GFX10 series).
> +
>   @item gfx1100
>   Compile for RDNA3 gfx1100 devices (GFX11 series).
>   
> diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
> index 53dc1c7d23a..1d183b61ca4 100644
> --- a/libgomp/plugin/plugin-gcn.c
> +++ b/libgomp/plugin/plugin-gcn.c
> @@ -391,6 +391,7 @@ typedef enum {
>     EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
>     EF_AMDGPU_MACH_AMDGCN_GFX90a = 0x03f,
>     EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036,
> +  EF_AMDGPU_MACH_AMDGCN_GFX1036 = 0x045,
>     EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041,
>     EF_AMDGPU_MACH_AMDGCN_GFX1103 = 0x044
>   } EF_AMDGPU_MACH;
> @@ -1677,6 +1678,7 @@ const static char *gcn_gfx906_s = "gfx906";
>   const static char *gcn_gfx908_s = "gfx908";
>   const static char *gcn_gfx90a_s = "gfx90a";
>   const static char *gcn_gfx1030_s = "gfx1030";
> +const static char *gcn_gfx1036_s = "gfx1036";
>   const static char *gcn_gfx1100_s = "gfx1100";
>   const static char *gcn_gfx1103_s = "gfx1103";
>   const static int gcn_isa_name_len = 7;
> @@ -1700,6 +1702,8 @@ isa_hsa_name (int isa) {
>         return gcn_gfx90a_s;
>       case EF_AMDGPU_MACH_AMDGCN_GFX1030:
>         return gcn_gfx1030_s;
> +    case EF_AMDGPU_MACH_AMDGCN_GFX1036:
> +      return gcn_gfx1036_s;
>       case EF_AMDGPU_MACH_AMDGCN_GFX1100:
>         return gcn_gfx1100_s;
>       case EF_AMDGPU_MACH_AMDGCN_GFX1103:
> @@ -1746,6 +1750,9 @@ isa_code(const char *isa) {
>     if (!strncmp (isa, gcn_gfx1030_s, gcn_isa_name_len))
>       return EF_AMDGPU_MACH_AMDGCN_GFX1030;
>   
> +  if (!strncmp (isa, gcn_gfx1036_s, gcn_isa_name_len))
> +    return EF_AMDGPU_MACH_AMDGCN_GFX1036;
> +
>     if (!strncmp (isa, gcn_gfx1100_s, gcn_isa_name_len))
>       return EF_AMDGPU_MACH_AMDGCN_GFX1100;
>   
> @@ -1770,6 +1777,7 @@ max_isa_vgprs (int isa)
>       case EF_AMDGPU_MACH_AMDGCN_GFX90a:
>         return 512;
>       case EF_AMDGPU_MACH_AMDGCN_GFX1030:
> +    case EF_AMDGPU_MACH_AMDGCN_GFX1036:
>         return 512;  /* 512 SIMD32 = 256 wavefrontsize64.  */
>       case EF_AMDGPU_MACH_AMDGCN_GFX1100:
>       case EF_AMDGPU_MACH_AMDGCN_GFX1103:


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] amdgcn: Add gfx1036 target
@ 2024-03-25 15:18 Richard Biener
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Biener @ 2024-03-25 15:18 UTC (permalink / raw)
  To: gcc-patches; +Cc: ams

On Mon, 25 Mar 2024, Richard Biener wrote:

> Add support for the gfx1036 RDNA2 APU integrated graphics devices.  The ROCm
> documentation warns that these may not be supported, but it seems to work
> at least partially.
> 
> x86 host bootstrap/regtest running, target-libgomp testing for the
> offload produces results comparable to those of gfx1030.  The nice
> thing is that gfx1036 is inside every Zen4 desktop CPU (Ryzen 7xxx)
> and testing on that doesn't interfere with a separate GPU used for
> your desktop (where I experienced crashes when using the GPU for both
> offload and graphics).
> 
> I'll note that while gfx1030 works with llvm14 gfx1036 needs llvm15
> as minimum version for the assembler.
> 
> OK for trunk?
> 
> I'll follow up with the libgomp testing test summary for archival
> purposes.

Here's the result of a

make -k -j4 check-target-libgomp 
RUNTESTFLAGS="--target_board=unix/-foffload-options=-march=gfx1036"

I had to manually kill some hung processes from the OACC offload
testsuite.  I'll try again later this week with your locking fix
applied to newlib (not sure if that's also employed for non-I/O).

Richard.


cat <<'EOF' |
Native configuration is x86_64-pc-linux-gnu

		=== libgomp tests ===


Running target unix/-foffload-options=-march=gfx1036
FAIL: libgomp.c++/../libgomp.c-c++-common/icv-7.c execution test
FAIL: libgomp.c++/../libgomp.c-c++-common/teams-2.c execution test
FAIL: libgomp.c++/../libgomp.c-c++-common/teams-nteams-icv-1.c execution test
FAIL: libgomp.c++/../libgomp.c-c++-common/teams-nteams-icv-2.c execution test
FAIL: libgomp.c++/../libgomp.c-c++-common/teams-nteams-icv-3.c execution test
FAIL: libgomp.c++/../libgomp.c-c++-common/teams-nteams-icv-4.c execution test
FAIL: libgomp.c++/firstprivate-2.C (test for excess errors)
UNRESOLVED: libgomp.c++/firstprivate-2.C compilation failed to produce executable
XPASS: libgomp.c++/target-49.C execution test
FAIL: libgomp.c/../libgomp.c-c++-common/icv-7.c execution test
FAIL: libgomp.c/../libgomp.c-c++-common/teams-2.c execution test
FAIL: libgomp.c/../libgomp.c-c++-common/teams-nteams-icv-1.c execution test
FAIL: libgomp.c/../libgomp.c-c++-common/teams-nteams-icv-2.c execution test
FAIL: libgomp.c/../libgomp.c-c++-common/teams-nteams-icv-3.c execution test
FAIL: libgomp.c/../libgomp.c-c++-common/teams-nteams-icv-4.c execution test
FAIL: libgomp.c/declare-variant-4-gfx1030.c (test for excess errors)
FAIL: libgomp.c/declare-variant-4-gfx1100.c (test for excess errors)
FAIL: libgomp.c/declare-variant-4-gfx900.c (test for excess errors)
FAIL: libgomp.c/declare-variant-4-gfx906.c (test for excess errors)
FAIL: libgomp.c/declare-variant-4-gfx908.c (test for excess errors)
FAIL: libgomp.c/declare-variant-4-gfx90a.c (test for excess errors)
FAIL: libgomp.c/declare-variant-4.c execution test
FAIL: libgomp.c/declare-variant-4.c scan-amdgcn-amdhsa-offload-tree-dump optimized "= gfx[^ ]+ \\\\(\\\\);"
FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/deep-copy-10.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O0  execution test
FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/deep-copy-10.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O2  execution test
FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/parallel-dims.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O0  execution test
FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/parallel-dims.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O2  execution test
FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/private-atomic-1-gang.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O0  execution test
FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/private-atomic-1-gang.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O2  execution test
FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/static-variable-1.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O0  execution test
FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/static-variable-1.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O2  execution test
FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/vprop-2.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O0  execution test
FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/vprop-2.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O2  execution test
FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/vprop.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O2  (test for excess errors)
FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/deep-copy-10.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O0  execution test
FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/deep-copy-10.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O2  execution test
WARNING: libgomp.oacc-c/../libgomp.oacc-c-c++-common/parallel-dims.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O0  execution test program timed out.
FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/parallel-dims.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O0  execution test
FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/parallel-dims.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O2  execution test
FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/private-atomic-1-gang.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O0  execution test
WARNING: libgomp.oacc-c/../libgomp.oacc-c-c++-common/private-atomic-1-gang.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O2  execution test program timed out.
FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/private-atomic-1-gang.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O2  execution test
FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/static-variable-1.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O0  execution test
FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/static-variable-1.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O2  execution test
FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/vprop-2.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O0  execution test
FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/vprop-2.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O2  execution test
FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/vprop.c -DACC_DEVICE_TYPE_radeon=1 -DACC_MEM_SHARED=0 -foffload=amdgcn-amdhsa  -O2  (test for excess errors)

		=== libgomp Summary ===

# of expected passes		30181
# of unexpected failures	43
# of unexpected successes	1
# of expected failures		705
# of unresolved testcases	1
# of unsupported tests		784

Compiler version: gcc libgomp 
Platform: x86_64-pc-linux-gnu
configure flags: --enable-languages=c,c++,fortran --disable-bootstrap --disable-libstdcxx-pch --enable-offload-targets=amdgcn-amdhsa --prefix=/abuild/rguenther/offload/install --host=x86_64-pc-linux-gnu --target=x86_64-pc-linux-gnu --build=x86_64-pc-linux-gnu
EOF
Mail -s "Results for gcc libgomp testsuite on x86_64-pc-linux-gnu" gcc-testresults@gcc.gnu.org &&
mv /abuild/rguenther/offload/obj-x86/./x86_64-pc-linux-gnu/libgomp/testsuite/libgomp.sum /abuild/rguenther/offload/obj-x86/./x86_64-pc-linux-gnu/libgomp/testsuite/libgomp.sum.sent &&
mv /abuild/rguenther/offload/obj-x86/./x86_64-pc-linux-gnu/libgomp/testsuite/libgomp.log /abuild/rguenther/offload/obj-x86/./x86_64-pc-linux-gnu/libgomp/testsuite/libgomp.log.sent &&
true

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] amdgcn: Add gfx1036 target
  2024-03-25 14:29 ` Tobias Burnus
@ 2024-03-25 14:39   ` Richard Biener
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Biener @ 2024-03-25 14:39 UTC (permalink / raw)
  To: Tobias Burnus; +Cc: gcc-patches, Andrew Stubbs

On Mon, 25 Mar 2024, Tobias Burnus wrote:

> Richard Biener wrote:
> > I'll follow up with the libgomp testing test summary for archival
> > purposes.  I still see linker errors for testcases using -g
> > (the ld: ^[[0;31merror: ^[[0mincompatible mach:
> > /tmp/ccr0oDpD.mkoffload.dbg.o^M kind)
> 
> Hmm, odd – can you try compiling with -save-temps and look at the relevant files
> with, e.g., readelf -h on the GCN files (e.g. 'readelf -h
> *.xamdgcn-amdhsa.mkoffload.*o') – that should show under "Flags" what the
> program was compiled for.
> 
> We did encounter this issue with LLVM 18 and the solution was to explicitly set
> the version both in the compiler via gcc/config/gcn/gcn-hsa.h's
> 
> #define ABI_VERSION_SPEC "march=fiji:--amdhsa-code-object-version=3;" \
>                          "!march=*|march=*:--amdhsa-code-object-version=4"
> 
> and for the debugging data in mkoffload.cc's
> 
>   ehdr.e_ident[8] = (elf_arch == EF_AMDGPU_MACH_AMDGCN_GFX803
>                      ? ELFABIVERSION_AMDGPU_HSA_V3
>                      : ELFABIVERSION_AMDGPU_HSA_V4);
> 
> But I fail to see why this doesn't work for you - you should get V4 for your
> gfx1036 target.
> 
> Here, ELFABIVERSION_AMDGPU_HSA_V4 is 2 (V1 did not have a number and V2 started
> with 0, hence V3 = 1 etc.)

So just for the record: the cause was that --with-arch=gfx1036 is not passed
through to mkoffload; using an explicit -foffload-options=-march=gfx1036
fixes that problem.

> What LLVM version did you use for the assembler (llvm-mc)?

I've used llvm15 but lld from llvm14 (don't ask ...)

Richard.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] amdgcn: Add gfx1036 target
       [not found] <20240325112750.6934E3858410@sourceware.org>
@ 2024-03-25 14:29 ` Tobias Burnus
  2024-03-25 14:39   ` Richard Biener
  0 siblings, 1 reply; 5+ messages in thread
From: Tobias Burnus @ 2024-03-25 14:29 UTC (permalink / raw)
  To: Richard Biener, gcc-patches; +Cc: Andrew Stubbs

Richard Biener wrote:
> I'll follow up with the libgomp testing test summary for archival
> purposes.  I still see linker errors for testcases using -g
> (the ld: ^[[0;31merror: ^[[0mincompatible mach:
> /tmp/ccr0oDpD.mkoffload.dbg.o^M kind)

Hmm, odd – can you try compiling with -save-temps and look at the relevant 
files with, e.g., readelf -h on the GCN files (e.g. 'readelf -h 
*.xamdgcn-amdhsa.mkoffload.*o') – that should show under "Flags" what 
the program was compiled for.

We did encounter this issue with LLVM 18 and the solution was to explicitly 
set the version both in the compiler via gcc/config/gcn/gcn-hsa.h's

#define ABI_VERSION_SPEC "march=fiji:--amdhsa-code-object-version=3;" \
                          "!march=*|march=*:--amdhsa-code-object-version=4"

and for the debugging data in mkoffload.cc's

   ehdr.e_ident[8] = (elf_arch == EF_AMDGPU_MACH_AMDGCN_GFX803
                      ? ELFABIVERSION_AMDGPU_HSA_V3
                      : ELFABIVERSION_AMDGPU_HSA_V4);

But I fail to see why this doesn't work for you - you should get V4 for 
your gfx1036 target.

Here, ELFABIVERSION_AMDGPU_HSA_V4 is 2 (V1 did not have a number and V2 
started with 0, hence V3 = 1 etc.)

What LLVM version did you use for the assembler (llvm-mc)?

Tobias

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] amdgcn: Add gfx1036 target
@ 2024-03-25 11:27 Richard Biener
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Biener @ 2024-03-25 11:27 UTC (permalink / raw)
  To: gcc-patches; +Cc: ams

Add support for the gfx1036 RDNA2 APU integrated graphics devices.  The ROCm
documentation warns that these may not be supported, but it seems to work
at least partially.

x86 host bootstrap/regtest running, target-libgomp testing for the
offload produces results comparable to those of gfx1030.  The nice
thing is that gfx1036 is inside every Zen4 desktop CPU (Ryzen 7xxx)
and testing on that doesn't interfere with a separate GPU used for
your desktop (where I experienced crashes when using the GPU for both
offload and graphics).

I'll note that while gfx1030 works with llvm14, gfx1036 needs llvm15
as the minimum version for the assembler.

OK for trunk?

I'll follow up with the libgomp testing test summary for archival
purposes.  I still see linker errors for testcases using -g
(the "ld: error: incompatible mach:
/tmp/ccr0oDpD.mkoffload.dbg.o" kind)

Thanks,
Richard.

gcc/ChangeLog:

	* config.gcc (amdgcn): Add gfx1036 entries.
	* config/gcn/gcn-hsa.h (NO_XNACK): Likewise.
	(gcn_local_sym_hash): Likewise.
	* config/gcn/gcn-opts.h (enum processor_type): Likewise.
	(TARGET_GFX1036): New macro.
	* config/gcn/gcn.cc (gcn_option_override): Handle gfx1036.
	(gcn_omp_device_kind_arch_isa): Likewise.
	(output_file_start): Likewise.
	* config/gcn/gcn.h (TARGET_CPU_CPP_BUILTINS): Add __gfx1036__.
	(TARGET_CPU_CPP_BUILTINS): Rename __gfx1030 to __gfx1030__.
	* config/gcn/gcn.opt: Add gfx1036.
	* config/gcn/mkoffload.cc (EF_AMDGPU_MACH_AMDGCN_GFX1036): New.
	(main): Handle gfx1036.
	* config/gcn/t-omp-device: Add gfx1036 isa.
	* doc/install.texi (amdgcn): Add gfx1036.
	* doc/invoke.texi (-march): Likewise.

libgomp/ChangeLog:

	* plugin/plugin-gcn.c (EF_AMDGPU_MACH): Add
	EF_AMDGPU_MACH_AMDGCN_GFX1036.
	(gcn_gfx1036_s): New.
	(isa_hsa_name): Handle gfx1036.
	(isa_code): Likewise.
	(max_isa_vgprs): Likewise.
---
 gcc/config.gcc              |  4 ++--
 gcc/config/gcn/gcn-hsa.h    |  6 +++---
 gcc/config/gcn/gcn-opts.h   |  2 ++
 gcc/config/gcn/gcn.cc       | 10 ++++++++++
 gcc/config/gcn/gcn.h        |  4 +++-
 gcc/config/gcn/gcn.opt      |  3 +++
 gcc/config/gcn/mkoffload.cc |  5 +++++
 gcc/config/gcn/t-omp-device |  2 +-
 gcc/doc/install.texi        |  3 ++-
 gcc/doc/invoke.texi         |  3 +++
 libgomp/plugin/plugin-gcn.c |  8 ++++++++
 11 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 87a5c92b6e3..17873ac2103 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4560,7 +4560,7 @@ case "${target}" in
 		for which in arch tune; do
 			eval "val=\$with_$which"
 			case ${val} in
-			"" | fiji | gfx900 | gfx906 | gfx908 | gfx90a | gfx1030 | gfx1100 | gfx1103)
+			"" | fiji | gfx900 | gfx906 | gfx908 | gfx90a | gfx1030 | gfx1036 | gfx1100 | gfx1103)
 				# OK
 				;;
 			*)
@@ -4576,7 +4576,7 @@ case "${target}" in
 			TM_MULTILIB_CONFIG=
 			;;
 		xdefault | xyes)
-			TM_MULTILIB_CONFIG=`echo "gfx900,gfx906,gfx908,gfx90a,gfx1030,gfx1100,gfx1103" | sed "s/${with_arch},\?//;s/,$//"`
+			TM_MULTILIB_CONFIG=`echo "gfx900,gfx906,gfx908,gfx90a,gfx1030,gfx1036,gfx1100,gfx1103" | sed "s/${with_arch},\?//;s/,$//"`
 			;;
 		*)
 			TM_MULTILIB_CONFIG="${with_multilib_list}"
diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
index ac32b8a328f..7d6e3141cea 100644
--- a/gcc/config/gcn/gcn-hsa.h
+++ b/gcc/config/gcn/gcn-hsa.h
@@ -90,7 +90,7 @@ extern unsigned int gcn_local_sym_hash (const char *name);
    the ELF flags (e_flags) of that generated file must be identical to those
    generated by the compiler.  */
 
-#define NO_XNACK "march=fiji:;march=gfx1030:;march=gfx1100:;march=gfx1103:;" \
+#define NO_XNACK "march=fiji:;march=gfx1030:;march=gfx1036:;march=gfx1100:;march=gfx1103:;" \
     /* These match the defaults set in gcn.cc.  */ \
     "!mxnack*|mxnack=default:%{march=gfx900|march=gfx906|march=gfx908:-mattr=-xnack};"
 #define NO_SRAM_ECC "!march=*:;march=fiji:;march=gfx900:;march=gfx906:;"
@@ -106,8 +106,8 @@ extern unsigned int gcn_local_sym_hash (const char *name);
 		  "%{" ABI_VERSION_SPEC "} " \
 		  "%{" NO_XNACK XNACKOPT "} " \
 		  "%{" NO_SRAM_ECC SRAMOPT "} " \
-		  "%{march=gfx1030|march=gfx1100|march=gfx1103:-mattr=+wavefrontsize64} " \
-		  "%{march=gfx1030|march=gfx1100|march=gfx1103:-mattr=+cumode} " \
+		  "%{march=gfx1030|march=gfx1036|march=gfx1100|march=gfx1103:-mattr=+wavefrontsize64} " \
+		  "%{march=gfx1030|march=gfx1036|march=gfx1100|march=gfx1103:-mattr=+cumode} " \
 		  "-filetype=obj"
 #define LINK_SPEC "--pie --export-dynamic"
 #define LIB_SPEC  "-lc"
diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
index 285746f7f4d..49099bad7e7 100644
--- a/gcc/config/gcn/gcn-opts.h
+++ b/gcc/config/gcn/gcn-opts.h
@@ -26,6 +26,7 @@ enum processor_type
   PROCESSOR_GFX908,
   PROCESSOR_GFX90a,
   PROCESSOR_GFX1030,
+  PROCESSOR_GFX1036,
   PROCESSOR_GFX1100,
   PROCESSOR_GFX1103
 };
@@ -36,6 +37,7 @@ enum processor_type
 #define TARGET_GFX908 (gcn_arch == PROCESSOR_GFX908)
 #define TARGET_GFX90a (gcn_arch == PROCESSOR_GFX90a)
 #define TARGET_GFX1030 (gcn_arch == PROCESSOR_GFX1030)
+#define TARGET_GFX1036 (gcn_arch == PROCESSOR_GFX1036)
 #define TARGET_GFX1100 (gcn_arch == PROCESSOR_GFX1100)
 #define TARGET_GFX1103 (gcn_arch == PROCESSOR_GFX1103)
 
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index efb73af50c4..700e554855e 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -139,6 +139,7 @@ gcn_option_override (void)
       : gcn_arch == PROCESSOR_GFX908 ? ISA_CDNA1
       : gcn_arch == PROCESSOR_GFX90a ? ISA_CDNA2
       : gcn_arch == PROCESSOR_GFX1030 ? ISA_RDNA2
+      : gcn_arch == PROCESSOR_GFX1036 ? ISA_RDNA2
       : gcn_arch == PROCESSOR_GFX1100 ? ISA_RDNA3
       : gcn_arch == PROCESSOR_GFX1103 ? ISA_RDNA3
       : ISA_UNKNOWN);
@@ -165,6 +166,7 @@ gcn_option_override (void)
   /* gfx803 "Fiji", gfx1030 and gfx1100 do not support XNACK.  */
   if (gcn_arch == PROCESSOR_FIJI
       || gcn_arch == PROCESSOR_GFX1030
+      || gcn_arch == PROCESSOR_GFX1036
       || gcn_arch == PROCESSOR_GFX1100
       || gcn_arch == PROCESSOR_GFX1103)
     {
@@ -172,6 +174,7 @@ gcn_option_override (void)
 	error ("%<-mxnack=on%> is incompatible with %<-march=%s%>",
 	       (gcn_arch == PROCESSOR_FIJI ? "fiji"
 		: gcn_arch == PROCESSOR_GFX1030 ? "gfx1030"
+		: gcn_arch == PROCESSOR_GFX1036 ? "gfx1036"
 		: gcn_arch == PROCESSOR_GFX1100 ? "gfx1100"
 		: gcn_arch == PROCESSOR_GFX1103 ? "gfx1103"
 		: NULL));
@@ -3049,6 +3052,8 @@ gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait,
 	return gcn_arch == PROCESSOR_GFX90a;
       if (strcmp (name, "gfx1030") == 0)
 	return gcn_arch == PROCESSOR_GFX1030;
+      if (strcmp (name, "gfx1036") == 0)
+	return gcn_arch == PROCESSOR_GFX1036;
       if (strcmp (name, "gfx1100") == 0)
 	return gcn_arch == PROCESSOR_GFX1100;
       if (strcmp (name, "gfx1103") == 0)
@@ -6584,6 +6589,11 @@ output_file_start (void)
       xnack = "";
       sram_ecc = "";
       break;
+    case PROCESSOR_GFX1036:
+      cpu = "gfx1036";
+      xnack = "";
+      sram_ecc = "";
+      break;
     case PROCESSOR_GFX1100:
       cpu = "gfx1100";
       xnack = "";
diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
index b003db6cd26..4148ceaf582 100644
--- a/gcc/config/gcn/gcn.h
+++ b/gcc/config/gcn/gcn.h
@@ -48,7 +48,9 @@
       else if (TARGET_GFX90a)                                                  \
 	builtin_define ("__gfx90a__");                                         \
       else if (TARGET_GFX1030)                                                 \
-	builtin_define ("__gfx1030");                                          \
+	builtin_define ("__gfx1030__");                                        \
+      else if (TARGET_GFX1036)                                                 \
+	builtin_define ("__gfx1036__");                                        \
       else if (TARGET_GFX1100)                                                 \
 	builtin_define ("__gfx1100__");                                        \
       else if (TARGET_GFX1103)                                                 \
diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt
index e06a2849d90..1067b45f294 100644
--- a/gcc/config/gcn/gcn.opt
+++ b/gcc/config/gcn/gcn.opt
@@ -43,6 +43,9 @@ Enum(gpu_type) String(gfx90a) Value(PROCESSOR_GFX90a)
 EnumValue
 Enum(gpu_type) String(gfx1030) Value(PROCESSOR_GFX1030)
 
+EnumValue
+Enum(gpu_type) String(gfx1036) Value(PROCESSOR_GFX1036)
+
 EnumValue
 Enum(gpu_type) String(gfx1100) Value(PROCESSOR_GFX1100)
 
diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index 58eeada8295..04356b86195 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -59,6 +59,8 @@
 #define EF_AMDGPU_MACH_AMDGCN_GFX90a 0x3f
 #undef  EF_AMDGPU_MACH_AMDGCN_GFX1030
 #define EF_AMDGPU_MACH_AMDGCN_GFX1030 0x36
+#undef  EF_AMDGPU_MACH_AMDGCN_GFX1036
+#define EF_AMDGPU_MACH_AMDGCN_GFX1036 0x45
 #undef  EF_AMDGPU_MACH_AMDGCN_GFX1100
 #define EF_AMDGPU_MACH_AMDGCN_GFX1100 0x41
 #undef  EF_AMDGPU_MACH_AMDGCN_GFX1103
@@ -971,6 +973,8 @@ main (int argc, char **argv)
 	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX90a;
       else if (strcmp (argv[i], "-march=gfx1030") == 0)
 	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX1030;
+      else if (strcmp (argv[i], "-march=gfx1036") == 0)
+	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX1036;
       else if (strcmp (argv[i], "-march=gfx1100") == 0)
 	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX1100;
       else if (strcmp (argv[i], "-march=gfx1103") == 0)
@@ -1016,6 +1020,7 @@ main (int argc, char **argv)
     {
     case EF_AMDGPU_MACH_AMDGCN_GFX803:
     case EF_AMDGPU_MACH_AMDGCN_GFX1030:
+    case EF_AMDGPU_MACH_AMDGCN_GFX1036:
     case EF_AMDGPU_MACH_AMDGCN_GFX1100:
     case EF_AMDGPU_MACH_AMDGCN_GFX1103:
       SET_XNACK_UNSET (elf_flags);
diff --git a/gcc/config/gcn/t-omp-device b/gcc/config/gcn/t-omp-device
index 037df2657cc..7bcf910cbd0 100644
--- a/gcc/config/gcn/t-omp-device
+++ b/gcc/config/gcn/t-omp-device
@@ -1,4 +1,4 @@
 omp-device-properties-gcn: $(srcdir)/config/gcn/gcn.cc
 	echo kind: gpu > $@
 	echo arch: amdgcn gcn >> $@
-	echo isa: fiji gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100 gfx1103 >> $@
+	echo isa: fiji gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1036 gfx1100 gfx1103 >> $@
diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index a9872e86bee..269fe7ec870 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -1266,7 +1266,8 @@ default set of libraries is selected based on the value of
 @item amdgcn*-*-*
 @var{list} is a comma separated list of ISA names (allowed values: @code{fiji},
 @code{gfx900}, @code{gfx906}, @code{gfx908}, @code{gfx90a}, @code{gfx1030},
-@code{gfx1100}, @code{gfx1103}).  It ought not include the name of the default
+@code{gfx1036}, @code{gfx1100}, @code{gfx1103}).
+It ought not include the name of the default
 ISA, specified via @option{--with-arch}.  If @var{list} is empty, then there
 will be no multilibs and only the default run-time library will be built.  If
 @var{list} is @code{default} or @option{--with-multilib-list=} is not
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e7b96212354..d09074e13de 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21792,6 +21792,9 @@ Compile for CDNA2 Instinct MI200 series devices (gfx90a).
 @item gfx1030
 Compile for RDNA2 gfx1030 devices (GFX10 series).
 
+@item gfx1036
+Compile for RDNA2 gfx1036 devices (GFX10 series).
+
 @item gfx1100
 Compile for RDNA3 gfx1100 devices (GFX11 series).
 
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 53dc1c7d23a..1d183b61ca4 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -391,6 +391,7 @@ typedef enum {
   EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
   EF_AMDGPU_MACH_AMDGCN_GFX90a = 0x03f,
   EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036,
+  EF_AMDGPU_MACH_AMDGCN_GFX1036 = 0x045,
   EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041,
   EF_AMDGPU_MACH_AMDGCN_GFX1103 = 0x044
 } EF_AMDGPU_MACH;
@@ -1677,6 +1678,7 @@ const static char *gcn_gfx906_s = "gfx906";
 const static char *gcn_gfx908_s = "gfx908";
 const static char *gcn_gfx90a_s = "gfx90a";
 const static char *gcn_gfx1030_s = "gfx1030";
+const static char *gcn_gfx1036_s = "gfx1036";
 const static char *gcn_gfx1100_s = "gfx1100";
 const static char *gcn_gfx1103_s = "gfx1103";
 const static int gcn_isa_name_len = 7;
@@ -1700,6 +1702,8 @@ isa_hsa_name (int isa) {
       return gcn_gfx90a_s;
     case EF_AMDGPU_MACH_AMDGCN_GFX1030:
       return gcn_gfx1030_s;
+    case EF_AMDGPU_MACH_AMDGCN_GFX1036:
+      return gcn_gfx1036_s;
     case EF_AMDGPU_MACH_AMDGCN_GFX1100:
       return gcn_gfx1100_s;
     case EF_AMDGPU_MACH_AMDGCN_GFX1103:
@@ -1746,6 +1750,9 @@ isa_code(const char *isa) {
   if (!strncmp (isa, gcn_gfx1030_s, gcn_isa_name_len))
     return EF_AMDGPU_MACH_AMDGCN_GFX1030;
 
+  if (!strncmp (isa, gcn_gfx1036_s, gcn_isa_name_len))
+    return EF_AMDGPU_MACH_AMDGCN_GFX1036;
+
   if (!strncmp (isa, gcn_gfx1100_s, gcn_isa_name_len))
     return EF_AMDGPU_MACH_AMDGCN_GFX1100;
 
@@ -1770,6 +1777,7 @@ max_isa_vgprs (int isa)
     case EF_AMDGPU_MACH_AMDGCN_GFX90a:
       return 512;
     case EF_AMDGPU_MACH_AMDGCN_GFX1030:
+    case EF_AMDGPU_MACH_AMDGCN_GFX1036:
       return 512;  /* 512 SIMD32 = 256 wavefrontsize64.  */
     case EF_AMDGPU_MACH_AMDGCN_GFX1100:
     case EF_AMDGPU_MACH_AMDGCN_GFX1103:
-- 
2.35.3

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-03-25 15:19 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <66015f94.050a0220.c1dc7.3f4cSMTPIN_ADDED_MISSING@mx.google.com>
2024-03-25 14:51 ` [PATCH] amdgcn: Add gfx1036 target Andrew Stubbs
2024-03-25 15:18 Richard Biener
     [not found] <20240325112750.6934E3858410@sourceware.org>
2024-03-25 14:29 ` Tobias Burnus
2024-03-25 14:39   ` Richard Biener
  -- strict thread matches above, loose matches on Subject: below --
2024-03-25 11:27 Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).