From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) by sourceware.org (Postfix) with ESMTPS id ABE363842ADB for ; Thu, 30 Jun 2022 13:16:13 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org ABE363842ADB Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-611-9fPLYOSNNn6oXzhvm0LPwA-1; Thu, 30 Jun 2022 09:16:10 -0400 X-MC-Unique: 9fPLYOSNNn6oXzhvm0LPwA-1 Received: from smtp.corp.redhat.com (int-mx04.intmail.prod.int.rdu2.redhat.com [10.11.54.4]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id D75C6802C17; Thu, 30 Jun 2022 13:16:09 +0000 (UTC) Received: from tucnak.zalov.cz (unknown [10.39.192.30]) by smtp.corp.redhat.com (Postfix) with ESMTPS id 931FC2026D64; Thu, 30 Jun 2022 13:16:09 +0000 (UTC) Received: from tucnak.zalov.cz (localhost [127.0.0.1]) by tucnak.zalov.cz (8.17.1/8.17.1) with ESMTPS id 25UDG6lk4007462 (version=TLSv1.3 cipher=TLS_AES_256_GCM_SHA384 bits=256 verify=NOT); Thu, 30 Jun 2022 15:16:07 +0200 Received: (from jakub@localhost) by tucnak.zalov.cz (8.17.1/8.17.1/Submit) id 25UDG62V4007461; Thu, 30 Jun 2022 15:16:06 +0200 Date: Thu, 30 Jun 2022 15:16:05 +0200 From: Jakub Jelinek To: Marcel Vollweiler Cc: gcc-patches@gcc.gnu.org Subject: Re: [Patch] OpenMP, libgomp, gimple: omp_get_max_teams, omp_set_num_teams, and omp_{gs}et_teams_thread_limit on offload devices Message-ID: Reply-To: Jakub Jelinek References: MIME-Version: 1.0 In-Reply-To: X-Scanned-By: MIMEDefang 2.78 on 10.11.54.4 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Type: text/plain; charset=us-ascii Content-Disposition: inline X-Spam-Status: No, score=-10.1 required=5.0 tests=BAYES_00, DKIMWL_WL_HIGH, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, GIT_PATCH_0, RCVD_IN_DNSWL_LOW, SPF_HELO_NONE, SPF_NONE, TXREP, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 30 Jun 2022 13:16:15 -0000 On Thu, Apr 14, 2022 at 06:06:24PM +0200, Marcel Vollweiler wrote: > --- a/gcc/gimplify.cc > +++ b/gcc/gimplify.cc > @@ -13994,7 +13994,7 @@ optimize_target_teams (tree target, gimple_seq *pre_p) > struct gimplify_omp_ctx *target_ctx = gimplify_omp_ctxp; > > if (teams == NULL_TREE) > - num_teams_upper = integer_one_node; > + num_teams_upper = integer_minus_two_node; No, please don't introduce this, it is quite costly to have a GC trees like integer_one_node, so they should stay for the most commonly used numbers, -2 isn't like that. Just build_int_cst (integer_type_node, -2). > --- a/gcc/tree-core.h > +++ b/gcc/tree-core.h > @@ -642,6 +642,7 @@ enum tree_index { > TI_INTEGER_ONE, > TI_INTEGER_THREE, > TI_INTEGER_MINUS_ONE, > + TI_INTEGER_MINUS_TWO, > TI_NULL_POINTER, > > TI_SIZE_ZERO, > diff --git a/gcc/tree.cc b/gcc/tree.cc > index 8f83ea1..8cb474d 100644 > --- a/gcc/tree.cc > +++ b/gcc/tree.cc > @@ -9345,6 +9345,7 @@ build_common_tree_nodes (bool signed_char) > integer_one_node = build_int_cst (integer_type_node, 1); > integer_three_node = build_int_cst (integer_type_node, 3); > integer_minus_one_node = build_int_cst (integer_type_node, -1); > + integer_minus_two_node = build_int_cst (integer_type_node, -2); > > size_zero_node = size_int (0); > size_one_node = size_int (1); > diff --git a/gcc/tree.h b/gcc/tree.h > index cea49a5..1aeb009 100644 > --- a/gcc/tree.h > +++ b/gcc/tree.h > @@ -4206,6 +4206,7 @@ tree_strip_any_location_wrapper (tree exp) > #define integer_one_node global_trees[TI_INTEGER_ONE] > #define integer_three_node global_trees[TI_INTEGER_THREE] > #define integer_minus_one_node global_trees[TI_INTEGER_MINUS_ONE] > +#define integer_minus_two_node global_trees[TI_INTEGER_MINUS_TWO] > #define size_zero_node global_trees[TI_SIZE_ZERO] > #define size_one_node global_trees[TI_SIZE_ONE] > #define bitsize_zero_node global_trees[TI_BITSIZE_ZERO] And drop the above 3 hunks. > --- a/libgomp/config/gcn/icv-device.c > +++ b/libgomp/config/gcn/icv-device.c > @@ -37,6 +37,7 @@ volatile int GOMP_DEFAULT_DEVICE_VAR; > volatile int GOMP_MAX_ACTIVE_LEVELS_VAR; > volatile omp_proc_bind_t GOMP_BIND_VAR; > volatile int GOMP_NTEAMS_VAR; > +volatile int GOMP_TEAMS_THREAD_LIMIT_VAR; I really don't like this copying of individual ICVs one by one to the device, copy a struct containing them and access fields in that struct. > --- a/libgomp/libgomp-plugin.h > +++ b/libgomp/libgomp-plugin.h > @@ -116,6 +116,7 @@ struct addr_pair > #define GOMP_MAX_ACTIVE_LEVELS_VAR __gomp_max_active_levels > #define GOMP_BIND_VAR __gomp_bind > #define GOMP_NTEAMS_VAR __gomp_nteams > +#define GOMP_TEAMS_THREAD_LIMIT_VAR __gomp_teams_thread_limit_var Likewise here. > @@ -527,13 +538,19 @@ struct gomp_icv_list { > > extern void *gomp_get_icv_value_ptr (struct gomp_icv_list **list, > int device_num); > -extern struct gomp_icv_list *gomp_run_sched_var_dev_list; > -extern struct gomp_icv_list *gomp_run_sched_chunk_size_dev_list; > +extern struct gomp_icv_list* gomp_add_device_specific_icv (int dev_num, > + size_t size, > + struct gomp_icv_list **list); > +extern struct gomp_icv_list *gomp_initial_run_sched_var_dev_list; > +extern struct gomp_icv_list *gomp_initial_run_sched_chunk_size_dev_list; > +extern struct gomp_icv_list *gomp_initial_max_active_levels_var_dev_list; > +extern struct gomp_icv_list *gomp_initial_proc_bind_var_dev_list; > +extern struct gomp_icv_list *gomp_initial_proc_bind_var_list_dev_list; > +extern struct gomp_icv_list *gomp_initial_proc_bind_var_list_len_dev_list; > +extern struct gomp_icv_list *gomp_initial_nteams_var_dev_list; > + > extern struct gomp_icv_list *gomp_nteams_var_dev_list; > -extern struct gomp_icv_list *gomp_max_active_levels_var_dev_list; > -extern struct gomp_icv_list *gomp_proc_bind_var_dev_list; > -extern struct gomp_icv_list *gomp_proc_bind_var_list_dev_list; > -extern struct gomp_icv_list *gomp_proc_bind_var_list_len_dev_list; > +extern struct gomp_icv_list *gomp_teams_thread_limit_var_dev_list; Nor these per-var lists. For a specific device, walk the list with all the vars in it, start with the most specific (matching dev number), then just dev and then all and fill in from it what is going to be copied. > --- a/libgomp/plugin/plugin-gcn.c > +++ b/libgomp/plugin/plugin-gcn.c > @@ -572,7 +572,8 @@ static char *GOMP_ICV_STRINGS[] = > XSTRING (GOMP_DYN_VAR), > XSTRING (GOMP_MAX_ACTIVE_LEVELS_VAR), > XSTRING (GOMP_BIND_VAR), > - XSTRING (GOMP_NTEAMS_VAR) > + XSTRING (GOMP_NTEAMS_VAR), > + XSTRING (GOMP_TEAMS_THREAD_LIMIT_VAR) Then you don't need to e.g. track the names of the individual vars, just one for the whole ICV block. Jakub