From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 96413 invoked by alias); 13 May 2016 17:20:50 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 96401 invoked by uid 89); 13 May 2016 17:20:49 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-3.3 required=5.0 tests=BAYES_00,RP_MATCHES_RCVD,SPF_HELO_PASS autolearn=ham version=3.3.2 spammy= X-HELO: mx1.redhat.com Received: from mx1.redhat.com (HELO mx1.redhat.com) (209.132.183.28) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES256-GCM-SHA384 encrypted) ESMTPS; Fri, 13 May 2016 17:20:39 +0000 Received: from int-mx11.intmail.prod.int.phx2.redhat.com (int-mx11.intmail.prod.int.phx2.redhat.com [10.5.11.24]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id E5B64C03BD53; Fri, 13 May 2016 17:20:37 +0000 (UTC) Received: from tucnak.zalov.cz (ovpn-116-17.ams2.redhat.com [10.36.116.17]) by int-mx11.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id u4DHKanm030620 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-GCM-SHA384 bits=256 verify=NO); Fri, 13 May 2016 13:20:37 -0400 Received: from tucnak.zalov.cz (localhost [127.0.0.1]) by tucnak.zalov.cz (8.15.2/8.15.2) with ESMTP id u4DHKYap018689; Fri, 13 May 2016 19:20:35 +0200 Received: (from jakub@localhost) by tucnak.zalov.cz (8.15.2/8.15.2/Submit) id u4DHKYSb018454; Fri, 13 May 2016 19:20:34 +0200 Date: Fri, 13 May 2016 17:20:00 -0000 From: Jakub Jelinek To: Uros Bizjak , Kirill Yukhin Cc: gcc-patches@gcc.gnu.org Subject: [PATCH] Allow XMM16-XMM31 in vpbroadcast* Message-ID: <20160513172034.GT28550@tucnak.redhat.com> Reply-To: Jakub Jelinek MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.5.24 
(2015-08-30) X-IsSubscribed: yes X-SW-Source: 2016-05/txt/msg01032.txt.bz2 Hi! These insns are either AVX512VL or AVX512VL & BW; this patch allows using XMM16+ where possible. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2016-05-13 Jakub Jelinek * config/i386/sse.md (pbroadcast_evex_isa): New mode attr. (avx2_pbroadcast<mode>): Add another alternative with v instead of x constraints in it, using <pbroadcast_evex_isa> isa. (avx2_pbroadcast<mode>_1): Similarly, add two such alternatives. * gcc.target/i386/avx512bw-vpbroadcast-1.c: New test. * gcc.target/i386/avx512bw-vpbroadcast-2.c: New test. * gcc.target/i386/avx512bw-vpbroadcast-3.c: New test. * gcc.target/i386/avx512vl-vpbroadcast-1.c: New test. * gcc.target/i386/avx512vl-vpbroadcast-2.c: New test. * gcc.target/i386/avx512vl-vpbroadcast-3.c: New test. --- gcc/config/i386/sse.md.jj 2016-05-13 16:12:24.631965207 +0200 +++ gcc/config/i386/sse.md 2016-05-13 17:33:32.429909899 +0200 @@ -16725,30 +16725,40 @@ (define_insn "avx_vzeroupper" (set_attr "btver2_decode" "vector") (set_attr "mode" "OI")]) +(define_mode_attr pbroadcast_evex_isa + [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw") + (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw") + (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f") + (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")]) + (define_insn "avx2_pbroadcast<mode>" - [(set (match_operand:VI 0 "register_operand" "=x") + [(set (match_operand:VI 0 "register_operand" "=x,v") (vec_duplicate:VI (vec_select:<ssescalarmode> - (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm") + (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm") (parallel [(const_int 0)]))))] "TARGET_AVX2" "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") + [(set_attr "isa" "*,<pbroadcast_evex_isa>") + (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") + (set_attr "prefix" "vex,evex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx2_pbroadcast<mode>_1" - [(set (match_operand:VI_256 0 "register_operand" "=x,x") + [(set (match_operand:VI_256 0 
"register_operand" "=x,x,v,v") (vec_duplicate:VI_256 (vec_select:<ssescalarmode> - (match_operand:VI_256 1 "nonimmediate_operand" "m,x") + (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v") (parallel [(const_int 0)]))))] "TARGET_AVX2" "@ vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1} + vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1} + vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1} vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}" - [(set_attr "type" "ssemov") + [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>") + (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) --- gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcast-1.c.jj 2016-05-13 16:58:07.491988435 +0200 +++ gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcast-1.c 2016-05-13 17:31:29.830534782 +0200 @@ -0,0 +1,104 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx512vl -mavx512bw" } */ + +#include + +void +f1 (__m128i x) +{ + register __m128i a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a = _mm_broadcastb_epi8 (a); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vpbroadcastb\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */ + +void +f2 (__m128i x) +{ + register __m128i a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a = _mm_broadcastw_epi16 (a); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vpbroadcastw\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */ + +void +f3 (__m128i x) +{ + register __m128i a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a = _mm_broadcastd_epi32 (a); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vpbroadcastd\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */ + +void +f4 (__m128i x) +{ + register __m128i a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a = _mm_broadcastq_epi64 (a); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vpbroadcastq\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */ + +void +f5 (__m128i x) +{ + register __m128i a __asm ("xmm16"); + register __m256i b __asm ("xmm17"); + a = x; + asm volatile ("" 
: "+v" (a)); + b = _mm256_broadcastb_epi8 (a); + asm volatile ("" : "+v" (b)); +} + +/* { dg-final { scan-assembler "vpbroadcastb\[^\n\r]*(xmm1\[67]\[^\n\r]*ymm1\[67]|ymm1\[67]\[^\n\r]*xmm1\[67])" } } */ + +void +f6 (__m128i x) +{ + register __m128i a __asm ("xmm16"); + register __m256i b __asm ("xmm17"); + a = x; + asm volatile ("" : "+v" (a)); + b = _mm256_broadcastw_epi16 (a); + asm volatile ("" : "+v" (b)); +} + +/* { dg-final { scan-assembler "vpbroadcastw\[^\n\r]*(xmm1\[67]\[^\n\r]*ymm1\[67]|ymm1\[67]\[^\n\r]*xmm1\[67])" } } */ + +void +f7 (__m128i x) +{ + register __m128i a __asm ("xmm16"); + register __m256i b __asm ("xmm17"); + a = x; + asm volatile ("" : "+v" (a)); + b = _mm256_broadcastd_epi32 (a); + asm volatile ("" : "+v" (b)); +} + +/* { dg-final { scan-assembler "vpbroadcastd\[^\n\r]*(xmm1\[67]\[^\n\r]*ymm1\[67]|ymm1\[67]\[^\n\r]*xmm1\[67])" } } */ + +void +f8 (__m128i x) +{ + register __m128i a __asm ("xmm16"); + register __m256i b __asm ("xmm17"); + a = x; + asm volatile ("" : "+v" (a)); + b = _mm256_broadcastq_epi64 (a); + asm volatile ("" : "+v" (b)); +} + +/* { dg-final { scan-assembler "vpbroadcastq\[^\n\r]*(xmm1\[67]\[^\n\r]*ymm1\[67]|ymm1\[67]\[^\n\r]*xmm1\[67])" } } */ --- gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcast-2.c.jj 2016-05-13 17:23:57.412954445 +0200 +++ gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcast-2.c 2016-05-13 17:32:21.203853901 +0200 @@ -0,0 +1,68 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mavx512vl -mavx512bw" } */ + +typedef char V1 __attribute__((vector_size (16))); +typedef short V2 __attribute__((vector_size (16))); +typedef char V5 __attribute__((vector_size (32))); +typedef short V6 __attribute__((vector_size (32))); +typedef int V7 __attribute__((vector_size (32))); + +void +f1 (V1 x) +{ + register V1 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a = __builtin_shuffle (a, (V1) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vpbroadcastb\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */ + +void +f2 (V2 x) +{ + register V2 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a = __builtin_shuffle (a, (V2) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vpbroadcastw\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */ + +void +f5 (V5 x) +{ + register V5 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a = __builtin_shuffle (a, (V5) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vpbroadcastb\[^\n\r]*(xmm16\[^\n\r]*ymm16|ymm16\[^\n\r]*xmm16)" } } */ + +void +f6 (V6 x) +{ + register V6 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a = __builtin_shuffle (a, (V6) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vpbroadcastw\[^\n\r]*(xmm16\[^\n\r]*ymm16|ymm16\[^\n\r]*xmm16)" } } */ + +void +f7 (V7 x) +{ + register V7 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a = __builtin_shuffle (a, (V7) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vpbroadcastd\[^\n\r]*(xmm16\[^\n\r]*ymm16|ymm16\[^\n\r]*xmm16)" } } */ --- gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcast-3.c.jj 2016-05-13 17:24:01.236896680 +0200 +++ gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcast-3.c 2016-05-13 17:19:57.000000000 +0200 @@ -0,0 +1,58 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mavx512vl -mavx512bw" } */ + +typedef char V1 __attribute__((vector_size (16))); +typedef short V2 __attribute__((vector_size (16))); +typedef char V5 __attribute__((vector_size (32))); +typedef short V6 __attribute__((vector_size (32))); +typedef int V7 __attribute__((vector_size (32))); + +void +f1 (V1 *x) +{ + register V1 a __asm ("xmm16"); + a = __builtin_shuffle (*x, (V1) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vpbroadcastb\[^\n\r]*xmm16" } } */ + +void +f2 (V2 *x) +{ + register V2 a __asm ("xmm16"); + a = __builtin_shuffle (*x, (V2) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vpbroadcastw\[^\n\r]*xmm16" } } */ + +void +f5 (V5 *x) +{ + register V5 a __asm ("xmm16"); + a = __builtin_shuffle (*x, (V5) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vpbroadcastb\[^\n\r]*ymm16" } } */ + +void +f6 (V6 *x) +{ + register V6 a __asm ("xmm16"); + a = __builtin_shuffle (*x, (V6) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vpbroadcastw\[^\n\r]*ymm16" } } */ + +void +f7 (V7 *x) +{ + register V7 a __asm ("xmm16"); + a = __builtin_shuffle (*x, (V7) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vpbroadcastd\[^\n\r]*ymm16" } } */ --- gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcast-1.c.jj 2016-05-13 16:58:38.167685897 +0200 +++ gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcast-1.c 2016-05-13 17:29:45.144162649 +0200 @@ -0,0 +1,104 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mavx512vl -mno-avx512bw" } */ + +#include + +void +f1 (__m128i x) +{ + register __m128i a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a = _mm_broadcastb_epi8 (a); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler-not "vpbroadcastb\[^\n\r]*xmm16" } } */ + +void +f2 (__m128i x) +{ + register __m128i a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a = _mm_broadcastw_epi16 (a); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler-not "vpbroadcastw\[^\n\r]*xmm16" } } */ + +void +f3 (__m128i x) +{ + register __m128i a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a = _mm_broadcastd_epi32 (a); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vpbroadcastd\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */ + +void +f4 (__m128i x) +{ + register __m128i a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a = _mm_broadcastq_epi64 (a); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vpbroadcastq\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */ + +void +f5 (__m128i x) +{ + register __m128i a __asm ("xmm16"); + register __m256i b __asm ("xmm17"); + a = x; + asm volatile ("" : "+v" (a)); + b = _mm256_broadcastb_epi8 (a); + asm volatile ("" : "+v" (b)); +} + +/* { dg-final { scan-assembler-not "vpbroadcastb\[^\n\r]*\[xy]mm1\[67]" } } */ + +void +f6 (__m128i x) +{ + register __m128i a __asm ("xmm16"); + register __m256i b __asm ("xmm17"); + a = x; + asm volatile ("" : "+v" (a)); + b = _mm256_broadcastw_epi16 (a); + asm volatile ("" : "+v" (b)); +} + +/* { dg-final { scan-assembler-not "vpbroadcastw\[^\n\r]*\[xy]mm1\[67]" } } */ + +void +f7 (__m128i x) +{ + register __m128i a __asm ("xmm16"); + register __m256i b __asm ("xmm17"); + a = x; + asm volatile ("" : "+v" (a)); + b = _mm256_broadcastd_epi32 (a); + asm volatile ("" : "+v" (b)); +} + +/* { dg-final { scan-assembler 
"vpbroadcastd\[^\n\r]*(xmm1\[67]\[^\n\r]*ymm1\[67]|ymm1\[67]\[^\n\r]*xmm1\[67])" } } */ + +void +f8 (__m128i x) +{ + register __m128i a __asm ("xmm16"); + register __m256i b __asm ("xmm17"); + a = x; + asm volatile ("" : "+v" (a)); + b = _mm256_broadcastq_epi64 (a); + asm volatile ("" : "+v" (b)); +} + +/* { dg-final { scan-assembler "vpbroadcastq\[^\n\r]*(xmm1\[67]\[^\n\r]*ymm1\[67]|ymm1\[67]\[^\n\r]*xmm1\[67])" } } */ --- gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcast-2.c.jj 2016-05-13 17:24:27.606562792 +0200 +++ gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcast-2.c 2016-05-13 17:28:19.176240587 +0200 @@ -0,0 +1,68 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx512vl -mno-avx512bw" } */ + +typedef char V1 __attribute__((vector_size (16))); +typedef short V2 __attribute__((vector_size (16))); +typedef char V5 __attribute__((vector_size (32))); +typedef short V6 __attribute__((vector_size (32))); +typedef int V7 __attribute__((vector_size (32))); + +void +f1 (V1 x) +{ + register V1 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a = __builtin_shuffle (a, (V1) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler-not "vpbroadcastb\[^\n\r]*xmm16" } } */ + +void +f2 (V2 x) +{ + register V2 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a = __builtin_shuffle (a, (V2) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler-not "vpbroadcastw\[^\n\r]*xmm16" } } */ + +void +f5 (V5 x) +{ + register V5 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a = __builtin_shuffle (a, (V5) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler-not "vpbroadcastb\[^\n\r]*\[xy]mm16" } } */ + +void +f6 (V6 x) +{ + register V6 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a = __builtin_shuffle (a, (V6) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler-not "vpbroadcastw\[^\n\r]*\[xy]mm16" } } */ + +void 
+f7 (V7 x) +{ + register V7 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a = __builtin_shuffle (a, (V7) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vpbroadcastd\[^\n\r]*(xmm16\[^\n\r]*ymm16|ymm16\[^\n\r]*xmm16)" } } */ --- gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcast-3.c.jj 2016-05-13 17:24:32.113506886 +0200 +++ gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcast-3.c 2016-05-13 17:26:31.587970561 +0200 @@ -0,0 +1,58 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx512vl -mno-avx512bw" } */ + +typedef char V1 __attribute__((vector_size (16))); +typedef short V2 __attribute__((vector_size (16))); +typedef char V5 __attribute__((vector_size (32))); +typedef short V6 __attribute__((vector_size (32))); +typedef int V7 __attribute__((vector_size (32))); + +void +f1 (V1 *x) +{ + register V1 a __asm ("xmm16"); + a = __builtin_shuffle (*x, (V1) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler-not "vpbroadcastb\[^\n\r]*xmm16" } } */ + +void +f2 (V2 *x) +{ + register V2 a __asm ("xmm16"); + a = __builtin_shuffle (*x, (V2) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler-not "vpbroadcastw\[^\n\r]*xmm16" } } */ + +void +f5 (V5 *x) +{ + register V5 a __asm ("xmm16"); + a = __builtin_shuffle (*x, (V5) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler-not "vpbroadcastb\[^\n\r]*ymm16" } } */ + +void +f6 (V6 *x) +{ + register V6 a __asm ("xmm16"); + a = __builtin_shuffle (*x, (V6) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler-not "vpbroadcastw\[^\n\r]*ymm16" } } */ + +void +f7 (V7 *x) +{ + register V7 a __asm ("xmm16"); + a = __builtin_shuffle (*x, (V7) { 0 }); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vpbroadcastd\[^\n\r]*ymm16" } } */ Jakub