From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.133.124]) by sourceware.org (Postfix) with ESMTPS id 89B44385742E for ; Mon, 22 Aug 2022 21:53:13 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 89B44385742E Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-338-U_mSUIvwNRSNjRpHmAnWZA-1; Mon, 22 Aug 2022 17:53:10 -0400 X-MC-Unique: U_mSUIvwNRSNjRpHmAnWZA-1 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.rdu2.redhat.com [10.11.54.1]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id E27F0185A79C; Mon, 22 Aug 2022 21:53:09 +0000 (UTC) Received: from localhost (unknown [10.33.36.78]) by smtp.corp.redhat.com (Postfix) with ESMTP id AADFA40CF8E8; Mon, 22 Aug 2022 21:53:09 +0000 (UTC) From: Jonathan Wakely To: libstdc++@gcc.gnu.org, gcc-patches@gcc.gnu.org Subject: [committed] libstdc++: Check for overflow in regex back-reference [PR106607] Date: Mon, 22 Aug 2022 22:53:09 +0100 Message-Id: <20220822215309.92952-1-jwakely@redhat.com> MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.84 on 10.11.54.1 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Type: text/plain Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=-11.9 required=5.0 tests=BAYES_00, DKIMWL_WL_HIGH, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, GIT_PATCH_0, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_NONE, TXREP, T_SCC_BODY_TEXT_LINE, URI_HEX autolearn=unavailable autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: libstdc++@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libstdc++ mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 22 Aug 2022 21:53:15 -0000 Tested powerpc64le-linux, pushed to trunk. Worth backporting. -- >8 -- Currently we fail to notice integer overflow when parsing a back-reference expression, or when converting the parsed result from long to int. This changes the result to be int, so no conversion is needed, and uses the overflow-checking built-ins to detect an out-of-range back-reference. libstdc++-v3/ChangeLog: PR libstdc++/106607 * include/bits/regex_compiler.tcc (_Compiler::_M_cur_int_value): Use built-ins to check for integer overflow in back-reference number. * testsuite/28_regex/basic_regex/106607.cc: New test. --- libstdc++-v3/include/bits/regex_compiler.tcc | 10 +++++--- .../testsuite/28_regex/basic_regex/106607.cc | 25 +++++++++++++++++++ 2 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 libstdc++-v3/testsuite/28_regex/basic_regex/106607.cc diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc index c12f7502538..9f3ca63af51 100644 --- a/libstdc++-v3/include/bits/regex_compiler.tcc +++ b/libstdc++-v3/include/bits/regex_compiler.tcc @@ -583,10 +583,12 @@ namespace __detail _Compiler<_TraitsT>:: _M_cur_int_value(int __radix) { - long __v = 0; - for (typename _StringT::size_type __i = 0; - __i < _M_value.length(); ++__i) - __v =__v * __radix + _M_traits.value(_M_value[__i], __radix); + int __v = 0; + for (_CharT __c : _M_value) + if (__builtin_mul_overflow(__v, __radix, &__v) + || __builtin_add_overflow(__v, _M_traits.value(__c, __radix), &__v)) + std::__throw_regex_error(regex_constants::error_backref, + "invalid back reference"); return __v; } diff --git a/libstdc++-v3/testsuite/28_regex/basic_regex/106607.cc b/libstdc++-v3/testsuite/28_regex/basic_regex/106607.cc new file mode 100644 index 00000000000..f8e7fb2364d --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/basic_regex/106607.cc @@ -0,0 +1,25 @@ +// { dg-do run { target c++11 } } + +#include +#include +#include +#include + +// PR libstdc++/106607 - Regex integer overflow on large backreference value + +int main() +{ + std::regex r("(.)\\1"); // OK + + try + { + long long n = (unsigned)-1 + 2LL; // 4294967297 for 32-bit int + VERIFY( (int)n == 1 ); // 4294967297 % 2^32 == 1 + std::regex r("(.)\\" + std::to_string(n)); // Invalid back reference. + VERIFY(false); + } + catch (const std::regex_error& e) + { + VERIFY( e.code() == std::regex_constants::error_backref ); + } +} -- 2.37.2