From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2181) id A3A0F3858417; Wed, 13 Oct 2021 19:39:54 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org A3A0F3858417 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Jonathan Wakely To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org Subject: [gcc r12-4381] libstdc++: Refactor filesystem::path encoding conversions X-Act-Checkin: gcc X-Git-Author: Jonathan Wakely X-Git-Refname: refs/heads/master X-Git-Oldrev: 85b24e32dc27ec2e70b853713e0713cbc1ff08c3 X-Git-Newrev: b83b810ac440f72e7551b6496539e60ac30c0d8a Message-Id: <20211013193954.A3A0F3858417@sourceware.org> Date: Wed, 13 Oct 2021 19:39:54 +0000 (GMT) X-BeenThere: libstdc++-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libstdc++-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 13 Oct 2021 19:39:54 -0000 https://gcc.gnu.org/g:b83b810ac440f72e7551b6496539e60ac30c0d8a commit r12-4381-gb83b810ac440f72e7551b6496539e60ac30c0d8a Author: Jonathan Wakely Date: Wed Oct 13 17:19:57 2021 +0100 libstdc++: Refactor filesystem::path encoding conversions Adjust the __detail::__effective_range overloads so they always return a string or string view using std::char_traits, because we don't care about the traits of an incoming string. Use std::contiguous_iterator in the __effective_range(const Source&) overload, to allow returning a basic_string_view in more cases. For the non-contiguous case in both __effective_range and __string_from_range, return a std::string instead of std::u8string when the value type of the range is char8_t. These changes avoid unnecessary basic_string temporaries. Also simplify __string_from_range(Iter, Iter) to not need std::__to_address for the contiguous case. 
Combine the _S_convert(string_type) and _S_convert(const T&) overloads into a single _S_convert(T) function which also avoids the dangling view problem of PR 102592 (should that recur somehow). libstdc++-v3/ChangeLog: * include/bits/fs_path.h (__detail::__is_contiguous): New variable template to identify contiguous iterators. (__detail::__unified_u8_t): New alias template to decide when to treat char8_t as char without encoding conversion. (__detail::__effective_range(const basic_string&)): Use std::char_traits for returned string view. (__detail::__effective_range(const basic_string_view&)): Likewise. (__detail::__effective_range(const Source&)): Use __is_contiguous to detect more cases of contiguous iterators. Use __unified_u8_t to return a std::string instead of std::u8string. Diff: --- libstdc++-v3/include/bits/fs_path.h | 126 +++++++++++++++++++++++------------- 1 file changed, 81 insertions(+), 45 deletions(-) diff --git a/libstdc++-v3/include/bits/fs_path.h b/libstdc++-v3/include/bits/fs_path.h index 7ead8ac299c..05db792fbae 100644 --- a/libstdc++-v3/include/bits/fs_path.h +++ b/libstdc++-v3/include/bits/fs_path.h @@ -153,33 +153,56 @@ namespace __detail template> using _Path2 = enable_if_t<__is_path_iter_src<_Tr>::value, path>; +#if __cpp_lib_concepts + template + constexpr bool __is_contiguous = std::contiguous_iterator<_Iter>; +#else + template + constexpr bool __is_contiguous = is_pointer_v<_Iter>; +#endif + +#if !defined _GLIBCXX_FILESYSTEM_IS_WINDOWS && defined _GLIBCXX_USE_CHAR8_T + // For POSIX treat char8_t sequences as char without encoding conversions. + template + using __unified_u8_t + = __conditional_t, char, _EcharT>; +#else + template + using __unified_u8_t = _EcharT; +#endif + // The __effective_range overloads convert a Source parameter into - // either a basic_string_view or basic_string containing the + // either a basic_string_view or basic_string containing the // effective range of the Source, as defined in [fs.path.req]. 
template - inline basic_string_view<_CharT, _Traits> + inline basic_string_view<_CharT> __effective_range(const basic_string<_CharT, _Traits, _Alloc>& __source) + noexcept { return __source; } template - inline const basic_string_view<_CharT, _Traits>& + inline basic_string_view<_CharT> __effective_range(const basic_string_view<_CharT, _Traits>& __source) + noexcept { return __source; } + // Return the effective range of an NTCTS. template - inline auto + auto __effective_range(const _Source& __source) { - if constexpr (is_pointer_v>) - return basic_string_view{&*__source}; + // Remove a level of normal/safe iterator indirection, or decay an array. + using _Iter = decltype(std::__niter_base(__source)); + using value_type = typename iterator_traits<_Iter>::value_type; + + if constexpr (__is_contiguous<_Iter>) + return basic_string_view{&*__source}; else { // _Source is an input iterator that iterates over an NTCTS. // Create a basic_string by reading until the null character. - using value_type - = typename iterator_traits<_Source>::value_type; - basic_string __str; + basic_string<__unified_u8_t> __str; _Source __it = __source; for (value_type __ch = *__it; __ch != value_type(); __ch = *++__it) __str.push_back(__ch); @@ -188,20 +211,39 @@ namespace __detail } // The value type of a Source parameter's effective range. - template - using __value_t = typename remove_reference_t< - decltype(__detail::__effective_range(std::declval<_Tp>()))>::value_type; + template + struct __source_value_type_impl + { + using type + = typename __safe_iterator_traits>::value_type; + }; + + template + struct __source_value_type_impl> + { + using type = _CharT; + }; + + template + struct __source_value_type_impl> + { + using type = _CharT; + }; + + // The value type of a Source parameter's effective range. 
+ template + using __source_value_t = typename __source_value_type_impl<_Source>::type; // SFINAE helper to check that an effective range has value_type char, // as required by path constructors taking a std::locale parameter. // The type _Tp must have already been checked by _Path or _Path2<_Tp>. - template> + template> using __value_type_is_char = std::enable_if_t, _Val>; // As above, but also allows char8_t, as required by u8path // C++20 [depr.fs.path.factory] - template> + template> using __value_type_is_char_or_char8_t = std::enable_if_t #ifdef _GLIBCXX_USE_CHAR8_T @@ -209,31 +251,27 @@ namespace __detail #endif , _Val>; - // Create a string or string view from an iterator range. + // Create a basic_string or basic_string_view from an iterator range. template inline auto __string_from_range(_InputIterator __first, _InputIterator __last) { using _EcharT = typename std::iterator_traits<_InputIterator>::value_type; - static_assert(__is_encoded_char<_EcharT>); + static_assert(__is_encoded_char<_EcharT>); // C++17 [fs.req]/3 -#if __cpp_lib_concepts - constexpr bool __contiguous = std::contiguous_iterator<_InputIterator>; -#else - constexpr bool __contiguous - = is_pointer_v; -#endif - if constexpr (__contiguous) + if constexpr (__is_contiguous<_InputIterator>) { // For contiguous iterators we can just return a string view. - const auto* __f = std::__to_address(std::__niter_base(__first)); - const auto* __l = std::__to_address(std::__niter_base(__last)); - return basic_string_view<_EcharT>(__f, __l - __f); + if (auto __len = __last - __first) [[__likely__]] + return basic_string_view<_EcharT>(&*__first, __len); + return basic_string_view<_EcharT>(); } else - // Conversion requires contiguous characters, so create a string. - return basic_string<_EcharT>(__first, __last); + { + // Conversion requires contiguous characters, so create a string. 
+ return basic_string<__unified_u8_t<_EcharT>>(__first, __last); + } } /// @} group filesystem @@ -573,27 +611,22 @@ namespace __detail pair _M_find_extension() const noexcept; // path::_S_convert creates a basic_string or - // basic_string_view from a range (either the effective - // range of a Source parameter, or a pair of InputIterator parameters), + // basic_string_view from a basic_string or + // basic_string_view, for an encoded character type C, // performing the conversions required by [fs.path.type.cvt]. - // If the value_type of the range value type is path::value_type, - // no encoding conversion is performed. If the range is contiguous - // a string_view - - static string_type - _S_convert(string_type __str) - { return __str; } - template static auto - _S_convert(const _Tp& __str) + _S_convert(_Tp __str) { - if constexpr (is_same_v<_Tp, string_type>) - return __str; - else if constexpr (is_same_v<_Tp, basic_string_view>) - return __str; - else if constexpr (is_same_v) - return basic_string_view(__str.data(), __str.size()); + if constexpr (is_same_v<_Tp, typename _Tp::value_type>) + return __str; // No conversion needed. +#if !defined _GLIBCXX_FILESYSTEM_IS_WINDOWS && defined _GLIBCXX_USE_CHAR8_T + else if constexpr (is_same_v<_Tp, std::u8string>) + // Calling _S_convert will return a u8string_view that + // refers to __str and would dangle after this function returns. + // Return a string_type instead, to avoid dangling. + return string_type(_S_convert(std::u8string_view(__str))); +#endif else return _S_convert(__str.data(), __str.data() + __str.size()); } @@ -602,6 +635,9 @@ namespace __detail static auto _S_convert(const _EcharT* __first, const _EcharT* __last); + // _S_convert_loc converts a range of char to string_type, using the + // supplied locale for encoding conversions. + static string_type _S_convert_loc(const char* __first, const char* __last, const std::locale& __loc);