commit 584d52b088f9fcf78704b504c3f1f07e17c1cded Author: Jonathan Wakely Date: Sat May 23 09:00:32 2020 +0100 libstdc++: Refactor filesystem::path string conversions This simplifies the logic of converting Source arguments and pairs of InputIterator arguments into the native string format. For any input that is a contiguous range of path::value_type (or char8_t for POSIX) a string view can be created and the conversion can be done directly, with no intermediate allocation. Previously some cases created a basic_string unnecessarily, for example construction from a pair of path::string_type::iterators, or a pair of non-const value_type* pointers. * include/bits/fs_path.h (__detail::_S_range_begin) (__detail::_S_range_end, path::_S_string_from_iter): Replace with overloaded function template __detail::__effective_range. (__detail::__effective_range): New overloaded function template to create a basic_string or basic_string_view for an effective range. (__detail::__value_type_is_char): Use __detail::__effective_range. Do not use remove_const on value type. (__detail::__value_type_is_char_or_char8_t): Likewise. (path::path(const Source&, format)) (path::path(const Source&, const locale&)) (path::operator/=(const Source&), path::append(const Source&)) (path::concat(const Source&)): Use __detail::__effective_range. (path::_S_to_string(InputIterator, InputIterator)): New function template to create a string view if possible, or string otherwise. (path::_S_convert): Add overloads that convert a string returned by __detail::__effective_range. Use if-constexpr to inline conversion logic from all overloads of _Cvt::_S_convert. (path::_S_convert_loc): Add overload that converts a string. Use _S_to_string to avoid allocation when possible. (path::_Cvt): Remove. (path::operator+=(CharT)): Remove indirection through path::concat. * include/experimental/bits/fs_path.h (path::_S_convert_loc): Add overload for non-const pointers, to avoid constructing a std::string. * src/c++17/fs_path.cc (path::_S_convert_loc): Replace conditional compilation with call to _S_convert. diff --git a/libstdc++-v3/include/bits/fs_path.h b/libstdc++-v3/include/bits/fs_path.h index 818b5918927..2d2766ec62e 100644 --- a/libstdc++-v3/include/bits/fs_path.h +++ b/libstdc++-v3/include/bits/fs_path.h @@ -155,56 +155,61 @@ namespace __detail template> using _Path2 = enable_if_t<__is_path_iter_src<_Tr>::value, path>; - template - _Source - _S_range_begin(_Source __begin) { return __begin; } - - struct __null_terminated { }; - - template - __null_terminated - _S_range_end(_Source) { return {}; } + // The __effective_range overloads convert a Source parameter into + // either a basic_string_view or basic_string containing the + // effective range of the Source, as defined in [fs.path.req]. template - inline const _CharT* - _S_range_begin(const basic_string<_CharT, _Traits, _Alloc>& __str) - { return __str.data(); } - - template - inline const _CharT* - _S_range_end(const basic_string<_CharT, _Traits, _Alloc>& __str) - { return __str.data() + __str.size(); } + inline basic_string_view<_CharT, _Traits> + __effective_range(const basic_string<_CharT, _Traits, _Alloc>& __source) + { return __source; } template - inline const _CharT* - _S_range_begin(const basic_string_view<_CharT, _Traits>& __str) - { return __str.data(); } + inline const basic_string_view<_CharT, _Traits>& + __effective_range(const basic_string_view<_CharT, _Traits>& __source) + { return __source; } - template - inline const _CharT* - _S_range_end(const basic_string_view<_CharT, _Traits>& __str) - { return __str.data() + __str.size(); } + template + inline auto + __effective_range(const _Source& __source) + { + if constexpr (is_pointer_v>) + return basic_string_view{&*__source}; + else + { + // _Source is an input iterator that iterates over an NTCTS. + // Create a basic_string by reading until the null character. + using value_type + = typename iterator_traits<_Source>::value_type; + basic_string __str; + _Source __it = __source; + for (value_type __ch = *__it; __ch != value_type(); __ch = *++__it) + __str.push_back(__ch); + return __str; + } + } - template())), - typename _Val = typename std::iterator_traits<_Iter>::value_type, - typename _UnqualVal = std::remove_const_t<_Val>> + // The value type of a Source parameter's effective range. + template + using __value_t = typename remove_reference_t< + decltype(__detail::__effective_range(std::declval<_Tp>()))>::value_type; + + // SFINAE helper to check that an effective range has value_type char, + // as required by path constructors taking a std::locale parameter. + // The type _Tp must have already been checked by _Path or _Path2<_Tp>. + template> using __value_type_is_char - = std::enable_if_t, - _UnqualVal>; + = std::enable_if_t, _Val>; - template())), - typename _Val = typename std::iterator_traits<_Iter>::value_type, - typename _UnqualVal = std::remove_const_t<_Val>> + // As above, but also allows char8_t, as required by u8path + // C++20 [depr.fs.path.factory] + template> using __value_type_is_char_or_char8_t - = std::enable_if_t<__or_v< - std::is_same<_UnqualVal, char> + = std::enable_if_t #ifdef _GLIBCXX_USE_CHAR8_T - , std::is_same<_UnqualVal, char8_t> + || std::is_same_v<_Val, char8_t> #endif - >, - _UnqualVal>; + , _Val>; } // namespace __detail /// @endcond @@ -251,8 +256,7 @@ namespace __detail template> path(_Source const& __source, format = auto_format) - : _M_pathname(_S_convert(__detail::_S_range_begin(__source), - __detail::_S_range_end(__source))) + : _M_pathname(_S_convert(__detail::__effective_range(__source))) { _M_split_cmpts(); } template, typename _Require2 = __detail::__value_type_is_char<_Source>> - path(_Source const& __source, const locale& __loc, format = auto_format) - : _M_pathname(_S_convert_loc(__detail::_S_range_begin(__source), - __detail::_S_range_end(__source), __loc)) + path(_Source const& __src, const locale& __loc, format = auto_format) + : _M_pathname(_S_convert_loc(__detail::__effective_range(__src), __loc)) { _M_split_cmpts(); } template& operator/=(_Source const& __source) { - _M_append(_S_convert(__detail::_S_range_begin(__source), - __detail::_S_range_end(__source))); + _M_append(_S_convert(__detail::__effective_range(__source))); return *this; } @@ -318,8 +320,7 @@ namespace __detail __detail::_Path<_Source>& append(_Source const& __source) { - _M_append(_S_convert(__detail::_S_range_begin(__source), - __detail::_S_range_end(__source))); + _M_append(_S_convert(__detail::__effective_range(__source))); return *this; } @@ -351,8 +352,7 @@ namespace __detail __detail::_Path<_Source>& concat(_Source const& __x) { - _M_concat(_S_convert(__detail::_S_range_begin(__x), - __detail::_S_range_end(__x))); + _M_concat(_S_convert(__detail::__effective_range(__x))); return *this; } @@ -523,22 +523,6 @@ namespace __detail return __result; } - /// @cond undocumented - // Create a basic_string by reading until a null character. - template, - typename _CharT - = typename std::remove_cv_t> - static std::basic_string<_CharT> - _S_string_from_iter(_InputIterator __source) - { - std::basic_string<_CharT> __str; - for (_CharT __ch = *__source; __ch != _CharT(); __ch = *++__source) - __str.push_back(__ch); - return __str; - } - /// @endcond - private: enum class _Type : unsigned char { _Multi = 0, _Root_name, _Root_dir, _Filename @@ -558,43 +542,67 @@ namespace __detail pair _M_find_extension() const noexcept; - template - struct _Cvt; + // Create a string or string view from an iterator range. + template + static auto + _S_to_string(_InputIterator __first, _InputIterator __last) + { + using _EcharT + = typename std::iterator_traits<_InputIterator>::value_type; + static_assert(__detail::__is_encoded_char<_EcharT>); - static basic_string_view - _S_convert(value_type* __src, __detail::__null_terminated) - { return __src; } +#if __cpp_lib_concepts + constexpr bool __contiguous = std::contiguous_iterator<_InputIterator>; +#else + constexpr bool __contiguous + = is_pointer_v; +#endif + if constexpr (__contiguous) + { + // For contiguous iterators we can just return a string view. + const auto* __f = std::__to_address(std::__niter_base(__first)); + const auto* __l = std::__to_address(std::__niter_base(__last)); + return basic_string_view<_EcharT>(__f, __l - __f); + } + else + // Conversion requires contiguous characters, so create a string. + return basic_string<_EcharT>(__first, __last); + } - static basic_string_view - _S_convert(const value_type* __src, __detail::__null_terminated) - { return __src; } + // path::_S_convert creates a basic_string or + // basic_string_view from a range (either the effective + // range of a Source parameter, or a pair of InputIterator parameters), + // performing the conversions required by [fs.path.type.cvt]. + // If the value_type of the range value type is path::value_type, + // no encoding conversion is performed. If the range is contiguous + // a string_view - static basic_string_view - _S_convert(value_type* __first, value_type* __last) - { return {__first, __last - __first}; } + static string_type + _S_convert(string_type __str) + { return __str; } - static basic_string_view - _S_convert(const value_type* __first, const value_type* __last) - { return {__first, __last - __first}; } + template + static auto + _S_convert(const _Tp& __str) + { + if constexpr (is_same_v<_Tp, string_type>) + return __str; + else if constexpr (is_same_v<_Tp, basic_string_view>) + return __str; + else if constexpr (is_same_v) + return basic_string_view(__str.data(), __str.size()); + else + return _S_convert(__str.data(), __str.data() + __str.size()); + } + + template + static auto + _S_convert(const _EcharT* __first, const _EcharT* __last); template - static string_type + static auto _S_convert(_Iter __first, _Iter __last) - { - using __value_type = typename std::iterator_traits<_Iter>::value_type; - return _Cvt::type>:: - _S_convert(__first, __last); - } - - template - static string_type - _S_convert(_InputIterator __src, __detail::__null_terminated) - { - // Read from iterator into basic_string until a null value is seen: - auto __s = _S_string_from_iter(__src); - // Convert (if needed) from iterator's value type to path::value_type: - return string_type(_S_convert(__s.data(), __s.data() + __s.size())); - } + { return _S_convert(_S_to_string(__first, __last)); } static string_type _S_convert_loc(const char* __first, const char* __last, @@ -604,16 +612,14 @@ namespace __detail static string_type _S_convert_loc(_Iter __first, _Iter __last, const std::locale& __loc) { - const std::string __str(__first, __last); - return _S_convert_loc(__str.data(), __str.data()+__str.size(), __loc); + const auto __s = _S_to_string(__first, __last); + return _S_convert_loc(__s.data(), __s.data() + __s.size(), __loc); } - template + template static string_type - _S_convert_loc(_InputIterator __src, __detail::__null_terminated, - const std::locale& __loc) + _S_convert_loc(const _Tp& __s, const std::locale& __loc) { - const std::string __s = _S_string_from_iter(__src); return _S_convert_loc(__s.data(), __s.data() + __s.size(), __loc); } @@ -803,100 +809,66 @@ namespace __detail size_t _M_pos; }; - // specialize _Cvt for degenerate 'noconv' case - template<> - struct path::_Cvt + template + auto + path::_S_convert(const _EcharT* __f, const _EcharT* __l) { - template - static string_type - _S_convert(_Iter __first, _Iter __last) - { return string_type{__first, __last}; } - }; + static_assert(__detail::__is_encoded_char<_EcharT>); -#if !defined _GLIBCXX_FILESYSTEM_IS_WINDOWS && defined _GLIBCXX_USE_CHAR8_T - // For POSIX converting from char8_t to char is also 'noconv' - template<> - struct path::_Cvt - { - template - static string_type - _S_convert(_Iter __first, _Iter __last) - { return string_type(__first, __last); } - }; + if constexpr (is_same_v<_EcharT, value_type>) + return basic_string_view(__f, __l - __f); +#if !defined _GLIBCXX_FILESYSTEM_IS_WINDOWS && defined _GLIBCXX_USE_CHAR8_T + else if constexpr (is_same_v<_EcharT, char8_t>) + // For POSIX converting from char8_t to char is also 'noconv' + return string_view(reinterpret_cast(__f), __l - __f); #endif - - template - struct path::_Cvt - { - static string_type - _S_convert(const _CharT* __f, const _CharT* __l) - { -#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS - std::wstring __wstr; - if constexpr (is_same_v<_CharT, char>) - { - struct _UCvt : std::codecvt - { } __cvt; - if (__str_codecvt_in_all(__f, __l, __wstr, __cvt)) - return __wstr; - } -#ifdef _GLIBCXX_USE_CHAR8_T - else if constexpr (is_same_v<_CharT, char8_t>) - { - const char* __f2 = (const char*)__f; - const char* __l2 = (const char*)__l; - std::codecvt_utf8_utf16 __wcvt; - if (__str_codecvt_in_all(__f2, __l2, __wstr, __wcvt)) - return __wstr; - } -#endif - else // char16_t or char32_t - { - struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t> - { } __cvt; - std::string __str; - if (__str_codecvt_out_all(__f, __l, __str, __cvt)) - { - const char* __f2 = __str.data(); - const char* __l2 = __f2 + __str.size(); - std::codecvt_utf8_utf16 __wcvt; - if (__str_codecvt_in_all(__f2, __l2, __wstr, __wcvt)) - return __wstr; - } - } -#else // ! windows - struct _UCvt : std::codecvt<_CharT, char, std::mbstate_t> - { } __cvt; - std::string __str; - if (__str_codecvt_out_all(__f, __l, __str, __cvt)) - return __str; -#endif - _GLIBCXX_THROW_OR_ABORT(filesystem_error( - "Cannot convert character sequence", - std::make_error_code(errc::illegal_byte_sequence))); - } - - static string_type - _S_convert(_CharT* __f, _CharT* __l) - { - return _S_convert(const_cast(__f), - const_cast(__l)); - } - - template - static string_type - _S_convert(_Iter __first, _Iter __last) + else { - const std::basic_string<_CharT> __str(__first, __last); - return _S_convert(__str.data(), __str.data() + __str.size()); +#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS + std::wstring __wstr; + if constexpr (is_same_v<_EcharT, char>) + { + struct _UCvt : std::codecvt + { } __cvt; + if (__str_codecvt_in_all(__f, __l, __wstr, __cvt)) + return __wstr; + } +#ifdef _GLIBCXX_USE_CHAR8_T + else if constexpr (is_same_v<_EcharT, char8_t>) + { + const char* __f2 = (const char*)__f; + const char* __l2 = (const char*)__l; + std::codecvt_utf8_utf16 __wcvt; + if (__str_codecvt_in_all(__f2, __l2, __wstr, __wcvt)) + return __wstr; + } +#endif + else // char16_t or char32_t + { + struct _UCvt : std::codecvt<_EcharT, char, std::mbstate_t> + { } __cvt; + std::string __str; + if (__str_codecvt_out_all(__f, __l, __str, __cvt)) + { + const char* __f2 = __str.data(); + const char* __l2 = __f2 + __str.size(); + std::codecvt_utf8_utf16 __wcvt; + if (__str_codecvt_in_all(__f2, __l2, __wstr, __wcvt)) + return __wstr; + } + } +#else // ! windows + struct _UCvt : std::codecvt<_EcharT, char, std::mbstate_t> + { } __cvt; + std::string __str; + if (__str_codecvt_out_all(__f, __l, __str, __cvt)) + return __str; +#endif + _GLIBCXX_THROW_OR_ABORT(filesystem_error( + "Cannot convert character sequence", + std::make_error_code(errc::illegal_byte_sequence))); } - - template - static string_type - _S_convert(__gnu_cxx::__normal_iterator<_Iter, _Cont> __first, - __gnu_cxx::__normal_iterator<_Iter, _Cont> __last) - { return _S_convert(__first.base(), __last.base()); } - }; + } /// @endcond @@ -1030,10 +1002,10 @@ namespace __detail template inline __detail::_Path2<_CharT*>& - path::operator+=(_CharT __x) + path::operator+=(const _CharT __x) { - auto* __addr = std::__addressof(__x); - return concat(__addr, __addr + 1); + _M_concat(_S_convert(&__x, &__x + 1)); + return *this; } inline path& diff --git a/libstdc++-v3/include/experimental/bits/fs_path.h b/libstdc++-v3/include/experimental/bits/fs_path.h index 69b823a3466..c91937c66d8 100644 --- a/libstdc++-v3/include/experimental/bits/fs_path.h +++ b/libstdc++-v3/include/experimental/bits/fs_path.h @@ -495,6 +495,13 @@ namespace __detail _S_convert_loc(const char* __first, const char* __last, const std::locale& __loc); + static string_type + _S_convert_loc(char* __first, char* __last, const std::locale& __loc) + { + return _S_convert_loc(const_cast(__first), + const_cast(__last), __loc); + } + template static string_type _S_convert_loc(_Iter __first, _Iter __last, const std::locale& __loc) diff --git a/libstdc++-v3/src/c++17/fs_path.cc b/libstdc++-v3/src/c++17/fs_path.cc index 5ff17741f81..cea7aa08601 100644 --- a/libstdc++-v3/src/c++17/fs_path.cc +++ b/libstdc++-v3/src/c++17/fs_path.cc @@ -1949,11 +1949,7 @@ path::_S_convert_loc(const char* __first, const char* __last, _GLIBCXX_THROW_OR_ABORT(filesystem_error( "Cannot convert character sequence", std::make_error_code(errc::illegal_byte_sequence))); -#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS - return __ws; -#else - return _Cvt::_S_convert(__ws.data(), __ws.data() + __ws.size()); -#endif + return _S_convert(std::move(__ws)); #else return {__first, __last}; #endif