public inbox for libstdc++-cvs@sourceware.org help / color / mirror / Atom feed
From: Jonathan Wakely <redi@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org Subject: [gcc r13-6371] libstdc++: Add likely/unlikely attributes to <codecvt> implementation Date: Tue, 28 Feb 2023 09:49:57 +0000 (GMT) [thread overview] Message-ID: <20230228094957.C1AD83858D33@sourceware.org> (raw) https://gcc.gnu.org/g:a41a56dee5c2d48337739d60c43cab5074bcc8e7 commit r13-6371-ga41a56dee5c2d48337739d60c43cab5074bcc8e7 Author: Jonathan Wakely <jwakely@redhat.com> Date: Fri Feb 24 21:28:11 2023 +0000 libstdc++: Add likely/unlikely attributes to <codecvt> implementation For the common case of converting valid text this improves performance significantly. libstdc++-v3/ChangeLog: * src/c++11/codecvt.cc: Add [[likely]] and [[unlikely]] attributes. Diff: --- libstdc++-v3/src/c++11/codecvt.cc | 92 +++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/libstdc++-v3/src/c++11/codecvt.cc b/libstdc++-v3/src/c++11/codecvt.cc index e333e795f48..02f05752de8 100644 --- a/libstdc++-v3/src/c++11/codecvt.cc +++ b/libstdc++-v3/src/c++11/codecvt.cc @@ -256,19 +256,19 @@ namespace return incomplete_mb_character; char32_t c1 = (unsigned char) from[0]; // https://en.wikipedia.org/wiki/UTF-8#Sample_code - if (c1 < 0x80) + if (c1 < 0x80) [[likely]] { ++from; return c1; } - else if (c1 < 0xC2) // continuation or overlong 2-byte sequence + else if (c1 < 0xC2) [[unlikely]] // continuation or overlong 2-byte sequence return invalid_mb_sequence; else if (c1 < 0xE0) // 2-byte sequence { - if (avail < 2) + if (avail < 2) [[unlikely]] return incomplete_mb_character; char32_t c2 = (unsigned char) from[1]; - if ((c2 & 0xC0) != 0x80) + if ((c2 & 0xC0) != 0x80) [[unlikely]] return invalid_mb_sequence; char32_t c = (c1 << 6) + c2 - 0x3080; if (c <= maxcode) @@ -277,17 +277,17 @@ namespace } else if (c1 < 0xF0) // 3-byte sequence { - if (avail < 2) + if (avail < 2) [[unlikely]] return incomplete_mb_character; char32_t c2 = (unsigned char) from[1]; 
- if ((c2 & 0xC0) != 0x80) + if ((c2 & 0xC0) != 0x80) [[unlikely]] return invalid_mb_sequence; - if (c1 == 0xE0 && c2 < 0xA0) // overlong + if (c1 == 0xE0 && c2 < 0xA0) [[unlikely]] // overlong return invalid_mb_sequence; - if (avail < 3) + if (avail < 3) [[unlikely]] return incomplete_mb_character; char32_t c3 = (unsigned char) from[2]; - if ((c3 & 0xC0) != 0x80) + if ((c3 & 0xC0) != 0x80) [[unlikely]] return invalid_mb_sequence; char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080; if (c <= maxcode) @@ -296,31 +296,31 @@ namespace } else if (c1 < 0xF5 && maxcode > 0xFFFF) // 4-byte sequence { - if (avail < 2) + if (avail < 2) [[unlikely]] return incomplete_mb_character; char32_t c2 = (unsigned char) from[1]; - if ((c2 & 0xC0) != 0x80) + if ((c2 & 0xC0) != 0x80) [[unlikely]] return invalid_mb_sequence; - if (c1 == 0xF0 && c2 < 0x90) // overlong + if (c1 == 0xF0 && c2 < 0x90) [[unlikely]] // overlong return invalid_mb_sequence; - if (c1 == 0xF4 && c2 >= 0x90) // > U+10FFFF + if (c1 == 0xF4 && c2 >= 0x90) [[unlikely]] // > U+10FFFF return invalid_mb_sequence; - if (avail < 3) + if (avail < 3) [[unlikely]] return incomplete_mb_character; char32_t c3 = (unsigned char) from[2]; - if ((c3 & 0xC0) != 0x80) + if ((c3 & 0xC0) != 0x80) [[unlikely]] return invalid_mb_sequence; - if (avail < 4) + if (avail < 4) [[unlikely]] return incomplete_mb_character; char32_t c4 = (unsigned char) from[3]; - if ((c4 & 0xC0) != 0x80) + if ((c4 & 0xC0) != 0x80) [[unlikely]] return invalid_mb_sequence; char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080; if (c <= maxcode) from += 4; return c; } - else // > U+10FFFF + else [[unlikely]] // > U+10FFFF return invalid_mb_sequence; } @@ -330,20 +330,20 @@ namespace { if (code_point < 0x80) { - if (to.size() < 1) + if (to.size() < 1) [[unlikely]] return false; to = code_point; } else if (code_point <= 0x7FF) { - if (to.size() < 2) + if (to.size() < 2) [[unlikely]] return false; to = (code_point >> 6) + 0xC0; to = (code_point & 0x3F) + 
0x80; } else if (code_point <= 0xFFFF) { - if (to.size() < 3) + if (to.size() < 3) [[unlikely]] return false; to = (code_point >> 12) + 0xE0; to = ((code_point >> 6) & 0x3F) + 0x80; @@ -351,14 +351,14 @@ namespace } else if (code_point <= 0x10FFFF) { - if (to.size() < 4) + if (to.size() < 4) [[unlikely]] return false; to = (code_point >> 18) + 0xF0; to = ((code_point >> 12) & 0x3F) + 0x80; to = ((code_point >> 6) & 0x3F) + 0x80; to = (code_point & 0x3F) + 0x80; } - else + else [[unlikely]] return false; return true; } @@ -403,16 +403,16 @@ namespace unsigned long maxcode, codecvt_mode mode) { const size_t avail = from.size(); - if (avail == 0) + if (avail == 0) [[unlikely]] return incomplete_mb_character; int inc = 1; char32_t c = adjust_byte_order(from[0], mode); if (is_high_surrogate(c)) { - if (avail < 2) + if (avail < 2) [[unlikely]] return incomplete_mb_character; const char16_t c2 = adjust_byte_order(from[1], mode); - if (is_low_surrogate(c2)) + if (is_low_surrogate(c2)) [[likely]] { c = surrogate_pair_to_code_point(c, c2); inc = 2; @@ -420,7 +420,7 @@ namespace else return invalid_mb_sequence; } - else if (is_low_surrogate(c)) + else if (is_low_surrogate(c)) [[unlikely]] return invalid_mb_sequence; if (c <= maxcode) from += inc; @@ -464,9 +464,9 @@ namespace while (from.size() && to.size()) { const char32_t codepoint = read_utf8_code_point(from, maxcode); - if (codepoint == incomplete_mb_character) + if (codepoint == incomplete_mb_character) [[unlikely]] return codecvt_base::partial; - if (codepoint > maxcode) + if (codepoint > maxcode) [[unlikely]] return codecvt_base::error; to = codepoint; } @@ -479,14 +479,14 @@ namespace ucs4_out(range<const char32_t>& from, range<C>& to, unsigned long maxcode = max_code_point, codecvt_mode mode = {}) { - if (!write_utf8_bom(to, mode)) + if (!write_utf8_bom(to, mode)) [[unlikely]] return codecvt_base::partial; while (from.size()) { const char32_t c = from[0]; - if (c > maxcode) + if (c > maxcode) [[unlikely]] return 
codecvt_base::error; - if (!write_utf8_code_point(to, c)) + if (!write_utf8_code_point(to, c)) [[unlikely]] return codecvt_base::partial; ++from; } @@ -502,9 +502,9 @@ namespace while (from.size() && to.size()) { const char32_t codepoint = read_utf16_code_point(from, maxcode, mode); - if (codepoint == incomplete_mb_character) + if (codepoint == incomplete_mb_character) [[unlikely]] return codecvt_base::partial; - if (codepoint > maxcode) + if (codepoint > maxcode) [[unlikely]] return codecvt_base::error; to = codepoint; } @@ -516,14 +516,14 @@ namespace ucs4_out(range<const char32_t>& from, range<char16_t, false>& to, unsigned long maxcode = max_code_point, codecvt_mode mode = {}) { - if (!write_utf16_bom(to, mode)) + if (!write_utf16_bom(to, mode)) [[unlikely]] return codecvt_base::partial; while (from.size()) { const char32_t c = from[0]; - if (c > maxcode) + if (c > maxcode) [[unlikely]] return codecvt_base::error; - if (!write_utf16_code_point(to, c, mode)) + if (!write_utf16_code_point(to, c, mode)) [[unlikely]] return codecvt_base::partial; ++from; } @@ -544,11 +544,11 @@ namespace { auto orig = from; const char32_t codepoint = read_utf8_code_point(from, maxcode); - if (codepoint == incomplete_mb_character) + if (codepoint == incomplete_mb_character) [[unlikely]] return codecvt_base::partial; if (codepoint > maxcode) return codecvt_base::error; - if (!write_utf16_code_point(to, codepoint, mode)) + if (!write_utf16_code_point(to, codepoint, mode)) [[unlikely]] { from = orig; // rewind to previous position return codecvt_base::partial; @@ -564,7 +564,7 @@ namespace unsigned long maxcode = max_code_point, codecvt_mode mode = {}, surrogates s = surrogates::allowed) { - if (!write_utf8_bom(to, mode)) + if (!write_utf8_bom(to, mode)) [[unlikely]] return codecvt_base::partial; while (from.size()) { @@ -572,14 +572,14 @@ namespace int inc = 1; if (is_high_surrogate(c)) { - if (s == surrogates::disallowed) + if (s == surrogates::disallowed) [[unlikely]] return 
codecvt_base::error; // No surrogates in UCS-2 - if (from.size() < 2) + if (from.size() < 2) [[unlikely]] return codecvt_base::partial; // stop converting at this point const char32_t c2 = from[1]; - if (is_low_surrogate(c2)) + if (is_low_surrogate(c2)) [[likely]] { c = surrogate_pair_to_code_point(c, c2); inc = 2; @@ -587,11 +587,11 @@ namespace else return codecvt_base::error; } - else if (is_low_surrogate(c)) + else if (is_low_surrogate(c)) [[unlikely]] return codecvt_base::error; - if (c > maxcode) + if (c > maxcode) [[unlikely]] return codecvt_base::error; - if (!write_utf8_code_point(to, c)) + if (!write_utf8_code_point(to, c)) [[unlikely]] return codecvt_base::partial; from += inc; }
reply other threads:[~2023-02-28 9:49 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20230228094957.C1AD83858D33@sourceware.org \ --to=redi@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ --cc=libstdc++-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).