diff --git a/src/google/protobuf/generated_message_tctable_impl.h b/src/google/protobuf/generated_message_tctable_impl.h index a39245db5f70..4f2cc2a51619 100644 --- a/src/google/protobuf/generated_message_tctable_impl.h +++ b/src/google/protobuf/generated_message_tctable_impl.h @@ -360,35 +360,21 @@ class PROTOBUF_EXPORT TcParser final { static const char* FastZ64P2(PROTOBUF_TC_PARAM_DECL); // Manually unrolled and specialized Varint parsing. - template + template static const char* FastTV32S1(PROTOBUF_TC_PARAM_DECL); - template + template static const char* FastTV64S1(PROTOBUF_TC_PARAM_DECL); - template - static const char* FastTV8S1(PROTOBUF_TC_PARAM_DECL); - template + template static constexpr TailCallParseFunc SingularVarintNoZag1() { if (sizeof(FieldType) == 1) { - if (data_offset < 100) { - return &FastTV8S1; - } else { - return &FastV8S1; - } + return &FastV8S1; } if (sizeof(FieldType) == 4) { - if (data_offset < 100) { - return &FastTV32S1; - } else { // - return &FastV32S1; - } + return &FastTV32S1; } if (sizeof(FieldType) == 8) { - if (data_offset < 128) { - return &FastTV64S1; - } else { - return &FastV64S1; - } + return &FastTV64S1; } static_assert(sizeof(FieldType) == 1 || sizeof(FieldType) == 4 || sizeof(FieldType) == 8, @@ -856,67 +842,15 @@ ParseFallbackPair(const char* p, int64_t res1) { return {p + 10, res1 & res2 & res3}; } -// Notes: -// 1) if data_offset is negative, it's read from data.offset() -// 2) if hasbit_idx is negative, it's read from data.hasbit_idx() -template -PROTOBUF_NOINLINE const char* TcParser::FastTV8S1(PROTOBUF_TC_PARAM_DECL) { - using TagType = uint8_t; - - // Special case for a varint bool field with a tag of 1 byte: - // The coded_tag() field will actually contain the value too and we can check - // both at the same time. - auto coded_tag = data.coded_tag(); - if (PROTOBUF_PREDICT_TRUE(coded_tag == 0x0000 || coded_tag == 0x0100)) { - auto& field = - RefAt(msg, data_offset >= 0 ? data_offset : data.offset()); - // Note: we use `data.data` because Clang generates suboptimal code when - // using coded_tag. - // In x86_64 this uses the CH register to read the second byte out of - // `data`. - uint8_t value = data.data >> 8; - // The assume allows using a mov instead of test+setne. - PROTOBUF_ASSUME(value <= 1); - field = static_cast(value); - - ptr += sizeof(TagType) + 1; // Consume the tag and the value. - if (hasbit_idx < 0) { - hasbits |= (uint64_t{1} << data.hasbit_idx()); - } else { - if (hasbit_idx < 32) { - // `& 31` avoids a compiler warning when hasbit_idx is negative. - hasbits |= (uint64_t{1} << (hasbit_idx & 31)); - } else { - static_assert(hasbit_idx == 63 || (hasbit_idx < 32), - "hard-coded hasbit_idx should be 0-31, or the special" - "value 63, which indicates the field has no has-bit."); - // TODO(jorg): investigate whether higher hasbit indices are worth - // supporting. Something like: - // auto& hasblock = TcParser::RefAt(msg, hasbit_idx / 32 * 4); - // hasblock |= uint32_t{1} << (hasbit_idx % 32); - } - } - - PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); - } - - // If it didn't match above either the tag is wrong, or the value is encoded - // non-canonically. - // Jump to MiniParse as wrong tag is the most probable reason. - PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); -} - -template +template PROTOBUF_NOINLINE const char* TcParser::FastTV64S1(PROTOBUF_TC_PARAM_DECL) { using TagType = uint8_t; // super-early success test... if (PROTOBUF_PREDICT_TRUE(((data.data) & 0x80FF) == 0)) { ptr += sizeof(TagType); // Consume tag - if (hasbit_idx < 32) { - hasbits |= (uint64_t{1} << hasbit_idx); - } + hasbits |= (uint64_t{1} << data.hasbit_idx()); uint8_t value = data.data >> 8; - RefAt(msg, data_offset) = value; + RefAt(msg, data.offset()) = value; ptr += 1; PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); } @@ -924,33 +858,30 @@ PROTOBUF_NOINLINE const char* TcParser::FastTV64S1(PROTOBUF_TC_PARAM_DECL) { PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); } ptr += sizeof(TagType); // Consume tag - if (hasbit_idx < 32) { - hasbits |= (uint64_t{1} << hasbit_idx); - } + hasbits |= (uint64_t{1} << data.hasbit_idx()); auto tmp = ParseFallbackPair(ptr, static_cast(data.data >> 8)); - data.data = 0; // Indicate to the compiler that we don't need this anymore. ptr = tmp.first; if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) { + data.data = 0; // Indicate to the compiler that we don't need this anymore. return Error(PROTOBUF_TC_PARAM_PASS); } - RefAt(msg, data_offset) = static_cast(tmp.second); + RefAt(msg, data.offset()) = static_cast(tmp.second); + data.data = 0; // Indicate to the compiler that we don't need this anymore. PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); } -template +template PROTOBUF_NOINLINE const char* TcParser::FastTV32S1(PROTOBUF_TC_PARAM_DECL) { using TagType = uint8_t; // super-early success test... if (PROTOBUF_PREDICT_TRUE(((data.data) & 0x80FF) == 0)) { ptr += sizeof(TagType); // Consume tag - if (hasbit_idx < 32) { - hasbits |= (uint64_t{1} << hasbit_idx); - } + hasbits |= (uint64_t{1} << data.hasbit_idx()); uint8_t value = data.data >> 8; - RefAt(msg, data_offset) = value; + RefAt(msg, data.offset()) = value; ptr += 1; PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); } @@ -958,19 +889,17 @@ PROTOBUF_NOINLINE const char* TcParser::FastTV32S1(PROTOBUF_TC_PARAM_DECL) { PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); } ptr += sizeof(TagType); // Consume tag - if (hasbit_idx < 32) { - hasbits |= (uint64_t{1} << hasbit_idx); - } + hasbits |= (uint64_t{1} << data.hasbit_idx()); auto tmp = ParseFallbackPair(ptr, static_cast(data.data >> 8)); - data.data = 0; // Indicate to the compiler that we don't need this anymore. ptr = tmp.first; if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) { return Error(PROTOBUF_TC_PARAM_PASS); } - RefAt(msg, data_offset) = static_cast(tmp.second); + RefAt(msg, data.offset()) = static_cast(tmp.second); + data.data = 0; // Indicate to the compiler that we don't need this anymore. PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); } diff --git a/src/google/protobuf/generated_message_tctable_lite.cc b/src/google/protobuf/generated_message_tctable_lite.cc index 86d067b01075..acc1915efa8e 100644 --- a/src/google/protobuf/generated_message_tctable_lite.cc +++ b/src/google/protobuf/generated_message_tctable_lite.cc @@ -850,8 +850,35 @@ PROTOBUF_NOINLINE const char* TcParser::SingularVarBigint( } PROTOBUF_NOINLINE const char* TcParser::FastV8S1(PROTOBUF_TC_PARAM_DECL) { - PROTOBUF_MUSTTAIL return FastTV8S1<-1, -1>(PROTOBUF_TC_PARAM_PASS); + using TagType = uint8_t; + + // Special case for a varint bool field with a tag of 1 byte: + // The coded_tag() field will actually contain the value too and we can check + // both at the same time. + auto coded_tag = data.coded_tag(); + if (PROTOBUF_PREDICT_TRUE(coded_tag == 0x0000 || coded_tag == 0x0100)) { + auto& field = RefAt(msg, data.offset()); + // Note: we use `data.data` because Clang generates suboptimal code when + // using coded_tag. + // In x86_64 this uses the CH register to read the second byte out of + // `data`. + uint8_t value = data.data >> 8; + // The assume allows using a mov instead of test+setne. + PROTOBUF_ASSUME(value <= 1); + field = static_cast(value); + + ptr += sizeof(TagType) + 1; // Consume the tag and the value. + hasbits |= (uint64_t{1} << data.hasbit_idx()); + + PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS); + } + + // If it didn't match above either the tag is wrong, or the value is encoded + // non-canonically. + // Jump to MiniParse as wrong tag is the most probable reason. + PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS); } + PROTOBUF_NOINLINE const char* TcParser::FastV8S2(PROTOBUF_TC_PARAM_DECL) { PROTOBUF_MUSTTAIL return SingularVarint( PROTOBUF_TC_PARAM_PASS); diff --git a/src/google/protobuf/generated_message_tctable_lite_test.cc b/src/google/protobuf/generated_message_tctable_lite_test.cc index e242d839eed5..0d923aeb4a27 100644 --- a/src/google/protobuf/generated_message_tctable_lite_test.cc +++ b/src/google/protobuf/generated_message_tctable_lite_test.cc @@ -125,6 +125,7 @@ TEST(FastVarints, NameHere) { // clang-format on uint8_t serialize_buffer[64]; + // TODO(b/27721823): cleanup test cases for 'former' TV* functions. for (int size : {8, 32, 64, -8, -32, -64}) { SCOPED_TRACE(size); auto next_i = [](uint64_t i) { @@ -193,19 +194,19 @@ TEST(FastVarints, NameHere) { fn = &TcParser::FastV8S1; break; case -8: - fn = &TcParser::FastTV8S1; + fn = &TcParser::FastV8S1; break; case 32: fn = &TcParser::FastV32S1; break; case -32: - fn = &TcParser::FastTV32S1; + fn = &TcParser::FastTV32S1; break; case 64: fn = &TcParser::FastV64S1; break; case -64: - fn = &TcParser::FastTV64S1; + fn = &TcParser::FastTV64S1; break; } fallback_ptr_received = absl::nullopt;