mirror of https://github.com/M66B/FairEmail.git
parent
bc4cb29839
commit
01328483f5
@ -0,0 +1,890 @@
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// source: feature_extractor.proto
|
||||
|
||||
#include "feature_extractor.pb.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
#include <google/protobuf/stubs/port.h>
|
||||
#include <google/protobuf/io/coded_stream.h>
|
||||
#include <google/protobuf/wire_format_lite_inl.h>
|
||||
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
|
||||
// This is a temporary google only hack
|
||||
#ifdef GOOGLE_PROTOBUF_ENFORCE_UNIQUENESS
|
||||
#include "third_party/protobuf/version.h"
|
||||
#endif
|
||||
// @@protoc_insertion_point(includes)
|
||||
|
||||
namespace protobuf_feature_5fextractor_2eproto {
|
||||
extern PROTOBUF_INTERNAL_EXPORT_protobuf_feature_5fextractor_2eproto ::google::protobuf::internal::SCCInfo<0> scc_info_Parameter;
|
||||
extern PROTOBUF_INTERNAL_EXPORT_protobuf_feature_5fextractor_2eproto ::google::protobuf::internal::SCCInfo<1> scc_info_FeatureFunctionDescriptor;
|
||||
} // namespace protobuf_feature_5fextractor_2eproto
|
||||
namespace chrome_lang_id {
|
||||
class ParameterDefaultTypeInternal {
|
||||
public:
|
||||
::google::protobuf::internal::ExplicitlyConstructed<Parameter>
|
||||
_instance;
|
||||
} _Parameter_default_instance_;
|
||||
class FeatureFunctionDescriptorDefaultTypeInternal {
|
||||
public:
|
||||
::google::protobuf::internal::ExplicitlyConstructed<FeatureFunctionDescriptor>
|
||||
_instance;
|
||||
} _FeatureFunctionDescriptor_default_instance_;
|
||||
class FeatureExtractorDescriptorDefaultTypeInternal {
|
||||
public:
|
||||
::google::protobuf::internal::ExplicitlyConstructed<FeatureExtractorDescriptor>
|
||||
_instance;
|
||||
} _FeatureExtractorDescriptor_default_instance_;
|
||||
} // namespace chrome_lang_id
|
||||
namespace protobuf_feature_5fextractor_2eproto {
|
||||
static void InitDefaultsParameter() {
|
||||
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
||||
|
||||
{
|
||||
void* ptr = &::chrome_lang_id::_Parameter_default_instance_;
|
||||
new (ptr) ::chrome_lang_id::Parameter();
|
||||
::google::protobuf::internal::OnShutdownDestroyMessage(ptr);
|
||||
}
|
||||
::chrome_lang_id::Parameter::InitAsDefaultInstance();
|
||||
}
|
||||
|
||||
// SCCInfo record for Parameter. It starts in the kUninitialized state and
// names InitDefaultsParameter as its init function; the trailing list is
// empty (the 0 presumably counts its entries -- confirm against SCCInfoBase).
::google::protobuf::internal::SCCInfo<0> scc_info_Parameter = {
    {ATOMIC_VAR_INIT(::google::protobuf::internal::SCCInfoBase::kUninitialized),
     0, InitDefaultsParameter},
    {}};
|
||||
|
||||
static void InitDefaultsFeatureFunctionDescriptor() {
|
||||
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
||||
|
||||
{
|
||||
void* ptr = &::chrome_lang_id::_FeatureFunctionDescriptor_default_instance_;
|
||||
new (ptr) ::chrome_lang_id::FeatureFunctionDescriptor();
|
||||
::google::protobuf::internal::OnShutdownDestroyMessage(ptr);
|
||||
}
|
||||
::chrome_lang_id::FeatureFunctionDescriptor::InitAsDefaultInstance();
|
||||
}
|
||||
|
||||
// SCCInfo record for FeatureFunctionDescriptor. It starts in the
// kUninitialized state, names InitDefaultsFeatureFunctionDescriptor as its
// init function, and lists one entry: the Parameter SCC record.
::google::protobuf::internal::SCCInfo<1> scc_info_FeatureFunctionDescriptor = {
    {ATOMIC_VAR_INIT(::google::protobuf::internal::SCCInfoBase::kUninitialized),
     1, InitDefaultsFeatureFunctionDescriptor},
    {
        &protobuf_feature_5fextractor_2eproto::scc_info_Parameter.base,
    }};
|
||||
|
||||
static void InitDefaultsFeatureExtractorDescriptor() {
|
||||
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
||||
|
||||
{
|
||||
void* ptr = &::chrome_lang_id::_FeatureExtractorDescriptor_default_instance_;
|
||||
new (ptr) ::chrome_lang_id::FeatureExtractorDescriptor();
|
||||
::google::protobuf::internal::OnShutdownDestroyMessage(ptr);
|
||||
}
|
||||
::chrome_lang_id::FeatureExtractorDescriptor::InitAsDefaultInstance();
|
||||
}
|
||||
|
||||
// SCCInfo record for FeatureExtractorDescriptor. It starts in the
// kUninitialized state, names InitDefaultsFeatureExtractorDescriptor as its
// init function, and lists one entry: the FeatureFunctionDescriptor SCC record.
::google::protobuf::internal::SCCInfo<1> scc_info_FeatureExtractorDescriptor = {
    {ATOMIC_VAR_INIT(::google::protobuf::internal::SCCInfoBase::kUninitialized),
     1, InitDefaultsFeatureExtractorDescriptor},
    {
        &protobuf_feature_5fextractor_2eproto::scc_info_FeatureFunctionDescriptor.base,
    }};
|
||||
|
||||
void InitDefaults() {
|
||||
::google::protobuf::internal::InitSCC(&scc_info_Parameter.base);
|
||||
::google::protobuf::internal::InitSCC(&scc_info_FeatureFunctionDescriptor.base);
|
||||
::google::protobuf::internal::InitSCC(&scc_info_FeatureExtractorDescriptor.base);
|
||||
}
|
||||
|
||||
} // namespace protobuf_feature_5fextractor_2eproto
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// ===================================================================
|
||||
|
||||
// Intentionally empty: no default-instance setup was generated for Parameter.
void Parameter::InitAsDefaultInstance() {}
|
||||
#if !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
const int Parameter::kNameFieldNumber;
|
||||
const int Parameter::kValueFieldNumber;
|
||||
#endif // !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
|
||||
// Default constructor: runs InitSCC on the Parameter SCC record, then sets the
// fields to their initial state via SharedCtor().
Parameter::Parameter()
    : ::google::protobuf::MessageLite(),
      _internal_metadata_(NULL) {
  ::google::protobuf::internal::InitSCC(
      &protobuf_feature_5fextractor_2eproto::scc_info_Parameter.base);
  SharedCtor();
  // @@protoc_insertion_point(constructor:chrome_lang_id.Parameter)
}
|
||||
// Copy constructor: copies the has-bits and internal metadata, then copies
// each string field that is present in `from`.
Parameter::Parameter(const Parameter& from)
    : ::google::protobuf::MessageLite(),
      _internal_metadata_(NULL),
      _has_bits_(from._has_bits_) {
  _internal_metadata_.MergeFrom(from._internal_metadata_);

  // Each string field first points at the shared empty string and is only
  // assigned from `from` when the source has the field set.
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();

  name_.UnsafeSetDefault(empty_default);
  if (from.has_name()) {
    name_.AssignWithDefault(empty_default, from.name_);
  }

  value_.UnsafeSetDefault(empty_default);
  if (from.has_value()) {
    value_.AssignWithDefault(empty_default, from.value_);
  }
  // @@protoc_insertion_point(copy_constructor:chrome_lang_id.Parameter)
}
|
||||
|
||||
// Points both string fields at the shared empty-string default.
void Parameter::SharedCtor() {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  name_.UnsafeSetDefault(empty_default);
  value_.UnsafeSetDefault(empty_default);
}
|
||||
|
||||
// Destructor: all field cleanup is delegated to SharedDtor().
Parameter::~Parameter() {
  // @@protoc_insertion_point(destructor:chrome_lang_id.Parameter)
  SharedDtor();
}
|
||||
|
||||
// Destroys both string fields, passing the shared empty-string default to
// DestroyNoArena for each.
void Parameter::SharedDtor() {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  name_.DestroyNoArena(empty_default);
  value_.DestroyNoArena(empty_default);
}
|
||||
|
||||
// Stores `size` in the cached-size slot.
void Parameter::SetCachedSize(int size) const {
  _cached_size_.Set(size);
}
|
||||
// Returns the default Parameter instance, running InitSCC on its SCC record
// first.
const Parameter& Parameter::default_instance() {
  ::google::protobuf::internal::InitSCC(
      &protobuf_feature_5fextractor_2eproto::scc_info_Parameter.base);
  const Parameter* const instance = internal_default_instance();
  return *instance;
}
|
||||
|
||||
|
||||
void Parameter::Clear() {
|
||||
// @@protoc_insertion_point(message_clear_start:chrome_lang_id.Parameter)
|
||||
::google::protobuf::uint32 cached_has_bits = 0;
|
||||
// Prevent compiler warnings about cached_has_bits being unused
|
||||
(void) cached_has_bits;
|
||||
|
||||
cached_has_bits = _has_bits_[0];
|
||||
if (cached_has_bits & 3u) {
|
||||
if (cached_has_bits & 0x00000001u) {
|
||||
name_.ClearNonDefaultToEmptyNoArena();
|
||||
}
|
||||
if (cached_has_bits & 0x00000002u) {
|
||||
value_.ClearNonDefaultToEmptyNoArena();
|
||||
}
|
||||
}
|
||||
_has_bits_.Clear();
|
||||
_internal_metadata_.Clear();
|
||||
}
|
||||
|
||||
// Parses fields from `input` into this message until a zero tag is read.
// Fields 1 (name) and 2 (value) are read as strings; anything else is passed
// to SkipField together with the unknown-fields stream. Returns false as soon
// as any read fails, true otherwise.
bool Parameter::MergePartialFromCodedStream(
    ::google::protobuf::io::CodedInputStream* input) {
#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure
  typedef ::google::protobuf::internal::WireFormatLite Wire;
  typedef ::google::protobuf::uint8 TagByte;

  ::google::protobuf::uint32 tag;
  // Output chain for unknown fields; it writes into the buffer supplied by the
  // setter that is bound to _internal_metadata_.
  ::google::protobuf::internal::LiteUnknownFieldSetter unknown_fields_setter(
      &_internal_metadata_);
  ::google::protobuf::io::StringOutputStream unknown_fields_output(
      unknown_fields_setter.buffer());
  ::google::protobuf::io::CodedOutputStream unknown_fields_stream(
      &unknown_fields_output, false);
  // @@protoc_insertion_point(parse_start:chrome_lang_id.Parameter)
  while (true) {
    const ::std::pair<::google::protobuf::uint32, bool> next_tag =
        input->ReadTagWithCutoffNoLastTag(127u);
    tag = next_tag.first;
    if (!next_tag.second) goto handle_unusual;
    switch (Wire::GetTagFieldNumber(tag)) {
      // optional string name = 1;
      case 1: {
        // Only the expected tag byte (10) is handled here.
        if (static_cast<TagByte>(tag) !=
            static_cast<TagByte>(10u /* 10 & 0xFF */)) {
          goto handle_unusual;
        }
        DO_(Wire::ReadString(input, this->mutable_name()));
        break;
      }

      // optional string value = 2;
      case 2: {
        // Only the expected tag byte (18) is handled here.
        if (static_cast<TagByte>(tag) !=
            static_cast<TagByte>(18u /* 18 & 0xFF */)) {
          goto handle_unusual;
        }
        DO_(Wire::ReadString(input, this->mutable_value()));
        break;
      }

      default: {
      handle_unusual:
        // A zero tag ends parsing successfully.
        if (tag == 0) {
          goto success;
        }
        DO_(Wire::SkipField(input, tag, &unknown_fields_stream));
        break;
      }
    }
  }
success:
  // @@protoc_insertion_point(parse_success:chrome_lang_id.Parameter)
  return true;
failure:
  // @@protoc_insertion_point(parse_failure:chrome_lang_id.Parameter)
  return false;
#undef DO_
}
|
||||
|
||||
// Writes the present fields to `output` in field-number order, followed by the
// raw bytes of the stored unknown fields.
void Parameter::SerializeWithCachedSizes(
    ::google::protobuf::io::CodedOutputStream* output) const {
  // @@protoc_insertion_point(serialize_start:chrome_lang_id.Parameter)
  typedef ::google::protobuf::internal::WireFormatLite Wire;
  const ::google::protobuf::uint32 present_bits = _has_bits_[0];

  // optional string name = 1;
  if (present_bits & 0x00000001u) {
    Wire::WriteStringMaybeAliased(1, this->name(), output);
  }

  // optional string value = 2;
  if (present_bits & 0x00000002u) {
    Wire::WriteStringMaybeAliased(2, this->value(), output);
  }

  output->WriteRaw(
      _internal_metadata_.unknown_fields().data(),
      static_cast<int>(_internal_metadata_.unknown_fields().size()));
  // @@protoc_insertion_point(serialize_end:chrome_lang_id.Parameter)
}
|
||||
|
||||
size_t Parameter::ByteSizeLong() const {
|
||||
// @@protoc_insertion_point(message_byte_size_start:chrome_lang_id.Parameter)
|
||||
size_t total_size = 0;
|
||||
|
||||
total_size += _internal_metadata_.unknown_fields().size();
|
||||
|
||||
if (_has_bits_[0 / 32] & 3u) {
|
||||
// optional string name = 1;
|
||||
if (has_name()) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::StringSize(
|
||||
this->name());
|
||||
}
|
||||
|
||||
// optional string value = 2;
|
||||
if (has_value()) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::StringSize(
|
||||
this->value());
|
||||
}
|
||||
|
||||
}
|
||||
int cached_size = ::google::protobuf::internal::ToCachedSize(total_size);
|
||||
SetCachedSize(cached_size);
|
||||
return total_size;
|
||||
}
|
||||
|
||||
void Parameter::CheckTypeAndMergeFrom(
|
||||
const ::google::protobuf::MessageLite& from) {
|
||||
MergeFrom(*::google::protobuf::down_cast<const Parameter*>(&from));
|
||||
}
|
||||
|
||||
void Parameter::MergeFrom(const Parameter& from) {
|
||||
// @@protoc_insertion_point(class_specific_merge_from_start:chrome_lang_id.Parameter)
|
||||
GOOGLE_DCHECK_NE(&from, this);
|
||||
_internal_metadata_.MergeFrom(from._internal_metadata_);
|
||||
::google::protobuf::uint32 cached_has_bits = 0;
|
||||
(void) cached_has_bits;
|
||||
|
||||
cached_has_bits = from._has_bits_[0];
|
||||
if (cached_has_bits & 3u) {
|
||||
if (cached_has_bits & 0x00000001u) {
|
||||
set_has_name();
|
||||
name_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.name_);
|
||||
}
|
||||
if (cached_has_bits & 0x00000002u) {
|
||||
set_has_value();
|
||||
value_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.value_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Parameter::CopyFrom(const Parameter& from) {
|
||||
// @@protoc_insertion_point(class_specific_copy_from_start:chrome_lang_id.Parameter)
|
||||
if (&from == this) return;
|
||||
Clear();
|
||||
MergeFrom(from);
|
||||
}
|
||||
|
||||
// Always true: this implementation performs no checks.
bool Parameter::IsInitialized() const { return true; }
|
||||
|
||||
// Exchanges contents with `other`; swapping with itself does nothing.
void Parameter::Swap(Parameter* other) {
  if (other != this) {
    InternalSwap(other);
  }
}
|
||||
// Swaps every member with `other`: both string fields, the has-bits word and
// the internal metadata.
void Parameter::InternalSwap(Parameter* other) {
  using std::swap;
  name_.Swap(&other->name_,
             &::google::protobuf::internal::GetEmptyStringAlreadyInited(),
             GetArenaNoVirtual());
  value_.Swap(&other->value_,
              &::google::protobuf::internal::GetEmptyStringAlreadyInited(),
              GetArenaNoVirtual());
  swap(_has_bits_[0], other->_has_bits_[0]);
  _internal_metadata_.Swap(&other->_internal_metadata_);
}
|
||||
|
||||
// Returns the fully qualified type name of this message as a string.
::std::string Parameter::GetTypeName() const {
  return "chrome_lang_id.Parameter";
}
|
||||
|
||||
|
||||
// ===================================================================
|
||||
|
||||
// Intentionally empty: no default-instance setup was generated for
// FeatureFunctionDescriptor.
void FeatureFunctionDescriptor::InitAsDefaultInstance() {}
|
||||
#if !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
const int FeatureFunctionDescriptor::kTypeFieldNumber;
|
||||
const int FeatureFunctionDescriptor::kNameFieldNumber;
|
||||
const int FeatureFunctionDescriptor::kArgumentFieldNumber;
|
||||
const int FeatureFunctionDescriptor::kParameterFieldNumber;
|
||||
const int FeatureFunctionDescriptor::kFeatureFieldNumber;
|
||||
#endif // !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
|
||||
// Default constructor: runs InitSCC on the FeatureFunctionDescriptor SCC
// record, then sets the fields to their initial state via SharedCtor().
FeatureFunctionDescriptor::FeatureFunctionDescriptor()
    : ::google::protobuf::MessageLite(),
      _internal_metadata_(NULL) {
  ::google::protobuf::internal::InitSCC(
      &protobuf_feature_5fextractor_2eproto::scc_info_FeatureFunctionDescriptor.base);
  SharedCtor();
  // @@protoc_insertion_point(constructor:chrome_lang_id.FeatureFunctionDescriptor)
}
|
||||
// Copy constructor: copies has-bits, both repeated fields and the internal
// metadata, then each present string field, and finally `argument`.
FeatureFunctionDescriptor::FeatureFunctionDescriptor(
    const FeatureFunctionDescriptor& from)
    : ::google::protobuf::MessageLite(),
      _internal_metadata_(NULL),
      _has_bits_(from._has_bits_),
      parameter_(from.parameter_),
      feature_(from.feature_) {
  _internal_metadata_.MergeFrom(from._internal_metadata_);

  // Each string field first points at the shared empty string and is only
  // assigned from `from` when the source has the field set.
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();

  type_.UnsafeSetDefault(empty_default);
  if (from.has_type()) {
    type_.AssignWithDefault(empty_default, from.type_);
  }

  name_.UnsafeSetDefault(empty_default);
  if (from.has_name()) {
    name_.AssignWithDefault(empty_default, from.name_);
  }

  argument_ = from.argument_;
  // @@protoc_insertion_point(copy_constructor:chrome_lang_id.FeatureFunctionDescriptor)
}
|
||||
|
||||
// Points both string fields at the shared empty-string default and zeroes
// `argument`.
void FeatureFunctionDescriptor::SharedCtor() {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  type_.UnsafeSetDefault(empty_default);
  name_.UnsafeSetDefault(empty_default);
  argument_ = 0;
}
|
||||
|
||||
// Destructor: string-field cleanup is delegated to SharedDtor().
FeatureFunctionDescriptor::~FeatureFunctionDescriptor() {
  // @@protoc_insertion_point(destructor:chrome_lang_id.FeatureFunctionDescriptor)
  SharedDtor();
}
|
||||
|
||||
// Destroys both string fields, passing the shared empty-string default to
// DestroyNoArena for each.
void FeatureFunctionDescriptor::SharedDtor() {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  type_.DestroyNoArena(empty_default);
  name_.DestroyNoArena(empty_default);
}
|
||||
|
||||
// Stores `size` in the cached-size slot.
void FeatureFunctionDescriptor::SetCachedSize(int size) const {
  _cached_size_.Set(size);
}
|
||||
// Returns the default FeatureFunctionDescriptor instance, running InitSCC on
// its SCC record first.
const FeatureFunctionDescriptor& FeatureFunctionDescriptor::default_instance() {
  ::google::protobuf::internal::InitSCC(
      &protobuf_feature_5fextractor_2eproto::scc_info_FeatureFunctionDescriptor.base);
  const FeatureFunctionDescriptor* const instance = internal_default_instance();
  return *instance;
}
|
||||
|
||||
|
||||
void FeatureFunctionDescriptor::Clear() {
|
||||
// @@protoc_insertion_point(message_clear_start:chrome_lang_id.FeatureFunctionDescriptor)
|
||||
::google::protobuf::uint32 cached_has_bits = 0;
|
||||
// Prevent compiler warnings about cached_has_bits being unused
|
||||
(void) cached_has_bits;
|
||||
|
||||
parameter_.Clear();
|
||||
feature_.Clear();
|
||||
cached_has_bits = _has_bits_[0];
|
||||
if (cached_has_bits & 3u) {
|
||||
if (cached_has_bits & 0x00000001u) {
|
||||
type_.ClearNonDefaultToEmptyNoArena();
|
||||
}
|
||||
if (cached_has_bits & 0x00000002u) {
|
||||
name_.ClearNonDefaultToEmptyNoArena();
|
||||
}
|
||||
}
|
||||
argument_ = 0;
|
||||
_has_bits_.Clear();
|
||||
_internal_metadata_.Clear();
|
||||
}
|
||||
|
||||
// Parses fields from `input` into this message until a zero tag is read.
// Fields 1, 2, 3, 4 and 7 are decoded into type, name, argument, parameter and
// feature; anything else is passed to SkipField together with the
// unknown-fields stream. Returns false as soon as any read fails, true
// otherwise.
bool FeatureFunctionDescriptor::MergePartialFromCodedStream(
    ::google::protobuf::io::CodedInputStream* input) {
#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure
  typedef ::google::protobuf::internal::WireFormatLite Wire;
  typedef ::google::protobuf::uint8 TagByte;

  ::google::protobuf::uint32 tag;
  // Output chain for unknown fields; it writes into the buffer supplied by the
  // setter that is bound to _internal_metadata_.
  ::google::protobuf::internal::LiteUnknownFieldSetter unknown_fields_setter(
      &_internal_metadata_);
  ::google::protobuf::io::StringOutputStream unknown_fields_output(
      unknown_fields_setter.buffer());
  ::google::protobuf::io::CodedOutputStream unknown_fields_stream(
      &unknown_fields_output, false);
  // @@protoc_insertion_point(parse_start:chrome_lang_id.FeatureFunctionDescriptor)
  while (true) {
    const ::std::pair<::google::protobuf::uint32, bool> next_tag =
        input->ReadTagWithCutoffNoLastTag(127u);
    tag = next_tag.first;
    if (!next_tag.second) goto handle_unusual;
    switch (Wire::GetTagFieldNumber(tag)) {
      // required string type = 1;
      case 1: {
        if (static_cast<TagByte>(tag) !=
            static_cast<TagByte>(10u /* 10 & 0xFF */)) {
          goto handle_unusual;
        }
        DO_(Wire::ReadString(input, this->mutable_type()));
        break;
      }

      // optional string name = 2;
      case 2: {
        if (static_cast<TagByte>(tag) !=
            static_cast<TagByte>(18u /* 18 & 0xFF */)) {
          goto handle_unusual;
        }
        DO_(Wire::ReadString(input, this->mutable_name()));
        break;
      }

      // optional int32 argument = 3 [default = 0];
      case 3: {
        if (static_cast<TagByte>(tag) !=
            static_cast<TagByte>(24u /* 24 & 0xFF */)) {
          goto handle_unusual;
        }
        // The has-bit is set before the value is read.
        set_has_argument();
        DO_((Wire::ReadPrimitive<
             ::google::protobuf::int32, Wire::TYPE_INT32>(
                 input, &argument_)));
        break;
      }

      // repeated .chrome_lang_id.Parameter parameter = 4;
      case 4: {
        if (static_cast<TagByte>(tag) !=
            static_cast<TagByte>(34u /* 34 & 0xFF */)) {
          goto handle_unusual;
        }
        DO_(Wire::ReadMessage(input, add_parameter()));
        break;
      }

      // repeated .chrome_lang_id.FeatureFunctionDescriptor feature = 7;
      case 7: {
        if (static_cast<TagByte>(tag) !=
            static_cast<TagByte>(58u /* 58 & 0xFF */)) {
          goto handle_unusual;
        }
        DO_(Wire::ReadMessage(input, add_feature()));
        break;
      }

      default: {
      handle_unusual:
        // A zero tag ends parsing successfully.
        if (tag == 0) {
          goto success;
        }
        DO_(Wire::SkipField(input, tag, &unknown_fields_stream));
        break;
      }
    }
  }
success:
  // @@protoc_insertion_point(parse_success:chrome_lang_id.FeatureFunctionDescriptor)
  return true;
failure:
  // @@protoc_insertion_point(parse_failure:chrome_lang_id.FeatureFunctionDescriptor)
  return false;
#undef DO_
}
|
||||
|
||||
// Writes the present scalar fields, then every `parameter` and `feature`
// element, to `output` in field-number order, followed by the raw bytes of the
// stored unknown fields.
void FeatureFunctionDescriptor::SerializeWithCachedSizes(
    ::google::protobuf::io::CodedOutputStream* output) const {
  // @@protoc_insertion_point(serialize_start:chrome_lang_id.FeatureFunctionDescriptor)
  typedef ::google::protobuf::internal::WireFormatLite Wire;
  const ::google::protobuf::uint32 present_bits = _has_bits_[0];

  // required string type = 1;
  if (present_bits & 0x00000001u) {
    Wire::WriteStringMaybeAliased(1, this->type(), output);
  }

  // optional string name = 2;
  if (present_bits & 0x00000002u) {
    Wire::WriteStringMaybeAliased(2, this->name(), output);
  }

  // optional int32 argument = 3 [default = 0];
  if (present_bits & 0x00000004u) {
    Wire::WriteInt32(3, this->argument(), output);
  }

  // repeated .chrome_lang_id.Parameter parameter = 4;
  const unsigned int parameter_count =
      static_cast<unsigned int>(this->parameter_size());
  for (unsigned int idx = 0; idx < parameter_count; ++idx) {
    Wire::WriteMessage(4, this->parameter(static_cast<int>(idx)), output);
  }

  // repeated .chrome_lang_id.FeatureFunctionDescriptor feature = 7;
  const unsigned int feature_count =
      static_cast<unsigned int>(this->feature_size());
  for (unsigned int idx = 0; idx < feature_count; ++idx) {
    Wire::WriteMessage(7, this->feature(static_cast<int>(idx)), output);
  }

  output->WriteRaw(
      _internal_metadata_.unknown_fields().data(),
      static_cast<int>(_internal_metadata_.unknown_fields().size()));
  // @@protoc_insertion_point(serialize_end:chrome_lang_id.FeatureFunctionDescriptor)
}
|
||||
|
||||
size_t FeatureFunctionDescriptor::ByteSizeLong() const {
|
||||
// @@protoc_insertion_point(message_byte_size_start:chrome_lang_id.FeatureFunctionDescriptor)
|
||||
size_t total_size = 0;
|
||||
|
||||
total_size += _internal_metadata_.unknown_fields().size();
|
||||
|
||||
// required string type = 1;
|
||||
if (has_type()) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::StringSize(
|
||||
this->type());
|
||||
}
|
||||
// repeated .chrome_lang_id.Parameter parameter = 4;
|
||||
{
|
||||
unsigned int count = static_cast<unsigned int>(this->parameter_size());
|
||||
total_size += 1UL * count;
|
||||
for (unsigned int i = 0; i < count; i++) {
|
||||
total_size +=
|
||||
::google::protobuf::internal::WireFormatLite::MessageSize(
|
||||
this->parameter(static_cast<int>(i)));
|
||||
}
|
||||
}
|
||||
|
||||
// repeated .chrome_lang_id.FeatureFunctionDescriptor feature = 7;
|
||||
{
|
||||
unsigned int count = static_cast<unsigned int>(this->feature_size());
|
||||
total_size += 1UL * count;
|
||||
for (unsigned int i = 0; i < count; i++) {
|
||||
total_size +=
|
||||
::google::protobuf::internal::WireFormatLite::MessageSize(
|
||||
this->feature(static_cast<int>(i)));
|
||||
}
|
||||
}
|
||||
|
||||
if (_has_bits_[0 / 32] & 6u) {
|
||||
// optional string name = 2;
|
||||
if (has_name()) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::StringSize(
|
||||
this->name());
|
||||
}
|
||||
|
||||
// optional int32 argument = 3 [default = 0];
|
||||
if (has_argument()) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::Int32Size(
|
||||
this->argument());
|
||||
}
|
||||
|
||||
}
|
||||
int cached_size = ::google::protobuf::internal::ToCachedSize(total_size);
|
||||
SetCachedSize(cached_size);
|
||||
return total_size;
|
||||
}
|
||||
|
||||
void FeatureFunctionDescriptor::CheckTypeAndMergeFrom(
|
||||
const ::google::protobuf::MessageLite& from) {
|
||||
MergeFrom(*::google::protobuf::down_cast<const FeatureFunctionDescriptor*>(&from));
|
||||
}
|
||||
|
||||
void FeatureFunctionDescriptor::MergeFrom(const FeatureFunctionDescriptor& from) {
|
||||
// @@protoc_insertion_point(class_specific_merge_from_start:chrome_lang_id.FeatureFunctionDescriptor)
|
||||
GOOGLE_DCHECK_NE(&from, this);
|
||||
_internal_metadata_.MergeFrom(from._internal_metadata_);
|
||||
::google::protobuf::uint32 cached_has_bits = 0;
|
||||
(void) cached_has_bits;
|
||||
|
||||
parameter_.MergeFrom(from.parameter_);
|
||||
feature_.MergeFrom(from.feature_);
|
||||
cached_has_bits = from._has_bits_[0];
|
||||
if (cached_has_bits & 7u) {
|
||||
if (cached_has_bits & 0x00000001u) {
|
||||
set_has_type();
|
||||
type_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.type_);
|
||||
}
|
||||
if (cached_has_bits & 0x00000002u) {
|
||||
set_has_name();
|
||||
name_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.name_);
|
||||
}
|
||||
if (cached_has_bits & 0x00000004u) {
|
||||
argument_ = from.argument_;
|
||||
}
|
||||
_has_bits_[0] |= cached_has_bits;
|
||||
}
|
||||
}
|
||||
|
||||
void FeatureFunctionDescriptor::CopyFrom(const FeatureFunctionDescriptor& from) {
|
||||
// @@protoc_insertion_point(class_specific_copy_from_start:chrome_lang_id.FeatureFunctionDescriptor)
|
||||
if (&from == this) return;
|
||||
Clear();
|
||||
MergeFrom(from);
|
||||
}
|
||||
|
||||
bool FeatureFunctionDescriptor::IsInitialized() const {
|
||||
if ((_has_bits_[0] & 0x00000001) != 0x00000001) return false;
|
||||
if (!::google::protobuf::internal::AllAreInitialized(this->feature())) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Exchanges contents with `other`; swapping with itself does nothing.
void FeatureFunctionDescriptor::Swap(FeatureFunctionDescriptor* other) {
  if (other != this) {
    InternalSwap(other);
  }
}
|
||||
// Swaps every member with `other`: both repeated fields, both string fields,
// `argument`, the has-bits word and the internal metadata.
//
// Fix: the first CastToBase argument had been corrupted to "¶meter_" (the
// "&para" prefix of "&parameter_" was decoded as an HTML entity); it is
// restored to the address of parameter_.
void FeatureFunctionDescriptor::InternalSwap(FeatureFunctionDescriptor* other) {
  using std::swap;
  CastToBase(&parameter_)->InternalSwap(CastToBase(&other->parameter_));
  CastToBase(&feature_)->InternalSwap(CastToBase(&other->feature_));
  type_.Swap(&other->type_,
             &::google::protobuf::internal::GetEmptyStringAlreadyInited(),
             GetArenaNoVirtual());
  name_.Swap(&other->name_,
             &::google::protobuf::internal::GetEmptyStringAlreadyInited(),
             GetArenaNoVirtual());
  swap(argument_, other->argument_);
  swap(_has_bits_[0], other->_has_bits_[0]);
  _internal_metadata_.Swap(&other->_internal_metadata_);
}
|
||||
|
||||
// Returns the fully qualified type name of this message as a string.
::std::string FeatureFunctionDescriptor::GetTypeName() const {
  return "chrome_lang_id.FeatureFunctionDescriptor";
}
|
||||
|
||||
|
||||
// ===================================================================
|
||||
|
||||
// Intentionally empty: no default-instance setup was generated for
// FeatureExtractorDescriptor.
void FeatureExtractorDescriptor::InitAsDefaultInstance() {}
|
||||
#if !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
const int FeatureExtractorDescriptor::kFeatureFieldNumber;
|
||||
#endif // !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
|
||||
// Default constructor: runs InitSCC on the FeatureExtractorDescriptor SCC
// record, then calls SharedCtor().
FeatureExtractorDescriptor::FeatureExtractorDescriptor()
    : ::google::protobuf::MessageLite(),
      _internal_metadata_(NULL) {
  ::google::protobuf::internal::InitSCC(
      &protobuf_feature_5fextractor_2eproto::scc_info_FeatureExtractorDescriptor.base);
  SharedCtor();
  // @@protoc_insertion_point(constructor:chrome_lang_id.FeatureExtractorDescriptor)
}
|
||||
// Copy constructor: copies the has-bits and the repeated `feature` field, then
// merges the internal metadata from `from`.
FeatureExtractorDescriptor::FeatureExtractorDescriptor(
    const FeatureExtractorDescriptor& from)
    : ::google::protobuf::MessageLite(),
      _internal_metadata_(NULL),
      _has_bits_(from._has_bits_),
      feature_(from.feature_) {
  _internal_metadata_.MergeFrom(from._internal_metadata_);
  // @@protoc_insertion_point(copy_constructor:chrome_lang_id.FeatureExtractorDescriptor)
}
|
||||
|
||||
// Intentionally empty: this message has no fields that need explicit
// initialization here.
void FeatureExtractorDescriptor::SharedCtor() {}
|
||||
|
||||
// Destructor: calls SharedDtor(), whose body is empty for this message.
FeatureExtractorDescriptor::~FeatureExtractorDescriptor() {
  // @@protoc_insertion_point(destructor:chrome_lang_id.FeatureExtractorDescriptor)
  SharedDtor();
}
|
||||
|
||||
// Intentionally empty: this message has no fields that need explicit
// destruction here.
void FeatureExtractorDescriptor::SharedDtor() {}
|
||||
|
||||
// Stores `size` in the cached-size slot.
void FeatureExtractorDescriptor::SetCachedSize(int size) const {
  _cached_size_.Set(size);
}
|
||||
// Returns the default FeatureExtractorDescriptor instance, running InitSCC on
// its SCC record first.
const FeatureExtractorDescriptor& FeatureExtractorDescriptor::default_instance() {
  ::google::protobuf::internal::InitSCC(
      &protobuf_feature_5fextractor_2eproto::scc_info_FeatureExtractorDescriptor.base);
  const FeatureExtractorDescriptor* const instance = internal_default_instance();
  return *instance;
}
|
||||
|
||||
|
||||
// Resets the message: empties the repeated `feature` field, then clears the
// has-bits and the internal metadata.
void FeatureExtractorDescriptor::Clear() {
  // @@protoc_insertion_point(message_clear_start:chrome_lang_id.FeatureExtractorDescriptor)
  // The local is never read in this function; the void cast keeps compilers
  // from warning about it.
  ::google::protobuf::uint32 cached_has_bits = 0;
  (void) cached_has_bits;

  feature_.Clear();
  _has_bits_.Clear();
  _internal_metadata_.Clear();
}
|
||||
|
||||
// Parses fields from `input` into this message until a zero tag is read.
// Field 1 appends a nested message to `feature`; anything else is passed to
// SkipField together with the unknown-fields stream. Returns false as soon as
// any read fails, true otherwise.
bool FeatureExtractorDescriptor::MergePartialFromCodedStream(
    ::google::protobuf::io::CodedInputStream* input) {
#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure
  typedef ::google::protobuf::internal::WireFormatLite Wire;
  typedef ::google::protobuf::uint8 TagByte;

  ::google::protobuf::uint32 tag;
  // Output chain for unknown fields; it writes into the buffer supplied by the
  // setter that is bound to _internal_metadata_.
  ::google::protobuf::internal::LiteUnknownFieldSetter unknown_fields_setter(
      &_internal_metadata_);
  ::google::protobuf::io::StringOutputStream unknown_fields_output(
      unknown_fields_setter.buffer());
  ::google::protobuf::io::CodedOutputStream unknown_fields_stream(
      &unknown_fields_output, false);
  // @@protoc_insertion_point(parse_start:chrome_lang_id.FeatureExtractorDescriptor)
  while (true) {
    const ::std::pair<::google::protobuf::uint32, bool> next_tag =
        input->ReadTagWithCutoffNoLastTag(127u);
    tag = next_tag.first;
    if (!next_tag.second) goto handle_unusual;
    switch (Wire::GetTagFieldNumber(tag)) {
      // repeated .chrome_lang_id.FeatureFunctionDescriptor feature = 1;
      case 1: {
        // Only the expected tag byte (10) is handled here.
        if (static_cast<TagByte>(tag) !=
            static_cast<TagByte>(10u /* 10 & 0xFF */)) {
          goto handle_unusual;
        }
        DO_(Wire::ReadMessage(input, add_feature()));
        break;
      }

      default: {
      handle_unusual:
        // A zero tag ends parsing successfully.
        if (tag == 0) {
          goto success;
        }
        DO_(Wire::SkipField(input, tag, &unknown_fields_stream));
        break;
      }
    }
  }
success:
  // @@protoc_insertion_point(parse_success:chrome_lang_id.FeatureExtractorDescriptor)
  return true;
failure:
  // @@protoc_insertion_point(parse_failure:chrome_lang_id.FeatureExtractorDescriptor)
  return false;
#undef DO_
}
|
||||
|
||||
// Writes every `feature` element to `output` as field 1, followed by the raw
// bytes of the stored unknown fields.
void FeatureExtractorDescriptor::SerializeWithCachedSizes(
    ::google::protobuf::io::CodedOutputStream* output) const {
  // @@protoc_insertion_point(serialize_start:chrome_lang_id.FeatureExtractorDescriptor)
  // The local is never read in this function; the void cast keeps compilers
  // from warning about it.
  ::google::protobuf::uint32 cached_has_bits = 0;
  (void) cached_has_bits;

  // repeated .chrome_lang_id.FeatureFunctionDescriptor feature = 1;
  const unsigned int feature_count =
      static_cast<unsigned int>(this->feature_size());
  for (unsigned int idx = 0; idx < feature_count; ++idx) {
    ::google::protobuf::internal::WireFormatLite::WriteMessage(
        1, this->feature(static_cast<int>(idx)), output);
  }

  output->WriteRaw(
      _internal_metadata_.unknown_fields().data(),
      static_cast<int>(_internal_metadata_.unknown_fields().size()));
  // @@protoc_insertion_point(serialize_end:chrome_lang_id.FeatureExtractorDescriptor)
}
|
||||
|
||||
size_t FeatureExtractorDescriptor::ByteSizeLong() const {
|
||||
// @@protoc_insertion_point(message_byte_size_start:chrome_lang_id.FeatureExtractorDescriptor)
|
||||
size_t total_size = 0;
|
||||
|
||||
total_size += _internal_metadata_.unknown_fields().size();
|
||||
|
||||
// repeated .chrome_lang_id.FeatureFunctionDescriptor feature = 1;
|
||||
{
|
||||
unsigned int count = static_cast<unsigned int>(this->feature_size());
|
||||
total_size += 1UL * count;
|
||||
for (unsigned int i = 0; i < count; i++) {
|
||||
total_size +=
|
||||
::google::protobuf::internal::WireFormatLite::MessageSize(
|
||||
this->feature(static_cast<int>(i)));
|
||||
}
|
||||
}
|
||||
|
||||
int cached_size = ::google::protobuf::internal::ToCachedSize(total_size);
|
||||
SetCachedSize(cached_size);
|
||||
return total_size;
|
||||
}
|
||||
|
||||
void FeatureExtractorDescriptor::CheckTypeAndMergeFrom(
|
||||
const ::google::protobuf::MessageLite& from) {
|
||||
MergeFrom(*::google::protobuf::down_cast<const FeatureExtractorDescriptor*>(&from));
|
||||
}
|
||||
|
||||
// Merges `from` into this message: first the internal metadata, then the
// repeated `feature` field. `from` must not be this object (debug-checked).
void FeatureExtractorDescriptor::MergeFrom(
    const FeatureExtractorDescriptor& from) {
  // @@protoc_insertion_point(class_specific_merge_from_start:chrome_lang_id.FeatureExtractorDescriptor)
  GOOGLE_DCHECK_NE(&from, this);

  this->_internal_metadata_.MergeFrom(from._internal_metadata_);

  ::google::protobuf::uint32 cached_has_bits = 0;
  (void) cached_has_bits;

  this->feature_.MergeFrom(from.feature_);
}
|
||||
|
||||
void FeatureExtractorDescriptor::CopyFrom(const FeatureExtractorDescriptor& from) {
|
||||
// @@protoc_insertion_point(class_specific_copy_from_start:chrome_lang_id.FeatureExtractorDescriptor)
|
||||
if (&from == this) return;
|
||||
Clear();
|
||||
MergeFrom(from);
|
||||
}
|
||||
|
||||
bool FeatureExtractorDescriptor::IsInitialized() const {
|
||||
if (!::google::protobuf::internal::AllAreInitialized(this->feature())) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Exchanges the contents of this message with `*other`; swapping a message
// with itself does nothing.
void FeatureExtractorDescriptor::Swap(FeatureExtractorDescriptor* other) {
  if (other != this) {
    InternalSwap(other);
  }
}
|
||||
// Member-wise swap with `*other`: the repeated `feature` field, the first
// has-bits word, and the internal metadata, in that order.
void FeatureExtractorDescriptor::InternalSwap(
    FeatureExtractorDescriptor* other) {
  using std::swap;

  auto* own_features = CastToBase(&feature_);
  auto* other_features = CastToBase(&other->feature_);
  own_features->InternalSwap(other_features);

  swap(_has_bits_[0], other->_has_bits_[0]);
  _internal_metadata_.Swap(&other->_internal_metadata_);
}
|
||||
|
||||
// Returns the fully-qualified proto type name of this message.
::std::string FeatureExtractorDescriptor::GetTypeName() const {
  return ::std::string("chrome_lang_id.FeatureExtractorDescriptor");
}
|
||||
|
||||
|
||||
// @@protoc_insertion_point(namespace_scope)
|
||||
} // namespace chrome_lang_id
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
// Explicit specialization for chrome_lang_id.Parameter; forwards to
// Arena::CreateInternal with the given arena.
template<> GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE ::chrome_lang_id::Parameter*
Arena::CreateMaybeMessage< ::chrome_lang_id::Parameter >(Arena* arena) {
  using MessageType = ::chrome_lang_id::Parameter;
  return Arena::CreateInternal< MessageType >(arena);
}
|
||||
// Explicit specialization for chrome_lang_id.FeatureFunctionDescriptor;
// forwards to Arena::CreateInternal with the given arena.
template<> GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE ::chrome_lang_id::FeatureFunctionDescriptor*
Arena::CreateMaybeMessage< ::chrome_lang_id::FeatureFunctionDescriptor >(Arena* arena) {
  using MessageType = ::chrome_lang_id::FeatureFunctionDescriptor;
  return Arena::CreateInternal< MessageType >(arena);
}
|
||||
// Explicit specialization for chrome_lang_id.FeatureExtractorDescriptor;
// forwards to Arena::CreateInternal with the given arena.
template<> GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE ::chrome_lang_id::FeatureExtractorDescriptor*
Arena::CreateMaybeMessage< ::chrome_lang_id::FeatureExtractorDescriptor >(Arena* arena) {
  using MessageType = ::chrome_lang_id::FeatureExtractorDescriptor;
  return Arena::CreateInternal< MessageType >(arena);
}
|
||||
} // namespace protobuf
|
||||
} // namespace google
|
||||
|
||||
// @@protoc_insertion_point(global_scope)
|
||||
@ -0,0 +1,904 @@
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// source: feature_extractor.proto
|
||||
|
||||
#ifndef PROTOBUF_INCLUDED_feature_5fextractor_2eproto
|
||||
#define PROTOBUF_INCLUDED_feature_5fextractor_2eproto
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
|
||||
#if GOOGLE_PROTOBUF_VERSION < 3006001
|
||||
#error This file was generated by a newer version of protoc which is
|
||||
#error incompatible with your Protocol Buffer headers. Please update
|
||||
#error your headers.
|
||||
#endif
|
||||
#if 3006001 < GOOGLE_PROTOBUF_MIN_PROTOC_VERSION
|
||||
#error This file was generated by an older version of protoc which is
|
||||
#error incompatible with your Protocol Buffer headers. Please
|
||||
#error regenerate this file with a newer version of protoc.
|
||||
#endif
|
||||
|
||||
#include <google/protobuf/io/coded_stream.h>
|
||||
#include <google/protobuf/arena.h>
|
||||
#include <google/protobuf/arenastring.h>
|
||||
#include <google/protobuf/generated_message_table_driven.h>
|
||||
#include <google/protobuf/generated_message_util.h>
|
||||
#include <google/protobuf/inlined_string_field.h>
|
||||
#include <google/protobuf/metadata_lite.h>
|
||||
#include <google/protobuf/message_lite.h>
|
||||
#include <google/protobuf/repeated_field.h> // IWYU pragma: export
|
||||
#include <google/protobuf/extension_set.h> // IWYU pragma: export
|
||||
// @@protoc_insertion_point(includes)
|
||||
#define PROTOBUF_INTERNAL_EXPORT_protobuf_feature_5fextractor_2eproto
|
||||
|
||||
namespace protobuf_feature_5fextractor_2eproto {
|
||||
// Internal implementation detail -- do not use these members.
|
||||
struct TableStruct {
|
||||
static const ::google::protobuf::internal::ParseTableField entries[];
|
||||
static const ::google::protobuf::internal::AuxillaryParseTableField aux[];
|
||||
static const ::google::protobuf::internal::ParseTable schema[3];
|
||||
static const ::google::protobuf::internal::FieldMetadata field_metadata[];
|
||||
static const ::google::protobuf::internal::SerializationTable serialization_table[];
|
||||
static const ::google::protobuf::uint32 offsets[];
|
||||
};
|
||||
} // namespace protobuf_feature_5fextractor_2eproto
|
||||
namespace chrome_lang_id {
|
||||
class FeatureExtractorDescriptor;
|
||||
class FeatureExtractorDescriptorDefaultTypeInternal;
|
||||
extern FeatureExtractorDescriptorDefaultTypeInternal _FeatureExtractorDescriptor_default_instance_;
|
||||
class FeatureFunctionDescriptor;
|
||||
class FeatureFunctionDescriptorDefaultTypeInternal;
|
||||
extern FeatureFunctionDescriptorDefaultTypeInternal _FeatureFunctionDescriptor_default_instance_;
|
||||
class Parameter;
|
||||
class ParameterDefaultTypeInternal;
|
||||
extern ParameterDefaultTypeInternal _Parameter_default_instance_;
|
||||
} // namespace chrome_lang_id
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
template<> ::chrome_lang_id::FeatureExtractorDescriptor* Arena::CreateMaybeMessage<::chrome_lang_id::FeatureExtractorDescriptor>(Arena*);
|
||||
template<> ::chrome_lang_id::FeatureFunctionDescriptor* Arena::CreateMaybeMessage<::chrome_lang_id::FeatureFunctionDescriptor>(Arena*);
|
||||
template<> ::chrome_lang_id::Parameter* Arena::CreateMaybeMessage<::chrome_lang_id::Parameter>(Arena*);
|
||||
} // namespace protobuf
|
||||
} // namespace google
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// ===================================================================
|
||||
|
||||
class Parameter : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:chrome_lang_id.Parameter) */ {
|
||||
public:
|
||||
Parameter();
|
||||
virtual ~Parameter();
|
||||
|
||||
Parameter(const Parameter& from);
|
||||
|
||||
inline Parameter& operator=(const Parameter& from) {
|
||||
CopyFrom(from);
|
||||
return *this;
|
||||
}
|
||||
#if LANG_CXX11
|
||||
Parameter(Parameter&& from) noexcept
|
||||
: Parameter() {
|
||||
*this = ::std::move(from);
|
||||
}
|
||||
|
||||
inline Parameter& operator=(Parameter&& from) noexcept {
|
||||
if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) {
|
||||
if (this != &from) InternalSwap(&from);
|
||||
} else {
|
||||
CopyFrom(from);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
inline const ::std::string& unknown_fields() const {
|
||||
return _internal_metadata_.unknown_fields();
|
||||
}
|
||||
inline ::std::string* mutable_unknown_fields() {
|
||||
return _internal_metadata_.mutable_unknown_fields();
|
||||
}
|
||||
|
||||
static const Parameter& default_instance();
|
||||
|
||||
static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY
|
||||
static inline const Parameter* internal_default_instance() {
|
||||
return reinterpret_cast<const Parameter*>(
|
||||
&_Parameter_default_instance_);
|
||||
}
|
||||
static constexpr int kIndexInFileMessages =
|
||||
0;
|
||||
|
||||
void Swap(Parameter* other);
|
||||
friend void swap(Parameter& a, Parameter& b) {
|
||||
a.Swap(&b);
|
||||
}
|
||||
|
||||
// implements Message ----------------------------------------------
|
||||
|
||||
inline Parameter* New() const final {
|
||||
return CreateMaybeMessage<Parameter>(NULL);
|
||||
}
|
||||
|
||||
Parameter* New(::google::protobuf::Arena* arena) const final {
|
||||
return CreateMaybeMessage<Parameter>(arena);
|
||||
}
|
||||
void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from)
|
||||
final;
|
||||
void CopyFrom(const Parameter& from);
|
||||
void MergeFrom(const Parameter& from);
|
||||
void Clear() final;
|
||||
bool IsInitialized() const final;
|
||||
|
||||
size_t ByteSizeLong() const final;
|
||||
bool MergePartialFromCodedStream(
|
||||
::google::protobuf::io::CodedInputStream* input) final;
|
||||
void SerializeWithCachedSizes(
|
||||
::google::protobuf::io::CodedOutputStream* output) const final;
|
||||
void DiscardUnknownFields();
|
||||
int GetCachedSize() const final { return _cached_size_.Get(); }
|
||||
|
||||
private:
|
||||
void SharedCtor();
|
||||
void SharedDtor();
|
||||
void SetCachedSize(int size) const;
|
||||
void InternalSwap(Parameter* other);
|
||||
private:
|
||||
inline ::google::protobuf::Arena* GetArenaNoVirtual() const {
|
||||
return NULL;
|
||||
}
|
||||
inline void* MaybeArenaPtr() const {
|
||||
return NULL;
|
||||
}
|
||||
public:
|
||||
|
||||
::std::string GetTypeName() const final;
|
||||
|
||||
// nested types ----------------------------------------------------
|
||||
|
||||
// accessors -------------------------------------------------------
|
||||
|
||||
// optional string name = 1;
|
||||
bool has_name() const;
|
||||
void clear_name();
|
||||
static const int kNameFieldNumber = 1;
|
||||
const ::std::string& name() const;
|
||||
void set_name(const ::std::string& value);
|
||||
#if LANG_CXX11
|
||||
void set_name(::std::string&& value);
|
||||
#endif
|
||||
void set_name(const char* value);
|
||||
void set_name(const char* value, size_t size);
|
||||
::std::string* mutable_name();
|
||||
::std::string* release_name();
|
||||
void set_allocated_name(::std::string* name);
|
||||
|
||||
// optional string value = 2;
|
||||
bool has_value() const;
|
||||
void clear_value();
|
||||
static const int kValueFieldNumber = 2;
|
||||
const ::std::string& value() const;
|
||||
void set_value(const ::std::string& value);
|
||||
#if LANG_CXX11
|
||||
void set_value(::std::string&& value);
|
||||
#endif
|
||||
void set_value(const char* value);
|
||||
void set_value(const char* value, size_t size);
|
||||
::std::string* mutable_value();
|
||||
::std::string* release_value();
|
||||
void set_allocated_value(::std::string* value);
|
||||
|
||||
// @@protoc_insertion_point(class_scope:chrome_lang_id.Parameter)
|
||||
private:
|
||||
void set_has_name();
|
||||
void clear_has_name();
|
||||
void set_has_value();
|
||||
void clear_has_value();
|
||||
|
||||
::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_;
|
||||
::google::protobuf::internal::HasBits<1> _has_bits_;
|
||||
mutable ::google::protobuf::internal::CachedSize _cached_size_;
|
||||
::google::protobuf::internal::ArenaStringPtr name_;
|
||||
::google::protobuf::internal::ArenaStringPtr value_;
|
||||
friend struct ::protobuf_feature_5fextractor_2eproto::TableStruct;
|
||||
};
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
class FeatureFunctionDescriptor : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:chrome_lang_id.FeatureFunctionDescriptor) */ {
|
||||
public:
|
||||
FeatureFunctionDescriptor();
|
||||
virtual ~FeatureFunctionDescriptor();
|
||||
|
||||
FeatureFunctionDescriptor(const FeatureFunctionDescriptor& from);
|
||||
|
||||
inline FeatureFunctionDescriptor& operator=(const FeatureFunctionDescriptor& from) {
|
||||
CopyFrom(from);
|
||||
return *this;
|
||||
}
|
||||
#if LANG_CXX11
|
||||
FeatureFunctionDescriptor(FeatureFunctionDescriptor&& from) noexcept
|
||||
: FeatureFunctionDescriptor() {
|
||||
*this = ::std::move(from);
|
||||
}
|
||||
|
||||
inline FeatureFunctionDescriptor& operator=(FeatureFunctionDescriptor&& from) noexcept {
|
||||
if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) {
|
||||
if (this != &from) InternalSwap(&from);
|
||||
} else {
|
||||
CopyFrom(from);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
inline const ::std::string& unknown_fields() const {
|
||||
return _internal_metadata_.unknown_fields();
|
||||
}
|
||||
inline ::std::string* mutable_unknown_fields() {
|
||||
return _internal_metadata_.mutable_unknown_fields();
|
||||
}
|
||||
|
||||
static const FeatureFunctionDescriptor& default_instance();
|
||||
|
||||
static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY
|
||||
static inline const FeatureFunctionDescriptor* internal_default_instance() {
|
||||
return reinterpret_cast<const FeatureFunctionDescriptor*>(
|
||||
&_FeatureFunctionDescriptor_default_instance_);
|
||||
}
|
||||
static constexpr int kIndexInFileMessages =
|
||||
1;
|
||||
|
||||
void Swap(FeatureFunctionDescriptor* other);
|
||||
friend void swap(FeatureFunctionDescriptor& a, FeatureFunctionDescriptor& b) {
|
||||
a.Swap(&b);
|
||||
}
|
||||
|
||||
// implements Message ----------------------------------------------
|
||||
|
||||
inline FeatureFunctionDescriptor* New() const final {
|
||||
return CreateMaybeMessage<FeatureFunctionDescriptor>(NULL);
|
||||
}
|
||||
|
||||
FeatureFunctionDescriptor* New(::google::protobuf::Arena* arena) const final {
|
||||
return CreateMaybeMessage<FeatureFunctionDescriptor>(arena);
|
||||
}
|
||||
void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from)
|
||||
final;
|
||||
void CopyFrom(const FeatureFunctionDescriptor& from);
|
||||
void MergeFrom(const FeatureFunctionDescriptor& from);
|
||||
void Clear() final;
|
||||
bool IsInitialized() const final;
|
||||
|
||||
size_t ByteSizeLong() const final;
|
||||
bool MergePartialFromCodedStream(
|
||||
::google::protobuf::io::CodedInputStream* input) final;
|
||||
void SerializeWithCachedSizes(
|
||||
::google::protobuf::io::CodedOutputStream* output) const final;
|
||||
void DiscardUnknownFields();
|
||||
int GetCachedSize() const final { return _cached_size_.Get(); }
|
||||
|
||||
private:
|
||||
void SharedCtor();
|
||||
void SharedDtor();
|
||||
void SetCachedSize(int size) const;
|
||||
void InternalSwap(FeatureFunctionDescriptor* other);
|
||||
private:
|
||||
inline ::google::protobuf::Arena* GetArenaNoVirtual() const {
|
||||
return NULL;
|
||||
}
|
||||
inline void* MaybeArenaPtr() const {
|
||||
return NULL;
|
||||
}
|
||||
public:
|
||||
|
||||
::std::string GetTypeName() const final;
|
||||
|
||||
// nested types ----------------------------------------------------
|
||||
|
||||
// accessors -------------------------------------------------------
|
||||
|
||||
// repeated .chrome_lang_id.Parameter parameter = 4;
|
||||
int parameter_size() const;
|
||||
void clear_parameter();
|
||||
static const int kParameterFieldNumber = 4;
|
||||
::chrome_lang_id::Parameter* mutable_parameter(int index);
|
||||
::google::protobuf::RepeatedPtrField< ::chrome_lang_id::Parameter >*
|
||||
mutable_parameter();
|
||||
const ::chrome_lang_id::Parameter& parameter(int index) const;
|
||||
::chrome_lang_id::Parameter* add_parameter();
|
||||
const ::google::protobuf::RepeatedPtrField< ::chrome_lang_id::Parameter >&
|
||||
parameter() const;
|
||||
|
||||
// repeated .chrome_lang_id.FeatureFunctionDescriptor feature = 7;
|
||||
int feature_size() const;
|
||||
void clear_feature();
|
||||
static const int kFeatureFieldNumber = 7;
|
||||
::chrome_lang_id::FeatureFunctionDescriptor* mutable_feature(int index);
|
||||
::google::protobuf::RepeatedPtrField< ::chrome_lang_id::FeatureFunctionDescriptor >*
|
||||
mutable_feature();
|
||||
const ::chrome_lang_id::FeatureFunctionDescriptor& feature(int index) const;
|
||||
::chrome_lang_id::FeatureFunctionDescriptor* add_feature();
|
||||
const ::google::protobuf::RepeatedPtrField< ::chrome_lang_id::FeatureFunctionDescriptor >&
|
||||
feature() const;
|
||||
|
||||
// required string type = 1;
|
||||
bool has_type() const;
|
||||
void clear_type();
|
||||
static const int kTypeFieldNumber = 1;
|
||||
const ::std::string& type() const;
|
||||
void set_type(const ::std::string& value);
|
||||
#if LANG_CXX11
|
||||
void set_type(::std::string&& value);
|
||||
#endif
|
||||
void set_type(const char* value);
|
||||
void set_type(const char* value, size_t size);
|
||||
::std::string* mutable_type();
|
||||
::std::string* release_type();
|
||||
void set_allocated_type(::std::string* type);
|
||||
|
||||
// optional string name = 2;
|
||||
bool has_name() const;
|
||||
void clear_name();
|
||||
static const int kNameFieldNumber = 2;
|
||||
const ::std::string& name() const;
|
||||
void set_name(const ::std::string& value);
|
||||
#if LANG_CXX11
|
||||
void set_name(::std::string&& value);
|
||||
#endif
|
||||
void set_name(const char* value);
|
||||
void set_name(const char* value, size_t size);
|
||||
::std::string* mutable_name();
|
||||
::std::string* release_name();
|
||||
void set_allocated_name(::std::string* name);
|
||||
|
||||
// optional int32 argument = 3 [default = 0];
|
||||
bool has_argument() const;
|
||||
void clear_argument();
|
||||
static const int kArgumentFieldNumber = 3;
|
||||
::google::protobuf::int32 argument() const;
|
||||
void set_argument(::google::protobuf::int32 value);
|
||||
|
||||
// @@protoc_insertion_point(class_scope:chrome_lang_id.FeatureFunctionDescriptor)
|
||||
private:
|
||||
void set_has_type();
|
||||
void clear_has_type();
|
||||
void set_has_name();
|
||||
void clear_has_name();
|
||||
void set_has_argument();
|
||||
void clear_has_argument();
|
||||
|
||||
::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_;
|
||||
::google::protobuf::internal::HasBits<1> _has_bits_;
|
||||
mutable ::google::protobuf::internal::CachedSize _cached_size_;
|
||||
::google::protobuf::RepeatedPtrField< ::chrome_lang_id::Parameter > parameter_;
|
||||
::google::protobuf::RepeatedPtrField< ::chrome_lang_id::FeatureFunctionDescriptor > feature_;
|
||||
::google::protobuf::internal::ArenaStringPtr type_;
|
||||
::google::protobuf::internal::ArenaStringPtr name_;
|
||||
::google::protobuf::int32 argument_;
|
||||
friend struct ::protobuf_feature_5fextractor_2eproto::TableStruct;
|
||||
};
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
class FeatureExtractorDescriptor : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:chrome_lang_id.FeatureExtractorDescriptor) */ {
|
||||
public:
|
||||
FeatureExtractorDescriptor();
|
||||
virtual ~FeatureExtractorDescriptor();
|
||||
|
||||
FeatureExtractorDescriptor(const FeatureExtractorDescriptor& from);
|
||||
|
||||
inline FeatureExtractorDescriptor& operator=(const FeatureExtractorDescriptor& from) {
|
||||
CopyFrom(from);
|
||||
return *this;
|
||||
}
|
||||
#if LANG_CXX11
|
||||
FeatureExtractorDescriptor(FeatureExtractorDescriptor&& from) noexcept
|
||||
: FeatureExtractorDescriptor() {
|
||||
*this = ::std::move(from);
|
||||
}
|
||||
|
||||
inline FeatureExtractorDescriptor& operator=(FeatureExtractorDescriptor&& from) noexcept {
|
||||
if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) {
|
||||
if (this != &from) InternalSwap(&from);
|
||||
} else {
|
||||
CopyFrom(from);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
inline const ::std::string& unknown_fields() const {
|
||||
return _internal_metadata_.unknown_fields();
|
||||
}
|
||||
inline ::std::string* mutable_unknown_fields() {
|
||||
return _internal_metadata_.mutable_unknown_fields();
|
||||
}
|
||||
|
||||
static const FeatureExtractorDescriptor& default_instance();
|
||||
|
||||
static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY
|
||||
static inline const FeatureExtractorDescriptor* internal_default_instance() {
|
||||
return reinterpret_cast<const FeatureExtractorDescriptor*>(
|
||||
&_FeatureExtractorDescriptor_default_instance_);
|
||||
}
|
||||
static constexpr int kIndexInFileMessages =
|
||||
2;
|
||||
|
||||
void Swap(FeatureExtractorDescriptor* other);
|
||||
friend void swap(FeatureExtractorDescriptor& a, FeatureExtractorDescriptor& b) {
|
||||
a.Swap(&b);
|
||||
}
|
||||
|
||||
// implements Message ----------------------------------------------
|
||||
|
||||
inline FeatureExtractorDescriptor* New() const final {
|
||||
return CreateMaybeMessage<FeatureExtractorDescriptor>(NULL);
|
||||
}
|
||||
|
||||
FeatureExtractorDescriptor* New(::google::protobuf::Arena* arena) const final {
|
||||
return CreateMaybeMessage<FeatureExtractorDescriptor>(arena);
|
||||
}
|
||||
void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from)
|
||||
final;
|
||||
void CopyFrom(const FeatureExtractorDescriptor& from);
|
||||
void MergeFrom(const FeatureExtractorDescriptor& from);
|
||||
void Clear() final;
|
||||
bool IsInitialized() const final;
|
||||
|
||||
size_t ByteSizeLong() const final;
|
||||
bool MergePartialFromCodedStream(
|
||||
::google::protobuf::io::CodedInputStream* input) final;
|
||||
void SerializeWithCachedSizes(
|
||||
::google::protobuf::io::CodedOutputStream* output) const final;
|
||||
void DiscardUnknownFields();
|
||||
int GetCachedSize() const final { return _cached_size_.Get(); }
|
||||
|
||||
private:
|
||||
void SharedCtor();
|
||||
void SharedDtor();
|
||||
void SetCachedSize(int size) const;
|
||||
void InternalSwap(FeatureExtractorDescriptor* other);
|
||||
private:
|
||||
inline ::google::protobuf::Arena* GetArenaNoVirtual() const {
|
||||
return NULL;
|
||||
}
|
||||
inline void* MaybeArenaPtr() const {
|
||||
return NULL;
|
||||
}
|
||||
public:
|
||||
|
||||
::std::string GetTypeName() const final;
|
||||
|
||||
// nested types ----------------------------------------------------
|
||||
|
||||
// accessors -------------------------------------------------------
|
||||
|
||||
// repeated .chrome_lang_id.FeatureFunctionDescriptor feature = 1;
|
||||
int feature_size() const;
|
||||
void clear_feature();
|
||||
static const int kFeatureFieldNumber = 1;
|
||||
::chrome_lang_id::FeatureFunctionDescriptor* mutable_feature(int index);
|
||||
::google::protobuf::RepeatedPtrField< ::chrome_lang_id::FeatureFunctionDescriptor >*
|
||||
mutable_feature();
|
||||
const ::chrome_lang_id::FeatureFunctionDescriptor& feature(int index) const;
|
||||
::chrome_lang_id::FeatureFunctionDescriptor* add_feature();
|
||||
const ::google::protobuf::RepeatedPtrField< ::chrome_lang_id::FeatureFunctionDescriptor >&
|
||||
feature() const;
|
||||
|
||||
// @@protoc_insertion_point(class_scope:chrome_lang_id.FeatureExtractorDescriptor)
|
||||
private:
|
||||
|
||||
::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_;
|
||||
::google::protobuf::internal::HasBits<1> _has_bits_;
|
||||
mutable ::google::protobuf::internal::CachedSize _cached_size_;
|
||||
::google::protobuf::RepeatedPtrField< ::chrome_lang_id::FeatureFunctionDescriptor > feature_;
|
||||
friend struct ::protobuf_feature_5fextractor_2eproto::TableStruct;
|
||||
};
|
||||
// ===================================================================
|
||||
|
||||
|
||||
// ===================================================================
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
|
||||
#endif // __GNUC__
|
||||
// Parameter
|
||||
|
||||
// optional string name = 1;
|
||||
inline bool Parameter::has_name() const {
|
||||
return (_has_bits_[0] & 0x00000001u) != 0;
|
||||
}
|
||||
// Sets the has-bit for `name` (mask 0x00000001u).
inline void Parameter::set_has_name() {
  _has_bits_[0] = _has_bits_[0] | 0x00000001u;
}
|
||||
// Clears the has-bit for `name` (mask 0x00000001u).
inline void Parameter::clear_has_name() {
  _has_bits_[0] = _has_bits_[0] & ~0x00000001u;
}
|
||||
// Resets `name` to empty and clears its has-bit.
inline void Parameter::clear_name() {
  const ::std::string* empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  name_.ClearToEmptyNoArena(empty_default);
  clear_has_name();
}
|
||||
inline const ::std::string& Parameter::name() const {
|
||||
// @@protoc_insertion_point(field_get:chrome_lang_id.Parameter.name)
|
||||
return name_.GetNoArena();
|
||||
}
|
||||
// Marks `name` as present and stores a copy of `value`.
inline void Parameter::set_name(const ::std::string& value) {
  set_has_name();
  const ::std::string* empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  name_.SetNoArena(empty_default, value);
  // @@protoc_insertion_point(field_set:chrome_lang_id.Parameter.name)
}
|
||||
#if LANG_CXX11
|
||||
// Marks `name` as present and hands `value` to the field as an rvalue.
inline void Parameter::set_name(::std::string&& value) {
  set_has_name();
  const ::std::string* empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  name_.SetNoArena(empty_default, ::std::move(value));
  // @@protoc_insertion_point(field_set_rvalue:chrome_lang_id.Parameter.name)
}
|
||||
#endif
|
||||
// Marks `name` as present and stores the NUL-terminated string `value`.
// `value` must not be null (debug-checked).
inline void Parameter::set_name(const char* value) {
  GOOGLE_DCHECK(value != nullptr);
  set_has_name();
  const ::std::string* empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  name_.SetNoArena(empty_default, ::std::string(value));
  // @@protoc_insertion_point(field_set_char:chrome_lang_id.Parameter.name)
}
|
||||
// Marks `name` as present and stores the first `size` bytes at `value`.
inline void Parameter::set_name(const char* value, size_t size) {
  set_has_name();
  const ::std::string* empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  name_.SetNoArena(empty_default, ::std::string(value, size));
  // @@protoc_insertion_point(field_set_pointer:chrome_lang_id.Parameter.name)
}
|
||||
inline ::std::string* Parameter::mutable_name() {
|
||||
set_has_name();
|
||||
// @@protoc_insertion_point(field_mutable:chrome_lang_id.Parameter.name)
|
||||
return name_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
}
|
||||
inline ::std::string* Parameter::release_name() {
|
||||
// @@protoc_insertion_point(field_release:chrome_lang_id.Parameter.name)
|
||||
if (!has_name()) {
|
||||
return NULL;
|
||||
}
|
||||
clear_has_name();
|
||||
return name_.ReleaseNonDefaultNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
}
|
||||
inline void Parameter::set_allocated_name(::std::string* name) {
|
||||
if (name != NULL) {
|
||||
set_has_name();
|
||||
} else {
|
||||
clear_has_name();
|
||||
}
|
||||
name_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), name);
|
||||
// @@protoc_insertion_point(field_set_allocated:chrome_lang_id.Parameter.name)
|
||||
}
|
||||
|
||||
// optional string value = 2;
|
||||
inline bool Parameter::has_value() const {
|
||||
return (_has_bits_[0] & 0x00000002u) != 0;
|
||||
}
|
||||
// Sets the has-bit for `value` (mask 0x00000002u).
inline void Parameter::set_has_value() {
  _has_bits_[0] = _has_bits_[0] | 0x00000002u;
}
|
||||
// Clears the has-bit for `value` (mask 0x00000002u).
inline void Parameter::clear_has_value() {
  _has_bits_[0] = _has_bits_[0] & ~0x00000002u;
}
|
||||
// Resets `value` to empty and clears its has-bit.
inline void Parameter::clear_value() {
  const ::std::string* empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  value_.ClearToEmptyNoArena(empty_default);
  clear_has_value();
}
|
||||
inline const ::std::string& Parameter::value() const {
|
||||
// @@protoc_insertion_point(field_get:chrome_lang_id.Parameter.value)
|
||||
return value_.GetNoArena();
|
||||
}
|
||||
// Marks `value` as present and stores a copy of the argument.
inline void Parameter::set_value(const ::std::string& value) {
  set_has_value();
  const ::std::string* empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  value_.SetNoArena(empty_default, value);
  // @@protoc_insertion_point(field_set:chrome_lang_id.Parameter.value)
}
|
||||
#if LANG_CXX11
|
||||
// Marks `value` as present and hands the argument to the field as an rvalue.
inline void Parameter::set_value(::std::string&& value) {
  set_has_value();
  const ::std::string* empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  value_.SetNoArena(empty_default, ::std::move(value));
  // @@protoc_insertion_point(field_set_rvalue:chrome_lang_id.Parameter.value)
}
|
||||
#endif
|
||||
// Marks `value` as present and stores the NUL-terminated string argument.
// The pointer must not be null (debug-checked).
inline void Parameter::set_value(const char* value) {
  GOOGLE_DCHECK(value != nullptr);
  set_has_value();
  const ::std::string* empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  value_.SetNoArena(empty_default, ::std::string(value));
  // @@protoc_insertion_point(field_set_char:chrome_lang_id.Parameter.value)
}
|
||||
// Marks `value` as present and stores the first `size` bytes at the pointer.
inline void Parameter::set_value(const char* value, size_t size) {
  set_has_value();
  const ::std::string* empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  value_.SetNoArena(empty_default, ::std::string(value, size));
  // @@protoc_insertion_point(field_set_pointer:chrome_lang_id.Parameter.value)
}
|
||||
inline ::std::string* Parameter::mutable_value() {
|
||||
set_has_value();
|
||||
// @@protoc_insertion_point(field_mutable:chrome_lang_id.Parameter.value)
|
||||
return value_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
}
|
||||
inline ::std::string* Parameter::release_value() {
|
||||
// @@protoc_insertion_point(field_release:chrome_lang_id.Parameter.value)
|
||||
if (!has_value()) {
|
||||
return NULL;
|
||||
}
|
||||
clear_has_value();
|
||||
return value_.ReleaseNonDefaultNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
}
|
||||
inline void Parameter::set_allocated_value(::std::string* value) {
|
||||
if (value != NULL) {
|
||||
set_has_value();
|
||||
} else {
|
||||
clear_has_value();
|
||||
}
|
||||
value_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value);
|
||||
// @@protoc_insertion_point(field_set_allocated:chrome_lang_id.Parameter.value)
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
// FeatureFunctionDescriptor
|
||||
|
||||
// required string type = 1;
|
||||
inline bool FeatureFunctionDescriptor::has_type() const {
|
||||
return (_has_bits_[0] & 0x00000001u) != 0;
|
||||
}
|
||||
// Sets the has-bit for `type` (mask 0x00000001u).
inline void FeatureFunctionDescriptor::set_has_type() {
  _has_bits_[0] = _has_bits_[0] | 0x00000001u;
}
|
||||
// Clears the has-bit for `type` (mask 0x00000001u).
inline void FeatureFunctionDescriptor::clear_has_type() {
  _has_bits_[0] = _has_bits_[0] & ~0x00000001u;
}
|
||||
// Resets `type` to empty and clears its has-bit.
inline void FeatureFunctionDescriptor::clear_type() {
  const ::std::string* empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  type_.ClearToEmptyNoArena(empty_default);
  clear_has_type();
}
|
||||
inline const ::std::string& FeatureFunctionDescriptor::type() const {
|
||||
// @@protoc_insertion_point(field_get:chrome_lang_id.FeatureFunctionDescriptor.type)
|
||||
return type_.GetNoArena();
|
||||
}
|
||||
// Marks `type` as present and stores a copy of `value`.
inline void FeatureFunctionDescriptor::set_type(const ::std::string& value) {
  set_has_type();
  const ::std::string* empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  type_.SetNoArena(empty_default, value);
  // @@protoc_insertion_point(field_set:chrome_lang_id.FeatureFunctionDescriptor.type)
}
|
||||
#if LANG_CXX11
// Rvalue overload: marks `type` present and forwards `value` to SetNoArena()
// through ::std::move. Only compiled when LANG_CXX11 is true.
inline void FeatureFunctionDescriptor::set_type(::std::string&& value) {
  set_has_type();
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  type_.SetNoArena(empty_default, ::std::move(value));
  // @@protoc_insertion_point(field_set_rvalue:chrome_lang_id.FeatureFunctionDescriptor.type)
}
#endif
|
||||
// C-string overload: `value` must not be NULL (checked with GOOGLE_DCHECK).
// Marks `type` present and stores a ::std::string built from `value`.
inline void FeatureFunctionDescriptor::set_type(const char* value) {
  GOOGLE_DCHECK(value != NULL);
  set_has_type();
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  type_.SetNoArena(empty_default, ::std::string(value));
  // @@protoc_insertion_point(field_set_char:chrome_lang_id.FeatureFunctionDescriptor.type)
}
|
||||
// Buffer overload: marks `type` present and stores the first `size` bytes
// starting at `value`.
inline void FeatureFunctionDescriptor::set_type(const char* value, size_t size) {
  set_has_type();
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  // `value` is already a const char*, so no pointer cast is needed here.
  type_.SetNoArena(empty_default, ::std::string(value, size));
  // @@protoc_insertion_point(field_set_pointer:chrome_lang_id.FeatureFunctionDescriptor.type)
}
|
||||
inline ::std::string* FeatureFunctionDescriptor::mutable_type() {
|
||||
set_has_type();
|
||||
// @@protoc_insertion_point(field_mutable:chrome_lang_id.FeatureFunctionDescriptor.type)
|
||||
return type_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
}
|
||||
inline ::std::string* FeatureFunctionDescriptor::release_type() {
|
||||
// @@protoc_insertion_point(field_release:chrome_lang_id.FeatureFunctionDescriptor.type)
|
||||
if (!has_type()) {
|
||||
return NULL;
|
||||
}
|
||||
clear_has_type();
|
||||
return type_.ReleaseNonDefaultNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
}
|
||||
inline void FeatureFunctionDescriptor::set_allocated_type(::std::string* type) {
|
||||
if (type != NULL) {
|
||||
set_has_type();
|
||||
} else {
|
||||
clear_has_type();
|
||||
}
|
||||
type_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), type);
|
||||
// @@protoc_insertion_point(field_set_allocated:chrome_lang_id.FeatureFunctionDescriptor.type)
|
||||
}
|
||||
|
||||
// optional string name = 2;
|
||||
inline bool FeatureFunctionDescriptor::has_name() const {
|
||||
return (_has_bits_[0] & 0x00000002u) != 0;
|
||||
}
|
||||
// Sets bit 0x2 of _has_bits_[0], marking `name` as present.
inline void FeatureFunctionDescriptor::set_has_name() {
  _has_bits_[0] = _has_bits_[0] | 0x00000002u;
}
|
||||
// Clears bit 0x2 of _has_bits_[0], marking `name` as absent.
inline void FeatureFunctionDescriptor::clear_has_name() {
  _has_bits_[0] = _has_bits_[0] & ~0x00000002u;
}
|
||||
// Resets `name` via ClearToEmptyNoArena() and then clears its has-bit.
inline void FeatureFunctionDescriptor::clear_name() {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  name_.ClearToEmptyNoArena(empty_default);
  clear_has_name();
}
|
||||
inline const ::std::string& FeatureFunctionDescriptor::name() const {
|
||||
// @@protoc_insertion_point(field_get:chrome_lang_id.FeatureFunctionDescriptor.name)
|
||||
return name_.GetNoArena();
|
||||
}
|
||||
// Marks `name` present and stores `value` in it.
inline void FeatureFunctionDescriptor::set_name(const ::std::string& value) {
  set_has_name();
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  name_.SetNoArena(empty_default, value);
  // @@protoc_insertion_point(field_set:chrome_lang_id.FeatureFunctionDescriptor.name)
}
|
||||
#if LANG_CXX11
// Rvalue overload: marks `name` present and forwards `value` to SetNoArena()
// through ::std::move. Only compiled when LANG_CXX11 is true.
inline void FeatureFunctionDescriptor::set_name(::std::string&& value) {
  set_has_name();
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  name_.SetNoArena(empty_default, ::std::move(value));
  // @@protoc_insertion_point(field_set_rvalue:chrome_lang_id.FeatureFunctionDescriptor.name)
}
#endif
|
||||
// C-string overload: `value` must not be NULL (checked with GOOGLE_DCHECK).
// Marks `name` present and stores a ::std::string built from `value`.
inline void FeatureFunctionDescriptor::set_name(const char* value) {
  GOOGLE_DCHECK(value != NULL);
  set_has_name();
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  name_.SetNoArena(empty_default, ::std::string(value));
  // @@protoc_insertion_point(field_set_char:chrome_lang_id.FeatureFunctionDescriptor.name)
}
|
||||
// Buffer overload: marks `name` present and stores the first `size` bytes
// starting at `value`.
inline void FeatureFunctionDescriptor::set_name(const char* value, size_t size) {
  set_has_name();
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  // `value` is already a const char*, so no pointer cast is needed here.
  name_.SetNoArena(empty_default, ::std::string(value, size));
  // @@protoc_insertion_point(field_set_pointer:chrome_lang_id.FeatureFunctionDescriptor.name)
}
|
||||
inline ::std::string* FeatureFunctionDescriptor::mutable_name() {
|
||||
set_has_name();
|
||||
// @@protoc_insertion_point(field_mutable:chrome_lang_id.FeatureFunctionDescriptor.name)
|
||||
return name_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
}
|
||||
inline ::std::string* FeatureFunctionDescriptor::release_name() {
|
||||
// @@protoc_insertion_point(field_release:chrome_lang_id.FeatureFunctionDescriptor.name)
|
||||
if (!has_name()) {
|
||||
return NULL;
|
||||
}
|
||||
clear_has_name();
|
||||
return name_.ReleaseNonDefaultNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
}
|
||||
inline void FeatureFunctionDescriptor::set_allocated_name(::std::string* name) {
|
||||
if (name != NULL) {
|
||||
set_has_name();
|
||||
} else {
|
||||
clear_has_name();
|
||||
}
|
||||
name_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), name);
|
||||
// @@protoc_insertion_point(field_set_allocated:chrome_lang_id.FeatureFunctionDescriptor.name)
|
||||
}
|
||||
|
||||
// optional int32 argument = 3 [default = 0];
|
||||
inline bool FeatureFunctionDescriptor::has_argument() const {
|
||||
return (_has_bits_[0] & 0x00000004u) != 0;
|
||||
}
|
||||
// Sets bit 0x4 of _has_bits_[0], marking `argument` as present.
inline void FeatureFunctionDescriptor::set_has_argument() {
  _has_bits_[0] = _has_bits_[0] | 0x00000004u;
}
|
||||
// Clears bit 0x4 of _has_bits_[0], marking `argument` as absent.
inline void FeatureFunctionDescriptor::clear_has_argument() {
  _has_bits_[0] = _has_bits_[0] & ~0x00000004u;
}
|
||||
inline void FeatureFunctionDescriptor::clear_argument() {
|
||||
argument_ = 0;
|
||||
clear_has_argument();
|
||||
}
|
||||
// Returns the stored value of `argument`.
inline ::google::protobuf::int32 FeatureFunctionDescriptor::argument() const {
  // @@protoc_insertion_point(field_get:chrome_lang_id.FeatureFunctionDescriptor.argument)
  return this->argument_;
}
|
||||
// Marks `argument` present and stores `value`.
inline void FeatureFunctionDescriptor::set_argument(::google::protobuf::int32 value) {
  this->set_has_argument();
  this->argument_ = value;
  // @@protoc_insertion_point(field_set:chrome_lang_id.FeatureFunctionDescriptor.argument)
}
|
||||
|
||||
// repeated .chrome_lang_id.Parameter parameter = 4;
|
||||
inline int FeatureFunctionDescriptor::parameter_size() const {
|
||||
return parameter_.size();
|
||||
}
|
||||
inline void FeatureFunctionDescriptor::clear_parameter() {
|
||||
parameter_.Clear();
|
||||
}
|
||||
// Mutable pointer to the `parameter` entry at `index`, as returned by
// parameter_.Mutable(index).
inline ::chrome_lang_id::Parameter* FeatureFunctionDescriptor::mutable_parameter(int index) {
  // @@protoc_insertion_point(field_mutable:chrome_lang_id.FeatureFunctionDescriptor.parameter)
  ::chrome_lang_id::Parameter* const element = parameter_.Mutable(index);
  return element;
}
|
||||
// Returns a mutable pointer to the whole repeated `parameter` field.
// The return statement takes the address of parameter_; the text previously
// contained a mis-decoded character in place of "&para", which left the
// statement uncompilable.
inline ::google::protobuf::RepeatedPtrField< ::chrome_lang_id::Parameter >*
FeatureFunctionDescriptor::mutable_parameter() {
  // @@protoc_insertion_point(field_mutable_list:chrome_lang_id.FeatureFunctionDescriptor.parameter)
  return &parameter_;
}
|
||||
// Read-only reference to the `parameter` entry at `index`, as returned by
// parameter_.Get(index).
inline const ::chrome_lang_id::Parameter& FeatureFunctionDescriptor::parameter(int index) const {
  // @@protoc_insertion_point(field_get:chrome_lang_id.FeatureFunctionDescriptor.parameter)
  const ::chrome_lang_id::Parameter& element = parameter_.Get(index);
  return element;
}
|
||||
// Calls parameter_.Add() and returns the pointer it yields.
inline ::chrome_lang_id::Parameter* FeatureFunctionDescriptor::add_parameter() {
  // @@protoc_insertion_point(field_add:chrome_lang_id.FeatureFunctionDescriptor.parameter)
  ::chrome_lang_id::Parameter* const appended = parameter_.Add();
  return appended;
}
|
||||
// Read-only reference to the whole repeated `parameter` field.
inline const ::google::protobuf::RepeatedPtrField< ::chrome_lang_id::Parameter >&
FeatureFunctionDescriptor::parameter() const {
  // @@protoc_insertion_point(field_list:chrome_lang_id.FeatureFunctionDescriptor.parameter)
  return this->parameter_;
}
|
||||
|
||||
// repeated .chrome_lang_id.FeatureFunctionDescriptor feature = 7;
|
||||
inline int FeatureFunctionDescriptor::feature_size() const {
|
||||
return feature_.size();
|
||||
}
|
||||
inline void FeatureFunctionDescriptor::clear_feature() {
|
||||
feature_.Clear();
|
||||
}
|
||||
// Mutable pointer to the nested `feature` entry at `index`, as returned by
// feature_.Mutable(index).
inline ::chrome_lang_id::FeatureFunctionDescriptor* FeatureFunctionDescriptor::mutable_feature(int index) {
  // @@protoc_insertion_point(field_mutable:chrome_lang_id.FeatureFunctionDescriptor.feature)
  ::chrome_lang_id::FeatureFunctionDescriptor* const element = feature_.Mutable(index);
  return element;
}
|
||||
// Mutable pointer to the whole repeated `feature` field.
inline ::google::protobuf::RepeatedPtrField< ::chrome_lang_id::FeatureFunctionDescriptor >*
FeatureFunctionDescriptor::mutable_feature() {
  // @@protoc_insertion_point(field_mutable_list:chrome_lang_id.FeatureFunctionDescriptor.feature)
  return &this->feature_;
}
|
||||
// Read-only reference to the nested `feature` entry at `index`, as returned
// by feature_.Get(index).
inline const ::chrome_lang_id::FeatureFunctionDescriptor& FeatureFunctionDescriptor::feature(int index) const {
  // @@protoc_insertion_point(field_get:chrome_lang_id.FeatureFunctionDescriptor.feature)
  const ::chrome_lang_id::FeatureFunctionDescriptor& element = feature_.Get(index);
  return element;
}
|
||||
// Calls feature_.Add() and returns the pointer it yields.
inline ::chrome_lang_id::FeatureFunctionDescriptor* FeatureFunctionDescriptor::add_feature() {
  // @@protoc_insertion_point(field_add:chrome_lang_id.FeatureFunctionDescriptor.feature)
  ::chrome_lang_id::FeatureFunctionDescriptor* const appended = feature_.Add();
  return appended;
}
|
||||
// Read-only reference to the whole repeated `feature` field.
inline const ::google::protobuf::RepeatedPtrField< ::chrome_lang_id::FeatureFunctionDescriptor >&
FeatureFunctionDescriptor::feature() const {
  // @@protoc_insertion_point(field_list:chrome_lang_id.FeatureFunctionDescriptor.feature)
  return this->feature_;
}
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
// FeatureExtractorDescriptor
|
||||
|
||||
// repeated .chrome_lang_id.FeatureFunctionDescriptor feature = 1;
|
||||
inline int FeatureExtractorDescriptor::feature_size() const {
|
||||
return feature_.size();
|
||||
}
|
||||
inline void FeatureExtractorDescriptor::clear_feature() {
|
||||
feature_.Clear();
|
||||
}
|
||||
// Mutable pointer to the `feature` entry at `index`, as returned by
// feature_.Mutable(index).
inline ::chrome_lang_id::FeatureFunctionDescriptor* FeatureExtractorDescriptor::mutable_feature(int index) {
  // @@protoc_insertion_point(field_mutable:chrome_lang_id.FeatureExtractorDescriptor.feature)
  ::chrome_lang_id::FeatureFunctionDescriptor* const element = feature_.Mutable(index);
  return element;
}
|
||||
// Mutable pointer to the whole repeated `feature` field.
inline ::google::protobuf::RepeatedPtrField< ::chrome_lang_id::FeatureFunctionDescriptor >*
FeatureExtractorDescriptor::mutable_feature() {
  // @@protoc_insertion_point(field_mutable_list:chrome_lang_id.FeatureExtractorDescriptor.feature)
  return &this->feature_;
}
|
||||
// Read-only reference to the `feature` entry at `index`, as returned by
// feature_.Get(index).
inline const ::chrome_lang_id::FeatureFunctionDescriptor& FeatureExtractorDescriptor::feature(int index) const {
  // @@protoc_insertion_point(field_get:chrome_lang_id.FeatureExtractorDescriptor.feature)
  const ::chrome_lang_id::FeatureFunctionDescriptor& element = feature_.Get(index);
  return element;
}
|
||||
// Calls feature_.Add() and returns the pointer it yields.
inline ::chrome_lang_id::FeatureFunctionDescriptor* FeatureExtractorDescriptor::add_feature() {
  // @@protoc_insertion_point(field_add:chrome_lang_id.FeatureExtractorDescriptor.feature)
  ::chrome_lang_id::FeatureFunctionDescriptor* const appended = feature_.Add();
  return appended;
}
|
||||
// Read-only reference to the whole repeated `feature` field.
inline const ::google::protobuf::RepeatedPtrField< ::chrome_lang_id::FeatureFunctionDescriptor >&
FeatureExtractorDescriptor::feature() const {
  // @@protoc_insertion_point(field_list:chrome_lang_id.FeatureExtractorDescriptor.feature)
  return this->feature_;
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif // __GNUC__
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
|
||||
// @@protoc_insertion_point(namespace_scope)
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
// @@protoc_insertion_point(global_scope)
|
||||
|
||||
#endif // PROTOBUF_INCLUDED_feature_5fextractor_2eproto
|
||||
@ -0,0 +1,923 @@
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// source: sentence.proto
|
||||
|
||||
#include "sentence.pb.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
#include <google/protobuf/stubs/port.h>
|
||||
#include <google/protobuf/io/coded_stream.h>
|
||||
#include <google/protobuf/wire_format_lite_inl.h>
|
||||
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
|
||||
// This is a temporary google only hack
|
||||
#ifdef GOOGLE_PROTOBUF_ENFORCE_UNIQUENESS
|
||||
#include "third_party/protobuf/version.h"
|
||||
#endif
|
||||
// @@protoc_insertion_point(includes)
|
||||
|
||||
namespace protobuf_sentence_2eproto {
|
||||
extern PROTOBUF_INTERNAL_EXPORT_protobuf_sentence_2eproto ::google::protobuf::internal::SCCInfo<0> scc_info_Token;
|
||||
} // namespace protobuf_sentence_2eproto
|
||||
namespace chrome_lang_id {
|
||||
class SentenceDefaultTypeInternal {
|
||||
public:
|
||||
::google::protobuf::internal::ExplicitlyConstructed<Sentence>
|
||||
_instance;
|
||||
} _Sentence_default_instance_;
|
||||
class TokenDefaultTypeInternal {
|
||||
public:
|
||||
::google::protobuf::internal::ExplicitlyConstructed<Token>
|
||||
_instance;
|
||||
} _Token_default_instance_;
|
||||
} // namespace chrome_lang_id
|
||||
namespace protobuf_sentence_2eproto {
|
||||
static void InitDefaultsSentence() {
|
||||
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
||||
|
||||
{
|
||||
void* ptr = &::chrome_lang_id::_Sentence_default_instance_;
|
||||
new (ptr) ::chrome_lang_id::Sentence();
|
||||
::google::protobuf::internal::OnShutdownDestroyMessage(ptr);
|
||||
}
|
||||
::chrome_lang_id::Sentence::InitAsDefaultInstance();
|
||||
}
|
||||
|
||||
::google::protobuf::internal::SCCInfo<1> scc_info_Sentence =
|
||||
{{ATOMIC_VAR_INIT(::google::protobuf::internal::SCCInfoBase::kUninitialized), 1, InitDefaultsSentence}, {
|
||||
&protobuf_sentence_2eproto::scc_info_Token.base,}};
|
||||
|
||||
static void InitDefaultsToken() {
|
||||
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
||||
|
||||
{
|
||||
void* ptr = &::chrome_lang_id::_Token_default_instance_;
|
||||
new (ptr) ::chrome_lang_id::Token();
|
||||
::google::protobuf::internal::OnShutdownDestroyMessage(ptr);
|
||||
}
|
||||
::chrome_lang_id::Token::InitAsDefaultInstance();
|
||||
}
|
||||
|
||||
::google::protobuf::internal::SCCInfo<0> scc_info_Token =
|
||||
{{ATOMIC_VAR_INIT(::google::protobuf::internal::SCCInfoBase::kUninitialized), 0, InitDefaultsToken}, {}};
|
||||
|
||||
void InitDefaults() {
|
||||
::google::protobuf::internal::InitSCC(&scc_info_Sentence.base);
|
||||
::google::protobuf::internal::InitSCC(&scc_info_Token.base);
|
||||
}
|
||||
|
||||
} // namespace protobuf_sentence_2eproto
|
||||
namespace chrome_lang_id {
|
||||
// Reports whether `value` is one of the accepted BreakLevel numbers, which
// are exactly the integers 0, 1, 2 and 3.
bool Token_BreakLevel_IsValid(int value) {
  return value >= 0 && value <= 3;
}
|
||||
|
||||
#if !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
const Token_BreakLevel Token::NO_BREAK;
|
||||
const Token_BreakLevel Token::SPACE_BREAK;
|
||||
const Token_BreakLevel Token::LINE_BREAK;
|
||||
const Token_BreakLevel Token::SENTENCE_BREAK;
|
||||
const Token_BreakLevel Token::BreakLevel_MIN;
|
||||
const Token_BreakLevel Token::BreakLevel_MAX;
|
||||
const int Token::BreakLevel_ARRAYSIZE;
|
||||
#endif // !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
|
||||
// ===================================================================
|
||||
|
||||
// Intentionally empty: this message performs no extra work here.
void Sentence::InitAsDefaultInstance() {}
|
||||
#if !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
const int Sentence::kIdFieldNumber;
|
||||
const int Sentence::kTextFieldNumber;
|
||||
const int Sentence::kTokenFieldNumber;
|
||||
#endif // !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
|
||||
// Default constructor: runs InitSCC on the Sentence SCC record and then
// SharedCtor() to put the string fields into their default state.
Sentence::Sentence()
    : ::google::protobuf::MessageLite(),
      _internal_metadata_(NULL) {
  ::google::protobuf::internal::InitSCC(&protobuf_sentence_2eproto::scc_info_Sentence.base);
  SharedCtor();
  // @@protoc_insertion_point(constructor:chrome_lang_id.Sentence)
}
|
||||
// Copy constructor: copies has-bits and tokens in the initializer list, merges
// unknown fields and extensions, then copies `id` and `text` only when `from`
// has them set.
Sentence::Sentence(const Sentence& from)
    : ::google::protobuf::MessageLite(),
      _internal_metadata_(NULL),
      _has_bits_(from._has_bits_),
      token_(from.token_) {
  _internal_metadata_.MergeFrom(from._internal_metadata_);
  _extensions_.MergeFrom(from._extensions_);

  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();

  id_.UnsafeSetDefault(empty_default);
  if (from.has_id()) {
    id_.AssignWithDefault(empty_default, from.id_);
  }

  text_.UnsafeSetDefault(empty_default);
  if (from.has_text()) {
    text_.AssignWithDefault(empty_default, from.text_);
  }
  // @@protoc_insertion_point(copy_constructor:chrome_lang_id.Sentence)
}
|
||||
|
||||
// Points both string fields at the shared empty-string default.
void Sentence::SharedCtor() {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  id_.UnsafeSetDefault(empty_default);
  text_.UnsafeSetDefault(empty_default);
}
|
||||
|
||||
// Destructor: delegates all field teardown to SharedDtor().
Sentence::~Sentence() {
  // @@protoc_insertion_point(destructor:chrome_lang_id.Sentence)
  this->SharedDtor();
}
|
||||
|
||||
// Calls DestroyNoArena() on both string fields, passing the shared
// empty-string default to each.
void Sentence::SharedDtor() {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  id_.DestroyNoArena(empty_default);
  text_.DestroyNoArena(empty_default);
}
|
||||
|
||||
void Sentence::SetCachedSize(int size) const {
|
||||
_cached_size_.Set(size);
|
||||
}
|
||||
// Runs InitSCC on the Sentence SCC record, then returns the object behind
// internal_default_instance().
const Sentence& Sentence::default_instance() {
  ::google::protobuf::internal::InitSCC(&protobuf_sentence_2eproto::scc_info_Sentence.base);
  const Sentence* const instance = internal_default_instance();
  return *instance;
}
|
||||
|
||||
|
||||
void Sentence::Clear() {
|
||||
// @@protoc_insertion_point(message_clear_start:chrome_lang_id.Sentence)
|
||||
::google::protobuf::uint32 cached_has_bits = 0;
|
||||
// Prevent compiler warnings about cached_has_bits being unused
|
||||
(void) cached_has_bits;
|
||||
|
||||
_extensions_.Clear();
|
||||
token_.Clear();
|
||||
cached_has_bits = _has_bits_[0];
|
||||
if (cached_has_bits & 3u) {
|
||||
if (cached_has_bits & 0x00000001u) {
|
||||
id_.ClearNonDefaultToEmptyNoArena();
|
||||
}
|
||||
if (cached_has_bits & 0x00000002u) {
|
||||
text_.ClearNonDefaultToEmptyNoArena();
|
||||
}
|
||||
}
|
||||
_has_bits_.Clear();
|
||||
_internal_metadata_.Clear();
|
||||
}
|
||||
|
||||
// Reads tagged fields from `input` and merges them into this message.
// Fields 1 (id) and 2 (text) are read as strings and field 3 (token) as an
// appended sub-message. Tags of 8000 or above are passed to the extension
// set; everything else is skipped into the unknown-field buffer.
// Returns true when a zero tag is reached and false when any read fails.
bool Sentence::MergePartialFromCodedStream(
    ::google::protobuf::io::CodedInputStream* input) {
#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure
  typedef ::google::protobuf::internal::WireFormatLite WireLite;
  typedef ::google::protobuf::uint8 TagByte;

  ::google::protobuf::uint32 tag;
  ::google::protobuf::internal::LiteUnknownFieldSetter unknown_fields_setter(
      &_internal_metadata_);
  ::google::protobuf::io::StringOutputStream unknown_fields_output(
      unknown_fields_setter.buffer());
  ::google::protobuf::io::CodedOutputStream unknown_fields_stream(
      &unknown_fields_output, false);
  // @@protoc_insertion_point(parse_start:chrome_lang_id.Sentence)
  for (;;) {
    ::std::pair<::google::protobuf::uint32, bool> tag_result =
        input->ReadTagWithCutoffNoLastTag(127u);
    tag = tag_result.first;
    if (!tag_result.second) goto handle_unusual;
    switch (WireLite::GetTagFieldNumber(tag)) {
      // optional string id = 1;
      case 1: {
        if (static_cast<TagByte>(tag) !=
            static_cast<TagByte>(10u /* 10 & 0xFF */)) {
          goto handle_unusual;
        }
        DO_(WireLite::ReadString(input, this->mutable_id()));
        break;
      }

      // optional string text = 2;
      case 2: {
        if (static_cast<TagByte>(tag) !=
            static_cast<TagByte>(18u /* 18 & 0xFF */)) {
          goto handle_unusual;
        }
        DO_(WireLite::ReadString(input, this->mutable_text()));
        break;
      }

      // repeated .chrome_lang_id.Token token = 3;
      case 3: {
        if (static_cast<TagByte>(tag) !=
            static_cast<TagByte>(26u /* 26 & 0xFF */)) {
          goto handle_unusual;
        }
        DO_(WireLite::ReadMessage(input, add_token()));
        break;
      }

      default: {
      handle_unusual:
        // A zero tag ends parsing successfully.
        if (tag == 0) {
          goto success;
        }
        if (tag >= 8000u) {
          DO_(_extensions_.ParseField(tag, input,
                                      internal_default_instance(),
                                      &unknown_fields_stream));
          continue;
        }
        DO_(WireLite::SkipField(input, tag, &unknown_fields_stream));
        break;
      }
    }
  }
success:
  // @@protoc_insertion_point(parse_success:chrome_lang_id.Sentence)
  return true;
failure:
  // @@protoc_insertion_point(parse_failure:chrome_lang_id.Sentence)
  return false;
#undef DO_
}
|
||||
|
||||
// Writes this message to `output`: id (field 1) and text (field 2) when their
// has-bits are set, every token as field 3, then extensions in the range
// [1000, 536870912), and finally the raw unknown-field bytes.
void Sentence::SerializeWithCachedSizes(
    ::google::protobuf::io::CodedOutputStream* output) const {
  // @@protoc_insertion_point(serialize_start:chrome_lang_id.Sentence)
  typedef ::google::protobuf::internal::WireFormatLite WireLite;
  const ::google::protobuf::uint32 present = _has_bits_[0];

  // optional string id = 1;
  if (present & 0x00000001u) {
    WireLite::WriteStringMaybeAliased(1, this->id(), output);
  }

  // optional string text = 2;
  if (present & 0x00000002u) {
    WireLite::WriteStringMaybeAliased(2, this->text(), output);
  }

  // repeated .chrome_lang_id.Token token = 3;
  const unsigned int token_count = static_cast<unsigned int>(this->token_size());
  for (unsigned int idx = 0; idx < token_count; ++idx) {
    WireLite::WriteMessage(3, this->token(static_cast<int>(idx)), output);
  }

  // Extension range [1000, 536870912)
  _extensions_.SerializeWithCachedSizes(1000, 536870912, output);

  output->WriteRaw(_internal_metadata_.unknown_fields().data(),
                   static_cast<int>(_internal_metadata_.unknown_fields().size()));
  // @@protoc_insertion_point(serialize_end:chrome_lang_id.Sentence)
}
|
||||
|
||||
size_t Sentence::ByteSizeLong() const {
|
||||
// @@protoc_insertion_point(message_byte_size_start:chrome_lang_id.Sentence)
|
||||
size_t total_size = 0;
|
||||
|
||||
total_size += _extensions_.ByteSize();
|
||||
|
||||
total_size += _internal_metadata_.unknown_fields().size();
|
||||
|
||||
// repeated .chrome_lang_id.Token token = 3;
|
||||
{
|
||||
unsigned int count = static_cast<unsigned int>(this->token_size());
|
||||
total_size += 1UL * count;
|
||||
for (unsigned int i = 0; i < count; i++) {
|
||||
total_size +=
|
||||
::google::protobuf::internal::WireFormatLite::MessageSize(
|
||||
this->token(static_cast<int>(i)));
|
||||
}
|
||||
}
|
||||
|
||||
if (_has_bits_[0 / 32] & 3u) {
|
||||
// optional string id = 1;
|
||||
if (has_id()) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::StringSize(
|
||||
this->id());
|
||||
}
|
||||
|
||||
// optional string text = 2;
|
||||
if (has_text()) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::StringSize(
|
||||
this->text());
|
||||
}
|
||||
|
||||
}
|
||||
int cached_size = ::google::protobuf::internal::ToCachedSize(total_size);
|
||||
SetCachedSize(cached_size);
|
||||
return total_size;
|
||||
}
|
||||
|
||||
void Sentence::CheckTypeAndMergeFrom(
|
||||
const ::google::protobuf::MessageLite& from) {
|
||||
MergeFrom(*::google::protobuf::down_cast<const Sentence*>(&from));
|
||||
}
|
||||
|
||||
void Sentence::MergeFrom(const Sentence& from) {
|
||||
// @@protoc_insertion_point(class_specific_merge_from_start:chrome_lang_id.Sentence)
|
||||
GOOGLE_DCHECK_NE(&from, this);
|
||||
_extensions_.MergeFrom(from._extensions_);
|
||||
_internal_metadata_.MergeFrom(from._internal_metadata_);
|
||||
::google::protobuf::uint32 cached_has_bits = 0;
|
||||
(void) cached_has_bits;
|
||||
|
||||
token_.MergeFrom(from.token_);
|
||||
cached_has_bits = from._has_bits_[0];
|
||||
if (cached_has_bits & 3u) {
|
||||
if (cached_has_bits & 0x00000001u) {
|
||||
set_has_id();
|
||||
id_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.id_);
|
||||
}
|
||||
if (cached_has_bits & 0x00000002u) {
|
||||
set_has_text();
|
||||
text_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.text_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Sentence::CopyFrom(const Sentence& from) {
|
||||
// @@protoc_insertion_point(class_specific_copy_from_start:chrome_lang_id.Sentence)
|
||||
if (&from == this) return;
|
||||
Clear();
|
||||
MergeFrom(from);
|
||||
}
|
||||
|
||||
bool Sentence::IsInitialized() const {
|
||||
if (!_extensions_.IsInitialized()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!::google::protobuf::internal::AllAreInitialized(this->token())) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Exchanges contents with `other`. Does nothing when `other` is this object.
void Sentence::Swap(Sentence* other) {
  if (other != this) {
    InternalSwap(other);
  }
}
|
||||
// Swaps every member with `other`: tokens, both strings, the has-bits word,
// the unknown-field metadata and the extension set.
void Sentence::InternalSwap(Sentence* other) {
  using std::swap;
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();

  CastToBase(&token_)->InternalSwap(CastToBase(&other->token_));
  id_.Swap(&other->id_, empty_default, GetArenaNoVirtual());
  text_.Swap(&other->text_, empty_default, GetArenaNoVirtual());
  swap(_has_bits_[0], other->_has_bits_[0]);
  _internal_metadata_.Swap(&other->_internal_metadata_);
  _extensions_.Swap(&other->_extensions_);
}
|
||||
|
||||
// Returns the fully-qualified message type name.
::std::string Sentence::GetTypeName() const {
  return ::std::string("chrome_lang_id.Sentence");
}
|
||||
|
||||
|
||||
// ===================================================================
|
||||
|
||||
// Intentionally empty: this message performs no extra work here.
void Token::InitAsDefaultInstance() {}
|
||||
#if !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
const int Token::kWordFieldNumber;
|
||||
const int Token::kStartFieldNumber;
|
||||
const int Token::kEndFieldNumber;
|
||||
const int Token::kHeadFieldNumber;
|
||||
const int Token::kTagFieldNumber;
|
||||
const int Token::kCategoryFieldNumber;
|
||||
const int Token::kLabelFieldNumber;
|
||||
const int Token::kBreakLevelFieldNumber;
|
||||
#endif // !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
|
||||
// Default constructor: runs InitSCC on the Token SCC record and then
// SharedCtor() to put all fields into their default state.
Token::Token()
    : ::google::protobuf::MessageLite(),
      _internal_metadata_(NULL) {
  ::google::protobuf::internal::InitSCC(&protobuf_sentence_2eproto::scc_info_Token.base);
  SharedCtor();
  // @@protoc_insertion_point(constructor:chrome_lang_id.Token)
}
|
||||
// Copy constructor: copies has-bits in the initializer list, merges unknown
// fields and extensions, copies each string field only when `from` has it
// set, and block-copies the bytes from start_ through break_level_.
Token::Token(const Token& from)
    : ::google::protobuf::MessageLite(),
      _internal_metadata_(NULL),
      _has_bits_(from._has_bits_) {
  _internal_metadata_.MergeFrom(from._internal_metadata_);
  _extensions_.MergeFrom(from._extensions_);

  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();

  word_.UnsafeSetDefault(empty_default);
  if (from.has_word()) {
    word_.AssignWithDefault(empty_default, from.word_);
  }

  tag_.UnsafeSetDefault(empty_default);
  if (from.has_tag()) {
    tag_.AssignWithDefault(empty_default, from.tag_);
  }

  category_.UnsafeSetDefault(empty_default);
  if (from.has_category()) {
    category_.AssignWithDefault(empty_default, from.category_);
  }

  label_.UnsafeSetDefault(empty_default);
  if (from.has_label()) {
    label_.AssignWithDefault(empty_default, from.label_);
  }

  // Byte span from the start of start_ to the end of break_level_.
  const size_t scalar_bytes =
      static_cast<size_t>(reinterpret_cast<char*>(&break_level_) -
                          reinterpret_cast<char*>(&start_)) +
      sizeof(break_level_);
  ::memcpy(&start_, &from.start_, scalar_bytes);
  // @@protoc_insertion_point(copy_constructor:chrome_lang_id.Token)
}
|
||||
|
||||
// Puts every field into its default state: the four strings point at the
// shared empty string, the bytes from start_ through end_ are zeroed, head_
// becomes -1 and break_level_ becomes 1.
void Token::SharedCtor() {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  word_.UnsafeSetDefault(empty_default);
  tag_.UnsafeSetDefault(empty_default);
  category_.UnsafeSetDefault(empty_default);
  label_.UnsafeSetDefault(empty_default);

  // Byte span from the start of start_ to the end of end_.
  const size_t zeroed_bytes =
      static_cast<size_t>(reinterpret_cast<char*>(&end_) -
                          reinterpret_cast<char*>(&start_)) +
      sizeof(end_);
  ::memset(&start_, 0, zeroed_bytes);
  head_ = -1;
  break_level_ = 1;
}
|
||||
|
||||
// Destructor: delegates all field teardown to SharedDtor().
Token::~Token() {
  // @@protoc_insertion_point(destructor:chrome_lang_id.Token)
  this->SharedDtor();
}
|
||||
|
||||
// Calls DestroyNoArena() on all four string fields, passing the shared
// empty-string default to each.
void Token::SharedDtor() {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  word_.DestroyNoArena(empty_default);
  tag_.DestroyNoArena(empty_default);
  category_.DestroyNoArena(empty_default);
  label_.DestroyNoArena(empty_default);
}
|
||||
|
||||
void Token::SetCachedSize(int size) const {
|
||||
_cached_size_.Set(size);
|
||||
}
|
||||
// Runs InitSCC on the Token SCC record, then returns the object behind
// internal_default_instance().
const Token& Token::default_instance() {
  ::google::protobuf::internal::InitSCC(&protobuf_sentence_2eproto::scc_info_Token.base);
  const Token* const instance = internal_default_instance();
  return *instance;
}
|
||||
|
||||
|
||||
void Token::Clear() {
|
||||
// @@protoc_insertion_point(message_clear_start:chrome_lang_id.Token)
|
||||
::google::protobuf::uint32 cached_has_bits = 0;
|
||||
// Prevent compiler warnings about cached_has_bits being unused
|
||||
(void) cached_has_bits;
|
||||
|
||||
_extensions_.Clear();
|
||||
cached_has_bits = _has_bits_[0];
|
||||
if (cached_has_bits & 15u) {
|
||||
if (cached_has_bits & 0x00000001u) {
|
||||
word_.ClearNonDefaultToEmptyNoArena();
|
||||
}
|
||||
if (cached_has_bits & 0x00000002u) {
|
||||
tag_.ClearNonDefaultToEmptyNoArena();
|
||||
}
|
||||
if (cached_has_bits & 0x00000004u) {
|
||||
category_.ClearNonDefaultToEmptyNoArena();
|
||||
}
|
||||
if (cached_has_bits & 0x00000008u) {
|
||||
label_.ClearNonDefaultToEmptyNoArena();
|
||||
}
|
||||
}
|
||||
if (cached_has_bits & 240u) {
|
||||
::memset(&start_, 0, static_cast<size_t>(
|
||||
reinterpret_cast<char*>(&end_) -
|
||||
reinterpret_cast<char*>(&start_)) + sizeof(end_));
|
||||
head_ = -1;
|
||||
break_level_ = 1;
|
||||
}
|
||||
_has_bits_.Clear();
|
||||
_internal_metadata_.Clear();
|
||||
}
|
||||
|
||||
// Parses wire-format data from |input| and merges it into this Token.
// Returns true when the stream ends cleanly (a zero tag is read) and false as
// soon as any read fails. Fields that are not recognized, and enum values
// rejected by Token_BreakLevel_IsValid, are written to the unknown-field
// buffer held by _internal_metadata_.
bool Token::MergePartialFromCodedStream(
    ::google::protobuf::io::CodedInputStream* input) {
#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure
  using ::google::protobuf::internal::WireFormatLite;

  ::google::protobuf::uint32 wire_tag;
  // Each of the next three objects wraps the one declared before it, so the
  // declaration order must be kept.
  ::google::protobuf::internal::LiteUnknownFieldSetter unknown_setter(
      &_internal_metadata_);
  ::google::protobuf::io::StringOutputStream unknown_sink(
      unknown_setter.buffer());
  ::google::protobuf::io::CodedOutputStream unknown_out(&unknown_sink, false);

  // True when the low byte of the current tag equals |expected|.
  const auto low_byte_is = [&wire_tag](::google::protobuf::uint8 expected) {
    return static_cast< ::google::protobuf::uint8>(wire_tag) == expected;
  };

  // @@protoc_insertion_point(parse_start:chrome_lang_id.Token)
  for (;;) {
    const ::std::pair< ::google::protobuf::uint32, bool> next =
        input->ReadTagWithCutoffNoLastTag(127u);
    wire_tag = next.first;
    if (!next.second) goto handle_unusual;

    switch (WireFormatLite::GetTagFieldNumber(wire_tag)) {
      // required string word = 1;
      case 1: {
        if (!low_byte_is(10u /* 10 & 0xFF */)) goto handle_unusual;
        DO_(WireFormatLite::ReadString(input, this->mutable_word()));
        break;
      }

      // required int32 start = 2;
      case 2: {
        if (!low_byte_is(16u /* 16 & 0xFF */)) goto handle_unusual;
        // The presence bit is set before the read, so it stays set even if
        // the read below fails.
        set_has_start();
        DO_((WireFormatLite::ReadPrimitive<
             ::google::protobuf::int32, WireFormatLite::TYPE_INT32>(
                 input, &start_)));
        break;
      }

      // required int32 end = 3;
      case 3: {
        if (!low_byte_is(24u /* 24 & 0xFF */)) goto handle_unusual;
        set_has_end();
        DO_((WireFormatLite::ReadPrimitive<
             ::google::protobuf::int32, WireFormatLite::TYPE_INT32>(
                 input, &end_)));
        break;
      }

      // optional int32 head = 4 [default = -1];
      case 4: {
        if (!low_byte_is(32u /* 32 & 0xFF */)) goto handle_unusual;
        set_has_head();
        DO_((WireFormatLite::ReadPrimitive<
             ::google::protobuf::int32, WireFormatLite::TYPE_INT32>(
                 input, &head_)));
        break;
      }

      // optional string tag = 5;
      case 5: {
        if (!low_byte_is(42u /* 42 & 0xFF */)) goto handle_unusual;
        DO_(WireFormatLite::ReadString(input, this->mutable_tag()));
        break;
      }

      // optional string category = 6;
      case 6: {
        if (!low_byte_is(50u /* 50 & 0xFF */)) goto handle_unusual;
        DO_(WireFormatLite::ReadString(input, this->mutable_category()));
        break;
      }

      // optional string label = 7;
      case 7: {
        if (!low_byte_is(58u /* 58 & 0xFF */)) goto handle_unusual;
        DO_(WireFormatLite::ReadString(input, this->mutable_label()));
        break;
      }

      // optional .chrome_lang_id.Token.BreakLevel break_level = 8 [default = SPACE_BREAK];
      case 8: {
        if (!low_byte_is(64u /* 64 & 0xFF */)) goto handle_unusual;
        int value;
        DO_((WireFormatLite::ReadPrimitive<
             int, WireFormatLite::TYPE_ENUM>(input, &value)));
        if (::chrome_lang_id::Token_BreakLevel_IsValid(value)) {
          set_break_level(
              static_cast< ::chrome_lang_id::Token_BreakLevel >(value));
        } else {
          // Invalid enum value: record tag and raw value as an unknown field.
          unknown_out.WriteVarint32(64u);
          unknown_out.WriteVarint32(
              static_cast< ::google::protobuf::uint32>(value));
        }
        break;
      }

      default: {
      handle_unusual:
        // A zero tag ends parsing successfully.
        if (wire_tag == 0) goto success;
        // Tags at or above 8000 are handed to the extension set.
        if (8000u <= wire_tag) {
          DO_(_extensions_.ParseField(wire_tag, input,
                                      internal_default_instance(),
                                      &unknown_out));
          continue;
        }
        DO_(WireFormatLite::SkipField(input, wire_tag, &unknown_out));
        break;
      }
    }
  }
success:
  // @@protoc_insertion_point(parse_success:chrome_lang_id.Token)
  return true;
failure:
  // @@protoc_insertion_point(parse_failure:chrome_lang_id.Token)
  return false;
#undef DO_
}
|
||||
|
||||
// Writes every field whose presence bit is set to |output| in field-number
// order, followed by the extensions and the raw unknown-field bytes.
void Token::SerializeWithCachedSizes(
    ::google::protobuf::io::CodedOutputStream* output) const {
  // @@protoc_insertion_point(serialize_start:chrome_lang_id.Token)
  using ::google::protobuf::internal::WireFormatLite;

  const ::google::protobuf::uint32 present = _has_bits_[0];

  // required string word = 1;
  if (present & 0x00000001u) {
    WireFormatLite::WriteStringMaybeAliased(1, this->word(), output);
  }

  // required int32 start = 2;
  if (present & 0x00000010u) {
    WireFormatLite::WriteInt32(2, this->start(), output);
  }

  // required int32 end = 3;
  if (present & 0x00000020u) {
    WireFormatLite::WriteInt32(3, this->end(), output);
  }

  // optional int32 head = 4 [default = -1];
  if (present & 0x00000040u) {
    WireFormatLite::WriteInt32(4, this->head(), output);
  }

  // optional string tag = 5;
  if (present & 0x00000002u) {
    WireFormatLite::WriteStringMaybeAliased(5, this->tag(), output);
  }

  // optional string category = 6;
  if (present & 0x00000004u) {
    WireFormatLite::WriteStringMaybeAliased(6, this->category(), output);
  }

  // optional string label = 7;
  if (present & 0x00000008u) {
    WireFormatLite::WriteStringMaybeAliased(7, this->label(), output);
  }

  // optional .chrome_lang_id.Token.BreakLevel break_level = 8 [default = SPACE_BREAK];
  if (present & 0x00000080u) {
    WireFormatLite::WriteEnum(8, this->break_level(), output);
  }

  // Extension range [1000, 536870912)
  _extensions_.SerializeWithCachedSizes(1000, 536870912, output);

  // Unknown fields are stored already encoded, so they are copied verbatim.
  const auto& unknown = _internal_metadata_.unknown_fields();
  output->WriteRaw(unknown.data(), static_cast<int>(unknown.size()));
  // @@protoc_insertion_point(serialize_end:chrome_lang_id.Token)
}
|
||||
|
||||
size_t Token::RequiredFieldsByteSizeFallback() const {
|
||||
// @@protoc_insertion_point(required_fields_byte_size_fallback_start:chrome_lang_id.Token)
|
||||
size_t total_size = 0;
|
||||
|
||||
if (has_word()) {
|
||||
// required string word = 1;
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::StringSize(
|
||||
this->word());
|
||||
}
|
||||
|
||||
if (has_start()) {
|
||||
// required int32 start = 2;
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::Int32Size(
|
||||
this->start());
|
||||
}
|
||||
|
||||
if (has_end()) {
|
||||
// required int32 end = 3;
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::Int32Size(
|
||||
this->end());
|
||||
}
|
||||
|
||||
return total_size;
|
||||
}
|
||||
size_t Token::ByteSizeLong() const {
|
||||
// @@protoc_insertion_point(message_byte_size_start:chrome_lang_id.Token)
|
||||
size_t total_size = 0;
|
||||
|
||||
total_size += _extensions_.ByteSize();
|
||||
|
||||
total_size += _internal_metadata_.unknown_fields().size();
|
||||
|
||||
if (((_has_bits_[0] & 0x00000031) ^ 0x00000031) == 0) { // All required fields are present.
|
||||
// required string word = 1;
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::StringSize(
|
||||
this->word());
|
||||
|
||||
// required int32 start = 2;
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::Int32Size(
|
||||
this->start());
|
||||
|
||||
// required int32 end = 3;
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::Int32Size(
|
||||
this->end());
|
||||
|
||||
} else {
|
||||
total_size += RequiredFieldsByteSizeFallback();
|
||||
}
|
||||
if (_has_bits_[0 / 32] & 14u) {
|
||||
// optional string tag = 5;
|
||||
if (has_tag()) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::StringSize(
|
||||
this->tag());
|
||||
}
|
||||
|
||||
// optional string category = 6;
|
||||
if (has_category()) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::StringSize(
|
||||
this->category());
|
||||
}
|
||||
|
||||
// optional string label = 7;
|
||||
if (has_label()) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::StringSize(
|
||||
this->label());
|
||||
}
|
||||
|
||||
}
|
||||
if (_has_bits_[0 / 32] & 192u) {
|
||||
// optional int32 head = 4 [default = -1];
|
||||
if (has_head()) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::Int32Size(
|
||||
this->head());
|
||||
}
|
||||
|
||||
// optional .chrome_lang_id.Token.BreakLevel break_level = 8 [default = SPACE_BREAK];
|
||||
if (has_break_level()) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::EnumSize(this->break_level());
|
||||
}
|
||||
|
||||
}
|
||||
int cached_size = ::google::protobuf::internal::ToCachedSize(total_size);
|
||||
SetCachedSize(cached_size);
|
||||
return total_size;
|
||||
}
|
||||
|
||||
void Token::CheckTypeAndMergeFrom(
|
||||
const ::google::protobuf::MessageLite& from) {
|
||||
MergeFrom(*::google::protobuf::down_cast<const Token*>(&from));
|
||||
}
|
||||
|
||||
void Token::MergeFrom(const Token& from) {
|
||||
// @@protoc_insertion_point(class_specific_merge_from_start:chrome_lang_id.Token)
|
||||
GOOGLE_DCHECK_NE(&from, this);
|
||||
_extensions_.MergeFrom(from._extensions_);
|
||||
_internal_metadata_.MergeFrom(from._internal_metadata_);
|
||||
::google::protobuf::uint32 cached_has_bits = 0;
|
||||
(void) cached_has_bits;
|
||||
|
||||
cached_has_bits = from._has_bits_[0];
|
||||
if (cached_has_bits & 255u) {
|
||||
if (cached_has_bits & 0x00000001u) {
|
||||
set_has_word();
|
||||
word_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.word_);
|
||||
}
|
||||
if (cached_has_bits & 0x00000002u) {
|
||||
set_has_tag();
|
||||
tag_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.tag_);
|
||||
}
|
||||
if (cached_has_bits & 0x00000004u) {
|
||||
set_has_category();
|
||||
category_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.category_);
|
||||
}
|
||||
if (cached_has_bits & 0x00000008u) {
|
||||
set_has_label();
|
||||
label_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.label_);
|
||||
}
|
||||
if (cached_has_bits & 0x00000010u) {
|
||||
start_ = from.start_;
|
||||
}
|
||||
if (cached_has_bits & 0x00000020u) {
|
||||
end_ = from.end_;
|
||||
}
|
||||
if (cached_has_bits & 0x00000040u) {
|
||||
head_ = from.head_;
|
||||
}
|
||||
if (cached_has_bits & 0x00000080u) {
|
||||
break_level_ = from.break_level_;
|
||||
}
|
||||
_has_bits_[0] |= cached_has_bits;
|
||||
}
|
||||
}
|
||||
|
||||
void Token::CopyFrom(const Token& from) {
|
||||
// @@protoc_insertion_point(class_specific_copy_from_start:chrome_lang_id.Token)
|
||||
if (&from == this) return;
|
||||
Clear();
|
||||
MergeFrom(from);
|
||||
}
|
||||
|
||||
bool Token::IsInitialized() const {
|
||||
if (!_extensions_.IsInitialized()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((_has_bits_[0] & 0x00000031) != 0x00000031) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Exchanges the contents of this message with |other|; swapping a message
// with itself does nothing.
void Token::Swap(Token* other) {
  if (other != this) {
    InternalSwap(other);
  }
}
|
||||
// Member-wise swap with |other|: string fields, scalar fields, presence bits,
// unknown-field metadata and extensions.
void Token::InternalSwap(Token* other) {
  using std::swap;

  // Both values are identical for all four string swaps.
  const auto* const empty_string =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  auto* const arena = GetArenaNoVirtual();

  word_.Swap(&other->word_, empty_string, arena);
  tag_.Swap(&other->tag_, empty_string, arena);
  category_.Swap(&other->category_, empty_string, arena);
  label_.Swap(&other->label_, empty_string, arena);

  swap(start_, other->start_);
  swap(end_, other->end_);
  swap(head_, other->head_);
  swap(break_level_, other->break_level_);

  swap(_has_bits_[0], other->_has_bits_[0]);
  _internal_metadata_.Swap(&other->_internal_metadata_);
  _extensions_.Swap(&other->_extensions_);
}
|
||||
|
||||
// Returns the fully qualified protobuf type name of this message.
::std::string Token::GetTypeName() const {
  static const char kTypeName[] = "chrome_lang_id.Token";
  return kTypeName;
}
|
||||
|
||||
|
||||
// @@protoc_insertion_point(namespace_scope)
|
||||
} // namespace chrome_lang_id
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
// Explicit specialization for Sentence: forwards to Arena::CreateInternal
// with the given arena.
template<>
GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE ::chrome_lang_id::Sentence*
Arena::CreateMaybeMessage< ::chrome_lang_id::Sentence >(Arena* arena) {
  return Arena::CreateInternal< ::chrome_lang_id::Sentence >(arena);
}

// Explicit specialization for Token: forwards to Arena::CreateInternal with
// the given arena.
template<>
GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE ::chrome_lang_id::Token*
Arena::CreateMaybeMessage< ::chrome_lang_id::Token >(Arena* arena) {
  return Arena::CreateInternal< ::chrome_lang_id::Token >(arena);
}
|
||||
} // namespace protobuf
|
||||
} // namespace google
|
||||
|
||||
// @@protoc_insertion_point(global_scope)
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,36 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "base.h"
|
||||
|
||||
#include <string>
|
||||
#if defined(COMPILER_MSVC) || defined(_WIN32)
|
||||
#include <sstream>
|
||||
#endif // defined(COMPILER_MSVC) || defined(_WIN32)
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// TODO(abakalov): Pick the most efficient approach.
|
||||
#if defined(COMPILER_MSVC) || defined(_WIN32)
|
||||
std::string Int64ToString(int64 input) {
|
||||
std::stringstream stream;
|
||||
stream << input;
|
||||
return stream.str();
|
||||
}
|
||||
#else
|
||||
std::string Int64ToString(int64 input) { return std::to_string(input); }
|
||||
#endif // defined(COMPILER_MSVC) || defined(_WIN32)
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,106 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef BASE_H_
|
||||
#define BASE_H_
|
||||
|
||||
#include <cassert>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Make these standard-library names usable unqualified inside chrome_lang_id.
using std::vector;
|
||||
using std::string;
|
||||
using std::map;
|
||||
using std::pair;
|
||||
// Shorthand alias for unsigned int.
typedef unsigned int uint32;
|
||||
|
||||
// CLD3_DISALLOW_COPY_AND_ASSIGN(TypeName) removes the copy constructor and
// copy assignment operator of TypeName. Place it inside the class definition.
//
// The deleted-function form is chosen when the build defines LANG_CXX11 to a
// non-zero value, or when the compiler itself reports C++11 support. Relying
// on LANG_CXX11 alone would silently select the weaker C++98 form in C++11
// builds that never define that macro.
#if LANG_CXX11 || __cplusplus >= 201103L || \
    (defined(_MSC_VER) && _MSC_VER >= 1800)
#define CLD3_DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName &) = delete;          \
  TypeName &operator=(const TypeName &) = delete
#else  // C++98 case follows

// Note that these C++98 implementations cannot completely disallow copying,
// as members and friends can still accidentally make elided copies without
// triggering a linker error.
#define CLD3_DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName &);                   \
  TypeName &operator=(const TypeName &)
#endif  // LANG_CXX11
|
||||
|
||||
// CLD3_IMMEDIATE_CRASH() aborts execution: __builtin_trap() when __GNUC__ or
// __clang__ is defined, otherwise a write through a null volatile pointer.
// The #ifndef guard lets a build supply its own definition beforehand.
#ifndef CLD3_IMMEDIATE_CRASH
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#define CLD3_IMMEDIATE_CRASH() __builtin_trap()
|
||||
#else
|
||||
#define CLD3_IMMEDIATE_CRASH() ((void)(*(volatile char *)0 = 0))
|
||||
#endif
|
||||
#endif // CLD3_IMMEDIATE_CRASH
|
||||
|
||||
// CLD3_CHECK(f) evaluates f and invokes CLD3_IMMEDIATE_CRASH() when it is
// false; otherwise it is a void no-op expression.
#define CLD3_CHECK(f) (!(f) ? CLD3_IMMEDIATE_CRASH() : (void)0)
|
||||
|
||||
// CLD3_DCHECK(f) is CLD3_CHECK(f), except when NDEBUG is defined and
// DCHECK_ALWAYS_ON is not: then it expands to ((void)0) and f is not
// evaluated.
#if defined(NDEBUG) && !defined(DCHECK_ALWAYS_ON)
|
||||
#define CLD3_DCHECK(f) ((void)0)
|
||||
#else
|
||||
#define CLD3_DCHECK(f) CLD3_CHECK(f)
|
||||
#endif
|
||||
|
||||
// Integer aliases that are declared only when SWIG is not defined.
#ifndef SWIG
|
||||
typedef int int32;
|
||||
typedef unsigned char uint8; // NOLINT
|
||||
typedef unsigned short uint16; // NOLINT
|
||||
|
||||
// A type to represent a Unicode code-point value. As of Unicode 4.0,
|
||||
// such values require up to 21 bits.
|
||||
// (For type-checking on pointers, make this explicitly signed,
|
||||
// and it should always be the signed version of whatever int32 is.)
|
||||
typedef signed int char32;
|
||||
#endif // SWIG
|
||||
|
||||
// int64 is __int64 when COMPILER_MSVC is defined and long long otherwise.
#ifdef COMPILER_MSVC
|
||||
typedef __int64 int64;
|
||||
#else
|
||||
typedef long long int64; // NOLINT
|
||||
#endif // COMPILER_MSVC
|
||||
|
||||
// CLD3_ATTRIBUTE_ALWAYS_INLINE: force-inline annotation. It expands to
// __attribute__((always_inline)) for GCC 3.1 or newer, to __forceinline when
// _MSC_VER is defined, and to nothing for every other compiler.
#if defined(__GNUC__) && \
|
||||
(__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
|
||||
|
||||
// For functions we want to force inline.
|
||||
// Introduced in gcc 3.1.
|
||||
#define CLD3_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
|
||||
|
||||
#elif defined(_MSC_VER)
|
||||
#define CLD3_ATTRIBUTE_ALWAYS_INLINE __forceinline
|
||||
#else
|
||||
|
||||
// Other compilers will have to figure it out for themselves.
|
||||
#define CLD3_ATTRIBUTE_ALWAYS_INLINE
|
||||
#endif
|
||||
|
||||
// bstring is a basic_string<char>. Under INTERNAL_BUILD the name is used
// unqualified (NOTE(review): presumably a non-std basic_string is in scope in
// that configuration - confirm); otherwise it is std::basic_string<char>.
#ifdef INTERNAL_BUILD
|
||||
typedef basic_string<char> bstring;
|
||||
#else
|
||||
typedef std::basic_string<char> bstring;
|
||||
#endif // INTERNAL_BUILD
|
||||
|
||||
// Converts int64 to string.
|
||||
// Returns the decimal text form of |input| (declaration only; the definition
// is not in this header).
std::string Int64ToString(int64 input);
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // BASE_H_
|
||||
@ -0,0 +1,98 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// This code is compiled directly on many platforms, including client
|
||||
// platforms like Windows, Mac, and embedded systems. Before making
|
||||
// any changes here, make sure that you're not breaking any platforms.
|
||||
//
|
||||
|
||||
#ifndef CASTS_H_
|
||||
#define CASTS_H_
|
||||
|
||||
#include <string.h> // for memcpy
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// lang_id_bit_cast<Dest,Source> is a template function that implements the
|
||||
// equivalent of "*reinterpret_cast<Dest*>(&source)". We need this in
|
||||
// very low-level functions like the protobuf library and fast math
|
||||
// support.
|
||||
//
|
||||
// float f = 3.14159265358979;
|
||||
// int i = lang_id_bit_cast<int32>(f);
|
||||
// // i = 0x40490fdb
|
||||
//
|
||||
// The classical address-casting method is:
|
||||
//
|
||||
// // WRONG
|
||||
// float f = 3.14159265358979; // WRONG
|
||||
// int i = * reinterpret_cast<int*>(&f); // WRONG
|
||||
//
|
||||
// The address-casting method actually produces undefined behavior
|
||||
// according to ISO C++ specification section 3.10 -15 -. Roughly, this
|
||||
// section says: if an object in memory has one type, and a program
|
||||
// accesses it with a different type, then the result is undefined
|
||||
// behavior for most values of "different type".
|
||||
//
|
||||
// This is true for any cast syntax, either *(int*)&f or
|
||||
// *reinterpret_cast<int*>(&f). And it is particularly true for
|
||||
// conversions between integral lvalues and floating-point lvalues.
|
||||
//
|
||||
// The purpose of 3.10 -15- is to allow optimizing compilers to assume
|
||||
// that expressions with different types refer to different memory. gcc
|
||||
// 4.0.1 has an optimizer that takes advantage of this. So a
|
||||
// non-conforming program quietly produces wildly incorrect output.
|
||||
//
|
||||
// The problem is not the use of reinterpret_cast. The problem is type
|
||||
// punning: holding an object in memory of one type and reading its bits
|
||||
// back using a different type.
|
||||
//
|
||||
// The C++ standard is more subtle and complex than this, but that
|
||||
// is the basic idea.
|
||||
//
|
||||
// Anyways ...
|
||||
//
|
||||
// lang_id_bit_cast<> calls memcpy() which is blessed by the standard,
|
||||
// especially by the example in section 3.9 . Also, of course,
|
||||
// lang_id_bit_cast<> wraps up the nasty logic in one place.
|
||||
//
|
||||
// Fortunately memcpy() is very fast. In optimized mode, with a
|
||||
// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
|
||||
// code with the minimal amount of data movement. On a 32-bit system,
|
||||
// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
|
||||
// compiles to two loads and two stores.
|
||||
//
|
||||
// I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
|
||||
//
|
||||
// WARNING: if Dest or Source is a non-POD type, the result of the memcpy
|
||||
// is likely to surprise you.
|
||||
//
|
||||
// Props to Bill Gibbons for the compile time assertion technique and
|
||||
// Art Komninos and Igor Tandetnik for the msvc experiments.
|
||||
//
|
||||
// -- mec 2005-10-17
|
||||
|
||||
// Returns a Dest whose object representation is a byte-for-byte copy of
// |source|. Dest and Source must have the same size (checked at compile
// time), and Dest must be default-constructible because the result is
// declared before being filled in.
template <class Dest, class Source>
inline Dest lang_id_bit_cast(const Source &source) {
  static_assert(sizeof(Dest) == sizeof(Source), "Sizes do not match");

  Dest result;
  memcpy(&result, &source, sizeof(Dest));
  return result;
}
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // CASTS_H_
|
||||
@ -0,0 +1,51 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "embedding_feature_extractor.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <vector>
|
||||
|
||||
#include "feature_extractor.h"
|
||||
#include "feature_types.h"
|
||||
#include "task_context.h"
|
||||
#include "utils.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Nothing to set up at construction time; configuration happens in Setup().
GenericEmbeddingFeatureExtractor::GenericEmbeddingFeatureExtractor() = default;

// Out-of-line definition of the virtual destructor.
GenericEmbeddingFeatureExtractor::~GenericEmbeddingFeatureExtractor() = default;
|
||||
|
||||
// Reads this extractor's configuration from |context|. With <prefix> being
// ArgPrefix(), the parameters consulted are:
//   <prefix>_features            ';'-separated FML specs  -> embedding_fml_
//   <prefix>_embedding_names     ';'-separated names      -> embedding_names_
//   <prefix>_embedding_dims      ';'-separated integers   -> embedding_dims_
//   <prefix>_add_varlen_strings  bool (default false)     -> add_strings_
// String parameters default to "" when absent.
void GenericEmbeddingFeatureExtractor::Setup(TaskContext *context) {
  // Don't use version to determine how to get feature FML.
  string features_param = ArgPrefix();
  features_param += "_features";
  const string features = context->Get(features_param, "");
  const string embedding_names =
      context->Get(GetParamName("embedding_names"), "");
  const string embedding_dims =
      context->Get(GetParamName("embedding_dims"), "");

  embedding_fml_ = utils::Split(features, ';');
  add_strings_ = context->Get(GetParamName("add_varlen_strings"), false);
  embedding_names_ = utils::Split(embedding_names, ';');

  // Every other member above is overwritten, so start the dimensions from a
  // clean slate too; otherwise a repeated Setup() would append to the values
  // of the previous call and inflate NumEmbeddings().
  embedding_dims_.clear();
  for (const string &dim : utils::Split(embedding_dims, ';')) {
    embedding_dims_.push_back(utils::ParseUsing<int>(dim, utils::ParseInt32));
  }
}
|
||||
|
||||
void GenericEmbeddingFeatureExtractor::Init(TaskContext *context) {}
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,182 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef EMBEDDING_FEATURE_EXTRACTOR_H_
|
||||
#define EMBEDDING_FEATURE_EXTRACTOR_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "feature_extractor.h"
|
||||
#include "task_context.h"
|
||||
#include "workspace.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// An EmbeddingFeatureExtractor manages the extraction of features for
|
||||
// embedding-based models. It wraps a sequence of underlying classes of feature
|
||||
// extractors, along with associated predicate maps. Each class of feature
|
||||
// extractors is associated with a name, e.g., "unigrams", "bigrams".
|
||||
//
|
||||
// The class is split between a generic abstract version,
|
||||
// GenericEmbeddingFeatureExtractor (that can be initialized without knowing the
|
||||
// signature of the ExtractFeatures method) and a typed version.
|
||||
//
|
||||
// The predicate maps must be initialized before use: they can be loaded using
|
||||
// Read() or updated via UpdateMapsForExample.
|
||||
class GenericEmbeddingFeatureExtractor {
|
||||
public:
|
||||
GenericEmbeddingFeatureExtractor();
|
||||
virtual ~GenericEmbeddingFeatureExtractor();
|
||||
|
||||
// Get the prefix string to put in front of all arguments, so they don't
|
||||
// conflict with other embedding models.
|
||||
virtual const string ArgPrefix() const = 0;
|
||||
|
||||
// Sets up predicate maps and embedding space names that are common for all
|
||||
// embedding based feature extractors.
|
||||
virtual void Setup(TaskContext *context);
|
||||
virtual void Init(TaskContext *context);
|
||||
|
||||
// Requests workspace for the underlying feature extractors. This is
|
||||
// implemented in the typed class.
|
||||
virtual void RequestWorkspaces(WorkspaceRegistry *registry) = 0;
|
||||
|
||||
// Number of predicates for the embedding at a given index (vocabulary size.)
|
||||
int EmbeddingSize(int index) const {
|
||||
return generic_feature_extractor(index).GetDomainSize();
|
||||
}
|
||||
|
||||
// Returns number of embedding spaces.
|
||||
int NumEmbeddings() const { return embedding_dims_.size(); }
|
||||
|
||||
// Returns the number of features in the embedding space.
|
||||
int FeatureSize(int idx) const {
|
||||
return generic_feature_extractor(idx).feature_types();
|
||||
}
|
||||
|
||||
// Returns the dimensionality of the embedding space.
|
||||
int EmbeddingDims(int index) const { return embedding_dims_[index]; }
|
||||
|
||||
// Accessor for embedding dims (dimensions of the embedding spaces).
|
||||
const std::vector<int> &embedding_dims() const { return embedding_dims_; }
|
||||
|
||||
const std::vector<string> &embedding_fml() const { return embedding_fml_; }
|
||||
|
||||
// Get parameter name by concatenating the prefix and the original name.
|
||||
string GetParamName(const string ¶m_name) const {
|
||||
string name = ArgPrefix();
|
||||
name += "_";
|
||||
name += param_name;
|
||||
return name;
|
||||
}
|
||||
|
||||
protected:
|
||||
// Provides the generic class with access to the templated extractors. This is
|
||||
// used to get the type information out of the feature extractor without
|
||||
// knowing the specific calling arguments of the extractor itself.
|
||||
virtual const GenericFeatureExtractor &generic_feature_extractor(
|
||||
int idx) const = 0;
|
||||
|
||||
private:
|
||||
// Embedding space names for parameter sharing.
|
||||
std::vector<string> embedding_names_;
|
||||
|
||||
// FML strings for each feature extractor.
|
||||
std::vector<string> embedding_fml_;
|
||||
|
||||
// Size of each of the embedding spaces (maximum predicate id).
|
||||
std::vector<int> embedding_sizes_;
|
||||
|
||||
// Embedding dimensions of the embedding spaces (i.e. 32, 64 etc.)
|
||||
std::vector<int> embedding_dims_;
|
||||
|
||||
// Whether or not to add string descriptions to converted examples.
|
||||
bool add_strings_;
|
||||
};
|
||||
|
||||
// Templated, object-specific implementation of the
|
||||
// EmbeddingFeatureExtractor. EXTRACTOR should be a FeatureExtractor<OBJ,
|
||||
// ARGS...> class that has the appropriate FeatureTraits() to ensure that
|
||||
// locator type features work.
|
||||
//
|
||||
// Note: for backwards compatibility purposes, this always reads the FML spec
|
||||
// from "<prefix>_features".
|
||||
template <class EXTRACTOR, class OBJ, class... ARGS>
|
||||
class EmbeddingFeatureExtractor : public GenericEmbeddingFeatureExtractor {
|
||||
public:
|
||||
// Sets up all predicate maps, feature extractors, and flags.
|
||||
void Setup(TaskContext *context) override {
|
||||
GenericEmbeddingFeatureExtractor::Setup(context);
|
||||
feature_extractors_.resize(embedding_fml().size());
|
||||
for (size_t i = 0; i < embedding_fml().size(); ++i) {
|
||||
feature_extractors_[i].Parse(embedding_fml()[i]);
|
||||
feature_extractors_[i].Setup(context);
|
||||
}
|
||||
}
|
||||
|
||||
// Initializes resources needed by the feature extractors.
|
||||
void Init(TaskContext *context) override {
|
||||
GenericEmbeddingFeatureExtractor::Init(context);
|
||||
for (auto &feature_extractor : feature_extractors_) {
|
||||
feature_extractor.Init(context);
|
||||
}
|
||||
}
|
||||
|
||||
// Requests workspaces from the registry. Must be called after Init(), and
|
||||
// before Preprocess().
|
||||
void RequestWorkspaces(WorkspaceRegistry *registry) override {
|
||||
for (auto &feature_extractor : feature_extractors_) {
|
||||
feature_extractor.RequestWorkspaces(registry);
|
||||
}
|
||||
}
|
||||
|
||||
// Must be called on the object one state for each sentence, before any
|
||||
// feature extraction (e.g., UpdateMapsForExample, ExtractSparseFeatures).
|
||||
void Preprocess(WorkspaceSet *workspaces, OBJ *obj) const {
|
||||
for (auto &feature_extractor : feature_extractors_) {
|
||||
feature_extractor.Preprocess(workspaces, obj);
|
||||
}
|
||||
}
|
||||
|
||||
// Extracts features using the extractors. Note that features must already
|
||||
// be initialized to the correct number of feature extractors. No predicate
|
||||
// mapping is applied.
|
||||
void ExtractFeatures(const WorkspaceSet &workspaces, const OBJ &obj,
|
||||
ARGS... args,
|
||||
std::vector<FeatureVector> *features) const {
|
||||
for (size_t i = 0; i < feature_extractors_.size(); ++i) {
|
||||
features->at(i).clear();
|
||||
feature_extractors_.at(i).ExtractFeatures(workspaces, obj, args...,
|
||||
&features->at(i));
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
// Provides generic access to the feature extractors.
|
||||
const GenericFeatureExtractor &generic_feature_extractor(
|
||||
int idx) const override {
|
||||
return feature_extractors_.at(idx);
|
||||
}
|
||||
|
||||
private:
|
||||
// Templated feature extractor class.
|
||||
std::vector<EXTRACTOR> feature_extractors_;
|
||||
};
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // EMBEDDING_FEATURE_EXTRACTOR_H_
|
||||
@ -0,0 +1,196 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "embedding_network.h"
|
||||
|
||||
#include "base.h"
|
||||
#include "embedding_network_params.h"
|
||||
#include "float16.h"
|
||||
#include "simple_adder.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace {
|
||||
|
||||
using VectorWrapper = EmbeddingNetwork::VectorWrapper;
|
||||
|
||||
// Debug-checks that the given matrix is stored without quantization
// (quant_type == QuantizationType::NONE).
void CheckNoQuantization(const EmbeddingNetworkParams::Matrix matrix) {
  // The comparison stays inside the macro so that no otherwise-unused locals
  // are introduced.
  CLD3_DCHECK(static_cast<int>(matrix.quant_type) ==
              static_cast<int>(QuantizationType::NONE));
}
|
||||
|
||||
// Fills a Matrix object with the parameters in the given MatrixParams. This
|
||||
// function is used to initialize weight matrices that are *not* embedding
|
||||
// matrices.
|
||||
// Makes *mat a row-by-row view over the float weights of source_matrix: after
// the call, (*mat)[r] wraps the source_matrix.cols consecutive floats of row
// r. No weights are copied. Used for weight matrices that are *not* embedding
// matrices, which must therefore be unquantized.
void FillMatrixParams(const EmbeddingNetworkParams::Matrix source_matrix,
                      EmbeddingNetwork::Matrix *mat) {
  mat->resize(source_matrix.rows);
  CheckNoQuantization(source_matrix);

  // Walk the row-major buffer one row at a time.
  const float *row_start = static_cast<const float *>(source_matrix.elements);
  for (EmbeddingNetwork::VectorWrapper &row : *mat) {
    row = EmbeddingNetwork::VectorWrapper(row_start, source_matrix.cols);
    row_start += source_matrix.cols;
  }
}
|
||||
|
||||
// Computes y = weights * Relu(x) + b where Relu is optionally applied.
|
||||
template <typename ScaleAdderClass>
|
||||
void SparseReluProductPlusBias(bool apply_relu,
|
||||
const EmbeddingNetwork::Matrix &weights,
|
||||
const EmbeddingNetwork::VectorWrapper &b,
|
||||
const EmbeddingNetwork::Vector &x,
|
||||
EmbeddingNetwork::Vector *y) {
|
||||
y->assign(b.data(), b.data() + b.size());
|
||||
ScaleAdderClass adder(y->data(), y->size());
|
||||
|
||||
const int x_size = x.size();
|
||||
for (int i = 0; i < x_size; ++i) {
|
||||
const float &scale = x[i];
|
||||
if (apply_relu) {
|
||||
if (scale > 0) {
|
||||
adder.LazyScaleAdd(weights[i].data(), scale);
|
||||
}
|
||||
} else {
|
||||
adder.LazyScaleAdd(weights[i].data(), scale);
|
||||
}
|
||||
}
|
||||
adder.Finalize();
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void EmbeddingNetwork::ConcatEmbeddings(
|
||||
const std::vector<FeatureVector> &feature_vectors, Vector *concat) const {
|
||||
concat->resize(model_->concat_layer_size());
|
||||
|
||||
// "es_index" stands for "embedding space index".
|
||||
for (size_t es_index = 0; es_index < feature_vectors.size(); ++es_index) {
|
||||
const int concat_offset = model_->concat_offset(es_index);
|
||||
const int embedding_dim = model_->embedding_dim(es_index);
|
||||
|
||||
const EmbeddingMatrix &embedding_matrix = embedding_matrices_[es_index];
|
||||
CLD3_DCHECK(embedding_matrix.dim() == embedding_dim);
|
||||
|
||||
const bool is_quantized =
|
||||
embedding_matrix.quant_type() != QuantizationType::NONE;
|
||||
|
||||
const FeatureVector &feature_vector = feature_vectors[es_index];
|
||||
const int num_features = feature_vector.size();
|
||||
for (int fi = 0; fi < num_features; ++fi) {
|
||||
const FeatureType *feature_type = feature_vector.type(fi);
|
||||
int feature_offset = concat_offset + feature_type->base() * embedding_dim;
|
||||
CLD3_DCHECK(feature_offset + embedding_dim <=
|
||||
static_cast<int>(concat->size()));
|
||||
|
||||
// Weighted embeddings will be added starting from this address.
|
||||
float *concat_ptr = concat->data() + feature_offset;
|
||||
|
||||
// Pointer to float / uint8 weights for relevant embedding.
|
||||
const void *embedding_data;
|
||||
|
||||
// Multiplier for each embedding weight.
|
||||
float multiplier;
|
||||
const FeatureValue feature_value = feature_vector.value(fi);
|
||||
if (feature_type->is_continuous()) {
|
||||
// Continuous features (encoded as FloatFeatureValue).
|
||||
FloatFeatureValue float_feature_value(feature_value);
|
||||
const int id = float_feature_value.value.id;
|
||||
embedding_matrix.get_embedding(id, &embedding_data, &multiplier);
|
||||
multiplier *= float_feature_value.value.weight;
|
||||
} else {
|
||||
// Discrete features: every present feature has implicit value 1.0.
|
||||
embedding_matrix.get_embedding(feature_value, &embedding_data,
|
||||
&multiplier);
|
||||
}
|
||||
|
||||
if (is_quantized) {
|
||||
const uint8 *quant_weights =
|
||||
reinterpret_cast<const uint8 *>(embedding_data);
|
||||
for (int i = 0; i < embedding_dim; ++i, ++quant_weights, ++concat_ptr) {
|
||||
// 128 is bias for UINT8 quantization, only one we currently support.
|
||||
*concat_ptr += (static_cast<int>(*quant_weights) - 128) * multiplier;
|
||||
}
|
||||
} else {
|
||||
const float *weights = reinterpret_cast<const float *>(embedding_data);
|
||||
for (int i = 0; i < embedding_dim; ++i, ++weights, ++concat_ptr) {
|
||||
*concat_ptr += *weights * multiplier;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ScaleAdderClass>
|
||||
void EmbeddingNetwork::FinishComputeFinalScores(const Vector &concat,
|
||||
Vector *scores) const {
|
||||
Vector h0(hidden_bias_[0].size());
|
||||
SparseReluProductPlusBias<ScaleAdderClass>(false, hidden_weights_[0],
|
||||
hidden_bias_[0], concat, &h0);
|
||||
|
||||
CLD3_DCHECK((hidden_weights_.size() == 1) || (hidden_weights_.size() == 2));
|
||||
if (hidden_weights_.size() == 1) { // 1 hidden layer
|
||||
SparseReluProductPlusBias<ScaleAdderClass>(true, softmax_weights_,
|
||||
softmax_bias_, h0, scores);
|
||||
} else if (hidden_weights_.size() == 2) { // 2 hidden layers
|
||||
Vector h1(hidden_bias_[1].size());
|
||||
SparseReluProductPlusBias<ScaleAdderClass>(true, hidden_weights_[1],
|
||||
hidden_bias_[1], h0, &h1);
|
||||
SparseReluProductPlusBias<ScaleAdderClass>(true, softmax_weights_,
|
||||
softmax_bias_, h1, scores);
|
||||
}
|
||||
}
|
||||
|
||||
void EmbeddingNetwork::ComputeFinalScores(
|
||||
const std::vector<FeatureVector> &features, Vector *scores) const {
|
||||
Vector concat;
|
||||
ConcatEmbeddings(features, &concat);
|
||||
|
||||
scores->resize(softmax_bias_.size());
|
||||
FinishComputeFinalScores<SimpleAdder>(concat, scores);
|
||||
}
|
||||
|
||||
// Wraps the parameters exposed by model (embedding matrices, hidden layers,
// softmax) into lightweight views. No weights are copied; model is only
// borrowed and is stored in model_ for later use.
EmbeddingNetwork::EmbeddingNetwork(const EmbeddingNetworkParams *model)
    : model_(model) {
  // Embedding spaces. While collecting them, debug-check that the concat
  // offsets declared by the model are the running sum of the slice sizes.
  int offset_sum = 0;
  const int num_embedding_spaces = model_->embedding_dim_size();
  for (int es = 0; es < num_embedding_spaces; ++es) {
    CLD3_DCHECK(offset_sum == model_->concat_offset(es));
    offset_sum +=
        model_->embedding_dim(es) * model_->embedding_num_features(es);
    embedding_matrices_.emplace_back(model_->GetEmbeddingMatrix(es));
  }

  // Hidden layers: one weight matrix plus one bias vector each.
  CLD3_DCHECK(model_->hidden_size() == model_->hidden_bias_size());
  const int num_hidden_layers = model_->hidden_size();
  hidden_weights_.resize(num_hidden_layers);
  hidden_bias_.resize(num_hidden_layers);
  for (int layer = 0; layer < num_hidden_layers; ++layer) {
    FillMatrixParams(model_->GetHiddenLayerMatrix(layer),
                     &hidden_weights_[layer]);

    EmbeddingNetworkParams::Matrix layer_bias =
        model_->GetHiddenLayerBias(layer);
    CLD3_DCHECK(1 == layer_bias.cols);
    CheckNoQuantization(layer_bias);
    const float *bias_values =
        static_cast<const float *>(layer_bias.elements);
    hidden_bias_[layer] = VectorWrapper(bias_values, layer_bias.rows);
  }

  // Softmax layer (always expected to be present).
  CLD3_DCHECK(model_->HasSoftmax());
  FillMatrixParams(model_->GetSoftmaxMatrix(), &softmax_weights_);

  EmbeddingNetworkParams::Matrix softmax_bias = model_->GetSoftmaxBias();
  CLD3_DCHECK(1 == softmax_bias.cols);
  CheckNoQuantization(softmax_bias);
  const float *softmax_bias_values =
      static_cast<const float *>(softmax_bias.elements);
  softmax_bias_ = VectorWrapper(softmax_bias_values, softmax_bias.rows);
}
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,186 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef EMBEDDING_NETWORK_H_
|
||||
#define EMBEDDING_NETWORK_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "embedding_network_params.h"
|
||||
#include "feature_extractor.h"
|
||||
#include "float16.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Classifier using a hand-coded feed-forward neural network.
|
||||
//
|
||||
// No gradient computation, just inference.
|
||||
//
|
||||
// Based on the more general nlp_saft::EmbeddingNetwork.
|
||||
//
|
||||
// Classification works as follows:
|
||||
//
|
||||
// Discrete features -> Embeddings -> Concatenation -> Hidden+ -> Softmax
|
||||
//
|
||||
// In words: given some discrete features, this class extracts the embeddings
|
||||
// for these features, concatenates them, passes them through one or two hidden
|
||||
// layers (each layer uses Relu) and next through a softmax layer that computes
|
||||
// an unnormalized score for each possible class. Note: there is always a
|
||||
// softmax layer.
|
||||
//
|
||||
// NOTE(salcianu): current code can easily be changed to allow more than two
|
||||
// hidden layers. Feel free to do so if you have a genuine need for that.
|
||||
class EmbeddingNetwork {
|
||||
public:
|
||||
// Class used to represent an embedding matrix. Each row is the embedding on
|
||||
// a vocabulary element. Number of columns = number of embedding dimensions.
|
||||
class EmbeddingMatrix {
|
||||
public:
|
||||
explicit EmbeddingMatrix(const EmbeddingNetworkParams::Matrix source_matrix)
|
||||
: rows_(source_matrix.rows),
|
||||
cols_(source_matrix.cols),
|
||||
quant_type_(source_matrix.quant_type),
|
||||
data_(source_matrix.elements),
|
||||
row_size_in_bytes_(GetRowSizeInBytes(cols_, quant_type_)),
|
||||
quant_scales_(source_matrix.quant_scales) {}
|
||||
|
||||
// Returns vocabulary size; one embedding for each vocabulary element.
|
||||
int size() const { return rows_; }
|
||||
|
||||
// Returns number of weights in embedding of each vocabulary element.
|
||||
int dim() const { return cols_; }
|
||||
|
||||
// Returns quantization type for this embedding matrix.
|
||||
QuantizationType quant_type() const { return quant_type_; }
|
||||
|
||||
// Gets embedding for k-th vocabulary element: on return, sets *data to
|
||||
// point to the embedding weights and *scale to the quantization scale (1.0
|
||||
// if no quantization).
|
||||
void get_embedding(int k, const void **data, float *scale) const {
|
||||
CLD3_CHECK(k >= 0);
|
||||
CLD3_CHECK(k < size());
|
||||
*data = reinterpret_cast<const char *>(data_) + k * row_size_in_bytes_;
|
||||
if (quant_type_ == QuantizationType::NONE) {
|
||||
*scale = 1.0;
|
||||
} else {
|
||||
*scale = Float16To32(quant_scales_[k]);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static int GetRowSizeInBytes(int cols, QuantizationType quant_type) {
|
||||
CLD3_DCHECK((quant_type == QuantizationType::NONE) ||
|
||||
(quant_type == QuantizationType::UINT8));
|
||||
if (quant_type == QuantizationType::NONE) {
|
||||
return cols * sizeof(float);
|
||||
} else { // QuantizationType::UINT8
|
||||
return cols * sizeof(uint8);
|
||||
}
|
||||
}
|
||||
|
||||
// Vocabulary size.
|
||||
int rows_;
|
||||
|
||||
// Number of elements in each embedding.
|
||||
int cols_;
|
||||
|
||||
QuantizationType quant_type_;
|
||||
|
||||
// Pointer to the embedding weights, in row-major order. This is a pointer
|
||||
// to an array of floats / uint8, depending on the quantization type.
|
||||
// Not owned.
|
||||
const void *data_;
|
||||
|
||||
// Number of bytes for one row. Used to jump to next row in data_.
|
||||
int row_size_in_bytes_;
|
||||
|
||||
// Pointer to quantization scales. nullptr if no quantization. Otherwise,
|
||||
// quant_scales_[i] is scale for embedding of i-th vocabulary element.
|
||||
const float16 *quant_scales_;
|
||||
};
|
||||
|
||||
// An immutable vector that doesn't own the memory that stores the underlying
|
||||
// floats. Can be used e.g., as a wrapper around model weights stored in the
|
||||
// static memory.
|
||||
class VectorWrapper {
 public:
  // Empty wrapper: no data, size 0.
  VectorWrapper() : data_(nullptr), size_(0) {}

  // Wraps the size consecutive floats starting at data. The wrapper only
  // borrows the memory, so the floats must stay alive for as long as this
  // object (or any copy of it) is used; statically allocated data trivially
  // qualifies.
  VectorWrapper(const float *data, int size) : data_(data), size_(size) {}

  // Number of wrapped floats.
  int size() const { return size_; }

  // Address of the first wrapped float (nullptr for an empty wrapper).
  const float *data() const { return data_; }

 private:
  // Nothing is owned here, so the implicitly generated copy operations are
  // exactly what we want.
  const float *data_;  // Not owned.
  int size_;
};
|
||||
|
||||
typedef std::vector<VectorWrapper> Matrix;
|
||||
typedef std::vector<float> Vector;
|
||||
|
||||
// Constructs an embedding network using the parameters from model.
|
||||
//
|
||||
// Note: model should stay alive for at least the lifetime of this
|
||||
// EmbeddingNetwork object. TODO(salcianu): remove this constraint: we should
|
||||
// copy all necessary data (except, of course, the static weights) at
|
||||
// construction time and use that, instead of relying on model.
|
||||
explicit EmbeddingNetwork(const EmbeddingNetworkParams *model);
|
||||
|
||||
virtual ~EmbeddingNetwork() {}
|
||||
|
||||
// Runs forward computation to fill scores with unnormalized output unit
|
||||
// scores. This is useful for making predictions.
|
||||
void ComputeFinalScores(const std::vector<FeatureVector> &features,
|
||||
Vector *scores) const;
|
||||
|
||||
private:
|
||||
// Computes the softmax scores (prior to normalization) from the concatenated
|
||||
// representation.
|
||||
template <typename ScaleAdderClass>
|
||||
void FinishComputeFinalScores(const Vector &concat, Vector *scores) const;
|
||||
|
||||
// Constructs the concatenated input embedding vector in place in output
|
||||
// vector concat.
|
||||
void ConcatEmbeddings(const std::vector<FeatureVector> &features,
|
||||
Vector *concat) const;
|
||||
|
||||
// Pointer to the model object passed to the constructor. Not owned.
|
||||
const EmbeddingNetworkParams *model_;
|
||||
|
||||
// Network parameters.
|
||||
|
||||
// One weight matrix for each embedding.
|
||||
std::vector<EmbeddingMatrix> embedding_matrices_;
|
||||
|
||||
// One weight matrix and one vector of bias weights for each hidden layer.
|
||||
std::vector<Matrix> hidden_weights_;
|
||||
std::vector<VectorWrapper> hidden_bias_;
|
||||
|
||||
// Weight matrix and bias vector for the softmax layer.
|
||||
Matrix softmax_weights_;
|
||||
VectorWrapper softmax_bias_;
|
||||
};
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // EMBEDDING_NETWORK_H_
|
||||
@ -0,0 +1,285 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef EMBEDDING_NETWORK_PARAMS_H_
|
||||
#define EMBEDDING_NETWORK_PARAMS_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base.h"
|
||||
#include "float16.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Storage format of a matrix's weights.
//   NONE:  no quantization.
//   UINT8: quantized weights; each matrix row comes with its own scale
//          (see Matrix::quant_scales).
enum class QuantizationType { NONE = 0, UINT8 };
|
||||
|
||||
// API for accessing parameters from a statically-linked EmbeddingNetworkProto.
|
||||
class EmbeddingNetworkParams {
|
||||
public:
|
||||
virtual ~EmbeddingNetworkParams() {}
|
||||
|
||||
// **** High-level API.
|
||||
|
||||
// Simple representation of a matrix. This small struct that doesn't own any
|
||||
// resource intentionally supports copy / assign, to simplify our APIs.
|
||||
// Plain, copyable description of a matrix. It only points at the weights; it
// never owns them.
struct Matrix {
|
||||
// Number of rows.
|
||||
int rows;
|
||||
|
||||
// Number of columns.
|
||||
int cols;
|
||||
|
||||
// How the values behind elements are encoded.
QuantizationType quant_type;
|
||||
|
||||
// Pointer to matrix elements, in row-major order
|
||||
// (https://en.wikipedia.org/wiki/Row-major_order) Not owned.
|
||||
const void *elements;
|
||||
|
||||
// Quantization scales: one scale for each row.
// NOTE(review): presumably only meaningful when quant_type is not NONE -
// confirm against callers.
|
||||
const float16 *quant_scales;
|
||||
};
|
||||
|
||||
// Returns i-th embedding matrix. Crashes on out of bounds indices.
|
||||
//
|
||||
// This is the transpose of the corresponding matrix from the original proto.
|
||||
// Assembles the Matrix view for the i-th embedding from the low-level
// embeddings_*() accessors. The index is validated with CheckMatrixRange().
Matrix GetEmbeddingMatrix(int i) const {
  CheckMatrixRange(i, embeddings_size(), "embedding matrix");

  Matrix result;
  // Shape.
  result.rows = embeddings_num_rows(i);
  result.cols = embeddings_num_cols(i);
  // Storage: embeddings are the only matrices that may be quantized.
  result.quant_type = embeddings_quant_type(i);
  result.elements = embeddings_weights(i);
  result.quant_scales = embeddings_quant_scales(i);
  return result;
}
|
||||
|
||||
// Returns weight matrix for i-th hidden layer. Crashes on out of bounds
|
||||
// indices.
|
||||
//
|
||||
// This is the transpose of the corresponding matrix from the original proto.
|
||||
// Assembles the Matrix view for the weights of the i-th hidden layer from the
// low-level hidden_*() accessors. The index is validated with
// CheckMatrixRange(). The result is always unquantized.
Matrix GetHiddenLayerMatrix(int i) const {
  CheckMatrixRange(i, hidden_size(), "hidden layer");
  Matrix matrix;
  matrix.rows = hidden_num_rows(i);
  matrix.cols = hidden_num_cols(i);

  // Quantization not supported here.
  matrix.quant_type = QuantizationType::NONE;
  matrix.elements = hidden_weights(i);

  // No scales without quantization. Set explicitly so the returned struct
  // never carries an indeterminate pointer.
  matrix.quant_scales = nullptr;
  return matrix;
}
|
||||
|
||||
// Returns bias for i-th hidden layer. Technically a Matrix, but we expect it
|
||||
// to be a row/column vector (i.e., num rows or num cols is 1). However, we
|
||||
// don't CHECK for that: we just provide access to underlying data. Crashes
|
||||
// on out of bounds indices.
|
||||
// Assembles the Matrix view for the bias of the i-th hidden layer from the
// low-level hidden_bias_*() accessors. The index is validated with
// CheckMatrixRange(); the shape is passed through unchecked. The result is
// always unquantized.
Matrix GetHiddenLayerBias(int i) const {
  CheckMatrixRange(i, hidden_bias_size(), "hidden layer bias");
  Matrix matrix;
  matrix.rows = hidden_bias_num_rows(i);
  matrix.cols = hidden_bias_num_cols(i);

  // Quantization not supported here.
  matrix.quant_type = QuantizationType::NONE;
  matrix.elements = hidden_bias_weights(i);

  // No scales without quantization. Set explicitly so the returned struct
  // never carries an indeterminate pointer.
  matrix.quant_scales = nullptr;
  return matrix;
}
|
||||
|
||||
// Returns true if a softmax layer exists.
|
||||
bool HasSoftmax() const {
  // softmax is exposed as a 0-or-1 element "repeated" field.
  return 1 == softmax_size();
}
|
||||
|
||||
// Returns weight matrix for the softmax layer. Note: should be called only
|
||||
// if HasSoftmax() is true.
|
||||
//
|
||||
// This is the transpose of the corresponding matrix from the original proto.
|
||||
// Assembles the Matrix view for the softmax weights from the low-level
// softmax_*() accessors (index 0). Debug-checks HasSoftmax(). The result is
// always unquantized.
Matrix GetSoftmaxMatrix() const {
  CLD3_DCHECK(HasSoftmax());
  Matrix matrix;
  matrix.rows = softmax_num_rows(0);
  matrix.cols = softmax_num_cols(0);

  // Quantization not supported here.
  matrix.quant_type = QuantizationType::NONE;
  matrix.elements = softmax_weights(0);

  // No scales without quantization. Set explicitly so the returned struct
  // never carries an indeterminate pointer.
  matrix.quant_scales = nullptr;
  return matrix;
}
|
||||
|
||||
// Returns bias for the softmax layer. Technically a Matrix, but we expect it
|
||||
// to be a row/column vector (i.e., num rows or num cols is 1). However, we
|
||||
// don't CHECK for that: we just provide access to underlying data.
|
||||
// Assembles the Matrix view for the softmax bias from the low-level
// softmax_bias_*() accessors (index 0). Debug-checks HasSoftmax(); the shape
// is passed through unchecked. The result is always unquantized.
Matrix GetSoftmaxBias() const {
  CLD3_DCHECK(HasSoftmax());
  Matrix matrix;
  matrix.rows = softmax_bias_num_rows(0);
  matrix.cols = softmax_bias_num_cols(0);

  // Quantization not supported here.
  matrix.quant_type = QuantizationType::NONE;
  matrix.elements = softmax_bias_weights(0);

  // No scales without quantization. Set explicitly so the returned struct
  // never carries an indeterminate pointer.
  matrix.quant_scales = nullptr;
  return matrix;
}
|
||||
|
||||
// **** Low-level API.
|
||||
//
|
||||
// * Most low-level API methods are documented by giving an equivalent
|
||||
// function call on proto, the original proto (of type
|
||||
// EmbeddingNetworkProto) which was used to generate the C++ code.
|
||||
//
|
||||
// * To simplify our generation code, optional proto fields of message type
|
||||
// are treated as repeated fields with 0 or 1 instances. As such, we have
|
||||
// *_size() methods for such optional fields: they return 0 or 1.
|
||||
//
|
||||
// * "transpose(M)" denotes the transpose of a matrix M.
|
||||
|
||||
// ** Access methods for repeated MatrixParams embeddings.
|
||||
//
|
||||
// Returns proto.embeddings_size().
|
||||
virtual int embeddings_size() const = 0;
|
||||
|
||||
// Returns number of rows of transpose(proto.embeddings(i)).
|
||||
virtual int embeddings_num_rows(int i) const = 0;
|
||||
|
||||
// Returns number of columns of transpose(proto.embeddings(i)).
|
||||
virtual int embeddings_num_cols(int i) const = 0;
|
||||
|
||||
// Returns pointer to elements of transpose(proto.embeddings(i)), in row-major
|
||||
// order.
|
||||
virtual const void *embeddings_weights(int i) const = 0;
|
||||
|
||||
// Returns the quantization type of the i-th embedding matrix. This default
// implementation ignores i and reports QuantizationType::NONE; the method is
// virtual so an implementation may override it.
virtual QuantizationType embeddings_quant_type(int i) const {
|
||||
return QuantizationType::NONE;
|
||||
}
|
||||
|
||||
// Returns the quantization scales of the i-th embedding matrix. This default
// implementation ignores i and returns nullptr; the method is virtual so an
// implementation may override it.
virtual const float16 *embeddings_quant_scales(int i) const {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// ** Access methods for repeated MatrixParams hidden.
|
||||
//
|
||||
// Returns embedding_network_proto.hidden_size().
|
||||
virtual int hidden_size() const = 0;
|
||||
|
||||
// Returns embedding_network_proto.hidden(i).rows().
|
||||
virtual int hidden_num_rows(int i) const = 0;
|
||||
|
||||
// Returns embedding_network_proto.hidden(i).cols().
|
||||
virtual int hidden_num_cols(int i) const = 0;
|
||||
|
||||
// Returns pointer to beginning of array of floats with all values from
|
||||
// embedding_network_proto.hidden(i).
|
||||
virtual const void *hidden_weights(int i) const = 0;
|
||||
|
||||
// ** Access methods for repeated MatrixParams hidden_bias.
|
||||
//
|
||||
// Returns proto.hidden_bias_size().
|
||||
virtual int hidden_bias_size() const = 0;
|
||||
|
||||
// Returns number of rows of proto.hidden_bias(i).
|
||||
virtual int hidden_bias_num_rows(int i) const = 0;
|
||||
|
||||
// Returns number of columns of proto.hidden_bias(i).
|
||||
virtual int hidden_bias_num_cols(int i) const = 0;
|
||||
|
||||
// Returns pointer to elements of proto.hidden_bias(i), in row-major order.
|
||||
virtual const void *hidden_bias_weights(int i) const = 0;
|
||||
|
||||
// ** Access methods for optional MatrixParams softmax.
|
||||
//
|
||||
// Returns 1 if proto has optional field softmax, 0 otherwise.
|
||||
virtual int softmax_size() const = 0;
|
||||
|
||||
// Returns number of rows of transpose(proto.softmax()).
|
||||
virtual int softmax_num_rows(int i) const = 0;
|
||||
|
||||
// Returns number of columns of transpose(proto.softmax()).
|
||||
virtual int softmax_num_cols(int i) const = 0;
|
||||
|
||||
// Returns pointer to elements of transpose(proto.softmax()), in row-major
|
||||
// order.
|
||||
virtual const void *softmax_weights(int i) const = 0;
|
||||
|
||||
// ** Access methods for optional MatrixParams softmax_bias.
|
||||
//
|
||||
// Returns 1 if proto has optional field softmax_bias, 0 otherwise.
|
||||
virtual int softmax_bias_size() const = 0;
|
||||
|
||||
// Returns number of rows of proto.softmax_bias().
|
||||
virtual int softmax_bias_num_rows(int i) const = 0;
|
||||
|
||||
// Returns number of columns of proto.softmax_bias().
|
||||
virtual int softmax_bias_num_cols(int i) const = 0;
|
||||
|
||||
// Returns pointer to elements of proto.softmax_bias(), in row-major order.
|
||||
virtual const void *softmax_bias_weights(int i) const = 0;
|
||||
|
||||
// ** Access methods for repeated int32 embedding_dim.
|
||||
//
|
||||
// Returns proto.embedding_dim_size().
|
||||
virtual int embedding_dim_size() const = 0;
|
||||
|
||||
// Returns proto.embedding_dim(i).
|
||||
virtual int embedding_dim(int i) const = 0;
|
||||
|
||||
// ** Access methods for repeated int32 embedding_num_features.
|
||||
//
|
||||
// Returns proto.embedding_num_features_size().
|
||||
virtual int embedding_num_features_size() const = 0;
|
||||
|
||||
// Returns proto.embedding_num_features(i).
|
||||
virtual int embedding_num_features(int i) const = 0;
|
||||
|
||||
// ** Access methods for repeated int32 embedding_features_domain_size.
|
||||
//
|
||||
// Returns proto.embedding_features_domain_size_size().
|
||||
virtual int embedding_features_domain_size_size() const = 0;
|
||||
|
||||
// Returns proto.embedding_features_domain_size(i).
|
||||
virtual int embedding_features_domain_size(int i) const = 0;
|
||||
|
||||
// ** Access methods for repeated int32 concat_offset.
|
||||
//
|
||||
// Returns proto.concat_offset(i).
|
||||
virtual int concat_offset(int i) const = 0;
|
||||
|
||||
// Returns proto.concat_offset_size().
|
||||
virtual int concat_offset_size() const = 0;
|
||||
|
||||
// ** Access methods for concat_layer_size.
|
||||
//
|
||||
// Returns proto.has_concat_layer_size().
|
||||
virtual bool has_concat_layer_size() const = 0;
|
||||
|
||||
// Returns proto.concat_layer_size().
|
||||
virtual int concat_layer_size() const = 0;
|
||||
|
||||
// ** Access methods for is_precomputed
|
||||
//
|
||||
// Returns proto.has_is_precomputed().
|
||||
virtual bool has_is_precomputed() const = 0;
|
||||
|
||||
// Returns proto.is_precomputed().
|
||||
virtual bool is_precomputed() const = 0;
|
||||
|
||||
private:
|
||||
// Checks, via CLD3_DCHECK, that 0 <= index < num_matrices.
// NOTE(review): description is not used in the body. Whether a failed
// CLD3_DCHECK aborts in every build configuration depends on the macro's
// definition, which is not visible here - confirm before relying on the
// "crashes on out of bounds indices" wording of the callers' comments.
void CheckMatrixRange(int index, int num_matrices,
|
||||
const string &description) const {
|
||||
CLD3_DCHECK(index >= 0);
|
||||
CLD3_DCHECK(index < num_matrices);
|
||||
}
|
||||
}; // class EmbeddingNetworkParams
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // EMBEDDING_NETWORK_PARAMS_H_
|
||||
@ -0,0 +1,137 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "feature_extractor.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "feature_types.h"
|
||||
#include "fml_parser.h"
|
||||
#include "utils.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Namespace-scope definition of the static constexpr member kNone. Before
// C++17, a static constexpr data member that is odr-used needs such an
// out-of-class definition (without an initializer).
constexpr FeatureValue GenericFeatureFunction::kNone;
|
||||
|
||||
// Out-of-line definitions of the default constructors and destructors of
// FeatureVector and GenericFeatureExtractor. All four bodies are empty.
FeatureVector::FeatureVector() {}
|
||||
|
||||
FeatureVector::~FeatureVector() {}
|
||||
|
||||
GenericFeatureExtractor::GenericFeatureExtractor() {}
|
||||
|
||||
GenericFeatureExtractor::~GenericFeatureExtractor() {}
|
||||
|
||||
// Copy constructor: copies the descriptor and the list of feature types from
// extractor.
// NOTE(review): feature_types_ appears to hold raw FeatureType pointers, so
// the copy would share the pointed-to objects with extractor - confirm
// ownership against the class declaration.
GenericFeatureExtractor::GenericFeatureExtractor(
|
||||
const GenericFeatureExtractor &extractor)
|
||||
: descriptor_(extractor.descriptor_),
|
||||
feature_types_(extractor.feature_types_) {}
|
||||
|
||||
void GenericFeatureExtractor::Parse(const string &source) {
|
||||
// Parse feature specification into descriptor.
|
||||
FMLParser parser;
|
||||
parser.Parse(source, mutable_descriptor());
|
||||
|
||||
// Initialize feature extractor from descriptor.
|
||||
InitializeFeatureFunctions();
|
||||
}
|
||||
|
||||
void GenericFeatureExtractor::InitializeFeatureTypes() {
|
||||
// Register all feature types.
|
||||
GetFeatureTypes(&feature_types_);
|
||||
for (size_t i = 0; i < feature_types_.size(); ++i) {
|
||||
FeatureType *ft = feature_types_[i];
|
||||
ft->set_base(i);
|
||||
|
||||
// Check for feature space overflow.
|
||||
CLD3_DCHECK(ft->GetDomainSize() >= 0);
|
||||
}
|
||||
|
||||
std::vector<string> types_names;
|
||||
GetFeatureTypeNames(&types_names);
|
||||
CLD3_DCHECK(feature_types_.size() == types_names.size());
|
||||
}
|
||||
|
||||
void GenericFeatureExtractor::GetFeatureTypeNames(
|
||||
std::vector<string> *type_names) const {
|
||||
for (size_t i = 0; i < feature_types_.size(); ++i) {
|
||||
FeatureType *ft = feature_types_[i];
|
||||
type_names->push_back(ft->name());
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the largest domain size among the registered feature types, or 0
// when no feature type is registered.
// NOTE(review): the result is the per-type maximum; it is not multiplied by
// the number of feature types - confirm that this is what callers expect.
FeatureValue GenericFeatureExtractor::GetDomainSize() const {
  FeatureValue largest = 0;
  for (FeatureType *type : feature_types_) {
    const FeatureValue candidate = type->GetDomainSize();
    largest = (candidate > largest) ? candidate : largest;
  }
  return largest;
}
|
||||
|
||||
// Looks up a named parameter in the feature descriptor. Returns the value of
// the first parameter whose name equals name, or an empty string when there
// is no such parameter.
string GenericFeatureFunction::GetParameter(const string &name) const {
  const int num_parameters = descriptor_->parameter_size();
  for (int idx = 0; idx < num_parameters; ++idx) {
    const auto &candidate = descriptor_->parameter(idx);
    if (candidate.name() != name) continue;
    return candidate.value();
  }
  return "";
}
|
||||
|
||||
// Nothing to do on construction; the constructor has no initializer list or
// body work of its own.
GenericFeatureFunction::GenericFeatureFunction() = default;

// Releases the owned feature type, if one was set (deleting nullptr is a
// no-op).
GenericFeatureFunction::~GenericFeatureFunction() {
  delete feature_type_;
}
|
||||
|
||||
int GenericFeatureFunction::GetIntParameter(const string &name,
|
||||
int default_value) const {
|
||||
string value = GetParameter(name);
|
||||
return value.empty() ? default_value
|
||||
: utils::ParseUsing<int>(value, utils::ParseInt32);
|
||||
}
|
||||
|
||||
bool GenericFeatureFunction::GetBoolParameter(const string &name,
|
||||
bool default_value) const {
|
||||
string value = GetParameter(name);
|
||||
if (value.empty()) return default_value;
|
||||
if (value == "true") return true;
|
||||
if (value == "false") return false;
|
||||
return false;
|
||||
}
|
||||
|
||||
void GenericFeatureFunction::GetFeatureTypes(
|
||||
std::vector<FeatureType *> *types) const {
|
||||
if (feature_type_ != nullptr) types->push_back(feature_type_);
|
||||
}
|
||||
|
||||
// Returns the one feature type produced by this function, or nullptr if the
// function produces zero or several feature types.
FeatureType *GenericFeatureFunction::GetFeatureType() const {
  // Fast path: a single feature type was registered directly.
  if (feature_type_ != nullptr) {
    return feature_type_;
  }

  // Otherwise ask the (possibly overridden) GetFeatureTypes() and accept the
  // answer only when it is unambiguous.
  std::vector<FeatureType *> collected;
  GetFeatureTypes(&collected);
  return collected.size() == 1 ? collected.front() : nullptr;
}
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,633 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// Generic feature extractor for extracting features from objects. The feature
|
||||
// extractor can be used for extracting features from any object. The feature
|
||||
// extractor and feature function classes are template classes that have to
|
||||
// be instantiated for extracting feature from a specific object type.
|
||||
//
|
||||
// A feature extractor consists of a hierarchy of feature functions. Each
|
||||
// feature function extracts one or more feature type and value pairs from the
|
||||
// object.
|
||||
//
|
||||
// The feature extractor has a modular design where new feature functions can be
|
||||
// registered as components. The feature extractor is initialized from a
|
||||
// descriptor represented by a protocol buffer. The feature extractor can also
|
||||
// be initialized from a text-based source specification of the feature
|
||||
// extractor. Feature specification parsers can be added as components. By
|
||||
// default the feature extractor can be read from an ASCII protocol buffer or in
|
||||
// a simple feature modeling language (fml).
|
||||
|
||||
// A feature function is invoked with a focus. Nested feature function can be
|
||||
// invoked with another focus determined by the parent feature function.
|
||||
|
||||
#ifndef FEATURE_EXTRACTOR_H_
|
||||
#define FEATURE_EXTRACTOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "base.h"
|
||||
#include "cld_3/protos/feature_extractor.pb.h"
|
||||
#include "feature_types.h"
|
||||
#include "registry.h"
|
||||
#include "script_span/stringpiece.h"
|
||||
#include "task_context.h"
|
||||
#include "utils.h"
|
||||
#include "workspace.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// TODO(djweiss) Clean this up as well.
|
||||
// Use the same type for feature values as is used for predicates.
|
||||
typedef int64 Predicate;
|
||||
typedef Predicate FeatureValue;
|
||||
|
||||
// Output feature model in FML format.
|
||||
void ToFMLFunction(const FeatureFunctionDescriptor &function, string *output);
|
||||
void ToFML(const FeatureFunctionDescriptor &function, string *output);
|
||||
|
||||
// A union used to represent discrete and continuous feature values.
|
||||
union FloatFeatureValue {
|
||||
public:
|
||||
explicit FloatFeatureValue(FeatureValue v) : discrete_value(v) {}
|
||||
FloatFeatureValue(uint32 i, float w) {
|
||||
value.id = i;
|
||||
value.weight = w;
|
||||
}
|
||||
FeatureValue discrete_value;
|
||||
struct IdWeight {
|
||||
uint32 id;
|
||||
float weight;
|
||||
} value;
|
||||
};
|
||||
|
||||
// A feature vector contains feature type and value pairs.
|
||||
class FeatureVector {
|
||||
public:
|
||||
FeatureVector();
|
||||
~FeatureVector();
|
||||
|
||||
// Adds feature type and value pair to feature vector.
|
||||
void add(FeatureType *type, FeatureValue value) {
|
||||
features_.emplace_back(type, value);
|
||||
}
|
||||
|
||||
// Removes all elements from the feature vector.
|
||||
void clear() { features_.clear(); }
|
||||
|
||||
// Returns the number of elements in the feature vector.
|
||||
int size() const { return features_.size(); }
|
||||
|
||||
// Reserves space in the underlying feature vector.
|
||||
void reserve(int n) { features_.reserve(n); }
|
||||
|
||||
// Returns feature type for an element in the feature vector.
|
||||
FeatureType *type(int index) const { return features_[index].type; }
|
||||
|
||||
// Returns feature value for an element in the feature vector.
|
||||
FeatureValue value(int index) const { return features_[index].value; }
|
||||
|
||||
private:
|
||||
// Structure for holding feature type and value pairs.
|
||||
struct Element {
|
||||
Element() : type(NULL), value(-1) {}
|
||||
Element(FeatureType *t, FeatureValue v) : type(t), value(v) {}
|
||||
|
||||
FeatureType *type;
|
||||
FeatureValue value;
|
||||
};
|
||||
|
||||
// Array for storing feature vector elements.
|
||||
std::vector<Element> features_;
|
||||
|
||||
CLD3_DISALLOW_COPY_AND_ASSIGN(FeatureVector);
|
||||
};
|
||||
|
||||
// The generic feature extractor is the type-independent part of a feature
|
||||
// extractor. This holds the descriptor for the feature extractor and the
|
||||
// collection of feature types used in the feature extractor. The feature
|
||||
// types are not available until FeatureExtractor<>::Init() has been called.
|
||||
class GenericFeatureExtractor {
|
||||
public:
|
||||
GenericFeatureExtractor();
|
||||
virtual ~GenericFeatureExtractor();
|
||||
GenericFeatureExtractor(const GenericFeatureExtractor &extractor);
|
||||
|
||||
// Initializes the feature extractor from a source representation of the
|
||||
// feature extractor. The first line is used for determining the feature
|
||||
// specification language. If the first line starts with #! followed by a name
|
||||
// then this name is used for instantiating a feature specification parser
|
||||
// with that name. If the language cannot be detected this way it falls back
|
||||
// to using the default language supplied.
|
||||
void Parse(const string &source);
|
||||
|
||||
// Returns the feature extractor descriptor.
|
||||
const FeatureExtractorDescriptor &descriptor() const { return descriptor_; }
|
||||
FeatureExtractorDescriptor *mutable_descriptor() { return &descriptor_; }
|
||||
|
||||
// Returns the number of feature types in the feature extractor. Invalid
|
||||
// before Init() has been called.
|
||||
int feature_types() const { return feature_types_.size(); }
|
||||
|
||||
// Returns all feature types names used by the extractor. The names are
|
||||
// added to the types_names array. Invalid before Init() has been called.
|
||||
void GetFeatureTypeNames(std::vector<string> *type_names) const;
|
||||
|
||||
// Returns a feature type used in the extractor. Invalid before Init() has
|
||||
// been called.
|
||||
const FeatureType *feature_type(int index) const {
|
||||
return feature_types_[index];
|
||||
}
|
||||
|
||||
// Returns the feature domain size of this feature extractor.
|
||||
// NOTE: The way that domain size is calculated is, for some, unintuitive. It
|
||||
// is the largest domain size of any feature type.
|
||||
FeatureValue GetDomainSize() const;
|
||||
|
||||
protected:
|
||||
// Initializes the feature types used by the extractor. Called from
|
||||
// FeatureExtractor<>::Init().
|
||||
void InitializeFeatureTypes();
|
||||
|
||||
private:
|
||||
// Initializes the top-level feature functions.
|
||||
virtual void InitializeFeatureFunctions() = 0;
|
||||
|
||||
// Returns all feature types used by the extractor. The feature types are
|
||||
// added to the result array.
|
||||
virtual void GetFeatureTypes(std::vector<FeatureType *> *types) const = 0;
|
||||
|
||||
// Descriptor for the feature extractor. This is a protocol buffer that
|
||||
// contains all the information about the feature extractor. The feature
|
||||
// functions are initialized from the information in the descriptor.
|
||||
FeatureExtractorDescriptor descriptor_;
|
||||
|
||||
// All feature types used by the feature extractor. The collection of all the
|
||||
// feature types describes the feature space of the feature set produced by
|
||||
// the feature extractor. Not owned.
|
||||
std::vector<FeatureType *> feature_types_;
|
||||
};
|
||||
|
||||
// The generic feature function is the type-independent part of a feature
|
||||
// function. Each feature function is associated with the descriptor that it is
|
||||
// instantiated from. The feature types associated with this feature function
|
||||
// will be established by the time FeatureExtractor<>::Init() completes.
|
||||
class GenericFeatureFunction {
|
||||
public:
|
||||
// A feature value that represents the absence of a value.
|
||||
static constexpr FeatureValue kNone = -1;
|
||||
|
||||
GenericFeatureFunction();
|
||||
virtual ~GenericFeatureFunction();
|
||||
|
||||
// Sets up the feature function. NB: FeatureTypes of nested functions are not
|
||||
// guaranteed to be available until Init().
|
||||
virtual void Setup(TaskContext *context) {}
|
||||
|
||||
// Initializes the feature function. NB: The FeatureType of this function must
|
||||
// be established when this method completes.
|
||||
virtual void Init(TaskContext *context) {}
|
||||
|
||||
// Requests workspaces from a registry to obtain indices into a WorkspaceSet
|
||||
// for any Workspace objects used by this feature function. NB: This will be
|
||||
// called after Init(), so it can depend on resources and arguments.
|
||||
virtual void RequestWorkspaces(WorkspaceRegistry *registry) {}
|
||||
|
||||
// Appends the feature types produced by the feature function to types. The
|
||||
// default implementation appends feature_type(), if non-null. Invalid
|
||||
// before Init() has been called.
|
||||
virtual void GetFeatureTypes(std::vector<FeatureType *> *types) const;
|
||||
|
||||
// Returns the feature type for feature produced by this feature function. If
|
||||
// the feature function produces features of different types this returns
|
||||
// null. Invalid before Init() has been called.
|
||||
virtual FeatureType *GetFeatureType() const;
|
||||
|
||||
// Returns the name of the registry used for creating the feature function.
|
||||
// This can be used for checking if two feature functions are of the same
|
||||
// kind.
|
||||
virtual const char *RegistryName() const = 0;
|
||||
|
||||
// Returns the value of a named parameter in the feature functions descriptor.
|
||||
// If the named parameter is not found the global parameters are searched.
|
||||
string GetParameter(const string &name) const;
|
||||
int GetIntParameter(const string &name, int default_value) const;
|
||||
bool GetBoolParameter(const string &name, bool default_value) const;
|
||||
|
||||
// Returns the FML function description for the feature function, i.e. the
|
||||
// name and parameters without the nested features.
|
||||
string FunctionName() const {
|
||||
string output;
|
||||
ToFMLFunction(*descriptor_, &output);
|
||||
return output;
|
||||
}
|
||||
|
||||
// Returns the prefix for nested feature functions. This is the prefix of this
|
||||
// feature function concatenated with the feature function name.
|
||||
string SubPrefix() const {
|
||||
return prefix_.empty() ? FunctionName() : prefix_ + "." + FunctionName();
|
||||
}
|
||||
|
||||
// Returns/sets the feature extractor this function belongs to.
|
||||
GenericFeatureExtractor *extractor() const { return extractor_; }
|
||||
void set_extractor(GenericFeatureExtractor *extractor) {
|
||||
extractor_ = extractor;
|
||||
}
|
||||
|
||||
// Returns/sets the feature function descriptor.
|
||||
FeatureFunctionDescriptor *descriptor() const { return descriptor_; }
|
||||
void set_descriptor(FeatureFunctionDescriptor *descriptor) {
|
||||
descriptor_ = descriptor;
|
||||
}
|
||||
|
||||
// Returns a descriptive name for the feature function. The name is taken from
|
||||
// the descriptor for the feature function. If the name is empty or the
|
||||
// feature function is a variable the name is the FML representation of the
|
||||
// feature, including the prefix.
|
||||
string name() const {
|
||||
string output;
|
||||
if (descriptor_->name().empty()) {
|
||||
if (!prefix_.empty()) {
|
||||
output.append(prefix_);
|
||||
output.append(".");
|
||||
}
|
||||
ToFML(*descriptor_, &output);
|
||||
} else {
|
||||
output = descriptor_->name();
|
||||
}
|
||||
StringPiece stripped(output);
|
||||
utils::RemoveWhitespaceContext(&stripped);
|
||||
|
||||
string stripped_output(stripped.data(), stripped.size());
|
||||
return stripped_output;
|
||||
}
|
||||
|
||||
// Returns the argument from the feature function descriptor. It defaults to
|
||||
// 0 if the argument has not been specified.
|
||||
int argument() const {
|
||||
return descriptor_->has_argument() ? descriptor_->argument() : 0;
|
||||
}
|
||||
|
||||
// Returns/sets/clears function name prefix.
|
||||
const string &prefix() const { return prefix_; }
|
||||
void set_prefix(const string &prefix) { prefix_ = prefix; }
|
||||
|
||||
protected:
|
||||
// Returns the feature type for single-type feature functions.
|
||||
FeatureType *feature_type() const { return feature_type_; }
|
||||
|
||||
// Sets the feature type for single-type feature functions. This takes
|
||||
// ownership of feature_type. Can only be called once.
|
||||
void set_feature_type(FeatureType *feature_type) {
|
||||
CLD3_DCHECK(feature_type_ == nullptr);
|
||||
feature_type_ = feature_type;
|
||||
}
|
||||
|
||||
private:
|
||||
// Feature extractor this feature function belongs to. Not owned.
|
||||
GenericFeatureExtractor *extractor_ = nullptr;
|
||||
|
||||
// Descriptor for feature function. Not owned.
|
||||
FeatureFunctionDescriptor *descriptor_ = nullptr;
|
||||
|
||||
// Feature type for features produced by this feature function. If the
|
||||
// feature function produces features of multiple feature types this is null
|
||||
// and the feature function must return it's feature types in
|
||||
// GetFeatureTypes(). Owned.
|
||||
FeatureType *feature_type_ = nullptr;
|
||||
|
||||
// Prefix used for sub-feature types of this function.
|
||||
string prefix_;
|
||||
};
|
||||
|
||||
// Feature function that can extract features from an object. Templated on
|
||||
// two type arguments:
|
||||
//
|
||||
// OBJ: The "object" from which features are extracted; e.g., a sentence. This
|
||||
// should be a plain type, rather than a reference or pointer.
|
||||
//
|
||||
// ARGS: A set of 0 or more types that are used to "index" into some part of the
|
||||
// object that should be extracted, e.g. an int token index for a sentence
|
||||
// object. This should not be a reference type.
|
||||
template <class OBJ, class... ARGS>
|
||||
class FeatureFunction
|
||||
: public GenericFeatureFunction,
|
||||
public RegisterableClass<FeatureFunction<OBJ, ARGS...> > {
|
||||
public:
|
||||
using Self = FeatureFunction<OBJ, ARGS...>;
|
||||
|
||||
// Preprocesses the object. This will be called prior to calling Evaluate()
|
||||
// or Compute() on that object.
|
||||
virtual void Preprocess(WorkspaceSet *workspaces, OBJ *object) const {}
|
||||
|
||||
// Appends features computed from the object and focus to the result. The
|
||||
// default implementation delegates to Compute(), adding a single value if
|
||||
// available. Multi-valued feature functions must override this method.
|
||||
virtual void Evaluate(const WorkspaceSet &workspaces, const OBJ &object,
|
||||
ARGS... args, FeatureVector *result) const {
|
||||
FeatureValue value = Compute(workspaces, object, args..., result);
|
||||
if (value != kNone) result->add(feature_type(), value);
|
||||
}
|
||||
|
||||
// Returns a feature value computed from the object and focus, or kNone if no
|
||||
// value is computed. Single-valued feature functions only need to override
|
||||
// this method.
|
||||
virtual FeatureValue Compute(const WorkspaceSet &workspaces,
|
||||
const OBJ &object, ARGS... args,
|
||||
const FeatureVector *fv) const {
|
||||
return kNone;
|
||||
}
|
||||
|
||||
// Instantiates a new feature function in a feature extractor from a feature
|
||||
// descriptor.
|
||||
static Self *Instantiate(GenericFeatureExtractor *extractor,
|
||||
FeatureFunctionDescriptor *fd,
|
||||
const string &prefix) {
|
||||
Self *f = Self::Create(fd->type());
|
||||
f->set_extractor(extractor);
|
||||
f->set_descriptor(fd);
|
||||
f->set_prefix(prefix);
|
||||
return f;
|
||||
}
|
||||
|
||||
// Returns the name of the registry for the feature function.
|
||||
const char *RegistryName() const override { return Self::registry()->name; }
|
||||
|
||||
private:
|
||||
// Special feature function class for resolving variable references. The type
|
||||
// of the feature function is used for resolving the variable reference. When
|
||||
// evaluated it will either get the feature value(s) from the variable portion
|
||||
// of the feature vector, if present, or otherwise it will call the referenced
|
||||
// feature extractor function directly to extract the feature(s).
|
||||
class Reference;
|
||||
};
|
||||
|
||||
// Base class for features with nested feature functions. The nested functions
|
||||
// are of type NES, which may be different from the type of the parent function.
|
||||
// NB: NestedFeatureFunction will ensure that all initialization of nested
|
||||
// functions takes place during Setup() and Init() -- after the nested features
|
||||
// are initialized, the parent feature is initialized via SetupNested() and
|
||||
// InitNested(). Alternatively, a derived classes that overrides Setup() and
|
||||
// Init() directly should call Parent::Setup(), Parent::Init(), etc. first.
|
||||
//
|
||||
// Note: NestedFeatureFunction cannot know how to call Preprocess, Evaluate, or
|
||||
// Compute, since the nested functions may be of a different type.
|
||||
template <class NES, class OBJ, class... ARGS>
|
||||
class NestedFeatureFunction : public FeatureFunction<OBJ, ARGS...> {
|
||||
public:
|
||||
using Parent = NestedFeatureFunction<NES, OBJ, ARGS...>;
|
||||
|
||||
// Clean up nested functions.
|
||||
~NestedFeatureFunction() override { utils::STLDeleteElements(&nested_); }
|
||||
|
||||
// By default, just appends the nested feature types.
|
||||
void GetFeatureTypes(std::vector<FeatureType *> *types) const override {
|
||||
// Nested features require nested features to be defined.
|
||||
CLD3_DCHECK(!this->nested().empty());
|
||||
for (auto *function : nested_) function->GetFeatureTypes(types);
|
||||
}
|
||||
|
||||
// Sets up the nested features.
|
||||
void Setup(TaskContext *context) override {
|
||||
CreateNested(this->extractor(), this->descriptor(), &nested_,
|
||||
this->SubPrefix());
|
||||
for (auto *function : nested_) function->Setup(context);
|
||||
SetupNested(context);
|
||||
}
|
||||
|
||||
// Sets up this NestedFeatureFunction specifically.
|
||||
virtual void SetupNested(TaskContext *context) {}
|
||||
|
||||
// Initializes the nested features.
|
||||
void Init(TaskContext *context) override {
|
||||
for (auto *function : nested_) function->Init(context);
|
||||
InitNested(context);
|
||||
}
|
||||
|
||||
// Initializes this NestedFeatureFunction specifically.
|
||||
virtual void InitNested(TaskContext *context) {}
|
||||
|
||||
// Gets all the workspaces needed for the nested functions.
|
||||
void RequestWorkspaces(WorkspaceRegistry *registry) override {
|
||||
for (auto *function : nested_) function->RequestWorkspaces(registry);
|
||||
}
|
||||
|
||||
// Returns the list of nested feature functions.
|
||||
const vector<NES *> &nested() const { return nested_; }
|
||||
|
||||
// Instantiates nested feature functions for a feature function. Creates and
|
||||
// initializes one feature function for each sub-descriptor in the feature
|
||||
// descriptor.
|
||||
static void CreateNested(GenericFeatureExtractor *extractor,
|
||||
FeatureFunctionDescriptor *fd,
|
||||
vector<NES *> *functions, const string &prefix) {
|
||||
for (int i = 0; i < fd->feature_size(); ++i) {
|
||||
FeatureFunctionDescriptor *sub = fd->mutable_feature(i);
|
||||
NES *f = NES::Instantiate(extractor, sub, prefix);
|
||||
functions->push_back(f);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
// The nested feature functions, if any, in order of declaration in the
|
||||
// feature descriptor. Owned.
|
||||
vector<NES *> nested_;
|
||||
};
|
||||
|
||||
// Base class for a nested feature function that takes nested features with the
|
||||
// same signature as these features, i.e. a meta feature. For this class, we can
|
||||
// provide preprocessing of the nested features.
|
||||
template <class OBJ, class... ARGS>
|
||||
class MetaFeatureFunction
|
||||
: public NestedFeatureFunction<FeatureFunction<OBJ, ARGS...>, OBJ,
|
||||
ARGS...> {
|
||||
public:
|
||||
// Preprocesses using the nested features.
|
||||
void Preprocess(WorkspaceSet *workspaces, OBJ *object) const override {
|
||||
for (auto *function : this->nested_) {
|
||||
function->Preprocess(workspaces, object);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Template for a special type of locator: The locator of type
|
||||
// FeatureFunction<OBJ, ARGS...> calls nested functions of type
|
||||
// FeatureFunction<OBJ, IDX, ARGS...>, where the derived class DER is
|
||||
// responsible for translating by providing the following:
|
||||
//
|
||||
// // Gets the new additional focus.
|
||||
// IDX GetFocus(const WorkspaceSet &workspaces, const OBJ &object);
|
||||
//
|
||||
// This is useful to e.g. add a token focus to a parser state based on some
|
||||
// desired property of that state.
|
||||
template <class DER, class OBJ, class IDX, class... ARGS>
|
||||
class FeatureAddFocusLocator
|
||||
: public NestedFeatureFunction<FeatureFunction<OBJ, IDX, ARGS...>, OBJ,
|
||||
ARGS...> {
|
||||
public:
|
||||
void Preprocess(WorkspaceSet *workspaces, OBJ *object) const override {
|
||||
for (auto *function : this->nested_) {
|
||||
function->Preprocess(workspaces, object);
|
||||
}
|
||||
}
|
||||
|
||||
void Evaluate(const WorkspaceSet &workspaces, const OBJ &object, ARGS... args,
|
||||
FeatureVector *result) const override {
|
||||
IDX focus =
|
||||
static_cast<const DER *>(this)->GetFocus(workspaces, object, args...);
|
||||
for (auto *function : this->nested()) {
|
||||
function->Evaluate(workspaces, object, focus, args..., result);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the first nested feature's computed value.
|
||||
FeatureValue Compute(const WorkspaceSet &workspaces, const OBJ &object,
|
||||
ARGS... args,
|
||||
const FeatureVector *result) const override {
|
||||
IDX focus =
|
||||
static_cast<const DER *>(this)->GetFocus(workspaces, object, args...);
|
||||
return this->nested()[0]->Compute(workspaces, object, focus, args...,
|
||||
result);
|
||||
}
|
||||
};
|
||||
|
||||
// CRTP feature locator class. This is a meta feature that modifies ARGS and
|
||||
// then calls the nested feature functions with the modified ARGS. Note that in
|
||||
// order for this template to work correctly, all of ARGS must be types for
|
||||
// which the reference operator & can be interpreted as a pointer to the
|
||||
// argument. The derived class DER must implement the UpdateFocus method which
|
||||
// takes pointers to the ARGS arguments:
|
||||
//
|
||||
// // Updates the current arguments.
|
||||
// void UpdateArgs(const OBJ &object, ARGS *...args) const;
|
||||
template <class DER, class OBJ, class... ARGS>
|
||||
class FeatureLocator : public MetaFeatureFunction<OBJ, ARGS...> {
|
||||
public:
|
||||
// Feature locators have an additional check that there is no intrinsic type.
|
||||
void GetFeatureTypes(std::vector<FeatureType *> *types) const override {
|
||||
// FeatureLocators should not have an intrinsic type.
|
||||
CLD3_DCHECK(this->feature_type() == nullptr);
|
||||
MetaFeatureFunction<OBJ, ARGS...>::GetFeatureTypes(types);
|
||||
}
|
||||
|
||||
// Evaluates the locator.
|
||||
void Evaluate(const WorkspaceSet &workspaces, const OBJ &object, ARGS... args,
|
||||
FeatureVector *result) const override {
|
||||
static_cast<const DER *>(this)->UpdateArgs(workspaces, object, &args...);
|
||||
for (auto *function : this->nested()) {
|
||||
function->Evaluate(workspaces, object, args..., result);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the first nested feature's computed value.
|
||||
FeatureValue Compute(const WorkspaceSet &workspaces, const OBJ &object,
|
||||
ARGS... args,
|
||||
const FeatureVector *result) const override {
|
||||
static_cast<const DER *>(this)->UpdateArgs(workspaces, object, &args...);
|
||||
return this->nested()[0]->Compute(workspaces, object, args..., result);
|
||||
}
|
||||
};
|
||||
|
||||
// Feature extractor for extracting features from objects of a certain class.
|
||||
// Template type parameters are as defined for FeatureFunction.
|
||||
template <class OBJ, class... ARGS>
|
||||
class FeatureExtractor : public GenericFeatureExtractor {
|
||||
public:
|
||||
// Feature function type for top-level functions in the feature extractor.
|
||||
typedef FeatureFunction<OBJ, ARGS...> Function;
|
||||
typedef FeatureExtractor<OBJ, ARGS...> Self;
|
||||
|
||||
// Feature locator type for the feature extractor.
|
||||
template <class DER>
|
||||
using Locator = FeatureLocator<DER, OBJ, ARGS...>;
|
||||
|
||||
// Initializes feature extractor.
|
||||
FeatureExtractor() {}
|
||||
|
||||
~FeatureExtractor() override { utils::STLDeleteElements(&functions_); }
|
||||
|
||||
// Sets up the feature extractor. Note that only top-level functions exist
|
||||
// until Setup() is called. This does not take ownership over the context,
|
||||
// which must outlive this.
|
||||
void Setup(TaskContext *context) {
|
||||
for (Function *function : functions_) function->Setup(context);
|
||||
}
|
||||
|
||||
// Initializes the feature extractor. Must be called after Setup(). This
|
||||
// does not take ownership over the context, which must outlive this.
|
||||
void Init(TaskContext *context) {
|
||||
for (Function *function : functions_) function->Init(context);
|
||||
this->InitializeFeatureTypes();
|
||||
}
|
||||
|
||||
// Requests workspaces from the registry. Must be called after Init(), and
|
||||
// before Preprocess(). Does not take ownership over registry. This should be
|
||||
// the same registry used to initialize the WorkspaceSet used in Preprocess()
|
||||
// and ExtractFeatures(). NB: This is a different ordering from that used in
|
||||
// SentenceFeatureRepresentation style feature computation.
|
||||
void RequestWorkspaces(WorkspaceRegistry *registry) {
|
||||
for (auto *function : functions_) function->RequestWorkspaces(registry);
|
||||
}
|
||||
|
||||
// Preprocesses the object using feature functions for the phase. Must be
|
||||
// called before any calls to ExtractFeatures() on that object and phase.
|
||||
void Preprocess(WorkspaceSet *workspaces, OBJ *object) const {
|
||||
for (Function *function : functions_) {
|
||||
function->Preprocess(workspaces, object);
|
||||
}
|
||||
}
|
||||
|
||||
// Extracts features from an object with a focus. This invokes all the
|
||||
// top-level feature functions in the feature extractor. Only feature
|
||||
// functions belonging to the specified phase are invoked.
|
||||
void ExtractFeatures(const WorkspaceSet &workspaces, const OBJ &object,
|
||||
ARGS... args, FeatureVector *result) const {
|
||||
result->reserve(this->feature_types());
|
||||
|
||||
// Extract features.
|
||||
for (size_t i = 0; i < functions_.size(); ++i) {
|
||||
functions_[i]->Evaluate(workspaces, object, args..., result);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// Creates and initializes all feature functions in the feature extractor.
|
||||
void InitializeFeatureFunctions() override {
|
||||
// Create all top-level feature functions.
|
||||
for (int i = 0; i < descriptor().feature_size(); ++i) {
|
||||
FeatureFunctionDescriptor *fd = mutable_descriptor()->mutable_feature(i);
|
||||
Function *function = Function::Instantiate(this, fd, "");
|
||||
functions_.push_back(function);
|
||||
}
|
||||
}
|
||||
|
||||
// Collect all feature types used in the feature extractor.
|
||||
void GetFeatureTypes(std::vector<FeatureType *> *types) const override {
|
||||
for (size_t i = 0; i < functions_.size(); ++i) {
|
||||
functions_[i]->GetFeatureTypes(types);
|
||||
}
|
||||
}
|
||||
|
||||
// Top-level feature functions (and variables) in the feature extractor.
|
||||
// Owned.
|
||||
std::vector<Function *> functions_;
|
||||
};
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // FEATURE_EXTRACTOR_H_
|
||||
@ -0,0 +1,72 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "feature_types.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "base.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Creates a feature type called |name| with base 0. The type is flagged as
// continuous exactly when |name| contains the substring "continuous".
FeatureType::FeatureType(const string &name)
    : name_(name),
      base_(0),
      is_continuous_(string::npos != name.find("continuous")) {}

// Nothing to release.
FeatureType::~FeatureType() = default;
|
||||
|
||||
template <class Resource>
|
||||
ResourceBasedFeatureType<Resource>::ResourceBasedFeatureType(
|
||||
const string &name, const Resource *resource,
|
||||
const std::map<FeatureValue, string> &values)
|
||||
: FeatureType(name), resource_(resource), values_(values) {
|
||||
max_value_ = resource->NumValues() - 1;
|
||||
for (const auto &pair : values) {
|
||||
CLD3_DCHECK(pair.first >= resource->NumValues());
|
||||
max_value_ = pair.first > max_value_ ? pair.first : max_value_;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Resource>
|
||||
ResourceBasedFeatureType<Resource>::ResourceBasedFeatureType(
|
||||
const string &name, const Resource *resource)
|
||||
: ResourceBasedFeatureType(name, resource, {}) {}
|
||||
|
||||
// Stores a copy of |value_names| and sizes the domain so that it is one
// greater than the largest key in the map (0 for an empty map).
EnumFeatureType::EnumFeatureType(
    const string &name, const std::map<FeatureValue, string> &value_names)
    : FeatureType(name), value_names_(value_names) {
  for (auto it = value_names.begin(); it != value_names.end(); ++it) {
    const FeatureValue value = it->first;
    CLD3_DCHECK(value >= 0);

    // Grow the domain when this value does not fit yet.
    const FeatureValue required_size = value + 1;
    if (domain_size_ < required_size) {
      domain_size_ = required_size;
    }
  }
}
|
||||
|
||||
// Nothing to release explicitly; members clean up after themselves.
EnumFeatureType::~EnumFeatureType() = default;
|
||||
|
||||
// Returns the name registered for |value|, or "<INVALID>" when the value is
// not present in the name map.
string EnumFeatureType::GetFeatureValueName(FeatureValue value) const {
  const auto entry = value_names_.find(value);
  return entry != value_names_.end() ? entry->second : string("<INVALID>");
}
|
||||
|
||||
// Returns the domain size computed at construction time.
FeatureValue EnumFeatureType::GetDomainSize() const {
  return domain_size_;
}
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,158 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// Common feature types for parser components.
|
||||
|
||||
#ifndef FEATURE_TYPES_H_
|
||||
#define FEATURE_TYPES_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "base.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// TODO(djweiss) Clean this up as well.
|
||||
// Use the same type for feature values as is used for predicates.
|
||||
typedef int64 Predicate;
|
||||
typedef Predicate FeatureValue;
|
||||
|
||||
// Each feature value in a feature vector has a feature type. The feature type
|
||||
// is used for converting feature type and value pairs to predicate values. The
|
||||
// feature type can also return names for feature values and calculate the size
|
||||
// of the feature value domain. The FeatureType class is abstract and must be
|
||||
// specialized for the concrete feature types.
|
||||
class FeatureType {
|
||||
public:
|
||||
// Initializes a feature type.
|
||||
explicit FeatureType(const string &name);
|
||||
|
||||
virtual ~FeatureType();
|
||||
|
||||
// Converts a feature value to a name.
|
||||
virtual string GetFeatureValueName(FeatureValue value) const = 0;
|
||||
|
||||
// Returns the size of the feature values domain.
|
||||
virtual int64 GetDomainSize() const = 0;
|
||||
|
||||
// Returns the feature type name.
|
||||
const string &name() const { return name_; }
|
||||
|
||||
Predicate base() const { return base_; }
|
||||
void set_base(Predicate base) { base_ = base; }
|
||||
|
||||
// Returns true iff this feature is continuous; see FloatFeatureValue.
|
||||
bool is_continuous() const { return is_continuous_; }
|
||||
|
||||
private:
|
||||
// Feature type name.
|
||||
string name_;
|
||||
|
||||
// "Base" feature value: i.e. a "slot" in a global ordering of features.
|
||||
Predicate base_;
|
||||
|
||||
// See doc for is_continuous().
|
||||
bool is_continuous_;
|
||||
};
|
||||
|
||||
// Templated generic resource based feature type. This feature type delegates
|
||||
// look up of feature value names to an unknown resource class, which is not
|
||||
// owned. Optionally, this type can also store a mapping of extra values which
|
||||
// are not in the resource.
|
||||
//
|
||||
// Note: this class assumes that Resource->GetFeatureValueName() will return
|
||||
// successfully for values ONLY in the range [0, Resource->NumValues()). Any
|
||||
// feature value not in the extra value map and not in the above range of
|
||||
// Resource will result in an ERROR and return of "<INVALID>".
|
||||
template <class Resource>
|
||||
class ResourceBasedFeatureType : public FeatureType {
|
||||
public:
|
||||
// Creates a new type with given name, resource object, and a mapping of
|
||||
// special values. The values must be greater or equal to
|
||||
// resource->NumValues() so as to avoid collisions; this is verified with
|
||||
// CHECK at creation.
|
||||
ResourceBasedFeatureType(const string &name, const Resource *resource,
|
||||
const std::map<FeatureValue, string> &values);
|
||||
|
||||
// Creates a new type with no special values.
|
||||
ResourceBasedFeatureType(const string &name, const Resource *resource);
|
||||
|
||||
// Returns the feature name for a given feature value. First checks the values
|
||||
// map, then checks the resource to look up the name.
|
||||
string GetFeatureValueName(FeatureValue value) const override {
|
||||
if (values_.find(value) != values_.end()) {
|
||||
return values_.find(value)->second;
|
||||
}
|
||||
if (value >= 0 && value < resource_->NumValues()) {
|
||||
return resource_->GetFeatureValueName(value);
|
||||
} else {
|
||||
return "<INVALID>";
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the number of possible values for this feature type. This is the
|
||||
// based on the largest value that was observed in the extra values.
|
||||
FeatureValue GetDomainSize() const override { return max_value_ + 1; }
|
||||
|
||||
protected:
|
||||
// Shared resource. Not owned.
|
||||
const Resource *resource_ = nullptr;
|
||||
|
||||
// Maximum possible value this feature could take.
|
||||
FeatureValue max_value_;
|
||||
|
||||
// Mapping for extra feature values not in the resource.
|
||||
std::map<FeatureValue, string> values_;
|
||||
};
|
||||
|
||||
// Feature type that is defined using an explicit map from FeatureValue to
|
||||
// string values. This can reduce some of the boilerplate when defining
|
||||
// features that generate enum values. Example usage:
|
||||
//
|
||||
// class BeverageSizeFeature : public FeatureFunction<Beverage>
|
||||
// enum FeatureValue { SMALL, MEDIUM, LARGE }; // values for this feature
|
||||
// void Init(TaskContext *context) override {
|
||||
// set_feature_type(new EnumFeatureType("beverage_size",
|
||||
// {{SMALL, "SMALL"}, {MEDIUM, "MEDIUM"}, {LARGE, "LARGE"}});
|
||||
// }
|
||||
// [...]
|
||||
// };
|
||||
class EnumFeatureType : public FeatureType {
|
||||
public:
|
||||
EnumFeatureType(const string &name,
|
||||
const std::map<FeatureValue, string> &value_names);
|
||||
~EnumFeatureType() override;
|
||||
|
||||
// Returns the feature name for a given feature value.
|
||||
string GetFeatureValueName(FeatureValue value) const override;
|
||||
|
||||
// Returns the number of possible values for this feature type. This is one
|
||||
// greater than the largest value in the value_names map.
|
||||
FeatureValue GetDomainSize() const override;
|
||||
|
||||
protected:
|
||||
// Maximum possible value this feature could take.
|
||||
FeatureValue domain_size_ = 0;
|
||||
|
||||
// Names of feature values.
|
||||
std::map<FeatureValue, string> value_names_;
|
||||
};
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // FEATURE_TYPES_H_
|
||||
@ -0,0 +1,58 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef FLOAT16_H_
|
||||
#define FLOAT16_H_
|
||||
|
||||
#include <string.h> // for memcpy
|
||||
|
||||
#include "base.h"
|
||||
#include "casts.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Compact 16-bit encoding of floating point numbers. This
|
||||
// representation uses 1 bit for the sign, 8 bits for the exponent and
|
||||
// 7 bits for the mantissa. It is assumed that floats are in IEEE 754
|
||||
// format so a float16 is just bits 16-31 of a single precision float.
|
||||
//
|
||||
// NOTE: The IEEE floating point standard defines a float16 format that
|
||||
// is different than this format (it has fewer bits of exponent and more
|
||||
// bits of mantissa). We don't use that format here because conversion
|
||||
// to/from 32-bit floats is more complex for that format, and the
|
||||
// conversion for this format is very simple.
|
||||
//
|
||||
// <---------float16------------>
|
||||
// s e e e e e e e e f f f f f f f f f f f f f f f f f f f f f f f
|
||||
// <------------------------------float-------------------------->
|
||||
// 3 3 2 2 1 1 0
|
||||
// 1 0 3 2 5 4 0
|
||||
|
||||
// A float16 is stored in a plain 16-bit unsigned integer.
using float16 = uint16;
|
||||
|
||||
// Converts a 32-bit float to the compact 16-bit encoding by keeping only the
// upper 16 bits of its bit pattern.
static inline float16 Float32To16(float f) {
  // The low mantissa bits are simply dropped; no rounding is attempted.
  const uint32 bits = lang_id_bit_cast<uint32>(f);
  return static_cast<float16>(bits >> 16);
}
|
||||
|
||||
// Expands a 16-bit encoded float back to a 32-bit float by placing its bits
// in the upper half of the 32-bit pattern.
static inline float Float16To32(float16 f) {
  // Widen to an unsigned 32-bit value before shifting.  Shifting the
  // integer-promoted (signed) operand would move a set sign bit into the sign
  // position of a signed int, which is not portable.  The new low mantissa
  // bits are filled with 0; nothing smarter is attempted.
  const uint32 bits = static_cast<uint32>(f) << 16;
  return lang_id_bit_cast<float>(bits);
}
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // FLOAT16_H_
|
||||
@ -0,0 +1,308 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "fml_parser.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <string>
|
||||
|
||||
#include "base.h"
|
||||
#include "utils.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns true iff character c can appear at the beginning of an identifier.
inline bool IsValidCharAtStartOfIdentifier(char c) {
  // <ctype.h> classifiers require a value representable as unsigned char;
  // a negative char (non-ASCII byte) would otherwise be undefined behavior.
  return isalpha(static_cast<unsigned char>(c)) || (c == '_') || (c == '/');
}
|
||||
|
||||
// Returns true iff character c can appear inside an identifier.
|
||||
inline bool IsValidCharInsideIdentifier(char c) {
  // <ctype.h> classifiers require a value representable as unsigned char;
  // a negative char (non-ASCII byte) would otherwise be undefined behavior.
  return isalnum(static_cast<unsigned char>(c)) || (c == '_') || (c == '-') ||
         (c == '/');
}
|
||||
|
||||
// Returns true iff character c can appear at the beginning of a number.
|
||||
inline bool IsValidCharAtStartOfNumber(char c) {
  // <ctype.h> classifiers require a value representable as unsigned char;
  // a negative char (non-ASCII byte) would otherwise be undefined behavior.
  return isdigit(static_cast<unsigned char>(c)) || (c == '+') || (c == '-');
}
|
||||
|
||||
// Returns true iff character c can appear inside a number.
|
||||
inline bool IsValidCharInsideNumber(char c) {
  // <ctype.h> classifiers require a value representable as unsigned char;
  // a negative char (non-ASCII byte) would otherwise be undefined behavior.
  return isdigit(static_cast<unsigned char>(c)) || (c == '.');
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Parser state is set up by Initialize(), not by the constructor.
FMLParser::FMLParser() = default;
|
||||
// Nothing to release explicitly; members clean up after themselves.
FMLParser::~FMLParser() = default;
|
||||
|
||||
// Resets the parser to the start of |source| and reads the first item.
void FMLParser::Initialize(const string &source) {
  // Copy the text; every iterator below points into this copy.
  source_ = source;

  // All positions start at the first character, on line 1.
  current_ = source_.begin();
  line_start_ = current_;
  item_start_ = current_;
  item_line_number_ = 1;
  line_number_ = 1;

  // Prime the parser with the first input item.
  NextItem();
}
|
||||
|
||||
void FMLParser::Next() {
|
||||
// Move to the next input character. If we are at a line break update line
|
||||
// number and line start position.
|
||||
if (CurrentChar() == '\n') {
|
||||
++line_number_;
|
||||
++current_;
|
||||
line_start_ = current_;
|
||||
} else {
|
||||
++current_;
|
||||
}
|
||||
}
|
||||
|
||||
void FMLParser::NextItem() {
|
||||
// Skip white space and comments.
|
||||
while (!eos()) {
|
||||
if (CurrentChar() == '#') {
|
||||
// Skip comment.
|
||||
while (!eos() && CurrentChar() != '\n') Next();
|
||||
} else if (isspace(CurrentChar())) {
|
||||
// Skip whitespace.
|
||||
while (!eos() && isspace(CurrentChar())) Next();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Record start position for next item.
|
||||
item_start_ = current_;
|
||||
item_line_number_ = line_number_;
|
||||
|
||||
// Check for end of input.
|
||||
if (eos()) {
|
||||
item_type_ = END;
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse number.
|
||||
if (IsValidCharAtStartOfNumber(CurrentChar())) {
|
||||
string::iterator start = current_;
|
||||
Next();
|
||||
while (!eos() && IsValidCharInsideNumber(CurrentChar())) Next();
|
||||
item_text_.assign(start, current_);
|
||||
item_type_ = NUMBER;
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse string.
|
||||
if (CurrentChar() == '"') {
|
||||
Next();
|
||||
string::iterator start = current_;
|
||||
while (CurrentChar() != '"') {
|
||||
CLD3_DCHECK(!eos());
|
||||
Next();
|
||||
}
|
||||
item_text_.assign(start, current_);
|
||||
item_type_ = STRING;
|
||||
Next();
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse identifier name.
|
||||
if (IsValidCharAtStartOfIdentifier(CurrentChar())) {
|
||||
string::iterator start = current_;
|
||||
while (!eos() && IsValidCharInsideIdentifier(CurrentChar())) {
|
||||
Next();
|
||||
}
|
||||
item_text_.assign(start, current_);
|
||||
item_type_ = NAME;
|
||||
return;
|
||||
}
|
||||
|
||||
// Single character item.
|
||||
item_type_ = CurrentChar();
|
||||
Next();
|
||||
}
|
||||
|
||||
void FMLParser::Parse(const string &source,
|
||||
FeatureExtractorDescriptor *result) {
|
||||
// Initialize parser.
|
||||
Initialize(source);
|
||||
|
||||
while (item_type_ != END) {
|
||||
// Parse either a parameter name or a feature.
|
||||
CLD3_DCHECK(item_type_ == NAME);
|
||||
string name = item_text_;
|
||||
NextItem();
|
||||
|
||||
// Feature expected.
|
||||
CLD3_DCHECK(static_cast<char>(item_type_) != '=');
|
||||
|
||||
// Parse feature.
|
||||
FeatureFunctionDescriptor *descriptor = result->add_feature();
|
||||
descriptor->set_type(name);
|
||||
ParseFeature(descriptor);
|
||||
}
|
||||
}
|
||||
|
||||
// Parses the remainder of a feature whose type name has already been consumed
// and stored in |result| by the caller: an optional parenthesized parameter
// list, an optional ':' name, and optional sub-features given either as a
// dotted chain or as a '{' ... '}' block.
void FMLParser::ParseFeature(FeatureFunctionDescriptor *result) {
  // Parse argument and parameters.
  if (item_type_ == '(') {
    NextItem();
    ParseParameter(result);
    while (item_type_ == ',') {
      NextItem();
      ParseParameter(result);
    }

    CLD3_DCHECK(item_type_ == ')');
    NextItem();
  }

  // Parse feature name.
  if (item_type_ == ':') {
    NextItem();

    // Feature name expected.
    CLD3_DCHECK((item_type_ == NAME) || (item_type_ == STRING));
    string name = item_text_;
    NextItem();

    // Set feature name.
    result->set_name(name);
  }

  // Parse sub-features.
  if (item_type_ == '.') {
    // Parse dotted sub-feature.
    NextItem();
    CLD3_DCHECK(item_type_ == NAME);
    string type = item_text_;
    NextItem();

    // Parse sub-feature.
    FeatureFunctionDescriptor *subfeature = result->add_feature();
    subfeature->set_type(type);
    ParseFeature(subfeature);
  } else if (item_type_ == '{') {
    // Parse sub-feature block.
    NextItem();
    // Also stop at END: once the input is exhausted NextItem() makes no
    // further progress, so a block without its closing '}' would otherwise
    // loop forever while adding empty sub-features.
    while (item_type_ != '}' && item_type_ != END) {
      CLD3_DCHECK(item_type_ == NAME);
      string type = item_text_;
      NextItem();

      // Parse sub-feature.
      FeatureFunctionDescriptor *subfeature = result->add_feature();
      subfeature->set_type(type);
      ParseFeature(subfeature);
    }
    // A missing closing brace is still flagged in debug builds.
    CLD3_DCHECK(item_type_ == '}');
    NextItem();
  }
}
|
||||
|
||||
// Parses one entry of a parameter list into |result|: either a bare NUMBER,
// stored as the feature's argument, or a "name = value" pair, appended as a
// parameter.
void FMLParser::ParseParameter(FeatureFunctionDescriptor *result) {
  CLD3_DCHECK((item_type_ == NUMBER) || (item_type_ == NAME));

  if (item_type_ == NUMBER) {
    // A bare number is the default argument of the feature.
    const int argument = utils::ParseUsing<int>(item_text_, utils::ParseInt32);
    NextItem();
    result->set_argument(argument);
    return;
  }

  // Otherwise: item_type_ == NAME.  Copy the text before moving on, because
  // reading the next item overwrites item_text_.
  const string name(item_text_);
  NextItem();
  CLD3_DCHECK(item_type_ == '=');
  NextItem();

  // Parameter value expected.
  CLD3_DCHECK(item_type_ < END);
  const string value(item_text_);
  NextItem();

  // Add parameter to feature.
  Parameter *const parameter = result->add_parameter();
  parameter->set_name(name);
  parameter->set_value(value);
}
|
||||
|
||||
void ToFMLFunction(const FeatureFunctionDescriptor &function, string *output) {
|
||||
output->append(function.type());
|
||||
if (function.argument() != 0 || function.parameter_size() > 0) {
|
||||
output->append("(");
|
||||
bool first = true;
|
||||
if (function.argument() != 0) {
|
||||
output->append(Int64ToString(function.argument()));
|
||||
first = false;
|
||||
}
|
||||
for (int i = 0; i < function.parameter_size(); ++i) {
|
||||
if (!first) output->append(",");
|
||||
output->append(function.parameter(i).name());
|
||||
output->append("=");
|
||||
output->append("\"");
|
||||
output->append(function.parameter(i).value());
|
||||
output->append("\"");
|
||||
first = false;
|
||||
}
|
||||
output->append(")");
|
||||
}
|
||||
}
|
||||
|
||||
void ToFML(const FeatureFunctionDescriptor &function, string *output) {
|
||||
ToFMLFunction(function, output);
|
||||
if (function.feature_size() == 1) {
|
||||
output->append(".");
|
||||
ToFML(function.feature(0), output);
|
||||
} else if (function.feature_size() > 1) {
|
||||
output->append(" { ");
|
||||
for (int i = 0; i < function.feature_size(); ++i) {
|
||||
if (i > 0) output->append(" ");
|
||||
ToFML(function.feature(i), output);
|
||||
}
|
||||
output->append(" } ");
|
||||
}
|
||||
}
|
||||
|
||||
void ToFML(const FeatureExtractorDescriptor &extractor, string *output) {
|
||||
for (int i = 0; i < extractor.feature_size(); ++i) {
|
||||
ToFML(extractor.feature(i), output);
|
||||
output->append("\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the fml form of |function| as a new string.
string AsFML(const FeatureFunctionDescriptor &function) {
  string fml;
  ToFML(function, &fml);
  return fml;
}
|
||||
|
||||
// Returns the fml form of |extractor| as a new string.
string AsFML(const FeatureExtractorDescriptor &extractor) {
  string fml;
  ToFML(extractor, &fml);
  return fml;
}
|
||||
|
||||
// Removes every double-quote character from |fml_string|, in place.
void StripFML(string *fml_string) {
  string &text = *fml_string;

  // Compact the kept characters towards the front in a single pass, then cut
  // off the tail.  Erasing one quote at a time would shift the remainder of
  // the string for every quote found.
  string::size_type kept = 0;
  for (string::size_type i = 0; i < text.size(); ++i) {
    if (text[i] != '"') {
      text[kept++] = text[i];
    }
  }
  text.resize(kept);
}
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,123 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// Feature modeling language (fml) parser.
|
||||
//
|
||||
// BNF grammar for fml:
|
||||
//
|
||||
// <feature model> ::= { <feature extractor> }
|
||||
//
|
||||
// <feature extractor> ::= <extractor spec> |
|
||||
// <extractor spec> '.' <feature extractor> |
|
||||
// <extractor spec> '{' { <feature extractor> } '}'
|
||||
//
|
||||
// <extractor spec> ::= <extractor type>
|
||||
// [ '(' <parameter list> ')' ]
|
||||
// [ ':' <extractor name> ]
|
||||
//
|
||||
// <parameter list> ::= ( <parameter> | <argument> ) { ',' <parameter> }
|
||||
//
|
||||
// <parameter> ::= <parameter name> '=' <parameter value>
|
||||
//
|
||||
// <extractor type> ::= NAME
|
||||
// <extractor name> ::= NAME | STRING
|
||||
// <argument> ::= NUMBER
|
||||
// <parameter name> ::= NAME
|
||||
// <parameter value> ::= NUMBER | STRING | NAME
|
||||
|
||||
#ifndef FML_PARSER_H_
|
||||
#define FML_PARSER_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base.h"
|
||||
#include "cld_3/protos/feature_extractor.pb.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
class FMLParser {
|
||||
public:
|
||||
// Parses fml specification into feature extractor descriptor.
|
||||
void Parse(const string &source, FeatureExtractorDescriptor *result);
|
||||
|
||||
FMLParser();
|
||||
~FMLParser();
|
||||
|
||||
private:
|
||||
// Initializes the parser with the source text.
|
||||
void Initialize(const string &source);
|
||||
|
||||
// Moves to the next input character.
|
||||
void Next();
|
||||
|
||||
// Moves to the next input item.
|
||||
void NextItem();
|
||||
|
||||
// Parses a feature descriptor.
|
||||
void ParseFeature(FeatureFunctionDescriptor *result);
|
||||
|
||||
// Parses a parameter specification.
|
||||
void ParseParameter(FeatureFunctionDescriptor *result);
|
||||
|
||||
// Returns true if end of source input has been reached.
|
||||
bool eos() const { return current_ == source_.end(); }
|
||||
|
||||
// Returns current character. Other methods should access the current
|
||||
// character through this method (instead of using *current_ directly): this
|
||||
// method performs extra safety checks.
|
||||
char CurrentChar() const {
|
||||
// CLD3_DCHECK that we are reading from inside the string.
|
||||
CLD3_DCHECK(current_ >= source_.begin());
|
||||
CLD3_DCHECK(current_ < source_.end());
|
||||
return *current_;
|
||||
}
|
||||
|
||||
// Item types.
|
||||
enum ItemTypes {
|
||||
END = 0,
|
||||
NAME = -1,
|
||||
NUMBER = -2,
|
||||
STRING = -3,
|
||||
};
|
||||
|
||||
// Source text.
|
||||
string source_;
|
||||
|
||||
// Current input position.
|
||||
string::iterator current_;
|
||||
|
||||
// Line number for current input position.
|
||||
int line_number_;
|
||||
|
||||
// Start position for current item.
|
||||
string::iterator item_start_;
|
||||
|
||||
// Start position for current line.
|
||||
string::iterator line_start_;
|
||||
|
||||
// Line number for current item.
|
||||
int item_line_number_;
|
||||
|
||||
// Item type for current item. If this is positive it is interpreted as a
|
||||
// character. If it is negative it is interpreted as an item type.
|
||||
int item_type_;
|
||||
|
||||
// Text for current item.
|
||||
string item_text_;
|
||||
};
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // FML_PARSER_H_
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,178 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef LANG_ID_NN_PARAMS_H_
|
||||
#define LANG_ID_NN_PARAMS_H_
|
||||
|
||||
#include "base.h"
|
||||
#include "embedding_network_params.h"
|
||||
#include "float16.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
class LangIdNNParams : public EmbeddingNetworkParams {
|
||||
public:
|
||||
~LangIdNNParams() override {}
|
||||
|
||||
// Access methods for embeddings:
|
||||
int embeddings_size() const override { return 6; }
|
||||
int embeddings_num_rows(int i) const override {
|
||||
return kEmbeddingsNumRows[i];
|
||||
}
|
||||
int embeddings_num_cols(int i) const override {
|
||||
return kEmbeddingsNumCols[i];
|
||||
}
|
||||
const void *embeddings_weights(int i) const override {
|
||||
return embeddings_weights_[i];
|
||||
}
|
||||
QuantizationType embeddings_quant_type(int i) const override {
|
||||
return QuantizationType::UINT8;
|
||||
}
|
||||
const float16 *embeddings_quant_scales(int i) const override {
|
||||
return embeddings_quant_scales_[i];
|
||||
}
|
||||
|
||||
// Access methods for hidden:
|
||||
int hidden_size() const override { return 1; }
|
||||
int hidden_num_rows(int i) const override { return kHiddenNumRows[i]; }
|
||||
int hidden_num_cols(int i) const override { return kHiddenNumCols[i]; }
|
||||
const void *hidden_weights(int i) const override {
|
||||
return hidden_weights_[i];
|
||||
}
|
||||
|
||||
// Access methods for hidden_bias:
|
||||
int hidden_bias_size() const override { return 1; }
|
||||
int hidden_bias_num_rows(int i) const override {
|
||||
return kHiddenBiasNumRows[i];
|
||||
}
|
||||
int hidden_bias_num_cols(int i) const override {
|
||||
return kHiddenBiasNumCols[i];
|
||||
}
|
||||
const void *hidden_bias_weights(int i) const override {
|
||||
return hidden_bias_weights_[i];
|
||||
}
|
||||
|
||||
// Access methods for softmax:
|
||||
int softmax_size() const override { return 1; }
|
||||
int softmax_num_rows(int i) const override { return kSoftmaxNumRows[i]; }
|
||||
int softmax_num_cols(int i) const override { return kSoftmaxNumCols[i]; }
|
||||
const void *softmax_weights(int i) const override {
|
||||
return softmax_weights_[i];
|
||||
}
|
||||
|
||||
// Access methods for softmax_bias:
|
||||
int softmax_bias_size() const override { return 1; }
|
||||
int softmax_bias_num_rows(int i) const override {
|
||||
return kSoftmaxBiasNumRows[i];
|
||||
}
|
||||
int softmax_bias_num_cols(int i) const override {
|
||||
return kSoftmaxBiasNumCols[i];
|
||||
}
|
||||
const void *softmax_bias_weights(int i) const override {
|
||||
return softmax_bias_weights_[i];
|
||||
}
|
||||
|
||||
// Access methods for embedding_dim:
|
||||
int embedding_dim_size() const override { return 6; }
|
||||
int32 embedding_dim(int i) const override { return kEmbeddingDimValues[i]; }
|
||||
|
||||
// Access methods for embedding_num_features:
|
||||
int embedding_num_features_size() const override { return 6; }
|
||||
int32 embedding_num_features(int i) const override {
|
||||
return kEmbeddingNumFeaturesValues[i];
|
||||
}
|
||||
|
||||
// Access methods for embedding_features_domain_size:
|
||||
int embedding_features_domain_size_size() const override { return 6; }
|
||||
int32 embedding_features_domain_size(int i) const override {
|
||||
return kEmbeddingFeaturesDomainSizeValues[i];
|
||||
}
|
||||
|
||||
// Access methods for concat_offset:
|
||||
int concat_offset_size() const override { return 6; }
|
||||
int32 concat_offset(int i) const override { return kConcatOffsetValues[i]; }
|
||||
|
||||
// Access methods for concat_layer_size:
|
||||
bool has_concat_layer_size() const override { return true; }
|
||||
int32 concat_layer_size() const override { return 80; }
|
||||
|
||||
// Access methods for is_precomputed:
|
||||
bool has_is_precomputed() const override { return false; }
|
||||
bool is_precomputed() const override { return false; }
|
||||
|
||||
private:
|
||||
// Private fields for embeddings:
|
||||
static const int kEmbeddingsNumRows[];
|
||||
static const int kEmbeddingsNumCols[];
|
||||
static const uint8 kEmbeddingsWeights0[];
|
||||
static const uint8 kEmbeddingsWeights1[];
|
||||
static const uint8 kEmbeddingsWeights2[];
|
||||
static const uint8 kEmbeddingsWeights3[];
|
||||
static const uint8 kEmbeddingsWeights4[];
|
||||
static const uint8 kEmbeddingsWeights5[];
|
||||
const void *embeddings_weights_[6] = {
|
||||
kEmbeddingsWeights0, kEmbeddingsWeights1, kEmbeddingsWeights2,
|
||||
kEmbeddingsWeights3, kEmbeddingsWeights4, kEmbeddingsWeights5};
|
||||
static const float16 kEmbeddingsQuantScales0[];
|
||||
static const float16 kEmbeddingsQuantScales1[];
|
||||
static const float16 kEmbeddingsQuantScales2[];
|
||||
static const float16 kEmbeddingsQuantScales3[];
|
||||
static const float16 kEmbeddingsQuantScales4[];
|
||||
static const float16 kEmbeddingsQuantScales5[];
|
||||
const float16 *embeddings_quant_scales_[6] = {
|
||||
kEmbeddingsQuantScales0, kEmbeddingsQuantScales1,
|
||||
kEmbeddingsQuantScales2, kEmbeddingsQuantScales3,
|
||||
kEmbeddingsQuantScales4, kEmbeddingsQuantScales5};
|
||||
|
||||
// Private fields for hidden:
|
||||
static const int kHiddenNumRows[];
|
||||
static const int kHiddenNumCols[];
|
||||
static const float kHiddenWeights0[];
|
||||
const void *hidden_weights_[1] = {kHiddenWeights0};
|
||||
|
||||
// Private fields for hidden_bias:
|
||||
static const int kHiddenBiasNumRows[];
|
||||
static const int kHiddenBiasNumCols[];
|
||||
static const float kHiddenBiasWeights0[];
|
||||
const void *hidden_bias_weights_[1] = {kHiddenBiasWeights0};
|
||||
|
||||
// Private fields for softmax:
|
||||
static const int kSoftmaxNumRows[];
|
||||
static const int kSoftmaxNumCols[];
|
||||
static const float kSoftmaxWeights0[];
|
||||
const void *softmax_weights_[1] = {kSoftmaxWeights0};
|
||||
|
||||
// Private fields for softmax_bias:
|
||||
static const int kSoftmaxBiasNumRows[];
|
||||
static const int kSoftmaxBiasNumCols[];
|
||||
static const float kSoftmaxBiasWeights0[];
|
||||
const void *softmax_bias_weights_[1] = {kSoftmaxBiasWeights0};
|
||||
|
||||
// Private fields for embedding_dim:
|
||||
static const int32 kEmbeddingDimValues[];
|
||||
|
||||
// Private fields for embedding_num_features:
|
||||
static const int32 kEmbeddingNumFeaturesValues[];
|
||||
|
||||
// Private fields for embedding_features_domain_size:
|
||||
static const int32 kEmbeddingFeaturesDomainSizeValues[];
|
||||
|
||||
// Private fields for concat_offset:
|
||||
static const int32 kConcatOffsetValues[];
|
||||
}; // class LangIdNNParams
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // LANG_ID_NN_PARAMS_H_
|
||||
@ -0,0 +1,165 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "language_identifier_features.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "base.h"
|
||||
#include "feature_extractor.h"
|
||||
#include "feature_types.h"
|
||||
#include "script_span/generated_ulscript.h"
|
||||
#include "script_span/getonescriptspan.h"
|
||||
#include "sentence_features.h"
|
||||
#include "task_context.h"
|
||||
#include "unicodetext.h"
|
||||
#include "utils.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
NumericFeatureType::NumericFeatureType(const string &name, FeatureValue size)
|
||||
: FeatureType(name), size_(size) {}
|
||||
|
||||
// Returns the textual form of `value` as produced by Int64ToString(), or an
// empty string when `value` is negative.
string NumericFeatureType::GetFeatureValueName(FeatureValue value) const {
  if (value < 0) {
    return "";
  }
  return Int64ToString(value);
}
|
||||
|
||||
// Returns the domain size that was handed to the constructor.
FeatureValue NumericFeatureType::GetDomainSize() const {
  return size_;
}
|
||||
|
||||
// Reads the configuration of this feature function from its descriptor
// parameters. `context` is not used here.
void ContinuousBagOfNgramsFunction::Setup(TaskContext *context) {
  // Boolean switches; each is requested with `false` as the default value.
  include_terminators_ = GetBoolParameter("include_terminators", false);
  include_spaces_ = GetBoolParameter("include_spaces", false);
  use_equal_ngram_weight_ = GetBoolParameter("use_equal_weight", false);

  // Integer settings: the modulus applied to ngram hashes and the ngram
  // length.
  ngram_id_dimension_ = GetIntParameter("id_dim", 10000);
  ngram_size_ = GetIntParameter("size", 3);
}
|
||||
|
||||
// Installs a NumericFeatureType named after this function whose domain size
// is ngram_id_dimension_. `context` is not used here.
void ContinuousBagOfNgramsFunction::Init(TaskContext *context) {
  NumericFeatureType *ngram_id_type =
      new NumericFeatureType(name(), ngram_id_dimension_);
  set_feature_type(ngram_id_type);
}
|
||||
|
||||
void ContinuousBagOfNgramsFunction::Evaluate(const WorkspaceSet &workspaces,
|
||||
const Sentence &sentence,
|
||||
FeatureVector *result) const {
|
||||
// Include terminators for each token. Tokens are discovered by splitting the
|
||||
// text on spaces.
|
||||
std::vector<string> chars;
|
||||
utils::GetUTF8Chars(sentence.text(), &chars);
|
||||
if (include_terminators_) {
|
||||
std::vector<string> new_chars{"^"};
|
||||
for (size_t index = 0; index < chars.size(); ++index) {
|
||||
if (chars.at(index) == " ") {
|
||||
new_chars.push_back("$");
|
||||
new_chars.push_back(" ");
|
||||
new_chars.push_back("^");
|
||||
} else {
|
||||
new_chars.push_back(chars.at(index));
|
||||
}
|
||||
}
|
||||
new_chars.push_back("$");
|
||||
chars.swap(new_chars);
|
||||
}
|
||||
|
||||
// Find the char ngram counts.
|
||||
std::unordered_map<string, int> char_ngram_counts;
|
||||
int count_sum = 0;
|
||||
for (int start = 0; start <= static_cast<int>(chars.size()) - ngram_size_;
|
||||
++start) {
|
||||
string char_ngram;
|
||||
int index;
|
||||
for (index = 0; index < ngram_size_; ++index) {
|
||||
const string ¤t_char = chars.at(start + index);
|
||||
if (current_char == " " && !include_spaces_) {
|
||||
break;
|
||||
}
|
||||
char_ngram.append(current_char);
|
||||
}
|
||||
if (index == ngram_size_) {
|
||||
char_ngram_counts[char_ngram]++;
|
||||
++count_sum;
|
||||
}
|
||||
}
|
||||
|
||||
// Populate the feature vector.
|
||||
const float equal_weight = 1.0 / char_ngram_counts.size();
|
||||
const float norm = static_cast<float>(count_sum);
|
||||
for (const auto &ngram_and_count : char_ngram_counts) {
|
||||
const float weight =
|
||||
use_equal_ngram_weight_ ? equal_weight : ngram_and_count.second / norm;
|
||||
FloatFeatureValue value(
|
||||
utils::Hash32WithDefaultSeed(ngram_and_count.first) %
|
||||
ngram_id_dimension_,
|
||||
weight);
|
||||
result->add(feature_type(), value.discrete_value);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the script id of `sentence`. Any script other than ULScript_Hani is
// returned as reported by the scanner. For ULScript_Hani the non-space
// codepoints of the span are split into Hangul and non-Hangul: NUM_ULSCRIPTS
// is returned when Hangul codepoints are the strict majority, ULScript_Hani
// otherwise. `workspaces` and `result` are not used.
FeatureValue ScriptFeature::Compute(const WorkspaceSet &workspaces,
                                    const Sentence &sentence,
                                    const FeatureVector *result) const {
  const string &text = sentence.text();
  CLD2::ScriptScanner scanner(text.c_str(), text.size(),
                              /*is_plain_text=*/true);

  // GetOneScriptSpan() is called only once because the input is assumed to
  // contain a single script. The call also cleans up the input (e.g., removes
  // digits, punctuation).
  // TODO(abakalov): Extract the clean-up and script detection code out of
  // GetOneScriptSpan() because we don't have to iterate over the whole text,
  // just look at the first codepoint after clean-up.
  CLD2::LangSpan span;
  scanner.GetOneScriptSpan(&span);
  const CLD2::ULScript detected_script = span.ulscript;
  if (detected_script != CLD2::ULScript_Hani) {
    return detected_script;
  }

  // Among the codepoints captured by ULScript_Hani, count Hangul (Korean
  // script) separately from everything else.
  int hangul_total = 0;
  int other_total = 0;
  UnicodeText span_text;
  span_text.PointToUTF8(span.text, span.text_bytes);
  for (chrome_lang_id::char32 cp : span_text) {
    // Spaces belong to neither group.
    if (cp == 0x20) {
      continue;
    }

    const bool is_hangul =
        (cp >= 0x1100 && cp <= 0x11FF) ||  // Hangul Jamo
        (cp >= 0xA960 && cp <= 0xA97F) ||  // Jamo Extended A
        (cp >= 0xD7B0 && cp <= 0xD7FF) ||  // Jamo Extended B
        (cp >= 0x3130 && cp <= 0x318F) ||  // Compatibility Jamo
        (cp >= 0xFFA0 && cp <= 0xFFDC) ||  // Halfwidth Jamo
        (cp >= 0xAC00 && cp <= 0xD7AF);    // Hangul Syllables
    if (is_hangul) {
      ++hangul_total;
    } else {
      ++other_total;
    }
  }

  if (hangul_total > other_total) {
    return static_cast<FeatureValue>(CLD2::NUM_ULSCRIPTS);
  }
  return static_cast<FeatureValue>(CLD2::ULScript_Hani);
}
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,116 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef LANGUAGE_IDENTIFIER_FEATURES_H_
|
||||
#define LANGUAGE_IDENTIFIER_FEATURES_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "feature_extractor.h"
|
||||
#include "feature_types.h"
|
||||
#include "script_span/generated_ulscript.h"
|
||||
#include "cld_3/protos/sentence.pb.h"
|
||||
#include "sentence_features.h"
|
||||
#include "task_context.h"
|
||||
#include "workspace.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Feature type for numeric features.
|
||||
class NumericFeatureType : public FeatureType {
|
||||
public:
|
||||
// Initializes numeric feature.
|
||||
NumericFeatureType(const string &name, FeatureValue size);
|
||||
|
||||
// Returns numeric feature value.
|
||||
string GetFeatureValueName(FeatureValue value) const override;
|
||||
|
||||
// Returns the number of feature values.
|
||||
FeatureValue GetDomainSize() const override;
|
||||
|
||||
private:
|
||||
FeatureValue size_;
|
||||
};
|
||||
|
||||
// Class for computing continuous char ngram features.
|
||||
// Feature function descriptor parameters:
|
||||
// include_terminators(bool, false):
|
||||
// If 'true', then splits the text based on spaces to get tokens, adds "^"
|
||||
// to the beginning of each token, and adds "$" to the end of each token.
|
||||
// include_spaces(bool, false):
|
||||
// If 'true', then includes char ngrams containing spaces.
|
||||
// use_equal_weight(bool, false):
|
||||
// If 'true', then weighs each unique ngram by 1.0 / (number of unique
|
||||
// ngrams in the input). Otherwise, weighs each unique ngram by (ngram
|
||||
// count) / (total number of ngrams).
|
||||
// id_dim(int, 10000):
|
||||
// The integer id of each char ngram is computed as follows:
|
||||
// Hash32WithDefaultSeed(char ngram) % id_dim.
|
||||
// size(int, 3):
|
||||
// Only ngrams of this size will be extracted.
|
||||
class ContinuousBagOfNgramsFunction : public WholeSentenceFeature {
|
||||
public:
|
||||
void Setup(TaskContext *context) override;
|
||||
void Init(TaskContext *context) override;
|
||||
|
||||
// Appends the features computed from the focus to the feature vector.
|
||||
void Evaluate(const WorkspaceSet &workspaces, const Sentence &sentence,
|
||||
FeatureVector *result) const override;
|
||||
|
||||
private:
|
||||
// If 'true', then splits the text based on spaces to get tokens, adds "^" to
|
||||
// the beginning of each token, and adds "$" to the end of each token.
|
||||
bool include_terminators_;
|
||||
|
||||
// If 'true', then includes char ngrams containing spaces.
|
||||
bool include_spaces_;
|
||||
|
||||
// If 'true', then weighs each unique ngram by 1.0 / (number of unique ngrams
|
||||
// in the input). Otherwise, weighs each unique ngram by (ngram count) /
|
||||
// (total number of ngrams).
|
||||
bool use_equal_ngram_weight_;
|
||||
|
||||
// The integer id of each char ngram is computed as follows:
|
||||
// Hash32WithDefaultSeed(char_ngram) % ngram_id_dimension_.
|
||||
int ngram_id_dimension_;
|
||||
|
||||
// Only ngrams of size ngram_size_ will be extracted.
|
||||
int ngram_size_;
|
||||
};
|
||||
|
||||
// Class for detecting the script of a piece of text. The list of supported
|
||||
// scripts is chrome_lang_id::CLD2::ULScript. This class uses the script
|
||||
// recognition code ported from CLD2. ULScript_Hani is split into non-Korean
|
||||
// script and Korean script (Hangul). In the former case, the function emits
|
||||
// ULScript_Hani. In the latter case, the function emits NUM_ULSCRIPTS. The
|
||||
// class assumes that the input is (1) interchange valid UTF8, and (2) contains
|
||||
// only one chrome_lang_id::CLD2::ULScript.
|
||||
class ScriptFeature : public WholeSentenceFeature {
|
||||
public:
|
||||
void Init(TaskContext *context) override {
|
||||
// The dimension is incremented by 1 because ULScript_Hani is split into two
|
||||
// as mentioned in the class description.
|
||||
set_feature_type(new NumericFeatureType(
|
||||
name(), chrome_lang_id::CLD2::NUM_ULSCRIPTS + 1));
|
||||
}
|
||||
|
||||
// Computes the feature and saves it in the feature vector.
|
||||
FeatureValue Compute(const WorkspaceSet &workspaces, const Sentence &sentence,
|
||||
const FeatureVector *result) const override;
|
||||
};
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // LANGUAGE_IDENTIFIER_FEATURES_H_
|
||||
@ -0,0 +1,260 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include "base.h"
|
||||
#include "feature_extractor.h"
|
||||
#include "language_identifier_features.h"
|
||||
#include "nnet_language_identifier.h"
|
||||
#include "script_span/generated_ulscript.h"
|
||||
#include "cld_3/protos/sentence.pb.h"
|
||||
#include "task_context.h"
|
||||
#include "utils.h"
|
||||
#include "workspace.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace language_identifier_features_test {
|
||||
|
||||
// Factory handed to the registrar: allocates a new
// ContinuousBagOfNgramsFunction and returns it through the base pointer.
static WholeSentenceFeature *cbog_factory() {
  WholeSentenceFeature *feature = new ContinuousBagOfNgramsFunction;
  return feature;
}
|
||||
|
||||
// Factory handed to the registrar: allocates a new ScriptFeature and returns
// it through the base pointer.
static WholeSentenceFeature *sf_factory() {
  WholeSentenceFeature *feature = new ScriptFeature;
  return feature;
}
|
||||
|
||||
// Class for calculating the feature weights and ids.
|
||||
class FeatureIdWeightCalculator {
|
||||
public:
|
||||
explicit FeatureIdWeightCalculator(TaskContext *context) {
|
||||
if (WholeSentenceFeature::registry() == nullptr) {
|
||||
// Create registry for our WholeSentenceFeature(s).
|
||||
RegisterableClass<WholeSentenceFeature>::CreateRegistry(
|
||||
"sentence feature function", "WholeSentenceFeature", __FILE__,
|
||||
__LINE__);
|
||||
}
|
||||
|
||||
// Register our WholeSentenceFeature(s).
|
||||
// Register ContinuousBagOfNgramsFunction feature function.
|
||||
static WholeSentenceFeature::Registry::Registrar cbog_registrar(
|
||||
WholeSentenceFeature::registry(), "continuous-bag-of-ngrams",
|
||||
"ContinuousBagOfNgramsFunction", __FILE__, __LINE__, cbog_factory);
|
||||
|
||||
// Register Script feature function.
|
||||
static WholeSentenceFeature::Registry::Registrar sf_registrar(
|
||||
WholeSentenceFeature::registry(), "script", "ScriptFeature", __FILE__,
|
||||
__LINE__, sf_factory);
|
||||
|
||||
feature_extractor_.Setup(context);
|
||||
feature_extractor_.Init(context);
|
||||
}
|
||||
|
||||
// Assumes that a single feature is specified and extracts it.
|
||||
void ExtractOnlyFeature(Sentence *sentence,
|
||||
std::vector<FeatureVector> *features) {
|
||||
CLD3_CHECK(features->size() == 1);
|
||||
WorkspaceSet workspace;
|
||||
workspace.Reset(workspace_registry_);
|
||||
feature_extractor_.Preprocess(&workspace, sentence);
|
||||
feature_extractor_.ExtractFeatures(workspace, *sentence, features);
|
||||
CLD3_CHECK(features->size() == 1);
|
||||
}
|
||||
|
||||
// Returns a map from feature value id to feature value weight.
|
||||
std::unordered_map<int, float> GetFloatFeatureValIdsAndWeights(
|
||||
Sentence *sentence) {
|
||||
std::vector<FeatureVector> feature_vectors(1); // one feature space
|
||||
ExtractOnlyFeature(sentence, &feature_vectors);
|
||||
const FeatureVector &feature_vector = feature_vectors.at(0);
|
||||
|
||||
// Save the (feature value id, feature value weight) pairs to a map.
|
||||
std::unordered_map<int, float> feature_id_weight;
|
||||
for (int index = 0; index < feature_vector.size(); ++index) {
|
||||
const FloatFeatureValue feature_value =
|
||||
FloatFeatureValue(feature_vector.value(index));
|
||||
feature_id_weight[feature_value.value.id] = feature_value.value.weight;
|
||||
}
|
||||
return feature_id_weight;
|
||||
}
|
||||
|
||||
// Returns the feature value ids.
|
||||
std::set<int> GetFeatureValueIds(Sentence *sentence) {
|
||||
std::vector<FeatureVector> feature_vectors(1); // one feature space
|
||||
ExtractOnlyFeature(sentence, &feature_vectors);
|
||||
const FeatureVector &feature_vector = feature_vectors.at(0);
|
||||
|
||||
std::set<int> ids;
|
||||
for (int index = 0; index < feature_vector.size(); ++index) {
|
||||
ids.insert(feature_vector.value(index));
|
||||
}
|
||||
return ids;
|
||||
}
|
||||
|
||||
private:
|
||||
// The registry of shared workspaces in the feature extractor.
|
||||
WorkspaceRegistry workspace_registry_;
|
||||
LanguageIdEmbeddingFeatureExtractor feature_extractor_;
|
||||
};
|
||||
|
||||
// Extracts features and checks that their ids and weights are correct.
|
||||
bool ExtractAndCheckFeatures(const string &features, const int id_dim,
|
||||
const std::vector<string> &expected_char_ngrams,
|
||||
const std::vector<float> &expected_weights,
|
||||
Sentence *sentence) {
|
||||
TaskContext context;
|
||||
context.SetParameter("language_identifier_features", features);
|
||||
FeatureIdWeightCalculator calc(&context);
|
||||
|
||||
// Get the feature ids and the corresponding weights.
|
||||
const std::unordered_map<int, float> feature_id_weight =
|
||||
calc.GetFloatFeatureValIdsAndWeights(sentence);
|
||||
if (feature_id_weight.size() != expected_char_ngrams.size()) {
|
||||
std::cout << " Failure" << std::endl;
|
||||
std::cout << " Number of expected feature ids: "
|
||||
<< expected_char_ngrams.size() << std::endl;
|
||||
std::cout << " Number of extracted feature ids: "
|
||||
<< feature_id_weight.size() << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Specifies how close two float values should be to be considered equal.
|
||||
const float epsilon = 0.0001f;
|
||||
bool test_successful = true;
|
||||
for (size_t i = 0; i < expected_char_ngrams.size(); ++i) {
|
||||
const int expected_id =
|
||||
utils::Hash32WithDefaultSeed(expected_char_ngrams.at(i)) % id_dim;
|
||||
|
||||
// Check the ids and the weights.
|
||||
if (feature_id_weight.count(expected_id) == 0) {
|
||||
std::cout << " Failure" << std::endl;
|
||||
std::cout << " Feature id " << expected_id << " is missing" << std::endl;
|
||||
test_successful = false;
|
||||
} else {
|
||||
if (std::abs(feature_id_weight.at(expected_id) - expected_weights.at(i)) >
|
||||
epsilon) {
|
||||
std::cout << " Failure" << std::endl;
|
||||
std::cout << " Different weight for feature id " << expected_id
|
||||
<< ": expected weight " << expected_weights.at(i)
|
||||
<< ", actual weight " << feature_id_weight.at(expected_id)
|
||||
<< std::endl;
|
||||
test_successful = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (test_successful) {
|
||||
std::cout << " Success!" << std::endl;
|
||||
}
|
||||
return test_successful;
|
||||
}
|
||||
|
||||
// Tests the case when ngram features get equal weight. Returns "true" if the
|
||||
// test is successful and "false" otherwise.
|
||||
bool TestExtractFeaturesWithEqualWeight() {
|
||||
std::cout << "Running " << __FUNCTION__ << std::endl;
|
||||
|
||||
// The integer id of each char ngram is computed as follows:
|
||||
// utils::Hash32WithDefaultSeed(char ngram) % id_dim.
|
||||
const int id_dim = 100;
|
||||
const string features = "continuous-bag-of-ngrams(id_dim=" +
|
||||
std::to_string(id_dim) +
|
||||
",size=2,include_terminators=true,include_" +
|
||||
"spaces=false,use_equal_weight=true)";
|
||||
Sentence sentence;
|
||||
sentence.set_text("aa aab");
|
||||
const std::vector<string> expected_char_ngrams{"ab", "b$", "^a", "aa", "a$"};
|
||||
const std::vector<float> expected_weights = {0.2f, 0.2f, 0.2f, 0.2f, 0.2f};
|
||||
return ExtractAndCheckFeatures(features, id_dim, expected_char_ngrams,
|
||||
expected_weights, &sentence);
|
||||
}
|
||||
|
||||
// Tests the case when ngram features get weights equal to their normalized
|
||||
// counts. Returns "true" if the test is successful and "false" otherwise.
|
||||
bool TestExtractFeaturesWithNonEqualWeight() {
|
||||
std::cout << "Running " << __FUNCTION__ << std::endl;
|
||||
|
||||
// The integer id of each char ngram is computed as follows:
|
||||
// utils::Hash32WithDefaultSeed(char ngram) % id_dim.
|
||||
const int id_dim = 100;
|
||||
const string features = "continuous-bag-of-ngrams(id_dim=" +
|
||||
std::to_string(id_dim) +
|
||||
",size=2,include_terminators=true,include_" +
|
||||
"spaces=false,use_equal_weight=false)";
|
||||
Sentence sentence;
|
||||
sentence.set_text("aa aab");
|
||||
const std::vector<string> expected_char_ngrams{"ab", "b$", "^a", "aa", "a$"};
|
||||
const std::vector<float> expected_weights{0.1428f, 0.1428f, 0.2857f, 0.2857f,
|
||||
0.1428f};
|
||||
return ExtractAndCheckFeatures(features, id_dim, expected_char_ngrams,
|
||||
expected_weights, &sentence);
|
||||
}
|
||||
|
||||
// Tests the feature Script.
|
||||
bool TestScriptFeature() {
|
||||
std::cout << "Running " << __FUNCTION__ << std::endl;
|
||||
|
||||
bool test_successful = true;
|
||||
TaskContext context;
|
||||
context.SetParameter("language_identifier_features", "script");
|
||||
FeatureIdWeightCalculator calc(&context);
|
||||
|
||||
// Check the script of the English sentence.
|
||||
Sentence sentence;
|
||||
sentence.set_text("food");
|
||||
std::set<int> feature_val_ids = calc.GetFeatureValueIds(&sentence);
|
||||
if (feature_val_ids.size() != 1 ||
|
||||
feature_val_ids.count(chrome_lang_id::CLD2::ULScript_Latin) == 0) {
|
||||
test_successful = false;
|
||||
std::cout << " Failure for input: " << sentence.text() << std::endl;
|
||||
}
|
||||
|
||||
// Check the script of a Chinese sentence.
|
||||
sentence.set_text("字");
|
||||
feature_val_ids = calc.GetFeatureValueIds(&sentence);
|
||||
if (feature_val_ids.size() != 1 ||
|
||||
feature_val_ids.count(chrome_lang_id::CLD2::ULScript_Hani) == 0) {
|
||||
test_successful = false;
|
||||
std::cout << " Failure for input: " << sentence.text() << std::endl;
|
||||
}
|
||||
|
||||
// Check the script of a Korean sentence.
|
||||
sentence.set_text("워드");
|
||||
feature_val_ids = calc.GetFeatureValueIds(&sentence);
|
||||
if (feature_val_ids.size() != 1 ||
|
||||
feature_val_ids.count(chrome_lang_id::CLD2::NUM_ULSCRIPTS) == 0) {
|
||||
test_successful = false;
|
||||
std::cout << " Failure for input: " << sentence.text() << std::endl;
|
||||
}
|
||||
|
||||
if (test_successful) {
|
||||
std::cout << " Success!" << std::endl;
|
||||
}
|
||||
return test_successful;
|
||||
}
|
||||
|
||||
} // namespace language_identifier_features_test
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
// Runs the feature extraction tests.
|
||||
int main(int argc, char **argv) {
|
||||
const bool tests_successful =
|
||||
chrome_lang_id::language_identifier_features_test::
|
||||
TestExtractFeaturesWithEqualWeight() &&
|
||||
chrome_lang_id::language_identifier_features_test::
|
||||
TestExtractFeaturesWithNonEqualWeight() &&
|
||||
chrome_lang_id::language_identifier_features_test::TestScriptFeature();
|
||||
return tests_successful ? 0 : 1;
|
||||
}
|
||||
@ -0,0 +1,117 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef NNET_LANG_ID_TEST_DATA_H_
|
||||
#define NNET_LANG_ID_TEST_DATA_H_
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Declarations of sample strings for the language identifier tests. The
// suffix of each constant is the upper-cased language code of its text; the
// definitions are not part of this header.
class NNetLangIdTestData {
 public:
  // Pieces of text in different languages.

  // A - C
  static const char *const kTestStrAF;
  static const char *const kTestStrAR;
  static const char *const kTestStrAZ;
  static const char *const kTestStrBE;
  static const char *const kTestStrBG;
  static const char *const kTestStrBN;
  static const char *const kTestStrBS;
  static const char *const kTestStrCA;
  static const char *const kTestStrCEB;
  static const char *const kTestStrCS;
  static const char *const kTestStrCY;

  // D - G
  static const char *const kTestStrDA;
  static const char *const kTestStrDE;
  static const char *const kTestStrEL;
  static const char *const kTestStrEN;
  static const char *const kTestStrEO;
  static const char *const kTestStrES;
  static const char *const kTestStrET;
  static const char *const kTestStrEU;
  static const char *const kTestStrFA;
  static const char *const kTestStrFI;
  static const char *const kTestStrFIL;
  static const char *const kTestStrFR;
  static const char *const kTestStrGA;
  static const char *const kTestStrGL;
  static const char *const kTestStrGU;

  // H - K
  static const char *const kTestStrHA;
  static const char *const kTestStrHI;
  static const char *const kTestStrHMN;
  static const char *const kTestStrHR;
  static const char *const kTestStrHT;
  static const char *const kTestStrHU;
  static const char *const kTestStrHY;
  static const char *const kTestStrID;
  static const char *const kTestStrIG;
  static const char *const kTestStrIS;
  static const char *const kTestStrIT;
  static const char *const kTestStrIW;
  static const char *const kTestStrJA;
  static const char *const kTestStrJV;
  static const char *const kTestStrKA;
  static const char *const kTestStrKK;
  static const char *const kTestStrKM;
  static const char *const kTestStrKN;
  static const char *const kTestStrKO;

  // L - N
  static const char *const kTestStrLA;
  static const char *const kTestStrLO;
  static const char *const kTestStrLT;
  static const char *const kTestStrLV;
  static const char *const kTestStrMG;
  static const char *const kTestStrMI;
  static const char *const kTestStrMK;
  static const char *const kTestStrML;
  static const char *const kTestStrMN;
  static const char *const kTestStrMR;
  static const char *const kTestStrMS;
  static const char *const kTestStrMT;
  static const char *const kTestStrMY;
  static const char *const kTestStrNE;
  static const char *const kTestStrNL;
  static const char *const kTestStrNO;
  static const char *const kTestStrNY;

  // P - S
  static const char *const kTestStrPA;
  static const char *const kTestStrPL;
  static const char *const kTestStrPT;
  static const char *const kTestStrRO;
  static const char *const kTestStrRU;
  static const char *const kTestStrSI;
  static const char *const kTestStrSK;
  static const char *const kTestStrSL;
  static const char *const kTestStrSO;
  static const char *const kTestStrSQ;
  static const char *const kTestStrSR;
  static const char *const kTestStrST;
  static const char *const kTestStrSU;
  static const char *const kTestStrSV;
  static const char *const kTestStrSW;

  // T - Z
  static const char *const kTestStrTA;
  static const char *const kTestStrTE;
  static const char *const kTestStrTG;
  static const char *const kTestStrTH;
  static const char *const kTestStrTR;
  static const char *const kTestStrUK;
  static const char *const kTestStrUR;
  static const char *const kTestStrUZ;
  static const char *const kTestStrVI;
  static const char *const kTestStrYI;
  static const char *const kTestStrYO;
  static const char *const kTestStrZH;
  static const char *const kTestStrZU;
};
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // NNET_LANG_ID_TEST_DATA_H_
|
||||
@ -0,0 +1,380 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "nnet_language_identifier.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <string>
|
||||
|
||||
#include "base.h"
|
||||
#include "embedding_network.h"
|
||||
#include "registry.h"
|
||||
#include "relevant_script_feature.h"
|
||||
#include "script_span/generated_ulscript.h"
|
||||
#include "script_span/getonescriptspan.h"
|
||||
#include "script_span/text_processing.h"
|
||||
#include "cld_3/protos/sentence.pb.h"
|
||||
#include "sentence_features.h"
|
||||
#include "task_context.h"
|
||||
#include "workspace.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace {
|
||||
|
||||
// Per-language accumulator that is updated as text subsequences of the same
// script are processed. All counters start at zero.
struct LangChunksStats {
  // Sum of probabilities across subsequences.
  float prob_sum = 0.0f;

  // Total number of bytes corresponding to the language.
  int byte_sum = 0;

  // Number of chunks corresponding to the language.
  int num_chunks = 0;
};
|
||||
|
||||
// Ordering predicate for (name, score) pairs: higher scores come first, and
// pairs with equal scores are ordered by ascending name.
bool OrderBySecondDescending(const std::pair<string, float> &x,
                             const std::pair<string, float> &y) {
  if (x.second != y.second) {
    return x.second > y.second;
  }
  return x.first < y.first;
}
|
||||
|
||||
// Returns "true" if the languge prediction is reliable based on the
|
||||
// probability, and "false" otherwise.
|
||||
bool ResultIsReliable(const string &language, float probability) {
|
||||
if (language == "hr" || language == "bs") {
|
||||
return (probability >= NNetLanguageIdentifier::kReliabilityHrBsThreshold);
|
||||
} else {
|
||||
return (probability >= NNetLanguageIdentifier::kReliabilityThreshold);
|
||||
}
|
||||
}
|
||||
|
||||
// Finds the number of interchange-valid bytes to process.
|
||||
int FindNumValidBytesToProcess(const string &text) {
|
||||
// Check if the size of the input text can fit into an int. If not, focus on
|
||||
// the first std::numeric_limits<int>::max() bytes.
|
||||
const int doc_text_size =
|
||||
(text.size() < static_cast<size_t>(std::numeric_limits<int>::max()))
|
||||
? static_cast<int>(text.size())
|
||||
: std::numeric_limits<int>::max();
|
||||
|
||||
// Truncate the input text if it is too long and find the span containing
|
||||
// interchange-valid UTF8.
|
||||
const int num_valid_bytes = CLD2::SpanInterchangeValid(
|
||||
text.c_str(),
|
||||
std::min(NNetLanguageIdentifier::kMaxNumInputBytesToConsider,
|
||||
doc_text_size));
|
||||
|
||||
return num_valid_bytes;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Definitions of the NNetLanguageIdentifier class constants.

// Byte limits that the default constructor forwards to the two-argument
// constructor.
const int NNetLanguageIdentifier::kMinNumBytesToConsider = 140;
const int NNetLanguageIdentifier::kMaxNumBytesToConsider = 700;

// Upper bound on the number of input bytes scanned by
// FindNumValidBytesToProcess().
const int NNetLanguageIdentifier::kMaxNumInputBytesToConsider = 10000;

// Number of snippets the constructor selects unless max_num_bytes is not
// larger than this value.
const int NNetLanguageIdentifier::kNumSnippets = 5;

// NOTE(review): presumably the code reported when no language can be
// identified -- confirm against the users of kUnknown.
const char NNetLanguageIdentifier::kUnknown[] = "und";

// Probability thresholds used by ResultIsReliable(); the second one applies to
// "hr" and "bs" only.
const float NNetLanguageIdentifier::kReliabilityThreshold = 0.7f;
const float NNetLanguageIdentifier::kReliabilityHrBsThreshold = 0.5f;
|
||||
|
||||
// Returns the fixed argument prefix of this extractor,
// "language_identifier".
const string LanguageIdEmbeddingFeatureExtractor::ArgPrefix() const {
  const string prefix("language_identifier");
  return prefix;
}
|
||||
|
||||
// Default constructor: delegates to the two-argument constructor with the
// class-wide minimum and maximum byte counts.
NNetLanguageIdentifier::NNetLanguageIdentifier()
    : NNetLanguageIdentifier(kMinNumBytesToConsider,
                             kMaxNumBytesToConsider) {}
|
||||
|
||||
// Factory handed to the registrar: allocates a new
// ContinuousBagOfNgramsFunction and returns it through the base pointer.
static WholeSentenceFeature *cbog_factory() {
  WholeSentenceFeature *feature = new ContinuousBagOfNgramsFunction;
  return feature;
}
|
||||
|
||||
// Factory handed to the registrar: allocates a new RelevantScriptFeature and
// returns it through the base pointer.
static WholeSentenceFeature *rsf_factory() {
  WholeSentenceFeature *feature = new RelevantScriptFeature;
  return feature;
}
|
||||
|
||||
// Factory handed to the registrar: allocates a new ScriptFeature and returns
// it through the base pointer.
static WholeSentenceFeature *sf_factory() {
  WholeSentenceFeature *feature = new ScriptFeature;
  return feature;
}
|
||||
|
||||
// Builds an identifier that uses the given byte limits. Requires
// 0 <= min_num_bytes < max_num_bytes (enforced with CLD3_CHECK). Derives the
// snippet count and size, makes sure the feature functions are registered,
// and finally sets up and initializes the model from the parameters supplied
// by TaskContextParams.
NNetLanguageIdentifier::NNetLanguageIdentifier(int min_num_bytes,
                                               int max_num_bytes)
    : num_languages_(TaskContextParams::GetNumLanguages()),
      network_(&nn_params_),
      min_num_bytes_(min_num_bytes),
      max_num_bytes_(max_num_bytes) {
  CLD3_CHECK(max_num_bytes_ > 0);
  CLD3_CHECK(min_num_bytes_ >= 0);
  CLD3_CHECK(min_num_bytes_ < max_num_bytes_);

  // A single snippet is used when the byte budget does not exceed
  // kNumSnippets; the budget is then divided evenly among the snippets.
  if (max_num_bytes_ <= kNumSnippets) {
    num_snippets_ = 1;
  } else {
    num_snippets_ = kNumSnippets;
  }
  snippet_size_ = max_num_bytes_ / num_snippets_;

  // Create the registry for our WholeSentenceFeature(s) if nobody has yet.
  if (WholeSentenceFeature::registry() == nullptr) {
    RegisterableClass<WholeSentenceFeature>::CreateRegistry(
        "sentence feature function", "WholeSentenceFeature", __FILE__,
        __LINE__);
  }

  // The registrars are function-local statics, so each is constructed only
  // the first time an identifier is created.

  // ContinuousBagOfNgramsFunction feature function.
  static WholeSentenceFeature::Registry::Registrar cbog_registrar(
      WholeSentenceFeature::registry(), "continuous-bag-of-ngrams",
      "ContinuousBagOfNgramsFunction", __FILE__, __LINE__, cbog_factory);

  // RelevantScriptFeature feature function.
  static WholeSentenceFeature::Registry::Registrar rsf_registrar(
      WholeSentenceFeature::registry(), "continuous-bag-of-relevant-scripts",
      "RelevantScriptFeature", __FILE__, __LINE__, rsf_factory);

  // ScriptFeature feature function.
  static WholeSentenceFeature::Registry::Registrar sf_registrar(
      WholeSentenceFeature::registry(), "script", "ScriptFeature", __FILE__,
      __LINE__, sf_factory);

  // Get the model parameters, set up and initialize the model.
  TaskContext model_context;
  TaskContextParams::ToTaskContext(&model_context);
  Setup(&model_context);
  Init(&model_context);
}
|
||||
|
||||
// Nothing to release explicitly; members clean up after themselves.
NNetLanguageIdentifier::~NNetLanguageIdentifier() = default;
|
||||
|
||||
// Setup phase: forwarded unchanged to the embedding feature extractor.
void NNetLanguageIdentifier::Setup(TaskContext *context) {
  feature_extractor_.Setup(context);
}
|
||||
|
||||
// Init phase: initializes the feature extractor from `context`, then lets it
// request its workspaces in workspace_registry_.
void NNetLanguageIdentifier::Init(TaskContext *context) {
  LanguageIdEmbeddingFeatureExtractor &extractor = feature_extractor_;
  extractor.Init(context);
  extractor.RequestWorkspaces(&workspace_registry_);
}
|
||||
|
||||
// Runs the feature extractor on `sentence` and stores the extracted features
// in `*features`.
void NNetLanguageIdentifier::GetFeatures(
    Sentence *sentence, std::vector<FeatureVector> *features) const {
  // Each call works on its own workspace set, reset against the registry that
  // Init() passed to RequestWorkspaces().
  WorkspaceSet scratch;
  scratch.Reset(workspace_registry_);

  // Preprocess receives the workspaces and the sentence by pointer;
  // ExtractFeatures then takes both by const reference.
  feature_extractor_.Preprocess(&scratch, sentence);
  feature_extractor_.ExtractFeatures(scratch, *sentence, features);
}
|
||||
|
||||
// Returns the language name corresponding to the given id.
|
||||
string NNetLanguageIdentifier::GetLanguageName(int language_id) const {
|
||||
CLD3_CHECK(language_id >= 0);
|
||||
CLD3_CHECK(language_id < num_languages_);
|
||||
return TaskContextParams::language_names(language_id);
|
||||
}
|
||||
|
||||
// Predicts the single most likely language of `text`.
//
// Steps: limit the input to the bytes selected by FindNumValidBytesToProcess,
// clean it with ScriptScanner, squeeze out repetitive/mostly-space chunks, and
// pass the (possibly snippet-sampled) remainder to FindLanguageOfValidUTF8.
// A default-constructed Result is returned whenever fewer than min_num_bytes_
// bytes are left after cleaning or after squeezing.
NNetLanguageIdentifier::Result NNetLanguageIdentifier::FindLanguage(
    const string &text) {
  const int num_valid_bytes = FindNumValidBytesToProcess(text);

  // Iterate over the input with ScriptScanner to clean up the text (e.g.,
  // removing digits, punctuation, brackets).
  // TODO(abakalov): Extract the code that does the clean-up out of
  // ScriptScanner.
  CLD2::ScriptScanner scanner(text.c_str(), num_valid_bytes,
                              /*is_plain_text=*/true);
  CLD2::LangSpan span;
  string cleaned;
  while (scanner.GetOneScriptSpanLower(&span)) {
    // Each span already starts and ends with a space, so the pieces can be
    // concatenated without an extra delimiter.
    cleaned.append(span.text, span.text_bytes);
  }

  if (static_cast<int>(cleaned.size()) < min_num_bytes_) {
    return Result();
  }

  // The squeeze step works in place on a non-const char*, so the cleaned text
  // is copied into a mutable buffer with a trailing '\0'.
  std::vector<char> buffer(cleaned.begin(), cleaned.end());
  buffer.push_back('\0');

  // Remove repetitive chunks or ones containing mostly spaces.
  const int chunk_size = 0;  // Use the default.
  char *const buffer_begin = &buffer[0];
  const int squeezed_length = CLD2::CheapSqueezeInplace(
      buffer_begin, buffer.size() - 1, chunk_size);
  if (squeezed_length < min_num_bytes_) {
    return Result();
  }

  const string squeezed_text =
      SelectTextGivenBeginAndSize(buffer_begin, squeezed_length);
  return FindLanguageOfValidUTF8(squeezed_text);
}
|
||||
|
||||
// Scores `text` with the network and returns the top-scoring language.
// The returned Result carries the language name, its softmax probability, the
// reliability verdict from ResultIsReliable, and a proportion of 1.
NNetLanguageIdentifier::Result NNetLanguageIdentifier::FindLanguageOfValidUTF8(
    const string &text) {
  // Wrap the input in a Sentence, which is what the feature extractor takes.
  Sentence sentence;
  sentence.set_text(text);

  // Extract one FeatureVector per embedding space and score them.
  // TODO(salcianu): reuse vector<FeatureVector>.
  std::vector<FeatureVector> features(feature_extractor_.NumEmbeddings());
  GetFeatures(&sentence, &features);

  EmbeddingNetwork::Vector scores;
  network_.ComputeFinalScores(features, &scores);

  // Arg-max over the scores; on ties the lowest index wins because only a
  // strictly greater score replaces the current best.
  int best_id = -1;
  float best_score = -std::numeric_limits<float>::infinity();
  for (size_t k = 0; k < scores.size(); ++k) {
    if (!(scores[k] > best_score)) {
      continue;
    }
    best_id = k;
    best_score = scores[k];
  }

  // Softmax probability of the winner, computed via log-sum-exp with the
  // maximum subtracted before exponentiating.
  float shifted_exp_sum = 0.0;
  for (size_t k = 0; k < scores.size(); ++k) {
    shifted_exp_sum += exp(scores[k] - best_score);
  }
  const float log_sum_exp = best_score + log(shifted_exp_sum);

  Result result;
  result.probability = exp(best_score - log_sum_exp);
  result.language = GetLanguageName(best_id);
  result.is_reliable = ResultIsReliable(result.language, result.probability);
  result.proportion = 1.0;
  return result;
}
|
||||
|
||||
std::vector<NNetLanguageIdentifier::Result>
|
||||
NNetLanguageIdentifier::FindTopNMostFreqLangs(const string &text,
|
||||
int num_langs) {
|
||||
std::vector<Result> results;
|
||||
|
||||
// Truncate the input text if it is too long and find the span containing
|
||||
// interchange-valid UTF8.
|
||||
const int num_valid_bytes = FindNumValidBytesToProcess(text);
|
||||
if (num_valid_bytes == 0) {
|
||||
while (num_langs-- > 0) {
|
||||
results.emplace_back();
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
// Process each subsequence of the same script.
|
||||
CLD2::ScriptScanner ss(text.c_str(), num_valid_bytes, /*is_plain_text=*/true);
|
||||
CLD2::LangSpan script_span;
|
||||
std::unordered_map<string, LangChunksStats> lang_stats;
|
||||
int total_num_bytes = 0;
|
||||
Result result;
|
||||
string language;
|
||||
int chunk_size = 0; // Use the default.
|
||||
while (ss.GetOneScriptSpanLower(&script_span)) {
|
||||
const int num_original_span_bytes = script_span.text_bytes;
|
||||
|
||||
// Remove repetitive chunks or ones containing mostly spaces.
|
||||
const int new_length = CLD2::CheapSqueezeInplace(
|
||||
script_span.text, script_span.text_bytes, chunk_size);
|
||||
script_span.text_bytes = new_length;
|
||||
|
||||
if (script_span.text_bytes < min_num_bytes_) {
|
||||
continue;
|
||||
}
|
||||
total_num_bytes += num_original_span_bytes;
|
||||
|
||||
const string selected_text = SelectTextGivenScriptSpan(script_span);
|
||||
result = FindLanguageOfValidUTF8(selected_text);
|
||||
language = result.language;
|
||||
lang_stats[language].byte_sum += num_original_span_bytes;
|
||||
lang_stats[language].prob_sum +=
|
||||
result.probability * num_original_span_bytes;
|
||||
lang_stats[language].num_chunks++;
|
||||
}
|
||||
|
||||
// Sort the languages based on the number of bytes associated with them.
|
||||
// TODO(abakalov): Consider alternative possibly more efficient portable
|
||||
// approaches for finding the top N languages. Given that on average, there
|
||||
// aren't that many languages in the input, it's likely that the benefits will
|
||||
// be negligible (if any).
|
||||
std::vector<std::pair<string, float>> langs_and_byte_counts;
|
||||
for (const auto &entry : lang_stats) {
|
||||
langs_and_byte_counts.emplace_back(entry.first, entry.second.byte_sum);
|
||||
}
|
||||
std::sort(langs_and_byte_counts.begin(), langs_and_byte_counts.end(),
|
||||
OrderBySecondDescending);
|
||||
|
||||
const float byte_sum = static_cast<float>(total_num_bytes);
|
||||
const int num_langs_to_save =
|
||||
std::min(num_langs, static_cast<int>(langs_and_byte_counts.size()));
|
||||
for (int indx = 0; indx < num_langs_to_save; ++indx) {
|
||||
Result result;
|
||||
const string &language = langs_and_byte_counts.at(indx).first;
|
||||
const LangChunksStats &stats = lang_stats.at(language);
|
||||
result.language = language;
|
||||
result.probability = stats.prob_sum / stats.byte_sum;
|
||||
result.proportion = stats.byte_sum / byte_sum;
|
||||
result.is_reliable = ResultIsReliable(language, result.probability);
|
||||
results.push_back(result);
|
||||
}
|
||||
|
||||
int padding_size = num_langs - langs_and_byte_counts.size();
|
||||
while (padding_size-- > 0) {
|
||||
results.emplace_back();
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
// Convenience overload: applies SelectTextGivenBeginAndSize to the text and
// byte count stored in `script_span`.
string NNetLanguageIdentifier::SelectTextGivenScriptSpan(
    const CLD2::LangSpan &script_span) {
  const char *const span_begin = script_span.text;
  const int span_size = script_span.text_bytes;
  return SelectTextGivenBeginAndSize(span_begin, span_size);
}
|
||||
|
||||
// Returns the text the prediction is made on.
//
// Inputs of at most max_num_bytes_ bytes are returned unchanged. Longer inputs
// are sampled: num_snippets_ snippets of up to snippet_size_ bytes each,
// spread evenly over the input, are concatenated with a single space appended
// after every snippet.
string NNetLanguageIdentifier::SelectTextGivenBeginAndSize(
    const char *text_begin, int text_size) {
  string selection;

  // Short enough: use the input as is.
  if (text_size <= max_num_bytes_) {
    selection.append(text_begin, text_size);
    return selection;
  }

  // Number of bytes skipped before each snippet. The bytes that will not be
  // used are divided into num_snippets_ + 1 gaps.
  const int gap_bytes = (text_size - max_num_bytes_) / (num_snippets_ + 1);

  // `cursor` marks the end of the previous snippet (initially the input start).
  const char *cursor = text_begin;
  for (int snippet = 0; snippet < num_snippets_; ++snippet) {
    // Both offsets go through SpanInterchangeValid so that a character is
    // never split in two.
    const int skipped_bytes = CLD2::SpanInterchangeValid(cursor, gap_bytes);
    const char *const snippet_begin = cursor + skipped_bytes;
    const int snippet_bytes =
        CLD2::SpanInterchangeValid(snippet_begin, snippet_size_);

    selection.append(snippet_begin, snippet_bytes);
    selection.append(" ");
    cursor = snippet_begin + snippet_bytes;
  }
  return selection;
}
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,175 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef NNET_LANGUAGE_IDENTIFIER_H_
|
||||
#define NNET_LANGUAGE_IDENTIFIER_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base.h"
|
||||
#include "embedding_feature_extractor.h"
|
||||
#include "embedding_network.h"
|
||||
#include "lang_id_nn_params.h"
|
||||
#include "language_identifier_features.h"
|
||||
#include "script_span/getonescriptspan.h"
|
||||
#include "cld_3/protos/sentence.pb.h"
|
||||
#include "sentence_features.h"
|
||||
#include "task_context.h"
|
||||
#include "task_context_params.h"
|
||||
#include "cld_3/protos/task_spec.pb.h"
|
||||
#include "workspace.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Specialization of the EmbeddingFeatureExtractor for extracting from
|
||||
// (Sentence, int).
|
||||
class LanguageIdEmbeddingFeatureExtractor
|
||||
: public EmbeddingFeatureExtractor<WholeSentenceExtractor, Sentence> {
|
||||
public:
|
||||
const string ArgPrefix() const override;
|
||||
};
|
||||
|
||||
// Class for detecting the language of a document.
|
||||
class NNetLanguageIdentifier {
|
||||
public:
|
||||
// Information about a predicted language.
|
||||
struct Result {
|
||||
string language = kUnknown;
|
||||
float probability = 0.0; // Language probability.
|
||||
bool is_reliable = false; // Whether the prediction is reliable.
|
||||
|
||||
// Proportion of bytes associated with the language. If FindLanguage is
|
||||
// called, this variable is set to 1.
|
||||
float proportion = 0.0;
|
||||
};
|
||||
|
||||
NNetLanguageIdentifier();
|
||||
NNetLanguageIdentifier(int min_num_bytes, int max_num_bytes);
|
||||
~NNetLanguageIdentifier();
|
||||
|
||||
// Finds the most likely language for the given text, along with additional
|
||||
// information (e.g., probability). The prediction is based on the first N
|
||||
// bytes where N is the minumum between the number of interchange valid UTF8
|
||||
// bytes and max_num_bytes_. If N is less than min_num_bytes_ long, then this
|
||||
// function returns kUnknown.
|
||||
Result FindLanguage(const string &text);
|
||||
|
||||
// Splits the input text (up to the first byte, if any, that is not
|
||||
// interchange valid UTF8) into spans based on the script, predicts a language
|
||||
// for each span, and returns a vector storing the top num_langs most frequent
|
||||
// languages along with additional information (e.g., proportions). The number
|
||||
// of bytes considered for each span is the minimum between the size of the
|
||||
// span and max_num_bytes_. If more languages are requested than what is
|
||||
// available in the input, then for those cases kUnknown is returned. Also, if
|
||||
// the size of the span is less than min_num_bytes_ long, then the span is
|
||||
// skipped. If the input text is too long, only the first
|
||||
// kMaxNumInputBytesToConsider bytes are processed.
|
||||
std::vector<Result> FindTopNMostFreqLangs(const string &text, int num_langs);
|
||||
|
||||
// String returned when a language is unknown or prediction cannot be made.
|
||||
static const char kUnknown[];
|
||||
|
||||
// Min number of bytes needed to make a prediction if the default constructor
|
||||
// is called.
|
||||
static const int kMinNumBytesToConsider;
|
||||
|
||||
// Max number of bytes to consider to make a prediction if the default
|
||||
// constructor is called.
|
||||
static const int kMaxNumBytesToConsider;
|
||||
|
||||
// Max number of input bytes to process.
|
||||
static const int kMaxNumInputBytesToConsider;
|
||||
|
||||
// Predictions with probability greater than or equal to this threshold are
|
||||
// marked as reliable. This threshold was optimized on a set of text segments
|
||||
// extracted from wikipedia, and results in an overall precision, recall,
|
||||
// and f1 equal to 0.9760, 0.9624, and 0.9692, respectively.
|
||||
static const float kReliabilityThreshold;
|
||||
|
||||
// Reliability threshold for the languages hr and bs.
|
||||
static const float kReliabilityHrBsThreshold;
|
||||
|
||||
private:
|
||||
// Sets up and initializes the model.
|
||||
void Setup(TaskContext *context);
|
||||
void Init(TaskContext *context);
|
||||
|
||||
// Extract features from sentence. On return, FeatureVector features[i]
|
||||
// contains the features for the embedding space #i.
|
||||
void GetFeatures(Sentence *sentence,
|
||||
std::vector<FeatureVector> *features) const;
|
||||
|
||||
// Finds the most likely language for the given text. Assumes that the text is
|
||||
// interchange valid UTF8.
|
||||
Result FindLanguageOfValidUTF8(const string &text);
|
||||
|
||||
// Returns the language name corresponding to the given id.
|
||||
string GetLanguageName(int language_id) const;
|
||||
|
||||
// Concatenates snippets of text equally spread out throughout the input if
|
||||
// the size of the input is greater than the maximum number of bytes needed to
|
||||
// make a prediction. The resulting string is used for language
|
||||
// identification.
|
||||
string SelectTextGivenScriptSpan(const CLD2::LangSpan &script_span);
|
||||
string SelectTextGivenBeginAndSize(const char *text_begin, int text_size);
|
||||
|
||||
// Number of languages.
|
||||
const int num_languages_;
|
||||
|
||||
// Typed feature extractor for embeddings.
|
||||
LanguageIdEmbeddingFeatureExtractor feature_extractor_;
|
||||
|
||||
// The registry of shared workspaces in the feature extractor.
|
||||
WorkspaceRegistry workspace_registry_;
|
||||
|
||||
// Parameters for the neural networks.
|
||||
LangIdNNParams nn_params_;
|
||||
|
||||
// Neural network to use for scoring.
|
||||
EmbeddingNetwork network_;
|
||||
|
||||
// This feature function is not relevant to this class. Adding this variable
|
||||
// ensures that the features are linked.
|
||||
ContinuousBagOfNgramsFunction ngram_function_;
|
||||
|
||||
// Minimum number of bytes needed to make a prediction. If the default
|
||||
// constructor is called, this variable is equal to kMinNumBytesToConsider.
|
||||
int min_num_bytes_;
|
||||
|
||||
// Maximum number of bytes to use to make a prediction. If the default
|
||||
// constructor is called, this variable is equal to kMaxNumBytesToConsider.
|
||||
int max_num_bytes_;
|
||||
|
||||
// Number of snippets to concatenate to produce the string used for language
|
||||
// identification. If max_num_bytes_ <= kNumSnippets (i.e., the maximum number
|
||||
// of bytes needed to make a prediction is smaller or equal to the number of
|
||||
// default snippets), then this variable is equal to 1. Otherwise, it is set
|
||||
// to kNumSnippets.
|
||||
int num_snippets_;
|
||||
|
||||
// The string used to make a prediction is created by concatenating
|
||||
// num_snippets_ snippets of size snippet_size_ = (max_num_bytes_ /
|
||||
// num_snippets_) that are equaly spread out throughout the input.
|
||||
int snippet_size_;
|
||||
|
||||
// Default number of snippets to concatenate to produce the string used for
|
||||
// language identification. For the actual number of snippets, see
|
||||
// num_snippets_.
|
||||
static const int kNumSnippets;
|
||||
};
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // NNET_LANGUAGE_IDENTIFIER_H_
|
||||
@ -0,0 +1,28 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "registry.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Global list of all component registries.
|
||||
RegistryMetadata *global_registry_list = NULL;
|
||||
|
||||
// Prepends `registry` to the global registry list: it is linked to the current
// head and then becomes the new head.
void RegistryMetadata::Register(RegistryMetadata *registry) {
  RegistryMetadata *const previous_head = global_registry_list;
  registry->set_link(previous_head);
  global_registry_list = registry;
}
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,242 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// Registry for component registration. These classes can be used for creating
|
||||
// registries of components conforming to the same interface. This is useful for
|
||||
// making a component-based architecture where the specific implementation
|
||||
// classes can be selected at runtime. There is support for both class-based and
|
||||
// instance based registries.
|
||||
//
|
||||
// Example:
|
||||
// function.h:
|
||||
//
|
||||
// class Function : public RegisterableInstance<Function> {
|
||||
// public:
|
||||
// virtual double Evaluate(double x) = 0;
|
||||
// };
|
||||
//
|
||||
// #define REGISTER_FUNCTION(type, component)
|
||||
// REGISTER_INSTANCE_COMPONENT(Function, type, component);
|
||||
//
|
||||
// function.cc:
|
||||
//
|
||||
// REGISTER_INSTANCE_REGISTRY("function", Function);
|
||||
//
|
||||
// class Cos : public Function {
|
||||
// public:
|
||||
// double Evaluate(double x) { return cos(x); }
|
||||
// };
|
||||
//
|
||||
// class Exp : public Function {
|
||||
// public:
|
||||
// double Evaluate(double x) { return exp(x); }
|
||||
// };
|
||||
//
|
||||
// REGISTER_FUNCTION("cos", Cos);
|
||||
// REGISTER_FUNCTION("exp", Exp);
|
||||
//
|
||||
// Function *f = Function::Lookup("cos");
|
||||
// double result = f->Evaluate(arg);
|
||||
|
||||
#ifndef REGISTRY_H_
|
||||
#define REGISTRY_H_
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Component metadata with information about name, class, and code location.
//
// The object stores the `const char *` arguments as given (no copies are
// made), so the pointed-to strings must outlive it. Metadata objects can be
// chained into a singly-linked list through link()/set_link(); a new object
// starts unlinked.
class ComponentMetadata {
 public:
  // `name`: component name; `class_name`: name of the implementing class;
  // `file`/`line`: code location where the component was registered.
  ComponentMetadata(const char *name, const char *class_name, const char *file,
                    int line)
      : name_(name),
        class_name_(class_name),
        file_(file),
        line_(line),
        link_(nullptr) {}

  // Getters.
  const char *name() const { return name_; }
  const char *class_name() const { return class_name_; }
  const char *file() const { return file_; }
  int line() const { return line_; }

  // Metadata objects can be linked in a list. link() is null for the last
  // (or an unlinked) element.
  ComponentMetadata *link() const { return link_; }
  void set_link(ComponentMetadata *link) { link_ = link; }

 private:
  // Component name.
  const char *name_;

  // Name of class for component.
  const char *class_name_;

  // Code file and location where the component was registered.
  const char *file_;
  int line_;

  // Link to next metadata object in list.
  ComponentMetadata *link_;
};
|
||||
|
||||
// The master registry contains all registered component registries. A registry
|
||||
// is not registered in the master registry until the first component of that
|
||||
// type is registered.
|
||||
class RegistryMetadata : public ComponentMetadata {
|
||||
public:
|
||||
RegistryMetadata(const char *name, const char *class_name, const char *file,
|
||||
int line)
|
||||
: ComponentMetadata(name, class_name, file, line) {}
|
||||
|
||||
// Registers a component registry in the master registry.
|
||||
static void Register(RegistryMetadata *registry);
|
||||
};
|
||||
|
||||
// Registry for components. An object can be registered with a type name in the
|
||||
// registry. The named instances in the registry can be returned using the
|
||||
// Lookup() method. The components in the registry are put into a linked list
|
||||
// of components. It is important that the component registry can be statically
|
||||
// initialized in order not to depend on initialization order.
|
||||
template <class T>
|
||||
struct ComponentRegistry {
|
||||
typedef ComponentRegistry<T> Self;
|
||||
|
||||
// Component registration class.
|
||||
class Registrar : public ComponentMetadata {
|
||||
public:
|
||||
// Registers new component by linking itself into the component list of
|
||||
// the registry.
|
||||
Registrar(Self *registry, const char *type, const char *class_name,
|
||||
const char *file, int line, T *object)
|
||||
: ComponentMetadata(type, class_name, file, line), object_(object) {
|
||||
// Register registry in master registry if this is the first registered
|
||||
// component of this type.
|
||||
if (registry->components == NULL) {
|
||||
RegistryMetadata::Register(
|
||||
new RegistryMetadata(registry->name, registry->class_name,
|
||||
registry->file, registry->line));
|
||||
}
|
||||
|
||||
// Register component in registry.
|
||||
set_link(registry->components);
|
||||
registry->components = this;
|
||||
}
|
||||
|
||||
// Returns component type.
|
||||
const char *type() const { return name(); }
|
||||
|
||||
// Returns component object.
|
||||
T *object() const { return object_; }
|
||||
|
||||
// Returns the next component in the component list.
|
||||
Registrar *next() const { return static_cast<Registrar *>(link()); }
|
||||
|
||||
private:
|
||||
// Component object.
|
||||
T *object_;
|
||||
};
|
||||
|
||||
// Finds registrar for named component in registry.
|
||||
const Registrar *GetComponent(const char *type) const {
|
||||
Registrar *r = components;
|
||||
while (r != NULL && strcmp(type, r->type()) != 0) r = r->next();
|
||||
CLD3_DCHECK(r != nullptr);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
// Finds a named component in the registry.
|
||||
T *Lookup(const char *type) const { return GetComponent(type)->object(); }
|
||||
T *Lookup(const string &type) const { return Lookup(type.c_str()); }
|
||||
|
||||
// Textual description of the kind of components in the registry.
|
||||
const char *name;
|
||||
|
||||
// Base class name of component type.
|
||||
const char *class_name;
|
||||
|
||||
// File and line where the registry is defined.
|
||||
const char *file;
|
||||
int line;
|
||||
|
||||
// Linked list of registered components.
|
||||
Registrar *components;
|
||||
};
|
||||
|
||||
// Base class for registerable class-based components.
|
||||
template <class T>
|
||||
class RegisterableClass {
|
||||
public:
|
||||
// Factory function type.
|
||||
typedef T *(Factory)();
|
||||
|
||||
// Registry type.
|
||||
typedef ComponentRegistry<Factory> Registry;
|
||||
|
||||
// Should be called before any call to Create() or registry(), i.e., before
|
||||
// using the registration mechanism to register and or instantiate subclasses
|
||||
// of T.
|
||||
static void CreateRegistry(
|
||||
const char *name,
|
||||
const char *class_name,
|
||||
const char *file,
|
||||
int line) {
|
||||
registry_ = new Registry();
|
||||
registry_->name = name;
|
||||
registry_->class_name = class_name;
|
||||
registry_->file = file;
|
||||
registry_->line = line;
|
||||
registry_->components = nullptr;
|
||||
}
|
||||
|
||||
// Should be called when one is done using the registration mechanism for
|
||||
// class T.
|
||||
static void DeleteRegistry() {
|
||||
delete registry_;
|
||||
registry_ = nullptr;
|
||||
}
|
||||
|
||||
// Creates a new component instance.
|
||||
static T *Create(const string &type) { return registry()->Lookup(type)(); }
|
||||
|
||||
// Returns registry for class.
|
||||
static Registry *registry() { return registry_; }
|
||||
|
||||
private:
|
||||
// Registry for class.
|
||||
static Registry *registry_;
|
||||
};
|
||||
|
||||
// Base class for registerable instance-based components.
|
||||
template <class T>
|
||||
class RegisterableInstance {
|
||||
public:
|
||||
// Registry type.
|
||||
typedef ComponentRegistry<T> Registry;
|
||||
|
||||
private:
|
||||
// Registry for class.
|
||||
static Registry registry_;
|
||||
};
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // REGISTRY_H_
|
||||
@ -0,0 +1,89 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "relevant_script_feature.h"
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "feature_extractor.h"
|
||||
#include "feature_types.h"
|
||||
#include "language_identifier_features.h"
|
||||
#include "script_detector.h"
|
||||
#include "cld_3/protos/sentence.pb.h"
|
||||
#include "sentence_features.h"
|
||||
#include "task_context.h"
|
||||
#include "utils.h"
|
||||
#include "workspace.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
// Setup phase: this feature has nothing to configure, so `context` is unused.
void RelevantScriptFeature::Setup(TaskContext *context) {}
|
||||
|
||||
// Init phase: installs a numeric feature type named after this feature with
// kNumRelevantScripts as its size argument. `context` is unused.
void RelevantScriptFeature::Init(TaskContext *context) {
  NumericFeatureType *const script_type =
      new NumericFeatureType(name(), kNumRelevantScripts);
  set_feature_type(script_type);
}
|
||||
|
||||
void RelevantScriptFeature::Evaluate(const WorkspaceSet &workspaces,
|
||||
const Sentence &sentence,
|
||||
FeatureVector *result) const {
|
||||
const string &text = sentence.text();
|
||||
|
||||
// We expect kNumRelevantScripts to be small, so we stack-allocate the array
|
||||
// of counts. Still, if that changes, we want to find out.
|
||||
static_assert(
|
||||
kNumRelevantScripts < 25,
|
||||
"switch counts to vector<int>: too big for stack-allocated int[]");
|
||||
|
||||
// counts[s] is the number of characters with script s.
|
||||
// Note: {} "value-initializes" the array to zero.
|
||||
int counts[kNumRelevantScripts]{};
|
||||
int total_count = 0;
|
||||
const char *const text_end = text.data() + text.size();
|
||||
for (const char *curr = text.data(); curr < text_end;
|
||||
curr += utils::OneCharLen(curr)) {
|
||||
const int num_bytes = utils::OneCharLen(curr);
|
||||
|
||||
// If a partial UTF-8 character is encountered, break out of the loop.
|
||||
if (curr + num_bytes > text_end) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Skip spaces, numbers, punctuation, and all other non-alpha ASCII
|
||||
// characters: these characters are used in so many languages, they do not
|
||||
// communicate language-related information.
|
||||
if ((num_bytes == 1) && !isalpha(*curr)) {
|
||||
continue;
|
||||
}
|
||||
Script script = GetScript(curr, num_bytes);
|
||||
CLD3_DCHECK(script >= 0);
|
||||
CLD3_DCHECK(script < kNumRelevantScripts);
|
||||
counts[static_cast<int>(script)]++;
|
||||
total_count++;
|
||||
}
|
||||
|
||||
for (int script_id = 0; script_id < kNumRelevantScripts; ++script_id) {
|
||||
int count = counts[script_id];
|
||||
if (count > 0) {
|
||||
const float weight = static_cast<float>(count) / total_count;
|
||||
FloatFeatureValue value(script_id, weight);
|
||||
result->add(feature_type(), value.discrete_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,49 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef RELEVANT_SCRIPT_FEATURE_H_
|
||||
#define RELEVANT_SCRIPT_FEATURE_H_
|
||||
|
||||
#include "feature_extractor.h"
|
||||
#include "cld_3/protos/sentence.pb.h"
|
||||
#include "sentence_features.h"
|
||||
#include "task_context.h"
|
||||
#include "workspace.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Given a sentence, generates one FloatFeatureValue for each "relevant" Unicode
|
||||
// script (see below): each such feature indicates the script and the ratio of
|
||||
// UTF8 characters in that script, in the given sentence.
|
||||
//
|
||||
// What is a relevant script? Recognizing all 100+ Unicode scripts would
|
||||
// require too much code size and runtime. Instead, we focus only on a few
|
||||
// scripts that communicate a lot of language information: e.g., the use of
|
||||
// Hiragana characters almost always indicates Japanese, so Hiragana is a
|
||||
// "relevant" script for us. The Latin script is used by dozens of language, so
|
||||
// Latin is not relevant in this context.
|
||||
class RelevantScriptFeature : public WholeSentenceFeature {
|
||||
public:
|
||||
void Setup(TaskContext *context) override;
|
||||
void Init(TaskContext *context) override;
|
||||
|
||||
// Appends the features computed from the sentence to the feature vector.
|
||||
void Evaluate(const WorkspaceSet &workspaces, const Sentence &sentence,
|
||||
FeatureVector *result) const override;
|
||||
};
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // RELEVANT_SCRIPT_FEATURE_H_
|
||||
@ -0,0 +1,156 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef SCRIPT_DETECTOR_H_
|
||||
#define SCRIPT_DETECTOR_H_
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// The Unicode scripts this code distinguishes.  The list is intentionally
// short to keep the code compact and fast: it only contains scripts that offer
// a strong indication about the language of the text (e.g., Hiragana ->
// Japanese).
enum Script {
  // Signals an internal error in the script detection code.
  kScriptError = 0,

  // Catch-all values for every Unicode script that is not detected
  // individually: one value per UTF8 length (1, 2, 3 or 4 bytes), because that
  // length is already known when the script is looked up.
  // kScriptOtherUtf8OneByte means ~Latin and kScriptOtherUtf8FourBytes means
  // ~Han.
  kScriptOtherUtf8OneByte,
  kScriptOtherUtf8TwoBytes,
  kScriptOtherUtf8ThreeBytes,
  kScriptOtherUtf8FourBytes,

  // Scripts detected individually.
  kScriptGreek,
  kScriptCyrillic,
  kScriptHebrew,
  kScriptArabic,
  kScriptHangulJamo,  // Used primarily for Korean.
  kScriptHiragana,    // Used primarily for Japanese.
  kScriptKatakana,    // Used primarily for Japanese.

  // Add new scripts here.

  // Must stay the last enumerator: its value equals the number of other
  // enumerators in Script, which makes it easy to iterate over the scripts.
  kNumRelevantScripts,
};
|
||||
|
||||
// Returns true iff value lies in the closed interval [low, hi].
template <typename IntType>
inline bool InRange(IntType value, IntType low, IntType hi) {
  // Reject values below the lower bound first; what remains only needs the
  // upper-bound check.
  if (!(value >= low)) {
    return false;
  }
  return value <= hi;
}
|
||||
|
||||
// Returns Script for the UTF8 character that starts at address p.
|
||||
// Precondition: p points to a valid UTF8 character of num_bytes bytes.
|
||||
inline Script GetScript(const unsigned char *p, int num_bytes) {
|
||||
switch (num_bytes) {
|
||||
case 1:
|
||||
return kScriptOtherUtf8OneByte;
|
||||
|
||||
case 2: {
|
||||
// 2-byte UTF8 characters have 11 bits of information. unsigned int has
|
||||
// at least 16 bits (http://en.cppreference.com/w/cpp/language/types) so
|
||||
// it's enough. It's also usually the fastest int type on the current
|
||||
// CPU, so it's better to use than int32.
|
||||
static const unsigned int kGreekStart = 0x370;
|
||||
|
||||
// Commented out (unsued in the code): kGreekEnd = 0x3FF;
|
||||
static const unsigned int kCyrillicStart = 0x400;
|
||||
static const unsigned int kCyrillicEnd = 0x4FF;
|
||||
static const unsigned int kHebrewStart = 0x590;
|
||||
|
||||
// Commented out (unsued in the code): kHebrewEnd = 0x5FF;
|
||||
static const unsigned int kArabicStart = 0x600;
|
||||
static const unsigned int kArabicEnd = 0x6FF;
|
||||
const unsigned int codepoint = ((p[0] & 0x1F) << 6) | (p[1] & 0x3F);
|
||||
if (codepoint > kCyrillicEnd) {
|
||||
if (codepoint >= kArabicStart) {
|
||||
if (codepoint <= kArabicEnd) {
|
||||
return kScriptArabic;
|
||||
}
|
||||
} else {
|
||||
// At this point, codepoint < kArabicStart = kHebrewEnd + 1, so
|
||||
// codepoint <= kHebrewEnd.
|
||||
if (codepoint >= kHebrewStart) {
|
||||
return kScriptHebrew;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (codepoint >= kCyrillicStart) {
|
||||
return kScriptCyrillic;
|
||||
} else {
|
||||
// At this point, codepoint < kCyrillicStart = kGreekEnd + 1, so
|
||||
// codepoint <= kGreekEnd.
|
||||
if (codepoint >= kGreekStart) {
|
||||
return kScriptGreek;
|
||||
}
|
||||
}
|
||||
}
|
||||
return kScriptOtherUtf8TwoBytes;
|
||||
}
|
||||
|
||||
case 3: {
|
||||
// 3-byte UTF8 characters have 16 bits of information. unsigned int has
|
||||
// at least 16 bits.
|
||||
static const unsigned int kHangulJamoStart = 0x1100;
|
||||
static const unsigned int kHangulJamoEnd = 0x11FF;
|
||||
static const unsigned int kHiraganaStart = 0x3041;
|
||||
static const unsigned int kHiraganaEnd = 0x309F;
|
||||
|
||||
// Commented out (unsued in the code): kKatakanaStart = 0x30A0;
|
||||
static const unsigned int kKatakanaEnd = 0x30FF;
|
||||
const unsigned int codepoint =
|
||||
((p[0] & 0x0F) << 12) | ((p[1] & 0x3F) << 6) | (p[2] & 0x3F);
|
||||
if (codepoint > kHiraganaEnd) {
|
||||
// On this branch, codepoint > kHiraganaEnd = kKatakanaStart - 1, so
|
||||
// codepoint >= kKatakanaStart.
|
||||
if (codepoint <= kKatakanaEnd) {
|
||||
return kScriptKatakana;
|
||||
}
|
||||
} else {
|
||||
if (codepoint >= kHiraganaStart) {
|
||||
return kScriptHiragana;
|
||||
} else {
|
||||
if (InRange(codepoint, kHangulJamoStart, kHangulJamoEnd)) {
|
||||
return kScriptHangulJamo;
|
||||
}
|
||||
}
|
||||
}
|
||||
return kScriptOtherUtf8ThreeBytes;
|
||||
}
|
||||
|
||||
case 4:
|
||||
return kScriptOtherUtf8FourBytes;
|
||||
|
||||
default:
|
||||
return kScriptError;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns Script for the UTF8 character that starts at address p. Similar to
|
||||
// the previous version of GetScript, except for "char" vs "unsigned char".
|
||||
// Most code works with "char *" pointers, ignoring the fact that char is
|
||||
// unsigned (by default) on most platforms, but signed on iOS. This code takes
|
||||
// care of making sure we always treat chars as unsigned.
|
||||
inline Script GetScript(const char *p, int num_bytes) {
|
||||
return GetScript(reinterpret_cast<const unsigned char *>(p), num_bytes);
|
||||
}
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // SCRIPT_DETECTOR_H_
|
||||
@ -0,0 +1,11 @@
|
||||
The code in this directory identifies the scripts present in a given piece of
|
||||
text along with the corresponding spans. The code was copied from
|
||||
[CLD2](https://github.com/CLD2Owners/cld2) and was slightly refactored. It can
|
||||
be further simplified and cleaned up.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,55 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//
|
||||
// Routine that maps a Unicode code point to an interchange-valid one
|
||||
//
|
||||
|
||||
#include "fixunicodevalue.h"
|
||||
#include "integral_types.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace CLD2 {
|
||||
|
||||
// Guarantees that the resulting output value is interchange valid
|
||||
// 00-FF; map to spaces or MS CP1252
|
||||
// D800-DFFF; surrogates
|
||||
// FDD0-FDEF; non-characters
|
||||
// xxFFFE-xxFFFF; non-characters
|
||||
char32 FixUnicodeValue(char32 uv) {
|
||||
uint32 uuv = static_cast<uint32>(uv);
|
||||
if (uuv < 0x0100) {
|
||||
return kMapFullMicrosoft1252OrSpace[uuv];
|
||||
}
|
||||
if (uuv < 0xD800) {
|
||||
return uv;
|
||||
}
|
||||
if ((uuv & ~0x0F) == 0xFDD0) { // non-characters
|
||||
return 0xFFFD;
|
||||
}
|
||||
if ((uuv & ~0x0F) == 0xFDE0) { // non-characters
|
||||
return 0xFFFD;
|
||||
}
|
||||
if ((uuv & 0x00FFFE) == 0xFFFE) { // non-characters
|
||||
return 0xFFFD;
|
||||
}
|
||||
if ((0xE000 <= uuv) && (uuv <= 0x10FFFF)) {
|
||||
return uv;
|
||||
}
|
||||
// surrogates and negative and > 0x10FFFF all land here
|
||||
return 0xFFFD;
|
||||
}
|
||||
|
||||
} // End namespace CLD2
|
||||
} // End namespace chrome_lang_id
|
||||
@ -0,0 +1,69 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//
|
||||
// Routine that maps a Unicode code point to an interchange-valid one
|
||||
//
|
||||
// Table that maps MS CP1252 bytes 00-FF to their corresponding Unicode
|
||||
// code points. C0 and C1 control codes that are not interchange-valid
|
||||
// are mapped to spaces.
|
||||
|
||||
|
||||
#ifndef SCRIPT_SPAN_FIXUNICODEVALUE_H_
|
||||
#define SCRIPT_SPAN_FIXUNICODEVALUE_H_
|
||||
|
||||
#include "integral_types.h" // for char32
|
||||
#include "port.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace CLD2 {
|
||||
|
||||
// Map byte value 0000-00FF to char32
|
||||
// Maps C0 control codes (other than CR LF HT FF) to space [29 instances including DEL=0x7F]
|
||||
// Maps C1 control codes to CP1252 [27 instances] or space [5 instances]
|
||||
static const char32 kMapFullMicrosoft1252OrSpace[256] = {
|
||||
0x20,0x20,0x20,0x20, 0x20,0x20,0x20,0x20, 0x20,0x09,0x0a,0x20, 0x0c,0x0d,0x20,0x20, // 00
|
||||
0x20,0x20,0x20,0x20, 0x20,0x20,0x20,0x20, 0x20,0x20,0x20,0x20, 0x20,0x20,0x20,0x20,
|
||||
0x20,0x21,0x22,0x23, 0x24,0x25,0x26,0x27, 0x28,0x29,0x2a,0x2b, 0x2c,0x2d,0x2e,0x2f,
|
||||
0x30,0x31,0x32,0x33, 0x34,0x35,0x36,0x37, 0x38,0x39,0x3a,0x3b, 0x3c,0x3d,0x3e,0x3f,
|
||||
|
||||
0x40,0x41,0x42,0x43, 0x44,0x45,0x46,0x47, 0x48,0x49,0x4a,0x4b, 0x4c,0x4d,0x4e,0x4f, // 40
|
||||
0x50,0x51,0x52,0x53, 0x54,0x55,0x56,0x57, 0x58,0x59,0x5a,0x5b, 0x5c,0x5d,0x5e,0x5f,
|
||||
0x60,0x61,0x62,0x63, 0x64,0x65,0x66,0x67, 0x68,0x69,0x6a,0x6b, 0x6c,0x6d,0x6e,0x6f,
|
||||
0x70,0x71,0x72,0x73, 0x74,0x75,0x76,0x77, 0x78,0x79,0x7a,0x7b, 0x7c,0x7d,0x7e,0x20,
|
||||
|
||||
0x20ac,0x20,0x201a,0x0192, 0x201e,0x2026,0x2020,0x2021, // 80
|
||||
0x02c6,0x2030,0x0160,0x2039, 0x0152,0x20,0x017d,0x20,
|
||||
0x20,0x2018,0x2019,0x201c, 0x201d,0x2022,0x2013,0x2014,
|
||||
0x02dc,0x2122,0x0161,0x203a, 0x0153,0x20,0x017e,0x0178,
|
||||
0xa0,0xa1,0xa2,0xa3, 0xa4,0xa5,0xa6,0xa7, 0xa8,0xa9,0xaa,0xab, 0xac,0xad,0xae,0xaf, // A0
|
||||
0xb0,0xb1,0xb2,0xb3, 0xb4,0xb5,0xb6,0xb7, 0xb8,0xb9,0xba,0xbb, 0xbc,0xbd,0xbe,0xbf,
|
||||
|
||||
0xc0,0xc1,0xc2,0xc3, 0xc4,0xc5,0xc6,0xc7, 0xc8,0xc9,0xca,0xcb, 0xcc,0xcd,0xce,0xcf, // C0
|
||||
0xd0,0xd1,0xd2,0xd3, 0xd4,0xd5,0xd6,0xd7, 0xd8,0xd9,0xda,0xdb, 0xdc,0xdd,0xde,0xdf,
|
||||
0xe0,0xe1,0xe2,0xe3, 0xe4,0xe5,0xe6,0xe7, 0xe8,0xe9,0xea,0xeb, 0xec,0xed,0xee,0xef,
|
||||
0xf0,0xf1,0xf2,0xf3, 0xf4,0xf5,0xf6,0xf7, 0xf8,0xf9,0xfa,0xfb, 0xfc,0xfd,0xfe,0xff,
|
||||
};
|
||||
|
||||
// Guarantees that the resulting output value is interchange valid
|
||||
// 00-FF; map to spaces or MS CP1252
|
||||
// D800-DFFF; surrogates
|
||||
// FDD0-FDEF; non-characters
|
||||
// xxFFFE-xxFFFF; non-characters
|
||||
char32 FixUnicodeValue(char32 uv);
|
||||
|
||||
} // End namespace CLD2
|
||||
} // End namespace chrome_lang_id
|
||||
|
||||
#endif // SCRIPT_SPAN_FIXUNICODEVALUE_H_
|
||||
@ -0,0 +1,296 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// generated_entities.cc
|
||||
// Machine generated. Do Not Edit.
|
||||
//
|
||||
// Declarations for HTML entities recognized by CLD2
|
||||
//
|
||||
#include "generated_ulscript.h" // for CharIntPair
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace CLD2 {
|
||||
|
||||
// Alphabetical order for binary search
|
||||
extern const int kNameToEntitySize = 265;
|
||||
extern const CharIntPair kNameToEntity[kNameToEntitySize] = {
|
||||
{"AElig", 198},
|
||||
{"AMP", 38},
|
||||
{"Aacute", 193},
|
||||
{"Acirc", 194},
|
||||
{"Agrave", 192},
|
||||
{"Alpha", 913},
|
||||
{"Aring", 197},
|
||||
{"Atilde", 195},
|
||||
{"Auml", 196},
|
||||
{"Beta", 914},
|
||||
{"Ccaron", 268},
|
||||
{"Ccedil", 199},
|
||||
{"Chi", 935},
|
||||
{"Dagger", 8225},
|
||||
{"Delta", 916},
|
||||
{"ETH", 208},
|
||||
{"Eacute", 201},
|
||||
{"Ecaron", 282},
|
||||
{"Ecirc", 202},
|
||||
{"Egrave", 200},
|
||||
{"Epsilon", 917},
|
||||
{"Eta", 919},
|
||||
{"Euml", 203},
|
||||
{"GT", 62},
|
||||
{"Gamma", 915},
|
||||
{"Iacute", 205},
|
||||
{"Icirc", 206},
|
||||
{"Igrave", 204},
|
||||
{"Iota", 921},
|
||||
{"Iuml", 207},
|
||||
{"Kappa", 922},
|
||||
{"LT", 60},
|
||||
{"Lambda", 923},
|
||||
{"Mu", 924},
|
||||
{"Ntilde", 209},
|
||||
{"Nu", 925},
|
||||
{"OElig", 338},
|
||||
{"Oacute", 211},
|
||||
{"Ocirc", 212},
|
||||
{"Ograve", 210},
|
||||
{"Omega", 937},
|
||||
{"Omicron", 927},
|
||||
{"Oslash", 216},
|
||||
{"Otilde", 213},
|
||||
{"Ouml", 214},
|
||||
{"Phi", 934},
|
||||
{"Pi", 928},
|
||||
{"Prime", 8243},
|
||||
{"Psi", 936},
|
||||
{"QUOT", 34},
|
||||
{"Rcaron", 344},
|
||||
{"Rho", 929},
|
||||
{"Scaron", 352},
|
||||
{"Sigma", 931},
|
||||
{"THORN", 222},
|
||||
{"Tau", 932},
|
||||
{"Theta", 920},
|
||||
{"Uacute", 218},
|
||||
{"Ucirc", 219},
|
||||
{"Ugrave", 217},
|
||||
{"Upsilon", 933},
|
||||
{"Uuml", 220},
|
||||
{"Xi", 926},
|
||||
{"Yacute", 221},
|
||||
{"Yuml", 376},
|
||||
{"Zeta", 918},
|
||||
{"aacute", 225},
|
||||
{"acirc", 226},
|
||||
{"acute", 180},
|
||||
{"aelig", 230},
|
||||
{"agrave", 224},
|
||||
{"alefsym", 8501},
|
||||
{"alpha", 945},
|
||||
{"amp", 38},
|
||||
{"and", 8743},
|
||||
{"ang", 8736},
|
||||
{"apos", 39},
|
||||
{"aring", 229},
|
||||
{"asymp", 8776},
|
||||
{"atilde", 227},
|
||||
{"auml", 228},
|
||||
{"bdquo", 8222},
|
||||
{"beta", 946},
|
||||
{"brvbar", 166},
|
||||
{"bull", 8226},
|
||||
{"cap", 8745},
|
||||
{"ccaron", 269},
|
||||
{"ccedil", 231},
|
||||
{"cedil", 184},
|
||||
{"cent", 162},
|
||||
{"chi", 967},
|
||||
{"circ", 710},
|
||||
{"clubs", 9827},
|
||||
{"cong", 8773},
|
||||
{"copy", 169},
|
||||
{"crarr", 8629},
|
||||
{"cup", 8746},
|
||||
{"curren", 164},
|
||||
{"dArr", 8659},
|
||||
{"dagger", 8224},
|
||||
{"darr", 8595},
|
||||
{"deg", 176},
|
||||
{"delta", 948},
|
||||
{"diams", 9830},
|
||||
{"divide", 247},
|
||||
{"eacute", 233},
|
||||
{"ecaron", 283},
|
||||
{"ecirc", 234},
|
||||
{"egrave", 232},
|
||||
{"emdash", 8212},
|
||||
{"empty", 8709},
|
||||
{"emsp", 8195},
|
||||
{"endash", 8211},
|
||||
{"ensp", 8194},
|
||||
{"epsilon", 949},
|
||||
{"equiv", 8801},
|
||||
{"eta", 951},
|
||||
{"eth", 240},
|
||||
{"euml", 235},
|
||||
{"euro", 8364},
|
||||
{"exist", 8707},
|
||||
{"fnof", 402},
|
||||
{"forall", 8704},
|
||||
{"frac12", 189},
|
||||
{"frac14", 188},
|
||||
{"frac34", 190},
|
||||
{"frasl", 8260},
|
||||
{"gamma", 947},
|
||||
{"ge", 8805},
|
||||
{"gt", 62},
|
||||
{"hArr", 8660},
|
||||
{"harr", 8596},
|
||||
{"hearts", 9829},
|
||||
{"hellip", 8230},
|
||||
{"iacute", 237},
|
||||
{"icirc", 238},
|
||||
{"iexcl", 161},
|
||||
{"igrave", 236},
|
||||
{"image", 8465},
|
||||
{"infin", 8734},
|
||||
{"int", 8747},
|
||||
{"iota", 953},
|
||||
{"iquest", 191},
|
||||
{"isin", 8712},
|
||||
{"iuml", 239},
|
||||
{"kappa", 954},
|
||||
{"lArr", 8656},
|
||||
{"lambda", 955},
|
||||
{"lang", 9001},
|
||||
{"laquo", 171},
|
||||
{"larr", 8592},
|
||||
{"lceil", 8968},
|
||||
{"ldquo", 8220},
|
||||
{"le", 8804},
|
||||
{"lfloor", 8970},
|
||||
{"lowast", 8727},
|
||||
{"loz", 9674},
|
||||
{"lrm", 8206},
|
||||
{"lsaquo", 8249},
|
||||
{"lsquo", 8216},
|
||||
{"lt", 60},
|
||||
{"macr", 175},
|
||||
{"mdash", 8212},
|
||||
{"micro", 181},
|
||||
{"middot", 183},
|
||||
{"minus", 8722},
|
||||
{"mu", 956},
|
||||
{"nabla", 8711},
|
||||
{"nbsp", 160},
|
||||
{"ndash", 8211},
|
||||
{"ne", 8800},
|
||||
{"ni", 8715},
|
||||
{"not", 172},
|
||||
{"notin", 8713},
|
||||
{"nsub", 8836},
|
||||
{"ntilde", 241},
|
||||
{"nu", 957},
|
||||
{"oacute", 243},
|
||||
{"ocirc", 244},
|
||||
{"oelig", 339},
|
||||
{"ograve", 242},
|
||||
{"oline", 8254},
|
||||
{"omega", 969},
|
||||
{"omicron", 959},
|
||||
{"oplus", 8853},
|
||||
{"or", 8744},
|
||||
{"ordf", 170},
|
||||
{"ordm", 186},
|
||||
{"oslash", 248},
|
||||
{"otilde", 245},
|
||||
{"otimes", 8855},
|
||||
{"ouml", 246},
|
||||
{"para", 182},
|
||||
{"part", 8706},
|
||||
{"permil", 8240},
|
||||
{"perp", 8869},
|
||||
{"phi", 966},
|
||||
{"pi", 960},
|
||||
{"piv", 982},
|
||||
{"plusmn", 177},
|
||||
{"pound", 163},
|
||||
{"prime", 8242},
|
||||
{"prod", 8719},
|
||||
{"prop", 8733},
|
||||
{"psi", 968},
|
||||
{"quot", 34},
|
||||
{"rArr", 8658},
|
||||
{"radic", 8730},
|
||||
{"rang", 9002},
|
||||
{"raquo", 187},
|
||||
{"rarr", 8594},
|
||||
{"rcaron", 345},
|
||||
{"rceil", 8969},
|
||||
{"rdquo", 8221},
|
||||
{"real", 8476},
|
||||
{"reg", 174},
|
||||
{"rfloor", 8971},
|
||||
{"rho", 961},
|
||||
{"rlm", 8207},
|
||||
{"rsaquo", 8250},
|
||||
{"rsquo", 8217},
|
||||
{"sbquo", 8218},
|
||||
{"scaron", 353},
|
||||
{"sdot", 8901},
|
||||
{"sect", 167},
|
||||
{"shy", 173},
|
||||
{"sigma", 963},
|
||||
{"sigmaf", 962},
|
||||
{"sim", 8764},
|
||||
{"spades", 9824},
|
||||
{"sub", 8834},
|
||||
{"sube", 8838},
|
||||
{"sum", 8721},
|
||||
{"sup", 8835},
|
||||
{"sup1", 185},
|
||||
{"sup2", 178},
|
||||
{"sup3", 179},
|
||||
{"supe", 8839},
|
||||
{"szlig", 223},
|
||||
{"tau", 964},
|
||||
{"there4", 8756},
|
||||
{"theta", 952},
|
||||
{"thetasym", 977},
|
||||
{"thinsp", 8201},
|
||||
{"thorn", 254},
|
||||
{"tilde", 732},
|
||||
{"times", 215},
|
||||
{"trade", 8482},
|
||||
{"uArr", 8657},
|
||||
{"uacute", 250},
|
||||
{"uarr", 8593},
|
||||
{"ucirc", 251},
|
||||
{"ugrave", 249},
|
||||
{"uml", 168},
|
||||
{"upsih", 978},
|
||||
{"upsilon", 965},
|
||||
{"uuml", 252},
|
||||
{"weierp", 8472},
|
||||
{"xi", 958},
|
||||
{"yacute", 253},
|
||||
{"yen", 165},
|
||||
{"yuml", 255},
|
||||
{"zeta", 950},
|
||||
{"zwj", 8205},
|
||||
{"zwnj", 8204},
|
||||
};
|
||||
|
||||
} // namespace CLD2
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,678 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// generated_ulscript.cc
|
||||
// Machine generated. Do Not Edit.
|
||||
//
|
||||
// Declarations for scripts recognized by CLD2
|
||||
//
|
||||
|
||||
#include "generated_ulscript.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace CLD2 {
|
||||
|
||||
// Subscripted by enum ULScript
|
||||
extern const int kULScriptToNameSize = 102;
|
||||
extern const char* const kULScriptToName[kULScriptToNameSize] = {
|
||||
"Common", // 0 Zyyy
|
||||
"Latin", // 1 Latn
|
||||
"Greek", // 2 Grek
|
||||
"Cyrillic", // 3 Cyrl
|
||||
"Armenian", // 4 Armn
|
||||
"Hebrew", // 5 Hebr
|
||||
"Arabic", // 6 Arab
|
||||
"Syriac", // 7 Syrc
|
||||
"Thaana", // 8 Thaa
|
||||
"Devanagari", // 9 Deva
|
||||
"Bengali", // 10 Beng
|
||||
"Gurmukhi", // 11 Guru
|
||||
"Gujarati", // 12 Gujr
|
||||
"Oriya", // 13 Orya
|
||||
"Tamil", // 14 Taml
|
||||
"Telugu", // 15 Telu
|
||||
"Kannada", // 16 Knda
|
||||
"Malayalam", // 17 Mlym
|
||||
"Sinhala", // 18 Sinh
|
||||
"Thai", // 19 Thai
|
||||
"Lao", // 20 Laoo
|
||||
"Tibetan", // 21 Tibt
|
||||
"Myanmar", // 22 Mymr
|
||||
"Georgian", // 23 Geor
|
||||
"Hani", // 24 Hani
|
||||
"Ethiopic", // 25 Ethi
|
||||
"Cherokee", // 26 Cher
|
||||
"Canadian_Aboriginal", // 27 Cans
|
||||
"Ogham", // 28 Ogam
|
||||
"Runic", // 29 Runr
|
||||
"Khmer", // 30 Khmr
|
||||
"Mongolian", // 31 Mong
|
||||
"", // 32
|
||||
"", // 33
|
||||
"Bopomofo", // 34 Bopo
|
||||
"", // 35
|
||||
"Yi", // 36 Yiii
|
||||
"Old_Italic", // 37 Ital
|
||||
"Gothic", // 38 Goth
|
||||
"Deseret", // 39 Dsrt
|
||||
"Inherited", // 40 Zinh
|
||||
"Tagalog", // 41 Tglg
|
||||
"Hanunoo", // 42 Hano
|
||||
"Buhid", // 43 Buhd
|
||||
"Tagbanwa", // 44 Tagb
|
||||
"Limbu", // 45 Limb
|
||||
"Tai_Le", // 46 Tale
|
||||
"Linear_B", // 47 Linb
|
||||
"Ugaritic", // 48 Ugar
|
||||
"Shavian", // 49 Shaw
|
||||
"Osmanya", // 50 Osma
|
||||
"Cypriot", // 51 Cprt
|
||||
"Braille", // 52 Brai
|
||||
"Buginese", // 53 Bugi
|
||||
"Coptic", // 54 Copt
|
||||
"New_Tai_Lue", // 55 Talu
|
||||
"Glagolitic", // 56 Glag
|
||||
"Tifinagh", // 57 Tfng
|
||||
"Syloti_Nagri", // 58 Sylo
|
||||
"Old_Persian", // 59 Xpeo
|
||||
"Kharoshthi", // 60 Khar
|
||||
"Balinese", // 61 Bali
|
||||
"Cuneiform", // 62 Xsux
|
||||
"Phoenician", // 63 Phnx
|
||||
"Phags_Pa", // 64 Phag
|
||||
"Nko", // 65 Nkoo
|
||||
"Sundanese", // 66 Sund
|
||||
"Lepcha", // 67 Lepc
|
||||
"Ol_Chiki", // 68 Olck
|
||||
"Vai", // 69 Vaii
|
||||
"Saurashtra", // 70 Saur
|
||||
"Kayah_Li", // 71 Kali
|
||||
"Rejang", // 72 Rjng
|
||||
"Lycian", // 73 Lyci
|
||||
"Carian", // 74 Cari
|
||||
"Lydian", // 75 Lydi
|
||||
"Cham", // 76 Cham
|
||||
"Tai_Tham", // 77 Lana
|
||||
"Tai_Viet", // 78 Tavt
|
||||
"Avestan", // 79 Avst
|
||||
"Egyptian_Hieroglyphs", // 80 Egyp
|
||||
"Samaritan", // 81 Samr
|
||||
"Lisu", // 82 Lisu
|
||||
"Bamum", // 83 Bamu
|
||||
"Javanese", // 84 Java
|
||||
"Meetei_Mayek", // 85 Mtei
|
||||
"Imperial_Aramaic", // 86 Armi
|
||||
"Old_South_Arabian", // 87 Sarb
|
||||
"Inscriptional_Parthian", // 88 Prti
|
||||
"Inscriptional_Pahlavi", // 89 Phli
|
||||
"Old_Turkic", // 90 Orkh
|
||||
"Kaithi", // 91 Kthi
|
||||
"Batak", // 92 Batk
|
||||
"Brahmi", // 93 Brah
|
||||
"Mandaic", // 94 Mand
|
||||
"Chakma", // 95 Cakm
|
||||
"Meroitic_Cursive", // 96 Merc
|
||||
"Meroitic_Hieroglyphs", // 97 Mero
|
||||
"Miao", // 98 Plrd
|
||||
"Sharada", // 99 Shrd
|
||||
"Sora_Sompeng", // 100 Sora
|
||||
"Takri", // 101 Takr
|
||||
};
|
||||
|
||||
// Subscripted by enum ULScript
|
||||
extern const int kULScriptToCodeSize = 102;
|
||||
extern const char* const kULScriptToCode[kULScriptToCodeSize] = {
|
||||
"Zyyy", // 0 Common
|
||||
"Latn", // 1 Latin
|
||||
"Grek", // 2 Greek
|
||||
"Cyrl", // 3 Cyrillic
|
||||
"Armn", // 4 Armenian
|
||||
"Hebr", // 5 Hebrew
|
||||
"Arab", // 6 Arabic
|
||||
"Syrc", // 7 Syriac
|
||||
"Thaa", // 8 Thaana
|
||||
"Deva", // 9 Devanagari
|
||||
"Beng", // 10 Bengali
|
||||
"Guru", // 11 Gurmukhi
|
||||
"Gujr", // 12 Gujarati
|
||||
"Orya", // 13 Oriya
|
||||
"Taml", // 14 Tamil
|
||||
"Telu", // 15 Telugu
|
||||
"Knda", // 16 Kannada
|
||||
"Mlym", // 17 Malayalam
|
||||
"Sinh", // 18 Sinhala
|
||||
"Thai", // 19 Thai
|
||||
"Laoo", // 20 Lao
|
||||
"Tibt", // 21 Tibetan
|
||||
"Mymr", // 22 Myanmar
|
||||
"Geor", // 23 Georgian
|
||||
"Hani", // 24 Hani
|
||||
"Ethi", // 25 Ethiopic
|
||||
"Cher", // 26 Cherokee
|
||||
"Cans", // 27 Canadian_Aboriginal
|
||||
"Ogam", // 28 Ogham
|
||||
"Runr", // 29 Runic
|
||||
"Khmr", // 30 Khmer
|
||||
"Mong", // 31 Mongolian
|
||||
"", // 32
|
||||
"", // 33
|
||||
"Bopo", // 34 Bopomofo
|
||||
"", // 35
|
||||
"Yiii", // 36 Yi
|
||||
"Ital", // 37 Old_Italic
|
||||
"Goth", // 38 Gothic
|
||||
"Dsrt", // 39 Deseret
|
||||
"Zinh", // 40 Inherited
|
||||
"Tglg", // 41 Tagalog
|
||||
"Hano", // 42 Hanunoo
|
||||
"Buhd", // 43 Buhid
|
||||
"Tagb", // 44 Tagbanwa
|
||||
"Limb", // 45 Limbu
|
||||
"Tale", // 46 Tai_Le
|
||||
"Linb", // 47 Linear_B
|
||||
"Ugar", // 48 Ugaritic
|
||||
"Shaw", // 49 Shavian
|
||||
"Osma", // 50 Osmanya
|
||||
"Cprt", // 51 Cypriot
|
||||
"Brai", // 52 Braille
|
||||
"Bugi", // 53 Buginese
|
||||
"Copt", // 54 Coptic
|
||||
"Talu", // 55 New_Tai_Lue
|
||||
"Glag", // 56 Glagolitic
|
||||
"Tfng", // 57 Tifinagh
|
||||
"Sylo", // 58 Syloti_Nagri
|
||||
"Xpeo", // 59 Old_Persian
|
||||
"Khar", // 60 Kharoshthi
|
||||
"Bali", // 61 Balinese
|
||||
"Xsux", // 62 Cuneiform
|
||||
"Phnx", // 63 Phoenician
|
||||
"Phag", // 64 Phags_Pa
|
||||
"Nkoo", // 65 Nko
|
||||
"Sund", // 66 Sundanese
|
||||
"Lepc", // 67 Lepcha
|
||||
"Olck", // 68 Ol_Chiki
|
||||
"Vaii", // 69 Vai
|
||||
"Saur", // 70 Saurashtra
|
||||
"Kali", // 71 Kayah_Li
|
||||
"Rjng", // 72 Rejang
|
||||
"Lyci", // 73 Lycian
|
||||
"Cari", // 74 Carian
|
||||
"Lydi", // 75 Lydian
|
||||
"Cham", // 76 Cham
|
||||
"Lana", // 77 Tai_Tham
|
||||
"Tavt", // 78 Tai_Viet
|
||||
"Avst", // 79 Avestan
|
||||
"Egyp", // 80 Egyptian_Hieroglyphs
|
||||
"Samr", // 81 Samaritan
|
||||
"Lisu", // 82 Lisu
|
||||
"Bamu", // 83 Bamum
|
||||
"Java", // 84 Javanese
|
||||
"Mtei", // 85 Meetei_Mayek
|
||||
"Armi", // 86 Imperial_Aramaic
|
||||
"Sarb", // 87 Old_South_Arabian
|
||||
"Prti", // 88 Inscriptional_Parthian
|
||||
"Phli", // 89 Inscriptional_Pahlavi
|
||||
"Orkh", // 90 Old_Turkic
|
||||
"Kthi", // 91 Kaithi
|
||||
"Batk", // 92 Batak
|
||||
"Brah", // 93 Brahmi
|
||||
"Mand", // 94 Mandaic
|
||||
"Cakm", // 95 Chakma
|
||||
"Merc", // 96 Meroitic_Cursive
|
||||
"Mero", // 97 Meroitic_Hieroglyphs
|
||||
"Plrd", // 98 Miao
|
||||
"Shrd", // 99 Sharada
|
||||
"Sora", // 100 Sora_Sompeng
|
||||
"Takr", // 101 Takri
|
||||
};
|
||||
|
||||
// Subscripted by enum ULScript
|
||||
extern const int kULScriptToCNameSize = 102;
|
||||
extern const char* const kULScriptToCName[kULScriptToCNameSize] = {
|
||||
"ULScript_Common", // 0 Zyyy
|
||||
"ULScript_Latin", // 1 Latn
|
||||
"ULScript_Greek", // 2 Grek
|
||||
"ULScript_Cyrillic", // 3 Cyrl
|
||||
"ULScript_Armenian", // 4 Armn
|
||||
"ULScript_Hebrew", // 5 Hebr
|
||||
"ULScript_Arabic", // 6 Arab
|
||||
"ULScript_Syriac", // 7 Syrc
|
||||
"ULScript_Thaana", // 8 Thaa
|
||||
"ULScript_Devanagari", // 9 Deva
|
||||
"ULScript_Bengali", // 10 Beng
|
||||
"ULScript_Gurmukhi", // 11 Guru
|
||||
"ULScript_Gujarati", // 12 Gujr
|
||||
"ULScript_Oriya", // 13 Orya
|
||||
"ULScript_Tamil", // 14 Taml
|
||||
"ULScript_Telugu", // 15 Telu
|
||||
"ULScript_Kannada", // 16 Knda
|
||||
"ULScript_Malayalam", // 17 Mlym
|
||||
"ULScript_Sinhala", // 18 Sinh
|
||||
"ULScript_Thai", // 19 Thai
|
||||
"ULScript_Lao", // 20 Laoo
|
||||
"ULScript_Tibetan", // 21 Tibt
|
||||
"ULScript_Myanmar", // 22 Mymr
|
||||
"ULScript_Georgian", // 23 Geor
|
||||
"ULScript_Hani", // 24 Hani
|
||||
"ULScript_Ethiopic", // 25 Ethi
|
||||
"ULScript_Cherokee", // 26 Cher
|
||||
"ULScript_Canadian_Aboriginal", // 27 Cans
|
||||
"ULScript_Ogham", // 28 Ogam
|
||||
"ULScript_Runic", // 29 Runr
|
||||
"ULScript_Khmer", // 30 Khmr
|
||||
"ULScript_Mongolian", // 31 Mong
|
||||
"ULScript_32", // 32
|
||||
"ULScript_33", // 33
|
||||
"ULScript_Bopomofo", // 34 Bopo
|
||||
"ULScript_35", // 35
|
||||
"ULScript_Yi", // 36 Yiii
|
||||
"ULScript_Old_Italic", // 37 Ital
|
||||
"ULScript_Gothic", // 38 Goth
|
||||
"ULScript_Deseret", // 39 Dsrt
|
||||
"ULScript_Inherited", // 40 Zinh
|
||||
"ULScript_Tagalog", // 41 Tglg
|
||||
"ULScript_Hanunoo", // 42 Hano
|
||||
"ULScript_Buhid", // 43 Buhd
|
||||
"ULScript_Tagbanwa", // 44 Tagb
|
||||
"ULScript_Limbu", // 45 Limb
|
||||
"ULScript_Tai_Le", // 46 Tale
|
||||
"ULScript_Linear_B", // 47 Linb
|
||||
"ULScript_Ugaritic", // 48 Ugar
|
||||
"ULScript_Shavian", // 49 Shaw
|
||||
"ULScript_Osmanya", // 50 Osma
|
||||
"ULScript_Cypriot", // 51 Cprt
|
||||
"ULScript_Braille", // 52 Brai
|
||||
"ULScript_Buginese", // 53 Bugi
|
||||
"ULScript_Coptic", // 54 Copt
|
||||
"ULScript_New_Tai_Lue", // 55 Talu
|
||||
"ULScript_Glagolitic", // 56 Glag
|
||||
"ULScript_Tifinagh", // 57 Tfng
|
||||
"ULScript_Syloti_Nagri", // 58 Sylo
|
||||
"ULScript_Old_Persian", // 59 Xpeo
|
||||
"ULScript_Kharoshthi", // 60 Khar
|
||||
"ULScript_Balinese", // 61 Bali
|
||||
"ULScript_Cuneiform", // 62 Xsux
|
||||
"ULScript_Phoenician", // 63 Phnx
|
||||
"ULScript_Phags_Pa", // 64 Phag
|
||||
"ULScript_Nko", // 65 Nkoo
|
||||
"ULScript_Sundanese", // 66 Sund
|
||||
"ULScript_Lepcha", // 67 Lepc
|
||||
"ULScript_Ol_Chiki", // 68 Olck
|
||||
"ULScript_Vai", // 69 Vaii
|
||||
"ULScript_Saurashtra", // 70 Saur
|
||||
"ULScript_Kayah_Li", // 71 Kali
|
||||
"ULScript_Rejang", // 72 Rjng
|
||||
"ULScript_Lycian", // 73 Lyci
|
||||
"ULScript_Carian", // 74 Cari
|
||||
"ULScript_Lydian", // 75 Lydi
|
||||
"ULScript_Cham", // 76 Cham
|
||||
"ULScript_Tai_Tham", // 77 Lana
|
||||
"ULScript_Tai_Viet", // 78 Tavt
|
||||
"ULScript_Avestan", // 79 Avst
|
||||
"ULScript_Egyptian_Hieroglyphs", // 80 Egyp
|
||||
"ULScript_Samaritan", // 81 Samr
|
||||
"ULScript_Lisu", // 82 Lisu
|
||||
"ULScript_Bamum", // 83 Bamu
|
||||
"ULScript_Javanese", // 84 Java
|
||||
"ULScript_Meetei_Mayek", // 85 Mtei
|
||||
"ULScript_Imperial_Aramaic", // 86 Armi
|
||||
"ULScript_Old_South_Arabian", // 87 Sarb
|
||||
"ULScript_Inscriptional_Parthian", // 88 Prti
|
||||
"ULScript_Inscriptional_Pahlavi", // 89 Phli
|
||||
"ULScript_Old_Turkic", // 90 Orkh
|
||||
"ULScript_Kaithi", // 91 Kthi
|
||||
"ULScript_Batak", // 92 Batk
|
||||
"ULScript_Brahmi", // 93 Brah
|
||||
"ULScript_Mandaic", // 94 Mand
|
||||
"ULScript_Chakma", // 95 Cakm
|
||||
"ULScript_Meroitic_Cursive", // 96 Merc
|
||||
"ULScript_Meroitic_Hieroglyphs", // 97 Mero
|
||||
"ULScript_Miao", // 98 Plrd
|
||||
"ULScript_Sharada", // 99 Shrd
|
||||
"ULScript_Sora_Sompeng", // 100 Sora
|
||||
"ULScript_Takri", // 101 Takr
|
||||
};
|
||||
|
||||
// Subscripted by enum ULScript
|
||||
extern const int kULScriptToRtypeSize = 102;
|
||||
extern const ULScriptRType kULScriptToRtype[kULScriptToRtypeSize] = {
|
||||
RTypeNone, // 0 Zyyy
|
||||
RTypeMany, // 1 Latn
|
||||
RTypeOne, // 2 Grek
|
||||
RTypeMany, // 3 Cyrl
|
||||
RTypeOne, // 4 Armn
|
||||
RTypeMany, // 5 Hebr
|
||||
RTypeMany, // 6 Arab
|
||||
RTypeOne, // 7 Syrc
|
||||
RTypeOne, // 8 Thaa
|
||||
RTypeMany, // 9 Deva
|
||||
RTypeMany, // 10 Beng
|
||||
RTypeOne, // 11 Guru
|
||||
RTypeOne, // 12 Gujr
|
||||
RTypeOne, // 13 Orya
|
||||
RTypeOne, // 14 Taml
|
||||
RTypeOne, // 15 Telu
|
||||
RTypeOne, // 16 Knda
|
||||
RTypeOne, // 17 Mlym
|
||||
RTypeOne, // 18 Sinh
|
||||
RTypeOne, // 19 Thai
|
||||
RTypeOne, // 20 Laoo
|
||||
RTypeMany, // 21 Tibt
|
||||
RTypeOne, // 22 Mymr
|
||||
RTypeOne, // 23 Geor
|
||||
RTypeCJK, // 24 Hani
|
||||
RTypeMany, // 25 Ethi
|
||||
RTypeOne, // 26 Cher
|
||||
RTypeOne, // 27 Cans
|
||||
RTypeNone, // 28 Ogam
|
||||
RTypeNone, // 29 Runr
|
||||
RTypeOne, // 30 Khmr
|
||||
RTypeOne, // 31 Mong
|
||||
RTypeNone, // 32
|
||||
RTypeNone, // 33
|
||||
RTypeNone, // 34 Bopo
|
||||
RTypeNone, // 35
|
||||
RTypeNone, // 36 Yiii
|
||||
RTypeNone, // 37 Ital
|
||||
RTypeNone, // 38 Goth
|
||||
RTypeNone, // 39 Dsrt
|
||||
RTypeNone, // 40 Zinh
|
||||
RTypeOne, // 41 Tglg
|
||||
RTypeNone, // 42 Hano
|
||||
RTypeNone, // 43 Buhd
|
||||
RTypeNone, // 44 Tagb
|
||||
RTypeOne, // 45 Limb
|
||||
RTypeNone, // 46 Tale
|
||||
RTypeNone, // 47 Linb
|
||||
RTypeNone, // 48 Ugar
|
||||
RTypeNone, // 49 Shaw
|
||||
RTypeNone, // 50 Osma
|
||||
RTypeNone, // 51 Cprt
|
||||
RTypeNone, // 52 Brai
|
||||
RTypeNone, // 53 Bugi
|
||||
RTypeNone, // 54 Copt
|
||||
RTypeNone, // 55 Talu
|
||||
RTypeNone, // 56 Glag
|
||||
RTypeNone, // 57 Tfng
|
||||
RTypeNone, // 58 Sylo
|
||||
RTypeNone, // 59 Xpeo
|
||||
RTypeNone, // 60 Khar
|
||||
RTypeNone, // 61 Bali
|
||||
RTypeNone, // 62 Xsux
|
||||
RTypeNone, // 63 Phnx
|
||||
RTypeNone, // 64 Phag
|
||||
RTypeNone, // 65 Nkoo
|
||||
RTypeNone, // 66 Sund
|
||||
RTypeNone, // 67 Lepc
|
||||
RTypeNone, // 68 Olck
|
||||
RTypeNone, // 69 Vaii
|
||||
RTypeNone, // 70 Saur
|
||||
RTypeNone, // 71 Kali
|
||||
RTypeNone, // 72 Rjng
|
||||
RTypeNone, // 73 Lyci
|
||||
RTypeNone, // 74 Cari
|
||||
RTypeNone, // 75 Lydi
|
||||
RTypeNone, // 76 Cham
|
||||
RTypeNone, // 77 Lana
|
||||
RTypeNone, // 78 Tavt
|
||||
RTypeNone, // 79 Avst
|
||||
RTypeNone, // 80 Egyp
|
||||
RTypeNone, // 81 Samr
|
||||
RTypeNone, // 82 Lisu
|
||||
RTypeNone, // 83 Bamu
|
||||
RTypeNone, // 84 Java
|
||||
RTypeNone, // 85 Mtei
|
||||
RTypeNone, // 86 Armi
|
||||
RTypeNone, // 87 Sarb
|
||||
RTypeNone, // 88 Prti
|
||||
RTypeNone, // 89 Phli
|
||||
RTypeNone, // 90 Orkh
|
||||
RTypeNone, // 91 Kthi
|
||||
RTypeNone, // 92 Batk
|
||||
RTypeNone, // 93 Brah
|
||||
RTypeNone, // 94 Mand
|
||||
RTypeNone, // 95 Cakm
|
||||
RTypeNone, // 96 Merc
|
||||
RTypeNone, // 97 Mero
|
||||
RTypeNone, // 98 Plrd
|
||||
RTypeNone, // 99 Shrd
|
||||
RTypeNone, // 100 Sora
|
||||
RTypeNone, // 101 Takr
|
||||
};
|
||||
|
||||
// Subscripted by enum ULScript
|
||||
extern const int kULScriptToDefaultLangSize = 102;
|
||||
|
||||
// Alphabetical order for binary search
|
||||
extern const int kNameToULScriptSize = 105;
|
||||
extern const CharIntPair kNameToULScript[kNameToULScriptSize] = {
|
||||
{"Arabic", 6}, // Arab
|
||||
{"Armenian", 4}, // Armn
|
||||
{"Avestan", 79}, // Avst
|
||||
{"Balinese", 61}, // Bali
|
||||
{"Bamum", 83}, // Bamu
|
||||
{"Batak", 92}, // Batk
|
||||
{"Bengali", 10}, // Beng
|
||||
{"Bopomofo", 34}, // Bopo
|
||||
{"Brahmi", 93}, // Brah
|
||||
{"Braille", 52}, // Brai
|
||||
{"Buginese", 53}, // Bugi
|
||||
{"Buhid", 43}, // Buhd
|
||||
{"Canadian_Aboriginal", 27}, // Cans
|
||||
{"Carian", 74}, // Cari
|
||||
{"Chakma", 95}, // Cakm
|
||||
{"Cham", 76}, // Cham
|
||||
{"Cherokee", 26}, // Cher
|
||||
{"Common", 0}, // Zyyy
|
||||
{"Coptic", 54}, // Copt
|
||||
{"Cuneiform", 62}, // Xsux
|
||||
{"Cypriot", 51}, // Cprt
|
||||
{"Cyrillic", 3}, // Cyrl
|
||||
{"Deseret", 39}, // Dsrt
|
||||
{"Devanagari", 9}, // Deva
|
||||
{"Egyptian_Hieroglyphs", 80}, // Egyp
|
||||
{"Ethiopic", 25}, // Ethi
|
||||
{"Georgian", 23}, // Geor
|
||||
{"Glagolitic", 56}, // Glag
|
||||
{"Gothic", 38}, // Goth
|
||||
{"Greek", 2}, // Grek
|
||||
{"Gujarati", 12}, // Gujr
|
||||
{"Gurmukhi", 11}, // Guru
|
||||
{"Han", 24}, // Hant
|
||||
{"Han", 24}, // Hans
|
||||
{"Han", 24}, // Hani
|
||||
{"Hangul", 24}, // Hang
|
||||
{"Hani", 24}, // Hani
|
||||
{"Hanunoo", 42}, // Hano
|
||||
{"Hebrew", 5}, // Hebr
|
||||
{"Hiragana", 24}, // Hira
|
||||
{"Imperial_Aramaic", 86}, // Armi
|
||||
{"Inherited", 40}, // Zinh
|
||||
{"Inscriptional_Pahlavi", 89}, // Phli
|
||||
{"Inscriptional_Parthian", 88}, // Prti
|
||||
{"Javanese", 84}, // Java
|
||||
{"Kaithi", 91}, // Kthi
|
||||
{"Kannada", 16}, // Knda
|
||||
{"Katakana", 24}, // Kana
|
||||
{"Kayah_Li", 71}, // Kali
|
||||
{"Kharoshthi", 60}, // Khar
|
||||
{"Khmer", 30}, // Khmr
|
||||
{"Lao", 20}, // Laoo
|
||||
{"Latin", 1}, // Latn
|
||||
{"Lepcha", 67}, // Lepc
|
||||
{"Limbu", 45}, // Limb
|
||||
{"Linear_B", 47}, // Linb
|
||||
{"Lisu", 82}, // Lisu
|
||||
{"Lycian", 73}, // Lyci
|
||||
{"Lydian", 75}, // Lydi
|
||||
{"Malayalam", 17}, // Mlym
|
||||
{"Mandaic", 94}, // Mand
|
||||
{"Meetei_Mayek", 85}, // Mtei
|
||||
{"Meroitic_Cursive", 96}, // Merc
|
||||
{"Meroitic_Hieroglyphs", 97}, // Mero
|
||||
{"Miao", 98}, // Plrd
|
||||
{"Mongolian", 31}, // Mong
|
||||
{"Myanmar", 22}, // Mymr
|
||||
{"New_Tai_Lue", 55}, // Talu
|
||||
{"Nko", 65}, // Nkoo
|
||||
{"Ogham", 28}, // Ogam
|
||||
{"Ol_Chiki", 68}, // Olck
|
||||
{"Old_Italic", 37}, // Ital
|
||||
{"Old_Persian", 59}, // Xpeo
|
||||
{"Old_South_Arabian", 87}, // Sarb
|
||||
{"Old_Turkic", 90}, // Orkh
|
||||
{"Oriya", 13}, // Orya
|
||||
{"Osmanya", 50}, // Osma
|
||||
{"Phags_Pa", 64}, // Phag
|
||||
{"Phoenician", 63}, // Phnx
|
||||
{"Rejang", 72}, // Rjng
|
||||
{"Runic", 29}, // Runr
|
||||
{"Samaritan", 81}, // Samr
|
||||
{"Saurashtra", 70}, // Saur
|
||||
{"Sharada", 99}, // Shrd
|
||||
{"Shavian", 49}, // Shaw
|
||||
{"Sinhala", 18}, // Sinh
|
||||
{"Sora_Sompeng", 100}, // Sora
|
||||
{"Sundanese", 66}, // Sund
|
||||
{"Syloti_Nagri", 58}, // Sylo
|
||||
{"Syriac", 7}, // Syrc
|
||||
{"Tagalog", 41}, // Tglg
|
||||
{"Tagbanwa", 44}, // Tagb
|
||||
{"Tai_Le", 46}, // Tale
|
||||
{"Tai_Tham", 77}, // Lana
|
||||
{"Tai_Viet", 78}, // Tavt
|
||||
{"Takri", 101}, // Takr
|
||||
{"Tamil", 14}, // Taml
|
||||
{"Telugu", 15}, // Telu
|
||||
{"Thaana", 8}, // Thaa
|
||||
{"Thai", 19}, // Thai
|
||||
{"Tibetan", 21}, // Tibt
|
||||
{"Tifinagh", 57}, // Tfng
|
||||
{"Ugaritic", 48}, // Ugar
|
||||
{"Vai", 69}, // Vaii
|
||||
{"Yi", 36}, // Yiii
|
||||
};
|
||||
|
||||
// Alphabetical order for binary search
|
||||
extern const int kCodeToULScriptSize = 105;
|
||||
extern const CharIntPair kCodeToULScript[kNameToULScriptSize] = {
|
||||
{"Arab", 6}, // Arab
|
||||
{"Armi", 86}, // Armi
|
||||
{"Armn", 4}, // Armn
|
||||
{"Avst", 79}, // Avst
|
||||
{"Bali", 61}, // Bali
|
||||
{"Bamu", 83}, // Bamu
|
||||
{"Batk", 92}, // Batk
|
||||
{"Beng", 10}, // Beng
|
||||
{"Bopo", 34}, // Bopo
|
||||
{"Brah", 93}, // Brah
|
||||
{"Brai", 52}, // Brai
|
||||
{"Bugi", 53}, // Bugi
|
||||
{"Buhd", 43}, // Buhd
|
||||
{"Cakm", 95}, // Cakm
|
||||
{"Cans", 27}, // Cans
|
||||
{"Cari", 74}, // Cari
|
||||
{"Cham", 76}, // Cham
|
||||
{"Cher", 26}, // Cher
|
||||
{"Copt", 54}, // Copt
|
||||
{"Cprt", 51}, // Cprt
|
||||
{"Cyrl", 3}, // Cyrl
|
||||
{"Deva", 9}, // Deva
|
||||
{"Dsrt", 39}, // Dsrt
|
||||
{"Egyp", 80}, // Egyp
|
||||
{"Ethi", 25}, // Ethi
|
||||
{"Geor", 23}, // Geor
|
||||
{"Glag", 56}, // Glag
|
||||
{"Goth", 38}, // Goth
|
||||
{"Grek", 2}, // Grek
|
||||
{"Gujr", 12}, // Gujr
|
||||
{"Guru", 11}, // Guru
|
||||
{"Hang", 24}, // Hang
|
||||
{"Hani", 24}, // Hani
|
||||
{"Hani", 24}, // Hani
|
||||
{"Hano", 42}, // Hano
|
||||
{"Hans", 24}, // Hans
|
||||
{"Hant", 24}, // Hant
|
||||
{"Hebr", 5}, // Hebr
|
||||
{"Hira", 24}, // Hira
|
||||
{"Ital", 37}, // Ital
|
||||
{"Java", 84}, // Java
|
||||
{"Kali", 71}, // Kali
|
||||
{"Kana", 24}, // Kana
|
||||
{"Khar", 60}, // Khar
|
||||
{"Khmr", 30}, // Khmr
|
||||
{"Knda", 16}, // Knda
|
||||
{"Kthi", 91}, // Kthi
|
||||
{"Lana", 77}, // Lana
|
||||
{"Laoo", 20}, // Laoo
|
||||
{"Latn", 1}, // Latn
|
||||
{"Lepc", 67}, // Lepc
|
||||
{"Limb", 45}, // Limb
|
||||
{"Linb", 47}, // Linb
|
||||
{"Lisu", 82}, // Lisu
|
||||
{"Lyci", 73}, // Lyci
|
||||
{"Lydi", 75}, // Lydi
|
||||
{"Mand", 94}, // Mand
|
||||
{"Merc", 96}, // Merc
|
||||
{"Mero", 97}, // Mero
|
||||
{"Mlym", 17}, // Mlym
|
||||
{"Mong", 31}, // Mong
|
||||
{"Mtei", 85}, // Mtei
|
||||
{"Mymr", 22}, // Mymr
|
||||
{"Nkoo", 65}, // Nkoo
|
||||
{"Ogam", 28}, // Ogam
|
||||
{"Olck", 68}, // Olck
|
||||
{"Orkh", 90}, // Orkh
|
||||
{"Orya", 13}, // Orya
|
||||
{"Osma", 50}, // Osma
|
||||
{"Phag", 64}, // Phag
|
||||
{"Phli", 89}, // Phli
|
||||
{"Phnx", 63}, // Phnx
|
||||
{"Plrd", 98}, // Plrd
|
||||
{"Prti", 88}, // Prti
|
||||
{"Rjng", 72}, // Rjng
|
||||
{"Runr", 29}, // Runr
|
||||
{"Samr", 81}, // Samr
|
||||
{"Sarb", 87}, // Sarb
|
||||
{"Saur", 70}, // Saur
|
||||
{"Shaw", 49}, // Shaw
|
||||
{"Shrd", 99}, // Shrd
|
||||
{"Sinh", 18}, // Sinh
|
||||
{"Sora", 100}, // Sora
|
||||
{"Sund", 66}, // Sund
|
||||
{"Sylo", 58}, // Sylo
|
||||
{"Syrc", 7}, // Syrc
|
||||
{"Tagb", 44}, // Tagb
|
||||
{"Takr", 101}, // Takr
|
||||
{"Tale", 46}, // Tale
|
||||
{"Talu", 55}, // Talu
|
||||
{"Taml", 14}, // Taml
|
||||
{"Tavt", 78}, // Tavt
|
||||
{"Telu", 15}, // Telu
|
||||
{"Tfng", 57}, // Tfng
|
||||
{"Tglg", 41}, // Tglg
|
||||
{"Thaa", 8}, // Thaa
|
||||
{"Thai", 19}, // Thai
|
||||
{"Tibt", 21}, // Tibt
|
||||
{"Ugar", 48}, // Ugar
|
||||
{"Vaii", 69}, // Vaii
|
||||
{"Xpeo", 59}, // Xpeo
|
||||
{"Xsux", 62}, // Xsux
|
||||
{"Yiii", 36}, // Yiii
|
||||
{"Zinh", 40}, // Zinh
|
||||
{"Zyyy", 0}, // Zyyy
|
||||
};
|
||||
|
||||
} // namespace CLD2
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,142 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// generated_ulscript.h
|
||||
// Machine generated. Do Not Edit.
|
||||
//
|
||||
// Declarations for scripts recognized by CLD2
|
||||
//
|
||||
|
||||
#ifndef SCRIPT_SPAN_GENERATED_ULSCRIPT_H_
|
||||
#define SCRIPT_SPAN_GENERATED_ULSCRIPT_H_
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace CLD2 {
|
||||
|
||||
// Recognition category attached to each script.
// NOTE(review): presumably None/One/Many describe how many languages are
// written in the script, with CJK as a special case -- confirm against CLD2.
enum ULScriptRType {
  RTypeNone = 0,
  RTypeOne,
  RTypeMany,
  RTypeCJK
};

// A C string paired with an integer; the element type of the sorted
// name/code lookup tables.
struct CharIntPair {
  const char* s;
  int i;
};
|
||||
|
||||
// Numeric identifier of every script known to the detector.  The trailing
// comment on each row is the four-letter script code; values 32, 33 and 35
// carry placeholder names with no code.  NUM_ULSCRIPTS follows the last
// script and therefore equals the number of enumerators before it (102).
enum ULScript {
  ULScript_Common                 = 0,    // Zyyy
  ULScript_Latin                  = 1,    // Latn
  ULScript_Greek                  = 2,    // Grek
  ULScript_Cyrillic               = 3,    // Cyrl
  ULScript_Armenian               = 4,    // Armn
  ULScript_Hebrew                 = 5,    // Hebr
  ULScript_Arabic                 = 6,    // Arab
  ULScript_Syriac                 = 7,    // Syrc
  ULScript_Thaana                 = 8,    // Thaa
  ULScript_Devanagari             = 9,    // Deva
  ULScript_Bengali                = 10,   // Beng
  ULScript_Gurmukhi               = 11,   // Guru
  ULScript_Gujarati               = 12,   // Gujr
  ULScript_Oriya                  = 13,   // Orya
  ULScript_Tamil                  = 14,   // Taml
  ULScript_Telugu                 = 15,   // Telu
  ULScript_Kannada                = 16,   // Knda
  ULScript_Malayalam              = 17,   // Mlym
  ULScript_Sinhala                = 18,   // Sinh
  ULScript_Thai                   = 19,   // Thai
  ULScript_Lao                    = 20,   // Laoo
  ULScript_Tibetan                = 21,   // Tibt
  ULScript_Myanmar                = 22,   // Mymr
  ULScript_Georgian               = 23,   // Geor
  ULScript_Hani                   = 24,   // Hani
  ULScript_Ethiopic               = 25,   // Ethi
  ULScript_Cherokee               = 26,   // Cher
  ULScript_Canadian_Aboriginal    = 27,   // Cans
  ULScript_Ogham                  = 28,   // Ogam
  ULScript_Runic                  = 29,   // Runr
  ULScript_Khmer                  = 30,   // Khmr
  ULScript_Mongolian              = 31,   // Mong
  ULScript_32                     = 32,   //
  ULScript_33                     = 33,   //
  ULScript_Bopomofo               = 34,   // Bopo
  ULScript_35                     = 35,   //
  ULScript_Yi                     = 36,   // Yiii
  ULScript_Old_Italic             = 37,   // Ital
  ULScript_Gothic                 = 38,   // Goth
  ULScript_Deseret                = 39,   // Dsrt
  ULScript_Inherited              = 40,   // Zinh
  ULScript_Tagalog                = 41,   // Tglg
  ULScript_Hanunoo                = 42,   // Hano
  ULScript_Buhid                  = 43,   // Buhd
  ULScript_Tagbanwa               = 44,   // Tagb
  ULScript_Limbu                  = 45,   // Limb
  ULScript_Tai_Le                 = 46,   // Tale
  ULScript_Linear_B               = 47,   // Linb
  ULScript_Ugaritic               = 48,   // Ugar
  ULScript_Shavian                = 49,   // Shaw
  ULScript_Osmanya                = 50,   // Osma
  ULScript_Cypriot                = 51,   // Cprt
  ULScript_Braille                = 52,   // Brai
  ULScript_Buginese               = 53,   // Bugi
  ULScript_Coptic                 = 54,   // Copt
  ULScript_New_Tai_Lue            = 55,   // Talu
  ULScript_Glagolitic             = 56,   // Glag
  ULScript_Tifinagh               = 57,   // Tfng
  ULScript_Syloti_Nagri           = 58,   // Sylo
  ULScript_Old_Persian            = 59,   // Xpeo
  ULScript_Kharoshthi             = 60,   // Khar
  ULScript_Balinese               = 61,   // Bali
  ULScript_Cuneiform              = 62,   // Xsux
  ULScript_Phoenician             = 63,   // Phnx
  ULScript_Phags_Pa               = 64,   // Phag
  ULScript_Nko                    = 65,   // Nkoo
  ULScript_Sundanese              = 66,   // Sund
  ULScript_Lepcha                 = 67,   // Lepc
  ULScript_Ol_Chiki               = 68,   // Olck
  ULScript_Vai                    = 69,   // Vaii
  ULScript_Saurashtra             = 70,   // Saur
  ULScript_Kayah_Li               = 71,   // Kali
  ULScript_Rejang                 = 72,   // Rjng
  ULScript_Lycian                 = 73,   // Lyci
  ULScript_Carian                 = 74,   // Cari
  ULScript_Lydian                 = 75,   // Lydi
  ULScript_Cham                   = 76,   // Cham
  ULScript_Tai_Tham               = 77,   // Lana
  ULScript_Tai_Viet               = 78,   // Tavt
  ULScript_Avestan                = 79,   // Avst
  ULScript_Egyptian_Hieroglyphs   = 80,   // Egyp
  ULScript_Samaritan              = 81,   // Samr
  ULScript_Lisu                   = 82,   // Lisu
  ULScript_Bamum                  = 83,   // Bamu
  ULScript_Javanese               = 84,   // Java
  ULScript_Meetei_Mayek           = 85,   // Mtei
  ULScript_Imperial_Aramaic       = 86,   // Armi
  ULScript_Old_South_Arabian      = 87,   // Sarb
  ULScript_Inscriptional_Parthian = 88,   // Prti
  ULScript_Inscriptional_Pahlavi  = 89,   // Phli
  ULScript_Old_Turkic             = 90,   // Orkh
  ULScript_Kaithi                 = 91,   // Kthi
  ULScript_Batak                  = 92,   // Batk
  ULScript_Brahmi                 = 93,   // Brah
  ULScript_Mandaic                = 94,   // Mand
  ULScript_Chakma                 = 95,   // Cakm
  ULScript_Meroitic_Cursive       = 96,   // Merc
  ULScript_Meroitic_Hieroglyphs   = 97,   // Mero
  ULScript_Miao                   = 98,   // Plrd
  ULScript_Sharada                = 99,   // Shrd
  ULScript_Sora_Sompeng           = 100,  // Sora
  ULScript_Takri                  = 101,  // Takr
  NUM_ULSCRIPTS
};
|
||||
|
||||
#define UNKNOWN_ULSCRIPT ULScript_Common
|
||||
|
||||
} // namespace CLD2
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // SCRIPT_SPAN_GENERATED_ULSCRIPT_H_
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,124 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//
|
||||
// Author: dsites@google.com (Dick Sites)
|
||||
//
|
||||
|
||||
|
||||
#ifndef SCRIPT_SPAN_GETONESCRIPTSPAN_H_
|
||||
#define SCRIPT_SPAN_GETONESCRIPTSPAN_H_
|
||||
|
||||
#include "generated_ulscript.h"
|
||||
#include "integral_types.h"
|
||||
#include "offsetmap.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace CLD2 {
|
||||
|
||||
// Sizing constants for the script-span buffers.
static constexpr int kMaxScriptBuffer = 40960;
// The lowercase buffer is one and a half times the size of the script buffer.
static constexpr int kMaxScriptLowerBuffer = (kMaxScriptBuffer * 3) / 2;
// Usable bytes in the script buffer; the remaining 32 leave some room.
static constexpr int kMaxScriptBytes = kMaxScriptBuffer - 32;
// Stop at a word space within the last N bytes of the script buffer.
static constexpr int kWithinScriptTail = 32;
|
||||
|
||||
typedef struct {
|
||||
char* text = nullptr; // Pointer to the span, somewhere
|
||||
int text_bytes = 0; // Number of bytes of text in the span
|
||||
int offset = 0; // Offset of start of span in original input buffer
|
||||
ULScript ulscript = UNKNOWN_ULSCRIPT; // Unicode Letters Script of this span
|
||||
bool truncated = false; // true if buffer filled up before a
|
||||
// different script or EOF was found
|
||||
} LangSpan;
|
||||
|
||||
// Returns true when c has the bit pattern 10xxxxxx (0x80..0xBF), i.e. it is a
// UTF-8 continuation byte rather than the first byte of a character.
static inline bool IsContinuationByte(char c) {
  const unsigned char byte = static_cast<unsigned char>(c);
  return (byte & 0xC0) == 0x80;
}
|
||||
|
||||
// Gets lscript number for letters; always returns
|
||||
// 0 (common script) for non-letters
|
||||
int GetUTF8LetterScriptNum(const char* src);
|
||||
|
||||
// Update src pointer to point to next quadgram, +2..+5
|
||||
// Looks at src[0..4]
|
||||
const char* AdvanceQuad(const char* src);
|
||||
|
||||
// Utility routine to search alphabetical tables
|
||||
int BinarySearch(const char* key, int lo, int hi, const CharIntPair* cipair);
|
||||
|
||||
// Returns the length in bytes of the prefix of src that is all
|
||||
// interchange valid UTF-8
|
||||
int SpanInterchangeValid(const char* src, int byte_length);
|
||||
|
||||
class ScriptScanner {
|
||||
public:
|
||||
ScriptScanner(const char* buffer, int buffer_length, bool is_plain_text);
|
||||
ScriptScanner(const char* buffer, int buffer_length, bool is_plain_text,
|
||||
bool any_text, bool any_script);
|
||||
~ScriptScanner();
|
||||
|
||||
// Copy next run of same-script non-tag letters to buffer [NUL terminated]
|
||||
bool GetOneScriptSpan(LangSpan* span);
|
||||
|
||||
// Force Latin and Cyrillic scripts to be lowercase
|
||||
void LowerScriptSpan(LangSpan* span);
|
||||
|
||||
// Copy next run of same-script non-tag letters to buffer [NUL terminated]
|
||||
// Force Latin and Cyrillic scripts to be lowercase
|
||||
bool GetOneScriptSpanLower(LangSpan* span);
|
||||
|
||||
// Copy next run of non-tag characters to buffer [NUL terminated]
|
||||
// This just removes tags and removes entities
|
||||
// Buffer has leading space
|
||||
bool GetOneTextSpan(LangSpan* span);
|
||||
|
||||
// Maps byte offset in most recent GetOneScriptSpan/Lower
|
||||
// span->text [0..text_bytes] into an additional byte offset from
|
||||
// span->offset, to get back to corresponding text in the original
|
||||
// input buffer.
|
||||
// text_offset must be the first byte
|
||||
// of a UTF-8 character, or just beyond the last character. Normally this
|
||||
// routine is called with the first byte of an interesting range and
|
||||
// again with the first byte of the following range.
|
||||
int MapBack(int text_offset);
|
||||
|
||||
const char* GetBufferStart() {return start_byte_;};
|
||||
|
||||
private:
|
||||
// Skip over tags and non-letters
|
||||
int SkipToFrontOfSpan(const char* src, int len, int* script);
|
||||
|
||||
const char* start_byte_; // Starting byte of buffer to scan
|
||||
const char* next_byte_; // First unscanned byte
|
||||
int byte_length_; // Bytes left
|
||||
|
||||
bool is_plain_text_; // true fo text, false for HTML
|
||||
char* script_buffer_; // Holds text with expanded entities
|
||||
char* script_buffer_lower_; // Holds lowercased text
|
||||
bool letters_marks_only_; // To distinguish scriptspan of one
|
||||
// letters/marks vs. any mixture of text
|
||||
bool one_script_only_; // To distinguish scriptspan of one
|
||||
// script vs. any mixture of scripts
|
||||
int exit_state_; // For tag parser kTagParseTbl_0, based
|
||||
// on letters_marks_only_
|
||||
public :
|
||||
// Expose for debugging
|
||||
OffsetMap map2original_; // map from script_buffer_ to buffer
|
||||
OffsetMap map2uplow_; // map from script_buffer_lower_ to script_buffer_
|
||||
};
|
||||
|
||||
} // namespace CLD2
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // SCRIPT_SPAN_GETONESCRIPTSPAN_H_
|
||||
@ -0,0 +1,135 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "getonescriptspan.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace CLD2 {
|
||||
namespace getonescriptspan_test {
|
||||
|
||||
// Tests invalid and interchange-invalid input. Returns "true" if the test is
|
||||
// successful and "false" otherwise.
|
||||
bool TestInvalidUTF8Input() {
|
||||
std::cout << "Running " << __FUNCTION__ << std::endl;
|
||||
const std::vector<std::string> invalid_strings{"\xC0\xA9",
|
||||
"\377\377\377\377"};
|
||||
const std::string gold_valid_prefix = "Some valid bytes followed by ";
|
||||
|
||||
// Iterates over the invalid strings, inserts each of them in the middle of a
|
||||
// piece of text, and checks whether these strings are correctly identified.
|
||||
bool test_successful = true;
|
||||
for (size_t i = 0; i < invalid_strings.size(); ++i) {
|
||||
const std::string text = "Some valid bytes followed by " +
|
||||
invalid_strings.at(i) +
|
||||
" and then valid ones again.";
|
||||
|
||||
const int num_valid_bytes = SpanInterchangeValid(text.c_str(), text.size());
|
||||
const std::string detected_valid_prefix(text.c_str(), num_valid_bytes);
|
||||
std::cout << " Testing input string at position " << i << std::endl;
|
||||
if (detected_valid_prefix == gold_valid_prefix) {
|
||||
std::cout << " Success!" << std::endl;
|
||||
} else {
|
||||
std::cout << " Failure" << std::endl;
|
||||
std::cout << " Gold: " << gold_valid_prefix << std::endl;
|
||||
std::cout << " Detected: " << detected_valid_prefix << std::endl;
|
||||
test_successful = false;
|
||||
}
|
||||
}
|
||||
return test_successful;
|
||||
}
|
||||
|
||||
// Tests whether different scripts are correctly detected. Returns "true" if the
|
||||
// test is successful and "false" otherwise.
|
||||
bool TestScriptDetection() {
|
||||
std::cout << "Running " << __FUNCTION__ << std::endl;
|
||||
|
||||
// Text containing a snippet in English, a snippet in Bulgarian, and a snippet
|
||||
// in English again.
|
||||
const std::string text =
|
||||
"Text in English. Текст на Български. Also text in English.";
|
||||
const std::vector<std::string> gold_script_spans{
|
||||
" Text in English ", " Текст на Български ", " Also text in English "};
|
||||
|
||||
std::vector<std::string> detected_script_spans;
|
||||
ScriptScanner ss(text.c_str(), text.size(), /*is_plain_text=*/true);
|
||||
LangSpan script_span;
|
||||
while (ss.GetOneScriptSpan(&script_span)) {
|
||||
detected_script_spans.emplace_back(script_span.text,
|
||||
script_span.text_bytes);
|
||||
}
|
||||
|
||||
if (detected_script_spans.size() != gold_script_spans.size()) {
|
||||
std::cout << " Failure" << std::endl;
|
||||
std::cout << " Number of gold spans " << gold_script_spans.size()
|
||||
<< std::endl;
|
||||
std::cout << " Number of detected spans " << detected_script_spans.size()
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
for (size_t i = 0; i < detected_script_spans.size(); ++i) {
|
||||
if (detected_script_spans.at(i) != gold_script_spans.at(i)) {
|
||||
std::cout << " Failure" << std::endl;
|
||||
std::cout << " Gold span: " << gold_script_spans.at(i) << std::endl;
|
||||
std::cout << " Detected span: " << detected_script_spans.at(i)
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
std::cout << " Success!" << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Tests the case when the input string is truncated in such a way that a
|
||||
// character is split in two pieces. Returns "true" if the test is successful
|
||||
// and "false" otherwise.
|
||||
bool TestStringCut() {
|
||||
std::cout << "Running " << __FUNCTION__ << std::endl;
|
||||
|
||||
// Text in Bulgarian (Cyrillic script).
|
||||
const std::string text = "Текст на Български";
|
||||
|
||||
// The size of the first two words ("Текст на ") is 16, and size of the first
|
||||
// two words plus the first char of the third word ("Текст на Б") is 18, so a
|
||||
// threshold of 17 results in slicing the first char of the third word.
|
||||
const int first_two_words_size = 16;
|
||||
const int span_size = 17;
|
||||
const int num_valid_bytes = SpanInterchangeValid(text.c_str(), span_size);
|
||||
if (num_valid_bytes == first_two_words_size) {
|
||||
std::cout << " Success!" << std::endl;
|
||||
return true;
|
||||
} else {
|
||||
std::cout << " Failure" << std::endl;
|
||||
std::cout << " Size of gold interchange-valid span: "
|
||||
<< first_two_words_size << std::endl;
|
||||
std::cout << " Size of detected span: " << num_valid_bytes << std::endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace getonescriptspan_test
|
||||
} // namespace CLD2
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
// Runs the functions above.
|
||||
int main(int argc, char **argv) {
|
||||
const bool tests_successful =
|
||||
chrome_lang_id::CLD2::getonescriptspan_test::TestInvalidUTF8Input() &&
|
||||
chrome_lang_id::CLD2::getonescriptspan_test::TestScriptDetection() &&
|
||||
chrome_lang_id::CLD2::getonescriptspan_test::TestStringCut();
|
||||
return tests_successful ? 0 : 1;
|
||||
}
|
||||
@ -0,0 +1,37 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef SCRIPT_SPAN_INTEGRAL_TYPES_H_
|
||||
#define SCRIPT_SPAN_INTEGRAL_TYPES_H_
|
||||
|
||||
// Cheap version
|
||||
namespace chrome_lang_id {
|
||||
namespace CLD2 {
|
||||
|
||||
// Unsigned integer aliases.
using uint8 = unsigned char;
using uint16 = unsigned short;
using uint32 = unsigned int;
using uint64 = unsigned long long int;

// Signed integer aliases.
using int8 = signed char;
using int16 = signed short;
using int32 = signed int;
using int64 = signed long long int;

// Integer type used to hold a character value.
using char32 = int32;
|
||||
|
||||
} // End namespace CLD2
|
||||
} // End namespace chrome_lang_id
|
||||
|
||||
#endif // SCRIPT_SPAN_INTEGRAL_TYPES_H_
|
||||
@ -0,0 +1,478 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//
|
||||
// Author: dsites@google.com (Dick Sites)
|
||||
//
|
||||
//
|
||||
|
||||
#include "offsetmap.h"
|
||||
|
||||
#include <string.h> // for strcmp
|
||||
#include <algorithm> // for min
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace CLD2 {
|
||||
|
||||
// Constructor, destructor
|
||||
OffsetMap::OffsetMap() {
|
||||
Clear();
|
||||
}
|
||||
|
||||
OffsetMap::~OffsetMap() {
|
||||
}
|
||||
|
||||
// Clear the map
|
||||
// After:
|
||||
// next_diff_sub_ is 0
|
||||
// Windows are the a and a' ranges covered by diffs_[next_diff_sub_-1]
|
||||
// which is a fake range of width 0 mapping 0=>0
|
||||
void OffsetMap::Clear() {
|
||||
diffs_.clear();
|
||||
pending_op_ = COPY_OP;
|
||||
pending_length_ = 0;
|
||||
next_diff_sub_ = 0;
|
||||
current_lo_aoffset_ = 0;
|
||||
current_hi_aoffset_ = 0;
|
||||
current_lo_aprimeoffset_ = 0;
|
||||
current_hi_aprimeoffset_ = 0;
|
||||
current_diff_ = 0;
|
||||
max_aoffset_ = 0; // Largest seen so far
|
||||
max_aprimeoffset_ = 0; // Largest seen so far
|
||||
}
|
||||
|
||||
// Extracts the operation code held in the top two bits of a diff byte.
static inline char OpPart(const char c) {
  const int shifted = c >> 6;
  return static_cast<char>(shifted & 3);
}
|
||||
// Extracts the six-bit length field held in the low bits of a diff byte.
static inline char LenPart(const char c) {
  const int low_six_bits = c & 0x3f;
  return static_cast<char>(low_six_bits);
}
|
||||
|
||||
// Reset to offset 0
|
||||
void OffsetMap::Reset() {
|
||||
MaybeFlushAll();
|
||||
|
||||
next_diff_sub_ = 0;
|
||||
current_lo_aoffset_ = 0;
|
||||
current_hi_aoffset_ = 0;
|
||||
current_lo_aprimeoffset_ = 0;
|
||||
current_hi_aprimeoffset_ = 0;
|
||||
current_diff_ = 0;
|
||||
}
|
||||
|
||||
// Add to mapping from A to A', specifying how many next bytes are
|
||||
// identical in A and A'
|
||||
void OffsetMap::Copy(int bytes) {
|
||||
if (bytes == 0) {return;}
|
||||
max_aoffset_ += bytes; // Largest seen so far
|
||||
max_aprimeoffset_ += bytes; // Largest seen so far
|
||||
if (pending_op_ == COPY_OP) {
|
||||
pending_length_ += bytes;
|
||||
} else {
|
||||
Flush();
|
||||
pending_op_ = COPY_OP;
|
||||
pending_length_ = bytes;
|
||||
}
|
||||
}
|
||||
|
||||
// Add to mapping from A to A', specifying how many next bytes are
|
||||
// inserted in A' while not advancing in A at all
|
||||
void OffsetMap::Insert(int bytes){
|
||||
if (bytes == 0) {return;}
|
||||
max_aprimeoffset_ += bytes; // Largest seen so far
|
||||
if (pending_op_ == INSERT_OP) {
|
||||
pending_length_ += bytes;
|
||||
} else if ((bytes == 1) &&
|
||||
(pending_op_ == DELETE_OP) && (pending_length_ == 1)) {
|
||||
// Special-case exactly delete(1) insert(1) +> copy(1);
|
||||
// all others backmap inserts to after deletes
|
||||
pending_op_ = COPY_OP;
|
||||
} else {
|
||||
Flush();
|
||||
pending_op_ = INSERT_OP;
|
||||
pending_length_ = bytes;
|
||||
}
|
||||
}
|
||||
|
||||
// Add to mapping from A to A', specifying how many next bytes are
|
||||
// deleted from A while not advancing in A' at all
|
||||
void OffsetMap::Delete(int bytes){
|
||||
if (bytes == 0) {return;}
|
||||
max_aoffset_ += bytes; // Largest seen so far
|
||||
if (pending_op_ == DELETE_OP) {
|
||||
pending_length_ += bytes;
|
||||
} else if ((bytes == 1) &&
|
||||
(pending_op_ == INSERT_OP) && (pending_length_ == 1)) {
|
||||
// Special-case exactly insert(1) delete(1) => copy(1);
|
||||
// all others backmap deletes to after insertss
|
||||
pending_op_ = COPY_OP;
|
||||
} else {
|
||||
Flush();
|
||||
pending_op_ = DELETE_OP;
|
||||
pending_length_ = bytes;
|
||||
}
|
||||
}
|
||||
|
||||
void OffsetMap::Flush() {
|
||||
if (pending_length_ == 0) {
|
||||
return;
|
||||
}
|
||||
// We may be emitting a copy op just after a copy op because +1 -1 cancelled
|
||||
// inbetween. If the lengths don't need a prefix byte, combine them
|
||||
if ((pending_op_ == COPY_OP) && !diffs_.empty()) {
|
||||
char c = diffs_[diffs_.size() - 1];
|
||||
MapOp prior_op = static_cast<MapOp>(OpPart(c));
|
||||
int prior_len = LenPart(c);
|
||||
if ((prior_op == COPY_OP) && ((prior_len + pending_length_) <= 0x3f)) {
|
||||
diffs_[diffs_.size() - 1] += pending_length_;
|
||||
pending_length_ = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (pending_length_ > 0x3f) {
|
||||
bool non_zero_emitted = false;
|
||||
for (int shift = 30; shift > 0; shift -= 6) {
|
||||
int prefix = (pending_length_ >> shift) & 0x3f;
|
||||
if ((prefix > 0) || non_zero_emitted) {
|
||||
Emit(PREFIX_OP, prefix);
|
||||
non_zero_emitted = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
Emit(pending_op_, pending_length_ & 0x3f);
|
||||
pending_length_ = 0;
|
||||
}
|
||||
|
||||
|
||||
// Add one more entry to copy one byte off the end, then flush
|
||||
void OffsetMap::FlushAll() {
|
||||
Copy(1);
|
||||
Flush();
|
||||
}
|
||||
|
||||
// Flush all if necessary
|
||||
void OffsetMap::MaybeFlushAll() {
|
||||
if ((0 < pending_length_) || diffs_.empty()) {
|
||||
FlushAll();
|
||||
}
|
||||
}
|
||||
|
||||
// Len may be 0, for example as the low piece of length=64
|
||||
void OffsetMap::Emit(MapOp op, int len) {
|
||||
char c = (static_cast<char>(op) << 6) | (len & 0x3f);
|
||||
diffs_.push_back(c);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------//
|
||||
// The guts of the 2013 design //
|
||||
// If there are three ranges a b c in diffs_, we can be in one of five //
|
||||
// states: LEFT of a, in ranges a b c, or RIGHT of c //
|
||||
// In each state, there are windows A[Alo..Ahi), A'[A'lo..A'hi) and diffs_ //
|
||||
// position next_diff_sub_ //
|
||||
// There also are mapping constants max_aoffset_ and max_aprimeoffset_ //
|
||||
// If LEFT, Alo=Ahi=0, A'lo=A'hi=0 and next_diff_sub_=0 //
|
||||
// If RIGHT, Alo=Ahi=max_aoffset_, A'lo=A'hi=max_aprimeoffset_ and //
|
||||
// next_diff_sub_=diffs_.size() //
|
||||
// Otherwise, at least one of A[) and A'[) is non-empty and the first bytes //
|
||||
// correspond to each other. If range i is active, next_diff_sub_ is at //
|
||||
// the first byte of range i+1. Because of the length-prefix operator, //
|
||||
// an individual range item in diffs_ may be multiple bytes //
|
||||
// In all cases aprimeoffset = aoffset + current_diff_ //
|
||||
// i.e. current_diff_ = aprimeoffset - aoffset //
|
||||
// //
|
||||
// In the degenerate case of diffs_.empty(), there are only two states //
|
||||
// LEFT and RIGHT and the mapping is the identity mapping. //
|
||||
// The initial state is LEFT. //
|
||||
// It is an error to move left into LEFT or right into RIGHT, but the code //
|
||||
// below is robust in these cases. //
|
||||
//----------------------------------------------------------------------------//
|
||||
|
||||
// Puts the active window into the LEFT state: empty windows at offset 0 in
// both A and A', zero difference, and the first range of diffs_ next.
void OffsetMap::SetLeft() {
  current_lo_aoffset_ = current_hi_aoffset_ = 0;
  current_lo_aprimeoffset_ = current_hi_aprimeoffset_ = 0;
  current_diff_ = 0;
  next_diff_sub_ = 0;
}
|
||||
|
||||
// Puts the active window into the RIGHT state: empty windows pinned at
// max_aoffset_ / max_aprimeoffset_, with current_diff_ equal to their
// difference.
// NOTE(review): the design comment in this file describes RIGHT as having
// next_diff_sub_ == diffs_.size(), but this function sets it to 0 -- confirm
// which one is intended before relying on it.
void OffsetMap::SetRight() {
  current_lo_aoffset_ = current_hi_aoffset_ = max_aoffset_;
  current_lo_aprimeoffset_ = current_hi_aprimeoffset_ = max_aprimeoffset_;
  current_diff_ = max_aprimeoffset_ - max_aoffset_;
  next_diff_sub_ = 0;
}
|
||||
|
||||
// Back up over previous range, 1..5 bytes
|
||||
// Return subscript at the beginning of that. Pins at 0
|
||||
int OffsetMap::Backup(int sub) {
|
||||
if (sub <= 0) {return 0;}
|
||||
--sub;
|
||||
while ((0 < sub) &&
|
||||
(static_cast<MapOp>(OpPart(diffs_[sub - 1]) == PREFIX_OP))) {
|
||||
--sub;
|
||||
}
|
||||
return sub;
|
||||
}
|
||||
|
||||
// Parse next range, 1..5 bytes
|
||||
// Return subscript just off the end of that
|
||||
int OffsetMap::ParseNext(int sub, MapOp* op, int* length) {
|
||||
*op = PREFIX_OP;
|
||||
*length = 0;
|
||||
char c;
|
||||
while ((sub < static_cast<int>(diffs_.size())) && (*op == PREFIX_OP)) {
|
||||
c = diffs_[sub++];
|
||||
*op = static_cast<MapOp>(OpPart(c));
|
||||
int len = LenPart(c);
|
||||
*length = (*length << 6) + len;
|
||||
}
|
||||
// If mal-formed or in RIGHT, this will return with op = PREFIX_OP
|
||||
// Mal-formed can include a trailing prefix byte with no following op
|
||||
return sub;
|
||||
}
|
||||
|
||||
// Parse previous range, 1..5 bytes
|
||||
// Return current subscript
|
||||
int OffsetMap::ParsePrevious(int sub, MapOp* op, int* length) {
|
||||
sub = Backup(sub);
|
||||
return ParseNext(sub, op, length);
|
||||
}
|
||||
|
||||
// Move active window one range to the right
|
||||
// Return true if move was OK
|
||||
bool OffsetMap::MoveRight() {
|
||||
// If at last range or RIGHT, set to RIGHT, return error
|
||||
if (next_diff_sub_ >= static_cast<int>(diffs_.size())) {
|
||||
SetRight();
|
||||
return false;
|
||||
}
|
||||
// Actually OK to move right
|
||||
MapOp op;
|
||||
int length;
|
||||
bool retval = true;
|
||||
// If mal-formed or in RIGHT, this will return with op = PREFIX_OP
|
||||
next_diff_sub_ = ParseNext(next_diff_sub_, &op, &length);
|
||||
|
||||
current_lo_aoffset_ = current_hi_aoffset_;
|
||||
current_lo_aprimeoffset_ = current_hi_aprimeoffset_;
|
||||
if (op == COPY_OP) {
|
||||
current_hi_aoffset_ = current_lo_aoffset_ + length;
|
||||
current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length;
|
||||
} else if (op == INSERT_OP) {
|
||||
current_hi_aoffset_ = current_lo_aoffset_ + 0;
|
||||
current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length;
|
||||
} else if (op == DELETE_OP) {
|
||||
current_hi_aoffset_ = current_lo_aoffset_ + length;
|
||||
current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + 0;
|
||||
} else {
|
||||
SetRight();
|
||||
retval = false;
|
||||
}
|
||||
current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_;
|
||||
return retval;
|
||||
}
|
||||
|
||||
// Move active window one range to the left
|
||||
// Return true if move was OK
|
||||
bool OffsetMap::MoveLeft() {
|
||||
// If at first range or LEFT, set to LEFT, return error
|
||||
if (next_diff_sub_ <= 0) {
|
||||
SetLeft();
|
||||
return false;
|
||||
}
|
||||
// Back up over current active window
|
||||
next_diff_sub_ = Backup(next_diff_sub_);
|
||||
if (next_diff_sub_ <= 0) {
|
||||
SetLeft();
|
||||
return false;
|
||||
}
|
||||
// Actually OK to move left
|
||||
MapOp op;
|
||||
int length;
|
||||
|
||||
// TODO(abakalov): 'retval' below is set but not used, which is suspicious.
|
||||
// Did the authors mean to return this variable, analogously to MoveRight()?
|
||||
// bool retval = true;
|
||||
// If mal-formed or in LEFT, this will return with op = PREFIX_OP
|
||||
next_diff_sub_ = ParsePrevious(next_diff_sub_, &op, &length);
|
||||
|
||||
current_hi_aoffset_ = current_lo_aoffset_;
|
||||
current_hi_aprimeoffset_ = current_lo_aprimeoffset_;
|
||||
if (op == COPY_OP) {
|
||||
current_lo_aoffset_ = current_hi_aoffset_ - length;
|
||||
current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length;
|
||||
} else if (op == INSERT_OP) {
|
||||
current_lo_aoffset_ = current_hi_aoffset_ - 0;
|
||||
current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length;
|
||||
} else if (op == DELETE_OP) {
|
||||
current_lo_aoffset_ = current_hi_aoffset_ - length;
|
||||
current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - 0;
|
||||
} else {
|
||||
SetLeft();
|
||||
// retval = false;
|
||||
}
|
||||
current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Map an offset in A' to the corresponding offset in A
|
||||
int OffsetMap::MapBack(int aprimeoffset){
|
||||
MaybeFlushAll();
|
||||
if (aprimeoffset < 0) {return 0;}
|
||||
if (max_aprimeoffset_ <= aprimeoffset) {
|
||||
return (aprimeoffset - max_aprimeoffset_) + max_aoffset_;
|
||||
}
|
||||
|
||||
// If current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_,
|
||||
// use current mapping, else move window left/right
|
||||
bool ok = true;
|
||||
while (ok && (aprimeoffset < current_lo_aprimeoffset_)) {
|
||||
ok = MoveLeft();
|
||||
}
|
||||
while (ok && (current_hi_aprimeoffset_ <= aprimeoffset)) {
|
||||
ok = MoveRight();
|
||||
}
|
||||
// So now current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_
|
||||
|
||||
int aoffset = aprimeoffset - current_diff_;
|
||||
if (aoffset >= current_hi_aoffset_) {
|
||||
// A' is in an insert region, all bytes of which backmap to A=hi_aoffset_
|
||||
aoffset = current_hi_aoffset_;
|
||||
}
|
||||
return aoffset;
|
||||
}
|
||||
|
||||
// Map an offset in A to the corresponding offset in A'
|
||||
int OffsetMap::MapForward(int aoffset){
|
||||
MaybeFlushAll();
|
||||
if (aoffset < 0) {return 0;}
|
||||
if (max_aoffset_ <= aoffset) {
|
||||
return (aoffset - max_aoffset_) + max_aprimeoffset_;
|
||||
}
|
||||
|
||||
// If current_lo_aoffset_ <= aoffset < current_hi_aoffset_,
|
||||
// use current mapping, else move window left/right
|
||||
bool ok = true;
|
||||
while (ok && (aoffset < current_lo_aoffset_)) {
|
||||
ok = MoveLeft();
|
||||
}
|
||||
while (ok && (current_hi_aoffset_ <= aoffset)) {
|
||||
ok = MoveRight();
|
||||
}
|
||||
|
||||
int aprimeoffset = aoffset + current_diff_;
|
||||
if (aprimeoffset >= current_hi_aprimeoffset_) {
|
||||
// A is in a delete region, all bytes of which map to A'=hi_aprimeoffset_
|
||||
aprimeoffset = current_hi_aprimeoffset_;
|
||||
}
|
||||
return aprimeoffset;
|
||||
}
|
||||
|
||||
|
||||
// static
|
||||
bool OffsetMap::CopyInserts(OffsetMap* source, OffsetMap* dest) {
|
||||
bool ok = true;
|
||||
while (ok && (source->next_diff_sub_ !=
|
||||
static_cast<int>(source->diffs_.size()))) {
|
||||
ok = source->MoveRight();
|
||||
if (source->current_lo_aoffset_ != source->current_hi_aoffset_) {
|
||||
return false;
|
||||
}
|
||||
dest->Insert(
|
||||
source->current_hi_aprimeoffset_ - source->current_lo_aprimeoffset_);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// static
|
||||
bool OffsetMap::CopyDeletes(OffsetMap* source, OffsetMap* dest) {
|
||||
bool ok = true;
|
||||
while (ok && (source->next_diff_sub_ !=
|
||||
static_cast<int>(source->diffs_.size()))) {
|
||||
ok = source->MoveRight();
|
||||
if (source->current_lo_aprimeoffset_ != source->current_hi_aprimeoffset_) {
|
||||
return false;
|
||||
}
|
||||
dest->Delete(source->current_hi_aoffset_ - source->current_lo_aoffset_);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// static
|
||||
void OffsetMap::ComposeOffsetMap(
|
||||
OffsetMap* g, OffsetMap* f, OffsetMap* h) {
|
||||
h->Clear();
|
||||
f->Reset();
|
||||
g->Reset();
|
||||
|
||||
int lo = 0;
|
||||
for (;;) {
|
||||
// Consume delete operations in f. This moves A without moving
|
||||
// A' and A''.
|
||||
if (lo >= g->current_hi_aoffset_ && CopyInserts(g, h)) {
|
||||
if (lo >= f->current_hi_aprimeoffset_ && CopyDeletes(f, h)) {
|
||||
// fprintf(stderr,
|
||||
// "ComposeOffsetMap ERROR, f is longer than g.<br>\n");
|
||||
}
|
||||
|
||||
// FlushAll(), called by Reset(), MapForward() or MapBack(), has
|
||||
// added an extra COPY_OP to f and g, so this function has
|
||||
// composed an extra COPY_OP in h from those. To avoid
|
||||
// FlushAll() adds one more extra COPY_OP to h later, dispatch
|
||||
// Flush() right now.
|
||||
h->Flush();
|
||||
return;
|
||||
}
|
||||
|
||||
// Consume insert operations in g. This moves A'' without moving A
|
||||
// and A'.
|
||||
if (lo >= f->current_hi_aprimeoffset_) {
|
||||
if (!CopyDeletes(f, h)) {
|
||||
// fprintf(stderr,
|
||||
// "ComposeOffsetMap ERROR, g is longer than f.<br>\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Compose one operation which moves A' from lo to hi.
|
||||
int hi = min(f->current_hi_aprimeoffset_, g->current_hi_aoffset_);
|
||||
if (f->current_lo_aoffset_ != f->current_hi_aoffset_ &&
|
||||
g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) {
|
||||
h->Copy(hi - lo);
|
||||
} else if (f->current_lo_aoffset_ != f->current_hi_aoffset_) {
|
||||
h->Delete(hi - lo);
|
||||
} else if (g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) {
|
||||
h->Insert(hi - lo);
|
||||
}
|
||||
|
||||
lo = hi;
|
||||
}
|
||||
}
|
||||
|
||||
// For testing only -- force a mapping
|
||||
void OffsetMap::StuffIt(const std::string& diffs,
|
||||
int max_aoffset, int max_aprimeoffset) {
|
||||
Clear();
|
||||
diffs_ = diffs;
|
||||
max_aoffset_ = max_aoffset;
|
||||
max_aprimeoffset_ = max_aprimeoffset;
|
||||
}
|
||||
|
||||
|
||||
} // namespace CLD2
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,168 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//
|
||||
// Author: dsites@google.com (Dick Sites)
|
||||
//
|
||||
|
||||
#ifndef SCRIPT_SPAN_OFFSETMAP_H_
|
||||
#define SCRIPT_SPAN_OFFSETMAP_H_
|
||||
|
||||
#include <string> // for string
|
||||
|
||||
#include "integral_types.h" // for uint32
|
||||
|
||||
// ***************************** OffsetMap **************************
|
||||
//
|
||||
// An OffsetMap object is a container for a mapping from offsets in one text
|
||||
// buffer A' to offsets in another text buffer A. It is most useful when A' is
|
||||
// built from A via substitutions that occasionally do not preserve byte length.
|
||||
//
|
||||
// A series of operators are used to build the correspondence map, then
|
||||
// calls can be made to map an offset in A' to an offset in A, or vice versa.
|
||||
// The map starts with offset 0 in A corresponding to offset 0 in A'.
|
||||
// The mapping is then built sequentially, adding on byte ranges that are
|
||||
// identical in A and A', byte ranges that are inserted in A', and byte ranges
|
||||
// that are deleted from A. All bytes beyond those specified when building the
|
||||
// map are assumed to correspond, i.e. a Copy(infinity) is assumed at the
|
||||
// end of the map.
|
||||
//
|
||||
// The internal data structure records positions at which bytes are added or
|
||||
// deleted. Using the map is O(1) when increasing the A' or A offset
|
||||
// monotonically, and O(n) when accessing random offsets, where n is the
|
||||
// number of differences.
|
||||
//
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace CLD2 {
|
||||
|
||||
class OffsetMap {
|
||||
public:
|
||||
// Constructor, destructor
|
||||
OffsetMap();
|
||||
~OffsetMap();
|
||||
|
||||
// Clear the map
|
||||
void Clear();
|
||||
|
||||
// Add to mapping from A to A', specifying how many next bytes correspond
|
||||
// in A and A'
|
||||
void Copy(int bytes);
|
||||
|
||||
// Add to mapping from A to A', specifying how many next bytes are
|
||||
// inserted in A' while not advancing in A at all
|
||||
void Insert(int bytes);
|
||||
|
||||
// Add to mapping from A to A', specifying how many next bytes are
|
||||
// deleted from A while not advancing in A' at all
|
||||
void Delete(int bytes);
|
||||
|
||||
// [Finish building map,] Re-position to offset 0
|
||||
// This call is optional; MapForward and MapBack finish building the map
|
||||
// if necessary
|
||||
void Reset();
|
||||
|
||||
// Map an offset in A' to the corresponding offset in A
|
||||
int MapBack(int aprimeoffset);
|
||||
|
||||
// Map an offset in A to the corresponding offset in A'
|
||||
int MapForward(int aoffset);
|
||||
|
||||
// h = ComposeOffsetMap(g, f), where f is a map from A to A', g is
|
||||
// from A' to A'' and h is from A to A''.
|
||||
//
|
||||
// Note that g->MoveForward(f->MoveForward(aoffset)) always equals
|
||||
// to h->MoveForward(aoffset), while
|
||||
// f->MoveBack(g->MoveBack(aprimeprimeoffset)) doesn't always equals
|
||||
// to h->MoveBack(aprimeprimeoffset). This happens when deletion in
|
||||
// f and insertion in g are at the same place. For example,
|
||||
//
|
||||
// A 1 2 3 4
|
||||
// ^ | ^ ^
|
||||
// | | / | f
|
||||
// v vv v
|
||||
// A' 1' 2' 3'
|
||||
// ^ ^^ ^
|
||||
// | | \ | g
|
||||
// v | v v
|
||||
// A'' 1'' 2'' 3'' 4''
|
||||
//
|
||||
// results in:
|
||||
//
|
||||
// A 1 2 3 4
|
||||
// ^ ^\ ^ ^
|
||||
// | | \ | | h
|
||||
// v | vv v
|
||||
// A'' 1'' 2'' 3'' 4''
|
||||
//
|
||||
// 2'' is mapped 3 in the former figure, while 2'' is mapped to 2 in
|
||||
// the latter figure.
|
||||
static void ComposeOffsetMap(OffsetMap* g, OffsetMap* f, OffsetMap* h);
|
||||
|
||||
// For testing only -- force a mapping
|
||||
void StuffIt(const std::string& diffs, int max_aoffset, int max_aprimeoffset);
|
||||
|
||||
private:
|
||||
enum MapOp {PREFIX_OP, COPY_OP, INSERT_OP, DELETE_OP};
|
||||
|
||||
void Flush();
|
||||
void FlushAll();
|
||||
void MaybeFlushAll();
|
||||
void Emit(MapOp op, int len);
|
||||
|
||||
void SetLeft();
|
||||
void SetRight();
|
||||
|
||||
// Back up over previous range, 1..5 bytes
|
||||
// Return subscript at the beginning of that. Pins at 0
|
||||
int Backup(int sub);
|
||||
|
||||
// Parse next range, 1..5 bytes
|
||||
// Return subscript just off the end of that
|
||||
int ParseNext(int sub, MapOp* op, int* length);
|
||||
|
||||
// Parse previous range, 1..5 bytes
|
||||
// Return current subscript
|
||||
int ParsePrevious(int sub, MapOp* op, int* length);
|
||||
|
||||
bool MoveRight(); // Returns true if OK
|
||||
bool MoveLeft(); // Returns true if OK
|
||||
|
||||
// Copies insert operations from source to dest. Returns true if no
|
||||
// other operations are found.
|
||||
static bool CopyInserts(OffsetMap* source, OffsetMap* dest);
|
||||
|
||||
// Copies delete operations from source to dest. Returns true if no other
|
||||
// operations are found.
|
||||
static bool CopyDeletes(OffsetMap* source, OffsetMap* dest);
|
||||
|
||||
std::string diffs_;
|
||||
MapOp pending_op_;
|
||||
uint32 pending_length_;
|
||||
|
||||
// Offsets in the ranges below correspond to each other, with A' = A + diff
|
||||
int next_diff_sub_;
|
||||
int current_lo_aoffset_;
|
||||
int current_hi_aoffset_;
|
||||
int current_lo_aprimeoffset_;
|
||||
int current_hi_aprimeoffset_;
|
||||
int current_diff_;
|
||||
int max_aoffset_;
|
||||
int max_aprimeoffset_;
|
||||
};
|
||||
|
||||
} // namespace CLD2
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // SCRIPT_SPAN_OFFSETMAP_H_
|
||||
@ -0,0 +1,143 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//
|
||||
// These are weird things we need to do to get this compiling on
|
||||
// random systems [subset].
|
||||
|
||||
#ifndef SCRIPT_SPAN_PORT_H_
|
||||
#define SCRIPT_SPAN_PORT_H_
|
||||
|
||||
#include <string.h> // for memcpy()
|
||||
|
||||
#include "integral_types.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace CLD2 {
|
||||
|
||||
// Portable handling of unaligned loads, stores, and copies.
|
||||
// On some platforms, like ARM, the copy functions can be more efficient
|
||||
// than a load and a store.
|
||||
|
||||
#if defined(ARCH_PIII) || defined(ARCH_ATHLON) || defined(ARCH_K8) || defined(_ARCH_PPC)
|
||||
|
||||
// x86 and x86-64 can perform unaligned loads/stores directly;
|
||||
// modern PowerPC hardware can also do unaligned integer loads and stores;
|
||||
// but note: the FPU still sends unaligned loads and stores to a trap handler!
|
||||
|
||||
// Loads: reinterpret the pointer as the integer type and read through it.
#define UNALIGNED_LOAD16(ptr) (*reinterpret_cast<const uint16 *>(ptr))
#define UNALIGNED_LOAD32(ptr) (*reinterpret_cast<const uint32 *>(ptr))
#define UNALIGNED_LOAD64(ptr) (*reinterpret_cast<const uint64 *>(ptr))

// Stores: reinterpret the pointer as the integer type and assign through it.
#define UNALIGNED_STORE16(ptr, value) \
  (*reinterpret_cast<uint16 *>(ptr) = (value))
#define UNALIGNED_STORE32(ptr, value) \
  (*reinterpret_cast<uint32 *>(ptr) = (value))
#define UNALIGNED_STORE64(ptr, value) \
  (*reinterpret_cast<uint64 *>(ptr) = (value))
|
||||
|
||||
#elif defined(__arm__) && \
|
||||
!defined(__ARM_ARCH_5__) && \
|
||||
!defined(__ARM_ARCH_5T__) && \
|
||||
!defined(__ARM_ARCH_5TE__) && \
|
||||
!defined(__ARM_ARCH_5TEJ__) && \
|
||||
!defined(__ARM_ARCH_6__) && \
|
||||
!defined(__ARM_ARCH_6J__) && \
|
||||
!defined(__ARM_ARCH_6K__) && \
|
||||
!defined(__ARM_ARCH_6Z__) && \
|
||||
!defined(__ARM_ARCH_6ZK__) && \
|
||||
!defined(__ARM_ARCH_6T2__) && \
|
||||
!defined(__ARM_ARCH_7__) && \
|
||||
!defined(__ARM_ARCH_7A__) && \
|
||||
!defined(__ARM_ARCH_7M__) && \
|
||||
!defined(__ARM_ARCH_7R__) && \
|
||||
!defined(__ARM_ARCH_8__) && \
|
||||
!defined(__ARM_ARCH_8A__)
|
||||
|
||||
// ARMv7 and newer support native unaligned accesses, but only of 16-bit
|
||||
// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
|
||||
// do an unaligned read and rotate the words around a bit, or do the reads very
|
||||
// slowly (trip through kernel mode). There's no simple #define that says just
|
||||
// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6
|
||||
// sub-architectures. Newer gcc (>= 4.6) set an __ARM_FEATURE_UNALIGNED #define,
|
||||
// so in time, maybe we can move on to that.
|
||||
//
|
||||
// Note that even if a chipset supports unaligned access, it might not be
|
||||
// enabled in any given system, e.g.:
|
||||
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0491c/CIHCGCFD.html
|
||||
// Therefore, it's generally just not safe to allow unaligned access on any ARM
|
||||
// variant.
|
||||
//
|
||||
// This is a mess, but there's not much we can do about it.
|
||||
|
||||
// 16- and 32-bit loads: reinterpret the pointer and read through it.
#define UNALIGNED_LOAD16(ptr) (*reinterpret_cast<const uint16 *>(ptr))
#define UNALIGNED_LOAD32(ptr) (*reinterpret_cast<const uint32 *>(ptr))

// 16- and 32-bit stores: reinterpret the pointer and assign through it.
#define UNALIGNED_STORE16(ptr, value) \
  (*reinterpret_cast<uint16 *>(ptr) = (value))
#define UNALIGNED_STORE32(ptr, value) \
  (*reinterpret_cast<uint32 *>(ptr) = (value))
|
||||
|
||||
// TODO(sesse): NEON supports unaligned 64-bit loads and stores.
|
||||
// See if that would be more efficient on platforms supporting it,
|
||||
// at least for copies.
|
||||
|
||||
// Reads a uint64 from `p` by copying its bytes with memcpy.
inline uint64 UNALIGNED_LOAD64(const void *p) {
  uint64 value;
  memcpy(&value, p, sizeof(value));
  return value;
}
|
||||
|
||||
// Writes the uint64 `v` to `p` by copying its bytes with memcpy.
inline void UNALIGNED_STORE64(void *p, uint64 v) {
  memcpy(p, &v, sizeof(v));
}
|
||||
|
||||
#else
|
||||
|
||||
#define NEED_ALIGNED_LOADS
|
||||
|
||||
// These functions are provided for architectures that don't support
|
||||
// unaligned loads and stores.
|
||||
|
||||
// Reads a uint16 from `p` by copying its bytes with memcpy.
inline uint16 UNALIGNED_LOAD16(const void *p) {
  uint16 value;
  memcpy(&value, p, sizeof(value));
  return value;
}
|
||||
|
||||
// Reads a uint32 from `p` by copying its bytes with memcpy.
inline uint32 UNALIGNED_LOAD32(const void *p) {
  uint32 value;
  memcpy(&value, p, sizeof(value));
  return value;
}
|
||||
|
||||
// Reads a uint64 from `p` by copying its bytes with memcpy.
inline uint64 UNALIGNED_LOAD64(const void *p) {
  uint64 value;
  memcpy(&value, p, sizeof(value));
  return value;
}
|
||||
|
||||
// Writes the uint16 `v` to `p` by copying its bytes with memcpy.
inline void UNALIGNED_STORE16(void *p, uint16 v) {
  memcpy(p, &v, sizeof(v));
}
|
||||
|
||||
// Writes the uint32 `v` to `p` by copying its bytes with memcpy.
inline void UNALIGNED_STORE32(void *p, uint32 v) {
  memcpy(p, &v, sizeof(v));
}
|
||||
|
||||
// Writes the uint64 `v` to `p` by copying its bytes with memcpy.
inline void UNALIGNED_STORE64(void *p, uint64 v) {
  memcpy(p, &v, sizeof(v));
}
|
||||
|
||||
#endif
|
||||
|
||||
} // End namespace CLD2
|
||||
} // End namespace chrome_lang_id
|
||||
|
||||
#endif // SCRIPT_SPAN_PORT_H_
|
||||
@ -0,0 +1,81 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//
|
||||
// A StringPiece points to part or all of a string, double-quoted string
|
||||
// literal, or other string-like object. A StringPiece does *not* own the
|
||||
// string to which it points. A StringPiece is not null-terminated. [subset]
|
||||
//
|
||||
|
||||
#ifndef SCRIPT_SPAN_STRINGPIECE_H_
|
||||
#define SCRIPT_SPAN_STRINGPIECE_H_
|
||||
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Signed type used for StringPiece lengths and counts.
typedef int stringpiece_ssize_type;

// A pointer/length pair referring to character data held elsewhere; the
// class itself never allocates, copies or frees that data.
class StringPiece {
 private:
  const char* ptr_;
  stringpiece_ssize_type length_;

 public:
  // We provide non-explicit singleton constructors so users can pass
  // in a "const char*" or a "string" wherever a "StringPiece" is
  // expected.

  // Empty piece: NULL data, zero length.
  StringPiece() : ptr_(NULL), length_(0) {}

  // Piece over a NUL-terminated C string; a NULL pointer gives length 0.
  StringPiece(const char* str)  // NOLINT(runtime/explicit)
      : ptr_(str),
        length_(str == NULL
                    ? 0
                    : static_cast<stringpiece_ssize_type>(strlen(str))) {}

  // Piece over the full contents of a std::string.
  StringPiece(const std::string& str)  // NOLINT(runtime/explicit)
      : ptr_(str.data()),
        length_(static_cast<stringpiece_ssize_type>(str.size())) {}

  // Piece over `len` bytes starting at `offset`.
  StringPiece(const char* offset, stringpiece_ssize_type len)
      : ptr_(offset), length_(len) {}

  // Drops the first n bytes. No range checking is done.
  void remove_prefix(stringpiece_ssize_type n) {
    length_ -= n;
    ptr_ += n;
  }

  // Drops the last n bytes. No range checking is done.
  void remove_suffix(stringpiece_ssize_type n) { length_ -= n; }

  // data() may return a pointer to a buffer with embedded NULs, and the
  // returned buffer may or may not be null terminated. Therefore it is
  // typically a mistake to pass data() to a routine that expects a NUL
  // terminated string.
  const char* data() const { return ptr_; }
  stringpiece_ssize_type size() const { return length_; }
  stringpiece_ssize_type length() const { return length_; }
  bool empty() const { return size() == 0; }
};
|
||||
|
||||
class StringPiece;
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif  // SCRIPT_SPAN_STRINGPIECE_H_
|
||||
@ -0,0 +1,245 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "text_processing.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace CLD2 {
|
||||
namespace {
|
||||
|
||||
// Upper bound, in bytes, on how far the space scans below will look.
static const int kMaxSpaceScan = 32;
|
||||
|
||||
// Returns the smaller of two ints.
int minint(int a, int b) {
  if (a < b) {
    return a;
  }
  return b;
}
|
||||
|
||||
// Counts the ASCII spaces in src[0..src_len), four bytes per step.
// Only whole groups of four are examined: the last (src_len % 4) bytes are
// not counted.
int CountSpaces4(const char *src, int src_len) {
  const int whole_groups_len = src_len & ~3;
  int spaces = 0;
  for (int base = 0; base < whole_groups_len; base += 4) {
    for (int k = 0; k < 4; ++k) {
      if (src[base + k] == ' ') {
        ++spaces;
      }
    }
  }
  return spaces;
}
|
||||
|
||||
// Runs a cheap predictor over the text to measure how compressible, and
// hence how repetitive, it is. Prediction works on whole UTF-8 characters
// rather than bytes, because three-byte UTF-8 Indic, etc. text compresses
// highly all the time when counted byte by byte.
//
// So that prediction can continue across several chunks, the caller owns the
// state: *hash is the current 12-bit hash and tbl is an int[4096] prediction
// table. The caller initializes both to 0.
//
// Returns the number of *bytes* correctly predicted; each correctly
// predicted character adds its length, 1..4.
//
// NOTE: May read up to three bytes past isrc + src_len when the text ends in
// the middle of a multi-byte character. Not a problem with valid UTF-8 text.
//
// TODO(dsites) make this use just one byte per UTF-8 char and incr by charlen
int CountPredictedBytes(const char *isrc, int src_len, int *hash, int *tbl) {
  typedef unsigned char uint8;

  const uint8 *cursor = reinterpret_cast<const uint8 *>(isrc);
  const uint8 *const end = cursor + src_len;
  int rolling_hash = *hash;
  int predicted_bytes = 0;

  while (cursor < end) {
    // Pick up one character and its length. Bytes below 0xc0 (one-byte
    // characters and stray continuation bytes) stand for themselves.
    const int lead = cursor[0];
    int ch = lead;
    int char_len = 1;
    if (lead >= 0xc0) {
      if ((lead & 0xe0) == 0xc0) {
        // Two-byte
        ch = (lead << 8) | cursor[1];
        char_len = 2;
      } else if ((lead & 0xf0) == 0xe0) {
        // Three-byte
        ch = (lead << 16) | (cursor[1] << 8) | cursor[2];
        char_len = 3;
      } else {
        // Four-byte
        ch = (lead << 24) | (cursor[1] << 16) | (cursor[2] << 8) | cursor[3];
        char_len = 4;
      }
    }
    cursor += char_len;

    // Compare with what the table predicted for this context, then teach
    // the table the character that actually occurred.
    const int predicted = tbl[rolling_hash];
    tbl[rolling_hash] = ch;
    if (predicted == ch) {
      predicted_bytes += char_len;
    }

    rolling_hash = ((rolling_hash << 4) ^ ch) & 0xfff;
  }

  *hash = rolling_hash;
  return predicted_bytes;
}
|
||||
|
||||
// Backscan to word boundary, returning how many bytes n to go back
|
||||
// so that src - n is non-space ans src - n - 1 is space.
|
||||
// If not found in kMaxSpaceScan bytes, return 0..3 to a clean UTF-8 boundary
|
||||
int BackscanToSpace(const char *src, int limit) {
|
||||
int n = 0;
|
||||
limit = minint(limit, kMaxSpaceScan);
|
||||
while (n < limit) {
|
||||
if (src[-n - 1] == ' ') {
|
||||
return n;
|
||||
} // We are at _X
|
||||
++n;
|
||||
}
|
||||
n = 0;
|
||||
while (n < limit) {
|
||||
if ((src[-n] & 0xc0) != 0x80) {
|
||||
return n;
|
||||
} // We are at char begin
|
||||
++n;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Forwardscan to word boundary, returning how many bytes n to go forward
|
||||
// so that src + n is non-space ans src + n - 1 is space.
|
||||
// If not found in kMaxSpaceScan bytes, return 0..3 to a clean UTF-8 boundary
|
||||
int ForwardscanToSpace(const char *src, int limit) {
|
||||
int n = 0;
|
||||
limit = minint(limit, kMaxSpaceScan);
|
||||
while (n < limit) {
|
||||
if (src[n] == ' ') {
|
||||
return n + 1;
|
||||
} // We are at _X
|
||||
++n;
|
||||
}
|
||||
n = 0;
|
||||
while (n < limit) {
|
||||
if ((src[n] & 0xc0) != 0x80) {
|
||||
return n;
|
||||
} // We are at char begin
|
||||
++n;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Number of entries in the prediction table.
// Must be exactly 4096 for cheap compressor.
static const int kPredictionTableSize = 1 << 12;

// Squeeze 48-byte chunks
static const int kChunksizeDefault = 48;

// Squeeze if >=30% spaces
static const int kSpacesThreshPercent = 30;

// Squeeze if >=40% predicted
static const int kPredictThreshPercent = 40;
|
||||
|
||||
// Remove portions of text that have a high density of spaces, or that are
|
||||
// overly repetitive, squeezing the remaining text in-place to the front of the
|
||||
// input buffer.
|
||||
//
|
||||
// Squeezing looks at density of space/prediced chars in fixed-size chunks,
|
||||
// specified by chunksize. A chunksize <= 0 uses the default size of 48 bytes.
|
||||
//
|
||||
// Return the new, possibly-shorter length
|
||||
//
|
||||
// Result Buffer ALWAYS has leading space and trailing space space space NUL,
|
||||
// if input does
|
||||
//
|
||||
int CheapSqueezeInplace(char *isrc, int src_len, int ichunksize) {
|
||||
char *src = isrc;
|
||||
char *dst = src;
|
||||
char *srclimit = src + src_len;
|
||||
bool skipping = false;
|
||||
|
||||
int hash = 0;
|
||||
|
||||
// Allocate local prediction table.
|
||||
int *predict_tbl = new int[kPredictionTableSize];
|
||||
memset(predict_tbl, 0, kPredictionTableSize * sizeof(predict_tbl[0]));
|
||||
|
||||
int chunksize = ichunksize;
|
||||
if (chunksize == 0) {
|
||||
chunksize = kChunksizeDefault;
|
||||
}
|
||||
int space_thresh = (chunksize * kSpacesThreshPercent) / 100;
|
||||
int predict_thresh = (chunksize * kPredictThreshPercent) / 100;
|
||||
|
||||
while (src < srclimit) {
|
||||
int remaining_bytes = srclimit - src;
|
||||
int len = minint(chunksize, remaining_bytes);
|
||||
|
||||
// Make len land us on a UTF-8 character boundary.
|
||||
// Ah. Also fixes mispredict because we could get out of phase
|
||||
// Loop always terminates at trailing space in buffer
|
||||
while ((src[len] & 0xc0) == 0x80) {
|
||||
++len;
|
||||
} // Move past continuation bytes
|
||||
|
||||
int space_n = CountSpaces4(src, len);
|
||||
int predb_n = CountPredictedBytes(src, len, &hash, predict_tbl);
|
||||
if ((space_n >= space_thresh) || (predb_n >= predict_thresh)) {
|
||||
// Skip the text
|
||||
if (!skipping) {
|
||||
// Keeping-to-skipping transition; do it at a space
|
||||
int n = BackscanToSpace(dst, static_cast<int>(dst - isrc));
|
||||
dst -= n;
|
||||
if (dst == isrc) {
|
||||
// Force a leading space if the first chunk is deleted
|
||||
*dst++ = ' ';
|
||||
}
|
||||
skipping = true;
|
||||
}
|
||||
} else {
|
||||
// Keep the text
|
||||
if (skipping) {
|
||||
// Skipping-to-keeping transition; do it at a space
|
||||
int n = ForwardscanToSpace(src, len);
|
||||
src += n;
|
||||
remaining_bytes -= n; // Shrink remaining length
|
||||
len -= n;
|
||||
skipping = false;
|
||||
}
|
||||
|
||||
// "len" can be negative in some cases
|
||||
if (len > 0) {
|
||||
memmove(dst, src, len);
|
||||
dst += len;
|
||||
}
|
||||
}
|
||||
src += len;
|
||||
}
|
||||
|
||||
if ((dst - isrc) < (src_len - 3)) {
|
||||
// Pad and make last char clean UTF-8 by putting following spaces
|
||||
dst[0] = ' ';
|
||||
dst[1] = ' ';
|
||||
dst[2] = ' ';
|
||||
dst[3] = '\0';
|
||||
} else if ((dst - isrc) < src_len) {
|
||||
// Make last char clean UTF-8 by putting following space off the end
|
||||
dst[0] = ' ';
|
||||
}
|
||||
|
||||
// Deallocate local prediction table
|
||||
delete[] predict_tbl;
|
||||
return static_cast<int>(dst - isrc);
|
||||
}
|
||||
|
||||
} // namespace CLD2
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,30 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef SCRIPT_SPAN_TEXT_PROCESSING_H_
|
||||
#define SCRIPT_SPAN_TEXT_PROCESSING_H_
|
||||
|
||||
namespace chrome_lang_id {
namespace CLD2 {

// Squeezes text in place: chunks of the buffer that are mostly spaces or that
// are overly repetitive are dropped, and the text that is kept is moved to
// the front of isrc.
//
//   isrc        Buffer holding the text; it is modified.
//   srclen      Number of text bytes in isrc.
//   ichunksize  Size in bytes of the chunks that are examined; 0 selects the
//               default chunk size.
//
// Returns the new, possibly-shorter length.
int CheapSqueezeInplace(char *isrc, int srclen, int ichunksize);

}  // namespace CLD2
}  // namespace chrome_lang_id
|
||||
|
||||
#endif // SCRIPT_SPAN_TEXT_PROCESSING_H_
|
||||
@ -0,0 +1,486 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//
|
||||
// Created by utf8tablebuilder version 2.9
|
||||
//
|
||||
// Rejects all codes that are not interchange-valid
|
||||
// Accepts all other UTF-8 codes 0000..10FFFF
|
||||
// Exit optimized -- exits after four times in state 0
|
||||
// All bytes are checked for structurally valid UTF-8
|
||||
// Table entries are absolute statetable subscripts
|
||||
|
||||
#ifndef SCRIPT_SPAN_UTF8ACCEPTINTERCHANGE_H_
|
||||
#define SCRIPT_SPAN_UTF8ACCEPTINTERCHANGE_H_
|
||||
|
||||
#include "integral_types.h"
|
||||
#include "utf8statetable.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace CLD2 {
|
||||
|
||||
#define X__ (kExitIllegalStructure)
|
||||
#define RJ_ (kExitReject)
|
||||
#define S1_ (kExitReplace1)
|
||||
#define S2_ (kExitReplace2)
|
||||
#define S3_ (kExitReplace3)
|
||||
#define S21 (kExitReplace21)
|
||||
#define S31 (kExitReplace31)
|
||||
#define S32 (kExitReplace32)
|
||||
#define T1_ (kExitReplaceOffset1)
|
||||
#define T2_ (kExitReplaceOffset2)
|
||||
#define S11 (kExitReplace1S0)
|
||||
#define SP_ (kExitSpecial)
|
||||
#define D__ (kExitDoAgain)
|
||||
#define RJA (kExitRejectAlt)
|
||||
|
||||
// Entire table has 17 state blocks of 256 entries each
|
||||
|
||||
static const unsigned int utf8acceptinterchange_STATE0 = 0; // state[0]
|
||||
static const unsigned int utf8acceptinterchange_STATE0_SIZE = 1024; // =[4]
|
||||
static const unsigned int utf8acceptinterchange_TOTAL_SIZE = 4352;
|
||||
static const unsigned int utf8acceptinterchange_MAX_EXPAND_X4 = 0;
|
||||
static const unsigned int utf8acceptinterchange_SHIFT = 8;
|
||||
static const unsigned int utf8acceptinterchange_BYTES = 1;
|
||||
static const unsigned int utf8acceptinterchange_LOSUB = 0x20202020;
|
||||
static const unsigned int utf8acceptinterchange_HIADD = 0x01010101;
|
||||
|
||||
static const uint8 utf8acceptinterchange[] = {
|
||||
// state[0] 0x000000 Byte 1
|
||||
RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_, RJ_, 1, 1,RJ_, 1, 1,RJ_,RJ_,
|
||||
RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_, RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,RJ_,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 6, 10,
|
||||
13, 15, 15, 15, 16,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
// state[1] 0x000000 Byte 1
|
||||
RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_, RJ_, 2, 2,RJ_, 2, 2,RJ_,RJ_,
|
||||
RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_, RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,RJ_,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 6, 10,
|
||||
13, 15, 15, 15, 16,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
// state[2] 0x000000 Byte 1
|
||||
RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_, RJ_, 3, 3,RJ_, 3, 3,RJ_,RJ_,
|
||||
RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_, RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,RJ_,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 6, 10,
|
||||
13, 15, 15, 15, 16,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
// state[3] 0x000000 Byte 1
|
||||
RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_, RJ_,D__,D__,RJ_,D__,D__,RJ_,RJ_,
|
||||
RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_, RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,
|
||||
D__,D__,D__,D__,D__,D__,D__,D__, D__,D__,D__,D__,D__,D__,D__,D__,
|
||||
D__,D__,D__,D__,D__,D__,D__,D__, D__,D__,D__,D__,D__,D__,D__,D__,
|
||||
|
||||
D__,D__,D__,D__,D__,D__,D__,D__, D__,D__,D__,D__,D__,D__,D__,D__,
|
||||
D__,D__,D__,D__,D__,D__,D__,D__, D__,D__,D__,D__,D__,D__,D__,D__,
|
||||
D__,D__,D__,D__,D__,D__,D__,D__, D__,D__,D__,D__,D__,D__,D__,D__,
|
||||
D__,D__,D__,D__,D__,D__,D__,D__, D__,D__,D__,D__,D__,D__,D__,RJ_,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 6, 10,
|
||||
13, 15, 15, 15, 16,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
// state[4] 0x0000c0 Byte 2 of 2
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
// state[5] 0x000000 Byte 2 of 3
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
// state[6] 0x001000 Byte 2 of 3
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
// state[7] 0x000080 Byte 2 of 2
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_, RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,
|
||||
RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_, RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
// state[8] 0x00d000 Byte 2 of 3
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
// state[9] 0x00d800 Byte 3 of 3
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_, RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,
|
||||
RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_, RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,
|
||||
RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_, RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,
|
||||
RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_, RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
// state[10] 0x00f000 Byte 2 of 3
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 11, 4, 4, 4, 4, 4, 4, 4, 12,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
// state[11] 0x00fdc0 Byte 3 of 3
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_, RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,
|
||||
RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_, RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,RJ_,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
// state[12] 0x00ffc0 Byte 3 of 3
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,RJ_,RJ_,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
// state[13] 0x000000 Byte 2 of 4
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 14,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 14,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 14,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
// state[14] 0x01f000 Byte 3 of 4
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 12,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
// state[15] 0x040000 Byte 2 of 4
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 14,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 14,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 14,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 14,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
// state[16] 0x100000 Byte 2 of 4
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 14,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
};
|
||||
|
||||
// Remap base[0] = (del, add, string_offset)
|
||||
static const RemapEntry utf8acceptinterchange_remap_base[] = {
|
||||
{0,0,0} };
|
||||
|
||||
// Remap string[0]
|
||||
static const unsigned char utf8acceptinterchange_remap_string[] = {
|
||||
0 };
|
||||
|
||||
static const unsigned char utf8acceptinterchange_fast[256] = {
|
||||
1,1,1,1,1,1,1,1, 1,0,0,1,0,0,1,1,
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
|
||||
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
|
||||
|
||||
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1,
|
||||
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
|
||||
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
|
||||
|
||||
};
|
||||
|
||||
static const UTF8ScanObj utf8acceptinterchange_obj = {
|
||||
utf8acceptinterchange_STATE0,
|
||||
utf8acceptinterchange_STATE0_SIZE,
|
||||
utf8acceptinterchange_TOTAL_SIZE,
|
||||
utf8acceptinterchange_MAX_EXPAND_X4,
|
||||
utf8acceptinterchange_SHIFT,
|
||||
utf8acceptinterchange_BYTES,
|
||||
utf8acceptinterchange_LOSUB,
|
||||
utf8acceptinterchange_HIADD,
|
||||
utf8acceptinterchange,
|
||||
utf8acceptinterchange_remap_base,
|
||||
utf8acceptinterchange_remap_string,
|
||||
utf8acceptinterchange_fast
|
||||
};
|
||||
|
||||
|
||||
#undef X__
|
||||
#undef RJ_
|
||||
#undef S1_
|
||||
#undef S2_
|
||||
#undef S3_
|
||||
#undef S21
|
||||
#undef S31
|
||||
#undef S32
|
||||
#undef T1_
|
||||
#undef T2_
|
||||
#undef S11
|
||||
#undef SP_
|
||||
#undef D__
|
||||
#undef RJA
|
||||
|
||||
// Table has 4608 bytes, Hash = 505C-3D29
|
||||
|
||||
} // End namespace CLD2
|
||||
} // End namespace chrome_lang_id
|
||||
|
||||
#endif // SCRIPT_SPAN_UTF8ACCEPTINTERCHANGE_H_
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,758 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//
|
||||
// Created by utf8tablebuilder version 2.9
|
||||
//
|
||||
// Replaces all codes from file:
|
||||
// lettermarklower_6.2.0.txt
|
||||
// Accepts all other UTF-8 codes 0000..10FFFF
|
||||
// Space optimized
|
||||
//
|
||||
// ** ASSUMES INPUT IS STRUCTURALLY VALID UTF-8 **
|
||||
//
|
||||
// Table entries are absolute statetable subscripts
|
||||
|
||||
#ifndef SCRIPT_SPAN_UTF8REPL_LETTERMARKLOWER_H_
|
||||
#define SCRIPT_SPAN_UTF8REPL_LETTERMARKLOWER_H_
|
||||
|
||||
#include "integral_types.h"
|
||||
#include "utf8statetable.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace CLD2 {
|
||||
|
||||
#define X__ (kExitIllegalStructure)
|
||||
#define RJ_ (kExitReject)
|
||||
#define S1_ (kExitReplace1)
|
||||
#define S2_ (kExitReplace2)
|
||||
#define S3_ (kExitReplace3)
|
||||
#define S21 (kExitReplace21)
|
||||
#define S31 (kExitReplace31)
|
||||
#define S32 (kExitReplace32)
|
||||
#define T1_ (kExitReplaceOffset1)
|
||||
#define T2_ (kExitReplaceOffset2)
|
||||
#define S11 (kExitReplace1S0)
|
||||
#define SP_ (kExitSpecial)
|
||||
#define D__ (kExitDoAgain)
|
||||
#define RJA (kExitRejectAlt)
|
||||
|
||||
// Entire table has 111 state blocks of 64 entries each
|
||||
|
||||
// Layout parameters for the utf8repl_lettermarklower table that follows.
// The notes below restate only what is visible in this file; how the scanner
// consumes each value is defined elsewhere (presumably utf8statetable.h --
// TODO confirm).
// Subscript of the initial state, i.e. the start of the table.
static const unsigned int utf8repl_lettermarklower_STATE0 = 0; // state[0]
|
||||
// 320 = 5 * 64, i.e. five 64-entry blocks (the "=[5]" in the trailing comment).
static const unsigned int utf8repl_lettermarklower_STATE0_SIZE = 320; // =[5]
|
||||
// 7104 = 111 * 64, matching "111 state blocks of 64 entries each".
static const unsigned int utf8repl_lettermarklower_TOTAL_SIZE = 7104;
|
||||
// NOTE(review): judging by the name, an upper bound on output growth scaled
// by 4 -- meaning not visible here; confirm against the scanner.
static const unsigned int utf8repl_lettermarklower_MAX_EXPAND_X4 = 12;
|
||||
// 1 << 6 == 64, the number of entries in one state block.
static const unsigned int utf8repl_lettermarklower_SHIFT = 6;
|
||||
// NOTE(review): presumably the size of one table entry in bytes (the table is
// declared as uint8) -- confirm.
static const unsigned int utf8repl_lettermarklower_BYTES = 1;
|
||||
// NOTE(review): LOSUB/HIADD are opaque constants for the scanner; their
// meaning cannot be determined from this file.
static const unsigned int utf8repl_lettermarklower_LOSUB = 0x5b5b5b5b;
|
||||
static const unsigned int utf8repl_lettermarklower_HIADD = 0x00000000;
|
||||
|
||||
static const uint8 utf8repl_lettermarklower[] = {
|
||||
// state[0] 0x000000 Byte 1
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0,S11,S11,S11,S11,S11,S11,S11, S11,S11,S11,S11,S11,S11,S11,S11,
|
||||
S11,S11,S11,S11,S11,S11,S11,S11, S11,S11,S11, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
X__,X__, 6, 11, 13, 16, 19, 22, 25, 28, 6, 6, 6, 31, 33, 36,
|
||||
39, 42, 44, 46, 48, 51, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
7, 54, 74, 8, 8, 8, 8, 8, 8, 8, 88, 8, 8, 8, 8,100,
|
||||
104, 9, 9, 9, 10,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
|
||||
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 0x78,0x79,0x7a,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[6 + 2] 0x000080 Byte 2 of 2
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
// state[7 + 2] 0x000000 Byte 2 of 3
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
|
||||
// state[8 + 2] 0x003000 Byte 2 of 3
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
|
||||
// state[9 + 2] 0x040000 Byte 2 of 4
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
|
||||
// state[10 + 2] 0x100000 Byte 2 of 4
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
|
||||
// state[11 + 2] 0x0000c0 Byte 2 of 2
|
||||
S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_, S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_,
|
||||
S1_,S1_,S1_,S1_,S1_,S1_,S1_, 0, S1_,S1_,S1_,S1_,S1_,S1_,S1_, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7, 0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
|
||||
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0x00, 0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[13 + 2] 0x000100 Byte 2 of 2
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S21, 0,S1_, 0,S1_, 0,S1_, 0, 0,S1_, 0,S1_, 0,S1_, 0,S2_,
|
||||
|
||||
0x81,0x00,0x83,0x00,0x85,0x00,0x87,0x00, 0x89,0x00,0x8b,0x00,0x8d,0x00,0x8f,0x00,
|
||||
0x91,0x00,0x93,0x00,0x95,0x00,0x97,0x00, 0x99,0x00,0x9b,0x00,0x9d,0x00,0x9f,0x00,
|
||||
0xa1,0x00,0xa3,0x00,0xa5,0x00,0xa7,0x00, 0xa9,0x00,0xab,0x00,0xad,0x00,0xaf,0x00,
|
||||
0x69,0x00,0xb3,0x00,0xb5,0x00,0xb7,0x00, 0x00,0xba,0x00,0xbc,0x00,0xbe,0x00,0x80,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xc5,
|
||||
|
||||
// state[16 + 2] 0x000140 Byte 2 of 2
|
||||
0,S1_, 0,S1_, 0,S1_, 0,S1_, 0, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S2_,S1_, 0,S1_, 0,S1_, 0, 0,
|
||||
|
||||
0x00,0x82,0x00,0x84,0x00,0x86,0x00,0x88, 0x00,0x00,0x8b,0x00,0x8d,0x00,0x8f,0x00,
|
||||
0x91,0x00,0x93,0x00,0x95,0x00,0x97,0x00, 0x99,0x00,0x9b,0x00,0x9d,0x00,0x9f,0x00,
|
||||
0xa1,0x00,0xa3,0x00,0xa5,0x00,0xa7,0x00, 0xa9,0x00,0xab,0x00,0xad,0x00,0xaf,0x00,
|
||||
0xb1,0x00,0xb3,0x00,0xb5,0x00,0xb7,0x00, 0xbf,0xba,0x00,0xbc,0x00,0xbe,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0xc3,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[19 + 2] 0x000180 Byte 2 of 2
|
||||
0,S2_,S1_, 0,S1_, 0,S2_,S1_, 0,S2_,S2_,S1_, 0, 0,S2_,S2_,
|
||||
S2_,S1_, 0,S2_,S2_, 0,S2_,S2_, S1_, 0, 0, 0,S2_,S2_, 0,S2_,
|
||||
S1_, 0,S1_, 0,S1_, 0,S2_,S1_, 0,S2_, 0, 0,S1_, 0,S2_,S1_,
|
||||
0,S2_,S2_,S1_, 0,S1_, 0,S2_, S1_, 0, 0, 0,S1_, 0, 0, 0,
|
||||
|
||||
0x00,0x93,0x83,0x00,0x85,0x00,0x94,0x88, 0x00,0x96,0x97,0x8c,0x00,0x00,0x9d,0x99,
|
||||
0x9b,0x92,0x00,0xa0,0xa3,0x00,0xa9,0xa8, 0x99,0x00,0x00,0x00,0xaf,0xb2,0x00,0xb5,
|
||||
0xa1,0x00,0xa3,0x00,0xa5,0x00,0x80,0xa8, 0x00,0x83,0x00,0x00,0xad,0x00,0x88,0xb0,
|
||||
0x00,0x8a,0x8b,0xb4,0x00,0xb6,0x00,0x92, 0xb9,0x00,0x00,0x00,0xbd,0x00,0x00,0x00,
|
||||
|
||||
0x00,0xc9,0x00,0x00,0x00,0x00,0xc9,0x00, 0x00,0xc9,0xc9,0x00,0x00,0x00,0xc7,0xc9,
|
||||
0xc9,0x00,0x00,0xc9,0xc9,0x00,0xc9,0xc9, 0x00,0x00,0x00,0x00,0xc9,0xc9,0x00,0xc9,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xca,0x00, 0x00,0xca,0x00,0x00,0x00,0x00,0xca,0x00,
|
||||
0x00,0xca,0xca,0x00,0x00,0x00,0x00,0xca, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[22 + 2] 0x0001c0 Byte 2 of 2
|
||||
0, 0, 0, 0,S1_,S1_, 0,S1_, S1_, 0,S1_,S1_, 0,S1_, 0,S1_,
|
||||
0,S1_, 0,S1_, 0,S1_, 0,S1_, 0,S1_, 0,S1_, 0, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
0,S1_,S1_, 0,S1_, 0,S2_,S2_, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x86,0x86,0x00,0x89, 0x89,0x00,0x8c,0x8c,0x00,0x8e,0x00,0x90,
|
||||
0x00,0x92,0x00,0x94,0x00,0x96,0x00,0x98, 0x00,0x9a,0x00,0x9c,0x00,0x00,0x9f,0x00,
|
||||
0xa1,0x00,0xa3,0x00,0xa5,0x00,0xa7,0x00, 0xa9,0x00,0xab,0x00,0xad,0x00,0xaf,0x00,
|
||||
0x00,0xb3,0xb3,0x00,0xb5,0x00,0x95,0xbf, 0xb9,0x00,0xbb,0x00,0xbd,0x00,0xbf,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xc6,0xc6, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[25 + 2] 0x000200 Byte 2 of 2
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S2_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0, 0, 0, 0, 0, 0, 0,T1_,S1_, 0,S2_,T1_, 0,
|
||||
|
||||
0x81,0x00,0x83,0x00,0x85,0x00,0x87,0x00, 0x89,0x00,0x8b,0x00,0x8d,0x00,0x8f,0x00,
|
||||
0x91,0x00,0x93,0x00,0x95,0x00,0x97,0x00, 0x99,0x00,0x9b,0x00,0x9d,0x00,0x9f,0x00,
|
||||
0x9e,0x00,0xa3,0x00,0xa5,0x00,0xa7,0x00, 0xa9,0x00,0xab,0x00,0xad,0x00,0xaf,0x00,
|
||||
0xb1,0x00,0xb3,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0xbc,0x00,0x9a,0x01,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0xc6,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0xc6,0x00,0x00,
|
||||
|
||||
// state[28 + 2] 0x000240 Byte 2 of 2
|
||||
0,S1_, 0,S2_,S2_,S2_,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0x00,0x82,0x00,0x80,0x89,0x8c,0x87,0x00, 0x89,0x00,0x8b,0x00,0x8d,0x00,0x8f,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0xc6,0xca,0xca,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[31 + 2] 0x000340 Byte 2 of 2
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
S1_, 0,S1_, 0, 0, 0,S1_, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0xb1,0x00,0xb3,0x00,0x00,0x00,0xb7,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[33 + 2] 0x000380 Byte 2 of 2
|
||||
0, 0, 0, 0, 0, 0,S1_, 0, S1_,S1_,S1_, 0,S2_, 0,S2_,S2_,
|
||||
0,S1_,S1_,S1_,S1_,S1_,S1_,S1_, S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_,
|
||||
S2_,S2_, 0,S2_,S2_,S2_,S2_,S2_, S2_,S2_,S2_,S2_, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xac,0x00, 0xad,0xae,0xaf,0x00,0x8c,0x00,0x8d,0x8e,
|
||||
0x00,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7, 0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
|
||||
0x80,0x81,0x00,0x83,0x84,0x85,0x86,0x87, 0x88,0x89,0x8a,0x8b,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0xcf,0x00,0xcf,0xcf,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0xcf,0xcf,0x00,0xcf,0xcf,0xcf,0xcf,0xcf, 0xcf,0xcf,0xcf,0xcf,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[36 + 2] 0x0003c0 Byte 2 of 2
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,S1_,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
0, 0, 0, 0,S2_, 0, 0,S1_, 0,S1_,S1_, 0, 0,S2_,S2_,S2_,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x97,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x99,0x00,0x9b,0x00,0x9d,0x00,0x9f,0x00,
|
||||
0xa1,0x00,0xa3,0x00,0xa5,0x00,0xa7,0x00, 0xa9,0x00,0xab,0x00,0xad,0x00,0xaf,0x00,
|
||||
0x00,0x00,0x00,0x00,0xb8,0x00,0x00,0xb8, 0x00,0xb2,0xbb,0x00,0x00,0xbb,0xbc,0xbd,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0xce,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0xcd,0xcd,0xcd,
|
||||
|
||||
// state[39 + 2] 0x000400 Byte 2 of 2
|
||||
S2_,S2_,S2_,S2_,S2_,S2_,S2_,S2_, S2_,S2_,S2_,S2_,S2_,S2_,S2_,S2_,
|
||||
S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_, S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_,
|
||||
S2_,S2_,S2_,S2_,S2_,S2_,S2_,S2_, S2_,S2_,S2_,S2_,S2_,S2_,S2_,S2_,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
|
||||
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7, 0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
|
||||
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0xd1,0xd1,0xd1,0xd1,0xd1,0xd1,0xd1,0xd1, 0xd1,0xd1,0xd1,0xd1,0xd1,0xd1,0xd1,0xd1,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0xd1,0xd1,0xd1,0xd1,0xd1,0xd1,0xd1,0xd1, 0xd1,0xd1,0xd1,0xd1,0xd1,0xd1,0xd1,0xd1,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[42 + 2] 0x000440 Byte 2 of 2
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0xa1,0x00,0xa3,0x00,0xa5,0x00,0xa7,0x00, 0xa9,0x00,0xab,0x00,0xad,0x00,0xaf,0x00,
|
||||
0xb1,0x00,0xb3,0x00,0xb5,0x00,0xb7,0x00, 0xb9,0x00,0xbb,0x00,0xbd,0x00,0xbf,0x00,
|
||||
|
||||
// state[44 + 2] 0x000480 Byte 2 of 2
|
||||
S1_, 0, 0, 0, 0, 0, 0, 0, 0, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
|
||||
0x81,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x8b,0x00,0x8d,0x00,0x8f,0x00,
|
||||
0x91,0x00,0x93,0x00,0x95,0x00,0x97,0x00, 0x99,0x00,0x9b,0x00,0x9d,0x00,0x9f,0x00,
|
||||
0xa1,0x00,0xa3,0x00,0xa5,0x00,0xa7,0x00, 0xa9,0x00,0xab,0x00,0xad,0x00,0xaf,0x00,
|
||||
0xb1,0x00,0xb3,0x00,0xb5,0x00,0xb7,0x00, 0xb9,0x00,0xbb,0x00,0xbd,0x00,0xbf,0x00,
|
||||
|
||||
// state[46 + 2] 0x0004c0 Byte 2 of 2
|
||||
S1_,S1_, 0,S1_, 0,S1_, 0,S1_, 0,S1_, 0,S1_, 0,S1_, 0, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
|
||||
0x8f,0x82,0x00,0x84,0x00,0x86,0x00,0x88, 0x00,0x8a,0x00,0x8c,0x00,0x8e,0x00,0x00,
|
||||
0x91,0x00,0x93,0x00,0x95,0x00,0x97,0x00, 0x99,0x00,0x9b,0x00,0x9d,0x00,0x9f,0x00,
|
||||
0xa1,0x00,0xa3,0x00,0xa5,0x00,0xa7,0x00, 0xa9,0x00,0xab,0x00,0xad,0x00,0xaf,0x00,
|
||||
0xb1,0x00,0xb3,0x00,0xb5,0x00,0xb7,0x00, 0xb9,0x00,0xbb,0x00,0xbd,0x00,0xbf,0x00,
|
||||
|
||||
// state[48 + 2] 0x000500 Byte 2 of 2
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0,S2_,S2_,S2_,S2_,S2_,S2_,S2_, S2_,S2_,S2_,S2_,S2_,S2_,S2_,S2_,
|
||||
|
||||
0x81,0x00,0x83,0x00,0x85,0x00,0x87,0x00, 0x89,0x00,0x8b,0x00,0x8d,0x00,0x8f,0x00,
|
||||
0x91,0x00,0x93,0x00,0x95,0x00,0x97,0x00, 0x99,0x00,0x9b,0x00,0x9d,0x00,0x9f,0x00,
|
||||
0xa1,0x00,0xa3,0x00,0xa5,0x00,0xa7,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7, 0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0xd5,0xd5,0xd5,0xd5,0xd5,0xd5,0xd5, 0xd5,0xd5,0xd5,0xd5,0xd5,0xd5,0xd5,0xd5,
|
||||
|
||||
// state[51 + 2] 0x000540 Byte 2 of 2
|
||||
S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_, S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_,
|
||||
S2_,S2_,S2_,S2_,S2_,S2_,S2_, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7, 0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
|
||||
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0xd6,0xd6,0xd6,0xd6,0xd6,0xd6,0xd6,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[54 + 2] 0x001000 Byte 2 of 3
|
||||
6, 6, 55, 57, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 59, 59, 61, 59, 64, 66, 68, 71,
|
||||
|
||||
// state[55 + 2] 0x001080 Byte 3 of 3
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
T1_,T1_,T1_,T1_,T1_,T1_,T1_,T1_, T1_,T1_,T1_,T1_,T1_,T1_,T1_,T1_,
|
||||
T1_,T1_,T1_,T1_,T1_,T1_,T1_,T1_, T1_,T1_,T1_,T1_,T1_,T1_,T1_,T1_,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09, 0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,
|
||||
0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19, 0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,
|
||||
|
||||
// state[57 + 2] 0x0010c0 Byte 3 of 3
|
||||
T1_,T1_,T1_,T1_,T1_,T1_, 0,T1_, 0, 0, 0, 0, 0,T1_, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0x22,0x23,0x24,0x25,0x26,0x27,0x00,0x28, 0x00,0x00,0x00,0x00,0x00,0x29,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[59 + 2] 0x001e00 Byte 3 of 3
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
|
||||
0x81,0x00,0x83,0x00,0x85,0x00,0x87,0x00, 0x89,0x00,0x8b,0x00,0x8d,0x00,0x8f,0x00,
|
||||
0x91,0x00,0x93,0x00,0x95,0x00,0x97,0x00, 0x99,0x00,0x9b,0x00,0x9d,0x00,0x9f,0x00,
|
||||
0xa1,0x00,0xa3,0x00,0xa5,0x00,0xa7,0x00, 0xa9,0x00,0xab,0x00,0xad,0x00,0xaf,0x00,
|
||||
0xb1,0x00,0xb3,0x00,0xb5,0x00,0xb7,0x00, 0xb9,0x00,0xbb,0x00,0xbd,0x00,0xbf,0x00,
|
||||
|
||||
// state[61 + 2] 0x001e80 Byte 3 of 3
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0, 0, 0, 0, 0, 0, 0, 0, 0,S32, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
|
||||
0x81,0x00,0x83,0x00,0x85,0x00,0x87,0x00, 0x89,0x00,0x8b,0x00,0x8d,0x00,0x8f,0x00,
|
||||
0x91,0x00,0x93,0x00,0x95,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x9f,0x00,
|
||||
0xa1,0x00,0xa3,0x00,0xa5,0x00,0xa7,0x00, 0xa9,0x00,0xab,0x00,0xad,0x00,0xaf,0x00,
|
||||
0xb1,0x00,0xb3,0x00,0xb5,0x00,0xb7,0x00, 0xb9,0x00,0xbb,0x00,0xbd,0x00,0xbf,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0xc3,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[64 + 2] 0x001f00 Byte 3 of 3
|
||||
0, 0, 0, 0, 0, 0, 0, 0, S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, S1_,S1_,S1_,S1_,S1_,S1_, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x90,0x91,0x92,0x93,0x94,0x95,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,
|
||||
|
||||
// state[66 + 2] 0x001f40 Byte 3 of 3
|
||||
0, 0, 0, 0, 0, 0, 0, 0, S1_,S1_,S1_,S1_,S1_,S1_, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0,S1_, 0,S1_, 0,S1_, 0,S1_,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x80,0x81,0x82,0x83,0x84,0x85,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x91,0x00,0x93,0x00,0x95,0x00,0x97,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[68 + 2] 0x001f80 Byte 3 of 3
|
||||
0, 0, 0, 0, 0, 0, 0, 0, S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, S1_,S1_,S2_,S2_,S1_, 0, 0, 0,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0xb0,0xb1,0xb0,0xb1,0xb3,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0xbd,0xbd,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[71 + 2] 0x001fc0 Byte 3 of 3
|
||||
0, 0, 0, 0, 0, 0, 0, 0, S2_,S2_,S2_,S2_,S1_, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, S1_,S1_,S2_,S2_, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, S1_,S1_,S2_,S2_,S1_, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, S2_,S2_,S2_,S2_,S1_, 0, 0, 0,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0xb2,0xb3,0xb4,0xb5,0x83,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x90,0x91,0xb6,0xb7,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0xa0,0xa1,0xba,0xbb,0xa5,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0xb8,0xb9,0xbc,0xbd,0xb3,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0xbd,0xbd,0xbd,0xbd,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0xbd,0xbd,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0xbd,0xbd,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0xbd,0xbd,0xbd,0xbd,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[74 + 2] 0x002000 Byte 2 of 3
|
||||
6, 6, 6, 6, 75, 6, 78, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
80, 83, 59, 86, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
|
||||
// state[75 + 2] 0x002100 Byte 3 of 3
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,S32, 0, 0, 0,S31,S32, 0, 0, 0, 0,
|
||||
0, 0,S2_, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x89,0x00, 0x00,0x00,0x6b,0xa5,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x8e,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xcf,0x00, 0x00,0x00,0x00,0xc3,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x85,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[78 + 2] 0x002180 Byte 3 of 3
|
||||
0, 0, 0,S1_, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0x00,0x00,0x00,0x84,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[80 + 2] 0x002c00 Byte 3 of 3
|
||||
S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_, S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_,
|
||||
S2_,S2_,S2_,S2_,S2_,S2_,S2_,S2_, S2_,S2_,S2_,S2_,S2_,S2_,S2_,S2_,
|
||||
S2_,S2_,S2_,S2_,S2_,S2_,S2_,S2_, S2_,S2_,S2_,S2_,S2_,S2_,S2_, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7, 0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
|
||||
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
|
||||
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
|
||||
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[83 + 2] 0x002c40 Byte 3 of 3
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
S1_, 0,S32,T1_,S32, 0, 0,S1_, 0,S1_, 0,S1_, 0,S32,S32,S32,
|
||||
S32, 0,S1_, 0, 0,S1_, 0, 0, 0, 0, 0, 0, 0, 0,S32,S32,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0xa1,0x00,0xab,0x2a,0xbd,0x00,0x00,0xa8, 0x00,0xaa,0x00,0xac,0x00,0x91,0xb1,0x90,
|
||||
0x92,0x00,0xb3,0x00,0x00,0xb6,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0xbf,0x80,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0xc9,0x00,0xc9,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0xc9,0xc9,0xc9,
|
||||
0xc9,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0xc8,0xc9,
|
||||
|
||||
// state[86 + 2] 0x002cc0 Byte 3 of 3
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0, 0, 0, 0, 0, 0, 0, 0,S1_, 0,S1_, 0, 0,
|
||||
0, 0,S1_, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0x81,0x00,0x83,0x00,0x85,0x00,0x87,0x00, 0x89,0x00,0x8b,0x00,0x8d,0x00,0x8f,0x00,
|
||||
0x91,0x00,0x93,0x00,0x95,0x00,0x97,0x00, 0x99,0x00,0x9b,0x00,0x9d,0x00,0x9f,0x00,
|
||||
0xa1,0x00,0xa3,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0xac,0x00,0xae,0x00,0x00,
|
||||
0x00,0x00,0xb3,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[88 + 2] 0x00a000 Byte 2 of 3
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 89, 91, 6, 93, 95, 97, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
|
||||
// state[89 + 2] 0x00a640 Byte 3 of 3
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0x81,0x00,0x83,0x00,0x85,0x00,0x87,0x00, 0x89,0x00,0x8b,0x00,0x8d,0x00,0x8f,0x00,
|
||||
0x91,0x00,0x93,0x00,0x95,0x00,0x97,0x00, 0x99,0x00,0x9b,0x00,0x9d,0x00,0x9f,0x00,
|
||||
0xa1,0x00,0xa3,0x00,0xa5,0x00,0xa7,0x00, 0xa9,0x00,0xab,0x00,0xad,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[91 + 2] 0x00a680 Byte 3 of 3
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0x81,0x00,0x83,0x00,0x85,0x00,0x87,0x00, 0x89,0x00,0x8b,0x00,0x8d,0x00,0x8f,0x00,
|
||||
0x91,0x00,0x93,0x00,0x95,0x00,0x97,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[93 + 2] 0x00a700 Byte 3 of 3
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
0, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0xa3,0x00,0xa5,0x00,0xa7,0x00, 0xa9,0x00,0xab,0x00,0xad,0x00,0xaf,0x00,
|
||||
0x00,0x00,0xb3,0x00,0xb5,0x00,0xb7,0x00, 0xb9,0x00,0xbb,0x00,0xbd,0x00,0xbf,0x00,
|
||||
|
||||
// state[95 + 2] 0x00a740 Byte 3 of 3
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S1_, 0,S1_, 0,S1_, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0,S1_, 0,S1_, 0,T1_,S1_, 0,
|
||||
|
||||
0x81,0x00,0x83,0x00,0x85,0x00,0x87,0x00, 0x89,0x00,0x8b,0x00,0x8d,0x00,0x8f,0x00,
|
||||
0x91,0x00,0x93,0x00,0x95,0x00,0x97,0x00, 0x99,0x00,0x9b,0x00,0x9d,0x00,0x9f,0x00,
|
||||
0xa1,0x00,0xa3,0x00,0xa5,0x00,0xa7,0x00, 0xa9,0x00,0xab,0x00,0xad,0x00,0xaf,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0xba,0x00,0xbc,0x00,0x2b,0xbf,0x00,
|
||||
|
||||
// state[97 + 2] 0x00a780 Byte 3 of 3
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, 0, 0, 0,S1_, 0,S32, 0, 0,
|
||||
S1_, 0,S1_, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
S1_, 0,S1_, 0,S1_, 0,S1_, 0, S1_, 0,S32, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0x81,0x00,0x83,0x00,0x85,0x00,0x87,0x00, 0x00,0x00,0x00,0x8c,0x00,0xa5,0x00,0x00,
|
||||
0x91,0x00,0x93,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0xa1,0x00,0xa3,0x00,0xa5,0x00,0xa7,0x00, 0xa9,0x00,0xa6,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0xc9,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0xc9,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[100 + 2] 0x00f000 Byte 2 of 3
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,101, 6, 6, 6,
|
||||
|
||||
// state[101 + 2] 0x00ff00 Byte 3 of 3
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0,S2_,S2_,S2_,S2_,S2_,S2_,S2_, S2_,S2_,S2_,S2_,S2_,S2_,S2_,S2_,
|
||||
S2_,S2_,S2_,S2_,S2_,S2_,S2_,S2_, S2_,S2_,S2_, 0, 0, 0, 0, 0,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
|
||||
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 0x98,0x99,0x9a,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd, 0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,
|
||||
0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd, 0xbd,0xbd,0xbd,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
// state[104 + 2] 0x000000 Byte 2 of 4
|
||||
X__,X__,X__,X__,X__,X__,X__,X__, X__,X__,X__,X__,X__,X__,X__,X__,
|
||||
105, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
|
||||
// state[105 + 2] 0x010000 Byte 3 of 4
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
106, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
|
||||
// state[106 + 2] 0x010400 Byte 4 of 4
|
||||
S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_, S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_,
|
||||
S1_,S1_,S1_,S1_,S1_,S1_,S1_,S1_, S2_,S2_,S2_,S2_,S2_,S2_,S2_,S2_,
|
||||
S2_,S2_,S2_,S2_,S2_,S2_,S2_,S2_, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,
|
||||
0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
|
||||
0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x91,0x91,0x91,0x91,0x91,0x91,0x91,0x91,
|
||||
0x91,0x91,0x91,0x91,0x91,0x91,0x91,0x91, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
};
|
||||
|
||||
// Remap base[44] = (del, add, string_offset)
|
||||
// Replacement descriptors for the lettermarklower table. Each initializer is
// a RemapEntry {delete_bytes, add_bytes, bytes_offset}: how many source bytes
// are removed, how many replacement bytes are added, and where the replacement
// starts inside utf8repl_lettermarklower_remap_string. There are 44 real
// entries (offsets 0, 3, ..., 129); the first two replace a 2-byte sequence
// with a 3-byte one, all others replace 3 bytes with 3 bytes.
// NOTE(review): the trailing {0,0,0} looks like an end marker -- confirm
// against the code that walks this table.
static const RemapEntry utf8repl_lettermarklower_remap_base[] = {
|
||||
{2,3, 0}, {2,3, 3}, {3,3, 6}, {3,3, 9},
|
||||
{3,3, 12}, {3,3, 15}, {3,3, 18}, {3,3, 21},
|
||||
{3,3, 24}, {3,3, 27}, {3,3, 30}, {3,3, 33},
|
||||
{3,3, 36}, {3,3, 39}, {3,3, 42}, {3,3, 45},
|
||||
|
||||
{3,3, 48}, {3,3, 51}, {3,3, 54}, {3,3, 57},
|
||||
{3,3, 60}, {3,3, 63}, {3,3, 66}, {3,3, 69},
|
||||
{3,3, 72}, {3,3, 75}, {3,3, 78}, {3,3, 81},
|
||||
{3,3, 84}, {3,3, 87}, {3,3, 90}, {3,3, 93},
|
||||
|
||||
{3,3, 96}, {3,3, 99}, {3,3, 102}, {3,3, 105},
|
||||
{3,3, 108}, {3,3, 111}, {3,3, 114}, {3,3, 117},
|
||||
{3,3, 120}, {3,3, 123}, {3,3, 126}, {3,3, 129},
|
||||
{0,0,0} };
|
||||
|
||||
// Remap string[132]
|
||||
// Concatenated replacement byte strings addressed by RemapEntry::bytes_offset.
// The payload is 132 bytes: 44 three-byte UTF-8 sequences laid out back to
// back, followed by a single 0 byte. Decoded, the sequences are U+2C65,
// U+2C66, U+2D00..U+2D25, U+2D27, U+2D2D, U+1D7D and U+1D79, in that order.
// Rows below are 16 bytes wide, so sequences straddle row boundaries.
static const unsigned char utf8repl_lettermarklower_remap_string[] = {
|
||||
0xe2,0xb1,0xa5,0xe2,0xb1,0xa6,0xe2,0xb4, 0x80,0xe2,0xb4,0x81,0xe2,0xb4,0x82,0xe2,
|
||||
0xb4,0x83,0xe2,0xb4,0x84,0xe2,0xb4,0x85, 0xe2,0xb4,0x86,0xe2,0xb4,0x87,0xe2,0xb4,
|
||||
0x88,0xe2,0xb4,0x89,0xe2,0xb4,0x8a,0xe2, 0xb4,0x8b,0xe2,0xb4,0x8c,0xe2,0xb4,0x8d,
|
||||
0xe2,0xb4,0x8e,0xe2,0xb4,0x8f,0xe2,0xb4, 0x90,0xe2,0xb4,0x91,0xe2,0xb4,0x92,0xe2,
|
||||
|
||||
0xb4,0x93,0xe2,0xb4,0x94,0xe2,0xb4,0x95, 0xe2,0xb4,0x96,0xe2,0xb4,0x97,0xe2,0xb4,
|
||||
0x98,0xe2,0xb4,0x99,0xe2,0xb4,0x9a,0xe2, 0xb4,0x9b,0xe2,0xb4,0x9c,0xe2,0xb4,0x9d,
|
||||
0xe2,0xb4,0x9e,0xe2,0xb4,0x9f,0xe2,0xb4, 0xa0,0xe2,0xb4,0xa1,0xe2,0xb4,0xa2,0xe2,
|
||||
0xb4,0xa3,0xe2,0xb4,0xa4,0xe2,0xb4,0xa5, 0xe2,0xb4,0xa7,0xe2,0xb4,0xad,0xe1,0xb5,
|
||||
|
||||
0xbd,0xe1,0xb5,0xb9,0 };
|
||||
|
||||
// Per-byte flag table with one entry for every possible byte value (256).
// Entries are 1 for 0x41..0x5A (ASCII 'A'..'Z') and for every byte 0x80..0xFF;
// all other entries are 0. It is published as the fast_state member of
// utf8repl_lettermarklower_obj.
// NOTE(review): presumably a 1 marks bytes that cannot be skipped by a fast
// scan loop -- confirm against the replace routine that reads fast_state.
static const unsigned char utf8repl_lettermarklower_fast[256] = {
|
||||
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
|
||||
|
||||
0,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1, 1,1,1,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
|
||||
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
|
||||
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
|
||||
|
||||
};
|
||||
|
||||
// Ties the lettermarklower tables together into a single UTF8ReplaceObj.
// The initializers are positional; each trailing comment names the
// UTF8StateMachineObj member that receives the value.
static const UTF8ReplaceObj utf8repl_lettermarklower_obj = {
  utf8repl_lettermarklower_STATE0,         // state0
  utf8repl_lettermarklower_STATE0_SIZE,    // state0_size
  utf8repl_lettermarklower_TOTAL_SIZE,     // total_size
  utf8repl_lettermarklower_MAX_EXPAND_X4,  // max_expand
  utf8repl_lettermarklower_SHIFT,          // entry_shift
  utf8repl_lettermarklower_BYTES,          // bytes_per_entry
  utf8repl_lettermarklower_LOSUB,          // losub
  utf8repl_lettermarklower_HIADD,          // hiadd
  utf8repl_lettermarklower,                // state_table
  utf8repl_lettermarklower_remap_base,     // remap_base
  utf8repl_lettermarklower_remap_string,   // remap_string
  utf8repl_lettermarklower_fast            // fast_state
};
|
||||
|
||||
|
||||
#undef X__
|
||||
#undef RJ_
|
||||
#undef S1_
|
||||
#undef S2_
|
||||
#undef S3_
|
||||
#undef S21
|
||||
#undef S31
|
||||
#undef S32
|
||||
#undef T1_
|
||||
#undef T2_
|
||||
#undef S11
|
||||
#undef SP_
|
||||
#undef D__
|
||||
#undef RJA
|
||||
|
||||
// Table has 7668 bytes, Hash = 07A2-C4E3
|
||||
|
||||
} // End namespace CLD2
|
||||
} // End namespace chrome_lang_id
|
||||
|
||||
#endif // SCRIPT_SPAN_UTF8REPL_LETTERMARKLOWER_H_
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,285 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//
|
||||
// State Table follower for scanning UTF-8 strings without converting to
|
||||
// 32- or 16-bit Unicode values.
|
||||
//
|
||||
// Author: dsites@google.com (Dick Sites)
|
||||
//
|
||||
|
||||
#ifndef SCRIPT_SPAN_UTF8STATETABLE_H_
|
||||
#define SCRIPT_SPAN_UTF8STATETABLE_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "integral_types.h" // for uint8, uint32, uint16
|
||||
#include "stringpiece.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace CLD2 {
|
||||
|
||||
class OffsetMap;
|
||||
|
||||
|
||||
// These four-byte entries compactly encode how many bytes 0..255 to delete
|
||||
// in making a string replacement, how many bytes to add 0..255, and the offset
|
||||
// 0..64k-1 of the replacement string in remap_string.
|
||||
struct RemapEntry {
|
||||
uint8 delete_bytes;
|
||||
uint8 add_bytes;
|
||||
uint16 bytes_offset;
|
||||
};
|
||||
|
||||
// Exit type codes for state tables. All but the first get stuffed into
|
||||
// signed one-byte entries. The first is only generated by executable code.
|
||||
// To distinguish from next-state entries, these must be contiguous and
|
||||
// all <= kExitNone
|
||||
// Exit codes for state tables with one-byte entries. The values are
// contiguous and run from 239 up to kExitNone (255); every value is written
// out explicitly so the numbering can be read without counting.
enum ExitReason {
  kExitDstSpaceFull = 239,
  kExitIllegalStructure = 240,
  kExitOK = 241,
  kExitReject = 242,
  kExitReplace1 = 243,
  kExitReplace2 = 244,
  kExitReplace3 = 245,
  kExitReplace21 = 246,
  kExitReplace31 = 247,
  kExitReplace32 = 248,
  kExitReplaceOffset1 = 249,
  kExitReplaceOffset2 = 250,
  kExitReplace1S0 = 251,
  kExitSpecial = 252,
  kExitDoAgain = 253,
  kExitRejectAlt = 254,
  kExitNone = 255
};
|
||||
|
||||
// Exit codes for state tables with two-byte entries. Same ordering as
// ExitReason, but numbered contiguously from 32767 (0x7fff) up to
// kExitNone_2 at 32783 (0x800f).
enum ExitReason_2 {
  kExitDstSpaceFull_2 = 32767,      // 0x7fff
  kExitIllegalStructure_2 = 32768,  // 0x8000
  kExitOK_2 = 32769,                // 0x8001
  kExitReject_2 = 32770,
  kExitReplace1_2 = 32771,
  kExitReplace2_2 = 32772,
  kExitReplace3_2 = 32773,
  kExitReplace21_2 = 32774,
  kExitReplace31_2 = 32775,
  kExitReplace32_2 = 32776,
  kExitReplaceOffset1_2 = 32777,
  kExitReplaceOffset2_2 = 32778,
  kExitReplace1S0_2 = 32779,
  kExitSpecial_2 = 32780,
  kExitDoAgain_2 = 32781,
  kExitRejectAlt_2 = 32782,
  kExitNone_2 = 32783               // 0x800f
};
|
||||
|
||||
|
||||
// This struct represents one entire state table. The three initialized byte
|
||||
// areas are state_table, remap_base, and remap_string. state0 and state0_size
|
||||
// give the byte offset and length within state_table of the initial state --
|
||||
// table lookups are expected to start and end in this state, but for
|
||||
// truncated UTF-8 strings, may end in a different state. These allow a quick
|
||||
// test for that condition. entry_shift is 8 for tables subscripted by a full
|
||||
// byte value and 6 for space-optimized tables subscripted by only six
|
||||
// significant bits in UTF-8 continuation bytes.
|
||||
typedef struct {
|
||||
const uint32 state0;
|
||||
const uint32 state0_size;
|
||||
const uint32 total_size;
|
||||
const int max_expand;
|
||||
const int entry_shift;
|
||||
const int bytes_per_entry;
|
||||
const uint32 losub;
|
||||
const uint32 hiadd;
|
||||
const uint8* state_table;
|
||||
const RemapEntry* remap_base;
|
||||
const uint8* remap_string;
|
||||
const uint8* fast_state;
|
||||
} UTF8StateMachineObj;
|
||||
|
||||
// Near-duplicate declaration for tables with two-byte entries
|
||||
typedef struct {
|
||||
const uint32 state0;
|
||||
const uint32 state0_size;
|
||||
const uint32 total_size;
|
||||
const int max_expand;
|
||||
const int entry_shift;
|
||||
const int bytes_per_entry;
|
||||
const uint32 losub;
|
||||
const uint32 hiadd;
|
||||
const unsigned short* state_table;
|
||||
const RemapEntry* remap_base;
|
||||
const uint8* remap_string;
|
||||
const uint8* fast_state;
|
||||
} UTF8StateMachineObj_2;
|
||||
|
||||
|
||||
typedef UTF8StateMachineObj UTF8PropObj;
|
||||
typedef UTF8StateMachineObj UTF8ScanObj;
|
||||
typedef UTF8StateMachineObj UTF8ReplaceObj;
|
||||
typedef UTF8StateMachineObj_2 UTF8PropObj_2;
|
||||
typedef UTF8StateMachineObj_2 UTF8ReplaceObj_2;
|
||||
// NOT IMPLEMENTED typedef UTF8StateMachineObj_2 UTF8ScanObj_2;
|
||||
|
||||
|
||||
// Look up property of one UTF-8 character and advance over it
|
||||
// Return 0 if input length is zero
|
||||
// Return 0 and advance one byte if input is ill-formed
|
||||
uint8 UTF8GenericProperty(const UTF8PropObj* st,
|
||||
const uint8** src,
|
||||
int* srclen);
|
||||
|
||||
// Look up property of one UTF-8 character (assumed to be valid).
|
||||
// (This is a faster version of UTF8GenericProperty.)
|
||||
bool UTF8HasGenericProperty(const UTF8PropObj& st, const char* src);
|
||||
|
||||
|
||||
// BigOneByte versions are needed for tables > 240 states, but most
|
||||
// won't need the TwoByte versions.
|
||||
|
||||
// Look up property of one UTF-8 character and advance over it
|
||||
// Return 0 if input length is zero
|
||||
// Return 0 and advance one byte if input is ill-formed
|
||||
uint8 UTF8GenericPropertyBigOneByte(const UTF8PropObj* st,
|
||||
const uint8** src,
|
||||
int* srclen);
|
||||
|
||||
|
||||
// TwoByte versions are needed for tables > 240 states that don't fit onto
|
||||
// BigOneByte -- rare ultimate fallback
|
||||
|
||||
// Look up property of one UTF-8 character (assumed to be valid).
|
||||
// (This is a faster version of UTF8GenericProperty.)
|
||||
bool UTF8HasGenericPropertyBigOneByte(const UTF8PropObj& st, const char* src);
|
||||
|
||||
// Look up property of one UTF-8 character and advance over it
|
||||
// Return 0 if input length is zero
|
||||
// Return 0 and advance one byte if input is ill-formed
|
||||
uint8 UTF8GenericPropertyTwoByte(const UTF8PropObj_2* st,
|
||||
const uint8** src,
|
||||
int* srclen);
|
||||
|
||||
// Look up property of one UTF-8 character (assumed to be valid).
|
||||
// (This is a faster version of UTF8GenericProperty.)
|
||||
bool UTF8HasGenericPropertyTwoByte(const UTF8PropObj_2& st, const char* src);
|
||||
|
||||
// Scan a UTF-8 stringpiece based on a state table.
|
||||
// Always scan complete UTF-8 characters
|
||||
// Set number of bytes scanned. Return reason for exiting
|
||||
int UTF8GenericScan(const UTF8ScanObj* st,
|
||||
const StringPiece& str,
|
||||
int* bytes_consumed);
|
||||
|
||||
|
||||
|
||||
// Scan a UTF-8 stringpiece based on state table, copying to output stringpiece
|
||||
// and doing text replacements.
|
||||
// Always scan complete UTF-8 characters
|
||||
// Set number of bytes consumed from input, number filled to output.
|
||||
// Return reason for exiting
|
||||
// Also writes an optional OffsetMap. Pass NULL to skip writing one.
|
||||
int UTF8GenericReplace(const UTF8ReplaceObj* st,
|
||||
const StringPiece& istr,
|
||||
StringPiece& ostr,
|
||||
bool is_plain_text,
|
||||
int* bytes_consumed,
|
||||
int* bytes_filled,
|
||||
int* chars_changed,
|
||||
OffsetMap* offsetmap);
|
||||
|
||||
// Older version without offsetmap
|
||||
int UTF8GenericReplace(const UTF8ReplaceObj* st,
|
||||
const StringPiece& istr,
|
||||
StringPiece& ostr,
|
||||
bool is_plain_text,
|
||||
int* bytes_consumed,
|
||||
int* bytes_filled,
|
||||
int* chars_changed);
|
||||
|
||||
// Older version without is_plain_text or offsetmap
|
||||
int UTF8GenericReplace(const UTF8ReplaceObj* st,
|
||||
const StringPiece& istr,
|
||||
StringPiece& ostr,
|
||||
int* bytes_consumed,
|
||||
int* bytes_filled,
|
||||
int* chars_changed);
|
||||
|
||||
|
||||
// TwoByte version is needed for tables > about 256 states, such
|
||||
// as the table for full Unicode 4.1 canonical + compatibility mapping
|
||||
|
||||
// Scan a UTF-8 stringpiece based on state table with two-byte entries,
|
||||
// copying to output stringpiece
|
||||
// and doing text replacements.
|
||||
// Always scan complete UTF-8 characters
|
||||
// Set number of bytes consumed from input, number filled to output.
|
||||
// Return reason for exiting
|
||||
// Also writes an optional OffsetMap. Pass NULL to skip writing one.
|
||||
int UTF8GenericReplaceTwoByte(const UTF8ReplaceObj_2* st,
|
||||
const StringPiece& istr,
|
||||
StringPiece& ostr,
|
||||
bool is_plain_text,
|
||||
int* bytes_consumed,
|
||||
int* bytes_filled,
|
||||
int* chars_changed,
|
||||
OffsetMap* offsetmap);
|
||||
|
||||
// Older version without offsetmap
|
||||
int UTF8GenericReplaceTwoByte(const UTF8ReplaceObj_2* st,
|
||||
const StringPiece& istr,
|
||||
StringPiece& ostr,
|
||||
bool is_plain_text,
|
||||
int* bytes_consumed,
|
||||
int* bytes_filled,
|
||||
int* chars_changed);
|
||||
|
||||
// Older version without is_plain_text or offsetmap
|
||||
int UTF8GenericReplaceTwoByte(const UTF8ReplaceObj_2* st,
|
||||
const StringPiece& istr,
|
||||
StringPiece& ostr,
|
||||
int* bytes_consumed,
|
||||
int* bytes_filled,
|
||||
int* chars_changed);
|
||||
|
||||
|
||||
// Sequence length, in bytes, keyed by the first byte of a UTF-8 character.
// Bytes 0x00-0xBF yield 1 (that range includes the continuation bytes
// 0x80-0xBF), 0xC0-0xDF yield 2, 0xE0-0xEF yield 3 and 0xF0-0xFF yield 4.
static const unsigned char kUTF8LenTbl[256] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0x00-0x0F
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0x10-0x1F
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0x20-0x2F
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0x30-0x3F
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0x40-0x4F
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0x50-0x5F
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0x60-0x6F
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0x70-0x7F
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0x80-0x8F
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0x90-0x9F
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0xA0-0xAF
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0xB0-0xBF
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  // 0xC0-0xCF
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  // 0xD0-0xDF
  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,  // 0xE0-0xEF
  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4   // 0xF0-0xFF
};
|
||||
|
||||
inline int UTF8OneCharLen(const char* in) {
|
||||
return kUTF8LenTbl[*reinterpret_cast<const uint8*>(in)];
|
||||
}
|
||||
|
||||
// Adjust a stringpiece to encompass complete UTF-8 characters.
|
||||
// The data pointer will be increased by 0..3 bytes to get to a character
|
||||
// boundary, and the length will then be decreased by 0..3 bytes
|
||||
// to encompass the last complete character.
|
||||
// This is useful especially when a UTF-8 string must be put into a fixed-
|
||||
// maximum-size buffer cleanly, such as a MySQL buffer.
|
||||
void UTF8TrimToChars(StringPiece* istr);
|
||||
|
||||
} // End namespace CLD2
|
||||
} // End namespace chrome_lang_id
|
||||
|
||||
#endif // SCRIPT_SPAN_UTF8STATETABLE_H_
|
||||
@ -0,0 +1,29 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "sentence_features.h"
|
||||
|
||||
#include "registry.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Declare registry for the whole Sentence feature functions. NOTE: this is not
|
||||
// yet set to anything meaningful. It will be set so in NNetLanguageIdentifier
|
||||
// constructor, *before* we use any feature.
|
||||
template <>
|
||||
WholeSentenceFeature::Registry
|
||||
*RegisterableClass<WholeSentenceFeature>::registry_ = nullptr;
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,35 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// Features that operate on Sentence objects. Most features are defined
|
||||
// in this header so they may be re-used via composition into other more
|
||||
// advanced feature classes.
|
||||
|
||||
#ifndef SENTENCE_FEATURES_H_
|
||||
#define SENTENCE_FEATURES_H_
|
||||
|
||||
#include "feature_extractor.h"
|
||||
#include "cld_3/protos/sentence.pb.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Feature function that extracts features for the full Sentence.
|
||||
typedef FeatureFunction<Sentence> WholeSentenceFeature;
|
||||
|
||||
typedef FeatureExtractor<Sentence> WholeSentenceExtractor;
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // SENTENCE_FEATURES_H_
|
||||
@ -0,0 +1,72 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef SIMPLE_ADDER_H_
|
||||
#define SIMPLE_ADDER_H_
|
||||
|
||||
#include "base.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Class for adding (possibly) scaled arrays.
|
||||
class SimpleAdder {
|
||||
public:
|
||||
static constexpr const int kNumFloatsPerBatch = 1;
|
||||
|
||||
CLD3_ATTRIBUTE_ALWAYS_INLINE SimpleAdder(float *dest, int num_floats)
|
||||
: dest_(dest), num_floats_(num_floats) {}
|
||||
|
||||
CLD3_ATTRIBUTE_ALWAYS_INLINE ~SimpleAdder() {
|
||||
// Should call Finalize function before destruction.
|
||||
CLD3_DCHECK(dest_ == nullptr);
|
||||
}
|
||||
|
||||
// Caller must call this function before calling deconstruct this object.
|
||||
CLD3_ATTRIBUTE_ALWAYS_INLINE void Finalize() { dest_ = nullptr; }
|
||||
|
||||
CLD3_ATTRIBUTE_ALWAYS_INLINE void LazyAdd(const float *source) const {
|
||||
AddImpl(source, num_floats_, dest_);
|
||||
}
|
||||
|
||||
CLD3_ATTRIBUTE_ALWAYS_INLINE void LazyScaleAdd(const float *source,
|
||||
const float scale) const {
|
||||
ScaleAddImpl(source, num_floats_, scale, dest_);
|
||||
}
|
||||
|
||||
// Simple fast while loop to implement dest += source.
|
||||
CLD3_ATTRIBUTE_ALWAYS_INLINE static void AddImpl(
|
||||
const float *__restrict source, uint32 size, float *__restrict dest) {
|
||||
for (uint32 i = 0; i < size; ++i) {
|
||||
dest[i] += source[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Simple fast while loop to implement dest += scale * source.
|
||||
CLD3_ATTRIBUTE_ALWAYS_INLINE static void ScaleAddImpl(
|
||||
const float *__restrict source, uint32 size, const float scale,
|
||||
float *__restrict dest) {
|
||||
for (uint32 i = 0; i < size; ++i) {
|
||||
dest[i] += source[i] * scale;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
float *dest_;
|
||||
int num_floats_;
|
||||
};
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // SIMPLE_ADDER_H_
|
||||
@ -0,0 +1,161 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "task_context.h"
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Construction and destruction need no work beyond what the members do
// themselves, so both are defaulted out of line.
TaskContext::TaskContext() = default;

TaskContext::~TaskContext() = default;
|
||||
|
||||
// Returns the input named |name| from the task spec. If the spec has no such
// input yet, one is appended, given that name, and returned.
TaskInput *TaskContext::GetInput(const string &name) {
  const int input_count = spec_.input_size();
  for (int idx = 0; idx < input_count; ++idx) {
    if (spec_.input(idx).name() != name) continue;
    return spec_.mutable_input(idx);  // First input with a matching name.
  }

  // No match: append a fresh input carrying the requested name.
  TaskInput *created = spec_.add_input();
  created->set_name(name);
  return created;
}
|
||||
|
||||
// Returns the input named |name| (creating it when missing) and makes sure
// that |file_format| and |record_format| are listed on it. An empty format
// string is ignored; a format that is already listed is not added again.
TaskInput *TaskContext::GetInput(const string &name, const string &file_format,
                                 const string &record_format) {
  TaskInput *input = GetInput(name);

  if (!file_format.empty()) {
    bool already_listed = false;
    const int count = input->file_format_size();
    for (int idx = 0; idx < count && !already_listed; ++idx) {
      already_listed = (input->file_format(idx) == file_format);
    }
    if (!already_listed) input->add_file_format(file_format);
  }

  if (!record_format.empty()) {
    bool already_listed = false;
    const int count = input->record_format_size();
    for (int idx = 0; idx < count && !already_listed; ++idx) {
      already_listed = (input->record_format(idx) == record_format);
    }
    if (!already_listed) input->add_record_format(record_format);
  }

  return input;
}
|
||||
|
||||
void TaskContext::SetParameter(const string &name, const string &value) {
|
||||
// If the parameter already exists update the value.
|
||||
for (int i = 0; i < spec_.parameter_size(); ++i) {
|
||||
if (spec_.parameter(i).name() == name) {
|
||||
spec_.mutable_parameter(i)->set_value(value);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Add new parameter.
|
||||
TaskSpec::Parameter *param = spec_.add_parameter();
|
||||
param->set_name(name);
|
||||
param->set_value(value);
|
||||
}
|
||||
|
||||
// Returns the value of the first parameter in the task spec named |name|,
// or an empty string when the spec holds no such parameter.
string TaskContext::GetParameter(const string &name) const {
  const int parameter_count = spec_.parameter_size();
  for (int idx = 0; idx < parameter_count; ++idx) {
    const TaskSpec::Parameter &candidate = spec_.parameter(idx);
    if (candidate.name() == name) return candidate.value();
  }
  return string();
}
|
||||
|
||||
int TaskContext::GetIntParameter(const string &name) const {
|
||||
string value = GetParameter(name);
|
||||
return utils::ParseUsing<int>(value, 0, utils::ParseInt32);
|
||||
}
|
||||
|
||||
bool TaskContext::GetBoolParameter(const string &name) const {
|
||||
string value = GetParameter(name);
|
||||
return value == "true";
|
||||
}
|
||||
|
||||
double TaskContext::GetFloatParameter(const string &name) const {
|
||||
string value = GetParameter(name);
|
||||
return utils::ParseUsing<double>(value, .0, utils::ParseDouble);
|
||||
}
|
||||
|
||||
// Returns the value of the first parameter in the task spec named |name|.
// When the spec holds no such parameter, |defval| is returned instead.
//
// A null |defval| is treated as an empty default: building a std::string
// from a null pointer is undefined behavior, so it is never handed to the
// string constructor.
string TaskContext::Get(const string &name, const char *defval) const {
  // First try to find the parameter in the task specification.
  for (int i = 0; i < spec_.parameter_size(); ++i) {
    const TaskSpec::Parameter &param = spec_.parameter(i);
    if (param.name() == name) return param.value();
  }

  // Parameter not found: fall back to the default, guarding against null.
  return defval != nullptr ? string(defval) : string();
}
|
||||
|
||||
// Returns the value of the first parameter in the task spec named |name|,
// or a copy of |defval| when the spec holds no such parameter.
//
// The default is returned as a string rather than being routed through
// c_str(): that round trip cut the default off at the first embedded NUL
// byte and re-measured its length on every miss.
string TaskContext::Get(const string &name, const string &defval) const {
  for (int i = 0; i < spec_.parameter_size(); ++i) {
    const TaskSpec::Parameter &param = spec_.parameter(i);
    if (param.name() == name) return param.value();
  }
  return defval;
}
|
||||
|
||||
int TaskContext::Get(const string &name, int defval) const {
|
||||
string value = Get(name, "");
|
||||
return utils::ParseUsing<int>(value, defval, utils::ParseInt32);
|
||||
}
|
||||
|
||||
double TaskContext::Get(const string &name, double defval) const {
|
||||
string value = Get(name, "");
|
||||
return utils::ParseUsing<double>(value, defval, utils::ParseDouble);
|
||||
}
|
||||
|
||||
bool TaskContext::Get(const string &name, bool defval) const {
|
||||
string value = Get(name, "");
|
||||
return value.empty() ? defval : value == "true";
|
||||
}
|
||||
|
||||
// Returns the file pattern of |input|'s only part. CLD3_CHECK enforces that
// the input has exactly one part.
string TaskContext::InputFile(const TaskInput &input) {
  CLD3_CHECK(input.part_size() == 1);
  const auto &only_part = input.part(0);
  return only_part.file_pattern();
}
|
||||
|
||||
bool TaskContext::Supports(const TaskInput &input, const string &file_format,
|
||||
const string &record_format) {
|
||||
// Check file format.
|
||||
if (input.file_format_size() > 0) {
|
||||
bool found = false;
|
||||
for (int i = 0; i < input.file_format_size(); ++i) {
|
||||
if (input.file_format(i) == file_format) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) return false;
|
||||
}
|
||||
|
||||
// Check record format.
|
||||
if (input.record_format_size() > 0) {
|
||||
bool found = false;
|
||||
for (int i = 0; i < input.record_format_size(); ++i) {
|
||||
if (input.record_format(i) == record_format) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,81 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TASK_CONTEXT_H_
|
||||
#define TASK_CONTEXT_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "base.h"
|
||||
#include "cld_3/protos/task_spec.pb.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// A task context holds configuration information for a task. It is basically a
|
||||
// wrapper around a TaskSpec protocol buffer.
|
||||
class TaskContext {
|
||||
public:
|
||||
TaskContext();
|
||||
~TaskContext();
|
||||
|
||||
// Returns the underlying task specification protocol buffer for the context.
|
||||
const TaskSpec &spec() const { return spec_; }
|
||||
TaskSpec *mutable_spec() { return &spec_; }
|
||||
|
||||
// Returns a named input descriptor for the task. A new input is created if
|
||||
// the task context does not already have an input with that name.
|
||||
TaskInput *GetInput(const string &name);
|
||||
TaskInput *GetInput(const string &name, const string &file_format,
|
||||
const string &record_format);
|
||||
|
||||
// Sets task parameter.
|
||||
void SetParameter(const string &name, const string &value);
|
||||
|
||||
// Returns task parameter. If the parameter is not in the task configuration
|
||||
// the (default) value of the corresponding command line flag is returned.
|
||||
string GetParameter(const string &name) const;
|
||||
int GetIntParameter(const string &name) const;
|
||||
bool GetBoolParameter(const string &name) const;
|
||||
double GetFloatParameter(const string &name) const;
|
||||
|
||||
// Returns task parameter. If the parameter is not in the task configuration
|
||||
// the default value is returned. Parameters retrieved using these methods
|
||||
// don't need to be defined with a DEFINE_*() macro.
|
||||
string Get(const string &name, const string &defval) const;
|
||||
string Get(const string &name, const char *defval) const;
|
||||
int Get(const string &name, int defval) const;
|
||||
double Get(const string &name, double defval) const;
|
||||
bool Get(const string &name, bool defval) const;
|
||||
|
||||
// Returns input file name for a single-file task input.
|
||||
static string InputFile(const TaskInput &input);
|
||||
|
||||
// Returns true if task input supports the file and record format.
|
||||
static bool Supports(const TaskInput &input, const string &file_format,
|
||||
const string &record_format);
|
||||
|
||||
private:
|
||||
// Underlying task specification protocol buffer.
|
||||
TaskSpec spec_;
|
||||
|
||||
// Vector of parameters required by this task. These must be specified in the
|
||||
// task rather than relying on default values.
|
||||
std::vector<string> required_parameters_;
|
||||
};
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // TASK_CONTEXT_H_
|
||||
@ -0,0 +1,74 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// This file contains the hard-coded parameters from the training workflow. If
|
||||
// you update the binary model, you may need to update the variables below as
|
||||
// well.
|
||||
|
||||
#include "task_context_params.h"
|
||||
|
||||
#include "task_context.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
void TaskContextParams::ToTaskContext(TaskContext *context) {
  // Parameter name / hard-coded value pairs, stored in this order.
  const struct {
    const char *name;
    const char *value;
  } kSettings[] = {
      {"language_identifier_features", kLanguageIdentifierFeatures},
      {"language_identifier_embedding_names",
       kLanguageIdentifierEmbeddingNames},
      {"language_identifier_embedding_dims", kLanguageIdentifierEmbeddingDims},
  };

  for (const auto &setting : kSettings) {
    context->SetParameter(setting.name, setting.value);
  }
}
|
||||
|
||||
int TaskContextParams::GetNumLanguages() {
|
||||
int i = 0;
|
||||
while (kLanguageNames[i] != nullptr) {
|
||||
i++;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
// Language codes, looked up by position via language_names(i). The order is
// hard-coded from the training workflow (see the note at the top of this
// file), so entries must not be reordered.
const char *const TaskContextParams::kLanguageNames[] = {
    "eo", "co", "eu", "ta", "de", "mt", "ps", "te", "su", "uz",
    "zh-Latn", "ne", "nl", "sw", "sq", "hmn", "ja", "no", "mn", "so",
    "ko", "kk", "sl", "ig", "mr", "th", "zu", "ml", "hr", "bs",
    "lo", "sd", "cy", "hy", "uk", "pt", "lv", "iw", "cs", "vi",
    "jv", "be", "km", "mk", "tr", "fy", "am", "zh", "da", "sv",
    "fi", "ht", "af", "la", "id", "fil", "sm", "ca", "el", "ka",
    "sr", "it", "sk", "ru", "ru-Latn", "bg", "ny", "fa", "haw", "gl",
    "et", "ms", "gd", "bg-Latn", "ha", "is", "ur", "mi", "hi", "bn",
    "hi-Latn", "fr", "yi", "hu", "xh", "my", "tg", "ro", "ar", "lb",
    "el-Latn", "st", "ceb", "kn", "az", "si", "ky", "mg", "en", "gu",
    "es", "pl", "ja-Latn", "ga", "lt", "sn", "yo", "pa", "ku",

    // Terminator: GetNumLanguages() counts entries up to this nullptr.
    nullptr,
};
|
||||
|
||||
// Feature specification as one ';'-separated string. The literal is split so
// that each feature starts on its own line; the concatenated value is
// unchanged.
const char TaskContextParams::kLanguageIdentifierFeatures[] =
    "continuous-bag-of-ngrams(include_terminators=true,include_spaces=false,"
    "use_equal_weight=false,id_dim=1000,size=2);"
    "continuous-bag-of-ngrams(include_terminators=true,include_spaces=false,"
    "use_equal_weight=false,id_dim=5000,size=4);"
    "continuous-bag-of-relevant-scripts;"
    "script;"
    "continuous-bag-of-ngrams(include_terminators=true,include_spaces=false,"
    "use_equal_weight=false,id_dim=5000,size=3);"
    "continuous-bag-of-ngrams(include_terminators=true,include_spaces=false,"
    "use_equal_weight=false,id_dim=100,size=1)";
|
||||
|
||||
// ';'-separated names of the embedding spaces.
// NOTE(review): the six names appear to line up one-to-one with the six
// features in kLanguageIdentifierFeatures -- confirm before reordering.
const char TaskContextParams::kLanguageIdentifierEmbeddingNames[] =
    "bigrams;"
    "quadgrams;"
    "relevant-scripts;"
    "text-script;"
    "trigrams;"
    "unigrams";

// ';'-separated dimensions of the embedding spaces, one entry per name above.
const char TaskContextParams::kLanguageIdentifierEmbeddingDims[] =
    "16;"
    "16;"
    "8;"
    "8;"
    "16;"
    "16";
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,54 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TASK_CONTEXT_PARAMS_H_
|
||||
#define TASK_CONTEXT_PARAMS_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base.h"
|
||||
#include "task_context.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// Encapsulates the TaskContext specifying only the parameters for the model.
|
||||
// The model weights are loaded statically.
|
||||
class TaskContextParams {
|
||||
public:
|
||||
// Gets the name of the i'th language.
|
||||
static const char *language_names(int i) { return kLanguageNames[i]; }
|
||||
|
||||
// Saves the parameters to the given TaskContext.
|
||||
static void ToTaskContext(TaskContext *context);
|
||||
|
||||
// Gets the number of languages.
|
||||
static int GetNumLanguages();
|
||||
|
||||
private:
|
||||
// Names of all the languages.
|
||||
static const char *const kLanguageNames[];
|
||||
|
||||
// Features in FML format.
|
||||
static const char kLanguageIdentifierFeatures[];
|
||||
|
||||
// Names of the embedding spaces.
|
||||
static const char kLanguageIdentifierEmbeddingNames[];
|
||||
|
||||
// Dimensions of the embedding spaces.
|
||||
static const char kLanguageIdentifierEmbeddingDims[];
|
||||
};
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // TASK_CONTEXT_PARAMS_H_
|
||||
@ -0,0 +1,96 @@
|
||||
// Copyright (C) 2006 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Author: Jim Meehan
|
||||
|
||||
#include "unicodetext.h"
|
||||
|
||||
#include "base.h"
|
||||
#include "utils.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// *************** Data representation **********
|
||||
// Note: the copy constructor is undefined.
|
||||
|
||||
void UnicodeText::Repr::PointTo(const char *data, int size) {
  // Free the previous buffer only when this Repr owned it. delete[] on a null
  // pointer is a no-op, so no separate null test is needed.
  if (ours_) delete[] data_;

  // From now on the bytes are borrowed from the caller rather than owned.
  ours_ = false;
  data_ = const_cast<char *>(data);
  size_ = size;
  capacity_ = size;
}
|
||||
|
||||
// *************** UnicodeText ******************
|
||||
|
||||
// Creates an empty text; repr_ is set up by its own default constructor.
UnicodeText::UnicodeText() = default;

// Turns this text into an alias of the caller's buffer (no copy is made) and
// returns *this.
UnicodeText &UnicodeText::PointToUTF8(const char *buffer, int byte_length) {
  repr_.PointTo(buffer, byte_length);
  return *this;
}

// Nothing to do here: repr_ cleans up after itself in its destructor.
UnicodeText::~UnicodeText() = default;
|
||||
|
||||
// ******************* UnicodeText::const_iterator *********************
|
||||
|
||||
// The implementation of const_iterator would be nicer if it
|
||||
// inherited from boost::iterator_facade
|
||||
// (http://boost.org/libs/iterator/doc/iterator_facade.html).
|
||||
|
||||
// A default-constructed iterator holds a null position and must be assigned
// before it is dereferenced or advanced.
UnicodeText::const_iterator::const_iterator() : it_(nullptr) {}

// Copies the position of |other|. The class declares this constructor, so it
// needs a definition here; without one, any copy of an iterator that the
// compiler does not elide fails at link time.
UnicodeText::const_iterator::const_iterator(const const_iterator &other)
    : it_(other.it_) {}
|
||||
|
||||
UnicodeText::const_iterator &UnicodeText::const_iterator::operator=(
    const const_iterator &other) {
  // The iterator is a single pointer, so plain assignment is also correct for
  // self-assignment.
  it_ = other.it_;
  return *this;
}
|
||||
|
||||
// Iterator positioned at the first byte of the text.
UnicodeText::const_iterator UnicodeText::begin() const {
  const char *first = repr_.data_;
  return const_iterator(first);
}

// Iterator positioned one past the last byte of the text.
UnicodeText::const_iterator UnicodeText::end() const {
  const char *past_last = repr_.data_ + repr_.size_;
  return const_iterator(past_last);
}
|
||||
|
||||
char32 UnicodeText::const_iterator::operator*() const {
  // Decodes the UTF-8 sequence at the current position by hand. No validation
  // is done: the text is required to be valid UTF-8, and this routine is
  // expected to be called very often. Continuation bytes are only read when
  // the lead byte says they exist.
  const unsigned char *bytes = reinterpret_cast<const unsigned char *>(it_);
  const unsigned char lead = bytes[0];

  // One byte: plain ASCII.
  if (lead < 0x80) return lead;

  // Two bytes: 5 payload bits in the lead byte, 6 in the continuation.
  if (lead < 0xE0) return ((lead & 0x1F) << 6) | (bytes[1] & 0x3F);

  // Three bytes: 4 + 6 + 6 payload bits.
  if (lead < 0xF0) {
    return ((lead & 0x0F) << 12) | ((bytes[1] & 0x3F) << 6) |
           (bytes[2] & 0x3F);
  }

  // Four bytes: 3 + 6 + 6 + 6 payload bits.
  return ((lead & 0x07) << 18) | ((bytes[1] & 0x3F) << 12) |
         ((bytes[2] & 0x3F) << 6) | (bytes[3] & 0x3F);
}
|
||||
|
||||
UnicodeText::const_iterator &UnicodeText::const_iterator::operator++() {
  // Skip over the whole character at the current position; its byte length is
  // derived from the lead byte by OneCharLen().
  const int char_bytes = chrome_lang_id::utils::OneCharLen(it_);
  it_ += char_bytes;
  return *this;
}
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,144 @@
|
||||
// Copyright (C) 2006 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Author: Jim Meehan
|
||||
|
||||
#ifndef UNICODETEXT_H_
|
||||
#define UNICODETEXT_H_
|
||||
|
||||
#include <iterator>
|
||||
#include <utility>
|
||||
|
||||
#include "base.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// ***************************** UnicodeText **************************
|
||||
//
|
||||
// A UnicodeText object is a wrapper around a sequence of Unicode
|
||||
// codepoint values that allows iteration over these values.
|
||||
//
|
||||
// The internal representation of the text is UTF-8. Since UTF-8 is a
|
||||
// variable-width format, UnicodeText does not provide random access
|
||||
// to the text, and changes to the text are permitted only at the end.
|
||||
//
|
||||
// The UnicodeText class defines a const_iterator. The dereferencing
|
||||
// operator (*) returns a codepoint (int32). The iterator is a
|
||||
// read-only iterator. It becomes invalid if the text is changed.
|
||||
//
|
||||
// Codepoints are integers in the range [0, 0xD7FF] or [0xE000,
|
||||
// 0x10FFFF], but UnicodeText has the additional restriction that it
|
||||
// can contain only those characters that are valid for interchange on
|
||||
// the Web. This excludes all of the control codes except for carriage
|
||||
// return, line feed, and horizontal tab. It also excludes
|
||||
// non-characters, but codepoints that are in the Private Use regions
|
||||
// are allowed, as are codepoints that are unassigned. (See the
|
||||
// Unicode reference for details.)
|
||||
//
|
||||
// MEMORY MANAGEMENT:
|
||||
//
|
||||
// PointToUTF8(buffer, size) creates an alias pointing to buffer.
|
||||
//
|
||||
// The purpose of an alias is to avoid making an unnecessary copy of a
|
||||
// UTF-8 buffer while still providing access to the Unicode values
|
||||
// within that text through iterators. The lifetime of an alias must not
|
||||
// exceed the lifetime of the buffer from which it was constructed.
|
||||
//
|
||||
// Aliases should be used with care. If the source from which an alias
|
||||
// was created is freed, or if the contents are changed, while the
|
||||
// alias is still in use, fatal errors could result. But it can be
|
||||
// quite useful to have a UnicodeText "window" through which to see a
|
||||
// UTF-8 buffer without having to pay the price of making a copy.
|
||||
|
||||
// TODO(abakalov): Consider merging this class with the script detection
|
||||
// code in the directory script_span.
|
||||
class UnicodeText {
|
||||
public:
|
||||
class const_iterator;
|
||||
|
||||
UnicodeText(); // Create an empty text.
|
||||
~UnicodeText();
|
||||
|
||||
class const_iterator {
|
||||
typedef const_iterator CI;
|
||||
|
||||
public:
|
||||
// Iterators are default-constructible.
|
||||
const_iterator();
|
||||
|
||||
// It's safe to make multiple passes over a UnicodeText.
|
||||
const_iterator(const const_iterator &other);
|
||||
const_iterator &operator=(const const_iterator &other);
|
||||
|
||||
char32 operator*() const; // Dereference
|
||||
|
||||
const_iterator &operator++(); // Advance (++iter)
|
||||
|
||||
friend bool operator==(const CI &lhs, const CI &rhs) {
|
||||
return lhs.it_ == rhs.it_;
|
||||
}
|
||||
friend bool operator!=(const CI &lhs, const CI &rhs) {
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
private:
|
||||
friend class UnicodeText;
|
||||
explicit const_iterator(const char *it) : it_(it) {}
|
||||
|
||||
const char *it_;
|
||||
};
|
||||
|
||||
const_iterator begin() const;
|
||||
const_iterator end() const;
|
||||
|
||||
// x.PointToUTF8(buf,len) changes x so that it points to buf
|
||||
// ("becomes an alias"). It does not take ownership or copy buf.
|
||||
// This function assumes that the input is interchange valid UTF8.
|
||||
UnicodeText &PointToUTF8(const char *utf8_buffer, int byte_length);
|
||||
|
||||
private:
|
||||
friend class const_iterator;
|
||||
|
||||
class Repr { // A byte-string.
|
||||
public:
|
||||
char *data_;
|
||||
int size_;
|
||||
int capacity_;
|
||||
bool ours_; // Do we own data_?
|
||||
|
||||
Repr() : data_(NULL), size_(0), capacity_(0), ours_(true) {}
|
||||
~Repr() {
|
||||
if (ours_) delete[] data_;
|
||||
}
|
||||
|
||||
void clear();
|
||||
void reserve(int capacity);
|
||||
void resize(int size);
|
||||
|
||||
void append(const char *bytes, int byte_length);
|
||||
void Copy(const char *data, int size);
|
||||
void TakeOwnershipOf(char *data, int size, int capacity);
|
||||
void PointTo(const char *data, int size);
|
||||
|
||||
private:
|
||||
Repr &operator=(const Repr &);
|
||||
Repr(const Repr &other);
|
||||
};
|
||||
|
||||
Repr repr_;
|
||||
};
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // UNICODETEXT_H_
|
||||
@ -0,0 +1,241 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "script_span/stringpiece.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace utils {
|
||||
|
||||
// Parses c_str as an integer; the base is auto-detected (decimal, 0x... hex,
// 0... octal). *value is always written. Returns true only when at least one
// digit was converted, nothing follows the number, and the number fits in an
// int. When false is returned the content of *value should not be relied on.
bool ParseInt32(const char *c_str, int *value) {
  char *end = nullptr;
  // Parse into a type wider than int so that out-of-range input can be
  // detected rather than silently truncated.
  const long long parsed = strtoll(c_str, &end, 0);  // NOLINT
  const int narrowed = static_cast<int>(parsed);
  *value = narrowed;

  // No characters consumed: empty input or no leading number at all.
  if (end == c_str) return false;
  // The value does not survive the narrowing, so it is out of range for int.
  if (static_cast<long long>(narrowed) != parsed) return false;  // NOLINT
  // Anything left after the number makes the input invalid.
  return *end == '\0';
}
|
||||
|
||||
// Parses c_str as a floating-point number. *value is always written with
// whatever strtod() produced. Returns true only when a number was actually
// converted and nothing follows it.
bool ParseDouble(const char *c_str, double *value) {
  char *end = nullptr;
  *value = strtod(c_str, &end);

  // No characters consumed: empty input or no leading number at all.
  if (end == c_str) return false;
  // Anything left after the number makes the input invalid.
  return *end == '\0';
}
|
||||
|
||||
// Digit characters used when writing numeric escapes. CEscape() only indexes
// entries 0-7, because it writes octal digits.
static const char hex_char[] = "0123456789abcdef";

// Returns a copy of src that is safe to place inside a C string literal:
// newline, carriage return, tab, both quote characters and the backslash get
// their usual backslash escapes; every other byte that is non-ASCII or not
// printable is written as a backslash followed by exactly three octal digits.
string CEscape(const string &src) {
  string escaped;

  for (size_t i = 0; i < src.size(); ++i) {
    const unsigned char byte = static_cast<unsigned char>(src[i]);

    if (byte == '\n') {
      escaped += "\\n";
    } else if (byte == '\r') {
      escaped += "\\r";
    } else if (byte == '\t') {
      escaped += "\\t";
    } else if (byte == '\"') {
      escaped += "\\\"";
    } else if (byte == '\'') {
      escaped += "\\'";
    } else if (byte == '\\') {
      escaped += "\\\\";
    } else if ((byte >= 0x80) || !isprint(byte)) {
      // Always three octal digits, so a digit that follows in src cannot be
      // mistaken for part of the escape.
      escaped += "\\";
      escaped += hex_char[byte / 64];
      escaped += hex_char[(byte % 64) / 8];
      escaped += hex_char[byte % 8];
    } else {
      escaped += static_cast<char>(byte);
    }
  }

  return escaped;
}
|
||||
|
||||
// Splits text at every occurrence of delim. Empty text gives an empty vector;
// otherwise empty tokens (between adjacent delimiters, or at either end) are
// kept.
std::vector<string> Split(const string &text, char delim) {
  std::vector<string> pieces;
  if (text.empty()) return pieces;

  size_t piece_begin = 0;
  for (;;) {
    const size_t hit = text.find(delim, piece_begin);
    if (hit == string::npos) {
      // Last piece: everything after the final delimiter (possibly empty).
      pieces.push_back(text.substr(piece_begin));
      break;
    }
    pieces.push_back(text.substr(piece_begin, hit - piece_begin));
    piece_begin = hit + 1;
  }
  return pieces;
}
|
||||
|
||||
// Strips whitespace (as classified by isspace) from the front of *text and
// returns the number of characters removed.
int RemoveLeadingWhitespace(StringPiece *text) {
  int count = 0;
  const char *ptr = text->data();
  // isspace() requires a value representable as unsigned char (or EOF).
  // Passing a plain char is undefined behaviour for bytes >= 0x80 on
  // platforms where char is signed, hence the cast.
  while (count < text->size() &&
         isspace(static_cast<unsigned char>(*ptr))) {
    count++;
    ptr++;
  }
  text->remove_prefix(count);
  return count;
}
|
||||
|
||||
// Strips whitespace (as classified by isspace) from the back of *text and
// returns the number of characters removed.
int RemoveTrailingWhitespace(StringPiece *text) {
  int count = 0;
  // Work from the one-past-the-end position, so that no pointer in front of
  // the buffer is ever formed (which the old "data + size - 1" did for an
  // empty text).
  const char *end = text->data() + text->size();
  // isspace() requires a value representable as unsigned char (or EOF).
  // Passing a plain char is undefined behaviour for bytes >= 0x80 on
  // platforms where char is signed, hence the cast.
  while (count < text->size() &&
         isspace(static_cast<unsigned char>(*(end - count - 1)))) {
    ++count;
  }
  text->remove_suffix(count);
  return count;
}
|
||||
|
||||
// Strips whitespace from both ends of *text and returns the total number of
// characters removed.
int RemoveWhitespaceContext(StringPiece *text) {
  // The total is the same whichever end is trimmed first, so the two calls
  // are simply made one after the other.
  const int removed_front = RemoveLeadingWhitespace(text);
  const int removed_back = RemoveTrailingWhitespace(text);
  return removed_front + removed_back;
}
|
||||
|
||||
namespace {
// Assembles a 32-bit value from the four bytes at ptr, with ptr[0] as the
// least significant byte. Performs no bounds checking.
inline uint32 DecodeFixed32(const char *ptr) {
  uint32 word = 0;
  for (int i = 3; i >= 0; --i) {
    word = (word << 8) |
           static_cast<uint32>(static_cast<unsigned char>(ptr[i]));
  }
  return word;
}

// Widens one byte to 32 bits without sign extension (char may be signed).
static inline uint32 ByteAs32(char c) {
  return static_cast<uint32>(static_cast<unsigned char>(c));
}
}  // namespace

// Returns a 32-bit hash of the n bytes starting at data, mixed with seed.
uint32 Hash32(const char *data, size_t n, uint32 seed) {
  // Mixing constants, generated offline. They are not really 'magic'; they
  // just happen to work well.
  const uint32 kMul = 0x5bd1e995;
  const int kShift = 24;

  // Start from a value that depends on both the seed and the length.
  uint32 hash = static_cast<uint32>(seed ^ n);

  // Body: fold the input into the hash four bytes at a time.
  for (; n >= 4; n -= 4, data += 4) {
    uint32 block = DecodeFixed32(data) * kMul;
    block ^= block >> kShift;
    block *= kMul;
    hash = (hash * kMul) ^ block;
  }

  // Tail: fold in the remaining 1-3 bytes, highest byte first.
  switch (n) {
    case 3:
      hash ^= ByteAs32(data[2]) << 16;
      // fall through
    case 2:
      hash ^= ByteAs32(data[1]) << 8;
      // fall through
    case 1:
      hash ^= ByteAs32(data[0]);
      hash *= kMul;
      break;
    default:
      break;
  }

  // Final mixing, so that the last few bytes are well incorporated.
  hash ^= hash >> 13;
  hash *= kMul;
  hash ^= hash >> 15;
  return hash;
}
|
||||
|
||||
// Hashes the bytes of input with Hash32() using the fixed seed 0xBEEF.
uint32 Hash32WithDefaultSeed(const string &input) {
  const uint32 kDefaultSeed = 0xBEEF;
  return Hash32(input.data(), input.size(), kDefaultSeed);
}
|
||||
|
||||
// Inclusive {first, last} code point ranges treated as punctuation, in
// ascending order. PunctuationUtil::IsPunctuation() scans this table from the
// start, stops early once the queried value is below a range's first code
// point, and stops at the first entry whose `first` is not positive -- so
// the table must stay sorted and must end with the {-1, -1} sentinel.
PunctuationUtil::CharacterRange PunctuationUtil::kPunctuation[] = {
    {33, 35},       {37, 42},       {44, 47},       {58, 59},
    {63, 64},       {91, 93},       {95, 95},       {123, 123},
    {125, 125},     {161, 161},     {171, 171},     {183, 183},
    {187, 187},     {191, 191},     {894, 894},     {903, 903},
    {1370, 1375},   {1417, 1418},   {1470, 1470},   {1472, 1472},
    {1475, 1475},   {1478, 1478},   {1523, 1524},   {1548, 1549},
    {1563, 1563},   {1566, 1567},   {1642, 1645},   {1748, 1748},
    {1792, 1805},   {2404, 2405},   {2416, 2416},   {3572, 3572},
    {3663, 3663},   {3674, 3675},   {3844, 3858},   {3898, 3901},
    {3973, 3973},   {4048, 4049},   {4170, 4175},   {4347, 4347},
    {4961, 4968},   {5741, 5742},   {5787, 5788},   {5867, 5869},
    {5941, 5942},   {6100, 6102},   {6104, 6106},   {6144, 6154},
    {6468, 6469},   {6622, 6623},   {6686, 6687},   {8208, 8231},
    {8240, 8259},   {8261, 8273},   {8275, 8286},   {8317, 8318},
    {8333, 8334},   {9001, 9002},   {9140, 9142},   {10088, 10101},
    {10181, 10182}, {10214, 10219}, {10627, 10648}, {10712, 10715},
    {10748, 10749}, {11513, 11516}, {11518, 11519}, {11776, 11799},
    {11804, 11805}, {12289, 12291}, {12296, 12305}, {12308, 12319},
    {12336, 12336}, {12349, 12349}, {12448, 12448}, {12539, 12539},
    {64830, 64831}, {65040, 65049}, {65072, 65106}, {65108, 65121},
    {65123, 65123}, {65128, 65128}, {65130, 65131}, {65281, 65283},
    {65285, 65290}, {65292, 65295}, {65306, 65307}, {65311, 65312},
    {65339, 65341}, {65343, 65343}, {65371, 65371}, {65373, 65373},
    {65375, 65381}, {65792, 65793}, {66463, 66463}, {68176, 68184},

    // Sentinel marking the end of the table.
    {-1, -1}};
|
||||
|
||||
// Replaces every ASCII digit in *form with '9', in place.
void NormalizeDigits(string *form) {
  for (char &ch : *form) {
    const bool is_ascii_digit = (ch >= '0' && ch <= '9');
    if (is_ascii_digit) ch = '9';
  }
}
|
||||
|
||||
// Returns the byte length (1-4) of the UTF-8 character starting at src, based
// on the upper four bits of its first byte only.
int OneCharLen(const char *src) {
  // Length indexed by the high nibble of the lead byte. Nibbles 0x0-0xB map
  // to one byte, 0xC-0xD to two, 0xE to three and 0xF to four.
  static const char kLengthByHighNibble[] =
      "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\3\4";
  // char may be signed, so read the byte through an unsigned char pointer.
  const unsigned char lead = *reinterpret_cast<const unsigned char *>(src);
  return kLengthByHighNibble[lead >> 4];
}

// Like OneCharLen(), except that a string starting with a NUL byte has
// length 0.
int UTF8FirstLetterNumBytes(const char *utf8_str) {
  return (*utf8_str == '\0') ? 0 : OneCharLen(utf8_str);
}

// Appends each UTF-8 character of text to *chars as its own string.
//
// The character length comes from OneCharLen(), which is never 0, so an
// embedded NUL byte is emitted as a one-byte character; the earlier version
// used UTF8FirstLetterNumBytes(), got length 0 there and never advanced. The
// length is also clamped to the bytes that remain, so a truncated sequence at
// the end of text cannot make the copy read past the end of the buffer.
void GetUTF8Chars(const string &text, std::vector<string> *chars) {
  const char *cursor = text.data();
  const char *const end = cursor + text.size();
  while (cursor < end) {
    const size_t remaining = static_cast<size_t>(end - cursor);
    size_t char_length = static_cast<size_t>(OneCharLen(cursor));
    if (char_length > remaining) char_length = remaining;
    chars->emplace_back(cursor, char_length);
    cursor += char_length;
  }
}
|
||||
|
||||
} // namespace utils
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,144 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef UTILS_H_
|
||||
#define UTILS_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <functional>
|
||||
#include <initializer_list>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "base.h"
|
||||
#include "script_span/stringpiece.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
namespace utils {
|
||||
|
||||
// Number parsers. Each stores the converted number in *value and returns
// false when characters are left over after the parsed number.
// ParseInt32 auto-detects the base from the prefix (base 0).
bool ParseInt32(const char *c_str, int *value);
bool ParseDouble(const char *c_str, double *value);
|
||||
|
||||
// Converts str to a T by calling func(str.c_str(), &value) and returns value.
// The success flag returned by func is ignored. value is value-initialized
// first, so a func that fails without writing its output yields T() rather
// than an indeterminate value.
template <typename T>
T ParseUsing(const string &str, std::function<bool(const char *, T *)> func) {
  T value = T();
  func(str.c_str(), &value);
  return value;
}

// Same as above, except that an empty str is not handed to func at all:
// defval is returned for it instead.
template <typename T>
T ParseUsing(const string &str, T defval,
             std::function<bool(const char *, T *)> func) {
  if (str.empty()) return defval;
  return ParseUsing<T>(str, func);
}
|
||||
|
||||
string CEscape(const string &src);
|
||||
|
||||
std::vector<string> Split(const string &text, char delim);
|
||||
|
||||
int RemoveLeadingWhitespace(StringPiece *text);
|
||||
|
||||
int RemoveTrailingWhitespace(StringPiece *text);
|
||||
|
||||
int RemoveWhitespaceContext(StringPiece *text);
|
||||
|
||||
uint32 Hash32(const char *data, size_t n, uint32 seed);
|
||||
|
||||
uint32 Hash32WithDefaultSeed(const string &input);
|
||||
|
||||
// Deletes all the elements in an STL container and clears the container. This
|
||||
// function is suitable for use with a vector, set, hash_set, or any other STL
|
||||
// container which defines sensible begin(), end(), and clear() methods.
|
||||
// If container is NULL, this function is a no-op.
|
||||
template <typename T>
void STLDeleteElements(T *container) {
  if (container == nullptr) return;

  // Step the iterator past an element before deleting it, so the iterator is
  // never advanced from an element that has already been freed.
  for (auto it = container->begin(); it != container->end();) {
    auto doomed = *it;
    ++it;
    delete doomed;
  }
  container->clear();
}
|
||||
|
||||
class PunctuationUtil {
 public:
  // An inclusive range [first, last] of Unicode code points. The ranges in
  // kPunctuation are the punctuation characters according to CoNLL.
  struct CharacterRange {
    int first;
    int last;
  };
  static CharacterRange kPunctuation[];

  // Returns true if the Unicode character u is a punctuation character. The
  // scan of kPunctuation stops at the first range whose `first` is not
  // positive, and gives up early once u lies below the start of a range.
  static bool IsPunctuation(int u) {
    for (const CharacterRange *range = kPunctuation; range->first > 0;
         ++range) {
      if (u < range->first) return false;
      if (u <= range->last) return true;
    }
    return false;
  }

  // Returns true if tag consists only of the characters , : . ' `
  // An empty tag also counts as a punctuation tag.
  static bool IsPunctuationTag(const string &tag) {
    return tag.find_first_not_of(",:.'`") == string::npos;
  }

  // Returns true if tag is non-empty and consists only of punctuation or
  // parenthesis symbols, i.e. the characters ( ) , : . ' `
  static bool IsPunctuationTagOrParens(const string &tag) {
    return !tag.empty() && tag.find_first_not_of("(),:.'`") == string::npos;
  }
};
|
||||
|
||||
void NormalizeDigits(string *form);
|
||||
|
||||
// Takes a text and convert it into a vector, where each element is a utf8
|
||||
// character.
|
||||
void GetUTF8Chars(const string &text, std::vector<string> *chars);
|
||||
|
||||
// Returns the number of bytes in the first UTF-8 char at the beginning
|
||||
// of the string. It is assumed that the string is valid UTF-8. If
|
||||
// the first byte of the string is null, return 0 (for backwards
|
||||
// compatibility only; this use is discouraged).
|
||||
int UTF8FirstLetterNumBytes(const char *in_buf);
|
||||
|
||||
// Returns the length (number of bytes) of the Unicode code point starting at
|
||||
// src, based on inspecting just that one byte. Preconditions: src != NULL,
|
||||
// *src can be read, and *src is not '\0', and src points to a well-formed UTF-8
|
||||
// string.
|
||||
int OneCharLen(const char *src);
|
||||
|
||||
} // namespace utils
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // UTILS_H_
|
||||
@ -0,0 +1,64 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "workspace.h"
|
||||
|
||||
#include "base.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
WorkspaceSet::WorkspaceSet() {}
|
||||
|
||||
WorkspaceSet::~WorkspaceSet() { Reset(WorkspaceRegistry()); }
|
||||
|
||||
WorkspaceRegistry::WorkspaceRegistry() {}
|
||||
|
||||
WorkspaceRegistry::~WorkspaceRegistry() {}
|
||||
|
||||
// Builds a listing with one "\n <type name> :: <workspace name>" entry per
// registered workspace name.
string WorkspaceRegistry::DebugString() const {
  string listing;
  for (const auto &entry : workspace_names_) {
    // at() throws if a key of workspace_names_ has no matching type name.
    const string &type_name = workspace_types_.at(entry.first);
    for (const string &workspace_name : entry.second) {
      listing.append("\n ");
      listing.append(type_name);
      listing.append(" :: ");
      listing.append(workspace_name);
    }
  }
  return listing;
}
|
||||
|
||||
// The enclosed vector releases its own storage.
VectorIntWorkspace::~VectorIntWorkspace() = default;
|
||||
|
||||
// Creates a workspace whose vector has `size` value-initialized elements.
VectorIntWorkspace::VectorIntWorkspace(int size)
    : elements_(static_cast<std::vector<int>::size_type>(size)) {}
|
||||
|
||||
// Creates a workspace whose vector has `size` elements, each equal to `value`.
VectorIntWorkspace::VectorIntWorkspace(int size, int value)
    : elements_(static_cast<std::vector<int>::size_type>(size), value) {}
|
||||
|
||||
// Creates a workspace holding a copy of `elements`.
VectorIntWorkspace::VectorIntWorkspace(const std::vector<int> &elements)
    : elements_(elements) {
}
|
||||
|
||||
// Human-readable name of this workspace type.
string VectorIntWorkspace::TypeName() {
  return string("Vector");
}
|
||||
|
||||
// The enclosed vectors release their own storage.
VectorVectorIntWorkspace::~VectorVectorIntWorkspace() = default;
|
||||
|
||||
// Creates a workspace holding `size` empty inner vectors.
VectorVectorIntWorkspace::VectorVectorIntWorkspace(int size)
    : elements_(static_cast<std::vector<std::vector<int>>::size_type>(size)) {}
|
||||
|
||||
// Human-readable name of this workspace type.
string VectorVectorIntWorkspace::TypeName() {
  return string("VectorVector");
}
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
@ -0,0 +1,177 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// Notes on thread-safety: All of the classes here are thread-compatible. More
|
||||
// specifically, the registry machinery is thread-safe, as long as each thread
|
||||
// performs feature extraction on a different Sentence object.
|
||||
|
||||
#ifndef WORKSPACE_H_
|
||||
#define WORKSPACE_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string>
|
||||
#include <typeindex>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "base.h"
|
||||
|
||||
namespace chrome_lang_id {
|
||||
|
||||
// A base class for shared workspaces. Derived classes implement a static member
|
||||
// function TypeName() which returns a human readable string name for the class.
|
||||
class Workspace {
|
||||
public:
|
||||
// Polymorphic destructor.
|
||||
virtual ~Workspace() {}
|
||||
|
||||
protected:
|
||||
// Create an empty workspace.
|
||||
Workspace() {}
|
||||
|
||||
private:
|
||||
CLD3_DISALLOW_COPY_AND_ASSIGN(Workspace);
|
||||
};
|
||||
|
||||
// A registry that keeps track of workspaces.
|
||||
class WorkspaceRegistry {
|
||||
public:
|
||||
// Create an empty registry.
|
||||
WorkspaceRegistry();
|
||||
~WorkspaceRegistry();
|
||||
|
||||
const std::unordered_map<std::type_index, std::vector<std::string>>
|
||||
&WorkspaceNames() const {
|
||||
return workspace_names_;
|
||||
}
|
||||
|
||||
// Returns a string describing the registered workspaces.
|
||||
string DebugString() const;
|
||||
|
||||
private:
|
||||
// Workspace type names, indexed as workspace_types_[typeid].
|
||||
std::unordered_map<std::type_index, string> workspace_types_;
|
||||
|
||||
// Workspace names, indexed as workspace_names_[typeid][workspace].
|
||||
std::unordered_map<std::type_index, std::vector<string>> workspace_names_;
|
||||
|
||||
CLD3_DISALLOW_COPY_AND_ASSIGN(WorkspaceRegistry);
|
||||
};
|
||||
|
||||
// A typed collected of workspaces. The workspaces are indexed according to an
|
||||
// external WorkspaceRegistry. If the WorkspaceSet is const, the contents are
|
||||
// also immutable.
|
||||
class WorkspaceSet {
|
||||
public:
|
||||
WorkspaceSet();
|
||||
~WorkspaceSet();
|
||||
|
||||
void Reset(const WorkspaceRegistry ®istry) {
|
||||
// Deallocate current workspaces.
|
||||
for (auto &it : workspaces_) {
|
||||
for (size_t index = 0; index < it.second.size(); ++index) {
|
||||
delete it.second[index];
|
||||
}
|
||||
}
|
||||
workspaces_.clear();
|
||||
|
||||
// Allocate space for new workspaces.
|
||||
for (auto &it : registry.WorkspaceNames()) {
|
||||
workspaces_[it.first].resize(it.second.size());
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// The set of workspaces, indexed as workspaces_[typeid][index].
|
||||
std::unordered_map<std::type_index, std::vector<Workspace *>> workspaces_;
|
||||
};
|
||||
|
||||
// A workspace that wraps around a single int.
|
||||
class SingletonIntWorkspace : public Workspace {
|
||||
public:
|
||||
// Default-initializes the int value.
|
||||
SingletonIntWorkspace() {}
|
||||
|
||||
// Initializes the int with the given value.
|
||||
explicit SingletonIntWorkspace(int value) : value_(value) {}
|
||||
|
||||
// Returns the name of this type of workspace.
|
||||
static string TypeName() { return "SingletonInt"; }
|
||||
|
||||
// Returns the int value.
|
||||
int get() const { return value_; }
|
||||
|
||||
// Sets the int value.
|
||||
void set(int value) { value_ = value; }
|
||||
|
||||
private:
|
||||
// The enclosed int.
|
||||
int value_ = 0;
|
||||
};
|
||||
|
||||
// A workspace that wraps around a vector of int.
|
||||
class VectorIntWorkspace : public Workspace {
|
||||
public:
|
||||
// Creates a vector of the given size.
|
||||
explicit VectorIntWorkspace(int size);
|
||||
|
||||
// Creates a vector initialized with the given array.
|
||||
explicit VectorIntWorkspace(const std::vector<int> &elements);
|
||||
|
||||
// Creates a vector of the given size, with each element initialized to the
|
||||
// given value.
|
||||
VectorIntWorkspace(int size, int value);
|
||||
|
||||
~VectorIntWorkspace() override;
|
||||
|
||||
// Returns the name of this type of workspace.
|
||||
static string TypeName();
|
||||
|
||||
// Returns the i'th element.
|
||||
int element(int i) const { return elements_[i]; }
|
||||
|
||||
// Sets the i'th element.
|
||||
void set_element(int i, int value) { elements_[i] = value; }
|
||||
|
||||
private:
|
||||
// The enclosed vector.
|
||||
std::vector<int> elements_;
|
||||
};
|
||||
|
||||
// A workspace that wraps around a vector of vector of int.
|
||||
class VectorVectorIntWorkspace : public Workspace {
|
||||
public:
|
||||
// Creates a vector of empty vectors of the given size.
|
||||
explicit VectorVectorIntWorkspace(int size);
|
||||
~VectorVectorIntWorkspace() override;
|
||||
|
||||
// Returns the name of this type of workspace.
|
||||
static string TypeName();
|
||||
|
||||
// Returns the i'th vector of elements.
|
||||
const std::vector<int> &elements(int i) const { return elements_[i]; }
|
||||
|
||||
// Mutable access to the i'th vector of elements.
|
||||
std::vector<int> *mutable_elements(int i) { return &(elements_[i]); }
|
||||
|
||||
private:
|
||||
// The enclosed vector of vector of elements.
|
||||
std::vector<std::vector<int>> elements_;
|
||||
};
|
||||
|
||||
} // namespace chrome_lang_id
|
||||
|
||||
#endif // WORKSPACE_H_
|
||||
@ -0,0 +1,123 @@
|
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <google/protobuf/any.h>
|
||||
|
||||
#include <google/protobuf/generated_message_util.h>
|
||||
|
||||
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
namespace internal {
|
||||
|
||||
namespace {
|
||||
// Joins `type_url_prefix` and the message's full name with exactly one "/"
// between them when the prefix is non-empty. An empty prefix yields
// "/<full name>".
string GetTypeUrl(const Descriptor* message,
                  const string& type_url_prefix) {
  const bool prefix_ends_with_slash =
      !type_url_prefix.empty() &&
      type_url_prefix[type_url_prefix.size() - 1] == '/';
  if (prefix_ends_with_slash) {
    return type_url_prefix + message->full_name();
  }
  return type_url_prefix + "/" + message->full_name();
}
|
||||
} // namespace
|
||||
|
||||
const char kAnyFullTypeName[] = "google.protobuf.Any";
|
||||
const char kTypeGoogleApisComPrefix[] = "type.googleapis.com/";
|
||||
const char kTypeGoogleProdComPrefix[] = "type.googleprod.com/";
|
||||
|
||||
// Remembers the two string fields this helper reads and writes; the pointers
// are stored as given.
AnyMetadata::AnyMetadata(UrlType* type_url, ValueType* value)
    : type_url_(type_url),
      value_(value) {}
|
||||
|
||||
void AnyMetadata::PackFrom(const Message& message) {
|
||||
PackFrom(message, kTypeGoogleApisComPrefix);
|
||||
}
|
||||
|
||||
void AnyMetadata::PackFrom(const Message& message,
|
||||
const string& type_url_prefix) {
|
||||
type_url_->SetNoArena(&::google::protobuf::internal::GetEmptyString(),
|
||||
GetTypeUrl(message.GetDescriptor(), type_url_prefix));
|
||||
message.SerializeToString(value_->MutableNoArena(
|
||||
&::google::protobuf::internal::GetEmptyStringAlreadyInited()));
|
||||
}
|
||||
|
||||
// Parses the stored payload into `message`. Returns false without touching
// `message` when the stored type URL does not name the message's type, and
// otherwise returns the result of parsing.
bool AnyMetadata::UnpackTo(Message* message) const {
  return InternalIs(message->GetDescriptor()) &&
         message->ParseFromString(value_->GetNoArena());
}
|
||||
|
||||
bool AnyMetadata::InternalIs(const Descriptor* descriptor) const {
|
||||
const string type_url = type_url_->GetNoArena();
|
||||
string full_name;
|
||||
if (!ParseAnyTypeUrl(type_url, &full_name)) {
|
||||
return false;
|
||||
}
|
||||
return full_name == descriptor->full_name();
|
||||
}
|
||||
|
||||
// Splits `type_url` at its last "/". On success stores everything up to and
// including that slash in *url_prefix (when url_prefix is non-null) and the
// remainder in *full_type_name. Returns false, leaving the outputs untouched,
// when there is no "/" or nothing follows the last one.
bool ParseAnyTypeUrl(const string& type_url, string* url_prefix,
                     string* full_type_name) {
  const size_t last_slash = type_url.rfind('/');
  if (last_slash == string::npos) {
    return false;
  }
  const size_t name_start = last_slash + 1;
  if (name_start == type_url.size()) {
    return false;
  }
  if (url_prefix != nullptr) {
    *url_prefix = type_url.substr(0, name_start);
  }
  *full_type_name = type_url.substr(name_start);
  return true;
}
|
||||
|
||||
bool ParseAnyTypeUrl(const string& type_url, string* full_type_name) {
|
||||
return ParseAnyTypeUrl(type_url, NULL, full_type_name);
|
||||
}
|
||||
|
||||
|
||||
bool GetAnyFieldDescriptors(const Message& message,
|
||||
const FieldDescriptor** type_url_field,
|
||||
const FieldDescriptor** value_field) {
|
||||
const Descriptor* descriptor = message.GetDescriptor();
|
||||
if (descriptor->full_name() != kAnyFullTypeName) {
|
||||
return false;
|
||||
}
|
||||
*type_url_field = descriptor->FindFieldByNumber(1);
|
||||
*value_field = descriptor->FindFieldByNumber(2);
|
||||
return (*type_url_field != NULL &&
|
||||
(*type_url_field)->type() == FieldDescriptor::TYPE_STRING &&
|
||||
*value_field != NULL &&
|
||||
(*value_field)->type() == FieldDescriptor::TYPE_BYTES);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace protobuf
|
||||
} // namespace google
|
||||
@ -0,0 +1,118 @@
|
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifndef GOOGLE_PROTOBUF_ANY_H__
|
||||
#define GOOGLE_PROTOBUF_ANY_H__
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
#include <google/protobuf/descriptor.h>
|
||||
#include <google/protobuf/message.h>
|
||||
#include <google/protobuf/arenastring.h>
|
||||
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
namespace internal {
|
||||
|
||||
// Helper class used to implement google::protobuf::Any.
|
||||
class LIBPROTOBUF_EXPORT AnyMetadata {
|
||||
typedef ArenaStringPtr UrlType;
|
||||
typedef ArenaStringPtr ValueType;
|
||||
public:
|
||||
// AnyMetadata does not take ownership of "type_url" and "value".
|
||||
AnyMetadata(UrlType* type_url, ValueType* value);
|
||||
|
||||
// Packs a message using the default type URL prefix: "type.googleapis.com".
|
||||
// The resulted type URL will be "type.googleapis.com/<message_full_name>".
|
||||
void PackFrom(const Message& message);
|
||||
// Packs a message using the given type URL prefix. The type URL will be
|
||||
// constructed by concatenating the message type's full name to the prefix
|
||||
// with an optional "/" separator if the prefix doesn't already end up "/".
|
||||
// For example, both PackFrom(message, "type.googleapis.com") and
|
||||
// PackFrom(message, "type.googleapis.com/") yield the same result type
|
||||
// URL: "type.googleapis.com/<message_full_name>".
|
||||
void PackFrom(const Message& message, const string& type_url_prefix);
|
||||
|
||||
// Unpacks the payload into the given message. Returns false if the message's
|
||||
// type doesn't match the type specified in the type URL (i.e., the full
|
||||
// name after the last "/" of the type URL doesn't match the message's actual
|
||||
// full name) or parsing the payload has failed.
|
||||
bool UnpackTo(Message* message) const;
|
||||
|
||||
// Checks whether the type specified in the type URL matches the given type.
|
||||
// A type is consdiered matching if its full name matches the full name after
|
||||
// the last "/" in the type URL.
|
||||
template<typename T>
|
||||
bool Is() const {
|
||||
return InternalIs(T::default_instance().GetDescriptor());
|
||||
}
|
||||
|
||||
private:
|
||||
bool InternalIs(const Descriptor* message) const;
|
||||
|
||||
UrlType* type_url_;
|
||||
ValueType* value_;
|
||||
|
||||
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(AnyMetadata);
|
||||
};
|
||||
|
||||
extern const char kAnyFullTypeName[]; // "google.protobuf.Any".
|
||||
extern const char kTypeGoogleApisComPrefix[]; // "type.googleapis.com/".
|
||||
extern const char kTypeGoogleProdComPrefix[]; // "type.googleprod.com/".
|
||||
|
||||
// Get the proto type name from Any::type_url value. For example, passing
|
||||
// "type.googleapis.com/rpc.QueryOrigin" will return "rpc.QueryOrigin" in
|
||||
// *full_type_name. Returns false if the type_url does not have a "/"
|
||||
// in the type url separating the full type name.
|
||||
//
|
||||
// NOTE: this function is available publicly as:
|
||||
// google::protobuf::Any() // static method on the generated message type.
|
||||
bool ParseAnyTypeUrl(const string& type_url, string* full_type_name);
|
||||
|
||||
// Get the proto type name and prefix from Any::type_url value. For example,
|
||||
// passing "type.googleapis.com/rpc.QueryOrigin" will return
|
||||
// "type.googleapis.com/" in *url_prefix and "rpc.QueryOrigin" in
|
||||
// *full_type_name. Returns false if the type_url does not have a "/" in the
|
||||
// type url separating the full type name.
|
||||
bool ParseAnyTypeUrl(const string& type_url, string* url_prefix,
|
||||
string* full_type_name);
|
||||
|
||||
// See if message is of type google.protobuf.Any, if so, return the descriptors
|
||||
// for "type_url" and "value" fields.
|
||||
bool GetAnyFieldDescriptors(const Message& message,
|
||||
const FieldDescriptor** type_url_field,
|
||||
const FieldDescriptor** value_field);
|
||||
|
||||
} // namespace internal
|
||||
} // namespace protobuf
|
||||
|
||||
} // namespace google
|
||||
#endif // GOOGLE_PROTOBUF_ANY_H__
|
||||
@ -0,0 +1,435 @@
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// source: google/protobuf/any.proto
|
||||
|
||||
#include <google/protobuf/any.pb.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
#include <google/protobuf/stubs/port.h>
|
||||
#include <google/protobuf/io/coded_stream.h>
|
||||
#include <google/protobuf/wire_format_lite_inl.h>
|
||||
#include <google/protobuf/descriptor.h>
|
||||
#include <google/protobuf/generated_message_reflection.h>
|
||||
#include <google/protobuf/reflection_ops.h>
|
||||
#include <google/protobuf/wire_format.h>
|
||||
// This is a temporary google only hack
|
||||
#ifdef GOOGLE_PROTOBUF_ENFORCE_UNIQUENESS
|
||||
#include "third_party/protobuf/version.h"
|
||||
#endif
|
||||
// @@protoc_insertion_point(includes)
|
||||
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
// Holder for the storage of the default Any instance. The Any object itself
// is constructed into this storage with placement new by InitDefaultsAny().
class AnyDefaultTypeInternal {
|
||||
public:
|
||||
::google::protobuf::internal::ExplicitlyConstructed<Any>
|
||||
_instance;
|
||||
} _Any_default_instance_;
|
||||
} // namespace protobuf
|
||||
} // namespace google
|
||||
namespace protobuf_google_2fprotobuf_2fany_2eproto {
|
||||
// Constructs the default Any instance in place inside _Any_default_instance_,
// registers it with OnShutdownDestroyMessage(), and then runs
// Any::InitAsDefaultInstance().
static void InitDefaultsAny() {
|
||||
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
||||
|
||||
{
|
||||
void* ptr = &::google::protobuf::_Any_default_instance_;
|
||||
// Placement new: the storage already exists, only the constructor runs.
new (ptr) ::google::protobuf::Any();
|
||||
::google::protobuf::internal::OnShutdownDestroyMessage(ptr);
|
||||
}
|
||||
::google::protobuf::Any::InitAsDefaultInstance();
|
||||
}
|
||||
|
||||
// Initialization record for Any: starts in the kUninitialized state and names
// InitDefaultsAny as its initializer. It is passed to InitSCC() by
// InitDefaults() and by the Any constructor.
LIBPROTOBUF_EXPORT ::google::protobuf::internal::SCCInfo<0> scc_info_Any =
|
||||
{{ATOMIC_VAR_INIT(::google::protobuf::internal::SCCInfoBase::kUninitialized), 0, InitDefaultsAny}, {}};
|
||||
|
||||
// Hands scc_info_Any to InitSCC(); scc_info_Any names InitDefaultsAny as the
// initializer to run.
void InitDefaults() {
|
||||
::google::protobuf::internal::InitSCC(&scc_info_Any.base);
|
||||
}
|
||||
|
||||
// Metadata slot for the single message type (Any) defined in any.proto.
::google::protobuf::Metadata file_level_metadata[1];
|
||||
|
||||
// Offset table for Any; it is passed to AssignDescriptors() together with
// `schemas` below. Entries of ~0u mark features Any does not have.
const ::google::protobuf::uint32 TableStruct::offsets[] GOOGLE_PROTOBUF_ATTRIBUTE_SECTION_VARIABLE(protodesc_cold) = {
|
||||
~0u, // no _has_bits_
|
||||
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::google::protobuf::Any, _internal_metadata_),
|
||||
~0u, // no _extensions_
|
||||
~0u, // no _oneof_case_
|
||||
~0u, // no _weak_field_map_
|
||||
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::google::protobuf::Any, type_url_),
|
||||
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::google::protobuf::Any, value_),
|
||||
};
|
||||
// One schema entry per message type; the last member is the object size of Any.
static const ::google::protobuf::internal::MigrationSchema schemas[] GOOGLE_PROTOBUF_ATTRIBUTE_SECTION_VARIABLE(protodesc_cold) = {
|
||||
{ 0, -1, sizeof(::google::protobuf::Any)},
|
||||
};
|
||||
|
||||
// Default instance of each message type in this file, viewed as a Message.
static ::google::protobuf::Message const * const file_default_instances[] = {
|
||||
reinterpret_cast<const ::google::protobuf::Message*>(&::google::protobuf::_Any_default_instance_),
|
||||
};
|
||||
|
||||
// Calls AddDescriptors() and then AssignDescriptors() with the name of
// any.proto and the tables defined in this file, including
// file_level_metadata.
void protobuf_AssignDescriptors() {
|
||||
AddDescriptors();
|
||||
AssignDescriptors(
|
||||
"google/protobuf/any.proto", schemas, file_default_instances, TableStruct::offsets,
|
||||
file_level_metadata, NULL, NULL);
|
||||
}
|
||||
|
||||
// Runs protobuf_AssignDescriptors() through call_once, guarded by a
// function-local once_flag.
void protobuf_AssignDescriptorsOnce() {
|
||||
static ::google::protobuf::internal::once_flag once;
|
||||
::google::protobuf::internal::call_once(once, protobuf_AssignDescriptors);
|
||||
}
|
||||
|
||||
// Callback handed to MessageFactory::InternalRegisterGeneratedFile() in
// AddDescriptorsImpl(). Its string argument is unused. It makes sure the
// descriptors are assigned and then registers the one message type of this
// file.
void protobuf_RegisterTypes(const ::std::string&) GOOGLE_PROTOBUF_ATTRIBUTE_COLD;
|
||||
void protobuf_RegisterTypes(const ::std::string&) {
|
||||
protobuf_AssignDescriptorsOnce();
|
||||
::google::protobuf::internal::RegisterAllTypes(file_level_metadata, 1);
|
||||
}
|
||||
|
||||
// Initializes the defaults, then hands the embedded descriptor bytes for
// google/protobuf/any.proto to the DescriptorPool and registers
// protobuf_RegisterTypes with the MessageFactory under the same file name.
void AddDescriptorsImpl() {
|
||||
InitDefaults();
|
||||
// Descriptor bytes for any.proto, embedded as a string literal.
static const char descriptor[] GOOGLE_PROTOBUF_ATTRIBUTE_SECTION_VARIABLE(protodesc_cold) = {
|
||||
"\n\031google/protobuf/any.proto\022\017google.prot"
|
||||
"obuf\"&\n\003Any\022\020\n\010type_url\030\001 \001(\t\022\r\n\005value\030\002"
|
||||
" \001(\014Bo\n\023com.google.protobufB\010AnyProtoP\001Z"
|
||||
"%github.com/golang/protobuf/ptypes/any\242\002"
|
||||
"\003GPB\252\002\036Google.Protobuf.WellKnownTypesb\006p"
|
||||
"roto3"
|
||||
};
|
||||
// 205 is the byte count passed along with the descriptor data.
::google::protobuf::DescriptorPool::InternalAddGeneratedFile(
|
||||
descriptor, 205);
|
||||
::google::protobuf::MessageFactory::InternalRegisterGeneratedFile(
|
||||
"google/protobuf/any.proto", &protobuf_RegisterTypes);
|
||||
}
|
||||
|
||||
// Runs AddDescriptorsImpl() through call_once, guarded by a function-local
// once_flag.
void AddDescriptors() {
|
||||
static ::google::protobuf::internal::once_flag once;
|
||||
::google::protobuf::internal::call_once(once, AddDescriptorsImpl);
|
||||
}
|
||||
// Force AddDescriptors() to be called at dynamic initialization time.
// The constructor of the namespace-scope object static_descriptor_initializer
// makes that call.
|
||||
struct StaticDescriptorInitializer {
|
||||
StaticDescriptorInitializer() {
|
||||
AddDescriptors();
|
||||
}
|
||||
} static_descriptor_initializer;
|
||||
} // namespace protobuf_google_2fprotobuf_2fany_2eproto
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
|
||||
// ===================================================================
|
||||
|
||||
// Intentionally empty; called by InitDefaultsAny() after the default
// instance has been constructed.
void Any::InitAsDefaultInstance() {
|
||||
}
|
||||
// Packs `message` into this Any by delegating to the AnyMetadata helper.
void Any::PackFrom(const ::google::protobuf::Message& message) {
|
||||
_any_metadata_.PackFrom(message);
|
||||
}
|
||||
|
||||
// Packs `message` into this Any using `type_url_prefix`, by delegating to the
// AnyMetadata helper.
void Any::PackFrom(const ::google::protobuf::Message& message,
|
||||
const ::std::string& type_url_prefix) {
|
||||
_any_metadata_.PackFrom(message, type_url_prefix);
|
||||
}
|
||||
|
||||
// Unpacks this Any into `message` by delegating to the AnyMetadata helper and
// returns that helper's result.
bool Any::UnpackTo(::google::protobuf::Message* message) const {
|
||||
return _any_metadata_.UnpackTo(message);
|
||||
}
|
||||
// Forwards to ::google::protobuf::internal::ParseAnyTypeUrl() and returns its
// result.
bool Any::ParseAnyTypeUrl(const string& type_url,
|
||||
string* full_type_name) {
|
||||
return ::google::protobuf::internal::ParseAnyTypeUrl(type_url,
|
||||
full_type_name);
|
||||
}
|
||||
|
||||
// Out-of-class definitions of the field-number constants. They are compiled
// everywhere except on MSVC versions older than 1900.
#if !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
const int Any::kTypeUrlFieldNumber;
|
||||
const int Any::kValueFieldNumber;
|
||||
#endif // !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
|
||||
// Default constructor: wires the AnyMetadata helper to the two string fields,
// passes scc_info_Any to InitSCC(), and sets both fields to the shared empty
// string via SharedCtor().
Any::Any()
|
||||
: ::google::protobuf::Message(), _internal_metadata_(NULL), _any_metadata_(&type_url_, &value_) {
|
||||
::google::protobuf::internal::InitSCC(
|
||||
&protobuf_google_2fprotobuf_2fany_2eproto::scc_info_Any.base);
|
||||
SharedCtor();
|
||||
// @@protoc_insertion_point(constructor:google.protobuf.Any)
|
||||
}
|
||||
// Copy constructor: merges the internal metadata of `from`, then copies
// type_url and value. Each field is first pointed at the shared empty string
// and only assigned from `from` when the source field is non-empty.
Any::Any(const Any& from)
|
||||
: ::google::protobuf::Message(),
|
||||
_internal_metadata_(NULL),
|
||||
_any_metadata_(&type_url_, &value_) {
|
||||
_internal_metadata_.MergeFrom(from._internal_metadata_);
|
||||
type_url_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
if (from.type_url().size() > 0) {
|
||||
type_url_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.type_url_);
|
||||
}
|
||||
value_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
if (from.value().size() > 0) {
|
||||
value_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.value_);
|
||||
}
|
||||
// @@protoc_insertion_point(copy_constructor:google.protobuf.Any)
|
||||
}
|
||||
|
||||
// Points both string fields at the shared empty-string default.
void Any::SharedCtor() {
|
||||
type_url_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
value_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
}
|
||||
|
||||
// Destructor: releases the string fields through SharedDtor().
Any::~Any() {
|
||||
// @@protoc_insertion_point(destructor:google.protobuf.Any)
|
||||
SharedDtor();
|
||||
}
|
||||
|
||||
// Destroys both string fields. The shared empty string is passed as the
// default value to DestroyNoArena().
void Any::SharedDtor() {
|
||||
type_url_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
value_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
}
|
||||
|
||||
// Stores `size` in _cached_size_. The method is const, so it can be called
// from the const ByteSizeLong().
void Any::SetCachedSize(int size) const {
|
||||
_cached_size_.Set(size);
|
||||
}
|
||||
// Returns the descriptor stored in file_level_metadata for Any, after calling
// protobuf_AssignDescriptorsOnce().
const ::google::protobuf::Descriptor* Any::descriptor() {
|
||||
::protobuf_google_2fprotobuf_2fany_2eproto::protobuf_AssignDescriptorsOnce();
|
||||
return ::protobuf_google_2fprotobuf_2fany_2eproto::file_level_metadata[kIndexInFileMessages].descriptor;
|
||||
}
|
||||
|
||||
// Returns the default Any instance, after passing scc_info_Any to InitSCC().
const Any& Any::default_instance() {
|
||||
::google::protobuf::internal::InitSCC(&protobuf_google_2fprotobuf_2fany_2eproto::scc_info_Any.base);
|
||||
return *internal_default_instance();
|
||||
}
|
||||
|
||||
|
||||
// Resets the message: clears both string fields to empty and clears the
// internal metadata.
void Any::Clear() {
|
||||
// @@protoc_insertion_point(message_clear_start:google.protobuf.Any)
|
||||
::google::protobuf::uint32 cached_has_bits = 0;
|
||||
// Prevent compiler warnings about cached_has_bits being unused
|
||||
(void) cached_has_bits;
|
||||
|
||||
type_url_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
value_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
_internal_metadata_.Clear();
|
||||
}
|
||||
|
||||
// Parses fields from `input` into this message, one tag at a time.
//   - Field 1 (type_url) is read as a string and checked with
//     VerifyUtf8String.
//   - Field 2 (value) is read as bytes.
//   - Any other tag, or a known field number whose low tag byte is not the
//     expected one, is skipped into the unknown fields.
// Returns true once a tag of 0 is seen, and false as soon as any read,
// verification or skip step fails (the DO_ macro jumps to `failure`).
bool Any::MergePartialFromCodedStream(
|
||||
::google::protobuf::io::CodedInputStream* input) {
|
||||
#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure
|
||||
::google::protobuf::uint32 tag;
|
||||
// @@protoc_insertion_point(parse_start:google.protobuf.Any)
|
||||
for (;;) {
|
||||
::std::pair<::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u);
|
||||
tag = p.first;
|
||||
if (!p.second) goto handle_unusual;
|
||||
switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) {
|
||||
// string type_url = 1;
|
||||
case 1: {
|
||||
if (static_cast< ::google::protobuf::uint8>(tag) ==
|
||||
static_cast< ::google::protobuf::uint8>(10u /* 10 & 0xFF */)) {
|
||||
DO_(::google::protobuf::internal::WireFormatLite::ReadString(
|
||||
input, this->mutable_type_url()));
|
||||
DO_(::google::protobuf::internal::WireFormatLite::VerifyUtf8String(
|
||||
this->type_url().data(), static_cast<int>(this->type_url().length()),
|
||||
::google::protobuf::internal::WireFormatLite::PARSE,
|
||||
"google.protobuf.Any.type_url"));
|
||||
} else {
|
||||
goto handle_unusual;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// bytes value = 2;
|
||||
case 2: {
|
||||
if (static_cast< ::google::protobuf::uint8>(tag) ==
|
||||
static_cast< ::google::protobuf::uint8>(18u /* 18 & 0xFF */)) {
|
||||
DO_(::google::protobuf::internal::WireFormatLite::ReadBytes(
|
||||
input, this->mutable_value()));
|
||||
} else {
|
||||
goto handle_unusual;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default: {
|
||||
handle_unusual:
|
||||
// A tag of 0 ends parsing successfully.
if (tag == 0) {
|
||||
goto success;
|
||||
}
|
||||
DO_(::google::protobuf::internal::WireFormat::SkipField(
|
||||
input, tag, _internal_metadata_.mutable_unknown_fields()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
success:
|
||||
// @@protoc_insertion_point(parse_success:google.protobuf.Any)
|
||||
return true;
|
||||
failure:
|
||||
// @@protoc_insertion_point(parse_failure:google.protobuf.Any)
|
||||
return false;
|
||||
#undef DO_
|
||||
}
|
||||
|
||||
// Writes this message to `output`. Each of type_url (field 1) and value
// (field 2) is written only when non-empty; type_url is passed through
// VerifyUtf8String first. Unknown fields are written when present and
// GetProto3PreserveUnknownsDefault() returns true.
void Any::SerializeWithCachedSizes(
|
||||
::google::protobuf::io::CodedOutputStream* output) const {
|
||||
// @@protoc_insertion_point(serialize_start:google.protobuf.Any)
|
||||
::google::protobuf::uint32 cached_has_bits = 0;
|
||||
(void) cached_has_bits;
|
||||
|
||||
// string type_url = 1;
|
||||
if (this->type_url().size() > 0) {
|
||||
::google::protobuf::internal::WireFormatLite::VerifyUtf8String(
|
||||
this->type_url().data(), static_cast<int>(this->type_url().length()),
|
||||
::google::protobuf::internal::WireFormatLite::SERIALIZE,
|
||||
"google.protobuf.Any.type_url");
|
||||
::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased(
|
||||
1, this->type_url(), output);
|
||||
}
|
||||
|
||||
// bytes value = 2;
|
||||
if (this->value().size() > 0) {
|
||||
::google::protobuf::internal::WireFormatLite::WriteBytesMaybeAliased(
|
||||
2, this->value(), output);
|
||||
}
|
||||
|
||||
if ((_internal_metadata_.have_unknown_fields() && ::google::protobuf::internal::GetProto3PreserveUnknownsDefault())) {
|
||||
::google::protobuf::internal::WireFormat::SerializeUnknownFields(
|
||||
(::google::protobuf::internal::GetProto3PreserveUnknownsDefault() ? _internal_metadata_.unknown_fields() : _internal_metadata_.default_instance()), output);
|
||||
}
|
||||
// @@protoc_insertion_point(serialize_end:google.protobuf.Any)
|
||||
}
|
||||
|
||||
// Array-based counterpart of SerializeWithCachedSizes(): writes the non-empty
// fields and, under the same condition, the unknown fields starting at
// `target`. Returns the pointer as updated by the write helpers.
// `deterministic` is unused.
::google::protobuf::uint8* Any::InternalSerializeWithCachedSizesToArray(
|
||||
bool deterministic, ::google::protobuf::uint8* target) const {
|
||||
(void)deterministic; // Unused
|
||||
// @@protoc_insertion_point(serialize_to_array_start:google.protobuf.Any)
|
||||
::google::protobuf::uint32 cached_has_bits = 0;
|
||||
(void) cached_has_bits;
|
||||
|
||||
// string type_url = 1;
|
||||
if (this->type_url().size() > 0) {
|
||||
::google::protobuf::internal::WireFormatLite::VerifyUtf8String(
|
||||
this->type_url().data(), static_cast<int>(this->type_url().length()),
|
||||
::google::protobuf::internal::WireFormatLite::SERIALIZE,
|
||||
"google.protobuf.Any.type_url");
|
||||
target =
|
||||
::google::protobuf::internal::WireFormatLite::WriteStringToArray(
|
||||
1, this->type_url(), target);
|
||||
}
|
||||
|
||||
// bytes value = 2;
|
||||
if (this->value().size() > 0) {
|
||||
target =
|
||||
::google::protobuf::internal::WireFormatLite::WriteBytesToArray(
|
||||
2, this->value(), target);
|
||||
}
|
||||
|
||||
if ((_internal_metadata_.have_unknown_fields() && ::google::protobuf::internal::GetProto3PreserveUnknownsDefault())) {
|
||||
target = ::google::protobuf::internal::WireFormat::SerializeUnknownFieldsToArray(
|
||||
(::google::protobuf::internal::GetProto3PreserveUnknownsDefault() ? _internal_metadata_.unknown_fields() : _internal_metadata_.default_instance()), target);
|
||||
}
|
||||
// @@protoc_insertion_point(serialize_to_array_end:google.protobuf.Any)
|
||||
return target;
|
||||
}
|
||||
|
||||
// Computes the serialized size of this message: the unknown fields (when
// present and preserved) plus, for each non-empty field, 1 plus its
// string/bytes size. The result is also stored via SetCachedSize() after
// conversion with ToCachedSize().
size_t Any::ByteSizeLong() const {
|
||||
// @@protoc_insertion_point(message_byte_size_start:google.protobuf.Any)
|
||||
size_t total_size = 0;
|
||||
|
||||
if ((_internal_metadata_.have_unknown_fields() && ::google::protobuf::internal::GetProto3PreserveUnknownsDefault())) {
|
||||
total_size +=
|
||||
::google::protobuf::internal::WireFormat::ComputeUnknownFieldsSize(
|
||||
(::google::protobuf::internal::GetProto3PreserveUnknownsDefault() ? _internal_metadata_.unknown_fields() : _internal_metadata_.default_instance()));
|
||||
}
|
||||
// string type_url = 1;
|
||||
if (this->type_url().size() > 0) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::StringSize(
|
||||
this->type_url());
|
||||
}
|
||||
|
||||
// bytes value = 2;
|
||||
if (this->value().size() > 0) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::BytesSize(
|
||||
this->value());
|
||||
}
|
||||
|
||||
int cached_size = ::google::protobuf::internal::ToCachedSize(total_size);
|
||||
SetCachedSize(cached_size);
|
||||
return total_size;
|
||||
}
|
||||
|
||||
// Merges an arbitrary Message into this Any. When `from` can be cast to a
// generated Any the typed overload is used; otherwise the merge falls back to
// ReflectionOps::Merge().
void Any::MergeFrom(const ::google::protobuf::Message& from) {
  // @@protoc_insertion_point(generalized_merge_from_start:google.protobuf.Any)
  GOOGLE_DCHECK_NE(&from, this);
  const Any* source =
      ::google::protobuf::internal::DynamicCastToGenerated<const Any>(&from);
  if (source != NULL) {
    // @@protoc_insertion_point(generalized_merge_from_cast_success:google.protobuf.Any)
    MergeFrom(*source);
    return;
  }
  // @@protoc_insertion_point(generalized_merge_from_cast_fail:google.protobuf.Any)
  ::google::protobuf::internal::ReflectionOps::Merge(from, this);
}
|
||||
|
||||
void Any::MergeFrom(const Any& from) {
|
||||
// @@protoc_insertion_point(class_specific_merge_from_start:google.protobuf.Any)
|
||||
GOOGLE_DCHECK_NE(&from, this);
|
||||
_internal_metadata_.MergeFrom(from._internal_metadata_);
|
||||
::google::protobuf::uint32 cached_has_bits = 0;
|
||||
(void) cached_has_bits;
|
||||
|
||||
if (from.type_url().size() > 0) {
|
||||
|
||||
type_url_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.type_url_);
|
||||
}
|
||||
if (from.value().size() > 0) {
|
||||
|
||||
value_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.value_);
|
||||
}
|
||||
}
|
||||
|
||||
// Replaces the contents of this message with those of `from`.
// Copying a message onto itself is a no-op.
void Any::CopyFrom(const ::google::protobuf::Message& from) {
  // @@protoc_insertion_point(generalized_copy_from_start:google.protobuf.Any)
  if (&from != this) {
    Clear();
    MergeFrom(from);
  }
}
|
||||
|
||||
void Any::CopyFrom(const Any& from) {
|
||||
// @@protoc_insertion_point(class_specific_copy_from_start:google.protobuf.Any)
|
||||
if (&from == this) return;
|
||||
Clear();
|
||||
MergeFrom(from);
|
||||
}
|
||||
|
||||
// Always reports the message as initialized.
bool Any::IsInitialized() const {
  return true;
}
|
||||
|
||||
// Exchanges the contents of this message with `other`.
// Swapping an object with itself does nothing.
void Any::Swap(Any* other) {
  if (other != this) {
    InternalSwap(other);
  }
}
|
||||
// Swaps both string fields and the internal metadata with `other`.
// Performs no self-swap check; Swap() handles that before calling in.
void Any::InternalSwap(Any* other) {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();

  type_url_.Swap(&other->type_url_, empty_default, GetArenaNoVirtual());
  value_.Swap(&other->value_, empty_default, GetArenaNoVirtual());
  _internal_metadata_.Swap(&other->_internal_metadata_);
}
|
||||
|
||||
// Returns this message's entry in the file-level metadata table, after
// calling protobuf_AssignDescriptorsOnce() for the file.
::google::protobuf::Metadata Any::GetMetadata() const {
  protobuf_google_2fprotobuf_2fany_2eproto::protobuf_AssignDescriptorsOnce();
  const ::google::protobuf::Metadata& entry =
      ::protobuf_google_2fprotobuf_2fany_2eproto::file_level_metadata[kIndexInFileMessages];
  return entry;
}
|
||||
|
||||
|
||||
// @@protoc_insertion_point(namespace_scope)
|
||||
} // namespace protobuf
|
||||
} // namespace google
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
// Explicit specialization that creates an Any through
// Arena::CreateInternal() using the supplied arena pointer.
template<> GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE ::google::protobuf::Any* Arena::CreateMaybeMessage< ::google::protobuf::Any >(Arena* arena) {
  ::google::protobuf::Any* const created =
      Arena::CreateInternal< ::google::protobuf::Any >(arena);
  return created;
}
|
||||
} // namespace protobuf
|
||||
} // namespace google
|
||||
|
||||
// @@protoc_insertion_point(global_scope)
|
||||
@ -0,0 +1,331 @@
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// source: google/protobuf/any.proto
|
||||
|
||||
#ifndef PROTOBUF_INCLUDED_google_2fprotobuf_2fany_2eproto
|
||||
#define PROTOBUF_INCLUDED_google_2fprotobuf_2fany_2eproto
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
|
||||
#if GOOGLE_PROTOBUF_VERSION < 3006001
|
||||
#error This file was generated by a newer version of protoc which is
|
||||
#error incompatible with your Protocol Buffer headers. Please update
|
||||
#error your headers.
|
||||
#endif
|
||||
#if 3006001 < GOOGLE_PROTOBUF_MIN_PROTOC_VERSION
|
||||
#error This file was generated by an older version of protoc which is
|
||||
#error incompatible with your Protocol Buffer headers. Please
|
||||
#error regenerate this file with a newer version of protoc.
|
||||
#endif
|
||||
|
||||
#include <google/protobuf/io/coded_stream.h>
|
||||
#include <google/protobuf/arena.h>
|
||||
#include <google/protobuf/arenastring.h>
|
||||
#include <google/protobuf/generated_message_table_driven.h>
|
||||
#include <google/protobuf/generated_message_util.h>
|
||||
#include <google/protobuf/inlined_string_field.h>
|
||||
#include <google/protobuf/metadata.h>
|
||||
#include <google/protobuf/message.h>
|
||||
#include <google/protobuf/repeated_field.h> // IWYU pragma: export
|
||||
#include <google/protobuf/extension_set.h> // IWYU pragma: export
|
||||
#include <google/protobuf/unknown_field_set.h>
|
||||
#include <google/protobuf/any.h>
|
||||
// @@protoc_insertion_point(includes)
|
||||
#define PROTOBUF_INTERNAL_EXPORT_protobuf_google_2fprotobuf_2fany_2eproto LIBPROTOBUF_EXPORT
|
||||
|
||||
namespace protobuf_google_2fprotobuf_2fany_2eproto {
|
||||
// Internal implementation detail -- do not use these members.
|
||||
struct LIBPROTOBUF_EXPORT TableStruct {
|
||||
static const ::google::protobuf::internal::ParseTableField entries[];
|
||||
static const ::google::protobuf::internal::AuxillaryParseTableField aux[];
|
||||
static const ::google::protobuf::internal::ParseTable schema[1];
|
||||
static const ::google::protobuf::internal::FieldMetadata field_metadata[];
|
||||
static const ::google::protobuf::internal::SerializationTable serialization_table[];
|
||||
static const ::google::protobuf::uint32 offsets[];
|
||||
};
|
||||
void LIBPROTOBUF_EXPORT AddDescriptors();
|
||||
} // namespace protobuf_google_2fprotobuf_2fany_2eproto
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
class Any;
|
||||
class AnyDefaultTypeInternal;
|
||||
LIBPROTOBUF_EXPORT extern AnyDefaultTypeInternal _Any_default_instance_;
|
||||
} // namespace protobuf
|
||||
} // namespace google
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
template<> LIBPROTOBUF_EXPORT ::google::protobuf::Any* Arena::CreateMaybeMessage<::google::protobuf::Any>(Arena*);
|
||||
} // namespace protobuf
|
||||
} // namespace google
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
|
||||
// ===================================================================
|
||||
|
||||
// Generated message class for google.protobuf.Any. It exposes two fields,
// `string type_url = 1` and `bytes value = 2`, plus Any-specific helpers for
// packing and unpacking other messages.
class LIBPROTOBUF_EXPORT Any : public ::google::protobuf::Message /* @@protoc_insertion_point(class_definition:google.protobuf.Any) */ {
 public:
  Any();
  virtual ~Any();

  Any(const Any& from);

  // Copy assignment delegates to CopyFrom().
  inline Any& operator=(const Any& from) {
    CopyFrom(from);
    return *this;
  }
  #if LANG_CXX11
  // Move construction default-constructs, then move-assigns.
  Any(Any&& from) noexcept
    : Any() {
    *this = ::std::move(from);
  }

  // Move assignment swaps when both objects report the same arena and copies
  // otherwise; self-assignment is left untouched.
  inline Any& operator=(Any&& from) noexcept {
    if (GetArenaNoVirtual() != from.GetArenaNoVirtual()) {
      CopyFrom(from);
    } else if (this != &from) {
      InternalSwap(&from);
    }
    return *this;
  }
  #endif
  static const ::google::protobuf::Descriptor* descriptor();
  static const Any& default_instance();

  static void InitAsDefaultInstance();  // FOR INTERNAL USE ONLY
  // Reinterprets the address of the global _Any_default_instance_ holder as
  // an Any pointer.
  static inline const Any* internal_default_instance() {
    return reinterpret_cast<const Any*>(
               &_Any_default_instance_);
  }
  // Index used by GetMetadata() to pick this message's file-level entry.
  static constexpr int kIndexInFileMessages =
    0;

  // implements Any -----------------------------------------------

  void PackFrom(const ::google::protobuf::Message& message);
  void PackFrom(const ::google::protobuf::Message& message,
                const ::std::string& type_url_prefix);
  bool UnpackTo(::google::protobuf::Message* message) const;
  // Forwards the type check to the embedded AnyMetadata helper.
  template<typename T> bool Is() const {
    return _any_metadata_.Is<T>();
  }
  static bool ParseAnyTypeUrl(const string& type_url,
                              string* full_type_name);

  void Swap(Any* other);
  // Free-function swap so ADL-based `swap(a, b)` resolves to Swap().
  friend void swap(Any& a, Any& b) {
    a.Swap(&b);
  }

  // implements Message ----------------------------------------------

  inline Any* New() const final {
    return CreateMaybeMessage<Any>(NULL);
  }

  Any* New(::google::protobuf::Arena* arena) const final {
    return CreateMaybeMessage<Any>(arena);
  }
  void CopyFrom(const ::google::protobuf::Message& from) final;
  void MergeFrom(const ::google::protobuf::Message& from) final;
  void CopyFrom(const Any& from);
  void MergeFrom(const Any& from);
  void Clear() final;
  bool IsInitialized() const final;

  size_t ByteSizeLong() const final;
  bool MergePartialFromCodedStream(
      ::google::protobuf::io::CodedInputStream* input) final;
  void SerializeWithCachedSizes(
      ::google::protobuf::io::CodedOutputStream* output) const final;
  ::google::protobuf::uint8* InternalSerializeWithCachedSizesToArray(
      bool deterministic, ::google::protobuf::uint8* target) const final;
  int GetCachedSize() const final { return _cached_size_.Get(); }

  private:
  void SharedCtor();
  void SharedDtor();
  void SetCachedSize(int size) const final;
  void InternalSwap(Any* other);
  private:
  // Both arena hooks return NULL for this class.
  inline ::google::protobuf::Arena* GetArenaNoVirtual() const {
    return NULL;
  }
  inline void* MaybeArenaPtr() const {
    return NULL;
  }
  public:

  ::google::protobuf::Metadata GetMetadata() const final;

  // nested types ----------------------------------------------------

  // accessors -------------------------------------------------------

  // string type_url = 1;
  void clear_type_url();
  static const int kTypeUrlFieldNumber = 1;
  const ::std::string& type_url() const;
  void set_type_url(const ::std::string& value);
  #if LANG_CXX11
  void set_type_url(::std::string&& value);
  #endif
  void set_type_url(const char* value);
  void set_type_url(const char* value, size_t size);
  ::std::string* mutable_type_url();
  ::std::string* release_type_url();
  void set_allocated_type_url(::std::string* type_url);

  // bytes value = 2;
  void clear_value();
  static const int kValueFieldNumber = 2;
  const ::std::string& value() const;
  void set_value(const ::std::string& value);
  #if LANG_CXX11
  void set_value(::std::string&& value);
  #endif
  void set_value(const char* value);
  void set_value(const void* value, size_t size);
  ::std::string* mutable_value();
  ::std::string* release_value();
  void set_allocated_value(::std::string* value);

  // @@protoc_insertion_point(class_scope:google.protobuf.Any)
 private:

  ::google::protobuf::internal::InternalMetadataWithArena _internal_metadata_;
  ::google::protobuf::internal::ArenaStringPtr type_url_;
  ::google::protobuf::internal::ArenaStringPtr value_;
  mutable ::google::protobuf::internal::CachedSize _cached_size_;
  ::google::protobuf::internal::AnyMetadata _any_metadata_;
  friend struct ::protobuf_google_2fprotobuf_2fany_2eproto::TableStruct;
};
|
||||
// ===================================================================
|
||||
|
||||
|
||||
// ===================================================================
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
|
||||
#endif // __GNUC__
|
||||
// Any
|
||||
|
||||
// string type_url = 1;
|
||||
// Resets the type_url field via ClearToEmptyNoArena().
inline void Any::clear_type_url() {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  type_url_.ClearToEmptyNoArena(empty_default);
}
|
||||
inline const ::std::string& Any::type_url() const {
|
||||
// @@protoc_insertion_point(field_get:google.protobuf.Any.type_url)
|
||||
return type_url_.GetNoArena();
|
||||
}
|
||||
// Stores `value` in the type_url field.
inline void Any::set_type_url(const ::std::string& value) {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  type_url_.SetNoArena(empty_default, value);
  // @@protoc_insertion_point(field_set:google.protobuf.Any.type_url)
}
|
||||
#if LANG_CXX11
|
||||
// Rvalue overload: forwards `value` to SetNoArena() with ::std::move.
inline void Any::set_type_url(::std::string&& value) {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  type_url_.SetNoArena(empty_default, ::std::move(value));
  // @@protoc_insertion_point(field_set_rvalue:google.protobuf.Any.type_url)
}
|
||||
#endif
|
||||
// Stores a C string in the type_url field. `value` must not be NULL
// (checked by GOOGLE_DCHECK).
inline void Any::set_type_url(const char* value) {
  GOOGLE_DCHECK(value != NULL);
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  type_url_.SetNoArena(empty_default, ::std::string(value));
  // @@protoc_insertion_point(field_set_char:google.protobuf.Any.type_url)
}
|
||||
// Stores the first `size` characters starting at `value` in the type_url
// field.
inline void Any::set_type_url(const char* value, size_t size) {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  // `value` is already a const char*, so it can be handed to the string
  // constructor directly.
  type_url_.SetNoArena(empty_default, ::std::string(value, size));
  // @@protoc_insertion_point(field_set_pointer:google.protobuf.Any.type_url)
}
|
||||
inline ::std::string* Any::mutable_type_url() {
|
||||
|
||||
// @@protoc_insertion_point(field_mutable:google.protobuf.Any.type_url)
|
||||
return type_url_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
}
|
||||
inline ::std::string* Any::release_type_url() {
|
||||
// @@protoc_insertion_point(field_release:google.protobuf.Any.type_url)
|
||||
|
||||
return type_url_.ReleaseNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
}
|
||||
// Hands `type_url` to SetAllocatedNoArena(). A NULL pointer is forwarded
// unchanged; no extra work is done for either case.
inline void Any::set_allocated_type_url(::std::string* type_url) {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  type_url_.SetAllocatedNoArena(empty_default, type_url);
  // @@protoc_insertion_point(field_set_allocated:google.protobuf.Any.type_url)
}
|
||||
|
||||
// bytes value = 2;
|
||||
// Resets the value field via ClearToEmptyNoArena().
inline void Any::clear_value() {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  value_.ClearToEmptyNoArena(empty_default);
}
|
||||
inline const ::std::string& Any::value() const {
|
||||
// @@protoc_insertion_point(field_get:google.protobuf.Any.value)
|
||||
return value_.GetNoArena();
|
||||
}
|
||||
// Stores `value` in the value field.
inline void Any::set_value(const ::std::string& value) {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  value_.SetNoArena(empty_default, value);
  // @@protoc_insertion_point(field_set:google.protobuf.Any.value)
}
|
||||
#if LANG_CXX11
|
||||
// Rvalue overload: forwards `value` to SetNoArena() with ::std::move.
inline void Any::set_value(::std::string&& value) {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  value_.SetNoArena(empty_default, ::std::move(value));
  // @@protoc_insertion_point(field_set_rvalue:google.protobuf.Any.value)
}
|
||||
#endif
|
||||
// Stores a NUL-terminated C string in the value field. `value` must not be
// NULL (checked by GOOGLE_DCHECK).
inline void Any::set_value(const char* value) {
  GOOGLE_DCHECK(value != NULL);
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  value_.SetNoArena(empty_default, ::std::string(value));
  // @@protoc_insertion_point(field_set_char:google.protobuf.Any.value)
}
|
||||
// Stores `size` bytes starting at `value` in the value field.
inline void Any::set_value(const void* value, size_t size) {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  // View the untyped buffer as chars so it can seed a std::string.
  const char* const bytes = static_cast<const char*>(value);
  value_.SetNoArena(empty_default, ::std::string(bytes, size));
  // @@protoc_insertion_point(field_set_pointer:google.protobuf.Any.value)
}
|
||||
inline ::std::string* Any::mutable_value() {
|
||||
|
||||
// @@protoc_insertion_point(field_mutable:google.protobuf.Any.value)
|
||||
return value_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
}
|
||||
inline ::std::string* Any::release_value() {
|
||||
// @@protoc_insertion_point(field_release:google.protobuf.Any.value)
|
||||
|
||||
return value_.ReleaseNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
}
|
||||
// Hands `value` to SetAllocatedNoArena(). A NULL pointer is forwarded
// unchanged; no extra work is done for either case.
inline void Any::set_allocated_value(::std::string* value) {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  value_.SetAllocatedNoArena(empty_default, value);
  // @@protoc_insertion_point(field_set_allocated:google.protobuf.Any.value)
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif // __GNUC__
|
||||
|
||||
// @@protoc_insertion_point(namespace_scope)
|
||||
|
||||
} // namespace protobuf
|
||||
} // namespace google
|
||||
|
||||
// @@protoc_insertion_point(global_scope)
|
||||
|
||||
#endif // PROTOBUF_INCLUDED_google_2fprotobuf_2fany_2eproto
|
||||
@ -0,0 +1,154 @@
|
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
package google.protobuf;
|
||||
|
||||
option csharp_namespace = "Google.Protobuf.WellKnownTypes";
|
||||
option go_package = "github.com/golang/protobuf/ptypes/any";
|
||||
option java_package = "com.google.protobuf";
|
||||
option java_outer_classname = "AnyProto";
|
||||
option java_multiple_files = true;
|
||||
option objc_class_prefix = "GPB";
|
||||
|
||||
// `Any` contains an arbitrary serialized protocol buffer message along with a
|
||||
// URL that describes the type of the serialized message.
|
||||
//
|
||||
// Protobuf library provides support to pack/unpack Any values in the form
|
||||
// of utility functions or additional generated methods of the Any type.
|
||||
//
|
||||
// Example 1: Pack and unpack a message in C++.
|
||||
//
|
||||
// Foo foo = ...;
|
||||
// Any any;
|
||||
// any.PackFrom(foo);
|
||||
// ...
|
||||
// if (any.UnpackTo(&foo)) {
|
||||
// ...
|
||||
// }
|
||||
//
|
||||
// Example 2: Pack and unpack a message in Java.
|
||||
//
|
||||
// Foo foo = ...;
|
||||
// Any any = Any.pack(foo);
|
||||
// ...
|
||||
// if (any.is(Foo.class)) {
|
||||
// foo = any.unpack(Foo.class);
|
||||
// }
|
||||
//
|
||||
// Example 3: Pack and unpack a message in Python.
|
||||
//
|
||||
// foo = Foo(...)
|
||||
// any = Any()
|
||||
// any.Pack(foo)
|
||||
// ...
|
||||
// if any.Is(Foo.DESCRIPTOR):
|
||||
// any.Unpack(foo)
|
||||
// ...
|
||||
//
|
||||
// Example 4: Pack and unpack a message in Go
|
||||
//
|
||||
// foo := &pb.Foo{...}
|
||||
// any, err := ptypes.MarshalAny(foo)
|
||||
// ...
|
||||
// foo := &pb.Foo{}
|
||||
// if err := ptypes.UnmarshalAny(any, foo); err != nil {
|
||||
// ...
|
||||
// }
|
||||
//
|
||||
// The pack methods provided by protobuf library will by default use
|
||||
// 'type.googleapis.com/full.type.name' as the type URL and the unpack
|
||||
// methods only use the fully qualified type name after the last '/'
|
||||
// in the type URL, for example "foo.bar.com/x/y.z" will yield type
|
||||
// name "y.z".
|
||||
//
|
||||
//
|
||||
// JSON
|
||||
// ====
|
||||
// The JSON representation of an `Any` value uses the regular
|
||||
// representation of the deserialized, embedded message, with an
|
||||
// additional field `@type` which contains the type URL. Example:
|
||||
//
|
||||
// package google.profile;
|
||||
// message Person {
|
||||
// string first_name = 1;
|
||||
// string last_name = 2;
|
||||
// }
|
||||
//
|
||||
// {
|
||||
// "@type": "type.googleapis.com/google.profile.Person",
|
||||
// "firstName": <string>,
|
||||
// "lastName": <string>
|
||||
// }
|
||||
//
|
||||
// If the embedded message type is well-known and has a custom JSON
|
||||
// representation, that representation will be embedded adding a field
|
||||
// `value` which holds the custom JSON in addition to the `@type`
|
||||
// field. Example (for message [google.protobuf.Duration][]):
|
||||
//
|
||||
// {
|
||||
// "@type": "type.googleapis.com/google.protobuf.Duration",
|
||||
// "value": "1.212s"
|
||||
// }
|
||||
//
|
||||
message Any {
  // A URL/resource name that uniquely identifies the type of the serialized
  // protocol buffer message. The last segment of the URL's path must represent
  // the fully qualified name of the type (as in
  // `path/google.protobuf.Duration`). The name should be in a canonical form
  // (e.g., leading "." is not accepted).
  //
  // In practice, teams usually precompile into the binary all types that they
  // expect it to use in the context of Any. However, for URLs which use the
  // scheme `http`, `https`, or no scheme, one can optionally set up a type
  // server that maps type URLs to message definitions as follows:
  //
  // * If no scheme is provided, `https` is assumed.
  // * An HTTP GET on the URL must yield a [google.protobuf.Type][]
  //   value in binary format, or produce an error.
  // * Applications are allowed to cache lookup results based on the
  //   URL, or have them precompiled into a binary to avoid any
  //   lookup. Therefore, binary compatibility needs to be preserved
  //   on changes to types. (Use versioned type names to manage
  //   breaking changes.)
  //
  // Note: this functionality is not currently available in the official
  // protobuf release, and it is not used for type URLs beginning with
  // type.googleapis.com.
  //
  // Schemes other than `http`, `https` (or the empty scheme) might be
  // used with implementation specific semantics.
  //
  string type_url = 1;

  // Must be a valid serialized protocol buffer of the above specified type.
  bytes value = 2;
}
|
||||
@ -0,0 +1,89 @@
|
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <google/protobuf/any_test.pb.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
namespace {
|
||||
|
||||
// Packs a TestAny into the Any field of another TestAny, round-trips the
// outer message through its serialized form, and checks that unpacking
// recovers the original int32 value.
TEST(AnyTest, TestPackAndUnpack) {
  protobuf_unittest::TestAny submessage;
  submessage.set_int32_value(12345);
  protobuf_unittest::TestAny message;
  message.mutable_any_value()->PackFrom(submessage);

  string data = message.SerializeAsString();

  ASSERT_TRUE(message.ParseFromString(data));
  EXPECT_TRUE(message.has_any_value());
  // Wipe the expected value first: without this, the final check would pass
  // even if UnpackTo() left `submessage` untouched.
  submessage.Clear();
  ASSERT_TRUE(message.any_value().UnpackTo(&submessage));
  EXPECT_EQ(12345, submessage.int32_value());
}
|
||||
|
||||
// Verifies that an Any can itself be packed inside another Any and that both
// layers unpack correctly after a serialize/parse round trip.
TEST(AnyTest, TestPackAndUnpackAny) {
  // We can pack a Any message inside another Any message.
  protobuf_unittest::TestAny submessage;
  submessage.set_int32_value(12345);
  google::protobuf::Any any;
  any.PackFrom(submessage);
  protobuf_unittest::TestAny message;
  message.mutable_any_value()->PackFrom(any);

  string data = message.SerializeAsString();

  ASSERT_TRUE(message.ParseFromString(data));
  EXPECT_TRUE(message.has_any_value());
  // Reset both destinations so the checks below can only succeed if each
  // UnpackTo() call really repopulates its target from the parsed data.
  any.Clear();
  submessage.Clear();
  ASSERT_TRUE(message.any_value().UnpackTo(&any));
  ASSERT_TRUE(any.UnpackTo(&submessage));
  EXPECT_EQ(12345, submessage.int32_value());
}
|
||||
|
||||
// Checks that Any::Is<T>() reports the packed type and rejects others, both
// for a directly packed TestAny and for an Any nested inside another Any.
TEST(AnyTest, TestIs) {
  protobuf_unittest::TestAny inner;
  inner.set_int32_value(12345);

  // An Any that wraps a TestAny is a TestAny, not an Any.
  google::protobuf::Any packed;
  packed.PackFrom(inner);
  ASSERT_TRUE(packed.ParseFromString(packed.SerializeAsString()));
  EXPECT_TRUE(packed.Is<protobuf_unittest::TestAny>());
  EXPECT_FALSE(packed.Is<google::protobuf::Any>());

  // Wrapping that Any once more flips the answers for the outer layer.
  protobuf_unittest::TestAny outer;
  outer.mutable_any_value()->PackFrom(packed);
  ASSERT_TRUE(outer.ParseFromString(outer.SerializeAsString()));
  EXPECT_FALSE(outer.any_value().Is<protobuf_unittest::TestAny>());
  EXPECT_TRUE(outer.any_value().Is<google::protobuf::Any>());
}
|
||||
|
||||
} // namespace
|
||||
} // namespace protobuf
|
||||
|
||||
} // namespace google
|
||||
@ -0,0 +1,41 @@
|
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
package protobuf_unittest;
|
||||
|
||||
import "google/protobuf/any.proto";
|
||||
|
||||
// Test-only message with a scalar field plus singular and repeated Any
// fields.
message TestAny {
  int32 int32_value = 1;
  google.protobuf.Any any_value = 2;
  repeated google.protobuf.Any repeated_any_value = 3;
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,210 @@
|
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
package google.protobuf;
|
||||
|
||||
import "google/protobuf/source_context.proto";
|
||||
import "google/protobuf/type.proto";
|
||||
|
||||
option csharp_namespace = "Google.Protobuf.WellKnownTypes";
|
||||
option java_package = "com.google.protobuf";
|
||||
option java_outer_classname = "ApiProto";
|
||||
option java_multiple_files = true;
|
||||
option objc_class_prefix = "GPB";
|
||||
option go_package = "google.golang.org/genproto/protobuf/api;api";
|
||||
|
||||
// Api is a light-weight descriptor for an API Interface.
|
||||
//
|
||||
// Interfaces are also described as "protocol buffer services" in some contexts,
|
||||
// such as by the "service" keyword in a .proto file, but they are different
|
||||
// from API Services, which represent a concrete implementation of an interface
|
||||
// as opposed to simply a description of methods and bindings. They are also
|
||||
// sometimes simply referred to as "APIs" in other contexts, such as the name of
|
||||
// this message itself. See https://cloud.google.com/apis/design/glossary for
|
||||
// detailed terminology.
|
||||
message Api {
|
||||
|
||||
// The fully qualified name of this interface, including package name
|
||||
// followed by the interface's simple name.
|
||||
string name = 1;
|
||||
|
||||
// The methods of this interface, in unspecified order.
|
||||
repeated Method methods = 2;
|
||||
|
||||
// Any metadata attached to the interface.
|
||||
repeated Option options = 3;
|
||||
|
||||
// A version string for this interface. If specified, must have the form
|
||||
// `major-version.minor-version`, as in `1.10`. If the minor version is
|
||||
// omitted, it defaults to zero. If the entire version field is empty, the
|
||||
// major version is derived from the package name, as outlined below. If the
|
||||
// field is not empty, the version in the package name will be verified to be
|
||||
// consistent with what is provided here.
|
||||
//
|
||||
// The versioning schema uses [semantic
|
||||
// versioning](http://semver.org) where the major version number
|
||||
// indicates a breaking change and the minor version an additive,
|
||||
// non-breaking change. Both version numbers are signals to users
|
||||
// what to expect from different versions, and should be carefully
|
||||
// chosen based on the product plan.
|
||||
//
|
||||
// The major version is also reflected in the package name of the
|
||||
// interface, which must end in `v<major-version>`, as in
|
||||
// `google.feature.v1`. For major versions 0 and 1, the suffix can
|
||||
// be omitted. Zero major versions must only be used for
|
||||
// experimental, non-GA interfaces.
|
||||
//
|
||||
//
|
||||
string version = 4;
|
||||
|
||||
// Source context for the protocol buffer service represented by this
|
||||
// message.
|
||||
SourceContext source_context = 5;
|
||||
|
||||
// Included interfaces. See [Mixin][].
|
||||
repeated Mixin mixins = 6;
|
||||
|
||||
// The source syntax of the service.
|
||||
Syntax syntax = 7;
|
||||
}
|
||||
|
||||
// Method represents a method of an API interface.
|
||||
message Method {
|
||||
|
||||
// The simple name of this method.
|
||||
string name = 1;
|
||||
|
||||
// A URL of the input message type.
|
||||
string request_type_url = 2;
|
||||
|
||||
// If true, the request is streamed.
|
||||
bool request_streaming = 3;
|
||||
|
||||
// The URL of the output message type.
|
||||
string response_type_url = 4;
|
||||
|
||||
// If true, the response is streamed.
|
||||
bool response_streaming = 5;
|
||||
|
||||
// Any metadata attached to the method.
|
||||
repeated Option options = 6;
|
||||
|
||||
// The source syntax of this method.
|
||||
Syntax syntax = 7;
|
||||
}
|
||||
|
||||
// Declares an API Interface to be included in this interface. The including
|
||||
// interface must redeclare all the methods from the included interface, but
|
||||
// documentation and options are inherited as follows:
|
||||
//
|
||||
// - If after comment and whitespace stripping, the documentation
|
||||
// string of the redeclared method is empty, it will be inherited
|
||||
// from the original method.
|
||||
//
|
||||
// - Each annotation belonging to the service config (http,
|
||||
// visibility) which is not set in the redeclared method will be
|
||||
// inherited.
|
||||
//
|
||||
// - If an http annotation is inherited, the path pattern will be
|
||||
// modified as follows. Any version prefix will be replaced by the
|
||||
// version of the including interface plus the [root][] path if
|
||||
// specified.
|
||||
//
|
||||
// Example of a simple mixin:
|
||||
//
|
||||
// package google.acl.v1;
|
||||
// service AccessControl {
|
||||
// // Get the underlying ACL object.
|
||||
// rpc GetAcl(GetAclRequest) returns (Acl) {
|
||||
// option (google.api.http).get = "/v1/{resource=**}:getAcl";
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// package google.storage.v2;
|
||||
// service Storage {
|
||||
// rpc GetAcl(GetAclRequest) returns (Acl);
|
||||
//
|
||||
// // Get a data record.
|
||||
// rpc GetData(GetDataRequest) returns (Data) {
|
||||
// option (google.api.http).get = "/v2/{resource=**}";
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// Example of a mixin configuration:
|
||||
//
|
||||
// apis:
|
||||
// - name: google.storage.v2.Storage
|
||||
// mixins:
|
||||
// - name: google.acl.v1.AccessControl
|
||||
//
|
||||
// The mixin construct implies that all methods in `AccessControl` are
|
||||
// also declared with same name and request/response types in
|
||||
// `Storage`. A documentation generator or annotation processor will
|
||||
// see the effective `Storage.GetAcl` method after inheriting
|
||||
// documentation and annotations as follows:
|
||||
//
|
||||
// service Storage {
|
||||
// // Get the underlying ACL object.
|
||||
// rpc GetAcl(GetAclRequest) returns (Acl) {
|
||||
// option (google.api.http).get = "/v2/{resource=**}:getAcl";
|
||||
// }
|
||||
// ...
|
||||
// }
|
||||
//
|
||||
// Note how the version in the path pattern changed from `v1` to `v2`.
|
||||
//
|
||||
// If the `root` field in the mixin is specified, it should be a
|
||||
// relative path under which inherited HTTP paths are placed. Example:
|
||||
//
|
||||
// apis:
|
||||
// - name: google.storage.v2.Storage
|
||||
// mixins:
|
||||
// - name: google.acl.v1.AccessControl
|
||||
// root: acls
|
||||
//
|
||||
// This implies the following inherited HTTP annotation:
|
||||
//
|
||||
// service Storage {
|
||||
// // Get the underlying ACL object.
|
||||
// rpc GetAcl(GetAclRequest) returns (Acl) {
|
||||
// option (google.api.http).get = "/v2/acls/{resource=**}:getAcl";
|
||||
// }
|
||||
// ...
|
||||
// }
|
||||
message Mixin {
|
||||
// The fully qualified name of the interface which is included.
|
||||
string name = 1;
|
||||
|
||||
// If non-empty, specifies a path under which inherited HTTP paths
|
||||
// are rooted.
|
||||
string root = 2;
|
||||
}
|
||||
@ -0,0 +1,415 @@
|
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <google/protobuf/arena.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
|
||||
#ifdef ADDRESS_SANITIZER
|
||||
#include <sanitizer/asan_interface.h>
|
||||
#endif // ADDRESS_SANITIZER
|
||||
|
||||
#include <google/protobuf/stubs/port.h>
|
||||
|
||||
namespace google {
|
||||
static const size_t kMinCleanupListElements = 8;
|
||||
static const size_t kMaxCleanupListElements = 64; // 1kB on 64-bit.
|
||||
|
||||
namespace protobuf {
|
||||
namespace internal {
|
||||
|
||||
|
||||
std::atomic<int64> ArenaImpl::lifecycle_id_generator_;
|
||||
#if defined(GOOGLE_PROTOBUF_NO_THREADLOCAL)
|
||||
ArenaImpl::ThreadCache& ArenaImpl::thread_cache() {
|
||||
static internal::ThreadLocalStorage<ThreadCache>* thread_cache_ =
|
||||
new internal::ThreadLocalStorage<ThreadCache>();
|
||||
return *thread_cache_->Get();
|
||||
}
|
||||
#elif defined(PROTOBUF_USE_DLLS)
|
||||
ArenaImpl::ThreadCache& ArenaImpl::thread_cache() {
|
||||
static GOOGLE_THREAD_LOCAL ThreadCache thread_cache_ = { -1, NULL };
|
||||
return thread_cache_;
|
||||
}
|
||||
#else
|
||||
GOOGLE_THREAD_LOCAL ArenaImpl::ThreadCache ArenaImpl::thread_cache_ = {-1, NULL};
|
||||
#endif
|
||||
|
||||
void ArenaImpl::Init() {
|
||||
lifecycle_id_ =
|
||||
lifecycle_id_generator_.fetch_add(1, std::memory_order_relaxed);
|
||||
hint_.store(nullptr, std::memory_order_relaxed);
|
||||
threads_.store(nullptr, std::memory_order_relaxed);
|
||||
|
||||
if (initial_block_) {
|
||||
// Thread which calls Init() owns the first block. This allows the
|
||||
// single-threaded case to allocate on the first block without having to
|
||||
// perform atomic operations.
|
||||
new (initial_block_) Block(options_.initial_block_size, NULL);
|
||||
SerialArena* serial =
|
||||
SerialArena::New(initial_block_, &thread_cache(), this);
|
||||
serial->set_next(NULL);
|
||||
threads_.store(serial, std::memory_order_relaxed);
|
||||
space_allocated_.store(options_.initial_block_size,
|
||||
std::memory_order_relaxed);
|
||||
CacheSerialArena(serial);
|
||||
} else {
|
||||
space_allocated_.store(0, std::memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
// Tears the arena down in two passes.
ArenaImpl::~ArenaImpl() {
  // Pass 1: run every registered cleanup while all blocks are still alive,
  // because a destructor may refer to memory that lives in another block.
  CleanupList();
  // Pass 2: release the blocks; the byte count is not needed here.
  (void)FreeBlocks();
}
|
||||
|
||||
// Runs all cleanups, frees the blocks, and re-initializes the arena.
// Returns the byte count reported by FreeBlocks().
uint64 ArenaImpl::Reset() {
  // Cleanups must run before any block is freed: a destructor may refer to
  // memory that lives in a different block.
  CleanupList();
  const uint64 freed_bytes = FreeBlocks();
  Init();
  return freed_bytes;
}
|
||||
|
||||
// Allocates and constructs a new Block that chains to |last_block| and has
// room for at least |min_bytes| after the block header. The allocated size is
// added to space_allocated_.
ArenaImpl::Block* ArenaImpl::NewBlock(Block* last_block, size_t min_bytes) {
  // With no predecessor use the configured start size; otherwise double the
  // predecessor's size, capped at the configured maximum.
  size_t block_size = options_.start_block_size;
  if (last_block != NULL) {
    block_size = std::min(2 * last_block->size(), options_.max_block_size);
  }

  // Guard against overflow of min_bytes + kBlockHeaderSize before using it.
  GOOGLE_CHECK_LE(min_bytes, std::numeric_limits<size_t>::max() - kBlockHeaderSize);
  block_size = std::max(block_size, kBlockHeaderSize + min_bytes);

  void* const raw = options_.block_alloc(block_size);
  Block* const block = new (raw) Block(block_size, last_block);
  space_allocated_.fetch_add(block_size, std::memory_order_relaxed);
  return block;
}
|
||||
|
||||
// Builds a block header of |size| total bytes linked to |next|; the write
// position starts just past the header.
ArenaImpl::Block::Block(size_t size, Block* next)
    : next_(next),
      pos_(kBlockHeaderSize),
      size_(size) {
}
|
||||
|
||||
// Allocates a new cleanup chunk, makes it the head of the cleanup list, and
// then registers (elem, cleanup) through AddCleanup().
GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE
void ArenaImpl::SerialArena::AddCleanupFallback(void* elem,
                                                void (*cleanup)(void*)) {
  // Chunk capacity doubles from the previous chunk, starting at the minimum
  // and never exceeding the maximum.
  size_t capacity = kMinCleanupListElements;
  if (cleanup_ != NULL) {
    capacity = cleanup_->size * 2;
  }
  if (kMaxCleanupListElements < capacity) {
    capacity = kMaxCleanupListElements;
  }

  const size_t chunk_bytes =
      internal::AlignUpTo8(CleanupChunk::SizeOf(capacity));
  CleanupChunk* const chunk =
      reinterpret_cast<CleanupChunk*>(AllocateAligned(chunk_bytes));
  chunk->next = cleanup_;
  chunk->size = capacity;

  // Point the cursor and limit at the new, empty chunk.
  cleanup_ = chunk;
  cleanup_ptr_ = &chunk->nodes[0];
  cleanup_limit_ = cleanup_ptr_ + capacity;

  AddCleanup(elem, cleanup);
}
|
||||
|
||||
// Allocates |n| bytes from the calling thread's SerialArena, taking the fast
// lookup when it succeeds and the fallback otherwise.
GOOGLE_PROTOBUF_ATTRIBUTE_FUNC_ALIGN(32)
void* ArenaImpl::AllocateAligned(size_t n) {
  SerialArena* arena;
  return GOOGLE_PREDICT_TRUE(GetSerialArenaFast(&arena))
             ? arena->AllocateAligned(n)
             : AllocateAlignedFallback(n);
}
|
||||
|
||||
void* ArenaImpl::AllocateAlignedAndAddCleanup(size_t n,
|
||||
void (*cleanup)(void*)) {
|
||||
SerialArena* arena;
|
||||
if (GOOGLE_PREDICT_TRUE(GetSerialArenaFast(&arena))) {
|
||||
return arena->AllocateAlignedAndAddCleanup(n, cleanup);
|
||||
} else {
|
||||
return AllocateAlignedAndAddCleanupFallback(n, cleanup);
|
||||
}
|
||||
}
|
||||
|
||||
void ArenaImpl::AddCleanup(void* elem, void (*cleanup)(void*)) {
|
||||
SerialArena* arena;
|
||||
if (GOOGLE_PREDICT_TRUE(GetSerialArenaFast(&arena))) {
|
||||
arena->AddCleanup(elem, cleanup);
|
||||
} else {
|
||||
return AddCleanupFallback(elem, cleanup);
|
||||
}
|
||||
}
|
||||
|
||||
// Out-of-line path for AllocateAligned(): resolves the SerialArena through
// GetSerialArena() and allocates from it.
GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE
void* ArenaImpl::AllocateAlignedFallback(size_t n) {
  SerialArena* const serial = GetSerialArena();
  return serial->AllocateAligned(n);
}
|
||||
|
||||
// Out-of-line path for AllocateAlignedAndAddCleanup(): resolves the
// SerialArena through GetSerialArena() and forwards the request to it.
GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE
void* ArenaImpl::AllocateAlignedAndAddCleanupFallback(size_t n,
                                                      void (*cleanup)(void*)) {
  SerialArena* const serial = GetSerialArena();
  return serial->AllocateAlignedAndAddCleanup(n, cleanup);
}
|
||||
|
||||
// Out-of-line path for AddCleanup(): resolves the SerialArena through
// GetSerialArena() and registers the cleanup on it.
GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE
void ArenaImpl::AddCleanupFallback(void* elem, void (*cleanup)(void*)) {
  SerialArena* const serial = GetSerialArena();
  serial->AddCleanup(elem, cleanup);
}
|
||||
|
||||
// Tries to find the calling thread's SerialArena using only cached state.
// On success stores it in |*arena| and returns true; otherwise returns false
// and leaves |*arena| untouched.
inline GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE
bool ArenaImpl::GetSerialArenaFast(ArenaImpl::SerialArena** arena) {
  ThreadCache& cache = thread_cache();

  // First choice: the thread cache last saw this very arena. This optimizes
  // the case where multiple threads allocate from the same arena.
  if (GOOGLE_PREDICT_TRUE(cache.last_lifecycle_id_seen == lifecycle_id_)) {
    *arena = cache.last_serial_arena;
    return true;
  }

  // Second choice: the arena's hint belongs to this thread. This optimizes
  // the case where a single thread uses multiple arenas.
  SerialArena* const hinted = hint_.load(std::memory_order_acquire);
  const bool hint_is_ours = hinted != NULL && hinted->owner() == &cache;
  if (GOOGLE_PREDICT_TRUE(hint_is_ours)) {
    *arena = hinted;
    return true;
  }

  return false;
}
|
||||
|
||||
// Returns the calling thread's SerialArena, consulting the cached fast path
// first and falling back to GetSerialArenaFallback() otherwise.
ArenaImpl::SerialArena* ArenaImpl::GetSerialArena() {
  SerialArena* arena;
  if (!GOOGLE_PREDICT_TRUE(GetSerialArenaFast(&arena))) {
    return GetSerialArenaFallback(&thread_cache());
  }
  return arena;
}
|
||||
|
||||
// Requests a new head block from the owning arena with room for at least |n|
// bytes, switches the allocation window to it, and retries the allocation.
GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE
void* ArenaImpl::SerialArena::AllocateAlignedFallback(size_t n) {
  // Write the current position back into the outgoing head block, derived
  // from how many bytes remain between ptr_ and limit_.
  const size_t remaining = static_cast<size_t>(limit_ - ptr_);
  head_->set_pos(head_->size() - remaining);

  Block* const fresh = arena_->NewBlock(head_, n);
  head_ = fresh;
  ptr_ = fresh->Pointer(fresh->pos());
  limit_ = fresh->Pointer(fresh->size());

#ifdef ADDRESS_SANITIZER
  ASAN_POISON_MEMORY_REGION(ptr_, limit_ - ptr_);
#endif  // ADDRESS_SANITIZER

  return AllocateAligned(n);
}
|
||||
|
||||
// Returns the current value of the space_allocated_ counter (relaxed load).
uint64 ArenaImpl::SpaceAllocated() const {
  const uint64 allocated = space_allocated_.load(std::memory_order_relaxed);
  return allocated;
}
|
||||
|
||||
// Sums SerialArena::SpaceUsed() over every SerialArena in the thread list.
uint64 ArenaImpl::SpaceUsed() const {
  uint64 total = 0;
  SerialArena* serial = threads_.load(std::memory_order_acquire);
  while (serial != NULL) {
    total += serial->SpaceUsed();
    serial = serial->next();
  }
  return total;
}
|
||||
|
||||
// Returns the bytes in use across this SerialArena's blocks, excluding block
// headers and the SerialArena object itself.
uint64 ArenaImpl::SerialArena::SpaceUsed() const {
  // The head block's usage comes from ptr_ (head_->pos() can't be trusted).
  uint64 used = ptr_ - head_->Pointer(kBlockHeaderSize);

  // Every later block in the chain reports its usage through pos().
  Block* block = head_->next();
  while (block != NULL) {
    used += (block->pos() - kBlockHeaderSize);
    block = block->next();
  }

  // Remove the overhead of the SerialArena itself.
  used -= kSerialArenaSize;
  return used;
}
|
||||
|
||||
// Frees the blocks of every SerialArena in the thread list and returns the
// sum of the sizes reported by SerialArena::Free().
uint64 ArenaImpl::FreeBlocks() {
  uint64 total = 0;
  // By omitting an Acquire barrier we ensure that any user code that doesn't
  // properly synchronize Reset() or the destructor will throw a TSAN warning.
  SerialArena* next = NULL;
  for (SerialArena* serial = threads_.load(std::memory_order_relaxed);
       serial != NULL; serial = next) {
    // |serial| lives inside a block that is about to be freed, so its
    // successor must be read before the Free() call.
    next = serial->next();
    total += ArenaImpl::SerialArena::Free(serial, initial_block_,
                                          options_.block_dealloc);
    // |serial| must not be touched past this point.
  }
  return total;
}
|
||||
|
||||
// Walks the block chain starting at serial->head_, passing every block except
// |initial_block| to |block_dealloc|. Returns the summed size of all blocks
// visited, including |initial_block| when it is in the chain.
uint64 ArenaImpl::SerialArena::Free(ArenaImpl::SerialArena* serial,
                                    Block* initial_block,
                                    void (*block_dealloc)(void*, size_t)) {
  uint64 total = 0;

  // Care is needed here: one of the blocks being freed contains |serial|
  // itself, so only head_ is read from it, once, before any deallocation.
  Block* block = serial->head_;
  while (block != NULL) {
    // Both values live inside the block about to be released; read them now.
    Block* const following = block->next();
    const size_t block_size = block->size();
    total += block_size;

#ifdef ADDRESS_SANITIZER
    // This memory was provided by the underlying allocator as unpoisoned, so
    // return it in an unpoisoned state.
    ASAN_UNPOISON_MEMORY_REGION(block->Pointer(0), block_size);
#endif  // ADDRESS_SANITIZER

    if (block != initial_block) {
      block_dealloc(block, block_size);
    }

    block = following;
  }

  return total;
}
|
||||
|
||||
// Runs the registered cleanups of every SerialArena in the thread list.
void ArenaImpl::CleanupList() {
  // By omitting an Acquire barrier we ensure that any user code that doesn't
  // properly synchronize Reset() or the destructor will throw a TSAN warning.
  SerialArena* serial = threads_.load(std::memory_order_relaxed);
  while (serial != NULL) {
    serial->CleanupList();
    serial = serial->next();
  }
}
|
||||
|
||||
void ArenaImpl::SerialArena::CleanupList() {
|
||||
if (cleanup_ != NULL) {
|
||||
CleanupListFallback();
|
||||
}
|
||||
}
|
||||
|
||||
void ArenaImpl::SerialArena::CleanupListFallback() {
|
||||
// Cleanup newest chunk: ptrs give us length.
|
||||
size_t n = cleanup_ptr_ - &cleanup_->nodes[0];
|
||||
CleanupNode* node = cleanup_ptr_;
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
--node;
|
||||
node->cleanup(node->elem);
|
||||
}
|
||||
|
||||
// Cleanup older chunks, which are known to be full.
|
||||
CleanupChunk* list = cleanup_->next;
|
||||
while (list) {
|
||||
size_t n = list->size;
|
||||
CleanupNode* node = &list->nodes[list->size];
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
--node;
|
||||
node->cleanup(node->elem);
|
||||
}
|
||||
list = list->next;
|
||||
}
|
||||
}
|
||||
|
||||
// Carves a SerialArena out of the start of the fresh block |b| (right after
// the block header), records |owner| and |arena| in it, and returns it with
// |b| as its head block and an empty cleanup list.
ArenaImpl::SerialArena* ArenaImpl::SerialArena::New(Block* b, void* owner,
                                                    ArenaImpl* arena) {
  GOOGLE_DCHECK_EQ(b->pos(), kBlockHeaderSize);  // Should be a fresh block
  GOOGLE_DCHECK_LE(kBlockHeaderSize + kSerialArenaSize, b->size());

  // Reserve the bytes following the block header for the SerialArena object.
  void* const storage = b->Pointer(kBlockHeaderSize);
  b->set_pos(kBlockHeaderSize + kSerialArenaSize);
  SerialArena* const serial = reinterpret_cast<SerialArena*>(storage);

  // Identity.
  serial->arena_ = arena;
  serial->owner_ = owner;

  // Allocation window: everything in |b| past the SerialArena object.
  serial->head_ = b;
  serial->ptr_ = b->Pointer(b->pos());
  serial->limit_ = b->Pointer(b->size());

  // No cleanup chunk yet.
  serial->cleanup_ = NULL;
  serial->cleanup_ptr_ = NULL;
  serial->cleanup_limit_ = NULL;

  return serial;
}
|
||||
|
||||
// Finds the SerialArena owned by |me| in the thread list, creating it and
// pushing it onto the list if none exists, then caches and returns it.
GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE
ArenaImpl::SerialArena* ArenaImpl::GetSerialArenaFallback(void* me) {
  // Scan the linked list for an entry owned by |me|.
  SerialArena* serial = threads_.load(std::memory_order_acquire);
  while (serial != NULL && serial->owner() != me) {
    serial = serial->next();
  }

  if (serial == NULL) {
    // This thread doesn't have any SerialArena, which also means it doesn't
    // have any blocks yet. So we'll allocate its first block now.
    Block* const first_block = NewBlock(NULL, kSerialArenaSize);
    serial = SerialArena::New(first_block, me, this);

    // Push onto the head of the list. A failed exchange refreshes |expected|
    // with the current head, so the next pointer is re-linked and retried.
    SerialArena* expected = threads_.load(std::memory_order_relaxed);
    serial->set_next(expected);
    while (!threads_.compare_exchange_weak(expected, serial,
                                           std::memory_order_release,
                                           std::memory_order_relaxed)) {
      serial->set_next(expected);
    }
  }

  CacheSerialArena(serial);
  return serial;
}
|
||||
|
||||
} // namespace internal
|
||||
|
||||
void Arena::CallDestructorHooks() {
|
||||
uint64 space_allocated = impl_.SpaceAllocated();
|
||||
// Call the reset hook
|
||||
if (on_arena_reset_ != NULL) {
|
||||
on_arena_reset_(this, hooks_cookie_, space_allocated);
|
||||
}
|
||||
|
||||
// Call the destruction hook
|
||||
if (on_arena_destruction_ != NULL) {
|
||||
on_arena_destruction_(this, hooks_cookie_, space_allocated);
|
||||
}
|
||||
}
|
||||
|
||||
void Arena::OnArenaAllocation(const std::type_info* allocated_type,
|
||||
size_t n) const {
|
||||
if (on_arena_allocation_ != NULL) {
|
||||
on_arena_allocation_(allocated_type, n, hooks_cookie_);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace protobuf
|
||||
} // namespace google
|
||||
@ -0,0 +1,703 @@
|
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// This file defines an Arena allocator for better allocation performance.
|
||||
|
||||
#ifndef GOOGLE_PROTOBUF_ARENA_H__
|
||||
#define GOOGLE_PROTOBUF_ARENA_H__
|
||||
|
||||
#include <limits>
|
||||
#ifdef max
|
||||
#undef max // Visual Studio defines this macro
|
||||
#endif
|
||||
#if defined(_MSC_VER) && !defined(_LIBCPP_STD_VER) && !_HAS_EXCEPTIONS
|
||||
// Work around bugs in MSVC <typeinfo> header when _HAS_EXCEPTIONS=0.
|
||||
#include <exception>
|
||||
#include <typeinfo>
|
||||
namespace std {
|
||||
using type_info = ::type_info;
|
||||
}
|
||||
#else
|
||||
#include <typeinfo>
|
||||
#endif
|
||||
|
||||
#include <google/protobuf/arena_impl.h>
|
||||
#include <google/protobuf/stubs/port.h>
|
||||
#include <type_traits>
|
||||
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
|
||||
struct ArenaOptions; // defined below
|
||||
|
||||
} // namespace protobuf
|
||||
|
||||
namespace quality_webanswers {
|
||||
|
||||
void TempPrivateWorkAround(::google::protobuf::ArenaOptions* arena_options);
|
||||
|
||||
} // namespace quality_webanswers
|
||||
|
||||
namespace protobuf {
|
||||
|
||||
class Arena; // defined below
|
||||
class Message; // defined in message.h
|
||||
class MessageLite;
|
||||
|
||||
namespace arena_metrics {
|
||||
|
||||
void EnableArenaMetrics(::google::protobuf::ArenaOptions* options);
|
||||
|
||||
} // namespace arena_metrics
|
||||
|
||||
namespace internal {
|
||||
|
||||
struct ArenaStringPtr; // defined in arenastring.h
|
||||
class LazyField; // defined in lazy_field.h
|
||||
|
||||
template <typename Type>
|
||||
class GenericTypeHandler; // defined in repeated_field.h
|
||||
|
||||
// Templated cleanup methods.
|
||||
// Cleanup callback: runs T's destructor on |object| without releasing its
// storage.
template <typename T>
void arena_destruct_object(void* object) {
  T* const typed = static_cast<T*>(object);
  typed->~T();
}
|
||||
// Cleanup callback: deletes |object| as a T, running its destructor and
// releasing its storage.
template <typename T>
void arena_delete_object(void* object) {
  T* const typed = static_cast<T*>(object);
  delete typed;
}
|
||||
// Releases |object| with global operator delete. The sized overload is used
// when the compiler advertises sized deallocation; otherwise |size| is
// ignored.
inline void arena_free(void* object, size_t size) {
#if !defined(__GXX_DELETE_WITH_SIZE__) && !defined(__cpp_sized_deallocation)
  (void)size;
  ::operator delete(object);
#else
  ::operator delete(object, size);
#endif
}
|
||||
|
||||
} // namespace internal
|
||||
|
||||
// ArenaOptions provides optional additional parameters to arena construction
|
||||
// that control its block-allocation behavior.
|
||||
struct ArenaOptions {
|
||||
// This defines the size of the first block requested from the system malloc.
|
||||
// Subsequent block sizes will increase in a geometric series up to a maximum.
|
||||
size_t start_block_size;
|
||||
|
||||
// This defines the maximum block size requested from system malloc (unless an
|
||||
// individual arena allocation request occurs with a size larger than this
|
||||
// maximum). Requested block sizes increase up to this value, then remain
|
||||
// here.
|
||||
size_t max_block_size;
|
||||
|
||||
// An initial block of memory for the arena to use, or NULL for none. If
|
||||
// provided, the block must live at least as long as the arena itself. The
|
||||
// creator of the Arena retains ownership of the block after the Arena is
|
||||
// destroyed.
|
||||
char* initial_block;
|
||||
|
||||
// The size of the initial block, if provided.
|
||||
size_t initial_block_size;
|
||||
|
||||
// A function pointer to an alloc method that returns memory blocks of size
|
||||
// requested. By default, it contains a ptr to the malloc function.
|
||||
//
|
||||
// NOTE: block_alloc and dealloc functions are expected to behave like
|
||||
// malloc and free, including Asan poisoning.
|
||||
void* (*block_alloc)(size_t);
|
||||
// A function pointer to a dealloc method that takes ownership of the blocks
|
||||
// from the arena. By default, it contains a ptr to a wrapper function that
|
||||
// calls free.
|
||||
void (*block_dealloc)(void*, size_t);
|
||||
|
||||
ArenaOptions()
|
||||
: start_block_size(kDefaultStartBlockSize),
|
||||
max_block_size(kDefaultMaxBlockSize),
|
||||
initial_block(NULL),
|
||||
initial_block_size(0),
|
||||
block_alloc(&::operator new),
|
||||
block_dealloc(&internal::arena_free),
|
||||
on_arena_init(NULL),
|
||||
on_arena_reset(NULL),
|
||||
on_arena_destruction(NULL),
|
||||
on_arena_allocation(NULL) {}
|
||||
|
||||
private:
|
||||
// Hooks for adding external functionality such as user-specific metrics
|
||||
// collection, specific debugging abilities, etc.
|
||||
// Init hook may return a pointer to a cookie to be stored in the arena.
|
||||
// reset and destruction hooks will then be called with the same cookie
|
||||
// pointer. This allows us to save an external object per arena instance and
|
||||
// use it on the other hooks (Note: It is just as legal for init to return
|
||||
// NULL and not use the cookie feature).
|
||||
// on_arena_reset and on_arena_destruction also receive the space used in
|
||||
// the arena just before the reset.
|
||||
void* (*on_arena_init)(Arena* arena);
|
||||
void (*on_arena_reset)(Arena* arena, void* cookie, uint64 space_used);
|
||||
void (*on_arena_destruction)(Arena* arena, void* cookie, uint64 space_used);
|
||||
|
||||
// type_info is promised to be static - its lifetime extends to
|
||||
// match program's lifetime (It is given by typeid operator).
|
||||
// Note: typeid(void) will be passed as allocated_type every time we
|
||||
// intentionally want to avoid monitoring an allocation. (i.e. internal
|
||||
// allocations for managing the arena)
|
||||
void (*on_arena_allocation)(const std::type_info* allocated_type,
|
||||
uint64 alloc_size, void* cookie);
|
||||
|
||||
// Constants define default starting block size and max block size for
|
||||
// arena allocator behavior -- see descriptions above.
|
||||
static const size_t kDefaultStartBlockSize = 256;
|
||||
static const size_t kDefaultMaxBlockSize = 8192;
|
||||
|
||||
friend void ::google::protobuf::arena_metrics::EnableArenaMetrics(ArenaOptions*);
|
||||
friend void quality_webanswers::TempPrivateWorkAround(ArenaOptions*);
|
||||
friend class Arena;
|
||||
friend class ArenaOptionsTestFriend;
|
||||
};
|
||||
|
||||
// Support for non-RTTI environments. (The metrics hooks API uses type
|
||||
// information.)
|
||||
#ifndef GOOGLE_PROTOBUF_NO_RTTI
|
||||
#define RTTI_TYPE_ID(type) (&typeid(type))
|
||||
#else
|
||||
#define RTTI_TYPE_ID(type) (NULL)
|
||||
#endif
|
||||
|
||||
// Arena allocator. Arena allocation replaces ordinary (heap-based) allocation
|
||||
// with new/delete, and improves performance by aggregating allocations into
|
||||
// larger blocks and freeing allocations all at once. Protocol messages are
|
||||
// allocated on an arena by using Arena::CreateMessage<T>(Arena*), below, and
|
||||
// are automatically freed when the arena is destroyed.
|
||||
//
|
||||
// This is a thread-safe implementation: multiple threads may allocate from the
|
||||
// arena concurrently. Destruction is not thread-safe and the destructing
|
||||
// thread must synchronize with users of the arena first.
|
||||
//
|
||||
// An arena provides two allocation interfaces: CreateMessage<T>, which works
|
||||
// for arena-enabled proto2 message types as well as other types that satisfy
|
||||
// the appropriate protocol (described below), and Create<T>, which works for
|
||||
// any arbitrary type T. CreateMessage<T> is better when the type T supports it,
|
||||
// because this interface (i) passes the arena pointer to the created object so
|
||||
// that its sub-objects and internal allocations can use the arena too, and (ii)
|
||||
// elides the object's destructor call when possible. Create<T> does not place
|
||||
// any special requirements on the type T, and will invoke the object's
|
||||
// destructor when the arena is destroyed.
|
||||
//
|
||||
// The arena message allocation protocol, required by CreateMessage<T>, is as
|
||||
// follows:
|
||||
//
|
||||
// - The type T must have (at least) two constructors: a constructor with no
|
||||
// arguments, called when a T is allocated on the heap; and a constructor with
|
||||
// a google::protobuf::Arena* argument, called when a T is allocated on an arena. If the
|
||||
// second constructor is called with a NULL arena pointer, it must be
|
||||
// equivalent to invoking the first (no-argument) constructor.
|
||||
//
|
||||
// - The type T must have a particular type trait: a nested type
|
||||
// |InternalArenaConstructable_|. This is usually a typedef to |void|. If no
|
||||
// such type trait exists, then the instantiation CreateMessage<T> will fail
|
||||
// to compile.
|
||||
//
|
||||
// - The type T *may* have the type trait |DestructorSkippable_|. If this type
|
||||
// trait is present in the type, then its destructor will not be called if and
|
||||
// only if it was passed a non-NULL arena pointer. If this type trait is not
|
||||
// present on the type, then its destructor is always called when the
|
||||
// containing arena is destroyed.
|
||||
//
|
||||
// - One- and two-user-argument forms of CreateMessage<T>() also exist that
|
||||
// forward these constructor arguments to T's constructor: for example,
|
||||
// CreateMessage<T>(Arena*, arg1, arg2) forwards to a constructor T(Arena*,
|
||||
// arg1, arg2).
|
||||
//
|
||||
// This protocol is implemented by all arena-enabled proto2 message classes as
|
||||
// well as RepeatedPtrField.
|
||||
//
|
||||
// Do NOT subclass Arena. This class will be marked as final when C++11 is
|
||||
// enabled.
|
||||
class LIBPROTOBUF_EXPORT Arena {
|
||||
public:
|
||||
// Arena constructor taking custom options. See ArenaOptions above for
|
||||
// descriptions of the options available.
|
||||
explicit Arena(const ArenaOptions& options) : impl_(options) {
  // impl_ is built from the options first; Init() then installs the hooks
  // carried by the same options object.
  Init(options);
}
|
||||
|
||||
// Block overhead. Use this as a guide for how much to over-allocate the
|
||||
// initial block if you want an allocation of size N to fit inside it.
|
||||
//
|
||||
// WARNING: if you allocate multiple objects, it is difficult to guarantee
|
||||
// that a series of allocations will fit in the initial block, especially if
|
||||
// Arena changes its alignment guarantees in the future!
|
||||
static const size_t kBlockOverhead = internal::ArenaImpl::kBlockHeaderSize +
|
||||
internal::ArenaImpl::kSerialArenaSize;
|
||||
|
||||
// Default constructor with sensible default options, tuned for average
|
||||
// use-cases.
|
||||
// Delegates to the options-taking constructor with a default-constructed
// ArenaOptions, which initializes impl_ and then runs Init().
Arena() : Arena(ArenaOptions()) {}
|
||||
|
||||
~Arena() {
  // The destruction hooks are only invoked when a cookie was stored by
  // Init(); a NULL cookie skips them entirely.
  if (hooks_cookie_ != NULL) CallDestructorHooks();
}
|
||||
|
||||
void Init(const ArenaOptions& options) {
|
||||
on_arena_allocation_ = options.on_arena_allocation;
|
||||
on_arena_reset_ = options.on_arena_reset;
|
||||
on_arena_destruction_ = options.on_arena_destruction;
|
||||
// Call the initialization hook
|
||||
if (options.on_arena_init != NULL) {
|
||||
hooks_cookie_ = options.on_arena_init(this);
|
||||
} else {
|
||||
hooks_cookie_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// API to create proto2 message objects on the arena. If the arena passed in
|
||||
// is NULL, then a heap allocated object is returned. Type T must be a message
|
||||
// defined in a .proto file with cc_enable_arenas set to true, otherwise a
|
||||
// compilation error will occur.
|
||||
//
|
||||
// RepeatedField and RepeatedPtrField may also be instantiated directly on an
|
||||
// arena with this method.
|
||||
//
|
||||
// This function also accepts any type T that satisfies the arena message
|
||||
// allocation protocol, documented above.
|
||||
template <typename T, typename... Args>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* CreateMessage(
|
||||
Arena* arena, Args&&... args) {
|
||||
static_assert(
|
||||
InternalHelper<T>::is_arena_constructable::value,
|
||||
"CreateMessage can only construct types that are ArenaConstructable");
|
||||
// We must delegate to CreateMaybeMessage() and NOT CreateMessageInternal()
|
||||
// because protobuf generated classes specialize CreateMaybeMessage() and we
|
||||
// need to use that specialization for code size reasons.
|
||||
return Arena::CreateMaybeMessage<T>(arena, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
// API to create any objects on the arena. Note that only the object will
|
||||
// be created on the arena; the underlying ptrs (in case of a proto2 message)
|
||||
// will be still heap allocated. Proto messages should usually be allocated
|
||||
// with CreateMessage<T>() instead.
|
||||
//
|
||||
// Note that even if T satisfies the arena message construction protocol
|
||||
// (InternalArenaConstructable_ trait and optional DestructorSkippable_
|
||||
// trait), as described above, this function does not follow the protocol;
|
||||
// instead, it treats T as a black-box type, just as if it did not have these
|
||||
// traits. Specifically, T's constructor arguments will always be only those
|
||||
// passed to Create<T>() -- no additional arena pointer is implicitly added.
|
||||
// Furthermore, the destructor will always be called at arena destruction time
|
||||
// (unless the destructor is trivial). Hence, from T's point of view, it is as
|
||||
// if the object were allocated on the heap (except that the underlying memory
|
||||
// is obtained from the arena).
|
||||
template <typename T, typename... Args>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* Create(Arena* arena,
|
||||
Args&&... args) {
|
||||
return CreateNoMessage<T>(arena, is_arena_constructable<T>(),
|
||||
std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
// Create an array of object type T on the arena *without* invoking the
|
||||
// constructor of T. If `arena` is null, then the return value should be freed
|
||||
// with `delete[] x;` (or `::operator delete[](x);`).
|
||||
// To ensure safe uses, this function checks at compile time
|
||||
// (when compiled as C++11) that T is trivially default-constructible and
|
||||
// trivially destructible.
|
||||
template <typename T>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* CreateArray(
|
||||
Arena* arena, size_t num_elements) {
|
||||
static_assert(std::is_pod<T>::value,
|
||||
"CreateArray requires a trivially constructible type");
|
||||
static_assert(std::is_trivially_destructible<T>::value,
|
||||
"CreateArray requires a trivially destructible type");
|
||||
GOOGLE_CHECK_LE(num_elements, std::numeric_limits<size_t>::max() / sizeof(T))
|
||||
<< "Requested size is too large to fit into size_t.";
|
||||
if (arena == NULL) {
|
||||
return static_cast<T*>(::operator new[](num_elements * sizeof(T)));
|
||||
} else {
|
||||
return arena->CreateInternalRawArray<T>(num_elements);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the total space allocated by the arena, which is the sum of the
|
||||
// sizes of the underlying blocks. This method is relatively fast; a counter
|
||||
// is kept as blocks are allocated.
|
||||
uint64 SpaceAllocated() const { return impl_.SpaceAllocated(); }
|
||||
// Returns the total space used by the arena. Similar to SpaceAllocated but
|
||||
// does not include free space and block overhead. The total space returned
|
||||
// may not include space used by other threads executing concurrently with
|
||||
// the call to this method.
|
||||
uint64 SpaceUsed() const { return impl_.SpaceUsed(); }
|
||||
// DEPRECATED. Please use SpaceAllocated() and SpaceUsed().
|
||||
//
|
||||
// Combines SpaceAllocated and SpaceUsed. Returns a pair of
|
||||
// <space_allocated, space_used>.
|
||||
PROTOBUF_RUNTIME_DEPRECATED("Please use SpaceAllocated() and SpaceUsed()")
std::pair<uint64, uint64> SpaceAllocatedAndUsed() const {
  // first = allocated bytes, second = used bytes.
  const uint64 allocated = SpaceAllocated();
  const uint64 used = SpaceUsed();
  return std::pair<uint64, uint64>(allocated, used);
}
|
||||
|
||||
// Frees all storage allocated by this arena after calling destructors
|
||||
// registered with OwnDestructor() and freeing objects registered with Own().
|
||||
// Any objects allocated on this arena are unusable after this call. It also
|
||||
// returns the total space used by the arena which is the sum of the sizes
|
||||
// of the allocated blocks. This method is not thread-safe.
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE uint64 Reset() {
  // Call the reset hook, if installed, before the implementation is reset.
  // The value reported to the hook is impl_.SpaceAllocated() at this point.
  if (on_arena_reset_ != NULL) {
    const uint64 allocated_before_reset = impl_.SpaceAllocated();
    on_arena_reset_(this, hooks_cookie_, allocated_before_reset);
  }
  return impl_.Reset();
}
|
||||
|
||||
// Adds |object| to a list of heap-allocated objects to be freed with |delete|
|
||||
// when the arena is destroyed or reset.
|
||||
template <typename T>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE void Own(T* object) {
|
||||
OwnInternal(object, std::is_convertible<T*, Message*>());
|
||||
}
|
||||
|
||||
// Adds |object| to a list of objects whose destructors will be manually
|
||||
// called when the arena is destroyed or reset. This differs from Own() in
|
||||
// that it does not free the underlying memory with |delete|; hence, it is
|
||||
// normally only used for objects that are placement-newed into
|
||||
// arena-allocated memory.
|
||||
template <typename T>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE void OwnDestructor(T* object) {
|
||||
if (object != NULL) {
|
||||
impl_.AddCleanup(object, &internal::arena_destruct_object<T>);
|
||||
}
|
||||
}
|
||||
|
||||
// Adds a custom member function on an object to the list of destructors that
|
||||
// will be manually called when the arena is destroyed or reset. This differs
|
||||
// from OwnDestructor() in that any member function may be specified, not only
|
||||
// the class destructor.
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE void OwnCustomDestructor(
    void* object, void (*destruct)(void*)) {
  // The pair is forwarded unchanged to the implementation's cleanup list;
  // unlike OwnDestructor(), |object| is not checked against NULL here.
  impl_.AddCleanup(object, destruct);
}
|
||||
|
||||
// Retrieves the arena associated with |value| if |value| is an arena-capable
|
||||
// message, or NULL otherwise. This differs from value->GetArena() in that the
|
||||
// latter is a virtual call, while this method is a templated call that
|
||||
// resolves at compile-time.
|
||||
template <typename T>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static Arena* GetArena(
|
||||
const T* value) {
|
||||
return GetArenaInternal(value, is_arena_constructable<T>());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
class InternalHelper {
|
||||
template <typename U>
|
||||
static char DestructorSkippable(const typename U::DestructorSkippable_*);
|
||||
template <typename U>
|
||||
static double DestructorSkippable(...);
|
||||
|
||||
typedef std::integral_constant<
|
||||
bool, sizeof(DestructorSkippable<T>(static_cast<const T*>(0))) ==
|
||||
sizeof(char) ||
|
||||
std::is_trivially_destructible<T>::value>
|
||||
is_destructor_skippable;
|
||||
|
||||
template <typename U>
|
||||
static char ArenaConstructable(
|
||||
const typename U::InternalArenaConstructable_*);
|
||||
template <typename U>
|
||||
static double ArenaConstructable(...);
|
||||
|
||||
typedef std::integral_constant<bool, sizeof(ArenaConstructable<T>(
|
||||
static_cast<const T*>(0))) ==
|
||||
sizeof(char)>
|
||||
is_arena_constructable;
|
||||
|
||||
template <typename... Args>
|
||||
static T* Construct(void* ptr, Args&&... args) {
|
||||
return new (ptr) T(std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
static Arena* GetArena(const T* p) { return p->GetArenaNoVirtual(); }
|
||||
|
||||
friend class Arena;
|
||||
};
|
||||
|
||||
// Helper typetraits that indicates support for arenas in a type T at compile
|
||||
// time. This is public only to allow construction of higher-level templated
|
||||
// utilities.
|
||||
//
|
||||
// is_arena_constructable<T>::value is true if the message type T has arena
|
||||
// support enabled, and false otherwise.
|
||||
//
|
||||
// is_destructor_skippable<T>::value is true if the message type T has told
|
||||
// the arena that it is safe to skip the destructor, and false otherwise.
|
||||
//
|
||||
// This is inside Arena because only Arena has the friend relationships
|
||||
// necessary to see the underlying generated code traits.
|
||||
template <typename T>
|
||||
struct is_arena_constructable : InternalHelper<T>::is_arena_constructable {};
|
||||
template <typename T>
|
||||
struct is_destructor_skippable : InternalHelper<T>::is_destructor_skippable {
|
||||
};
|
||||
|
||||
private:
|
||||
template <typename T, typename... Args>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* CreateMessageInternal(
|
||||
Arena* arena, Args&&... args) {
|
||||
static_assert(
|
||||
InternalHelper<T>::is_arena_constructable::value,
|
||||
"CreateMessage can only construct types that are ArenaConstructable");
|
||||
if (arena == NULL) {
|
||||
return new T(nullptr, std::forward<Args>(args)...);
|
||||
} else {
|
||||
return arena->DoCreateMessage<T>(std::forward<Args>(args)...);
|
||||
}
|
||||
}
|
||||
|
||||
// This specialization for no arguments is necessary, because its behavior is
|
||||
// slightly different. When the arena pointer is nullptr, it calls T()
|
||||
// instead of T(nullptr).
|
||||
template <typename T>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* CreateMessageInternal(
|
||||
Arena* arena) {
|
||||
static_assert(
|
||||
InternalHelper<T>::is_arena_constructable::value,
|
||||
"CreateMessage can only construct types that are ArenaConstructable");
|
||||
if (arena == NULL) {
|
||||
return new T();
|
||||
} else {
|
||||
return arena->DoCreateMessage<T>();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, typename... Args>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* CreateInternal(
|
||||
Arena* arena, Args&&... args) {
|
||||
if (arena == NULL) {
|
||||
return new T(std::forward<Args>(args)...);
|
||||
} else {
|
||||
return arena->DoCreate<T>(std::is_trivially_destructible<T>::value,
|
||||
std::forward<Args>(args)...);
|
||||
}
|
||||
}
|
||||
|
||||
void CallDestructorHooks();
|
||||
void OnArenaAllocation(const std::type_info* allocated_type, size_t n) const;
|
||||
inline void AllocHook(const std::type_info* allocated_type, size_t n) const {
|
||||
if (GOOGLE_PREDICT_FALSE(hooks_cookie_ != NULL)) {
|
||||
OnArenaAllocation(allocated_type, n);
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate and also optionally call on_arena_allocation callback with the
|
||||
// allocated type info when the hooks are in place in ArenaOptions and
|
||||
// the cookie is not null.
|
||||
template <typename T>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE void* AllocateInternal(
|
||||
bool skip_explicit_ownership) {
|
||||
const size_t n = internal::AlignUpTo8(sizeof(T));
|
||||
AllocHook(RTTI_TYPE_ID(T), n);
|
||||
// Monitor allocation if needed.
|
||||
if (skip_explicit_ownership) {
|
||||
return impl_.AllocateAligned(n);
|
||||
} else {
|
||||
return impl_.AllocateAlignedAndAddCleanup(
|
||||
n, &internal::arena_destruct_object<T>);
|
||||
}
|
||||
}
|
||||
|
||||
// CreateMessage<T> requires that T supports arenas, but this private method
|
||||
// works whether or not T supports arenas. These are not exposed to user code
|
||||
// as it can cause confusing API usages, and end up having double free in
|
||||
// user code. These are used only internally from LazyField and Repeated
|
||||
// fields, since they are designed to work in all mode combinations.
|
||||
template <typename Msg, typename... Args>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static Msg* DoCreateMaybeMessage(
|
||||
Arena* arena, std::true_type, Args&&... args) {
|
||||
return CreateMessageInternal<Msg>(arena, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
template <typename T, typename... Args>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* DoCreateMaybeMessage(
|
||||
Arena* arena, std::false_type, Args&&... args) {
|
||||
return CreateInternal<T>(arena, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
template <typename T, typename... Args>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* CreateMaybeMessage(
|
||||
Arena* arena, Args&&... args) {
|
||||
return DoCreateMaybeMessage<T>(arena, is_arena_constructable<T>(),
|
||||
std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
template <typename T, typename... Args>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* CreateNoMessage(
|
||||
Arena* arena, std::true_type, Args&&... args) {
|
||||
// User is constructing with Create() despite the fact that T supports arena
|
||||
// construction. In this case we have to delegate to CreateInternal(), and
|
||||
// we can't use any CreateMaybeMessage() specialization that may be defined.
|
||||
return CreateInternal<T>(arena, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
template <typename T, typename... Args>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* CreateNoMessage(
|
||||
Arena* arena, std::false_type, Args&&... args) {
|
||||
// User is constructing with Create() and the type does not support arena
|
||||
// construction. In this case we can delegate to CreateMaybeMessage() and
|
||||
// use any specialization that may be available for that.
|
||||
return CreateMaybeMessage<T>(arena, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
// Just allocate the required size for the given type assuming the
|
||||
// type has a trivial constructor.
|
||||
template <typename T>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE T* CreateInternalRawArray(
|
||||
size_t num_elements) {
|
||||
GOOGLE_CHECK_LE(num_elements, std::numeric_limits<size_t>::max() / sizeof(T))
|
||||
<< "Requested size is too large to fit into size_t.";
|
||||
const size_t n = internal::AlignUpTo8(sizeof(T) * num_elements);
|
||||
// Monitor allocation if needed.
|
||||
AllocHook(RTTI_TYPE_ID(T), n);
|
||||
return static_cast<T*>(impl_.AllocateAligned(n));
|
||||
}
|
||||
|
||||
template <typename T, typename... Args>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE T* DoCreate(
|
||||
bool skip_explicit_ownership, Args&&... args) {
|
||||
return new (AllocateInternal<T>(skip_explicit_ownership))
|
||||
T(std::forward<Args>(args)...);
|
||||
}
|
||||
template <typename T, typename... Args>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE T* DoCreateMessage(Args&&... args) {
|
||||
return InternalHelper<T>::Construct(
|
||||
AllocateInternal<T>(InternalHelper<T>::is_destructor_skippable::value),
|
||||
this, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
// CreateInArenaStorage is used to implement map field. Without it,
|
||||
// google::protobuf::Map need to call generated message's protected arena constructor,
|
||||
// which needs to declare google::protobuf::Map as friend of generated message.
|
||||
template <typename T>
|
||||
static void CreateInArenaStorage(T* ptr, Arena* arena) {
|
||||
CreateInArenaStorageInternal(ptr, arena,
|
||||
typename is_arena_constructable<T>::type());
|
||||
RegisterDestructorInternal(
|
||||
ptr, arena,
|
||||
typename InternalHelper<T>::is_destructor_skippable::type());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void CreateInArenaStorageInternal(T* ptr, Arena* arena,
|
||||
std::true_type) {
|
||||
InternalHelper<T>::Construct(ptr, arena);
|
||||
}
|
||||
template <typename T>
|
||||
static void CreateInArenaStorageInternal(T* ptr, Arena* /* arena */,
|
||||
std::false_type) {
|
||||
new (ptr) T();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void RegisterDestructorInternal(T* /* ptr */, Arena* /* arena */,
|
||||
std::true_type) {}
|
||||
template <typename T>
|
||||
static void RegisterDestructorInternal(T* ptr, Arena* arena,
|
||||
std::false_type) {
|
||||
arena->OwnDestructor(ptr);
|
||||
}
|
||||
|
||||
// These implement Own(), which registers an object for deletion (destructor
|
||||
// call and operator delete()). The second parameter has type 'true_type' if T
|
||||
// is a subtype of ::google::protobuf::Message and 'false_type' otherwise. Collapsing
|
||||
// all template instantiations to one for generic Message reduces code size,
|
||||
// using the virtual destructor instead.
|
||||
template <typename T>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE void OwnInternal(T* object,
|
||||
std::true_type) {
|
||||
if (object != NULL) {
|
||||
impl_.AddCleanup(object, &internal::arena_delete_object<Message>);
|
||||
}
|
||||
}
|
||||
template <typename T>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE void OwnInternal(T* object,
|
||||
std::false_type) {
|
||||
if (object != NULL) {
|
||||
impl_.AddCleanup(object, &internal::arena_delete_object<T>);
|
||||
}
|
||||
}
|
||||
|
||||
// Implementation for GetArena(). Only message objects with
|
||||
// InternalArenaConstructable_ tags can be associated with an arena, and such
|
||||
// objects must implement a GetArenaNoVirtual() method.
|
||||
template <typename T>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static Arena* GetArenaInternal(
|
||||
const T* value, std::true_type) {
|
||||
return InternalHelper<T>::GetArena(value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static Arena* GetArenaInternal(
|
||||
const T* /* value */, std::false_type) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// For friends of arena.
|
||||
void* AllocateAligned(size_t n) {
|
||||
AllocHook(NULL, n);
|
||||
return impl_.AllocateAligned(internal::AlignUpTo8(n));
|
||||
}
|
||||
|
||||
internal::ArenaImpl impl_;
|
||||
|
||||
void (*on_arena_allocation_)(const std::type_info* allocated_type,
|
||||
uint64 alloc_size, void* cookie);
|
||||
void (*on_arena_reset_)(Arena* arena, void* cookie, uint64 space_used);
|
||||
void (*on_arena_destruction_)(Arena* arena, void* cookie, uint64 space_used);
|
||||
|
||||
// The arena may save a cookie it receives from the external on_init hook
|
||||
// and then use it when calling the on_reset and on_destruction hooks.
|
||||
void* hooks_cookie_;
|
||||
|
||||
template <typename Type>
|
||||
friend class internal::GenericTypeHandler;
|
||||
friend struct internal::ArenaStringPtr; // For AllocateAligned.
|
||||
friend class internal::LazyField; // For CreateMaybeMessage.
|
||||
friend class MessageLite;
|
||||
template <typename Key, typename T>
|
||||
friend class Map;
|
||||
};
|
||||
|
||||
// Defined above for supporting environments without RTTI.
|
||||
#undef RTTI_TYPE_ID
|
||||
|
||||
} // namespace protobuf
|
||||
|
||||
} // namespace google
|
||||
#endif // GOOGLE_PROTOBUF_ARENA_H__
|
||||
@ -0,0 +1,321 @@
|
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// This file defines an Arena allocator for better allocation performance.
|
||||
|
||||
#ifndef GOOGLE_PROTOBUF_ARENA_IMPL_H__
|
||||
#define GOOGLE_PROTOBUF_ARENA_IMPL_H__
|
||||
|
||||
#include <atomic>
|
||||
#include <limits>
|
||||
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
#include <google/protobuf/stubs/logging.h>
|
||||
|
||||
#include <google/protobuf/stubs/port.h>
|
||||
|
||||
#ifdef ADDRESS_SANITIZER
|
||||
#include <sanitizer/asan_interface.h>
|
||||
#endif // ADDRESS_SANITIZER
|
||||
|
||||
namespace google {
|
||||
|
||||
namespace protobuf {
|
||||
namespace internal {
|
||||
|
||||
inline size_t AlignUpTo8(size_t n) {
  // Round n up to the next multiple of 8: add 7, then clear the three
  // low-order bits (from Hacker's Delight, Chapter 3).
  const size_t low_bits_mask = 7;
  return (n + low_bits_mask) & ~low_bits_mask;
}
|
||||
|
||||
// This class provides the core Arena memory allocation library. Different
|
||||
// implementations only need to implement the public interface below.
|
||||
// Arena is not a template type as that would only be useful if all protos
|
||||
// in turn would be templates, which will/cannot happen. However separating
|
||||
// the memory allocation part from the cruft of the API users expect we can
|
||||
// use #ifdef to select the best implementation based on hardware / OS.
|
||||
class LIBPROTOBUF_EXPORT ArenaImpl {
|
||||
public:
|
||||
struct Options {
  size_t start_block_size;
  size_t max_block_size;
  char* initial_block;
  size_t initial_block_size;
  void* (*block_alloc)(size_t);
  void (*block_dealloc)(void*, size_t);

  // Copies the six block-management fields from any options type O that
  // exposes members with the same names.
  template <typename O>
  explicit Options(const O& options)
      : start_block_size(options.start_block_size),
        max_block_size(options.max_block_size),
        initial_block(options.initial_block),
        initial_block_size(options.initial_block_size),
        block_alloc(options.block_alloc),
        block_dealloc(options.block_dealloc) {}
};
|
||||
|
||||
template <typename O>
|
||||
explicit ArenaImpl(const O& options) : options_(options) {
|
||||
if (options_.initial_block != NULL && options_.initial_block_size > 0) {
|
||||
GOOGLE_CHECK_GE(options_.initial_block_size, sizeof(Block))
|
||||
<< ": Initial block size too small for header.";
|
||||
initial_block_ = reinterpret_cast<Block*>(options_.initial_block);
|
||||
} else {
|
||||
initial_block_ = NULL;
|
||||
}
|
||||
|
||||
Init();
|
||||
}
|
||||
|
||||
// Destructor deletes all owned heap allocated objects, and destructs objects
|
||||
// that have non-trivial destructors, except for proto2 message objects whose
|
||||
// destructors can be skipped. Also, frees all blocks except the initial block
|
||||
// if it was passed in.
|
||||
~ArenaImpl();
|
||||
|
||||
uint64 Reset();
|
||||
|
||||
uint64 SpaceAllocated() const;
|
||||
uint64 SpaceUsed() const;
|
||||
|
||||
void* AllocateAligned(size_t n);
|
||||
|
||||
void* AllocateAlignedAndAddCleanup(size_t n, void (*cleanup)(void*));
|
||||
|
||||
// Add object pointer and cleanup function pointer to the list.
|
||||
void AddCleanup(void* elem, void (*cleanup)(void*));
|
||||
|
||||
private:
|
||||
void* AllocateAlignedFallback(size_t n);
|
||||
void* AllocateAlignedAndAddCleanupFallback(size_t n, void (*cleanup)(void*));
|
||||
void AddCleanupFallback(void* elem, void (*cleanup)(void*));
|
||||
|
||||
// Node contains the ptr of the object to be cleaned up and the associated
|
||||
// cleanup function ptr.
|
||||
struct CleanupNode {
  // Pointer to the object to be cleaned up.
  void* elem;
  // Function pointer to the destructor or deleter to call for it.
  void (*cleanup)(void*);
};
|
||||
|
||||
// Cleanup uses a chunked linked list, to reduce pointer chasing.
|
||||
struct CleanupChunk {
|
||||
static size_t SizeOf(size_t i) {
|
||||
return sizeof(CleanupChunk) + (sizeof(CleanupNode) * (i - 1));
|
||||
}
|
||||
size_t size; // Total elements in the list.
|
||||
CleanupChunk* next; // Next node in the list.
|
||||
CleanupNode nodes[1]; // True length is |size|.
|
||||
};
|
||||
|
||||
class Block;
|
||||
|
||||
// A thread-unsafe Arena that can only be used within its owning thread.
|
||||
class LIBPROTOBUF_EXPORT SerialArena {
|
||||
public:
|
||||
// The allocate/free methods here are a little strange, since SerialArena is
|
||||
// allocated inside a Block which it also manages. This is to avoid doing
|
||||
// an extra allocation for the SerialArena itself.
|
||||
|
||||
// Creates a new SerialArena inside Block* and returns it.
|
||||
static SerialArena* New(Block* b, void* owner, ArenaImpl* arena);
|
||||
|
||||
// Destroys this SerialArena, freeing all blocks with the given dealloc
|
||||
// function, except any block equal to |initial_block|.
|
||||
static uint64 Free(SerialArena* serial, Block* initial_block,
|
||||
void (*block_dealloc)(void*, size_t));
|
||||
|
||||
void CleanupList();
|
||||
uint64 SpaceUsed() const;
|
||||
|
||||
void* AllocateAligned(size_t n) {
|
||||
GOOGLE_DCHECK_EQ(internal::AlignUpTo8(n), n); // Must be already aligned.
|
||||
GOOGLE_DCHECK_GE(limit_, ptr_);
|
||||
if (GOOGLE_PREDICT_FALSE(static_cast<size_t>(limit_ - ptr_) < n)) {
|
||||
return AllocateAlignedFallback(n);
|
||||
}
|
||||
void* ret = ptr_;
|
||||
ptr_ += n;
|
||||
#ifdef ADDRESS_SANITIZER
|
||||
ASAN_UNPOISON_MEMORY_REGION(ret, n);
|
||||
#endif // ADDRESS_SANITIZER
|
||||
return ret;
|
||||
}
|
||||
|
||||
void AddCleanup(void* elem, void (*cleanup)(void*)) {
|
||||
if (GOOGLE_PREDICT_FALSE(cleanup_ptr_ == cleanup_limit_)) {
|
||||
AddCleanupFallback(elem, cleanup);
|
||||
return;
|
||||
}
|
||||
cleanup_ptr_->elem = elem;
|
||||
cleanup_ptr_->cleanup = cleanup;
|
||||
cleanup_ptr_++;
|
||||
}
|
||||
|
||||
void* AllocateAlignedAndAddCleanup(size_t n, void (*cleanup)(void*)) {
|
||||
void* ret = AllocateAligned(n);
|
||||
AddCleanup(ret, cleanup);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void* owner() const { return owner_; }
|
||||
SerialArena* next() const { return next_; }
|
||||
void set_next(SerialArena* next) { next_ = next; }
|
||||
|
||||
private:
|
||||
void* AllocateAlignedFallback(size_t n);
|
||||
void AddCleanupFallback(void* elem, void (*cleanup)(void*));
|
||||
void CleanupListFallback();
|
||||
|
||||
ArenaImpl* arena_; // Containing arena.
|
||||
void* owner_; // &ThreadCache of this thread;
|
||||
Block* head_; // Head of linked list of blocks.
|
||||
CleanupChunk* cleanup_; // Head of cleanup list.
|
||||
SerialArena* next_; // Next SerialArena in this linked list.
|
||||
|
||||
// Next pointer to allocate from. Always 8-byte aligned. Points inside
|
||||
// head_ (and head_->pos will always be non-canonical). We keep these
|
||||
// here to reduce indirection.
|
||||
char* ptr_;
|
||||
char* limit_;
|
||||
|
||||
// Next CleanupList members to append to. These point inside cleanup_.
|
||||
CleanupNode* cleanup_ptr_;
|
||||
CleanupNode* cleanup_limit_;
|
||||
};
|
||||
|
||||
// Blocks are variable length malloc-ed objects. The following structure
|
||||
// describes the common header for all blocks.
|
||||
class LIBPROTOBUF_EXPORT Block {
|
||||
public:
|
||||
Block(size_t size, Block* next);
|
||||
|
||||
char* Pointer(size_t n) {
|
||||
GOOGLE_DCHECK(n <= size_);
|
||||
return reinterpret_cast<char*>(this) + n;
|
||||
}
|
||||
|
||||
Block* next() const { return next_; }
|
||||
size_t pos() const { return pos_; }
|
||||
size_t size() const { return size_; }
|
||||
void set_pos(size_t pos) { pos_ = pos; }
|
||||
|
||||
private:
|
||||
Block* next_; // Next block for this thread.
|
||||
size_t pos_;
|
||||
size_t size_;
|
||||
// data follows
|
||||
};
|
||||
|
||||
struct ThreadCache {
|
||||
#if defined(GOOGLE_PROTOBUF_NO_THREADLOCAL)
|
||||
// If we are using the ThreadLocalStorage class to store the ThreadCache,
|
||||
// then the ThreadCache's default constructor has to be responsible for
|
||||
// initializing it.
|
||||
ThreadCache() : last_lifecycle_id_seen(-1), last_serial_arena(NULL) {}
|
||||
#endif
|
||||
|
||||
// The ThreadCache is considered valid as long as this matches the
|
||||
// lifecycle_id of the arena being used.
|
||||
int64 last_lifecycle_id_seen;
|
||||
SerialArena* last_serial_arena;
|
||||
};
|
||||
static std::atomic<int64> lifecycle_id_generator_;
|
||||
#if defined(GOOGLE_PROTOBUF_NO_THREADLOCAL)
|
||||
// Android ndk does not support GOOGLE_THREAD_LOCAL keyword so we use a custom thread
|
||||
// local storage class we implemented.
|
||||
// iOS also does not support the GOOGLE_THREAD_LOCAL keyword.
|
||||
static ThreadCache& thread_cache();
|
||||
#elif defined(PROTOBUF_USE_DLLS)
|
||||
// Thread local variables cannot be exposed through DLL interface but we can
|
||||
// wrap them in static functions.
|
||||
static ThreadCache& thread_cache();
|
||||
#else
|
||||
// ThreadCache storage declared with GOOGLE_THREAD_LOCAL; this branch is used
// when neither GOOGLE_PROTOBUF_NO_THREADLOCAL nor PROTOBUF_USE_DLLS is defined.
static GOOGLE_THREAD_LOCAL ThreadCache thread_cache_;
|
||||
// Returns a reference to thread_cache_.
static ThreadCache& thread_cache() { return thread_cache_; }
|
||||
#endif
|
||||
|
||||
void Init();
|
||||
|
||||
// Free all blocks and return the total space used, which is the sum of the sizes
|
||||
// of all the allocated blocks.
|
||||
uint64 FreeBlocks();
|
||||
// Delete or Destruct all objects owned by the arena.
|
||||
void CleanupList();
|
||||
|
||||
// Remembers |serial| as the most recently used SerialArena: first in the
// calling thread's ThreadCache (tagged with this arena's lifecycle_id_), then
// in hint_.
inline void CacheSerialArena(SerialArena* serial) {
  ThreadCache& cache = thread_cache();
  cache.last_serial_arena = serial;
  cache.last_lifecycle_id_seen = lifecycle_id_;
  // TODO(haberman): evaluate whether we would gain efficiency by getting rid
  // of hint_.  It's the only write we do to ArenaImpl in the allocation path,
  // which will dirty the cache line.
  hint_.store(serial, std::memory_order_release);
}
|
||||
|
||||
|
||||
std::atomic<SerialArena*>
|
||||
threads_; // Pointer to a linked list of SerialArena.
|
||||
std::atomic<SerialArena*> hint_; // Fast thread-local block access
|
||||
std::atomic<size_t> space_allocated_; // Total size of all allocated blocks.
|
||||
|
||||
Block *initial_block_; // If non-NULL, points to the block that came from
|
||||
// user data.
|
||||
|
||||
Block* NewBlock(Block* last_block, size_t min_bytes);
|
||||
|
||||
SerialArena* GetSerialArena();
|
||||
bool GetSerialArenaFast(SerialArena** arena);
|
||||
SerialArena* GetSerialArenaFallback(void* me);
|
||||
int64 lifecycle_id_; // Unique for each arena. Changes on Reset().
|
||||
|
||||
Options options_;
|
||||
|
||||
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ArenaImpl);
|
||||
// All protos have pointers back to the arena hence Arena must have
|
||||
// pointer stability.
|
||||
ArenaImpl(ArenaImpl&&) = delete;
|
||||
ArenaImpl& operator=(ArenaImpl&&) = delete;
|
||||
|
||||
public:
|
||||
// kBlockHeaderSize is sizeof(Block), aligned up to the nearest multiple of 8
|
||||
// to protect the invariant that pos is always at a multiple of 8.
|
||||
static const size_t kBlockHeaderSize = (sizeof(Block) + 7) & -8;
|
||||
static const size_t kSerialArenaSize = (sizeof(SerialArena) + 7) & -8;
|
||||
static_assert(kBlockHeaderSize % 8 == 0,
|
||||
"kBlockHeaderSize must be a multiple of 8.");
|
||||
static_assert(kSerialArenaSize % 8 == 0,
|
||||
"kSerialArenaSize must be a multiple of 8.");
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace protobuf
|
||||
|
||||
} // namespace google
|
||||
#endif // GOOGLE_PROTOBUF_ARENA_IMPL_H__
|
||||
@ -0,0 +1,50 @@
|
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <google/protobuf/stubs/logging.h>
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
#include <google/protobuf/arena_test_util.h>
|
||||
|
||||
|
||||
#define EXPECT_EQ GOOGLE_CHECK_EQ
|
||||
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
namespace internal {
|
||||
|
||||
// Unhooks capture_alloc and then checks that it recorded zero allocations and
// zero frees. EXPECT_EQ is #defined to GOOGLE_CHECK_EQ earlier in this file.
NoHeapChecker::~NoHeapChecker() {
|
||||
// Stop capturing before reading the counters.
capture_alloc.Unhook();
|
||||
EXPECT_EQ(0, capture_alloc.alloc_count());
|
||||
EXPECT_EQ(0, capture_alloc.free_count());
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace protobuf
|
||||
} // namespace google
|
||||
@ -0,0 +1,91 @@
|
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifndef GOOGLE_PROTOBUF_ARENA_TEST_UTIL_H__
|
||||
#define GOOGLE_PROTOBUF_ARENA_TEST_UTIL_H__
|
||||
|
||||
#include <google/protobuf/stubs/logging.h>
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
#include <google/protobuf/arena.h>
|
||||
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
|
||||
template <typename T, bool use_arena>
|
||||
void TestParseCorruptedString(const T& message) {
|
||||
int success_count = 0;
|
||||
string s = message.SerializeAsString();
|
||||
const int kMaxIters = 900;
|
||||
const int stride = s.size() <= kMaxIters ? 1 : s.size() / kMaxIters;
|
||||
const int start = stride == 1 || use_arena ? 0 : (stride + 1) / 2;
|
||||
for (int i = start; i < s.size(); i += stride) {
|
||||
for (int c = 1 + (i % 17); c < 256; c += 2 * c + (i & 3)) {
|
||||
s[i] ^= c;
|
||||
google::protobuf::Arena arena;
|
||||
T* message =
|
||||
google::protobuf::Arena::CreateMessage<T>(use_arena ? &arena : nullptr);
|
||||
if (message->ParseFromString(s)) {
|
||||
++success_count;
|
||||
}
|
||||
if (!use_arena) {
|
||||
delete message;
|
||||
}
|
||||
s[i] ^= c; // Restore s to its original state.
|
||||
}
|
||||
}
|
||||
// This next line is a low bar. But getting through the test without crashing
|
||||
// due to use-after-free or other bugs is a big part of what we're checking.
|
||||
GOOGLE_CHECK_GT(success_count, 0);
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Scope guard around a NewDeleteCapture: hooks it on construction; the
// destructor is defined out of line.
class NoHeapChecker {
 public:
  NoHeapChecker() { capture_alloc.Hook(); }
  ~NoHeapChecker();

 private:
  // Placeholder capture: hooks do nothing and both counters report zero.
  class NewDeleteCapture {
   public:
    // TODO(xiaofeng): Implement this for opensource protobuf.
    void Hook() {}
    void Unhook() {}
    int alloc_count() { return 0; }
    int free_count() { return 0; }
  };

  NewDeleteCapture capture_alloc;
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace protobuf
|
||||
|
||||
} // namespace google
|
||||
#endif // GOOGLE_PROTOBUF_ARENA_TEST_UTIL_H__
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,43 @@
|
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// The ArenaString implementation is not included in the open-source release. Do
|
||||
// not include this file in the distribution.
|
||||
|
||||
#include <google/protobuf/arenastring.h>
|
||||
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
namespace internal {
|
||||
|
||||
|
||||
} // namespace internal
|
||||
} // namespace protobuf
|
||||
} // namespace google
|
||||
@ -0,0 +1,403 @@
|
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifndef GOOGLE_PROTOBUF_ARENASTRING_H__
|
||||
#define GOOGLE_PROTOBUF_ARENASTRING_H__
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <google/protobuf/arena.h>
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
#include <google/protobuf/stubs/fastmem.h>
|
||||
#include <google/protobuf/stubs/logging.h>
|
||||
#include <google/protobuf/stubs/port.h>
|
||||
|
||||
// This is the implementation of arena string fields written for the open-source
|
||||
// release. The ArenaStringPtr struct below is an internal implementation class
|
||||
// and *should not be used* by user code. It is used to collect string
|
||||
// operations together into one place and abstract away the underlying
|
||||
// string-field pointer representation, so that (for example) an alternate
|
||||
// implementation that knew more about ::std::string's internals could integrate more
|
||||
// closely with the arena allocator.
|
||||
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
namespace internal {
|
||||
|
||||
// Stores a T* as an integer (uintptr_t). The value is not initialized on
// construction; call Set() before reading it.
template <typename T>
class TaggedPtr {
 public:
  // Stores |p|.
  void Set(T* p) { ptr_ = reinterpret_cast<uintptr_t>(p); }

  // Returns the stored pointer.
  T* Get() const { return reinterpret_cast<T*>(ptr_); }

  // True when the stored value is zero. Const-qualified so it can be called
  // through const references, like Get().
  bool IsNull() const { return ptr_ == 0; }

 private:
  uintptr_t ptr_;
};
|
||||
|
||||
struct LIBPROTOBUF_EXPORT ArenaStringPtr {
|
||||
inline void Set(const ::std::string* default_value,
|
||||
const ::std::string& value, ::google::protobuf::Arena* arena) {
|
||||
if (ptr_ == default_value) {
|
||||
CreateInstance(arena, &value);
|
||||
} else {
|
||||
*ptr_ = value;
|
||||
}
|
||||
}
|
||||
|
||||
inline void SetLite(const ::std::string* default_value,
|
||||
const ::std::string& value,
|
||||
::google::protobuf::Arena* arena) {
|
||||
Set(default_value, value, arena);
|
||||
}
|
||||
|
||||
// Basic accessors.
|
||||
inline const ::std::string& Get() const { return *ptr_; }
|
||||
|
||||
inline ::std::string* Mutable(const ::std::string* default_value,
|
||||
::google::protobuf::Arena* arena) {
|
||||
if (ptr_ == default_value) {
|
||||
CreateInstance(arena, default_value);
|
||||
}
|
||||
return ptr_;
|
||||
}
|
||||
|
||||
// Release returns a ::std::string* instance that is heap-allocated and is not
|
||||
// Own()'d by any arena. If the field was not set, it returns NULL. The caller
|
||||
// retains ownership. Clears this field back to NULL state. Used to implement
|
||||
// release_<field>() methods on generated classes.
|
||||
inline ::std::string* Release(const ::std::string* default_value,
|
||||
::google::protobuf::Arena* arena) {
|
||||
if (ptr_ == default_value) {
|
||||
return NULL;
|
||||
}
|
||||
return ReleaseNonDefault(default_value, arena);
|
||||
}
|
||||
|
||||
// Similar to Release, but ptr_ cannot be the default_value.
|
||||
inline ::std::string* ReleaseNonDefault(
|
||||
const ::std::string* default_value, ::google::protobuf::Arena* arena) {
|
||||
GOOGLE_DCHECK(!IsDefault(default_value));
|
||||
::std::string* released = NULL;
|
||||
if (arena != NULL) {
|
||||
// ptr_ is owned by the arena.
|
||||
released = new ::std::string;
|
||||
released->swap(*ptr_);
|
||||
} else {
|
||||
released = ptr_;
|
||||
}
|
||||
ptr_ = const_cast< ::std::string* >(default_value);
|
||||
return released;
|
||||
}
|
||||
|
||||
// UnsafeArenaRelease returns a ::std::string*, but it may be arena-owned (i.e.
|
||||
// have its destructor already registered) if arena != NULL. If the field was
|
||||
// not set, this returns NULL. This method clears this field back to NULL
|
||||
// state. Used to implement unsafe_arena_release_<field>() methods on
|
||||
// generated classes.
|
||||
inline ::std::string* UnsafeArenaRelease(const ::std::string* default_value,
|
||||
::google::protobuf::Arena* /* arena */) {
|
||||
if (ptr_ == default_value) {
|
||||
return NULL;
|
||||
}
|
||||
::std::string* released = ptr_;
|
||||
ptr_ = const_cast< ::std::string* >(default_value);
|
||||
return released;
|
||||
}
|
||||
|
||||
// Takes a string that is heap-allocated, and takes ownership. The string's
|
||||
// destructor is registered with the arena. Used to implement
|
||||
// set_allocated_<field> in generated classes.
|
||||
inline void SetAllocated(const ::std::string* default_value,
|
||||
::std::string* value, ::google::protobuf::Arena* arena) {
|
||||
if (arena == NULL && ptr_ != default_value) {
|
||||
Destroy(default_value, arena);
|
||||
}
|
||||
if (value != NULL) {
|
||||
ptr_ = value;
|
||||
if (arena != NULL) {
|
||||
arena->Own(value);
|
||||
}
|
||||
} else {
|
||||
ptr_ = const_cast< ::std::string* >(default_value);
|
||||
}
|
||||
}
|
||||
|
||||
// Takes a string that has lifetime equal to the arena's lifetime. The arena
|
||||
// must be non-null. It is safe only to pass this method a value returned by
|
||||
// UnsafeArenaRelease() on another field of a message in the same arena. Used
|
||||
// to implement unsafe_arena_set_allocated_<field> in generated classes.
|
||||
inline void UnsafeArenaSetAllocated(const ::std::string* default_value,
|
||||
::std::string* value,
|
||||
::google::protobuf::Arena* /* arena */) {
|
||||
if (value != NULL) {
|
||||
ptr_ = value;
|
||||
} else {
|
||||
ptr_ = const_cast< ::std::string* >(default_value);
|
||||
}
|
||||
}
|
||||
|
||||
// Swaps internal pointers. Arena-safety semantics: this is guarded by the
|
||||
// logic in Swap()/UnsafeArenaSwap() at the message level, so this method is
|
||||
// 'unsafe' if called directly.
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE void Swap(ArenaStringPtr* other) {
|
||||
std::swap(ptr_, other->ptr_);
|
||||
}
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE void Swap(
|
||||
ArenaStringPtr* other, const ::std::string* default_value, Arena* arena) {
|
||||
#ifndef NDEBUG
|
||||
// For debug builds, we swap the contents of the string, rather than the
|
||||
// string instances themselves. This invalidates previously taken const
|
||||
// references that are (per our documentation) invalidated by calling Swap()
|
||||
// on the message.
|
||||
//
|
||||
// If both strings are the default_value, swapping is uninteresting.
|
||||
// Otherwise, we use ArenaStringPtr::Mutable() to access the string, to
|
||||
// ensure that we do not try to mutate default_value itself.
|
||||
if (IsDefault(default_value) && other->IsDefault(default_value)) {
|
||||
return;
|
||||
}
|
||||
|
||||
::std::string* this_ptr = Mutable(default_value, arena);
|
||||
::std::string* other_ptr = other->Mutable(default_value, arena);
|
||||
|
||||
this_ptr->swap(*other_ptr);
|
||||
#else
|
||||
std::swap(ptr_, other->ptr_);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Frees storage (if not on an arena).
|
||||
inline void Destroy(const ::std::string* default_value,
|
||||
::google::protobuf::Arena* arena) {
|
||||
if (arena == NULL && ptr_ != default_value) {
|
||||
delete ptr_;
|
||||
}
|
||||
}
|
||||
|
||||
// Clears content, but keeps allocated string if arena != NULL, to avoid the
|
||||
// overhead of heap operations. After this returns, the content (as seen by
|
||||
// the user) will always be the empty string. Assumes that |default_value|
|
||||
// is an empty string.
|
||||
inline void ClearToEmpty(const ::std::string* default_value,
|
||||
::google::protobuf::Arena* /* arena */) {
|
||||
if (ptr_ == default_value) {
|
||||
// Already set to default (which is empty) -- do nothing.
|
||||
} else {
|
||||
ptr_->clear();
|
||||
}
|
||||
}
|
||||
|
||||
// Clears content, assuming that the current value is not the empty string
|
||||
// default.
|
||||
inline void ClearNonDefaultToEmpty() {
|
||||
ptr_->clear();
|
||||
}
|
||||
inline void ClearNonDefaultToEmptyNoArena() {
|
||||
ptr_->clear();
|
||||
}
|
||||
|
||||
// Clears content, but keeps allocated string if arena != NULL, to avoid the
|
||||
// overhead of heap operations. After this returns, the content (as seen by
|
||||
// the user) will always be equal to |default_value|.
|
||||
inline void ClearToDefault(const ::std::string* default_value,
|
||||
::google::protobuf::Arena* /* arena */) {
|
||||
if (ptr_ == default_value) {
|
||||
// Already set to default -- do nothing.
|
||||
} else {
|
||||
// Have another allocated string -- rather than throwing this away and
|
||||
// resetting ptr_ to the canonical default string instance, we just reuse
|
||||
// this instance.
|
||||
*ptr_ = *default_value;
|
||||
}
|
||||
}
|
||||
|
||||
// Called from generated code / reflection runtime only. Resets value to point
|
||||
// to a default string pointer, with the semantics that this ArenaStringPtr
|
||||
// does not own the pointed-to memory. Disregards initial value of ptr_ (so
|
||||
// this is the *ONLY* safe method to call after construction or when
|
||||
// reinitializing after becoming the active field in a oneof union).
|
||||
inline void UnsafeSetDefault(const ::std::string* default_value) {
|
||||
// Casting away 'const' is safe here: accessors ensure that ptr_ is only
|
||||
// returned as a const if it is equal to default_value.
|
||||
ptr_ = const_cast< ::std::string* >(default_value);
|
||||
}
|
||||
|
||||
// The 'NoArena' variants of methods below assume arena == NULL and are
|
||||
// optimized to provide very little overhead relative to a raw string pointer
|
||||
// (while still being in-memory compatible with other code that assumes
|
||||
// ArenaStringPtr). Note the invariant that a class instance that has only
|
||||
// ever been mutated by NoArena methods must *only* be in the String state
|
||||
// (i.e., tag bits are not used), *NEVER* ArenaString. This allows all
|
||||
// tagged-pointer manipulations to be avoided.
|
||||
inline void SetNoArena(const ::std::string* default_value,
|
||||
const ::std::string& value) {
|
||||
if (ptr_ == default_value) {
|
||||
CreateInstanceNoArena(&value);
|
||||
} else {
|
||||
*ptr_ = value;
|
||||
}
|
||||
}
|
||||
|
||||
#if LANG_CXX11
|
||||
void SetNoArena(const ::std::string* default_value, ::std::string&& value) {
|
||||
if (IsDefault(default_value)) {
|
||||
ptr_ = new ::std::string(std::move(value));
|
||||
} else {
|
||||
*ptr_ = std::move(value);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void AssignWithDefault(const ::std::string* default_value, ArenaStringPtr value);
|
||||
|
||||
inline const ::std::string& GetNoArena() const { return *ptr_; }
|
||||
|
||||
inline ::std::string* MutableNoArena(const ::std::string* default_value) {
|
||||
if (ptr_ == default_value) {
|
||||
CreateInstanceNoArena(default_value);
|
||||
}
|
||||
return ptr_;
|
||||
}
|
||||
|
||||
inline ::std::string* ReleaseNoArena(const ::std::string* default_value) {
|
||||
if (ptr_ == default_value) {
|
||||
return NULL;
|
||||
} else {
|
||||
return ReleaseNonDefaultNoArena(default_value);
|
||||
}
|
||||
}
|
||||
|
||||
inline ::std::string* ReleaseNonDefaultNoArena(
|
||||
const ::std::string* default_value) {
|
||||
GOOGLE_DCHECK(!IsDefault(default_value));
|
||||
::std::string* released = ptr_;
|
||||
ptr_ = const_cast< ::std::string* >(default_value);
|
||||
return released;
|
||||
}
|
||||
|
||||
|
||||
inline void SetAllocatedNoArena(const ::std::string* default_value,
|
||||
::std::string* value) {
|
||||
if (ptr_ != default_value) {
|
||||
delete ptr_;
|
||||
}
|
||||
if (value != NULL) {
|
||||
ptr_ = value;
|
||||
} else {
|
||||
ptr_ = const_cast< ::std::string* >(default_value);
|
||||
}
|
||||
}
|
||||
|
||||
inline void DestroyNoArena(const ::std::string* default_value) {
|
||||
if (ptr_ != default_value) {
|
||||
delete ptr_;
|
||||
}
|
||||
}
|
||||
|
||||
inline void ClearToEmptyNoArena(const ::std::string* default_value) {
|
||||
if (ptr_ == default_value) {
|
||||
// Nothing: already equal to default (which is the empty string).
|
||||
} else {
|
||||
ptr_->clear();
|
||||
}
|
||||
}
|
||||
|
||||
inline void ClearToDefaultNoArena(const ::std::string* default_value) {
|
||||
if (ptr_ == default_value) {
|
||||
// Nothing: already set to default.
|
||||
} else {
|
||||
// Reuse existing allocated instance.
|
||||
*ptr_ = *default_value;
|
||||
}
|
||||
}
|
||||
|
||||
// Internal accessor used only at parse time to provide direct access to the
|
||||
// raw pointer from the shared parse routine (in the non-arenas case). The
|
||||
// parse routine does the string allocation in order to save code size in the
|
||||
// generated parsing code.
|
||||
inline ::std::string** UnsafeRawStringPointer() {
|
||||
return &ptr_;
|
||||
}
|
||||
|
||||
inline bool IsDefault(const ::std::string* default_value) const {
|
||||
return ptr_ == default_value;
|
||||
}
|
||||
|
||||
// Internal accessors!!!!
|
||||
void UnsafeSetTaggedPointer(TaggedPtr< ::std::string> value) {
|
||||
ptr_ = value.Get();
|
||||
}
|
||||
// Generated code only! An optimization, in certain cases the generated
|
||||
// code is certain we can obtain a string with no default checks and
|
||||
// tag tests.
|
||||
::std::string* UnsafeMutablePointer() { return ptr_; }
|
||||
|
||||
private:
|
||||
::std::string* ptr_;
|
||||
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE
|
||||
void CreateInstance(::google::protobuf::Arena* arena,
|
||||
const ::std::string* initial_value) {
|
||||
GOOGLE_DCHECK(initial_value != NULL);
|
||||
// uses "new ::std::string" when arena is nullptr
|
||||
ptr_ = Arena::Create< ::std::string >(arena, *initial_value);
|
||||
}
|
||||
GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE
|
||||
void CreateInstanceNoArena(const ::std::string* initial_value) {
|
||||
GOOGLE_DCHECK(initial_value != NULL);
|
||||
ptr_ = new ::std::string(*initial_value);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace protobuf
|
||||
|
||||
|
||||
|
||||
namespace protobuf {
|
||||
namespace internal {
|
||||
|
||||
inline void ArenaStringPtr::AssignWithDefault(const ::std::string* default_value,
|
||||
ArenaStringPtr value) {
|
||||
const ::std::string* me = *UnsafeRawStringPointer();
|
||||
const ::std::string* other = *value.UnsafeRawStringPointer();
|
||||
// If the pointers are the same then do nothing.
|
||||
if (me != other) {
|
||||
SetNoArena(default_value, value.GetNoArena());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace protobuf
|
||||
|
||||
} // namespace google
|
||||
#endif // GOOGLE_PROTOBUF_ARENASTRING_H__
|
||||
@ -0,0 +1,138 @@
|
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// Based on mvels@'s frankenstring.
|
||||
|
||||
#include <google/protobuf/arenastring.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <google/protobuf/stubs/logging.h>
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include <google/protobuf/io/coded_stream.h>
|
||||
#include <google/protobuf/io/zero_copy_stream_impl.h>
|
||||
|
||||
|
||||
namespace google {
|
||||
using google::protobuf::internal::ArenaStringPtr;
|
||||
|
||||
namespace protobuf {
|
||||
|
||||
|
||||
// Builds a string object from a C string literal so the tests below can pass
// a string value (rather than a raw char pointer) to ArenaStringPtr::Set().
static string WrapString(const char* value) {
  return string(value);
}
|
||||
|
||||
// Test ArenaStringPtr with arena == NULL.
|
||||
TEST(ArenaStringPtrTest, ArenaStringPtrOnHeap) {
|
||||
ArenaStringPtr field;
|
||||
::std::string default_value = "default";
|
||||
field.UnsafeSetDefault(&default_value);
|
||||
EXPECT_EQ(string("default"), field.Get());
|
||||
field.Set(&default_value, WrapString("Test short"), NULL);
|
||||
EXPECT_EQ(string("Test short"), field.Get());
|
||||
field.Set(&default_value, WrapString("Test long long long long value"), NULL);
|
||||
EXPECT_EQ(string("Test long long long long value"), field.Get());
|
||||
field.Set(&default_value, string(""), NULL);
|
||||
field.Destroy(&default_value, NULL);
|
||||
|
||||
ArenaStringPtr field2;
|
||||
field2.UnsafeSetDefault(&default_value);
|
||||
::std::string* mut = field2.Mutable(&default_value, NULL);
|
||||
EXPECT_EQ(mut, field2.Mutable(&default_value, NULL));
|
||||
EXPECT_EQ(mut, &field2.Get());
|
||||
EXPECT_NE(&default_value, mut);
|
||||
EXPECT_EQ(string("default"), *mut);
|
||||
*mut = "Test long long long long value"; // ensure string allocates storage
|
||||
EXPECT_EQ(string("Test long long long long value"), field2.Get());
|
||||
field2.Destroy(&default_value, NULL);
|
||||
}
|
||||
|
||||
// Same scenario as the heap test, but every call is given a live Arena.
TEST(ArenaStringPtrTest, ArenaStringPtrOnArena) {
  google::protobuf::Arena arena;
  ArenaStringPtr arena_field;
  ::std::string default_value = "default";

  // A freshly defaulted field reads back the default value.
  arena_field.UnsafeSetDefault(&default_value);
  EXPECT_EQ(string("default"), arena_field.Get());

  // Set() with a short value, then a longer one, then an empty one.
  arena_field.Set(&default_value, WrapString("Test short"), &arena);
  EXPECT_EQ(string("Test short"), arena_field.Get());
  arena_field.Set(&default_value, WrapString("Test long long long long value"),
                  &arena);
  EXPECT_EQ(string("Test long long long long value"), arena_field.Get());
  arena_field.Set(&default_value, string(""), &arena);
  arena_field.Destroy(&default_value, &arena);

  // Mutable() is expected to return a stable pointer that differs from the
  // default object, still holds the default text, and is what Get() exposes.
  ArenaStringPtr mutable_field;
  mutable_field.UnsafeSetDefault(&default_value);
  ::std::string* owned = mutable_field.Mutable(&default_value, &arena);
  EXPECT_EQ(owned, mutable_field.Mutable(&default_value, &arena));
  EXPECT_EQ(owned, &mutable_field.Get());
  EXPECT_NE(&default_value, owned);
  EXPECT_EQ(string("default"), *owned);

  // Long enough that the string has to allocate storage.
  *owned = "Test long long long long value";
  EXPECT_EQ(string("Test long long long long value"), mutable_field.Get());
  mutable_field.Destroy(&default_value, &arena);
}
|
||||
|
||||
// Arena-backed scenario in which the first value stored is already a long
// string, so the short-string path is never taken.
TEST(ArenaStringPtrTest, ArenaStringPtrOnArenaNoSSO) {
  google::protobuf::Arena arena;
  ArenaStringPtr arena_field;
  ::std::string default_value = "default";

  // A freshly defaulted field reads back the default value.
  arena_field.UnsafeSetDefault(&default_value);
  EXPECT_EQ(string("default"), arena_field.Get());

  // The value is larger than the string's internal buffer, which keeps the
  // SSO optimization from kicking in.
  arena_field.Set(&default_value, WrapString("Test long long long long value"),
                  &arena);
  EXPECT_EQ(string("Test long long long long value"), arena_field.Get());
  arena_field.Set(&default_value, string(""), &arena);
  arena_field.Destroy(&default_value, &arena);

  // Mutable() is expected to return a stable pointer that differs from the
  // default object, still holds the default text, and is what Get() exposes.
  ArenaStringPtr mutable_field;
  mutable_field.UnsafeSetDefault(&default_value);
  ::std::string* owned = mutable_field.Mutable(&default_value, &arena);
  EXPECT_EQ(owned, mutable_field.Mutable(&default_value, &arena));
  EXPECT_EQ(owned, &mutable_field.Get());
  EXPECT_NE(&default_value, owned);
  EXPECT_EQ(string("default"), *owned);

  // Long enough that the string has to allocate storage.
  *owned = "Test long long long long value";
  EXPECT_EQ(string("Test long long long long value"), mutable_field.Get());
  mutable_field.Destroy(&default_value, &arena);
}
|
||||
|
||||
|
||||
} // namespace protobuf
|
||||
} // namespace google
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,883 @@
|
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// Author: kenton@google.com (Kenton Varda)
|
||||
// Based on original Protocol Buffers design by
|
||||
// Sanjay Ghemawat, Jeff Dean, and others.
|
||||
//
|
||||
// The messages in this file describe the definitions found in .proto files.
|
||||
// A valid .proto file can be translated directly to a FileDescriptorProto
|
||||
// without any other information (e.g. without reading its imports).
|
||||
|
||||
|
||||
syntax = "proto2";

package google.protobuf;

// Per-language settings for the code generated from this file.
option go_package = "github.com/golang/protobuf/protoc-gen-go/descriptor;descriptor";
option java_package = "com.google.protobuf";
option java_outer_classname = "DescriptorProtos";
option csharp_namespace = "Google.Protobuf.Reflection";
option objc_class_prefix = "GPB";
option cc_enable_arenas = true;

// Reflection-based algorithms do not work during bootstrapping, so
// descriptor.proto has to be optimized for speed.
option optimize_for = SPEED;
|
||||
|
||||
// A set of file descriptors. The protocol compiler can emit one of these
// containing the .proto files it parses.
message FileDescriptorSet {
  repeated FileDescriptorProto file = 1;
}
|
||||
|
||||
// Description of one complete .proto file.
message FileDescriptorProto {
  // File name, relative to the root of the source tree.
  optional string name = 1;
  // Package name, e.g. "foo", "foo.bar", etc.
  optional string package = 2;

  // Names of the files this file imports.
  repeated string dependency = 3;
  // Indexes into `dependency` of the files that are imported publicly.
  repeated int32 public_dependency = 10;
  // Indexes into `dependency` of the files that are imported weakly.
  // For Google-internal migration only. Do not use.
  repeated int32 weak_dependency = 11;

  // Every top-level definition in the file.
  repeated DescriptorProto message_type = 4;
  repeated EnumDescriptorProto enum_type = 5;
  repeated ServiceDescriptorProto service = 6;
  repeated FieldDescriptorProto extension = 7;

  optional FileOptions options = 8;

  // Optional information about the original source code. Only development
  // tools need it; the whole field can be removed without harming the runtime
  // functionality of the descriptors.
  optional SourceCodeInfo source_code_info = 9;

  // Syntax of the proto file. Supported values are "proto2" and "proto3".
  optional string syntax = 12;
}
|
||||
|
||||
// Description of a message type.
message DescriptorProto {
  optional string name = 1;

  repeated FieldDescriptorProto field = 2;
  repeated FieldDescriptorProto extension = 6;

  repeated DescriptorProto nested_type = 3;
  repeated EnumDescriptorProto enum_type = 4;

  // One declared range of extension field numbers.
  message ExtensionRange {
    optional int32 start = 1;
    optional int32 end = 2;

    optional ExtensionRangeOptions options = 3;
  }
  repeated ExtensionRange extension_range = 5;

  repeated OneofDescriptorProto oneof_decl = 8;

  optional MessageOptions options = 7;

  // A range of reserved tag numbers. Fields and extension ranges of the same
  // message may not use a reserved tag number, and reserved ranges may not
  // overlap one another.
  message ReservedRange {
    // Inclusive.
    optional int32 start = 1;
    // Exclusive.
    optional int32 end = 2;
  }
  repeated ReservedRange reserved_range = 9;
  // Field names that fields of the same message may not use. A given name
  // may only be reserved once.
  repeated string reserved_name = 10;
}
|
||||
|
||||
// Options attached to an extension range.
message ExtensionRangeOptions {
  // Options the parser does not recognize are stored here (see the "Options"
  // section comment in this file).
  repeated UninterpretedOption uninterpreted_option = 999;

  // Custom options are defined by clients as extensions of this message (see
  // the "Options" section comment in this file).
  extensions 1000 to max;
}
|
||||
|
||||
// Description of a single field inside a message.
message FieldDescriptorProto {
  enum Type {
    // The value 0 is reserved for errors.
    // The ordering is odd for historical reasons.
    TYPE_DOUBLE = 1;
    TYPE_FLOAT = 2;
    // Not ZigZag encoded, so negative numbers take 10 bytes. Prefer
    // TYPE_SINT64 when negative values are likely.
    TYPE_INT64 = 3;
    TYPE_UINT64 = 4;
    // Not ZigZag encoded, so negative numbers take 10 bytes. Prefer
    // TYPE_SINT32 when negative values are likely.
    TYPE_INT32 = 5;
    TYPE_FIXED64 = 6;
    TYPE_FIXED32 = 7;
    TYPE_BOOL = 8;
    TYPE_STRING = 9;
    // Tag-delimited aggregate.
    // The group type is deprecated and proto3 does not support it. Proto3
    // implementations should nevertheless be able to parse the group wire
    // format and treat group fields as unknown fields.
    TYPE_GROUP = 10;
    // Length-delimited aggregate.
    TYPE_MESSAGE = 11;

    // Added in version 2.
    TYPE_BYTES = 12;
    TYPE_UINT32 = 13;
    TYPE_ENUM = 14;
    TYPE_SFIXED32 = 15;
    TYPE_SFIXED64 = 16;
    // Uses ZigZag encoding.
    TYPE_SINT32 = 17;
    // Uses ZigZag encoding.
    TYPE_SINT64 = 18;
  }

  enum Label {
    // The value 0 is reserved for errors.
    LABEL_OPTIONAL = 1;
    LABEL_REQUIRED = 2;
    LABEL_REPEATED = 3;
  }

  optional string name = 1;
  optional int32 number = 3;
  optional Label label = 4;

  // May be left unset when type_name is set. When both are set, this must be
  // one of TYPE_ENUM, TYPE_MESSAGE or TYPE_GROUP.
  optional Type type = 5;

  // Name of the type, for message and enum types. A name starting with '.'
  // is fully-qualified. Any other name is looked up with C++-like scoping
  // rules: the types nested within this message are searched first, then the
  // parent's, and so on up to the root namespace.
  optional string type_name = 6;

  // Name of the type being extended, for extensions. Resolved in the same
  // manner as type_name.
  optional string extendee = 2;

  // Default value, stored as text:
  //   numeric types - the original text representation of the value;
  //   booleans      - "true" or "false";
  //   strings       - the default text contents (not escaped in any way);
  //   bytes         - the C escaped value, with all bytes >= 128 escaped.
  // TODO(kenton):  Base-64 encode?
  optional string default_value = 7;

  // When set, the index of a oneof in the containing type's oneof_decl list;
  // this field is a member of that oneof.
  optional int32 oneof_index = 9;

  // JSON name of the field, set by the protocol compiler. A "json_name"
  // option set by the user on this field supplies the value; otherwise it is
  // deduced from the field's name by converting it to camelCase.
  optional string json_name = 10;

  optional FieldOptions options = 8;
}
|
||||
|
||||
// Description of a oneof.
message OneofDescriptorProto {
  optional string name = 1;
  optional OneofOptions options = 2;
}
|
||||
|
||||
// Description of an enum type.
message EnumDescriptorProto {
  optional string name = 1;

  repeated EnumValueDescriptorProto value = 2;

  optional EnumOptions options = 3;

  // A range of reserved numeric values. Entries of the same enum may not use
  // reserved values, and reserved ranges may not overlap.
  //
  // Unlike DescriptorProto.ReservedRange, this range is inclusive at both
  // ends so that it can represent the entire int32 domain.
  message EnumReservedRange {
    // Inclusive.
    optional int32 start = 1;
    // Inclusive.
    optional int32 end = 2;
  }

  // Ranges of reserved numeric values. Enum values in the same enum
  // declaration may not use a reserved numeric value, and reserved ranges may
  // not overlap.
  repeated EnumReservedRange reserved_range = 4;

  // Enum value names that may not be reused. A given name may only be
  // reserved once.
  repeated string reserved_name = 5;
}
|
||||
|
||||
// Description of one value inside an enum.
message EnumValueDescriptorProto {
  optional string name = 1;
  optional int32 number = 2;

  optional EnumValueOptions options = 3;
}
|
||||
|
||||
// Description of a service.
message ServiceDescriptorProto {
  optional string name = 1;
  repeated MethodDescriptorProto method = 2;

  optional ServiceOptions options = 3;
}
|
||||
|
||||
// Description of one method of a service.
message MethodDescriptorProto {
  optional string name = 1;

  // Names of the input and output types. They are resolved the same way as
  // FieldDescriptorProto.type_name, but must refer to a message type.
  optional string input_type = 2;
  optional string output_type = 3;

  optional MethodOptions options = 4;

  // Whether the client streams multiple client messages.
  optional bool client_streaming = 5 [default=false];
  // Whether the server streams multiple server messages.
  optional bool server_streaming = 6 [default=false];
}
|
||||
|
||||
|
||||
// ===================================================================
|
||||
// Options
|
||||
|
||||
// Each of the definitions above may have "options" attached. These are
|
||||
// just annotations which may cause code to be generated slightly differently
|
||||
// or may contain hints for code that manipulates protocol messages.
|
||||
//
|
||||
// Clients may define custom options as extensions of the *Options messages.
|
||||
// These extensions may not yet be known at parsing time, so the parser cannot
|
||||
// store the values in them. Instead it stores them in a field in the *Options
|
||||
// message called uninterpreted_option. This field must have the same name
|
||||
// across all *Options messages. We then use this field to populate the
|
||||
// extensions when we build a descriptor, at which point all protos have been
|
||||
// parsed and so all extensions are known.
|
||||
//
|
||||
// Extension numbers for custom options may be chosen as follows:
|
||||
// * For options which will only be used within a single application or
|
||||
// organization, or for experimental options, use field numbers 50000
|
||||
// through 99999. It is up to you to ensure that you do not use the
|
||||
// same number for multiple options.
|
||||
// * For options which will be published and used publicly by multiple
|
||||
// independent entities, e-mail protobuf-global-extension-registry@google.com
|
||||
// to reserve extension numbers. Simply provide your project name (e.g.
|
||||
// Objective-C plugin) and your project website (if available) -- there's no
|
||||
// need to explain how you intend to use them. Usually you only need one
|
||||
// extension number. You can declare multiple options with only one extension
|
||||
// number by putting them in a sub-message. See the Custom Options section of
|
||||
// the docs for examples:
|
||||
// https://developers.google.com/protocol-buffers/docs/proto#options
|
||||
// If this turns out to be popular, a web service will be set up
|
||||
// to automatically assign option numbers.
|
||||
|
||||
|
||||
// Options that apply to a whole .proto file.
message FileOptions {

  // Java package in which the classes generated from this .proto are placed.
  // The proto package is used by default, but that is often inappropriate
  // because proto packages do not normally start with backwards domain names.
  optional string java_package = 1;


  // When set, all classes from the .proto file are wrapped in a single outer
  // class with the given name. This applies to both Proto1 (equivalent to
  // the old "--one_java_file" option) and Proto2 (where a .proto always
  // translates to a single class, but you may want to explicitly choose the
  // class name).
  optional string java_outer_classname = 8;

  // When true, the Java code generator emits a separate .java file for each
  // top-level message, enum, and service defined in the .proto file, so
  // these types are *not* nested inside the outer class named by
  // java_outer_classname. The outer class is still generated; it contains
  // the file's getDescriptor() method as well as any top-level extensions
  // defined in the file.
  optional bool java_multiple_files = 10 [default=false];

  // This option does nothing.
  optional bool java_generate_equals_and_hash = 20 [deprecated=true];

  // When true, the Java2 code generator emits code that throws an exception
  // whenever an attempt is made to assign a non-UTF-8 byte sequence to a
  // string field. Message reflection does the same. An extension field,
  // however, still accepts non-UTF-8 byte sequences.
  // This option has no effect when used with the lite runtime.
  optional bool java_string_check_utf8 = 27 [default=false];


  // Generated classes can be optimized for speed or for code size.
  enum OptimizeMode {
    // Generate complete code for parsing, serialization, etc.
    SPEED = 1;
    // Use ReflectionOps to implement these methods.
    CODE_SIZE = 2;
    // Generate code using MessageLite and the lite runtime.
    LITE_RUNTIME = 3;
  }
  optional OptimizeMode optimize_for = 9 [default=SPEED];

  // Go package in which the structs generated from this .proto are placed.
  // When omitted, the Go package is derived from the following:
  //   - The basename of the package import path, if provided.
  //   - Otherwise, the package statement in the .proto file, if present.
  //   - Otherwise, the basename of the .proto file, without extension.
  optional string go_package = 11;



  // Whether generic services should be generated in each language.
  // "Generic" services are not specific to any particular RPC system; the
  // main code generators in each language produce them (without additional
  // plugins). They were the only kind of service generation supported by
  // early versions of google.protobuf.
  //
  // Generic services are now considered deprecated in favor of plugins that
  // generate code specific to your particular RPC system, which is why these
  // default to false. Old code that depends on generic services should
  // explicitly set them to true.
  optional bool cc_generic_services = 16 [default=false];
  optional bool java_generic_services = 17 [default=false];
  optional bool py_generic_services = 18 [default=false];
  optional bool php_generic_services = 42 [default=false];

  // Whether this file is deprecated.
  // Depending on the target platform, this can emit Deprecated annotations
  // for everything in the file, or it will be completely ignored; in the very
  // least, this is a formalization for deprecating files.
  optional bool deprecated = 23 [default=false];

  // Turns on arenas for the proto messages in this file. Only the generated
  // C++ classes are affected.
  optional bool cc_enable_arenas = 31 [default=false];


  // Objective-C class prefix, prepended to every Objective-C class generated
  // from this .proto. There is no default.
  optional string objc_class_prefix = 36;

  // Namespace for generated classes; defaults to the package.
  optional string csharp_namespace = 37;

  // Swift generators by default take the proto package, CamelCase it while
  // replacing '.' with underscore, and use the result to prefix the
  // types/symbols defined. When this option is provided, its value is used
  // as that prefix instead.
  optional string swift_prefix = 39;

  // PHP class prefix, prepended to every PHP class generated from this
  // .proto. Default is empty.
  optional string php_class_prefix = 40;

  // Overrides the namespace of the generated PHP classes. Default is empty;
  // when this option is empty, the package name is used for determining the
  // namespace.
  optional string php_namespace = 41;


  // Overrides the namespace of the generated PHP metadata classes. Default
  // is empty; when this option is empty, the proto file name is used for
  // determining the namespace.
  optional string php_metadata_namespace = 44;

  // Overrides the package of the generated Ruby classes. Default is empty;
  // when this option is not set, the package name is used for determining
  // the ruby package.
  optional string ruby_package = 45;

  // Options the parser does not recognize are stored here.
  // See the documentation for the "Options" section in this file.
  repeated UninterpretedOption uninterpreted_option = 999;

  // Custom options are defined by clients as extensions of this message.
  // See the documentation for the "Options" section in this file.
  extensions 1000 to max;

  reserved 38;
}
|
||||
|
||||
// Options that apply to a message type.
message MessageOptions {
  // When true, extensions use the old proto1 MessageSet wire format. It
  // exists for backwards-compatibility with that format and should not be
  // used for any other reason: it is less efficient, has fewer features, and
  // is more complicated.
  //
  // The message must be defined exactly as follows:
  //   message Foo {
  //     option message_set_wire_format = true;
  //     extensions 4 to max;
  //   }
  // Note that the message cannot have any defined fields; MessageSets only
  // have extensions.
  //
  // Every extension of your type must be a singular message; e.g. it cannot
  // be an int32, an enum, or a repeated message.
  //
  // Because this is an option, the above two restrictions are not enforced by
  // the protocol compiler.
  optional bool message_set_wire_format = 1 [default=false];

  // Turns off generation of the standard "descriptor()" accessor, which can
  // conflict with a field of the same name. Meant to make migration from
  // proto1 easier; new code should avoid fields named "descriptor".
  optional bool no_standard_descriptor_accessor = 2 [default=false];

  // Whether this message is deprecated.
  // Depending on the target platform, this can emit Deprecated annotations
  // for the message, or it will be completely ignored; in the very least,
  // this is a formalization for deprecating messages.
  optional bool deprecated = 3 [default=false];

  // Whether the message is an automatically generated map entry type for a
  // maps field.
  //
  // For maps fields:
  //     map<KeyType, ValueType> map_field = 1;
  // The parsed descriptor looks like:
  //     message MapFieldEntry {
  //         option map_entry = true;
  //         optional KeyType key = 1;
  //         optional ValueType value = 2;
  //     }
  //     repeated MapFieldEntry map_field = 1;
  //
  // Implementations may choose not to generate the map_entry=true message,
  // and instead hold the keys and values in a native map of the target
  // language. The reflection APIs in such implementations still need to work
  // as if the field is a repeated message field.
  //
  // NOTE: Do not set the option in .proto files. Always use the maps syntax
  // instead. The option should only be implicitly set by the proto compiler
  // parser.
  optional bool map_entry = 7;

  reserved 8;  // javalite_serializable
  reserved 9;  // javanano_as_lite

  // Options the parser does not recognize are stored here (see the "Options"
  // section comment in this file).
  repeated UninterpretedOption uninterpreted_option = 999;

  // Custom options are defined by clients as extensions of this message (see
  // the "Options" section comment in this file).
  extensions 1000 to max;
}
|
||||
|
||||
// Options that apply to a field.
message FieldOptions {
  // Instructs the C++ code generator to use a different representation of
  // the field than it normally would; see the specific options below. This
  // option is not yet implemented in the open source release -- sorry, we'll
  // try to include it in a future version!
  optional CType ctype = 1 [default = STRING];
  enum CType {
    // Default mode.
    STRING = 0;

    CORD = 1;

    STRING_PIECE = 2;
  }
  // Can be enabled for repeated primitive fields to get a more efficient
  // representation on the wire: instead of repeatedly writing the tag and
  // type for each element, the entire array is encoded as a single
  // length-delimited blob. In proto3, only explicitly setting it to false
  // avoids packed encoding.
  optional bool packed = 2;

  // Selects the JavaScript type used for values of the field. Permitted only
  // for 64 bit integral and fixed types (int64, uint64, sint64, fixed64,
  // sfixed64). With JS_STRING the field is represented as a JavaScript
  // string, which avoids the loss of precision that can happen when a large
  // value is converted to a floating point JavaScript number. With JS_NUMBER
  // the generated JavaScript code uses the JavaScript "number" type. The
  // behavior of the default option JS_NORMAL is implementation dependent.
  //
  // This option is an enum to permit additional types to be added, e.g.
  // goog.math.Integer.
  optional JSType jstype = 6 [default = JS_NORMAL];
  enum JSType {
    // Use the default type.
    JS_NORMAL = 0;

    // Use JavaScript strings.
    JS_STRING = 1;

    // Use JavaScript numbers.
    JS_NUMBER = 2;
  }

  // Whether this field should be parsed lazily. Lazy applies only to
  // message-type fields: when the outer message is initially parsed, the
  // inner message's contents are not parsed but stored in encoded form, and
  // the inner message is actually parsed when it is first accessed.
  //
  // This is only a hint. Implementations are free to choose eager or lazy
  // parsing regardless of the value of this option. Setting it to true does
  // suggest that the protocol author believes lazy parsing of this field is
  // worth the additional bookkeeping overhead typically needed to implement
  // it.
  //
  // The option does not affect the public interface of any generated code;
  // all method signatures remain the same. Thread-safety of the interface is
  // not affected either: const methods remain safe to call from multiple
  // threads concurrently, while non-const methods continue to require
  // exclusive access.
  //
  //
  // Note that implementations may choose not to check required fields within
  // a lazy sub-message. That is, calling IsInitialized() on the outer message
  // may return true even if the inner message has missing required fields.
  // This is necessary because otherwise the inner message would have to be
  // parsed in order to perform the check, defeating the purpose of lazy
  // parsing. An implementation which chooses not to check required fields
  // must be consistent about it: for any particular sub-message, it must
  // either *always* check its required fields, or *never* check them,
  // regardless of whether or not the message has been parsed.
  optional bool lazy = 5 [default=false];

  // Whether this field is deprecated.
  // Depending on the target platform, this can emit Deprecated annotations
  // for accessors, or it will be completely ignored; in the very least, this
  // is a formalization for deprecating fields.
  optional bool deprecated = 3 [default=false];

  // For Google-internal migration only. Do not use.
  optional bool weak = 10 [default=false];


  // Options the parser does not recognize are stored here (see the "Options"
  // section comment in this file).
  repeated UninterpretedOption uninterpreted_option = 999;

  // Custom options are defined by clients as extensions of this message (see
  // the "Options" section comment in this file).
  extensions 1000 to max;

  reserved 4;  // removed jtype
}
|
||||
|
||||
// Options that apply to a oneof.
message OneofOptions {
  // Options the parser does not recognize are stored here (see the "Options"
  // section comment in this file).
  repeated UninterpretedOption uninterpreted_option = 999;

  // Custom options are defined by clients as extensions of this message (see
  // the "Options" section comment in this file).
  extensions 1000 to max;
}
|
||||
|
||||
// Options that apply to an enum type.
message EnumOptions {

  // When true, different tag names may be mapped to the same value.
  optional bool allow_alias = 2;

  // Whether this enum is deprecated.
  // Depending on the target platform, this can emit Deprecated annotations
  // for the enum, or it will be completely ignored; in the very least, this
  // is a formalization for deprecating enums.
  optional bool deprecated = 3 [default=false];

  reserved 5;  // javanano_as_lite

  // Options the parser does not recognize are stored here (see the "Options"
  // section comment in this file).
  repeated UninterpretedOption uninterpreted_option = 999;

  // Custom options are defined by clients as extensions of this message (see
  // the "Options" section comment in this file).
  extensions 1000 to max;
}
|
||||
|
||||
message EnumValueOptions {
|
||||
// Is this enum value deprecated?
|
||||
// Depending on the target platform, this can emit Deprecated annotations
|
||||
// for the enum value, or it will be completely ignored; in the very least,
|
||||
// this is a formalization for deprecating enum values.
|
||||
optional bool deprecated = 1 [default=false];
|
||||
|
||||
// The parser stores options it doesn't recognize here. See above.
|
||||
repeated UninterpretedOption uninterpreted_option = 999;
|
||||
|
||||
// Clients can define custom options in extensions of this message. See above.
|
||||
extensions 1000 to max;
|
||||
}
|
||||
|
||||
message ServiceOptions {
|
||||
|
||||
// Note: Field numbers 1 through 32 are reserved for Google's internal RPC
|
||||
// framework. We apologize for hoarding these numbers to ourselves, but
|
||||
// we were already using them long before we decided to release Protocol
|
||||
// Buffers.
|
||||
|
||||
// Is this service deprecated?
|
||||
// Depending on the target platform, this can emit Deprecated annotations
|
||||
// for the service, or it will be completely ignored; in the very least,
|
||||
// this is a formalization for deprecating services.
|
||||
optional bool deprecated = 33 [default=false];
|
||||
|
||||
// The parser stores options it doesn't recognize here. See above.
|
||||
repeated UninterpretedOption uninterpreted_option = 999;
|
||||
|
||||
// Clients can define custom options in extensions of this message. See above.
|
||||
extensions 1000 to max;
|
||||
}
|
||||
|
||||
message MethodOptions {
|
||||
|
||||
// Note: Field numbers 1 through 32 are reserved for Google's internal RPC
|
||||
// framework. We apologize for hoarding these numbers to ourselves, but
|
||||
// we were already using them long before we decided to release Protocol
|
||||
// Buffers.
|
||||
|
||||
// Is this method deprecated?
|
||||
// Depending on the target platform, this can emit Deprecated annotations
|
||||
// for the method, or it will be completely ignored; in the very least,
|
||||
// this is a formalization for deprecating methods.
|
||||
optional bool deprecated = 33 [default=false];
|
||||
|
||||
// Is this method side-effect-free (or safe in HTTP parlance), or idempotent,
|
||||
// or neither? HTTP based RPC implementation may choose GET verb for safe
|
||||
// methods, and PUT verb for idempotent methods instead of the default POST.
|
||||
enum IdempotencyLevel {
|
||||
IDEMPOTENCY_UNKNOWN = 0;
|
||||
NO_SIDE_EFFECTS = 1; // implies idempotent
|
||||
IDEMPOTENT = 2; // idempotent, but may have side effects
|
||||
}
|
||||
optional IdempotencyLevel idempotency_level =
|
||||
34 [default=IDEMPOTENCY_UNKNOWN];
|
||||
|
||||
// The parser stores options it doesn't recognize here. See above.
|
||||
repeated UninterpretedOption uninterpreted_option = 999;
|
||||
|
||||
// Clients can define custom options in extensions of this message. See above.
|
||||
extensions 1000 to max;
|
||||
}
|
||||
|
||||
|
||||
// A message representing an option the parser does not recognize. This only
|
||||
// appears in options protos created by the compiler::Parser class.
|
||||
// DescriptorPool resolves these when building Descriptor objects. Therefore,
|
||||
// options protos in descriptor objects (e.g. returned by Descriptor::options(),
|
||||
// or produced by Descriptor::CopyTo()) will never have UninterpretedOptions
|
||||
// in them.
|
||||
message UninterpretedOption {
|
||||
// The name of the uninterpreted option. Each string represents a segment in
|
||||
// a dot-separated name. is_extension is true iff a segment represents an
|
||||
// extension (denoted with parentheses in options specs in .proto files).
|
||||
// E.g.,{ ["foo", false], ["bar.baz", true], ["qux", false] } represents
|
||||
// "foo.(bar.baz).qux".
|
||||
message NamePart {
|
||||
required string name_part = 1;
|
||||
required bool is_extension = 2;
|
||||
}
|
||||
repeated NamePart name = 2;
|
||||
|
||||
// The value of the uninterpreted option, in whatever type the tokenizer
|
||||
// identified it as during parsing. Exactly one of these should be set.
|
||||
optional string identifier_value = 3;
|
||||
optional uint64 positive_int_value = 4;
|
||||
optional int64 negative_int_value = 5;
|
||||
optional double double_value = 6;
|
||||
optional bytes string_value = 7;
|
||||
optional string aggregate_value = 8;
|
||||
}
|
||||
|
||||
// ===================================================================
|
||||
// Optional source code info
|
||||
|
||||
// Encapsulates information about the original source file from which a
|
||||
// FileDescriptorProto was generated.
|
||||
message SourceCodeInfo {
|
||||
// A Location identifies a piece of source code in a .proto file which
|
||||
// corresponds to a particular definition. This information is intended
|
||||
// to be useful to IDEs, code indexers, documentation generators, and similar
|
||||
// tools.
|
||||
//
|
||||
// For example, say we have a file like:
|
||||
// message Foo {
|
||||
// optional string foo = 1;
|
||||
// }
|
||||
// Let's look at just the field definition:
|
||||
// optional string foo = 1;
|
||||
// ^ ^^ ^^ ^ ^^^
|
||||
// a bc de f ghi
|
||||
// We have the following locations:
|
||||
// span path represents
|
||||
// [a,i) [ 4, 0, 2, 0 ] The whole field definition.
|
||||
// [a,b) [ 4, 0, 2, 0, 4 ] The label (optional).
|
||||
// [c,d) [ 4, 0, 2, 0, 5 ] The type (string).
|
||||
// [e,f) [ 4, 0, 2, 0, 1 ] The name (foo).
|
||||
// [g,h) [ 4, 0, 2, 0, 3 ] The number (1).
|
||||
//
|
||||
// Notes:
|
||||
// - A location may refer to a repeated field itself (i.e. not to any
|
||||
// particular index within it). This is used whenever a set of elements are
|
||||
// logically enclosed in a single code segment. For example, an entire
|
||||
// extend block (possibly containing multiple extension definitions) will
|
||||
// have an outer location whose path refers to the "extensions" repeated
|
||||
// field without an index.
|
||||
// - Multiple locations may have the same path. This happens when a single
|
||||
// logical declaration is spread out across multiple places. The most
|
||||
// obvious example is the "extend" block again -- there may be multiple
|
||||
// extend blocks in the same scope, each of which will have the same path.
|
||||
// - A location's span is not always a subset of its parent's span. For
|
||||
// example, the "extendee" of an extension declaration appears at the
|
||||
// beginning of the "extend" block and is shared by all extensions within
|
||||
// the block.
|
||||
// - Just because a location's span is a subset of some other location's span
|
||||
// does not mean that it is a descendant. For example, a "group" defines
|
||||
// both a type and a field in a single declaration. Thus, the locations
|
||||
// corresponding to the type and field and their components will overlap.
|
||||
// - Code which tries to interpret locations should probably be designed to
|
||||
// ignore those that it doesn't understand, as more types of locations could
|
||||
// be recorded in the future.
|
||||
repeated Location location = 1;
|
||||
message Location {
|
||||
// Identifies which part of the FileDescriptorProto was defined at this
|
||||
// location.
|
||||
//
|
||||
// Each element is a field number or an index. They form a path from
|
||||
// the root FileDescriptorProto to the place where the definition occurs. For
|
||||
// example, this path:
|
||||
// [ 4, 3, 2, 7, 1 ]
|
||||
// refers to:
|
||||
// file.message_type(3) // 4, 3
|
||||
// .field(7) // 2, 7
|
||||
// .name() // 1
|
||||
// This is because FileDescriptorProto.message_type has field number 4:
|
||||
// repeated DescriptorProto message_type = 4;
|
||||
// and DescriptorProto.field has field number 2:
|
||||
// repeated FieldDescriptorProto field = 2;
|
||||
// and FieldDescriptorProto.name has field number 1:
|
||||
// optional string name = 1;
|
||||
//
|
||||
// Thus, the above path gives the location of a field name. If we removed
|
||||
// the last element:
|
||||
// [ 4, 3, 2, 7 ]
|
||||
// this path refers to the whole field declaration (from the beginning
|
||||
// of the label to the terminating semicolon).
|
||||
repeated int32 path = 1 [packed=true];
|
||||
|
||||
// Always has exactly three or four elements: start line, start column,
|
||||
// end line (optional, otherwise assumed same as start line), end column.
|
||||
// These are packed into a single field for efficiency. Note that line
|
||||
// and column numbers are zero-based -- typically you will want to add
|
||||
// 1 to each before displaying to a user.
|
||||
repeated int32 span = 2 [packed=true];
|
||||
|
||||
// If this SourceCodeInfo represents a complete declaration, these are any
|
||||
// comments appearing before and after the declaration which appear to be
|
||||
// attached to the declaration.
|
||||
//
|
||||
// A series of line comments appearing on consecutive lines, with no other
|
||||
// tokens appearing on those lines, will be treated as a single comment.
|
||||
//
|
||||
// leading_detached_comments will keep paragraphs of comments that appear
|
||||
// before (but not connected to) the current element. Each paragraph,
|
||||
// separated by empty lines, will be one comment element in the repeated
|
||||
// field.
|
||||
//
|
||||
// Only the comment content is provided; comment markers (e.g. //) are
|
||||
// stripped out. For block comments, leading whitespace and an asterisk
|
||||
// will be stripped from the beginning of each line other than the first.
|
||||
// Newlines are included in the output.
|
||||
//
|
||||
// Examples:
|
||||
//
|
||||
// optional int32 foo = 1; // Comment attached to foo.
|
||||
// // Comment attached to bar.
|
||||
// optional int32 bar = 2;
|
||||
//
|
||||
// optional string baz = 3;
|
||||
// // Comment attached to baz.
|
||||
// // Another line attached to baz.
|
||||
//
|
||||
// // Comment attached to qux.
|
||||
// //
|
||||
// // Another line attached to qux.
|
||||
// optional double qux = 4;
|
||||
//
|
||||
// // Detached comment for corge. This is not leading or trailing comments
|
||||
// // to qux or corge because there are blank lines separating it from
|
||||
// // both.
|
||||
//
|
||||
// // Detached comment for corge paragraph 2.
|
||||
//
|
||||
// optional string corge = 5;
|
||||
// /* Block comment attached
|
||||
// * to corge. Leading asterisks
|
||||
// * will be removed. */
|
||||
// /* Block comment attached to
|
||||
// * grault. */
|
||||
// optional int32 grault = 6;
|
||||
//
|
||||
// // ignored detached comments.
|
||||
optional string leading_comments = 3;
|
||||
optional string trailing_comments = 4;
|
||||
repeated string leading_detached_comments = 6;
|
||||
}
|
||||
}
|
||||
|
||||
// Describes the relationship between generated code and its original source
|
||||
// file. A GeneratedCodeInfo message is associated with only one generated
|
||||
// source file, but may contain references to different source .proto files.
|
||||
message GeneratedCodeInfo {
|
||||
// An Annotation connects some span of text in generated code to an element
|
||||
// of its generating .proto file.
|
||||
repeated Annotation annotation = 1;
|
||||
message Annotation {
|
||||
// Identifies the element in the original source .proto file. This field
|
||||
// is formatted the same as SourceCodeInfo.Location.path.
|
||||
repeated int32 path = 1 [packed=true];
|
||||
|
||||
// Identifies the filesystem path to the original source .proto.
|
||||
optional string source_file = 2;
|
||||
|
||||
// Identifies the starting offset in bytes in the generated code
|
||||
// that relates to the identified object.
|
||||
optional int32 begin = 3;
|
||||
|
||||
// Identifies the ending offset in bytes in the generated code that
|
||||
// relates to the identified offset. The end offset should be one past
|
||||
// the last relevant byte (so the length of the text = end - begin).
|
||||
optional int32 end = 4;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,547 @@
|
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// Author: kenton@google.com (Kenton Varda)
|
||||
// Based on original Protocol Buffers design by
|
||||
// Sanjay Ghemawat, Jeff Dean, and others.
|
||||
|
||||
#include <google/protobuf/descriptor_database.h>
|
||||
|
||||
#include <set>
|
||||
|
||||
#include <google/protobuf/descriptor.pb.h>
|
||||
#include <google/protobuf/wire_format_lite_inl.h>
|
||||
#include <google/protobuf/stubs/strutil.h>
|
||||
|
||||
#include <google/protobuf/stubs/map_util.h>
|
||||
#include <google/protobuf/stubs/stl_util.h>
|
||||
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
|
||||
// Out-of-line definition of the DescriptorDatabase destructor; the body is
// intentionally empty.
DescriptorDatabase::~DescriptorDatabase() {}
|
||||
|
||||
// ===================================================================
|
||||
|
||||
template <typename Value>
|
||||
bool SimpleDescriptorDatabase::DescriptorIndex<Value>::AddFile(
|
||||
const FileDescriptorProto& file,
|
||||
Value value) {
|
||||
if (!InsertIfNotPresent(&by_name_, file.name(), value)) {
|
||||
GOOGLE_LOG(ERROR) << "File already exists in database: " << file.name();
|
||||
return false;
|
||||
}
|
||||
|
||||
// We must be careful here -- calling file.package() if file.has_package() is
|
||||
// false could access an uninitialized static-storage variable if we are being
|
||||
// run at startup time.
|
||||
string path = file.has_package() ? file.package() : string();
|
||||
if (!path.empty()) path += '.';
|
||||
|
||||
for (int i = 0; i < file.message_type_size(); i++) {
|
||||
if (!AddSymbol(path + file.message_type(i).name(), value)) return false;
|
||||
if (!AddNestedExtensions(file.message_type(i), value)) return false;
|
||||
}
|
||||
for (int i = 0; i < file.enum_type_size(); i++) {
|
||||
if (!AddSymbol(path + file.enum_type(i).name(), value)) return false;
|
||||
}
|
||||
for (int i = 0; i < file.extension_size(); i++) {
|
||||
if (!AddSymbol(path + file.extension(i).name(), value)) return false;
|
||||
if (!AddExtension(file.extension(i), value)) return false;
|
||||
}
|
||||
for (int i = 0; i < file.service_size(); i++) {
|
||||
if (!AddSymbol(path + file.service(i).name(), value)) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Value>
|
||||
bool SimpleDescriptorDatabase::DescriptorIndex<Value>::AddSymbol(
|
||||
const string& name, Value value) {
|
||||
// We need to make sure not to violate our map invariant.
|
||||
|
||||
// If the symbol name is invalid it could break our lookup algorithm (which
|
||||
// relies on the fact that '.' sorts before all other characters that are
|
||||
// valid in symbol names).
|
||||
if (!ValidateSymbolName(name)) {
|
||||
GOOGLE_LOG(ERROR) << "Invalid symbol name: " << name;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Try to look up the symbol to make sure a super-symbol doesn't already
|
||||
// exist.
|
||||
typename std::map<string, Value>::iterator iter = FindLastLessOrEqual(name);
|
||||
|
||||
if (iter == by_symbol_.end()) {
|
||||
// Apparently the map is currently empty. Just insert and be done with it.
|
||||
by_symbol_.insert(
|
||||
typename std::map<string, Value>::value_type(name, value));
|
||||
return true;
|
||||
}
|
||||
|
||||
if (IsSubSymbol(iter->first, name)) {
|
||||
GOOGLE_LOG(ERROR) << "Symbol name \"" << name << "\" conflicts with the existing "
|
||||
"symbol \"" << iter->first << "\".";
|
||||
return false;
|
||||
}
|
||||
|
||||
// OK, that worked. Now we have to make sure that no symbol in the map is
|
||||
// a sub-symbol of the one we are inserting. The only symbol which could
|
||||
// be so is the first symbol that is greater than the new symbol. Since
|
||||
// |iter| points at the last symbol that is less than or equal, we just have
|
||||
// to increment it.
|
||||
++iter;
|
||||
|
||||
if (iter != by_symbol_.end() && IsSubSymbol(name, iter->first)) {
|
||||
GOOGLE_LOG(ERROR) << "Symbol name \"" << name << "\" conflicts with the existing "
|
||||
"symbol \"" << iter->first << "\".";
|
||||
return false;
|
||||
}
|
||||
|
||||
// OK, no conflicts.
|
||||
|
||||
// Insert the new symbol using the iterator as a hint, the new entry will
|
||||
// appear immediately before the one the iterator is pointing at.
|
||||
by_symbol_.insert(iter,
|
||||
typename std::map<string, Value>::value_type(name, value));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Value>
|
||||
bool SimpleDescriptorDatabase::DescriptorIndex<Value>::AddNestedExtensions(
|
||||
const DescriptorProto& message_type,
|
||||
Value value) {
|
||||
for (int i = 0; i < message_type.nested_type_size(); i++) {
|
||||
if (!AddNestedExtensions(message_type.nested_type(i), value)) return false;
|
||||
}
|
||||
for (int i = 0; i < message_type.extension_size(); i++) {
|
||||
if (!AddExtension(message_type.extension(i), value)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Value>
|
||||
bool SimpleDescriptorDatabase::DescriptorIndex<Value>::AddExtension(
|
||||
const FieldDescriptorProto& field,
|
||||
Value value) {
|
||||
if (!field.extendee().empty() && field.extendee()[0] == '.') {
|
||||
// The extension is fully-qualified. We can use it as a lookup key in
|
||||
// the by_symbol_ table.
|
||||
if (!InsertIfNotPresent(
|
||||
&by_extension_,
|
||||
std::make_pair(field.extendee().substr(1), field.number()),
|
||||
value)) {
|
||||
GOOGLE_LOG(ERROR) << "Extension conflicts with extension already in database: "
|
||||
"extend " << field.extendee() << " { "
|
||||
<< field.name() << " = " << field.number() << " }";
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// Not fully-qualified. We can't really do anything here, unfortunately.
|
||||
// We don't consider this an error, though, because the descriptor is
|
||||
// valid.
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Value>
|
||||
Value SimpleDescriptorDatabase::DescriptorIndex<Value>::FindFile(
|
||||
const string& filename) {
|
||||
return FindWithDefault(by_name_, filename, Value());
|
||||
}
|
||||
|
||||
template <typename Value>
|
||||
Value SimpleDescriptorDatabase::DescriptorIndex<Value>::FindSymbol(
|
||||
const string& name) {
|
||||
typename std::map<string, Value>::iterator iter = FindLastLessOrEqual(name);
|
||||
|
||||
return (iter != by_symbol_.end() && IsSubSymbol(iter->first, name)) ?
|
||||
iter->second : Value();
|
||||
}
|
||||
|
||||
template <typename Value>
|
||||
Value SimpleDescriptorDatabase::DescriptorIndex<Value>::FindExtension(
|
||||
const string& containing_type,
|
||||
int field_number) {
|
||||
return FindWithDefault(
|
||||
by_extension_, std::make_pair(containing_type, field_number), Value());
|
||||
}
|
||||
|
||||
template <typename Value>
|
||||
bool SimpleDescriptorDatabase::DescriptorIndex<Value>::FindAllExtensionNumbers(
|
||||
const string& containing_type,
|
||||
std::vector<int>* output) {
|
||||
typename std::map<std::pair<string, int>, Value>::const_iterator it =
|
||||
by_extension_.lower_bound(std::make_pair(containing_type, 0));
|
||||
bool success = false;
|
||||
|
||||
for (; it != by_extension_.end() && it->first.first == containing_type;
|
||||
++it) {
|
||||
output->push_back(it->first.second);
|
||||
success = true;
|
||||
}
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
template <typename Value>
|
||||
typename std::map<string, Value>::iterator
|
||||
SimpleDescriptorDatabase::DescriptorIndex<Value>::FindLastLessOrEqual(
|
||||
const string& name) {
|
||||
// Find the last key in the map which sorts less than or equal to the
|
||||
// symbol name. Since upper_bound() returns the *first* key that sorts
|
||||
// *greater* than the input, we want the element immediately before that.
|
||||
typename std::map<string, Value>::iterator iter =
|
||||
by_symbol_.upper_bound(name);
|
||||
if (iter != by_symbol_.begin()) --iter;
|
||||
return iter;
|
||||
}
|
||||
|
||||
template <typename Value>
|
||||
bool SimpleDescriptorDatabase::DescriptorIndex<Value>::IsSubSymbol(
|
||||
const string& sub_symbol, const string& super_symbol) {
|
||||
return sub_symbol == super_symbol ||
|
||||
(HasPrefixString(super_symbol, sub_symbol) &&
|
||||
super_symbol[sub_symbol.size()] == '.');
|
||||
}
|
||||
|
||||
template <typename Value>
|
||||
bool SimpleDescriptorDatabase::DescriptorIndex<Value>::ValidateSymbolName(
|
||||
const string& name) {
|
||||
for (int i = 0; i < name.size(); i++) {
|
||||
// I don't trust ctype.h due to locales. :(
|
||||
if (name[i] != '.' && name[i] != '_' &&
|
||||
(name[i] < '0' || name[i] > '9') &&
|
||||
(name[i] < 'A' || name[i] > 'Z') &&
|
||||
(name[i] < 'a' || name[i] > 'z')) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
// Default constructor; performs no explicit initialization.
SimpleDescriptorDatabase::SimpleDescriptorDatabase() {}
|
||||
// Releases the FileDescriptorProtos recorded in files_to_delete_ (see
// AddAndOwn()) by handing the container to STLDeleteElements().
SimpleDescriptorDatabase::~SimpleDescriptorDatabase() {
  STLDeleteElements(&files_to_delete_);
}
|
||||
|
||||
bool SimpleDescriptorDatabase::Add(const FileDescriptorProto& file) {
|
||||
FileDescriptorProto* new_file = new FileDescriptorProto;
|
||||
new_file->CopyFrom(file);
|
||||
return AddAndOwn(new_file);
|
||||
}
|
||||
|
||||
bool SimpleDescriptorDatabase::AddAndOwn(const FileDescriptorProto* file) {
|
||||
files_to_delete_.push_back(file);
|
||||
return index_.AddFile(*file, file);
|
||||
}
|
||||
|
||||
bool SimpleDescriptorDatabase::FindFileByName(
|
||||
const string& filename,
|
||||
FileDescriptorProto* output) {
|
||||
return MaybeCopy(index_.FindFile(filename), output);
|
||||
}
|
||||
|
||||
bool SimpleDescriptorDatabase::FindFileContainingSymbol(
|
||||
const string& symbol_name,
|
||||
FileDescriptorProto* output) {
|
||||
return MaybeCopy(index_.FindSymbol(symbol_name), output);
|
||||
}
|
||||
|
||||
bool SimpleDescriptorDatabase::FindFileContainingExtension(
|
||||
const string& containing_type,
|
||||
int field_number,
|
||||
FileDescriptorProto* output) {
|
||||
return MaybeCopy(index_.FindExtension(containing_type, field_number), output);
|
||||
}
|
||||
|
||||
bool SimpleDescriptorDatabase::FindAllExtensionNumbers(
|
||||
const string& extendee_type,
|
||||
std::vector<int>* output) {
|
||||
return index_.FindAllExtensionNumbers(extendee_type, output);
|
||||
}
|
||||
|
||||
|
||||
bool SimpleDescriptorDatabase::MaybeCopy(const FileDescriptorProto* file,
|
||||
FileDescriptorProto* output) {
|
||||
if (file == NULL) return false;
|
||||
output->CopyFrom(*file);
|
||||
return true;
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
// Default constructor; performs no explicit initialization.
EncodedDescriptorDatabase::EncodedDescriptorDatabase() {}
|
||||
// Frees every buffer recorded in files_to_delete_. The buffers are obtained
// from ::operator new in AddCopy(), so they are released with the matching
// ::operator delete.
EncodedDescriptorDatabase::~EncodedDescriptorDatabase() {
  for (size_t idx = 0; idx < files_to_delete_.size(); ++idx) {
    operator delete(files_to_delete_[idx]);
  }
}
|
||||
|
||||
bool EncodedDescriptorDatabase::Add(
|
||||
const void* encoded_file_descriptor, int size) {
|
||||
FileDescriptorProto file;
|
||||
if (file.ParseFromArray(encoded_file_descriptor, size)) {
|
||||
return index_.AddFile(file, std::make_pair(encoded_file_descriptor, size));
|
||||
} else {
|
||||
GOOGLE_LOG(ERROR) << "Invalid file descriptor data passed to "
|
||||
"EncodedDescriptorDatabase::Add().";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool EncodedDescriptorDatabase::AddCopy(
|
||||
const void* encoded_file_descriptor, int size) {
|
||||
void* copy = operator new(size);
|
||||
memcpy(copy, encoded_file_descriptor, size);
|
||||
files_to_delete_.push_back(copy);
|
||||
return Add(copy, size);
|
||||
}
|
||||
|
||||
bool EncodedDescriptorDatabase::FindFileByName(
|
||||
const string& filename,
|
||||
FileDescriptorProto* output) {
|
||||
return MaybeParse(index_.FindFile(filename), output);
|
||||
}
|
||||
|
||||
bool EncodedDescriptorDatabase::FindFileContainingSymbol(
|
||||
const string& symbol_name,
|
||||
FileDescriptorProto* output) {
|
||||
return MaybeParse(index_.FindSymbol(symbol_name), output);
|
||||
}
|
||||
|
||||
bool EncodedDescriptorDatabase::FindNameOfFileContainingSymbol(
|
||||
const string& symbol_name,
|
||||
string* output) {
|
||||
std::pair<const void*, int> encoded_file = index_.FindSymbol(symbol_name);
|
||||
if (encoded_file.first == NULL) return false;
|
||||
|
||||
// Optimization: The name should be the first field in the encoded message.
|
||||
// Try to just read it directly.
|
||||
io::CodedInputStream input(reinterpret_cast<const uint8*>(encoded_file.first),
|
||||
encoded_file.second);
|
||||
|
||||
const uint32 kNameTag = internal::WireFormatLite::MakeTag(
|
||||
FileDescriptorProto::kNameFieldNumber,
|
||||
internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
|
||||
|
||||
if (input.ReadTagNoLastTag() == kNameTag) {
|
||||
// Success!
|
||||
return internal::WireFormatLite::ReadString(&input, output);
|
||||
} else {
|
||||
// Slow path. Parse whole message.
|
||||
FileDescriptorProto file_proto;
|
||||
if (!file_proto.ParseFromArray(encoded_file.first, encoded_file.second)) {
|
||||
return false;
|
||||
}
|
||||
*output = file_proto.name();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool EncodedDescriptorDatabase::FindFileContainingExtension(
|
||||
const string& containing_type,
|
||||
int field_number,
|
||||
FileDescriptorProto* output) {
|
||||
return MaybeParse(index_.FindExtension(containing_type, field_number),
|
||||
output);
|
||||
}
|
||||
|
||||
bool EncodedDescriptorDatabase::FindAllExtensionNumbers(
|
||||
const string& extendee_type,
|
||||
std::vector<int>* output) {
|
||||
return index_.FindAllExtensionNumbers(extendee_type, output);
|
||||
}
|
||||
|
||||
bool EncodedDescriptorDatabase::MaybeParse(
|
||||
std::pair<const void*, int> encoded_file,
|
||||
FileDescriptorProto* output) {
|
||||
if (encoded_file.first == NULL) return false;
|
||||
return output->ParseFromArray(encoded_file.first, encoded_file.second);
|
||||
}
|
||||
|
||||
// ===================================================================
|
||||
|
||||
// Builds a database view over |pool|; the only work done is initializing
// pool_ from the argument.
DescriptorPoolDatabase::DescriptorPoolDatabase(const DescriptorPool& pool)
    : pool_(pool) {
}
|
||||
// Destructor; the body is intentionally empty.
DescriptorPoolDatabase::~DescriptorPoolDatabase() {}
|
||||
|
||||
bool DescriptorPoolDatabase::FindFileByName(
|
||||
const string& filename,
|
||||
FileDescriptorProto* output) {
|
||||
const FileDescriptor* file = pool_.FindFileByName(filename);
|
||||
if (file == NULL) return false;
|
||||
output->Clear();
|
||||
file->CopyTo(output);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DescriptorPoolDatabase::FindFileContainingSymbol(
|
||||
const string& symbol_name,
|
||||
FileDescriptorProto* output) {
|
||||
const FileDescriptor* file = pool_.FindFileContainingSymbol(symbol_name);
|
||||
if (file == NULL) return false;
|
||||
output->Clear();
|
||||
file->CopyTo(output);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DescriptorPoolDatabase::FindFileContainingExtension(
|
||||
const string& containing_type,
|
||||
int field_number,
|
||||
FileDescriptorProto* output) {
|
||||
const Descriptor* extendee = pool_.FindMessageTypeByName(containing_type);
|
||||
if (extendee == NULL) return false;
|
||||
|
||||
const FieldDescriptor* extension =
|
||||
pool_.FindExtensionByNumber(extendee, field_number);
|
||||
if (extension == NULL) return false;
|
||||
|
||||
output->Clear();
|
||||
extension->file()->CopyTo(output);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DescriptorPoolDatabase::FindAllExtensionNumbers(
|
||||
const string& extendee_type,
|
||||
std::vector<int>* output) {
|
||||
const Descriptor* extendee = pool_.FindMessageTypeByName(extendee_type);
|
||||
if (extendee == NULL) return false;
|
||||
|
||||
std::vector<const FieldDescriptor*> extensions;
|
||||
pool_.FindAllExtensions(extendee, &extensions);
|
||||
|
||||
for (int i = 0; i < extensions.size(); ++i) {
|
||||
output->push_back(extensions[i]->number());
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// ===================================================================
|
||||
|
||||
// Builds a merged view over two databases. |source1| is stored ahead of
// |source2| in sources_; the Find* methods in this file scan sources_ from
// front to back.
MergedDescriptorDatabase::MergedDescriptorDatabase(
    DescriptorDatabase* source1,
    DescriptorDatabase* source2) {
  DescriptorDatabase* const ordered[] = { source1, source2 };
  for (int k = 0; k < 2; ++k) {
    sources_.push_back(ordered[k]);
  }
}
|
||||
// Builds a merged view over |sources|; the vector is copied into sources_
// with its order preserved.
MergedDescriptorDatabase::MergedDescriptorDatabase(
    const std::vector<DescriptorDatabase*>& sources)
    : sources_(sources) {
}
|
||||
// Destructor; the body is empty, so the pointers held in sources_ are not
// deleted here.
MergedDescriptorDatabase::~MergedDescriptorDatabase() {}
|
||||
|
||||
bool MergedDescriptorDatabase::FindFileByName(
|
||||
const string& filename,
|
||||
FileDescriptorProto* output) {
|
||||
for (int i = 0; i < sources_.size(); i++) {
|
||||
if (sources_[i]->FindFileByName(filename, output)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool MergedDescriptorDatabase::FindFileContainingSymbol(
|
||||
const string& symbol_name,
|
||||
FileDescriptorProto* output) {
|
||||
for (int i = 0; i < sources_.size(); i++) {
|
||||
if (sources_[i]->FindFileContainingSymbol(symbol_name, output)) {
|
||||
// The symbol was found in source i. However, if one of the previous
|
||||
// sources defines a file with the same name (which presumably doesn't
|
||||
// contain the symbol, since it wasn't found in that source), then we
|
||||
// must hide it from the caller.
|
||||
FileDescriptorProto temp;
|
||||
for (int j = 0; j < i; j++) {
|
||||
if (sources_[j]->FindFileByName(output->name(), &temp)) {
|
||||
// Found conflicting file in a previous source.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool MergedDescriptorDatabase::FindFileContainingExtension(
|
||||
const string& containing_type,
|
||||
int field_number,
|
||||
FileDescriptorProto* output) {
|
||||
for (int i = 0; i < sources_.size(); i++) {
|
||||
if (sources_[i]->FindFileContainingExtension(
|
||||
containing_type, field_number, output)) {
|
||||
// The symbol was found in source i. However, if one of the previous
|
||||
// sources defines a file with the same name (which presumably doesn't
|
||||
// contain the symbol, since it wasn't found in that source), then we
|
||||
// must hide it from the caller.
|
||||
FileDescriptorProto temp;
|
||||
for (int j = 0; j < i; j++) {
|
||||
if (sources_[j]->FindFileByName(output->name(), &temp)) {
|
||||
// Found conflicting file in a previous source.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool MergedDescriptorDatabase::FindAllExtensionNumbers(
|
||||
const string& extendee_type,
|
||||
std::vector<int>* output) {
|
||||
std::set<int> merged_results;
|
||||
std::vector<int> results;
|
||||
bool success = false;
|
||||
|
||||
for (int i = 0; i < sources_.size(); i++) {
|
||||
if (sources_[i]->FindAllExtensionNumbers(extendee_type, &results)) {
|
||||
std::copy(results.begin(), results.end(),
|
||||
std::insert_iterator<std::set<int> >(merged_results,
|
||||
merged_results.begin()));
|
||||
success = true;
|
||||
}
|
||||
results.clear();
|
||||
}
|
||||
|
||||
std::copy(merged_results.begin(), merged_results.end(),
|
||||
std::insert_iterator<std::vector<int> >(*output, output->end()));
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
|
||||
} // namespace protobuf
|
||||
} // namespace google
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue