Skip to content

Commit

Permalink
feat(textproto): allow compilation units to contain multiple sources (#…
Browse files Browse the repository at this point in the history
…4772)

Some portions of google3 contain so many textproto build targets that it is causing performance problems for blaze. Stuffing multiple textprotos in a single compilation unit will allow us to reduce the number of build targets.
  • Loading branch information
justbuchanan committed Dec 3, 2020
1 parent 5d627e8 commit 37d7125
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 46 deletions.
1 change: 1 addition & 0 deletions kythe/cxx/indexer/textproto/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ cc_library(
"//kythe/cxx/indexer/proto:vname_util",
"//kythe/proto:analysis_cc_proto",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/container:node_hash_map",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
Expand Down
106 changes: 61 additions & 45 deletions kythe/cxx/indexer/textproto/analyzer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <memory>

#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/match.h"
Expand Down Expand Up @@ -838,16 +839,19 @@ absl::Status AnalyzeCompilationUnit(PluginLoadCallback plugin_loader,
const proto::CompilationUnit& unit,
const std::vector<proto::FileData>& files,
KytheGraphRecorder* recorder) {
if (unit.source_file().size() != 1) {
if (unit.source_file().empty()) {
return absl::FailedPreconditionError(
"Expected Unit to contain 1 source file");
"Expected Unit to contain 1+ source files");
}
if (files.size() < 2) {
return absl::FailedPreconditionError(
"Must provide at least 2 files: a textproto and 1+ .proto files");
}

const std::string textproto_name = unit.source_file(0);
absl::flat_hash_set<std::string> textproto_filenames;
for (const std::string& filename : unit.source_file()) {
textproto_filenames.insert(filename);
}

// Parse path substitutions from arguments.
absl::flat_hash_map<std::string, std::string> file_substitution_cache;
Expand All @@ -868,15 +872,17 @@ absl::Status AnalyzeCompilationUnit(PluginLoadCallback plugin_loader,
}
LOG(INFO) << "Proto message name: " << message_name;

absl::flat_hash_map<std::string, const proto::FileData*> file_data_by_path;

// Load all proto files into in-memory SourceTree.
PreloadedProtoFileTree file_reader(&path_substitutions,
&file_substitution_cache);
std::vector<std::string> proto_filenames;
const proto::FileData* textproto_file_data = nullptr;
for (const auto& file : files) {
// Skip textproto - only proto files go in the descriptor db.
if (file.info().path() == textproto_name) {
textproto_file_data = &file;
if (textproto_filenames.find(file.info().path()) !=
textproto_filenames.end()) {
file_data_by_path[file.info().path()] = &file;
continue;
}

Expand All @@ -886,8 +892,9 @@ absl::Status AnalyzeCompilationUnit(PluginLoadCallback plugin_loader,
}
proto_filenames.push_back(file.info().path());
}
if (textproto_file_data == nullptr) {
return absl::NotFoundError("Couldn't find textproto source in file data.");
if (textproto_filenames.size() != file_data_by_path.size()) {
return absl::NotFoundError(
"Couldn't find all textproto sources in file data.");
}

// Build proto descriptor pool with top-level protos.
Expand Down Expand Up @@ -920,48 +927,57 @@ absl::Status AnalyzeCompilationUnit(PluginLoadCallback plugin_loader,
"Unable to find proto message in descriptor pool: ", message_name));
}

// Use reflection to create an instance of the top-level proto message.
// note: msg_factory must outlive any protos created from it.
google::protobuf::DynamicMessageFactory msg_factory;
std::unique_ptr<Message> proto(msg_factory.GetPrototype(descriptor)->New());

// Parse textproto into @proto, recording input locations to @parse_tree.
TextFormat::ParseInfoTree parse_tree;
{
TextFormat::Parser parser;
parser.WriteLocationsTo(&parse_tree);
// Relax parser restrictions - even if the proto is partially ill-defined,
// we'd like to analyze the parts that are good.
parser.AllowPartialMessage(true);
parser.AllowUnknownExtension(true);
if (!parser.ParseFromString(textproto_file_data->content(), proto.get())) {
return absl::UnknownError("Failed to parse text proto");
for (auto& source : file_data_by_path) {
// Use reflection to create an instance of the top-level proto message.
// note: msg_factory must outlive any protos created from it.
google::protobuf::DynamicMessageFactory msg_factory;
std::unique_ptr<Message> proto(msg_factory.GetPrototype(descriptor)->New());

// Parse textproto into @proto, recording input locations to @parse_tree.
TextFormat::ParseInfoTree parse_tree;
{
TextFormat::Parser parser;
parser.WriteLocationsTo(&parse_tree);
// Relax parser restrictions - even if the proto is partially ill-defined,
// we'd like to analyze the parts that are good.
parser.AllowPartialMessage(true);
parser.AllowUnknownExtension(true);
if (!parser.ParseFromString(source.second->content(), proto.get())) {
return absl::UnknownError("Failed to parse text proto");
}
}
}

// Emit file node.
proto::VName file_vname = LookupVNameForFullPath(textproto_name, unit);
recorder->AddProperty(VNameRef(file_vname), NodeKindID::kFile);
// Record source text as a fact.
recorder->AddProperty(VNameRef(file_vname), PropertyID::kText,
textproto_file_data->content());

TextprotoAnalyzer analyzer(&unit, textproto_file_data->content(),
&file_substitution_cache, recorder,
descriptor_pool);

// Load plugins
analyzer.SetPlugins(plugin_loader(*proto));
// Emit file node.
proto::VName file_vname = LookupVNameForFullPath(source.first, unit);
recorder->AddProperty(VNameRef(file_vname), NodeKindID::kFile);
// Record source text as a fact.
recorder->AddProperty(VNameRef(file_vname), PropertyID::kText,
source.second->content());

TextprotoAnalyzer analyzer(&unit, source.second->content(),
&file_substitution_cache, recorder,
descriptor_pool);

// Load plugins
analyzer.SetPlugins(plugin_loader(*proto));

absl::Status status =
analyzer.AnalyzeSchemaComments(file_vname, *descriptor);
if (!status.ok()) {
std::string msg =
absl::StrCat("Error analyzing schema comments: ", status.ToString());
LOG(ERROR) << msg << status;
analyzer.EmitDiagnostic(file_vname, "schema_comments", msg);
}

absl::Status status = analyzer.AnalyzeSchemaComments(file_vname, *descriptor);
if (!status.ok()) {
std::string msg =
absl::StrCat("Error analyzing schema comments: ", status.ToString());
LOG(ERROR) << msg << status;
analyzer.EmitDiagnostic(file_vname, "schema_comments", msg);
auto s =
analyzer.AnalyzeMessage(file_vname, *proto, *descriptor, parse_tree);
if (!s.ok()) {
return s;
}
}

return analyzer.AnalyzeMessage(file_vname, *proto, *descriptor, parse_tree);
return absl::OkStatus();
}

} // namespace lang_textproto
Expand Down
2 changes: 1 addition & 1 deletion kythe/cxx/indexer/textproto/analyzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace lang_textproto {
// The canonical name for the textproto language in Kythe.
extern const absl::string_view kLanguageName;

/// Analyzes the textproto file described by @unit and emits graph facts to
/// Analyzes the textproto file(s) described by @unit and emits graph facts to
/// @recorder.
///
/// The basic indexing flow is as follows:
Expand Down

0 comments on commit 37d7125

Please sign in to comment.