Skip to content

Commit

Permalink
AVRO-2397: [c++] Add support for type and field aliases (#2270)
Browse files Browse the repository at this point in the history
* Add support for type and field aliases

* AVRO-2397: Improve code style consistency

---------

Co-authored-by: Martin Grigorov <[email protected]>
  • Loading branch information
mparry and martin-g committed Feb 22, 2024
1 parent 568b19c commit a462498
Show file tree
Hide file tree
Showing 8 changed files with 264 additions and 123 deletions.
22 changes: 15 additions & 7 deletions lang/c++/api/Node.hh
Original file line number Diff line number Diff line change
Expand Up @@ -40,30 +40,38 @@ class GenericDatum;
using NodePtr = std::shared_ptr<Node>;

class AVRO_DECL Name {
struct Aliases;

std::string ns_;
std::string simpleName_;
std::unique_ptr<Aliases> aliases_;

public:
Name() = default;
explicit Name(const std::string &fullname);
Name(std::string simpleName, std::string ns) : ns_(std::move(ns)), simpleName_(std::move(simpleName)) { check(); }
Name();
explicit Name(const std::string &name);
Name(std::string simpleName, std::string ns);
Name(const Name& other);
Name& operator=(const Name& other);
Name(Name&& other);
Name& operator=(Name&& other);
~Name();

std::string fullname() const;
const std::string &ns() const { return ns_; }
const std::string &simpleName() const { return simpleName_; }
const std::vector<std::string> &aliases() const;

void ns(std::string n) { ns_ = std::move(n); }
void simpleName(std::string n) { simpleName_ = std::move(n); }
void fullname(const std::string &n);
void addAlias(const std::string &alias);

bool operator<(const Name &n) const;
void check() const;
bool operator==(const Name &n) const;
bool operator!=(const Name &n) const { return !((*this) == n); }
void clear() {
ns_.clear();
simpleName_.clear();
}
bool equalOrAliasedBy(const Name &n) const;
void clear();
explicit operator std::string() const {
return fullname();
}
Expand Down
48 changes: 12 additions & 36 deletions lang/c++/api/NodeImpl.hh
Original file line number Diff line number Diff line change
Expand Up @@ -294,42 +294,30 @@ protected:
};

class AVRO_DECL NodeRecord : public NodeImplRecord {
std::vector<GenericDatum> defaultValues;
std::vector<std::vector<std::string>> fieldsAliases_;
std::vector<GenericDatum> fieldsDefaultValues_;

public:
NodeRecord() : NodeImplRecord(AVRO_RECORD) {}

NodeRecord(const HasName &name, const MultiLeaves &fields,
const LeafNames &fieldsNames,
std::vector<GenericDatum> dv);
const LeafNames &fieldsNames, std::vector<GenericDatum> dv);

NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields,
const LeafNames &fieldsNames,
std::vector<GenericDatum> dv) : NodeImplRecord(AVRO_RECORD, name, doc, fields, fieldsNames, MultiAttributes(), NoSize()),
defaultValues(std::move(dv)) {
leafNameCheck();
}
const LeafNames &fieldsNames, std::vector<GenericDatum> dv);

NodeRecord(const HasName &name, const MultiLeaves &fields,
const LeafNames &fieldsNames,
const std::vector<GenericDatum>& dv,
const MultiAttributes &customAttributes) :
NodeImplRecord(AVRO_RECORD, name, fields, fieldsNames, customAttributes, NoSize()),
defaultValues(dv) {
leafNameCheck();
}
const LeafNames &fieldsNames, std::vector<std::vector<std::string>> fieldsAliases,
std::vector<GenericDatum> dv, const MultiAttributes &customAttributes);

NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields,
const LeafNames &fieldsNames,
const std::vector<GenericDatum>& dv,
const MultiAttributes &customAttributes) :
NodeImplRecord(AVRO_RECORD, name, doc, fields, fieldsNames, customAttributes, NoSize()),
defaultValues(dv) {
leafNameCheck();
}
const LeafNames &fieldsNames, std::vector<std::vector<std::string>> fieldsAliases,
std::vector<GenericDatum> dv, const MultiAttributes &customAttributes);

void swap(NodeRecord &r) {
NodeImplRecord::swap(r);
defaultValues.swap(r.defaultValues);
fieldsAliases_.swap(r.fieldsAliases_);
fieldsDefaultValues_.swap(r.fieldsDefaultValues_);
}

SchemaResolution resolve(const Node &reader) const override;
Expand All @@ -344,22 +332,10 @@ public:
}

const GenericDatum &defaultValueAt(size_t index) override {
return defaultValues[index];
return fieldsDefaultValues_[index];
}

void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;

private:
// check if leaf name is valid Name and is not duplicate
void leafNameCheck() {
for (size_t i = 0; i < leafNameAttributes_.size(); ++i) {
if (!nameIndex_.add(leafNameAttributes_.get(i), i)) {
throw Exception(boost::format(
"Cannot add duplicate field: %1%")
% leafNameAttributes_.get(i));
}
}
}
};

class AVRO_DECL NodeEnum : public NodeImplEnum {
Expand Down
46 changes: 32 additions & 14 deletions lang/c++/impl/Compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,13 @@ string getDocField(const Entity &e, const Object &m) {

struct Field {
const string name;
const vector<string> aliases;
const NodePtr schema;
const GenericDatum defaultValue;
const CustomAttributes customAttributes;
Field(string n, NodePtr v, GenericDatum dv, const CustomAttributes& ca) : name(std::move(n)), schema(std::move(v)), defaultValue(std::move(dv)), customAttributes(std::move(ca)) {}

Field(string n, vector<string> a, NodePtr v, GenericDatum dv, const CustomAttributes& ca)
: name(std::move(n)), aliases(std::move(a)), schema(std::move(v)), defaultValue(std::move(dv)), customAttributes(ca) {}
};

static void assertType(const Entity &e, EntityType et) {
Expand Down Expand Up @@ -263,7 +266,7 @@ static GenericDatum makeGenericDatum(NodePtr n,
static const std::unordered_set<std::string>& getKnownFields() {
// return known fields
static const std::unordered_set<std::string> kKnownFields =
{"name", "type", "default", "doc", "size", "logicalType",
{"name", "type", "aliases", "default", "doc", "size", "logicalType",
"values", "precision", "scale", "namespace"};
return kKnownFields;
}
Expand All @@ -282,7 +285,13 @@ static void getCustomAttributes(const Object& m, CustomAttributes &customAttribu

static Field makeField(const Entity &e, SymbolTable &st, const string &ns) {
const Object &m = e.objectValue();
const string &n = getStringField(e, m, "name");
string n = getStringField(e, m, "name");
vector<string> aliases;
if (containsField(m, "aliases")) {
for (const auto &alias : getArrayField(e, m, "aliases")) {
aliases.emplace_back(alias.stringValue());
}
}
auto it = findField(e, m, "type");
auto it2 = m.find("default");
NodePtr node = makeNode(it->second, st, ns);
Expand All @@ -293,34 +302,34 @@ static Field makeField(const Entity &e, SymbolTable &st, const string &ns) {
// Get custom attributes
CustomAttributes customAttributes;
getCustomAttributes(m, customAttributes);

return Field(n, node, d, customAttributes);
return Field(std::move(n), std::move(aliases), node, d, customAttributes);
}

// Extended makeRecordNode (with doc).
static NodePtr makeRecordNode(const Entity &e, const Name &name,
const string *doc, const Object &m,
SymbolTable &st, const string &ns) {
const Array &v = getArrayField(e, m, "fields");
concepts::MultiAttribute<string> fieldNames;
vector<vector<string>> fieldAliases;
concepts::MultiAttribute<NodePtr> fieldValues;
concepts::MultiAttribute<CustomAttributes> customAttributes;
vector<GenericDatum> defaultValues;

for (const auto &it : v) {
for (const auto &it : getArrayField(e, m, "fields")) {
Field f = makeField(it, st, ns);
fieldNames.add(f.name);
fieldAliases.push_back(f.aliases);
fieldValues.add(f.schema);
defaultValues.push_back(f.defaultValue);
customAttributes.add(f.customAttributes);
}

NodeRecord *node;
if (doc == nullptr) {
node = new NodeRecord(asSingleAttribute(name), fieldValues, fieldNames,
defaultValues, customAttributes);
fieldAliases, defaultValues, customAttributes);
} else {
node = new NodeRecord(asSingleAttribute(name), asSingleAttribute(*doc),
fieldValues, fieldNames, defaultValues, customAttributes);
fieldValues, fieldNames, fieldAliases, defaultValues, customAttributes);
}
return NodePtr(node);
}
Expand Down Expand Up @@ -422,8 +431,9 @@ static NodePtr makeMapNode(const Entity &e, const Object &m,
static Name getName(const Entity &e, const Object &m, const string &ns) {
const string &name = getStringField(e, m, "name");

Name result;
if (isFullName(name)) {
return Name(name);
result = Name(name);
} else {
auto it = m.find("namespace");
if (it != m.end()) {
Expand All @@ -432,11 +442,19 @@ static Name getName(const Entity &e, const Object &m, const string &ns) {
"Json field \"%1%\" is not a %2%: %3%")
% "namespace" % json::type_traits<string>::name() % it->second.toString());
}
Name result = Name(name, it->second.stringValue());
return result;
result = Name(name, it->second.stringValue());
} else {
result = Name(name, ns);
}
}

if (containsField(m, "aliases")) {
for (const auto &alias : getArrayField(e, m, "aliases")) {
result.addAlias(alias.stringValue());
}
return Name(name, ns);
}

return result;
}

static NodePtr makeNode(const Entity &e, const Object &m,
Expand Down
62 changes: 61 additions & 1 deletion lang/c++/impl/Node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/

#include <cmath>
#include <unordered_set>

#include "Node.hh"

Expand All @@ -26,12 +27,44 @@ using std::string;

Node::~Node() = default;

// Alias storage for a Name. Held through Name::aliases_ and allocated by the
// first addAlias() call, so a Name without aliases carries only a null pointer.
struct Name::Aliases {
// Aliases exactly as they were passed to addAlias(), in insertion order.
// This is what Name::aliases() exposes.
std::vector<std::string> raw;
// Lookup set consulted by equalOrAliasedBy(). An alias containing no '.' is
// stored as "<ns>.<alias>" when the owning Name has a non-empty namespace at
// the time it is added; every other alias is stored unchanged.
std::unordered_set<std::string> fullyQualified;
};

// Default constructor: ns_ and simpleName_ start empty and aliases_ is null.
Name::Name() = default;

// Builds a Name from a single string by forwarding it unchanged to the
// fullname(const string &) setter, which populates this object and ends by
// calling check().
Name::Name(const std::string &name) {
fullname(name);
}

// Builds a Name from an already separated simple name and namespace.
// Both arguments are taken by value and moved into the members; check() is
// then run on the freshly initialised object.
Name::Name(std::string simpleName, std::string ns)
    : ns_(std::move(ns)),
      simpleName_(std::move(simpleName)) {
    check();
}

// Copy constructor: duplicates the namespace and simple name, and deep-copies
// the alias storage when `other` has one (aliases_ stays null otherwise).
Name::Name(const Name& other)
    : ns_(other.ns_),
      simpleName_(other.simpleName_),
      aliases_(other.aliases_ ? std::make_unique<Aliases>(*other.aliases_) : nullptr) {
}

// Copy assignment: makes *this an independent copy of `other`.
//
// The namespace and simple name are copied, and the alias storage is
// deep-copied so the two objects never share an Aliases instance.
// When `other` carries no aliases, any aliases previously held by *this are
// dropped; keeping them would leave the target with stale aliases that the
// source never had.
//
// Self-assignment is a no-op. Returns *this.
Name& Name::operator=(const Name& other) {
    if (this != &other) {
        ns_ = other.ns_;
        simpleName_ = other.simpleName_;
        if (other.aliases_) {
            aliases_ = std::make_unique<Aliases>(*other.aliases_);
        } else {
            // Source has no aliases: discard whatever this object held before.
            aliases_.reset();
        }
    }
    return *this;
}

// Move construction, move assignment and destruction use the compiler-generated
// member-wise behaviour. They are defined in this translation unit, where
// Name::Aliases is a complete type, because the std::unique_ptr<Aliases> member
// cannot be destroyed while Aliases is only forward-declared.

// Member-wise move constructor.
Name::Name(Name&& other) = default;

// Member-wise move assignment.
Name& Name::operator=(Name&& other) = default;

// Releases the alias storage, if any, through the unique_ptr member.
Name::~Name() = default;

string Name::fullname() const {
return (ns_.empty()) ? simpleName_ : ns_ + "." + simpleName_;
return ns_.empty() ? simpleName_ : ns_ + "." + simpleName_;
}

void Name::fullname(const string &name) {
Expand All @@ -46,6 +79,23 @@ void Name::fullname(const string &name) {
check();
}

// Returns the aliases in the form and order they were given to addAlias().
// A Name that never received an alias has no Aliases object, so a shared empty
// vector is returned instead; the reference is valid in both cases.
const std::vector<std::string>& Name::aliases() const {
    if (aliases_) {
        return aliases_->raw;
    }
    static const std::vector<std::string> noAliases;
    return noAliases;
}

// Records `alias` as an alternative name for this Name.
//
// The alias is appended verbatim to the raw list. For lookups it is also stored
// in fully qualified form: an alias without any '.' is prefixed with this
// Name's namespace when that namespace is non-empty; otherwise the alias is
// stored unchanged. The alias storage is created on first use.
void Name::addAlias(const std::string &alias) {
    if (aliases_ == nullptr) {
        aliases_ = std::make_unique<Aliases>();
    }
    aliases_->raw.push_back(alias);

    const bool hasNoDot = alias.find_last_of('.') == string::npos;
    const bool inheritsNamespace = hasNoDot && !ns_.empty();
    std::string qualified = inheritsNamespace ? ns_ + "." + alias : alias;
    aliases_->fullyQualified.insert(std::move(qualified));
}

// Strict ordering on (namespace, simple name): namespaces are compared first
// and the simple name only breaks ties. Aliases take no part in the ordering.
bool Name::operator<(const Name &n) const {
    if (ns_ < n.ns_) {
        return true;
    }
    if (n.ns_ < ns_) {
        return false;
    }
    return simpleName_ < n.simpleName_;
}
Expand All @@ -72,6 +122,16 @@ bool Name::operator==(const Name &n) const {
return ns_ == n.ns_ && simpleName_ == n.simpleName_;
}

// True when `n` is the same name as this one (operator==), or when this Name's
// full name appears among the fully qualified aliases declared on `n`.
// Only the aliases of `n` are consulted, never those of *this.
bool Name::equalOrAliasedBy(const Name &n) const {
    if (*this == n) {
        return true;
    }
    if (!n.aliases_) {
        return false;
    }
    const auto &declared = n.aliases_->fullyQualified;
    return declared.count(fullname()) != 0;
}

// Resets this Name to the empty state: no aliases, empty simple name, empty
// namespace.
void Name::clear() {
    aliases_ = nullptr;
    simpleName_.clear();
    ns_.clear();
}

void Node::setLogicalType(LogicalType logicalType) {
checkLock();

Expand Down
Loading

0 comments on commit a462498

Please sign in to comment.