Skip to content

Commit

Permalink
feat: refactor type system and add vector type, support stl index (#682)
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Chi Z <[email protected]>
  • Loading branch information
skyzh committed Jan 13, 2024
1 parent 82912b1 commit a5c45ee
Show file tree
Hide file tree
Showing 45 changed files with 769 additions and 167 deletions.
45 changes: 44 additions & 1 deletion src/binder/bind_create.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include "fmt/ranges.h"
#include "nodes/nodes.hpp"
#include "nodes/primnodes.hpp"
#include "nodes/value.hpp"
#include "pg_definitions.hpp"
#include "postgres_parser.hpp"
#include "type/type_id.h"
Expand Down Expand Up @@ -86,6 +87,16 @@ auto Binder::BindColumnDefinition(duckdb_libpgquery::PGColumnDef *cdef) -> Colum
return {colname, TypeId::VARCHAR, varchar_max_length};
}

if (name == "vector") {
auto exprs = BindExpressionList(cdef->typeName->typmods);
if (exprs.size() != 1) {
throw bustub::Exception("should specify vector length");
}
const auto &vector_length_val = dynamic_cast<const BoundConstant &>(*exprs[0]);
uint32_t vector_length = std::stoi(vector_length_val.ToString());
return {colname, TypeId::VECTOR, vector_length};
}

throw NotImplementedException(fmt::format("unsupported type: {}", name));
}

Expand Down Expand Up @@ -156,19 +167,51 @@ auto Binder::BindCreate(duckdb_libpgquery::PGCreateStmt *pg_stmt) -> std::unique

/**
 * Bind a parsed `CREATE INDEX` statement into an IndexStatement.
 *
 * For every index element this records the resolved column reference and the first
 * operator-class name attached to it (empty string when none is given). It also records
 * the access method named by the statement and the statement-level `(name, int)` options.
 *
 * @param stmt the parser node for the statement
 * @return the bound statement
 * @throws NotImplementedException when an index element is an expression rather than a column name
 */
auto Binder::BindIndex(duckdb_libpgquery::PGIndexStmt *stmt) -> std::unique_ptr<IndexStatement> {
  std::vector<std::unique_ptr<BoundColumnRef>> cols;
  std::vector<std::string> col_options;
  auto table = BindBaseTableRef(stmt->relation->relname, std::nullopt);

  for (auto cell = stmt->indexParams->head; cell != nullptr; cell = cell->next) {
    auto index_element = reinterpret_cast<duckdb_libpgquery::PGIndexElem *>(cell->data.ptr_value);
    if (index_element->name == nullptr) {
      throw NotImplementedException("create index by expr is not supported yet");
    }

    auto column_ref = ResolveColumn(*table, std::vector{std::string(index_element->name)});
    cols.emplace_back(std::make_unique<BoundColumnRef>(dynamic_cast<const BoundColumnRef &>(*column_ref)));

    // Only the first operator-class entry is used; col_options stays index-aligned with cols
    // because an (empty) entry is pushed for every column.
    std::string opt;
    if (index_element->opclass != nullptr && index_element->opclass->head != nullptr) {
      opt = reinterpret_cast<duckdb_libpgquery::PGValue *>(index_element->opclass->head->data.ptr_value)->val.str;
    }
    col_options.emplace_back(std::move(opt));
  }

  std::string index_type;
  if (stmt->accessMethod != nullptr) {
    index_type = stmt->accessMethod;
    // "art" is normalised to the empty string.
    // NOTE(review): presumably "art" is what the parser reports when no USING clause is given — confirm.
    if (index_type == "art") {
      index_type.clear();
    }
  }

  std::vector<std::pair<std::string, int>> options;
  if (stmt->options != nullptr) {
    for (auto c = stmt->options->head; c != nullptr; c = lnext(c)) {
      auto def_elem = reinterpret_cast<duckdb_libpgquery::PGDefElem *>(c->data.ptr_value);
      // An option written without a value has no arg node; store 0 for it instead of reading
      // an uninitialized int.
      int val = 0;
      if (def_elem->arg != nullptr) {
        // NOTE(review): assumes the argument is an integer constant; the node type is not checked here.
        val = static_cast<int>(reinterpret_cast<duckdb_libpgquery::PGValue *>(def_elem->arg)->val.ival);
      }
      options.emplace_back(def_elem->defname, val);
    }
  }

  return std::make_unique<IndexStatement>(stmt->idxname, std::move(table), std::move(cols), std::move(index_type),
                                          std::move(col_options), std::move(options));
}

} // namespace bustub
5 changes: 5 additions & 0 deletions src/binder/bind_select.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <iterator>
#include <memory>
#include <optional>
#include <string>
#include <vector>
#include "binder/binder.h"
#include "binder/bound_expression.h"
Expand Down Expand Up @@ -467,6 +468,10 @@ auto Binder::BindConstant(duckdb_libpgquery::PGAConst *node) -> std::unique_ptr<
BUSTUB_ENSURE(val.val.ival <= BUSTUB_INT32_MAX, "value out of range");
return std::make_unique<BoundConstant>(ValueFactory::GetIntegerValue(static_cast<int32_t>(val.val.ival)));
}
case duckdb_libpgquery::T_PGFloat: {
double parsed_val = std::stod(std::string(val.val.str));
return std::make_unique<BoundConstant>(ValueFactory::GetDecimalValue(parsed_val));
}
case duckdb_libpgquery::T_PGString: {
return std::make_unique<BoundConstant>(ValueFactory::GetVarcharValue(val.val.str));
}
Expand Down
11 changes: 8 additions & 3 deletions src/binder/statement/index_statement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,19 @@
namespace bustub {

/**
 * Construct a bound index statement tagged StatementType::INDEX_STATEMENT.
 * Each argument is taken by value and moved into the member with the matching name.
 *
 * NOTE(review): two parameter lists and two initializer-list tails appear below (one ending at
 * `cols`, one continuing with index_type / col_options / options). This looks like the before and
 * after lines of a diff shown together — confirm which form the real file contains.
 */
IndexStatement::IndexStatement(std::string index_name, std::unique_ptr<BoundBaseTableRef> table,
std::vector<std::unique_ptr<BoundColumnRef>> cols)
std::vector<std::unique_ptr<BoundColumnRef>> cols, std::string index_type,
std::vector<std::string> col_options, std::vector<std::pair<std::string, int>> options)
: BoundStatement(StatementType::INDEX_STATEMENT),
index_name_(std::move(index_name)),
table_(std::move(table)),
cols_(std::move(cols)) {}
cols_(std::move(cols)),
index_type_(std::move(index_type)),
col_options_(std::move(col_options)),
options_(std::move(options)) {}

/**
 * Render the statement as a single-line `BoundIndex { ... }` string.
 *
 * NOTE(review): two return statements appear below; the second additionally prints the index type
 * (`using=`), the per-column options and the statement options, each list joined with ",".
 * This looks like the before and after lines of a diff shown together — confirm which one is live.
 */
auto IndexStatement::ToString() const -> std::string {
return fmt::format("BoundIndex {{ index_name={}, table={}, cols={} }}", index_name_, *table_, cols_);
return fmt::format("BoundIndex {{ index_name={}, table={}, cols={}, using={}, col_options=[{}], options=[{}] }}",
index_name_, *table_, cols_, index_type_, fmt::join(col_options_, ","), fmt::join(options_, ","));
}

} // namespace bustub
14 changes: 8 additions & 6 deletions src/catalog/column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,28 @@

#include <sstream>
#include <string>
#include "type/type_id.h"

namespace bustub {

/**
 * Format this column as text.
 *
 * @param simplified when true, produce the short `name:TYPE` form (with a parenthesised size for
 *        VARCHAR and VECTOR columns); otherwise produce the verbose `Column[...]` form
 * @return the formatted description
 */
auto Column::ToString(bool simplified) const -> std::string {
if (simplified) {
std::ostringstream os;
os << column_name_ << ":" << Type::TypeIdToString(column_type_);
// VARCHAR prints length_ unchanged.
if (column_type_ == VARCHAR) {
os << "(" << length_ << ")";
}
// VECTOR prints length_ divided by sizeof(double).
// NOTE(review): presumably length_ is a byte count and this yields the element count — confirm.
if (column_type_ == VECTOR) {
os << "(" << length_ / sizeof(double) << ")";
}
return (os.str());
}

std::ostringstream os;

// Verbose form: name, type name and offset, followed by the length information.
os << "Column[" << column_name_ << ", " << Type::TypeIdToString(column_type_) << ", "
<< "Offset:" << column_offset_ << ", ";

// NOTE(review): both the IsInlined() branch (fixed_length_ / variable_length_) and the single
// "Length:" line appear here; this looks like the before and after lines of a diff shown
// together — confirm which one the real file contains.
if (IsInlined()) {
os << "FixedLength:" << fixed_length_;
} else {
os << "VarLength:" << variable_length_;
}
os << "Length:" << length_;
os << "]";
return (os.str());
}
Expand Down
6 changes: 5 additions & 1 deletion src/catalog/schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,11 @@ Schema::Schema(const std::vector<Column> &columns) {
}
// set column offset
column.column_offset_ = curr_offset;
curr_offset += column.GetFixedLength();
if (column.IsInlined()) {
curr_offset += column.GetStorageSize();
} else {
curr_offset += sizeof(uint32_t);
}

// add column
this->columns_.push_back(column);
Expand Down
30 changes: 26 additions & 4 deletions src/common/bustub_ddl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,15 +106,37 @@ void BustubInstance::HandleIndexStatement(Transaction *txn, const IndexStatement
}

std::unique_lock<std::shared_mutex> l(catalog_lock_);
auto info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
IntegerHashFunctionType{}, false, IndexType::HashTableIndex);
IndexInfo *info = nullptr;

if (stmt.index_type_.empty()) {
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
IntegerHashFunctionType{}, false); // create default index
} else if (stmt.index_type_ == "hash") {
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
IntegerHashFunctionType{}, false, IndexType::HashTableIndex);
} else if (stmt.index_type_ == "bplustree") {
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
IntegerHashFunctionType{}, false, IndexType::BPlusTreeIndex);
} else if (stmt.index_type_ == "stl_ordered") {
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
IntegerHashFunctionType{}, false, IndexType::STLOrderedIndex);
} else if (stmt.index_type_ == "stl_unordered") {
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
IntegerHashFunctionType{}, false, IndexType::STLUnorderedIndex);
} else {
UNIMPLEMENTED("unsupported index type " + stmt.index_type_);
}
l.unlock();

if (info == nullptr) {
throw bustub::Exception("Failed to create index");
}
WriteOneCell(fmt::format("Index created with id = {}", info->index_oid_), writer);
WriteOneCell(fmt::format("Index created with id = {} with type = {}", info->index_oid_, info->index_type_), writer);
}

void BustubInstance::HandleExplainStatement(Transaction *txn, const ExplainStatement &stmt, ResultWriter &writer) {
Expand Down
28 changes: 7 additions & 21 deletions src/execution/plan_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ auto SeqScanPlanNode::InferScanSchema(const BoundBaseTableRef &table) -> Schema
std::vector<Column> schema;
for (const auto &column : table.schema_.GetColumns()) {
auto col_name = fmt::format("{}.{}", table.GetBoundTableName(), column.GetName());
schema.emplace_back(Column(col_name, column));
schema.emplace_back(col_name, column);
}
return Schema(schema);
}
Expand All @@ -38,12 +38,7 @@ auto ProjectionPlanNode::InferProjectionSchema(const std::vector<AbstractExpress
std::vector<Column> schema;
for (const auto &expr : expressions) {
auto type_id = expr->GetReturnType();
if (type_id != TypeId::VARCHAR) {
schema.emplace_back("<unnamed>", type_id);
} else {
// TODO(chi): infer the correct VARCHAR length. Maybe it doesn't matter for executors?
schema.emplace_back("<unnamed>", type_id, VARCHAR_DEFAULT_LENGTH);
}
schema.emplace_back(expr->GetReturnType().WithColumnName("<unnamed>"));
}
return Schema(schema);
}
Expand All @@ -55,7 +50,7 @@ auto ProjectionPlanNode::RenameSchema(const Schema &schema, const std::vector<st
}
size_t idx = 0;
for (const auto &column : schema.GetColumns()) {
output.emplace_back(Column(col_names[idx++], column));
output.emplace_back(col_names[idx++], column);
}
return Schema(output);
}
Expand All @@ -66,30 +61,21 @@ auto AggregationPlanNode::InferAggSchema(const std::vector<AbstractExpressionRef
std::vector<Column> output;
output.reserve(group_bys.size() + aggregates.size());
for (const auto &column : group_bys) {
// TODO(chi): correctly process VARCHAR column
if (column->GetReturnType() == TypeId::VARCHAR) {
output.emplace_back(Column("<unnamed>", column->GetReturnType(), 128));
} else {
output.emplace_back(Column("<unnamed>", column->GetReturnType()));
}
output.emplace_back(column->GetReturnType().WithColumnName("<unnamed>"));
}
for (size_t idx = 0; idx < aggregates.size(); idx++) {
// TODO(chi): correctly infer agg call return type
output.emplace_back(Column("<unnamed>", TypeId::INTEGER));
output.emplace_back("<unnamed>", TypeId::INTEGER);
}
return Schema(output);
}

/**
 * Build the output schema for a window-function plan node: one column named "<unnamed>" per
 * input expression, typed from that expression's GetReturnType().
 *
 * @param columns the expressions that produce the output columns
 * @return a Schema holding one column per expression, in the same order
 */
auto WindowFunctionPlanNode::InferWindowSchema(const std::vector<AbstractExpressionRef> &columns) -> Schema {
std::vector<Column> output;
output.reserve(columns.size());
// TODO(avery): correctly infer window call return type
for (const auto &column : columns) {
// TODO(chi): correctly process VARCHAR column
// NOTE(review): both the VARCHAR special case (hard-coded length 128) and the single
// WithColumnName(...) line appear here; this looks like the before and after lines of a diff
// shown together — confirm which one the real file contains.
if (column->GetReturnType() == TypeId::VARCHAR) {
output.emplace_back(Column("<unnamed>", column->GetReturnType(), 128));
} else {
output.emplace_back(Column("<unnamed>", column->GetReturnType()));
}
output.emplace_back(column->GetReturnType().WithColumnName("<unnamed>"));
}
return Schema(output);
}
Expand Down
10 changes: 9 additions & 1 deletion src/include/binder/statement/index_statement.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "binder/bound_statement.h"
Expand All @@ -21,7 +22,8 @@ namespace bustub {
class IndexStatement : public BoundStatement {
public:
explicit IndexStatement(std::string index_name, std::unique_ptr<BoundBaseTableRef> table,
std::vector<std::unique_ptr<BoundColumnRef>> cols);
std::vector<std::unique_ptr<BoundColumnRef>> cols, std::string index_type,
std::vector<std::string> col_options, std::vector<std::pair<std::string, int>> options);

/** Name of the index */
std::string index_name_;
Expand All @@ -32,6 +34,12 @@ class IndexStatement : public BoundStatement {
/** Name of the columns */
std::vector<std::unique_ptr<BoundColumnRef>> cols_;

/** Using */
std::string index_type_;

std::vector<std::string> col_options_;
std::vector<std::pair<std::string, int>> options_;

auto ToString() const -> std::string override;
};

Expand Down
Loading

0 comments on commit a5c45ee

Please sign in to comment.