C++ Day 4: BusTub — Shell/Driver
Published in
9 min readAug 27, 2024
Alrighty! We have seen most of the include
folder contents. Now it is time to see the rest of the execution flow.
tools
tools/shell/shell.cpp
auto GetWidthOfUtf8(const void *beg, const void *end, size_t *width) -> int {
size_t computed_width = 0;
utf8proc_ssize_t n;
utf8proc_ssize_t size = static_cast<const char *>(end) - static_cast<const char *>(beg);
auto pstring = static_cast<utf8proc_uint8_t const *>(beg);
utf8proc_int32_t data;
while ((n = utf8proc_iterate(pstring, size, &data)) > 0) {
computed_width += utf8proc_charwidth(data);
pstring += n;
size -= n;
}
*width = computed_width;
return 0;
}
// NOLINTNEXTLINE
auto main(int argc, char **argv) -> int {
ft_set_u8strwid_func(&GetWidthOfUtf8);
....
}
void ft_set_u8strwid_func(int (*u8strwid)(const void *beg, const void *end, size_t *width))
{
buffer_set_u8strwid_func(u8strwid);
}
- Function Pointers
Yes, you can pass a function as an argument to another function in C++. This is a feature of C++ that allows you to use function pointers or callable objects (like lambda functions) as arguments to other functions.
NOTE: This part is confusing. Will circle back later.
auto bustub = std::make_unique<bustub::BustubInstance>("test.db");
- make_unique <> is used for creating single-owner pointers
- src/include/common/bustub_instance.h
class BustubInstance {
private:
/**
* Get the executor context from the BusTub instance.
*/
auto MakeExecutorContext(Transaction *txn, bool is_modify) -> std::unique_ptr<ExecutorContext>;
public:
explicit BustubInstance(const std::string &db_file_name, size_t bpm_size = 128);
explicit BustubInstance(size_t bpm_size = 128);
~BustubInstance();
/**
* Execute a SQL query in the BusTub instance.
*/
auto ExecuteSql(const std::string &sql, ResultWriter &writer, std::shared_ptr<CheckOptions> check_options = nullptr)
-> bool;
/**
* Execute a SQL query in the BusTub instance with provided txn.
*/
auto ExecuteSqlTxn(const std::string &sql, ResultWriter &writer, Transaction *txn,
std::shared_ptr<CheckOptions> check_options = nullptr) -> bool;
/** Enable managed txn mode on this BusTub instance, allowing statements like `BEGIN`. */
void EnableManagedTxn();
/** Get the current transaction. */
auto CurrentManagedTxn() -> Transaction *;
/**
* FOR TEST ONLY. Generate test tables in this BusTub instance.
* It's used in the shell to predefine some tables, as we don't support
* create / drop table and insert for now. Should remove it in the future.
*/
void GenerateTestTable();
/**
* FOR TEST ONLY. Generate mock tables in this BusTub instance.
* It's used in the shell to predefine some tables, as we don't support
* create / drop table and insert for now. Should remove it in the future.
*/
void GenerateMockTable();
// Currently the followings are directly referenced by recovery test, so
// we cannot do anything on them until someone decides to refactor the recovery test.
std::unique_ptr<DiskManager> disk_manager_;
std::unique_ptr<BufferPoolManager> buffer_pool_manager_;
std::unique_ptr<LockManager> lock_manager_;
std::unique_ptr<TransactionManager> txn_manager_;
std::unique_ptr<LogManager> log_manager_;
std::unique_ptr<CheckpointManager> checkpoint_manager_;
std::unique_ptr<Catalog> catalog_;
std::unique_ptr<ExecutionEngine> execution_engine_;
/** Coordination for catalog */
std::shared_mutex catalog_lock_;
auto GetSessionVariable(const std::string &key) -> std::string {
if (session_variables_.find(key) != session_variables_.end()) {
return session_variables_[key];
}
return "";
}
auto IsForceStarterRule() -> bool {
auto variable = StringUtil::Lower(GetSessionVariable("force_optimizer_starter_rule"));
return variable == "1" || variable == "true" || variable == "yes";
}
private:
void CmdDisplayTables(ResultWriter &writer);
void CmdDbgMvcc(const std::vector<std::string> ¶ms, ResultWriter &writer);
void CmdTxn(const std::vector<std::string> ¶ms, ResultWriter &writer);
void CmdDisplayIndices(ResultWriter &writer);
void CmdDisplayHelp(ResultWriter &writer);
void WriteOneCell(const std::string &cell, ResultWriter &writer);
void HandleCreateStatement(Transaction *txn, const CreateStatement &stmt, ResultWriter &writer);
void HandleIndexStatement(Transaction *txn, const IndexStatement &stmt, ResultWriter &writer);
void HandleExplainStatement(Transaction *txn, const ExplainStatement &stmt, ResultWriter &writer);
void HandleTxnStatement(Transaction *txn, const TransactionStatement &stmt, ResultWriter &writer);
void HandleVariableShowStatement(Transaction *txn, const VariableShowStatement &stmt, ResultWriter &writer);
void HandleVariableSetStatement(Transaction *txn, const VariableSetStatement &stmt, ResultWriter &writer);
std::unordered_map<std::string, std::string> session_variables_;
Transaction *current_txn_{nullptr};
bool managed_txn_mode_{false};
};
explicit
: make constructor casting safe
- virtual function vs
non-virtual
function
- src/common/bustub_instance.cpp
void BustubInstance::GenerateMockTable() {
// The actual content generated by mock scan executors are described in `mock_scan_executor.cpp`.
auto txn = txn_manager_->Begin();
std::shared_lock<std::shared_mutex> l(catalog_lock_);
for (auto table_name = &mock_table_list[0]; *table_name != nullptr; table_name++) {
catalog_->CreateTable(txn, *table_name, GetMockTableSchemaOf(*table_name), false);
}
l.unlock();
txn_manager_->Commit(txn);
}
- shared_lock<std::shared_mutex>
void BustubInstance::CmdTxn(const std::vector<std::string> ¶ms, ResultWriter &writer) {
if (!managed_txn_mode_) {
writer.OneCell("only supported in managed mode, please use bustub-shell");
return;
}
auto dump_current_txn = [&](const std::string &prefix) {
writer.OneCell(fmt::format("{}txn_id={} txn_real_id={} read_ts={} commit_ts={} status={} iso_lvl={}", prefix,
current_txn_->GetTransactionIdHumanReadable(), current_txn_->GetTransactionId(),
current_txn_->GetReadTs(), current_txn_->GetCommitTs(),
current_txn_->GetTransactionState(), current_txn_->GetIsolationLevel()));
};
if (params.size() == 1) {
if (current_txn_ != nullptr) {
dump_current_txn("");
} else {
writer.OneCell("no active txn, each statement starts a new txn.");
}
return;
}
...
}
- [&] is lambda function
- src/include/execution/executors/abstract_executor.h
namespace bustub {
class ExecutorContext;
/**
* The AbstractExecutor implements the Volcano tuple-at-a-time iterator model.
* This is the base class from which all executors in the BustTub execution
* engine inherit, and defines the minimal interface that all executors support.
*/
class AbstractExecutor {
public:
/**
* Construct a new AbstractExecutor instance.
* @param exec_ctx the executor context that the executor runs with
*/
explicit AbstractExecutor(ExecutorContext *exec_ctx) : exec_ctx_{exec_ctx} {}
/** Virtual destructor. */
virtual ~AbstractExecutor() = default;
/**
* Initialize the executor.
* @warning This function must be called before Next() is called!
*/
virtual void Init() = 0;
/**
* Yield the next tuple from this executor.
* @param[out] tuple The next tuple produced by this executor
* @param[out] rid The next tuple RID produced by this executor
* @return `true` if a tuple was produced, `false` if there are no more tuples
*/
virtual auto Next(Tuple *tuple, RID *rid) -> bool = 0;
/** @return The schema of the tuples that this executor produces */
virtual auto GetOutputSchema() const -> const Schema & = 0;
/** @return The executor context in which this executor runs */
auto GetExecutorContext() -> ExecutorContext * { return exec_ctx_; }
protected:
/** The executor context in which the executor runs */
ExecutorContext *exec_ctx_;
};
} // namespace bustub
- src/include/execution/executors/mock_scan_executor.h
namespace bustub {
extern const char *mock_table_list[];
class MockScanExecutor : public AbstractExecutor {
public:
}
}
- extern
mock_scan_executor.cpp
const char *mock_table_list[] = {"__mock_table_1", "__mock_table_2", "__mock_table_3", "__mock_table_tas_2022",
"__mock_table_tas_2023", "__mock_table_tas_2023_fall", "__mock_table_tas_2024",
"__mock_agg_input_small", "__mock_agg_input_big", "__mock_table_schedule_2022",
"__mock_table_schedule", "__mock_table_123", "__mock_graph",
// For leaderboard Q1
"__mock_t1",
// For leaderboard Q2
"__mock_t4_1m", "__mock_t5_1m", "__mock_t6_1m",
// For leaderboard Q3
"__mock_t7", "__mock_t8", "__mock_t9",
// For P3 leaderboard Q4
"__mock_t10", "__mock_t11", nullptr};
- src/include/catalog/catalog.h
namespace bustub {
using table_oid_t = uint32_t;
using column_oid_t = uint32_t;
using index_oid_t = uint32_t;
class Catalog {
public:
/** Indicates that an operation returning a `TableInfo*` failed */
static constexpr TableInfo *NULL_TABLE_INFO{nullptr};
/** Indicates that an operation returning a `IndexInfo*` failed */
static constexpr IndexInfo *NULL_INDEX_INFO{nullptr};
auto CreateTable(Transaction *txn, const std::string &table_name, const Schema &schema, bool create_table_heap = true)
-> TableInfo * {
if (table_names_.count(table_name) != 0) {
return NULL_TABLE_INFO;
}
// Construct the table heap
std::unique_ptr<TableHeap> table = nullptr;
// When create_table_heap == false, it means that we're running binder tests (where no txn will be provided) or
// we are running shell without buffer pool. We don't need to create TableHeap in this case.
if (create_table_heap) {
table = std::make_unique<TableHeap>(bpm_);
} else {
// Otherwise, create an empty heap only for binder tests
table = TableHeap::CreateEmptyHeap(create_table_heap);
}
// Fetch the table OID for the new table
const auto table_oid = next_table_oid_.fetch_add(1);
// Construct the table information
auto meta = std::make_unique<TableInfo>(schema, table_name, std::move(table), table_oid);
auto *tmp = meta.get();
// Update the internal tracking mechanisms
tables_.emplace(table_oid, std::move(meta));
table_names_.emplace(table_name, table_oid);
index_names_.emplace(table_name, std::unordered_map<std::string, index_oid_t>{});
return tmp;
}
private:
[[maybe_unused]] BufferPoolManager *bpm_;
[[maybe_unused]] LockManager *lock_manager_;
[[maybe_unused]] LogManager *log_manager_;
....
};
} // namespace bustub
- using table_oid_t = uint32_t;
- trailing return type
- atomic add
std::atomic<table_oid_t> next_table_oid_{0};
const auto table_oid = next_table_oid_.fetch_add(1);
- emplace
tables_.emplace(table_oid, std::move(meta));
table_names_.emplace(table_name, table_oid);
- std::move
- [[may_unused]]
shared_ptr
vsunique_ptr
vsweak_ptr
std::shared_ptr<Schema> key_schema_;
- having under scroll at the end of each variable
std::unordered_map<index_oid_t, std::unique_ptr<IndexInfo>> indexes_;
std::unordered_map<std::string, std::unordered_map<std::string, index_oid_t>> index_names_;
std::atomic<index_oid_t> next_index_oid_{0};
- src/include/storage/index/index.h
class Index {
public:
/**
* Construct a new Index instance.
* @param metadata An owning pointer to the index metadata
*/
explicit Index(std::unique_ptr<IndexMetadata> &&metadata) : metadata_{std::move(metadata)} {}
virtual ~Index() = default;
}
- src/include/storage/index/b_plus_tree_index.h
class BPlusTreeIndex : public Index {
public:
BPlusTreeIndex(std::unique_ptr<IndexMetadata> &&metadata, BufferPoolManager *buffer_pool_manager);
auto InsertEntry(const Tuple &key, RID rid, Transaction *transaction) -> bool override;
void DeleteEntry(const Tuple &key, RID rid, Transaction *transaction) override;
void ScanKey(const Tuple &key, std::vector<RID> *result, Transaction *transaction) override;
}
- what does
override
mean here?
- src/include/storage/index/stl_ordered.h
template <typename KT, typename VT, typename Cmp>
class STLOrderedIndexIterator {
STLOrderedIndexIterator(const std::map<KT, VT, StlComparatorWrapper<KT, Cmp>> *map,
typename std::map<KT, VT, StlComparatorWrapper<KT, Cmp>>::const_iterator iter)
: map_(map), iter_(std::move(iter)) {}
auto operator*() -> const std::pair<KT, VT> & {
ret_val_ = *iter_;
return ret_val_;
}
auto operator++() -> STLOrderedIndexIterator & {
iter_++;
return *this;
}
inline auto operator==(const STLOrderedIndexIterator &itr) const -> bool { return itr.iter_ == iter_; }
inline auto operator!=(const STLOrderedIndexIterator &itr) const -> bool { return !(*this == itr); }
private:
const std::map<KT, VT, StlComparatorWrapper<KT, Cmp>> *map_;
typename std::map<KT, VT, StlComparatorWrapper<KT, Cmp>>::const_iterator iter_;
std::pair<KT, VT> ret_val_;
};
- custom comparator for map
- operator overloading
- const_iterator
- src/include/storage/index/index_iterator.h
namespace bustub {
#define INDEXITERATOR_TYPE IndexIterator<KeyType, ValueType, KeyComparator>
INDEX_TEMPLATE_ARGUMENTS
class IndexIterator {
}
src/include/storage/index/b_plus_tree.h
// Main class providing the API for the Interactive B+ Tree.
INDEX_TEMPLATE_ARGUMENTS
class BPlusTree {
using InternalPage = BPlusTreeInternalPage<KeyType, page_id_t, KeyComparator>;
using LeafPage = BPlusTreeLeafPage<KeyType, ValueType, KeyComparator>;
public:
// Return the page id of the root node
auto GetRootPageId() -> page_id_t;
// Index iterator
auto Begin() -> INDEXITERATOR_TYPE;
auto End() -> INDEXITERATOR_TYPE;
auto Begin(const KeyType &key) -> INDEXITERATOR_TYPE;
}
#define
vsusing
- Optional
std::optional<size_t> limit = std::nullopt;
limit = std::make_optional(constant_expr.val_.GetAs<int32_t>());
#include <iostream>
#include <string>
#include <unordered_map>
#include <optional>
class ConfigManager {
public:
ConfigManager() {
// Initialize some configuration values
config_["server_ip"] = "192.168.1.1";
config_["server_port"] = "8080";
// "max_connections" is deliberately left out to demonstrate optional usage
}
// Function to retrieve a configuration value by key
std::optional<std::string> GetConfigValue(const std::string &key) const {
auto it = config_.find(key);
if (it != config_.end()) {
return it->second; // Return the value if found
} else {
return std::nullopt; // Return std::nullopt if the key is not found
}
}
private:
std::unordered_map<std::string, std::string> config_;
};
int main() {
ConfigManager configManager;
// Attempt to retrieve various configuration values
std::optional<std::string> serverIp = configManager.GetConfigValue("server_ip");
std::optional<std::string> serverPort = configManager.GetConfigValue("server_port");
std::optional<std::string> maxConnections = configManager.GetConfigValue("max_connections");
// Check and display the results
if (serverIp) { // NOTE: Usage is here.
std::cout << "Server IP: " << *serverIp << std::endl;
} else {
std::cout << "Server IP not found." << std::endl;
}
}
- auto [_, expr] — structured binding. Return 2 results in one return.
Conclusion
I hope this is of some use to someone. Subscribe to us for more related content.