C++ Day 3: BusTub — Types System

Arjun Sunil Kumar
Distributed Systems Engineering
10 min readAug 27, 2024

Let’s now take a popular toy database from the CMU Database group to learn more about the C++ database landscape.

Project Structure

Note: There is no include package.

src/include

- src/include/type/type.h

#pragma once // ensures the file is included only once in a single compilation. 

// It's an alternative to using traditional include guards,
// which use #ifndef, #define, and #endif macros.

#include <cstdint>
#include <string>

#include "type/type_id.h"

namespace bustub {

class Value;

enum class CmpBool { CmpFalse = 0, CmpTrue = 1, CmpNull = 2 };

class Type {
public:
explicit Type(TypeId type_id) : type_id_(type_id) {}

virtual ~Type() = default;

}

}
  • #pragma once : replacement for ifdef
  • scoped enum vs regular enum
  • class Value;: Forward declaration of the Value class. This tells the compiler that theValue is a class type, without providing its definition.
  • static vs inline vs inline static vs virtual
  // Get the size of this data type in bytes
static auto GetTypeSize(TypeId type_id) -> uint64_t;

// Is this type coercable from the other type
auto IsCoercableFrom(TypeId type_id) const -> bool;

// Debug info
static auto TypeIdToString(TypeId type_id) -> std::string;

static auto GetMinValue(TypeId type_id) -> Value;
static auto GetMaxValue(TypeId type_id) -> Value;

inline static auto GetInstance(TypeId type_id) -> Type * { return k_types[type_id]; }

inline auto GetTypeId() const -> TypeId { return type_id_; }

virtual auto CompareEquals(const Value &left, const Value &right) const -> CmpBool;
virtual auto CompareNotEquals(const Value &left, const Value &right) const -> CmpBool;
virtual auto CompareLessThan(const Value &left, const Value &right) const -> CmpBool;
virtual auto CompareLessThanEquals(const Value &left, const Value &right) const -> CmpBool;
virtual auto CompareGreaterThan(const Value &left, const Value &right) const -> CmpBool;
virtual auto CompareGreaterThanEquals(const Value &left, const Value &right) const -> CmpBool;

// Other mathematical functions
virtual auto Add(const Value &left, const Value &right) const -> Value;
virtual auto Subtract(const Value &left, const Value &right) const -> Value;
virtual auto Multiply(const Value &left, const Value &right) const -> Value;
virtual auto Divide(const Value &left, const Value &right) const -> Value;
virtual auto Modulo(const Value &left, const Value &right) const -> Value;
virtual auto Min(const Value &left, const Value &right) const -> Value;
virtual auto Max(const Value &left, const Value &right) const -> Value;
virtual auto Sqrt(const Value &val) const -> Value;
virtual auto OperateNull(const Value &val, const Value &right) const -> Value;
virtual auto IsZero(const Value &val) const -> bool;

protected:
// The actual type ID
TypeId type_id_;
// Singleton instances.
static Type *k_types[10];
  • public vs private vs protected:
class Example {
public:
int publicVar; // Accessible from anywhere
protected:
int protectedVar; // Accessible within this class and derived classes
private:
int privateVar; // Accessible only within this class
};

- src/include/type/abstract_pool.h

// Interface of a memory pool that can quickly allocate chunks of memory
class AbstractPool {
public:
// Virtual destructor
virtual ~AbstractPool() = default;

virtual auto Allocate(size_t size) -> void * = 0;

/**
* @brief Returns the provided chunk of memory back into the pool
*/
virtual void Free(void *ptr) = 0;
};

} // namespace bustub
  • = default vs = 0 vs = delete

- src/include/type/boolean_type.h

namespace bustub {
// A boolean value isn't a real SQL type, but we treat it as one to keep
// consistent in the expression subsystem.
class BooleanType : public Type {
public:
~BooleanType() override = default;
BooleanType();

// Comparison functions
auto CompareEquals(const Value &left, const Value &right) const -> CmpBool override;
auto CompareNotEquals(const Value &left, const Value &right) const -> CmpBool override;
auto CompareLessThan(const Value &left, const Value &right) const -> CmpBool override;
auto CompareLessThanEquals(const Value &left, const Value &right) const -> CmpBool override;
auto CompareGreaterThan(const Value &left, const Value &right) const -> CmpBool override;
auto CompareGreaterThanEquals(const Value &left, const Value &right) const -> CmpBool override;

// Decimal types are always inlined
auto IsInlined(const Value &val) const -> bool override { return true; }

// Debug
auto ToString(const Value &val) const -> std::string override;

// Serialize this value into the given storage space
void SerializeTo(const Value &val, char *storage) const override;

// Deserialize a value of the given type from the given storage space.
auto DeserializeFrom(const char *storage) const -> Value override;

// Create a copy of this value
auto Copy(const Value &val) const -> Value override;

auto CastAs(const Value &val, TypeId type_id) const -> Value override;
};
} // namespace bustub
  • class BooleanType : public Type
  • override

- src/type/boolean_type.cpp


// value.h

namespace bustub {

class Column;

// Note this function
inline auto GetCmpBool(bool boolean) -> CmpBool { return boolean ? CmpBool::CmpTrue : CmpBool::CmpFalse; }

}



namespace bustub {
#define BOOLEAN_COMPARE_FUNC(OP) GetCmpBool(left.value_.boolean_ OP right.CastAs(TypeId::BOOLEAN).value_.boolean_)

BooleanType::BooleanType() : Type(TypeId::BOOLEAN) {}

auto BooleanType::CompareEquals(const Value &left, const Value &right) const -> CmpBool {
assert(GetTypeId() == TypeId::BOOLEAN);
assert(left.CheckComparable(right));
if (left.IsNull() || right.IsNull()) {
return CmpBool::CmpNull;
}
return BOOLEAN_COMPARE_FUNC(==); // NOLINT
}

...


void BooleanType::SerializeTo(const Value &val, char *storage) const {
*reinterpret_cast<int8_t *>(storage) = val.value_.boolean_;
}

// Deserialize a value of the given type from the given storage space.
auto BooleanType::DeserializeFrom(const char *storage) const -> Value {
int8_t val = *reinterpret_cast<const int8_t *>(storage);
return {TypeId::BOOLEAN, val};
}

auto BooleanType::CastAs(const Value &val, const TypeId type_id) const -> Value {
switch (type_id) {
case TypeId::BOOLEAN:
return Copy(val);
case TypeId::VARCHAR: {
if (val.IsNull()) {
return {TypeId::VARCHAR, nullptr, 0, false};
}
return {TypeId::VARCHAR, val.ToString()};
}
default:
break;
}
throw Exception("BOOLEAN is not coercable to " + Type::TypeIdToString(type_id));
}

}
  • #define BOOLEAN_COMPARE_FUNC
  • void Class::Fn(…) const
  • reinterpret_cast< data_type *>
  • throw Exception
throw Exception("BOOLEAN is not coercable to " + Type::TypeIdToString(type_id));

- src/include/type/integer_parent_type.h


template <class T1, class T2>
auto IntegerParentType::AddValue(const Value &left, const Value &right) const -> Value {
auto x = left.GetAs<T1>();
auto y = right.GetAs<T2>();
auto sum1 = static_cast<T1>(x + y);
auto sum2 = static_cast<T2>(x + y);

if ((x + y) != sum1 && (x + y) != sum2) {
throw Exception(ExceptionType::OUT_OF_RANGE, "Numeric value out of range.");
}
// Overflow detection
if (sizeof(x) >= sizeof(y)) {
if ((x > 0 && y > 0 && sum1 < 0) || (x < 0 && y < 0 && sum1 > 0)) {
throw Exception(ExceptionType::OUT_OF_RANGE, "Numeric value out of range.");
}
return Value(left.GetTypeId(), sum1);
}
if ((x > 0 && y > 0 && sum2 < 0) || (x < 0 && y < 0 && sum2 > 0)) {
throw Exception(ExceptionType::OUT_OF_RANGE, "Numeric value out of range.");
}
return Value(right.GetTypeId(), sum2);
}
  • template
  • reintrepret_cast vs static_cast
class Base {};
class Derived : public Base {};

Base* basePtr = new Derived();
Derived* derivedPtr = static_cast<Derived*>(basePtr); // Safe conversion, base to derived

int a = 10;
float b = static_cast<float>(a); // Convert int to float
int* p = new int(65);
char* c = reinterpret_cast<char*>(p); // Convert int pointer to char pointer

// Now c points to the first byte of the integer value
std::cout << *c << std::endl; // Output may be undefined

void* voidPtr = p;
int* intPtr = reinterpret_cast<int*>(voidPtr); // Convert void pointer back to int pointer
  • sizeof()
int a;
double b;
std::cout << "Size of variable a: " << sizeof(a) << " bytes" << std::endl;
std::cout << "Size of variable b: " << sizeof(b) << " bytes" << std::endl;
  // Overflow detection
if (sizeof(x) >= sizeof(y)) {
if ((x > 0 && y > 0 && sum1 < 0) || (x < 0 && y < 0 && sum1 > 0)) {
throw Exception(ExceptionType::OUT_OF_RANGE, "Numeric value out of range.");
}
return Value(left.GetTypeId(), sum1);
}

- src/include/type/limits.h

namespace bustub {

static constexpr double DBL_LOWEST = std::numeric_limits<double>::lowest();
static constexpr double FLT_LOWEST = std::numeric_limits<float>::lowest();

static constexpr int8_t BUSTUB_INT8_MIN = (SCHAR_MIN + 1);
static constexpr int16_t BUSTUB_INT16_MIN = (SHRT_MIN + 1);
static constexpr int32_t BUSTUB_INT32_MIN = (INT_MIN + 1);
static constexpr int64_t BUSTUB_INT64_MIN = (LLONG_MIN + 1);
static constexpr double BUSTUB_DECIMAL_MIN = FLT_LOWEST;
}
  • constexpr: compile time computation
  • int8_t: 8 bit int

- src/include/type/type_util.h


namespace bustub {
/**
* Type Utility Functions
*/
class TypeUtil {
public:
/**
* Use memcmp to evaluate two strings
* This does not work with VARBINARY attributes.
*/
static inline auto CompareStrings(const char *str1, int len1, const char *str2, int len2) -> int {
assert(str1 != nullptr);
assert(len1 >= 0);
assert(str2 != nullptr);
assert(len2 >= 0);
// PAVLO: 2017-04-04
// The reason why we use memcmp here is that our inputs are
// not null-terminated strings, so we can't use strncmp
int ret = memcmp(str1, str2, static_cast<size_t>(std::min(len1, len2)));
if (ret == 0 && len1 != len2) {
ret = len1 - len2;
}
return ret;
}

}
  • memcmp: used to compare strings lexicographically
#include <iostream>
#include <cstring>

int main() {
char buffer1[] = "Hello, World!";
char buffer2[] = "Hello, World!";
char buffer3[] = "Hello, C++!";

// Compare buffer1 and buffer2
if (memcmp(buffer1, buffer2, sizeof(buffer1)) == 0) {
std::cout << "buffer1 and buffer2 are equal." << std::endl;
} else {
std::cout << "buffer1 and buffer2 are not equal." << std::endl;
}

// Compare buffer1 and buffer3
if (memcmp(buffer1, buffer3, sizeof(buffer1)) == 0) {
std::cout << "buffer1 and buffer3 are equal." << std::endl;
} else {
std::cout << "buffer1 and buffer3 are not equal." << std::endl;
}

return 0;
}

- src/include/type/value.h


namespace bustub {

class Column;

inline auto GetCmpBool(bool boolean) -> CmpBool { return boolean ? CmpBool::CmpTrue : CmpBool::CmpFalse; }

// A value is an abstract class that represents a view over SQL data stored in
// some materialized state. All values have a type and comparison functions, but
// subclasses implement other type-specific functionality.
class Value {
// Friend Type classes
friend class Type;
friend class NumericType;
friend class IntegerParentType;
friend class TinyintType;
friend class SmallintType;
friend class IntegerType;
friend class BigintType;
friend class DecimalType;
friend class TimestampType;
friend class BooleanType;
friend class VarlenType;
friend class VectorType;

}
  • friend class: Give access to private functions
  • union vs struct
 protected:
// The actual value item
union Val {
int8_t boolean_;
int8_t tinyint_;
int16_t smallint_;
int32_t integer_;
int64_t bigint_;
double decimal_;
uint64_t timestamp_;
char *varlen_;
const char *const_varlen_;
} value_;

union {
uint32_t len_;
TypeId elem_type_id_;
} size_;

bool manage_data_;
// The data type
TypeId type_id_;
};
} // namespace bustub
  • fmt::formatter

template <typename T>
struct fmt::formatter<T, std::enable_if_t<std::is_base_of<bustub::Value, T>::value, char>>
: fmt::formatter<std::string> {
template <typename FormatCtx>
auto format(const bustub::Value &x, FormatCtx &ctx) const {
return fmt::formatter<std::string>::format(x.ToString(), ctx);
}
};

template <typename T>
struct fmt::formatter<std::unique_ptr<T>, std::enable_if_t<std::is_base_of<bustub::Value, T>::value, char>>
: fmt::formatter<std::string> {
template <typename FormatCtx>
auto format(const std::unique_ptr<bustub::Value> &x, FormatCtx &ctx) const {
return fmt::formatter<std::string>::format(x->ToString(), ctx);
}
};s
  • std::enable_if_t and std::is_base_of
#include <iostream>
#include <type_traits>

// Base class
class Base {
public:
virtual void display() const {
std::cout << "Base class" << std::endl;
}
};

// Derived classes
class Derived1 : public Base {
public:
void display() const override {
std::cout << "Derived1 class" << std::endl;
}
};

class Derived2 : public Base {
public:
void display() const override {
std::cout << "Derived2 class" << std::endl;
}
};

// Non-derived class
class NotDerived {
public:
void display() const {
std::cout << "NotDerived class" << std::endl;
}
};

// Template function enabled only for types derived from Base
template <typename T>
auto process(const T& obj) -> std::enable_if_t<std::is_base_of<Base, T>::value> {
std::cout << "Processing a derived type:" << std::endl;
obj.display();
}

int main() {
Derived1 d1;
Derived2 d2;
NotDerived nd;

process(d1); // OK: Derived1 is derived from Base
process(d2); // OK: Derived2 is derived from Base
// process(nd); // Error: NotDerived is not derived from Base

return 0;
}
  • formatter: fmt library.

The code defines a specialization of the fmt::formatter struct template from the fmt library.

// A formatter for objects of type T.
FMT_EXPORT
template <typename T, typename Char = char, typename Enable = void>
struct formatter {
// A deleted default constructor indicates a disabled formatter.
formatter() = delete;
};


template <typename T>
struct fmt::formatter<T, std::enable_if_t<std::is_base_of<bustub::Value, T>::value, char>>
: fmt::formatter<std::string> {


template <typename FormatCtx>
auto format(const bustub::Value &x, FormatCtx &ctx) const {
// x.ToString()
return fmt::formatter<std::string>::format(x.ToString(), ctx);
}


};

Conclusion

I hope this is of some use to someone. Subscribe to us for more related content.

--

--

Arjun Sunil Kumar
Distributed Systems Engineering

Writes on Database Kernel, Distributed Systems, Cloud Technology, Data Engineering & SDE Paradigm. github.com/arjunsk