C++ Day 3: BusTub — Types System
Published in
10 min readAug 27, 2024
Let’s now take a popular toy database from the CMU Database group to learn more about the C++ database landscape.
Project Structure
Note: There is no include
package.
src/include
- src/include/type/type.h
#pragma once // ensures the file is included only once in a single compilation.
// It's an alternative to using traditional include guards,
// which use #ifndef, #define, and #endif macros.
#include <cstdint>
#include <string>
#include "type/type_id.h"
namespace bustub {
class Value;
enum class CmpBool { CmpFalse = 0, CmpTrue = 1, CmpNull = 2 };
class Type {
public:
explicit Type(TypeId type_id) : type_id_(type_id) {}
virtual ~Type() = default;
}
}
#pragma once
: replacement forifdef
scoped enum
vs regular enum
class Value;
: Forward declaration of theValue
class. This tells the compiler that theValue
is a class type, without providing its definition.static
vsinline
vsinline static
vsvirtual
// Get the size of this data type in bytes
static auto GetTypeSize(TypeId type_id) -> uint64_t;
// Is this type coercable from the other type
auto IsCoercableFrom(TypeId type_id) const -> bool;
// Debug info
static auto TypeIdToString(TypeId type_id) -> std::string;
static auto GetMinValue(TypeId type_id) -> Value;
static auto GetMaxValue(TypeId type_id) -> Value;
inline static auto GetInstance(TypeId type_id) -> Type * { return k_types[type_id]; }
inline auto GetTypeId() const -> TypeId { return type_id_; }
virtual auto CompareEquals(const Value &left, const Value &right) const -> CmpBool;
virtual auto CompareNotEquals(const Value &left, const Value &right) const -> CmpBool;
virtual auto CompareLessThan(const Value &left, const Value &right) const -> CmpBool;
virtual auto CompareLessThanEquals(const Value &left, const Value &right) const -> CmpBool;
virtual auto CompareGreaterThan(const Value &left, const Value &right) const -> CmpBool;
virtual auto CompareGreaterThanEquals(const Value &left, const Value &right) const -> CmpBool;
// Other mathematical functions
virtual auto Add(const Value &left, const Value &right) const -> Value;
virtual auto Subtract(const Value &left, const Value &right) const -> Value;
virtual auto Multiply(const Value &left, const Value &right) const -> Value;
virtual auto Divide(const Value &left, const Value &right) const -> Value;
virtual auto Modulo(const Value &left, const Value &right) const -> Value;
virtual auto Min(const Value &left, const Value &right) const -> Value;
virtual auto Max(const Value &left, const Value &right) const -> Value;
virtual auto Sqrt(const Value &val) const -> Value;
virtual auto OperateNull(const Value &val, const Value &right) const -> Value;
virtual auto IsZero(const Value &val) const -> bool;
protected:
// The actual type ID
TypeId type_id_;
// Singleton instances.
static Type *k_types[10];
- public vs private vs
protected
:
class Example {
public:
int publicVar; // Accessible from anywhere
protected:
int protectedVar; // Accessible within this class and derived classes
private:
int privateVar; // Accessible only within this class
};
- src/include/type/abstract_pool.h
// Interface of a memory pool that can quickly allocate chunks of memory
class AbstractPool {
public:
// Virtual destructor
virtual ~AbstractPool() = default;
virtual auto Allocate(size_t size) -> void * = 0;
/**
* @brief Returns the provided chunk of memory back into the pool
*/
virtual void Free(void *ptr) = 0;
};
} // namespace bustub
= default
vs= 0
vs= delete
- src/include/type/boolean_type.h
namespace bustub {
// A boolean value isn't a real SQL type, but we treat it as one to keep
// consistent in the expression subsystem.
class BooleanType : public Type {
public:
~BooleanType() override = default;
BooleanType();
// Comparison functions
auto CompareEquals(const Value &left, const Value &right) const -> CmpBool override;
auto CompareNotEquals(const Value &left, const Value &right) const -> CmpBool override;
auto CompareLessThan(const Value &left, const Value &right) const -> CmpBool override;
auto CompareLessThanEquals(const Value &left, const Value &right) const -> CmpBool override;
auto CompareGreaterThan(const Value &left, const Value &right) const -> CmpBool override;
auto CompareGreaterThanEquals(const Value &left, const Value &right) const -> CmpBool override;
// Decimal types are always inlined
auto IsInlined(const Value &val) const -> bool override { return true; }
// Debug
auto ToString(const Value &val) const -> std::string override;
// Serialize this value into the given storage space
void SerializeTo(const Value &val, char *storage) const override;
// Deserialize a value of the given type from the given storage space.
auto DeserializeFrom(const char *storage) const -> Value override;
// Create a copy of this value
auto Copy(const Value &val) const -> Value override;
auto CastAs(const Value &val, TypeId type_id) const -> Value override;
};
} // namespace bustub
class BooleanType : public Type
- override
- src/type/boolean_type.cpp
// value.h
namespace bustub {
class Column;
// Note this function
inline auto GetCmpBool(bool boolean) -> CmpBool { return boolean ? CmpBool::CmpTrue : CmpBool::CmpFalse; }
}
namespace bustub {
#define BOOLEAN_COMPARE_FUNC(OP) GetCmpBool(left.value_.boolean_ OP right.CastAs(TypeId::BOOLEAN).value_.boolean_)
BooleanType::BooleanType() : Type(TypeId::BOOLEAN) {}
auto BooleanType::CompareEquals(const Value &left, const Value &right) const -> CmpBool {
assert(GetTypeId() == TypeId::BOOLEAN);
assert(left.CheckComparable(right));
if (left.IsNull() || right.IsNull()) {
return CmpBool::CmpNull;
}
return BOOLEAN_COMPARE_FUNC(==); // NOLINT
}
...
void BooleanType::SerializeTo(const Value &val, char *storage) const {
*reinterpret_cast<int8_t *>(storage) = val.value_.boolean_;
}
// Deserialize a value of the given type from the given storage space.
auto BooleanType::DeserializeFrom(const char *storage) const -> Value {
int8_t val = *reinterpret_cast<const int8_t *>(storage);
return {TypeId::BOOLEAN, val};
}
auto BooleanType::CastAs(const Value &val, const TypeId type_id) const -> Value {
switch (type_id) {
case TypeId::BOOLEAN:
return Copy(val);
case TypeId::VARCHAR: {
if (val.IsNull()) {
return {TypeId::VARCHAR, nullptr, 0, false};
}
return {TypeId::VARCHAR, val.ToString()};
}
default:
break;
}
throw Exception("BOOLEAN is not coercable to " + Type::TypeIdToString(type_id));
}
}
- #define BOOLEAN_COMPARE_FUNC
- void Class::Fn(…) const
- reinterpret_cast< data_type *>
- throw Exception
throw Exception("BOOLEAN is not coercable to " + Type::TypeIdToString(type_id));
- src/include/type/integer_parent_type.h
template <class T1, class T2>
auto IntegerParentType::AddValue(const Value &left, const Value &right) const -> Value {
auto x = left.GetAs<T1>();
auto y = right.GetAs<T2>();
auto sum1 = static_cast<T1>(x + y);
auto sum2 = static_cast<T2>(x + y);
if ((x + y) != sum1 && (x + y) != sum2) {
throw Exception(ExceptionType::OUT_OF_RANGE, "Numeric value out of range.");
}
// Overflow detection
if (sizeof(x) >= sizeof(y)) {
if ((x > 0 && y > 0 && sum1 < 0) || (x < 0 && y < 0 && sum1 > 0)) {
throw Exception(ExceptionType::OUT_OF_RANGE, "Numeric value out of range.");
}
return Value(left.GetTypeId(), sum1);
}
if ((x > 0 && y > 0 && sum2 < 0) || (x < 0 && y < 0 && sum2 > 0)) {
throw Exception(ExceptionType::OUT_OF_RANGE, "Numeric value out of range.");
}
return Value(right.GetTypeId(), sum2);
}
- template
reintrepret_cast
vsstatic_cast
class Base {};
class Derived : public Base {};
Base* basePtr = new Derived();
Derived* derivedPtr = static_cast<Derived*>(basePtr); // Safe conversion, base to derived
int a = 10;
float b = static_cast<float>(a); // Convert int to float
int* p = new int(65);
char* c = reinterpret_cast<char*>(p); // Convert int pointer to char pointer
// Now c points to the first byte of the integer value
std::cout << *c << std::endl; // Output may be undefined
void* voidPtr = p;
int* intPtr = reinterpret_cast<int*>(voidPtr); // Convert void pointer back to int pointer
- sizeof()
int a;
double b;
std::cout << "Size of variable a: " << sizeof(a) << " bytes" << std::endl;
std::cout << "Size of variable b: " << sizeof(b) << " bytes" << std::endl;
// Overflow detection
if (sizeof(x) >= sizeof(y)) {
if ((x > 0 && y > 0 && sum1 < 0) || (x < 0 && y < 0 && sum1 > 0)) {
throw Exception(ExceptionType::OUT_OF_RANGE, "Numeric value out of range.");
}
return Value(left.GetTypeId(), sum1);
}
- src/include/type/limits.h
namespace bustub {
static constexpr double DBL_LOWEST = std::numeric_limits<double>::lowest();
static constexpr double FLT_LOWEST = std::numeric_limits<float>::lowest();
static constexpr int8_t BUSTUB_INT8_MIN = (SCHAR_MIN + 1);
static constexpr int16_t BUSTUB_INT16_MIN = (SHRT_MIN + 1);
static constexpr int32_t BUSTUB_INT32_MIN = (INT_MIN + 1);
static constexpr int64_t BUSTUB_INT64_MIN = (LLONG_MIN + 1);
static constexpr double BUSTUB_DECIMAL_MIN = FLT_LOWEST;
}
constexpr
: compile time computation
int8_t
: 8 bit int
- src/include/type/type_util.h
namespace bustub {
/**
* Type Utility Functions
*/
class TypeUtil {
public:
/**
* Use memcmp to evaluate two strings
* This does not work with VARBINARY attributes.
*/
static inline auto CompareStrings(const char *str1, int len1, const char *str2, int len2) -> int {
assert(str1 != nullptr);
assert(len1 >= 0);
assert(str2 != nullptr);
assert(len2 >= 0);
// PAVLO: 2017-04-04
// The reason why we use memcmp here is that our inputs are
// not null-terminated strings, so we can't use strncmp
int ret = memcmp(str1, str2, static_cast<size_t>(std::min(len1, len2)));
if (ret == 0 && len1 != len2) {
ret = len1 - len2;
}
return ret;
}
}
memcmp
: used to compare strings lexicographically
#include <iostream>
#include <cstring>
int main() {
char buffer1[] = "Hello, World!";
char buffer2[] = "Hello, World!";
char buffer3[] = "Hello, C++!";
// Compare buffer1 and buffer2
if (memcmp(buffer1, buffer2, sizeof(buffer1)) == 0) {
std::cout << "buffer1 and buffer2 are equal." << std::endl;
} else {
std::cout << "buffer1 and buffer2 are not equal." << std::endl;
}
// Compare buffer1 and buffer3
if (memcmp(buffer1, buffer3, sizeof(buffer1)) == 0) {
std::cout << "buffer1 and buffer3 are equal." << std::endl;
} else {
std::cout << "buffer1 and buffer3 are not equal." << std::endl;
}
return 0;
}
- src/include/type/value.h
namespace bustub {
class Column;
inline auto GetCmpBool(bool boolean) -> CmpBool { return boolean ? CmpBool::CmpTrue : CmpBool::CmpFalse; }
// A value is an abstract class that represents a view over SQL data stored in
// some materialized state. All values have a type and comparison functions, but
// subclasses implement other type-specific functionality.
class Value {
// Friend Type classes
friend class Type;
friend class NumericType;
friend class IntegerParentType;
friend class TinyintType;
friend class SmallintType;
friend class IntegerType;
friend class BigintType;
friend class DecimalType;
friend class TimestampType;
friend class BooleanType;
friend class VarlenType;
friend class VectorType;
}
- friend class: Give access to private functions
- union vs struct
protected:
// The actual value item
union Val {
int8_t boolean_;
int8_t tinyint_;
int16_t smallint_;
int32_t integer_;
int64_t bigint_;
double decimal_;
uint64_t timestamp_;
char *varlen_;
const char *const_varlen_;
} value_;
union {
uint32_t len_;
TypeId elem_type_id_;
} size_;
bool manage_data_;
// The data type
TypeId type_id_;
};
} // namespace bustub
- fmt::formatter
template <typename T>
struct fmt::formatter<T, std::enable_if_t<std::is_base_of<bustub::Value, T>::value, char>>
: fmt::formatter<std::string> {
template <typename FormatCtx>
auto format(const bustub::Value &x, FormatCtx &ctx) const {
return fmt::formatter<std::string>::format(x.ToString(), ctx);
}
};
template <typename T>
struct fmt::formatter<std::unique_ptr<T>, std::enable_if_t<std::is_base_of<bustub::Value, T>::value, char>>
: fmt::formatter<std::string> {
template <typename FormatCtx>
auto format(const std::unique_ptr<bustub::Value> &x, FormatCtx &ctx) const {
return fmt::formatter<std::string>::format(x->ToString(), ctx);
}
};s
- std::enable_if_t and std::is_base_of
#include <iostream>
#include <type_traits>
// Base class
class Base {
public:
virtual void display() const {
std::cout << "Base class" << std::endl;
}
};
// Derived classes
class Derived1 : public Base {
public:
void display() const override {
std::cout << "Derived1 class" << std::endl;
}
};
class Derived2 : public Base {
public:
void display() const override {
std::cout << "Derived2 class" << std::endl;
}
};
// Non-derived class
class NotDerived {
public:
void display() const {
std::cout << "NotDerived class" << std::endl;
}
};
// Template function enabled only for types derived from Base
template <typename T>
auto process(const T& obj) -> std::enable_if_t<std::is_base_of<Base, T>::value> {
std::cout << "Processing a derived type:" << std::endl;
obj.display();
}
int main() {
Derived1 d1;
Derived2 d2;
NotDerived nd;
process(d1); // OK: Derived1 is derived from Base
process(d2); // OK: Derived2 is derived from Base
// process(nd); // Error: NotDerived is not derived from Base
return 0;
}
- formatter: fmt library.
The code defines a specialization of the fmt::formatter
struct template from the fmt
library.
// A formatter for objects of type T.
FMT_EXPORT
template <typename T, typename Char = char, typename Enable = void>
struct formatter {
// A deleted default constructor indicates a disabled formatter.
formatter() = delete;
};
template <typename T>
struct fmt::formatter<T, std::enable_if_t<std::is_base_of<bustub::Value, T>::value, char>>
: fmt::formatter<std::string> {
template <typename FormatCtx>
auto format(const bustub::Value &x, FormatCtx &ctx) const {
// x.ToString()
return fmt::formatter<std::string>::format(x.ToString(), ctx);
}
};
Conclusion
I hope this is of some use to someone. Subscribe to us for more related content.