Eureka Engineering
Published in

Eureka Engineering

Rustで作るSQL Parser

Photo by Pascal Müller on Unsplash

Table of Contents

  1. Introduction
  2. Goals & Anti-Goals
  3. Design
  4. Lexer
  5. Parser
  6. Conclusion
  7. References

1 Introduction

2 Goals & Anti-Goals

2.1 Goals

  • Pratt Parsingを参考にしたLexer/Parser実装
  • 簡単なSELECT文までが対象

2.2 Anti-Goals

  • yaccやbisonなどのParser Generatorの話はしない

3 Design

  • SQL → (by Lexer) → Token → (by Parser) → AST
SELECT 1+2, id, name FROM user WHERE id = 1;
Select { 
columns: [
Column {
value: Infix { op: Plus, left: Number(1), right: Number(2) }, alias: ""
},
Column {
value: Identifier("id"), alias: ""
},
Column {
value: Identifier("name"), alias: ""
}
],
table: TableExpression {
from: "user",
where_cond: Some(Infix { op: Eq, left: Identifier("id"), right: Number(1) }),
group_by: None
}
}

4 Lexer

SELECT 1+2, id, name FROM user WHERE id = 1;



- SELECT (SelectKwd)
- 1 (NumberType)
- + (Plus)
- 2 (NumberType)
- , (Comma)
- id (Ident)
- , (Comma)
- name (Ident)
- FROM (From)
- user (Ident)
- WHERE (Where)
- id (Ident)
- = (EqOp)
- 1 (NumberType)
- ; (Semicoln)
#[derive(Clone, Debug, Eq, EnumIter, Hash, PartialEq)]
pub enum TokenType {
/// System
StartToken,
Illegal(String),
EOF,
/// Synbol Char
RightBra, // ]
RightBrace, // }
/// Literal
Ident(String), // a~z, A~Z, 0~ 9
NumberType(i64), // 0 ~ 9
/// Keyword
Alter, // "ALTER"
And, // "AND"
Both, // "BOTH"
}
/// 1文字ずつ読んでいきます.
fn read_char(&mut self) {
if self.is_over_next_position() {
// '0' is EOF
self.current_char = CHAR_EOF;
} else {
self.current_char = self.query.as_bytes()[self.next_position] as char;
}

self.current_position = self.next_position;
self.next_position += 1;
}
// 1文字ずつ読んでTokenにしていきます.
// 説明のため実装を省略して記載しています.
pub fn next_token(&mut self) -> Option<Token> {
self.skip_whitespace();

let token = match self.current_char {
// Symbol系
'&' => Token::new(TokenType::Ampersand),
'@' => Token::new(TokenType::AtMark),
// 先読みをして判断する場合
'=' => {
if self.peek_char() == '=' {
self.read_char();
Token::new(TokenType::EqEqOp)
} else {
Token::new(TokenType::EqOp)
}
}
'0' => Token::new(TokenType::EOF),

_ => {
if self.is_letter() {
// 予約語やカラム名
let ident = self.read_identifier();
let token_type = Token::lookup_ident(ident.clone());
Token::new(token_type)
} else if self.is_number() {
// 数字
let number = self.read_number();
Token::new(TokenType::NumberType(number))
} else {
Token::new(TokenType::Illegal(self.current_char.to_string()))
}
}
};

self.read_char();
Some(token)
}

5 Parser

<select statement> ::= SELECT [ <expression> ] <table expression>

<table expression> ::= <from clause> [ <where clause> ]

<where clause> ::= <value expression>

<value expression> ::= <clause> | <clause> (+|-|+|/|=|AND|OR) <clause>

<clause> ::= <identifier> | <number> | <function>...

5.1 AST

#[derive(Debug, Clone)]
pub enum ValueExpression {
Identifier(String),
Number(i64),
Bool(bool),
Prefix {op: PrefixOp, right: i64},
Infix {op: InfixOp, left: Box<ValueExpression>, right: Box<ValueExpression>},
}
#[derive(Debug, Clone)]
pub enum Statement {
Select {
columns: Vec<Column>,
table: TableExpression,
},
}
#[derive(Debug, Clone)]
pub struct TableExpression {
pub from: String,
pub where_cond: Option<ValueExpression>,
pub group_by: Option<Vec<Column>>,
}

5.2 Parsing

pub struct Parser {
lexer: Lexer,
current_token: Token,
peek_token: Token,
}
fn parse_select_statement(&mut self) -> Statement {
self.next_token();

let columns = self.parse_columns();

if !self.expect_peek(TokenType::From) {
panic!("not expected type");
}

let table = self.parse_table_expression();

if self.expect_peek(TokenType::SemiColon) {
self.next_token();
}

Statement::Select { columns, table }
}
fn parse_table_expression(&mut self) -> TableExpression {
self.next_token();

let table_name = match &self.current_token.token_type {
TokenType::Ident(table_name) => table_name.clone(),
_ => panic!("not expected type!"),
};

let where_cond = if self.expect_peek(TokenType::Where) {
self.next_token();
Some(self.parse_value_expression(LOWEAT))
} else {
None
};

TableExpression {
from: table_name.to_string(),
where_cond: where_cond,
group_by: None,
}
}

6 Conclusion

7 References

--

--

Learn about Eureka’s engineering efforts, product developments and more.

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store