parser in rust
This commit is contained in:
parent
4844e43447
commit
ae959f7768
20
rust/rox/src/expression.rs
Normal file
20
rust/rox/src/expression.rs
Normal file
@ -0,0 +1,20 @@
|
||||
use crate::token::{self, Token};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum Expression {
|
||||
Binary {
|
||||
left: Box<Expression>,
|
||||
operator: Token,
|
||||
right: Box<Expression>,
|
||||
},
|
||||
Grouping {
|
||||
expression: Box<Expression>,
|
||||
},
|
||||
Literal {
|
||||
value: token::Literal,
|
||||
},
|
||||
Unary {
|
||||
operator: Token,
|
||||
right: Box<Expression>,
|
||||
},
|
||||
}
|
@ -7,10 +7,12 @@ use std::{
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use tracing::error;
|
||||
use tracing::{error, info};
|
||||
|
||||
pub mod cli;
|
||||
pub mod expression;
|
||||
pub mod keywords;
|
||||
pub mod parser;
|
||||
pub mod scanner;
|
||||
pub mod token;
|
||||
pub mod tokenizer {
|
||||
@ -28,7 +30,11 @@ pub mod tokenizer {
|
||||
/// Read the source code in a file and scan it to tokens.
|
||||
pub fn compile(source: &Path) -> Result<(), io::Error> {
|
||||
let input = fs::read_to_string(source)?;
|
||||
let _tokens = scanner::tokenize(&input);
|
||||
let tokens = scanner::tokenize(&input);
|
||||
match parser::generate_ast(tokens) {
|
||||
Ok(ast) => info!("{ast:?}"),
|
||||
Err(e) => error!("{e}"),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@ -45,6 +51,10 @@ pub fn repl() {
|
||||
Err(e) => error!("{}", e),
|
||||
}
|
||||
let input = input.trim().to_string();
|
||||
let _tokens = scanner::tokenize(&input);
|
||||
let tokens = scanner::tokenize(&input);
|
||||
match parser::generate_ast(tokens) {
|
||||
Ok(ast) => info!("{ast:?}"),
|
||||
Err(e) => error!("{e}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
299
rust/rox/src/parser.rs
Normal file
299
rust/rox/src/parser.rs
Normal file
@ -0,0 +1,299 @@
|
||||
use thiserror::Error;
|
||||
use tracing::error;
|
||||
|
||||
use crate::{
|
||||
expression::Expression,
|
||||
token::{
|
||||
self, Token,
|
||||
TokenType::{self, *},
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum ParserError {
|
||||
#[error("empty token stream")]
|
||||
NoTokens,
|
||||
#[error("line {0}: expected expression")]
|
||||
ExpressionExpected(usize),
|
||||
#[error("line {0}: expected ')' after expression.")]
|
||||
ParenAfterExpression(usize),
|
||||
#[error("Out of bounds access at index {0}.")]
|
||||
OutOfBoundsAccess(usize),
|
||||
#[error("line {0}: literal expected.")]
|
||||
LiteralExpected(usize),
|
||||
}
|
||||
|
||||
/// Parse the Lox language.
|
||||
#[derive(Debug, Clone)]
|
||||
struct Parser {
|
||||
current: usize,
|
||||
current_token: Token,
|
||||
tokens: Vec<Token>,
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
/// Create a new parser instance, fail if the tokens vector is empty.
|
||||
fn new(tokens: Vec<Token>) -> Result<Self, ParserError> {
|
||||
let current_token = tokens.first().ok_or(ParserError::NoTokens)?.clone();
|
||||
|
||||
Ok(Self {
|
||||
current: 0,
|
||||
current_token,
|
||||
tokens,
|
||||
})
|
||||
}
|
||||
|
||||
/// Check if any of the provided types match the type of the current token.
|
||||
///
|
||||
/// If so, advance the current token.
|
||||
fn matches(&mut self, types: &[TokenType]) -> bool {
|
||||
let matches = types.iter().any(|x| self.check(x));
|
||||
matches.then(|| self.advance());
|
||||
matches
|
||||
}
|
||||
|
||||
/// Return true if the current token type matches the match_type, false otherwise.
|
||||
fn check(&self, match_type: &TokenType) -> bool {
|
||||
self.current_token.token_type == *match_type
|
||||
}
|
||||
|
||||
/// Advance the current token if we have not hit Eof yet.
|
||||
///
|
||||
/// Return the token before the advancement.
|
||||
fn advance(&mut self) -> Result<&Token, ParserError> {
|
||||
if !self.is_at_end() {
|
||||
self.current += 1;
|
||||
self.current_token = self
|
||||
.tokens
|
||||
.get(self.current)
|
||||
.ok_or(ParserError::OutOfBoundsAccess(self.current))?
|
||||
.clone();
|
||||
}
|
||||
|
||||
self.previous()
|
||||
}
|
||||
|
||||
/// Return true if the current token is Eof, false otherwise.
|
||||
fn is_at_end(&self) -> bool {
|
||||
self.current_token.token_type == Eof
|
||||
}
|
||||
|
||||
/// Return the token before the current one or an error if there is none.
|
||||
fn previous(&self) -> Result<&Token, ParserError> {
|
||||
self.tokens
|
||||
.get(self.current - 1)
|
||||
.ok_or_else(|| ParserError::OutOfBoundsAccess(self.current - 1))
|
||||
}
|
||||
|
||||
/// Consume the current token if its token type matches the provided token_type and advance the
|
||||
/// current token. Otherwise return None..
|
||||
fn consume(&mut self, token_type: &TokenType) -> Option<&Token> {
|
||||
if self.check(token_type) {
|
||||
self.advance().ok()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a binary expression using the next_precedence function and operators to match.
|
||||
fn binary_expr(
|
||||
&mut self,
|
||||
next_precedence: impl Fn(&mut Self) -> Result<Expression, ParserError>,
|
||||
operators: &[TokenType],
|
||||
) -> Result<Expression, ParserError> {
|
||||
let mut expr = next_precedence(self)?;
|
||||
|
||||
while self.matches(operators) {
|
||||
let operator = self.previous()?.clone();
|
||||
let right = next_precedence(self)?;
|
||||
expr = Expression::Binary {
|
||||
left: Box::new(expr.clone()),
|
||||
operator,
|
||||
right: Box::new(right),
|
||||
};
|
||||
}
|
||||
Ok(expr)
|
||||
}
|
||||
|
||||
/// expression -> equality ;
|
||||
fn expression(&mut self) -> Result<Expression, ParserError> {
|
||||
self.equality()
|
||||
}
|
||||
|
||||
/// equality -> comparison ( ( "!=" | "==" ) comparison )* ;
|
||||
fn equality(&mut self) -> Result<Expression, ParserError> {
|
||||
self.binary_expr(Self::comparison, &[BangEqual, EqualEqual])
|
||||
}
|
||||
|
||||
/// comparison -> term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
|
||||
fn comparison(&mut self) -> Result<Expression, ParserError> {
|
||||
self.binary_expr(Self::term, &[Greater, GreaterEqual, Less, LessEqual])
|
||||
}
|
||||
|
||||
/// term -> factor ( ( "-" | "+" ) factor )* ;
|
||||
fn term(&mut self) -> Result<Expression, ParserError> {
|
||||
self.binary_expr(Self::factor, &[Minus, Plus])
|
||||
}
|
||||
|
||||
/// factor -> unary ( ( "/" | "*" ) unary )* ;
|
||||
fn factor(&mut self) -> Result<Expression, ParserError> {
|
||||
self.binary_expr(Self::unary, &[Slash, Star])
|
||||
}
|
||||
|
||||
/// unary -> ( "!" | "-" ) unary | primary ;
|
||||
fn unary(&mut self) -> Result<Expression, ParserError> {
|
||||
if self.matches(&[Bang, Minus]) {
|
||||
let operator = self.previous()?.clone();
|
||||
let right = self.unary()?;
|
||||
|
||||
Ok(Expression::Unary {
|
||||
operator,
|
||||
right: Box::new(right),
|
||||
})
|
||||
} else {
|
||||
self.primary()
|
||||
}
|
||||
}
|
||||
|
||||
/// primary -> NUMBER | STRING | "true" | "false" | "nil" | "(" expression ")" ;
|
||||
fn primary(&mut self) -> Result<Expression, ParserError> {
|
||||
if self.matches(&[False]) {
|
||||
Ok(Expression::Literal {
|
||||
value: token::Literal::Boolean(false),
|
||||
})
|
||||
} else if self.matches(&[True]) {
|
||||
Ok(Expression::Literal {
|
||||
value: token::Literal::Boolean(true),
|
||||
})
|
||||
} else if self.matches(&[Nil]) {
|
||||
Ok(Expression::Literal {
|
||||
value: token::Literal::Nil,
|
||||
})
|
||||
} else if self.matches(&[Number, String]) {
|
||||
let prev = self.previous()?;
|
||||
let value = prev
|
||||
.literal
|
||||
.clone()
|
||||
.ok_or(ParserError::LiteralExpected(prev.line))?;
|
||||
|
||||
Ok(Expression::Literal { value })
|
||||
} else if self.matches(&[LeftParen]) {
|
||||
let expr = self.expression()?;
|
||||
let line = self.current_token.line;
|
||||
self.consume(&RightParen)
|
||||
.ok_or(ParserError::ParenAfterExpression(line))?;
|
||||
|
||||
Ok(Expression::Grouping {
|
||||
expression: Box::new(expr),
|
||||
})
|
||||
} else {
|
||||
let prev = self.previous()?;
|
||||
Err(ParserError::ExpressionExpected(prev.line))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to parse the provided tokens into an AST.
|
||||
pub fn generate_ast(tokens: Vec<Token>) -> Result<Expression, ParserError> {
|
||||
let mut parser = Parser::new(tokens)?;
|
||||
parser.expression()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::{
        expression::Expression,
        token::{Literal, Token, TokenType},
    };

    use super::generate_ast;

    /// Build a token on line 1 with the given type, lexeme and literal payload.
    fn token(token_type: TokenType, lexeme: &str, literal: Option<Literal>) -> Token {
        Token {
            token_type,
            lexeme: lexeme.into(),
            literal,
            line: 1,
        }
    }

    /// Build a boxed number-literal expression node.
    fn number(value: f64) -> Box<Expression> {
        Box::new(Expression::Literal {
            value: Literal::Number(value),
        })
    }

    #[test]
    fn simple_expression() {
        // `3 * 4 + 2 * 6` — multiplication must bind tighter than addition,
        // yielding (3 * 4) + (2 * 6).
        let ast = generate_ast(vec![
            token(TokenType::Number, "3", Some(Literal::Number(3.0))),
            token(TokenType::Star, "*", None),
            token(TokenType::Number, "4", Some(Literal::Number(4.0))),
            token(TokenType::Plus, "+", None),
            token(TokenType::Number, "2", Some(Literal::Number(2.0))),
            token(TokenType::Star, "*", None),
            token(TokenType::Number, "6", Some(Literal::Number(6.0))),
        ])
        .unwrap();

        assert_eq!(
            ast,
            Expression::Binary {
                left: Box::new(Expression::Binary {
                    left: number(3.0),
                    operator: token(TokenType::Star, "*", None),
                    right: number(4.0),
                }),
                operator: token(TokenType::Plus, "+", None),
                right: Box::new(Expression::Binary {
                    left: number(2.0),
                    operator: token(TokenType::Star, "*", None),
                    right: number(6.0),
                }),
            }
        )
    }
}
|
@ -9,7 +9,7 @@ use crate::{
|
||||
},
|
||||
};
|
||||
use lazy_static::lazy_static;
|
||||
use tracing::{debug, error};
|
||||
use tracing::error;
|
||||
|
||||
lazy_static! {
|
||||
/// Tokenizers to use in scanning. They are tried in the exact order in which they appear in
|
||||
@ -58,7 +58,6 @@ pub fn tokenize(source: &str) -> Vec<Token> {
|
||||
literal: None,
|
||||
line,
|
||||
});
|
||||
debug!("{:?}", tokens);
|
||||
|
||||
tokens
|
||||
}
|
||||
|
@ -61,6 +61,10 @@ pub enum Literal {
|
||||
String(String),
|
||||
/// Number literal, represented as f64 (thus it can be decimal).
|
||||
Number(f64),
|
||||
/// Boolean literal.
|
||||
Boolean(bool),
|
||||
/// Null literal.
|
||||
Nil,
|
||||
}
|
||||
|
||||
/// Consumed token.
|
||||
|
Loading…
Reference in New Issue
Block a user