crafting-interpreters/rust/rox/src/parser.rs

use thiserror::Error;
use tracing::error;

use crate::{
    expression::Expression,
    token::{
        self, Token,
        TokenType::{self, *},
    },
};

#[derive(Error, Debug)]
pub enum ParserError {
    #[error("empty token stream")]
    NoTokens,
    #[error("line {0}: expected expression")]
    ExpressionExpected(usize),
    #[error("line {0}: expected ')' after expression")]
    ParenAfterExpression(usize),
    #[error("out of bounds access at index {0}")]
    OutOfBoundsAccess(usize),
    #[error("line {0}: literal expected")]
    LiteralExpected(usize),
}

/// Parse the Lox language.
#[derive(Debug, Clone)]
struct Parser {
    current: usize,
    current_token: Token,
    tokens: Vec<Token>,
}

impl Parser {
    /// Create a new parser instance, failing if the token vector is empty.
    fn new(tokens: Vec<Token>) -> Result<Self, ParserError> {
        let current_token = tokens.first().ok_or(ParserError::NoTokens)?.clone();
        Ok(Self {
            current: 0,
            current_token,
            tokens,
        })
    }

    /// Check if any of the provided types match the type of the current token.
    ///
    /// If so, advance the current token.
    fn matches(&mut self, types: &[TokenType]) -> bool {
        let matches = types.iter().any(|x| self.check(x));
        // The Result from `advance` is discarded; `matches` only reports whether a type matched.
        matches.then(|| self.advance());
        matches
    }

    /// Return true if the current token type matches the match_type, false otherwise.
    fn check(&self, match_type: &TokenType) -> bool {
        self.current_token.token_type == *match_type
    }

    /// Advance the current token if we have not hit Eof yet.
    ///
    /// Return the token before the advancement.
    fn advance(&mut self) -> Result<&Token, ParserError> {
        if !self.is_at_end() {
            self.current += 1;
            self.current_token = self
                .tokens
                .get(self.current)
                .ok_or(ParserError::OutOfBoundsAccess(self.current))?
                .clone();
        }
        self.previous()
    }

    /// Return true if the current token is Eof, false otherwise.
    fn is_at_end(&self) -> bool {
        self.current_token.token_type == Eof
    }

fn previous(&self) -> Result<&Token, ParserError> {
self.tokens
.get(self.current - 1)
.ok_or_else(|| ParserError::OutOfBoundsAccess(self.current - 1))
}
    /// Consume the current token if its type matches the provided token_type, advancing
    /// past it and returning it. Otherwise return `None`.
    fn consume(&mut self, token_type: &TokenType) -> Option<&Token> {
        if self.check(token_type) {
            self.advance().ok()
        } else {
            None
        }
    }

    /// Parse a binary expression using the next_precedence function and operators to match.
    fn binary_expr(
        &mut self,
        next_precedence: impl Fn(&mut Self) -> Result<Expression, ParserError>,
        operators: &[TokenType],
    ) -> Result<Expression, ParserError> {
        let mut expr = next_precedence(self)?;
        while self.matches(operators) {
            let operator = self.previous()?.clone();
            let right = next_precedence(self)?;
            expr = Expression::Binary {
                left: Box::new(expr),
                operator,
                right: Box::new(right),
            };
        }
        Ok(expr)
    }

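    // Grammar rules, lowest to highest precedence. Each rule parses its operands with the
    // next-higher-precedence rule and folds repeated operators through `binary_expr`, so a
    // chain like `1 - 2 - 3` parses left-associatively as `(1 - 2) - 3`.
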
    /// expression -> equality ;
    fn expression(&mut self) -> Result<Expression, ParserError> {
        self.equality()
    }

    /// equality -> comparison ( ( "!=" | "==" ) comparison )* ;
    fn equality(&mut self) -> Result<Expression, ParserError> {
        self.binary_expr(Self::comparison, &[BangEqual, EqualEqual])
    }

    /// comparison -> term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
    fn comparison(&mut self) -> Result<Expression, ParserError> {
        self.binary_expr(Self::term, &[Greater, GreaterEqual, Less, LessEqual])
    }

    /// term -> factor ( ( "-" | "+" ) factor )* ;
    fn term(&mut self) -> Result<Expression, ParserError> {
        self.binary_expr(Self::factor, &[Minus, Plus])
    }

    /// factor -> unary ( ( "/" | "*" ) unary )* ;
    fn factor(&mut self) -> Result<Expression, ParserError> {
        self.binary_expr(Self::unary, &[Slash, Star])
    }

    /// unary -> ( "!" | "-" ) unary | primary ;
    fn unary(&mut self) -> Result<Expression, ParserError> {
        if self.matches(&[Bang, Minus]) {
            let operator = self.previous()?.clone();
            let right = self.unary()?;
            Ok(Expression::Unary {
                operator,
                right: Box::new(right),
            })
        } else {
            self.primary()
        }
    }

    /// primary -> NUMBER | STRING | "true" | "false" | "nil" | "(" expression ")" ;
    fn primary(&mut self) -> Result<Expression, ParserError> {
        if self.matches(&[False]) {
            Ok(Expression::Literal {
                value: token::Literal::Boolean(false),
            })
        } else if self.matches(&[True]) {
            Ok(Expression::Literal {
                value: token::Literal::Boolean(true),
            })
        } else if self.matches(&[Nil]) {
            Ok(Expression::Literal {
                value: token::Literal::Nil,
            })
        } else if self.matches(&[Number, String]) {
            let prev = self.previous()?;
            let value = prev
                .literal
                .clone()
                .ok_or(ParserError::LiteralExpected(prev.line))?;
            Ok(Expression::Literal { value })
        } else if self.matches(&[LeftParen]) {
            let expr = self.expression()?;
            let line = self.current_token.line;
            self.consume(&RightParen)
                .ok_or(ParserError::ParenAfterExpression(line))?;
            Ok(Expression::Grouping {
                expression: Box::new(expr),
            })
        } else {
            // Report the error at the current token; there may be no previous token yet.
            Err(ParserError::ExpressionExpected(self.current_token.line))
        }
    }

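    /// Error recovery: discard tokens until a likely statement boundary.
    ///
    /// After a parse error, skip forward until just past a `;` or until the next token that
    /// usually starts a statement, so that a single error does not cascade into many.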
    fn synchronize(&mut self) {
        let _ = self.advance();
        while !self.is_at_end()
            && self.previous().unwrap().token_type != Semicolon
            && ![Class, Fun, Var, For, If, While, Print, Return]
                .contains(&self.current_token.token_type)
        {
            let _ = self.advance();
        }
    }
}

/// Try to parse the provided tokens into an AST.
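///
/// A minimal sketch of the call pattern; a real caller would take `tokens` from the
/// scanner, but a hand-built stream (as in the tests below) works the same way:
///
/// ```ignore
/// let tokens = vec![Token {
///     token_type: TokenType::Number,
///     lexeme: "1".into(),
///     literal: Some(Literal::Number(1.0)),
///     line: 1,
/// }];
/// let ast = generate_ast(tokens)?;
/// assert_eq!(ast, Expression::Literal { value: Literal::Number(1.0) });
/// ```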
pub fn generate_ast(tokens: Vec<Token>) -> Result<Expression, ParserError> {
    let mut parser = Parser::new(tokens)?;
    parser.expression()
}

#[cfg(test)]
mod tests {
    use crate::{
        expression::Expression,
        token::{Literal, Token, TokenType},
    };

    use super::generate_ast;

    #[test]
    fn simple_expression() {
        // "3 * 4 + 2 * 6" should parse as (3 * 4) + (2 * 6): `*` binds tighter than `+`.
        let ast = generate_ast(vec![
            Token {
                token_type: TokenType::Number,
                lexeme: "3".into(),
                literal: Some(Literal::Number(3.0)),
                line: 1,
            },
            Token {
                token_type: TokenType::Star,
                lexeme: "*".into(),
                literal: None,
                line: 1,
            },
            Token {
                token_type: TokenType::Number,
                lexeme: "4".into(),
                literal: Some(Literal::Number(4.0)),
                line: 1,
            },
            Token {
                token_type: TokenType::Plus,
                lexeme: "+".into(),
                literal: None,
                line: 1,
            },
            Token {
                token_type: TokenType::Number,
                lexeme: "2".into(),
                literal: Some(Literal::Number(2.0)),
                line: 1,
            },
            Token {
                token_type: TokenType::Star,
                lexeme: "*".into(),
                literal: None,
                line: 1,
            },
            Token {
                token_type: TokenType::Number,
                lexeme: "6".into(),
                literal: Some(Literal::Number(6.0)),
                line: 1,
            },
        ])
        .unwrap();

        assert_eq!(
            ast,
            Expression::Binary {
                left: Box::new(Expression::Binary {
                    left: Box::new(Expression::Literal {
                        value: Literal::Number(3.0)
                    }),
                    operator: Token {
                        token_type: TokenType::Star,
                        lexeme: "*".into(),
                        literal: None,
                        line: 1
                    },
                    right: Box::new(Expression::Literal {
                        value: Literal::Number(4.0)
                    })
                }),
                operator: Token {
                    token_type: TokenType::Plus,
                    lexeme: "+".into(),
                    literal: None,
                    line: 1
                },
                right: Box::new(Expression::Binary {
                    left: Box::new(Expression::Literal {
                        value: Literal::Number(2.0)
                    }),
                    operator: Token {
                        token_type: TokenType::Star,
                        lexeme: "*".into(),
                        literal: None,
                        line: 1
                    },
                    right: Box::new(Expression::Literal {
                        value: Literal::Number(6.0)
                    })
                })
            }
        )
    }
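
    // Sanity-check the empty-input path: an empty token stream should surface
    // `ParserError::NoTokens` instead of panicking.
    #[test]
    fn empty_token_stream() {
        assert!(matches!(
            generate_ast(vec![]),
            Err(super::ParserError::NoTokens)
        ));
    }

    // An unterminated grouping should report the missing ')' on the offending line.
    #[test]
    fn unclosed_grouping() {
        let tokens = vec![
            Token {
                token_type: TokenType::LeftParen,
                lexeme: "(".into(),
                literal: None,
                line: 1,
            },
            Token {
                token_type: TokenType::Number,
                lexeme: "1".into(),
                literal: Some(Literal::Number(1.0)),
                line: 1,
            },
            Token {
                token_type: TokenType::Eof,
                lexeme: "".into(),
                literal: None,
                line: 1,
            },
        ];
        assert!(matches!(
            generate_ast(tokens),
            Err(super::ParserError::ParenAfterExpression(1))
        ));
    }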
}