use thiserror::Error; use tracing::error; use crate::{ expression::Expression, token::{ self, Token, TokenType::{self, *}, }, }; #[derive(Error, Debug)] pub enum ParserError { #[error("empty token stream")] NoTokens, #[error("line {0}: expected expression")] ExpressionExpected(usize), #[error("line {0}: expected ')' after expression.")] ParenAfterExpression(usize), #[error("Out of bounds access at index {0}.")] OutOfBoundsAccess(usize), #[error("line {0}: literal expected.")] LiteralExpected(usize), } /// Parse the Lox language. #[derive(Debug, Clone)] struct Parser { current: usize, current_token: Token, tokens: Vec, } impl Parser { /// Create a new parser instance, fail if the tokens vector is empty. fn new(tokens: Vec) -> Result { let current_token = tokens.first().ok_or(ParserError::NoTokens)?.clone(); Ok(Self { current: 0, current_token, tokens, }) } /// Check if any of the provided types match the type of the current token. /// /// If so, advance the current token. fn matches(&mut self, types: &[TokenType]) -> bool { let matches = types.iter().any(|x| self.check(x)); matches.then(|| self.advance()); matches } /// Return true if the current token type matches the match_type, false otherwise. fn check(&self, match_type: &TokenType) -> bool { self.current_token.token_type == *match_type } /// Advance the current token if we have not hit Eof yet. /// /// Return the token before the advancement. fn advance(&mut self) -> Result<&Token, ParserError> { if !self.is_at_end() { self.current += 1; self.current_token = self .tokens .get(self.current) .ok_or(ParserError::OutOfBoundsAccess(self.current))? .clone(); } self.previous() } /// Return true if the current token is Eof, false otherwise. fn is_at_end(&self) -> bool { self.current_token.token_type == Eof } /// Return the token before the current one or an error if there is none. fn previous(&self) -> Result<&Token, ParserError> { self.tokens .get(self.current - 1) .ok_or_else(|| ParserError::OutOfBoundsAccess(self.current - 1)) } /// Consume the current token if its token type matches the provided token_type and advance the /// current token. Otherwise return None.. fn consume(&mut self, token_type: &TokenType) -> Option<&Token> { if self.check(token_type) { self.advance().ok() } else { None } } /// Parse a binary expression using the next_precedence function and operators to match. fn binary_expr( &mut self, next_precedence: impl Fn(&mut Self) -> Result, operators: &[TokenType], ) -> Result { let mut expr = next_precedence(self)?; while self.matches(operators) { let operator = self.previous()?.clone(); let right = next_precedence(self)?; expr = Expression::Binary { left: Box::new(expr.clone()), operator, right: Box::new(right), }; } Ok(expr) } /// expression -> equality ; fn expression(&mut self) -> Result { self.equality() } /// equality -> comparison ( ( "!=" | "==" ) comparison )* ; fn equality(&mut self) -> Result { self.binary_expr(Self::comparison, &[BangEqual, EqualEqual]) } /// comparison -> term ( ( ">" | ">=" | "<" | "<=" ) term )* ; fn comparison(&mut self) -> Result { self.binary_expr(Self::term, &[Greater, GreaterEqual, Less, LessEqual]) } /// term -> factor ( ( "-" | "+" ) factor )* ; fn term(&mut self) -> Result { self.binary_expr(Self::factor, &[Minus, Plus]) } /// factor -> unary ( ( "/" | "*" ) unary )* ; fn factor(&mut self) -> Result { self.binary_expr(Self::unary, &[Slash, Star]) } /// unary -> ( "!" | "-" ) unary | primary ; fn unary(&mut self) -> Result { if self.matches(&[Bang, Minus]) { let operator = self.previous()?.clone(); let right = self.unary()?; Ok(Expression::Unary { operator, right: Box::new(right), }) } else { self.primary() } } /// primary -> NUMBER | STRING | "true" | "false" | "nil" | "(" expression ")" ; fn primary(&mut self) -> Result { if self.matches(&[False]) { Ok(Expression::Literal { value: token::Literal::Boolean(false), }) } else if self.matches(&[True]) { Ok(Expression::Literal { value: token::Literal::Boolean(true), }) } else if self.matches(&[Nil]) { Ok(Expression::Literal { value: token::Literal::Nil, }) } else if self.matches(&[Number, String]) { let prev = self.previous()?; let value = prev .literal .clone() .ok_or(ParserError::LiteralExpected(prev.line))?; Ok(Expression::Literal { value }) } else if self.matches(&[LeftParen]) { let expr = self.expression()?; let line = self.current_token.line; self.consume(&RightParen) .ok_or(ParserError::ParenAfterExpression(line))?; Ok(Expression::Grouping { expression: Box::new(expr), }) } else { let prev = self.previous()?; Err(ParserError::ExpressionExpected(prev.line)) } } fn synchronize(&mut self) { let _ = self.advance(); while !self.is_at_end() && self.previous().unwrap().token_type != Semicolon && !&[Class, Fun, Var, For, If, While, Print, Return] .contains(&self.current_token.token_type) { let _ = self.advance(); } } } /// Try to parse the provided tokens into an AST. pub fn generate_ast(tokens: Vec) -> Result { let mut parser = Parser::new(tokens)?; parser.expression() } #[cfg(test)] mod tests { use crate::{ expression::Expression, token::{Literal, Token, TokenType}, }; use super::generate_ast; #[test] fn simple_expression() { let ast = generate_ast(vec![ Token { token_type: TokenType::Number, lexeme: "3".into(), literal: Some(Literal::Number(3.0)), line: 1, }, Token { token_type: TokenType::Star, lexeme: "*".into(), literal: None, line: 1, }, Token { token_type: TokenType::Number, lexeme: "4".into(), literal: Some(Literal::Number(4.0)), line: 1, }, Token { token_type: TokenType::Plus, lexeme: "+".into(), literal: None, line: 1, }, Token { token_type: TokenType::Number, lexeme: "2".into(), literal: Some(Literal::Number(2.0)), line: 1, }, Token { token_type: TokenType::Star, lexeme: "*".into(), literal: None, line: 1, }, Token { token_type: TokenType::Number, lexeme: "6".into(), literal: Some(Literal::Number(6.0)), line: 1, }, ]) .unwrap(); assert_eq!( ast, Expression::Binary { left: Box::new(Expression::Binary { left: Box::new(Expression::Literal { value: Literal::Number(3.0) }), operator: Token { token_type: TokenType::Star, lexeme: "*".into(), literal: None, line: 1 }, right: Box::new(Expression::Literal { value: Literal::Number(4.0) }) }), operator: Token { token_type: TokenType::Plus, lexeme: "+".into(), literal: None, line: 1 }, right: Box::new(Expression::Binary { left: Box::new(Expression::Literal { value: Literal::Number(2.0) }), operator: Token { token_type: TokenType::Star, lexeme: "*".into(), literal: None, line: 1 }, right: Box::new(Expression::Literal { value: Literal::Number(6.0) }) }) } ) } }