use crate::{ expression::Expression, statement::Statement, token::{ self, Literal, Token, TokenType::{self, *}, }, }; use thiserror::Error; use tracing::error; #[derive(Error, Debug)] pub enum ParserError { #[error("empty token stream")] NoTokens, #[error("[line {0}] expected expression")] ExpressionExpected(usize), #[error("[line {0}] expected ')' after expression.")] ParenAfterExpression(usize), #[error("[Out of bounds access at index {0}.")] OutOfBoundsAccess(usize), #[error("[line {0}] literal expected.")] LiteralExpected(usize), #[error("[line {0}] expected ';' after value.")] SemicolonAfterValueExpected(usize), #[error("[line {0}] expected ';' after expression.")] SemicolonAfterExpressionExpected(usize), #[error("[line {0}] expected variable name.")] VariableNameExpected(usize), #[error("[line {0}] invalid assignment target.")] InvalidAssignmentTarget(usize), #[error("[line {0}] expected '}}' after block.")] RightBraceAfterBlockExpected(usize), #[error("[line {0}] expected '(' after if.")] LeftParenAfterIfExpected(usize), #[error("[line {0}] expected ')' after condition.")] RightParenAfterConditionExpected(usize), #[error("[line {0}] expected '(' after while.")] LeftParenAfterWhileExpected(usize), #[error("[line {0}] expected '(' after for.")] LeftParenAfterForExpected(usize), #[error("[line {0}] expected ';' after loop condition.")] SemicolonAfterLoopConditionExpected(usize), #[error("[line {0}] expected ')' after for clauses.")] RightParenAfterForClausesExpected(usize), #[error("[line {0}] expected ')' after arguments.")] RightParenAfterArgumentsExpected(usize), #[error("[line {0}] expected function name.")] FunctionNameExpected(usize), #[error("[line {0}] expected '(' after function name.")] LeftParenAfterFunctionNameExpected(usize), #[error("[line {0}] expected ')' after parameters.")] RightParenAfterParamsExpected(usize), #[error("[line {0}] expected parameter name.")] ParamNameExpected(usize), #[error("[line {0}] expected '{{' before function body.")] LeftBraceBeforeFunctionBodyExpected(usize), #[error("[line {0}] expected ';' after return value.")] SemicolonAfterReturnExpected(usize), } /// Parse the Lox language. #[derive(Debug, Clone)] struct Parser { current: usize, current_token: Token, tokens: Vec, } impl Parser { /// Create a new parser instance, fail if the tokens vector is empty. fn new(tokens: Vec) -> Result { let current_token = tokens.first().ok_or(ParserError::NoTokens)?.clone(); Ok(Self { current: 0, current_token, tokens, }) } /// Parse all tokens to a list of statements for execution. fn run(&mut self) -> Result, ParserError> { let mut statements = Vec::new(); while !self.is_at_end() { match self.declaration() { Ok(x) => statements.push(x), Err(e) => { error!("{e}"); self.synchronize(); } } } Ok(statements) } /// Check if any of the provided types match the type of the current token. /// /// If so, advance the current token. fn matches(&mut self, types: &[TokenType]) -> bool { let matches = types.iter().any(|x| self.check(x)); matches.then(|| self.advance()); matches } /// Return true if the current token type matches the match_type, false otherwise. fn check(&self, match_type: &TokenType) -> bool { self.current_token.token_type == *match_type } /// Advance the current token if we have not hit Eof yet. /// /// Return the token before the advancement. fn advance(&mut self) -> Result<&Token, ParserError> { if !self.is_at_end() { self.current += 1; self.current_token = self .tokens .get(self.current) .ok_or(ParserError::OutOfBoundsAccess(self.current))? .clone(); } self.previous() } /// Return true if the current token is Eof, false otherwise. fn is_at_end(&self) -> bool { self.current_token.token_type == Eof } /// Return the token before the current one or an error if there is none. fn previous(&self) -> Result<&Token, ParserError> { self.tokens .get(self.current - 1) .ok_or_else(|| ParserError::OutOfBoundsAccess(self.current - 1)) } /// Consume the current token if its token type matches the provided token_type and advance the /// current token. Otherwise return None.. fn consume(&mut self, token_type: &TokenType) -> Option<&Token> { if self.check(token_type) { self.advance().ok() } else { None } } /// Parse a binary expression using the next_precedence function and operators to match. fn binary_expr( &mut self, next_precedence: impl Fn(&mut Self) -> Result, operators: &[TokenType], ) -> Result { let mut expr = next_precedence(self)?; while self.matches(operators) { let operator = self.previous()?.clone(); let right = next_precedence(self)?; expr = Expression::Binary { left: Box::new(expr.clone()), operator, right: Box::new(right), }; } Ok(expr) } /// expression -> equality ; fn expression(&mut self) -> Result { self.assignment() } fn declaration(&mut self) -> Result { if self.matches(&[Fun]) { self.function() } else if self.matches(&[Var]) { self.var_declaration() } else { self.statement() } } fn statement(&mut self) -> Result { if self.matches(&[For]) { self.for_statement() } else if self.matches(&[If]) { self.if_statement() } else if self.matches(&[Print]) { self.print_statement() } else if self.matches(&[Return]) { self.return_statement() } else if self.matches(&[While]) { self.while_statement() } else if self.matches(&[LeftBrace]) { Ok(Statement::Block(self.block()?)) } else { self.expression_statement() } } /// Build up a while statement from a for statement. fn for_statement(&mut self) -> Result { let line = self.current_token.line; self.consume(&LeftParen) .ok_or(ParserError::LeftParenAfterForExpected(line))?; let initializer = if self.matches(&[Semicolon]) { None } else if self.matches(&[Var]) { Some(self.var_declaration()?) } else { Some(self.expression_statement()?) }; let condition = if !self.matches(&[Semicolon]) { self.expression()? } else { Expression::Literal { value: Literal::Boolean(true), } }; self.consume(&Semicolon) .ok_or(ParserError::SemicolonAfterLoopConditionExpected(line))?; let increment = if !self.check(&RightParen) { Some(self.expression()?) } else { None }; self.consume(&RightParen) .ok_or(ParserError::RightParenAfterForClausesExpected(line))?; let body = self.statement()?; let body = match increment { Some(inc) => Statement::Block(vec![body, Statement::Expression(inc)]), None => body, }; let body = Statement::While { condition, body: Box::new(body), }; let body = match initializer { Some(initializer) => Statement::Block(vec![initializer, body]), None => body, }; Ok(body) } fn if_statement(&mut self) -> Result { let line = self.current_token.line; self.consume(&LeftParen) .ok_or(ParserError::LeftParenAfterIfExpected(line))?; let condition = self.expression()?; self.consume(&RightParen) .ok_or(ParserError::RightParenAfterConditionExpected(line))?; let then_branch = self.statement()?; let else_branch = if self.matches(&[Else]) { Some(Box::new(self.statement()?)) } else { None }; Ok(Statement::If { condition, then_branch: Box::new(then_branch), else_branch, }) } fn print_statement(&mut self) -> Result { let value = self.expression()?; let line = self.current_token.line; self.consume(&Semicolon) .ok_or(ParserError::SemicolonAfterValueExpected(line))?; Ok(Statement::Print(value)) } fn return_statement(&mut self) -> Result { let keyword = self.previous()?.clone(); let value = if self.check(&Semicolon) { None } else { Some(self.expression()?) }; self.consume(&Semicolon) .ok_or(ParserError::SemicolonAfterReturnExpected(keyword.line))?; Ok(Statement::Return { keyword, value }) } fn var_declaration(&mut self) -> Result { let line = self.current_token.line; let name = self .consume(&Identifier) .ok_or(ParserError::VariableNameExpected(line))? .clone(); let initializer = if self.matches(&[Equal]) { Some(self.expression()?) } else { None }; self.consume(&Semicolon) .ok_or(ParserError::SemicolonAfterExpressionExpected(line))?; Ok(Statement::Var { name, initializer: Box::new(initializer), }) } fn while_statement(&mut self) -> Result { let line = self.current_token.line; self.consume(&LeftParen) .ok_or(ParserError::LeftParenAfterWhileExpected(line))?; let condition = self.expression()?; self.consume(&RightParen) .ok_or(ParserError::RightParenAfterConditionExpected(line))?; let body = self.statement()?; Ok(Statement::While { condition, body: Box::new(body), }) } fn expression_statement(&mut self) -> Result { let expr = self.expression()?; let line = self.current_token.line; self.consume(&Semicolon) .ok_or(ParserError::SemicolonAfterExpressionExpected(line))?; Ok(Statement::Expression(expr)) } fn function(&mut self) -> Result { let line = self.current_token.line; let name = self .consume(&Identifier) .ok_or(ParserError::FunctionNameExpected(line))? .clone(); self.consume(&LeftParen) .ok_or(ParserError::LeftParenAfterFunctionNameExpected(line))?; let mut params = Vec::new(); if !self.check(&RightParen) { let param = self .consume(&Identifier) .ok_or(ParserError::ParamNameExpected(line))? .clone(); params.push(param); while self.matches(&[Comma]) { let param = self .consume(&Identifier) .ok_or(ParserError::ParamNameExpected(line))? .clone(); params.push(param); } } self.consume(&RightParen) .ok_or(ParserError::RightParenAfterParamsExpected(line))?; self.consume(&LeftBrace) .ok_or(ParserError::LeftBraceBeforeFunctionBodyExpected(line))?; let body = self.block()?; Ok(Statement::Function { name, params, body }) } fn block(&mut self) -> Result, ParserError> { let mut statements = Vec::new(); while !self.check(&RightBrace) && !self.is_at_end() { statements.push(self.declaration()?); } let line = self.previous()?.line; self.consume(&RightBrace) .ok_or(ParserError::RightBraceAfterBlockExpected(line))?; Ok(statements) } fn assignment(&mut self) -> Result { let expr = self.or()?; if self.matches(&[Equal]) { let equals = self.previous()?.clone(); let value = self.assignment()?; if let Expression::Variable { name } = expr { Ok(Expression::Assign { name, value: Box::new(value), }) } else { Err(ParserError::InvalidAssignmentTarget(equals.line)) } } else { Ok(expr) } } fn logical_operator( &mut self, operator: TokenType, parse_fn: F, ) -> Result where F: Fn(&mut Self) -> Result, { let mut expr = parse_fn(self)?; while self.matches(&[operator]) { let operator = self.previous()?.clone(); let right = parse_fn(self)?; expr = Expression::Logical { left: Box::new(expr), operator, right: Box::new(right), }; } Ok(expr) } fn or(&mut self) -> Result { self.logical_operator(Or, Self::and) } fn and(&mut self) -> Result { self.logical_operator(And, Self::equality) } /// equality -> comparison ( ( "!=" | "==" ) comparison )* ; fn equality(&mut self) -> Result { self.binary_expr(Self::comparison, &[BangEqual, EqualEqual]) } /// comparison -> term ( ( ">" | ">=" | "<" | "<=" ) term )* ; fn comparison(&mut self) -> Result { self.binary_expr(Self::term, &[Greater, GreaterEqual, Less, LessEqual]) } /// term -> factor ( ( "-" | "+" ) factor )* ; fn term(&mut self) -> Result { self.binary_expr(Self::factor, &[Minus, Plus]) } /// factor -> unary ( ( "/" | "*" ) unary )* ; fn factor(&mut self) -> Result { self.binary_expr(Self::unary, &[Slash, Star]) } /// unary -> ( "!" | "-" ) unary | primary ; fn unary(&mut self) -> Result { if self.matches(&[Bang, Minus]) { let operator = self.previous()?.clone(); let right = self.unary()?; Ok(Expression::Unary { operator, right: Box::new(right), }) } else { self.call() } } fn call(&mut self) -> Result { let mut expr = self.primary()?; loop { if self.matches(&[LeftParen]) { expr = self.finish_call(expr)?; } else { break; } } Ok(expr) } fn finish_call(&mut self, callee: Expression) -> Result { let mut args = Vec::new(); if !self.check(&RightParen) { args.push(self.expression()?); while self.matches(&[Comma]) { args.push(self.expression()?); } } let line = self.current_token.line; let paren = self .consume(&RightParen) .ok_or(ParserError::RightParenAfterArgumentsExpected(line))? .clone(); Ok(Expression::Call { callee: Box::new(callee), paren, args, }) } /// primary -> NUMBER | STRING | "true" | "false" | "nil" | "(" expression ")" ; fn primary(&mut self) -> Result { if self.matches(&[False]) { Ok(Expression::Literal { value: token::Literal::Boolean(false), }) } else if self.matches(&[True]) { Ok(Expression::Literal { value: token::Literal::Boolean(true), }) } else if self.matches(&[Nil]) { Ok(Expression::Literal { value: token::Literal::Nil, }) } else if self.matches(&[Identifier]) { let prev = self.previous()?.clone(); Ok(Expression::Variable { name: prev }) } else if self.matches(&[Number, String]) { let prev = self.previous()?; let value = prev .literal .clone() .ok_or(ParserError::LiteralExpected(prev.line))?; Ok(Expression::Literal { value }) } else if self.matches(&[LeftParen]) { let expr = self.expression()?; let line = self.current_token.line; self.consume(&RightParen) .ok_or(ParserError::ParenAfterExpression(line))?; Ok(Expression::Grouping { expression: Box::new(expr), }) } else { let prev = self.previous()?; Err(ParserError::ExpressionExpected(prev.line)) } } fn synchronize(&mut self) { let _ = self.advance(); while !self.is_at_end() && self.previous().unwrap().token_type != Semicolon && !&[Class, Fun, Var, For, If, While, Print, Return] .contains(&self.current_token.token_type) { let _ = self.advance(); } } } /// Try to parse the provided tokens into an AST. pub fn ast(tokens: Vec) -> Result, ParserError> { let mut parser = Parser::new(tokens)?; parser.run() }