crafting-interpreters/src/parser.rs

697 lines
23 KiB
Rust

use crate::{
expression::Expression,
statement::Statement,
token::{
self, Literal, Token,
TokenType::{self, *},
},
};
use thiserror::Error;
use tracing::error;
/// Errors the parser can produce.
///
/// Except for `NoTokens` and `OutOfBoundsAccess`, every variant carries the
/// source line number that is interpolated into its message as `{0}`.
/// `OutOfBoundsAccess` carries the offending index into the token list.
#[derive(Error, Debug)]
pub enum ParserError {
// --- Token stream / cursor errors ---
#[error("empty token stream")]
NoTokens,
#[error("line {0}: expected expression")]
ExpressionExpected(usize),
#[error("line {0}: expected ')' after expression.")]
ParenAfterExpression(usize),
#[error("Out of bounds access at index {0}.")]
OutOfBoundsAccess(usize),
#[error("line {0}: literal expected.")]
LiteralExpected(usize),
// --- Statement terminators and declarations ---
#[error("line {0}: expected ';' after value.")]
SemicolonAfterValueExpected(usize),
#[error("line {0}: expected ';' after expression.")]
SemicolonAfterExpressionExpected(usize),
#[error("line {0}: expected class name.")]
ClassNameExpected(usize),
#[error("line {0}: expected variable name.")]
VariableNameExpected(usize),
#[error("line {0}: invalid assignment target.")]
InvalidAssignmentTarget(usize),
// `}}` / `{{` in the messages below are format-string escapes for literal braces.
#[error("line {0}: expected '}}' after block.")]
RightBraceAfterBlockExpected(usize),
// --- Control flow ---
#[error("line {0}: expected '(' after if.")]
LeftParenAfterIfExpected(usize),
#[error("line {0}: expected ')' after condition.")]
RightParenAfterConditionExpected(usize),
#[error("line {0}: expected '(' after while.")]
LeftParenAfterWhileExpected(usize),
#[error("line {0}: expected '(' after for.")]
LeftParenAfterForExpected(usize),
#[error("line {0}: expected ';' after loop condition.")]
SemicolonAfterLoopConditionExpected(usize),
#[error("line {0}: expected ')' after for clauses.")]
RightParenAfterForClausesExpected(usize),
// --- Calls and functions ---
#[error("line {0}: expected ')' after arguments.")]
RightParenAfterArgumentsExpected(usize),
#[error("line {0}: expected function name.")]
FunctionNameExpected(usize),
#[error("line {0}: expected '(' after function name.")]
LeftParenAfterFunctionNameExpected(usize),
#[error("line {0}: expected ')' after parameters.")]
RightParenAfterParamsExpected(usize),
#[error("line {0}: expected parameter name.")]
ParamNameExpected(usize),
#[error("line {0}: expected '{{' before function body.")]
LeftBraceBeforeFunctionBodyExpected(usize),
#[error("line {0}: expected ';' after return value.")]
SemicolonAfterReturnExpected(usize),
// --- Classes, properties and `super` ---
#[error("line {0}: expected '{{' before class body.")]
LeftBraceBeforeClassExpected(usize),
#[error("line {0}: expected '}}' after class body.")]
RightBraceAfterClassExpected(usize),
#[error("line {0}: expected property name after '.'.")]
PropertyNameAfterDotExpected(usize),
#[error("line {0}: expected '.' after 'super'.")]
DotAfterSuper(usize),
#[error("line {0}: expected superclass method name.")]
SuperclassMethodNameExpected(usize),
}
/// Parse the Lox language tokens into an abstract syntax tree.
///
/// The parser walks `tokens` with a cursor and keeps an owned copy of the
/// token under the cursor for cheap inspection.
#[derive(Debug, Clone)]
struct Parser {
// Index into `tokens` of the token currently being examined.
current: usize,
// Clone of the token at `current`; refreshed by `advance`.
current_token: Token,
// The complete token stream being parsed.
tokens: Vec<Token>,
}
impl Parser {
/// Build a parser whose cursor sits on the first token of `tokens`.
///
/// # Errors
/// Returns [`ParserError::NoTokens`] when `tokens` is empty.
fn new(tokens: Vec<Token>) -> Result<Self, ParserError> {
    match tokens.first().cloned() {
        Some(current_token) => Ok(Self {
            current: 0,
            current_token,
            tokens,
        }),
        None => Err(ParserError::NoTokens),
    }
}
/// Parse declarations until the cursor reaches `Eof` and collect them.
///
/// A declaration that fails to parse is logged through `tracing::error!`
/// and skipped: the parser resynchronizes and carries on with the next
/// statement. As written, this function therefore always returns `Ok`.
fn run(&mut self) -> Result<Vec<Statement>, ParserError> {
    let mut program = Vec::new();
    while !self.is_at_end() {
        let parsed = match self.declaration() {
            Ok(stmt) => stmt,
            Err(e) => {
                error!("{e}");
                self.synchronize();
                continue;
            }
        };
        program.push(parsed);
    }
    Ok(program)
}
/// Test the current token against each of `types`.
///
/// On a hit the cursor moves past the token and `true` is returned;
/// otherwise the cursor stays put and `false` is returned.
fn matches(&mut self, types: &[TokenType]) -> bool {
    if !types.iter().any(|candidate| self.check(candidate)) {
        return false;
    }
    // The result of advancing is deliberately not inspected here.
    let _ = self.advance();
    true
}
/// Report whether the current token has type `match_type`, without consuming it.
fn check(&self, match_type: &TokenType) -> bool {
    let current_type = &self.current_token.token_type;
    current_type == match_type
}
/// Step the cursor forward by one token unless it already sits on `Eof`.
///
/// Returns the token located just before the cursor after the step, i.e.
/// the token that was current when this method was called (when a step
/// happened).
///
/// # Errors
/// Returns [`ParserError::OutOfBoundsAccess`] when there is no token at the
/// new cursor position, or when `previous` fails.
fn advance(&mut self) -> Result<&Token, ParserError> {
    if self.is_at_end() {
        return self.previous();
    }
    self.current += 1;
    let index = self.current;
    match self.tokens.get(index) {
        Some(next) => self.current_token = next.clone(),
        None => return Err(ParserError::OutOfBoundsAccess(index)),
    }
    self.previous()
}
/// Report whether the cursor has reached the `Eof` token.
fn is_at_end(&self) -> bool {
    self.check(&Eof)
}
/// Return the token immediately before the cursor.
///
/// # Errors
/// Returns [`ParserError::OutOfBoundsAccess`] when there is no such token,
/// including when the cursor is still on the first token. The index is
/// computed with checked arithmetic, so a cursor at position 0 yields an
/// error instead of an integer-underflow panic.
fn previous(&self) -> Result<&Token, ParserError> {
    self.current
        .checked_sub(1)
        .and_then(|index| self.tokens.get(index))
        // `wrapping_sub` keeps the reported index identical to what a
        // release build reported before, without ever panicking.
        .ok_or_else(|| ParserError::OutOfBoundsAccess(self.current.wrapping_sub(1)))
}
/// Consume the current token when its type equals `token_type`.
///
/// Returns the consumed token, or `None` when the type does not match or
/// the cursor could not be advanced.
fn consume(&mut self, token_type: &TokenType) -> Option<&Token> {
    if !self.check(token_type) {
        return None;
    }
    match self.advance() {
        Ok(consumed) => Some(consumed),
        Err(_) => None,
    }
}
/// Parse a left-associative chain of binary operators.
///
/// `next_precedence` parses one operand (the next-higher precedence level)
/// and `operators` lists the token types accepted between operands. Each
/// matched operator folds the expression built so far into the left side
/// of a new `Expression::Binary`.
///
/// # Errors
/// Propagates any error from `next_precedence` or from reading the
/// operator token.
fn binary_expr(
    &mut self,
    next_precedence: impl Fn(&mut Self) -> Result<Expression, ParserError>,
    operators: &[TokenType],
) -> Result<Expression, ParserError> {
    let mut expr = next_precedence(self)?;
    while self.matches(operators) {
        let operator = self.previous()?.clone();
        let right = next_precedence(self)?;
        // Move the accumulated tree into the new node instead of cloning
        // it; cloning made long operator chains quadratic.
        expr = Expression::Binary {
            left: Box::new(expr),
            operator,
            right: Box::new(right),
        };
    }
    Ok(expr)
}
/// expression -> equality ;
fn expression(&mut self) -> Result<Expression, ParserError> {
self.assignment()
}
/// Parse one declaration: a class, function or variable declaration when the
/// matching keyword is present, otherwise an ordinary statement.
fn declaration(&mut self) -> Result<Statement, ParserError> {
    if self.matches(&[Class]) {
        return self.class_declaration();
    }
    if self.matches(&[Fun]) {
        return self.function();
    }
    if self.matches(&[Var]) {
        return self.var_declaration();
    }
    self.statement()
}
/// Parse one statement, dispatching on its leading token.
///
/// Anything that does not start with `for`, `if`, `print`, `return`,
/// `while` or `{` is parsed as an expression statement.
fn statement(&mut self) -> Result<Statement, ParserError> {
    if self.matches(&[For]) {
        return self.for_statement();
    }
    if self.matches(&[If]) {
        return self.if_statement();
    }
    if self.matches(&[Print]) {
        return self.print_statement();
    }
    if self.matches(&[Return]) {
        return self.return_statement();
    }
    if self.matches(&[While]) {
        return self.while_statement();
    }
    if self.matches(&[LeftBrace]) {
        let statements = self.block()?;
        return Ok(Statement::Block(statements));
    }
    self.expression_statement()
}
/// Parse a `for` statement by desugaring it into a `while` loop.
///
/// Called after the `for` keyword has been consumed. All three clauses are
/// optional:
///
/// * the initializer is either empty (`;`), a `var` declaration or an
///   expression statement;
/// * a missing condition is replaced by the literal `true`;
/// * a present increment is appended after the body inside a block.
///
/// The result is `{ initializer; while (condition) { body; increment; } }`
/// with the wrapping blocks omitted for absent clauses.
///
/// # Errors
/// Returns the matching "expected ..." error when `(`, the `;` after the
/// condition, or `)` is missing, and propagates errors from the nested
/// clause and body parsers. Those three errors carry the line of the token
/// that followed `for`.
fn for_statement(&mut self) -> Result<Statement, ParserError> {
    let line = self.current_token.line;
    self.consume(&LeftParen)
        .ok_or(ParserError::LeftParenAfterForExpected(line))?;
    let initializer = if self.matches(&[Semicolon]) {
        None
    } else if self.matches(&[Var]) {
        Some(self.var_declaration()?)
    } else {
        Some(self.expression_statement()?)
    };
    // Only peek at the ';' here. It is consumed by the `consume` call just
    // below; consuming it here as well made every loop with an empty
    // condition (e.g. `for (;;)`) fail with a spurious "expected ';'".
    let condition = if self.check(&Semicolon) {
        Expression::Literal {
            value: Literal::Boolean(true),
        }
    } else {
        self.expression()?
    };
    self.consume(&Semicolon)
        .ok_or(ParserError::SemicolonAfterLoopConditionExpected(line))?;
    let increment = if self.check(&RightParen) {
        None
    } else {
        Some(self.expression()?)
    };
    self.consume(&RightParen)
        .ok_or(ParserError::RightParenAfterForClausesExpected(line))?;
    let body = self.statement()?;
    // Run the increment after the body on every iteration.
    let body = match increment {
        Some(inc) => Statement::Block(vec![body, Statement::Expression(inc)]),
        None => body,
    };
    let body = Statement::While {
        condition,
        body: Box::new(body),
    };
    // Run the initializer once, before the loop.
    let body = match initializer {
        Some(initializer) => Statement::Block(vec![initializer, body]),
        None => body,
    };
    Ok(body)
}
/// Parse `"(" expression ")" statement ( "else" statement )?` after an `if`
/// keyword has been consumed.
///
/// Errors for a missing `(` or `)` carry the line of the token that
/// followed `if`.
fn if_statement(&mut self) -> Result<Statement, ParserError> {
    let line = self.current_token.line;
    if self.consume(&LeftParen).is_none() {
        return Err(ParserError::LeftParenAfterIfExpected(line));
    }
    let condition = self.expression()?;
    if self.consume(&RightParen).is_none() {
        return Err(ParserError::RightParenAfterConditionExpected(line));
    }
    let then_branch = Box::new(self.statement()?);
    let mut else_branch = None;
    if self.matches(&[Else]) {
        else_branch = Some(Box::new(self.statement()?));
    }
    Ok(Statement::If {
        condition,
        then_branch,
        else_branch,
    })
}
/// Parse `expression ";"` after a `print` keyword has been consumed and wrap
/// the expression in `Statement::Print`.
///
/// A missing `;` is reported on the line of the token found in its place.
fn print_statement(&mut self) -> Result<Statement, ParserError> {
    let value = self.expression()?;
    let line = self.current_token.line;
    match self.consume(&Semicolon) {
        Some(_) => Ok(Statement::Print(value)),
        None => Err(ParserError::SemicolonAfterValueExpected(line)),
    }
}
/// Parse `expression? ";"` after a `return` keyword has been consumed.
///
/// The `return` token itself is kept in the statement as `keyword`; `value`
/// is `None` when the `;` follows immediately. A missing `;` is reported on
/// the keyword's line.
fn return_statement(&mut self) -> Result<Statement, ParserError> {
    let keyword = self.previous()?.clone();
    let mut value = None;
    if !self.check(&Semicolon) {
        value = Some(self.expression()?);
    }
    if self.consume(&Semicolon).is_none() {
        return Err(ParserError::SemicolonAfterReturnExpected(keyword.line));
    }
    Ok(Statement::Return { keyword, value })
}
/// Parse `IDENTIFIER ( "<" IDENTIFIER )? "{" function* "}"` after a `class`
/// keyword has been consumed.
///
/// The optional superclass is stored as an `Expression::Variable`. All
/// errors raised directly here carry the line of the token that followed
/// `class`; a missing superclass name is reported as `ClassNameExpected`.
fn class_declaration(&mut self) -> Result<Statement, ParserError> {
    let line = self.current_token.line;
    let name = match self.consume(&Identifier) {
        Some(token) => token.clone(),
        None => return Err(ParserError::ClassNameExpected(line)),
    };
    let mut superclass = None;
    if self.matches(&[Less]) {
        if self.consume(&Identifier).is_none() {
            return Err(ParserError::ClassNameExpected(line));
        }
        let name = self.previous()?.clone();
        superclass = Some(Expression::Variable { name });
    }
    if self.consume(&LeftBrace).is_none() {
        return Err(ParserError::LeftBraceBeforeClassExpected(line));
    }
    let mut methods = Vec::new();
    loop {
        if self.check(&RightBrace) || self.is_at_end() {
            break;
        }
        methods.push(self.function()?);
    }
    if self.consume(&RightBrace).is_none() {
        return Err(ParserError::RightBraceAfterClassExpected(line));
    }
    Ok(Statement::Class {
        name,
        superclass,
        methods,
    })
}
/// Parse `IDENTIFIER ( "=" expression )? ";"` after a `var` keyword has been
/// consumed.
///
/// The initializer is stored as a boxed `Option`, `None` when no `=` is
/// present. Errors raised directly here carry the line of the token that
/// followed `var`.
fn var_declaration(&mut self) -> Result<Statement, ParserError> {
    let line = self.current_token.line;
    let name = match self.consume(&Identifier) {
        Some(token) => token.clone(),
        None => return Err(ParserError::VariableNameExpected(line)),
    };
    let mut initializer = None;
    if self.matches(&[Equal]) {
        initializer = Some(self.expression()?);
    }
    if self.consume(&Semicolon).is_none() {
        return Err(ParserError::SemicolonAfterExpressionExpected(line));
    }
    Ok(Statement::Var {
        name,
        initializer: Box::new(initializer),
    })
}
/// Parse `"(" expression ")" statement` after a `while` keyword has been
/// consumed.
///
/// Errors for a missing `(` or `)` carry the line of the token that
/// followed `while`.
fn while_statement(&mut self) -> Result<Statement, ParserError> {
    let line = self.current_token.line;
    if self.consume(&LeftParen).is_none() {
        return Err(ParserError::LeftParenAfterWhileExpected(line));
    }
    let condition = self.expression()?;
    if self.consume(&RightParen).is_none() {
        return Err(ParserError::RightParenAfterConditionExpected(line));
    }
    let body = Box::new(self.statement()?);
    Ok(Statement::While { condition, body })
}
/// Parse `expression ";"` and wrap the expression in `Statement::Expression`.
///
/// A missing `;` is reported on the line of the token found in its place.
fn expression_statement(&mut self) -> Result<Statement, ParserError> {
    let expr = self.expression()?;
    let line = self.current_token.line;
    match self.consume(&Semicolon) {
        Some(_) => Ok(Statement::Expression(expr)),
        None => Err(ParserError::SemicolonAfterExpressionExpected(line)),
    }
}
/// Parse `IDENTIFIER "(" parameters? ")" "{" block` into a
/// `Statement::Function`.
///
/// Used for `fun` declarations (after the keyword has been consumed) and for
/// every method inside a class body. Errors raised directly here carry the
/// line of the token the function name was expected at.
fn function(&mut self) -> Result<Statement, ParserError> {
    let line = self.current_token.line;
    let name = match self.consume(&Identifier) {
        Some(token) => token.clone(),
        None => return Err(ParserError::FunctionNameExpected(line)),
    };
    if self.consume(&LeftParen).is_none() {
        return Err(ParserError::LeftParenAfterFunctionNameExpected(line));
    }
    let mut params = Vec::new();
    if !self.check(&RightParen) {
        // One parameter, then another after each comma.
        loop {
            match self.consume(&Identifier) {
                Some(token) => params.push(token.clone()),
                None => return Err(ParserError::ParamNameExpected(line)),
            }
            if !self.matches(&[Comma]) {
                break;
            }
        }
    }
    if self.consume(&RightParen).is_none() {
        return Err(ParserError::RightParenAfterParamsExpected(line));
    }
    if self.consume(&LeftBrace).is_none() {
        return Err(ParserError::LeftBraceBeforeFunctionBodyExpected(line));
    }
    let body = self.block()?;
    Ok(Statement::Function { name, params, body })
}
/// Parse declarations up to the closing `}` and consume that brace.
///
/// Expects the opening `{` to have been consumed by the caller. A missing
/// `}` is reported on the line of the last token consumed.
fn block(&mut self) -> Result<Vec<Statement>, ParserError> {
    let mut statements = Vec::new();
    loop {
        if self.check(&RightBrace) || self.is_at_end() {
            break;
        }
        let declaration = self.declaration()?;
        statements.push(declaration);
    }
    let line = self.previous()?.line;
    match self.consume(&RightBrace) {
        Some(_) => Ok(statements),
        None => Err(ParserError::RightBraceAfterBlockExpected(line)),
    }
}
/// assignment -> ( call "." )? IDENTIFIER "=" assignment | logic_or ;
///
/// The left side is parsed as an ordinary expression first. If an `=`
/// follows, the right side is parsed recursively (right-associative) and the
/// left side is reinterpreted: a variable becomes `Expression::Assign`, a
/// property access becomes `Expression::Set`, and anything else is rejected
/// with `InvalidAssignmentTarget` on the line of the `=`.
fn assignment(&mut self) -> Result<Expression, ParserError> {
    let target = self.or()?;
    if !self.matches(&[Equal]) {
        return Ok(target);
    }
    let equals_line = self.previous()?.line;
    let value = Box::new(self.assignment()?);
    match target {
        Expression::Variable { name } => Ok(Expression::Assign { name, value }),
        Expression::Get { object, name } => Ok(Expression::Set {
            object,
            name,
            value,
        }),
        _ => Err(ParserError::InvalidAssignmentTarget(equals_line)),
    }
}
/// Parse a left-associative chain of one logical operator.
///
/// `operator` is the token type joining the operands (`And` or `Or` at the
/// call sites in this file) and `parse_fn` parses a single operand. Each
/// match wraps the expression so far in an `Expression::Logical`.
fn logical_operator<F>(
    &mut self,
    operator: TokenType,
    parse_fn: F,
) -> Result<Expression, ParserError>
where
    F: Fn(&mut Self) -> Result<Expression, ParserError>,
{
    let mut chain = parse_fn(self)?;
    loop {
        if !self.matches(&[operator]) {
            return Ok(chain);
        }
        let operator_token = self.previous()?.clone();
        let rhs = parse_fn(self)?;
        chain = Expression::Logical {
            left: Box::new(chain),
            operator: operator_token,
            right: Box::new(rhs),
        };
    }
}
/// logic_or -> logic_and ( "or" logic_and )* ;
fn or(&mut self) -> Result<Expression, ParserError> {
    Self::logical_operator(self, Or, Self::and)
}
/// logic_and -> equality ( "and" equality )* ;
fn and(&mut self) -> Result<Expression, ParserError> {
    Self::logical_operator(self, And, Self::equality)
}
/// equality -> comparison ( ( "!=" | "==" ) comparison )* ;
fn equality(&mut self) -> Result<Expression, ParserError> {
    let operators = [BangEqual, EqualEqual];
    self.binary_expr(Self::comparison, &operators)
}
/// comparison -> term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
fn comparison(&mut self) -> Result<Expression, ParserError> {
    let operators = [Greater, GreaterEqual, Less, LessEqual];
    self.binary_expr(Self::term, &operators)
}
/// term -> factor ( ( "-" | "+" ) factor )* ;
fn term(&mut self) -> Result<Expression, ParserError> {
    let operators = [Minus, Plus];
    self.binary_expr(Self::factor, &operators)
}
/// factor -> unary ( ( "/" | "*" ) unary )* ;
fn factor(&mut self) -> Result<Expression, ParserError> {
    let operators = [Slash, Star];
    self.binary_expr(Self::unary, &operators)
}
/// unary -> ( "!" | "-" ) unary | call ;
fn unary(&mut self) -> Result<Expression, ParserError> {
    if !self.matches(&[Bang, Minus]) {
        return self.call();
    }
    let operator = self.previous()?.clone();
    let operand = self.unary()?;
    Ok(Expression::Unary {
        operator,
        right: Box::new(operand),
    })
}
/// call -> primary ( "(" arguments? ")" | "." IDENTIFIER )* ;
///
/// Each `(` turns the expression so far into a call and each `.` into a
/// property access (`Expression::Get`), so chains such as `a.b(c).d` nest
/// left to right.
fn call(&mut self) -> Result<Expression, ParserError> {
    let mut callee = self.primary()?;
    loop {
        if self.matches(&[LeftParen]) {
            callee = self.finish_call(callee)?;
            continue;
        }
        if !self.matches(&[Dot]) {
            return Ok(callee);
        }
        let line = self.current_token.line;
        let name = match self.consume(&Identifier) {
            Some(token) => token.clone(),
            None => return Err(ParserError::PropertyNameAfterDotExpected(line)),
        };
        callee = Expression::Get {
            object: Box::new(callee),
            name,
        };
    }
}
/// Parse the argument list and closing `)` of a call whose `(` has already
/// been consumed.
///
/// The closing parenthesis token is stored in the resulting
/// `Expression::Call` as `paren`. A missing `)` is reported on the line of
/// the token found in its place.
fn finish_call(&mut self, callee: Expression) -> Result<Expression, ParserError> {
    let mut args = Vec::new();
    if !self.check(&RightParen) {
        // One argument, then another after each comma.
        loop {
            let argument = self.expression()?;
            args.push(argument);
            if !self.matches(&[Comma]) {
                break;
            }
        }
    }
    let line = self.current_token.line;
    let paren = match self.consume(&RightParen) {
        Some(token) => token.clone(),
        None => return Err(ParserError::RightParenAfterArgumentsExpected(line)),
    };
    Ok(Expression::Call {
        callee: Box::new(callee),
        paren,
        args,
    })
}
/// primary -> "false" | "true" | "nil" | "super" "." IDENTIFIER | "this"
///          | IDENTIFIER | NUMBER | STRING | "(" expression ")" ;
///
/// # Errors
/// * `DotAfterSuper` / `SuperclassMethodNameExpected` for a malformed
///   `super` access.
/// * `LiteralExpected` when a number or string token carries no literal.
/// * `ParenAfterExpression` when a grouping is not closed.
/// * `ExpressionExpected` when the current token cannot start an
///   expression; it carries the line of that token.
fn primary(&mut self) -> Result<Expression, ParserError> {
    if self.matches(&[False]) {
        Ok(Expression::Literal {
            value: token::Literal::Boolean(false),
        })
    } else if self.matches(&[True]) {
        Ok(Expression::Literal {
            value: token::Literal::Boolean(true),
        })
    } else if self.matches(&[Nil]) {
        Ok(Expression::Literal {
            value: token::Literal::Nil,
        })
    } else if self.matches(&[Super]) {
        let line = self.current_token.line;
        let keyword = self.previous()?.clone();
        self.consume(&Dot).ok_or(ParserError::DotAfterSuper(line))?;
        let method = self
            .consume(&Identifier)
            .ok_or(ParserError::SuperclassMethodNameExpected(line))?
            .clone();
        Ok(Expression::Super { keyword, method })
    } else if self.matches(&[This]) {
        Ok(Expression::This {
            keyword: self.previous()?.clone(),
        })
    } else if self.matches(&[Identifier]) {
        let prev = self.previous()?.clone();
        Ok(Expression::Variable { name: prev })
    } else if self.matches(&[Number, String]) {
        let prev = self.previous()?;
        let value = prev
            .literal
            .clone()
            .ok_or(ParserError::LiteralExpected(prev.line))?;
        Ok(Expression::Literal { value })
    } else if self.matches(&[LeftParen]) {
        let expr = self.expression()?;
        let line = self.current_token.line;
        self.consume(&RightParen)
            .ok_or(ParserError::ParenAfterExpression(line))?;
        Ok(Expression::Grouping {
            expression: Box::new(expr),
        })
    } else {
        // Report the offending (current) token. Nothing has been consumed
        // on this path, so looking up the previous token would fail at the
        // very start of the stream and would otherwise point at the wrong
        // token's line.
        Err(ParserError::ExpressionExpected(self.current_token.line))
    }
}
/// Skip tokens after a parse error until a likely statement boundary.
///
/// One token is always skipped first. Skipping then stops at `Eof`, right
/// after a `;`, or right before a keyword that starts a declaration or
/// statement.
///
/// # Panics
/// Panics if the previous token cannot be read while skipping.
/// NOTE(review): this looks reachable only when the token stream does not
/// end in `Eof` — confirm against the scanner.
fn synchronize(&mut self) {
    let statement_starts = [Class, Fun, Var, For, If, While, Print, Return];
    let _ = self.advance();
    loop {
        if self.is_at_end() {
            return;
        }
        if self.previous().unwrap().token_type == Semicolon {
            return;
        }
        if statement_starts.contains(&self.current_token.token_type) {
            return;
        }
        let _ = self.advance();
    }
}
}
/// Parse `tokens` into the list of statements forming the program's AST.
///
/// # Errors
/// Returns [`ParserError::NoTokens`] when `tokens` is empty. Syntax errors
/// inside the stream are logged by the parser and the affected statements
/// are left out of the result.
pub fn ast(tokens: Vec<Token>) -> Result<Vec<Statement>, ParserError> {
    Parser::new(tokens).and_then(|mut parser| parser.run())
}