From a629ddca05e72d32b3027d13c4554a7fe5478128 Mon Sep 17 00:00:00 2001 From: Sebastian Hugentobler Date: Wed, 12 Feb 2025 10:30:51 +0100 Subject: [PATCH] statements in rust --- rust/rox/src/environment.rs | 66 +++++++++ rust/rox/src/expression.rs | 8 + rust/rox/src/interpreter.rs | 285 ++++++++++++++++++++++++++---------- rust/rox/src/lib.rs | 20 ++- rust/rox/src/parser.rs | 236 +++++++++++++++-------------- rust/rox/src/statement.rs | 13 ++ 6 files changed, 434 insertions(+), 194 deletions(-) create mode 100644 rust/rox/src/environment.rs create mode 100644 rust/rox/src/statement.rs diff --git a/rust/rox/src/environment.rs b/rust/rox/src/environment.rs new file mode 100644 index 0000000..49ac733 --- /dev/null +++ b/rust/rox/src/environment.rs @@ -0,0 +1,66 @@ +use std::collections::HashMap; + +use thiserror::Error; + +use crate::{token::Token, value::Value}; + +#[derive(Error, Debug)] +pub enum EnvironmentError { + #[error("line {0}: undefined variable: {1}")] + UndefinedVariable(usize, String), +} + +/// Environment mapping variable names to their respective values. Can have an optional enclosing +/// environment. The outermost (global) environment has no enclosing one. +#[derive(Default, Debug, Clone)] +pub struct Environment { + values: HashMap, + enclosing: Option>, +} + +impl Environment { + pub fn with_enclosing(enclosing: Environment) -> Self { + Self { + values: HashMap::default(), + enclosing: Some(Box::new(enclosing)), + } + } + + /// Define a new variable binding with a value. + /// + /// Overwrite any binding of the same name. + pub fn define(&mut self, name: String, value: Value) { + self.values.insert(name, value); + } + + /// Assign a new value to an existing (defined) variable. Error if there is no such variable in + /// this environment or any of the enclosing ones. 
+ pub fn assign(&mut self, token: &Token, value: Value) -> Result<(), EnvironmentError> { + if self.values.contains_key(token.lexeme.as_str()) { + self.values.insert(token.lexeme.clone(), value); + Ok(()) + } else if let Some(enclosing) = &mut self.enclosing { + enclosing.assign(token, value) + } else { + Err(EnvironmentError::UndefinedVariable( + token.line, + token.lexeme.clone(), + )) + } + } + + /// Get the value of an existing (defined) variable. Error if there is no such variable in + /// this environment or any of the enclosing ones. + pub fn get(&self, token: &Token) -> Result { + if let Some(v) = self.values.get(token.lexeme.as_str()) { + Ok(v.clone()) + } else if let Some(enclosing) = &self.enclosing { + enclosing.get(token) + } else { + Err(EnvironmentError::UndefinedVariable( + token.line, + token.lexeme.clone(), + )) + } + } +} diff --git a/rust/rox/src/expression.rs b/rust/rox/src/expression.rs index 79f0082..a625331 100644 --- a/rust/rox/src/expression.rs +++ b/rust/rox/src/expression.rs @@ -1,7 +1,12 @@ use crate::token::{self, Token}; +/// Enumeration of all types of expressions. 
#[derive(Debug, Clone, PartialEq)] pub enum Expression { + Assign { + name: Token, + value: Box, + }, Binary { left: Box, operator: Token, @@ -17,4 +22,7 @@ pub enum Expression { operator: Token, right: Box, }, + Variable { + name: Token, + }, } diff --git a/rust/rox/src/interpreter.rs b/rust/rox/src/interpreter.rs index f55ed54..16d5f8a 100644 --- a/rust/rox/src/interpreter.rs +++ b/rust/rox/src/interpreter.rs @@ -1,110 +1,233 @@ use thiserror::Error; +use tracing::error; use crate::{ + environment::{Environment, EnvironmentError}, expression::Expression, + statement::Statement, token::{Literal, Token, TokenType}, value::Value, }; #[derive(Error, Debug)] pub enum InterpreterError { - #[error("line {0}: MINUS unary expression expects a number on the right")] + #[error("[line {0}] MINUS unary expression expects a number on the right")] UnaryExpressionNotANumber(usize), - #[error("line {0}: unknown unary operator: {1}")] + #[error("[line {0}] unknown unary operator: {1}")] UnaryOperatorUnknown(usize, String), - #[error("line {0}: unknown binary operator: {1}")] + #[error("[line {0}] unknown binary operator: {1}")] BinaryOperatorUnknown(usize, String), + #[error("[line {0}] left or right is not a number.")] + BinaryExpressionNeedsNumber(usize), + #[error("[line {0}] left or right is neither a number nor string.")] + BinaryExpressionNeedsNumberOrString(usize), + #[error("{0}")] + UndefinedVariable(EnvironmentError), } -/// Try to evaluate an expression and return its result. -pub fn evaluate(expression: Expression) -> Result { - match expression { - Expression::Literal { value } => literal(value), - Expression::Grouping { expression } => grouping(*expression), - Expression::Unary { - operator: op, - right, - } => unary(op, *right), - Expression::Binary { - left, - operator, - right, - } => binary(*left, operator, *right), +/// Interpreter for the Lox language. 
+#[derive(Default, Debug)] +pub struct Interpreter { + environment: Environment, +} + +impl Interpreter { + /// Execute the statements in order; errors are logged and execution continues. + pub fn run(&mut self, statements: Vec) -> Result<(), InterpreterError> { + for stmt in statements { + match self.execute(stmt) { + Ok(_) => {} + Err(e) => error!("{e}"), + }; + } + + Ok(()) } -} -/// Convert the literal value into a Value. -fn literal(literal: Literal) -> Result { - Ok(literal.into()) -} + /// Execute a statement. + fn execute(&mut self, statement: Statement) -> Result<(), InterpreterError> { + match statement { + Statement::Block(statements) => { + let sub_env = Environment::with_enclosing(self.environment.clone()); + self.block(statements, sub_env)? + } + Statement::Print(expression) => self.print_statement(expression)?, + Statement::Expression(expression) => { + self.evaluate(expression)?; + } + Statement::Var { name, initializer } => self.var_statement(name, *initializer)?, + }; -/// Evaluate the inner expression. -fn grouping(inner: Expression) -> Result { - evaluate(inner) -} + Ok(()) + } -/// Evaluate the expression on the right and use its result when evaluating the unary operator. -fn unary(op: Token, right: Expression) -> Result { - let right = evaluate(right)?; + /// Execute all statements within a block, using a new environment (with the old one as the + /// enclosing one). 
+ fn block( + &mut self, + statements: Vec, + environment: Environment, + ) -> Result<(), InterpreterError> { + let prev_env = &self.environment.clone(); + self.environment = environment; - match op.token_type { - TokenType::Minus => { - if let Value::Number(val) = right { - Ok(Value::Number(-val)) - } else { - Err(InterpreterError::UnaryExpressionNotANumber(op.line)) + for stmt in statements { + if let Err(e) = self.execute(stmt) { + error!("{e}"); } } - TokenType::Bang => Ok(Value::Boolean(!right.is_truthy())), - _ => Err(InterpreterError::UnaryOperatorUnknown(op.line, op.lexeme)), + + self.environment = prev_env.clone(); + + Ok(()) } -} -/// Calculate number operations. -fn number_op(left: f64, op: TokenType, right: f64) -> f64 { - match op { - TokenType::Minus => left - right, - TokenType::Plus => left + right, - TokenType::Slash => left / right, - TokenType::Star => left * right, - _ => unreachable!(), - } -} - -/// Calculate boolean operations. -fn boolean_op(left: f64, op: TokenType, right: f64) -> bool { - match op { - TokenType::Greater => left > right, - TokenType::GreaterEqual => left >= right, - TokenType::Less => left < right, - TokenType::LessEqual => left <= right, - _ => unreachable!(), - } -} - -/// Evaluate the left and right expressions (in that order) and then combine them with the -/// specified operator. -fn binary(left: Expression, op: Token, right: Expression) -> Result { - let left = evaluate(left)?; - let right = evaluate(right)?; - - match op.token_type { - TokenType::Minus | TokenType::Slash | TokenType::Star | TokenType::Plus => { - if let (Value::Number(left), Value::Number(right)) = (left, right) { - Ok(Value::Number(number_op(left, op.token_type, right))) - } else { - todo!() - } + /// Evaluate an expression and return its value. 
+ fn evaluate(&mut self, expression: Expression) -> Result { + match expression { + Expression::Literal { value } => self.literal(value), + Expression::Grouping { expression } => self.grouping(*expression), + Expression::Unary { + operator: op, + right, + } => self.unary(op, *right), + Expression::Binary { + left, + operator, + right, + } => self.binary(*left, operator, *right), + Expression::Variable { name } => self.var_expression(&name), + Expression::Assign { name, value } => self.assign(&name, *value), } - TokenType::Greater | TokenType::GreaterEqual | TokenType::Less | TokenType::LessEqual => { - if let (Value::Number(left), Value::Number(right)) = (left, right) { - Ok(Value::Boolean(boolean_op(left, op.token_type, right))) - } else { - todo!() + } + + /// Evaluate an expression and print its value to stdout. + fn print_statement(&mut self, expression: Expression) -> Result<(), InterpreterError> { + let value = self.evaluate(expression)?; + println!("{value}"); + + Ok(()) + } + + /// Initialize a variable with an initializer expression or nil. + fn var_statement( + &mut self, + name: Token, + initializer: Option, + ) -> Result<(), InterpreterError> { + let value = if let Some(initializer) = initializer { + self.evaluate(initializer) + } else { + Ok(Value::Nil) + }?; + + self.environment.define(name.lexeme, value); + + Ok(()) + } + + /// Assign the value of an expression to a variable. + fn assign(&mut self, name: &Token, value: Expression) -> Result { + let value = self.evaluate(value)?; + + self.environment + .assign(name, value.clone()) + .map_err(InterpreterError::UndefinedVariable)?; + Ok(value) + } + + /// Convert the literal value into a Value. + fn literal(&self, literal: Literal) -> Result { + Ok(literal.into()) + } + + /// Evaluate the inner expression. + fn grouping(&mut self, inner: Expression) -> Result { + self.evaluate(inner) + } + + /// Evaluate the expression on the right and use its result when evaluating the unary operator. 
+ fn unary(&mut self, op: Token, right: Expression) -> Result { + let right = self.evaluate(right)?; + + match op.token_type { + TokenType::Minus => { + if let Value::Number(val) = right { + Ok(Value::Number(-val)) + } else { + Err(InterpreterError::UnaryExpressionNotANumber(op.line)) + } } + TokenType::Bang => Ok(Value::Boolean(!right.is_truthy())), + _ => Err(InterpreterError::UnaryOperatorUnknown(op.line, op.lexeme)), + } + } + + /// Get the value of a variable. + fn var_expression(&mut self, name: &Token) -> Result { + self.environment + .get(name) + .map_err(InterpreterError::UndefinedVariable) + } + + /// Calculate number operations. + fn number_op(&self, left: f64, op: TokenType, right: f64) -> f64 { + match op { + TokenType::Minus => left - right, + TokenType::Plus => left + right, + TokenType::Slash => left / right, + TokenType::Star => left * right, + _ => unreachable!(), + } + } + + /// Calculate boolean operations. + fn boolean_op(&self, left: f64, op: TokenType, right: f64) -> bool { + match op { + TokenType::Greater => left > right, + TokenType::GreaterEqual => left >= right, + TokenType::Less => left < right, + TokenType::LessEqual => left <= right, + _ => unreachable!(), + } + } + + /// Evaluate the left and right expressions (in that order) and then combine them with the + /// specified operator. 
+ fn binary( + &mut self, + left: Expression, + op: Token, + right: Expression, + ) -> Result { + let left = self.evaluate(left)?; + let right = self.evaluate(right)?; + + match op.token_type { + TokenType::Minus | TokenType::Slash | TokenType::Star | TokenType::Plus => { + if let (Value::Number(left), Value::Number(right)) = (left.clone(), right.clone()) { + Ok(Value::Number(self.number_op(left, op.token_type, right))) + } else if let (Value::String(left), Value::String(right)) = (left, right) { + Ok(Value::String(format!("{}{}", left.clone(), right.clone()))) + } else { + Err(InterpreterError::BinaryExpressionNeedsNumberOrString( + op.line, + )) + } + } + TokenType::Greater + | TokenType::GreaterEqual + | TokenType::Less + | TokenType::LessEqual => { + if let (Value::Number(left), Value::Number(right)) = (left, right) { + Ok(Value::Boolean(self.boolean_op(left, op.token_type, right))) + } else { + Err(InterpreterError::BinaryExpressionNeedsNumber(op.line)) + } + } + TokenType::BangEqual => Ok(Value::Boolean(left != right)), + TokenType::EqualEqual => Ok(Value::Boolean(left == right)), + _ => Err(InterpreterError::BinaryOperatorUnknown(op.line, op.lexeme)), } - TokenType::BangEqual => Ok(Value::Boolean(left != right)), - TokenType::EqualEqual => Ok(Value::Boolean(left == right)), - _ => Err(InterpreterError::BinaryOperatorUnknown(op.line, op.lexeme)), } } diff --git a/rust/rox/src/lib.rs b/rust/rox/src/lib.rs index 64b32bb..dd37957 100644 --- a/rust/rox/src/lib.rs +++ b/rust/rox/src/lib.rs @@ -7,14 +7,17 @@ use std::{ path::Path, }; -use tracing::{error, info}; +use interpreter::Interpreter; +use tracing::error; pub mod cli; +pub mod environment; pub mod expression; pub mod interpreter; pub mod keywords; pub mod parser; pub mod scanner; +pub mod statement; pub mod token; pub mod tokenizer { pub mod comment; @@ -32,13 +35,16 @@ pub mod value; /// Read the source code in a file and scan it to tokens. 
pub fn compile(source: &Path) -> Result<(), io::Error> { let input = fs::read_to_string(source)?; - run(&input); + let mut interpreter = Interpreter::default(); + + run(&input, &mut interpreter); Ok(()) } /// Run a Lox REPL until SIGINT. pub fn repl() { + let mut interpreter = Interpreter::default(); loop { print!("> "); let _ = io::stdout().flush(); @@ -49,16 +55,16 @@ pub fn repl() { Err(e) => error!("{}", e), } let input = input.trim().to_string(); - run(&input); + run(&input, &mut interpreter); } } /// Evaluate a Lox input string and print errors or output. -fn run(input: &str) { +fn run(input: &str, interpreter: &mut Interpreter) { let tokens = scanner::tokenize(input); - match parser::generate_ast(tokens) { - Ok(ast) => match interpreter::evaluate(ast) { - Ok(value) => println!("{value}"), + match parser::ast(tokens) { + Ok(ast) => match interpreter.run(ast) { + Ok(_) => {} Err(e) => error!("{e}"), }, Err(e) => error!("{e}"), diff --git a/rust/rox/src/parser.rs b/rust/rox/src/parser.rs index a2b0079..2f24df0 100644 --- a/rust/rox/src/parser.rs +++ b/rust/rox/src/parser.rs @@ -3,6 +3,7 @@ use tracing::error; use crate::{ expression::Expression, + statement::Statement, token::{ self, Token, TokenType::{self, *}, @@ -13,14 +14,24 @@ use crate::{ pub enum ParserError { #[error("empty token stream")] NoTokens, - #[error("line {0}: expected expression")] + #[error("[line {0}] expected expression")] ExpressionExpected(usize), - #[error("line {0}: expected ')' after expression.")] + #[error("[line {0}] expected ')' after expression.")] ParenAfterExpression(usize), - #[error("Out of bounds access at index {0}.")] + #[error("Out of bounds access at index {0}.")] OutOfBoundsAccess(usize), - #[error("line {0}: literal expected.")] + #[error("[line {0}] literal expected.")] LiteralExpected(usize), + #[error("[line {0}] expected ';' after value.")] + SemicolonAfterValueExpected(usize), + #[error("[line {0}] expected ';' after expression.")] + 
SemicolonAfterExpressionExpected(usize), + #[error("[line {0}] expected variable name.")] + VariableNameExpected(usize), + #[error("[line {0}] invalid assignment target.")] + InvalidAssignmentTarget(usize), + #[error("[line {0}] expected '}}' after block.")] + RightBraceAfterBlockExpected(usize), } /// Parse the Lox language. @@ -43,6 +54,23 @@ impl Parser { }) } + /// Parse all tokens to a list of statements for execution. + fn run(&mut self) -> Result, ParserError> { + let mut statements = Vec::new(); + + while !self.is_at_end() { + match self.declaration() { + Ok(x) => statements.push(x), + Err(e) => { + error!("{e}"); + self.synchronize(); + } + } + } + + Ok(statements) + } + /// Check if any of the provided types match the type of the current token. /// /// If so, advance the current token. @@ -117,7 +145,99 @@ impl Parser { - /// expression -> equality ; + /// expression -> assignment ; fn expression(&mut self) -> Result { - self.equality() + self.assignment() + } + + fn declaration(&mut self) -> Result { + if self.matches(&[Var]) { + self.var_declaration() + } else { + self.statement() + } + } + + fn statement(&mut self) -> Result { + if self.matches(&[Print]) { + self.print_statement() + } else if self.matches(&[LeftBrace]) { + Ok(Statement::Block(self.block()?)) + } else { + self.expression_statement() + } + } + + fn print_statement(&mut self) -> Result { + let value = self.expression()?; + let line = self.current_token.line; + self.consume(&Semicolon) + .ok_or(ParserError::SemicolonAfterValueExpected(line))?; + + Ok(Statement::Print(value)) + } + + fn var_declaration(&mut self) -> Result { + let line = self.current_token.line; + let name = self + .consume(&Identifier) + .ok_or(ParserError::VariableNameExpected(line))? + .clone(); + + let initializer = if self.matches(&[Equal]) { + Some(self.expression()?) 
+ } else { + None + }; + + self.consume(&Semicolon) + .ok_or(ParserError::SemicolonAfterExpressionExpected(line))?; + + Ok(Statement::Var { + name, + initializer: Box::new(initializer), + }) + } + + fn expression_statement(&mut self) -> Result { + let expr = self.expression()?; + let line = self.current_token.line; + self.consume(&Semicolon) + .ok_or(ParserError::SemicolonAfterExpressionExpected(line))?; + + Ok(Statement::Expression(expr)) + } + + fn block(&mut self) -> Result, ParserError> { + let mut statements = Vec::new(); + + while !self.check(&RightBrace) && !self.is_at_end() { + statements.push(self.declaration()?); + } + + let line = self.previous()?.line; + self.consume(&RightBrace) + .ok_or(ParserError::RightBraceAfterBlockExpected(line))?; + + Ok(statements) + } + + fn assignment(&mut self) -> Result { + let expr = self.equality()?; + + if self.matches(&[Equal]) { + let equals = self.previous()?.clone(); + let value = self.assignment()?; + + if let Expression::Variable { name } = expr { + Ok(Expression::Assign { + name, + value: Box::new(value), + }) + } else { + Err(ParserError::InvalidAssignmentTarget(equals.line)) + } + } else { + Ok(expr) + } } /// equality -> comparison ( ( "!=" | "==" ) comparison )* ; @@ -169,6 +289,9 @@ impl Parser { Ok(Expression::Literal { value: token::Literal::Nil, }) + } else if self.matches(&[Identifier]) { + let prev = self.previous()?.clone(); + Ok(Expression::Variable { name: prev }) } else if self.matches(&[Number, String]) { let prev = self.previous()?; let value = prev @@ -205,106 +328,7 @@ impl Parser { } /// Try to parse the provided tokens into an AST. 
-pub fn generate_ast(tokens: Vec) -> Result { +pub fn ast(tokens: Vec) -> Result, ParserError> { let mut parser = Parser::new(tokens)?; - parser.expression() -} - -#[cfg(test)] -mod tests { - use crate::{ - expression::Expression, - token::{Literal, Token, TokenType}, - }; - - use super::generate_ast; - - #[test] - fn simple_expression() { - let ast = generate_ast(vec![ - Token { - token_type: TokenType::Number, - lexeme: "3".into(), - literal: Some(Literal::Number(3.0)), - line: 1, - }, - Token { - token_type: TokenType::Star, - lexeme: "*".into(), - literal: None, - line: 1, - }, - Token { - token_type: TokenType::Number, - lexeme: "4".into(), - literal: Some(Literal::Number(4.0)), - line: 1, - }, - Token { - token_type: TokenType::Plus, - lexeme: "+".into(), - literal: None, - line: 1, - }, - Token { - token_type: TokenType::Number, - lexeme: "2".into(), - literal: Some(Literal::Number(2.0)), - line: 1, - }, - Token { - token_type: TokenType::Star, - lexeme: "*".into(), - literal: None, - line: 1, - }, - Token { - token_type: TokenType::Number, - lexeme: "6".into(), - literal: Some(Literal::Number(6.0)), - line: 1, - }, - ]) - .unwrap(); - - assert_eq!( - ast, - Expression::Binary { - left: Box::new(Expression::Binary { - left: Box::new(Expression::Literal { - value: Literal::Number(3.0) - }), - operator: Token { - token_type: TokenType::Star, - lexeme: "*".into(), - literal: None, - line: 1 - }, - right: Box::new(Expression::Literal { - value: Literal::Number(4.0) - }) - }), - operator: Token { - token_type: TokenType::Plus, - lexeme: "+".into(), - literal: None, - line: 1 - }, - right: Box::new(Expression::Binary { - left: Box::new(Expression::Literal { - value: Literal::Number(2.0) - }), - operator: Token { - token_type: TokenType::Star, - lexeme: "*".into(), - literal: None, - line: 1 - }, - right: Box::new(Expression::Literal { - value: Literal::Number(6.0) - }) - }) - } - ) - } + parser.run() } diff --git a/rust/rox/src/statement.rs 
b/rust/rox/src/statement.rs new file mode 100644 index 0000000..7624e73 --- /dev/null +++ b/rust/rox/src/statement.rs @@ -0,0 +1,13 @@ +use crate::{expression::Expression, token::Token}; + +/// Enumeration of all types of statements. +#[derive(Debug, Clone, PartialEq)] +pub enum Statement { + Block(Vec), + Print(Expression), + Expression(Expression), + Var { + name: Token, + initializer: Box>, + }, +}