java lox scanner implementation

This commit is contained in:
Sebastian Hugentobler 2025-02-06 10:54:21 +01:00
commit dcc1ae8699
20 changed files with 848 additions and 0 deletions

View file

@ -0,0 +1,19 @@
package ch.vanwa.lox_interpreter;
import java.io.IOException;
/**
 * Command line launcher for the Lox interpreter.
 */
public class App {
    /**
     * Entry point for the Lox interpreter cli.
     *
     * Dispatches on the number of command line arguments: none starts the
     * interactive prompt, exactly one is treated as the path of a script to run,
     * anything more prints a usage hint and exits with status 64.
     *
     * @param args Command line arguments.
     *
     * @throws IOException Propagated from {@link Lox#runFile(String)} or
     *                     {@link Lox#runPrompt()}.
     */
    public static void main(String[] args) throws IOException {
        switch (args.length) {
            case 0 -> Lox.runPrompt();
            case 1 -> Lox.runFile(args[0]);
            default -> {
                System.out.println("Usage: jlox [script]");
                System.exit(64);
            }
        }
    }
}

View file

@ -0,0 +1,90 @@
package ch.vanwa.lox_interpreter;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
/**
* A Lox interpreter.
*/
public class Lox {
/**
* True if a run produced an error, false otherwise.
*/
static boolean hadError = false;
/**
* Run a file with Lox source code.
*
* @param path Path of the source code file.
*/
public static void runFile(String path) throws IOException {
byte[] bytes = Files.readAllBytes(Paths.get(path));
run(new String(bytes, StandardCharsets.UTF_8));
if (hadError) {
System.exit(65);
}
}
/**
* Run a Lox REPL.
*/
public static void runPrompt() throws IOException {
var input = new InputStreamReader(System.in);
var reader = new BufferedReader(input);
for (;;) {
System.out.print("> ");
String line = reader.readLine();
if (line == null) {
break;
}
run(line);
hadError = false;
}
}
/**
* Run Lox source code.
*
* @param source Lox source code.
*
* @return List of scanned tokens.
*/
public static List<Token> run(String source) {
var scanner = new Scanner(source);
List<Token> tokens = scanner.scanTokens();
for (Token token : tokens) {
System.out.println(token);
}
return tokens;
}
/**
* Print an error message.
*
* @param line Line number where the error ocurred.
* @param message Error message.
*/
static void error(int line, String message) {
report(line, "", message);
}
/**
* Print a message to stderr.
*
* @param line Line number where the error ocurred.
* @param where Where the error occurred.
* @param message Error message.
*/
private static void report(int line, String where, String message) {
System.err.println(String.format("[line %d] Error%s: %s", line, where, message));
hadError = true;
}
}

View file

@ -0,0 +1,342 @@
package ch.vanwa.lox_interpreter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import static ch.vanwa.lox_interpreter.TokenType.*;
/**
 * Lexer for the Lox programming language: walks over the source text once and
 * turns it into a list of {@link Token}s.
 */
class Scanner {
    /**
     * Reserved words of Lox and the {@link TokenType} each one maps to.
     */
    private static final Map<String, TokenType> KEYWORDS = Map.ofEntries(
            Map.entry("and", AND),
            Map.entry("class", CLASS),
            Map.entry("else", ELSE),
            Map.entry("false", FALSE),
            Map.entry("for", FOR),
            Map.entry("fun", FUN),
            Map.entry("if", IF),
            Map.entry("nil", NIL),
            Map.entry("or", OR),
            Map.entry("print", PRINT),
            Map.entry("return", RETURN),
            Map.entry("super", SUPER),
            Map.entry("this", THIS),
            Map.entry("true", TRUE),
            Map.entry("var", VAR),
            Map.entry("while", WHILE));

    /**
     * Source code getting scanned.
     */
    private final String source;

    /**
     * Tokens produced so far.
     */
    private final List<Token> tokens = new ArrayList<>();

    /**
     * Index in the source where the lexeme currently being scanned begins.
     */
    private int start = 0;

    /**
     * Index of the next character that has not been consumed yet.
     */
    private int current = 0;

    /**
     * Line of the source code the scanner is on, counted from 1.
     */
    private int line = 1;

    /**
     * Initialize scanner.
     *
     * @param source Source code to scan.
     */
    Scanner(final String source) {
        this.source = source;
    }

    /**
     * Scan the source lexeme by lexeme until it is exhausted, then append an
     * EOF token.
     *
     * @return List of scanned tokens + EOF.
     */
    List<Token> scanTokens() {
        while (!isAtEnd()) {
            // Every iteration starts a fresh lexeme at the current position.
            start = current;
            scanToken();
        }
        tokens.add(new Token(EOF, "", null, line));
        return tokens;
    }

    /**
     * Consume one lexeme and, unless it is whitespace, a comment or an error,
     * add the matching token.
     */
    private void scanToken() {
        final char ch = advance();
        switch (ch) {
            case '(' -> addToken(LEFT_PAREN);
            case ')' -> addToken(RIGHT_PAREN);
            case '{' -> addToken(LEFT_BRACE);
            case '}' -> addToken(RIGHT_BRACE);
            case ',' -> addToken(COMMA);
            case '.' -> addToken(DOT);
            case '-' -> addToken(MINUS);
            case '+' -> addToken(PLUS);
            case ';' -> addToken(SEMICOLON);
            case '*' -> addToken(STAR);
            case '!' -> addOperator(BANG, BANG_EQUAL);
            case '=' -> addOperator(EQUAL, EQUAL_EQUAL);
            case '<' -> addOperator(LESS, LESS_EQUAL);
            case '>' -> addOperator(GREATER, GREATER_EQUAL);
            case '/' -> {
                if (match('/')) {
                    skipLineComment();
                } else {
                    addToken(SLASH);
                }
            }
            case ' ', '\r', '\t' -> {
                // Whitespace carries no meaning and produces no token.
            }
            case '\n' -> line++;
            case '"' -> string();
            default -> {
                if (isDigit(ch)) {
                    number();
                } else if (isAlpha(ch)) {
                    identifier();
                } else {
                    Lox.error(line, "Unexpected character.");
                }
            }
        }
    }

    /**
     * Add an operator token that may be followed by '=' to form a two
     * character operator.
     *
     * @param single     Type to use if no '=' follows.
     * @param withEquals Type to use if a '=' follows (it gets consumed).
     */
    private void addOperator(final TokenType single, final TokenType withEquals) {
        addToken(match('=') ? withEquals : single);
    }

    /**
     * Consume the rest of a line comment; the terminating newline is left in
     * place so that the line counter is updated by the regular scan.
     */
    private void skipLineComment() {
        while (!isAtEnd() && peek() != '\n') {
            advance();
        }
    }

    /**
     * Consume an identifier or keyword.
     */
    private void identifier() {
        while (isAlphaNumeric(peek())) {
            advance();
        }
        final String lexeme = source.substring(start, current);
        // Anything that is not a reserved word is a plain identifier.
        addToken(KEYWORDS.getOrDefault(lexeme, IDENTIFIER));
    }

    /**
     * Consume a number literal, with an optional fractional part.
     */
    private void number() {
        consumeDigits();
        // A "." only belongs to the number if a digit follows it.
        final boolean hasFraction = peek() == '.' && isDigit(peekNext());
        if (hasFraction) {
            advance();
            consumeDigits();
        }
        final double value = Double.parseDouble(source.substring(start, current));
        addToken(NUMBER, value);
    }

    /**
     * Consume characters for as long as they are digits.
     */
    private void consumeDigits() {
        while (isDigit(peek())) {
            advance();
        }
    }

    /**
     * Consume a string literal, which may span multiple lines.
     */
    private void string() {
        while (!isAtEnd() && peek() != '"') {
            if (advance() == '\n') {
                line++;
            }
        }
        if (isAtEnd()) {
            Lox.error(line, "Unterminated string.");
            return;
        }
        // Step over the closing quote.
        advance();
        // The literal value is the lexeme without the surrounding quotes.
        addToken(STRING, source.substring(start + 1, current - 1));
    }

    /**
     * Consume the current character if, and only if, it is the expected one.
     *
     * @param expected Character used for matching against the current character.
     *
     * @return True if the expected character matched and was consumed, false
     *         otherwise.
     */
    private boolean match(final char expected) {
        if (isAtEnd() || source.charAt(current) != expected) {
            return false;
        }
        current++;
        return true;
    }

    /**
     * Look at the current character without consuming it.
     *
     * @return The current character, or '\0' at the end of the source code.
     */
    private char peek() {
        return isAtEnd() ? '\0' : source.charAt(current);
    }

    /**
     * Look at the character after the current one without consuming anything.
     *
     * @return The character peeked at, or '\0' if there is none.
     */
    private char peekNext() {
        final int next = current + 1;
        return next < source.length() ? source.charAt(next) : '\0';
    }

    /**
     * Check if a character is a letter or an underscore (a-z,A-Z,_).
     *
     * @param c Character to check.
     *
     * @return True if the character is a letter or underscore, false otherwise.
     */
    private boolean isAlpha(final char c) {
        final boolean lower = 'a' <= c && c <= 'z';
        final boolean upper = 'A' <= c && c <= 'Z';
        return lower || upper || c == '_';
    }

    /**
     * Check if a character is alphanumeric (a-z,A-Z,_,0-9).
     *
     * @param c Character to check.
     *
     * @return True if the character is alphanumeric, false otherwise.
     */
    private boolean isAlphaNumeric(final char c) {
        return isAlpha(c) || isDigit(c);
    }

    /**
     * Check if a character is a digit (0-9).
     *
     * @param c Character to check.
     *
     * @return True if the character is a digit, false otherwise.
     */
    private boolean isDigit(final char c) {
        return '0' <= c && c <= '9';
    }

    /**
     * Check if the scanner reached the end of the source code.
     *
     * @return True if the scanner reached the end of the source code, false
     *         otherwise.
     */
    private boolean isAtEnd() {
        return current >= source.length();
    }

    /**
     * Consume the current character and move the pointer one step forward.
     *
     * @return The character that was consumed.
     */
    private char advance() {
        final int index = current++;
        return source.charAt(index);
    }

    /**
     * Add a token without a literal to the list of scanned tokens.
     *
     * @param type Type of token.
     */
    private void addToken(final TokenType type) {
        addToken(type, null);
    }

    /**
     * Add a token for the current lexeme to the list of scanned tokens.
     *
     * @param type    Type of token.
     * @param literal Value of a literal (can be null).
     */
    private void addToken(final TokenType type, final Object literal) {
        final String lexeme = source.substring(start, current);
        tokens.add(new Token(type, lexeme, literal, line));
    }
}

View file

@ -0,0 +1,16 @@
package ch.vanwa.lox_interpreter;
/**
 * A single token as produced by scanning Lox source code.
 *
 * @param type    Type of the token.
 * @param lexeme  Piece of source text that produced this token.
 * @param literal Literal value of the token (can be null).
 * @param line    Line number where the token was scanned in the source code.
 */
public record Token(TokenType type, String lexeme, Object literal, int line) {
    /**
     * Render the token as "line: type lexeme literal".
     *
     * @return Human readable representation of the token.
     */
    @Override
    public String toString() {
        return "%d: %s %s %s".formatted(line, type, lexeme, literal);
    }
}

View file

@ -0,0 +1,25 @@
package ch.vanwa.lox_interpreter;
/**
 * All kinds of tokens the Lox scanner can produce.
 */
enum TokenType {
    // Punctuation and operators that are always a single character.
    LEFT_PAREN,
    RIGHT_PAREN,
    LEFT_BRACE,
    RIGHT_BRACE,
    COMMA,
    DOT,
    MINUS,
    PLUS,
    SEMICOLON,
    SLASH,
    STAR,

    // Operators that are one character, or two when followed by '='.
    BANG,
    BANG_EQUAL,
    EQUAL,
    EQUAL_EQUAL,
    GREATER,
    GREATER_EQUAL,
    LESS,
    LESS_EQUAL,

    // Literals.
    IDENTIFIER,
    STRING,
    NUMBER,

    // Reserved words.
    AND,
    CLASS,
    ELSE,
    FALSE,
    FUN,
    FOR,
    IF,
    NIL,
    OR,
    PRINT,
    RETURN,
    SUPER,
    THIS,
    TRUE,
    VAR,
    WHILE,

    // Marks the end of the token list.
    EOF
}

View file

@ -0,0 +1,27 @@
package ch.vanwa.lox_interpreter;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.junit.jupiter.api.Test;
/**
 * Unit tests for running Lox source code through {@link Lox#run(String)}.
 */
public class AppTest {
    /**
     * Scanning a single line variable declaration yields the keyword, the
     * identifier, the assignment operator, the string literal and a closing EOF.
     */
    @Test
    public void scanOneLine() {
        final Token[] expected = {
            new Token(TokenType.VAR, "var", null, 1),
            new Token(TokenType.IDENTIFIER, "language", null, 1),
            new Token(TokenType.EQUAL, "=", null, 1),
            new Token(TokenType.STRING, "\"lox\"", "lox", 1),
            new Token(TokenType.EOF, "", null, 1),
        };

        var tokens = Lox.run("var language = \"lox\"");

        assertEquals(expected.length, tokens.size());
        for (int i = 0; i < expected.length; i++) {
            assertEquals(expected[i], tokens.get(i));
        }
    }
}