commit dcc1ae86994c768bded3b17f9b77631e5ef80b5f Author: Sebastian Hugentobler Date: Thu Feb 6 10:54:21 2025 +0100 java lox scanner implementation diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9b42106 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.direnv/ diff --git a/java/.envrc b/java/.envrc new file mode 100644 index 0000000..3550a30 --- /dev/null +++ b/java/.envrc @@ -0,0 +1 @@ +use flake diff --git a/java/.gitignore b/java/.gitignore new file mode 100644 index 0000000..2f7896d --- /dev/null +++ b/java/.gitignore @@ -0,0 +1 @@ +target/ diff --git a/java/flake.lock b/java/flake.lock new file mode 100644 index 0000000..f293d28 --- /dev/null +++ b/java/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1738680400, + "narHash": "sha256-ooLh+XW8jfa+91F1nhf9OF7qhuA/y1ChLx6lXDNeY5U=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "799ba5bffed04ced7067a91798353d360788b30d", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/java/flake.nix b/java/flake.nix new 
file mode 100644 index 0000000..97fc0f0 --- /dev/null +++ b/java/flake.nix @@ -0,0 +1,29 @@ +{ + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = + { + nixpkgs, + flake-utils, + ... + }: + flake-utils.lib.eachDefaultSystem ( + system: + let + pkgs = import nixpkgs { inherit system; }; + buildInputs = with pkgs; [ + jdk21 + jdt-language-server + maven + ]; + in + { + devShells.default = pkgs.mkShell { + buildInputs = buildInputs; + }; + } + ); +} diff --git a/java/lox-interpreter/.classpath b/java/lox-interpreter/.classpath new file mode 100644 index 0000000..4e7f670 --- /dev/null +++ b/java/lox-interpreter/.classpath @@ -0,0 +1,57 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/java/lox-interpreter/.mvn/jvm.config b/java/lox-interpreter/.mvn/jvm.config new file mode 100644 index 0000000..e69de29 diff --git a/java/lox-interpreter/.mvn/maven.config b/java/lox-interpreter/.mvn/maven.config new file mode 100644 index 0000000..e69de29 diff --git a/java/lox-interpreter/.project b/java/lox-interpreter/.project new file mode 100644 index 0000000..f9bbc23 --- /dev/null +++ b/java/lox-interpreter/.project @@ -0,0 +1,34 @@ + + + lox-interpreter + + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.m2e.core.maven2Builder + + + + + + org.eclipse.jdt.core.javanature + org.eclipse.m2e.core.maven2Nature + + + + 1738828672243 + + 30 + + org.eclipse.core.resources.regexFilterMatcher + node_modules|\.git|__CREATED_BY_JAVA_LANGUAGE_SERVER__ + + + + diff --git a/java/lox-interpreter/.settings/org.eclipse.core.resources.prefs b/java/lox-interpreter/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000..f9fe345 --- /dev/null +++ b/java/lox-interpreter/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,4 @@ +eclipse.preferences.version=1 +encoding//src/main/java=UTF-8 
+encoding//src/test/java=UTF-8 +encoding/=UTF-8 diff --git a/java/lox-interpreter/.settings/org.eclipse.jdt.apt.core.prefs b/java/lox-interpreter/.settings/org.eclipse.jdt.apt.core.prefs new file mode 100644 index 0000000..d4313d4 --- /dev/null +++ b/java/lox-interpreter/.settings/org.eclipse.jdt.apt.core.prefs @@ -0,0 +1,2 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.apt.aptEnabled=false diff --git a/java/lox-interpreter/.settings/org.eclipse.jdt.core.prefs b/java/lox-interpreter/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 0000000..e96c048 --- /dev/null +++ b/java/lox-interpreter/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,9 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.targetPlatform=21 +org.eclipse.jdt.core.compiler.compliance=21 +org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled +org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning +org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore +org.eclipse.jdt.core.compiler.processAnnotations=disabled +org.eclipse.jdt.core.compiler.release=enabled +org.eclipse.jdt.core.compiler.source=21 diff --git a/java/lox-interpreter/.settings/org.eclipse.m2e.core.prefs b/java/lox-interpreter/.settings/org.eclipse.m2e.core.prefs new file mode 100644 index 0000000..f897a7f --- /dev/null +++ b/java/lox-interpreter/.settings/org.eclipse.m2e.core.prefs @@ -0,0 +1,4 @@ +activeProfiles= +eclipse.preferences.version=1 +resolveWorkspaceProjects=true +version=1 diff --git a/java/lox-interpreter/pom.xml b/java/lox-interpreter/pom.xml new file mode 100644 index 0000000..36e01d3 --- /dev/null +++ b/java/lox-interpreter/pom.xml @@ -0,0 +1,126 @@ + + + 4.0.0 + + ch.vanwa.lox_interpreter + lox-interpreter + 1.0-SNAPSHOT + + lox-interpreter + https://code.vanwa.ch + + + UTF-8 + 21 + + + + + + org.junit + junit-bom + 5.11.0 + pom + import + + + + + + + org.junit.jupiter + junit-jupiter-api + test + + + + org.junit.jupiter + junit-jupiter-params + 
test + + + + + + + + maven-clean-plugin + 3.4.0 + + + maven-resources-plugin + 3.3.1 + + + maven-compiler-plugin + 3.13.0 + + + maven-surefire-plugin + 3.3.0 + + + maven-jar-plugin + 3.4.2 + + + maven-install-plugin + 3.1.2 + + + maven-deploy-plugin + 3.1.2 + + + maven-site-plugin + 3.12.1 + + + maven-project-info-reports-plugin + 3.6.1 + + + + + + maven-assembly-plugin + 3.7.1 + + + jar-with-dependencies + + + + ch.vanwa.lox_interpreter.App + + + + + + make-assembly + package + + single + + + + + + org.codehaus.mojo + exec-maven-plugin + 3.5.0 + + + + java + + + + + ch.vanwa.lox_interpreter.App + + + + + diff --git a/java/lox-interpreter/src/main/java/ch/vanwa/lox_interpreter/App.java b/java/lox-interpreter/src/main/java/ch/vanwa/lox_interpreter/App.java new file mode 100644 index 0000000..bb48da0 --- /dev/null +++ b/java/lox-interpreter/src/main/java/ch/vanwa/lox_interpreter/App.java @@ -0,0 +1,19 @@ +package ch.vanwa.lox_interpreter; + +import java.io.IOException; + +public class App { + /** + * Entry point for the Lox interpreter cli. + */ + public static void main(String[] args) throws IOException { + if (args.length > 1) { + System.out.println("Usage: jlox [script]"); + System.exit(64); + } else if (args.length == 1) { + Lox.runFile(args[0]); + } else { + Lox.runPrompt(); + } + } +} diff --git a/java/lox-interpreter/src/main/java/ch/vanwa/lox_interpreter/Lox.java b/java/lox-interpreter/src/main/java/ch/vanwa/lox_interpreter/Lox.java new file mode 100644 index 0000000..cb1c8b0 --- /dev/null +++ b/java/lox-interpreter/src/main/java/ch/vanwa/lox_interpreter/Lox.java @@ -0,0 +1,90 @@ +package ch.vanwa.lox_interpreter; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.List; + +/** + * A Lox interpreter. + */ +public class Lox { + /** + * True if a run produced an error, false otherwise. 
+ */ + static boolean hadError = false; + + /** + * Run a file with Lox source code. + * + * @param path Path of the source code file. + */ + public static void runFile(String path) throws IOException { + byte[] bytes = Files.readAllBytes(Paths.get(path)); + run(new String(bytes, StandardCharsets.UTF_8)); + if (hadError) { + System.exit(65); + } + } + + /** + * Run a Lox REPL. + */ + public static void runPrompt() throws IOException { + var input = new InputStreamReader(System.in); + var reader = new BufferedReader(input); + + for (;;) { + System.out.print("> "); + String line = reader.readLine(); + if (line == null) { + break; + } + run(line); + hadError = false; + } + } + + /** + * Run Lox source code. + * + * @param source Lox source code. + * + * @return List of scanned tokens. + */ + public static List run(String source) { + var scanner = new Scanner(source); + List tokens = scanner.scanTokens(); + + for (Token token : tokens) { + System.out.println(token); + } + + return tokens; + } + + /** + * Print an error message. + * + * @param line Line number where the error occurred. + * @param message Error message. + */ + static void error(int line, String message) { + report(line, "", message); + } + + /** + * Print a message to stderr. + * + * @param line Line number where the error occurred. + * @param where Where the error occurred. + * @param message Error message. 
+ */ + private static void report(int line, String where, String message) { + System.err.println(String.format("[line %d] Error%s: %s", line, where, message)); + hadError = true; + } +} diff --git a/java/lox-interpreter/src/main/java/ch/vanwa/lox_interpreter/Scanner.java b/java/lox-interpreter/src/main/java/ch/vanwa/lox_interpreter/Scanner.java new file mode 100644 index 0000000..8070783 --- /dev/null +++ b/java/lox-interpreter/src/main/java/ch/vanwa/lox_interpreter/Scanner.java @@ -0,0 +1,342 @@ +package ch.vanwa.lox_interpreter; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import static ch.vanwa.lox_interpreter.TokenType.*; + +/** + * Convert source code into tokens for the Lox programming language. + */ +class Scanner { + /** + * Source code getting scanned. + */ + private final String source; + + /** + * List of already scanned tokens. + */ + private final List tokens = new ArrayList<>(); + + /** + * Starting index of the current lexeme. + */ + private int start = 0; + + /** + * Index of current character in the source code. + */ + private int current = 0; + + /** + * Which line the scanner is on in the source code. + */ + private int line = 1; + + /** + * Mapping of keyword string to {@link TokenType}. + */ + private static final Map keywords = Map.ofEntries( + Map.entry("and", AND), + Map.entry("class", CLASS), + Map.entry("else", ELSE), + Map.entry("false", FALSE), + Map.entry("for", FOR), + Map.entry("fun", FUN), + Map.entry("if", IF), + Map.entry("nil", NIL), + Map.entry("or", OR), + Map.entry("print", PRINT), + Map.entry("return", RETURN), + Map.entry("super", SUPER), + Map.entry("this", THIS), + Map.entry("true", TRUE), + Map.entry("var", VAR), + Map.entry("while", WHILE)); + + /** + * Initialize scanner. + * + * @param source Source code to scan. + */ + Scanner(final String source) { + this.source = source; + } + + /** + * Scan lexemes to tokens until the end is reached. + * + * @return List of scanned tokens + EOF. 
+ */ + List scanTokens() { + while (!isAtEnd()) { + start = current; + scanToken(); + } + + tokens.add(new Token(EOF, "", null, line)); + return tokens; + } + + /** + * Scan the next lexeme to a token. + */ + private void scanToken() { + char c = advance(); + switch (c) { + case '(': + addToken(LEFT_PAREN); + break; + case ')': + addToken(RIGHT_PAREN); + break; + case '{': + addToken(LEFT_BRACE); + break; + case '}': + addToken(RIGHT_BRACE); + break; + case ',': + addToken(COMMA); + break; + case '.': + addToken(DOT); + break; + case '-': + addToken(MINUS); + break; + case '+': + addToken(PLUS); + break; + case ';': + addToken(SEMICOLON); + break; + case '*': + addToken(STAR); + break; + case '!': + addToken(match('=') ? BANG_EQUAL : BANG); + break; + case '=': + addToken(match('=') ? EQUAL_EQUAL : EQUAL); + break; + case '<': + addToken(match('=') ? LESS_EQUAL : LESS); + break; + case '>': + addToken(match('=') ? GREATER_EQUAL : GREATER); + break; + case '/': + if (match('/')) { + // A comment goes until the end of the line. + while (peek() != '\n' && !isAtEnd()) + advance(); + } else { + addToken(SLASH); + } + break; + case ' ': + case '\r': + case '\t': + // Ignore whitespace. + break; + case '\n': + line++; + break; + case '"': + string(); + break; + default: + if (isDigit(c)) { + number(); + } else if (isAlpha(c)) { + identifier(); + } else { + Lox.error(line, "Unexpected character."); + } + break; + } + } + + /** + * Consume an identifier. + */ + private void identifier() { + while (isAlphaNumeric(peek())) { + advance(); + } + + String text = source.substring(start, current); + TokenType type = keywords.get(text); + if (type == null) { + type = IDENTIFIER; + } + addToken(type); + } + + /** + * Consume a number literal. + */ + private void number() { + while (isDigit(peek())) { + advance(); + } + + // Look for a fractional part. + if (peek() == '.' && isDigit(peekNext())) { + // Consume the "." 
+ advance(); + + while (isDigit(peek())) { + advance(); + } + } + + addToken(NUMBER, + Double.parseDouble(source.substring(start, current))); + } + + /** + * Consume a string literal. + */ + private void string() { + while (peek() != '"' && !isAtEnd()) { + if (peek() == '\n') { + line++; + } + advance(); + } + + if (isAtEnd()) { + Lox.error(line, "Unterminated string."); + return; + } + + // The closing ". + advance(); + + // Trim the surrounding quotes. + String value = source.substring(start + 1, current - 1); + addToken(STRING, value); + } + + /** + * Check if a character matches the one at the current pointer. + * + * @param expected Character used for matching against the current character. + * + * @return True if the expected character matches the current one, false + * otherwise. + */ + private boolean match(final char expected) { + if (isAtEnd()) { + return false; + } + if (source.charAt(current) != expected) { + return false; + } + + current++; + return true; + } + + /** + * Peek at the current character without consuming it. + * + * @return The character peeked at. + */ + private char peek() { + if (isAtEnd()) { + return '\0'; + } + return source.charAt(current); + } + + /** + * Peek at the next character without consuming it or moving the pointer. + * + * @return The character peeked at. + */ + private char peekNext() { + if (current + 1 >= source.length()) { + return '\0'; + } + return source.charAt(current + 1); + } + + /** + * Check if a character is a letter or an underscore (a-z,A-Z,_). + * + * @param c Character to check. + * + * @return True if the character is a letter or underscore, false otherwise. + */ + private boolean isAlpha(final char c) { + return (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + c == '_'; + } + + /** + * Check if a character is alphanumeric (a-z,A-Z,_,0-9). + * + * @param c Character to check. + * + * @return True if the character is alphanumeric, false otherwise. 
+ */ + private boolean isAlphaNumeric(final char c) { + return isAlpha(c) || isDigit(c); + } + + /** + * Check if a character is a digit (0-9). + * + * @param c Character to check. + * + * @return True if the character is a digit, false otherwise. + */ + private boolean isDigit(final char c) { + return c >= '0' && c <= '9'; + } + + /** + * Check if the scanner reached the end of the source code. + * + * @return True if the scanner reached the end of the source code, false + * otherwise. + */ + private boolean isAtEnd() { + return current >= source.length(); + } + + /** + * Advance the source code pointer by one. + * + * @return The character that was under the pointer before advancing. + */ + private char advance() { + return source.charAt(current++); + } + + /** + * Add a token without a literal to the list of scanned tokens. + * + * @param type Type of token. + */ + private void addToken(final TokenType type) { + addToken(type, null); + } + + /** + * Add a token to the list of scanned tokens. + * + * @param type Type of token. + * @param literal Value of a literal (can be null). + */ + private void addToken(final TokenType type, final Object literal) { + String text = source.substring(start, current); + tokens.add(new Token(type, text, literal, line)); + } +} diff --git a/java/lox-interpreter/src/main/java/ch/vanwa/lox_interpreter/Token.java b/java/lox-interpreter/src/main/java/ch/vanwa/lox_interpreter/Token.java new file mode 100644 index 0000000..a8d9630 --- /dev/null +++ b/java/lox-interpreter/src/main/java/ch/vanwa/lox_interpreter/Token.java @@ -0,0 +1,16 @@ +package ch.vanwa.lox_interpreter; + +/** + * Single token. + * + * @param type Type of the token. + * @param lexeme Lexeme that produced this token. + * @param literal Literal value of the token (can be null). + * @param line Line number where the token was scanned in the source code. 
+ */ +public record Token(TokenType type, String lexeme, Object literal, int line) { + @Override + public String toString() { + return String.format("%d: %s %s %s", line, type, lexeme, literal); + } +} diff --git a/java/lox-interpreter/src/main/java/ch/vanwa/lox_interpreter/TokenType.java b/java/lox-interpreter/src/main/java/ch/vanwa/lox_interpreter/TokenType.java new file mode 100644 index 0000000..d056d1d --- /dev/null +++ b/java/lox-interpreter/src/main/java/ch/vanwa/lox_interpreter/TokenType.java @@ -0,0 +1,25 @@ +package ch.vanwa.lox_interpreter; + +/** + * Enumeration of all token types for Lox. + */ +enum TokenType { + // Single-character tokens. + LEFT_PAREN, RIGHT_PAREN, LEFT_BRACE, RIGHT_BRACE, + COMMA, DOT, MINUS, PLUS, SEMICOLON, SLASH, STAR, + + // One or two character tokens. + BANG, BANG_EQUAL, + EQUAL, EQUAL_EQUAL, + GREATER, GREATER_EQUAL, + LESS, LESS_EQUAL, + + // Literals. + IDENTIFIER, STRING, NUMBER, + + // Keywords. + AND, CLASS, ELSE, FALSE, FUN, FOR, IF, NIL, OR, + PRINT, RETURN, SUPER, THIS, TRUE, VAR, WHILE, + + EOF +} diff --git a/java/lox-interpreter/src/test/java/ch/vanwa/lox_interpreter/AppTest.java b/java/lox-interpreter/src/test/java/ch/vanwa/lox_interpreter/AppTest.java new file mode 100644 index 0000000..d5f92b2 --- /dev/null +++ b/java/lox-interpreter/src/test/java/ch/vanwa/lox_interpreter/AppTest.java @@ -0,0 +1,27 @@ +package ch.vanwa.lox_interpreter; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.junit.jupiter.api.Test; + +/** + * Unit test for simple App. + */ +public class AppTest { + + /** + * Simple scanner test. 
+ */ + @Test + public void scanOneLine() { + var tokens = Lox.run("var language = \"lox\""); + + assertEquals(5, tokens.size()); + + assertEquals(new Token(TokenType.VAR, "var", null, 1), tokens.get(0)); + assertEquals(new Token(TokenType.IDENTIFIER, "language", null, 1), tokens.get(1)); + assertEquals(new Token(TokenType.EQUAL, "=", null, 1), tokens.get(2)); + assertEquals(new Token(TokenType.STRING, "\"lox\"", "lox", 1), tokens.get(3)); + assertEquals(new Token(TokenType.EOF, "", null, 1), tokens.get(4)); + } +}