java lox scanner implementation

This commit is contained in:
Sebastian Hugentobler 2025-02-06 10:54:21 +01:00
commit dcc1ae8699
20 changed files with 848 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.direnv/

1
java/.envrc Normal file
View File

@ -0,0 +1 @@
use flake

1
java/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
target/

61
java/flake.lock Normal file
View File

@ -0,0 +1,61 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1738680400,
"narHash": "sha256-ooLh+XW8jfa+91F1nhf9OF7qhuA/y1ChLx6lXDNeY5U=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "799ba5bffed04ced7067a91798353d360788b30d",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

29
java/flake.nix Normal file
View File

@ -0,0 +1,29 @@
# Nix flake describing the development environment of the Java Lox interpreter.
{
  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-utils.url = "github:numtide/flake-utils";
  };

  outputs =
    { nixpkgs, flake-utils, ... }:
    # Produce the same outputs for every default system of flake-utils.
    flake-utils.lib.eachDefaultSystem (
      system:
      let
        pkgs = import nixpkgs { inherit system; };
      in
      {
        # Shell with the JDK, the Java language server and Maven.
        devShells.default = pkgs.mkShell {
          buildInputs = [
            pkgs.jdk21
            pkgs.jdt-language-server
            pkgs.maven
          ];
        };
      }
    );
}

View File

@ -0,0 +1,57 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
<attribute name="optional" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
<attribute name="test" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
<attribute name="test" value="true"/>
<attribute name="optional" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-21">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" path="target/generated-sources/annotations">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
<attribute name="ignore_optional_problems" value="true"/>
<attribute name="m2e-apt" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="target/generated-test-sources/test-annotations">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
<attribute name="ignore_optional_problems" value="true"/>
<attribute name="m2e-apt" value="true"/>
<attribute name="test" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>

View File

View File

View File

@ -0,0 +1,34 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>lox-interpreter</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
<filteredResources>
<filter>
<id>1738828672243</id>
<name></name>
<type>30</type>
<matcher>
<id>org.eclipse.core.resources.regexFilterMatcher</id>
<arguments>node_modules|\.git|__CREATED_BY_JAVA_LANGUAGE_SERVER__</arguments>
</matcher>
</filter>
</filteredResources>
</projectDescription>

View File

@ -0,0 +1,4 @@
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/test/java=UTF-8
encoding/<project>=UTF-8

View File

@ -0,0 +1,2 @@
eclipse.preferences.version=1
org.eclipse.jdt.apt.aptEnabled=false

View File

@ -0,0 +1,9 @@
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.targetPlatform=21
org.eclipse.jdt.core.compiler.compliance=21
org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore
org.eclipse.jdt.core.compiler.processAnnotations=disabled
org.eclipse.jdt.core.compiler.release=enabled
org.eclipse.jdt.core.compiler.source=21

View File

@ -0,0 +1,4 @@
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1

View File

@ -0,0 +1,126 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>ch.vanwa.lox_interpreter</groupId>
<artifactId>lox-interpreter</artifactId>
<version>1.0-SNAPSHOT</version>
<name>lox-interpreter</name>
<url>https://code.vanwa.ch</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.release>21</maven.compiler.release>
</properties>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.junit</groupId>
<artifactId>junit-bom</artifactId>
<version>5.11.0</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<scope>test</scope>
</dependency>
<!-- Optional: support for parameterized tests -->
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-params</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<pluginManagement>
<plugins>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.4.0</version>
</plugin>
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.3.1</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.13.0</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.3.0</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.4.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>3.1.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>3.1.2</version>
</plugin>
<plugin>
<artifactId>maven-site-plugin</artifactId>
<version>3.12.1</version>
</plugin>
<plugin>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.6.1</version>
</plugin>
</plugins>
</pluginManagement>
<plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.7.1</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifest>
<mainClass>ch.vanwa.lox_interpreter.App</mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>3.5.0</version>
<executions>
<execution>
<goals>
<goal>java</goal>
</goals>
</execution>
</executions>
<configuration>
<mainClass>ch.vanwa.lox_interpreter.App</mainClass>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,19 @@
package ch.vanwa.lox_interpreter;
import java.io.IOException;
/**
 * Command line front end of the Lox interpreter.
 */
public class App {
    /**
     * Entry point for the Lox interpreter cli.
     *
     * Without arguments an interactive prompt is started, with exactly one
     * argument that argument is run as a script file. Any other number of
     * arguments prints the usage message and exits with status 64.
     *
     * @param args Command line arguments.
     *
     * @throws IOException Propagated from running the script file or the prompt.
     */
    public static void main(String[] args) throws IOException {
        switch (args.length) {
            case 0 -> Lox.runPrompt();
            case 1 -> Lox.runFile(args[0]);
            default -> {
                System.out.println("Usage: jlox [script]");
                System.exit(64);
            }
        }
    }
}

View File

@ -0,0 +1,90 @@
package ch.vanwa.lox_interpreter;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
/**
* A Lox interpreter.
*/
public class Lox {
/**
* True if a run produced an error, false otherwise.
*/
static boolean hadError = false;
/**
* Run a file with Lox source code.
*
* @param path Path of the source code file.
*/
public static void runFile(String path) throws IOException {
byte[] bytes = Files.readAllBytes(Paths.get(path));
run(new String(bytes, StandardCharsets.UTF_8));
if (hadError) {
System.exit(65);
}
}
/**
* Run a Lox REPL.
*/
public static void runPrompt() throws IOException {
var input = new InputStreamReader(System.in);
var reader = new BufferedReader(input);
for (;;) {
System.out.print("> ");
String line = reader.readLine();
if (line == null) {
break;
}
run(line);
hadError = false;
}
}
/**
* Run Lox source code.
*
* @param source Lox source code.
*
* @return List of scanned tokens.
*/
public static List<Token> run(String source) {
var scanner = new Scanner(source);
List<Token> tokens = scanner.scanTokens();
for (Token token : tokens) {
System.out.println(token);
}
return tokens;
}
/**
* Print an error message.
*
* @param line Line number where the error ocurred.
* @param message Error message.
*/
static void error(int line, String message) {
report(line, "", message);
}
/**
* Print a message to stderr.
*
* @param line Line number where the error ocurred.
* @param where Where the error occurred.
* @param message Error message.
*/
private static void report(int line, String where, String message) {
System.err.println(String.format("[line %d] Error%s: %s", line, where, message));
hadError = true;
}
}

View File

@ -0,0 +1,342 @@
package ch.vanwa.lox_interpreter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import static ch.vanwa.lox_interpreter.TokenType.*;
/**
 * Lexer for the Lox programming language: turns raw source text into a flat
 * list of {@link Token}s.
 */
class Scanner {
    /**
     * Reserved words of Lox and the {@link TokenType} each of them maps to.
     */
    private static final Map<String, TokenType> keywords = Map.ofEntries(
            Map.entry("and", AND),
            Map.entry("class", CLASS),
            Map.entry("else", ELSE),
            Map.entry("false", FALSE),
            Map.entry("for", FOR),
            Map.entry("fun", FUN),
            Map.entry("if", IF),
            Map.entry("nil", NIL),
            Map.entry("or", OR),
            Map.entry("print", PRINT),
            Map.entry("return", RETURN),
            Map.entry("super", SUPER),
            Map.entry("this", THIS),
            Map.entry("true", TRUE),
            Map.entry("var", VAR),
            Map.entry("while", WHILE));

    /**
     * Complete source text handed to the scanner.
     */
    private final String source;

    /**
     * Tokens produced so far, in source order.
     */
    private final List<Token> tokens = new ArrayList<>();

    /**
     * Index of the first character of the lexeme currently being scanned.
     */
    private int start = 0;

    /**
     * Index of the next character that has not been consumed yet.
     */
    private int current = 0;

    /**
     * Line of the source code the scanner is currently on, starting at 1.
     */
    private int line = 1;

    /**
     * Create a scanner for a piece of source code.
     *
     * @param source Source code to scan.
     */
    Scanner(final String source) {
        this.source = source;
    }

    /**
     * Scan the whole source code, one lexeme at a time.
     *
     * @return All scanned tokens followed by a final EOF token.
     */
    List<Token> scanTokens() {
        while (!isAtEnd()) {
            // Every pass of the loop begins a fresh lexeme.
            start = current;
            scanToken();
        }

        tokens.add(new Token(EOF, "", null, line));
        return tokens;
    }

    /**
     * Consume a single lexeme and, unless it is whitespace, a comment or an
     * unexpected character, add the corresponding token.
     */
    private void scanToken() {
        final char c = advance();

        switch (c) {
            // Tokens that are always a single character.
            case '(' -> addToken(LEFT_PAREN);
            case ')' -> addToken(RIGHT_PAREN);
            case '{' -> addToken(LEFT_BRACE);
            case '}' -> addToken(RIGHT_BRACE);
            case ',' -> addToken(COMMA);
            case '.' -> addToken(DOT);
            case '-' -> addToken(MINUS);
            case '+' -> addToken(PLUS);
            case ';' -> addToken(SEMICOLON);
            case '*' -> addToken(STAR);

            // Tokens that may be followed by a '='.
            case '!' -> addToken(match('=') ? BANG_EQUAL : BANG);
            case '=' -> addToken(match('=') ? EQUAL_EQUAL : EQUAL);
            case '<' -> addToken(match('=') ? LESS_EQUAL : LESS);
            case '>' -> addToken(match('=') ? GREATER_EQUAL : GREATER);

            // Either the start of a line comment or a division.
            case '/' -> {
                if (match('/')) {
                    skipLineComment();
                } else {
                    addToken(SLASH);
                }
            }

            case ' ', '\r', '\t' -> {
                // Whitespace produces no token.
            }
            case '\n' -> line++;

            case '"' -> string();

            default -> {
                if (isDigit(c)) {
                    number();
                } else if (isAlpha(c)) {
                    identifier();
                } else {
                    Lox.error(line, "Unexpected character.");
                }
            }
        }
    }

    /**
     * Consume the rest of a line comment, the newline itself is left alone.
     */
    private void skipLineComment() {
        while (!isAtEnd() && peek() != '\n') {
            advance();
        }
    }

    /**
     * Consume an identifier or a keyword.
     */
    private void identifier() {
        while (isAlphaNumeric(peek())) {
            advance();
        }

        // Reserved words get their own token type, anything else is an identifier.
        final String word = source.substring(start, current);
        addToken(keywords.getOrDefault(word, IDENTIFIER));
    }

    /**
     * Consume a number literal, with an optional fractional part.
     */
    private void number() {
        while (isDigit(peek())) {
            advance();
        }

        // A dot only belongs to the number if a digit follows it.
        final boolean hasFraction = peek() == '.' && isDigit(peekNext());
        if (hasFraction) {
            // Step over the dot itself.
            advance();

            while (isDigit(peek())) {
                advance();
            }
        }

        final String digits = source.substring(start, current);
        addToken(NUMBER, Double.parseDouble(digits));
    }

    /**
     * Consume a string literal, which may span multiple lines.
     */
    private void string() {
        while (!isAtEnd() && peek() != '"') {
            if (peek() == '\n') {
                line++;
            }
            advance();
        }

        if (isAtEnd()) {
            Lox.error(line, "Unterminated string.");
            return;
        }

        // Step over the closing quote.
        advance();

        // The literal value is the lexeme without its surrounding quotes.
        addToken(STRING, source.substring(start + 1, current - 1));
    }

    /**
     * Consume the current character, but only if it is the expected one.
     *
     * @param expected Character used for matching against the current character.
     *
     * @return True if the current character matched and got consumed, false
     *         otherwise (including at the end of the source code).
     */
    private boolean match(final char expected) {
        if (isAtEnd() || source.charAt(current) != expected) {
            return false;
        }

        current++;
        return true;
    }

    /**
     * Look at the current character without consuming it.
     *
     * @return The current character or '\0' at the end of the source code.
     */
    private char peek() {
        return isAtEnd() ? '\0' : source.charAt(current);
    }

    /**
     * Look at the character after the current one without consuming anything.
     *
     * @return The character after the current one or '\0' if there is none.
     */
    private char peekNext() {
        final int next = current + 1;
        return next < source.length() ? source.charAt(next) : '\0';
    }

    /**
     * Check if a character is a letter or an underscore (a-z,A-Z,_).
     *
     * @param c Character to check.
     *
     * @return True if the character is a letter or underscore, false otherwise.
     */
    private boolean isAlpha(final char c) {
        return c == '_'
                || ('a' <= c && c <= 'z')
                || ('A' <= c && c <= 'Z');
    }

    /**
     * Check if a character is alphanumeric (a-z,A-Z,_,0-9).
     *
     * @param c Character to check.
     *
     * @return True if the character is alphanumeric, false otherwise.
     */
    private boolean isAlphaNumeric(final char c) {
        return isAlpha(c) || isDigit(c);
    }

    /**
     * Check if a character is a digit (0-9).
     *
     * @param c Character to check.
     *
     * @return True if the character is a digit, false otherwise.
     */
    private boolean isDigit(final char c) {
        return '0' <= c && c <= '9';
    }

    /**
     * Check if the scanner reached the end of the source code.
     *
     * @return True if the scanner reached the end of the source code, false
     *         otherwise.
     */
    private boolean isAtEnd() {
        return current >= source.length();
    }

    /**
     * Consume the current character.
     *
     * @return The character that was current before the pointer moved on.
     */
    private char advance() {
        final char consumed = source.charAt(current);
        current++;
        return consumed;
    }

    /**
     * Add a token without a literal to the list of scanned tokens.
     *
     * @param type Type of token.
     */
    private void addToken(final TokenType type) {
        addToken(type, null);
    }

    /**
     * Add a token for the current lexeme to the list of scanned tokens.
     *
     * @param type    Type of token.
     * @param literal Value of a literal (can be null).
     */
    private void addToken(final TokenType type, final Object literal) {
        final String lexeme = source.substring(start, current);
        tokens.add(new Token(type, lexeme, literal, line));
    }
}

View File

@ -0,0 +1,16 @@
package ch.vanwa.lox_interpreter;
/**
 * A single token produced by the scanner.
 *
 * @param type    Type of the token.
 * @param lexeme  Lexeme that produced this token.
 * @param literal Literal value of the token (can be null).
 * @param line    Line number where the token was scanned in the source code.
 */
public record Token(TokenType type, String lexeme, Object literal, int line) {
    /**
     * Render the token as "line: type lexeme literal".
     */
    @Override
    public String toString() {
        return "%d: %s %s %s".formatted(line, type, lexeme, literal);
    }
}

View File

@ -0,0 +1,25 @@
package ch.vanwa.lox_interpreter;
/**
 * Every kind of token the Lox scanner can produce.
 */
enum TokenType {
    // Tokens consisting of exactly one character.
    LEFT_PAREN,
    RIGHT_PAREN,
    LEFT_BRACE,
    RIGHT_BRACE,
    COMMA,
    DOT,
    MINUS,
    PLUS,
    SEMICOLON,
    SLASH,
    STAR,

    // Tokens consisting of one or two characters.
    BANG,
    BANG_EQUAL,
    EQUAL,
    EQUAL_EQUAL,
    GREATER,
    GREATER_EQUAL,
    LESS,
    LESS_EQUAL,

    // Literals.
    IDENTIFIER,
    STRING,
    NUMBER,

    // Reserved words.
    AND,
    CLASS,
    ELSE,
    FALSE,
    FUN,
    FOR,
    IF,
    NIL,
    OR,
    PRINT,
    RETURN,
    SUPER,
    THIS,
    TRUE,
    VAR,
    WHILE,

    // Marks the end of the source code.
    EOF
}

View File

@ -0,0 +1,27 @@
package ch.vanwa.lox_interpreter;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.junit.jupiter.api.Test;
/**
 * Tests for the scanning stage of the Lox interpreter.
 */
public class AppTest {
    /**
     * Scanning a single variable declaration yields the keyword, the
     * identifier, the equals sign, the string literal and a final EOF token,
     * all of them on line 1.
     */
    @Test
    public void scanOneLine() {
        final Token[] expected = {
                new Token(TokenType.VAR, "var", null, 1),
                new Token(TokenType.IDENTIFIER, "language", null, 1),
                new Token(TokenType.EQUAL, "=", null, 1),
                new Token(TokenType.STRING, "\"lox\"", "lox", 1),
                new Token(TokenType.EOF, "", null, 1),
        };

        var tokens = Lox.run("var language = \"lox\"");

        assertEquals(expected.length, tokens.size());
        for (int i = 0; i < expected.length; i++) {
            assertEquals(expected[i], tokens.get(i));
        }
    }
}