java lox scanner implementation
This commit is contained in:
commit
dcc1ae8699
20 changed files with 848 additions and 0 deletions
|
@ -0,0 +1,19 @@
|
|||
package ch.vanwa.lox_interpreter;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class App {
|
||||
/**
|
||||
* Entry point for the Lox interpreter cli.
|
||||
*/
|
||||
public static void main(String[] args) throws IOException {
|
||||
if (args.length > 1) {
|
||||
System.out.println("Usage: jlox [script]");
|
||||
System.exit(64);
|
||||
} else if (args.length == 1) {
|
||||
Lox.runFile(args[0]);
|
||||
} else {
|
||||
Lox.runPrompt();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,90 @@
|
|||
package ch.vanwa.lox_interpreter;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* A Lox interpreter.
|
||||
*/
|
||||
public class Lox {
|
||||
/**
|
||||
* True if a run produced an error, false otherwise.
|
||||
*/
|
||||
static boolean hadError = false;
|
||||
|
||||
/**
|
||||
* Run a file with Lox source code.
|
||||
*
|
||||
* @param path Path of the source code file.
|
||||
*/
|
||||
public static void runFile(String path) throws IOException {
|
||||
byte[] bytes = Files.readAllBytes(Paths.get(path));
|
||||
run(new String(bytes, StandardCharsets.UTF_8));
|
||||
if (hadError) {
|
||||
System.exit(65);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Run a Lox REPL.
|
||||
*/
|
||||
public static void runPrompt() throws IOException {
|
||||
var input = new InputStreamReader(System.in);
|
||||
var reader = new BufferedReader(input);
|
||||
|
||||
for (;;) {
|
||||
System.out.print("> ");
|
||||
String line = reader.readLine();
|
||||
if (line == null) {
|
||||
break;
|
||||
}
|
||||
run(line);
|
||||
hadError = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Run Lox source code.
|
||||
*
|
||||
* @param source Lox source code.
|
||||
*
|
||||
* @return List of scanned tokens.
|
||||
*/
|
||||
public static List<Token> run(String source) {
|
||||
var scanner = new Scanner(source);
|
||||
List<Token> tokens = scanner.scanTokens();
|
||||
|
||||
for (Token token : tokens) {
|
||||
System.out.println(token);
|
||||
}
|
||||
|
||||
return tokens;
|
||||
}
|
||||
|
||||
/**
|
||||
* Print an error message.
|
||||
*
|
||||
* @param line Line number where the error ocurred.
|
||||
* @param message Error message.
|
||||
*/
|
||||
static void error(int line, String message) {
|
||||
report(line, "", message);
|
||||
}
|
||||
|
||||
/**
|
||||
* Print a message to stderr.
|
||||
*
|
||||
* @param line Line number where the error ocurred.
|
||||
* @param where Where the error occurred.
|
||||
* @param message Error message.
|
||||
*/
|
||||
private static void report(int line, String where, String message) {
|
||||
System.err.println(String.format("[line %d] Error%s: %s", line, where, message));
|
||||
hadError = true;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,342 @@
|
|||
package ch.vanwa.lox_interpreter;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static ch.vanwa.lox_interpreter.TokenType.*;
|
||||
|
||||
/**
|
||||
* Convert source code into tokens for the Lox programming language.
|
||||
*/
|
||||
class Scanner {
|
||||
/**
|
||||
* Source code getting scanned.
|
||||
*/
|
||||
private final String source;
|
||||
|
||||
/**
|
||||
* List of already scanned tokens.
|
||||
*/
|
||||
private final List<Token> tokens = new ArrayList<>();
|
||||
|
||||
/**
|
||||
* Starting index of the current lexeme.
|
||||
*/
|
||||
private int start = 0;
|
||||
|
||||
/**
|
||||
* Index of current character in the source code.
|
||||
*/
|
||||
private int current = 0;
|
||||
|
||||
/**
|
||||
* Which line the scanneris on in the source code.
|
||||
*/
|
||||
private int line = 1;
|
||||
|
||||
/**
|
||||
* Mapping of keyword string to {@link TokenType}.
|
||||
*/
|
||||
private static final Map<String, TokenType> keywords = Map.ofEntries(
|
||||
Map.entry("and", AND),
|
||||
Map.entry("class", CLASS),
|
||||
Map.entry("else", ELSE),
|
||||
Map.entry("false", FALSE),
|
||||
Map.entry("for", FOR),
|
||||
Map.entry("fun", FUN),
|
||||
Map.entry("if", IF),
|
||||
Map.entry("nil", NIL),
|
||||
Map.entry("or", OR),
|
||||
Map.entry("print", PRINT),
|
||||
Map.entry("return", RETURN),
|
||||
Map.entry("super", SUPER),
|
||||
Map.entry("this", THIS),
|
||||
Map.entry("true", TRUE),
|
||||
Map.entry("var", VAR),
|
||||
Map.entry("while", WHILE));
|
||||
|
||||
/**
|
||||
* Initialize scanner.
|
||||
*
|
||||
* @param source Source code to scan.
|
||||
*/
|
||||
Scanner(final String source) {
|
||||
this.source = source;
|
||||
}
|
||||
|
||||
/**
|
||||
* Scan lexemes to tokens until the end is reached.
|
||||
*
|
||||
* @return List of scanned tokens + EOF.
|
||||
*/
|
||||
List<Token> scanTokens() {
|
||||
while (!isAtEnd()) {
|
||||
start = current;
|
||||
scanToken();
|
||||
}
|
||||
|
||||
tokens.add(new Token(EOF, "", null, line));
|
||||
return tokens;
|
||||
}
|
||||
|
||||
/**
|
||||
* Scan the next lexeme to a token.
|
||||
*/
|
||||
private void scanToken() {
|
||||
char c = advance();
|
||||
switch (c) {
|
||||
case '(':
|
||||
addToken(LEFT_PAREN);
|
||||
break;
|
||||
case ')':
|
||||
addToken(RIGHT_PAREN);
|
||||
break;
|
||||
case '{':
|
||||
addToken(LEFT_BRACE);
|
||||
break;
|
||||
case '}':
|
||||
addToken(RIGHT_BRACE);
|
||||
break;
|
||||
case ',':
|
||||
addToken(COMMA);
|
||||
break;
|
||||
case '.':
|
||||
addToken(DOT);
|
||||
break;
|
||||
case '-':
|
||||
addToken(MINUS);
|
||||
break;
|
||||
case '+':
|
||||
addToken(PLUS);
|
||||
break;
|
||||
case ';':
|
||||
addToken(SEMICOLON);
|
||||
break;
|
||||
case '*':
|
||||
addToken(STAR);
|
||||
break;
|
||||
case '!':
|
||||
addToken(match('=') ? BANG_EQUAL : BANG);
|
||||
break;
|
||||
case '=':
|
||||
addToken(match('=') ? EQUAL_EQUAL : EQUAL);
|
||||
break;
|
||||
case '<':
|
||||
addToken(match('=') ? LESS_EQUAL : LESS);
|
||||
break;
|
||||
case '>':
|
||||
addToken(match('=') ? GREATER_EQUAL : GREATER);
|
||||
break;
|
||||
case '/':
|
||||
if (match('/')) {
|
||||
// A comment goes until the end of the line.
|
||||
while (peek() != '\n' && !isAtEnd())
|
||||
advance();
|
||||
} else {
|
||||
addToken(SLASH);
|
||||
}
|
||||
break;
|
||||
case ' ':
|
||||
case '\r':
|
||||
case '\t':
|
||||
// Ignore whitespace.
|
||||
break;
|
||||
case '\n':
|
||||
line++;
|
||||
break;
|
||||
case '"':
|
||||
string();
|
||||
break;
|
||||
default:
|
||||
if (isDigit(c)) {
|
||||
number();
|
||||
} else if (isAlpha(c)) {
|
||||
identifier();
|
||||
} else {
|
||||
Lox.error(line, "Unexpected character.");
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Consume an identifier.
|
||||
*/
|
||||
private void identifier() {
|
||||
while (isAlphaNumeric(peek())) {
|
||||
advance();
|
||||
}
|
||||
|
||||
String text = source.substring(start, current);
|
||||
TokenType type = keywords.get(text);
|
||||
if (type == null) {
|
||||
type = IDENTIFIER;
|
||||
}
|
||||
addToken(type);
|
||||
}
|
||||
|
||||
/**
|
||||
* Consume a number literal.
|
||||
*/
|
||||
private void number() {
|
||||
while (isDigit(peek())) {
|
||||
advance();
|
||||
}
|
||||
|
||||
// Look for a fractional part.
|
||||
if (peek() == '.' && isDigit(peekNext())) {
|
||||
// Consume the "."
|
||||
advance();
|
||||
|
||||
while (isDigit(peek())) {
|
||||
advance();
|
||||
}
|
||||
}
|
||||
|
||||
addToken(NUMBER,
|
||||
Double.parseDouble(source.substring(start, current)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Consume a string literal.
|
||||
*/
|
||||
private void string() {
|
||||
while (peek() != '"' && !isAtEnd()) {
|
||||
if (peek() == '\n') {
|
||||
line++;
|
||||
}
|
||||
advance();
|
||||
}
|
||||
|
||||
if (isAtEnd()) {
|
||||
Lox.error(line, "Unterminated string.");
|
||||
return;
|
||||
}
|
||||
|
||||
// The closing ".
|
||||
advance();
|
||||
|
||||
// Trim the surrounding quotes.
|
||||
String value = source.substring(start + 1, current - 1);
|
||||
addToken(STRING, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a character matches the one at the current pointer.
|
||||
*
|
||||
* @param expected Character used for matching against the current character.
|
||||
*
|
||||
* @return True if the expected character matches the current one, false
|
||||
* otherwise.
|
||||
*/
|
||||
private boolean match(final char expected) {
|
||||
if (isAtEnd()) {
|
||||
return false;
|
||||
}
|
||||
if (source.charAt(current) != expected) {
|
||||
return false;
|
||||
}
|
||||
|
||||
current++;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Peek at the current character without consuming it.
|
||||
*
|
||||
* @return The character peeked at.
|
||||
*/
|
||||
private char peek() {
|
||||
if (isAtEnd()) {
|
||||
return '\0';
|
||||
}
|
||||
return source.charAt(current);
|
||||
}
|
||||
|
||||
/**
|
||||
* Peek at the next character without consuming it or moving the pointer.
|
||||
*
|
||||
* @return The character peeked at.
|
||||
*/
|
||||
private char peekNext() {
|
||||
if (current + 1 >= source.length()) {
|
||||
return '\0';
|
||||
}
|
||||
return source.charAt(current + 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a character is a letter or an underscore (a-z,A-Z,_).
|
||||
*
|
||||
* @param c Character to check.
|
||||
*
|
||||
* @return True if the character is a letter or underscore, false otherwise.
|
||||
*/
|
||||
private boolean isAlpha(final char c) {
|
||||
return (c >= 'a' && c <= 'z') ||
|
||||
(c >= 'A' && c <= 'Z') ||
|
||||
c == '_';
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a character is alphanumeric (a-z,A-Z,_,0-9).
|
||||
*
|
||||
* @param c Character to check.
|
||||
*
|
||||
* @return True if the character is alphanumeric, false otherwise.
|
||||
*/
|
||||
private boolean isAlphaNumeric(final char c) {
|
||||
return isAlpha(c) || isDigit(c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a character is a digit (0-9).
|
||||
*
|
||||
* @param c Character to check.
|
||||
*
|
||||
* @return True if the character is a digit, false otherwise.
|
||||
*/
|
||||
private boolean isDigit(final char c) {
|
||||
return c >= '0' && c <= '9';
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the scanner reached the end of the source code.
|
||||
*
|
||||
* @return True if the scaner reached the end of the source code, false
|
||||
* otherwise.
|
||||
*/
|
||||
private boolean isAtEnd() {
|
||||
return current >= source.length();
|
||||
}
|
||||
|
||||
/**
|
||||
* Advance the source code pointer by one.
|
||||
*
|
||||
* @return The character under the advanced pointer.
|
||||
*/
|
||||
private char advance() {
|
||||
return source.charAt(current++);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a token without a literal to the list of scanned tokens.
|
||||
*
|
||||
* @param type Type of token.
|
||||
*/
|
||||
private void addToken(final TokenType type) {
|
||||
addToken(type, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a token to the list of scanned tokens.
|
||||
*
|
||||
* @param type Type of token.
|
||||
* @param literal Value of a literal (can be null).
|
||||
*/
|
||||
private void addToken(final TokenType type, final Object literal) {
|
||||
String text = source.substring(start, current);
|
||||
tokens.add(new Token(type, text, literal, line));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
package ch.vanwa.lox_interpreter;
|
||||
|
||||
/**
|
||||
* Single token.
|
||||
*
|
||||
* @param type Type of the token.
|
||||
* @param lexeme Lexeme that produced this token.
|
||||
* @param literal Literal value of the token (can be null).
|
||||
* @param line Line number where the token was scanned in the source code.
|
||||
*/
|
||||
public record Token(TokenType type, String lexeme, Object literal, int line) {
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%d: %s %s %s", line, type, lexeme, literal);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
package ch.vanwa.lox_interpreter;
|
||||
|
||||
/**
|
||||
* Enumeration of all token types for Lox.
|
||||
*/
|
||||
enum TokenType {
    // Tokens that always consist of a single character.
    LEFT_PAREN,
    RIGHT_PAREN,
    LEFT_BRACE,
    RIGHT_BRACE,
    COMMA,
    DOT,
    MINUS,
    PLUS,
    SEMICOLON,
    SLASH,
    STAR,

    // Tokens that consist of one or two characters.
    BANG,
    BANG_EQUAL,
    EQUAL,
    EQUAL_EQUAL,
    GREATER,
    GREATER_EQUAL,
    LESS,
    LESS_EQUAL,

    // Literals.
    IDENTIFIER,
    STRING,
    NUMBER,

    // Reserved words.
    AND,
    CLASS,
    ELSE,
    FALSE,
    FUN,
    FOR,
    IF,
    NIL,
    OR,
    PRINT,
    RETURN,
    SUPER,
    THIS,
    TRUE,
    VAR,
    WHILE,

    // Marks the end of the source code.
    EOF
}
|
|
@ -0,0 +1,27 @@
|
|||
package ch.vanwa.lox_interpreter;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
/**
|
||||
* Unit test for simple App.
|
||||
*/
|
||||
public class AppTest {
|
||||
|
||||
/**
|
||||
* Simple scanner test.
|
||||
*/
|
||||
@Test
|
||||
public void scanOneLine() {
|
||||
var tokens = Lox.run("var language = \"lox\"");
|
||||
|
||||
assertEquals(5, tokens.size());
|
||||
|
||||
assertEquals(new Token(TokenType.VAR, "var", null, 1), tokens.get(0));
|
||||
assertEquals(new Token(TokenType.IDENTIFIER, "language", null, 1), tokens.get(1));
|
||||
assertEquals(new Token(TokenType.EQUAL, "=", null, 1), tokens.get(2));
|
||||
assertEquals(new Token(TokenType.STRING, "\"lox\"", "lox", 1), tokens.get(3));
|
||||
assertEquals(new Token(TokenType.EOF, "", null, 1), tokens.get(4));
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue