// crafting-interpreters/src/tokenizer/lookahead.rs

use super::interface::Tokenizer;
use crate::token::TokenType::*;
use crate::token::{Token, TokenType};
use lazy_static::lazy_static;
use std::{collections::HashMap, iter::Peekable, str::CharIndices};

/// Data for one and two character lexemes.
struct LookaheadEntry {
    /// TokenType if the lexeme is a single character.
    default_token: TokenType,
    /// Mapping of the second character to the TokenType of the two character lexeme.
    lookahead_map: HashMap<char, TokenType>,
}

lazy_static! {
    /// Mapping of one and two character lexemes, specifying the one character variant and as many
    /// two character ones as needed.
    static ref LOOKAHEAD_TOKENS: HashMap<char, LookaheadEntry> = {
        let mut m = HashMap::new();

        let mut bang_map = HashMap::new();
        bang_map.insert('=', BangEqual);
        m.insert(
            '!',
            LookaheadEntry {
                default_token: Bang,
                lookahead_map: bang_map,
            },
        );

        let mut equal_map = HashMap::new();
        equal_map.insert('=', EqualEqual);
        m.insert(
            '=',
            LookaheadEntry {
                default_token: Equal,
                lookahead_map: equal_map,
            },
        );

        let mut less_map = HashMap::new();
        less_map.insert('=', LessEqual);
        m.insert(
            '<',
            LookaheadEntry {
                default_token: Less,
                lookahead_map: less_map,
            },
        );

        let mut greater_map = HashMap::new();
        greater_map.insert('=', GreaterEqual);
        m.insert(
            '>',
            LookaheadEntry {
                default_token: Greater,
                lookahead_map: greater_map,
            },
        );

        m
    };
}

/// Consume lexemes that consist of exactly one or two characters.
pub struct Lookahead;

impl Tokenizer for Lookahead {
    fn run(
        &self,
        c: (usize, char),
        chars: &mut Peekable<CharIndices<'_>>,
        source: &str,
        line: usize,
    ) -> Option<(usize, Option<Token>)> {
        LOOKAHEAD_TOKENS.get(&c.1).map(|entry| {
            // Peek at the next character: if it completes a two character
            // lexeme, consume it; otherwise fall back to the one character
            // token. Byte-range indexing into `source` is safe here because
            // all handled lexemes are ASCII.
            let (lexeme, token_type) = if let Some(&(_, peeked)) = chars.peek() {
                if let Some(&token_type) = entry.lookahead_map.get(&peeked) {
                    chars.next();
                    (source[c.0..=c.0 + 1].to_string(), token_type)
                } else {
                    (source[c.0..=c.0].to_string(), entry.default_token)
                }
            } else {
                // End of input: only the one character lexeme is possible.
                (source[c.0..=c.0].to_string(), entry.default_token)
            };

            (
                0,
                Some(Token {
                    token_type,
                    lexeme,
                    literal: None,
                    line,
                }),
            )
        })
    }
}
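
// A minimal test sketch of the `run` contract above, not part of the
// original file; it assumes `TokenType` derives `PartialEq` and `Debug`
// so that `assert_eq!` compiles, and relies on `Lookahead`, `Tokenizer`,
// and the TokenType variants being reachable through `use super::*`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn two_character_lexeme_consumes_the_lookahead() {
        let source = "!=";
        let mut chars = source.char_indices().peekable();
        let first = chars.next().unwrap();

        let (_, token) = Lookahead.run(first, &mut chars, source, 1).unwrap();
        let token = token.unwrap();

        assert_eq!(token.token_type, BangEqual);
        assert_eq!(token.lexeme, "!=");
        // The '=' was consumed as part of the two character lexeme.
        assert!(chars.next().is_none());
    }

    #[test]
    fn one_character_lexeme_leaves_the_lookahead() {
        let source = "<3";
        let mut chars = source.char_indices().peekable();
        let first = chars.next().unwrap();

        let (_, token) = Lookahead.run(first, &mut chars, source, 1).unwrap();
        let token = token.unwrap();

        assert_eq!(token.token_type, Less);
        assert_eq!(token.lexeme, "<");
        // The next character is left untouched for later tokenizers.
        assert_eq!(chars.next(), Some((1, '3')));
    }
}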