use super::interface::Tokenizer;
|
|
|
|
use crate::token::TokenType::*;
|
|
|
|
use crate::token::{Token, TokenType};
|
|
|
|
use lazy_static::lazy_static;
|
|
|
|
use std::{collections::HashMap, iter::Peekable, str::CharIndices};
|
|
|
|
|
/// Data for one and two character lexemes.
///
/// An entry describes every lexeme that starts with a given first character:
/// the token produced when the character stands alone, plus the tokens
/// produced when a specific second character follows it.
struct LookaheadEntry {
    /// TokenType if a lexeme is a one character one.
    default_token: TokenType,

    /// Mapping of second level character to a TokenType.
    /// If the character after the first one is a key of this map, the pair
    /// forms a two character lexeme with the mapped TokenType.
    lookahead_map: HashMap<char, TokenType>,
}
|
|
|
|
|
|
|
|
lazy_static! {
    /// Mapping of one and two character lexemes, specifying the one character variant and as many
    /// two character ones as needed.
    static ref LOOKAHEAD_TOKENS: HashMap<char, LookaheadEntry> = {
        let mut m = HashMap::new();

        // (first char, token when alone, second char, token for the pair).
        // Each row replaces one copy of the previously duplicated
        // build-a-map-then-insert boilerplate.
        let table = [
            ('!', Bang, '=', BangEqual),
            ('=', Equal, '=', EqualEqual),
            ('<', Less, '=', LessEqual),
            ('>', Greater, '=', GreaterEqual),
        ];

        for (first, default_token, second, pair_token) in table {
            let mut lookahead_map = HashMap::new();
            lookahead_map.insert(second, pair_token);
            m.insert(
                first,
                LookaheadEntry {
                    default_token,
                    lookahead_map,
                },
            );
        }

        m
    };
}
|
|
|
|
|
/// Consume lexemes that consist of exactly one or two characters.
///
/// Stateless tokenizer stage; the character data it recognizes lives in
/// [`LOOKAHEAD_TOKENS`].
pub struct Lookahead;
|
|
|
|
impl Tokenizer for Lookahead {
|
|
|
|
fn run(
|
|
|
|
&self,
|
|
|
|
c: (usize, char),
|
|
|
|
chars: &mut Peekable<CharIndices<'_>>,
|
|
|
|
source: &str,
|
|
|
|
line: usize,
|
|
|
|
) -> Option<(usize, Option<Token>)> {
|
|
|
|
LOOKAHEAD_TOKENS.get(&c.1).map(|entry| {
|
|
|
|
let (lexeme, token_type) = if let Some(&(_, peeked)) = chars.peek() {
|
|
|
|
if let Some(&token_type) = entry.lookahead_map.get(&peeked) {
|
|
|
|
chars.next();
|
|
|
|
(source[c.0..=c.0 + 1].to_string(), token_type)
|
|
|
|
} else {
|
|
|
|
(source[c.0..=c.0].to_string(), entry.default_token)
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
(source[c.0..=c.0].to_string(), entry.default_token)
|
|
|
|
};
|
|
|
|
(
|
|
|
|
0,
|
|
|
|
Some(Token {
|
|
|
|
token_type,
|
|
|
|
lexeme,
|
|
|
|
literal: None,
|
|
|
|
line,
|
|
|
|
}),
|
|
|
|
)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|