From 50780273f2741cb97193bfe1b368026653b4322e Mon Sep 17 00:00:00 2001
From: ctsk <9384305+ctsk@users.noreply.github.com>
Date: Wed, 12 Apr 2023 12:46:24 +0200
Subject: [PATCH] [rlox] Implement Scanner

---
 rlox/src/lc.rs   | 328 +++++++++++++++++++++++++++++++++++++++++++++++
 rlox/src/main.rs |  21 ++++
 rlox/src/vm.rs   |   9 +-
 3 files changed, 353 insertions(+), 5 deletions(-)
 create mode 100644 rlox/src/lc.rs

diff --git a/rlox/src/lc.rs b/rlox/src/lc.rs
new file mode 100644
index 0000000..da44da1
--- /dev/null
+++ b/rlox/src/lc.rs
@@ -0,0 +1,328 @@
+use std::collections::BTreeSet;
+use std::iter::Peekable;
+use std::str::CharIndices;
+
+/// Lexical category of a [`Token`].
+#[derive(Debug)]
+enum TokenType {
+    Eof,
+
+    LeftParen,
+    RightParen,
+    LeftBrace,
+    RightBrace,
+    Comma,
+    Dot,
+    Minus,
+    Plus,
+    Semicolon,
+    Slash,
+    Star,
+
+    Bang,
+    BangEqual,
+    Equal,
+    EqualEqual,
+    Greater,
+    GreaterEqual,
+    Less,
+    LessEqual,
+
+    Identifier,
+    String,
+    Number,
+
+    And,
+    Class,
+    Else,
+    False,
+    For,
+    Fun,
+    If,
+    Nil,
+    Or,
+    Print,
+    Return,
+    Super,
+    This,
+    True,
+    Var,
+    While,
+
+    Error,
+}
+
+/// A lexeme together with its category; `span` borrows from the source text.
+#[derive(Debug)]
+pub struct Token<'src> {
+    ttype: TokenType,
+    span: &'src str,
+}
+
+/// Iterator that splits source text into [`Token`]s.
+pub struct Scanner<'src> {
+    source: &'src str,
+    iter: Peekable<CharIndices<'src>>,
+    line_map: LineMap,
+}
+
+/// Byte offsets of every line break in a source text.
+pub struct LineMap {
+    line_breaks: Vec<usize>,
+}
+
+impl LineMap {
+    /// Records the byte offset of each `'\n'` in `source`.
+    fn new(source: &str) -> LineMap {
+        LineMap {
+            line_breaks: source
+                .char_indices()
+                .filter_map(|(pos, c)| if c == '\n' { Some(pos) } else { None })
+                .collect(),
+        }
+    }
+
+    /// Placeholder: line lookup is not implemented yet; always yields `(0, 0)`.
+    fn get_lines(&self, slice: &str) -> (usize, usize) {
+        (0, 0)
+    }
+}
+
+impl<'src> Scanner<'src> {
+    /// Creates a scanner positioned at the start of `source`.
+    pub fn new(source: &'src str) -> Self {
+        Scanner {
+            source,
+            iter: source.char_indices().peekable(),
+            line_map: LineMap::new(source),
+        }
+    }
+
+    /// Builds a token spanning bytes `start..=end` of the source.
+    ///
+    /// `end` must be the offset of a one-byte (ASCII) character.
+    fn make_token(&self, ttype: TokenType, start: usize, end: usize) -> Token<'src> {
+        Token {
+            ttype,
+            span: &self.source[start..=end],
+        }
+    }
+
+    /// Builds an `Error` token spanning bytes `start..end` (exclusive end).
+    fn error_token(&self, start: usize, end: usize) -> Token<'src> {
+        Token {
+            ttype: TokenType::Error,
+            span: &self.source[start..end],
+        }
+    }
+
+    /// Consumes the next character if it satisfies `p`, returning its offset.
+    fn consume_if<P>(&mut self, p: P) -> Option<usize>
+    where
+        P: Fn(char) -> bool,
+    {
+        self.iter.next_if(|&(_, c)| p(c)).map(|(pos, _)| pos)
+    }
+
+    /// Consumes the next character if it equals `expected`, returning its offset.
+    fn consume_if_eq(&mut self, expected: char) -> Option<usize> {
+        self.consume_if(|c| c == expected)
+    }
+
+    /// Consumes characters for as long as they satisfy `p`.
+    ///
+    /// Returns the offset of the last consumed character, or `None` if nothing
+    /// was consumed.
+    fn consume_while<P>(&mut self, p: P) -> Option<usize>
+    where
+        P: Fn(char) -> bool + Copy,
+    {
+        self.consume_if(p).map(|pos| {
+            let mut last = pos;
+            while let Some(pos) = self.consume_if(p) {
+                last = pos
+            }
+            last
+        })
+    }
+
+    /// Consumes characters up to and including the first `limit`.
+    ///
+    /// Returns the offset of `limit`, or `None` if the input ran out first.
+    fn consume_until_eq(&mut self, limit: char) -> Option<usize> {
+        self.iter.find(|&(_, c)| c == limit).map(|(pos, _)| pos)
+    }
+
+    /// Scans a string literal whose opening quote is at `start`.
+    ///
+    /// An unterminated literal yields an `Error` token covering the rest of
+    /// the input instead of aborting the process.
+    fn scan_string(&mut self, start: usize) -> Token<'src> {
+        match self.consume_until_eq('"') {
+            Some(end) => self.make_token(TokenType::String, start, end),
+            None => self.error_token(start, self.source.len()),
+        }
+    }
+
+    /// Scans a number literal (`digits` or `digits.digits`) beginning at `start`.
+    fn scan_number(&mut self, start: usize) -> Token<'src> {
+        let mut end = self.consume_while(|c| c.is_ascii_digit()).unwrap_or(start);
+
+        // A '.' belongs to the number only when a digit follows it; otherwise
+        // it stays in the input and is scanned as a `Dot` token.
+        let mut lookahead = self.iter.clone();
+        let has_fraction = matches!(lookahead.next(), Some((_, '.')))
+            && matches!(lookahead.next(), Some((_, c)) if c.is_ascii_digit());
+
+        if has_fraction {
+            self.iter.next(); // the '.'
+            end = self.consume_while(|c| c.is_ascii_digit()).unwrap_or(end);
+        }
+
+        self.make_token(TokenType::Number, start, end)
+    }
+
+    /// Scans an identifier or keyword beginning at `start`.
+    fn scan_identifier(&mut self, start: usize) -> Token<'src> {
+        let end = self
+            .consume_while(|c| c.is_ascii_alphanumeric() || c == '_')
+            .unwrap_or(start);
+
+        let slice = &self.source[start..=end];
+
+        let ttype = match slice {
+            "and" => TokenType::And,
+            "class" => TokenType::Class,
+            "else" => TokenType::Else,
+            "false" => TokenType::False,
+            "for" => TokenType::For,
+            "fun" => TokenType::Fun,
+            "if" => TokenType::If,
+            "nil" => TokenType::Nil,
+            "or" => TokenType::Or,
+            "print" => TokenType::Print,
+            "return" => TokenType::Return,
+            "super" => TokenType::Super,
+            "this" => TokenType::This,
+            "true" => TokenType::True,
+            "var" => TokenType::Var,
+            "while" => TokenType::While,
+            _ => TokenType::Identifier,
+        };
+
+        Token { ttype, span: slice }
+    }
+
+    /// Skips the rest of a `//` line comment, including its line break.
+    fn scan_comment(&mut self) {
+        self.consume_until_eq('\n');
+    }
+}
+
+impl<'src> Iterator for Scanner<'src> {
+    type Item = Token<'src>;
+
+    /// Yields the next token, or `None` once the input is exhausted.
+    fn next(&mut self) -> Option<Self::Item> {
+        // Skip Whitespace
+        while self
+            .iter
+            .next_if(|(_, b)| b.is_ascii_whitespace())
+            .is_some()
+        {}
+
+        if let Some((start_pos, start_ch)) = self.iter.next() {
+            let make_simple_token =
+                |s: &Self, ttype: TokenType| Some(s.make_token(ttype, start_pos, start_pos));
+
+            let handle_eq_suffix = |s: &mut Self, if_present: TokenType, if_absent: TokenType| {
+                Some(match s.consume_if_eq('=') {
+                    Some(end) => s.make_token(if_present, start_pos, end),
+                    None => s.make_token(if_absent, start_pos, start_pos),
+                })
+            };
+
+            match start_ch {
+                '(' => make_simple_token(self, TokenType::LeftParen),
+                ')' => make_simple_token(self, TokenType::RightParen),
+                '{' => make_simple_token(self, TokenType::LeftBrace),
+                '}' => make_simple_token(self, TokenType::RightBrace),
+                ',' => make_simple_token(self, TokenType::Comma),
+                '.' => make_simple_token(self, TokenType::Dot),
+                '-' => make_simple_token(self, TokenType::Minus),
+                '+' => make_simple_token(self, TokenType::Plus),
+                ';' => make_simple_token(self, TokenType::Semicolon),
+                '/' => match self.consume_if_eq('/') {
+                    Some(_) => {
+                        self.scan_comment();
+                        self.next()
+                    }
+                    None => make_simple_token(self, TokenType::Slash),
+                },
+                '*' => make_simple_token(self, TokenType::Star),
+                '!' => handle_eq_suffix(self, TokenType::BangEqual, TokenType::Bang),
+                '=' => handle_eq_suffix(self, TokenType::EqualEqual, TokenType::Equal),
+                '>' => handle_eq_suffix(self, TokenType::GreaterEqual, TokenType::Greater),
+                '<' => handle_eq_suffix(self, TokenType::LessEqual, TokenType::Less),
+                _ => {
+                    let token = if start_ch.is_ascii_digit() {
+                        self.scan_number(start_pos)
+                    } else if start_ch.is_ascii_alphabetic() || start_ch == '_' {
+                        self.scan_identifier(start_pos)
+                    } else if start_ch == '"' {
+                        self.scan_string(start_pos)
+                    } else {
+                        // Report the offending character instead of panicking.
+                        self.error_token(start_pos, start_pos + start_ch.len_utf8())
+                    };
+
+                    Some(token)
+                }
+            }
+        } else {
+            None
+        }
+    }
+}
+
+/// Scans `source` and prints every token to stdout.
+pub fn compile(source: &str) {
+    let scanner = Scanner::new(source);
+
+    for token in scanner {
+        println!("{:?}", token)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Collects the lexeme of every token produced for `source`.
+    fn spans(source: &str) -> Vec<&str> {
+        Scanner::new(source).map(|t| t.span).collect()
+    }
+
+    #[test]
+    fn test_compile() {
+        compile("print(1+2*3)");
+    }
+
+    #[test]
+    fn test_line_comment_is_skipped() {
+        assert_eq!(spans("1 // two\n3"), ["1", "3"]);
+    }
+
+    #[test]
+    fn test_number_stops_at_non_digit() {
+        assert_eq!(spans("12.5.x"), ["12.5", ".", "x"]);
+        assert_eq!(spans("7."), ["7", "."]);
+    }
+
+    #[test]
+    fn test_bad_input_yields_error_tokens() {
+        let tokens: Vec<_> = Scanner::new("@ \"open").collect();
+        assert_eq!(tokens.len(), 2);
+        assert!(tokens.iter().all(|t| matches!(t.ttype, TokenType::Error)));
+    }
+}
diff --git a/rlox/src/main.rs b/rlox/src/main.rs
index ffa3a95..f2f4d54 100644
--- a/rlox/src/main.rs
+++ b/rlox/src/main.rs
@@ -1,5 +1,26 @@
 mod vm;
+mod lc;
+use std::env;
+
+fn repl() {
+}
+
+fn run_file() {
+
+}
 
 fn main() {
+
+    let num_args = env::args().len();
+
+    lc::compile("print(1+2*3)");
+
+    if num_args == 1 {
+        repl();
+    } else if num_args == 2 {
+        run_file();
+    } else {
+        println!("Usage: rlox [path]");
+    }
 
 }
diff --git a/rlox/src/vm.rs b/rlox/src/vm.rs
index 86e9e72..fe2b9d6 100644
--- a/rlox/src/vm.rs
+++ b/rlox/src/vm.rs
@@ -141,7 +141,7 @@ impl VM {
         self.stack.pop().ok_or(VMError::Runtime)
     }
 
-    pub fn interpret(&mut self, chunk: &Chunk) -> Result<(), VMError> {
+    pub fn run(&mut self, chunk: &Chunk) -> Result<(), VMError> {
         while self.pc < chunk.code.len() {
             let instr = chunk.code[self.pc];
             self.pc += 1;
@@ -207,7 +207,6 @@ mod tests {
         chunk.add_constant(Value::from(1000.));
         chunk.add_constant(Value::from(250.));
 
-
         chunk.add_op(Op::Constant { offset: 0 }, 1);
         chunk.add_op(Op::Constant { offset: 1 }, 1);
         chunk.add_op(Op::Multiply, 1);
@@ -224,9 +223,9 @@ mod tests {
         chunk.add_op(Op::Negate, 2);
         chunk.add_op(Op::Divide, 2);
 
-        let mut interpreter = VM::new();
-        interpreter.interpret(&chunk).unwrap();
+        let mut vm = VM::new();
+        vm.run(&chunk).unwrap();
 
-        assert_eq!(interpreter.stack[0], Value::from(3.1416));
+        assert_eq!(vm.stack[0], Value::from(3.1416));
     }
 }