From 59d5984e3dcb35461d266fe88ef8a467b6be93e1 Mon Sep 17 00:00:00 2001 From: ctsk <9384305+ctsk@users.noreply.github.com> Date: Sun, 24 Nov 2024 20:52:45 +0100 Subject: [PATCH] [rlox] Implement Globals (and cleanup a...lot) --- rlox/src/bc.rs | 43 ++++----- rlox/src/gc.rs | 234 ++++++++++++++++++++++++++++++++++--------------- rlox/src/lc.rs | 178 ++++++++++++++++++++++++++----------- rlox/src/vm.rs | 63 ++++++++++++- 4 files changed, 369 insertions(+), 149 deletions(-) diff --git a/rlox/src/bc.rs b/rlox/src/bc.rs index d40b0ed..58cb15d 100644 --- a/rlox/src/bc.rs +++ b/rlox/src/bc.rs @@ -1,4 +1,4 @@ -use crate::gc::{GcHandle, Object, ObjectType}; +use crate::gc::{GcHandle, Object}; use std::collections::LinkedList; use std::convert::From; use std::fmt::Debug; @@ -22,7 +22,10 @@ pub enum Op { Less, Print, - Pop + Pop, + + DefineGlobal { offset: u8 }, + GetGlobal { offset: u8 }, } #[derive(Clone, Debug, PartialEq)] @@ -87,11 +90,7 @@ impl Display for Value { None => write!(f, "{}", stringified), } } - Value::Obj(object) => match object.get_otype() { - ObjectType::String => { - write!(f, "{}", object) - } - }, + Value::Obj(object) => write!(f, "{}", object), } } } @@ -138,6 +137,11 @@ impl Chunk { self } + pub fn add_constant_value(&mut self, value: Value) -> &mut Self { + self.constants.push(value); + self + } + pub fn add_constant(&mut self, value: Value, line: usize) -> &mut Self { self.constants.push(value); self.add_op( @@ -216,26 +220,25 @@ impl fmt::Debug for TraceInfo<'_> { mod tests { + #[test] fn string_value_equality() { use crate::bc::Value; - use crate::gc::allocate_string; + use crate::gc::GC; let s1 = "bla5"; let s2 = "bla6"; - unsafe { - let o1 = allocate_string(s1).unwrap(); - let o2 = allocate_string(s2).unwrap(); - let o3 = allocate_string(s2).unwrap(); - let v1 = Value::from(o1.get_object()); - let v2 = Value::from(o2.get_object()); - let v3 = Value::from(o3.get_object()); - let v4 = v2.clone(); + let o1 = GC::new_string(s1); + let o2 = GC::new_string(s2); + let o3 = GC::new_string(s2); + let v1 = Value::from(o1.get_object()); + let v2 = Value::from(o2.get_object()); + let v3 = Value::from(o3.get_object()); + let v4 = v2.clone(); - assert_ne!(v1, v2); - assert_eq!(v2, v3); - assert_eq!(v2, v4); - } + assert_ne!(v1, v2); + assert_eq!(v2, v3); + assert_eq!(v2, v4); } } diff --git a/rlox/src/gc.rs b/rlox/src/gc.rs index cf28f55..35b6b0f 100644 --- a/rlox/src/gc.rs +++ b/rlox/src/gc.rs @@ -1,46 +1,126 @@ +#![allow(unused, dead_code)] + +use core::hash; use std::{ alloc::{alloc, dealloc, Layout, LayoutError}, fmt::{self, Display}, }; +/// Api + +pub struct GC {} + +impl GC { + pub fn new_string(content: &str) -> GcHandle { + unsafe { allocate_string(content) }.unwrap() + } + + pub fn new_concat_string(first: ObjString, second: ObjString) -> GcHandle { + unsafe { concat_string(first, second) }.unwrap() + } + + pub fn free(handle: GcHandle) { + unsafe { deallocate_object(handle.object) } + } +} + +/// Markers #[derive(PartialEq, Eq, Clone, Copy)] #[repr(usize)] pub enum ObjectType { String, } -#[repr(C)] -struct ObjectHeader { - otype: ObjectType, +pub(crate) trait IsObject { + fn otype() -> ObjectType; + fn from_object(object: Object) -> Self; + fn upcast(self) -> Object; } -#[repr(C)] -struct ObjStringHeader { - object_header: ObjectHeader, - len: usize, + +/// Object Hierarchy / Layout stuff +/// +/// Object +/// | +/// ObjString +/// +/// Object: --ptr-to--> [ [], .... data .... ] +/// ObjString: --ptr-to--> [[[], len], ...data... ] +/// ^-StringHeader-^ +/// ^----------StringAlloc--------^ +/// +/// GcHandle owns the underlying memory and must not be dropped before the corresponding Objects are. + +#[derive(Debug, Clone, PartialEq)] +pub struct GcHandle { + object: Object, } -#[repr(C)] -struct ObjString { - header: ObjStringHeader, - data: [u8], +impl Drop for GcHandle { + fn drop(&mut self) { + unsafe { deallocate_object(self.object) }; + } } -const fn data_offset() -> usize { - std::mem::size_of::() +impl GcHandle { + pub fn get_object(&self) -> Object { + return self.object; + } } #[derive(Copy, Clone)] pub struct Object { - ptr: *mut ObjectHeader, + ptr: *mut Header, } +#[derive(Copy, Clone, Eq)] +pub struct ObjString { + ptr: *mut StringHeader, +} + +impl IsObject for ObjString { + fn otype() -> ObjectType { + ObjectType::String + } + + fn from_object(object: Object) -> ObjString { + ObjString { ptr: object.ptr as *mut StringHeader } + } + + fn upcast(self) -> Object { + Object { ptr: self.ptr as *mut Header } + } +} + +#[repr(C)] +struct Header { + otype: ObjectType, +} + +#[repr(C)] +struct StringAlloc { + header: StringHeader, + data: [u8], +} + +#[repr(C)] +struct StringHeader { + object_header: Header, + len: usize, +} + + +const fn data_offset() -> usize { + std::mem::size_of::() +} + + +/// Pretty-print Object impl Display for Object { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.get_otype() { - ObjectType::String => { - write!(f, "{}", ObjString::as_str(self.ptr as *mut ObjStringHeader)) - } + ObjectType::String => + fmt::Display::fmt(&self.downcast::().unwrap(), f) } } } @@ -49,16 +129,22 @@ impl Object { pub fn get_otype(&self) -> ObjectType { unsafe { (*self.ptr).otype } } + + pub(crate) fn downcast(self) -> Option { + if self.get_otype() == T::otype() { + Some(T::from_object(self)) + } else { + None + } + } } impl fmt::Debug for Object { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.get_otype() { ObjectType::String => { - let string = self.ptr as *mut ObjStringHeader; - let data = ObjString::as_str(string); - - write!(f, "STR {} {:?}", data.len(), &data[..8.min(data.len())],) + let string = self.downcast::().unwrap().as_str(); + write!(f, "STR {} {:?}", string.len(), &string[..8.min(string.len())]) } } } @@ -77,27 +163,58 @@ impl PartialEq for Object { match (*self.ptr).otype { ObjectType::String => { - let header = self.ptr as *mut ObjStringHeader; - let other_header = other.ptr as *mut ObjStringHeader; - - if (*header).len != (*other_header).len { - return false; - } - - let slice = ObjString::as_str(header); - let other_slice = ObjString::as_str(other_header); - - slice == other_slice + self.downcast::() == other.downcast::() } } } } } +impl PartialEq for ObjString { + fn eq(&self, other: &Self) -> bool { + unsafe { + if (*self.ptr).len != (*other.ptr).len { + return false; + } + + self.as_slice() == other.as_slice() + } + } +} + impl ObjString { + fn as_slice<'a>(&self) -> &'a [u8] { + let length = unsafe { (*self.ptr).len }; + let (layout_, offset) = StringAlloc::layout(length).unwrap(); + unsafe { + std::slice::from_raw_parts( + (self.ptr as *mut u8).offset(offset as isize), + length + ) + } + } + + fn as_str<'a>(&self) -> &'a str { + unsafe { std::str::from_utf8_unchecked(self.as_slice()) } + } +} + +impl fmt::Display for ObjString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self.as_str(), f) + } +} + +impl std::hash::Hash for ObjString { + fn hash(&self, state: &mut H) { + std::hash::Hash::hash::(self.as_str(), state); + } +} + +impl StringAlloc { fn layout(length: usize) -> Result<(Layout, usize), LayoutError> { - let (layout, offset) = Layout::for_value(&ObjStringHeader { - object_header: ObjectHeader { + let (layout, offset) = Layout::for_value(&StringHeader { + object_header: Header { otype: ObjectType::String, }, len: length, @@ -106,47 +223,34 @@ impl ObjString { Ok((layout.pad_to_align(), offset)) } - - fn as_bytes<'a>(ptr: *const ObjStringHeader) -> &'a [u8] { - unsafe { - std::slice::from_raw_parts((ptr as *mut u8).offset(data_offset() as isize), (*ptr).len) - } - } - - fn as_str<'a>(ptr: *const ObjStringHeader) -> &'a str { - unsafe { std::str::from_utf8_unchecked(ObjString::as_bytes(ptr)) } - } } -pub unsafe fn allocate_string_obj<'a>( +unsafe fn allocate_string_obj<'a>( length: usize, ) -> Result<(GcHandle, &'a mut [u8]), LayoutError> { - let (layout, offset) = ObjString::layout(length)?; + let (layout, offset) = StringAlloc::layout(length)?; let allocation = alloc(layout); let data_ptr = allocation.offset(offset as isize); - let header = allocation as *mut ObjStringHeader; + let header = allocation as *mut StringHeader; (*header).len = length; (*header).object_header.otype = ObjectType::String; let object = Object { - ptr: header as *mut ObjectHeader, + ptr: header as *mut Header, }; let str = std::slice::from_raw_parts_mut(data_ptr, length); Ok((GcHandle { object }, str)) } -pub unsafe fn allocate_string(content: &str) -> Result { +unsafe fn allocate_string(content: &str) -> Result { let (gc_handle, slice) = allocate_string_obj(content.len())?; slice.copy_from_slice(content.as_bytes()); Ok(gc_handle) } -pub unsafe fn concat_string(a: Object, b: Object) -> Result { - let a_head = a.ptr as *mut ObjStringHeader; - let b_head = b.ptr as *mut ObjStringHeader; - let a_data = ObjString::as_bytes(a_head); - let b_data = ObjString::as_bytes(b_head); - let new_len = a_data.len() + b_data.len(); +unsafe fn concat_string(a: ObjString, b: ObjString) -> Result { + let (a_data,b_data) = (a.as_slice(), b.as_slice()); + let new_len = a_data.len() + b_data.len(); let (gc_handle, slice) = allocate_string_obj(new_len)?; slice[..a_data.len()].copy_from_slice(a_data); @@ -158,28 +262,12 @@ pub unsafe fn concat_string(a: Object, b: Object) -> Result { - let header = object.ptr as *mut ObjStringHeader; + let header = object.ptr as *mut StringHeader; dealloc( object.ptr as *mut u8, - ObjString::layout((*header).len).unwrap().0, + StringAlloc::layout((*header).len).unwrap().0, ) } } } -#[derive(Debug, Clone, PartialEq)] -pub struct GcHandle { - object: Object, -} - -impl Drop for GcHandle { - fn drop(&mut self) { - unsafe { deallocate_object(self.object) }; - } -} - -impl GcHandle { - pub fn get_object(&self) -> Object { - return self.object; - } -} diff --git a/rlox/src/lc.rs b/rlox/src/lc.rs index 3e06213..444a87c 100644 --- a/rlox/src/lc.rs +++ b/rlox/src/lc.rs @@ -1,10 +1,12 @@ -use std::fmt; +#![allow(dead_code, unused)] + +use std::{collections::hash_map, fmt}; use std::iter::Peekable; use std::str::CharIndices; use std::collections::HashMap; -use crate::bc::{Chunk, Op}; -use crate::gc::allocate_string; +use crate::bc::Value; +use crate::{bc::{Chunk, Op}, gc::GC}; #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] enum ScanErrorKind { @@ -285,24 +287,22 @@ struct Parser<'src> { #[derive(Debug, PartialEq)] pub enum ParseErrorKind { InvalidNumber, - UnexpectedEOF, IncompleteExpression, NoSemicolonAfterValue, NoSemicolonAfterExpression, + NoVariableName, + NoSemicolonAfterVarDecl, } impl fmt::Display for ParseErrorKind { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { ParseErrorKind::InvalidNumber => todo!(), - ParseErrorKind::UnexpectedEOF => todo!(), - ParseErrorKind::IncompleteExpression => { - write!(f, "Expect expression.") - }, - ParseErrorKind::NoSemicolonAfterValue => todo!(), - ParseErrorKind::NoSemicolonAfterExpression => { - write!(f, "Expect ';' after expression.") - }, + ParseErrorKind::IncompleteExpression => write!(f, "Expect expression."), + ParseErrorKind::NoSemicolonAfterValue => write!(f, "Expect ';' after value."), + ParseErrorKind::NoSemicolonAfterExpression => write!(f, "Expect ';' after expression."), + ParseErrorKind::NoVariableName => write!(f, "Expect variable name."), + ParseErrorKind::NoSemicolonAfterVarDecl => write!(f, "Expect ';' after variable declaration."), } } } @@ -336,7 +336,7 @@ enum Associativity { NonAssoc, } -#[derive(Clone, Copy, PartialOrd, Ord, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq)] enum Precedence { None, Assignment, @@ -409,6 +409,21 @@ impl<'src> Parser<'src> { } } + fn add_string(&mut self, chunk: &mut Chunk, string: &'src str) -> u8 { + match self.intern_table.entry(string) { + hash_map::Entry::Occupied(entry) => { + entry.get().clone() + }, + hash_map::Entry::Vacant(entry) => { + let handle = GC::new_string(string); + chunk.add_constant_value(Value::from(handle.get_object())); + chunk.allocations.push_front(handle); + let offset = chunk.constants.len() as u8 - 1; + entry.insert(offset).clone() + }, + } + } + fn _expression(&mut self, chunk: &mut Chunk, min_prec: Precedence) -> Result<'src, ()> { match self.scanner.next() { None => return Err(self.error_end(ParseErrorKind::IncompleteExpression)), @@ -430,18 +445,13 @@ impl<'src> Parser<'src> { } TokenType::String => { let without_quotes = &token.span[1..(token.span.len() - 1)]; - match self.intern_table.get(without_quotes) { - Some(&index) => { - chunk.add_op(Op::Constant { offset: index }, token.line); - } - None => { - let object = unsafe { allocate_string(without_quotes) }.unwrap(); - chunk.add_constant(object.get_object().into(), token.line); - self.intern_table - .insert(without_quotes, chunk.constants.len() as u8 - 1); - chunk.allocations.push_front(object); - } - }; + let offset = self.add_string(chunk, without_quotes); + chunk.add_op( + Op::Constant { + offset, + }, + token.line + ); } TokenType::LeftParen => { self._expression(chunk, Precedence::None)?; @@ -456,6 +466,10 @@ impl<'src> Parser<'src> { TokenType::False => { chunk.add_op(Op::False, token.line); } + TokenType::Identifier => { + let offset = self.add_string(chunk, token.span); + chunk.add_op(Op::GetGlobal {offset}, token.line); + } _ => { return Err(self.error_at(token, ParseErrorKind::IncompleteExpression)); } @@ -504,31 +518,31 @@ impl<'src> Parser<'src> { self._expression(chunk, Precedence::None) } + pub fn must_consume(&mut self, expected: TokenType, error_kind: ParseErrorKind) -> Result<'src, Token<'src>> { + match self.scanner.peek().cloned() { + Some(token) if token.ttype == expected => Ok(self.scanner.next().unwrap()), + Some(token) => Err(self.error_at(token.clone(), error_kind)), + _ => Err(self.error_end(error_kind)), + } + } + pub fn print_statement(&mut self, print_token: Token<'src>, chunk: &mut Chunk) -> Result<'src, ()> { self.expression(chunk)?; chunk.add_op(Op::Print, print_token.line); - match self.scanner.next_if(|t| t.ttype == TokenType::Semicolon) { - Some(_) => Ok(()), - None => { - let location = self.scanner.peek().cloned(); - Err(self.error_at_or_end(location, ParseErrorKind::NoSemicolonAfterValue)) - }, - } + self.must_consume(TokenType::Semicolon, ParseErrorKind::NoSemicolonAfterValue).map(|_| ()) } - pub fn expr_statement(&mut self, chunk: &mut Chunk) -> Result<'src, ()> { + fn expr_statement(&mut self, chunk: &mut Chunk) -> Result<'src, ()> { self.expression(chunk)?; - chunk.add_op(Op::Pop, 0); - match self.scanner.next_if(|t| t.ttype == TokenType::Semicolon) { - Some(_) => Ok(()), - None => { - let location = self.scanner.peek().cloned(); - Err(self.error_at_or_end(location, ParseErrorKind::NoSemicolonAfterExpression)) - }, - } + let pop_line = + self.must_consume(TokenType::Semicolon, ParseErrorKind::NoSemicolonAfterExpression) + .map(|tok| tok.line)?; + chunk.add_op(Op::Pop, pop_line); + + Ok(()) } - pub fn statement(&mut self, chunk: &mut Chunk) -> Result<'src, ()> { + fn statement(&mut self, chunk: &mut Chunk) -> Result<'src, ()> { match self.scanner.peek().unwrap().ttype { TokenType::Print => { let print_token = self.scanner.next().unwrap(); @@ -538,7 +552,7 @@ impl<'src> Parser<'src> { } } - pub fn synchronize(&mut self) { + fn synchronize(&mut self) { use TokenType::*; while let Some(_token) = self.scanner.next_if( @@ -546,13 +560,42 @@ impl<'src> Parser<'src> { ) {} } - pub fn declaration(&mut self, chunk: &mut Chunk) { - self.statement(chunk).unwrap_or_else( - |err| { - self.errors.push(err); - self.synchronize(); + fn var_declaration(&mut self, var_token: Token<'src>, chunk: &mut Chunk) -> Result<'src, ()> { + let ident = self.must_consume(TokenType::Identifier, ParseErrorKind::NoVariableName)?; + let offset = self.add_string(chunk, ident.span); + + match self.scanner.peek() { + Some(token) if token.ttype == TokenType::Equal => { + self.expression(chunk)?; + }, + _ => { + chunk.add_op(Op::Nil, ident.line); } - ) + } + + chunk.add_op(Op::DefineGlobal { offset }, ident.line); + + self.must_consume(TokenType::Semicolon, ParseErrorKind::NoSemicolonAfterVarDecl)?; + + Ok(()) + } + + pub fn declaration(&mut self, chunk: &mut Chunk) { + let peeked = self.scanner.peek().unwrap().clone(); + match peeked.ttype { + TokenType::Var => { + self.scanner.next(); + self.var_declaration(peeked, chunk); + }, + _ => { + self.statement(chunk).unwrap_or_else( + |err| { + self.errors.push(err); + self.synchronize(); + } + ) + } + } } pub fn compile(&mut self, chunk: &mut Chunk) { @@ -799,7 +842,7 @@ mod tests { use crate::bc::Op::*; let expected = Chunk::new_with( vec![Constant { offset: 0 }, Constant { offset: 1 }, Add, Print], - vec![], + vec![1, 1, 1, 1], vec![Value::from(1.0), Value::from(1.0)], LinkedList::new(), ); @@ -810,14 +853,14 @@ mod tests { #[test] fn basic_print_string_statement() { let source = "print \"string\";"; - let allocation = unsafe { allocate_string("string").unwrap() }; + let allocation = GC::new_string("string"); let object = allocation.get_object(); let mut allocations = LinkedList::new(); allocations.push_front(allocation); use crate::bc::Op::*; let expected = Chunk::new_with( vec![Constant { offset: 0 }, Print], - vec![], + vec![1, 1], vec![Value::from(object)], allocations, ); @@ -831,11 +874,40 @@ mod tests { use crate::bc::Op::*; let expected = Chunk::new_with( vec![Constant { offset: 0 }, Constant { offset: 1 }, Divide, Pop], - vec![], + vec![1, 1, 1, 1], vec![Value::from(1.0), Value::from(1.0)], LinkedList::new(), ); test_parse_program(source, &expected); } + + #[test] + fn basic_var_decl() { + let source = "var x;"; + use crate::bc::Op::*; + let x = GC::new_string("x"); + let expected = Chunk::new_with( + vec![Nil, DefineGlobal { offset: 0 }], + vec![1, 1], + vec![x.get_object().into()], + LinkedList::new(), + ); + + test_parse_program(source, &expected); + } + + fn basic_var_decl_with_initializer() { + let source = "var x = 1 + 1;"; + use crate::bc::Op::*; + let x = GC::new_string("x"); + let expected = Chunk::new_with( + vec![Constant {offset: 1}, Constant {offset: 2}, Add, DefineGlobal { offset: 0 }], + vec![], + vec![x.get_object().into(), Value::from(1.0), Value::from(1.0)], + LinkedList::new(), + ); + + test_parse_program(source, &expected); + } } diff --git a/rlox/src/vm.rs b/rlox/src/vm.rs index 9f64964..a49bde3 100644 --- a/rlox/src/vm.rs +++ b/rlox/src/vm.rs @@ -1,6 +1,6 @@ use crate::bc::{Chunk, Op, TraceInfo, Value}; -use crate::gc::{concat_string, GcHandle, ObjectType}; -use std::collections::LinkedList; +use crate::gc::{GcHandle, ObjString, ObjectType, GC}; +use std::collections::{HashMap, LinkedList}; use std::io; use std::rc::Rc; @@ -72,6 +72,7 @@ impl VM { output: &mut Output, ) -> Result<()> { let mut allocations: LinkedList = LinkedList::new(); + let mut globals: HashMap = HashMap::new(); while self.pc < chunk.code.len() { let instr = chunk.code[self.pc]; @@ -126,7 +127,8 @@ impl VM { match a { Value::Obj(a) => match a.get_otype() { ObjectType::String => { - let new_obj = unsafe { concat_string(a, b).unwrap() }; + let (a, b) = (a.downcast().unwrap(), b.downcast().unwrap()); + let new_obj = GC::new_concat_string(a, b); self.push(Value::from(new_obj.get_object())); allocations.push_front(new_obj); Ok(()) @@ -178,6 +180,34 @@ impl VM { }, Op::Pop => { self.pop()?; + }, + Op::DefineGlobal { offset } => { + let ident = chunk.constants[offset as usize].clone(); + if let Value::Obj(name) = ident { + let name = name.downcast::().unwrap(); + globals.entry(name).or_insert(self.pop()?); + Ok(()) + } else { + unreachable!() + }? + }, + Op::GetGlobal { offset } => { + let ident = match chunk.constants[offset as usize] { + Value::Obj(object) => object.downcast::().unwrap(), + _ => todo!(), + }; + + if let Some(value) = globals.get(&ident) { + self.push(value.clone()); + + Ok(()) + } else { + Err( + VMError::Runtime( + format!("Undefined variable '{}'.", ident).into(), + self.pc, + )) + }? } } } @@ -190,6 +220,8 @@ impl VM { mod tests { use std::collections::LinkedList; + use crate::gc::GC; + use super::{Chunk, Op, VMError, Value, VM}; #[test] @@ -269,4 +301,29 @@ mod tests { Ok(()) } + + #[test] + fn read_write_globals() -> Result<(), VMError> { + let var = GC::new_string("global"); + use Op::*; + let chunk = Chunk::new_with( + vec![ + Constant { offset: 0 }, + DefineGlobal { offset: 1 }, + Constant { offset: 2 }, + GetGlobal { offset: 1 }, + Multiply, + ], + vec![], + vec![Value::from(5.0), Value::from(var.get_object()), Value::from(6.0)], + LinkedList::new() + ); + + let mut vm = VM::new(); + vm.stdrun(&chunk)?; + + assert_eq!(vm.stack, vec![Value::Number(30.0)]); + + Ok(()) + } }