From 0d7980ddc796f1754ffea8c0a19f3445eef1c905 Mon Sep 17 00:00:00 2001 From: ctsk <9384305+ctsk@users.noreply.github.com> Date: Tue, 27 Aug 2024 15:15:13 +0200 Subject: [PATCH] [rlox] Implement basic linked-list allocation management --- rlox/src/bc.rs | 72 +++++++++----------- rlox/src/gc.rs | 174 +++++++++++++++++++++++++++++++++++++++++++++++ rlox/src/lc.rs | 27 +++++++- rlox/src/main.rs | 11 +-- rlox/src/vm.rs | 87 ++++++++++++++++-------- 5 files changed, 296 insertions(+), 75 deletions(-) create mode 100644 rlox/src/gc.rs diff --git a/rlox/src/bc.rs b/rlox/src/bc.rs index 24b7a50..bdf7873 100644 --- a/rlox/src/bc.rs +++ b/rlox/src/bc.rs @@ -1,4 +1,5 @@ -use crate::bc::Value::{Bool, Number}; +use crate::gc::{GcHandle, Object}; +use std::collections::LinkedList; use std::convert::From; use std::fmt; use std::fmt::Debug; @@ -6,7 +7,7 @@ use std::fmt::Debug; #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum Op { Return, - Constant { offset: usize }, + Constant { offset: u8 }, Nil, True, False, @@ -20,69 +21,46 @@ pub enum Op { Greater, Less, } -#[derive(Clone, Debug, PartialEq)] -pub enum Object { - String(String) -} #[derive(Clone, Debug, PartialEq)] pub enum Value { Nil, Bool(bool), Number(f64), - Obj(Box) + Obj(Object), } impl Value { pub fn as_num(&self) -> Option { match self { - &Number(val) => Some(val), + &Value::Number(val) => Some(val), _ => None, } } pub fn as_bool(&self) -> Option { match self { - &Bool(val) => Some(val), + &Value::Bool(val) => Some(val), _ => None, } } - - pub fn as_str(&self) -> Option<&str> { - match self { - Value::Obj(obj) => { - match obj.as_ref() { - Object::String(string) => { - Some(string.as_str()) - } - } - }, - _ => None - } - } } impl From for Value { fn from(value: f64) -> Self { - Number(value) + Value::Number(value) } } impl From for Value { fn from(value: bool) -> Self { - Bool(value) + Value::Bool(value) } } -impl From<&str> for Value { - fn from(value: &str) -> Self { - Value::Obj(Box::from(Object::String(value.to_string()))) - } -} - -impl From for Value { - fn from(value: String) -> Self { - Value::Obj(Box::from(Object::String(value))) +impl From for Value { + fn from(value: Object) -> Self { + Value::Obj(value) } } @@ -90,6 +68,7 @@ pub struct Chunk { pub code: Vec, pub debug_info: Vec, pub constants: Vec, + pub allocations: LinkedList, } impl Chunk { @@ -98,14 +77,16 @@ impl Chunk { code: Vec::new(), debug_info: Vec::new(), constants: Vec::new(), + allocations: LinkedList::new() } } - pub fn new_with(code: Vec, debug_info: Vec, constants: Vec) -> Self { + pub fn new_with(code: Vec, debug_info: Vec, constants: Vec, allocations: LinkedList) -> Self { Chunk { code, debug_info, constants, + allocations } } @@ -124,7 +105,7 @@ impl Chunk { self.constants.push(value); self.add_op( Op::Constant { - offset: self.constants.len() - 1, + offset: self.constants.len() as u8 - 1, }, line, ) @@ -187,7 +168,7 @@ impl fmt::Debug for TraceInfo<'_> { match op { Op::Constant { offset } => { f.debug_struct("Constant") - .field("val", &chunk.constants[offset]) + .field("val", &chunk.constants[offset as usize]) .finish()?; write!(f, "") } @@ -197,18 +178,27 @@ impl fmt::Debug for TraceInfo<'_> { } mod tests { + #[test] fn string_value_equality() { + use crate::gc::allocate_string; use crate::bc::Value; let s1 = "bla5"; let s2 = "bla6"; - let v1 = Value::from(s1); - let v2 = Value::from(s2); - let v3 = Value::from(s2); + unsafe { + let o1 = allocate_string(s1).unwrap(); + let o2 = allocate_string(s2).unwrap(); + let o3 = allocate_string(s2).unwrap(); + let v1 = Value::from(o1.get_object()); + let v2 = Value::from(o2.get_object()); + let v3 = Value::from(o3.get_object()); + let v4 = v2.clone(); - assert_ne!(v1, v2); - assert_eq!(v2, v3); + assert_ne!(v1, v2); + assert_eq!(v2, v3); + assert_eq!(v2, v4); + } } } diff --git a/rlox/src/gc.rs b/rlox/src/gc.rs new file mode 100644 index 0000000..4486a96 --- /dev/null +++ b/rlox/src/gc.rs @@ -0,0 +1,174 @@ +use std::{alloc::{alloc, dealloc, Layout, LayoutError}, fmt}; + +#[derive(PartialEq, Eq, Clone, Copy)] +#[repr(usize)] +pub enum ObjectType { + String, +} + +#[repr(C)] +struct ObjectHeader { + otype: ObjectType, +} + +#[repr(C)] +struct ObjStringHeader { + object_header: ObjectHeader, + len: usize, +} + +#[repr(C)] +struct ObjString { + header: ObjStringHeader, + data: [u8], +} + +const fn data_offset() -> usize { + std::mem::size_of::() +} + +#[derive(Copy, Clone)] +pub struct Object { + ptr: *mut ObjectHeader, +} + +impl Object { + pub fn get_otype(&self) -> ObjectType { + unsafe { + (*self.ptr).otype + } + } + +} + +impl fmt::Debug for Object { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.get_otype() { + ObjectType::String => { + let string = self.ptr as *mut ObjStringHeader; + let data: &[u8] = ObjString::as_slice(string); + write!( + f, + "STR {} {:?}", + data.len(), + &data[..8.min(data.len())], + ) + }, + } + } +} + +impl PartialEq for Object { + fn eq(&self, other: &Self) -> bool { + if self.ptr == other.ptr { + return true; + } + + unsafe { + if (*self.ptr).otype != (*other.ptr).otype { + return false; + } + + match (*self.ptr).otype { + ObjectType::String => { + let header = self.ptr as *mut ObjStringHeader; + let other_header = other.ptr as *mut ObjStringHeader; + + if (*header).len != (*other_header).len { + return false; + } + + let slice = ObjString::as_slice(header); + let other_slice = ObjString::as_slice(other_header); + + slice == other_slice + }, + } + } + } +} + +impl ObjString { + fn layout(length: usize) -> Result<(Layout, usize), LayoutError> { + let (layout, offset) = Layout::for_value(&ObjStringHeader { + object_header: ObjectHeader { + otype: ObjectType::String, + }, + len: length, + }) + .extend(Layout::array::(length)?)?; + + Ok((layout.pad_to_align(), offset)) + } + + fn as_slice<'a>(ptr: *mut ObjStringHeader) -> &'a [u8] { + unsafe { + std::slice::from_raw_parts( + (ptr as *mut u8).offset(data_offset() as isize), + (*ptr).len + ) + } + } +} + +pub unsafe fn allocate_string_obj<'a>(length: usize) -> Result<(GcHandle, &'a mut [u8]), LayoutError> { + let (layout, offset) = ObjString::layout(length)?; + let allocation = alloc(layout); + let data_ptr = allocation.offset(offset as isize); + let header = allocation as *mut ObjStringHeader; + (*header).len = length; + (*header).object_header.otype = ObjectType::String; + let object = Object { ptr: header as *mut ObjectHeader }; + let str = std::slice::from_raw_parts_mut(data_ptr, length); + Ok((GcHandle { object }, str)) +} + +pub unsafe fn allocate_string(content: &str) -> Result { + let (gc_handle, slice) = allocate_string_obj(content.len())?; + slice.copy_from_slice(content.as_bytes()); + Ok(gc_handle) +} + +pub unsafe fn concat_string(a: Object, b: Object) -> Result { + let a_head = a.ptr as *mut ObjStringHeader; + let b_head = b.ptr as *mut ObjStringHeader; + let a_data = ObjString::as_slice(a_head); + let b_data = ObjString::as_slice(b_head); + let new_len = a_data.len() + b_data.len(); + + let (gc_handle, slice) = allocate_string_obj(new_len)?; + + slice[..a_data.len()].copy_from_slice(a_data); + slice[a_data.len()..].copy_from_slice(b_data); + + Ok(gc_handle) +} + +unsafe fn deallocate_object(object: Object) { + match object.get_otype() { + ObjectType::String => { + let header = object.ptr as *mut ObjStringHeader; + dealloc( + object.ptr as *mut u8, + ObjString::layout((*header).len).unwrap().0, + ) + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct GcHandle { + object: Object +} + +impl Drop for GcHandle { + fn drop(&mut self) { + unsafe { deallocate_object(self.object) }; + } +} + +impl GcHandle { + pub fn get_object(&self) -> Object { + return self.object; + } +} diff --git a/rlox/src/lc.rs b/rlox/src/lc.rs index 0a267e1..066bcf9 100644 --- a/rlox/src/lc.rs +++ b/rlox/src/lc.rs @@ -1,8 +1,9 @@ -use std::convert::identity; +use std::{collections::HashMap, convert::identity}; use std::iter::Peekable; use std::str::CharIndices; use crate::bc::{Chunk, Op}; +use crate::gc::allocate_string; #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] enum TokenType { @@ -263,6 +264,7 @@ impl<'src> Iterator for Scanner<'src> { struct Parser<'src> { scanner: Peekable>, + intern_table: HashMap<&'src str, u8>, } enum Associativity { @@ -290,6 +292,7 @@ impl<'src> Parser<'src> { fn new(sc: Scanner<'src>) -> Self { Parser { scanner: sc.into_iter().peekable(), + intern_table: HashMap::new(), } } @@ -336,7 +339,21 @@ impl<'src> Parser<'src> { }, TokenType::String => { let without_quotes = &token.span[1..(token.span.len() - 1)]; - chunk.add_constant(without_quotes.into(), 0); + match self.intern_table.get(without_quotes) { + Some(&index) => { + chunk.add_op( + Op::Constant { + offset: index, + }, + 0 + ); + }, + None => { + let object = unsafe { allocate_string(without_quotes) }.unwrap(); + chunk.add_constant(object.get_object().into(), 0); + chunk.allocations.push_front(object); + }, + }; }, TokenType::Nil | TokenType::True | TokenType::False => { let op = match token.ttype { @@ -401,6 +418,8 @@ pub fn compile(source: &str, chunk: &mut Chunk) { #[cfg(test)] mod tests { + use std::collections::LinkedList; + use crate::bc::Value; use super::*; @@ -523,6 +542,7 @@ mod tests { ], vec![], vec![1., 1., 2., 1.].into_iter().map(Value::from).collect(), + LinkedList::new(), ); test_parse_expression(source, &expected); @@ -536,6 +556,7 @@ mod tests { vec![Nil, Nil, Add], vec![], vec![], + LinkedList::new(), ); test_parse_expression(source, &expected); @@ -549,6 +570,7 @@ mod tests { vec![True, False, Multiply], vec![], vec![], + LinkedList::new(), ); test_parse_expression(source, &expected); @@ -568,6 +590,7 @@ mod tests { True, Equal, Not], vec![], vec![], + LinkedList::new(), ); test_parse_expression(source, &expected); diff --git a/rlox/src/main.rs b/rlox/src/main.rs index 45bc046..5501882 100644 --- a/rlox/src/main.rs +++ b/rlox/src/main.rs @@ -1,6 +1,7 @@ mod bc; mod lc; mod vm; +mod gc; use std::env; use std::io; @@ -48,7 +49,7 @@ fn main() { #[cfg(test)] mod tests { - use crate::{bc::Chunk, lc::compile, vm::VM}; + use crate::{bc::{Chunk, Value}, gc::allocate_string, lc::compile, vm::VM}; #[test] fn test_compile_and_run_pi_math() { @@ -60,12 +61,14 @@ mod tests { } #[test] - fn string_handling() { + fn string_concatenation() { let source = "\"hello\" + \" \" + \"world\""; let mut chunk = Chunk::new(); compile(source, &mut chunk); let mut vm = VM::new(); - let v = vm.run(&chunk).unwrap(); - assert_eq!(v, Some("hello world".into())); + let (result, _allocs) = vm.run(&chunk).unwrap().unwrap(); + let target_alloc = unsafe { allocate_string("hello world").unwrap() }; + let target = Value::from(target_alloc.get_object()); + assert_eq!(result, target); } } diff --git a/rlox/src/vm.rs b/rlox/src/vm.rs index af67156..f2aa083 100644 --- a/rlox/src/vm.rs +++ b/rlox/src/vm.rs @@ -1,5 +1,6 @@ use crate::bc::{Chunk, Op, TraceInfo, Value}; -use std::ops::Not; +use crate::gc::{concat_string, GcHandle, ObjectType}; +use std::collections::LinkedList; use std::rc::Rc; pub struct VM { @@ -55,7 +56,9 @@ impl VM { .ok_or(self.type_err("Number", top_of_stack)) } - pub fn run(&mut self, chunk: &Chunk) -> Result, VMError> { + pub fn run(&mut self, chunk: &Chunk) -> Result)>, VMError> { + let mut allocations: LinkedList = LinkedList::new(); + while self.pc < chunk.code.len() { let instr = chunk.code[self.pc]; self.pc += 1; @@ -79,7 +82,7 @@ impl VM { match instr { Op::Return => print!("{:?}", self.pop()?), - Op::Constant { offset } => self.push(chunk.constants[offset].clone()), + Op::Constant { offset } => self.push(chunk.constants[offset as usize].clone()), Op::Nil => self.push(Value::Nil), Op::True => self.push(Value::Bool(true)), Op::False => self.push(Value::Bool(false)), @@ -103,20 +106,29 @@ impl VM { let a = self.pop_num()?; self.push(Value::from(num + a)); } - Value::Obj(ref _obj) => { - match b.as_str() { - None => Err(self.type_err("String", b)), - Some(str_b) => { + Value::Obj(b) => { + match b.get_otype() { + ObjectType::String => { let a = self.pop()?; - match a.as_str() { - Some(str_a) => { - self.push(Value::from(str_a.to_owned() + str_b)); - Ok(()) + match a { + Value::Obj(a) => { + match a.get_otype() { + ObjectType::String => { + let new_obj = unsafe { + concat_string(a, b).unwrap() + }; + self.push(Value::from(new_obj.get_object())); + allocations.push_front(new_obj); + Ok(()) + }, + } }, - None => Err(self.type_err("String", a)) - } - } - }? + _ => { + Err(self.type_err("String", a)) + } + }? + }, + } } _ => return Err(VMError::Runtime("Operands of + need to be numbers or strings".into(), self.pc)) }; @@ -151,17 +163,24 @@ impl VM { } } - Ok(self - .stack - .is_empty() - .not() - .then_some(self.stack[self.stack.len() - 1].clone())) + match self.stack.pop() { + None => Ok(None), + Some(result_value) => { + let escaping_allocs = allocations.into_iter().filter( + |handle| Value::from(handle.get_object()) == result_value + ).collect(); + + Ok(Some((result_value, escaping_allocs))) + } + } } } #[cfg(test)] mod tests { - use super::{Chunk, Op, Value, VM}; + use std::collections::LinkedList; + + use super::{Chunk, Op, VMError, Value, VM}; #[test] fn simple_arithmetic() { @@ -188,12 +207,15 @@ mod tests { .into_iter() .map(Value::from) .collect(), + LinkedList::new(), ); let mut vm = VM::new(); - vm.run(&chunk).unwrap(); + let (result, allocs) = vm.run(&chunk).unwrap().unwrap(); - assert_eq!(vm.stack[0], Value::from(3.1416)); + assert_eq!(result, Value::from(3.1416)); + assert!(vm.stack.is_empty()); + assert!(allocs.is_empty()); } #[test] @@ -202,6 +224,7 @@ mod tests { vec![Op::Nil, Op::Negate], vec![], vec![], + LinkedList::new(), ); let mut vm = VM::new(); @@ -212,16 +235,21 @@ mod tests { } #[test] - fn simple_booleans() { + fn simple_booleans() -> Result<(), VMError> { let chunk = Chunk::new_with( vec![Op::False, Op::Not, Op::False, Op::Not, Op::Equal], vec![], vec![], + LinkedList::new(), ); let mut vm = VM::new(); - vm.run(&chunk).unwrap(); + let (result, allocs) = vm.run(&chunk)?.unwrap(); - assert_eq!(vm.stack[0], true.into()); + assert_eq!(result, true.into()); + assert!(vm.stack.is_empty()); + assert!(allocs.is_empty()); + + Ok(()) } #[test] @@ -230,10 +258,13 @@ mod tests { vec![Op::Nil, Op::Not], vec![], vec![], + LinkedList::new(), ); let mut vm = VM::new(); - vm.run(&chunk).unwrap(); + let (result, allocs) = vm.run(&chunk).unwrap().unwrap(); - assert_eq!(vm.stack[0], true.into()); + assert_eq!(result, true.into()); + assert!(vm.stack.is_empty()); + assert!(allocs.is_empty()); } }