[rlox] Implement Globals (and cleanup a...lot)

This commit is contained in:
ctsk
2024-11-24 20:52:45 +01:00
parent 895b0f6b72
commit 59d5984e3d
4 changed files with 369 additions and 149 deletions

View File

@@ -1,4 +1,4 @@
use crate::gc::{GcHandle, Object, ObjectType};
use crate::gc::{GcHandle, Object};
use std::collections::LinkedList;
use std::convert::From;
use std::fmt::Debug;
@@ -22,7 +22,10 @@ pub enum Op {
Less,
Print,
Pop
Pop,
DefineGlobal { offset: u8 },
GetGlobal { offset: u8 },
}
#[derive(Clone, Debug, PartialEq)]
@@ -87,11 +90,7 @@ impl Display for Value {
None => write!(f, "{}", stringified),
}
}
Value::Obj(object) => match object.get_otype() {
ObjectType::String => {
write!(f, "{}", object)
}
},
Value::Obj(object) => write!(f, "{}", object),
}
}
}
@@ -138,6 +137,11 @@ impl Chunk {
self
}
/// Appends `value` to this chunk's constants without emitting any instruction.
///
/// Returns `self` so further builder calls can be chained.
pub fn add_constant_value(&mut self, value: Value) -> &mut Self {
    let pool = &mut self.constants;
    pool.push(value);
    self
}
pub fn add_constant(&mut self, value: Value, line: usize) -> &mut Self {
self.constants.push(value);
self.add_op(
@@ -216,26 +220,25 @@ impl fmt::Debug for TraceInfo<'_> {
mod tests {
#[test]
fn string_value_equality() {
use crate::bc::Value;
use crate::gc::allocate_string;
use crate::gc::GC;
let s1 = "bla5";
let s2 = "bla6";
unsafe {
let o1 = allocate_string(s1).unwrap();
let o2 = allocate_string(s2).unwrap();
let o3 = allocate_string(s2).unwrap();
let v1 = Value::from(o1.get_object());
let v2 = Value::from(o2.get_object());
let v3 = Value::from(o3.get_object());
let v4 = v2.clone();
let o1 = GC::new_string(s1);
let o2 = GC::new_string(s2);
let o3 = GC::new_string(s2);
let v1 = Value::from(o1.get_object());
let v2 = Value::from(o2.get_object());
let v3 = Value::from(o3.get_object());
let v4 = v2.clone();
assert_ne!(v1, v2);
assert_eq!(v2, v3);
assert_eq!(v2, v4);
}
assert_ne!(v1, v2);
assert_eq!(v2, v3);
assert_eq!(v2, v4);
}
}

View File

@@ -1,46 +1,126 @@
#![allow(unused, dead_code)]
use core::hash;
use std::{
alloc::{alloc, dealloc, Layout, LayoutError},
fmt::{self, Display},
};
/// Api
///
/// Safe-to-call entry points for creating and releasing heap objects.
pub struct GC {}

impl GC {
    /// Allocates a new string object holding a copy of `content`.
    ///
    /// # Panics
    /// Panics if `allocate_string` reports a layout error.
    pub fn new_string(content: &str) -> GcHandle {
        // SAFETY (assumed): `allocate_string` only needs a valid `&str`, which the
        // signature guarantees — TODO confirm against its contract.
        unsafe { allocate_string(content) }.unwrap()
    }

    /// Allocates a new string object containing `first` followed by `second`.
    ///
    /// # Panics
    /// Panics if `concat_string` reports a layout error.
    pub fn new_concat_string(first: ObjString, second: ObjString) -> GcHandle {
        // SAFETY (assumed): both arguments must still point at live string
        // allocations; this is not checked here.
        unsafe { concat_string(first, second) }.unwrap()
    }

    /// Releases the object owned by `handle`.
    ///
    /// `GcHandle` frees its allocation in its `Drop` impl, so dropping the handle
    /// is all that is required. Deallocating explicitly here as well would free
    /// the same memory a second time when `handle` goes out of scope.
    pub fn free(handle: GcHandle) {
        drop(handle);
    }
}
/// Markers
///
/// Runtime type tag carried in an object's `Header`. `String` is the only
/// object kind defined here.
#[derive(PartialEq, Eq, Clone, Copy)]
#[repr(usize)]
pub enum ObjectType {
// Heap-allocated string object.
String,
}
#[repr(C)]
struct ObjectHeader {
otype: ObjectType,
/// Implemented by typed views of a heap object (e.g. `ObjString`) so that
/// `Object::downcast` can convert between the untyped and the typed pointer.
pub(crate) trait IsObject {
/// The type tag that objects of this kind carry in their header.
fn otype() -> ObjectType;
/// Reinterprets `object` as `Self`. The tag comparison is done by
/// `Object::downcast`, not by the `ObjString` implementation of this method.
fn from_object(object: Object) -> Self;
/// Converts the typed view back into an untyped `Object`.
fn upcast(self) -> Object;
}
#[repr(C)]
struct ObjStringHeader {
object_header: ObjectHeader,
len: usize,
/// Object hierarchy and memory layout.
///
/// ```text
/// Object
///   |
/// ObjString
///
/// Object:    --ptr-to--> [ [<otype>], .... data .... ]
/// ObjString: --ptr-to--> [[[<otype>], len], ...data... ]
///                         ^-StringHeader-^
///                         ^----------StringAlloc--------^
/// ```
///
/// `GcHandle` owns the underlying memory and must not be dropped before the
/// corresponding `Object`s are, since those are plain copies of the pointer.
///
/// NOTE(review): `Clone` is derived here while the `Drop` impl deallocates the
/// object, so a cloned handle would free the same allocation again when it is
/// dropped — confirm that handles are never cloned.
#[derive(Debug, Clone, PartialEq)]
pub struct GcHandle {
// Pointer to the allocation this handle is responsible for freeing.
object: Object,
}
#[repr(C)]
struct ObjString {
header: ObjStringHeader,
data: [u8],
/// Frees the owned allocation when the handle goes out of scope.
impl Drop for GcHandle {
    fn drop(&mut self) {
        let owned = self.object;
        // SAFETY (assumed): this handle is the only owner of the allocation, so
        // it has not been freed before — not verified here.
        unsafe { deallocate_object(owned) };
    }
}
const fn data_offset() -> usize {
std::mem::size_of::<ObjStringHeader>()
impl GcHandle {
    /// Returns a copy of the `Object` pointer held by this handle.
    ///
    /// Ownership of the allocation stays with the handle.
    pub fn get_object(&self) -> Object {
        self.object
    }
}
#[derive(Copy, Clone)]
pub struct Object {
ptr: *mut ObjectHeader,
ptr: *mut Header,
}
/// Typed view of a string object: a raw pointer to the allocation's
/// `StringHeader`. Copying an `ObjString` copies only the pointer; equality is
/// provided by a separate `PartialEq` impl rather than derived.
#[derive(Copy, Clone, Eq)]
pub struct ObjString {
// Start of the string allocation (header first, bytes after it).
ptr: *mut StringHeader,
}
/// Lets `Object::downcast` produce an `ObjString` for objects tagged `String`.
impl IsObject for ObjString {
    fn otype() -> ObjectType {
        ObjectType::String
    }

    /// Pure pointer reinterpretation; no tag check happens here.
    fn from_object(object: Object) -> ObjString {
        let ptr = object.ptr.cast::<StringHeader>();
        ObjString { ptr }
    }

    /// Pure pointer reinterpretation back to the common header type.
    fn upcast(self) -> Object {
        let ptr = self.ptr.cast::<Header>();
        Object { ptr }
    }
}
/// Common header that `Object` points at; it holds only the type tag.
#[repr(C)]
struct Header {
// Tag identifying which concrete object kind this allocation holds.
otype: ObjectType,
}
/// Shape of a complete string allocation: the `StringHeader` followed by the
/// string's bytes. The trailing `[u8]` makes this an unsized type.
#[repr(C)]
struct StringAlloc {
header: StringHeader,
// The string contents; their length is recorded in `header.len`.
data: [u8],
}
/// Fixed-size prefix of a string allocation. `object_header` is the first field
/// of this `#[repr(C)]` struct, which is what allows a `*mut StringHeader` to be
/// cast to `*mut Header` and back.
#[repr(C)]
struct StringHeader {
object_header: Header,
// Number of bytes of string data stored after the header.
len: usize,
}
/// Size of `StringHeader` in bytes, used as the offset of the string data.
const fn data_offset() -> usize {
    use std::mem::size_of;
    size_of::<StringHeader>()
}
/// Pretty-print Object
impl Display for Object {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.get_otype() {
ObjectType::String => {
write!(f, "{}", ObjString::as_str(self.ptr as *mut ObjStringHeader))
}
ObjectType::String =>
fmt::Display::fmt(&self.downcast::<ObjString>().unwrap(), f)
}
}
}
@@ -49,16 +129,22 @@ impl Object {
/// Reads the type tag stored in the object's header.
pub fn get_otype(&self) -> ObjectType {
    let header = self.ptr;
    // SAFETY (assumed): `ptr` still refers to a live allocation — not checked here.
    unsafe { (*header).otype }
}
/// Converts this object into the typed view `T`.
///
/// Returns `Some` only when the object's tag equals `T::otype()`, otherwise `None`.
pub(crate) fn downcast<T: IsObject>(self) -> Option<T> {
    let tag_matches = self.get_otype() == T::otype();
    tag_matches.then(|| T::from_object(self))
}
}
impl fmt::Debug for Object {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.get_otype() {
ObjectType::String => {
let string = self.ptr as *mut ObjStringHeader;
let data = ObjString::as_str(string);
write!(f, "STR {} {:?}", data.len(), &data[..8.min(data.len())],)
let string = self.downcast::<ObjString>().unwrap().as_str();
write!(f, "STR {} {:?}", string.len(), &string[..8.min(string.len())])
}
}
}
@@ -77,27 +163,58 @@ impl PartialEq for Object {
match (*self.ptr).otype {
ObjectType::String => {
let header = self.ptr as *mut ObjStringHeader;
let other_header = other.ptr as *mut ObjStringHeader;
if (*header).len != (*other_header).len {
return false;
}
let slice = ObjString::as_str(header);
let other_slice = ObjString::as_str(other_header);
slice == other_slice
self.downcast::<ObjString>() == other.downcast::<ObjString>()
}
}
}
}
}
/// Two string objects are equal when they hold the same bytes, regardless of
/// whether they share an allocation.
impl PartialEq for ObjString {
    fn eq(&self, other: &Self) -> bool {
        // SAFETY (assumed): both pointers refer to live string allocations.
        let (own_len, other_len) = unsafe { ((*self.ptr).len, (*other.ptr).len) };
        // Compare the recorded lengths first; the contents are only read when they match.
        own_len == other_len && self.as_slice() == other.as_slice()
    }
}
impl ObjString {
    /// Borrows the string's bytes directly out of the allocation.
    ///
    /// The returned lifetime `'a` is not tied to `self`, so the caller must not
    /// use the slice after the allocation has been freed.
    fn as_slice<'a>(&self) -> &'a [u8] {
        // SAFETY (assumed): `ptr` refers to a live string allocation.
        let len = unsafe { (*self.ptr).len };
        // The layout computation also yields where the data starts in the allocation.
        let (_, data_start) = StringAlloc::layout(len).unwrap();
        let base = self.ptr.cast::<u8>();
        // SAFETY (assumed): the allocation was created with this same layout, so
        // `len` bytes are readable starting at `data_start`.
        unsafe { std::slice::from_raw_parts(base.add(data_start), len) }
    }

    /// Views the string's bytes as `&str` without validating them.
    fn as_str<'a>(&self) -> &'a str {
        let bytes = self.as_slice();
        // SAFETY (assumed): the bytes were copied from `&str` values when the
        // object was built — TODO confirm for every allocation path.
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }
}
/// Prints the string contents, honoring the formatter's width and padding flags
/// by delegating to `str`'s own `Display`.
impl fmt::Display for ObjString {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = self.as_str();
        <str as fmt::Display>::fmt(text, f)
    }
}
/// Hashes the string contents rather than the pointer, matching the
/// content-based `PartialEq` impl.
impl std::hash::Hash for ObjString {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        use std::hash::Hash;
        self.as_str().hash(state);
    }
}
impl StringAlloc {
fn layout(length: usize) -> Result<(Layout, usize), LayoutError> {
let (layout, offset) = Layout::for_value(&ObjStringHeader {
object_header: ObjectHeader {
let (layout, offset) = Layout::for_value(&StringHeader {
object_header: Header {
otype: ObjectType::String,
},
len: length,
@@ -106,47 +223,34 @@ impl ObjString {
Ok((layout.pad_to_align(), offset))
}
fn as_bytes<'a>(ptr: *const ObjStringHeader) -> &'a [u8] {
unsafe {
std::slice::from_raw_parts((ptr as *mut u8).offset(data_offset() as isize), (*ptr).len)
}
}
fn as_str<'a>(ptr: *const ObjStringHeader) -> &'a str {
unsafe { std::str::from_utf8_unchecked(ObjString::as_bytes(ptr)) }
}
}
pub unsafe fn allocate_string_obj<'a>(
unsafe fn allocate_string_obj<'a>(
length: usize,
) -> Result<(GcHandle, &'a mut [u8]), LayoutError> {
let (layout, offset) = ObjString::layout(length)?;
let (layout, offset) = StringAlloc::layout(length)?;
let allocation = alloc(layout);
let data_ptr = allocation.offset(offset as isize);
let header = allocation as *mut ObjStringHeader;
let header = allocation as *mut StringHeader;
(*header).len = length;
(*header).object_header.otype = ObjectType::String;
let object = Object {
ptr: header as *mut ObjectHeader,
ptr: header as *mut Header,
};
let str = std::slice::from_raw_parts_mut(data_ptr, length);
Ok((GcHandle { object }, str))
}
pub unsafe fn allocate_string(content: &str) -> Result<GcHandle, LayoutError> {
unsafe fn allocate_string(content: &str) -> Result<GcHandle, LayoutError> {
let (gc_handle, slice) = allocate_string_obj(content.len())?;
slice.copy_from_slice(content.as_bytes());
Ok(gc_handle)
}
pub unsafe fn concat_string(a: Object, b: Object) -> Result<GcHandle, LayoutError> {
let a_head = a.ptr as *mut ObjStringHeader;
let b_head = b.ptr as *mut ObjStringHeader;
let a_data = ObjString::as_bytes(a_head);
let b_data = ObjString::as_bytes(b_head);
let new_len = a_data.len() + b_data.len();
unsafe fn concat_string(a: ObjString, b: ObjString) -> Result<GcHandle, LayoutError> {
let (a_data,b_data) = (a.as_slice(), b.as_slice());
let new_len = a_data.len() + b_data.len();
let (gc_handle, slice) = allocate_string_obj(new_len)?;
slice[..a_data.len()].copy_from_slice(a_data);
@@ -158,28 +262,12 @@ pub unsafe fn concat_string(a: Object, b: Object) -> Result<GcHandle, LayoutErro
unsafe fn deallocate_object(object: Object) {
match object.get_otype() {
ObjectType::String => {
let header = object.ptr as *mut ObjStringHeader;
let header = object.ptr as *mut StringHeader;
dealloc(
object.ptr as *mut u8,
ObjString::layout((*header).len).unwrap().0,
StringAlloc::layout((*header).len).unwrap().0,
)
}
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct GcHandle {
object: Object,
}
impl Drop for GcHandle {
fn drop(&mut self) {
unsafe { deallocate_object(self.object) };
}
}
impl GcHandle {
pub fn get_object(&self) -> Object {
return self.object;
}
}

View File

@@ -1,10 +1,12 @@
use std::fmt;
#![allow(dead_code, unused)]
use std::{collections::hash_map, fmt};
use std::iter::Peekable;
use std::str::CharIndices;
use std::collections::HashMap;
use crate::bc::{Chunk, Op};
use crate::gc::allocate_string;
use crate::bc::Value;
use crate::{bc::{Chunk, Op}, gc::GC};
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum ScanErrorKind {
@@ -285,24 +287,22 @@ struct Parser<'src> {
#[derive(Debug, PartialEq)]
pub enum ParseErrorKind {
InvalidNumber,
UnexpectedEOF,
IncompleteExpression,
NoSemicolonAfterValue,
NoSemicolonAfterExpression,
NoVariableName,
NoSemicolonAfterVarDecl,
}
impl fmt::Display for ParseErrorKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
ParseErrorKind::InvalidNumber => todo!(),
ParseErrorKind::UnexpectedEOF => todo!(),
ParseErrorKind::IncompleteExpression => {
write!(f, "Expect expression.")
},
ParseErrorKind::NoSemicolonAfterValue => todo!(),
ParseErrorKind::NoSemicolonAfterExpression => {
write!(f, "Expect ';' after expression.")
},
ParseErrorKind::IncompleteExpression => write!(f, "Expect expression."),
ParseErrorKind::NoSemicolonAfterValue => write!(f, "Expect ';' after value."),
ParseErrorKind::NoSemicolonAfterExpression => write!(f, "Expect ';' after expression."),
ParseErrorKind::NoVariableName => write!(f, "Expect variable name."),
ParseErrorKind::NoSemicolonAfterVarDecl => write!(f, "Expect ';' after variable declaration."),
}
}
}
@@ -336,7 +336,7 @@ enum Associativity {
NonAssoc,
}
#[derive(Clone, Copy, PartialOrd, Ord, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq)]
enum Precedence {
None,
Assignment,
@@ -409,6 +409,21 @@ impl<'src> Parser<'src> {
}
}
/// Interns `string` as a constant of `chunk` and returns its constant index.
///
/// A string seen before reuses the index stored in `intern_table`. Otherwise a
/// new string object is allocated, appended to `chunk.constants`, and its
/// handle is stored in `chunk.allocations`.
///
/// # Panics
/// Panics if the new constant's index does not fit in a `u8`.
fn add_string(&mut self, chunk: &mut Chunk, string: &'src str) -> u8 {
    use std::convert::TryFrom;

    match self.intern_table.entry(string) {
        hash_map::Entry::Occupied(entry) => *entry.get(),
        hash_map::Entry::Vacant(entry) => {
            // The new constant lands at the current end of the pool. Converting
            // checked avoids both the `0u8 - 1` overflow at index 255 and the
            // silent wrap-around to a wrong index beyond that.
            let offset = u8::try_from(chunk.constants.len())
                .expect("too many constants in one chunk");
            let handle = GC::new_string(string);
            chunk.add_constant_value(Value::from(handle.get_object()));
            chunk.allocations.push_front(handle);
            *entry.insert(offset)
        }
    }
}
fn _expression(&mut self, chunk: &mut Chunk, min_prec: Precedence) -> Result<'src, ()> {
match self.scanner.next() {
None => return Err(self.error_end(ParseErrorKind::IncompleteExpression)),
@@ -430,18 +445,13 @@ impl<'src> Parser<'src> {
}
TokenType::String => {
let without_quotes = &token.span[1..(token.span.len() - 1)];
match self.intern_table.get(without_quotes) {
Some(&index) => {
chunk.add_op(Op::Constant { offset: index }, token.line);
}
None => {
let object = unsafe { allocate_string(without_quotes) }.unwrap();
chunk.add_constant(object.get_object().into(), token.line);
self.intern_table
.insert(without_quotes, chunk.constants.len() as u8 - 1);
chunk.allocations.push_front(object);
}
};
let offset = self.add_string(chunk, without_quotes);
chunk.add_op(
Op::Constant {
offset,
},
token.line
);
}
TokenType::LeftParen => {
self._expression(chunk, Precedence::None)?;
@@ -456,6 +466,10 @@ impl<'src> Parser<'src> {
TokenType::False => {
chunk.add_op(Op::False, token.line);
}
TokenType::Identifier => {
let offset = self.add_string(chunk, token.span);
chunk.add_op(Op::GetGlobal {offset}, token.line);
}
_ => {
return Err(self.error_at(token, ParseErrorKind::IncompleteExpression));
}
@@ -504,31 +518,31 @@ impl<'src> Parser<'src> {
self._expression(chunk, Precedence::None)
}
/// Consumes the next token if it has type `expected` and returns it.
///
/// Otherwise nothing is consumed and an error of `error_kind` is returned,
/// located at the offending token or at the end of input when none is left.
pub fn must_consume(&mut self, expected: TokenType, error_kind: ParseErrorKind) -> Result<'src, Token<'src>> {
    let upcoming = self.scanner.peek().cloned();
    match upcoming {
        None => Err(self.error_end(error_kind)),
        Some(token) => {
            if token.ttype == expected {
                Ok(self.scanner.next().unwrap())
            } else {
                Err(self.error_at(token, error_kind))
            }
        }
    }
}
pub fn print_statement(&mut self, print_token: Token<'src>, chunk: &mut Chunk) -> Result<'src, ()> {
self.expression(chunk)?;
chunk.add_op(Op::Print, print_token.line);
match self.scanner.next_if(|t| t.ttype == TokenType::Semicolon) {
Some(_) => Ok(()),
None => {
let location = self.scanner.peek().cloned();
Err(self.error_at_or_end(location, ParseErrorKind::NoSemicolonAfterValue))
},
}
self.must_consume(TokenType::Semicolon, ParseErrorKind::NoSemicolonAfterValue).map(|_| ())
}
pub fn expr_statement(&mut self, chunk: &mut Chunk) -> Result<'src, ()> {
fn expr_statement(&mut self, chunk: &mut Chunk) -> Result<'src, ()> {
self.expression(chunk)?;
chunk.add_op(Op::Pop, 0);
match self.scanner.next_if(|t| t.ttype == TokenType::Semicolon) {
Some(_) => Ok(()),
None => {
let location = self.scanner.peek().cloned();
Err(self.error_at_or_end(location, ParseErrorKind::NoSemicolonAfterExpression))
},
}
let pop_line =
self.must_consume(TokenType::Semicolon, ParseErrorKind::NoSemicolonAfterExpression)
.map(|tok| tok.line)?;
chunk.add_op(Op::Pop, pop_line);
Ok(())
}
pub fn statement(&mut self, chunk: &mut Chunk) -> Result<'src, ()> {
fn statement(&mut self, chunk: &mut Chunk) -> Result<'src, ()> {
match self.scanner.peek().unwrap().ttype {
TokenType::Print => {
let print_token = self.scanner.next().unwrap();
@@ -538,7 +552,7 @@ impl<'src> Parser<'src> {
}
}
pub fn synchronize(&mut self) {
fn synchronize(&mut self) {
use TokenType::*;
while let Some(_token) = self.scanner.next_if(
@@ -546,13 +560,42 @@ impl<'src> Parser<'src> {
) {}
}
pub fn declaration(&mut self, chunk: &mut Chunk) {
self.statement(chunk).unwrap_or_else(
|err| {
self.errors.push(err);
self.synchronize();
fn var_declaration(&mut self, var_token: Token<'src>, chunk: &mut Chunk) -> Result<'src, ()> {
let ident = self.must_consume(TokenType::Identifier, ParseErrorKind::NoVariableName)?;
let offset = self.add_string(chunk, ident.span);
match self.scanner.peek() {
Some(token) if token.ttype == TokenType::Equal => {
self.expression(chunk)?;
},
_ => {
chunk.add_op(Op::Nil, ident.line);
}
)
}
chunk.add_op(Op::DefineGlobal { offset }, ident.line);
self.must_consume(TokenType::Semicolon, ParseErrorKind::NoSemicolonAfterVarDecl)?;
Ok(())
}
/// Parses one declaration: a `var` declaration or, failing that, a statement.
///
/// Any parse error is recorded in `self.errors` and followed by
/// `synchronize()`. This applies to both branches; previously the result of a
/// `var` declaration was silently discarded.
///
/// # Panics
/// Panics if no token is left to peek at.
pub fn declaration(&mut self, chunk: &mut Chunk) {
    let peeked = self.scanner.peek().unwrap().clone();
    let outcome = match peeked.ttype {
        TokenType::Var => {
            // Consume the `var` keyword before parsing the rest.
            self.scanner.next();
            self.var_declaration(peeked, chunk)
        }
        _ => self.statement(chunk),
    };
    if let Err(err) = outcome {
        self.errors.push(err);
        self.synchronize();
    }
}
pub fn compile(&mut self, chunk: &mut Chunk) {
@@ -799,7 +842,7 @@ mod tests {
use crate::bc::Op::*;
let expected = Chunk::new_with(
vec![Constant { offset: 0 }, Constant { offset: 1 }, Add, Print],
vec![],
vec![1, 1, 1, 1],
vec![Value::from(1.0), Value::from(1.0)],
LinkedList::new(),
);
@@ -810,14 +853,14 @@ mod tests {
#[test]
fn basic_print_string_statement() {
let source = "print \"string\";";
let allocation = unsafe { allocate_string("string").unwrap() };
let allocation = GC::new_string("string");
let object = allocation.get_object();
let mut allocations = LinkedList::new();
allocations.push_front(allocation);
use crate::bc::Op::*;
let expected = Chunk::new_with(
vec![Constant { offset: 0 }, Print],
vec![],
vec![1, 1],
vec![Value::from(object)],
allocations,
);
@@ -831,11 +874,40 @@ mod tests {
use crate::bc::Op::*;
let expected = Chunk::new_with(
vec![Constant { offset: 0 }, Constant { offset: 1 }, Divide, Pop],
vec![],
vec![1, 1, 1, 1],
vec![Value::from(1.0), Value::from(1.0)],
LinkedList::new(),
);
test_parse_program(source, &expected);
}
/// A bare `var x;` compiles to `Nil` followed by `DefineGlobal` for the name constant.
#[test]
fn basic_var_decl() {
    use crate::bc::Op::*;

    let source = "var x;";
    let name = GC::new_string("x");

    let code = vec![Nil, DefineGlobal { offset: 0 }];
    // Presumably one source line per emitted op.
    let lines = vec![1, 1];
    let constants = vec![Value::from(name.get_object())];
    let expected = Chunk::new_with(code, lines, constants, LinkedList::new());

    test_parse_program(source, &expected);
}
/// `var x = 1 + 1;` compiles the initializer and then defines the global.
///
/// The name constant is interned first (offset 0), so the two number constants
/// follow at offsets 1 and 2.
#[test]
fn basic_var_decl_with_initializer() {
    let source = "var x = 1 + 1;";
    use crate::bc::Op::*;
    let x = GC::new_string("x");
    let expected = Chunk::new_with(
        vec![Constant { offset: 1 }, Constant { offset: 2 }, Add, DefineGlobal { offset: 0 }],
        // Every op originates from source line 1, as in the sibling parser tests.
        vec![1, 1, 1, 1],
        vec![x.get_object().into(), Value::from(1.0), Value::from(1.0)],
        LinkedList::new(),
    );
    test_parse_program(source, &expected);
}
}

View File

@@ -1,6 +1,6 @@
use crate::bc::{Chunk, Op, TraceInfo, Value};
use crate::gc::{concat_string, GcHandle, ObjectType};
use std::collections::LinkedList;
use crate::gc::{GcHandle, ObjString, ObjectType, GC};
use std::collections::{HashMap, LinkedList};
use std::io;
use std::rc::Rc;
@@ -72,6 +72,7 @@ impl VM {
output: &mut Output,
) -> Result<()> {
let mut allocations: LinkedList<GcHandle> = LinkedList::new();
let mut globals: HashMap<ObjString, Value> = HashMap::new();
while self.pc < chunk.code.len() {
let instr = chunk.code[self.pc];
@@ -126,7 +127,8 @@ impl VM {
match a {
Value::Obj(a) => match a.get_otype() {
ObjectType::String => {
let new_obj = unsafe { concat_string(a, b).unwrap() };
let (a, b) = (a.downcast().unwrap(), b.downcast().unwrap());
let new_obj = GC::new_concat_string(a, b);
self.push(Value::from(new_obj.get_object()));
allocations.push_front(new_obj);
Ok(())
@@ -178,6 +180,34 @@ impl VM {
},
Op::Pop => {
self.pop()?;
},
Op::DefineGlobal { offset } => {
let ident = chunk.constants[offset as usize].clone();
if let Value::Obj(name) = ident {
let name = name.downcast::<ObjString>().unwrap();
globals.entry(name).or_insert(self.pop()?);
Ok(())
} else {
unreachable!()
}?
},
Op::GetGlobal { offset } => {
let ident = match chunk.constants[offset as usize] {
Value::Obj(object) => object.downcast::<ObjString>().unwrap(),
_ => todo!(),
};
if let Some(value) = globals.get(&ident) {
self.push(value.clone());
Ok(())
} else {
Err(
VMError::Runtime(
format!("Undefined variable '{}'.", ident).into(),
self.pc,
))
}?
}
}
}
@@ -190,6 +220,8 @@ impl VM {
mod tests {
use std::collections::LinkedList;
use crate::gc::GC;
use super::{Chunk, Op, VMError, Value, VM};
#[test]
@@ -269,4 +301,29 @@ mod tests {
Ok(())
}
/// Defines a global as 5, reads it back and multiplies it by 6.
#[test]
fn read_write_globals() -> Result<(), VMError> {
    use Op::*;

    let var = GC::new_string("global");

    let code = vec![
        Constant { offset: 0 },
        DefineGlobal { offset: 1 },
        Constant { offset: 2 },
        GetGlobal { offset: 1 },
        Multiply,
    ];
    let constants = vec![
        Value::from(5.0),
        Value::from(var.get_object()),
        Value::from(6.0),
    ];
    let chunk = Chunk::new_with(code, vec![], constants, LinkedList::new());

    let mut vm = VM::new();
    vm.stdrun(&chunk)?;

    // 6.0 (pushed constant) * 5.0 (value read back from the global).
    assert_eq!(vm.stack, vec![Value::Number(30.0)]);
    Ok(())
}
}