use std::{env, iter::Peekable, rc::Rc, slice::Iter, str::Chars, vec::IntoIter}; use rug::Float; use crate::{ environment::Environment, node::{ NodeEnum, add::Add, assign::Assign, call::Call, comparison::{Greater, GreaterEquals, Less, LessEquals}, constant::{Constant, ConstantValue}, divide::Divide, equals::Equals, exponent::Exponent, function::{Function, FunctionType}, if_else::{Bool, ElseBranchEnum, IfElse}, multiply::Multiply, set::Set, string_node::StringNode, subtract::Subtract, symbol::Symbol, }, }; #[derive(Debug)] pub struct Token(usize, TokenType); #[derive(Debug, PartialEq, Clone)] pub enum TokenType { // Space, Number(String), Identifier(String), String(String), Plus, Minus, Star, Slash, Hat, Equals, Greater, Less, GreaterEquals, LessEquals, ColonEquals, LeftArrow, RParen, LParen, RSquare, LSquare, Comma, If, Then, Else, End, True, False, } impl TokenType { pub fn len(&self) -> usize { match self { TokenType::Number(n) => n.to_string().len(), TokenType::Identifier(s) | TokenType::String(s) => s.len(), TokenType::Plus => 1, TokenType::Minus => 1, TokenType::Star => 1, TokenType::Slash => 1, TokenType::Hat => 1, TokenType::Equals => 1, TokenType::Greater => 1, TokenType::Less => 1, TokenType::GreaterEquals => 2, TokenType::LessEquals => 2, TokenType::ColonEquals => 2, TokenType::LeftArrow => 2, TokenType::RParen => 1, TokenType::LParen => 1, TokenType::RSquare => 1, TokenType::LSquare => 1, TokenType::Comma => 1, TokenType::If => 2, TokenType::Then => 4, TokenType::Else => 4, TokenType::End => 3, TokenType::True => 4, TokenType::False => 5, } } } pub struct Lexer<'a> { source: Peekable>, } #[derive(Debug)] pub enum LexerError { UnexpectedChar(usize, String), } impl<'a> Lexer<'a> { pub fn new(source: &'a String) -> Self { Self { source: source.chars().peekable(), } } pub fn lex(&'a mut self) -> Result, LexerError> { let mut i = 0; let mut tokens = vec![]; while let Some(c) = self.source.next() { match c { // Collapse spaces into a single Space token ' ' => { while self.source.peek() == Some(&' ') { self.source.next(); i += 1; } // tokens.push(Token(i, TokenType::Space)); } // Comments with // '/' if self.source.peek() == Some(&'/') => { while self.source.next() != Some('\n') { i += 1; } } // Numbers with decimal points '0'..='9' | '.' => { let mut digit = String::from(c); let mut has_decimal = c == '.'; loop { let d = self.source.peek(); match d { Some('0'..='9') => { digit.push(*d.unwrap()); self.source.next(); i += 1; } Some('.') => { if has_decimal { return Err(LexerError::UnexpectedChar( i + 1, "Invalid digit with multiple decimal points".into(), )); } digit.push(*d.unwrap()); self.source.next(); i += 1; has_decimal = true; } _ => { break; } } } // if let Some(v) = { // v // } else { // return Err(LexerError::NumberParse( // i, // format!("Failed to convert {digit} to a number"), // )); // }; tokens.push(Token(i, TokenType::Number(digit))); } '"' => { let mut buffer = "".to_owned(); loop { let next = self.source.peek(); match next { Some('"') => { tokens.push(Token(i, TokenType::String(buffer.clone()))); self.source.next(); break; } Some(_) => { buffer.push(self.source.next().unwrap()); } None => { return Err(LexerError::UnexpectedChar( i, "Unexpected End of file".to_owned(), )); } } } } // LeftArrow (->) '-' if self.source.peek() == Some(&'>') => { self.source.next(); i += 1; tokens.push(Token(i, TokenType::LeftArrow)); } '<' if self.source.peek() == Some(&'=') => { self.source.next(); i += 1; tokens.push(Token(i, TokenType::LessEquals)); } '>' if self.source.peek() == Some(&'=') => { self.source.next(); i += 1; tokens.push(Token(i, TokenType::GreaterEquals)); } '<' => tokens.push(Token(i, TokenType::Less)), '>' => tokens.push(Token(i, TokenType::Greater)), '+' => tokens.push(Token(i, TokenType::Plus)), '-' => tokens.push(Token(i, TokenType::Minus)), '*' => tokens.push(Token(i, TokenType::Star)), '/' => tokens.push(Token(i, TokenType::Slash)), '=' => tokens.push(Token(i, TokenType::Equals)), ',' => tokens.push(Token(i, TokenType::Comma)), '^' => tokens.push(Token(i, TokenType::Hat)), ':' if self.source.peek() == Some(&'=') => { self.source.next(); i += 1; tokens.push(Token(i, TokenType::ColonEquals)); } '(' => tokens.push(Token(i, TokenType::LParen)), ')' => tokens.push(Token(i, TokenType::RParen)), '[' => tokens.push(Token(i, TokenType::LSquare)), ']' => tokens.push(Token(i, TokenType::RSquare)), _ if c.is_alphabetic() || c == '_' => { tokens.push(self.lex_identifier(&mut i, c)?); } _ => { return Err(LexerError::UnexpectedChar( i, format!("Unexpected char {}", c), )); } } i += 1; } Ok(tokens) } fn lex_identifier<'b>(&'b mut self, i: &mut usize, c: char) -> Result { let mut identifier = c.to_string(); while let Some(c) = self.source.peek() { if c.is_alphanumeric() || c == &'_' || c == &'\'' { identifier.push(*c); self.source.next(); *i += 1; } else { break; } } Ok(Token( *i, match identifier.to_lowercase().as_str() { "if" => TokenType::If, "then" => TokenType::Then, "else" => TokenType::Else, "end" => TokenType::End, "true" => TokenType::True, "false" => TokenType::False, _ => TokenType::Identifier(identifier), }, )) } } pub enum ParserError { UnexpectedEndOfTokens(String), UnexpectedToken(usize, usize, String), Unimplemented(usize, usize, String), UnexpectedNode(usize, String), NumberParse(usize, String), } /// Recursive descent parser pub struct Parser<'a> { tokens: Peekable>, environment: &'a mut Environment, previous: Option, } type Tokens<'a> = Peekable>; impl<'a> Parser<'a> { pub fn new(tokens: Vec, env: &'a mut Environment) -> Self { // #[cfg(debug_assertions)] // println!("\r{tokens:?}"); Self { tokens: tokens.into_iter().peekable(), environment: env, previous: None, } } // Parse tokens recursively and descendentantly pub fn parse(&mut self) -> Result>, ParserError> { let mut expressions = vec![]; while self.tokens.peek().is_some() { expressions.push(self.expression()?); } Ok(expressions) } #[inline] fn consume<'b>(&'b mut self) -> &'b Option { self.previous = self.tokens.next(); &self.previous } #[inline] fn is_at_end(&mut self) -> bool { if self.tokens.peek().is_none() { return true; } false } /// Checks if the next token is `t`, if it is then consume it and return true. Otherwise does /// nothing and returns false. #[inline] fn matchType<'b>(&'b mut self, t: TokenType) -> bool { if let Some(Token(_, token_type)) = self.tokens.peek() { if *token_type == t { self.consume(); return true; } } false } #[inline] fn matchOrErr(&mut self, t: TokenType) -> Result { let (i, tt) = if let Some(Token(i, tt)) = self.tokens.peek() { (*i, tt.clone()) } else { return Err(ParserError::UnexpectedEndOfTokens(format!( "Expected {t:?} but found nothing instead" ))); }; if self.matchType(t.clone()) { Ok(true) } else { Err(ParserError::UnexpectedToken( i, tt.len(), format!("Expected {t:?} but found {tt:?} instead"), )) } } fn expression(&mut self) -> Result, ParserError> { self.assignment() } fn assignment(&mut self) -> Result, ParserError> { let expr = self.equality()?; if self.matchType(TokenType::ColonEquals) { return Ok(Rc::new(Assign::new(expr, self.equality()?).into())); } Ok(expr) } fn equality(&mut self) -> Result, ParserError> { // TODO: Implement equality let expr = self.comparison()?; if self.matchType(TokenType::Equals) { let mut expressions = vec![]; loop { expressions.push(self.comparison()?); if !self.matchType(TokenType::Equals) { break; } } return Ok(Rc::new(Equals::new(expr, expressions).into())); } Ok(expr) } fn comparison(&mut self) -> Result, ParserError> { // TODO: Implement comparison let expr = self.term()?; let t = if let Some(Token(i, t)) = self.tokens.peek() { t.clone() } else { return Ok(expr); }; if match t { TokenType::Greater => true, TokenType::Less => true, TokenType::GreaterEquals => true, TokenType::LessEquals => true, _ => false, } { self.consume(); let mut expressions = vec![]; loop { expressions.push(self.term()?); if !self.matchType(t.clone()) { break; } } return match t { TokenType::Greater => Ok(Greater::new(expr, expressions)), TokenType::Less => Ok(Less::new(expr, expressions)), TokenType::GreaterEquals => Ok(GreaterEquals::new(expr, expressions)), TokenType::LessEquals => Ok(LessEquals::new(expr, expressions)), _ => panic!(), }; } Ok(expr) } fn term(&mut self) -> Result, ParserError> { let expr = self.factor()?; if self.matchType(TokenType::Plus) { Ok(Rc::new(Add::new(expr, self.comparison()?).into())) } else if let Some(Token(_, TokenType::Minus)) = self.tokens.peek() { self.consume(); Ok(Rc::new(Subtract::new(expr, self.comparison()?).into())) } else { Ok(expr) } } fn factor(&mut self) -> Result, ParserError> { let expr = self.unary()?; if let Some(Token(_, TokenType::Star)) = self.tokens.peek() { self.consume(); Ok(Rc::new(Multiply::new(expr, self.comparison()?).into())) } else if let Some(Token(_, TokenType::Slash)) = self.tokens.peek() { self.consume(); Ok(Rc::new(Divide::new(expr, self.comparison()?).into())) } else { Ok(expr) } } fn unary(&mut self) -> Result, ParserError> { self.exponent() } fn exponent(&mut self) -> Result, ParserError> { let expr = self.call(); if self.matchType(TokenType::Hat) { let right = self.unary()?; return Ok(Exponent::new(expr?, right)); } expr } fn call(&mut self) -> Result, ParserError> { // Left hand side let mut expr = self.function()?; // Calls are right-associative, so we evaluate right-to-left loop { let (i, t) = if let Some(Token(i, x)) = self.tokens.peek() { (*i, x.clone()) } else { return Ok(expr); }; // If the next token is a parenthesis then we construct a call if t == TokenType::LParen { self.consume(); // Calls can have 0 arguments, so check and return early if self.matchType(TokenType::RParen) { expr = Call::new(expr, vec![]); } else { // Parse expressions until a patching Right-Parenthesis is found let mut parameters = vec![self.equality()?]; while self.matchType(TokenType::Comma) { parameters.push(self.equality()?); } if !self.matchType(TokenType::RParen) { return Err(ParserError::UnexpectedToken( i, t.len(), "Unclosed right parenthesis".to_owned(), )); } // If the next token is a ColonEquals (assignment) then // the user wants function assignment sugar // // Ie f(x) := x*5 => f := x -> x*5 if self.matchType(TokenType::ColonEquals) { if let NodeEnum::Symbol(_) = expr.as_ref() { } else { let Token(i, token) = self.previous.as_ref().unwrap(); return Err(ParserError::UnexpectedToken( *i, token.len(), format!( "Expected an Identifier here but found a {}", expr.type_str() ), )); }; // Parse body let body = self.equality()?; // Convert vector of expressions to vector of symbols let mut arguments = Vec::with_capacity(parameters.len()); for param in parameters.into_iter() { if let NodeEnum::Symbol(symbol) = Rc::::try_unwrap(param).unwrap() { arguments.push(symbol); } else { return Err(ParserError::UnexpectedToken( i, t.len(), format!("One or more argument is not a Symbol",), )); } } // Early exit with new desugared expression return Ok(Rc::new( Assign::new( expr, Function::new(FunctionType::UserFunction(body, arguments)), ) .into(), )); } else { expr = Call::new(expr, parameters); } } } else { break; } } Ok(expr) } fn function(&mut self) -> Result, ParserError> { let error_loc = if let Some(Token(i, _)) = self.tokens.peek() { *i } else { 0 }; let expr = self.if_else()?; if self.matchType(TokenType::LeftArrow) { let right = self.equality()?; match expr.clone().as_ref() { NodeEnum::Symbol(symbol) => { return Ok(Function::new(FunctionType::UserFunction( right, vec![symbol.clone()], ))); } NodeEnum::Set(set) => { let mut symbols = vec![]; for (i, value) in set.get_values().into_iter().enumerate() { match value.as_ref() { NodeEnum::Symbol(symbol) => symbols.push(symbol.clone()), _ => { return Err(ParserError::UnexpectedNode( error_loc, format!( "Expected set of Identifiers, but argument #{i} is a {value:?}" ), )); } } } return Ok(Function::new(FunctionType::UserFunction(right, symbols))); } _ => { return Err(ParserError::UnexpectedNode( error_loc, format!("Expected Set, got {:?}", expr), )); } } } Ok(expr) } fn if_else(&mut self) -> Result, ParserError> { if self.matchType(TokenType::If) { let condition = self.equality()?; let _ = self.matchOrErr(TokenType::Then)?; let mut expressions = vec![]; while !(self.matchType(TokenType::End) || self.matchType(TokenType::Else)) { if self.is_at_end() { return Err(ParserError::UnexpectedEndOfTokens( "Expected an else or end here".to_owned(), )); } expressions.push(self.expression()?); } // Safe to unwrap since the while loop would terminate if previous was none (it didnt // find an End or Else before running out of tokens) let else_branch = match self.previous.as_ref().unwrap() { Token(_, TokenType::End) => ElseBranchEnum::None, Token(_, TokenType::Else) => { if let Some(Token(_, TokenType::If)) = self.tokens.peek() { ElseBranchEnum::ElseIf(self.if_else()?) } else { let mut expressions = vec![]; while !self.matchType(TokenType::End) { if self.is_at_end() { return Err(ParserError::UnexpectedEndOfTokens( "Expected an end here".to_owned(), )); } expressions.push(self.expression()?); } ElseBranchEnum::Block(expressions) } } _ => panic!("Not possible"), }; return Ok(IfElse::new(condition, expressions, else_branch)); } self.set() } fn set(&mut self) -> Result, ParserError> { if self.matchType(TokenType::LSquare) { // Empty set if self.matchType(TokenType::RSquare) { return Ok(Set::new(vec![])); } let mut values = vec![self.equality()?]; while { if let Some(Token(_, TokenType::RSquare)) = self.tokens.peek() { self.consume(); false } else { true } } { let (i, token) = if let Some(Token(i, x)) = self.tokens.peek() { (i, x) } else { return Err(ParserError::UnexpectedEndOfTokens( "Expected comma here".into(), )); }; if *token == TokenType::Comma { self.consume(); } else { return Err(ParserError::UnexpectedToken( *i, token.len(), format!("Expected comma here, but got {token:?}"), )); } values.push(self.equality()?); } return Ok(Set::new(values)); } self.primary() } fn primary(&mut self) -> Result, ParserError> { let (i, token) = if let Some(Token(i, token)) = self.tokens.next() { (i, token) } else { return Err(ParserError::UnexpectedEndOfTokens( "Expected a Primary here".into(), )); }; match token { TokenType::Number(value) => { let value = if let Ok(incomplete) = Float::parse(&value) { Float::with_val_64(self.environment.get_float_precision(), incomplete) } else { return Err(ParserError::NumberParse( i, format!("Failed to convert `{value}` to a number"), )); }; Ok(Rc::new(Constant::new(value).into())) } TokenType::Identifier(string) => Ok(Rc::new( Symbol::new_from_str(string, self.environment).into(), )), TokenType::True => Ok(Rc::new(Bool::True.into())), TokenType::False => Ok(Rc::new(Bool::False.into())), TokenType::String(s) => Ok(StringNode::new(s)), TokenType::LParen => { let expr = self.expression()?; if !self.matchType(TokenType::RParen) { if let Some(Token(i, t)) = self.tokens.peek() { return Err(ParserError::UnexpectedToken( *i, t.len(), format!("Expected right parenthesis here, but got {t:?}"), )); } else { return Err(ParserError::UnexpectedToken( i, 1, "Unclosed right parenthesis".to_owned(), )); } } Ok(expr) } _ => Err(ParserError::UnexpectedToken( i, token.len(), format!("Unexpected token {token:?}"), )), } } }