// openbirch-rs/src/lib/parser/mod.rs — lexer and recursive-descent parser.
use std::{env, iter::Peekable, rc::Rc, slice::Iter, str::Chars, vec::IntoIter};
use rug::Float;
use crate::{
environment::Environment,
node::{
NodeEnum,
add::Add,
assign::Assign,
call::Call,
comparison::{Greater, GreaterEquals, Less, LessEquals},
constant::{Constant, ConstantValue},
divide::Divide,
equals::Equals,
exponent::Exponent,
function::{Function, FunctionType},
if_else::{Bool, ElseBranchEnum, IfElse},
multiply::Multiply,
set::Set,
string_node::StringNode,
subtract::Subtract,
symbol::Symbol,
},
};
/// A lexed token: the (approximate) character offset where it starts in the
/// source, paired with its kind/payload.
#[derive(Debug)]
pub struct Token(usize, TokenType);
/// The kind of a lexed token; literal/identifier variants carry their raw text.
#[derive(Debug, PartialEq, Clone)]
pub enum TokenType {
    // Number literals keep their raw text; conversion to a float happens
    // later, in `Parser::primary`.
    Number(String),
    Identifier(String),
    // String literal contents, without the surrounding quotes.
    String(String),
    // Single-character operators.
    Plus,
    Minus,
    Star,
    Slash,
    Hat,
    Equals,
    Greater,
    Less,
    // Two-character operators.
    GreaterEquals,
    LessEquals,
    // `:=` — assignment.
    ColonEquals,
    // `->` — function definition (e.g. `x -> x*2`).
    LeftArrow,
    // Delimiters.
    RParen,
    LParen,
    RSquare,
    LSquare,
    Comma,
    // Keywords; the lexer matches them case-insensitively.
    If,
    Then,
    Else,
    End,
    True,
    False,
}
impl TokenType {
    /// Width of the token's lexeme in characters, used together with the
    /// token's offset to build error spans in `ParserError`.
    pub fn len(&self) -> usize {
        match self {
            // Variable-width tokens carry their own text. These already hold
            // a `String`, so no `to_string()` round-trip is needed.
            // NOTE(review): for `String` this is the content length only —
            // the two quote characters are not counted; confirm that is the
            // intended span width.
            TokenType::Number(s) | TokenType::Identifier(s) | TokenType::String(s) => s.len(),
            // Single-character operators and delimiters.
            TokenType::Plus
            | TokenType::Minus
            | TokenType::Star
            | TokenType::Slash
            | TokenType::Hat
            | TokenType::Equals
            | TokenType::Greater
            | TokenType::Less
            | TokenType::RParen
            | TokenType::LParen
            | TokenType::RSquare
            | TokenType::LSquare
            | TokenType::Comma => 1,
            // Two-character operators, plus the two-letter keyword `if`.
            TokenType::GreaterEquals
            | TokenType::LessEquals
            | TokenType::ColonEquals
            | TokenType::LeftArrow
            | TokenType::If => 2,
            // Remaining keywords, by letter count.
            TokenType::End => 3,
            TokenType::Then | TokenType::Else | TokenType::True => 4,
            TokenType::False => 5,
        }
    }
}
/// Streaming lexer over the source text; consumes characters one at a time
/// through a peekable iterator.
pub struct Lexer<'a> {
    source: Peekable<Chars<'a>>,
}
/// Errors produced while lexing: character offset plus a human-readable
/// message.
#[derive(Debug)]
pub enum LexerError {
    UnexpectedChar(usize, String),
}
impl<'a> Lexer<'a> {
    /// Create a lexer over `source`.
    ///
    /// Takes `&str` instead of `&String`; existing `&String` callers coerce
    /// automatically, so this is backward compatible.
    pub fn new(source: &'a str) -> Self {
        Self {
            source: source.chars().peekable(),
        }
    }

    /// Tokenize the entire source.
    ///
    /// Returns the token list, or the first [`LexerError`] encountered.
    /// `i` tracks an approximate character offset used for error reporting.
    pub fn lex(&'a mut self) -> Result<Vec<Token>, LexerError> {
        let mut i = 0;
        let mut tokens = vec![];
        while let Some(c) = self.source.next() {
            match c {
                // Runs of spaces produce no token; just advance the offset.
                ' ' => {
                    while self.source.peek() == Some(&' ') {
                        self.source.next();
                        i += 1;
                    }
                }
                // `//` line comment: skip to the end of the line. This must
                // also stop at end of input — the previous
                // `while self.source.next() != Some('\n')` spun forever when
                // a comment ended the file without a trailing newline,
                // because `next()` keeps yielding `None`.
                '/' if self.source.peek() == Some(&'/') => {
                    while let Some(ch) = self.source.next() {
                        if ch == '\n' {
                            break;
                        }
                        i += 1;
                    }
                }
                // Number literal with at most one decimal point.
                '0'..='9' | '.' => {
                    let mut digit = String::from(c);
                    let mut has_decimal = c == '.';
                    while let Some(&d) = self.source.peek() {
                        match d {
                            '0'..='9' => {
                                digit.push(d);
                                self.source.next();
                                i += 1;
                            }
                            '.' => {
                                if has_decimal {
                                    return Err(LexerError::UnexpectedChar(
                                        i + 1,
                                        "Invalid digit with multiple decimal points".into(),
                                    ));
                                }
                                digit.push(d);
                                self.source.next();
                                i += 1;
                                has_decimal = true;
                            }
                            _ => break,
                        }
                    }
                    // Conversion to a number is deferred to the parser.
                    tokens.push(Token(i, TokenType::Number(digit)));
                }
                // Double-quoted string literal; no escape sequences.
                '"' => {
                    let mut buffer = "".to_owned();
                    loop {
                        match self.source.peek() {
                            Some('"') => {
                                tokens.push(Token(i, TokenType::String(buffer.clone())));
                                self.source.next();
                                break;
                            }
                            Some(_) => {
                                // NOTE(review): `i` is not advanced for string
                                // contents, so offsets of later tokens drift;
                                // kept as-is to preserve reported positions.
                                buffer.push(self.source.next().unwrap());
                            }
                            None => {
                                return Err(LexerError::UnexpectedChar(
                                    i,
                                    "Unexpected End of file".to_owned(),
                                ));
                            }
                        }
                    }
                }
                // Two-character operators must be matched before their
                // single-character prefixes below.
                '-' if self.source.peek() == Some(&'>') => {
                    self.source.next();
                    i += 1;
                    tokens.push(Token(i, TokenType::LeftArrow));
                }
                '<' if self.source.peek() == Some(&'=') => {
                    self.source.next();
                    i += 1;
                    tokens.push(Token(i, TokenType::LessEquals));
                }
                '>' if self.source.peek() == Some(&'=') => {
                    self.source.next();
                    i += 1;
                    tokens.push(Token(i, TokenType::GreaterEquals));
                }
                '<' => tokens.push(Token(i, TokenType::Less)),
                '>' => tokens.push(Token(i, TokenType::Greater)),
                '+' => tokens.push(Token(i, TokenType::Plus)),
                '-' => tokens.push(Token(i, TokenType::Minus)),
                '*' => tokens.push(Token(i, TokenType::Star)),
                '/' => tokens.push(Token(i, TokenType::Slash)),
                '=' => tokens.push(Token(i, TokenType::Equals)),
                ',' => tokens.push(Token(i, TokenType::Comma)),
                '^' => tokens.push(Token(i, TokenType::Hat)),
                ':' if self.source.peek() == Some(&'=') => {
                    self.source.next();
                    i += 1;
                    tokens.push(Token(i, TokenType::ColonEquals));
                }
                '(' => tokens.push(Token(i, TokenType::LParen)),
                ')' => tokens.push(Token(i, TokenType::RParen)),
                '[' => tokens.push(Token(i, TokenType::LSquare)),
                ']' => tokens.push(Token(i, TokenType::RSquare)),
                // Identifiers and keywords start with a letter or underscore.
                _ if c.is_alphabetic() || c == '_' => {
                    tokens.push(self.lex_identifier(&mut i, c)?);
                }
                // NOTE(review): '\n' also lands here and is rejected —
                // presumably the input is single-line (REPL); confirm.
                _ => {
                    return Err(LexerError::UnexpectedChar(
                        i,
                        format!("Unexpected char {}", c),
                    ));
                }
            }
            i += 1;
        }
        Ok(tokens)
    }

    /// Lex an identifier or keyword whose first character `c` was already
    /// consumed. Identifier characters are alphanumerics, `_` and `'`;
    /// keyword matching is case-insensitive.
    fn lex_identifier(&mut self, i: &mut usize, c: char) -> Result<Token, LexerError> {
        let mut identifier = c.to_string();
        while let Some(&next) = self.source.peek() {
            if next.is_alphanumeric() || next == '_' || next == '\'' {
                identifier.push(next);
                self.source.next();
                *i += 1;
            } else {
                break;
            }
        }
        Ok(Token(
            *i,
            match identifier.to_lowercase().as_str() {
                "if" => TokenType::If,
                "then" => TokenType::Then,
                "else" => TokenType::Else,
                "end" => TokenType::End,
                "true" => TokenType::True,
                "false" => TokenType::False,
                _ => TokenType::Identifier(identifier),
            },
        ))
    }
}
/// Errors produced during parsing. Where two `usize` fields are present they
/// are the source offset and the offending token's length (see
/// `TokenType::len`), presumably for rendering error spans.
pub enum ParserError {
    UnexpectedEndOfTokens(String),
    UnexpectedToken(usize, usize, String),
    Unimplemented(usize, usize, String),
    UnexpectedNode(usize, String),
    NumberParse(usize, String),
}
/// Recursive descent parser
pub struct Parser<'a> {
    // Owned token stream produced by the lexer.
    tokens: Peekable<IntoIter<Token>>,
    // Supplies float precision and symbol construction during parsing.
    environment: &'a mut Environment,
    // Most recently consumed token; used for error reporting and for
    // branch decisions (see `if_else`).
    previous: Option<Token>,
}
// Borrowed-token iterator alias.
// NOTE(review): unused in this chunk — possibly referenced elsewhere in the file.
type Tokens<'a> = Peekable<Iter<'a, Token>>;
impl<'a> Parser<'a> {
    /// Build a parser over an owned token stream. `env` is used when
    /// constructing constants (float precision) and symbols.
    pub fn new(tokens: Vec<Token>, env: &'a mut Environment) -> Self {
        Self {
            tokens: tokens.into_iter().peekable(),
            environment: env,
            previous: None,
        }
    }

    /// Parse the whole stream into a sequence of top-level expressions,
    /// stopping at the first error.
    pub fn parse(&mut self) -> Result<Vec<Rc<NodeEnum>>, ParserError> {
        let mut expressions = vec![];
        while self.tokens.peek().is_some() {
            expressions.push(self.expression()?);
        }
        Ok(expressions)
    }

    /// Advance one token, stashing it in `self.previous` for later use.
    #[inline]
    fn consume<'b>(&'b mut self) -> &'b Option<Token> {
        self.previous = self.tokens.next();
        &self.previous
    }

    /// True when there are no tokens left.
    #[inline]
    fn is_at_end(&mut self) -> bool {
        if self.tokens.peek().is_none() {
            return true;
        }
        false
    }

    /// Checks if the next token is `t`, if it is then consume it and return true. Otherwise does
    /// nothing and returns false.
    // NOTE(review): camelCase name trips the non_snake_case lint; renaming it
    // (and `matchOrErr`) together with all call sites would be the idiomatic fix.
    #[inline]
    fn matchType<'b>(&'b mut self, t: TokenType) -> bool {
        if let Some(Token(_, token_type)) = self.tokens.peek() {
            if *token_type == t {
                self.consume();
                return true;
            }
        }
        false
    }

    /// Like `matchType`, but failing to match is an error carrying the
    /// offending token's offset and length (or end-of-input).
    #[inline]
    fn matchOrErr(&mut self, t: TokenType) -> Result<bool, ParserError> {
        let (i, tt) = if let Some(Token(i, tt)) = self.tokens.peek() {
            (*i, tt.clone())
        } else {
            return Err(ParserError::UnexpectedEndOfTokens(format!(
                "Expected {t:?} but found nothing instead"
            )));
        };
        if self.matchType(t.clone()) {
            Ok(true)
        } else {
            Err(ParserError::UnexpectedToken(
                i,
                tt.len(),
                format!("Expected {t:?} but found {tt:?} instead"),
            ))
        }
    }

    /// Grammar entry point: expression -> assignment.
    fn expression(&mut self) -> Result<Rc<NodeEnum>, ParserError> {
        self.assignment()
    }

    /// assignment -> equality (":=" equality)?
    fn assignment(&mut self) -> Result<Rc<NodeEnum>, ParserError> {
        let expr = self.equality()?;
        if self.matchType(TokenType::ColonEquals) {
            return Ok(Rc::new(Assign::new(expr, self.equality()?).into()));
        }
        Ok(expr)
    }

    /// equality -> comparison ("=" comparison)*
    /// A chain `a = b = c` becomes one `Equals` node holding every
    /// right-hand side.
    fn equality(&mut self) -> Result<Rc<NodeEnum>, ParserError> {
        let expr = self.comparison()?;
        if self.matchType(TokenType::Equals) {
            let mut expressions = vec![];
            loop {
                expressions.push(self.comparison()?);
                if !self.matchType(TokenType::Equals) {
                    break;
                }
            }
            return Ok(Rc::new(Equals::new(expr, expressions).into()));
        }
        Ok(expr)
    }

    /// comparison -> term ((">" | "<" | ">=" | "<=") term)*
    /// A chain reuses the SAME operator (`a < b < c`); a run stops at the
    /// first different operator.
    fn comparison(&mut self) -> Result<Rc<NodeEnum>, ParserError> {
        let expr = self.term()?;
        // NOTE(review): `i` is unbound-but-unused here (warning); kept as-is.
        let t = if let Some(Token(i, t)) = self.tokens.peek() {
            t.clone()
        } else {
            return Ok(expr);
        };
        if match t {
            TokenType::Greater => true,
            TokenType::Less => true,
            TokenType::GreaterEquals => true,
            TokenType::LessEquals => true,
            _ => false,
        } {
            self.consume();
            let mut expressions = vec![];
            loop {
                expressions.push(self.term()?);
                if !self.matchType(t.clone()) {
                    break;
                }
            }
            return match t {
                TokenType::Greater => Ok(Greater::new(expr, expressions)),
                TokenType::Less => Ok(Less::new(expr, expressions)),
                TokenType::GreaterEquals => Ok(GreaterEquals::new(expr, expressions)),
                TokenType::LessEquals => Ok(LessEquals::new(expr, expressions)),
                // Unreachable: `t` was checked to be one of the four above.
                _ => panic!(),
            };
        }
        Ok(expr)
    }

    /// term -> factor (("+" | "-") comparison)?
    // NOTE(review): the right-hand side recurses into `comparison`, not
    // `term`, so `a - b + c` parses as `a - (b + c)` (right-associative).
    // Confirm whether this is intended before changing it.
    fn term(&mut self) -> Result<Rc<NodeEnum>, ParserError> {
        let expr = self.factor()?;
        if self.matchType(TokenType::Plus) {
            Ok(Rc::new(Add::new(expr, self.comparison()?).into()))
        } else if let Some(Token(_, TokenType::Minus)) = self.tokens.peek() {
            self.consume();
            Ok(Rc::new(Subtract::new(expr, self.comparison()?).into()))
        } else {
            Ok(expr)
        }
    }

    /// factor -> unary (("*" | "/") comparison)?
    // NOTE(review): same right-recursion through `comparison` as `term`;
    // `a / b * c` parses as `a / (b * c)`.
    fn factor(&mut self) -> Result<Rc<NodeEnum>, ParserError> {
        let expr = self.unary()?;
        if let Some(Token(_, TokenType::Star)) = self.tokens.peek() {
            self.consume();
            Ok(Rc::new(Multiply::new(expr, self.comparison()?).into()))
        } else if let Some(Token(_, TokenType::Slash)) = self.tokens.peek() {
            self.consume();
            Ok(Rc::new(Divide::new(expr, self.comparison()?).into()))
        } else {
            Ok(expr)
        }
    }

    /// unary -> exponent (no prefix operators implemented yet).
    fn unary(&mut self) -> Result<Rc<NodeEnum>, ParserError> {
        self.exponent()
    }

    /// exponent -> call ("^" unary)?
    /// Right-associative: the RHS re-enters `unary`, so `a^b^c` is `a^(b^c)`.
    fn exponent(&mut self) -> Result<Rc<NodeEnum>, ParserError> {
        let expr = self.call();
        if self.matchType(TokenType::Hat) {
            let right = self.unary()?;
            return Ok(Exponent::new(expr?, right));
        }
        expr
    }

    /// call -> function ("(" arguments? ")")*
    /// Also desugars function-assignment sugar: `f(x) := body` becomes an
    /// assignment of a user function (`f := x -> body`).
    fn call(&mut self) -> Result<Rc<NodeEnum>, ParserError> {
        // Left hand side
        let mut expr = self.function()?;
        // Repeatedly consume `(...)` argument lists so `f(a)(b)` chains
        // into nested calls.
        loop {
            let (i, t) = if let Some(Token(i, x)) = self.tokens.peek() {
                (*i, x.clone())
            } else {
                return Ok(expr);
            };
            // If the next token is a parenthesis then we construct a call
            if t == TokenType::LParen {
                self.consume();
                // Calls can have 0 arguments, so check and return early
                if self.matchType(TokenType::RParen) {
                    expr = Call::new(expr, vec![]);
                } else {
                    // Parse comma-separated arguments until the matching ')'
                    let mut parameters = vec![self.equality()?];
                    while self.matchType(TokenType::Comma) {
                        parameters.push(self.equality()?);
                    }
                    if !self.matchType(TokenType::RParen) {
                        return Err(ParserError::UnexpectedToken(
                            i,
                            t.len(),
                            "Unclosed right parenthesis".to_owned(),
                        ));
                    }
                    // If the next token is a ColonEquals (assignment) then
                    // the user wants function assignment sugar
                    //
                    // Ie f(x) := x*5 => f := x -> x*5
                    if self.matchType(TokenType::ColonEquals) {
                        // The callee must be a plain symbol to be assignable.
                        if let NodeEnum::Symbol(_) = expr.as_ref() {
                        } else {
                            let Token(i, token) = self.previous.as_ref().unwrap();
                            return Err(ParserError::UnexpectedToken(
                                *i,
                                token.len(),
                                format!(
                                    "Expected an Identifier here but found a {}",
                                    expr.type_str()
                                ),
                            ));
                        };
                        // Parse body
                        let body = self.equality()?;
                        // Convert vector of expressions to vector of symbols
                        let mut arguments = Vec::with_capacity(parameters.len());
                        for param in parameters.into_iter() {
                            // NOTE(review): `try_unwrap(..).unwrap()` panics if
                            // any parameter Rc is shared; confirm parameters
                            // are always uniquely owned at this point.
                            if let NodeEnum::Symbol(symbol) =
                                Rc::<NodeEnum>::try_unwrap(param).unwrap()
                            {
                                arguments.push(symbol);
                            } else {
                                return Err(ParserError::UnexpectedToken(
                                    i,
                                    t.len(),
                                    format!("One or more argument is not a Symbol",),
                                ));
                            }
                        }
                        // Early exit with new desugared expression
                        return Ok(Rc::new(
                            Assign::new(
                                expr,
                                Function::new(FunctionType::UserFunction(body, arguments)),
                            )
                            .into(),
                        ));
                    } else {
                        expr = Call::new(expr, parameters);
                    }
                }
            } else {
                break;
            }
        }
        Ok(expr)
    }

    /// function -> if_else ("->" equality)?
    /// `x -> body` and `[x, y] -> body` build user-defined functions.
    fn function(&mut self) -> Result<Rc<NodeEnum>, ParserError> {
        // Offset remembered up front for error reporting on a bad
        // parameter list (0 when at end of input).
        let error_loc = if let Some(Token(i, _)) = self.tokens.peek() {
            *i
        } else {
            0
        };
        let expr = self.if_else()?;
        if self.matchType(TokenType::LeftArrow) {
            let right = self.equality()?;
            match expr.clone().as_ref() {
                // Single parameter: `x -> body`.
                NodeEnum::Symbol(symbol) => {
                    return Ok(Function::new(FunctionType::UserFunction(
                        right,
                        vec![symbol.clone()],
                    )));
                }
                // Parameter set: `[x, y] -> body`; every element must be
                // a symbol.
                NodeEnum::Set(set) => {
                    let mut symbols = vec![];
                    for (i, value) in set.get_values().into_iter().enumerate() {
                        match value.as_ref() {
                            NodeEnum::Symbol(symbol) => symbols.push(symbol.clone()),
                            _ => {
                                return Err(ParserError::UnexpectedNode(
                                    error_loc,
                                    format!(
                                        "Expected set of Identifiers, but argument #{i} is a {value:?}"
                                    ),
                                ));
                            }
                        }
                    }
                    return Ok(Function::new(FunctionType::UserFunction(right, symbols)));
                }
                _ => {
                    return Err(ParserError::UnexpectedNode(
                        error_loc,
                        format!("Expected Set, got {:?}", expr),
                    ));
                }
            }
        }
        Ok(expr)
    }

    /// if_else -> "if" equality "then" expression* ("else" ("if" ... | expression*))? "end"
    /// The else branch may be a block of expressions or a chained `else if`.
    fn if_else(&mut self) -> Result<Rc<NodeEnum>, ParserError> {
        if self.matchType(TokenType::If) {
            let condition = self.equality()?;
            let _ = self.matchOrErr(TokenType::Then)?;
            let mut expressions = vec![];
            // Collect the then-branch until an `end` or `else` is consumed.
            while !(self.matchType(TokenType::End) || self.matchType(TokenType::Else)) {
                if self.is_at_end() {
                    return Err(ParserError::UnexpectedEndOfTokens(
                        "Expected an else or end here".to_owned(),
                    ));
                }
                expressions.push(self.expression()?);
            }
            // Safe to unwrap since the while loop would terminate if previous was none (it didnt
            // find an End or Else before running out of tokens)
            let else_branch = match self.previous.as_ref().unwrap() {
                Token(_, TokenType::End) => ElseBranchEnum::None,
                Token(_, TokenType::Else) => {
                    // `else if ...` recurses; otherwise collect a block of
                    // expressions up to the closing `end`.
                    if let Some(Token(_, TokenType::If)) = self.tokens.peek() {
                        ElseBranchEnum::ElseIf(self.if_else()?)
                    } else {
                        let mut expressions = vec![];
                        while !self.matchType(TokenType::End) {
                            if self.is_at_end() {
                                return Err(ParserError::UnexpectedEndOfTokens(
                                    "Expected an end here".to_owned(),
                                ));
                            }
                            expressions.push(self.expression()?);
                        }
                        ElseBranchEnum::Block(expressions)
                    }
                }
                _ => panic!("Not possible"),
            };
            return Ok(IfElse::new(condition, expressions, else_branch));
        }
        self.set()
    }

    /// set -> "[" (equality ("," equality)*)? "]"
    fn set(&mut self) -> Result<Rc<NodeEnum>, ParserError> {
        if self.matchType(TokenType::LSquare) {
            // Empty set
            if self.matchType(TokenType::RSquare) {
                return Ok(Set::new(vec![]));
            }
            let mut values = vec![self.equality()?];
            // The loop condition consumes the closing `]` when it sees one
            // (and then exits); otherwise a comma must follow.
            while {
                if let Some(Token(_, TokenType::RSquare)) = self.tokens.peek() {
                    self.consume();
                    false
                } else {
                    true
                }
            } {
                let (i, token) = if let Some(Token(i, x)) = self.tokens.peek() {
                    (i, x)
                } else {
                    return Err(ParserError::UnexpectedEndOfTokens(
                        "Expected comma here".into(),
                    ));
                };
                if *token == TokenType::Comma {
                    self.consume();
                } else {
                    return Err(ParserError::UnexpectedToken(
                        *i,
                        token.len(),
                        format!("Expected comma here, but got {token:?}"),
                    ));
                }
                values.push(self.equality()?);
            }
            return Ok(Set::new(values));
        }
        self.primary()
    }

    /// primary -> NUMBER | IDENTIFIER | STRING | "true" | "false"
    ///          | "(" expression ")"
    fn primary(&mut self) -> Result<Rc<NodeEnum>, ParserError> {
        let (i, token) = if let Some(Token(i, token)) = self.tokens.next() {
            (i, token)
        } else {
            return Err(ParserError::UnexpectedEndOfTokens(
                "Expected a Primary here".into(),
            ));
        };
        match token {
            TokenType::Number(value) => {
                // Parse the raw digit text at the environment's precision.
                let value = if let Ok(incomplete) = Float::parse(&value) {
                    Float::with_val_64(self.environment.get_float_precision(), incomplete)
                } else {
                    return Err(ParserError::NumberParse(
                        i,
                        format!("Failed to convert `{value}` to a number"),
                    ));
                };
                Ok(Rc::new(Constant::new(value).into()))
            }
            TokenType::Identifier(string) => Ok(Rc::new(
                Symbol::new_from_str(string, self.environment).into(),
            )),
            TokenType::True => Ok(Rc::new(Bool::True.into())),
            TokenType::False => Ok(Rc::new(Bool::False.into())),
            TokenType::String(s) => Ok(StringNode::new(s)),
            // Parenthesized sub-expression; the `)` must follow.
            TokenType::LParen => {
                let expr = self.expression()?;
                if !self.matchType(TokenType::RParen) {
                    if let Some(Token(i, t)) = self.tokens.peek() {
                        return Err(ParserError::UnexpectedToken(
                            *i,
                            t.len(),
                            format!("Expected right parenthesis here, but got {t:?}"),
                        ));
                    } else {
                        return Err(ParserError::UnexpectedToken(
                            i,
                            1,
                            "Unclosed right parenthesis".to_owned(),
                        ));
                    }
                }
                Ok(expr)
            }
            _ => Err(ParserError::UnexpectedToken(
                i,
                token.len(),
                format!("Unexpected token {token:?}"),
            )),
        }
    }
}