use nom::{ IResult, Parser, branch::alt, multi::many0, bytes::complete::{tag, take_while1}, character::complete::multispace1, }; use crate::sexp::{SExp, SExp::*, SLeaf::*, util::*}; #[derive(Debug,PartialEq)] pub enum Token { ParOpen, ParClose, Num(i32), Sym(String), Whitespace(String), } use Token::*; use crate::parse::util::*; fn parse_token(s: &str) -> IResult<&str, Token> { alt(( tag("(").map(|_| ParOpen), tag(")").map(|_| ParClose), multispace1.map(whitespace), nom::character::complete::i32.map(Num), take_while1(|c| !(" \n\t()".contains(c))).map(sym), )).parse(s) } fn tokenize(s: &str) -> Result, String> { match many0(parse_token).parse(s) { Ok(("", res)) => Ok(res), Ok((rest, _)) => Err(format!("all data should be tokenizable, '{rest}' was not")), Err(e) => Err(e.to_string()), } } fn tokens_to_ast(tokens: Vec) -> Result { todo!() } #[cfg(test)] mod private_parsing_tests { use super::{*, parse_token}; #[test] fn test_parse_token() { assert_eq!(parse_token("()"), Ok((")", ParOpen))); assert_eq!(parse_token(")"), Ok(("", ParClose))); assert_eq!(parse_token(" \t\n"), Ok(("", whitespace(" \t\n")))); assert_eq!(parse_token("1 23"), Ok((" 23", Num(1)))); assert_eq!(parse_token("23"), Ok(("", Num(23)))); assert_eq!(parse_token("Nil a"), Ok((" a", sym("Nil")))); assert_eq!(parse_token("a"), Ok(("", sym("a")))); assert!(parse_token("").is_err()) } #[test] fn test_tokenize() { assert_eq!( tokenize("(+ 1 2 (\t\n a)").unwrap(), vec![ ParOpen, sym("+"), whitespace(" "), Num(1), whitespace(" "), Num(2), whitespace(" "), ParOpen, whitespace("\t\n "), sym("a"), ParClose ] ); } #[test] fn test_tokens_to_ast() { // Syms are parsed to vars, // and everything is wrapped in an extra layer of scons // so that multi-expression programs can be returned // as one single SExp. assert_eq!( tokens_to_ast(vec![sym("a")]), Ok(scons(var("a"), Nil)) ); // Lists are pased to scons linked lists assert_eq!( tokens_to_ast(vec![ ParOpen, sym("a"), whitespace(" "), sym("b"), whitespace(" "), sym("c"), whitespace(" "), ParClose, ]), Ok(scons(scons(var("a"), scons(var("b"), scons(var("c"), Nil))), Nil)) ); // Nesting should work. assert_eq!( tokens_to_ast(vec![ ParOpen, sym("a"), whitespace(" "), ParOpen, sym("b"), whitespace(" "), sym("c"), whitespace(" "), ParClose, whitespace(" "), sym("d"), ParClose ]), Ok( scons(scons(var("a"), scons( scons(var("b"), scons(var("c"), Nil)) , scons(var("d"), Nil))), Nil) ) ); // Multiple expressions should be parseable assert_eq!( tokens_to_ast(vec![ ParOpen, sym("a"), ParClose, sym("b"), ParOpen, sym("c"), ParClose, ]), Ok(scons( scons(var("a"), Nil), scons(var("b"), scons(scons(var("c"), Nil), Nil)))) ); // Operators are parsed correctly assert_eq!( tokens_to_ast(vec![ ParOpen, sym("+"), whitespace(" "), sym("-"), whitespace(" "), sym("*"), whitespace(" "), sym("/"), ParClose ]), Ok(scons( scons(Add, scons(Sub, scons(Mul, scons(Div, Nil)))), Nil)) ); // Integers are parsed correctly assert_eq!( tokens_to_ast(vec![Num(5)]), Ok(scons(Int(5), Nil)) ); // Nil can be parsed assert_eq!( tokens_to_ast(vec![ParOpen,ParClose]), Ok(scons(Nil, Nil)) ); // Quote can be parsed assert_eq!( tokens_to_ast(vec![ ParOpen, sym("quote"), whitespace(" "), sym("a"), whitespace(" "), sym("b"), whitespace(" ") ]), Ok(scons(Quote, scons(var("a"), scons(var("b"), Nil)))) ); } #[test] fn test_tokens_to_ast_failing() { assert!( tokens_to_ast(vec![ParClose]).is_err(), "Invalid parentheses should fail" ); assert!( tokens_to_ast(vec![ParOpen]).is_err(), "Invalid parentheses should fail" ); assert!( tokens_to_ast(vec![ParClose, ParOpen]).is_err(), "Invalid parentheses should fail" ); assert!( tokens_to_ast(vec![ParOpen, ParOpen, ParClose]).is_err(), "Invalid parentheses should fail" ); assert!( tokens_to_ast(vec![Num(1), sym("a")]).is_err(), "Having a symbol starting with a number should fail." ); assert!( tokens_to_ast(vec![sym("quote"), sym("a")]).is_err(), "There should be whitespace between quote and other symbols" ); } }