From 69bcc7d4d306d1c8a7596ae13641ffe479d98dcc Mon Sep 17 00:00:00 2001 From: Joel Kronqvist Date: Sun, 27 Jul 2025 17:57:33 +0300 Subject: Implemented tokenize with tests - note it doesn't check syntax --- src/parse/parsetree.rs | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/src/parse/parsetree.rs b/src/parse/parsetree.rs index e243d56..560ebe7 100644 --- a/src/parse/parsetree.rs +++ b/src/parse/parsetree.rs @@ -3,7 +3,9 @@ use nom::{ IResult, Parser, branch::alt, - bytes::complete::{tag, take_while}, + multi::many0, + combinator::all_consuming, + bytes::complete::{tag, take_while1}, character::complete::multispace1, }; @@ -26,10 +28,18 @@ fn parse_token(s: &str) -> IResult<&str, Token> { tag(")").map(|_| ParClose), multispace1.map(whitespace), nom::character::complete::i32.map(Num), - take_while(|c| !(" \n\t()".contains(c))).map(sym), + take_while1(|c| !(" \n\t()".contains(c))).map(sym), )).parse(s) } +fn tokenize(s: &str) -> Result<Vec<Token>, String> { + match many0(parse_token).parse(s) { + Ok(("", res)) => Ok(res), + Ok((rest, _)) => Err(format!("all data should be tokenizable, '{rest}' was not")), + Err(e) => Err(e.to_string()), + } +} + #[cfg(test)] mod private_parsing_tests { use super::{*, parse_token}; @@ -46,6 +56,24 @@ mod private_parsing_tests { assert_eq!(parse_token("Nil a"), Ok((" a", sym("Nil")))); assert_eq!(parse_token("a"), Ok(("", sym("a")))); + + assert!(parse_token("").is_err()) + } + + #[test] + fn test_tokenize() { + assert_eq!( + tokenize("(+ 1 2 (\t\n a)").unwrap(), + vec![ + ParOpen, + sym("+"), whitespace(" "), + Num(1), whitespace(" "), + Num(2), whitespace(" "), + ParOpen, whitespace("\t\n "), + sym("a"), + ParClose + ] + ); } } -- cgit v1.2.3