aimx/literals/
text.rs

//! Text literal and tagged content parsing for AIMX.
//!
//! Provides parsers for quoted strings, brace-delimited blocks, and
//! angle-bracket tags with escape handling, used by the AIMX lexer/parser.
5
6use nom::{
7    IResult, Parser,
8    branch::alt,
9    bytes::complete::is_not,
10    character::complete::{char, none_of},
11    combinator::{map, value, verify},
12    multi::fold,
13    sequence::{delimited, preceded},
14};
15use std::sync::Arc;
16
17/// Parse a quoted text literal.
18///
19/// Supports single and double quotes and common escape sequences.
20pub fn parse_text(input: &str) -> IResult<&str, Arc<str>> {
21    alt((parse_double_quoted_text, parse_single_quoted_text)).parse(input)
22}
23
24/// Parse a brace-delimited block with a leading symbol.
25///
26/// Expects `{symbol ...}` and returns the inner content.
27pub fn parse_block(symbol: char, input: &str) -> IResult<&str, String> {
28    let (input, _) = char('{').parse(input)?;
29    delimited(char(symbol), parse_block_content, char('}')).parse(input)
30}
31
32/// Parse an angle-bracket tag: `<...>`.
33///
34/// Returns the inner content with escapes resolved.
35pub fn parse_tag(input: &str) -> IResult<&str, Arc<str>> {
36    delimited(char('<'), parse_tagged_content, char('>')).parse(input)
37}
38
39/// Parse a double-quoted text literal.
40fn parse_double_quoted_text(input: &str) -> IResult<&str, Arc<str>> {
41    delimited(char('"'), parse_double_quoted_content, char('"')).parse(input)
42}
43
44/// Parse a single-quoted text literal.
45fn parse_single_quoted_text(input: &str) -> IResult<&str, Arc<str>> {
46    delimited(char('\''), parse_single_quoted_content, char('\'')).parse(input)
47}
48
49// ----- PARSE CONTENT -----
50
51/// Parse block content (inside `{symbol ...}`) with escape handling.
52fn parse_block_content(input: &str) -> IResult<&str, String> {
53    fold(
54        0..,
55        parse_block_fragment,
56        String::new,
57        |mut text, fragment| {
58            match fragment {
59                StringFragment::Literal(s) => text.push_str(s),
60                StringFragment::EscapedChar(c) => text.push(c),
61            }
62            text
63        },
64    )
65    .parse(input)
66}
67
68/// Parse tagged content (inside `<...>`) with escape handling.
69fn parse_tagged_content(input: &str) -> IResult<&str, Arc<str>> {
70    let (remain, text) = fold(
71        0..,
72        parse_tagged_fragment,
73        String::new,
74        |mut text, fragment| {
75            match fragment {
76                StringFragment::Literal(s) => text.push_str(s),
77                StringFragment::EscapedChar(c) => text.push(c),
78            }
79            text
80        },
81    )
82    .parse(input)?;
83    Ok((remain, Arc::from(text)))
84}
85
86/// Parse content inside double quotes with escape handling.
87fn parse_double_quoted_content(input: &str) -> IResult<&str, Arc<str>> {
88    let (remain, text) = fold(
89        0..,
90        parse_double_quoted_fragment,
91        String::new,
92        |mut text, fragment| {
93            match fragment {
94                StringFragment::Literal(s) => text.push_str(s),
95                StringFragment::EscapedChar(c) => text.push(c),
96            }
97            text
98        },
99    )
100    .parse(input)?;
101    Ok((remain, Arc::from(text)))
102}
103
104/// Parse content inside single quotes with escape handling.
105fn parse_single_quoted_content(input: &str) -> IResult<&str, Arc<str>> {
106    let (remain, text) = fold(
107        0..,
108        parse_single_quoted_fragment,
109        String::new,
110        |mut text, fragment| {
111            match fragment {
112                StringFragment::Literal(s) => text.push_str(s),
113                StringFragment::EscapedChar(c) => text.push(c),
114            }
115            text
116        },
117    )
118    .parse(input)?;
119    Ok((remain, Arc::from(text)))
120}
121
122// ----- PARSE FRAGMENT -----
123
/// One piece of text content, as produced by the fragment parsers.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum StringFragment<'src> {
    /// A run of ordinary characters borrowed from the input; the literal
    /// parsers only produce non-empty runs.
    Literal(&'src str),
    /// The decoded character of a single backslash escape.
    EscapedChar(char),
}
132
133/// Fragment parser for block content.
134fn parse_block_fragment(input: &str) -> IResult<&str, StringFragment<'_>> {
135    alt((
136        map(parse_block_literal, StringFragment::Literal),
137        map(convert_escaped_char, StringFragment::EscapedChar),
138    ))
139    .parse(input)
140}
141
142/// Fragment parser for tagged content.
143fn parse_tagged_fragment(input: &str) -> IResult<&str, StringFragment<'_>> {
144    alt((
145        map(parse_tagged_literal, StringFragment::Literal),
146        map(convert_escaped_char, StringFragment::EscapedChar),
147    ))
148    .parse(input)
149}
150
151/// Fragment parser for double-quoted content.
152fn parse_double_quoted_fragment(input: &str) -> IResult<&str, StringFragment<'_>> {
153    alt((
154        map(parse_double_quoted_literal, StringFragment::Literal),
155        map(convert_escaped_char, StringFragment::EscapedChar),
156    ))
157    .parse(input)
158}
159
160/// Fragment parser for single-quoted content.
161fn parse_single_quoted_fragment(input: &str) -> IResult<&str, StringFragment<'_>> {
162    alt((
163        map(parse_single_quoted_literal, StringFragment::Literal),
164        map(convert_escaped_char, StringFragment::EscapedChar),
165    ))
166    .parse(input)
167}
168
169// ----- PARSE LITERAL -----
170
171/// Literal run for block content (no `\\` or `}`).
172fn parse_block_literal(input: &str) -> IResult<&str, &str> {
173    let not_quote_slash = is_not("\\}");
174    verify(not_quote_slash, |s: &str| !s.is_empty()).parse(input)
175}
176
177/// Literal run for tagged content (no `\\` or `>`).
178fn parse_tagged_literal(input: &str) -> IResult<&str, &str> {
179    let not_quote_slash = is_not("\\>");
180    verify(not_quote_slash, |s: &str| !s.is_empty()).parse(input)
181}
182
183/// Literal run for double-quoted content (no `\\` or `"`).
184fn parse_double_quoted_literal(input: &str) -> IResult<&str, &str> {
185    let not_quote_slash = is_not("\\\"");
186    verify(not_quote_slash, |s: &str| !s.is_empty()).parse(input)
187}
188
189/// Literal run for single-quoted content (no `\\` or `'`).
190fn parse_single_quoted_literal(input: &str) -> IResult<&str, &str> {
191    let not_quote_slash = is_not("\\'");
192    verify(not_quote_slash, |s: &str| !s.is_empty()).parse(input)
193}
194
195/// Parse a backslash escape sequence to a single character.
196///
197/// Supports `n`, `r`, `t`, `\\`, `"`, `'`, `b`, `f`, `/`, and defaults to the
198/// following character when not matched.
199fn convert_escaped_char(input: &str) -> IResult<&str, char> {
200    preceded(
201        char('\\'),
202        alt((
203            value('\n', char('n')),
204            value('\r', char('r')),
205            value('\t', char('t')),
206            value('\\', char('\\')),
207            value('"', char('"')),
208            value('\'', char('\'')),
209            value('\u{08}', char('b')),
210            value('\u{0C}', char('f')),
211            value('/', char('/')),
212            // Fallback: return next character as-is.
213            none_of("nrt\\\"'b/f"),
214        )),
215    )
216    .parse(input)
217}
218
219/// Convert text with escapes (no surrounding quotes) to a decoded string.
220pub fn convert_escaped_text(input: &str) -> IResult<&str, String> {
221    fold(
222        0..,
223        alt((
224            map(is_not("\\"), |s: &str| s.to_string()),
225            map(convert_escaped_char, |c| c.to_string()),
226        )),
227        String::new,
228        |mut acc, fragment| {
229            acc.push_str(&fragment);
230            acc
231        },
232    )
233    .parse(input)
234}