aimx/literals/text.rs
1//! Text parsing for the AIM expression grammar.
2//!
//! This module provides parsers for text literals, brace-delimited blocks, and
//! tagged content in the AIM expression language. It handles quoted strings
//! (single and double), `{symbol ...}` blocks, and `<...>` tags, all with
//! escape sequence processing.
6//!
7//! # Supported Text Formats
8//!
//! - **Quoted strings**: `"hello"` or `'world'`
//! - **Tagged content**: `<tag>`
//! - **Block content**: `{$name}` (an opening brace followed by a symbol character)
//! - **Escape sequences**: `\n`, `\t`, `\"`, `\'`, etc.
12//!
13//! # Examples
14//!
15//! ```text
16//! "Hello, world!" // double-quoted string
17//! 'Hello, world!' // single-quoted string
18//! <tag> // tagged content
19//! "Line 1\nLine 2" // string with escape sequences
20//! ```
21
22use nom::{
23 IResult,
24 Parser,
25 branch::alt,
26 bytes::complete::is_not,
27 character::complete::{char, none_of},
28 combinator::{map, value, verify},
29 multi::fold,
30 sequence::{delimited, preceded},
31};
32
33/// Parse a text literal from a string.
34///
35/// This function parses text literals in the AIM expression language, supporting
36/// both single-quoted and double-quoted strings with escape sequence processing.
37/// It can handle common escape sequences like `\n`, `\t`, `\"`, `\'`, etc.
38///
39/// # Supported Quote Types
40/// - **Double-quoted**: `"text"` - supports all escape sequences
41/// - **Single-quoted**: `'text'` - supports all escape sequences
42///
43/// # Supported Escape Sequences
44/// - `\n` - newline
45/// - `\r` - carriage return
46/// - `\t` - tab
47/// - `\\` - backslash
48/// - `\"` - double quote
49/// - `\'` - single quote
50/// - `\b` - backspace
51/// - `\f` - form feed
52/// - `\/` - forward slash
53///
54/// # Arguments
55///
56/// * `input` - A string slice containing the text literal to parse
57///
58/// # Returns
59///
60/// * `IResult<&str, String>` - A nom result with remaining input and parsed text content
61///
62/// # Examples
63///
64/// ```rust
65/// use aimx::literals::text::parse_text;
66///
67/// assert_eq!(parse_text("\"hello\""), Ok(("", "hello".to_string())));
68/// assert_eq!(parse_text("'world'"), Ok(("", "world".to_string())));
69/// assert_eq!(parse_text("\"line1\\nline2\""), Ok(("", "line1\nline2".to_string())));
70/// ```
71pub fn parse_text(input: &str) -> IResult<&str, String> {
72 alt((
73 parse_double_quoted_text,
74 parse_single_quoted_text,
75 )).parse(input)
76}
77
78/// Parse a block from a string.
79///
80/// This function parses block content enclosed in braces with a specific symbol.
81/// Braces are used in various contexts within the AIM language for formatting special content.
82///
83/// # Arguments
84///
85/// * `symbol` - The expected character after the opening brace
86/// * `input` - A string slice containing the block to parse, starting with `{`
87///
88/// # Returns
89///
90/// * `IResult<&str, String>` - A nom result with remaining input and parsed block content
91///
92/// # Examples
93///
94/// ```rust
95/// use aimx::literals::text::parse_block;
96///
97/// assert_eq!(parse_block('$', "{$description}"), Ok(("", "description".to_string())));
98/// assert_eq!(parse_block('@', "{@foo bar 96}"), Ok(("", "foo bar 96".to_string())));
99/// ```
100pub fn parse_block(symbol: char, input: &str) -> IResult<&str, String> {
101 let (input, _) = char('{').parse(input)?;
102 delimited(
103 char(symbol),
104 parse_block_content,
105 char('}'),
106 ).parse(input)
107}
108
109/// Parse a tag from a string.
110///
111/// This function parses tagged content enclosed in angle brackets. Tags are used
112/// in the AIM language for formatting special types and inference responses.
113///
114/// # Arguments
115///
116/// * `input` - A string slice containing the tag to parse
117///
118/// # Returns
119///
120/// * `IResult<&str, String>` - A nom result with remaining input and parsed tag content
121///
122/// # Examples
123///
124/// ```rust
125/// use aimx::literals::text::parse_tag;
126///
127/// assert_eq!(parse_tag("<bold>"), Ok(("", "bold".to_string())));
128/// assert_eq!(parse_tag("<format:date>"), Ok(("", "format:date".to_string())));
129/// ```
130pub fn parse_tag(input: &str) -> IResult<&str, String> {
131 delimited(
132 char('<'),
133 parse_tagged_content,
134 char('>'),
135 ).parse(input)
136}
137
138/// Parse a double-quoted text literal.
139///
140/// This function parses text enclosed in double quotes, processing escape
141/// sequences in the content.
142fn parse_double_quoted_text(input: &str) -> IResult<&str, String> {
143 delimited(
144 char('"'),
145 parse_double_quoted_content,
146 char('"'),
147 ).parse(input)
148}
149
150/// Parse a single-quoted text literal.
151///
152/// This function parses text enclosed in single quotes, processing escape
153/// sequences in the content.
154fn parse_single_quoted_text(input: &str) -> IResult<&str, String> {
155 delimited(
156 char('\''),
157 parse_single_quoted_content,
158 char('\''),
159 ).parse(input)
160}
161
162// -----PARSE CONTENT ---
163
164/// Parse block content.
165///
166/// This function parses the content within braces, processing any escape sequences
167/// that may be present.
168fn parse_block_content(input: &str) -> IResult<&str, String> {
169 fold(
170 0..,
171 parse_block_fragment,
172 String::new,
173 |mut text, fragment| {
174 match fragment {
175 StringFragment::Literal(s) => text.push_str(s),
176 StringFragment::EscapedChar(c) => text.push(c),
177 }
178 text
179 },
180 ).parse(input)
181}
182
183/// Parse tagged content.
184///
185/// This function parses the content within angle brackets, processing any
186/// escape sequences that may be present.
187fn parse_tagged_content(input: &str) -> IResult<&str, String> {
188 fold(
189 0..,
190 parse_tagged_fragment,
191 String::new,
192 |mut text, fragment| {
193 match fragment {
194 StringFragment::Literal(s) => text.push_str(s),
195 StringFragment::EscapedChar(c) => text.push(c),
196 }
197 text
198 },
199 ).parse(input)
200}
201
202/// Parse double-quoted content.
203///
204/// This function parses the content within double quotes, processing escape
205/// sequences and building the final string.
206fn parse_double_quoted_content(input: &str) -> IResult<&str, String> {
207 fold(
208 0..,
209 parse_double_quoted_fragment,
210 String::new,
211 |mut text, fragment| {
212 match fragment {
213 StringFragment::Literal(s) => text.push_str(s),
214 StringFragment::EscapedChar(c) => text.push(c),
215 }
216 text
217 },
218 ).parse(input)
219}
220
221/// Parse single-quoted content.
222///
223/// This function parses the content within single quotes, processing escape
224/// sequences and building the final string.
225fn parse_single_quoted_content(input: &str) -> IResult<&str, String> {
226 fold(
227 0..,
228 parse_single_quoted_fragment,
229 String::new,
230 |mut text, fragment| {
231 match fragment {
232 StringFragment::Literal(s) => text.push_str(s),
233 StringFragment::EscapedChar(c) => text.push(c),
234 }
235 text
236 },
237 ).parse(input)
238}
239
240// -----PARSE FRAGMENT ---
241
/// One piece of text produced while parsing delimited content.
///
/// Content is parsed as a sequence of fragments: each is either a borrowed run
/// of ordinary characters or one character decoded from an escape sequence.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum StringFragment<'a> {
    /// A run of characters taken verbatim from the input (no escapes inside).
    Literal(&'a str),
    /// The character an escape sequence decoded to.
    EscapedChar(char),
}
252
253/// Parse a string fragment for block text.
254///
255/// This function parses either a literal sequence of characters or a single
256/// escaped character within block content.
257fn parse_block_fragment(input: &str) -> IResult<&str, StringFragment<'_>> {
258 alt((
259 map(parse_block_literal, StringFragment::Literal),
260 map(convert_escaped_char, StringFragment::EscapedChar),
261 )).parse(input)
262}
263
264/// Parse a string fragment for tagged text.
265///
266/// This function parses either a literal sequence of characters or a single
267/// escaped character within tagged content.
268fn parse_tagged_fragment(input: &str) -> IResult<&str, StringFragment<'_>> {
269 alt((
270 map(parse_tagged_literal, StringFragment::Literal),
271 map(convert_escaped_char, StringFragment::EscapedChar),
272 )).parse(input)
273}
274
275/// Parse a string fragment for double-quoted text.
276///
277/// This function parses either a literal sequence of characters or a single
278/// escaped character within double-quoted text.
279fn parse_double_quoted_fragment(input: &str) -> IResult<&str, StringFragment<'_>> {
280 alt((
281 map(parse_double_quoted_literal, StringFragment::Literal),
282 map(convert_escaped_char, StringFragment::EscapedChar),
283 )).parse(input)
284}
285
286/// Parse a string fragment for single-quoted text.
287///
288/// This function parses either a literal sequence of characters or a single
289/// escaped character within single-quoted text.
290fn parse_single_quoted_fragment(input: &str) -> IResult<&str, StringFragment<'_>> {
291 alt((
292 map(parse_single_quoted_literal, StringFragment::Literal),
293 map(convert_escaped_char, StringFragment::EscapedChar),
294 )).parse(input)
295}
296
297// -----PARSE LITERAL ---
298
299/// Parse a literal block for block content.
300///
301/// This function parses a sequence of characters that don't contain escape
302/// sequences or closing delimiters for block content.
303fn parse_block_literal(input: &str) -> IResult<&str, &str> {
304 let not_quote_slash = is_not("\\}");
305 verify(not_quote_slash, |s: &str| !s.is_empty()).parse(input)
306}
307
308/// Parse a literal block for tagged content.
309///
310/// This function parses a sequence of characters that don't contain escape
311/// sequences or closing delimiters for tagged content.
312fn parse_tagged_literal(input: &str) -> IResult<&str, &str> {
313 let not_quote_slash = is_not("\\>");
314 verify(not_quote_slash, |s: &str| !s.is_empty()).parse(input)
315}
316
317/// Parse a literal block for double-quoted text.
318///
319/// This function parses a sequence of characters that don't contain escape
320/// sequences or double quotes.
321fn parse_double_quoted_literal(input: &str) -> IResult<&str, &str> {
322 let not_quote_slash = is_not("\\\"");
323 verify(not_quote_slash, |s: &str| !s.is_empty()).parse(input)
324}
325
326/// Parse a literal block for single-quoted text.
327///
328/// This function parses a sequence of characters that don't contain escape
329/// sequences or single quotes.
330fn parse_single_quoted_literal(input: &str) -> IResult<&str, &str> {
331 let not_quote_slash = is_not("\\'");
332 verify(not_quote_slash, |s: &str| !s.is_empty()).parse(input)
333}
334
335/// Convert an escaped character sequence to its character representation.
336///
337/// This function parses escape sequences and converts them to their
338/// corresponding character values.
339///
340/// Supported escape sequences:
341/// - `\n` - newline (U+000A)
342/// - `\r` - carriage return (U+000D)
343/// - `\t` - tab (U+0009)
344/// - `\\` - backslash (U+005C)
345/// - `\"` - double quote (U+0022)
346/// - `\'` - single quote (U+0027)
347/// - `\b` - backspace (U+0008)
348/// - `\f` - form feed (U+000C)
349/// - `\/` - forward slash (U+002F)
350/// - `\x` - literal character x (for any character x not in the above list)
351fn convert_escaped_char(input: &str) -> IResult<&str, char> {
352 preceded(
353 char('\\'),
354 alt((
355 value('\n', char('n')),
356 value('\r', char('r')),
357 value('\t', char('t')),
358 value('\\', char('\\')),
359 value('"', char('"')),
360 value('\'', char('\'')),
361 value('\u{08}', char('b')),
362 value('\u{0C}', char('f')),
363 value('/', char('/')),
364 // For other escaped characters, just return the character after the backslash
365 none_of("nrt\\\"'b/f"),
366 )),
367 ).parse(input)
368}
369
370/// Convert text content containing escape sequences to literal text.
371///
372/// This function takes escaped content (without quotes) and processes any
373/// escape sequences, building the final string representation.
374pub fn convert_escaped_text(input: &str) -> IResult<&str, String> {
375 fold(
376 0..,
377 alt((
378 map(is_not("\\"), |s: &str| s.to_string()),
379 map(convert_escaped_char, |c| c.to_string()),
380 )),
381 String::new,
382 |mut acc, fragment| {
383 acc.push_str(&fragment);
384 acc
385 },
386 ).parse(input)
387}