aimx/literals/
text.rs

1//! Text parsing for the AIM expression grammar.
2//!
//! This module provides parsers for text literals, tagged content, and
//! brace-delimited blocks in the AIM expression language. It handles quoted
//! strings (single and double), tagged content, and blocks, all with escape
//! sequence processing.
6//!
7//! # Supported Text Formats
8//!
9//! - **Quoted strings**: `"hello"` or `'world'`
10//! - **Tagged content**: `<tag>`
11//! - **Escape sequences**: `\n`, `\t`, `\"`, `\'`, etc.
12//!
13//! # Examples
14//!
15//! ```text
16//! "Hello, world!"           // double-quoted string
17//! 'Hello, world!'           // single-quoted string
18//! <tag>                     // tagged content
19//! "Line 1\nLine 2"          // string with escape sequences
20//! ```
21
22use nom::{
23  IResult,
24  Parser,
25  branch::alt,
26  bytes::complete::is_not,
27  character::complete::{char, none_of},
28  combinator::{map, value, verify},
29  multi::fold,
30  sequence::{delimited, preceded},
31};
32
33/// Parse a text literal from a string.
34/// 
35/// This function parses text literals in the AIM expression language, supporting
36/// both single-quoted and double-quoted strings with escape sequence processing.
37/// It can handle common escape sequences like `\n`, `\t`, `\"`, `\'`, etc.
38/// 
39/// # Supported Quote Types
40/// - **Double-quoted**: `"text"` - supports all escape sequences
41/// - **Single-quoted**: `'text'` - supports all escape sequences
42/// 
43/// # Supported Escape Sequences
44/// - `\n` - newline
45/// - `\r` - carriage return
46/// - `\t` - tab
47/// - `\\` - backslash
48/// - `\"` - double quote
49/// - `\'` - single quote
50/// - `\b` - backspace
51/// - `\f` - form feed
52/// - `\/` - forward slash
53/// 
54/// # Arguments
55/// 
56/// * `input` - A string slice containing the text literal to parse
57/// 
58/// # Returns
59/// 
60/// * `IResult<&str, String>` - A nom result with remaining input and parsed text content
61/// 
62/// # Examples
63/// 
64/// ```rust
65/// use aimx::literals::text::parse_text;
66/// 
67/// assert_eq!(parse_text("\"hello\""), Ok(("", "hello".to_string())));
68/// assert_eq!(parse_text("'world'"), Ok(("", "world".to_string())));
69/// assert_eq!(parse_text("\"line1\\nline2\""), Ok(("", "line1\nline2".to_string())));
70/// ```
71pub fn parse_text(input: &str) -> IResult<&str, String> {
72  alt((
73    parse_double_quoted_text,
74    parse_single_quoted_text,
75  )).parse(input)
76}
77
78/// Parse a block from a string.
79/// 
80/// This function parses block content enclosed in braces with a specific symbol.
81/// Braces are used in various contexts within the AIM language for formatting special content.
82/// 
83/// # Arguments
84/// 
85/// * `symbol` - The expected character after the opening brace
86/// * `input` - A string slice containing the block to parse, starting with `{`
87/// 
88/// # Returns
89/// 
90/// * `IResult<&str, String>` - A nom result with remaining input and parsed block content
91/// 
92/// # Examples
93/// 
94/// ```rust
95/// use aimx::literals::text::parse_block;
96/// 
97/// assert_eq!(parse_block('$', "{$description}"), Ok(("", "description".to_string())));
98/// assert_eq!(parse_block('@', "{@foo bar 96}"), Ok(("", "foo bar 96".to_string())));
99/// ```
100pub fn parse_block(symbol: char, input: &str) -> IResult<&str, String> {
101  let (input, _) = char('{').parse(input)?;
102  delimited(
103    char(symbol),
104    parse_block_content,
105    char('}'),
106  ).parse(input)
107}
108
109/// Parse a tag from a string.
110/// 
111/// This function parses tagged content enclosed in angle brackets. Tags are used
112/// in the AIM language for formatting special types and inference responses.
113/// 
114/// # Arguments
115/// 
116/// * `input` - A string slice containing the tag to parse
117/// 
118/// # Returns
119/// 
120/// * `IResult<&str, String>` - A nom result with remaining input and parsed tag content
121/// 
122/// # Examples
123/// 
124/// ```rust
125/// use aimx::literals::text::parse_tag;
126/// 
127/// assert_eq!(parse_tag("<bold>"), Ok(("", "bold".to_string())));
128/// assert_eq!(parse_tag("<format:date>"), Ok(("", "format:date".to_string())));
129/// ```
130pub fn parse_tag(input: &str) -> IResult<&str, String> {
131  delimited(
132    char('<'),
133    parse_tagged_content,
134    char('>'),
135  ).parse(input)
136}
137
138/// Parse a double-quoted text literal.
139/// 
140/// This function parses text enclosed in double quotes, processing escape
141/// sequences in the content.
142fn parse_double_quoted_text(input: &str) -> IResult<&str, String> {
143  delimited(
144    char('"'),
145    parse_double_quoted_content,
146    char('"'),
147  ).parse(input)
148}
149
150/// Parse a single-quoted text literal.
151/// 
152/// This function parses text enclosed in single quotes, processing escape
153/// sequences in the content.
154fn parse_single_quoted_text(input: &str) -> IResult<&str, String> {
155  delimited(
156    char('\''),
157    parse_single_quoted_content,
158    char('\''),
159  ).parse(input)
160}
161
162// -----PARSE CONTENT ---
163
164/// Parse block content.
165/// 
166/// This function parses the content within braces, processing any escape sequences
167/// that may be present.
168fn parse_block_content(input: &str) -> IResult<&str, String> {
169  fold(
170    0..,
171    parse_block_fragment,
172    String::new,
173    |mut text, fragment| {
174      match fragment {
175        StringFragment::Literal(s) => text.push_str(s),
176        StringFragment::EscapedChar(c) => text.push(c),
177      }
178      text
179    },
180  ).parse(input)
181}
182
183/// Parse tagged content.
184/// 
185/// This function parses the content within angle brackets, processing any
186/// escape sequences that may be present.
187fn parse_tagged_content(input: &str) -> IResult<&str, String> {
188  fold(
189    0..,
190    parse_tagged_fragment,
191    String::new,
192    |mut text, fragment| {
193      match fragment {
194        StringFragment::Literal(s) => text.push_str(s),
195        StringFragment::EscapedChar(c) => text.push(c),
196      }
197      text
198    },
199  ).parse(input)
200}
201
202/// Parse double-quoted content.
203/// 
204/// This function parses the content within double quotes, processing escape
205/// sequences and building the final string.
206fn parse_double_quoted_content(input: &str) -> IResult<&str, String> {
207  fold(
208    0..,
209    parse_double_quoted_fragment,
210    String::new,
211    |mut text, fragment| {
212      match fragment {
213        StringFragment::Literal(s) => text.push_str(s),
214        StringFragment::EscapedChar(c) => text.push(c),
215      }
216      text
217    },
218  ).parse(input)
219}
220
221/// Parse single-quoted content.
222/// 
223/// This function parses the content within single quotes, processing escape
224/// sequences and building the final string.
225fn parse_single_quoted_content(input: &str) -> IResult<&str, String> {
226  fold(
227    0..,
228    parse_single_quoted_fragment,
229    String::new,
230    |mut text, fragment| {
231      match fragment {
232        StringFragment::Literal(s) => text.push_str(s),
233        StringFragment::EscapedChar(c) => text.push(c),
234      }
235      text
236    },
237  ).parse(input)
238}
239
240// -----PARSE FRAGMENT ---
241
/// One piece of text produced while parsing the content of a literal, tag,
/// or block.
///
/// Content is parsed as a sequence of fragments: either a non-empty run of
/// ordinary (non-escaped) characters, or a single character obtained by
/// decoding one escape sequence. The content parsers concatenate these
/// fragments to build the final `String`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StringFragment<'a> {
  /// A run of non-escaped characters, borrowed directly from the input.
  Literal(&'a str),
  /// The single character that an escape sequence decoded to.
  EscapedChar(char),
}
252
253/// Parse a string fragment for block text.
254/// 
255/// This function parses either a literal sequence of characters or a single
256/// escaped character within block content.
257fn parse_block_fragment(input: &str) -> IResult<&str, StringFragment<'_>> {
258  alt((
259    map(parse_block_literal, StringFragment::Literal),
260    map(convert_escaped_char, StringFragment::EscapedChar),
261  )).parse(input)
262}
263
264/// Parse a string fragment for tagged text.
265/// 
266/// This function parses either a literal sequence of characters or a single
267/// escaped character within tagged content.
268fn parse_tagged_fragment(input: &str) -> IResult<&str, StringFragment<'_>> {
269  alt((
270    map(parse_tagged_literal, StringFragment::Literal),
271    map(convert_escaped_char, StringFragment::EscapedChar),
272  )).parse(input)
273}
274
275/// Parse a string fragment for double-quoted text.
276/// 
277/// This function parses either a literal sequence of characters or a single
278/// escaped character within double-quoted text.
279fn parse_double_quoted_fragment(input: &str) -> IResult<&str, StringFragment<'_>> {
280  alt((
281    map(parse_double_quoted_literal, StringFragment::Literal),
282    map(convert_escaped_char, StringFragment::EscapedChar),
283  )).parse(input)
284}
285
286/// Parse a string fragment for single-quoted text.
287/// 
288/// This function parses either a literal sequence of characters or a single
289/// escaped character within single-quoted text.
290fn parse_single_quoted_fragment(input: &str) -> IResult<&str, StringFragment<'_>> {
291  alt((
292    map(parse_single_quoted_literal, StringFragment::Literal),
293    map(convert_escaped_char, StringFragment::EscapedChar),
294  )).parse(input)
295}
296
297// -----PARSE LITERAL ---
298
299/// Parse a literal block for block content.
300/// 
301/// This function parses a sequence of characters that don't contain escape
302/// sequences or closing delimiters for block content.
303fn parse_block_literal(input: &str) -> IResult<&str, &str> {
304  let not_quote_slash = is_not("\\}");
305  verify(not_quote_slash, |s: &str| !s.is_empty()).parse(input)
306}
307
308/// Parse a literal block for tagged content.
309/// 
310/// This function parses a sequence of characters that don't contain escape
311/// sequences or closing delimiters for tagged content.
312fn parse_tagged_literal(input: &str) -> IResult<&str, &str> {
313  let not_quote_slash = is_not("\\>");
314  verify(not_quote_slash, |s: &str| !s.is_empty()).parse(input)
315}
316
317/// Parse a literal block for double-quoted text.
318/// 
319/// This function parses a sequence of characters that don't contain escape
320/// sequences or double quotes.
321fn parse_double_quoted_literal(input: &str) -> IResult<&str, &str> {
322  let not_quote_slash = is_not("\\\"");
323  verify(not_quote_slash, |s: &str| !s.is_empty()).parse(input)
324}
325
326/// Parse a literal block for single-quoted text.
327/// 
328/// This function parses a sequence of characters that don't contain escape
329/// sequences or single quotes.
330fn parse_single_quoted_literal(input: &str) -> IResult<&str, &str> {
331  let not_quote_slash = is_not("\\'");
332  verify(not_quote_slash, |s: &str| !s.is_empty()).parse(input)
333}
334
335/// Convert an escaped character sequence to its character representation.
336/// 
337/// This function parses escape sequences and converts them to their
338/// corresponding character values.
339/// 
340/// Supported escape sequences:
341/// - `\n` - newline (U+000A)
342/// - `\r` - carriage return (U+000D)
343/// - `\t` - tab (U+0009)
344/// - `\\` - backslash (U+005C)
345/// - `\"` - double quote (U+0022)
346/// - `\'` - single quote (U+0027)
347/// - `\b` - backspace (U+0008)
348/// - `\f` - form feed (U+000C)
349/// - `\/` - forward slash (U+002F)
350/// - `\x` - literal character x (for any character x not in the above list)
351fn convert_escaped_char(input: &str) -> IResult<&str, char> {
352  preceded(
353    char('\\'),
354    alt((
355      value('\n', char('n')),
356      value('\r', char('r')),
357      value('\t', char('t')),
358      value('\\', char('\\')),
359      value('"', char('"')),
360      value('\'', char('\'')),
361      value('\u{08}', char('b')),
362      value('\u{0C}', char('f')),
363      value('/', char('/')),
364      // For other escaped characters, just return the character after the backslash
365      none_of("nrt\\\"'b/f"),
366    )),
367  ).parse(input)
368}
369
370/// Convert text content containing escape sequences to literal text.
371/// 
372/// This function takes escaped content (without quotes) and processes any
373/// escape sequences, building the final string representation.
374pub fn convert_escaped_text(input: &str) -> IResult<&str, String> {
375  fold(
376    0..,
377    alt((
378      map(is_not("\\"), |s: &str| s.to_string()),
379      map(convert_escaped_char, |c| c.to_string()),
380    )),
381    String::new,
382    |mut acc, fragment| {
383      acc.push_str(&fragment);
384      acc
385    },
386  ).parse(input)
387}