aimx/inference/key.rs
1//! Parsing of UPPERCASE inference keys used in AIM prompts and responses.
2//!
3//! Keys are UCIDs (`[A-Z_][A-Z0-9_]*`) with one of three suffix patterns:
4//! - `key_colon_eol`: `UCID ':'` optional whitespace, then end-of-line → multi-line body follows.
5//! - `key_eol`: `UCID` optional whitespace, then end-of-line → delimiter/marker line.
6//! - `key_colon`: `UCID ':'` → inline value on same line.
7
8use nom::{
9 IResult, Parser,
10 branch::alt,
11 bytes::complete::take_while,
12 character::complete::{char, multispace0, satisfy},
13 combinator::{map, recognize},
14 error::{Error, ErrorKind},
15 sequence::{pair, preceded},
16};
17use std::sync::Arc;
18
/// Suffix patterns that can follow an inference key.
///
/// Produced by `parse_key` to tell the caller where the key's value (if any)
/// is located relative to the key itself.
///
/// The enum carries no data, so it is `Copy` and `Hash` in addition to the
/// comparison traits: it can be passed by value and used as a set/map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Suffix {
    /// Key followed by a colon, optional whitespace, and end-of-line.
    /// Indicates a multi-line value on subsequent lines.
    ColonEol,

    /// Key followed by optional whitespace and end-of-line (no colon).
    /// Used as a delimiter/marker line.
    Eol,

    /// Key followed by a colon with more content after it.
    /// Indicates an inline value on the same line.
    Colon,
}
36
37/// Parse an uppercase identifier `UCID = [A-Z_][A-Z0-9_]*`.
38pub fn parse_ucid(input: &str) -> IResult<&str, Arc<str>> {
39 let (remain, ucid) = recognize(pair(
40 satisfy(|c: char| c.is_ascii_uppercase() || c == '_'),
41 take_while(|c: char| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_'),
42 ))
43 .parse(input)?;
44 Ok((remain, Arc::from(ucid)))
45}
46
47/// Check if there is no remaining content (end-of-line/input marker).
48fn check_eol(input: &str) -> IResult<&str, ()> {
49 if input.is_empty() {
50 Ok((input, ()))
51 } else {
52 Err(nom::Err::Error(Error::new(input, ErrorKind::Fail)))
53 }
54}
55
56/// Parse an inference key and classify its suffix.
57///
58/// Leading whitespace is skipped; returns `(key, Suffix)` with remaining input.
59pub fn parse_key(input: &str) -> IResult<&str, (Arc<str>, Suffix)> {
60 let (input, ucid) = preceded(multispace0, parse_ucid).parse(input)?;
61 alt((
62 // key_colon_eol = UCID ':' S? EOL
63 map(
64 (char(':'), multispace0, check_eol),
65 |_| (ucid.clone(), Suffix::ColonEol)
66 ),
67 // key_eol = UCID S? EOL
68 map(
69 (multispace0, check_eol),
70 |_| (ucid.clone(), Suffix::Eol)
71 ),
72 // key_colon = UCID ':'
73 map(
74 char(':'),
75 |_| (ucid.clone(), Suffix::Colon)
76 ),
77 )).parse(input)
78}