diff --git a/Cargo.lock b/Cargo.lock index 55fefab..c82c19f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -336,6 +336,15 @@ dependencies = [ "yaml-rust", ] +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -421,6 +430,41 @@ dependencies = [ "sct", ] +[[package]] +name = "darling" +version = "0.20.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc5d6b04b3fd0ba9926f945895de7d806260a2d7431ba82e7edaecb043c4c6b8" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04e48a959bcd5c761246f5d090ebc2fbf7b9cd527a492b07a67510c108f1e7e3" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.48", +] + +[[package]] +name = "darling_macro" +version = "0.20.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1545d67a2149e1d93b7e5c7752dce5a7426eb5d1357ddcfd89336b94444f77" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.48", +] + [[package]] name = "deranged" version = "0.3.11" @@ -656,6 +700,7 @@ name = "gbnf" version = "0.1.0" dependencies = [ "auto_impl", + "convert_case", "itertools 0.12.1", "quote", "serde", @@ -668,8 +713,10 @@ name = "gbnf_derive" version = "0.1.0" dependencies = [ "auto_impl", + "darling", "gbnf", "itertools 0.12.1", + "proc-macro2", "quote", "syn 2.0.48", ] @@ -896,6 +943,12 @@ dependencies = [ "cc", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "0.5.0" @@ -2071,6 
+2124,12 @@ dependencies = [ "vte", ] +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + [[package]] name = "strum" version = "0.25.0" diff --git a/game/src/ai/prompts/execution_prompts.rs b/game/src/ai/prompts/execution_prompts.rs index f054ddc..b35427f 100644 --- a/game/src/ai/prompts/execution_prompts.rs +++ b/game/src/ai/prompts/execution_prompts.rs @@ -1,5 +1,8 @@ use crate::ai::convo::AiPrompt; -use crate::models::commands::{CommandEvent, EventConversionFailure, ParsedCommand}; +use crate::models::commands::{ + CommandEvent, EventConversionFailure, ParsedCommand, RawCommandEventGbnfLimit, + RawCommandExecution, RawCommandExecutionGbnfLimit, +}; use crate::models::world::items::Item; use crate::models::world::people::Person; use crate::models::world::scenes::{Exit, Prop, Scene, Stage}; @@ -244,6 +247,45 @@ fn stage_info(stage: &Stage) -> String { info } +fn execution_gbnf_limit<'a>(stage: &'a Stage) -> RawCommandExecutionGbnfLimit { + // We will begin this implementation by simply setting the limits + // to all uuids in the scene. We might wind up getting rid of the + // appliesTo field, leaving only parameter behind. 
+ + let people_keys = stage + .people + .iter() + .map(|p| &p._key) + .cloned() + .flatten(); + + let item_keys = stage + .items + .iter() + .map(|i| &i._key) + .cloned() + .flatten(); + + let curr_scene_key = stage.scene._key.iter(); + let exit_keys = stage.scene.exits.iter().map(|e| &e.scene_key).cloned(); + + let all_uuids = people_keys + .chain(item_keys) + //.chain(curr_scene_key) + .chain(exit_keys) + .collect_vec(); + + //let all_uuids = std::rc::Rc::new(all_uuids); + let event_limit = RawCommandEventGbnfLimit { + applies_to: Box::new(all_uuids.clone()), + parameter: Box::new(all_uuids.clone()), + }; + + RawCommandExecutionGbnfLimit { + event: Box::new(event_limit), + } +} + pub fn execution_prompt(original_cmd: &str, stage: &Stage, cmd: &ParsedCommand) -> AiPrompt { let scene_info = stage_info(&stage); @@ -255,7 +297,12 @@ pub fn execution_prompt(original_cmd: &str, stage: &Stage, cmd: &ParsedCommand) .replacen("{LOCATION}", &cmd.location, 1) .replacen("{USING}", &cmd.using, 1); - AiPrompt::new_with_grammar_and_size(&prompt, COMMAND_EXECUTION_BNF, 512) + let limit = execution_gbnf_limit(stage); + let grammar = RawCommandExecution::to_grammar_with_limit(limit); + println!("{}", grammar); + + //AiPrompt::new_with_grammar_and_size(&prompt, COMMAND_EXECUTION_BNF, 512) + AiPrompt::new_with_grammar_and_size(&prompt, &grammar, 512) } pub fn fix_prompt(scene: &Scene, failures: &EventConversionFailure) -> AiPrompt { diff --git a/game/src/ai/prompts/parsing_prompts.rs b/game/src/ai/prompts/parsing_prompts.rs index 79d8a01..4d42bbb 100644 --- a/game/src/ai/prompts/parsing_prompts.rs +++ b/game/src/ai/prompts/parsing_prompts.rs @@ -1,3 +1,4 @@ +//use gbnf::prelude::*; use crate::{ ai::convo::AiPrompt, models::commands::{ParsedCommands, VerbsResponse}, diff --git a/game/src/models/commands.rs b/game/src/models/commands.rs index e3012cb..7bb2c42 100644 --- a/game/src/models/commands.rs +++ b/game/src/models/commands.rs @@ -1,10 +1,10 @@ use std::fmt::Display; +use 
gbnf::prelude::*; +use gbnf_derive::{Gbnf}; use serde::{Deserialize, Serialize}; use strum::{EnumString, EnumVariantNames}; use thiserror::Error; -use gbnf::prelude::*; -use gbnf_derive::Gbnf; /// Stored in the database to bypass AI 'parsing' when possible. #[derive(Debug, Serialize, Deserialize, Clone)] @@ -56,13 +56,14 @@ pub struct VerbAndTargetEntry { pub target: String, } -#[derive(Debug, Serialize, Deserialize, Clone)] +#[derive(Debug, Serialize, Deserialize, Clone, Gbnf)] #[serde(rename_all = "camelCase")] pub struct RawCommandExecution { pub valid: bool, pub reason: Option, pub narration: String, #[serde(skip_serializing_if = "Option::is_none")] + #[gbnf_limit] pub event: Option, } @@ -77,11 +78,13 @@ impl RawCommandExecution { } } -#[derive(Debug, Serialize, Deserialize, Clone)] +#[derive(Debug, Serialize, Deserialize, Clone, Gbnf)] #[serde(rename_all = "camelCase")] pub struct RawCommandEvent { pub event_name: String, + #[gbnf_limit] pub applies_to: String, + #[gbnf_limit] pub parameter: String, } @@ -147,7 +150,8 @@ pub struct Narrative { /// An "AI Command" is a command execution generated by the LLM and /// run through coherence validation/fixing, and (assuming it is -/// valid) contains a series of events to apply to the game state. +/// valid) contains an event to apply to the game state. 
+//TODO rename to AiCommandExecution #[derive(Debug, Serialize, Deserialize, Clone)] pub struct AiCommand { pub valid: bool, diff --git a/gbnf/Cargo.toml b/gbnf/Cargo.toml index 2313c1b..095cae1 100644 --- a/gbnf/Cargo.toml +++ b/gbnf/Cargo.toml @@ -10,3 +10,4 @@ quote = "1.0.35" itertools = "0.12.0" serde = "1.0.196" serde_derive = "1.0.196" +convert_case = "0.6.0" diff --git a/gbnf/src/lib.rs b/gbnf/src/lib.rs index aa49b2e..e782f0b 100644 --- a/gbnf/src/lib.rs +++ b/gbnf/src/lib.rs @@ -1,32 +1,173 @@ -extern crate proc_macro; +use std::{ + collections::HashMap, + fmt::Display, + ops::{Deref, DerefMut}, +}; use itertools::Itertools; +use limited::{Limited, LimitedGbnfComplex, LimitedGbnfField, LimitedGbnfPrimitive}; use serde::de::DeserializeOwned; +use convert_case::{Case, Casing}; + +mod limited; pub mod prelude { pub use crate::gbnf_field; pub use crate::gbnf_field_type; pub use crate::AsGbnf; + pub use crate::AsGbnfPrimitive; pub use crate::AsGrammar; pub use crate::GbnfComplex; pub use crate::GbnfField; pub use crate::GbnfFieldType; + pub use crate::GbnfLimit; + pub use crate::GbnfLimitedField; pub use crate::GbnfPrimitive; pub use crate::GbnfRule; + pub use crate::GbnfRuleLevel; pub use crate::GbnfToken; + pub use std::collections::HashMap; } -// TODOs for this implementation: -// 1. Move primitive definitions (string, bool, etc) to the bottom of generated grammar. -// 2. Implement support for limited values. -// 3. Generate static strings for the gramma rules where possible. -// 4. Properly support optional types (right now they map to non-optional values). +/// The GbnfLimitedField trait is capable of producing a GbnfLimitType +/// enum value, which itself contains either a list of allowable +/// values (for simple types), or a nested GbnfLimit object(for +/// complex types). This allows the creation of a complex nested Limit +/// struct which can hold the full structure of nested limits of +/// multiple types that derive Gbnf and have limits. 
+pub trait GbnfLimitedField { + fn limit(self: Box) -> GbnfLimit; +} + +impl + 'static> From> for Box> { + fn from(value: Box) -> Self { + value + } +} + +impl From> for Box> { + fn from(value: Vec) -> Self { + Box::new(value) + } +} + +impl From<[T; N]> for Box> +where + T: ToString + AsGbnfPrimitive + 'static, +{ + fn from(value: [T; N]) -> Self { + Box::new(value) + } +} + +impl From>> for Box> { + fn from(value: Box>) -> Self { + value + } +} + +// Single primitive field types that can only have specific values. +impl GbnfLimitedField for Vec { + fn limit(self: Box) -> GbnfLimit { + GbnfLimit::Simple( + T::to_gbnf_primitive(), + self.into_iter().map(|v| v.to_string()).collect(), + ) + } +} + +impl GbnfLimitedField> for Vec { + fn limit(self: Box) -> GbnfLimit { + GbnfLimit::Simple( + T::to_gbnf_primitive(), + self.into_iter().map(|v| v.to_string()).collect(), + ) + } +} + +// List types that can only have specific values. +impl GbnfLimitedField> for Vec { + fn limit(self: Box) -> GbnfLimit { + GbnfLimit::Simple( + T::to_gbnf_primitive(), + self.into_iter().map(|v| v.to_string()).collect(), + ) + } +} + +impl GbnfLimitedField for [T; N] +where + T: ToString + AsGbnfPrimitive, +{ + fn limit(self: Box) -> GbnfLimit { + GbnfLimit::Simple( + T::to_gbnf_primitive(), + self.into_iter().map(|v| v.to_string()).collect(), + ) + } +} + +impl GbnfLimitedField> for [T; N] +where + T: ToString + AsGbnfPrimitive, +{ + fn limit(self: Box) -> GbnfLimit { + GbnfLimit::Simple( + T::to_gbnf_primitive(), + self.into_iter().map(|v| v.to_string()).collect(), + ) + } +} + +impl<'a> GbnfLimitedField for &'a [&'a str] { + fn limit(self: Box) -> GbnfLimit { + GbnfLimit::Simple( + String::to_gbnf_primitive(), + self.into_iter().map(|v| v.to_string()).collect(), + ) + } +} + +impl<'a> GbnfLimitedField for std::rc::Rc> { + fn limit(self: Box) -> GbnfLimit { + GbnfLimit::Simple( + String::to_gbnf_primitive(), + self.iter().map(|v| v.to_string()).collect(), + ) + } +} + +/// A type of GBNF 
value limit, either simple (a list of values meant +/// for a single field), or complex (contains nested GbnfLimit). +#[derive(Debug)] +pub enum GbnfLimit { + /// The primitive type, and the allowed values. + Simple(GbnfPrimitive, Vec), + /// Field name -> nested limit type. + Complex(HashMap<&'static str, GbnfLimit>), +} + +impl From>> for GbnfLimit { + fn from(value: Box>) -> Self { + value.limit() + } +} // Converts GBNF defintions (through the types below) into the grammar // rules. pub trait AsGrammar { - fn rules(&self) -> Vec; - fn token(&self) -> String; + /// Create the rule itself, along with its dependent rules. + fn rule(&self, token: &str) -> GbnfRule; + + /// The basic token for the type, or type contained within a + /// wrapper. A wrapper is something like GbnfField. This always + /// returns the token for the base type of the rule (including + /// option or list, for those types). + fn base_type_token(&self) -> GbnfToken; + + /// Wraps this trait impl in a corresponding Limited GBNF rule + /// creator. + fn with_limit<'a>(&'a self, limit: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a; } /// Trait for regular types to implement to convert themselves to a @@ -35,6 +176,10 @@ pub trait AsGbnf { fn to_gbnf() -> GbnfFieldType; } +pub trait AsGbnfPrimitive { + fn to_gbnf_primitive() -> GbnfPrimitive; +} + macro_rules! define_field_type { ($type:ty, $gbnf_type:expr) => { impl AsGbnf for $type { @@ -45,6 +190,23 @@ macro_rules! define_field_type { }; } +macro_rules! define_gbnf_primitive { + ($type:ty, $gbnf_primitive:expr) => { + impl AsGbnfPrimitive for $type { + fn to_gbnf_primitive() -> GbnfPrimitive { + $gbnf_primitive + } + } + }; +} + +macro_rules! define_primitive_field_type { + ($type:ty, $gbnf_primitive:expr) => { + define_gbnf_primitive!($type, $gbnf_primitive); + define_field_type!($type, GbnfFieldType::Primitive($gbnf_primitive)); + }; +} + #[macro_export] macro_rules! gbnf_field_type { ($type:ty) => { @@ -58,25 +220,34 @@ macro_rules! 
gbnf_field { GbnfField { field_name: $field_name.to_string(), field_type: gbnf_field_type!($field_type), + limited: false, + } + }; + + ($field_name:literal, $field_type: ty, $field_limit:expr) => { + GbnfField { + field_name: $field_name.to_string(), + field_type: gbnf_field_type!($field_type), + limit: $field_limit, } }; } // Implemented field type mappings for common rust types. -define_field_type!(i16, GbnfFieldType::Primitive(GbnfPrimitive::Number)); -define_field_type!(u16, GbnfFieldType::Primitive(GbnfPrimitive::Number)); -define_field_type!(i32, GbnfFieldType::Primitive(GbnfPrimitive::Number)); -define_field_type!(u32, GbnfFieldType::Primitive(GbnfPrimitive::Number)); -define_field_type!(i64, GbnfFieldType::Primitive(GbnfPrimitive::Number)); -define_field_type!(u64, GbnfFieldType::Primitive(GbnfPrimitive::Number)); -define_field_type!(f32, GbnfFieldType::Primitive(GbnfPrimitive::Number)); -define_field_type!(f64, GbnfFieldType::Primitive(GbnfPrimitive::Number)); -define_field_type!(usize, GbnfFieldType::Primitive(GbnfPrimitive::Number)); +define_primitive_field_type!(i16, GbnfPrimitive::Number); +define_primitive_field_type!(u16, GbnfPrimitive::Number); +define_primitive_field_type!(i32, GbnfPrimitive::Number); +define_primitive_field_type!(u32, GbnfPrimitive::Number); +define_primitive_field_type!(i64, GbnfPrimitive::Number); +define_primitive_field_type!(u64, GbnfPrimitive::Number); +define_primitive_field_type!(f32, GbnfPrimitive::Number); +define_primitive_field_type!(f64, GbnfPrimitive::Number); +define_primitive_field_type!(usize, GbnfPrimitive::Number); -define_field_type!(bool, GbnfFieldType::Primitive(GbnfPrimitive::Boolean)); +define_primitive_field_type!(bool, GbnfPrimitive::Boolean); -define_field_type!(String, GbnfFieldType::Primitive(GbnfPrimitive::String)); -define_field_type!(char, GbnfFieldType::Primitive(GbnfPrimitive::String)); +define_primitive_field_type!(String, GbnfPrimitive::String); +define_primitive_field_type!(char, 
GbnfPrimitive::String); // Blanket implementations to cover more types impl AsGbnf for [T; N] @@ -90,7 +261,6 @@ where OptionalPrimitive(primitive_type) => PrimitiveList(primitive_type), Complex(complex_type) => ComplexList(complex_type), OptionalComplex(complex_type) => ComplexList(complex_type), - Limited(_) => panic!("limited values are not yet supported"), ComplexList(_) | PrimitiveList(_) => panic!("nested lists not supported"), } } @@ -107,7 +277,6 @@ where OptionalPrimitive(primitive_type) => PrimitiveList(primitive_type), Complex(complex_type) => ComplexList(complex_type), OptionalComplex(complex_type) => ComplexList(complex_type), - Limited(_) => panic!("limited values not yet supported"), ComplexList(_) | PrimitiveList(_) => panic!("nested lists not supported"), } } @@ -123,58 +292,130 @@ where Primitive(primitive_type) => OptionalPrimitive(primitive_type), Complex(complex_type) => OptionalComplex(complex_type), OptionalPrimitive(_) | OptionalComplex(_) => panic!("nested options are not allowed"), - Limited(_) => panic!("limited values not yet supported"), _ => panic!("optional type cannot be a list"), } } } +#[derive(Debug, Clone, Eq, Hash, PartialEq, Ord, PartialOrd)] +pub enum GbnfRuleLevel { + Root, + Middle, + Leaf, +} + // Actual GBNF rule itself. Holds rule text for dedup. 
#[derive(Debug, Clone, Eq, Hash, PartialEq)] pub struct GbnfRule { - name: String, + name: GbnfToken, text: String, + dependents: Vec, + level: GbnfRuleLevel, } impl GbnfRule { - pub fn new(token: String, rule_text: String) -> GbnfRule { + pub fn new(token: GbnfToken, rule_text: String, level: GbnfRuleLevel) -> GbnfRule { GbnfRule { name: token, text: rule_text, + dependents: vec![], + level, } } - pub fn single(token: String, rule_text: String) -> Vec { - vec![GbnfRule::new(token, rule_text)] + pub fn space() -> GbnfRule { + GbnfRule::new( + GbnfToken::new("ws".to_string()), + r#"[ \t\n]*"#.to_string(), + GbnfRuleLevel::Leaf, + ) + } + + pub fn to_optional(self) -> GbnfRule { + let option_token = self.name.option_token(); + let option_rule_text = format!(r#"{} | "null""#, self.name); + let mut option_rule = GbnfRule::new(option_token, option_rule_text, GbnfRuleLevel::Middle); + option_rule.dependents = vec![self]; + option_rule + } + + /// Turn this rule into a list rule: main rule is a list type, and + /// dependent rule is the original type/rule itself. + pub fn to_list(self) -> GbnfRule { + let list_name = self.name.list_token(); + let list_rule_text = + r#""[]" | "[" {SPACE} {TYPE_NAME} ("," {SPACE} {TYPE_NAME})* "]""# + .replace("{LIST_NAME}", &list_name.0) + .replace("{SPACE}", &GbnfRule::space().name.0) + .replace("{TYPE_NAME}", &self.name.0); + + let mut list_rule = GbnfRule::new(list_name, list_rule_text, GbnfRuleLevel::Middle); + list_rule.dependents = vec![self, GbnfRule::space()]; + list_rule + } + + /// Consume this rule to produce a list of rules consisting of + /// this rule and its dependents. The rules in the list have no + /// dependents. Useful for final output of a rules list. + fn flatten(mut self) -> Vec { + let dependents = self + .dependents + .drain(0..) + .flat_map(|rule| rule.flatten()) + .collect_vec(); + + // Self needs to be in front to get proper rule ordering. 
+ [&[self][..], &dependents[..]].concat() } } /// Tokens in the GBNF rule. -pub enum GbnfToken { - Space, +#[repr(transparent)] +#[derive(Debug, Clone, Eq, Hash, PartialEq)] +pub struct GbnfToken(String); + +impl From<&str> for GbnfToken { + fn from(value: &str) -> Self { + GbnfToken(value.to_string().to_case(Case::Camel)) + } +} + +impl Display for GbnfToken { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl Deref for GbnfToken { + type Target = String; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for GbnfToken { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } } impl GbnfToken { - pub(self) const SPACE: &'static str = r#"[ \t\n]*"#; -} - -impl AsGrammar for GbnfToken { - fn rules(&self) -> Vec { - match self { - Self::Space => GbnfRule::single(self.token(), Self::SPACE.to_string()), - } + pub fn new>(value: S) -> GbnfToken { + GbnfToken::from(value.as_ref()) } - fn token(&self) -> String { - match self { - Self::Space => "ws".to_string(), - } + fn option_token(&self) -> GbnfToken { + GbnfToken::new(format!("{}Option", self.0)) + } + + fn list_token(&self) -> GbnfToken { + GbnfToken::new(format!("{}List", self.0)) } } /// Represents a primitive value in the GBNF, the simplest possible /// value a type can hold. -#[derive(Debug)] +#[derive(Debug, Clone, Copy, PartialEq)] pub enum GbnfPrimitive { String, Boolean, @@ -189,30 +430,34 @@ impl GbnfPrimitive { impl AsGrammar for GbnfPrimitive { /// Output the raw GBNF rule of this primitive. - fn rules(&self) -> Vec { + fn rule(&self, token: &str) -> GbnfRule { let rule_text = match self { Self::Boolean => Self::BOOLEAN, Self::Number => Self::NUMBER, Self::String => Self::STRING, }; - GbnfRule::single(self.token(), rule_text.to_string()) + GbnfRule::new(token.into(), rule_text.to_string(), GbnfRuleLevel::Leaf) } /// Output the token name of the GBNF rule (to refer to in other /// rules). 
- fn token(&self) -> String { - String::from(match self { + fn base_type_token(&self) -> GbnfToken { + GbnfToken::from(match self { Self::Boolean => "boolean", Self::Number => "number", Self::String => "string", }) } + + fn with_limit<'a>(&'a self, limit: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a { + Limited(self, limit.and_then(|l| LimitedGbnfPrimitive::new(l))) + } } /// Categorize all types of fields that the generated grammar can /// handle. -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum GbnfFieldType { /// A single property on the type, e.g. myField: i32 Primitive(GbnfPrimitive), @@ -231,13 +476,16 @@ pub enum GbnfFieldType { /// A list/vec of complex types. ComplexList(GbnfComplex), - - /// A single property field, but with limited values allowed, - /// constrained by the primitive type. - Limited(GbnfPrimitive), } impl GbnfFieldType { + pub fn as_primitive(self) -> GbnfPrimitive { + match self { + GbnfFieldType::Primitive(primitive) => primitive, + _ => panic!("not a GBNF primitive"), + } + } + pub fn as_complex(self) -> GbnfComplex { match self { GbnfFieldType::Complex(complex) => complex, @@ -247,89 +495,109 @@ impl GbnfFieldType { } /// Connect a property name and a field type to generate a GBNF rule. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct GbnfField { pub field_name: String, pub field_type: GbnfFieldType, -} - -impl GbnfField { - fn list_rule(field_type: &(impl AsGrammar + ?Sized)) -> String { - r#""[]" | "[" {SPACE} {TYPE_NAME} ("," {SPACE} {TYPE_NAME})* "]""# - .replace("{LIST_NAME}", "") - .replace("{SPACE}", &GbnfToken::Space.token()) - .replace("{TYPE_NAME}", &field_type.token()) - } - - fn list_rules(&self, f: &T) -> Vec { - // Create two rules: one for the list and on for its actual type. 
- let list_rule = GbnfRule::new(self.token(), Self::list_rule(f)); - - let mut rules = vec![list_rule]; - rules.append(&mut f.rules()); - rules - } + pub limited: bool, } impl AsGrammar for GbnfField { - fn token(&self) -> String { + fn base_type_token(&self) -> GbnfToken { match &self.field_type { - GbnfFieldType::Primitive(f) => f.token(), - GbnfFieldType::OptionalPrimitive(f) => f.token(), - GbnfFieldType::PrimitiveList(f) => format!("{}List", f.token()), - GbnfFieldType::Complex(f) => f.token(), - GbnfFieldType::OptionalComplex(f) => f.token(), - GbnfFieldType::ComplexList(f) => format!("{}List", f.token()), - GbnfFieldType::Limited(f) => f.token(), - _ => "".to_string(), + GbnfFieldType::Primitive(f) => f.base_type_token(), + GbnfFieldType::Complex(f) => f.base_type_token(), + GbnfFieldType::OptionalPrimitive(f) => f.base_type_token(), + GbnfFieldType::OptionalComplex(f) => f.base_type_token(), + GbnfFieldType::PrimitiveList(f) => f.base_type_token(), + GbnfFieldType::ComplexList(f) => f.base_type_token(), } } - // TODO need to implement optional rules, which probably involves - // wrapping the primitive rule in parens, and then ORing to null. 
- fn rules(&self) -> Vec { + fn rule(&self, token: &str) -> GbnfRule { match &self.field_type { - GbnfFieldType::Complex(f) => f.rules(), - GbnfFieldType::OptionalComplex(f) => f.rules(), - GbnfFieldType::ComplexList(f) => self.list_rules(f), - GbnfFieldType::Primitive(f) => f.rules(), - GbnfFieldType::OptionalPrimitive(f) => f.rules(), - GbnfFieldType::PrimitiveList(f) => self.list_rules(f), - GbnfFieldType::Limited(f) => f.rules(), + GbnfFieldType::Complex(f) => f.rule(token), + GbnfFieldType::Primitive(f) => f.rule(token), + GbnfFieldType::OptionalComplex(f) => f.rule(token).to_optional(), + GbnfFieldType::OptionalPrimitive(f) => f.rule(token).to_optional(), + GbnfFieldType::ComplexList(f) => f.rule(token).to_list(), + GbnfFieldType::PrimitiveList(f) => f.rule(token).to_list(), } } + + fn with_limit<'a>(&'a self, limit: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a { + Limited( + self, + limit.map(|l| LimitedGbnfField { + field: self, + limit: l, + }), + ) + } } /// The complex type is a direct mapping from a supported Rust struct, /// and also used to generate the root of a GBNF grammar. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct GbnfComplex { pub name: String, pub fields: Vec, } impl GbnfComplex { - pub fn to_grammar(&self) -> String { - let mut rules = vec![GbnfRule::new("root".to_string(), self.name.clone())]; + pub fn to_grammar(&self, limit: Option) -> String { + // The root type cannot itself be limited. 
+ let mut root = GbnfRule::new( + GbnfToken::new("root".to_string()), + self.base_type_token().0, + GbnfRuleLevel::Root, + ); - rules.append(&mut self.rules()); + let root_type_rule = if let Some(_) = limit { + let limited_self = self.with_limit(limit.as_ref()); + limited_self.rule(&root.text) + } else { + self.rule(&root.text) + }; - for field in &self.fields { - rules.append(&mut field.rules()); - } + root.dependents = vec![root_type_rule]; + let rules = vec![root]; - rules + // Final output: flatten all rules into one giant list of + // rules with no dependents, sort according to "rule level", + // and then deduplicate. + let mut grammar = rules .into_iter() + .flat_map(|rule| rule.flatten()) + .sorted_by(|rule1, rule2| Ord::cmp(&rule1.level, &rule2.level)) .unique() .map(|rule| format!("{} ::= {}", rule.name, rule.text)) - .join("\n") - } -} + .join("\n"); -impl AsGrammar for GbnfComplex { - fn rules(&self) -> Vec { - // This will output the full set of rules for the complex type. - // Deduplication handled later. + grammar.push('\n'); + grammar + } + + fn rule_for_field(&self, field: &GbnfField, limit: Option<&GbnfLimit>) -> GbnfRule { + if let Some(GbnfLimit::Complex(nested_limit)) = limit { + nested_limit + .get(field.field_name.as_str()) + .map(|l| { + // For complex type fields, we "namespace" the + // field type name when limiting, to prevent + // collisions. + let limited = field.with_limit(Some(l)); + let token = format!("{}{}", self.name, limited.base_type_token()); + limited.rule(&token) + }) + .unwrap_or_else(|| field.rule(&field.base_type_token())) + } else { + field.rule(&field.base_type_token()) + } + } + + /// The GBNF rule for the complex type itself. 
+ fn rule_for_self(&self, token: &str, limit: Option<&GbnfLimit>) -> GbnfRule { let mut rule = String::new(); rule.push_str(r#""{" "#); @@ -338,14 +606,17 @@ impl AsGrammar for GbnfComplex { .fields .iter() .map(|field| { + let field_rule = self.rule_for_field(field, limit); + let token = field_rule.name; + let mut text = String::new(); - text.push_str(&GbnfToken::Space.token()); + text.push_str(&GbnfRule::space().name.0); text.push_str(" "); text.push_str(&format!( r#""\"{}\":" {} {}"#, field.field_name, - GbnfToken::Space.token(), - field.token(), + GbnfRule::space().name.0, + token, )); text }) @@ -354,12 +625,40 @@ impl AsGrammar for GbnfComplex { rule.push_str(&field_rules_text); rule.push_str(r#" "}""#); - let mut rules = GbnfRule::single(self.token(), rule); - rules.append(&mut GbnfToken::Space.rules()); - rules + GbnfRule::new(token.into(), rule, GbnfRuleLevel::Middle) } - fn token(&self) -> String { - self.name.clone() + /// The rules for the fields of the complex type. + fn rules_for_fields(&self, limit: Option<&GbnfLimit>) -> Vec { + let mut rules = vec![]; + for field in &self.fields { + rules.push(self.rule_for_field(field, limit)); + } + + rules + } +} + +impl AsGrammar for GbnfComplex { + fn rule(&self, token: &str) -> GbnfRule { + let mut main_rule = self.rule_for_self(token, None); + let field_rules = self.rules_for_fields(None); + main_rule.dependents = field_rules; + main_rule.dependents.push(GbnfRule::space()); + main_rule + } + + fn base_type_token(&self) -> GbnfToken { + GbnfToken::new(self.name.clone()) + } + + fn with_limit<'a>(&'a self, limit: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a { + Limited( + self, + limit.map(|l| LimitedGbnfComplex { + complex: self, + limit: l, + }), + ) } } diff --git a/gbnf/src/limited.rs b/gbnf/src/limited.rs new file mode 100644 index 0000000..7557f56 --- /dev/null +++ b/gbnf/src/limited.rs @@ -0,0 +1,149 @@ +use crate::{ + AsGrammar, GbnfComplex, GbnfField, GbnfFieldType, GbnfLimit, GbnfPrimitive, 
GbnfRule, + GbnfRuleLevel, GbnfToken, +}; +use itertools::Itertools; + +#[derive(Debug, Clone)] +pub struct LimitedGbnfComplex<'a> { + pub complex: &'a GbnfComplex, + pub limit: &'a GbnfLimit, +} + +impl AsGrammar for LimitedGbnfComplex<'_> { + fn rule(&self, token: &str) -> GbnfRule { + let mut main_rule = self.complex.rule_for_self(token, Some(self.limit)); + let field_rules = self.complex.rules_for_fields(Some(self.limit)); + main_rule.dependents = field_rules; + main_rule.dependents.push(GbnfRule::space()); + main_rule + } + + fn base_type_token(&self) -> GbnfToken { + GbnfToken::new(format!("{}Limit", self.complex.base_type_token())) + } + + fn with_limit<'a>(&'a self, _: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a { + self.clone() + } +} + +#[derive(Debug, Clone)] +pub struct LimitedGbnfPrimitive<'a> { + primitive: GbnfPrimitive, + values: &'a [String], +} + +impl LimitedGbnfPrimitive<'_> { + pub fn new(limit: &GbnfLimit) -> Option { + if let GbnfLimit::Simple(primitive, values) = limit { + Some(LimitedGbnfPrimitive { + primitive: *primitive, + values, + }) + } else { + None + } + } +} + +impl AsGrammar for LimitedGbnfPrimitive<'_> { + fn rule(&self, token: &str) -> GbnfRule { + let values = self + .values + .into_iter() + .map(|v| { + if self.primitive == GbnfPrimitive::String { + format!(r#""\"{}\"""#, v) + } else { + format!(r#""{}""#, v) + } + }) + .join(" | "); + + GbnfRule::new(token.into(), values, GbnfRuleLevel::Leaf) + } + + fn base_type_token(&self) -> GbnfToken { + GbnfToken::new(format!("{}Limit", self.primitive.base_type_token())) + } + + fn with_limit<'a>(&'a self, _: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a { + self.clone() + } +} + +#[derive(Debug, Clone)] +pub struct LimitedGbnfField<'a> { + pub field: &'a GbnfField, + pub limit: &'a GbnfLimit, +} + +macro_rules! 
wrap_in_limit { + ($field:expr, $limit:expr, $token:expr) => { + $field.with_limit($limit).rule($token) + }; +} + +impl AsGrammar for LimitedGbnfField<'_> { + fn rule(&self, token: &str) -> GbnfRule { + // This is always the limit for this specific field. + let limit = Some(self.limit); + + match &self.field.field_type { + GbnfFieldType::Complex(f) => wrap_in_limit!(f, limit, token), + GbnfFieldType::Primitive(f) => wrap_in_limit!(f, limit, token), + GbnfFieldType::OptionalComplex(f) => wrap_in_limit!(f, limit, token).to_optional(), + GbnfFieldType::OptionalPrimitive(f) => wrap_in_limit!(f, limit, token).to_optional(), + GbnfFieldType::ComplexList(f) => wrap_in_limit!(f, limit, token).to_list(), + GbnfFieldType::PrimitiveList(f) => wrap_in_limit!(f, limit, token).to_list(), + } + } + + fn base_type_token(&self) -> GbnfToken { + let token = match &self.field.field_type { + GbnfFieldType::Primitive(f) => f.base_type_token(), + GbnfFieldType::Complex(f) => f.base_type_token(), + GbnfFieldType::OptionalPrimitive(f) => f.base_type_token(), + GbnfFieldType::OptionalComplex(f) => f.base_type_token(), + GbnfFieldType::PrimitiveList(f) => f.base_type_token(), + GbnfFieldType::ComplexList(f) => f.base_type_token(), + }; + + GbnfToken::new(format!("{}{}Limit", self.field.field_name, token)) + } + + fn with_limit<'a>(&'a self, _: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a { + self.clone() + } +} + +#[derive(Debug, Clone)] +pub struct Limited<'a, T, L>(pub &'a T, pub Option) +where + T: AsGrammar + Clone, + L: AsGrammar + Clone; + +impl AsGrammar for Limited<'_, T, L> +where + T: AsGrammar + Clone, + L: AsGrammar + Clone, +{ + fn rule(&self, token: &str) -> GbnfRule { + self.1 + .as_ref() + .map(|l| l.rule(token)) + .unwrap_or_else(|| self.0.rule(token)) + } + + fn base_type_token(&self) -> GbnfToken { + self.1 + .as_ref() + .map(|l| l.base_type_token()) + .unwrap_or_else(|| self.0.base_type_token()) + } + + fn with_limit<'a>(&'a self, _: Option<&'a GbnfLimit>) -> impl 
AsGrammar + 'a { + self.clone() + } +} diff --git a/gbnf/src/new.rs b/gbnf/src/new.rs new file mode 100644 index 0000000..e69de29 diff --git a/gbnf_derive/Cargo.toml b/gbnf_derive/Cargo.toml index 6f1444d..da3393d 100644 --- a/gbnf_derive/Cargo.toml +++ b/gbnf_derive/Cargo.toml @@ -11,4 +11,6 @@ auto_impl = "1.1.2" syn = { version = "2.0", features = [ "derive", "full", "parsing", "printing", "visit", "visit-mut", "clone-impls", "proc-macro" ] } quote = "1.0.35" itertools = "0.12.0" -gbnf = { path = "../gbnf" } \ No newline at end of file +gbnf = { path = "../gbnf" } +proc-macro2 = "1.0.78" +darling = "0.20.5" diff --git a/gbnf_derive/src/lib.rs b/gbnf_derive/src/lib.rs index 33ddd89..8ba3573 100644 --- a/gbnf_derive/src/lib.rs +++ b/gbnf_derive/src/lib.rs @@ -1,80 +1,181 @@ +use darling::ast::Data; +use darling::{FromDeriveInput, FromField}; use proc_macro::{Span, TokenStream}; -use quote::{quote, ToTokens}; -use syn::parse::{Parse, ParseStream}; -use syn::punctuated::Punctuated; -use syn::{braced, parse_macro_input}; -use syn::{DeriveInput, Field, Ident, LitStr, Token}; +use proc_macro2::TokenStream as TokenStream2; +use quote::{format_ident, quote, ToTokens}; +use syn::parse_macro_input; +use syn::{DeriveInput, Ident, LitStr}; -#[derive(Debug)] -struct GbnfStructDef { - name: Ident, - fields: Punctuated, +#[derive(Clone, Debug, FromField)] +#[darling(forward_attrs(gbnf_limit))] +struct GbnfFieldDef { + ident: Option, + ty: syn::Type, + vis: syn::Visibility, + attrs: Vec, } -impl Parse for GbnfStructDef { - fn parse(input: ParseStream) -> syn::Result { - // let _ = Discard tokens we don't care about. 
- let _: Option = input.parse()?; - let _: Option = input.parse()?; +impl ToTokens for GbnfFieldDef { + fn to_tokens(&self, tokens: &mut TokenStream2) { + let ident = &self.ident; + let ty = &self.ty; // TODO figure out how to get ident out + let vis = &self.vis; - let content; - let name: Ident = input.parse()?; - let _ = braced!(content in input); + let output = quote! { + #vis #ident: Box> + }; - Ok(GbnfStructDef { - name, - fields: content.parse_terminated(Field::parse_named, Token![,])?, - }) + output.to_tokens(tokens); } } -fn generate_gbnf(input: TokenStream, create_struct: bool) -> TokenStream { - // To define complex types, we take a struct into the macro, and - // then output a bunch of calls to gbnf_field (wrapped in gbnf - // complex). +#[derive(Debug, FromDeriveInput)] +#[darling(supports(struct_named))] +struct GbnfStructDef { + ident: syn::Ident, + data: darling::ast::Data<(), GbnfFieldDef>, +} - // We could also generate the entire complex type now during macro - // run, and then shove the resulting GBNF rule into the type as a - // static string. +impl ToTokens for GbnfStructDef { + fn to_tokens(&self, tokens: &mut TokenStream2) { + let ident = &self.ident; - if let Ok(expr_struct) = syn::parse::(input) { - let struct_name_str = LitStr::new(&expr_struct.name.to_string(), Span::call_site().into()); - let struct_name = expr_struct.name; - let fields = expr_struct.fields.iter(); - - let gbnfs = expr_struct.fields.iter().map(|field| { - let field_type = &field.ty; - let field_ident = field - .ident - .as_ref() - .map(|i| i.to_string()) - .map(|field_name| LitStr::new(&field_name, Span::call_site().into())) - .expect("no ident"); - - quote! { gbnf_field!(#field_ident, #field_type) } - }); - - let struct_frag = if create_struct { - quote! { - pub struct #struct_name { - #(#fields),* - } - } - } else { - quote! 
{} + let fields = match self.data { + Data::Struct(ref struct_def) => &struct_def.fields, + _ => panic!("Can only use GbnfLimit on structs with owned data"), }; - let code = quote! { - #struct_frag + let output = quote! { + pub struct #ident { + #(#fields),* + } + }; - impl #struct_name { + tokens.extend(output); + } +} + +/// Find fields in the struct with a #[gbnf_limit] attribute. +fn find_limited_fields(fields: &[GbnfFieldDef]) -> impl Iterator + '_ { + fields.iter().filter(|field| { + field + .attrs + .iter() + .find(|attr| attr.path().get_ident().unwrap().to_string() == "gbnf_limit") + .is_some() + }) +} + +/// Find fields in the struct without a #[gbnf_limit] attribute. +fn find_non_limited_fields(fields: &[GbnfFieldDef]) -> impl Iterator + '_ { + fields.iter().filter(|field| { + field + .attrs + .iter() + .find(|attr| attr.path().get_ident().unwrap().to_string() == "gbnf_limit") + .is_none() + }) +} + +fn generate_to_grammar_impl(original_struct_name: &Ident, fields: &[GbnfFieldDef]) -> TokenStream2 { + let limit_struct_name = format_ident!("{}GbnfLimit", original_struct_name); + + // Convert limit struct field names to string literals + let limit_struct_fields: Vec<_> = find_limited_fields(fields) + .map(|field| quote! { #field }) + .collect(); + + // Convert provided values of the limit struct into corresponding + // GbnfLimit instances. Bunch of tuples fed into HashMap::from. + let from_assignments = find_limited_fields(fields).map(|field| { + let key = LitStr::new( + &field.ident.as_ref().expect("no ident!").to_string(), + Span::call_site().into(), + ) + .to_token_stream(); + + let ident = &field.ident; + let value = quote! { self.#ident.into() }; + quote! { (#key, #value) } + }); + + if limit_struct_fields.len() > 0 { + quote! 
{ + pub struct #limit_struct_name { + #(#limit_struct_fields),* + } + + impl #limit_struct_name { + pub fn to_gbnf_limit(self) -> GbnfLimit { + GbnfLimit::Complex( + HashMap::from([ + #(#from_assignments),* + ]) + ) + } + } + + impl GbnfLimitedField<#original_struct_name> for #limit_struct_name { + fn limit(self: Box<#limit_struct_name>) -> GbnfLimit { + self.to_gbnf_limit() + } + } + + impl GbnfLimitedField> for #limit_struct_name { + fn limit(self: Box<#limit_struct_name>) -> GbnfLimit { + self.to_gbnf_limit() + } + } + + impl From<#limit_struct_name> for Box> { + fn from(limit: #limit_struct_name) -> Self { + Box::new(limit) + } + } + + impl From<#limit_struct_name> for Box>> { + fn from(limit: #limit_struct_name) -> Self { + Box::new(limit) + } + } + + impl #original_struct_name { + pub fn to_grammar_with_limit(limit: #limit_struct_name) -> String { + let gbnf_limit = limit.to_gbnf_limit(); + Self::to_gbnf().as_complex().to_grammar(Some(gbnf_limit)) + } + } + } + } else { + quote! { + impl #original_struct_name { pub fn to_grammar() -> &'static str { use std::sync::OnceLock; static GRAMMAR: OnceLock = OnceLock::new(); - GRAMMAR.get_or_init(|| Self::to_gbnf().as_complex().to_grammar()) + GRAMMAR.get_or_init(|| Self::to_gbnf().as_complex().to_grammar(None)) } } + } + } +} +/// Generate the GBNF rules and the limit struct (if applicable). 
+fn generate_gbnf(input: &DeriveInput) -> TokenStream { + if let Ok(gbnf_struct) = GbnfStructDef::from_derive_input(input) { + let struct_name = gbnf_struct.ident; + let struct_name_str = LitStr::new(&struct_name.to_string(), Span::call_site().into()); + + let fields = match gbnf_struct.data { + Data::Struct(struct_def) => struct_def.fields, + _ => panic!("Can only use GbnfLimit on structs with owned data"), + }; + + // Gbnf rule generation stuff + let limited_gbnfs = map_limited_gbnfs(&fields); + let non_limited_gbnfs = map_non_limited_gbnfs(&fields); + let gbnfs = limited_gbnfs.chain(non_limited_gbnfs); + + let as_gbnf_impl = quote! { impl AsGbnf for #struct_name { fn to_gbnf() -> gbnf::GbnfFieldType { GbnfFieldType::Complex( @@ -87,21 +188,61 @@ fn generate_gbnf(input: TokenStream, create_struct: bool) -> TokenStream { } }; - code.into() + let to_grammar_impl = generate_to_grammar_impl(&struct_name, &fields); + + let final_output = quote! { + use gbnf::prelude::*; + + #as_gbnf_impl + + #to_grammar_impl + }; + + final_output.into() } else { - panic!("Can only generate GBNF from structs of owned data (pub or private)"); + panic!("Can only use GbnfLimit on structs with owned data"); } } -/// Create a GBNF complex type as a Rust struct. -#[proc_macro] -pub fn gbnf_complex(input: TokenStream) -> TokenStream { - generate_gbnf(input, true) +/// Turn the fields without the limit attr into GBNF rule definitions. +fn map_non_limited_gbnfs(fields: &[GbnfFieldDef]) -> impl Iterator + '_ { + find_non_limited_fields(fields).map(|field| { + let field_type = &field.ty; + let field_ident = field + .ident + .as_ref() + .map(|i| i.to_string()) + .map(|field_name| LitStr::new(&field_name, Span::call_site().into())) + .expect("no ident"); + + quote! { gbnf_field!(#field_ident, #field_type) } + }) } -/// Add the ability to convert a Rust type into a GBNF grammar. -#[proc_macro_derive(Gbnf)] +/// Turn the fields with #[gbnf_limit] attr into GBNF rule definitions. 
+fn map_limited_gbnfs(fields: &[GbnfFieldDef]) -> impl Iterator + '_ { + find_limited_fields(fields).map(|field| { + let field_type = &field.ty; + let field_ident = field + .ident + .as_ref() + .map(|i| i.to_string()) + .map(|field_name| LitStr::new(&field_name, Span::call_site().into())) + .expect("no ident"); + + quote! { + GbnfField { + field_name: #field_ident.to_string(), + field_type: gbnf_field_type!(#field_type), + limited: true, + } + } + }) +} + +/// Convert a Rust type into a GBNF grammar. +#[proc_macro_derive(Gbnf, attributes(gbnf_limit))] pub fn gbnf(input: TokenStream) -> TokenStream { let input = parse_macro_input!(input as DeriveInput); - generate_gbnf(input.to_token_stream().into(), false) + generate_gbnf(&input) }