Allow limiting of values on derive(Gbnf) structs.

With the new helper attribute #[gbnf_limit] on a field of a struct
that has #[derive(Gbnf)], it is now possible to constrain the output
of the LLM to a specific set of values for that field. This is
extremely useful, for example, when asking the LLM to put a specific
ID in a field.
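
A minimal sketch of the new usage, mirroring the RawCommandEvent changes in this
commit (the UUID values are placeholders):

#[derive(Debug, Serialize, Deserialize, Clone, Gbnf)]
#[serde(rename_all = "camelCase")]
pub struct RawCommandEvent {
    pub event_name: String,
    #[gbnf_limit]
    pub applies_to: String,
    #[gbnf_limit]
    pub parameter: String,
}

fn example_grammar() -> String {
    // The derive generates a RawCommandEventGbnfLimit companion struct whose
    // fields carry the allowed values for each limited field.
    let limit = RawCommandEventGbnfLimit {
        applies_to: Box::new(vec!["uuid-1".to_string(), "uuid-2".to_string()]),
        parameter: Box::new(vec!["uuid-1".to_string(), "uuid-2".to_string()]),
    };
    RawCommandEvent::to_grammar_with_limit(limit)
}

The companion struct and the to_grammar_with_limit method are generated whenever
at least one field carries #[gbnf_limit].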

This commit also addresses numerous shortcomings of the original
implementation of the derive macro.

Deserialization of certain LLM responses is currently broken as of this
commit: the field names in the LLM output no longer match what serde
expects. The derived grammar emits the literal snake_case Rust field names
(e.g. applies_to), while the structs use #[serde(rename_all = "camelCase")]
and expect appliesTo.
projectmoon 2024-02-07 12:01:47 +01:00
parent 64209738de
commit 6fff4ecacd
10 changed files with 889 additions and 186 deletions

Cargo.lock (generated)

@ -336,6 +336,15 @@ dependencies = [
"yaml-rust",
]
[[package]]
name = "convert_case"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "core-foundation"
version = "0.9.4"
@ -421,6 +430,41 @@ dependencies = [
"sct",
]
[[package]]
name = "darling"
version = "0.20.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc5d6b04b3fd0ba9926f945895de7d806260a2d7431ba82e7edaecb043c4c6b8"
dependencies = [
"darling_core",
"darling_macro",
]
[[package]]
name = "darling_core"
version = "0.20.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04e48a959bcd5c761246f5d090ebc2fbf7b9cd527a492b07a67510c108f1e7e3"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"syn 2.0.48",
]
[[package]]
name = "darling_macro"
version = "0.20.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d1545d67a2149e1d93b7e5c7752dce5a7426eb5d1357ddcfd89336b94444f77"
dependencies = [
"darling_core",
"quote",
"syn 2.0.48",
]
[[package]]
name = "deranged"
version = "0.3.11"
@ -656,6 +700,7 @@ name = "gbnf"
version = "0.1.0"
dependencies = [
"auto_impl",
"convert_case",
"itertools 0.12.1",
"quote",
"serde",
@ -668,8 +713,10 @@ name = "gbnf_derive"
version = "0.1.0"
dependencies = [
"auto_impl",
"darling",
"gbnf",
"itertools 0.12.1",
"proc-macro2",
"quote",
"syn 2.0.48",
]
@ -896,6 +943,12 @@ dependencies = [
"cc",
]
[[package]]
name = "ident_case"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "idna"
version = "0.5.0"
@ -2071,6 +2124,12 @@ dependencies = [
"vte",
]
[[package]]
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "strum"
version = "0.25.0"


@ -1,5 +1,8 @@
use crate::ai::convo::AiPrompt;
use crate::models::commands::{CommandEvent, EventConversionFailure, ParsedCommand};
use crate::models::commands::{
CommandEvent, EventConversionFailure, ParsedCommand, RawCommandEventGbnfLimit,
RawCommandExecution, RawCommandExecutionGbnfLimit,
};
use crate::models::world::items::Item;
use crate::models::world::people::Person;
use crate::models::world::scenes::{Exit, Prop, Scene, Stage};
@ -244,6 +247,45 @@ fn stage_info(stage: &Stage) -> String {
info
}
fn execution_gbnf_limit<'a>(stage: &'a Stage) -> RawCommandExecutionGbnfLimit {
// We will begin this implementation by simply setting the limits
// to all uuids in the scene. We might wind up getting rid of the
// appliesTo field, leaving only parameter behind.
let people_keys = stage
.people
.iter()
.map(|p| &p._key)
.cloned()
.flatten();
let item_keys = stage
.items
.iter()
.map(|i| &i._key)
.cloned()
.flatten();
let curr_scene_key = stage.scene._key.iter();
let exit_keys = stage.scene.exits.iter().map(|e| &e.scene_key).cloned();
let all_uuids = people_keys
.chain(item_keys)
//.chain(curr_scene_key)
.chain(exit_keys)
.collect_vec();
//let all_uuids = std::rc::Rc::new(all_uuids);
let event_limit = RawCommandEventGbnfLimit {
applies_to: Box::new(all_uuids.clone()),
parameter: Box::new(all_uuids.clone()),
};
RawCommandExecutionGbnfLimit {
event: Box::new(event_limit),
}
}
pub fn execution_prompt(original_cmd: &str, stage: &Stage, cmd: &ParsedCommand) -> AiPrompt {
let scene_info = stage_info(&stage);
@ -255,7 +297,12 @@ pub fn execution_prompt(original_cmd: &str, stage: &Stage, cmd: &ParsedCommand)
.replacen("{LOCATION}", &cmd.location, 1)
.replacen("{USING}", &cmd.using, 1);
AiPrompt::new_with_grammar_and_size(&prompt, COMMAND_EXECUTION_BNF, 512)
let limit = execution_gbnf_limit(stage);
let grammar = RawCommandExecution::to_grammar_with_limit(limit);
println!("{}", grammar);
//AiPrompt::new_with_grammar_and_size(&prompt, COMMAND_EXECUTION_BNF, 512)
AiPrompt::new_with_grammar_and_size(&prompt, &grammar, 512)
}
pub fn fix_prompt(scene: &Scene, failures: &EventConversionFailure) -> AiPrompt {


@ -1,3 +1,4 @@
//use gbnf::prelude::*;
use crate::{
ai::convo::AiPrompt,
models::commands::{ParsedCommands, VerbsResponse},


@ -1,10 +1,10 @@
use std::fmt::Display;
use gbnf::prelude::*;
use gbnf_derive::{Gbnf};
use serde::{Deserialize, Serialize};
use strum::{EnumString, EnumVariantNames};
use thiserror::Error;
use gbnf::prelude::*;
use gbnf_derive::Gbnf;
/// Stored in the database to bypass AI 'parsing' when possible.
#[derive(Debug, Serialize, Deserialize, Clone)]
@ -56,13 +56,14 @@ pub struct VerbAndTargetEntry {
pub target: String,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[derive(Debug, Serialize, Deserialize, Clone, Gbnf)]
#[serde(rename_all = "camelCase")]
pub struct RawCommandExecution {
pub valid: bool,
pub reason: Option<String>,
pub narration: String,
#[serde(skip_serializing_if = "Option::is_none")]
#[gbnf_limit]
pub event: Option<RawCommandEvent>,
}
@ -77,11 +78,13 @@ impl RawCommandExecution {
}
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[derive(Debug, Serialize, Deserialize, Clone, Gbnf)]
#[serde(rename_all = "camelCase")]
pub struct RawCommandEvent {
pub event_name: String,
#[gbnf_limit]
pub applies_to: String,
#[gbnf_limit]
pub parameter: String,
}
@ -147,7 +150,8 @@ pub struct Narrative {
/// An "AI Command" is a command execution generated by the LLM and
/// run through coherence validation/fixing, and (assuming it is
/// valid) contains a series of events to apply to the game state.
/// valid) contains an event to apply to the game state.
//TODO rename to AiCommandExecution
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct AiCommand {
pub valid: bool,


@ -10,3 +10,4 @@ quote = "1.0.35"
itertools = "0.12.0"
serde = "1.0.196"
serde_derive = "1.0.196"
convert_case = "0.6.0"


@ -1,32 +1,173 @@
extern crate proc_macro;
use std::{
collections::HashMap,
fmt::Display,
ops::{Deref, DerefMut},
};
use itertools::Itertools;
use limited::{Limited, LimitedGbnfComplex, LimitedGbnfField, LimitedGbnfPrimitive};
use serde::de::DeserializeOwned;
use convert_case::{Case, Casing};
mod limited;
pub mod prelude {
pub use crate::gbnf_field;
pub use crate::gbnf_field_type;
pub use crate::AsGbnf;
pub use crate::AsGbnfPrimitive;
pub use crate::AsGrammar;
pub use crate::GbnfComplex;
pub use crate::GbnfField;
pub use crate::GbnfFieldType;
pub use crate::GbnfLimit;
pub use crate::GbnfLimitedField;
pub use crate::GbnfPrimitive;
pub use crate::GbnfRule;
pub use crate::GbnfRuleLevel;
pub use crate::GbnfToken;
pub use std::collections::HashMap;
}
// TODOs for this implementation:
// 1. Move primitive definitions (string, bool, etc) to the bottom of generated grammar.
// 2. Implement support for limited values.
// 3. Generate static strings for the grammar rules where possible.
// 4. Properly support optional types (right now they map to non-optional values).
/// The GbnfLimitedField trait is capable of producing a GbnfLimit
/// enum value, which itself contains either a list of allowable
/// values (for simple types), or a nested GbnfLimit object (for
/// complex types). This allows the creation of a complex nested Limit
/// struct which can hold the full structure of nested limits of
/// multiple types that derive Gbnf and have limits.
pub trait GbnfLimitedField<T> {
fn limit(self: Box<Self>) -> GbnfLimit;
}
impl<T: GbnfLimitedField<T> + 'static> From<Box<T>> for Box<dyn GbnfLimitedField<T>> {
fn from(value: Box<T>) -> Self {
value
}
}
impl<T: ToString + AsGbnfPrimitive + 'static> From<Vec<T>> for Box<dyn GbnfLimitedField<T>> {
fn from(value: Vec<T>) -> Self {
Box::new(value)
}
}
impl<T, const N: usize> From<[T; N]> for Box<dyn GbnfLimitedField<T>>
where
T: ToString + AsGbnfPrimitive + 'static,
{
fn from(value: [T; N]) -> Self {
Box::new(value)
}
}
impl<T: ToString + AsGbnfPrimitive + 'static> From<Box<Vec<T>>> for Box<dyn GbnfLimitedField<T>> {
fn from(value: Box<Vec<T>>) -> Self {
value
}
}
// Single primitive field types that can only have specific values.
impl<T: ToString + AsGbnfPrimitive> GbnfLimitedField<T> for Vec<T> {
fn limit(self: Box<Self>) -> GbnfLimit {
GbnfLimit::Simple(
T::to_gbnf_primitive(),
self.into_iter().map(|v| v.to_string()).collect(),
)
}
}
impl<T: ToString + AsGbnfPrimitive> GbnfLimitedField<std::rc::Rc<T>> for Vec<T> {
fn limit(self: Box<Self>) -> GbnfLimit {
GbnfLimit::Simple(
T::to_gbnf_primitive(),
self.into_iter().map(|v| v.to_string()).collect(),
)
}
}
// List types that can only have specific values.
impl<T: ToString + AsGbnfPrimitive> GbnfLimitedField<Vec<T>> for Vec<T> {
fn limit(self: Box<Self>) -> GbnfLimit {
GbnfLimit::Simple(
T::to_gbnf_primitive(),
self.into_iter().map(|v| v.to_string()).collect(),
)
}
}
impl<T, const N: usize> GbnfLimitedField<T> for [T; N]
where
T: ToString + AsGbnfPrimitive,
{
fn limit(self: Box<Self>) -> GbnfLimit {
GbnfLimit::Simple(
T::to_gbnf_primitive(),
self.into_iter().map(|v| v.to_string()).collect(),
)
}
}
impl<T, const N: usize> GbnfLimitedField<Vec<T>> for [T; N]
where
T: ToString + AsGbnfPrimitive,
{
fn limit(self: Box<Self>) -> GbnfLimit {
GbnfLimit::Simple(
T::to_gbnf_primitive(),
self.into_iter().map(|v| v.to_string()).collect(),
)
}
}
impl<'a> GbnfLimitedField<String> for &'a [&'a str] {
fn limit(self: Box<Self>) -> GbnfLimit {
GbnfLimit::Simple(
String::to_gbnf_primitive(),
self.into_iter().map(|v| v.to_string()).collect(),
)
}
}
impl<'a> GbnfLimitedField<String> for std::rc::Rc<Vec<&'a str>> {
fn limit(self: Box<Self>) -> GbnfLimit {
GbnfLimit::Simple(
String::to_gbnf_primitive(),
self.iter().map(|v| v.to_string()).collect(),
)
}
}
/// A type of GBNF value limit, either simple (a list of values meant
/// for a single field), or complex (contains nested GbnfLimit).
#[derive(Debug)]
pub enum GbnfLimit {
/// The primitive type, and the allowed values.
Simple(GbnfPrimitive, Vec<String>),
/// Field name -> nested limit type.
Complex(HashMap<&'static str, GbnfLimit>),
}
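// Illustrative sketch (not part of this diff): building a nested limit by hand
// for a struct whose `event` field is itself a limited complex type. The field
// names mirror RawCommandExecution/RawCommandEvent; the UUID is a placeholder.
fn example_nested_limit() -> GbnfLimit {
    let event_limit = GbnfLimit::Complex(HashMap::from([
        (
            "applies_to",
            GbnfLimit::Simple(GbnfPrimitive::String, vec!["uuid-1".to_string()]),
        ),
        (
            "parameter",
            GbnfLimit::Simple(GbnfPrimitive::String, vec!["uuid-1".to_string()]),
        ),
    ]));
    GbnfLimit::Complex(HashMap::from([("event", event_limit)]))
}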
impl<T> From<Box<dyn GbnfLimitedField<T>>> for GbnfLimit {
fn from(value: Box<dyn GbnfLimitedField<T>>) -> Self {
value.limit()
}
}
// Converts GBNF definitions (through the types below) into the grammar
// rules.
pub trait AsGrammar {
fn rules(&self) -> Vec<GbnfRule>;
fn token(&self) -> String;
/// Create the rule itself, along with its dependent rules.
fn rule(&self, token: &str) -> GbnfRule;
/// The basic token for the type, or type contained within a
/// wrapper. A wrapper is something like GbnfField. This always
/// returns the token for the base type of the rule (including
/// option or list, for those types).
fn base_type_token(&self) -> GbnfToken;
/// Wraps this trait impl in a corresponding Limited GBNF rule
/// creator.
fn with_limit<'a>(&'a self, limit: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a;
}
/// Trait for regular types to implement to convert themselves to a
@ -35,6 +176,10 @@ pub trait AsGbnf {
fn to_gbnf() -> GbnfFieldType;
}
pub trait AsGbnfPrimitive {
fn to_gbnf_primitive() -> GbnfPrimitive;
}
macro_rules! define_field_type {
($type:ty, $gbnf_type:expr) => {
impl AsGbnf for $type {
@ -45,6 +190,23 @@ macro_rules! define_field_type {
};
}
macro_rules! define_gbnf_primitive {
($type:ty, $gbnf_primitive:expr) => {
impl AsGbnfPrimitive for $type {
fn to_gbnf_primitive() -> GbnfPrimitive {
$gbnf_primitive
}
}
};
}
macro_rules! define_primitive_field_type {
($type:ty, $gbnf_primitive:expr) => {
define_gbnf_primitive!($type, $gbnf_primitive);
define_field_type!($type, GbnfFieldType::Primitive($gbnf_primitive));
};
}
#[macro_export]
macro_rules! gbnf_field_type {
($type:ty) => {
@ -58,25 +220,34 @@ macro_rules! gbnf_field {
GbnfField {
field_name: $field_name.to_string(),
field_type: gbnf_field_type!($field_type),
limited: false,
}
};
($field_name:literal, $field_type: ty, $field_limit:expr) => {
GbnfField {
field_name: $field_name.to_string(),
field_type: gbnf_field_type!($field_type),
limit: $field_limit,
}
};
}
// Implemented field type mappings for common rust types.
define_field_type!(i16, GbnfFieldType::Primitive(GbnfPrimitive::Number));
define_field_type!(u16, GbnfFieldType::Primitive(GbnfPrimitive::Number));
define_field_type!(i32, GbnfFieldType::Primitive(GbnfPrimitive::Number));
define_field_type!(u32, GbnfFieldType::Primitive(GbnfPrimitive::Number));
define_field_type!(i64, GbnfFieldType::Primitive(GbnfPrimitive::Number));
define_field_type!(u64, GbnfFieldType::Primitive(GbnfPrimitive::Number));
define_field_type!(f32, GbnfFieldType::Primitive(GbnfPrimitive::Number));
define_field_type!(f64, GbnfFieldType::Primitive(GbnfPrimitive::Number));
define_field_type!(usize, GbnfFieldType::Primitive(GbnfPrimitive::Number));
define_primitive_field_type!(i16, GbnfPrimitive::Number);
define_primitive_field_type!(u16, GbnfPrimitive::Number);
define_primitive_field_type!(i32, GbnfPrimitive::Number);
define_primitive_field_type!(u32, GbnfPrimitive::Number);
define_primitive_field_type!(i64, GbnfPrimitive::Number);
define_primitive_field_type!(u64, GbnfPrimitive::Number);
define_primitive_field_type!(f32, GbnfPrimitive::Number);
define_primitive_field_type!(f64, GbnfPrimitive::Number);
define_primitive_field_type!(usize, GbnfPrimitive::Number);
define_field_type!(bool, GbnfFieldType::Primitive(GbnfPrimitive::Boolean));
define_primitive_field_type!(bool, GbnfPrimitive::Boolean);
define_field_type!(String, GbnfFieldType::Primitive(GbnfPrimitive::String));
define_field_type!(char, GbnfFieldType::Primitive(GbnfPrimitive::String));
define_primitive_field_type!(String, GbnfPrimitive::String);
define_primitive_field_type!(char, GbnfPrimitive::String);
// Blanket implementations to cover more types
impl<T, const N: usize> AsGbnf for [T; N]
@ -90,7 +261,6 @@ where
OptionalPrimitive(primitive_type) => PrimitiveList(primitive_type),
Complex(complex_type) => ComplexList(complex_type),
OptionalComplex(complex_type) => ComplexList(complex_type),
Limited(_) => panic!("limited values are not yet supported"),
ComplexList(_) | PrimitiveList(_) => panic!("nested lists not supported"),
}
}
@ -107,7 +277,6 @@ where
OptionalPrimitive(primitive_type) => PrimitiveList(primitive_type),
Complex(complex_type) => ComplexList(complex_type),
OptionalComplex(complex_type) => ComplexList(complex_type),
Limited(_) => panic!("limited values not yet supported"),
ComplexList(_) | PrimitiveList(_) => panic!("nested lists not supported"),
}
}
@ -123,58 +292,130 @@ where
Primitive(primitive_type) => OptionalPrimitive(primitive_type),
Complex(complex_type) => OptionalComplex(complex_type),
OptionalPrimitive(_) | OptionalComplex(_) => panic!("nested options are not allowed"),
Limited(_) => panic!("limited values not yet supported"),
_ => panic!("optional type cannot be a list"),
}
}
}
#[derive(Debug, Clone, Eq, Hash, PartialEq, Ord, PartialOrd)]
pub enum GbnfRuleLevel {
Root,
Middle,
Leaf,
}
// Actual GBNF rule itself. Holds rule text for dedup.
#[derive(Debug, Clone, Eq, Hash, PartialEq)]
pub struct GbnfRule {
name: String,
name: GbnfToken,
text: String,
dependents: Vec<GbnfRule>,
level: GbnfRuleLevel,
}
impl GbnfRule {
pub fn new(token: String, rule_text: String) -> GbnfRule {
pub fn new(token: GbnfToken, rule_text: String, level: GbnfRuleLevel) -> GbnfRule {
GbnfRule {
name: token,
text: rule_text,
dependents: vec![],
level,
}
}
pub fn single(token: String, rule_text: String) -> Vec<GbnfRule> {
vec![GbnfRule::new(token, rule_text)]
pub fn space() -> GbnfRule {
GbnfRule::new(
GbnfToken::new("ws".to_string()),
r#"[ \t\n]*"#.to_string(),
GbnfRuleLevel::Leaf,
)
}
pub fn to_optional(self) -> GbnfRule {
let option_token = self.name.option_token();
let option_rule_text = format!(r#"{} | "null""#, self.name);
let mut option_rule = GbnfRule::new(option_token, option_rule_text, GbnfRuleLevel::Middle);
option_rule.dependents = vec![self];
option_rule
}
/// Turn this rule into a list rule: main rule is a list type, and
/// dependent rule is the original type/rule itself.
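/// For example, calling this on the `number` rule produces
/// `numberList ::= "[]" | "[" ws number ("," ws number)* "]"`,
/// with the original rule and `ws` as dependents.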
pub fn to_list(self) -> GbnfRule {
let list_name = self.name.list_token();
let list_rule_text =
r#""[]" | "[" {SPACE} {TYPE_NAME} ("," {SPACE} {TYPE_NAME})* "]""#
.replace("{LIST_NAME}", &list_name.0)
.replace("{SPACE}", &GbnfRule::space().name.0)
.replace("{TYPE_NAME}", &self.name.0);
let mut list_rule = GbnfRule::new(list_name, list_rule_text, GbnfRuleLevel::Middle);
list_rule.dependents = vec![self, GbnfRule::space()];
list_rule
}
/// Consume this rule to produce a list of rules consisting of
/// this rule and its dependents. The rules in the list have no
/// dependents. Useful for final output of a rules list.
fn flatten(mut self) -> Vec<GbnfRule> {
let dependents = self
.dependents
.drain(0..)
.flat_map(|rule| rule.flatten())
.collect_vec();
// Self needs to be in front to get proper rule ordering.
[&[self][..], &dependents[..]].concat()
}
}
/// Tokens in the GBNF rule.
pub enum GbnfToken {
Space,
#[repr(transparent)]
#[derive(Debug, Clone, Eq, Hash, PartialEq)]
pub struct GbnfToken(String);
impl From<&str> for GbnfToken {
fn from(value: &str) -> Self {
GbnfToken(value.to_string().to_case(Case::Camel))
}
}
impl Display for GbnfToken {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
impl Deref for GbnfToken {
type Target = String;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for GbnfToken {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl GbnfToken {
pub(self) const SPACE: &'static str = r#"[ \t\n]*"#;
}
impl AsGrammar for GbnfToken {
fn rules(&self) -> Vec<GbnfRule> {
match self {
Self::Space => GbnfRule::single(self.token(), Self::SPACE.to_string()),
}
pub fn new<S: AsRef<str>>(value: S) -> GbnfToken {
GbnfToken::from(value.as_ref())
}
fn token(&self) -> String {
match self {
Self::Space => "ws".to_string(),
}
fn option_token(&self) -> GbnfToken {
GbnfToken::new(format!("{}Option", self.0))
}
fn list_token(&self) -> GbnfToken {
GbnfToken::new(format!("{}List", self.0))
}
}
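// Illustrative sketch (not part of this diff): token names are camelCased via
// convert_case, and the option/list helpers simply append a suffix.
fn token_naming_examples() {
    assert_eq!(GbnfToken::from("applies_to").to_string(), "appliesTo");
    assert_eq!(GbnfToken::new("number").option_token().to_string(), "numberOption");
    assert_eq!(GbnfToken::new("number").list_token().to_string(), "numberList");
}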
/// Represents a primitive value in the GBNF, the simplest possible
/// value a type can hold.
#[derive(Debug)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum GbnfPrimitive {
String,
Boolean,
@ -189,30 +430,34 @@ impl GbnfPrimitive {
impl AsGrammar for GbnfPrimitive {
/// Output the raw GBNF rule of this primitive.
fn rules(&self) -> Vec<GbnfRule> {
fn rule(&self, token: &str) -> GbnfRule {
let rule_text = match self {
Self::Boolean => Self::BOOLEAN,
Self::Number => Self::NUMBER,
Self::String => Self::STRING,
};
GbnfRule::single(self.token(), rule_text.to_string())
GbnfRule::new(token.into(), rule_text.to_string(), GbnfRuleLevel::Leaf)
}
/// Output the token name of the GBNF rule (to refer to in other
/// rules).
fn token(&self) -> String {
String::from(match self {
fn base_type_token(&self) -> GbnfToken {
GbnfToken::from(match self {
Self::Boolean => "boolean",
Self::Number => "number",
Self::String => "string",
})
}
fn with_limit<'a>(&'a self, limit: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a {
Limited(self, limit.and_then(|l| LimitedGbnfPrimitive::new(l)))
}
}
/// Categorize all types of fields that the generated grammar can
/// handle.
#[derive(Debug)]
#[derive(Debug, Clone)]
pub enum GbnfFieldType {
/// A single property on the type, e.g. myField: i32
Primitive(GbnfPrimitive),
@ -231,13 +476,16 @@ pub enum GbnfFieldType {
/// A list/vec of complex types.
ComplexList(GbnfComplex),
/// A single property field, but with limited values allowed,
/// constrained by the primitive type.
Limited(GbnfPrimitive),
}
impl GbnfFieldType {
pub fn as_primitive(self) -> GbnfPrimitive {
match self {
GbnfFieldType::Primitive(primitive) => primitive,
_ => panic!("not a GBNF primitive"),
}
}
pub fn as_complex(self) -> GbnfComplex {
match self {
GbnfFieldType::Complex(complex) => complex,
@ -247,89 +495,109 @@ impl GbnfFieldType {
}
/// Connect a property name and a field type to generate a GBNF rule.
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct GbnfField {
pub field_name: String,
pub field_type: GbnfFieldType,
}
impl GbnfField {
fn list_rule(field_type: &(impl AsGrammar + ?Sized)) -> String {
r#""[]" | "[" {SPACE} {TYPE_NAME} ("," {SPACE} {TYPE_NAME})* "]""#
.replace("{LIST_NAME}", "")
.replace("{SPACE}", &GbnfToken::Space.token())
.replace("{TYPE_NAME}", &field_type.token())
}
fn list_rules<T: AsGrammar>(&self, f: &T) -> Vec<GbnfRule> {
// Create two rules: one for the list and one for its actual type.
let list_rule = GbnfRule::new(self.token(), Self::list_rule(f));
let mut rules = vec![list_rule];
rules.append(&mut f.rules());
rules
}
pub limited: bool,
}
impl AsGrammar for GbnfField {
fn token(&self) -> String {
fn base_type_token(&self) -> GbnfToken {
match &self.field_type {
GbnfFieldType::Primitive(f) => f.token(),
GbnfFieldType::OptionalPrimitive(f) => f.token(),
GbnfFieldType::PrimitiveList(f) => format!("{}List", f.token()),
GbnfFieldType::Complex(f) => f.token(),
GbnfFieldType::OptionalComplex(f) => f.token(),
GbnfFieldType::ComplexList(f) => format!("{}List", f.token()),
GbnfFieldType::Limited(f) => f.token(),
_ => "".to_string(),
GbnfFieldType::Primitive(f) => f.base_type_token(),
GbnfFieldType::Complex(f) => f.base_type_token(),
GbnfFieldType::OptionalPrimitive(f) => f.base_type_token(),
GbnfFieldType::OptionalComplex(f) => f.base_type_token(),
GbnfFieldType::PrimitiveList(f) => f.base_type_token(),
GbnfFieldType::ComplexList(f) => f.base_type_token(),
}
}
// TODO need to implement optional rules, which probably involves
// wrapping the primitive rule in parens, and then ORing to null.
fn rules(&self) -> Vec<GbnfRule> {
fn rule(&self, token: &str) -> GbnfRule {
match &self.field_type {
GbnfFieldType::Complex(f) => f.rules(),
GbnfFieldType::OptionalComplex(f) => f.rules(),
GbnfFieldType::ComplexList(f) => self.list_rules(f),
GbnfFieldType::Primitive(f) => f.rules(),
GbnfFieldType::OptionalPrimitive(f) => f.rules(),
GbnfFieldType::PrimitiveList(f) => self.list_rules(f),
GbnfFieldType::Limited(f) => f.rules(),
GbnfFieldType::Complex(f) => f.rule(token),
GbnfFieldType::Primitive(f) => f.rule(token),
GbnfFieldType::OptionalComplex(f) => f.rule(token).to_optional(),
GbnfFieldType::OptionalPrimitive(f) => f.rule(token).to_optional(),
GbnfFieldType::ComplexList(f) => f.rule(token).to_list(),
GbnfFieldType::PrimitiveList(f) => f.rule(token).to_list(),
}
}
fn with_limit<'a>(&'a self, limit: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a {
Limited(
self,
limit.map(|l| LimitedGbnfField {
field: self,
limit: l,
}),
)
}
}
/// The complex type is a direct mapping from a supported Rust struct,
/// and also used to generate the root of a GBNF grammar.
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct GbnfComplex {
pub name: String,
pub fields: Vec<GbnfField>,
}
impl GbnfComplex {
pub fn to_grammar(&self) -> String {
let mut rules = vec![GbnfRule::new("root".to_string(), self.name.clone())];
pub fn to_grammar(&self, limit: Option<GbnfLimit>) -> String {
// The root type cannot itself be limited.
let mut root = GbnfRule::new(
GbnfToken::new("root".to_string()),
self.base_type_token().0,
GbnfRuleLevel::Root,
);
rules.append(&mut self.rules());
let root_type_rule = if let Some(_) = limit {
let limited_self = self.with_limit(limit.as_ref());
limited_self.rule(&root.text)
} else {
self.rule(&root.text)
};
for field in &self.fields {
rules.append(&mut field.rules());
}
root.dependents = vec![root_type_rule];
let rules = vec![root];
rules
// Final output: flatten all rules into one giant list of
// rules with no dependents, sort according to "rule level",
// and then deduplicate.
let mut grammar = rules
.into_iter()
.flat_map(|rule| rule.flatten())
.sorted_by(|rule1, rule2| Ord::cmp(&rule1.level, &rule2.level))
.unique()
.map(|rule| format!("{} ::= {}", rule.name, rule.text))
.join("\n")
}
}
.join("\n");
impl AsGrammar for GbnfComplex {
fn rules(&self) -> Vec<GbnfRule> {
// This will output the full set of rules for the complex type.
// Deduplication handled later.
grammar.push('\n');
grammar
}
fn rule_for_field(&self, field: &GbnfField, limit: Option<&GbnfLimit>) -> GbnfRule {
if let Some(GbnfLimit::Complex(nested_limit)) = limit {
nested_limit
.get(field.field_name.as_str())
.map(|l| {
// For complex type fields, we "namespace" the
// field type name when limiting, to prevent
// collisions.
let limited = field.with_limit(Some(l));
let token = format!("{}{}", self.name, limited.base_type_token());
limited.rule(&token)
})
.unwrap_or_else(|| field.rule(&field.base_type_token()))
} else {
field.rule(&field.base_type_token())
}
}
/// The GBNF rule for the complex type itself.
fn rule_for_self(&self, token: &str, limit: Option<&GbnfLimit>) -> GbnfRule {
let mut rule = String::new();
rule.push_str(r#""{" "#);
@ -338,14 +606,17 @@ impl AsGrammar for GbnfComplex {
.fields
.iter()
.map(|field| {
let field_rule = self.rule_for_field(field, limit);
let token = field_rule.name;
let mut text = String::new();
text.push_str(&GbnfToken::Space.token());
text.push_str(&GbnfRule::space().name.0);
text.push_str(" ");
text.push_str(&format!(
r#""\"{}\":" {} {}"#,
field.field_name,
GbnfToken::Space.token(),
field.token(),
GbnfRule::space().name.0,
token,
));
text
})
@ -354,12 +625,40 @@ impl AsGrammar for GbnfComplex {
rule.push_str(&field_rules_text);
rule.push_str(r#" "}""#);
let mut rules = GbnfRule::single(self.token(), rule);
rules.append(&mut GbnfToken::Space.rules());
rules
GbnfRule::new(token.into(), rule, GbnfRuleLevel::Middle)
}
fn token(&self) -> String {
self.name.clone()
/// The rules for the fields of the complex type.
fn rules_for_fields(&self, limit: Option<&GbnfLimit>) -> Vec<GbnfRule> {
let mut rules = vec![];
for field in &self.fields {
rules.push(self.rule_for_field(field, limit));
}
rules
}
}
impl AsGrammar for GbnfComplex {
fn rule(&self, token: &str) -> GbnfRule {
let mut main_rule = self.rule_for_self(token, None);
let field_rules = self.rules_for_fields(None);
main_rule.dependents = field_rules;
main_rule.dependents.push(GbnfRule::space());
main_rule
}
fn base_type_token(&self) -> GbnfToken {
GbnfToken::new(self.name.clone())
}
fn with_limit<'a>(&'a self, limit: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a {
Limited(
self,
limit.map(|l| LimitedGbnfComplex {
complex: self,
limit: l,
}),
)
}
}

gbnf/src/limited.rs (new file)

@ -0,0 +1,149 @@
use crate::{
AsGrammar, GbnfComplex, GbnfField, GbnfFieldType, GbnfLimit, GbnfPrimitive, GbnfRule,
GbnfRuleLevel, GbnfToken,
};
use itertools::Itertools;
#[derive(Debug, Clone)]
pub struct LimitedGbnfComplex<'a> {
pub complex: &'a GbnfComplex,
pub limit: &'a GbnfLimit,
}
impl AsGrammar for LimitedGbnfComplex<'_> {
fn rule(&self, token: &str) -> GbnfRule {
let mut main_rule = self.complex.rule_for_self(token, Some(self.limit));
let field_rules = self.complex.rules_for_fields(Some(self.limit));
main_rule.dependents = field_rules;
main_rule.dependents.push(GbnfRule::space());
main_rule
}
fn base_type_token(&self) -> GbnfToken {
GbnfToken::new(format!("{}Limit", self.complex.base_type_token()))
}
fn with_limit<'a>(&'a self, _: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a {
self.clone()
}
}
#[derive(Debug, Clone)]
pub struct LimitedGbnfPrimitive<'a> {
primitive: GbnfPrimitive,
values: &'a [String],
}
impl LimitedGbnfPrimitive<'_> {
pub fn new(limit: &GbnfLimit) -> Option<LimitedGbnfPrimitive> {
if let GbnfLimit::Simple(primitive, values) = limit {
Some(LimitedGbnfPrimitive {
primitive: *primitive,
values,
})
} else {
None
}
}
}
impl AsGrammar for LimitedGbnfPrimitive<'_> {
fn rule(&self, token: &str) -> GbnfRule {
let values = self
.values
.into_iter()
.map(|v| {
if self.primitive == GbnfPrimitive::String {
format!(r#""\"{}\"""#, v)
} else {
format!(r#""{}""#, v)
}
})
.join(" | ");
GbnfRule::new(token.into(), values, GbnfRuleLevel::Leaf)
}
fn base_type_token(&self) -> GbnfToken {
GbnfToken::new(format!("{}Limit", self.primitive.base_type_token()))
}
fn with_limit<'a>(&'a self, _: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a {
self.clone()
}
}
#[derive(Debug, Clone)]
pub struct LimitedGbnfField<'a> {
pub field: &'a GbnfField,
pub limit: &'a GbnfLimit,
}
macro_rules! wrap_in_limit {
($field:expr, $limit:expr, $token:expr) => {
$field.with_limit($limit).rule($token)
};
}
impl AsGrammar for LimitedGbnfField<'_> {
fn rule(&self, token: &str) -> GbnfRule {
// This is always the limit for this specific field.
let limit = Some(self.limit);
match &self.field.field_type {
GbnfFieldType::Complex(f) => wrap_in_limit!(f, limit, token),
GbnfFieldType::Primitive(f) => wrap_in_limit!(f, limit, token),
GbnfFieldType::OptionalComplex(f) => wrap_in_limit!(f, limit, token).to_optional(),
GbnfFieldType::OptionalPrimitive(f) => wrap_in_limit!(f, limit, token).to_optional(),
GbnfFieldType::ComplexList(f) => wrap_in_limit!(f, limit, token).to_list(),
GbnfFieldType::PrimitiveList(f) => wrap_in_limit!(f, limit, token).to_list(),
}
}
fn base_type_token(&self) -> GbnfToken {
let token = match &self.field.field_type {
GbnfFieldType::Primitive(f) => f.base_type_token(),
GbnfFieldType::Complex(f) => f.base_type_token(),
GbnfFieldType::OptionalPrimitive(f) => f.base_type_token(),
GbnfFieldType::OptionalComplex(f) => f.base_type_token(),
GbnfFieldType::PrimitiveList(f) => f.base_type_token(),
GbnfFieldType::ComplexList(f) => f.base_type_token(),
};
GbnfToken::new(format!("{}{}Limit", self.field.field_name, token))
}
fn with_limit<'a>(&'a self, _: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a {
self.clone()
}
}
#[derive(Debug, Clone)]
pub struct Limited<'a, T, L>(pub &'a T, pub Option<L>)
where
T: AsGrammar + Clone,
L: AsGrammar + Clone;
impl<T, L> AsGrammar for Limited<'_, T, L>
where
T: AsGrammar + Clone,
L: AsGrammar + Clone,
{
fn rule(&self, token: &str) -> GbnfRule {
self.1
.as_ref()
.map(|l| l.rule(token))
.unwrap_or_else(|| self.0.rule(token))
}
fn base_type_token(&self) -> GbnfToken {
self.1
.as_ref()
.map(|l| l.base_type_token())
.unwrap_or_else(|| self.0.base_type_token())
}
fn with_limit<'a>(&'a self, _: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a {
self.clone()
}
}

gbnf/src/new.rs (new, empty file)


@ -11,4 +11,6 @@ auto_impl = "1.1.2"
syn = { version = "2.0", features = [ "derive", "full", "parsing", "printing", "visit", "visit-mut", "clone-impls", "proc-macro" ] }
quote = "1.0.35"
itertools = "0.12.0"
gbnf = { path = "../gbnf" }
gbnf = { path = "../gbnf" }
proc-macro2 = "1.0.78"
darling = "0.20.5"


@ -1,80 +1,181 @@
use darling::ast::Data;
use darling::{FromDeriveInput, FromField};
use proc_macro::{Span, TokenStream};
use quote::{quote, ToTokens};
use syn::parse::{Parse, ParseStream};
use syn::punctuated::Punctuated;
use syn::{braced, parse_macro_input};
use syn::{DeriveInput, Field, Ident, LitStr, Token};
use proc_macro2::TokenStream as TokenStream2;
use quote::{format_ident, quote, ToTokens};
use syn::parse_macro_input;
use syn::{DeriveInput, Ident, LitStr};
#[derive(Debug)]
struct GbnfStructDef {
name: Ident,
fields: Punctuated<Field, Token![,]>,
#[derive(Clone, Debug, FromField)]
#[darling(forward_attrs(gbnf_limit))]
struct GbnfFieldDef {
ident: Option<syn::Ident>,
ty: syn::Type,
vis: syn::Visibility,
attrs: Vec<syn::Attribute>,
}
impl Parse for GbnfStructDef {
fn parse(input: ParseStream) -> syn::Result<Self> {
// `let _ =`: discard tokens we don't care about.
let _: Option<Token![pub]> = input.parse()?;
let _: Option<Token![struct]> = input.parse()?;
impl ToTokens for GbnfFieldDef {
fn to_tokens(&self, tokens: &mut TokenStream2) {
let ident = &self.ident;
let ty = &self.ty; // TODO figure out how to get ident out
let vis = &self.vis;
let content;
let name: Ident = input.parse()?;
let _ = braced!(content in input);
let output = quote! {
#vis #ident: Box<dyn GbnfLimitedField<#ty>>
};
Ok(GbnfStructDef {
name,
fields: content.parse_terminated(Field::parse_named, Token![,])?,
})
output.to_tokens(tokens);
}
}
fn generate_gbnf(input: TokenStream, create_struct: bool) -> TokenStream {
// To define complex types, we take a struct into the macro, and
// then output a bunch of calls to gbnf_field (wrapped in gbnf
// complex).
#[derive(Debug, FromDeriveInput)]
#[darling(supports(struct_named))]
struct GbnfStructDef {
ident: syn::Ident,
data: darling::ast::Data<(), GbnfFieldDef>,
}
// We could also generate the entire complex type now during macro
// run, and then shove the resulting GBNF rule into the type as a
// static string.
impl ToTokens for GbnfStructDef {
fn to_tokens(&self, tokens: &mut TokenStream2) {
let ident = &self.ident;
if let Ok(expr_struct) = syn::parse::<GbnfStructDef>(input) {
let struct_name_str = LitStr::new(&expr_struct.name.to_string(), Span::call_site().into());
let struct_name = expr_struct.name;
let fields = expr_struct.fields.iter();
let gbnfs = expr_struct.fields.iter().map(|field| {
let field_type = &field.ty;
let field_ident = field
.ident
.as_ref()
.map(|i| i.to_string())
.map(|field_name| LitStr::new(&field_name, Span::call_site().into()))
.expect("no ident");
quote! { gbnf_field!(#field_ident, #field_type) }
});
let struct_frag = if create_struct {
quote! {
pub struct #struct_name {
#(#fields),*
}
}
} else {
quote! {}
let fields = match self.data {
Data::Struct(ref struct_def) => &struct_def.fields,
_ => panic!("Can only use GbnfLimit on structs with owned data"),
};
let code = quote! {
#struct_frag
let output = quote! {
pub struct #ident {
#(#fields),*
}
};
impl #struct_name {
tokens.extend(output);
}
}
/// Find fields in the struct with a #[gbnf_limit] attribute.
fn find_limited_fields(fields: &[GbnfFieldDef]) -> impl Iterator<Item = &GbnfFieldDef> + '_ {
fields.iter().filter(|field| {
field
.attrs
.iter()
.find(|attr| attr.path().get_ident().unwrap().to_string() == "gbnf_limit")
.is_some()
})
}
/// Find fields in the struct without a #[gbnf_limit] attribute.
fn find_non_limited_fields(fields: &[GbnfFieldDef]) -> impl Iterator<Item = &GbnfFieldDef> + '_ {
fields.iter().filter(|field| {
field
.attrs
.iter()
.find(|attr| attr.path().get_ident().unwrap().to_string() == "gbnf_limit")
.is_none()
})
}
fn generate_to_grammar_impl(original_struct_name: &Ident, fields: &[GbnfFieldDef]) -> TokenStream2 {
let limit_struct_name = format_ident!("{}GbnfLimit", original_struct_name);
// Convert limit struct field names to string literals
let limit_struct_fields: Vec<_> = find_limited_fields(fields)
.map(|field| quote! { #field })
.collect();
// Convert provided values of the limit struct into corresponding
// GbnfLimit instances. Bunch of tuples fed into HashMap::from.
let from_assignments = find_limited_fields(fields).map(|field| {
let key = LitStr::new(
&field.ident.as_ref().expect("no ident!").to_string(),
Span::call_site().into(),
)
.to_token_stream();
let ident = &field.ident;
let value = quote! { self.#ident.into() };
quote! { (#key, #value) }
});
if limit_struct_fields.len() > 0 {
quote! {
pub struct #limit_struct_name {
#(#limit_struct_fields),*
}
impl #limit_struct_name {
pub fn to_gbnf_limit(self) -> GbnfLimit {
GbnfLimit::Complex(
HashMap::from([
#(#from_assignments),*
])
)
}
}
impl GbnfLimitedField<#original_struct_name> for #limit_struct_name {
fn limit(self: Box<#limit_struct_name>) -> GbnfLimit {
self.to_gbnf_limit()
}
}
impl GbnfLimitedField<Option<#original_struct_name>> for #limit_struct_name {
fn limit(self: Box<#limit_struct_name>) -> GbnfLimit {
self.to_gbnf_limit()
}
}
impl From<#limit_struct_name> for Box<dyn GbnfLimitedField<#original_struct_name>> {
fn from(limit: #limit_struct_name) -> Self {
Box::new(limit)
}
}
impl From<#limit_struct_name> for Box<dyn GbnfLimitedField<Option<#original_struct_name>>> {
fn from(limit: #limit_struct_name) -> Self {
Box::new(limit)
}
}
impl #original_struct_name {
pub fn to_grammar_with_limit(limit: #limit_struct_name) -> String {
let gbnf_limit = limit.to_gbnf_limit();
Self::to_gbnf().as_complex().to_grammar(Some(gbnf_limit))
}
}
}
} else {
quote! {
impl #original_struct_name {
pub fn to_grammar() -> &'static str {
use std::sync::OnceLock;
static GRAMMAR: OnceLock<String> = OnceLock::new();
GRAMMAR.get_or_init(|| Self::to_gbnf().as_complex().to_grammar())
GRAMMAR.get_or_init(|| Self::to_gbnf().as_complex().to_grammar(None))
}
}
}
}
}
/// Generate the GBNF rules and the limit struct (if applicable).
fn generate_gbnf(input: &DeriveInput) -> TokenStream {
if let Ok(gbnf_struct) = GbnfStructDef::from_derive_input(input) {
let struct_name = gbnf_struct.ident;
let struct_name_str = LitStr::new(&struct_name.to_string(), Span::call_site().into());
let fields = match gbnf_struct.data {
Data::Struct(struct_def) => struct_def.fields,
_ => panic!("Can only use GbnfLimit on structs with owned data"),
};
// Gbnf rule generation stuff
let limited_gbnfs = map_limited_gbnfs(&fields);
let non_limited_gbnfs = map_non_limited_gbnfs(&fields);
let gbnfs = limited_gbnfs.chain(non_limited_gbnfs);
let as_gbnf_impl = quote! {
impl AsGbnf for #struct_name {
fn to_gbnf() -> gbnf::GbnfFieldType {
GbnfFieldType::Complex(
@ -87,21 +188,61 @@ fn generate_gbnf(input: TokenStream, create_struct: bool) -> TokenStream {
}
};
code.into()
let to_grammar_impl = generate_to_grammar_impl(&struct_name, &fields);
let final_output = quote! {
use gbnf::prelude::*;
#as_gbnf_impl
#to_grammar_impl
};
final_output.into()
} else {
panic!("Can only generate GBNF from structs of owned data (pub or private)");
panic!("Can only use GbnfLimit on structs with owned data");
}
}
/// Create a GBNF complex type as a Rust struct.
#[proc_macro]
pub fn gbnf_complex(input: TokenStream) -> TokenStream {
generate_gbnf(input, true)
/// Turn the fields without the limit attr into GBNF rule definitions.
fn map_non_limited_gbnfs(fields: &[GbnfFieldDef]) -> impl Iterator<Item = TokenStream2> + '_ {
find_non_limited_fields(fields).map(|field| {
let field_type = &field.ty;
let field_ident = field
.ident
.as_ref()
.map(|i| i.to_string())
.map(|field_name| LitStr::new(&field_name, Span::call_site().into()))
.expect("no ident");
quote! { gbnf_field!(#field_ident, #field_type) }
})
}
/// Add the ability to convert a Rust type into a GBNF grammar.
#[proc_macro_derive(Gbnf)]
/// Turn the fields with #[gbnf_limit] attr into GBNF rule definitions.
fn map_limited_gbnfs(fields: &[GbnfFieldDef]) -> impl Iterator<Item = TokenStream2> + '_ {
find_limited_fields(fields).map(|field| {
let field_type = &field.ty;
let field_ident = field
.ident
.as_ref()
.map(|i| i.to_string())
.map(|field_name| LitStr::new(&field_name, Span::call_site().into()))
.expect("no ident");
quote! {
GbnfField {
field_name: #field_ident.to_string(),
field_type: gbnf_field_type!(#field_type),
limited: true,
}
}
})
}
/// Convert a Rust type into a GBNF grammar.
#[proc_macro_derive(Gbnf, attributes(gbnf_limit))]
pub fn gbnf(input: TokenStream) -> TokenStream {
let input = parse_macro_input!(input as DeriveInput);
generate_gbnf(input.to_token_stream().into(), false)
generate_gbnf(&input)
}
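
// Illustrative sketch (not part of this diff): roughly what #[derive(Gbnf)]
// emits for a struct with a single #[gbnf_limit]-annotated String field. The
// names follow the "{Struct}GbnfLimit" convention above; the real expansion
// also emits the AsGbnf impl, the GbnfLimitedField/From impls for the
// companion struct, and the to_grammar_with_limit method.
use gbnf::prelude::*;

pub struct Example {
    pub id: String,
}

// Each limited field becomes a boxed GbnfLimitedField trait object over the
// original field type.
pub struct ExampleGbnfLimit {
    pub id: Box<dyn GbnfLimitedField<String>>,
}

impl ExampleGbnfLimit {
    pub fn to_gbnf_limit(self) -> GbnfLimit {
        GbnfLimit::Complex(HashMap::from([("id", self.id.into())]))
    }
}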