ai-game/gbnf/src/lib.rs

665 lines
20 KiB
Rust

use std::{
collections::HashMap,
fmt::Display,
ops::{Deref, DerefMut},
};
use itertools::Itertools;
use limited::{Limited, LimitedGbnfComplex, LimitedGbnfField, LimitedGbnfPrimitive};
use serde::de::DeserializeOwned;
use convert_case::{Case, Casing};
mod limited;
pub mod prelude {
pub use crate::gbnf_field;
pub use crate::gbnf_field_type;
pub use crate::AsGbnf;
pub use crate::AsGbnfPrimitive;
pub use crate::AsGrammar;
pub use crate::GbnfComplex;
pub use crate::GbnfField;
pub use crate::GbnfFieldType;
pub use crate::GbnfLimit;
pub use crate::GbnfLimitedField;
pub use crate::GbnfPrimitive;
pub use crate::GbnfRule;
pub use crate::GbnfRuleLevel;
pub use crate::GbnfToken;
pub use std::collections::HashMap;
}
/// The GbnfLimitedField trait is capable of producing a [`GbnfLimit`]
/// enum value, which itself contains either a list of allowable
/// values (for simple types), or a nested GbnfLimit object (for
/// complex types). This allows the creation of a complex nested Limit
/// struct which can hold the full structure of nested limits of
/// multiple types that derive Gbnf and have limits.
///
/// The type parameter `T` does not appear in the method signature; in
/// the impls in this file it names the type of the field being
/// limited (for example `Vec<T>` implements the trait for `T`,
/// `Rc<T>` and `Vec<T>`).
pub trait GbnfLimitedField<T> {
/// Consume the boxed limiter and convert it into a [`GbnfLimit`].
fn limit(self: Box<Self>) -> GbnfLimit;
}
/// Converts an already boxed limiter that limits its own type
/// (`T: GbnfLimitedField<T>`) into a boxed trait object.
impl<T: GbnfLimitedField<T> + 'static> From<Box<T>> for Box<dyn GbnfLimitedField<T>> {
fn from(value: Box<T>) -> Self {
// No re-allocation: the existing box is returned as the trait object.
value
}
}
/// Boxes a vector of allowed values so it can be used as a limiter
/// for a field of type `T`.
impl<T: ToString + AsGbnfPrimitive + 'static> From<Vec<T>> for Box<dyn GbnfLimitedField<T>> {
fn from(value: Vec<T>) -> Self {
Box::new(value)
}
}
/// Boxes a fixed-size array of allowed values so it can be used as a
/// limiter for a field of type `T`.
impl<T, const N: usize> From<[T; N]> for Box<dyn GbnfLimitedField<T>>
where
T: ToString + AsGbnfPrimitive + 'static,
{
fn from(value: [T; N]) -> Self {
Box::new(value)
}
}
/// Converts an already boxed vector of allowed values into a boxed
/// limiter for a field of type `T`.
impl<T: ToString + AsGbnfPrimitive + 'static> From<Box<Vec<T>>> for Box<dyn GbnfLimitedField<T>> {
fn from(value: Box<Vec<T>>) -> Self {
// No re-allocation: the existing box is returned as the trait object.
value
}
}
// Single primitive field types that can only have specific values.
impl<T: ToString + AsGbnfPrimitive> GbnfLimitedField<T> for Vec<T> {
    /// Produces a simple limit: the GBNF primitive of `T` paired with
    /// the string form of every element, in vector order.
    fn limit(self: Box<Self>) -> GbnfLimit {
        let primitive = T::to_gbnf_primitive();
        let allowed_values: Vec<String> =
            self.into_iter().map(|value| value.to_string()).collect();
        GbnfLimit::Simple(primitive, allowed_values)
    }
}
/// Lets a vector of allowed values limit a field of type `Rc<T>`.
impl<T: ToString + AsGbnfPrimitive> GbnfLimitedField<std::rc::Rc<T>> for Vec<T> {
    /// Produces a simple limit: the GBNF primitive of `T` paired with
    /// the string form of every element, in vector order.
    fn limit(self: Box<Self>) -> GbnfLimit {
        let primitive = T::to_gbnf_primitive();
        let mut allowed_values = Vec::with_capacity(self.len());
        for value in *self {
            allowed_values.push(value.to_string());
        }
        GbnfLimit::Simple(primitive, allowed_values)
    }
}
// List types that can only have specific values.
impl<T: ToString + AsGbnfPrimitive> GbnfLimitedField<Vec<T>> for Vec<T> {
    /// Produces a simple limit: the GBNF primitive of the element type
    /// paired with the string form of every element, in vector order.
    fn limit(self: Box<Self>) -> GbnfLimit {
        let primitive = T::to_gbnf_primitive();
        let allowed_values = (*self)
            .into_iter()
            .map(|item| ToString::to_string(&item))
            .collect();
        GbnfLimit::Simple(primitive, allowed_values)
    }
}
/// Lets a fixed-size array of allowed values limit a field of type `T`.
impl<T, const N: usize> GbnfLimitedField<T> for [T; N]
where
    T: ToString + AsGbnfPrimitive,
{
    /// Produces a simple limit: the GBNF primitive of `T` paired with
    /// the string form of every element, in array order.
    fn limit(self: Box<Self>) -> GbnfLimit {
        let primitive = T::to_gbnf_primitive();
        let allowed_values: Vec<String> = IntoIterator::into_iter(*self)
            .map(|value| value.to_string())
            .collect();
        GbnfLimit::Simple(primitive, allowed_values)
    }
}
/// Lets a fixed-size array of allowed values limit a list field of
/// type `Vec<T>`.
impl<T, const N: usize> GbnfLimitedField<Vec<T>> for [T; N]
where
    T: ToString + AsGbnfPrimitive,
{
    /// Produces a simple limit: the GBNF primitive of the element type
    /// paired with the string form of every element, in array order.
    fn limit(self: Box<Self>) -> GbnfLimit {
        let primitive = T::to_gbnf_primitive();
        let mut allowed_values = Vec::with_capacity(N);
        for value in *self {
            allowed_values.push(value.to_string());
        }
        GbnfLimit::Simple(primitive, allowed_values)
    }
}
/// Lets a borrowed slice of string slices limit a `String` field.
impl<'a> GbnfLimitedField<String> for &'a [&'a str] {
    /// Produces a simple string limit holding an owned copy of every
    /// slice element, in slice order.
    fn limit(self: Box<Self>) -> GbnfLimit {
        let primitive = String::to_gbnf_primitive();
        let choices: &[&str] = *self;
        let allowed_values: Vec<String> =
            choices.iter().map(|choice| choice.to_string()).collect();
        GbnfLimit::Simple(primitive, allowed_values)
    }
}
/// Lets a reference-counted vector of string slices limit a `String`
/// field.
impl<'a> GbnfLimitedField<String> for std::rc::Rc<Vec<&'a str>> {
    /// Produces a simple string limit holding an owned copy of every
    /// element, in vector order. The shared vector is only read.
    fn limit(self: Box<Self>) -> GbnfLimit {
        let primitive = String::to_gbnf_primitive();
        let mut allowed_values = Vec::new();
        for choice in self.iter() {
            allowed_values.push(choice.to_string());
        }
        GbnfLimit::Simple(primitive, allowed_values)
    }
}
/// A type of GBNF value limit, either simple (a list of values meant
/// for a single field), or complex (contains nested GbnfLimit).
///
/// Values are produced by [`GbnfLimitedField::limit`] and consumed
/// through [`AsGrammar::with_limit`].
#[derive(Debug)]
pub enum GbnfLimit {
/// The primitive type, and the allowed values (already converted to
/// their string form).
Simple(GbnfPrimitive, Vec<String>),
/// Field name -> nested limit type. Fields without an entry are
/// left unlimited.
Complex(HashMap<&'static str, GbnfLimit>),
}
/// Resolves a boxed limiter into its concrete [`GbnfLimit`] by calling
/// [`GbnfLimitedField::limit`] on it.
impl<T> From<Box<dyn GbnfLimitedField<T>>> for GbnfLimit {
fn from(value: Box<dyn GbnfLimitedField<T>>) -> Self {
value.limit()
}
}
/// Converts GBNF definitions (through the types below) into the grammar
/// rules.
pub trait AsGrammar {
/// Create the rule itself, along with its dependent rules. `token`
/// is the name the created rule is registered under.
fn rule(&self, token: &str) -> GbnfRule;
/// The basic token for the type, or type contained within a
/// wrapper. A wrapper is something like GbnfField. This always
/// returns the token for the base type of the rule (including
/// option or list, for those types).
///
/// NOTE(review): the `GbnfField` impl in this file returns the
/// inner type's token for optional and list fields, without an
/// `Option`/`List` suffix; confirm which behavior is intended.
fn base_type_token(&self) -> GbnfToken;
/// Wraps this trait impl in a corresponding Limited GBNF rule
/// creator. The returned value borrows both `self` and `limit`.
fn with_limit<'a>(&'a self, limit: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a;
}
/// Trait for regular types to implement to convert themselves to a
/// GBNF value.
///
/// This file implements it for the common Rust primitives and, via
/// blanket impls, for arrays, `Vec` and `Option` of implementing
/// types.
pub trait AsGbnf {
/// Describe the implementing type as a [`GbnfFieldType`].
fn to_gbnf() -> GbnfFieldType;
}
/// Implemented by types that map directly onto a single
/// [`GbnfPrimitive`]. Used as a bound by the value-list limiters.
pub trait AsGbnfPrimitive {
/// The GBNF primitive that represents the implementing type.
fn to_gbnf_primitive() -> GbnfPrimitive;
}
// Implements `AsGbnf` for `$type`, with `to_gbnf()` returning the
// expression `$gbnf_type`.
macro_rules! define_field_type {
($type:ty, $gbnf_type:expr) => {
impl AsGbnf for $type {
fn to_gbnf() -> GbnfFieldType {
$gbnf_type
}
}
};
}
// Implements `AsGbnfPrimitive` for `$type`, with
// `to_gbnf_primitive()` returning the expression `$gbnf_primitive`.
macro_rules! define_gbnf_primitive {
($type:ty, $gbnf_primitive:expr) => {
impl AsGbnfPrimitive for $type {
fn to_gbnf_primitive() -> GbnfPrimitive {
$gbnf_primitive
}
}
};
}
// Registers `$type` as a GBNF primitive: implements both
// `AsGbnfPrimitive` (returning `$gbnf_primitive`) and `AsGbnf`
// (returning `GbnfFieldType::Primitive($gbnf_primitive)`).
macro_rules! define_primitive_field_type {
($type:ty, $gbnf_primitive:expr) => {
define_gbnf_primitive!($type, $gbnf_primitive);
define_field_type!($type, GbnfFieldType::Primitive($gbnf_primitive));
};
}
/// Expands to the `GbnfFieldType` of the given Rust type by calling
/// its `AsGbnf::to_gbnf` implementation.
///
/// The trait is named through `$crate`, so the macro expands
/// correctly even where the caller has not imported `AsGbnf`.
#[macro_export]
macro_rules! gbnf_field_type {
    ($type:ty) => {
        <$type as $crate::AsGbnf>::to_gbnf()
    };
}
/// Builds a `GbnfField` for a struct field.
///
/// * `gbnf_field!("name", Type)` creates an unlimited field
///   (`limited: false`).
/// * `gbnf_field!("name", Type, limited)` sets the `limited` flag from
///   the given `bool` expression.
///
/// The three-argument form previously initialised a non-existent
/// `limit` member and left `limited` unset, so it could not compile.
/// All crate items are named through `$crate`, so callers do not need
/// `GbnfField`, `AsGbnf` or `gbnf_field_type!` in scope.
#[macro_export]
macro_rules! gbnf_field {
    ($field_name:literal, $field_type:ty) => {
        $crate::GbnfField {
            field_name: $field_name.to_string(),
            field_type: $crate::gbnf_field_type!($field_type),
            limited: false,
        }
    };
    ($field_name:literal, $field_type:ty, $field_limited:expr) => {
        $crate::GbnfField {
            field_name: $field_name.to_string(),
            field_type: $crate::gbnf_field_type!($field_type),
            limited: $field_limited,
        }
    };
}
// Implemented field type mappings for common rust types. Each line
// implements both `AsGbnf` and `AsGbnfPrimitive` for the listed type.
// Integer and float types all map to `Number`; `char` is mapped to
// `String`.
// NOTE(review): no mapping for i8, u8, isize, i128 or u128 appears in
// this part of the file; confirm whether that is intentional.
define_primitive_field_type!(i16, GbnfPrimitive::Number);
define_primitive_field_type!(u16, GbnfPrimitive::Number);
define_primitive_field_type!(i32, GbnfPrimitive::Number);
define_primitive_field_type!(u32, GbnfPrimitive::Number);
define_primitive_field_type!(i64, GbnfPrimitive::Number);
define_primitive_field_type!(u64, GbnfPrimitive::Number);
define_primitive_field_type!(f32, GbnfPrimitive::Number);
define_primitive_field_type!(f64, GbnfPrimitive::Number);
define_primitive_field_type!(usize, GbnfPrimitive::Number);
define_primitive_field_type!(bool, GbnfPrimitive::Boolean);
define_primitive_field_type!(String, GbnfPrimitive::String);
define_primitive_field_type!(char, GbnfPrimitive::String);
// Blanket implementations to cover more types
impl<T, const N: usize> AsGbnf for [T; N]
where
T: AsGbnf + DeserializeOwned,
{
fn to_gbnf() -> GbnfFieldType {
use GbnfFieldType::*;
match <T as AsGbnf>::to_gbnf() {
Primitive(primitive_type) => PrimitiveList(primitive_type),
OptionalPrimitive(primitive_type) => PrimitiveList(primitive_type),
Complex(complex_type) => ComplexList(complex_type),
OptionalComplex(complex_type) => ComplexList(complex_type),
ComplexList(_) | PrimitiveList(_) => panic!("nested lists not supported"),
}
}
}
impl<T> AsGbnf for Vec<T>
where
T: AsGbnf + DeserializeOwned,
{
fn to_gbnf() -> GbnfFieldType {
use GbnfFieldType::*;
match <T as AsGbnf>::to_gbnf() {
Primitive(primitive_type) => PrimitiveList(primitive_type),
OptionalPrimitive(primitive_type) => PrimitiveList(primitive_type),
Complex(complex_type) => ComplexList(complex_type),
OptionalComplex(complex_type) => ComplexList(complex_type),
ComplexList(_) | PrimitiveList(_) => panic!("nested lists not supported"),
}
}
}
impl<T> AsGbnf for Option<T>
where
T: AsGbnf + DeserializeOwned,
{
fn to_gbnf() -> GbnfFieldType {
use GbnfFieldType::*;
match <T as AsGbnf>::to_gbnf() {
Primitive(primitive_type) => OptionalPrimitive(primitive_type),
Complex(complex_type) => OptionalComplex(complex_type),
OptionalPrimitive(_) | OptionalComplex(_) => panic!("nested options are not allowed"),
_ => panic!("optional type cannot be a list"),
}
}
}
/// Position of a rule within the generated grammar. The derived
/// ordering follows declaration order (`Root < Middle < Leaf`);
/// `GbnfComplex::to_grammar` sorts by it so root rules are emitted
/// first and leaf rules last.
#[derive(Debug, Clone, Eq, Hash, PartialEq, Ord, PartialOrd)]
pub enum GbnfRuleLevel {
/// The grammar's entry rule.
Root,
/// Rules that refer to other rules (complex types, options, lists).
Middle,
/// Rules with no rule dependencies of their own (primitives, whitespace).
Leaf,
}
/// Actual GBNF rule itself. Holds rule text for dedup: equality and
/// hashing are derived over all fields, which is what lets
/// `GbnfComplex::to_grammar` drop duplicate rules.
#[derive(Debug, Clone, Eq, Hash, PartialEq)]
pub struct GbnfRule {
// Name on the left-hand side of `::=`.
name: GbnfToken,
// Right-hand side of the rule.
text: String,
// Rules referenced by `text`; emptied by `flatten`.
dependents: Vec<GbnfRule>,
// Sort position in the final grammar output.
level: GbnfRuleLevel,
}
impl GbnfRule {
    /// Create a rule named `token` with the given right-hand side text
    /// and level. The new rule has no dependents.
    pub fn new(token: GbnfToken, rule_text: String, level: GbnfRuleLevel) -> GbnfRule {
        GbnfRule {
            name: token,
            text: rule_text,
            dependents: Vec::new(),
            level,
        }
    }

    /// The shared whitespace rule (`ws`): any run of spaces, tabs and
    /// newlines, including none.
    pub fn space() -> GbnfRule {
        GbnfRule::new(
            GbnfToken::new("ws"),
            r#"[ \t\n]*"#.to_string(),
            GbnfRuleLevel::Leaf,
        )
    }

    /// Turn this rule into an optional rule: the new rule matches
    /// either the original rule or the literal `null`, and carries the
    /// original rule as its only dependent.
    pub fn to_optional(self) -> GbnfRule {
        let option_text = format!(r#"{} | "null""#, self.name);
        let mut option_rule =
            GbnfRule::new(self.name.option_token(), option_text, GbnfRuleLevel::Middle);
        option_rule.dependents = vec![self];
        option_rule
    }

    /// Turn this rule into a list rule: main rule is a list type, and
    /// dependent rule is the original type/rule itself (followed by
    /// the whitespace rule the list text refers to).
    pub fn to_list(self) -> GbnfRule {
        let space = GbnfRule::space();
        // Either the literal empty list, or a bracketed, comma-separated
        // sequence of items, each preceded by optional whitespace.
        let list_text = format!(
            r#""[]" | "[" {ws} {item} ("," {ws} {item})* "]""#,
            ws = space.name,
            item = self.name,
        );
        let mut list_rule =
            GbnfRule::new(self.name.list_token(), list_text, GbnfRuleLevel::Middle);
        list_rule.dependents = vec![self, space];
        list_rule
    }

    /// Consume this rule to produce a list of rules consisting of
    /// this rule and its dependents. The rules in the list have no
    /// dependents. Useful for final output of a rules list.
    fn flatten(mut self) -> Vec<GbnfRule> {
        let dependents = std::mem::take(&mut self.dependents);
        // Self needs to be in front to get proper rule ordering.
        let mut flattened = vec![self];
        flattened.extend(dependents.into_iter().flat_map(GbnfRule::flatten));
        flattened
    }
}
/// Tokens in the GBNF rule: the name a rule is defined under and
/// referred to by. A thin wrapper around the token's `String`;
/// tokens built through `From<&str>` / `GbnfToken::new` are
/// converted to camel case.
#[repr(transparent)]
#[derive(Debug, Clone, Eq, Hash, PartialEq)]
pub struct GbnfToken(String);
impl From<&str> for GbnfToken {
fn from(value: &str) -> Self {
GbnfToken(value.to_string().to_case(Case::Camel))
}
}
impl Display for GbnfToken {
    /// Writes the token's name verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}
/// Gives read access to the wrapped `String`, so a `&GbnfToken` can be
/// passed where a `&str` is expected (as the `rule` calls in this file
/// do).
impl Deref for GbnfToken {
type Target = String;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// Gives mutable access to the wrapped `String`. Edits made this way
/// are applied as-is; they do not go through the camel-case
/// conversion done by `From<&str>`.
impl DerefMut for GbnfToken {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl GbnfToken {
    /// Create a token from anything string-like. Delegates to
    /// `From<&str>`, so the same name normalisation applies.
    pub fn new<S: AsRef<str>>(value: S) -> GbnfToken {
        let raw: &str = value.as_ref();
        raw.into()
    }

    /// Token for the optional variant of this rule: this token's name
    /// with `Option` appended, passed through `From<&str>` again.
    fn option_token(&self) -> GbnfToken {
        let suffixed = [self.0.as_str(), "Option"].concat();
        GbnfToken::from(suffixed.as_str())
    }

    /// Token for the list variant of this rule: this token's name with
    /// `List` appended, passed through `From<&str>` again.
    fn list_token(&self) -> GbnfToken {
        let suffixed = [self.0.as_str(), "List"].concat();
        GbnfToken::from(suffixed.as_str())
    }
}
/// Represents a primitive value in the GBNF, the simplest possible
/// value a type can hold.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum GbnfPrimitive {
/// A double-quoted string; rule token `string`.
String,
/// `true` or `false`; rule token `boolean`.
Boolean,
/// A decimal number; rule token `number`.
Number,
}
/// Rule text (right-hand side) for each primitive.
impl GbnfPrimitive {
// A double quote, any run of characters other than a double quote,
// and a closing double quote. The pattern has no provision for
// escaped quotes inside the string.
pub(self) const STRING: &'static str = r#""\"" ([^"]*) "\"""#;
// Either of the two literals.
pub(self) const BOOLEAN: &'static str = r#""true" | "false""#;
// One or more digits, an optional ".", then zero or more digits.
// NOTE(review): there is no leading sign in the pattern, although
// signed and floating-point Rust types are mapped to Number; confirm
// whether negative values are meant to be representable.
pub(self) const NUMBER: &'static str = r#"[0-9]+ "."? [0-9]*"#;
}
impl AsGrammar for GbnfPrimitive {
/// Output the raw GBNF rule of this primitive.
fn rule(&self, token: &str) -> GbnfRule {
let rule_text = match self {
Self::Boolean => Self::BOOLEAN,
Self::Number => Self::NUMBER,
Self::String => Self::STRING,
};
GbnfRule::new(token.into(), rule_text.to_string(), GbnfRuleLevel::Leaf)
}
/// Output the token name of the GBNF rule (to refer to in other
/// rules).
fn base_type_token(&self) -> GbnfToken {
GbnfToken::from(match self {
Self::Boolean => "boolean",
Self::Number => "number",
Self::String => "string",
})
}
fn with_limit<'a>(&'a self, limit: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a {
Limited(self, limit.and_then(|l| LimitedGbnfPrimitive::new(l)))
}
}
/// Categorize all types of fields that the generated grammar can
/// handle. Nested lists, nested options and optional lists have no
/// variant; the blanket `AsGbnf` impls in this file panic on them.
#[derive(Debug, Clone)]
pub enum GbnfFieldType {
/// A single property on the type, e.g. myField: i32
Primitive(GbnfPrimitive),
/// Can be a value or null.
OptionalPrimitive(GbnfPrimitive),
/// A list/vec of primitive types.
PrimitiveList(GbnfPrimitive),
/// A complex type, with its own properties.
Complex(GbnfComplex),
/// Can be a value or null.
OptionalComplex(GbnfComplex),
/// A list/vec of complex types.
ComplexList(GbnfComplex),
}
impl GbnfFieldType {
pub fn as_primitive(self) -> GbnfPrimitive {
match self {
GbnfFieldType::Primitive(primitive) => primitive,
_ => panic!("not a GBNF primitive"),
}
}
pub fn as_complex(self) -> GbnfComplex {
match self {
GbnfFieldType::Complex(complex) => complex,
_ => panic!("Not a GBNF complex type"),
}
}
}
/// Connect a property name and a field type to generate a GBNF rule.
#[derive(Debug, Clone)]
pub struct GbnfField {
/// Property name; written as the quoted key in the owning complex
/// type's rule and used to look up nested limits.
pub field_name: String,
/// The kind of value the property holds.
pub field_type: GbnfFieldType,
/// NOTE(review): not read anywhere in the visible part of this
/// file; presumably marks fields that carry a value limit. Confirm
/// against the code that sets it.
pub limited: bool,
}
impl AsGrammar for GbnfField {
    /// Token of the type inside the field. Optional and list fields
    /// report the token of their inner type.
    fn base_type_token(&self) -> GbnfToken {
        match &self.field_type {
            GbnfFieldType::Primitive(primitive)
            | GbnfFieldType::OptionalPrimitive(primitive)
            | GbnfFieldType::PrimitiveList(primitive) => primitive.base_type_token(),
            GbnfFieldType::Complex(complex)
            | GbnfFieldType::OptionalComplex(complex)
            | GbnfFieldType::ComplexList(complex) => complex.base_type_token(),
        }
    }

    /// Rule for the field's type, registered under `token`. Optional
    /// and list fields wrap the inner rule with `to_optional` /
    /// `to_list`, so the returned rule's name gains the matching
    /// suffix.
    fn rule(&self, token: &str) -> GbnfRule {
        match &self.field_type {
            GbnfFieldType::Primitive(primitive) => primitive.rule(token),
            GbnfFieldType::OptionalPrimitive(primitive) => primitive.rule(token).to_optional(),
            GbnfFieldType::PrimitiveList(primitive) => primitive.rule(token).to_list(),
            GbnfFieldType::Complex(complex) => complex.rule(token),
            GbnfFieldType::OptionalComplex(complex) => complex.rule(token).to_optional(),
            GbnfFieldType::ComplexList(complex) => complex.rule(token).to_list(),
        }
    }

    /// Pair this field with an optional limit.
    fn with_limit<'a>(&'a self, limit: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a {
        Limited(
            self,
            limit.map(|field_limit| LimitedGbnfField {
                field: self,
                limit: field_limit,
            }),
        )
    }
}
/// The complex type is a direct mapping from a supported Rust struct,
/// and also used to generate the root of a GBNF grammar.
#[derive(Debug, Clone)]
pub struct GbnfComplex {
/// Type name; the source of this type's rule token and the prefix
/// used to namespace limited field rules.
pub name: String,
/// The struct's fields, in the order they are written into the rule.
pub fields: Vec<GbnfField>,
}
impl GbnfComplex {
    /// Render the full grammar for this type as text: one
    /// `name ::= rule` line per rule, ending with a newline. The
    /// `root` rule points at this type's rule; when `limit` is given,
    /// the type's rule is built through `with_limit`.
    pub fn to_grammar(&self, limit: Option<GbnfLimit>) -> String {
        // The root type cannot itself be limited.
        let mut root = GbnfRule::new(
            GbnfToken::new("root"),
            self.base_type_token().0,
            GbnfRuleLevel::Root,
        );
        let root_type_rule = match limit.as_ref() {
            Some(active_limit) => {
                let limited_self = self.with_limit(Some(active_limit));
                limited_self.rule(&root.text)
            }
            None => self.rule(&root.text),
        };
        root.dependents.push(root_type_rule);

        // Final output: flatten all rules into one giant list of
        // rules with no dependents, sort according to "rule level",
        // and then deduplicate.
        let body = root
            .flatten()
            .into_iter()
            .sorted_by(|left, right| left.level.cmp(&right.level))
            .unique()
            .map(|rule| format!("{} ::= {}", rule.name, rule.text))
            .join("\n");
        format!("{}\n", body)
    }

    /// The rule for one field. A field with an entry in a complex
    /// `limit` gets a limited rule; every other field gets its plain
    /// rule under its base type token.
    fn rule_for_field(&self, field: &GbnfField, limit: Option<&GbnfLimit>) -> GbnfRule {
        let field_limit = match limit {
            Some(GbnfLimit::Complex(nested_limit)) => nested_limit.get(field.field_name.as_str()),
            Some(GbnfLimit::Simple(..)) | None => None,
        };
        match field_limit {
            Some(nested) => {
                // For complex type fields, we "namespace" the
                // field type name when limiting, to prevent
                // collisions.
                let limited_field = field.with_limit(Some(nested));
                let namespaced = format!("{}{}", self.name, limited_field.base_type_token());
                limited_field.rule(&namespaced)
            }
            None => field.rule(&field.base_type_token()),
        }
    }

    /// The GBNF rule for the complex type itself: a braced,
    /// comma-separated sequence of `"name":` keys, each followed by
    /// the token of that field's rule. The returned rule has no
    /// dependents attached.
    fn rule_for_self(&self, token: &str, limit: Option<&GbnfLimit>) -> GbnfRule {
        let ws = GbnfRule::space().name;
        let members = self
            .fields
            .iter()
            .map(|field| {
                let value_token = self.rule_for_field(field, limit).name;
                format!(
                    r#"{} "\"{}\":" {} {}"#,
                    ws, field.field_name, ws, value_token
                )
            })
            .join(r#" "," "#);
        let body = [r#""{" "#, members.as_str(), r#" "}""#].concat();
        GbnfRule::new(token.into(), body, GbnfRuleLevel::Middle)
    }

    /// The rules for the fields of the complex type.
    fn rules_for_fields(&self, limit: Option<&GbnfLimit>) -> Vec<GbnfRule> {
        self.fields
            .iter()
            .map(|field| self.rule_for_field(field, limit))
            .collect()
    }
}
impl AsGrammar for GbnfComplex {
    /// The unlimited rule for this type, registered under `token`. Its
    /// dependents are the rules of all fields followed by the
    /// whitespace rule.
    fn rule(&self, token: &str) -> GbnfRule {
        let mut complex_rule = self.rule_for_self(token, None);
        let mut dependents = self.rules_for_fields(None);
        dependents.push(GbnfRule::space());
        complex_rule.dependents = dependents;
        complex_rule
    }

    /// Token built from the type's name.
    fn base_type_token(&self) -> GbnfToken {
        GbnfToken::new(self.name.as_str())
    }

    /// Pair this complex type with an optional limit.
    fn with_limit<'a>(&'a self, limit: Option<&'a GbnfLimit>) -> impl AsGrammar + 'a {
        Limited(
            self,
            limit.map(|complex_limit| LimitedGbnfComplex {
                complex: self,
                limit: complex_limit,
            }),
        )
    }
}