import uuid
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, Generic, List, Optional, TypedDict, TypeVar
from pydantic import BaseModel, Field
from .metadata_utils import extract_unique_context_ids
# Type variable for the structured output carried by the generic
# StructuredGenerationRow model (its ``output`` field is typed ``T``).
T = TypeVar("T")
[docs]
# Overall grade labels, used as the type of EvaluationSchema.overall_grade.
# The ``str`` mix-in makes each member compare equal to its raw string value.
# Members appear to be listed from best to worst; declaration order is also
# the iteration order, so it is kept as-is.
class GradeSchema(str, Enum):
    PERFECT = "perfect"
    GOOD = "good"
    NEEDS_IMPROVEMENT = "needs_improvement"
    BAD = "bad"
    NOT_ACCEPTABLE = "not_acceptable"
[docs]
# One evaluation entry: free-text feedback paired with a numeric score.
# Both fields are required (no defaults).
class EvaluationEntrySchema(BaseModel):
    feedback: str  # textual feedback for this entry
    score: float  # numeric score; the valid range is not constrained here
[docs]
# Full evaluation record: one EvaluationEntrySchema (feedback + score) per
# criterion, plus a single overall grade. Every field is required.
class EvaluationSchema(BaseModel):
    # Per-criterion entries.
    coherence: EvaluationEntrySchema
    factuality: EvaluationEntrySchema
    grounding: EvaluationEntrySchema
    helpfulness: EvaluationEntrySchema
    relevance: EvaluationEntrySchema

    # Summary grade across all criteria.
    overall_grade: GradeSchema
[docs]
# Speaker role attached to a ConversationEntry. The ``str`` mix-in makes each
# member compare equal to its raw string value.
class Role(str, Enum):
    USER = "user"
    ASSISTANT = "assistant"
[docs]
# A single turn in a conversation: who spoke and what was said.
class ConversationEntry(BaseModel):
    role: Role  # speaker of this turn
    content: str  # text of the turn

    # Optional reasoning text stored next to the content; None when absent.
    reasoning_content: str | None = None
[docs]
# A list of conversation entries plus free-form metadata.
class Conversation(BaseModel):
    # Required list of turns.
    conversations: list[ConversationEntry]

    # Arbitrary string-keyed metadata; each instance gets its own empty dict.
    metadata: dict[str, Any] = Field(default_factory=dict)
[docs]
# Conversation extended with optional context and persona strings.
# All three extra fields default to None.
class ConversationWithContext(Conversation):
    instruction_context: str | None = None
    response_context: str | None = None
    persona: str | None = None
[docs]
# ConversationWithContext extended with the result of an evaluation.
class EvaluatedConversationWithContext(ConversationWithContext):
    # Evaluation record; None when no evaluation is attached.
    evaluation: EvaluationSchema | None = None

    # Aggregate score. Defaults to 0.0, but None is also an accepted value.
    final_score: float | None = 0.0
[docs]
class PersonaEntry(BaseModel):
    """Represents a set of generated personas for a source document."""

    # Required list of persona description strings.
    descriptions: list[str] = Field(
        ...,
        description="List of persona descriptions",
    )

    # Free-form metadata. The annotation is a bare ``dict``, so key and value
    # types are not constrained; each instance gets its own empty dict.
    metadata: dict = Field(
        default_factory=dict,
        description="Any associated metadata",
    )
[docs]
class StructuredGenerationRow(BaseModel, Generic[T]):
    """Represents a single row of structured generation, including metadata."""

    # Required prompt text.
    instruction: str = Field(
        ...,
        description="The user instructions/prompt used to generate the output.",
    )

    # Optional inputs; both default to None.
    context: str | None = Field(
        default=None,
        description="The context provided to the model.",
    )
    persona: str | None = Field(
        default=None,
        description="The persona adopted by the respondent.",
    )

    # Required payload, typed by the model's type parameter ``T``.
    output: T = Field(
        ...,
        description="The structured output generated by the model.",
    )

    # Arbitrary string-keyed metadata; each instance gets its own empty dict.
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Metadata about the generation.",
    )
[docs]
class GeneratedResponsePrompt(BaseModel):
    """Output of RespondentPromptModifier."""

    # Required: the modified respondent prompt text.
    prompt: str = Field(..., description="Modified respondent prompt")

    # Optional context string used in the respondent prompt; defaults to None.
    context: Optional[str] = Field(
        None, description="Context used in respondent prompt"
    )

    # Arbitrary string-keyed metadata; each instance gets its own empty dict.
    # The description previously read "promp"; the typo is corrected because
    # this text is exposed through the model's field info.
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Additional metadata about respondent prompt generation",
    )
[docs]
# A source document: an id, optional text, the persona generations attached
# to it, and free-form metadata.
class Document(BaseModel):
    # String form of a fresh random UUID (uuid4) unless the caller supplies
    # an id.
    id: str = Field(
        description="Id of the document. Autogenerated if not specified",
        default_factory=lambda: str(uuid.uuid4()),
    )

    # Document body; None when no text is attached.
    text: str | None = None

    # Persona generations for this document; starts as an empty list.
    personas: list[PersonaEntry] = Field(
        description="List of persona generations",
        default_factory=list,
    )

    # Arbitrary string-keyed metadata; each instance gets its own empty dict.
    metadata: dict[str, Any] = Field(
        description="Any associated metadata",
        default_factory=dict,
    )
[docs]
@dataclass
class GenerationState:
    """Mutable progress record for a generation run.

    Holds the generated/requested counters, the most recent item, and the
    coverage statistics that ``update`` accumulates from generated items.
    """

    # Progress counters.
    num_generated: int = 0
    num_requested: int = 0

    # Timestamp taken with ``datetime.now`` when the state object is created.
    start_time: datetime = field(default_factory=datetime.now)

    # Most recent item passed to ``update``; None if it was not a BaseModel.
    last_item: Optional[BaseModel] = None

    monitor: Optional[Any] = None  # GenerationMonitor
    metadata: Dict[str, Any] = field(default_factory=dict)
    stop_event: Optional[Any] = None  # asyncio.Event

    # Context coverage tracking:
    # context id -> number of items whose metadata referenced that id.
    context_counts: Dict[str, int] = field(default_factory=dict)
    # Distinct personas in the order they were first seen.
    unique_personas: list[str] = field(default_factory=list)

    def update(self, item: Any):
        """Record one newly generated item.

        Increments ``num_generated``, remembers the item as ``last_item``
        (only when it is a ``BaseModel``), bumps ``context_counts`` for each
        context id found in the item's metadata, and appends the item's
        persona to ``unique_personas`` if it has not been seen yet.

        Args:
            item: The generated item. Metadata is read from its ``metadata``
                attribute, or from the ``"metadata"`` key when the item is a
                plain dict.
        """
        self.num_generated += 1
        self.last_item = item if isinstance(item, BaseModel) else None

        # Locate the metadata: attribute access wins, dict lookup is the
        # fallback for plain-dict items.
        if hasattr(item, "metadata"):
            item_meta = item.metadata
        elif isinstance(item, dict):
            item_meta = item.get("metadata")
        else:
            item_meta = None
        meta_is_dict = isinstance(item_meta, dict)

        # Only dict metadata contributes to context coverage.
        if meta_is_dict:
            counts = self.context_counts
            for context_id in extract_unique_context_ids(item_meta):
                counts[context_id] = counts.get(context_id, 0) + 1

        # Prefer the item's own ``persona`` attribute; otherwise fall back to
        # the ``persona_name`` entry of dict metadata.
        persona = getattr(item, "persona", None)
        if not persona and meta_is_dict:
            persona = item_meta.get("persona_name")

        if not persona or persona in self.unique_personas:
            return
        self.unique_personas.append(persona)