Source code for churro_ocr.templates.base
"""Provider-neutral template protocols for OCR backends."""
from __future__ import annotations
from collections.abc import Callable
from typing import Any, Protocol, runtime_checkable
from churro_ocr.page_detection import DocumentPage
OCRConversation = list[dict[str, Any]]
[docs]
@runtime_checkable
class OCRPromptTemplate(Protocol):
"""Protocol for OCR templates that build model conversations."""
[docs]
def build_conversation(self, page: DocumentPage) -> OCRConversation:
"""Build a model conversation for one page.
:param page: Page to convert into a model-specific prompt payload.
:returns: Structured conversation ready for backend-specific rendering.
"""
...
OCRPromptTemplateCallable = Callable[[DocumentPage], OCRConversation]
OCRPromptTemplateLike = OCRPromptTemplate | OCRPromptTemplateCallable
[docs]
def build_ocr_conversation(template: OCRPromptTemplateLike, page: DocumentPage) -> OCRConversation:
"""Build an OCR conversation from a template or template callable.
:param template: Prompt template object or callable.
:param page: Page to convert into a conversation.
:returns: Structured OCR conversation for ``page``.
"""
if callable(template) and not isinstance(template, OCRPromptTemplate):
return template(page)
return template.build_conversation(page)
# Internal aliases kept to make the refactor incremental while the
# provider implementations migrate off the old HF-prefixed names.
HFConversation = OCRConversation
HFOCRTemplate = OCRPromptTemplate
HFOCRTemplateCallable = OCRPromptTemplateCallable
HFOCRTemplateLike = OCRPromptTemplateLike
__all__ = [
"build_ocr_conversation",
"OCRConversation",
"OCRPromptTemplate",
"OCRPromptTemplateCallable",
"OCRPromptTemplateLike",
]