Files
Kniepunkt/kniepunkt/episodes.py
T
ankn e08c484838 Add KNIEPUNKT Assistant with multi-LLM editorial workflow
Six-step weekly workflow (research → sources → storyline → draft →
quality → publication) supporting Claude, ChatGPT, Gemini, and Mistral
in parallel for creative steps. Web search via Anthropic tool for news
research. Episode index built from 34 existing KNIEPUNKT episodes for
redundancy checks. Sessions persisted as JSON for mid-workflow resume.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-24 23:54:23 +02:00

134 lines
4.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Load and index existing KNIEPUNKT episodes for redundancy checks."""
import json
from pathlib import Path
# Directory scanned for existing episode documents; resolved two levels up
# from this file.
EPISODES_DIR = Path(__file__).parent.parent / "KNIEPUNKTe"
# JSON file in which build_index() stores the episode index and from which it
# reloads it on later calls.
CACHE_FILE = Path(__file__).parent.parent / "episodes_cache.json"
# System prompt (German) sent along with every episode-analysis request in
# build_index().
_SYSTEM = (
"Du bist ein erfahrener Redaktionsassistent für die LinkedIn-Kolumne KNIEPUNKT "
"von Dr. André Knie. Analysiere Episodentexte präzise und strukturiert."
)
def _read_pdf(path: Path) -> str:
    """Return the text of all pages of the PDF at *path*, newline-separated.

    A page whose ``extract_text()`` result is falsy contributes an empty
    string, so the number of joined segments equals the number of pages.
    """
    # Imported lazily so pypdf is only needed when a PDF is actually read.
    from pypdf import PdfReader

    page_texts = []
    for page in PdfReader(str(path)).pages:
        extracted = page.extract_text()
        page_texts.append(extracted if extracted else "")
    return "\n".join(page_texts)
def _read_docx(path: Path) -> str:
    """Return the text of the DOCX at *path*, one line per paragraph.

    Only the entries of ``Document.paragraphs`` are read.
    """
    # Imported lazily so python-docx is only needed when a DOCX is read.
    from docx import Document

    paragraph_texts = [para.text for para in Document(str(path)).paragraphs]
    return "\n".join(paragraph_texts)
def _read_odt(path: Path) -> str:
    """Return the text of the ODT at *path*, one line per ``text:p`` element.

    Each paragraph's text is gathered recursively, so runs wrapped in nested
    elements (e.g. ``text:span`` for formatted text, or hyperlinks) are
    included. Reading only the paragraph's direct text-node children would
    silently drop such runs.
    """
    # Imported lazily so odfpy is only needed when an ODT is actually read.
    from odf import teletype
    from odf.opendocument import load
    from odf.text import P

    doc = load(str(path))
    # teletype.extractText walks the element tree below each paragraph and
    # also expands text:s / text:tab / text:line-break into whitespace.
    return "\n".join(
        teletype.extractText(paragraph) for paragraph in doc.getElementsByType(P)
    )
def _read_episode(path: Path) -> str:
    """Read the episode file at *path* as plain text.

    The reader is selected by the lower-cased file extension (``.pdf``,
    ``.docx`` or ``.odt``). Any other extension yields an empty string.
    """
    extension = path.suffix.lower()
    if extension == ".pdf":
        text = _read_pdf(path)
    elif extension == ".docx":
        text = _read_docx(path)
    elif extension == ".odt":
        text = _read_odt(path)
    else:
        # Unsupported format: callers treat empty text as "nothing to index".
        text = ""
    return text
def _episode_files() -> list[Path]:
    """Return one canonical file per episode number, preferring DOCX/ODT over PDF.

    Only regular files in ``EPISODES_DIR`` with a ``.pdf``, ``.docx`` or
    ``.odt`` extension are considered. The episode number is the integer
    prefix of the file stem, i.e. everything before the first ``_`` or space.
    Files without such a prefix are skipped.

    Returns:
        The chosen files in ascending *numeric* episode order, or an empty
        list when ``EPISODES_DIR`` does not exist.
    """
    if not EPISODES_DIR.exists():
        return []
    # Keyed by the integer episode number: string keys would sort "10" before
    # "2" and would treat "1" and "001" as two different episodes.
    by_number: dict[int, Path] = {}
    for f in sorted(EPISODES_DIR.iterdir()):
        suffix = f.suffix.lower()
        if suffix not in (".pdf", ".docx", ".odt") or not f.is_file():
            continue
        # Parse the stem rather than the full name so that a file named only
        # by its number (e.g. "034.pdf") is recognised as well.
        prefix = f.stem.split("_")[0].split(" ")[0].strip()
        try:
            number = int(prefix)
        except ValueError:
            continue
        # A PDF never replaces an already chosen file; DOCX/ODT always do.
        if number not in by_number or suffix in (".docx", ".odt"):
            by_number[number] = f
    return [by_number[number] for number in sorted(by_number)]
def build_index(client, force: bool = False) -> list[dict]:
    """Build or load the episode summary index.

    Args:
        client: LLM client handle, passed through unchanged to
            ``kniepunkt.llm.chat``.
        force: When true, ignore an existing cache file and rebuild the index.

    Returns:
        One dict per episode whose text could be read. Each dict is the JSON
        object parsed from the model's answer plus a ``"file"`` key. If the
        answer contains no parseable JSON object, a stub entry derived from
        the file name is used instead.

    Side effects:
        After a rebuild the index is written to ``CACHE_FILE``.
    """
    if not force and CACHE_FILE.exists():
        # The cache is written with ensure_ascii=False, so it must be decoded
        # as UTF-8 explicitly instead of relying on the platform default.
        with open(CACHE_FILE, encoding="utf-8") as f:
            return json.load(f)

    # Imported lazily: only needed when the index is actually rebuilt.
    from kniepunkt.llm import chat

    index = []
    for path in _episode_files():
        text = _read_episode(path)
        if not text.strip():
            continue
        prompt = f"""Analysiere diese KNIEPUNKT-Episode und antworte NUR mit einem JSON-Objekt:
{{
"nummer": "z.B. 001",
"titel": "Titel der Episode",
"hauptthema": "Hauptthema in 1-2 Sätzen",
"allegories": ["verwendete Allegorie oder Metapher", ...],
"kulturelle_referenzen": ["Mythos/Literatur-Referenz", ...],
"kernargument": "Hauptaussage des Autors in 1-2 Sätzen"
}}
Episodentext (gekürzt):
{text[:5000]}"""
        response = chat(client, [{"role": "user", "content": prompt}], _SYSTEM, max_tokens=512)
        try:
            # Keep only the span from the first "{" to the last "}" so that
            # any prose or code fences around the JSON object are ignored.
            start = response.find("{")
            end = response.rfind("}") + 1
            data = json.loads(response[start:end])
            data["file"] = str(path)
            index.append(data)
        except (json.JSONDecodeError, ValueError):
            index.append({
                # Same prefix rule as _episode_files(): the number ends at the
                # first "_" or space of the file stem.
                "nummer": path.stem.split("_")[0].split(" ")[0].strip(),
                "titel": path.stem,
                "hauptthema": "",
                "allegories": [],
                "kulturelle_referenzen": [],
                "kernargument": "",
                "file": str(path),
            })
    with open(CACHE_FILE, "w", encoding="utf-8") as f:
        json.dump(index, f, ensure_ascii=False, indent=2)
    return index
def format_for_context(index: list[dict]) -> str:
    """Format the episode index as a compact context string for prompts.

    Args:
        index: Episode entries as produced by ``build_index``. Missing keys
            are tolerated: number and title fall back to ``"?"``, the other
            fields to empty text.

    Returns:
        One line per episode, joined with newlines; ``""`` for an empty index.
    """

    def _joined(value) -> str:
        # Entries originate from LLM-generated JSON, so a "list" field may be
        # null, a bare string, or hold non-string items. Normalise instead of
        # raising TypeError or joining a string character by character.
        if not value:
            return ""
        if isinstance(value, str):
            return value
        if isinstance(value, (list, tuple)):
            return ", ".join(str(item) for item in value)
        return str(value)

    lines = []
    for ep in index:
        # NOTE(review): the previous code appended `or ""` here, which is a
        # no-op; a visible placeholder may have been intended. Confirm.
        allegs = _joined(ep.get("allegories", []))
        refs = _joined(ep.get("kulturelle_referenzen", []))
        lines.append(
            f"Ep.{ep.get('nummer', '?')} »{ep.get('titel', '?')}«: "
            f"{ep.get('hauptthema', '')} | Allegorien: {allegs} | Referenzen: {refs}"
        )
    return "\n".join(lines)