// slide-factory/apps/api/src/server.ts

import { access, mkdir, writeFile } from "node:fs/promises";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { inflateSync } from "node:zlib";
import cors from "cors";
import express from "express";
import { XMLParser } from "fast-xml-parser";
import JSZip from "jszip";
import multer from "multer";
import { nanoid } from "nanoid";
import pdfParse from "pdf-parse";
import { z } from "zod";
import { loadStylePack, planDeckFromSource, SourceDocument } from "@slide-factory/core";
import { renderPptx } from "@slide-factory/render-pptx";

/**
 * Inline source accepted by the JSON deck endpoint: plain text or Markdown
 * (Markdown when `type` is omitted), an optional title, and non-empty content.
 */
const DeckInputSchema = z.object({
  type: z.enum(["text", "markdown"]).default("markdown"),
  title: z.string().optional(),
  content: z.string().min(1)
});

/**
 * Request body for `POST /api/decks`.
 * `style` falls back to "incorta" and `audience` to "executives" when omitted.
 */
const CreateDeckRequestSchema = z.object({
  style: z.string().default("incorta"),
  instructions: z.string().optional(),
  audience: z.string().default("executives"),
  input: DeckInputSchema
});

// Fallback port used when SLIDE_FACTORY_PORT is unset or empty.
const DEFAULT_PORT = 3025;
// Upper bound for a single uploaded source file (30 MiB).
const MAX_UPLOAD_BYTES = 30 * 1024 * 1024;

const app = express();
const port = Number(process.env.SLIDE_FACTORY_PORT || DEFAULT_PORT);
// Directory that receives generated artifacts; also served statically below.
const outputDir = path.resolve(process.env.SLIDE_FACTORY_OUTPUT_DIR || "outputs");
// Uploads are buffered in memory and limited to one file per request.
const upload = multer({
  storage: multer.memoryStorage(),
  limits: { fileSize: MAX_UPLOAD_BYTES, files: 1 }
});

// Middleware order matters: CORS first, then JSON body parsing, then static artifacts.
app.use(cors());
app.use(express.json({ limit: "5mb" }));
app.use("/outputs", express.static(outputDir));

// Liveness probe: always answers with a static service descriptor.
app.get("/api/health", (_req, res) => {
  const status = { ok: true, service: "slide-factory", version: "0.1.0" };
  res.json(status);
});

// Creates a deck from a JSON body validated by CreateDeckRequestSchema.
// Validation and generation failures are forwarded to the error middleware.
app.post("/api/decks", async (req, res, next) => {
  try {
    const { input, instructions, audience, style } = CreateDeckRequestSchema.parse(req.body);
    const artifacts = await createDeckArtifacts({
      source: input,
      instructions,
      audience,
      styleId: style
    });

    res.status(201).json(artifacts);
  } catch (error) {
    next(error);
  }
});

// Creates a deck from a multipart request: either an uploaded file in the
// "source" field or pasted text in the "content" field. An uploaded file wins
// when both are present.
app.post("/api/decks/from-source", upload.single("source"), async (req, res, next) => {
  try {
    const pastedText = typeof req.body.content === "string" ? req.body.content : "";

    let source: SourceDocument;
    if (req.file) {
      source = await sourceFromUpload(req.file);
    } else {
      source = sourceFromText(pastedText);
    }

    const artifacts = await createDeckArtifacts({
      source,
      instructions: stringField(req.body.instructions),
      // stringField never returns an empty string, so ?? is enough for the default.
      audience: stringField(req.body.audience) ?? "executives",
      styleId: stringField(req.body.style) ?? "incorta"
    });

    // Echo back only the source metadata, never the extracted content.
    const { title, type, sourceName } = source;
    res.status(201).json({
      ...artifacts,
      source: { title, type, sourceName }
    });
  } catch (error) {
    next(error);
  }
});

// Final error middleware: logs the failure and reports it to the client as a
// 400 with the error message ("Unknown error" for non-Error values).
app.use((error: unknown, _req: express.Request, res: express.Response, next: express.NextFunction) => {
  console.error(error);

  // Once a response has started (e.g. a static file stream failed midway) the
  // status and body can no longer be changed; writing them would throw inside
  // this handler. Delegate to Express' default handler, which closes the
  // connection.
  if (res.headersSent) {
    next(error);
    return;
  }

  res.status(400).json({
    error: error instanceof Error ? error.message : "Unknown error"
  });
});

// Start accepting connections and log the local URL once the server is bound.
app.listen(port, () => {
  const localUrl = `http://127.0.0.1:${port}`;
  console.log(`Slide Factory API listening on ${localUrl}`);
});

/**
 * Locates the directory of a style pack.
 *
 * Looks for `styles/<styleId>/theme.json` relative to the current working
 * directory first, then relative to the directory three levels above this
 * module. When neither exists, the working-directory candidate is returned.
 *
 * @param styleId Style pack identifier; it arrives from request bodies and is
 *   therefore untrusted.
 * @returns Path of the style pack directory.
 * @throws Error when `styleId` is absolute or would resolve outside the styles
 *   directory (for example `../../etc`).
 */
async function resolveStylePath(styleId: string): Promise<string> {
  const stylesRoot = path.resolve("styles");
  const direct = path.resolve(stylesRoot, styleId);

  // Containment check: a client-chosen id must never point the style loader at
  // a directory outside the styles root.
  const relative = path.relative(stylesRoot, direct);
  const escapesRoot =
    path.isAbsolute(styleId) ||
    relative === ".." ||
    relative.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relative);
  if (escapesRoot) {
    throw new Error(`Invalid style id: ${styleId}`);
  }

  if (await exists(path.join(direct, "theme.json"))) return direct;

  const moduleDir = path.dirname(fileURLToPath(import.meta.url));
  const repoRoot = path.resolve(moduleDir, "../../..");
  const fromRepoRoot = path.join(repoRoot, "styles", styleId);
  if (await exists(path.join(fromRepoRoot, "theme.json"))) return fromRepoRoot;

  return direct;
}

/**
 * Plans a deck from the given source, then writes the deck spec (`deck.json`)
 * and the rendered presentation (`deck.pptx`) into a fresh job directory under
 * the output directory.
 *
 * @param options Source document, optional instructions, audience and style id.
 * @returns Job id, deck title, slide count and the public URLs of both artifacts.
 */
async function createDeckArtifacts(options: {
  source: SourceDocument;
  instructions?: string;
  audience: string;
  styleId: string;
}) {
  const { source, instructions, audience, styleId } = options;

  const id = nanoid(10);
  const style = await loadStylePack(await resolveStylePath(styleId));
  const deck = planDeckFromSource({ source, style, instructions, audience });

  // Each job gets its own directory named after the generated id.
  const jobDir = path.join(outputDir, id);
  await mkdir(jobDir, { recursive: true });

  const serializedDeck = `${JSON.stringify(deck, null, 2)}\n`;
  await writeFile(path.join(jobDir, "deck.json"), serializedDeck);
  await renderPptx({ deck, style, outputPath: path.join(jobDir, "deck.pptx") });

  const publicBase = `/outputs/${id}`;
  return {
    id,
    title: deck.title,
    slides: deck.slides.length,
    specUrl: `${publicBase}/deck.json`,
    pptxUrl: `${publicBase}/deck.pptx`
  };
}

/**
 * Wraps pasted text in a Markdown source document.
 *
 * @param content Raw pasted text.
 * @returns A Markdown source whose title is taken from the first `# ` heading, if any.
 * @throws Error when the text is empty or whitespace only.
 */
function sourceFromText(content: string): SourceDocument {
  const body = content.trim();
  if (body.length === 0) {
    throw new Error("Provide source text or upload a file.");
  }

  const document: SourceDocument = {
    type: "markdown",
    title: deriveTitle(body),
    content: body,
    sourceName: "pasted-source.md"
  };
  return document;
}

/**
 * Converts an uploaded file into a source document.
 *
 * The file kind is decided by extension and MIME type, checked in this order:
 * Markdown, plain text, PDF, PPTX, image. Images are not analysed; they yield a
 * short placeholder describing the file.
 *
 * @param file In-memory upload provided by multer.
 * @returns The source document built from the file.
 * @throws Error when no text can be extracted from a PDF or slide deck, or when
 *   the file kind is not supported.
 */
async function sourceFromUpload(file: Express.Multer.File): Promise<SourceDocument> {
  const { originalname: sourceName, mimetype: mime, buffer } = file;
  const extension = path.extname(sourceName).toLowerCase();
  const fallbackTitle = baseName(sourceName);

  // Markdown and plain text share the same handling; only the reported type differs.
  const isMarkdown = extension === ".md" || extension === ".markdown";
  const isPlainText = !isMarkdown && (extension === ".txt" || mime.startsWith("text/"));
  if (isMarkdown || isPlainText) {
    const content = buffer.toString("utf8");
    return {
      type: isMarkdown ? "markdown" : "text",
      title: deriveTitle(content) || fallbackTitle,
      content,
      sourceName
    };
  }

  if (extension === ".pdf" || mime === "application/pdf") {
    const content = await extractPdfText(buffer);
    if (!content) {
      throw new Error("No text could be extracted from the PDF.");
    }
    return { type: "pdf", title: fallbackTitle, content, sourceName };
  }

  if (extension === ".pptx" || mime.includes("presentation")) {
    const content = await extractPptxText(buffer);
    if (!content.trim()) {
      throw new Error("No text could be extracted from the slide deck.");
    }
    return { type: "pptx", title: fallbackTitle, content, sourceName };
  }

  if (mime.startsWith("image/")) {
    // No OCR here: describe the upload so downstream planning has some context.
    const placeholder = [
      `# ${fallbackTitle}`,
      "",
      "Image source uploaded.",
      "",
      `File name: ${sourceName}`,
      `Image type: ${mime}`,
      `File size: ${Math.round(file.size / 1024)} KB`,
      "",
      "Vision/OCR is not enabled in this local MVP yet. Use the instructions field to describe the slide goal, audience, and what the image should support."
    ];
    return {
      type: "text",
      title: fallbackTitle,
      sourceName,
      content: placeholder.join("\n")
    };
  }

  throw new Error(`Unsupported source type: ${file.originalname || mime}`);
}

/**
 * Extracts text from a PDF.
 *
 * Tries `pdf-parse` first. When it throws or yields only whitespace, falls back
 * to the lightweight stream scanner meant for simple PDFs.
 *
 * @param buffer Raw PDF bytes.
 * @returns Trimmed text; empty when neither extractor finds any.
 */
async function extractPdfText(buffer: Buffer): Promise<string> {
  let primary = "";
  try {
    primary = (await pdfParse(buffer)).text.trim();
  } catch {
    // Deliberate best effort: a parser failure just means we try the fallback.
    primary = "";
  }

  return primary || extractPdfTextFromStreams(buffer).trim();
}

/**
 * Fallback PDF text extractor: scans every `stream … endstream` section,
 * inflates it when its dictionary mentions `/FlateDecode`, and collects the
 * string operands found inside.
 *
 * @param buffer Raw PDF bytes.
 * @returns All extracted strings joined by single spaces, with whitespace runs
 *   collapsed; empty when nothing is found.
 */
function extractPdfTextFromStreams(buffer: Buffer): string {
  // latin1 maps every byte to one code unit, so binary stream data survives the
  // round trip through a string.
  const pdf = buffer.toString("latin1");
  const chunks: string[] = [];

  // The stream dictionary may span several lines, so it is matched with
  // [\s\S] rather than `.`. The negative lookahead keeps the lazy scan from
  // running past the end of the current object into a later object's stream.
  const streamPattern = /<<((?:(?!endobj)[\s\S])*?)>>\s*stream\r?\n([\s\S]*?)\r?\nendstream/g;

  let match: RegExpExecArray | null;
  while ((match = streamPattern.exec(pdf))) {
    const dictionary = match[1] || "";
    const raw = Buffer.from(match[2] || "", "latin1");
    let stream = raw;
    if (dictionary.includes("/FlateDecode")) {
      try {
        stream = inflateSync(raw);
      } catch {
        // Not plain deflate data (e.g. a filter chain): skip this stream.
        continue;
      }
    }
    const text = extractPdfStrings(stream.toString("latin1")).join(" ");
    if (text) chunks.push(text);
  }

  // Whitespace is normalised to single spaces, so a plain space separator is
  // all the join needs.
  return chunks.join(" ").replace(/\s+/g, " ").trim();
}

/**
 * Collects the string operands of a decoded PDF stream: literal strings
 * `( … )` and hex strings `< … >`.
 *
 * Both forms are matched by one pattern so the result keeps document order
 * when a stream mixes them, and hex-looking text inside a literal string is
 * not reported a second time.
 *
 * @param stream Stream content decoded as latin1 text.
 * @returns Decoded, trimmed, non-empty strings in the order they appear.
 */
function extractPdfStrings(stream: string): string[] {
  const values: string[] = [];

  // Group 1: body of a literal string. `\\[\s\S]` lets a backslash escape any
  // character, including a line break (line continuation).
  // Group 2: body of a hex string with at least four hex digits/whitespace.
  const stringPattern = /\(((?:\\[\s\S]|[^\\)])*)\)|<([0-9A-Fa-f\s]{4,})>/g;

  for (const match of stream.matchAll(stringPattern)) {
    const [, literalBody, hexBody] = match;
    const decoded = hexBody !== undefined ? decodePdfHex(hexBody) : decodePdfLiteral(literalBody ?? "");
    const value = decoded.trim();
    if (value) values.push(value);
  }

  return values;
}

/** Single-character escapes of PDF literal strings, keyed by the character after the backslash. */
const PDF_LITERAL_ESCAPES = new Map<string, string>([
  ["n", "\n"],
  ["r", "\r"],
  ["t", "\t"],
  ["b", "\b"],
  ["f", "\f"]
]);

/**
 * Decodes the escape sequences of a PDF literal string body (the text between
 * the outer parentheses).
 *
 * All escapes are resolved in a single left-to-right pass. Running one
 * replace per escape kind would re-interpret the output of an earlier pass:
 * `\\n` (escaped backslash followed by "n") would turn into a backslash plus a
 * line feed instead of the two characters `\n`.
 *
 * @param value Literal string body, still containing backslash escapes.
 * @returns The decoded text. A trailing lone backslash is left untouched.
 */
function decodePdfLiteral(value: string): string {
  return value.replace(
    /\\(?:([0-7]{1,3})|(\r\n?|\n)|([\s\S]))/g,
    (_match: string, octal?: string, lineBreak?: string, escaped?: string) => {
      // \ddd: octal character code; high-order overflow is ignored.
      if (octal !== undefined) return String.fromCharCode(Number.parseInt(octal, 8) & 0xff);
      // Backslash + end-of-line is a line continuation and produces nothing.
      if (lineBreak !== undefined) return "";
      // Known escapes map to their control character; for anything else
      // (including "(", ")" and "\") the backslash is simply dropped.
      const char = escaped ?? "";
      return PDF_LITERAL_ESCAPES.get(char) ?? char;
    }
  );
}

/**
 * Decodes the body of a PDF hex string (the text between `<` and `>`).
 *
 * Whitespace is ignored and an odd trailing digit is padded with "0". Data
 * starting with the bytes FE FF is read as big-endian 16-bit code units (a
 * dangling final byte is ignored); anything else is read as latin1.
 *
 * @param value Hex digits, possibly interleaved with whitespace.
 * @returns The decoded text.
 */
function decodePdfHex(value: string): string {
  const digits = value.replace(/\s+/g, "");

  const bytes: number[] = [];
  for (let offset = 0; offset < digits.length; offset += 2) {
    const pair = digits.slice(offset, offset + 2).padEnd(2, "0");
    bytes.push(Number.parseInt(pair, 16));
  }

  const hasUtf16Bom = bytes[0] === 0xfe && bytes[1] === 0xff;
  if (!hasUtf16Bom) {
    return Buffer.from(bytes).toString("latin1");
  }

  // Skip the two BOM bytes, then combine each byte pair into one code unit.
  let decoded = "";
  for (let i = 2; i + 1 < bytes.length; i += 2) {
    decoded += String.fromCharCode((bytes[i] << 8) + bytes[i + 1]);
  }
  return decoded;
}

/**
 * Extracts the text of a .pptx file as Markdown, one `## Slide N` section per
 * slide that contains text, in slide-number order.
 *
 * @param buffer Raw .pptx (zip) bytes.
 * @returns The sections joined by blank lines; empty when no slide has text.
 */
async function extractPptxText(buffer: Buffer): Promise<string> {
  const zip = await JSZip.loadAsync(buffer);
  const parser = new XMLParser({
    ignoreAttributes: true,
    textNodeName: "#text",
    // Keep element text as strings. With the default (true) the parser turns
    // text such as "2024" or "true" into numbers/booleans, so slide text that
    // happens to be numeric is lost or reformatted ("007" -> 7).
    parseTagValue: false
  });
  const slideFiles = Object.keys(zip.files)
    .filter((name) => /^ppt\/slides\/slide\d+\.xml$/.test(name))
    .sort((a, b) => slideNumber(a) - slideNumber(b));

  const slides: string[] = [];
  for (const fileName of slideFiles) {
    const xml = await zip.file(fileName)?.async("string");
    if (!xml) continue;
    const parsed = parser.parse(xml);
    const text = collectText(parsed)
      .map((value) => value.trim())
      .filter(Boolean)
      .join(" ");
    if (text) {
      slides.push(`## Slide ${slideNumber(fileName)}\n\n${text}`);
    }
  }

  return slides.join("\n\n");
}

/**
 * Recursively gathers every text leaf of a parsed XML tree in document
 * (insertion) order.
 *
 * Numbers and booleans are included as their string form: XML parsers may
 * coerce element text such as "2024" or "true" into those types, and dropping
 * them would lose slide content.
 *
 * @param value Any node of the parsed tree: primitive, array or object.
 * @returns The text leaves below `value`; empty for null/undefined.
 */
function collectText(value: unknown): string[] {
  if (typeof value === "string") return [value];
  if (typeof value === "number" || typeof value === "boolean") return [String(value)];
  if (Array.isArray(value)) return value.flatMap((item) => collectText(item));
  if (value === null || typeof value !== "object") return [];
  // Every key is traversed the same way; element names do not matter here.
  return Object.values(value).flatMap((child) => collectText(child));
}

/**
 * Reads the slide index from a file name ending in `slide<N>.xml`.
 *
 * @param fileName Zip entry name, e.g. `ppt/slides/slide3.xml`.
 * @returns The numeric index, or 0 when the name does not match.
 */
function slideNumber(fileName: string): number {
  const match = fileName.match(/slide(\d+)\.xml$/);
  if (!match || !match[1]) return 0;
  return Number(match[1]);
}

/**
 * Finds the first level-one Markdown heading (`# Title`) in the content.
 *
 * @param content Markdown or plain text.
 * @returns The heading text without the leading `#`, or undefined when no line
 *   starts with `# ` after trimming.
 */
function deriveTitle(content: string): string | undefined {
  for (const rawLine of content.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (line.startsWith("# ")) {
      return line.replace(/^#\s+/, "").trim();
    }
  }
  return undefined;
}

/**
 * Turns a file name into a human-readable title: drops the directory and the
 * extension, and replaces each run of hyphens/underscores with one space.
 *
 * @param fileName File name or path.
 * @returns The cleaned-up base name.
 */
function baseName(fileName: string): string {
  const extension = path.extname(fileName);
  const stem = path.basename(fileName, extension);
  return stem.split(/[-_]+/).join(" ");
}

/**
 * Normalises an optional form field.
 *
 * @param value Raw field value of unknown type.
 * @returns The trimmed string, or undefined when the value is not a string or
 *   is blank.
 */
function stringField(value: unknown): string | undefined {
  if (typeof value !== "string") return undefined;
  const cleaned = value.trim();
  return cleaned.length > 0 ? cleaned : undefined;
}

/**
 * Reports whether a path is accessible to the current process.
 *
 * @param targetPath File or directory path to probe.
 * @returns true when `access` succeeds, false when it rejects for any reason.
 */
async function exists(targetPath: string): Promise<boolean> {
  return access(targetPath).then(
    () => true,
    () => false
  );
}