// Source listing metadata: 362 lines, 11 KiB, TypeScript.
import { access, mkdir, writeFile } from "node:fs/promises";
|
|
import path from "node:path";
|
|
import { fileURLToPath } from "node:url";
|
|
import { inflateSync } from "node:zlib";
|
|
import cors from "cors";
|
|
import express from "express";
|
|
import { XMLParser } from "fast-xml-parser";
|
|
import JSZip from "jszip";
|
|
import multer from "multer";
|
|
import { nanoid } from "nanoid";
|
|
import pdfParse from "pdf-parse";
|
|
import { z } from "zod";
|
|
import { loadStylePack, planDeckFromSource, SourceDocument } from "@slide-factory/core";
|
|
import { renderPptx } from "@slide-factory/render-pptx";
|
|
|
|
/**
 * Shape of the `input` object in a JSON deck request: the source content,
 * an optional title, and a content type that defaults to "markdown".
 */
const DeckInputSchema = z.object({
  type: z.enum(["text", "markdown"]).default("markdown"),
  title: z.string().optional(),
  content: z.string().min(1)
});

/**
 * Validation schema for the JSON body accepted by `POST /api/decks`.
 * `style` and `audience` fall back to "incorta" and "executives" when omitted.
 */
const CreateDeckRequestSchema = z.object({
  style: z.string().default("incorta"),
  instructions: z.string().optional(),
  audience: z.string().default("executives"),
  input: DeckInputSchema
});
|
|
|
|
/** Port used when SLIDE_FACTORY_PORT is unset or empty. */
const DEFAULT_PORT = 3025;

/** Largest accepted upload, in bytes (30 MiB). */
const MAX_UPLOAD_BYTES = 30 * 1024 * 1024;

const app = express();

// Both settings can be overridden through environment variables.
const port = Number(process.env.SLIDE_FACTORY_PORT || DEFAULT_PORT);
const outputDir = path.resolve(process.env.SLIDE_FACTORY_OUTPUT_DIR || "outputs");

// Uploads are buffered in memory; at most one file per request.
const upload = multer({
  storage: multer.memoryStorage(),
  limits: { fileSize: MAX_UPLOAD_BYTES, files: 1 }
});
|
|
|
|
// Global middleware: CORS, JSON bodies up to 5 MB, and static serving of
// generated artifacts from the output directory under /outputs.
app.use(cors());
app.use(express.json({ limit: "5mb" }));
app.use("/outputs", express.static(outputDir));

/** Liveness probe: replies with a fixed service descriptor. */
app.get("/api/health", (_req, res) => {
  const status = { ok: true, service: "slide-factory", version: "0.1.0" };
  res.json(status);
});
|
|
|
|
/**
 * POST /api/decks — builds a deck from a JSON body validated against
 * CreateDeckRequestSchema and replies 201 with the artifact summary.
 * Validation and generation failures are forwarded to the error middleware.
 */
app.post("/api/decks", async (req, res, next) => {
  try {
    const { input, instructions, audience, style } = CreateDeckRequestSchema.parse(req.body);
    const artifacts = await createDeckArtifacts({
      source: input,
      instructions,
      audience,
      styleId: style
    });
    res.status(201).json(artifacts);
  } catch (error) {
    next(error);
  }
});
|
|
|
|
/**
 * POST /api/decks/from-source — builds a deck from either an uploaded file
 * (multipart field "source") or pasted text (field "content"). An uploaded
 * file takes precedence over pasted text. Replies 201 with the artifact
 * summary plus a short description of the source that was used.
 */
app.post("/api/decks/from-source", upload.single("source"), async (req, res, next) => {
  try {
    const uploaded = req.file;
    const fields = req.body;
    const pasted = typeof fields.content === "string" ? fields.content : "";

    let source: SourceDocument;
    if (uploaded) {
      source = await sourceFromUpload(uploaded);
    } else {
      source = sourceFromText(pasted);
    }

    const artifacts = await createDeckArtifacts({
      source,
      instructions: stringField(fields.instructions),
      audience: stringField(fields.audience) || "executives",
      styleId: stringField(fields.style) || "incorta"
    });

    const { title, type, sourceName } = source;
    res.status(201).json({
      ...artifacts,
      source: { title, type, sourceName }
    });
  } catch (error) {
    next(error);
  }
});
|
|
|
|
/**
 * Terminal error middleware. Logs the error and reports it to the client as
 * HTTP 400 with a JSON body of the form `{ error: <message> }`.
 *
 * If the response has already started streaming, a second status/body cannot
 * be written, so the error is handed to Express's built-in handler, which
 * closes the connection.
 */
app.use((error: unknown, _req: express.Request, res: express.Response, next: express.NextFunction) => {
  console.error(error);

  if (res.headersSent) {
    next(error);
    return;
  }

  res.status(400).json({
    error: error instanceof Error ? error.message : "Unknown error"
  });
});
|
|
|
|
// Start accepting connections and log the local URL once listening.
app.listen(port, () => {
  const banner = `Slide Factory API listening on http://127.0.0.1:${port}`;
  console.log(banner);
});
|
|
|
|
/**
 * Locates the directory of a style pack.
 *
 * Two locations are probed for a `theme.json`, in order:
 *   1. `styles/<styleId>` relative to the current working directory;
 *   2. `styles/<styleId>` under the directory three levels above this module.
 * When neither contains a `theme.json`, the first candidate is returned so the
 * caller's loader reports the missing pack.
 *
 * @param styleId Style identifier; it originates from request data, so it is
 *   required to resolve to a location strictly inside the styles directory.
 * @returns Absolute path of the style pack directory.
 * @throws Error when `styleId` is empty, absolute, or escapes the styles
 *   directory (for example via `..` segments).
 */
async function resolveStylePath(styleId: string): Promise<string> {
  const direct = resolveInsideStylesRoot(path.resolve("styles"), styleId);
  if (await exists(path.join(direct, "theme.json"))) return direct;

  const moduleDir = path.dirname(fileURLToPath(import.meta.url));
  const repoRoot = path.resolve(moduleDir, "../../..");
  const fromRepoRoot = resolveInsideStylesRoot(path.join(repoRoot, "styles"), styleId);
  if (await exists(path.join(fromRepoRoot, "theme.json"))) return fromRepoRoot;

  return direct;
}

/** Resolves `styleId` under `stylesRoot`, rejecting any result outside that root. */
function resolveInsideStylesRoot(stylesRoot: string, styleId: string): string {
  const candidate = path.resolve(stylesRoot, styleId);
  const relative = path.relative(stylesRoot, candidate);
  const escapesRoot =
    relative === "" ||
    relative === ".." ||
    relative.startsWith(`..${path.sep}`) ||
    // On Windows a different drive yields an absolute "relative" path.
    path.isAbsolute(relative);
  if (escapesRoot) {
    throw new Error(`Invalid style id: ${styleId}`);
  }
  return candidate;
}
|
|
|
|
/**
 * Plans a deck from the given source and writes its artifacts to a fresh,
 * randomly named job directory under the output directory:
 * `deck.json` (the pretty-printed deck spec) and `deck.pptx` (the rendering).
 *
 * @param options Source document, optional instructions, audience and style id.
 * @returns The job id, deck title, slide count, and the public URLs of both files.
 */
async function createDeckArtifacts(options: {
  source: SourceDocument;
  instructions?: string;
  audience: string;
  styleId: string;
}) {
  const { source, instructions, audience, styleId } = options;

  const id = nanoid(10);
  const style = await loadStylePack(await resolveStylePath(styleId));
  const deck = planDeckFromSource({ source, style, instructions, audience });

  const jobDir = path.join(outputDir, id);
  await mkdir(jobDir, { recursive: true });

  const serializedSpec = `${JSON.stringify(deck, null, 2)}\n`;
  await writeFile(path.join(jobDir, "deck.json"), serializedSpec);
  await renderPptx({ deck, style, outputPath: path.join(jobDir, "deck.pptx") });

  return {
    id,
    title: deck.title,
    slides: deck.slides.length,
    specUrl: `/outputs/${id}/deck.json`,
    pptxUrl: `/outputs/${id}/deck.pptx`
  };
}
|
|
|
|
/**
 * Wraps pasted text as a markdown source document named "pasted-source.md".
 * The title is taken from the first level-one heading, if any.
 *
 * @throws Error when the text is empty or whitespace only.
 */
function sourceFromText(content: string): SourceDocument {
  const body = content.trim();
  if (body.length === 0) {
    throw new Error("Provide source text or upload a file.");
  }

  const document: SourceDocument = {
    type: "markdown",
    title: deriveTitle(body),
    content: body,
    sourceName: "pasted-source.md"
  };
  return document;
}
|
|
|
|
/**
 * Converts an uploaded file into a source document, choosing the handler by
 * file extension first and MIME type second. Checks run in this order:
 * markdown, plain text, PDF, PPTX, image. Images yield a placeholder
 * description because no vision/OCR step exists here.
 *
 * @throws Error when a PDF or PPTX yields no text, or the type is unsupported.
 */
async function sourceFromUpload(file: Express.Multer.File): Promise<SourceDocument> {
  const { originalname, mimetype } = file;
  const ext = path.extname(originalname).toLowerCase();

  // Markdown and plain text share the same decoding; only the type tag differs.
  const isMarkdown = ext === ".md" || ext === ".markdown";
  if (isMarkdown || ext === ".txt" || mimetype.startsWith("text/")) {
    const content = file.buffer.toString("utf8");
    return {
      type: isMarkdown ? "markdown" : "text",
      title: deriveTitle(content) || baseName(originalname),
      content,
      sourceName: originalname
    };
  }

  if (ext === ".pdf" || mimetype === "application/pdf") {
    const pdfText = await extractPdfText(file.buffer);
    if (!pdfText) {
      throw new Error("No text could be extracted from the PDF.");
    }
    return {
      type: "pdf",
      title: baseName(originalname),
      content: pdfText,
      sourceName: originalname
    };
  }

  if (ext === ".pptx" || mimetype.includes("presentation")) {
    const deckText = await extractPptxText(file.buffer);
    if (!deckText.trim()) {
      throw new Error("No text could be extracted from the slide deck.");
    }
    return {
      type: "pptx",
      title: baseName(originalname),
      content: deckText,
      sourceName: originalname
    };
  }

  if (mimetype.startsWith("image/")) {
    const placeholderLines = [
      `# ${baseName(originalname)}`,
      "",
      "Image source uploaded.",
      "",
      `File name: ${originalname}`,
      `Image type: ${mimetype}`,
      `File size: ${Math.round(file.size / 1024)} KB`,
      "",
      "Vision/OCR is not enabled in this local MVP yet. Use the instructions field to describe the slide goal, audience, and what the image should support."
    ];
    return {
      type: "text",
      title: baseName(originalname),
      sourceName: originalname,
      content: placeholderLines.join("\n")
    };
  }

  throw new Error(`Unsupported source type: ${originalname || mimetype}`);
}
|
|
|
|
/**
 * Extracts text from a PDF. `pdf-parse` is tried first; if it throws or
 * produces only whitespace, the lightweight stream scanner is used instead.
 *
 * @returns Trimmed text, or an empty string when neither approach finds any.
 */
async function extractPdfText(buffer: Buffer): Promise<string> {
  let primary = "";
  try {
    primary = (await pdfParse(buffer)).text.trim();
  } catch {
    // Deliberately ignored: the stream extractor below is the fallback for
    // simple PDFs that the parser cannot handle.
  }

  if (primary) {
    return primary;
  }
  return extractPdfTextFromStreams(buffer).trim();
}
|
|
|
|
/**
 * Best-effort text extraction that scans the raw PDF bytes for stream objects
 * and pulls string operands out of each one.
 *
 * The file is decoded as latin1 so every byte maps to exactly one character
 * and stream payloads survive the round trip back into a Buffer. Streams
 * whose dictionary mentions `/FlateDecode` are inflated first; streams that
 * fail to inflate are skipped.
 *
 * @param buffer Raw PDF file contents.
 * @returns Extracted text with all whitespace runs collapsed to single spaces.
 */
function extractPdfTextFromStreams(buffer: Buffer): string {
  const pdf = buffer.toString("latin1");

  // The dictionary capture may span several lines (many producers put one
  // entry per line), but must not run across an `endobj`, which would glue a
  // preceding non-stream object onto this stream's dictionary.
  const streamPattern = /<<((?:(?!endobj)[\s\S])*?)>>\s*stream\r?\n([\s\S]*?)\r?\nendstream/g;

  const chunks: string[] = [];
  for (const match of pdf.matchAll(streamPattern)) {
    const dictionary = match[1] ?? "";
    const raw = Buffer.from(match[2] ?? "", "latin1");

    let stream = raw;
    if (dictionary.includes("/FlateDecode")) {
      try {
        stream = inflateSync(raw);
      } catch {
        // Corrupt or differently encoded payload: skip this stream only.
        continue;
      }
    }

    const text = extractPdfStrings(stream.toString("latin1")).join(" ");
    if (text) chunks.push(text);
  }

  return chunks.join("\n\n").replace(/\s+/g, " ").trim();
}
|
|
|
|
/**
 * Collects the string operands found in a decoded PDF stream: literal strings
 * `( ... )` and hex strings `< ... >` of at least four hex digits/whitespace
 * characters.
 *
 * Both forms are matched in a single left-to-right pass so that the result
 * keeps document order and hex-looking text inside a literal string is not
 * extracted a second time. Inside a literal, a backslash escapes any following
 * character, including a line break.
 *
 * @param stream Stream contents as a one-byte-per-character string.
 * @returns Decoded, trimmed, non-empty strings in order of appearance.
 */
function extractPdfStrings(stream: string): string[] {
  const stringPattern = /\((?:\\[\s\S]|[^\\)])*\)|<([0-9A-Fa-f\s]{4,})>/g;

  const values: string[] = [];
  for (const match of stream.matchAll(stringPattern)) {
    const hexDigits = match[1];
    const decoded =
      hexDigits !== undefined
        ? decodePdfHex(hexDigits)
        : decodePdfLiteral(match[0].slice(1, -1)); // drop the enclosing parentheses
    const trimmed = decoded.trim();
    if (trimmed) values.push(trimmed);
  }
  return values;
}
|
|
|
|
/**
 * Decodes the escape sequences of a PDF literal string (the text between the
 * enclosing parentheses).
 *
 * All escapes are handled in one pass so the output of one escape is never
 * re-interpreted as the start of another (for example `\\n` is a backslash
 * followed by the letter n, not a newline).
 *
 * Supported sequences:
 *   - `\n`, `\r`, `\t`, `\b`, `\f` — the corresponding control character;
 *   - `\(`, `\)`, `\\` — the literal character;
 *   - `\ddd` — one to three octal digits, reduced to a single byte;
 *   - backslash followed by a line break — line continuation, removed;
 *   - backslash followed by anything else — the backslash is dropped.
 *
 * @param value Raw literal string contents, without the outer parentheses.
 * @returns The decoded string.
 */
function decodePdfLiteral(value: string): string {
  return value.replace(/\\([0-7]{1,3}|\r\n|[\s\S])/g, (_match, escaped: string) => {
    switch (escaped) {
      case "n":
        return "\n";
      case "r":
        return "\r";
      case "t":
        return "\t";
      case "b":
        return "\b";
      case "f":
        return "\f";
      case "\r\n":
      case "\r":
      case "\n":
        return "";
      default:
        if (/^[0-7]/.test(escaped)) {
          // High-order overflow (values above 0o377) is discarded.
          return String.fromCharCode(Number.parseInt(escaped, 8) & 0xff);
        }
        return escaped;
    }
  });
}
|
|
|
|
/**
 * Decodes the contents of a PDF hex string (the text between `<` and `>`).
 *
 * Whitespace is ignored and a trailing odd digit is padded with `0`. When the
 * bytes begin with FE FF, the remainder is read as big-endian 16-bit code
 * units; otherwise each byte becomes one latin1 character.
 *
 * @param value Hex digits, optionally interspersed with whitespace.
 * @returns The decoded text.
 */
function decodePdfHex(value: string): string {
  const digits = value.replace(/\s+/g, "");

  const bytes: number[] = [];
  for (let offset = 0; offset < digits.length; offset += 2) {
    const pair = digits.slice(offset, offset + 2).padEnd(2, "0");
    bytes.push(Number.parseInt(pair, 16));
  }

  const hasUtf16Marker = bytes[0] === 0xfe && bytes[1] === 0xff;
  if (!hasUtf16Marker) {
    return Buffer.from(bytes).toString("latin1");
  }

  let text = "";
  // Start after the two marker bytes; an unpaired trailing byte is ignored.
  for (let index = 2; index + 1 < bytes.length; index += 2) {
    text += String.fromCharCode((bytes[index] << 8) + bytes[index + 1]);
  }
  return text;
}
|
|
|
|
/**
 * Extracts the visible text of a PPTX file, one markdown section per slide.
 *
 * The archive is opened with JSZip, the `ppt/slides/slideN.xml` entries are
 * visited in numeric order, and every text value in each slide's XML is
 * joined with single spaces under a `## Slide N` heading. Slides without
 * text are omitted.
 *
 * @param buffer Raw PPTX (zip) contents.
 * @returns Slide sections separated by blank lines; empty when no text exists.
 */
async function extractPptxText(buffer: Buffer): Promise<string> {
  const zip = await JSZip.loadAsync(buffer);
  const parser = new XMLParser({
    ignoreAttributes: true,
    textNodeName: "#text",
    // Keep element text as written. With the default, numeric or boolean
    // looking runs ("2024", "007", "true") are converted to non-string values,
    // which drops them from the extracted text or alters their formatting.
    parseTagValue: false
  });

  const slideFiles = Object.keys(zip.files)
    .filter((name) => /^ppt\/slides\/slide\d+\.xml$/.test(name))
    .sort((a, b) => slideNumber(a) - slideNumber(b));

  const slides: string[] = [];
  for (const fileName of slideFiles) {
    const xml = await zip.file(fileName)?.async("string");
    if (!xml) continue;

    const text = collectText(parser.parse(xml))
      .map((value) => value.trim())
      .filter(Boolean)
      .join(" ");
    if (text) {
      slides.push(`## Slide ${slideNumber(fileName)}\n\n${text}`);
    }
  }

  return slides.join("\n\n");
}
|
|
|
|
/**
 * Recursively gathers every text leaf from a parsed XML tree, depth first and
 * in property order.
 *
 * Strings are returned as-is. Numbers, booleans and bigints are stringified
 * rather than dropped, because an XML parser may coerce numeric-looking
 * element text (for example a text run containing only "2024") into a
 * non-string value. Arrays and objects are traversed; `null`, `undefined`
 * and any other value contribute nothing.
 *
 * @param value Any node of the parsed tree.
 * @returns The text leaves found under `value`.
 */
function collectText(value: unknown): string[] {
  if (typeof value === "string") return [value];
  if (typeof value === "number" || typeof value === "boolean" || typeof value === "bigint") {
    return [String(value)];
  }
  if (Array.isArray(value)) return value.flatMap(collectText);
  if (value === null || typeof value !== "object") return [];
  return Object.values(value).flatMap(collectText);
}
|
|
|
|
/**
 * Reads the numeric index from a slide entry name ending in `slide<N>.xml`.
 *
 * @returns The slide number, or 0 when the name does not match.
 */
function slideNumber(fileName: string): number {
  const match = fileName.match(/slide(\d+)\.xml$/);
  const digits = match?.[1] || "0";
  return Number(digits);
}
|
|
|
|
/**
 * Finds the first level-one markdown heading (`# ...`) in the content and
 * returns its text without the marker.
 *
 * @returns The heading text, or `undefined` when there is no such line.
 */
function deriveTitle(content: string): string | undefined {
  for (const rawLine of content.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (line.startsWith("# ")) {
      return line.replace(/^#\s+/, "").trim();
    }
  }
  return undefined;
}
|
|
|
|
/**
 * Produces a human-friendly name from a file name: the directory and the
 * final extension are removed, and each run of dashes/underscores becomes a
 * single space.
 */
function baseName(fileName: string): string {
  const extension = path.extname(fileName);
  const stem = path.basename(fileName, extension);
  return stem.split(/[-_]+/).join(" ");
}
|
|
|
|
/**
 * Normalizes an optional form field: returns the trimmed string when the
 * value is a string with non-whitespace content, otherwise `undefined`.
 */
function stringField(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed ? trimmed : undefined;
}
|
|
|
|
/**
 * Reports whether the given path can be accessed.
 *
 * @returns `true` when `access` succeeds, `false` when it rejects for any reason.
 */
async function exists(targetPath: string): Promise<boolean> {
  return access(targetPath).then(
    () => true,
    () => false
  );
}
|