diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..df10e2e --- /dev/null +++ b/.dockerignore @@ -0,0 +1,4 @@ +.git +__pycache__ +*.pyc +scans/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..70b8c1e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,37 @@ +FROM ubuntu:24.04 + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && apt-get install -y --no-install-recommends \ + sane-utils \ + imagemagick \ + poppler-utils \ + tesseract-ocr \ + tesseract-ocr-deu \ + tesseract-ocr-eng \ + unpaper \ + bc \ + python3 \ + python3-pip \ + && rm -rf /var/lib/apt/lists/* + +# Allow ImageMagick to process PDF files +RUN sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' \ + /etc/ImageMagick-6/policy.xml || true + +WORKDIR /app + +COPY requirements.txt . +RUN pip3 install --no-cache-dir --break-system-packages -r requirements.txt + +COPY scan.sh . +RUN chmod +x scan.sh + +COPY api/ api/ + +EXPOSE 8000 + +HEALTHCHECK --interval=30s --timeout=5s --retries=3 \ + CMD python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/status')" || exit 1 + +CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/api/__init__.py b/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/api/__pycache__/__init__.cpython-312.pyc b/api/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000..b9c8b75 Binary files /dev/null and b/api/__pycache__/__init__.cpython-312.pyc differ diff --git a/api/__pycache__/config.cpython-312.pyc b/api/__pycache__/config.cpython-312.pyc new file mode 100644 index 0000000..957d1b1 Binary files /dev/null and b/api/__pycache__/config.cpython-312.pyc differ diff --git a/api/__pycache__/main.cpython-312.pyc b/api/__pycache__/main.cpython-312.pyc new file mode 100644 index 0000000..1be39ae Binary files /dev/null and b/api/__pycache__/main.cpython-312.pyc differ diff --git a/api/__pycache__/models.cpython-312.pyc 
# ---------------------------------------------------------------------------
# api/config.py
# ---------------------------------------------------------------------------
# Runtime configuration for the scan API. Values marked "env" can be
# overridden through the named environment variable; the others are fixed.
import os
from pathlib import Path

# env SCANNER_DEVICE: device name handed to `scanimage --device-name`.
DEVICE: str = os.getenv("SCANNER_DEVICE", "pfusp")
# env SCAN_DIR: directory in which default-named scan PDFs are created.
SCAN_DIR: Path = Path(os.getenv("SCAN_DIR", "scans"))
# env SCAN_SCRIPT: shell script that performs the scan (run via `bash`).
SCRIPT_PATH: Path = Path(os.getenv("SCAN_SCRIPT", "scan.sh"))

# Scan mode names; the same four values are declared by `ScanMode` in
# api/models.py.
ALLOWED_MODES: list[str] = ["Lineart", "Halftone", "Gray", "Color"]
# Inclusive bounds applied to `ScanRequest.resolution`.
# NOTE(review): unit is presumably DPI -- confirm against scan.sh.
MIN_RESOLUTION: int = 50
MAX_RESOLUTION: int = 1200

# env SCAN_TIMEOUT: seconds a scan may run before it is killed.
SCAN_TIMEOUT: int = int(os.getenv("SCAN_TIMEOUT", "600"))


# ---------------------------------------------------------------------------
# api/main.py
# ---------------------------------------------------------------------------
# FastAPI application: wires the HTTP routes to a single shared
# ScannerManager and maps scanner exceptions to HTTP status codes.
#
# Route handlers below are documented with comments rather than docstrings on
# purpose: FastAPI publishes handler docstrings as operation descriptions in
# the generated OpenAPI schema, so adding docstrings would change output.
from contextlib import asynccontextmanager

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse

from api.models import PaperResponse, ScanRequest, ScanResponse, StatusResponse
from api.scanner import (
    ScannerBusyError,
    ScannerManager,
    ScannerTimeoutError,
    ScannerUnavailableError,
)


# Application lifespan hook: creates the one ScannerManager shared by all
# requests and stores it on `app.state.scanner`. Nothing is torn down after
# the `yield`.
@asynccontextmanager
async def lifespan(app: FastAPI):
    app.state.scanner = ScannerManager()
    yield


app = FastAPI(title="scan-adf", lifespan=lifespan)


# ScannerBusyError -> 409 Conflict (a scan is already running).
@app.exception_handler(ScannerBusyError)
async def busy_handler(request: Request, exc: ScannerBusyError):
    return JSONResponse(status_code=409, content={"detail": str(exc)})


# ScannerUnavailableError -> 503 Service Unavailable.
@app.exception_handler(ScannerUnavailableError)
async def unavailable_handler(request: Request, exc: ScannerUnavailableError):
    return JSONResponse(status_code=503, content={"detail": str(exc)})


# ScannerTimeoutError -> 504 Gateway Timeout.
@app.exception_handler(ScannerTimeoutError)
async def timeout_handler(request: Request, exc: ScannerTimeoutError):
    return JSONResponse(status_code=504, content={"detail": str(exc)})


# POST /scan: starts a scan and returns immediately with the output path.
# `start_scan` only schedules the work as a background task, so the 201
# response means "started", not "finished"; poll GET /status for the result.
# Raises ScannerBusyError (-> 409) when a scan is already in progress.
@app.post("/scan", status_code=201, response_model=ScanResponse)
async def scan(request: Request, body: ScanRequest):
    scanner: ScannerManager = request.app.state.scanner
    output = await scanner.start_scan(
        mode=body.mode.value,
        resolution=body.resolution,
        language=body.language,
        output=body.output,
    )
    return ScanResponse(
        message="Scan started",
        output=output,
        mode=body.mode.value,
        resolution=body.resolution,
    )


# GET /status: reports whether a scan is running, the parameters of the
# running scan (if any) and the result of the most recently finished scan.
# Also used by the Dockerfile HEALTHCHECK.
@app.get("/status", response_model=StatusResponse)
async def status(request: Request):
    scanner: ScannerManager = request.app.state.scanner
    return StatusResponse(
        scanning=scanner.is_scanning,
        last_result=scanner.last_result,
        current=scanner.current_scan_info,
    )


# GET /paper: asks the scanner whether paper is loaded. The dict returned by
# `check_paper` ("paper_loaded", "raw") is unpacked into PaperResponse.
# Scanner errors surface through the exception handlers above.
@app.get("/paper", response_model=PaperResponse)
async def paper(request: Request):
    scanner: ScannerManager = request.app.state.scanner
    result = await scanner.check_paper()
    return PaperResponse(**result)


# ---------------------------------------------------------------------------
# api/models.py
# ---------------------------------------------------------------------------
# Request/response schemas for the HTTP API.
#
# Models are documented with comments rather than docstrings on purpose:
# pydantic copies class docstrings into the generated JSON schema, so adding
# docstrings would change the published OpenAPI document.
from enum import Enum
from typing import Any

from pydantic import BaseModel, Field

from api.config import ALLOWED_MODES, MAX_RESOLUTION, MIN_RESOLUTION


# Scan modes accepted by the API; `.value` is the string forwarded to the
# scan script's `--mode` option.
class ScanMode(str, Enum):
    lineart = "Lineart"
    halftone = "Halftone"
    gray = "Gray"
    color = "Color"


# Body of POST /scan. Every field is optional and has a default.
class ScanRequest(BaseModel):
    mode: ScanMode = ScanMode.lineart
    # Validated against the inclusive MIN_RESOLUTION..MAX_RESOLUTION range.
    resolution: int = Field(default=400, ge=MIN_RESOLUTION, le=MAX_RESOLUTION)
    # Forwarded verbatim to the scan script's `--language` option.
    # NOTE(review): presumably a Tesseract language code -- confirm in scan.sh.
    language: str = "deu"
    # Output PDF path; None lets the server pick a timestamped name.
    output: str | None = None


# Body returned by POST /scan.
class ScanResponse(BaseModel):
    message: str
    output: str
    mode: str
    resolution: int


# Body returned by GET /status.
class StatusResponse(BaseModel):
    scanning: bool
    # Result of the most recently finished scan, if any.
    last_result: dict[str, Any] | None = None
    # Parameters of the scan currently running, if any.
    current: dict[str, str] | None = None


# Body returned by GET /paper.
class PaperResponse(BaseModel):
    paper_loaded: bool
    # Unparsed stdout of the `scanimage -A` call.
    raw: str | None = None
"""Scanner control for the scan API (api/scanner.py).

Wraps the two external commands the service relies on -- the scan script run
through ``bash`` and ``scanimage -A`` -- behind an asyncio-friendly manager
that allows one scanner operation at a time.
"""
import asyncio
import contextlib
import json
import logging
import os
import re
import signal
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

from api.config import DEVICE, SCAN_DIR, SCAN_TIMEOUT, SCRIPT_PATH

log = logging.getLogger(__name__)

# Matches the "page-loaded" line in the `scanimage -A` option listing and
# captures its current value ("yes" or "no").
_PAGE_LOADED_RE = re.compile(
    r"--page-loaded\[=\(yes\|no\)\]\s+\[(yes|no)\]"
)

# Seconds to wait for `scanimage -A` before giving up.
_PAPER_CHECK_TIMEOUT = 10


class ScannerBusyError(Exception):
    """Raised when an operation is requested while a scan is in progress."""


class ScannerUnavailableError(Exception):
    """Raised when `scanimage` is missing or exits with a non-zero status."""


class ScannerTimeoutError(Exception):
    """Raised when a scanner command does not finish in time."""


class ScannerManager:
    """Serialises access to the scanner and tracks scan state.

    A scan is started with :meth:`start_scan`, which returns immediately and
    runs the scan script in a background task. Progress and the outcome are
    exposed through :attr:`is_scanning`, :attr:`current_scan_info` and
    :attr:`last_result`.
    """

    def __init__(self) -> None:
        # Held for the duration of every external scanner command.
        self._lock = asyncio.Lock()
        self._scanning = False
        self._last_result: dict[str, Any] | None = None
        self._current_scan_info: dict[str, str] | None = None
        # Strong reference to the background scan task. The event loop only
        # keeps weak references to tasks, so an unreferenced task may be
        # garbage-collected before it finishes.
        self._scan_task: asyncio.Task[None] | None = None

    @property
    def is_scanning(self) -> bool:
        """Whether a scan has been started and has not finished yet."""
        return self._scanning

    @property
    def last_result(self) -> dict[str, Any] | None:
        """Outcome of the most recently finished scan, or None."""
        return self._last_result

    @property
    def current_scan_info(self) -> dict[str, str] | None:
        """Parameters of the running scan, or None when idle."""
        return self._current_scan_info

    async def start_scan(
        self,
        mode: str,
        resolution: int,
        language: str,
        output: str | None = None,
    ) -> str:
        """Start a scan in the background and return its output path.

        Args:
            mode: Value for the scan script's ``--mode`` option.
            resolution: Value for the scan script's ``--resolution`` option.
            language: Value for the scan script's ``--language`` option.
            output: Target PDF path. When None, a UTC-timestamped file name
                inside ``SCAN_DIR`` is generated.

        Returns:
            The path the scan will be written to.

        Raises:
            ScannerBusyError: A scan is already in progress.
        """
        if self._scanning:
            raise ScannerBusyError("A scan is already in progress")

        SCAN_DIR.mkdir(parents=True, exist_ok=True)

        if output is None:
            ts = datetime.now(timezone.utc).strftime("%Y-%m-%d-%H-%M-%S")
            output = str(SCAN_DIR / f"scan_{ts}.pdf")
        # NOTE(review): a caller-supplied `output` is used as given and the
        # script is run with --overwrite-output-file, so an API client can
        # point it at any path writable by this process. Consider confining
        # it to SCAN_DIR.

        self._scanning = True
        self._current_scan_info = {
            "output": output,
            "mode": mode,
            "resolution": str(resolution),
            "started_at": datetime.now(timezone.utc).isoformat(),
        }

        # Keep the task referenced so it cannot be collected mid-scan.
        self._scan_task = asyncio.create_task(
            self._run_scan(mode, resolution, language, output)
        )
        return output

    @staticmethod
    def _kill_process_group(proc: asyncio.subprocess.Process) -> None:
        """SIGKILL the process group led by *proc*, ignoring a dead group."""
        with contextlib.suppress(ProcessLookupError):
            os.killpg(proc.pid, signal.SIGKILL)

    @staticmethod
    def _read_result(output: str, returncode: int | None) -> dict[str, Any]:
        """Build the result dict for a finished scan.

        Reads the JSON sidecar next to *output* when it exists; otherwise
        reports the scan as failed together with the script's exit status.
        """
        # Swap only a trailing ".pdf"; otherwise append ".json" so the
        # sidecar path can never be the PDF itself.
        # NOTE(review): assumes scan.sh names its sidecar the same way --
        # confirm against scan.sh.
        json_path = Path(output.removesuffix(".pdf") + ".json")
        if not json_path.exists():
            return {
                "status": "failed",
                "output": output,
                "returncode": returncode,
            }

        result = json.loads(json_path.read_text(encoding="utf-8"))
        if not isinstance(result, dict):
            raise ValueError(f"{json_path} does not contain a JSON object")
        result["output"] = output
        return result

    async def _run_scan(
        self,
        mode: str,
        resolution: int,
        language: str,
        output: str,
    ) -> None:
        """Run the scan script and record its outcome in ``last_result``.

        Never raises for scan failures: timeouts, script errors and
        unexpected exceptions are logged and stored as a status dict. The
        scanning flag and current-scan info are always cleared at the end.
        """
        async with self._lock:
            try:
                proc = await asyncio.create_subprocess_exec(
                    "bash",
                    str(SCRIPT_PATH),
                    "--mode", mode,
                    "--resolution", str(resolution),
                    "--language", language,
                    "--output", output,
                    "--overwrite-output-file",
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE,
                    # Own session/process group so a timeout can kill the
                    # script together with everything it spawned.
                    start_new_session=True,
                )
                try:
                    _, stderr = await asyncio.wait_for(
                        proc.communicate(), timeout=SCAN_TIMEOUT
                    )
                except asyncio.TimeoutError:
                    # Killing only bash would leave its children holding the
                    # pipes open, and communicate() below would never return.
                    self._kill_process_group(proc)
                    await proc.communicate()
                    log.error("Scan timed out after %ds", SCAN_TIMEOUT)
                    self._last_result = {"status": "timeout", "output": output}
                    return

                if proc.returncode != 0:
                    log.error(
                        "scan.sh failed: %s", stderr.decode(errors="replace")
                    )

                self._last_result = self._read_result(output, proc.returncode)
            except Exception:
                log.exception("Unexpected error during scan")
                self._last_result = {"status": "error", "output": output}
            finally:
                self._scanning = False
                self._current_scan_info = None

    async def check_paper(self) -> dict[str, bool | str]:
        """Report whether the scanner has paper loaded.

        Runs ``scanimage -A`` for the configured device and looks for the
        ``--page-loaded`` option in its output.

        Returns:
            ``{"paper_loaded": bool, "raw": str}`` where ``raw`` is the
            command's stdout. ``paper_loaded`` is False when the option is
            not present in the output.

        Raises:
            ScannerBusyError: A scan is in progress.
            ScannerTimeoutError: ``scanimage`` did not finish in time.
            ScannerUnavailableError: ``scanimage`` is not installed or
                exited with a non-zero status.
        """
        if self._scanning:
            raise ScannerBusyError(
                "Cannot check paper while a scan is in progress"
            )

        async with self._lock:
            try:
                proc = await asyncio.create_subprocess_exec(
                    "scanimage", "-A", "--device-name", DEVICE,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE,
                )
            except FileNotFoundError as exc:
                raise ScannerUnavailableError("scanimage not found") from exc

            try:
                stdout, stderr = await asyncio.wait_for(
                    proc.communicate(), timeout=_PAPER_CHECK_TIMEOUT
                )
            except asyncio.TimeoutError as exc:
                # Do not leave a hung scanimage behind holding the device.
                with contextlib.suppress(ProcessLookupError):
                    proc.kill()
                await proc.communicate()
                raise ScannerTimeoutError("scanimage -A timed out") from exc

        if proc.returncode != 0:
            message = stderr.decode(errors="replace").strip()
            raise ScannerUnavailableError(f"scanimage failed: {message}")

        output_text = stdout.decode(errors="replace")
        match = _PAGE_LOADED_RE.search(output_text)
        paper_loaded = match.group(1) == "yes" if match else False

        return {"paper_loaded": paper_loaded, "raw": output_text}
new file mode 100644 index 0000000..74c469c --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,10 @@ +services: + scanner: + build: . + ports: + - "8000:8000" + devices: + - /dev/bus/usb:/dev/bus/usb + volumes: + - ./scans:/app/scans + restart: unless-stopped diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..7924492 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +fastapi==0.115.0 +uvicorn[standard]==0.32.0