From 5a92d2ed1929184d96d00dec8a1e42ab4f0001db Mon Sep 17 00:00:00 2001
From: moritzrfs <m.reufsteck@me.com>
Date: Sun, 8 Feb 2026 10:12:26 +0100
Subject: [PATCH] Add scan script

---
 scan_0208.sh | 172 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 172 insertions(+)
 create mode 100755 scan_0208.sh
diff --git a/scan_0208.sh b/scan_0208.sh
new file mode 100755
index 0000000..a843d78
--- /dev/null
+++ b/scan_0208.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+
+# Script to control an ADF scanner
+# - start scanning and create a single pdf file
+# - with empty page and orientation detection
+# - tested with Fujitsu SP-1120
+#
+# ... excessively borrowed from https://github.com/rocketraman/sane-scan-pdf
+#
+# Version: 0.2 (headless, fixed page order)
+# Date:    2025-02-08
+# License: GNU General Public License
+# Modified by: Claude AI
+# Original Author: Eric Scheibler
+# E-Mail:  email [at] eric-scheibler [dot] de
+# URL:     http://eric-scheibler.de/en/blog/2015/04/script-to-extract-text-from-images-and-scanned-pdf-files/
+#
+# Install:
+#   sudo apt install bc imagemagick poppler-utils sane tesseract-ocr tesseract-ocr-deu tesseract-ocr-eng unpaper
+
+# Defaults; each of them can be overridden by a command-line option
+OUTPUT='scan.pdf'
+HELP=0 VERBOSE=0
+
+# scanner params
+DEVICE='pfusp'
+RESOLUTION='400'
+MODE='Lineart'
+
+# ocr params
+OCR_LANGUAGE='deu'
+OVERWRITE_OUTPUT_FILE=0
+
+
+#####
+
+# Private scratch directory for the scanned pages; abort if it cannot be created.
+TMP_DIR=$(mktemp -d -p "" scan.XXXXXXXXXX) || { echo >&2 "Cannot create temporary directory. Aborting."; exit 1; }
+# Runs on script exit; :? refuses to call rm -rf with an empty path.
+cleanup() { rm -rf -- "${TMP_DIR:?}"; }
+trap cleanup EXIT
+
+
+# Parse command-line options
+while [[ $# > 0 ]]; do
+    case "$1" in
+    -h|--help) HELP=1 ;;
+    -v|--verbose) VERBOSE=1 ;;
+    -o|--output) shift; OUTPUT="$1" ;;
+    -x|--device) shift; DEVICE=$1;;
+    -m|--mode) shift; MODE=$1 ;;
+    -r|--resolution) shift; RESOLUTION=$1 ;;
+    -l|--language) shift; OCR_LANGUAGE=$1 ;;
+    -w|--overwrite-output-file) OVERWRITE_OUTPUT_FILE=1 ;;
+    esac
+    shift     # next option
+done
+
+# Print the usage text and exit when -h/--help was given
+if [[ "$HELP" == 1 ]]; then
+    printf '%s\n' \
+        "$(basename -- "$0") [OPTIONS]... [OUTPUT]" \
+        "" "OPTIONS" \
+        " -x, --device" \
+        "    Override scanner device name, defaulting to \"pfusp\"" \
+        " -m, --mode" \
+        "     Mode e.g. Lineart (default), Halftone, Gray, Color, etc." \
+        " -r, --resolution" \
+        "     Resolution e.g 400 (default)" \
+        " -l, --language <lang>" \
+        "     which language to use for OCR (default: deu)" \
+        "" "OUTPUT" \
+        " -o, --output <outputfile>" \
+        "     Output to named file default=scan.pdf" \
+        " -w, --overwrite-output-file" \
+        "     Overwrite the output pdf file, if it already exists" \
+        " -v, --verbose"
+    exit 0
+fi
+
+if [[ $VERBOSE == 0 ]]; then
+    quiet_param="--quiet"
+    suppress_error_messages="2> /dev/null"
+fi
+
+if [[ "$OUTPUT" == "" ]]; then
+    echo >&2 "Output file must be specified. Aborting."
+    exit 1
+fi
+
+if [[ -f "$OUTPUT" ]]; then
+    if [[ $OVERWRITE_OUTPUT_FILE == 0 ]]; then
+        echo >&2 "Output file $OUTPUT already exists. Aborting."
+        exit 1
+    else
+        rm "$OUTPUT"
+    fi
+fi
+
+
+# Scan from the duplex ADF source into numbered files $TMP_DIR/scan-NNNN.
+echo >&2 "Scanning..."
+if ! scanadf --device-name "$DEVICE" --source Adf-duplex --resolution "$RESOLUTION" --mode "$MODE" -o "$TMP_DIR/scan-%04d"; then
+    exit 1
+fi
+echo ""
+
+
+shopt -s extglob nullglob
+image_files=($TMP_DIR/scan-[0-9]*)
+num_scans=${#image_files[@]}
+
+if [[ $num_scans > 0 ]]; then
+    echo "Processing $num_scans pages"
+    
+    # Process images in normal order
+    for image_file in ${image_files[@]}; do
+        echo "Process $(basename $image_file)"
+
+        # unpaper
+        eval unpaper $quiet_param --overwrite --dpi $RESOLUTION $image_file $image_file $suppress_error_messages
+
+        # convert to tiff
+        convert -density ${RESOLUTION}x${RESOLUTION} -units PixelsPerInch $image_file ${image_file}.tiff
+        rm $image_file
+
+        # orientation detection
+        orientation_result=$(eval tesseract ${image_file}.tiff - --psm 0 $suppress_error_messages) || orientation_result=
+        if [[ $orientation_result == *"Rotate: 180"* ]]; then
+            echo "Image orientation is upside down, rotate"
+            convert -rotate 180 ${image_file}.tiff ${image_file}.tiff
+        fi
+
+        # empty page detection
+        percentage_white=$(convert ${image_file}.tiff -fuzz 0% -negate -threshold 0 -negate -format "%[fx:100*mean]" info:) || percentage_white=0
+        is_empty_page=$(echo "$percentage_white >= 99.8" | bc -l)
+        if [[ $is_empty_page == 1 && $orientation_result == "" ]]; then
+            echo "Empty page removed"
+        else
+            eval tesseract ${image_file}.tiff $image_file -l $OCR_LANGUAGE pdf $suppress_error_messages
+            rm ${image_file}.tiff
+        fi
+
+        echo ""
+    done
+
+    # Collect PDF files in REVERSE order to fix the page sequence
+    pdf_files=($TMP_DIR/scan-[0-9]*.pdf)
+    num_pdf_files=${#pdf_files[@]}
+    
+    if [[ $num_pdf_files > 0 ]]; then
+        # Reverse the array to fix page order
+        reversed_pdf_files=()
+        for ((i=${#pdf_files[@]}-1; i>=0; i--)); do
+            reversed_pdf_files+=("${pdf_files[i]}")
+        done
+        
+        if [[ $num_pdf_files == 1 ]]; then
+            echo "Creating output PDF..."
+            mv "${reversed_pdf_files[0]}" "$OUTPUT"
+        else
+            echo "Concatenating $num_pdf_files PDFs in correct order..."
+            pdfunite "${reversed_pdf_files[@]}" "$OUTPUT"
+        fi
+    fi
+fi
+
+# Final status line, chosen by whether the output file exists at this point.
+final_message="No scans found."
+# Switch to the success text when a regular file is present at $OUTPUT.
+[[ -f "$OUTPUT" ]] && final_message="Done. Output saved to: $OUTPUT"
+echo "$final_message"