From 1c0fe4c23ea08ef518c27a64024ef2a505bdca44 Mon Sep 17 00:00:00 2001
From: dgtlmoon <dgtlmoon@gmail.com>
Date: Thu, 19 Oct 2023 13:20:01 +0200
Subject: [PATCH] PDF Fetching - Handle when the PDF is given as inline content
 without a proper MIME header (#1875)

---
 changedetectionio/processors/text_json_diff.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/changedetectionio/processors/text_json_diff.py b/changedetectionio/processors/text_json_diff.py
index 5c5e81b7..13bbc950 100644
--- a/changedetectionio/processors/text_json_diff.py
+++ b/changedetectionio/processors/text_json_diff.py
@@ -167,7 +167,8 @@ class perform_site_check(difference_detection_processor):
             is_html = False
             is_json = False
 
-        if watch.is_pdf or 'application/pdf' in fetcher.get_all_headers().get('content-type', '').lower():
+        inline_pdf = fetcher.get_all_headers().get('content-disposition', '') and '%PDF-1' in fetcher.content[:10]
+        if watch.is_pdf or 'application/pdf' in fetcher.get_all_headers().get('content-type', '').lower() or inline_pdf:
             from shutil import which
             tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
             if not which(tool):