From 1c0fe4c23ea08ef518c27a64024ef2a505bdca44 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Thu, 19 Oct 2023 13:20:01 +0200 Subject: [PATCH] PDF Fetching - Handle when the PDF is given as inline content without a proper mime header (#1875) --- changedetectionio/processors/text_json_diff.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/changedetectionio/processors/text_json_diff.py b/changedetectionio/processors/text_json_diff.py index 5c5e81b7..13bbc950 100644 --- a/changedetectionio/processors/text_json_diff.py +++ b/changedetectionio/processors/text_json_diff.py @@ -167,7 +167,8 @@ class perform_site_check(difference_detection_processor): is_html = False is_json = False - if watch.is_pdf or 'application/pdf' in fetcher.get_all_headers().get('content-type', '').lower(): + inline_pdf = fetcher.get_all_headers().get('content-disposition', '') and '%PDF-1' in fetcher.content[:10] + if watch.is_pdf or 'application/pdf' in fetcher.get_all_headers().get('content-type', '').lower() or inline_pdf: from shutil import which tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml") if not which(tool):