Plaintext MIME type fix - Don't attempt to extract HTML content from plaintext, as this removes lines and breaks changedetection (#391)

2022-01-27 23:16:50 +01:00
parent 6611823962
commit 39b7be9e7a
1 changed files with 14 additions and 9 deletions
--- a/changedetectionio/fetch_site_status.py
+++ b/changedetectionio/fetch_site_status.py
@@ -118,6 +118,8 @@ class perform_site_check():
            if is_html:
                # CSS filter: extract the HTML that matches and feed it into the existing inscriptis get_text()
                html_content = fetcher.content
                if not fetcher.headers.get('Content-Type', '') == 'text/plain':
                    if has_filter_rule:
                        # For HTML/XML we offer XPath as an option; just start the filter rule with a regular XPath "/.."
                        if css_filter_rule[0] == '/':
@@ -128,6 +130,9 @@ class perform_site_check():
                    # get_text() via inscriptis
                    stripped_text_from_html = get_text(html_content)
                else:
                    # Don't run get_text or xpath/css filters on plaintext
                    stripped_text_from_html = html_content
            # Re #340 - return the content before the 'ignore text' was applied
            text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')