Move all global vars to live under the 'app' var

Adding pip limit
Adding apscheduler
2024-10-25 17:04:52 +02:00 · 2024-10-22 10:50:52 +02:00 · 2024-10-22 10:46:56 +02:00 · 2024-10-21 11:35:37 +02:00 · 2024-10-21 11:34:22 +02:00 · 2024-10-14 12:57:02 +02:00
55 changed files with 1246 additions and 746 deletions
--- a/1
+++ b/1
@@ -37,6 +37,7 @@ RUN pip install --target=/dependencies playwright~=1.41.2 \

 # Final image stage
 FROM python:${PYTHON_VERSION}-slim-bookworm
+LABEL org.opencontainers.image.source="https://github.com/dgtlmoon/changedetection.io"

 RUN apt-get update && apt-get install -y --no-install-recommends \
    libxslt1.1 \
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,5 @@
 recursive-include changedetectionio/api *
+recursive-include changedetectionio/apprise_plugin *
 recursive-include changedetectionio/blueprint *
 recursive-include changedetectionio/content_fetchers *
 recursive-include changedetectionio/model *
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@@ -2,7 +2,7 @@

 # Read more https://github.com/dgtlmoon/changedetection.io/wiki

-__version__ = '0.46.04'
+__version__ = '0.47.03'

 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
--- a/changedetectionio/api/api_v1.py
+++ b/changedetectionio/api/api_v1.py
@@ -58,7 +58,7 @@ class Watch(Resource):
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))

        if request.args.get('recheck'):
-            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
            return "OK", 200
        if request.args.get('paused', '') == 'paused':
            self.datastore.data['watching'].get(uuid).pause()
@@ -246,7 +246,7 @@ class CreateWatch(Resource):

        new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
        if new_uuid:
-            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
+            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
            return {'uuid': new_uuid}, 201
        else:
            return "Invalid or unsupported URL", 400
@@ -303,7 +303,7 @@ class CreateWatch(Resource):

        if request.args.get('recheck_all'):
            for uuid in self.datastore.data['watching'].keys():
-                self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+                self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
            return {'status': "OK"}, 200

        return list, 200
--- a/changedetectionio/apprise_plugin/init.py
+++ b/changedetectionio/apprise_plugin/init.py
@@ -1,5 +1,6 @@
 # include the decorator
 from apprise.decorators import notify
+from loguru import logger

@notify(on="delete")
@notify(on="deletes")
@@ -64,10 +65,12 @@ def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
            auth = (URLBase.unquote(results.get('user')))

    # Try to auto-guess if it's JSON
+    h = 'application/json; charset=utf-8'
    try:
        json.loads(body)
-        headers['Content-Type'] = 'application/json; charset=utf-8'
+        headers['Content-Type'] = h
    except ValueError as e:
+        logger.warning(f"Could not automatically add '{h}' header to the {kwargs['meta'].get('schema')}:// notification because the document failed to parse as JSON: {e}")
        pass

    r(results.get('url'),
--- a/changedetectionio/blueprint/check_proxies/init.py
+++ b/changedetectionio/blueprint/check_proxies/init.py
@@ -1,4 +1,7 @@
+import importlib
 from concurrent.futures import ThreadPoolExecutor
+
+from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
 from changedetectionio.store import ChangeDetectionStore

 from functools import wraps
@@ -30,7 +33,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
    def long_task(uuid, preferred_proxy):
        import time
        from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions
-        from changedetectionio.processors.text_json_diff import text_json_diff
        from changedetectionio.safe_jinja import render as jinja_render

        status = {'status': '', 'length': 0, 'text': ''}
@@ -38,8 +40,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        contents = ''
        now = time.time()
        try:
-            update_handler = text_json_diff.perform_site_check(datastore=datastore, watch_uuid=uuid)
-            update_handler.call_browser()
+            processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
+            update_handler = processor_module.perform_site_check(datastore=datastore,
+                                                                 watch_uuid=uuid
+                                                                 )
+
+            update_handler.call_browser(preferred_proxy_id=preferred_proxy)
        # title, size is len contents not len xfer
        except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
            if e.status_code == 404:
@@ -48,7 +54,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                status.update({'status': 'ERROR', 'length': len(contents), 'text': f"{e.status_code} - Access denied"})
            else:
                status.update({'status': 'ERROR', 'length': len(contents), 'text': f"Status code: {e.status_code}"})
-        except text_json_diff.FilterNotFoundInResponse:
+        except FilterNotFoundInResponse:
            status.update({'status': 'OK', 'length': len(contents), 'text': f"OK but CSS/xPath filter not found (page changed layout?)"})
        except content_fetcher_exceptions.EmptyReply as e:
            if e.status_code == 403 or e.status_code == 401:
--- a/changedetectionio/blueprint/price_data_follower/init.py
+++ b/changedetectionio/blueprint/price_data_follower/init.py
@@ -19,7 +19,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
        datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
        datastore.data['watching'][uuid]['processor'] = 'restock_diff'
        datastore.data['watching'][uuid].clear_watch()
-        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
+        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
        return redirect(url_for("index"))

    @login_required
--- a/changedetectionio/blueprint/tags/templates/edit-tag.html
+++ b/changedetectionio/blueprint/tags/templates/edit-tag.html
@@ -17,7 +17,6 @@
 </script>

 <script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
-<!--<script src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script>-->
 <script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>

 <div class="edit-form monospaced-textarea">
--- a/changedetectionio/content_fetchers/init.py
+++ b/changedetectionio/content_fetchers/init.py
@@ -4,7 +4,9 @@ from loguru import logger
 from changedetectionio.content_fetchers.exceptions import BrowserStepsStepException
 import os

-visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary'
+# Visual Selector scraper - 'Button' is there because some sites have <button>OUT OF STOCK</button>.
+visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button'
+

 # available_fetchers() will scan this implementation looking for anything starting with html_
 # this information is used in the form selections
--- a/changedetectionio/content_fetchers/requests.py
+++ b/changedetectionio/content_fetchers/requests.py
@@ -75,6 +75,7 @@ class fetcher(Fetcher):
        self.headers = r.headers

        if not r.content or not len(r.content):
+            logger.debug(f"Requests returned empty content for '{url}'")
            if not empty_pages_are_a_change:
                raise EmptyReply(url=url, status_code=r.status_code)
            else:
--- a/changedetectionio/content_fetchers/res/stock-not-in-stock.js
+++ b/changedetectionio/content_fetchers/res/stock-not-in-stock.js
@@ -154,10 +154,14 @@ function isItemInStock() {
        }

        elementText = "";
-        if (element.tagName.toLowerCase() === "input") {
-            elementText = element.value.toLowerCase().trim();
-        } else {
-            elementText = getElementBaseText(element);
+        try {
+            if (element.tagName.toLowerCase() === "input") {
+                elementText = element.value.toLowerCase().trim();
+            } else {
+                elementText = getElementBaseText(element);
+            }
+        } catch (e) {
+            console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
        }

        if (elementText.length) {
--- a/changedetectionio/flask_app.py
+++ b/changedetectionio/flask_app.py
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@@ -476,7 +476,7 @@ class processor_text_json_diff_form(commonSettingsForm):

    title = StringField('Title', default='')

-    ignore_text = StringListField('Remove lines containing', [ValidateListRegex()])
+    ignore_text = StringListField('Ignore lines containing', [ValidateListRegex()])
    headers = StringDictKeyValue('Request headers')
    body = TextAreaField('Request body', [validators.Optional()])
    method = SelectField('Request method', choices=valid_method, default=default_method)
@@ -496,7 +496,7 @@ class processor_text_json_diff_form(commonSettingsForm):
    text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
    webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])

-    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
+    save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"})

    proxy = RadioField('Proxy')
    filter_failure_notification_send = BooleanField(
@@ -616,7 +616,7 @@ class globalSettingsForm(Form):

    requests = FormField(globalSettingsRequestForm)
    application = FormField(globalSettingsApplicationForm)
-    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
+    save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"})


 class extractDataForm(Form):
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -3,11 +3,11 @@ from lxml import etree
 import json
 import re

-
 # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
 TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
-
+TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ')
 PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
+
 # 'price' , 'lowPrice', 'highPrice' are usually under here
 # All of those may or may not appear on different websites - I didnt find a way todo case-insensitive searching here
 LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"]
@@ -326,6 +326,7 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
 #          - "line numbers" return a list of line numbers that match (int list)
 #
 # wordlist - list of regex's (str) or words (str)
+# Preserves all linefeeds and other whitespace; it's not the job of this function to remove them
 def strip_ignore_text(content, wordlist, mode="content"):
    i = 0
    output = []
@@ -341,32 +342,30 @@ def strip_ignore_text(content, wordlist, mode="content"):
        else:
            ignore_text.append(k.strip())

-    for line in content.splitlines():
+    for line in content.splitlines(keepends=True):
        i += 1
        # Always ignore blank lines in this mode. (when this function gets called)
        got_match = False
-        if len(line.strip()):
-            for l in ignore_text:
-                if l.lower() in line.lower():
+        for l in ignore_text:
+            if l.lower() in line.lower():
+                got_match = True
+
+        if not got_match:
+            for r in ignore_regex:
+                if r.search(line):
                    got_match = True

-            if not got_match:
-                for r in ignore_regex:
-                    if r.search(line):
-                        got_match = True
-
-            if not got_match:
-                # Not ignored
-                output.append(line.encode('utf8'))
-            else:
-                ignored_line_numbers.append(i)
-
+        if not got_match:
+            # Not ignored, and should preserve "keepends"
+            output.append(line)
+        else:
+            ignored_line_numbers.append(i)

    # Used for finding out what to highlight
    if mode == "line numbers":
        return ignored_line_numbers

-    return "\n".encode('utf8').join(output)
+    return ''.join(output)

 def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
    from xml.sax.saxutils import escape as xml_escape
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -6,6 +6,8 @@ import re
 from pathlib import Path
 from loguru import logger

+from ..html_tools import TRANSLATE_WHITESPACE_TABLE
+
 # Allowable protocols, protects against javascript: etc
 # file:// is further checked by ALLOW_FILE_URI
 SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
@@ -36,8 +38,9 @@ class model(watch_base):
    jitter_seconds = 0

    def __init__(self, *arg, **kw):
-        self.__datastore_path = kw['datastore_path']
-        del kw['datastore_path']
+        self.__datastore_path = kw.get('datastore_path')
+        if kw.get('datastore_path'):
+            del kw['datastore_path']
        super(model, self).__init__(*arg, **kw)
        if kw.get('default'):
            self.update(kw['default'])
@@ -171,6 +174,10 @@ class model(watch_base):
        """
        tmp_history = {}

+        # In the case we are only using the watch for processing without history
+        if not self.watch_data_dir:
+            return []
+
        # Read the history file as a dict
        fname = os.path.join(self.watch_data_dir, "history.txt")
        if os.path.isfile(fname):
@@ -307,13 +314,13 @@ class model(watch_base):
            dest = os.path.join(self.watch_data_dir, snapshot_fname)
            if not os.path.exists(dest):
                with open(dest, 'wb') as f:
-                    f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
+                    f.write(brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT))
        else:
            snapshot_fname = f"{snapshot_id}.txt"
            dest = os.path.join(self.watch_data_dir, snapshot_fname)
            if not os.path.exists(dest):
                with open(dest, 'wb') as f:
-                    f.write(contents)
+                    f.write(contents.encode('utf-8'))

        # Append to index
        # @todo check last char was \n
@@ -345,14 +352,32 @@ class model(watch_base):
        return seconds

    # Iterate over all history texts and see if something new exists
-    def lines_contain_something_unique_compared_to_history(self, lines: list):
-        local_lines = set([l.decode('utf-8').strip().lower() for l in lines])
+    # Leading/trailing whitespace is always stripped; with ignore_whitespace=True all whitespace (\r \n \t and spaces) is removed before comparing
+    def lines_contain_something_unique_compared_to_history(self, lines: list, ignore_whitespace=False):
+        local_lines = []
+        if lines:
+            if ignore_whitespace:
+                if isinstance(lines[0], str): # Can be either str or bytes depending on what was on the disk
+                    local_lines = set([l.translate(TRANSLATE_WHITESPACE_TABLE).lower() for l in lines])
+                else:
+                    local_lines = set([l.decode('utf-8').translate(TRANSLATE_WHITESPACE_TABLE).lower() for l in lines])
+            else:
+                if isinstance(lines[0], str): # Can be either str or bytes depending on what was on the disk
+                    local_lines = set([l.strip().lower() for l in lines])
+                else:
+                    local_lines = set([l.decode('utf-8').strip().lower() for l in lines])
+

        # Compare each lines (set) against each history text file (set) looking for something new..
        existing_history = set({})
        for k, v in self.history.items():
            content = self.get_history_snapshot(k)
-            alist = set([line.strip().lower() for line in content.splitlines()])
+
+            if ignore_whitespace:
+                alist = set([line.translate(TRANSLATE_WHITESPACE_TABLE).lower() for line in content.splitlines()])
+            else:
+                alist = set([line.strip().lower() for line in content.splitlines()])
+
            existing_history = existing_history.union(alist)

        # Check that everything in local_lines(new stuff) already exists in existing_history - it should
@@ -396,8 +421,8 @@ class model(watch_base):
    @property
    def watch_data_dir(self):
        # The base dir of the watch data
-        return os.path.join(self.__datastore_path, self['uuid'])
-    
+        return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None
+
    def get_error_text(self):
        """Return the text saved from a previous request that resulted in a non-200 error"""
        fname = os.path.join(self.watch_data_dir, "last-error.txt")
--- a/changedetectionio/processors/init.py
+++ b/changedetectionio/processors/init.py
@@ -18,6 +18,7 @@ class difference_detection_processor():
    screenshot = None
    watch = None
    xpath_data = None
+    preferred_proxy = None

    def __init__(self, *args, datastore, watch_uuid, **kwargs):
        super().__init__(*args, **kwargs)
@@ -26,7 +27,8 @@ class difference_detection_processor():
        # Generic fetcher that should be extended (requests, playwright etc)
        self.fetcher = Fetcher()

-    def call_browser(self):
+    def call_browser(self, preferred_proxy_id=None):
+
        from requests.structures import CaseInsensitiveDict

        # Protect against file:// access
@@ -42,7 +44,7 @@ class difference_detection_processor():
        prefer_fetch_backend = self.watch.get('fetch_backend', 'system')

        # Proxy ID "key"
-        preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
+        preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))

        # Pluggable content self.fetcher
        if not prefer_fetch_backend or prefer_fetch_backend == 'system':
@@ -155,7 +157,7 @@ class difference_detection_processor():
        # After init, call run_changedetection() which will do the actual change-detection

    @abstractmethod
-    def run_changedetection(self, watch, skip_when_checksum_same: bool = True):
+    def run_changedetection(self, watch):
        update_obj = {'last_notification_error': False, 'last_error': False}
        some_data = 'xxxxx'
        update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
--- a/changedetectionio/processors/restock_diff/processor.py
+++ b/changedetectionio/processors/restock_diff/processor.py
@@ -27,22 +27,27 @@ def _search_prop_by_value(matches, value):
                return prop[1]  # Yield the desired value and exit the function

 def _deduplicate_prices(data):
-    seen = set()
-    unique_data = []
+    import re

+    '''
+    Some price data has multiple entries, OR it has a single entry with ['$159', '159', 159, "$ 159"] or just "159"
+    Get all the values, clean it and add it to a set then return the unique values
+    '''
+    unique_data = set()
+
+    # Collect each distinct numeric price value (the original 'datum' objects are not kept, only their cleaned float values)
    for datum in data:
-        # Convert 'value' to float if it can be a numeric string, otherwise leave it as is
-        try:
-            normalized_value = float(datum.value) if isinstance(datum.value, str) and datum.value.replace('.', '', 1).isdigit() else datum.value
-        except ValueError:
-            normalized_value = datum.value

-        # If the normalized value hasn't been seen yet, add it to unique data
-        if normalized_value not in seen:
-            unique_data.append(datum)
-            seen.add(normalized_value)
-    
-    return unique_data
+        if isinstance(datum.value, list):
+            # Process each item in the list
+            normalized_value = set([float(re.sub(r'[^\d.]', '', str(item))) for item in datum.value])
+            unique_data.update(normalized_value)
+        else:
+            # Process single value
+            v = float(re.sub(r'[^\d.]', '', str(datum.value)))
+            unique_data.add(v)
+
+    return list(unique_data)


 # should return Restock()
@@ -83,14 +88,13 @@ def get_itemprop_availability(html_content) -> Restock:
        if price_result:
            # Right now, we just support single product items, maybe we will store the whole actual metadata seperately in teh future and
            # parse that for the UI?
-            prices_found = set(str(item.value).replace('$', '') for item in price_result)
-            if len(price_result) > 1 and len(prices_found) > 1:
+            if len(price_result) > 1 and len(price_result) > 1:
                # See of all prices are different, in the case that one product has many embedded data types with the same price
                # One might have $121.95 and another 121.95 etc
-                logger.warning(f"More than one price found {prices_found}, throwing exception, cant use this plugin.")
+                logger.warning(f"More than one price found {price_result}, throwing exception, cant use this plugin.")
                raise MoreThanOnePriceFound()

-            value['price'] = price_result[0].value
+            value['price'] = price_result[0]

        pricecurrency_result = pricecurrency_parse.find(data)
        if pricecurrency_result:
@@ -140,7 +144,7 @@ class perform_site_check(difference_detection_processor):
    screenshot = None
    xpath_data = None

-    def run_changedetection(self, watch, skip_when_checksum_same=True):
+    def run_changedetection(self, watch):
        import hashlib

        if not watch:
@@ -220,7 +224,7 @@ class perform_site_check(difference_detection_processor):
            itemprop_availability['original_price'] = itemprop_availability.get('price')
            update_obj['restock']["original_price"] = itemprop_availability.get('price')

-        if not self.fetcher.instock_data and not itemprop_availability.get('availability'):
+        if not self.fetcher.instock_data and not itemprop_availability.get('availability') and not itemprop_availability.get('price'):
            raise ProcessorException(
                message=f"Unable to extract restock data for this page unfortunately. (Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.",
                url=watch.get('url'),
@@ -237,6 +241,14 @@ class perform_site_check(difference_detection_processor):
            update_obj['restock']["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False
            logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned instock_data - '{self.fetcher.instock_data}' from JS scraper.")

+        # Very often websites will lie about the 'availability' in the metadata, so if the scraped version says it's NOT in stock, use that.
+        if self.fetcher.instock_data and self.fetcher.instock_data != 'Possibly in stock':
+            if update_obj['restock'].get('in_stock'):
+                logger.warning(
+                    f"Lie detected in the availability machine data!! when scraping said its not in stock!! itemprop was '{itemprop_availability}' and scraped from browser was '{self.fetcher.instock_data}' update obj was {update_obj['restock']} ")
+                logger.warning(f"Setting instock to FALSE, scraper found '{self.fetcher.instock_data}' in the body but metadata reported not-in-stock")
+                update_obj['restock']["in_stock"] = False
+
        # What we store in the snapshot
        price = update_obj.get('restock').get('price') if update_obj.get('restock').get('price') else ""
        snapshot_content = f"In Stock: {update_obj.get('restock').get('in_stock')} - Price: {price}"
@@ -299,4 +311,4 @@ class perform_site_check(difference_detection_processor):
        # Always record the new checksum
        update_obj["previous_md5"] = fetched_md5

-        return changed_detected, update_obj, snapshot_content.encode('utf-8').strip()
+        return changed_detected, update_obj, snapshot_content.strip()
--- a/changedetectionio/processors/text_json_diff/init.py
+++ b/changedetectionio/processors/text_json_diff/init.py
@@ -0,0 +1,115 @@
+
+from loguru import logger
+
+
+
+def _task(watch, update_handler):
+    from changedetectionio.content_fetchers.exceptions import ReplyWithContentButNoText
+    from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
+
+    text_after_filter = ''
+
+    try:
+        # The slow process (we run 2 of these in parallel)
+        changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(watch=watch)
+    except FilterNotFoundInResponse as e:
+        text_after_filter = f"Filter not found in HTML: {str(e)}"
+    except ReplyWithContentButNoText as e:
+        text_after_filter = f"Filter found but no text (empty result)"
+    except Exception as e:
+        text_after_filter = f"Error: {str(e)}"
+
+    if not text_after_filter.strip():
+        text_after_filter = 'Empty content'
+
+    # run_changedetection may hand back bytes or str, so normalise to str here
+    text_after_filter = text_after_filter.decode('utf-8') if isinstance(text_after_filter, bytes) else text_after_filter
+
+    return text_after_filter
+
+
+def prepare_filter_prevew(datastore, watch_uuid):
+    '''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])'''
+    from changedetectionio import forms, html_tools
+    from changedetectionio.model.Watch import model as watch_model
+    from concurrent.futures import ProcessPoolExecutor
+    from copy import deepcopy
+    from flask import request, jsonify
+    import brotli
+    import importlib
+    import os
+    import time
+    now = time.time()
+
+    text_after_filter = ''
+    text_before_filter = ''
+    trigger_line_numbers = []
+    ignore_line_numbers = []
+
+    tmp_watch = deepcopy(datastore.data['watching'].get(watch_uuid))
+
+    if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
+        # Splice in the temporary stuff from the form
+        form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None,
+                                                   data=request.form
+                                                   )
+
+        # Only update vars that came in via the AJAX post
+        p = {k: v for k, v in form.data.items() if k in request.form.keys()}
+        tmp_watch.update(p)
+        blank_watch_no_filters = watch_model()
+        blank_watch_no_filters['url'] = tmp_watch.get('url')
+
+        latest_filename = next(reversed(tmp_watch.history))
+        html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")
+        with open(html_fname, 'rb') as f:
+            decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8')
+
+            # Just like a normal change detection, except we provide a fake "watch" object and don't call .call_browser()
+            processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
+            update_handler = processor_module.perform_site_check(datastore=datastore,
+                                                                 watch_uuid=tmp_watch.get('uuid')  # probably not needed anymore anyway?
+                                                                 )
+            # Use the last loaded HTML as the input
+            update_handler.datastore = datastore
+            update_handler.fetcher.content = str(decompressed_data) # str() because playwright/puppeteer/requests return string
+            update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
+
+            # Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk
+            # Do this as a parallel process because it could take some time
+            with ProcessPoolExecutor(max_workers=2) as executor:
+                future1 = executor.submit(_task, tmp_watch, update_handler)
+                future2 = executor.submit(_task, blank_watch_no_filters, update_handler)
+
+                text_after_filter = future1.result()
+                text_before_filter = future2.result()
+
+    try:
+        trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
+                                                            wordlist=tmp_watch['trigger_text'],
+                                                            mode='line numbers'
+                                                            )
+    except Exception as e:
+        text_before_filter = f"Error: {str(e)}"
+
+    try:
+        text_to_ignore = tmp_watch.get('ignore_text', []) + datastore.data['settings']['application'].get('global_ignore_text', [])
+        ignore_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
+                                                           wordlist=text_to_ignore,
+                                                           mode='line numbers'
+                                                           )
+    except Exception as e:
+        text_before_filter = f"Error: {str(e)}"
+
+    logger.trace(f"Parsed in {time.time() - now:.3f}s")
+
+    return jsonify(
+        {
+            'after_filter': text_after_filter,
+            'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
+            'duration': time.time() - now,
+            'trigger_line_numbers': trigger_line_numbers,
+            'ignore_line_numbers': ignore_line_numbers,
+        }
+    )
+
--- a/changedetectionio/processors/text_json_diff/processor.py
+++ b/changedetectionio/processors/text_json_diff/processor.py
@@ -7,7 +7,7 @@ import re
 import urllib3

 from changedetectionio.processors import difference_detection_processor
-from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text
+from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
 from changedetectionio import html_tools, content_fetchers
 from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
 from loguru import logger
@@ -35,8 +35,7 @@ class PDFToHTMLToolNotFound(ValueError):
 # (set_proxy_from_list)
 class perform_site_check(difference_detection_processor):

-    def run_changedetection(self, watch, skip_when_checksum_same=True):
-
+    def run_changedetection(self, watch):
        changed_detected = False
        html_content = ""
        screenshot = False  # as bytes
@@ -59,9 +58,6 @@ class perform_site_check(difference_detection_processor):
        # Watches added automatically in the queue manager will skip if its the same checksum as the previous run
        # Saves a lot of CPU
        update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
-        if skip_when_checksum_same:
-            if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
-                raise content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame()

        # Fetching complete, now filters

@@ -205,22 +201,14 @@ class perform_site_check(difference_detection_processor):
        if watch.get('trim_text_whitespace'):
            stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())

-        if watch.get('remove_duplicate_lines'):
-            stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
-
-        if watch.get('sort_text_alphabetically'):
-            # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
-            # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
-            stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
-            stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
-
        # Re #340 - return the content before the 'ignore text' was applied
        # Also used to calculate/show what was removed
-        text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
+        text_content_before_ignored_filter = stripped_text_from_html

        # @todo whitespace coming from missing rtrim()?
        # stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about.
        # Rewrite's the processing text based on only what diff result they want to see
+
        if watch.has_special_diff_filter_options_set() and len(watch.history.keys()):
            # Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences
            from changedetectionio import diff
@@ -235,12 +223,12 @@ class perform_site_check(difference_detection_processor):
                                             line_feed_sep="\n",
                                             include_change_type_prefix=False)

-            watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter)
+            watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter.encode('utf-8'))

            if not rendered_diff and stripped_text_from_html:
                # We had some content, but no differences were found
                # Store our new file as the MD5 so it will trigger in the future
-                c = hashlib.md5(stripped_text_from_html.encode('utf-8').translate(None, b'\r\n\t ')).hexdigest()
+                c = hashlib.md5(stripped_text_from_html.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
                return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8')
            else:
                stripped_text_from_html = rendered_diff
@@ -261,14 +249,6 @@ class perform_site_check(difference_detection_processor):

        update_obj["last_check_status"] = self.fetcher.get_last_status_code()

-        # If there's text to skip
-        # @todo we could abstract out the get_text() to handle this cleaner
-        text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
-        if len(text_to_ignore):
-            stripped_text_from_html = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
-        else:
-            stripped_text_from_html = stripped_text_from_html.encode('utf8')
-
        # 615 Extract text by regex
        extract_text = watch.get('extract_text', [])
        if len(extract_text) > 0:
@@ -277,39 +257,53 @@ class perform_site_check(difference_detection_processor):
                # incase they specified something in '/.../x'
                if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
                    regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
-                    result = re.findall(regex.encode('utf-8'), stripped_text_from_html)
+                    result = re.findall(regex, stripped_text_from_html)

                    for l in result:
                        if type(l) is tuple:
                            # @todo - some formatter option default (between groups)
-                            regex_matched_output += list(l) + [b'\n']
+                            regex_matched_output += list(l) + ['\n']
                        else:
                            # @todo - some formatter option default (between each ungrouped result)
-                            regex_matched_output += [l] + [b'\n']
+                            regex_matched_output += [l] + ['\n']
                else:
                    # Doesnt look like regex, just hunt for plaintext and return that which matches
                    # `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes
-                    r = re.compile(re.escape(s_re.encode('utf-8')), re.IGNORECASE)
+                    r = re.compile(re.escape(s_re), re.IGNORECASE)
                    res = r.findall(stripped_text_from_html)
                    if res:
                        for match in res:
-                            regex_matched_output += [match] + [b'\n']
+                            regex_matched_output += [match] + ['\n']

            ##########################################################
-            stripped_text_from_html = b''
-            text_content_before_ignored_filter = b''
+            stripped_text_from_html = ''
+
            if regex_matched_output:
                # @todo some formatter for presentation?
-                stripped_text_from_html = b''.join(regex_matched_output)
-                text_content_before_ignored_filter = stripped_text_from_html
+                stripped_text_from_html = ''.join(regex_matched_output)
+
+        if watch.get('remove_duplicate_lines'):
+            stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))


+        if watch.get('sort_text_alphabetically'):
+            # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
+            # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
+            stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
+            stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
+
+### CALCULATE MD5
+        # If there's text to ignore
+        text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
+        text_for_checksuming = stripped_text_from_html
+        if text_to_ignore:
+            text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)

        # Re #133 - if we should strip whitespaces from triggering the change detected comparison
-        if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
-            fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
+        if text_for_checksuming and self.datastore.data['settings']['application'].get('ignore_whitespace', False):
+            fetched_md5 = hashlib.md5(text_for_checksuming.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
        else:
-            fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest()
+            fetched_md5 = hashlib.md5(text_for_checksuming.encode('utf-8')).hexdigest()

        ############ Blocking rules, after checksum #################
        blocked = False
@@ -337,19 +331,33 @@ class perform_site_check(difference_detection_processor):
            if result:
                blocked = True

-        # The main thing that all this at the moment comes down to :)
-        if watch.get('previous_md5') != fetched_md5:
-            changed_detected = True

        # Looks like something changed, but did it match all the rules?
        if blocked:
            changed_detected = False
+        else:
+            # The main thing that all this at the moment comes down to :)
+            if watch.get('previous_md5') != fetched_md5:
+                changed_detected = True
+
+            # Always record the new checksum
+            update_obj["previous_md5"] = fetched_md5
+
+            # On the first run of a site, watch['previous_md5'] will be None, so set it to the current one.
+            if not watch.get('previous_md5'):
+                watch['previous_md5'] = fetched_md5

        logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")

        if changed_detected:
            if watch.get('check_unique_lines', False):
-                has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
+                ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace')
+
+                has_unique_lines = watch.lines_contain_something_unique_compared_to_history(
+                    lines=stripped_text_from_html.splitlines(),
+                    ignore_whitespace=ignore_whitespace
+                )
+
                # One or more lines? unsure?
                if not has_unique_lines:
                    logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
@@ -357,12 +365,6 @@ class perform_site_check(difference_detection_processor):
                else:
                    logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")

-        # Always record the new checksum
-        update_obj["previous_md5"] = fetched_md5
-
-        # On the first run of a site, watch['previous_md5'] will be None, set it the current one.
-        if not watch.get('previous_md5'):
-            watch['previous_md5'] = fetched_md5

        # stripped_text_from_html - Everything after filters and NO 'ignored' content
        return changed_detected, update_obj, stripped_text_from_html
--- a/changedetectionio/static/js/limit.js
+++ b/changedetectionio/static/js/limit.js
@@ -1,56 +0,0 @@
-/**
- * debounce
- * @param {integer} milliseconds This param indicates the number of milliseconds
- *     to wait after the last call before calling the original function.
- * @param {object} What "this" refers to in the returned function.
- * @return {function} This returns a function that when called will wait the
- *     indicated number of milliseconds after the last call before
- *     calling the original function.
- */
-Function.prototype.debounce = function (milliseconds, context) {
-    var baseFunction = this,
-        timer = null,
-        wait = milliseconds;
-
-    return function () {
-        var self = context || this,
-            args = arguments;
-
-        function complete() {
-            baseFunction.apply(self, args);
-            timer = null;
-        }
-
-        if (timer) {
-            clearTimeout(timer);
-        }
-
-        timer = setTimeout(complete, wait);
-    };
-};
-
-/**
-* throttle
-* @param {integer} milliseconds This param indicates the number of milliseconds
-*     to wait between calls before calling the original function.
-* @param {object} What "this" refers to in the returned function.
-* @return {function} This returns a function that when called will wait the
-*     indicated number of milliseconds between calls before
-*     calling the original function.
-*/
-Function.prototype.throttle = function (milliseconds, context) {
-    var baseFunction = this,
-        lastEventTimestamp = null,
-        limit = milliseconds;
-
-    return function () {
-        var self = context || this,
-            args = arguments,
-            now = Date.now();
-
-        if (!lastEventTimestamp || now - lastEventTimestamp >= limit) {
-            lastEventTimestamp = now;
-            baseFunction.apply(self, args);
-        }
-    };
-};
--- a/changedetectionio/static/js/plugins.js
+++ b/changedetectionio/static/js/plugins.js
@@ -1,64 +1,106 @@
-(function($) {
+(function ($) {
+    /**
+     * debounce
+     * @param {integer} milliseconds This param indicates the number of milliseconds
+     *     to wait after the last call before calling the original function.
+     * @param {object} context What "this" refers to in the returned function.
+     * @return {function} This returns a function that when called will wait the
+     *     indicated number of milliseconds after the last call before
+     *     calling the original function.
+     */
+    Function.prototype.debounce = function (milliseconds, context) {
+        var baseFunction = this,
+            timer = null,
+            wait = milliseconds;

-/*
-    $('#code-block').highlightLines([
-      {
-        'color': '#dd0000',
-        'lines': [10, 12]
-      },
-      {
-        'color': '#ee0000',
-        'lines': [15, 18]
-      }
-    ]);
-  });
-*/
+        return function () {
+            var self = context || this,
+                args = arguments;

-  $.fn.highlightLines = function(configurations) {
-    return this.each(function() {
-      const $pre = $(this);
-      const textContent = $pre.text();
-      const lines = textContent.split(/\r?\n/); // Handles both \n and \r\n line endings
+            function complete() {
+                baseFunction.apply(self, args);
+                timer = null;
+            }

-      // Build a map of line numbers to styles
-      const lineStyles = {};
+            if (timer) {
+                clearTimeout(timer);
+            }

-      configurations.forEach(config => {
-        const { color, lines: lineNumbers } = config;
-        lineNumbers.forEach(lineNumber => {
-          lineStyles[lineNumber] = color;
+            timer = setTimeout(complete, wait);
+        };
+    };
+
+    /**
+     * throttle
+     * @param {integer} milliseconds This param indicates the number of milliseconds
+     *     to wait between calls before calling the original function.
+     * @param {object} context What "this" refers to in the returned function.
+     * @return {function} This returns a function that when called will wait the
+     *     indicated number of milliseconds between calls before
+     *     calling the original function.
+     */
+    Function.prototype.throttle = function (milliseconds, context) {
+        var baseFunction = this,
+            lastEventTimestamp = null,
+            limit = milliseconds;
+
+        return function () {
+            var self = context || this,
+                args = arguments,
+                now = Date.now();
+
+            if (!lastEventTimestamp || now - lastEventTimestamp >= limit) {
+                lastEventTimestamp = now;
+                baseFunction.apply(self, args);
+            }
+        };
+    };
+
+    $.fn.highlightLines = function (configurations) {
+        return this.each(function () {
+            const $pre = $(this);
+            const textContent = $pre.text();
+            const lines = textContent.split(/\r?\n/); // Handles both \n and \r\n line endings
+
+            // Build a map of line numbers to styles
+            const lineStyles = {};
+
+            configurations.forEach(config => {
+                const {color, lines: lineNumbers} = config;
+                lineNumbers.forEach(lineNumber => {
+                    lineStyles[lineNumber] = color;
+                });
+            });
+
+            // Function to escape HTML characters
+            function escapeHtml(text) {
+                return text.replace(/[&<>"'`=\/]/g, function (s) {
+                    return "&#" + s.charCodeAt(0) + ";";
+                });
+            }
+
+            // Process each line
+            const processedLines = lines.map((line, index) => {
+                const lineNumber = index + 1; // Line numbers start at 1
+                const escapedLine = escapeHtml(line);
+                const color = lineStyles[lineNumber];
+
+                if (color) {
+                    // Wrap the line in a span with inline style
+                    return `<span style="background-color: ${color}">${escapedLine}</span>`;
+                } else {
+                    return escapedLine;
+                }
+            });
+
+            // Join the lines back together
+            const newContent = processedLines.join('\n');
+
+            // Set the new content as HTML
+            $pre.html(newContent);
        });
-      });
-
-      // Function to escape HTML characters
-      function escapeHtml(text) {
-        return text.replace(/[&<>"'`=\/]/g, function(s) {
-          return "&#" + s.charCodeAt(0) + ";";
-        });
-      }
-
-      // Process each line
-      const processedLines = lines.map((line, index) => {
-        const lineNumber = index + 1; // Line numbers start at 1
-        const escapedLine = escapeHtml(line);
-        const color = lineStyles[lineNumber];
-
-        if (color) {
-          // Wrap the line in a span with inline style
-          return `<span style="background-color: ${color}">${escapedLine}</span>`;
-        } else {
-          return escapedLine;
-        }
-      });
-
-      // Join the lines back together
-      const newContent = processedLines.join('\n');
-
-      // Set the new content as HTML
-      $pre.html(newContent);
-    });
-  };
-   $.fn.miniTabs = function(tabsConfig, options) {
+    };
+    $.fn.miniTabs = function (tabsConfig, options) {
        const settings = {
            tabClass: 'minitab',
            tabsContainerClass: 'minitabs',
@@ -66,10 +108,10 @@
            ...(options || {})
        };

-        return this.each(function() {
+        return this.each(function () {
            const $wrapper = $(this);
            const $contents = $wrapper.find('div[id]').hide();
-            const $tabsContainer = $('<div>', { class: settings.tabsContainerClass }).prependTo($wrapper);
+            const $tabsContainer = $('<div>', {class: settings.tabsContainerClass}).prependTo($wrapper);

            // Generate tabs
            Object.entries(tabsConfig).forEach(([tabTitle, contentSelector], index) => {
@@ -84,7 +126,7 @@
            });

            // Tab click event
-            $tabsContainer.on('click', `.${settings.tabClass}`, function(e) {
+            $tabsContainer.on('click', `.${settings.tabClass}`, function (e) {
                e.preventDefault();
                const $tab = $(this);
                const target = $tab.data('target');
@@ -103,7 +145,7 @@
    // Object to store ongoing requests by namespace
    const requests = {};

-    $.abortiveSingularAjax = function(options) {
+    $.abortiveSingularAjax = function (options) {
        const namespace = options.namespace || 'default';

        // Abort the current request in this namespace if it's still ongoing
--- a/changedetectionio/static/js/recheck-proxy.js
+++ b/changedetectionio/static/js/recheck-proxy.js
@@ -1,14 +1,14 @@
 $(function () {
    /* add container before each proxy location to show status */
-
-    var option_li = $('.fetch-backend-proxy li').filter(function() {
-        return $("input",this)[0].value.length >0;
-    });
-
-    //var option_li = $('.fetch-backend-proxy li');
    var isActive = false;
-    $(option_li).prepend('<div class="proxy-status"></div>');
-    $(option_li).append('<div class="proxy-timing"></div><div class="proxy-check-details"></div>');
+
+    function setup_html_widget() {
+        var option_li = $('.fetch-backend-proxy li').filter(function () {
+            return $("input", this)[0].value.length > 0;
+        });
+        $(option_li).prepend('<div class="proxy-status"></div>');
+        $(option_li).append('<div class="proxy-timing"></div><div class="proxy-check-details"></div>');
+    }

    function set_proxy_check_status(proxy_key, state) {
        // select input by value name
@@ -59,8 +59,14 @@ $(function () {
    }

    $('#check-all-proxies').click(function (e) {
+
        e.preventDefault()
-        $('body').addClass('proxy-check-active');
+
+        if (!$('body').hasClass('proxy-check-active')) {
+            setup_html_widget();
+            $('body').addClass('proxy-check-active');
+        }
+
        $('.proxy-check-details').html('');
        $('.proxy-status').html('<span class="spinner"></span>').fadeIn();
        $('.proxy-timing').html('');
--- a/changedetectionio/static/js/tabs.js
+++ b/changedetectionio/static/js/tabs.js
@@ -26,8 +26,7 @@ function set_active_tab() {
    if (tab.length) {
        tab[0].parentElement.className = "active";
    }
-    // hash could move the page down
-    window.scrollTo(0, 0);
+
 }

 function focus_error_tab() {
--- a/changedetectionio/static/js/toggle-theme.js
+++ b/changedetectionio/static/js/toggle-theme.js
@@ -49,4 +49,9 @@ $(document).ready(function () {
        $("#overlay").toggleClass('visible');
        heartpath.style.fill = document.getElementById("overlay").classList.contains("visible") ? '#ff0000' : 'var(--color-background)';
    });
+
+    setInterval(function () {
+        $('body').toggleClass('spinner-active', $.active > 0);
+    }, 2000);
+
 });
--- a/changedetectionio/static/js/watch-settings.js
+++ b/changedetectionio/static/js/watch-settings.js
@@ -26,25 +26,28 @@ function request_textpreview_update() {
        data[name] = $element.is(':checkbox') ? ($element.is(':checked') ? $element.val() : false) : $element.val();
    });

+    $('body').toggleClass('spinner-active', 1);
+
    $.abortiveSingularAjax({
        type: "POST",
        url: preview_text_edit_filters_url,
        data: data,
        namespace: 'watchEdit'
    }).done(function (data) {
+        console.debug(data['duration'])
        $('#filters-and-triggers #text-preview-before-inner').text(data['before_filter']);
-
        $('#filters-and-triggers #text-preview-inner')
            .text(data['after_filter'])
            .highlightLines([
                {
                    'color': '#ee0000',
                    'lines': data['trigger_line_numbers']
+                },
+                {
+                    'color': '#757575',
+                    'lines': data['ignore_line_numbers']
                }
-            ]);
-
-
-
+            ])
    }).fail(function (error) {
        if (error.statusText === 'abort') {
            console.log('Request was aborted due to a new request being fired.');
@@ -73,18 +76,13 @@ $(document).ready(function () {
    $("#text-preview-inner").css('max-height', (vh-300)+"px");
    $("#text-preview-before-inner").css('max-height', (vh-300)+"px");

-    // Realtime preview of 'Filters & Text' setup
-    var debounced_request_textpreview_update = request_textpreview_update.debounce(100);
-
    $("#activate-text-preview").click(function (e) {
        $('body').toggleClass('preview-text-enabled')
        request_textpreview_update();
-
        const method = $('body').hasClass('preview-text-enabled') ? 'on' : 'off';
-        $("#text-preview-refresh")[method]('click', debounced_request_textpreview_update);
-        $('textarea:visible')[method]('keyup blur', debounced_request_textpreview_update);
-        $('input:visible')[method]('keyup blur change', debounced_request_textpreview_update);
-        $("#filters-and-triggers-tab")[method]('click', debounced_request_textpreview_update);
+        $('#filters-and-triggers textarea')[method]('blur', request_textpreview_update.throttle(1000));
+        $('#filters-and-triggers input')[method]('change', request_textpreview_update.throttle(1000));
+        $("#filters-and-triggers-tab")[method]('click', request_textpreview_update.throttle(1000));
    });
    $('.minitabs-wrapper').miniTabs({
        "Content after filters": "#text-preview-inner",
--- a/changedetectionio/static/styles/diff.css
+++ b/changedetectionio/static/styles/diff.css
@@ -153,7 +153,8 @@ html[data-darkmode="true"] {
    border: 1px solid transparent;
    vertical-align: top;
    font: 1em monospace;
-    text-align: left; }
+    text-align: left;
+    overflow: clip; }
  #diff-ui pre {
    white-space: pre-wrap; }

@@ -172,7 +173,9 @@ ins {
  text-decoration: none; }

 #result {
-  white-space: pre-wrap; }
+  white-space: pre-wrap;
+  word-break: break-word;
+  overflow-wrap: break-word; }

 #settings {
  background: rgba(0, 0, 0, 0.05);
@@ -231,3 +234,12 @@ td#diff-col div {
  border-radius: 5px;
  background: var(--color-background);
  box-shadow: 1px 1px 4px var(--color-shadow-jump); }
+
+.pure-form button.reset-margin {
+  margin: 0px; }
+
+.diff-fieldset {
+  display: flex;
+  align-items: center;
+  gap: 4px;
+  flex-wrap: wrap; }
--- a/changedetectionio/static/styles/scss/diff.scss
+++ b/changedetectionio/static/styles/scss/diff.scss
@@ -24,6 +24,7 @@
    vertical-align: top;
    font: 1em monospace;
    text-align: left;
+    overflow: clip; // clip overflowing contents to cell boundaries
  }

  pre {
@@ -50,6 +51,8 @@ ins {

 #result {
  white-space: pre-wrap;
+  word-break: break-word;
+  overflow-wrap: break-word;

  .change {
    span {}
@@ -134,3 +137,15 @@ td#diff-col div {
  background: var(--color-background);
  box-shadow: 1px 1px 4px var(--color-shadow-jump);
 }
+
+// resets button margin to 0px
+.pure-form button.reset-margin {
+  margin: 0px;
+}
+
+.diff-fieldset {
+  display: flex;
+  align-items: center;
+  gap: 4px;
+  flex-wrap: wrap;
+}
--- a/changedetectionio/static/styles/scss/parts/_extra_browsers.scss
+++ b/changedetectionio/static/styles/scss/parts/_extra_browsers.scss
@@ -11,7 +11,22 @@ ul#requests-extra_browsers {
  /* each proxy entry is a `table` */
  table {
    tr {
-      display: inline;
+      display: table-row; // default display for small screens
+      input[type=text] {
+        width: 100%;
+      }
+    }
+  }
+  
+  // apply inline display for larger screens
+  @media only screen and (min-width: 1280px) {
+    table {
+      tr {
+        display: inline;
+        input[type=text] {
+          width: 100%;
+        }
+      }
    }
  }
 }
--- a/changedetectionio/static/styles/scss/parts/_extra_proxies.scss
+++ b/changedetectionio/static/styles/scss/parts/_extra_proxies.scss
@@ -11,7 +11,19 @@ ul#requests-extra_proxies {
  /* each proxy entry is a `table` */
  table {
    tr {
-      display: inline;
+      display: table-row; // default display for small screens
+      input[type=text] {
+        width: 100%;
+      }
+    }
+  }
+  
+  // apply inline display for large screens
+  @media only screen and (min-width: 1024px) {
+    table {
+      tr {
+        display: inline;
+      }
    }
  }
 }
@@ -25,15 +37,19 @@ ul#requests-extra_proxies {

 body.proxy-check-active {
  #request {
+    // Padding set by flex layout
+    /*
    .proxy-status {
      width: 2em;
    }
+    */

    .proxy-check-details {
      font-size: 80%;
      color: #555;
      display: block;
-      padding-left: 4em;
+      padding-left: 2em;
+      max-width: 500px;
    }

    .proxy-timing {
--- a/changedetectionio/static/styles/scss/parts/_minitabs.scss
+++ b/changedetectionio/static/styles/scss/parts/_minitabs.scss
@@ -7,6 +7,16 @@
    border-top: none;
  }

+  .minitabs-content {
+    width: 100%;
+    display: flex;
+    > div {
+      flex: 1 1 auto;
+      min-width: 0;
+      overflow: scroll;
+    }
+  }
+
  .minitabs {
    display: flex;
    border-bottom: 1px solid #ccc;
--- a/changedetectionio/static/styles/scss/parts/_preview_text_filter.scss
+++ b/changedetectionio/static/styles/scss/parts/_preview_text_filter.scss
@@ -42,9 +42,8 @@ body.preview-text-enabled {
    color: var(--color-text-input);
    font-family: "Courier New", Courier, monospace; /* Sets the font to a monospace type */
    font-size: 70%;
-    overflow-x: scroll;
+    word-break: break-word;
    white-space: pre-wrap; /* Preserves whitespace and line breaks like <pre> */
-    overflow-wrap: break-word; /* Allows long words to break and wrap to the next line */
  }
 }

--- a/changedetectionio/static/styles/scss/styles.scss
+++ b/changedetectionio/static/styles/scss/styles.scss
@@ -106,10 +106,34 @@ button.toggle-button {
  padding: 5px;
  display: flex;
  justify-content: space-between;
-  border-bottom: 2px solid var(--color-menu-accent);
  align-items: center;
 }

+#pure-menu-horizontal-spinner {
+  height: 3px;
+  background: linear-gradient(-75deg, #ff6000, #ff8f00, #ffdd00, #ed0000);
+  background-size: 400% 400%;
+  width: 100%;
+  animation: gradient 200s ease infinite;
+}
+
+body.spinner-active {
+  #pure-menu-horizontal-spinner {
+    animation: gradient 1s ease infinite;
+  }
+}
+
+@keyframes gradient {
+	0% {
+		background-position: 0% 50%;
+	}
+	50% {
+		background-position: 100% 50%;
+	}
+	100% {
+		background-position: 0% 50%;
+	}
+}
 .pure-menu-heading {
  color: var(--color-text-menu-heading);
 }
@@ -123,8 +147,14 @@ button.toggle-button {
  }
 }

+
+.tab-pane-inner {
+  // .tab-pane-inner will have the #id that the tab button jumps/anchors to
+  scroll-margin-top: 200px;
+}
+
 section.content {
-  padding-top: 5em;
+  padding-top: 100px;
  padding-bottom: 1em;
  flex-direction: column;
  display: flex;
@@ -907,6 +937,7 @@ $form-edge-padding: 20px;
 }

 .tab-pane-inner {
+
  &:not(:target) {
    display: none;
  }
--- a/changedetectionio/static/styles/styles.css
+++ b/changedetectionio/static/styles/styles.css
@@ -112,26 +112,34 @@ ul#requests-extra_proxies {
  ul#requests-extra_proxies li > label {
    display: none; }
  ul#requests-extra_proxies table tr {
-    display: inline; }
+    display: table-row; }
+    ul#requests-extra_proxies table tr input[type=text] {
+      width: 100%; }
+  @media only screen and (min-width: 1024px) {
+    ul#requests-extra_proxies table tr {
+      display: inline; } }

 #request {
  /* Auto proxy scan/checker */ }
  #request label[for=proxy] {
    display: inline-block; }

-body.proxy-check-active #request .proxy-status {
-  width: 2em; }
-
-body.proxy-check-active #request .proxy-check-details {
-  font-size: 80%;
-  color: #555;
-  display: block;
-  padding-left: 4em; }
-
-body.proxy-check-active #request .proxy-timing {
-  font-size: 80%;
-  padding-left: 1rem;
-  color: var(--color-link); }
+body.proxy-check-active #request {
+  /*
+    .proxy-status {
+      width: 2em;
+    }
+    */ }
+  body.proxy-check-active #request .proxy-check-details {
+    font-size: 80%;
+    color: #555;
+    display: block;
+    padding-left: 2em;
+    max-width: 500px; }
+  body.proxy-check-active #request .proxy-timing {
+    font-size: 80%;
+    padding-left: 1rem;
+    color: var(--color-link); }

 #recommended-proxy {
  display: grid;
@@ -158,7 +166,14 @@ ul#requests-extra_browsers {
  ul#requests-extra_browsers li > label {
    display: none; }
  ul#requests-extra_browsers table tr {
-    display: inline; }
+    display: table-row; }
+    ul#requests-extra_browsers table tr input[type=text] {
+      width: 100%; }
+  @media only screen and (min-width: 1280px) {
+    ul#requests-extra_browsers table tr {
+      display: inline; }
+      ul#requests-extra_browsers table tr input[type=text] {
+        width: 100%; } }

 #extra-browsers-setting {
  border: 1px solid var(--color-grey-800);
@@ -434,6 +449,13 @@ html[data-darkmode="true"] #toggle-light-mode .icon-dark {
    padding: 20px;
    border: 1px solid #ccc;
    border-top: none; }
+  .minitabs-wrapper .minitabs-content {
+    width: 100%;
+    display: flex; }
+    .minitabs-wrapper .minitabs-content > div {
+      flex: 1 1 auto;
+      min-width: 0;
+      overflow: scroll; }
  .minitabs-wrapper .minitabs {
    display: flex;
    border-bottom: 1px solid #ccc; }
@@ -488,11 +510,9 @@ body.preview-text-enabled {
    font-family: "Courier New", Courier, monospace;
    /* Sets the font to a monospace type */
    font-size: 70%;
-    overflow-x: scroll;
+    word-break: break-word;
    white-space: pre-wrap;
-    /* Preserves whitespace and line breaks like <pre> */
-    overflow-wrap: break-word;
-    /* Allows long words to break and wrap to the next line */ }
+    /* Preserves whitespace and line breaks like <pre> */ }

 #activate-text-preview {
  right: 0;
@@ -568,9 +588,26 @@ button.toggle-button {
  padding: 5px;
  display: flex;
  justify-content: space-between;
-  border-bottom: 2px solid var(--color-menu-accent);
  align-items: center; }

+#pure-menu-horizontal-spinner {
+  height: 3px;
+  background: linear-gradient(-75deg, #ff6000, #ff8f00, #ffdd00, #ed0000);
+  background-size: 400% 400%;
+  width: 100%;
+  animation: gradient 200s ease infinite; }
+
+body.spinner-active #pure-menu-horizontal-spinner {
+  animation: gradient 1s ease infinite; }
+
+@keyframes gradient {
+  0% {
+    background-position: 0% 50%; }
+  50% {
+    background-position: 100% 50%; }
+  100% {
+    background-position: 0% 50%; } }
+
 .pure-menu-heading {
  color: var(--color-text-menu-heading); }

@@ -580,8 +617,11 @@ button.toggle-button {
    background-color: var(--color-background-menu-link-hover);
    color: var(--color-text-menu-link-hover); }

+.tab-pane-inner {
+  scroll-margin-top: 200px; }
+
 section.content {
-  padding-top: 5em;
+  padding-top: 100px;
  padding-bottom: 1em;
  flex-direction: column;
  display: flex;
--- a/changedetectionio/store.py
+++ b/changedetectionio/store.py
@@ -4,6 +4,7 @@ from flask import (
    flash
 )

+from .html_tools import TRANSLATE_WHITESPACE_TABLE
 from . model import App, Watch
 from copy import deepcopy, copy
 from os import path, unlink
@@ -750,17 +751,17 @@ class ChangeDetectionStore:
    def update_5(self):
        # If the watch notification body, title look the same as the global one, unset it, so the watch defaults back to using the main settings
        # In other words - the watch notification_title and notification_body are not needed if they are the same as the default one
-        current_system_body = self.data['settings']['application']['notification_body'].translate(str.maketrans('', '', "\r\n "))
-        current_system_title = self.data['settings']['application']['notification_body'].translate(str.maketrans('', '', "\r\n "))
+        current_system_body = self.data['settings']['application']['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE)
+        current_system_title = self.data['settings']['application']['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE)
        for uuid, watch in self.data['watching'].items():
            try:
                watch_body = watch.get('notification_body', '')
-                if watch_body and watch_body.translate(str.maketrans('', '', "\r\n ")) == current_system_body:
+                if watch_body and watch_body.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_body:
                    # Looks the same as the default one, so unset it
                    watch['notification_body'] = None

                watch_title = watch.get('notification_title', '')
-                if watch_title and watch_title.translate(str.maketrans('', '', "\r\n ")) == current_system_title:
+                if watch_title and watch_title.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_title:
                    # Looks the same as the default one, so unset it
                    watch['notification_title'] = None
            except Exception as e:
--- a/changedetectionio/templates/base.html
+++ b/changedetectionio/templates/base.html
@@ -35,7 +35,9 @@

  <body class="">
    <div class="header">
-      <div class="home-menu pure-menu pure-menu-horizontal pure-menu-fixed" id="nav-menu">
+    <div class="pure-menu-fixed" style="width: 100%;">
+      <div class="home-menu pure-menu pure-menu-horizontal" id="nav-menu">
+
        {% if has_password and not current_user.is_authenticated %}
          <a class="pure-menu-heading" href="https://changedetection.io" rel="noopener">
            <strong>Change</strong>Detection.io</a>
@@ -129,7 +131,12 @@
          </li>
        </ul>
      </div>
+      <div id="pure-menu-horizontal-spinner"></div>
+      </div>
+
    </div>
+
+
    {% if hosted_sticky %}
      <div class="sticky-tab" id="hosted-sticky">
        <a href="https://changedetection.io/?ref={{guid}}">Let us host your instance!</a>
--- a/changedetectionio/templates/diff.html
+++ b/changedetectionio/templates/diff.html
@@ -14,7 +14,7 @@

 <div id="settings">
    <form class="pure-form " action="" method="GET" id="diff-form">
-        <fieldset>
+        <fieldset class="diff-fieldset">
            {% if versions|length >= 1 %}
                <strong>Compare</strong>
                <del class="change"><span>from</span></del>
@@ -33,7 +33,7 @@
                        </option>
                    {% endfor %}
                </select>
-                <button type="submit" class="pure-button pure-button-primary">Go</button>
+                <button type="submit" class="pure-button pure-button-primary reset-margin">Go</button>
            {% endif %}
        </fieldset>
        <fieldset>
--- a/changedetectionio/templates/edit.html
+++ b/changedetectionio/templates/edit.html
@@ -26,7 +26,6 @@
 </script>
 <script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
-<script src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='visual-selector.js')}}" defer></script>
 {% if playwright_enabled %}
@@ -330,9 +329,9 @@ nav
                        {{ render_checkbox_field(form.filter_text_added) }}
                        {{ render_checkbox_field(form.filter_text_replaced) }}
                        {{ render_checkbox_field(form.filter_text_removed) }}
-                    <span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span>
-                    <span class="pure-form-message-inline">So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
-                    <span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
+                    <span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span><br>
+                    <span class="pure-form-message-inline">&nbsp;So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
+                    <span class="pure-form-message-inline">&nbsp;When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
                </fieldset>
                <fieldset class="pure-control-group">
                    {{ render_checkbox_field(form.check_unique_lines) }}
@@ -371,7 +370,7 @@ nav
 ") }}
                    <span class="pure-form-message-inline">
                        <ul>
-                            <li>Matching text will be <strong>removed</strong> from the text snapshot</li>
+                            <li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li>
                            <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
                            <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
                            <li>Changing this will affect the comparison checksum which may trigger an alert</li>
@@ -398,7 +397,9 @@ Unavailable") }}
                </fieldset>
                <fieldset>
                    <div class="pure-control-group">
-                        {{ render_field(form.extract_text, rows=5, placeholder="\d+ online") }}
+                        {{ render_field(form.extract_text, rows=5, placeholder="/.+?\d+ comments.+?/
+ or
+keyword") }}
                        <span class="pure-form-message-inline">
                    <ul>
                        <li>Extracts text in the final output (line by line) after other filters using regular expressions or string match;
@@ -424,14 +425,15 @@ Unavailable") }}
                    </script>
                    <br>
                    {#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#}
-
                    <div class="minitabs-wrapper">
-                        <div id="text-preview-inner" class="monospace-preview">
-                            <p>Loading...</p>
-                        </div>
-                        <div id="text-preview-before-inner" style="display: none;" class="monospace-preview">
-                            <p>Loading...</p>
-                        </div>
+                      <div class="minitabs-content">
+                          <div id="text-preview-inner" class="monospace-preview">
+                              <p>Loading...</p>
+                          </div>
+                          <div id="text-preview-before-inner" style="display: none;" class="monospace-preview">
+                              <p>Loading...</p>
+                          </div>
+                      </div>
                    </div>
            </div>
          </div>
--- a/changedetectionio/templates/settings.html
+++ b/changedetectionio/templates/settings.html
@@ -172,7 +172,7 @@ nav
                    <span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br>
                    <span class="pure-form-message-inline">
                        <ul>
-                            <li>Matching text will be <strong>removed</strong> from the text snapshot</li>
+                            <li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li>
                            <li>Note: This is applied globally in addition to the per-watch rules.</li>
                            <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
                            <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
@@ -276,7 +276,7 @@ nav
                <div class="pure-control-group">
                    {{ render_button(form.save_button) }}
                    <a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a>
-                    <a href="{{url_for('clear_all_history')}}" class="pure-button button-small button-cancel">Clear Snapshot History</a>
+                    <a href="{{url_for('clear_all_history')}}" class="pure-button button-small button-error">Clear Snapshot History</a>
                </div>
            </div>
        </form>
--- a/changedetectionio/tests/itemprop_test_examples/README.md
+++ b/changedetectionio/tests/itemprop_test_examples/README.md
@@ -0,0 +1,6 @@
+# A list of real world examples!
+
+The price should always be 666.66 for our tests
+
+see test_restock_itemprop.py::test_special_prop_examples
+
--- a/changedetectionio/tests/itemprop_test_examples/a.txt
+++ b/changedetectionio/tests/itemprop_test_examples/a.txt
@@ -0,0 +1,25 @@
+<div class="PriceSection PriceSection_PriceSection__Vx1_Q PriceSection_variantHuge__P9qxg PdpPriceSection"
+     data-testid="price-section"
+     data-optly-product-tile-price-section="true"><span
+        class="PriceRange ProductPrice variant-huge" itemprop="offers"
+        itemscope="" itemtype="http://schema.org/Offer"><div
+        class="VisuallyHidden_VisuallyHidden__VBD83">$155.55</div><span
+        aria-hidden="true" class="Price variant-huge" data-testid="price"
+        itemprop="price"><sup class="sup" data-testid="price-symbol"
+                              itemprop="priceCurrency" content="AUD">$</sup><span
+        class="dollars" data-testid="price-value" itemprop="price"
+        content="155.55">155.55</span><span class="extras"><span class="sup"
+                                                              data-testid="price-sup"></span></span></span></span>
+</div>
+
+<script type="application/ld+json">{
+                                "@type": "Product",
+                                "@context": "https://schema.org",
+                                "name": "test",
+                                "description": "test",
+                                "offers": {
+                                    "@type": "Offer",
+                                    "priceCurrency": "AUD",
+                                    "price": 155.55
+                                },
+                            }</script>
--- a/changedetectionio/tests/proxy_list/test_proxy.py
+++ b/changedetectionio/tests/proxy_list/test_proxy.py
@@ -16,4 +16,4 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
    )

    assert b"1 Imported" in res.data
-    time.sleep(3)
+    wait_for_all_checks(client)
--- a/changedetectionio/tests/proxy_socks5/test_socks5_proxy.py
+++ b/changedetectionio/tests/proxy_socks5/test_socks5_proxy.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python3
+import json
 import os
 from flask import url_for
-from changedetectionio.tests.util import live_server_setup, wait_for_all_checks
+from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client


 def set_response():
@@ -18,7 +19,6 @@ def set_response():
        f.write(data)
    time.sleep(1)

-
 def test_socks5(client, live_server, measure_memory_usage):
    live_server_setup(live_server)
    set_response()
@@ -79,3 +79,24 @@ def test_socks5(client, live_server, measure_memory_usage):

    # Should see the proper string
    assert "Awesome, you made it".encode('utf-8') in res.data
+
+    # PROXY CHECKER WIDGET CHECK - this needs more checking
+    uuid = extract_UUID_from_client(client)
+
+    res = client.get(
+        url_for("check_proxies.start_check", uuid=uuid),
+        follow_redirects=True
+    )
+    # It's probably already finished super fast :(
+    #assert b"RUNNING" in res.data
+    
+    wait_for_all_checks(client)
+    res = client.get(
+        url_for("check_proxies.get_recheck_status", uuid=uuid),
+        follow_redirects=True
+    )
+    assert b"OK" in res.data
+
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
+
--- a/changedetectionio/tests/test_add_replace_remove_filter.py
+++ b/changedetectionio/tests/test_add_replace_remove_filter.py
@@ -77,6 +77,8 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory

    # The trigger line is REMOVED,  this should trigger
    set_original(excluding='The golden line')
+
+    # Check in the processor here what's going on, it's triggering empty-reply and no change.
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
@@ -151,7 +153,6 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa

    # A line thats not the trigger should not trigger anything
    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
-
    assert b'1 watches queued for rechecking.' in res.data

    wait_for_all_checks(client)
@@ -173,6 +174,5 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
        assert b'-Oh yes please-' in response
        assert '网站监测 内容更新了'.encode('utf-8') in response

-
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
--- a/changedetectionio/tests/test_block_while_text_present.py
+++ b/changedetectionio/tests/test_block_while_text_present.py
@@ -65,11 +65,8 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
    live_server_setup(live_server)
    # Use a mix of case in ZzZ to prove it works case-insensitive.
    ignore_text = "out of stoCk\r\nfoobar"
-
    set_original_ignore_response()

-    # Give the endpoint time to spin up
-    time.sleep(1)

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
@@ -127,13 +124,24 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
    assert b'unviewed' not in res.data
    assert b'/test-endpoint' in res.data

+    # 2548
+    # Going back to the ORIGINAL should NOT trigger a change
+    set_original_ignore_response()
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    wait_for_all_checks(client)
+    res = client.get(url_for("index"))
+    assert b'unviewed' not in res.data

-    # Now we set a change where the text is gone, it should now trigger
+
+    # Now we set a change where the text is gone AND it's different content, it should now trigger
    set_modified_response_minus_block_text()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data

+
+
+
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
--- a/changedetectionio/tests/test_element_removal.py
+++ b/changedetectionio/tests/test_element_removal.py
@@ -5,7 +5,7 @@ import time
 from flask import url_for

 from ..html_tools import *
-from .util import live_server_setup
+from .util import live_server_setup, wait_for_all_checks


 def test_setup(live_server):
@@ -119,12 +119,10 @@ across multiple lines


 def test_element_removal_full(client, live_server, measure_memory_usage):
-    sleep_time_for_fetch_thread = 3
+    #live_server_setup(live_server)

    set_original_response()

-    # Give the endpoint time to spin up
-    time.sleep(1)

    # Add our URL to the import page
    test_url = url_for("test_endpoint", _external=True)
@@ -132,7 +130,8 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
        url_for("import_page"), data={"urls": test_url}, follow_redirects=True
    )
    assert b"1 Imported" in res.data
-    time.sleep(1)
+    wait_for_all_checks(client)
+
    # Goto the edit page, add the filter data
    # Not sure why \r needs to be added - absent of the #changetext this is not necessary
    subtractive_selectors_data = "header\r\nfooter\r\nnav\r\n#changetext"
@@ -148,6 +147,7 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
        follow_redirects=True,
    )
    assert b"Updated watch." in res.data
+    wait_for_all_checks(client)

    # Check it saved
    res = client.get(
@@ -156,10 +156,10 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
    assert bytes(subtractive_selectors_data.encode("utf-8")) in res.data

    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    assert b'1 watches queued for rechecking.' in res.data

-    # Give the thread time to pick it up
-    time.sleep(sleep_time_for_fetch_thread)
+    wait_for_all_checks(client)

    # so that we set the state to 'unviewed' after all the edits
    client.get(url_for("diff_history_page", uuid="first"))
@@ -168,10 +168,11 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
    set_modified_response()

    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    assert b'1 watches queued for rechecking.' in res.data

    # Give the thread time to pick it up
-    time.sleep(sleep_time_for_fetch_thread)
+    wait_for_all_checks(client)

    # There should not be an unviewed change, as changes should be removed
    res = client.get(url_for("index"))
--- a/changedetectionio/tests/test_extract_regex.py
+++ b/changedetectionio/tests/test_extract_regex.py
@@ -71,7 +71,7 @@ def test_setup(client, live_server, measure_memory_usage):
    live_server_setup(live_server)

 def test_check_filter_multiline(client, live_server, measure_memory_usage):
-    #live_server_setup(live_server)
+   # live_server_setup(live_server)
    set_multiline_response()

    # Add our URL to the import page
--- a/changedetectionio/tests/test_ignore_regex_text.py
+++ b/changedetectionio/tests/test_ignore_regex_text.py
@@ -33,13 +33,17 @@ def test_strip_regex_text_func():

    stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)

-    assert b"but 1 lines" in stripped_content
-    assert b"igNORe-cAse text" not in stripped_content
-    assert b"but 1234 lines" not in stripped_content
-    assert b"really" not in stripped_content
-    assert b"not this" not in stripped_content
+    assert "but 1 lines" in stripped_content
+    assert "igNORe-cAse text" not in stripped_content
+    assert "but 1234 lines" not in stripped_content
+    assert "really" not in stripped_content
+    assert "not this" not in stripped_content

    # Check line number reporting
    stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines, mode="line numbers")
    assert stripped_content == [2, 5, 6, 7, 8, 10]

+    # Check that linefeeds are preserved when there are no matching ignores
+    content = "some text\n\nand other text\n"
+    stripped_content = html_tools.strip_ignore_text(content, ignore_lines)
+    assert content == stripped_content
--- a/changedetectionio/tests/test_ignore_text.py
+++ b/changedetectionio/tests/test_ignore_text.py
@@ -22,10 +22,15 @@ def test_strip_text_func():
    ignore_lines = ["sometimes"]

    stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
+    assert "sometimes" not in stripped_content
+    assert "Some content" in stripped_content

-    assert b"sometimes" not in stripped_content
-    assert b"Some content" in stripped_content
+    # Check that line feeds don't get chewed up when something is found
+    test_content = "Some initial text\n\nWhich is across multiple lines\n\nZZZZz\n\n\nSo let's see what happens."
+    ignore = ['something irrelevent but just to check', 'XXXXX', 'YYYYY', 'ZZZZZ']

+    stripped_content = html_tools.strip_ignore_text(test_content, ignore)
+    assert stripped_content == "Some initial text\n\nWhich is across multiple lines\n\n\n\nSo let's see what happens."

 def set_original_ignore_response():
    test_return_data = """<html>
@@ -141,8 +146,6 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa



-
-
    # Just to be sure.. set a regular modified change..
    set_modified_original_ignore_response()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
@@ -153,17 +156,17 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa

    res = client.get(url_for("preview_page", uuid="first"))

-    # Should no longer be in the preview
-    assert b'new ignore stuff' not in res.data
+    # SHOULD BE in the preview, it was added in set_modified_original_ignore_response()
+    # and we have "new ignore stuff" in ignore_text
+    # it is only ignored, it is not removed (it will be highlighted too)
+    assert b'new ignore stuff' in res.data

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

+# When adding some ignore text, it should not trigger a change, even if something else on that line changes
 def test_check_global_ignore_text_functionality(client, live_server, measure_memory_usage):
-
-    # Give the endpoint time to spin up
-    time.sleep(1)
-
+    #live_server_setup(live_server)
    ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ"
    set_original_ignore_response()

@@ -172,6 +175,7 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
        url_for("settings_page"),
        data={
            "requests-time_between_check-minutes": 180,
+            "application-ignore_whitespace": "y",
            "application-global_ignore_text": ignore_text,
            'application-fetch_backend': "html_requests"
        },
@@ -192,9 +196,7 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
    # Give the thread time to pick it up
    wait_for_all_checks(client)

-
-    # Goto the edit page of the item, add our ignore text
-    # Add our URL to the import page
+    #Adding some ignore text should not trigger a change
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"ignore_text": "something irrelevent but just to check", "url": test_url, 'fetch_backend': "html_requests"},
@@ -210,20 +212,15 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-
-    # Give the thread time to pick it up
    wait_for_all_checks(client)
-
-    # so that we are sure everything is viewed and in a known 'nothing changed' state
-    res = client.get(url_for("diff_history_page", uuid="first"))
-
-    # It should report nothing found (no new 'unviewed' class)
+    # It should report nothing found (no new 'unviewed' class), adding random ignore text should not cause a change
    res = client.get(url_for("index"))
    assert b'unviewed' not in res.data
    assert b'/test-endpoint' in res.data
+#####

-
-    #  Make a change which includes the ignore text
+    # Make a change which includes the ignore text, it should be ignored and no 'change' triggered
+    # It adds text with "ZZZZzzzz" and "ZZZZ" is in the ignore list
    set_modified_ignore_response()

    # Trigger a check
@@ -233,6 +230,7 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem

    # It should report nothing found (no new 'unviewed' class)
    res = client.get(url_for("index"))
+
    assert b'unviewed' not in res.data
    assert b'/test-endpoint' in res.data

--- a/changedetectionio/tests/test_live_preview.py
+++ b/changedetectionio/tests/test_live_preview.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+
+from flask import url_for
+from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
+
+
+def set_response():
+
+    data = f"""<html>
+       <body>Awesome, you made it<br>
+yeah the socks request worked<br>
+something to ignore<br>
+something to trigger<br>
+     </body>
+     </html>
+    """
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write(data)
+
+def test_content_filter_live_preview(client, live_server, measure_memory_usage):
+    live_server_setup(live_server)
+    set_response()
+
+    test_url = url_for('test_endpoint', _external=True)
+
+    res = client.post(
+        url_for("form_quick_watch_add"),
+        data={"url": test_url, "tags": ''},
+        follow_redirects=True
+    )
+    uuid = extract_UUID_from_client(client)
+    res = client.post(
+        url_for("edit_page", uuid=uuid),
+        data={
+            "include_filters": "",
+            "fetch_backend": 'html_requests',
+            "ignore_text": "something to ignore",
+            "trigger_text": "something to trigger",
+            "url": test_url,
+        },
+        follow_redirects=True
+    )
+    assert b"Updated watch." in res.data
+    wait_for_all_checks(client)
+
+    # The endpoint is a POST and accepts the form values to override the watch preview
+    import json
+
+    # DEFAULT OUTPUT WITHOUT ANYTHING UPDATED/CHANGED - SHOULD SEE THE WATCH DEFAULTS
+    res = client.post(
+        url_for("watch_get_preview_rendered", uuid=uuid)
+    )
+    default_return = json.loads(res.data.decode('utf-8'))
+    assert default_return.get('after_filter')
+    assert default_return.get('before_filter')
+    assert default_return.get('ignore_line_numbers') == [3] # "something to ignore" line 3
+    assert default_return.get('trigger_line_numbers') == [4] # "something to trigger" line 4
+
+    # SEND AN UPDATE AND WE SHOULD SEE THE OUTPUT CHANGE SO WE KNOW TO HIGHLIGHT NEW STUFF
+    res = client.post(
+        url_for("watch_get_preview_rendered", uuid=uuid),
+        data={
+            "include_filters": "",
+            "fetch_backend": 'html_requests',
+            "ignore_text": "sOckS", # Also be sure case insensitive works
+            "trigger_text": "AweSOme",
+            "url": test_url,
+        },
+    )
+    reply = json.loads(res.data.decode('utf-8'))
+    assert reply.get('after_filter')
+    assert reply.get('before_filter')
+    assert reply.get('ignore_line_numbers') == [2]  # Ignored - "socks" on line 2
+    assert reply.get('trigger_line_numbers') == [1]  # Triggers "Awesome" in line 1
+
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
--- a/changedetectionio/tests/test_notification.py
+++ b/changedetectionio/tests/test_notification.py
@@ -429,3 +429,24 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
        follow_redirects=True
    )

+    # 2727 - be sure a test notification works when there are zero watches (they should all be deleted now)
+
+    os.unlink("test-datastore/notification.txt")
+
+
+    ######### Test global/system settings
+    res = client.post(
+        url_for("ajax_callback_send_notification_test")+"?mode=global-settings",
+        data={"notification_urls": test_notification_url},
+        follow_redirects=True
+    )
+
+    assert res.status_code != 400
+    assert res.status_code != 500
+
+    # Give apprise time to fire
+    time.sleep(4)
+
+    with open("test-datastore/notification.txt", 'r') as f:
+        x = f.read()
+        assert 'change detection is cool 网站监测 内容更新了' in x
--- a/changedetectionio/tests/test_preview_endpoints.py
+++ b/changedetectionio/tests/test_preview_endpoints.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+
+import time
+from flask import url_for
+from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
+
+
+# A fetched PDF should be converted to text, include its document checksum, and register a change when the file is replaced
+def test_fetch_pdf(client, live_server, measure_memory_usage):
+    import shutil
+    shutil.copy("tests/test.pdf", "test-datastore/endpoint-test.pdf")
+
+    live_server_setup(live_server)
+    test_url = url_for('test_pdf_endpoint', _external=True)
+    # Add our URL to the import page
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+
+    assert b"1 Imported" in res.data
+
+    wait_for_all_checks(client)
+
+    res = client.get(
+        url_for("preview_page", uuid="first"),
+        follow_redirects=True
+    )
+
+    # PDF header should not be there (it was converted to text)
+    assert b'PDF' not in res.data[:10]
+    assert b'hello world' in res.data
+
+    # So we know if the file changes in other ways
+    import hashlib
+    original_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper()
+    # We should have one
+    assert len(original_md5) > 0
+    # And it's going to be in the document
+    assert b'Document checksum - ' + bytes(str(original_md5).encode('utf-8')) in res.data
+
+    shutil.copy("tests/test2.pdf", "test-datastore/endpoint-test.pdf")
+    changed_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper()
+    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    assert b'1 watches queued for rechecking.' in res.data
+
+    wait_for_all_checks(client)
+
+    # Now something should be ready, indicated by having an 'unviewed' class
+    res = client.get(url_for("index"))
+    assert b'unviewed' in res.data
+
+    # The original checksum should not be here anymore (cdio adds it to the bottom of the text)
+
+    res = client.get(
+        url_for("preview_page", uuid="first"),
+        follow_redirects=True
+    )
+
+    assert original_md5.encode('utf-8') not in res.data
+    assert changed_md5.encode('utf-8') in res.data
+
+    res = client.get(
+        url_for("diff_history_page", uuid="first"),
+        follow_redirects=True
+    )
+
+    assert original_md5.encode('utf-8') in res.data
+    assert changed_md5.encode('utf-8') in res.data
+
+    assert b'here is a change' in res.data
--- a/changedetectionio/tests/test_restock_itemprop.py
+++ b/changedetectionio/tests/test_restock_itemprop.py
@@ -3,7 +3,7 @@ import os
 import time

 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
+from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
 from ..notification import default_notification_format

 instock_props = [
@@ -413,3 +413,31 @@ def test_data_sanity(client, live_server):
    res = client.get(
        url_for("edit_page", uuid="first"))
    assert test_url2.encode('utf-8') in res.data
+
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
+
+# All examples should give a price of 155.55
+def test_special_prop_examples(client, live_server):
+    import glob
+    #live_server_setup(live_server)
+
+    test_url = url_for('test_endpoint', _external=True)
+    check_path = os.path.join(os.path.dirname(__file__), "itemprop_test_examples", "*.txt")
+    files = glob.glob(check_path)
+    assert files
+    for test_example_filename in files:
+        with open(test_example_filename, 'r') as example_f:
+            with open("test-datastore/endpoint-content.txt", "w") as test_f:
+                test_f.write(f"<html><body>{example_f.read()}</body></html>")
+
+            # Now fetch it and check the price worked
+            client.post(
+                url_for("form_quick_watch_add"),
+                data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
+                follow_redirects=True
+            )
+            wait_for_all_checks(client)
+            res = client.get(url_for("index"))
+            assert b'ception' not in res.data
+            assert b'155.55' in res.data
--- a/changedetectionio/tests/unit/test_watch_model.py
+++ b/changedetectionio/tests/unit/test_watch_model.py
@@ -18,12 +18,13 @@ class TestDiffBuilder(unittest.TestCase):

        watch['last_viewed'] = 110

-        watch.save_history_text(contents=b"hello world", timestamp=100, snapshot_id=str(uuid_builder.uuid4()))
-        watch.save_history_text(contents=b"hello world", timestamp=105, snapshot_id=str(uuid_builder.uuid4()))
-        watch.save_history_text(contents=b"hello world", timestamp=109, snapshot_id=str(uuid_builder.uuid4()))
-        watch.save_history_text(contents=b"hello world", timestamp=112, snapshot_id=str(uuid_builder.uuid4()))
-        watch.save_history_text(contents=b"hello world", timestamp=115, snapshot_id=str(uuid_builder.uuid4()))
-        watch.save_history_text(contents=b"hello world", timestamp=117, snapshot_id=str(uuid_builder.uuid4()))
+        # Contents are always returned from the browser/requests/etc as str (Unicode text, not bytes)
+        watch.save_history_text(contents="hello world", timestamp=100, snapshot_id=str(uuid_builder.uuid4()))
+        watch.save_history_text(contents="hello world", timestamp=105, snapshot_id=str(uuid_builder.uuid4()))
+        watch.save_history_text(contents="hello world", timestamp=109, snapshot_id=str(uuid_builder.uuid4()))
+        watch.save_history_text(contents="hello world", timestamp=112, snapshot_id=str(uuid_builder.uuid4()))
+        watch.save_history_text(contents="hello world", timestamp=115, snapshot_id=str(uuid_builder.uuid4()))
+        watch.save_history_text(contents="hello world", timestamp=117, snapshot_id=str(uuid_builder.uuid4()))

        p = watch.get_next_snapshot_key_to_last_viewed
        assert p == "112", "Correct last-viewed timestamp was detected"
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@@ -19,11 +19,9 @@ from loguru import logger
 class update_worker(threading.Thread):
    current_uuid = None

-    def __init__(self, q, notification_q, app, datastore, *args, **kwargs):
-        self.q = q
+    def __init__(self, app, *args, **kwargs):
+
        self.app = app
-        self.notification_q = notification_q
-        self.datastore = datastore
        super().__init__(*args, **kwargs)

    def queue_notification_for_watch(self, notification_q, n_object, watch):
@@ -81,7 +79,8 @@ class update_worker(threading.Thread):
            'watch_url': watch.get('url') if watch else None,
        })

-        n_object.update(watch.extra_notification_token_values())
+        if watch:
+            n_object.update(watch.extra_notification_token_values())

        logger.trace(f"Main rendered notification placeholders (diff_added etc) calculated in {time.time()-now:.3f}s")
        logger.debug("Queued notification for sending")
@@ -101,19 +100,19 @@ class update_worker(threading.Thread):
        v = watch.get(var_name)
        if v and not watch.get('notification_muted'):
            if var_name == 'notification_format' and v == default_notification_format_for_watch:
-                return self.datastore.data['settings']['application'].get('notification_format')
+                return self.app.datastore.data['settings']['application'].get('notification_format')

            return v

-        tags = self.datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
+        tags = self.app.datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
        if tags:
            for tag_uuid, tag in tags.items():
                v = tag.get(var_name)
                if v and not tag.get('notification_muted'):
                    return v

-        if self.datastore.data['settings']['application'].get(var_name):
-            return self.datastore.data['settings']['application'].get(var_name)
+        if self.app.datastore.data['settings']['application'].get(var_name):
+            return self.app.datastore.data['settings']['application'].get(var_name)

        # Otherwise could be defaults
        if var_name == 'notification_format':
@@ -128,7 +127,7 @@ class update_worker(threading.Thread):
    def send_content_changed_notification(self, watch_uuid):

        n_object = {}
-        watch = self.datastore.data['watching'].get(watch_uuid)
+        watch = self.app.datastore.data['watching'].get(watch_uuid)
        if not watch:
            return

@@ -155,17 +154,17 @@ class update_worker(threading.Thread):
            queued = True

            count = watch.get('notification_alert_count', 0) + 1
-            self.datastore.update_watch(uuid=watch_uuid, update_obj={'notification_alert_count': count})
+            self.app.datastore.update_watch(uuid=watch_uuid, update_obj={'notification_alert_count': count})

-            self.queue_notification_for_watch(notification_q=self.notification_q, n_object=n_object, watch=watch)
+            self.queue_notification_for_watch(notification_q=self.app.notification_q, n_object=n_object, watch=watch)

        return queued


    def send_filter_failure_notification(self, watch_uuid):

-        threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
-        watch = self.datastore.data['watching'].get(watch_uuid)
+        threshold = self.app.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
+        watch = self.app.datastore.data['watching'].get(watch_uuid)
        if not watch:
            return

@@ -178,8 +177,8 @@ class update_worker(threading.Thread):
        if len(watch['notification_urls']):
            n_object['notification_urls'] = watch['notification_urls']

-        elif len(self.datastore.data['settings']['application']['notification_urls']):
-            n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
+        elif len(self.app.datastore.data['settings']['application']['notification_urls']):
+            n_object['notification_urls'] = self.app.datastore.data['settings']['application']['notification_urls']

        # Only prepare to notify if the rules above matched
        if 'notification_urls' in n_object:
@@ -188,16 +187,16 @@ class update_worker(threading.Thread):
                'uuid': watch_uuid,
                'screenshot': None
            })
-            self.notification_q.put(n_object)
+            self.app.notification_q.put(n_object)
            logger.debug(f"Sent filter not found notification for {watch_uuid}")
        else:
            logger.debug(f"NOT sending filter not found notification for {watch_uuid} - no notification URLs")

    def send_step_failure_notification(self, watch_uuid, step_n):
-        watch = self.datastore.data['watching'].get(watch_uuid, False)
+        watch = self.app.datastore.data['watching'].get(watch_uuid, False)
        if not watch:
            return
-        threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
+        threshold = self.app.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
        n_object = {'notification_title': "Changedetection.io - Alert - Browser step at position {} could not be run".format(step_n+1),
                    'notification_body': "Your configured browser step at position {} for {{{{watch_url}}}} "
                                         "did not appear on the page after {} attempts, did the page change layout? "
@@ -208,8 +207,8 @@ class update_worker(threading.Thread):
        if len(watch['notification_urls']):
            n_object['notification_urls'] = watch['notification_urls']

-        elif len(self.datastore.data['settings']['application']['notification_urls']):
-            n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
+        elif len(self.app.datastore.data['settings']['application']['notification_urls']):
+            n_object['notification_urls'] = self.app.datastore.data['settings']['application']['notification_urls']

        # Only prepare to notify if the rules above matched
        if 'notification_urls' in n_object:
@@ -217,7 +216,7 @@ class update_worker(threading.Thread):
                'watch_url': watch['url'],
                'uuid': watch_uuid
            })
-            self.notification_q.put(n_object)
+            self.app.notification_q.put(n_object)
            logger.error(f"Sent step not found notification for {watch_uuid}")


@@ -225,7 +224,7 @@ class update_worker(threading.Thread):
        # All went fine, remove error artifacts
        cleanup_files = ["last-error-screenshot.png", "last-error.txt"]
        for f in cleanup_files:
-            full_path = os.path.join(self.datastore.datastore_path, uuid, f)
+            full_path = os.path.join(self.app.datastore.datastore_path, uuid, f)
            if os.path.isfile(full_path):
                os.unlink(full_path)

@@ -236,23 +235,23 @@ class update_worker(threading.Thread):
            update_handler = None

            try:
-                queued_item_data = self.q.get(block=False)
+                queued_item_data = self.app.update_q.get(block=False)
            except queue.Empty:
                pass

            else:
                uuid = queued_item_data.item.get('uuid')
                self.current_uuid = uuid
-                if uuid in list(self.datastore.data['watching'].keys()) and self.datastore.data['watching'][uuid].get('url'):
+                if uuid in list(self.app.datastore.data['watching'].keys()) and self.app.datastore.data['watching'][uuid].get('url'):
                    changed_detected = False
                    contents = b''
                    process_changedetection_results = True
                    update_obj = {}

                    # Clear last errors (move to preflight func?)
-                    self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
+                    self.app.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None

-                    watch = self.datastore.data['watching'].get(uuid)
+                    watch = self.app.datastore.data['watching'].get(uuid)

                    logger.info(f"Processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")
                    now = time.time()
@@ -260,9 +259,6 @@ class update_worker(threading.Thread):
                    try:
                        # Processor is what we are using for detecting the "Change"
                        processor = watch.get('processor', 'text_json_diff')
-                        # Abort processing when the content was the same as the last fetch
-                        skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')
-

                        # Init a new 'difference_detection_processor', first look in processors
                        processor_module_name = f"changedetectionio.processors.{processor}.processor"
@@ -272,22 +268,19 @@ class update_worker(threading.Thread):
                            print(f"Processor module '{processor}' not found.")
                            raise e

-                        update_handler = processor_module.perform_site_check(datastore=self.datastore,
+                        update_handler = processor_module.perform_site_check(datastore=self.app.datastore,
                                                                             watch_uuid=uuid
                                                                             )

                        update_handler.call_browser()

-                        changed_detected, update_obj, contents = update_handler.run_changedetection(
-                            watch=watch,
-                            skip_when_checksum_same=skip_when_same_checksum,
-                        )
+                        changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)

                        # Re #342
                        # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
                        # We then convert/.decode('utf-8') for the notification etc
-                        if not isinstance(contents, (bytes, bytearray)):
-                            raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
+#                        if not isinstance(contents, (bytes, bytearray)):
+#                            raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
                    except PermissionError as e:
                        logger.critical(f"File permission error updating file, watch: {uuid}")
                        logger.critical(str(e))
@@ -299,7 +292,7 @@ class update_worker(threading.Thread):
                            watch.save_screenshot(screenshot=e.screenshot)
                        if e.xpath_data:
                            watch.save_xpath_data(data=e.xpath_data)
-                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
+                        self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
                        process_changedetection_results = False

                    except content_fetchers_exceptions.ReplyWithContentButNoText as e:
@@ -316,7 +309,7 @@ class update_worker(threading.Thread):
                            else:
                                extra_help = ", it's possible that the filters were found, but contained no usable text."

-                        self.datastore.update_watch(uuid=uuid, update_obj={
+                        self.app.datastore.update_watch(uuid=uuid, update_obj={
                            'last_error': f"Got HTML content but no text found (With {e.status_code} reply code){extra_help}"
                        })

@@ -338,7 +331,8 @@ class update_worker(threading.Thread):
                        elif e.status_code == 500:
                            err_text = "Error - 500 (Internal server error) received from the web site"
                        else:
-                            err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code))
+                            extra = ' (Access denied or blocked)' if str(e.status_code).startswith('4') else ''
+                            err_text = f"Error - Request returned a HTTP error code {e.status_code}{extra}"

                        if e.screenshot:
                            watch.save_screenshot(screenshot=e.screenshot, as_error=True)
@@ -347,15 +341,15 @@ class update_worker(threading.Thread):
                        if e.page_text:
                            watch.save_error_text(contents=e.page_text)

-                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
+                        self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
                        process_changedetection_results = False

                    except FilterNotFoundInResponse as e:
-                        if not self.datastore.data['watching'].get(uuid):
+                        if not self.app.datastore.data['watching'].get(uuid):
                            continue

                        err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary."
-                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
+                        self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})

                        # Filter wasnt found, but we should still update the visual selector so that they can have a chance to set it up again
                        if e.screenshot:
@@ -369,7 +363,7 @@ class update_worker(threading.Thread):
                            c = watch.get('consecutive_filter_failures', 0)
                            c += 1
                            # Send notification if we reached the threshold?
-                            threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
+                            threshold = self.app.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
                            logger.debug(f"Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}")
                            if c >= threshold:
                                if not watch.get('notification_muted'):
@@ -378,7 +372,7 @@ class update_worker(threading.Thread):
                                c = 0
                                logger.debug(f"Reset filter failure count back to zero")

-                            self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
+                            self.app.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
                        else:
                            logger.trace(f"{uuid} - filter_failure_notification_send not enabled, skipping")

@@ -390,20 +384,20 @@ class update_worker(threading.Thread):
                        process_changedetection_results = False
                        changed_detected = False
                    except content_fetchers_exceptions.BrowserConnectError as e:
-                        self.datastore.update_watch(uuid=uuid,
+                        self.app.datastore.update_watch(uuid=uuid,
                                                    update_obj={'last_error': e.msg
                                                                }
                                                    )
                        process_changedetection_results = False
                    except content_fetchers_exceptions.BrowserFetchTimedOut as e:
-                        self.datastore.update_watch(uuid=uuid,
+                        self.app.datastore.update_watch(uuid=uuid,
                                                    update_obj={'last_error': e.msg
                                                                }
                                                    )
                        process_changedetection_results = False
                    except content_fetchers_exceptions.BrowserStepsStepException as e:

-                        if not self.datastore.data['watching'].get(uuid):
+                        if not self.app.datastore.data['watching'].get(uuid):
                            continue

                        error_step = e.step_n + 1
@@ -421,7 +415,7 @@ class update_worker(threading.Thread):

                        logger.debug(f"BrowserSteps exception at step {error_step} {str(e.original_e)}")

-                        self.datastore.update_watch(uuid=uuid,
+                        self.app.datastore.update_watch(uuid=uuid,
                                                    update_obj={'last_error': err_text,
                                                                'browser_steps_last_error_step': error_step
                                                                }
@@ -431,7 +425,7 @@ class update_worker(threading.Thread):
                            c = watch.get('consecutive_filter_failures', 0)
                            c += 1
                            # Send notification if we reached the threshold?
-                            threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
+                            threshold = self.app.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
                                                                                           0)
                            logger.error(f"Step for {uuid} not found, consecutive_filter_failures: {c}")
                            if threshold > 0 and c >= threshold:
@@ -439,26 +433,26 @@ class update_worker(threading.Thread):
                                    self.send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n)
                                c = 0

-                            self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
+                            self.app.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})

                        process_changedetection_results = False

                    except content_fetchers_exceptions.EmptyReply as e:
                        # Some kind of custom to-str handler in the exception handler that does this?
                        err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
-                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
+                        self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
                        process_changedetection_results = False
                    except content_fetchers_exceptions.ScreenshotUnavailable as e:
                        err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'"
-                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
+                        self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
                        process_changedetection_results = False
                    except content_fetchers_exceptions.JSActionExceptions as e:
                        err_text = "Error running JS Actions - Page request - "+e.message
                        if e.screenshot:
                            watch.save_screenshot(screenshot=e.screenshot, as_error=True)
-                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
+                        self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
                        process_changedetection_results = False
                    except content_fetchers_exceptions.PageUnloadable as e:
@@ -469,26 +463,26 @@ class update_worker(threading.Thread):
                        if e.screenshot:
                            watch.save_screenshot(screenshot=e.screenshot, as_error=True)

-                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
+                        self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code,
                                                                           'has_ldjson_price_data': None})
                        process_changedetection_results = False
                    except content_fetchers_exceptions.BrowserStepsInUnsupportedFetcher as e:
                        err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher."
-                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
+                        self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
                        process_changedetection_results = False
                        logger.error(f"Exception (BrowserStepsInUnsupportedFetcher) reached processing watch UUID: {uuid}")

                    except Exception as e:
                        logger.error(f"Exception reached processing watch UUID: {uuid}")
                        logger.error(str(e))
-                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)})
+                        self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)})
                        # Other serious error
                        process_changedetection_results = False

                    else:
                        # Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
-                        if not self.datastore.data['watching'].get(uuid):
+                        if not self.app.datastore.data['watching'].get(uuid):
                            continue

                        update_obj['content-type'] = update_handler.fetcher.get_all_headers().get('content-type', '').lower()
@@ -502,14 +496,14 @@ class update_worker(threading.Thread):

                        self.cleanup_error_artifacts(uuid)

-                    if not self.datastore.data['watching'].get(uuid):
+                    if not self.app.datastore.data['watching'].get(uuid):
                        continue
                    #
                    # Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
                    if process_changedetection_results:

                        # Extract <title> as title if possible/requested.
-                        if self.datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
+                        if self.app.datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
                            if not watch['title'] or not len(watch['title']):
                                try:
                                    update_obj['title'] = html_tools.extract_element(find='title', html_content=update_handler.fetcher.content)
@@ -520,7 +514,7 @@ class update_worker(threading.Thread):
                        # Now update after running everything
                        timestamp = round(time.time())
                        try:
-                            self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
+                            self.app.datastore.update_watch(uuid=uuid, update_obj=update_obj)


                            # Also save the snapshot on the first time checked, "last checked" will always be updated, so we just check history length.
@@ -558,7 +552,7 @@ class update_worker(threading.Thread):
                            # Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
                            logger.critical("!!!! Exception in update_worker while processing process_changedetection_results !!!")
                            logger.critical(str(e))
-                            self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
+                            self.app.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})


                    # Always record that we atleast tried
@@ -567,13 +561,13 @@ class update_worker(threading.Thread):
                    # Record the 'server' header reply, can be used for actions in the future like cloudflare/akamai workarounds
                    try:
                        server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
-                        self.datastore.update_watch(uuid=uuid,
+                        self.app.datastore.update_watch(uuid=uuid,
                                                    update_obj={'remote_server_reply': server_header}
                                                    )
                    except Exception as e:
                        pass

-                    self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
+                    self.app.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
                                                                       'last_checked': round(time.time()),
                                                                       'check_count': count
                                                                       })
--- a/requirements.txt
+++ b/requirements.txt
@@ -82,7 +82,7 @@ pytest-flask ~=1.2
 # Anything 4.0 and up but not 5.0
 jsonschema ~= 4.0

-
+apscheduler ~= 3.9
 loguru

 # For scraping all possible metadata relating to products so we can do better restock detection
Author	SHA1	Message	Date
dgtlmoon	587ac0fe46	Move global vars to all live under the 'app' var	2024-10-25 17:04:52 +02:00
dgtlmoon	34fe88af67	Adding pip limit	2024-10-22 10:50:52 +02:00
dgtlmoon	4b7f7f8379	Adding apscheduler	2024-10-22 10:46:56 +02:00
dgtlmoon	82e0b99b07	#2727 Notifications - Fix "send test notification" on empty list, includes test (#2731 )	2024-10-21 11:35:37 +02:00
Emmanuel Ojighoro	b0ff9d161e	UI - Fix mobile styling inconsistencies and resolve diff page overflow issue (#2716 )	2024-10-21 11:34:22 +02:00
dgtlmoon	c1dd681643	Filters - "Block change detection when text exists" should not trigger a change when the original text returns	2024-10-14 12:57:02 +02:00
dgtlmoon	ecafa27833	UI - More work on tab buttons hiding behind menu/header :-)	2024-10-11 22:54:09 +02:00
dgtlmoon	f7d4e58613	0.47.03	2024-10-11 17:33:00 +02:00
dgtlmoon	5bb47e47db	Remove same checksum skip check - saved a little CPU but added a lot of complexity (#2700 )	2024-10-11 17:28:42 +02:00
dgtlmoon	03151da68e	UI - Fix scroll offset / tab buttons hiding behind menu/header	2024-10-11 16:04:08 +02:00
dgtlmoon	a16a70229d	0.47.01	2024-10-11 15:02:17 +02:00
dgtlmoon	9476c1076b	Adding missing `apprise_plugin` for pypi/pip based installs	2024-10-11 15:01:27 +02:00
dgtlmoon	a4959b5971	0.47.00	2024-10-11 13:04:56 +02:00
dgtlmoon	a278fa22f2	Restock multiprice improvements (#2698 )	2024-10-11 11:43:35 +02:00
dgtlmoon	d39530b261	Test - Simple test for live preview	2024-10-11 11:07:12 +02:00
dgtlmoon	d4b4355ff5	Adding test for proxy checker/scanner (#2697 )	2024-10-11 09:52:55 +02:00
dgtlmoon	c1c8de3104	Fixing proxy checker (#2696 )	2024-10-11 00:19:19 +02:00
dgtlmoon	5a768d7db3	UTF-8 handling fixes, Improvements to whitespace filtering (#2691 )	2024-10-10 14:59:39 +02:00
dgtlmoon	f38429ec93	Testing - Tidyup (#2693 )	2024-10-10 12:45:23 +02:00
dgtlmoon	783926962d	Filters & Text - Preview refactor/improvements (#2689 )	2024-10-09 09:17:32 +02:00
Marc	6cd1d50a4f	Build - Add image source label to Dockerfile (Better Renovate and others support) (#2690 )	2024-10-09 08:30:23 +02:00
dgtlmoon	54a4970a4c	Custom JSON/POST Notifications - Log when it could not apply the application/json content-type header	2024-10-08 09:48:38 +02:00
dgtlmoon	fd00453e6d	UI - Filters live preview - improvements to layout	2024-10-08 08:59:10 +02:00
dgtlmoon	2842ffb205	Restock - Use the scraped 'Not in stock' product status over the metadata version (many website lie in the metadata) (#2684 )	2024-10-07 20:10:35 +02:00
dgtlmoon	ec4e2f5649	UI - Better 40x error message (#2685 )	2024-10-07 16:52:19 +02:00
dgtlmoon	fe8e3d1cb1	Visual Selector - Including <button> (#2686 )	2024-10-07 16:52:04 +02:00
dgtlmoon	69fbafbdb7	Stock/not-in-stock scraper - slight reliability improvement (#2687 )	2024-10-07 16:51:47 +02:00