WIP

2023-04-08 21:14:03 +02:00 · 2023-04-08 20:35:13 +02:00 · 2023-04-08 20:12:30 +02:00 · 2023-04-08 18:49:27 +02:00 · 2023-04-06 16:12:18 +02:00 · 2023-04-06 15:26:06 +02:00
83 changed files with 2657 additions and 1336 deletions
--- a/.github/workflows/containers.yml
+++ b/.github/workflows/containers.yml
@@ -98,6 +98,8 @@ jobs:
          platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
          cache-from: type=local,src=/tmp/.buildx-cache
          cache-to: type=local,dest=/tmp/.buildx-cache
+# Looks like this was disabled
+#          provenance: false

      # A new tagged release is required, which builds :tag and :latest
      - name: Build and push :tag
@@ -116,6 +118,8 @@ jobs:
          platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
          cache-from: type=local,src=/tmp/.buildx-cache
          cache-to: type=local,dest=/tmp/.buildx-cache
+# Looks like this was disabled
+#          provenance: false

      - name: Image digest
        run: echo step SHA ${{ steps.vars.outputs.sha_short }} tag ${{steps.vars.outputs.tag}} branch ${{steps.vars.outputs.branch}} digest ${{ steps.docker_build.outputs.digest }}
--- a/.github/workflows/pypi.yml
+++ b/.github/workflows/pypi.yml
@@ -1,38 +0,0 @@
-name: PyPi Test and Push tagged release
-
-# Triggers the workflow on push or pull request events
-on:
-  workflow_run:
-    workflows: ["ChangeDetection.io Test"]
-    tags: '*.*'
-    types: [completed]
-
-
-jobs:
-  test-build:
-    runs-on: ubuntu-latest
-    steps:
-
-      - uses: actions/checkout@v2
-      - name: Set up Python 3.9
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.9
-
-
-      - name: Test that pip builds without error
-        run: |
-          pip3 --version
-          python3 -m pip install wheel
-          python3 setup.py bdist_wheel
-          python3 -m pip install dist/changedetection.io-*-none-any.whl --force
-          changedetection.io -d /tmp -p 10000 &
-          sleep 3
-          curl http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null
-          killall -9 changedetection.io
-
-      # https://github.com/docker/build-push-action/blob/master/docs/advanced/test-before-push.md ?
-      # https://github.com/docker/buildx/issues/59 ? Needs to be one platform?
-
-      # https://github.com/docker/buildx/issues/495#issuecomment-918925854
-#if: ${{ github.event_name == 'release'}}
--- a/.github/workflows/test-container-build.yml
+++ b/.github/workflows/test-container-build.yml
@@ -10,11 +10,13 @@ on:
    paths:
      - requirements.txt
      - Dockerfile
+      - .github/workflows/*

  pull_request:
    paths:
      - requirements.txt
      - Dockerfile
+      - .github/workflows/*

  # Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
  # @todo: some kind of path filter for requirements.txt and Dockerfile
--- a/.github/workflows/test-only.yml
+++ b/.github/workflows/test-only.yml
@@ -50,10 +50,13 @@ jobs:
        run: |
          
          # Selenium fetch
-          docker run -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network  test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
+          docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
          
          # Playwright/Browserless fetch
-          docker run -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network  test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py'
+          docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py'
+          
+          # restock detection via playwright - added name=changedet here so that playwright/browserless can connect to it
+          docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'

      - name: Test proxy interaction
        run: |
@@ -67,10 +70,10 @@ jobs:
          sleep 3
          # Should return 0 (no error) when grep finds it
          curl -s http://localhost:5556 |grep -q checkbox-uuid
-          curl -s http://localhost:5556/rss|grep -q rss-specification
+          
          # and IPv6
          curl -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
-          curl -s -g -6 "http://[::1]:5556/rss"|grep -q rss-specification
+          

 #export WEBDRIVER_URL=http://localhost:4444/wd/hub
 #pytest tests/fetchers/test_content.py
--- a/.github/workflows/test-pip-build.yml
+++ b/.github/workflows/test-pip-build.yml
@@ -0,0 +1,36 @@
+name: ChangeDetection.io PIP package test
+
+# Triggers the workflow on push or pull request events
+
+# This line doesn't work, even though it is the documented one
+on: [push, pull_request]
+
+  # Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
+  # @todo: some kind of path filter for requirements.txt and Dockerfile
+jobs:
+  test-pip-build-basics:
+    runs-on: ubuntu-latest
+    steps:
+        - uses: actions/checkout@v2
+
+        - name: Set up Python 3.9
+          uses: actions/setup-python@v2
+          with:
+            python-version: "3.9"  # quoted so YAML keeps it a string (an unquoted 3.10 would be read as 3.1)
+
+
+        - name: Test that the basic pip built package runs without error
+          run: |
+            set -e
+            mkdir dist
+            pip3 install wheel
+            python3 setup.py bdist_wheel
+            pip3 install -r requirements.txt
+            # Remove the checkout's entry point and package dir so only the installed wheel can satisfy the run below
+            rm ./changedetection.py
+            rm -rf changedetectionio
+            pip3 install dist/changedetection.io*.whl
+            changedetection.io -d /tmp -p 10000 &
+            sleep 3
+            curl --fail http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null
+            killall -9 changedetection.io
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,6 +1,7 @@
 recursive-include changedetectionio/api *
 recursive-include changedetectionio/blueprint *
 recursive-include changedetectionio/model *
+recursive-include changedetectionio/processors *
 recursive-include changedetectionio/res *
 recursive-include changedetectionio/static *
 recursive-include changedetectionio/templates *
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-## Web Site Change Detection, Monitoring and Notification.
+## Web Site Change Detection, Restock monitoring and notifications.

 **_Detect website content changes and perform meaningful actions - trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._**

@@ -49,6 +49,7 @@ Requires Playwright to be enabled.
 - Governmental department updates (changes are often only on their websites)
 - New software releases, security advisories when you're not on their mailing list.
 - Festivals with changes
+- Discogs restock alerts and monitoring
 - Realestate listing changes
 - Know when your favourite whiskey is on sale, or other special deals are announced before anyone else
 - COVID related news from government websites
@@ -63,6 +64,8 @@ Requires Playwright to be enabled.
 - You have a very sensitive list of URLs to watch and you do _not_ want to use the paid alternatives. (Remember, _you_ are the product)
 - Get notified when certain keywords appear in Twitter search results
 - Proactively search for jobs, get notified when companies update their careers page, search job portals for keywords.
+- Get alerts when new job positions are open on Bamboo HR and other job platforms
+- Website defacement monitoring

 _Need an actual Chrome runner with Javascript support? We support fetching via WebDriver and Playwright!</a>_

@@ -100,6 +103,8 @@ $ docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/d

 `:latest` tag is our latest stable release, `:dev` tag is our bleeding edge `master` branch.

+An alternative Docker image repository is available on GHCR - [ghcr.io/dgtlmoon/changedetection.io](https://ghcr.io/dgtlmoon/changedetection.io)
+
 ### Windows

 See the install instructions at the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Microsoft-Windows
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@@ -1,5 +1,15 @@
 #!/usr/bin/python3

+from changedetectionio import queuedWatchMetaData
+from copy import deepcopy
+from distutils.util import strtobool
+from feedgen.feed import FeedGenerator
+from flask_compress import Compress as FlaskCompress
+from flask_login import current_user
+from flask_restful import abort, Api
+from flask_wtf import CSRFProtect
+from functools import wraps
+from threading import Event
 import datetime
 import flask_login
 import logging
@@ -10,12 +20,6 @@ import threading
 import time
 import timeago

-from changedetectionio import queuedWatchMetaData
-from copy import deepcopy
-from distutils.util import strtobool
-from feedgen.feed import FeedGenerator
-from threading import Event
-
 from flask import (
    Flask,
    abort,
@@ -28,15 +32,11 @@ from flask import (
    session,
    url_for,
 )
-from flask_compress import Compress as FlaskCompress
-from flask_login import login_required
-from flask_restful import abort, Api
-from flask_wtf import CSRFProtect

 from changedetectionio import html_tools
 from changedetectionio.api import api_v1

-__version__ = '0.40.1.0'
+__version__ = '0.41.1'

 datastore = None

@@ -53,7 +53,6 @@ app = Flask(__name__,
            static_url_path="",
            static_folder="static",
            template_folder="templates")
-from flask_compress import Compress

 # Super handy for compressing large BrowserSteps responses and others
 FlaskCompress(app)
@@ -65,7 +64,8 @@ app.config.exit = Event()

 app.config['NEW_VERSION_AVAILABLE'] = False

-app.config['LOGIN_DISABLED'] = False
+if os.getenv('FLASK_SERVER_NAME'):
+    app.config['SERVER_NAME'] = os.getenv('FLASK_SERVER_NAME')

 #app.config["EXPLAIN_TEMPLATE_LOADING"] = True

@@ -74,7 +74,6 @@ app.config['TEMPLATES_AUTO_RELOAD'] = True
 app.jinja_env.add_extension('jinja2.ext.loopcontrols')
 csrf = CSRFProtect()
 csrf.init_app(app)
-
 notification_debug_log=[]

 watch_api = Api(app, decorators=[csrf.exempt])
@@ -149,7 +148,6 @@ class User(flask_login.UserMixin):

    # Compare given password against JSON store or Env var
    def check_password(self, password):
-
        import base64
        import hashlib

@@ -157,11 +155,9 @@ class User(flask_login.UserMixin):
        raw_salt_pass = os.getenv("SALTED_PASS", False)

        if not raw_salt_pass:
-            raw_salt_pass = datastore.data['settings']['application']['password']
+            raw_salt_pass = datastore.data['settings']['application'].get('password')

        raw_salt_pass = base64.b64decode(raw_salt_pass)
-
-
        salt_from_storage = raw_salt_pass[:32]  # 32 is the length of the salt

        # Use the exact same setup you used to generate the key, but this time put in the password to check
@@ -171,21 +167,44 @@ class User(flask_login.UserMixin):
            salt_from_storage,
            100000
        )
-        new_key =  salt_from_storage + new_key
+        new_key = salt_from_storage + new_key

        return new_key == raw_salt_pass

    pass

+
+def login_optionally_required(func):
+    @wraps(func)
+    def decorated_view(*args, **kwargs):
+        # View wrapper: a login is only demanded when a password is configured (datastore setting or SALTED_PASS env var)
+        has_password_enabled = datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False)
+
+        # Permitted - the 'styles' group of the static_content endpoint is served without a login check
+        if request.endpoint == 'static_content' and request.view_args['group'] == 'styles':
+            return func(*args, **kwargs)
+        # Permitted - the diff page, when the 'shared_diff_access' application setting is truthy
+        elif request.endpoint == 'diff_history_page' and datastore.data['settings']['application'].get('shared_diff_access'):
+            return func(*args, **kwargs)
+        # Permitted - HTTP methods listed in flask_login.config.EXEMPT_METHODS, or an explicit LOGIN_DISABLED app config flag
+        elif request.method in flask_login.config.EXEMPT_METHODS:
+            return func(*args, **kwargs)
+        elif app.config.get('LOGIN_DISABLED'):
+            return func(*args, **kwargs)
+        elif has_password_enabled and not current_user.is_authenticated:
+            return app.login_manager.unauthorized()
+        # Reaching here means no password is configured, or the current user is already authenticated
+        return func(*args, **kwargs)
+
+    return decorated_view
+
 def changedetection_app(config=None, datastore_o=None):
    global datastore
    datastore = datastore_o

    # so far just for read-only via tests, but this will be moved eventually to be the main source
    # (instead of the global var)
-    app.config['DATASTORE']=datastore_o
-
-    #app.config.update(config or {})
+    app.config['DATASTORE'] = datastore_o

    login_manager = flask_login.LoginManager(app)
    login_manager.login_view = 'login'
@@ -213,6 +232,8 @@ def changedetection_app(config=None, datastore_o=None):
    # https://flask-cors.readthedocs.io/en/latest/
    #    CORS(app)

+
+
    @login_manager.user_loader
    def user_loader(email):
        user = User()
@@ -221,7 +242,7 @@ def changedetection_app(config=None, datastore_o=None):

    @login_manager.unauthorized_handler
    def unauthorized_handler():
-        # @todo validate its a URL of this host and use that
+        flash("You must be logged in, please log in.", 'error')
        return redirect(url_for('login', next=url_for('index')))

    @app.route('/logout')
@@ -234,10 +255,6 @@ def changedetection_app(config=None, datastore_o=None):
    @app.route('/login', methods=['GET', 'POST'])
    def login():

-        if not datastore.data['settings']['application']['password'] and not os.getenv("SALTED_PASS", False):
-            flash("Login not required, no password enabled.", "notice")
-            return redirect(url_for('index'))
-
        if request.method == 'GET':
            if flask_login.current_user.is_authenticated:
                flash("Already logged in")
@@ -272,27 +289,22 @@ def changedetection_app(config=None, datastore_o=None):
        return redirect(url_for('login'))

    @app.before_request
-    def do_something_whenever_a_request_comes_in():
-
-        # Disable password login if there is not one set
-        # (No password in settings or env var)
-        app.config['LOGIN_DISABLED'] = datastore.data['settings']['application']['password'] == False and os.getenv("SALTED_PASS", False) == False
-
+    def before_request_handle_cookie_x_settings():
        # Set the auth cookie path if we're running as X-settings/X-Forwarded-Prefix
        if os.getenv('USE_X_SETTINGS') and 'X-Forwarded-Prefix' in request.headers:
            app.config['REMEMBER_COOKIE_PATH'] = request.headers['X-Forwarded-Prefix']
            app.config['SESSION_COOKIE_PATH'] = request.headers['X-Forwarded-Prefix']

-        # For the RSS path, allow access via a token
-        if request.path == '/rss' and request.args.get('token'):
-            app_rss_token = datastore.data['settings']['application']['rss_access_token']
-            rss_url_token = request.args.get('token')
-            if app_rss_token == rss_url_token:
-                app.config['LOGIN_DISABLED'] = True
+        return None

    @app.route("/rss", methods=['GET'])
-    @login_required
    def rss():
+        # Always requires a token: deny when none is configured, so a request without ?token= can never match an unset setting (None == None)
+        app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
+        rss_url_token = request.args.get('token')
+        if not app_rss_token or rss_url_token != app_rss_token:
+            return "Access denied, bad token", 403
+
        from . import diff
        limit_tag = request.args.get('tag')

@@ -328,8 +340,6 @@ def changedetection_app(config=None, datastore_o=None):
            if len(dates) < 2:
                continue

-            prev_fname = watch.history[dates[-2]]
-
            if not watch.viewed:
                # Re #239 - GUID needs to be individual for each event
                # @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
@@ -350,9 +360,12 @@ def changedetection_app(config=None, datastore_o=None):

                watch_title = watch.get('title') if watch.get('title') else watch.get('url')
                fe.title(title=watch_title)
-                latest_fname = watch.history[dates[-1]]

-                html_diff = diff.render_diff(prev_fname, latest_fname, include_equal=False, line_feed_sep="</br>")
+                html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
+                                             newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
+                                             include_equal=False,
+                                             line_feed_sep="<br>")
+
                fe.content(content="<html><body><h4>{}</h4>{}</body></html>".format(watch_title, html_diff),
                           type='CDATA')

@@ -366,7 +379,7 @@ def changedetection_app(config=None, datastore_o=None):
        return response

    @app.route("/", methods=['GET'])
-    @login_required
+    @login_optionally_required
    def index():
        from changedetectionio import forms

@@ -417,6 +430,7 @@ def changedetection_app(config=None, datastore_o=None):
                                 has_unviewed=datastore.has_unviewed,
                                 hosted_sticky=os.getenv("SALTED_PASS", False) == False,
                                 queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
+                                 system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
                                 tags=existing_tags,
                                 watches=sorted_watches
                                 )
@@ -429,7 +443,7 @@ def changedetection_app(config=None, datastore_o=None):

    # AJAX endpoint for sending a test
    @app.route("/notification/send-test", methods=['POST'])
-    @login_required
+    @login_optionally_required
    def ajax_callback_send_notification_test():

        import apprise
@@ -462,7 +476,7 @@ def changedetection_app(config=None, datastore_o=None):


    @app.route("/clear_history/<string:uuid>", methods=['GET'])
-    @login_required
+    @login_optionally_required
    def clear_watch_history(uuid):
        try:
            datastore.clear_watch_history(uuid)
@@ -474,7 +488,7 @@ def changedetection_app(config=None, datastore_o=None):
        return redirect(url_for('index'))

    @app.route("/clear_history", methods=['GET', 'POST'])
-    @login_required
+    @login_optionally_required
    def clear_all_history():

        if request.method == 'POST':
@@ -495,49 +509,15 @@ def changedetection_app(config=None, datastore_o=None):
        output = render_template("clear_all_history.html")
        return output

-
-    # If they edited an existing watch, we need to know to reset the current/previous md5 to include
-    # the excluded text.
-    def get_current_checksum_include_ignore_text(uuid):
-
-        import hashlib
-
-        from changedetectionio import fetch_site_status
-
-        # Get the most recent one
-        newest_history_key = datastore.data['watching'][uuid].get('newest_history_key')
-
-        # 0 means that theres only one, so that there should be no 'unviewed' history available
-        if newest_history_key == 0:
-            newest_history_key = list(datastore.data['watching'][uuid].history.keys())[0]
-
-        if newest_history_key:
-            with open(datastore.data['watching'][uuid].history[newest_history_key],
-                      encoding='utf-8') as file:
-                raw_content = file.read()
-
-                handler = fetch_site_status.perform_site_check(datastore=datastore)
-                stripped_content = html_tools.strip_ignore_text(raw_content,
-                                                             datastore.data['watching'][uuid]['ignore_text'])
-
-                if datastore.data['settings']['application'].get('ignore_whitespace', False):
-                    checksum = hashlib.md5(stripped_content.translate(None, b'\r\n\t ')).hexdigest()
-                else:
-                    checksum = hashlib.md5(stripped_content).hexdigest()
-
-                return checksum
-
-        return datastore.data['watching'][uuid]['previous_md5']
-
-
    @app.route("/edit/<string:uuid>", methods=['GET', 'POST'])
-    @login_required
+    @login_optionally_required
    # https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
    # https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?

    def edit_page(uuid):
-        from changedetectionio import forms
-        from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config
+        from . import forms
+        from .blueprint.browser_steps.browser_steps import browser_step_ui_config
+        from . import processors

        using_default_check_time = True
        # More for testing, possible to return the first/only
@@ -552,6 +532,15 @@ def changedetection_app(config=None, datastore_o=None):
            flash("No watch with the UUID %s found." % (uuid), "error")
            return redirect(url_for('index'))

+        switch_processor = request.args.get('switch_processor')  # optional ?switch_processor=<name> query argument
+        if switch_processor:
+            for p in processors.available_processors():
+                if p[0] == switch_processor:  # only accept a name offered by available_processors()
+                    datastore.data['watching'][uuid]['processor'] = switch_processor
+                    flash(f"Switched to mode - {p[1]}.")
+                    datastore.clear_watch_history(uuid)
+                    return redirect(url_for('edit_page', uuid=uuid))  # must be returned, a bare redirect() call is discarded
+
        # be sure we update with a copy instead of accidently editing the live object by reference
        default = deepcopy(datastore.data['watching'][uuid])

@@ -585,6 +574,7 @@ def changedetection_app(config=None, datastore_o=None):


        if request.method == 'POST' and form.validate():
+
            extra_update_obj = {}

            if request.args.get('unpause_on_save'):
@@ -611,6 +601,16 @@ def changedetection_app(config=None, datastore_o=None):
            if datastore.proxy_list is not None and form.data['proxy'] == '':
                extra_update_obj['proxy'] = None

+            # Unsetting all filter_text methods should make it go back to default
+            # This particularly affects tests running
+            if 'filter_text_added' in form.data and not form.data.get('filter_text_added') \
+                    and 'filter_text_replaced' in form.data and not form.data.get('filter_text_replaced') \
+                    and 'filter_text_removed' in form.data and not form.data.get('filter_text_removed'):
+                extra_update_obj['filter_text_added'] = True
+                extra_update_obj['filter_text_replaced'] = True
+                extra_update_obj['filter_text_removed'] = True
+
+
            datastore.data['watching'][uuid].update(form.data)
            datastore.data['watching'][uuid].update(extra_update_obj)

@@ -638,8 +638,6 @@ def changedetection_app(config=None, datastore_o=None):

            visualselector_data_is_ready = datastore.visualselector_data_is_ready(uuid)

-            # Only works reliably with Playwright
-            visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver'

            # JQ is difficult to install on windows and must be manually added (outside requirements.txt)
            jq_support = True
@@ -655,7 +653,11 @@ def changedetection_app(config=None, datastore_o=None):
            if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver':
                is_html_webdriver = True

+            # Only works reliably with Playwright
+            visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and is_html_webdriver
+
            output = render_template("edit.html",
+                                     available_processors=processors.available_processors(),
                                     browser_steps_config=browser_step_ui_config,
                                     current_base_url=datastore.data['settings']['application']['base_url'],
                                     emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
@@ -675,9 +677,9 @@ def changedetection_app(config=None, datastore_o=None):
        return output

    @app.route("/settings", methods=['GET', "POST"])
-    @login_required
+    @login_optionally_required
    def settings_page():
-        from changedetectionio import content_fetcher, forms
+        from . import forms

        default = deepcopy(datastore.data['settings'])
        if datastore.proxy_list is not None:
@@ -755,9 +757,11 @@ def changedetection_app(config=None, datastore_o=None):
        return output

    @app.route("/import", methods=['GET', "POST"])
-    @login_required
+    @login_optionally_required
    def import_page():
        remaining_urls = []
+        from . import forms
+
        if request.method == 'POST':
            from .importer import import_url_list, import_distill_io_json

@@ -765,7 +769,7 @@ def changedetection_app(config=None, datastore_o=None):
            if request.values.get('urls') and len(request.values.get('urls').strip()):
                # Import and push into the queue for immediate update check
                importer = import_url_list()
-                importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore)
+                importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor'))
                for uuid in importer.new_uuids:
                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))

@@ -783,9 +787,12 @@ def changedetection_app(config=None, datastore_o=None):
                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))


-
+        form = forms.importForm(formdata=request.form if request.method == 'POST' else None,
+#                               data=default,
+                               )
        # Could be some remaining, or we could be on GET
        output = render_template("import.html",
+                                 form=form,
                                 import_url_list_remaining="\n".join(remaining_urls),
                                 original_distill_json=''
                                 )
@@ -793,7 +800,7 @@ def changedetection_app(config=None, datastore_o=None):

    # Clear all statuses, so we do not see the 'unviewed' class
    @app.route("/form/mark-all-viewed", methods=['GET'])
-    @login_required
+    @login_optionally_required
    def mark_all_viewed():

        # Save the current newest history as the most recently viewed
@@ -803,7 +810,7 @@ def changedetection_app(config=None, datastore_o=None):
        return redirect(url_for('index'))

    @app.route("/diff/<string:uuid>", methods=['GET', 'POST'])
-    @login_required
+    @login_optionally_required
    def diff_history_page(uuid):

        from changedetectionio import forms
@@ -851,36 +858,35 @@ def changedetection_app(config=None, datastore_o=None):
        # Save the current newest history as the most recently viewed
        datastore.set_last_viewed(uuid, time.time())

-        newest_file = history[dates[-1]]
-
        # Read as binary and force decode as UTF-8
        # Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
        try:
-            with open(newest_file, 'r', encoding='utf-8', errors='ignore') as f:
-                newest_version_file_contents = f.read()
+            newest_version_file_contents = watch.get_history_snapshot(dates[-1])
        except Exception as e:
-            newest_version_file_contents = "Unable to read {}.\n".format(newest_file)
+            newest_version_file_contents = "Unable to read {}.\n".format(dates[-1])

        previous_version = request.args.get('previous_version')
-        try:
-            previous_file = history[previous_version]
-        except KeyError:
-            # Not present, use a default value, the second one in the sorted list.
-            previous_file = history[dates[-2]]
+        previous_timestamp = dates[-2]
+        if previous_version:
+            previous_timestamp = previous_version

        try:
-            with open(previous_file, 'r', encoding='utf-8', errors='ignore') as f:
-                previous_version_file_contents = f.read()
+            previous_version_file_contents = watch.get_history_snapshot(previous_timestamp)
        except Exception as e:
-            previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
+            previous_version_file_contents = "Unable to read {}.\n".format(previous_timestamp)


        screenshot_url = watch.get_screenshot()

        system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'

-        is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or (
-                    watch.get('fetch_backend', None) is None and system_uses_webdriver) else False
+        is_html_webdriver = False
+        if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver':
+            is_html_webdriver = True
+
+        password_enabled_and_share_is_off = False
+        if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
+            password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')

        output = render_template("diff.html",
                                 current_diff_url=watch['url'],
@@ -895,6 +901,7 @@ def changedetection_app(config=None, datastore_o=None):
                                 left_sticky=True,
                                 newest=newest_version_file_contents,
                                 newest_version_timestamp=dates[-1],
+                                 password_enabled_and_share_is_off=password_enabled_and_share_is_off,
                                 previous=previous_version_file_contents,
                                 screenshot=screenshot_url,
                                 uuid=uuid,
@@ -905,7 +912,7 @@ def changedetection_app(config=None, datastore_o=None):
        return output

    @app.route("/preview/<string:uuid>", methods=['GET'])
-    @login_required
+    @login_optionally_required
    def preview_page(uuid):
        content = []
        ignored_line_numbers = []
@@ -925,8 +932,9 @@ def changedetection_app(config=None, datastore_o=None):
        extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]


-        is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or (
-                watch.get('fetch_backend', None) is None and system_uses_webdriver) else False
+        is_html_webdriver = False
+        if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver':
+            is_html_webdriver = True

        # Never requested successfully, but we detected a fetch error
        if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
@@ -945,37 +953,35 @@ def changedetection_app(config=None, datastore_o=None):
            return output

        timestamp = list(watch.history.keys())[-1]
-        filename = watch.history[timestamp]
        try:
-            with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
-                tmp = f.readlines()
+            tmp = watch.get_history_snapshot(timestamp).splitlines()

-                # Get what needs to be highlighted
-                ignore_rules = watch.get('ignore_text', []) + datastore.data['settings']['application']['global_ignore_text']
+            # Get what needs to be highlighted
+            ignore_rules = watch.get('ignore_text', []) + datastore.data['settings']['application']['global_ignore_text']

-                # .readlines will keep the \n, but we will parse it here again, in the future tidy this up
-                ignored_line_numbers = html_tools.strip_ignore_text(content="".join(tmp),
-                                                                    wordlist=ignore_rules,
-                                                                    mode='line numbers'
-                                                                    )
+            # .splitlines() drops the line endings, so re-join with "\n" before parsing it here again, in the future tidy this up
+            ignored_line_numbers = html_tools.strip_ignore_text(content="\n".join(tmp),
+                                                                wordlist=ignore_rules,
+                                                                mode='line numbers'
+                                                                )

-                trigger_line_numbers = html_tools.strip_ignore_text(content="".join(tmp),
-                                                                    wordlist=watch['trigger_text'],
-                                                                    mode='line numbers'
-                                                                    )
-                # Prepare the classes and lines used in the template
-                i=0
-                for l in tmp:
-                    classes=[]
-                    i+=1
-                    if i in ignored_line_numbers:
-                        classes.append('ignored')
-                    if i in trigger_line_numbers:
-                        classes.append('triggered')
-                    content.append({'line': l, 'classes': ' '.join(classes)})
+            trigger_line_numbers = html_tools.strip_ignore_text(content="\n".join(tmp),
+                                                                wordlist=watch['trigger_text'],
+                                                                mode='line numbers'
+                                                                )
+            # Prepare the classes and lines used in the template
+            i=0
+            for l in tmp:
+                classes=[]
+                i+=1
+                if i in ignored_line_numbers:
+                    classes.append('ignored')
+                if i in trigger_line_numbers:
+                    classes.append('triggered')
+                content.append({'line': l, 'classes': ' '.join(classes)})

        except Exception as e:
-            content.append({'line': "File doesnt exist or unable to read file {}".format(filename), 'classes': ''})
+            content.append({'line': f"File doesnt exist or unable to read timestamp {timestamp}", 'classes': ''})

        output = render_template("preview.html",
                                 content=content,
@@ -995,7 +1001,7 @@ def changedetection_app(config=None, datastore_o=None):
        return output

    @app.route("/settings/notification-logs", methods=['GET'])
-    @login_required
+    @login_optionally_required
    def notification_logs():
        global notification_debug_log
        output = render_template("notification-log.html",
@@ -1005,7 +1011,7 @@ def changedetection_app(config=None, datastore_o=None):

    # We're good but backups are even better!
    @app.route("/backup", methods=['GET'])
-    @login_required
+    @login_optionally_required
    def get_backup():

        import zipfile
@@ -1017,7 +1023,8 @@ def changedetection_app(config=None, datastore_o=None):
            os.unlink(previous_backup_filename)

        # create a ZipFile object
-        backupname = "changedetection-backup-{}.zip".format(int(time.time()))
+        timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+        backupname = "changedetection-backup-{}.zip".format(timestamp)
        backup_filepath = os.path.join(datastore_o.datastore_path, backupname)

        with zipfile.ZipFile(backup_filepath, "w",
@@ -1125,13 +1132,14 @@ def changedetection_app(config=None, datastore_o=None):
            abort(404)

    @app.route("/form/add/quickwatch", methods=['POST'])
-    @login_required
+    @login_optionally_required
    def form_quick_watch_add():
        from changedetectionio import forms
        form = forms.quickWatchForm(request.form)

        if not form.validate():
-            flash("Error")
+            for widget, l in form.errors.items():
+                flash(','.join(l), 'error')
            return redirect(url_for('index'))

        url = request.form.get('url').strip()
@@ -1140,24 +1148,24 @@ def changedetection_app(config=None, datastore_o=None):
            return redirect(url_for('index'))

        add_paused = request.form.get('edit_and_watch_submit_button') != None
-        new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused})
+        processor = request.form.get('processor', 'text_json_diff')
+        new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused, 'processor': processor})

-
-        if not add_paused and new_uuid:
-            # Straight into the queue.
-            update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
-            flash("Watch added.")
-
-        if add_paused:
-            flash('Watch added in Paused state, saving will unpause.')
-            return redirect(url_for('edit_page', uuid=new_uuid, unpause_on_save=1))
+        if new_uuid:
+            if add_paused:
+                flash('Watch added in Paused state, saving will unpause.')
+                return redirect(url_for('edit_page', uuid=new_uuid, unpause_on_save=1))
+            else:
+                # Straight into the queue.
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
+                flash("Watch added.")

        return redirect(url_for('index'))



    @app.route("/api/delete", methods=['GET'])
-    @login_required
+    @login_optionally_required
    def form_delete():
        uuid = request.args.get('uuid')

@@ -1174,7 +1182,7 @@ def changedetection_app(config=None, datastore_o=None):
        return redirect(url_for('index'))

    @app.route("/api/clone", methods=['GET'])
-    @login_required
+    @login_optionally_required
    def form_clone():
        uuid = request.args.get('uuid')
        # More for testing, possible to return the first/only
@@ -1182,13 +1190,15 @@ def changedetection_app(config=None, datastore_o=None):
            uuid = list(datastore.data['watching'].keys()).pop()

        new_uuid = datastore.clone(uuid)
-        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
-        flash('Cloned.')
+        if new_uuid:
+            if not datastore.data['watching'].get(uuid).get('paused'):
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
+            flash('Cloned.')

        return redirect(url_for('index'))

    @app.route("/api/checknow", methods=['GET'])
-    @login_required
+    @login_optionally_required
    def form_watch_checknow():
        # Forced recheck will skip the 'skip if content is the same' rule (, 'reprocess_existing_data': True})))
        tag = request.args.get('tag')
@@ -1222,7 +1232,7 @@ def changedetection_app(config=None, datastore_o=None):
        return redirect(url_for('index', tag=tag))

    @app.route("/form/checkbox-operations", methods=['POST'])
-    @login_required
+    @login_optionally_required
    def form_watch_list_checkbox_operations():
        op = request.form['op']
        uuids = request.form.getlist('uuids')
@@ -1286,7 +1296,7 @@ def changedetection_app(config=None, datastore_o=None):
        return redirect(url_for('index'))

    @app.route("/api/share-url", methods=['GET'])
-    @login_required
+    @login_optionally_required
    def form_share_put_watch():
        """Given a watch UUID, upload the info and return a share-link
           the share-link can be imported/added"""
--- a/changedetectionio/api/api_v1.py
+++ b/changedetectionio/api/api_v1.py
@@ -33,7 +33,7 @@ class Watch(Resource):
    @auth.check_token
    def get(self, uuid):
        """
-        @api {get} /api/v1/watch/:uuid Single watch information
+        @api {get} /api/v1/watch/:uuid Get data for a single watch
        @apiDescription Retrieve watch information and set muted/paused status
        @apiExample {curl} Example usage:
            curl http://localhost:4000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091  -H"x-api-key:813031b16330fe25e3780cf0325daa45"
@@ -70,13 +70,16 @@ class Watch(Resource):
            return "OK", 200

        # Return without history, get that via another API call
+        # Properties are not returned as JSON, so add the required props manually
        watch['history_n'] = watch.history_n
+        watch['last_changed'] = watch.last_changed
+
        return watch

    @auth.check_token
    def delete(self, uuid):
        """
-        @api {delete} /api/v1/watch/:uuid Delete watch information
+        @api {delete} /api/v1/watch/:uuid Delete a watch and related history
        @apiExample {curl} Example usage:
            curl http://localhost:4000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45"
        @apiParam {uuid} uuid Watch unique ID.
@@ -90,21 +93,18 @@ class Watch(Resource):
        self.datastore.delete(uuid)
        return 'OK', 204

-    # Update an existing
    @auth.check_token
    @expects_json(schema_update_watch)
    def put(self, uuid):
        """
        @api {put} /api/v1/watch/:uuid Update watch information
        @apiExample {curl} Example usage:
-            Create a watch (POST)
-            curl http://localhost:4000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"url": "https://my-nice.com" , "tag": "nice list"}'
            Update (PUT)
            curl http://localhost:4000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X PUT -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"url": "https://my-nice.com" , "tag": "new list"}'

-        @apiDescription Updates an existing watch using JSON, accepts the same structure as at https://github.com/dgtlmoon/changedetection.io/blob/fab7d325f764d6912bef671f1d78bf217689c537/changedetectionio/model/Watch.py#L15
+        @apiDescription Updates an existing watch using JSON, accepts the same structure as returned in <a href="#api-Watch-Watch">get single watch information</a>
        @apiParam {uuid} uuid Watch unique ID.
-        @apiName Update
+        @apiName Update a watch
        @apiGroup Watch
        @apiSuccess (200) {String} OK Was updated
        @apiSuccess (500) {String} ERR Some other error
@@ -131,6 +131,21 @@ class WatchHistory(Resource):
    # Get a list of available history for a watch by UUID
    # curl http://localhost:4000/api/v1/watch/<string:uuid>/history
    def get(self, uuid):
+        """
+        @api {get} /api/v1/watch/<string:uuid>/history Get a list of all historical snapshots available for a watch
+        @apiDescription Requires `uuid`, returns list
+        @apiExample {curl} Example usage:
+            curl http://localhost:4000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json"
+            {
+                "1676649279": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/cb7e9be8258368262246910e6a2a4c30.txt",
+                "1677092785": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/e20db368d6fc633e34f559ff67bb4044.txt",
+                "1677103794": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/02efdd37dacdae96554a8cc85dc9c945.txt"
+            }
+        @apiName Get list of available stored snapshots for watch
+        @apiGroup Watch History
+        @apiSuccess (200) {String} OK
+        @apiSuccess (404) {String} ERR Not found
+        """
        watch = self.datastore.data['watching'].get(uuid)
        if not watch:
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))
@@ -142,11 +157,18 @@ class WatchSingleHistory(Resource):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']

-    # Read a given history snapshot and return its content
-    # <string:timestamp> or "latest"
-    # curl http://localhost:4000/api/v1/watch/<string:uuid>/history/<int:timestamp>
    @auth.check_token
    def get(self, uuid, timestamp):
+        """
+        @api {get} /api/v1/watch/<string:uuid>/history/<int:timestamp> Get single snapshot from watch
+        @apiDescription Requires watch `uuid` and `timestamp`. `timestamp` of "`latest`" for latest available snapshot, or <a href="#api-Watch_History-Get_list_of_available_stored_snapshots_for_watch">use the list returned here</a>
+        @apiExample {curl} Example usage:
+            curl http://localhost:4000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history/1677092977 -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json"
+        @apiName Get single snapshot content
+        @apiGroup Watch History
+        @apiSuccess (200) {String} OK
+        @apiSuccess (404) {String} ERR Not found
+        """
        watch = self.datastore.data['watching'].get(uuid)
        if not watch:
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))
@@ -157,8 +179,7 @@ class WatchSingleHistory(Resource):
        if timestamp == 'latest':
            timestamp = list(watch.history.keys())[-1]

-        with open(watch.history[timestamp], 'r') as f:
-            content = f.read()
+        content = watch.get_history_snapshot(timestamp)

        response = make_response(content, 200)
        response.mimetype = "text/plain"
@@ -175,21 +196,19 @@ class CreateWatch(Resource):
    @expects_json(schema_create_watch)
    def post(self):
        """
-        @api {post} /api/v1/watch Create a watch
-        @apiDescription requires `url`, Creates a watch, also accepts accepts the same structure as at https://github.com/dgtlmoon/changedetection.io/blob/fab7d325f764d6912bef671f1d78bf217689c537/changedetectionio/model/Watch.py#L15
+        @api {post} /api/v1/watch Create a single watch
+        @apiDescription Requires at least `url` set, can accept the same structure as <a href="#api-Watch-Watch">get single watch information</a> to create.
        @apiExample {curl} Example usage:
            curl http://localhost:4000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"url": "https://my-nice.com" , "tag": "nice list"}'
        @apiName Create
-        @apiGroup CreateWatch
+        @apiGroup Watch
        @apiSuccess (200) {String} OK Was created
        @apiSuccess (500) {String} ERR Some other error
        """

-        #
        json_data = request.get_json()
        url = json_data['url'].strip()

-
        if not validators.url(json_data['url'].strip()):
            return "Invalid or unsupported URL", 400

@@ -202,23 +221,41 @@ class CreateWatch(Resource):
        del extras['url']

        new_uuid = self.datastore.add_watch(url=url, extras=extras)
-        self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
-        return {'uuid': new_uuid}, 201
+        if new_uuid:
+            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
+            return {'uuid': new_uuid}, 201
+        else:
+            return "Invalid or unsupported URL", 400

    @auth.check_token
    def get(self):
        """
-        @api {get} /api/v1/watch
+        @api {get} /api/v1/watch List watches
        @apiDescription Return concise list of available watches and some very basic info
        @apiExample {curl} Example usage:
            curl http://localhost:4000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45"
-            recheck_all=1 to recheck all
+            {
+                "6a4b7d5c-fee4-4616-9f43-4ac97046b595": {
+                    "last_changed": 1677103794,
+                    "last_checked": 1677103794,
+                    "last_error": false,
+                    "title": "",
+                    "url": "http://www.quotationspage.com/random.php"
+                },
+                "e6f5fd5c-dbfe-468b-b8f3-f9d6ff5ad69b": {
+                    "last_changed": 0,
+                    "last_checked": 1676662819,
+                    "last_error": false,
+                    "title": "QuickLook",
+                    "url": "https://github.com/QL-Win/QuickLook/tags"
+                }
+            }
+
        @apiParam {String} [recheck_all]       Optional Set to =1 to force recheck of all watches
        @apiParam {String} [tag]               Optional name of tag to limit results
        @apiName ListWatches
-        @apiGroup CreateWatch
-
-        :return:
+        @apiGroup Watch Management
+        @apiSuccess (200) {String} OK JSON dict
        """
        list = {}

@@ -249,6 +286,22 @@ class SystemInfo(Resource):

    @auth.check_token
    def get(self):
+        """
+        @api {get} /api/v1/systeminfo Return system info
+        @apiDescription Return some info about the current system state
+        @apiExample {curl} Example usage:
+            curl http://localhost:4000/api/v1/systeminfo -H"x-api-key:813031b16330fe25e3780cf0325daa45"
+            HTTP/1.0 200
+            {
+                "queue_size": 10,
+                "overdue_watches": ["watch-uuid-list"],
+                "uptime": 38344.55,
+                "watch_count": 800,
+                "version": "0.40.1"
+            }
+        @apiName Get Info
+        @apiGroup System Information
+        """
        import time
        overdue_watches = []

@@ -267,10 +320,11 @@ class SystemInfo(Resource):
            # Allow 5 minutes of grace time before we decide it's overdue
            if time_since_check - (5 * 60) > t:
                overdue_watches.append(uuid)
-
+        from changedetectionio import __version__ as main_version
        return {
                   'queue_size': self.update_q.qsize(),
                   'overdue_watches': overdue_watches,
                   'uptime': round(time.time() - self.datastore.start_time, 2),
-                   'watch_count': len(self.datastore.data.get('watching', {}))
+                   'watch_count': len(self.datastore.data.get('watching', {})),
+                   'version': main_version
               }, 200
--- a/changedetectionio/blueprint/browser_steps/init.py
+++ b/changedetectionio/blueprint/browser_steps/init.py
@@ -23,11 +23,10 @@

 from distutils.util import strtobool
 from flask import Blueprint, request, make_response
-from flask_login import login_required
 import os
 import logging
 from changedetectionio.store import ChangeDetectionStore
-
+from changedetectionio import login_optionally_required
 browsersteps_live_ui_o = {}
 browsersteps_playwright_browser_interface = None
 browsersteps_playwright_browser_interface_browser = None
@@ -65,7 +64,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):

    browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")

-    @login_required
+    @login_optionally_required
    @browser_steps_blueprint.route("/browsersteps_update", methods=['GET', 'POST'])
    def browsersteps_ui_update():
        import base64
@@ -107,8 +106,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):

            if step_operation == 'Goto site':
                step_operation = 'goto_url'
-                step_optional_value = None
-                step_selector = datastore.data['watching'][uuid].get('url')
+                step_optional_value = datastore.data['watching'][uuid].get('url')
+                step_selector = None

            # @todo try.. accept.. nice errors not popups..
            try:
--- a/changedetectionio/blueprint/browser_steps/browser_steps.py
+++ b/changedetectionio/blueprint/browser_steps/browser_steps.py
@@ -25,12 +25,14 @@ browser_step_ui_config = {'Choose one': '0 0',
                          'Execute JS': '0 1',
 #                          'Extract text and use as filter': '1 0',
                          'Goto site': '0 0',
+                          'Goto URL': '0 1',
                          'Press Enter': '0 0',
                          'Select by label': '1 1',
                          'Scroll down': '0 0',
                          'Uncheck checkbox': '1 0',
                          'Wait for seconds': '0 1',
                          'Wait for text': '0 1',
+                          'Wait for text in element': '1 1',
                          #                          'Press Page Down': '0 0',
                          #                          'Press Page Up': '0 0',
                          # weird bug, come back to it later
@@ -53,7 +55,7 @@ class steppable_browser_interface():

        print("> action calling", call_action_name)
        # https://playwright.dev/python/docs/selectors#xpath-selectors
-        if selector.startswith('/') and not selector.startswith('//'):
+        if selector and selector.startswith('/') and not selector.startswith('//'):
            selector = "xpath=" + selector

        action_handler = getattr(self, "action_" + call_action_name)
@@ -72,10 +74,10 @@ class steppable_browser_interface():
        self.page.wait_for_timeout(3 * 1000)
        print("Call action done in", time.time() - now)

-    def action_goto_url(self, url, optional_value):
+    def action_goto_url(self, selector, value):
        # self.page.set_viewport_size({"width": 1280, "height": 5000})
        now = time.time()
-        response = self.page.goto(url, timeout=0, wait_until='commit')
+        response = self.page.goto(value, timeout=0, wait_until='commit')

        # Wait_until = commit
        # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
@@ -132,6 +134,17 @@ class steppable_browser_interface():
    def action_wait_for_seconds(self, selector, value):
        self.page.wait_for_timeout(int(value) * 1000)

+    def action_wait_for_text(self, selector, value):
+        import json
+        v = json.dumps(value)
+        self.page.wait_for_function(f'document.querySelector("body").innerText.includes({v});', timeout=30000)
+
+    def action_wait_for_text_in_element(self, selector, value):
+        import json
+        s = json.dumps(selector)
+        v = json.dumps(value)
+        self.page.wait_for_function(f'document.querySelector({s}).innerText.includes({v});', timeout=30000)
+
    # @todo - in the future make some popout interface to capture what needs to be set
    # https://playwright.dev/python/docs/api/class-keyboard
    def action_press_enter(self, selector, value):
@@ -224,7 +237,7 @@ class browsersteps_live_ui(steppable_browser_interface):
    def get_current_state(self):
        """Return the screenshot and interactive elements mapping, generally always called after action_()"""
        from pkg_resources import resource_string
-        xpath_element_js = resource_string(__name__, "../../res/xpath_element_scraper.js").decode('utf-8')
+        xpath_element_js = resource_string(__name__, "../res/xpath_element_scraper.js").decode('utf-8')
        now = time.time()
        self.page.wait_for_timeout(1 * 1000)

@@ -259,8 +272,8 @@ class browsersteps_live_ui(steppable_browser_interface):
        self.page.evaluate("var include_filters=''")
        from pkg_resources import resource_string
        # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
-        xpath_element_js = resource_string(__name__, "../../res/xpath_element_scraper.js").decode('utf-8')
-        from changedetectionio.content_fetcher import visualselector_xpath_selectors
+        xpath_element_js = resource_string(__name__, "../res/xpath_element_scraper.js").decode('utf-8')
+        from changedetectionio.fetchers import visualselector_xpath_selectors
        xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors)
        xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
        screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
--- a/changedetectionio/changedetection.py
+++ b/changedetectionio/changedetection.py
@@ -13,7 +13,7 @@ import signal
 import socket
 import sys

-from . import store, changedetection_app, content_fetcher
+from . import store, changedetection_app
 from . import __version__

 # Only global so we can access it in the signal handler
@@ -31,11 +31,13 @@ def sigterm_handler(_signo, _stack_frame):
 def main():
    global datastore
    global app
-    ssl_mode = False
-    host = ''
-    port = os.environ.get('PORT') or 5000
-    do_cleanup = False
+
    datastore_path = None
+    do_cleanup = False
+    host = ''
+    ipv6_enabled = False
+    port = os.environ.get('PORT') or 5000
+    ssl_mode = False

    # On Windows, create and use a default path.
    if os.name == 'nt':
@@ -46,7 +48,7 @@ def main():
        datastore_path = os.path.join(os.getcwd(), "../datastore")

    try:
-        opts, args = getopt.getopt(sys.argv[1:], "Ccsd:h:p:", "port")
+        opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:", "port")
    except getopt.GetoptError:
        print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path]')
        sys.exit(2)
@@ -66,6 +68,10 @@ def main():
        if opt == '-d':
            datastore_path = arg

+        if opt == '-6':
+            print ("Enabling IPv6 listen support")
+            ipv6_enabled = True
+
        # Cleanup (remove text files that arent in the index)
        if opt == '-c':
            do_cleanup = True
@@ -133,13 +139,15 @@ def main():
        from werkzeug.middleware.proxy_fix import ProxyFix
        app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)

+    s_type = socket.AF_INET6 if ipv6_enabled else socket.AF_INET
+
    if ssl_mode:
        # @todo finalise SSL config, but this should get you in the right direction if you need it.
-        eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port), socket.AF_INET6),
+        eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port), s_type),
                                               certfile='cert.pem',
                                               keyfile='privkey.pem',
                                               server_side=True), app)

    else:
-        eventlet.wsgi.server(eventlet.listen((host, int(port)), socket.AF_INET6), app)
+        eventlet.wsgi.server(eventlet.listen((host, int(port)), s_type), app)

--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@@ -1,607 +0,0 @@
-import hashlib
-from abc import abstractmethod
-import chardet
-import json
-import logging
-import os
-import requests
-import sys
-import time
-
-visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary'
-
-class Non200ErrorCodeReceived(Exception):
-    def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
-        # Set this so we can use it in other parts of the app
-        self.status_code = status_code
-        self.url = url
-        self.screenshot = screenshot
-        self.xpath_data = xpath_data
-        self.page_text = None
-
-        if page_html:
-            from changedetectionio import html_tools
-            self.page_text = html_tools.html_to_text(page_html)
-        return
-
-class checksumFromPreviousCheckWasTheSame(Exception):
-    def __init__(self):
-        return
-
-class JSActionExceptions(Exception):
-    def __init__(self, status_code, url, screenshot, message=''):
-        self.status_code = status_code
-        self.url = url
-        self.screenshot = screenshot
-        self.message = message
-        return
-
-class BrowserStepsStepTimout(Exception):
-    def __init__(self, step_n):
-        self.step_n = step_n
-        return
-
-
-class PageUnloadable(Exception):
-    def __init__(self, status_code, url, message, screenshot=False):
-        # Set this so we can use it in other parts of the app
-        self.status_code = status_code
-        self.url = url
-        self.screenshot = screenshot
-        self.message = message
-        return
-
-class EmptyReply(Exception):
-    def __init__(self, status_code, url, screenshot=None):
-        # Set this so we can use it in other parts of the app
-        self.status_code = status_code
-        self.url = url
-        self.screenshot = screenshot
-        return
-
-class ScreenshotUnavailable(Exception):
-    def __init__(self, status_code, url, page_html=None):
-        # Set this so we can use it in other parts of the app
-        self.status_code = status_code
-        self.url = url
-        if page_html:
-            from html_tools import html_to_text
-            self.page_text = html_to_text(page_html)
-        return
-
-class ReplyWithContentButNoText(Exception):
-    def __init__(self, status_code, url, screenshot=None):
-        # Set this so we can use it in other parts of the app
-        self.status_code = status_code
-        self.url = url
-        self.screenshot = screenshot
-        return
-
-class Fetcher():
-    error = None
-    status_code = None
-    content = None
-    headers = None
-    browser_steps = None
-    browser_steps_screenshot_path = None
-
-    fetcher_description = "No description"
-    webdriver_js_execute_code = None
-    xpath_element_js = ""
-
-    xpath_data = None
-
-    # Will be needed in the future by the VisualSelector, always get this where possible.
-    screenshot = False
-    system_http_proxy = os.getenv('HTTP_PROXY')
-    system_https_proxy = os.getenv('HTTPS_PROXY')
-
-    # Time ONTOP of the system defined env minimum time
-    render_extract_delay = 0
-
-    def __init__(self):
-        from pkg_resources import resource_string
-        # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
-        self.xpath_element_js = resource_string(__name__, "res/xpath_element_scraper.js").decode('utf-8')
-
-
-    @abstractmethod
-    def get_error(self):
-        return self.error
-
-    @abstractmethod
-    def run(self,
-            url,
-            timeout,
-            request_headers,
-            request_body,
-            request_method,
-            ignore_status_codes=False,
-            current_include_filters=None,
-            is_binary=False):
-        # Should set self.error, self.status_code and self.content
-        pass
-
-    @abstractmethod
-    def quit(self):
-        return
-
-    @abstractmethod
-    def get_last_status_code(self):
-        return self.status_code
-
-    @abstractmethod
-    def screenshot_step(self, step_n):
-        return None
-
-    @abstractmethod
-    # Return true/false if this checker is ready to run, in the case it needs todo some special config check etc
-    def is_ready(self):
-        return True
-
-    def iterate_browser_steps(self):
-        from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
-        from playwright._impl._api_types import TimeoutError
-        from jinja2 import Environment
-        jinja2_env = Environment(extensions=['jinja2_time.TimeExtension'])
-
-        step_n = 0
-
-        if self.browser_steps is not None and len(self.browser_steps):
-            interface = steppable_browser_interface()
-            interface.page = self.page
-
-            valid_steps = filter(lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one' and s['operation'] != 'Goto site'), self.browser_steps)
-
-            for step in valid_steps:
-                step_n += 1
-                print(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
-                self.screenshot_step("before-"+str(step_n))
-                self.save_step_html("before-"+str(step_n))
-                try:
-                    optional_value = step['optional_value']
-                    selector = step['selector']
-                    # Support for jinja2 template in step values, with date module added
-                    if '{%' in step['optional_value'] or '{{' in step['optional_value']:
-                        optional_value = str(jinja2_env.from_string(step['optional_value']).render())
-                    if '{%' in step['selector'] or '{{' in step['selector']:
-                        selector = str(jinja2_env.from_string(step['selector']).render())
-
-                    getattr(interface, "call_action")(action_name=step['operation'],
-                                                      selector=selector,
-                                                      optional_value=optional_value)
-                    self.screenshot_step(step_n)
-                    self.save_step_html(step_n)
-                except TimeoutError:
-                    # Stop processing here
-                    raise BrowserStepsStepTimout(step_n=step_n)
-
-
-
-    # It's always good to reset these
-    def delete_browser_steps_screenshots(self):
-        import glob
-        if self.browser_steps_screenshot_path is not None:
-            dest = os.path.join(self.browser_steps_screenshot_path, 'step_*.jpeg')
-            files = glob.glob(dest)
-            for f in files:
-                os.unlink(f)
-
-#   Maybe for the future, each fetcher provides its own diff output, could be used for text, image
-#   the current one would return javascript output (as we use JS to generate the diff)
-#
-def available_fetchers():
-    # See the if statement at the bottom of this file for how we switch between playwright and webdriver
-    import inspect
-    p = []
-    for name, obj in inspect.getmembers(sys.modules[__name__], inspect.isclass):
-        if inspect.isclass(obj):
-            # @todo html_ is maybe better as fetcher_ or something
-            # In this case, make sure to edit the default one in store.py and fetch_site_status.py
-            if name.startswith('html_'):
-                t = tuple([name, obj.fetcher_description])
-                p.append(t)
-
-    return p
-
-class base_html_playwright(Fetcher):
-    fetcher_description = "Playwright {}/Javascript".format(
-        os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
-    )
-    if os.getenv("PLAYWRIGHT_DRIVER_URL"):
-        fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL"))
-
-    browser_type = ''
-    command_executor = ''
-
-    # Configs for Proxy setup
-    # In the ENV vars, is prefixed with "playwright_proxy_", so it is for example "playwright_proxy_server"
-    playwright_proxy_settings_mappings = ['bypass', 'server', 'username', 'password']
-
-    proxy = None
-
-    def __init__(self, proxy_override=None):
-        super().__init__()
-        # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
-        self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
-        self.command_executor = os.getenv(
-            "PLAYWRIGHT_DRIVER_URL",
-            'ws://playwright-chrome:3000'
-        ).strip('"')
-
-        # If any proxy settings are enabled, then we should setup the proxy object
-        proxy_args = {}
-        for k in self.playwright_proxy_settings_mappings:
-            v = os.getenv('playwright_proxy_' + k, False)
-            if v:
-                proxy_args[k] = v.strip('"')
-
-        if proxy_args:
-            self.proxy = proxy_args
-
-        # allow per-watch proxy selection override
-        if proxy_override:
-            self.proxy = {'server': proxy_override}
-
-        if self.proxy:
-            # Playwright needs separate username and password values
-            from urllib.parse import urlparse
-            parsed = urlparse(self.proxy.get('server'))
-            if parsed.username:
-                self.proxy['username'] = parsed.username
-                self.proxy['password'] = parsed.password
-
-    def screenshot_step(self, step_n=''):
-
-        # There's a bug where we need to do it twice or it doesnt take the whole page, dont know why.
-        self.page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024})
-        screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=85)
-
-        if self.browser_steps_screenshot_path is not None:
-            destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
-            logging.debug("Saving step screenshot to {}".format(destination))
-            with open(destination, 'wb') as f:
-                f.write(screenshot)
-
-    def save_step_html(self, step_n):
-        content = self.page.content()
-        destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
-        logging.debug("Saving step HTML to {}".format(destination))
-        with open(destination, 'w') as f:
-            f.write(content)
-
-    def run(self,
-            url,
-            timeout,
-            request_headers,
-            request_body,
-            request_method,
-            ignore_status_codes=False,
-            current_include_filters=None,
-            is_binary=False):
-
-        from playwright.sync_api import sync_playwright
-        import playwright._impl._api_types
-
-        self.delete_browser_steps_screenshots()
-        response = None
-        with sync_playwright() as p:
-            browser_type = getattr(p, self.browser_type)
-
-            # Seemed to cause a connection Exception even tho I can see it connect
-            # self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
-            # 60,000 connection timeout only
-            browser = browser_type.connect_over_cdp(self.command_executor, timeout=60000)
-
-            # Set user agent to prevent Cloudflare from blocking the browser
-            # Use the default one configured in the App.py model that's passed from fetch_site_status.py
-            context = browser.new_context(
-                user_agent=request_headers['User-Agent'] if request_headers.get('User-Agent') else 'Mozilla/5.0',
-                proxy=self.proxy,
-                # This is needed to enable JavaScript execution on GitHub and others
-                bypass_csp=True,
-                # Can't think why we need the service workers for our use case?
-                service_workers='block',
-                # Should never be needed
-                accept_downloads=False
-            )
-
-            self.page = context.new_page()
-            if len(request_headers):
-                context.set_extra_http_headers(request_headers)
-
-                self.page.set_default_navigation_timeout(90000)
-                self.page.set_default_timeout(90000)
-
-                # Listen for all console events and handle errors
-                self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
-
-            # Goto page
-            try:
-                # Wait_until = commit
-                # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
-                # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
-                # This seemed to solve nearly all 'TimeoutErrors'
-                response = self.page.goto(url, wait_until='commit')
-            except playwright._impl._api_types.Error as e:
-                # Retry once - https://github.com/browserless/chrome/issues/2485
-                # Sometimes errors related to invalid cert's and other can be random
-                print ("Content Fetcher > retrying request got error - ", str(e))
-                time.sleep(1)
-                response = self.page.goto(url, wait_until='commit')
-
-            except Exception as e:
-                print ("Content Fetcher > Other exception when page.goto", str(e))
-                context.close()
-                browser.close()
-                raise PageUnloadable(url=url, status_code=None, message=str(e))
-
-            # Execute any browser steps
-            try:
-                extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
-                self.page.wait_for_timeout(extra_wait * 1000)
-
-                if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
-                    self.page.evaluate(self.webdriver_js_execute_code)
-
-            except playwright._impl._api_types.TimeoutError as e:
-                context.close()
-                browser.close()
-                # This can be ok, we will try to grab what we could retrieve
-                pass
-            except Exception as e:
-                print ("Content Fetcher > Other exception when executing custom JS code", str(e))
-                context.close()
-                browser.close()
-                raise PageUnloadable(url=url, status_code=None, message=str(e))
-
-            if response is None:
-                context.close()
-                browser.close()
-                print ("Content Fetcher > Response object was none")
-                raise EmptyReply(url=url, status_code=None)
-
-            # Bug 2(?) Set the viewport size AFTER loading the page
-            self.page.set_viewport_size({"width": 1280, "height": 1024})
-
-            # Run Browser Steps here
-            self.iterate_browser_steps()
-
-            extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
-            time.sleep(extra_wait)
-
-
-            self.content = self.page.content()
-            self.status_code = response.status
-
-            if len(self.page.content().strip()) == 0:
-                context.close()
-                browser.close()
-                print ("Content Fetcher > Content was empty")
-                raise EmptyReply(url=url, status_code=response.status)
-
-            # Bug 2(?) Set the viewport size AFTER loading the page
-            self.page.set_viewport_size({"width": 1280, "height": 1024})
-
-            self.status_code = response.status
-            self.content = self.page.content()
-            self.headers = response.all_headers()
-
-            # So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
-            if current_include_filters is not None:
-                self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
-            else:
-                self.page.evaluate("var include_filters=''")
-
-            self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
-
-            # Bug 3 in Playwright screenshot handling
-            # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
-            # JPEG is better here because the screenshots can be very very large
-
-            # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
-            # which will significantly increase the IO size between the server and client, it's recommended to use the lowest
-            # acceptable screenshot quality here
-            try:
-                # Quality set to 1 because it's not used, just used as a work-around for a bug, no need to change this.
-                self.page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}, quality=1)
-                # The actual screenshot
-                self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
-            except Exception as e:
-                context.close()
-                browser.close()
-                raise ScreenshotUnavailable(url=url, status_code=None)
-
-            context.close()
-            browser.close()
-
-class base_html_webdriver(Fetcher):
-    if os.getenv("WEBDRIVER_URL"):
-        fetcher_description = "WebDriver Chrome/Javascript via '{}'".format(os.getenv("WEBDRIVER_URL"))
-    else:
-        fetcher_description = "WebDriver Chrome/Javascript"
-
-    command_executor = ''
-
-    # Configs for Proxy setup
-    # In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy"
-    selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy',
-                                        'proxyAutoconfigUrl', 'sslProxy', 'autodetect',
-                                        'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
-    proxy = None
-
-    def __init__(self, proxy_override=None):
-        super().__init__()
-        from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
-
-        # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
-        self.command_executor = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"')
-
-        # If any proxy settings are enabled, then we should setup the proxy object
-        proxy_args = {}
-        for k in self.selenium_proxy_settings_mappings:
-            v = os.getenv('webdriver_' + k, False)
-            if v:
-                proxy_args[k] = v.strip('"')
-
-        # Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy
-        if not proxy_args.get('webdriver_httpProxy') and self.system_http_proxy:
-            proxy_args['httpProxy'] = self.system_http_proxy
-        if not proxy_args.get('webdriver_sslProxy') and self.system_https_proxy:
-            proxy_args['httpsProxy'] = self.system_https_proxy
-
-        # Allows override the proxy on a per-request basis
-        if proxy_override is not None:
-            proxy_args['httpProxy'] = proxy_override
-
-        if proxy_args:
-            self.proxy = SeleniumProxy(raw=proxy_args)
-
-    def run(self,
-            url,
-            timeout,
-            request_headers,
-            request_body,
-            request_method,
-            ignore_status_codes=False,
-            current_include_filters=None,
-            is_binary=False):
-
-        from selenium import webdriver
-        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
-        from selenium.common.exceptions import WebDriverException
-        # request_body, request_method unused for now, until some magic in the future happens.
-
-        # check env for WEBDRIVER_URL
-        self.driver = webdriver.Remote(
-            command_executor=self.command_executor,
-            desired_capabilities=DesiredCapabilities.CHROME,
-            proxy=self.proxy)
-
-        try:
-            self.driver.get(url)
-        except WebDriverException as e:
-            # Be sure we close the session window
-            self.quit()
-            raise
-
-        self.driver.set_window_size(1280, 1024)
-        self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
-
-        if self.webdriver_js_execute_code is not None:
-            self.driver.execute_script(self.webdriver_js_execute_code)
-            # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
-            self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
-
-        # @todo - how to check this? is it possible?
-        self.status_code = 200
-        # @todo somehow we should try to get this working for WebDriver
-        # raise EmptyReply(url=url, status_code=r.status_code)
-
-        # @todo - dom wait loaded?
-        time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
-        self.content = self.driver.page_source
-        self.headers = {}
-
-        self.screenshot = self.driver.get_screenshot_as_png()
-
-    # Does the connection to the webdriver work? run a test connection.
-    def is_ready(self):
-        from selenium import webdriver
-        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
-
-        self.driver = webdriver.Remote(
-            command_executor=self.command_executor,
-            desired_capabilities=DesiredCapabilities.CHROME)
-
-        # driver.quit() seems to cause better exceptions
-        self.quit()
-        return True
-
-    def quit(self):
-        if self.driver:
-            try:
-                self.driver.quit()
-            except Exception as e:
-                print("Content Fetcher > Exception in chrome shutdown/quit" + str(e))
-
-
-# "html_requests" is listed as the default fetcher in store.py!
-class html_requests(Fetcher):
-    fetcher_description = "Basic fast Plaintext/HTTP Client"
-
-    def __init__(self, proxy_override=None):
-        self.proxy_override = proxy_override
-
-    def run(self,
-            url,
-            timeout,
-            request_headers,
-            request_body,
-            request_method,
-            ignore_status_codes=False,
-            current_include_filters=None,
-            is_binary=False):
-
-        # Make requests use a more modern looking user-agent
-        if not 'User-Agent' in request_headers:
-            request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
-                                                      'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')
-
-        proxies = {}
-
-        # Allows override the proxy on a per-request basis
-        if self.proxy_override:
-            proxies = {'http': self.proxy_override, 'https': self.proxy_override, 'ftp': self.proxy_override}
-        else:
-            if self.system_http_proxy:
-                proxies['http'] = self.system_http_proxy
-            if self.system_https_proxy:
-                proxies['https'] = self.system_https_proxy
-
-        r = requests.request(method=request_method,
-                             data=request_body,
-                             url=url,
-                             headers=request_headers,
-                             timeout=timeout,
-                             proxies=proxies,
-                             verify=False)
-
-        # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
-        # For example - some sites don't tell us it's utf-8, but return utf-8 content
-        # This seems to not occur when using webdriver/selenium, it seems to detect the text encoding more reliably.
-        # https://github.com/psf/requests/issues/1604 good info about requests encoding detection
-        if not is_binary:
-            # Don't run this for PDF (and requests identified as binary) takes a _long_ time
-            if not r.headers.get('content-type') or not 'charset=' in r.headers.get('content-type'):
-                encoding = chardet.detect(r.content)['encoding']
-                if encoding:
-                    r.encoding = encoding
-
-        if not r.content or not len(r.content):
-            raise EmptyReply(url=url, status_code=r.status_code)
-
-        # @todo test this
-        # @todo maybe you really want to test zero-byte return pages?
-        if r.status_code != 200 and not ignore_status_codes:
-            # maybe check with content works?
-            raise Non200ErrorCodeReceived(url=url, status_code=r.status_code, page_html=r.text)
-
-        self.status_code = r.status_code
-        if is_binary:
-            # Binary files just return their checksum until we add something smarter
-            self.content = hashlib.md5(r.content).hexdigest()
-        else:
-            self.content = r.text
-
-        self.headers = r.headers
-        self.raw_content = r.content
-
-
-# Decide which is the 'real' HTML webdriver, this is more a system wide config
-# rather than site-specific.
-use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
-if use_playwright_as_chrome_fetcher:
-    html_webdriver = base_html_playwright
-else:
-    html_webdriver = base_html_webdriver
--- a/changedetectionio/diff.py
+++ b/changedetectionio/diff.py
@@ -10,7 +10,7 @@ def same_slicer(l, a, b):
        return l[a:b]

 # like .compare but a little different output
-def customSequenceMatcher(before, after, include_equal=False):
+def customSequenceMatcher(before, after, include_equal=False, include_removed=True, include_added=True, include_replaced=True, include_change_type_prefix=True):
    cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \\t", a=before, b=after)

    # @todo Line-by-line mode instead of buncghed, including `after` that is not in `before` (maybe unset?)
@@ -18,34 +18,39 @@ def customSequenceMatcher(before, after, include_equal=False):
        if include_equal and tag == 'equal':
            g = before[alo:ahi]
            yield g
-        elif tag == 'delete':
-            g = ["(removed) " + i for i in same_slicer(before, alo, ahi)]
+        elif include_removed and tag == 'delete':
+            row_prefix = "(removed) " if include_change_type_prefix else ''
+            g = [ row_prefix + i for i in same_slicer(before, alo, ahi)]
            yield g
-        elif tag == 'replace':
-            g = ["(changed) " + i for i in same_slicer(before, alo, ahi)]
-            g += ["(into   ) " + i for i in same_slicer(after, blo, bhi)]
+        elif include_replaced and tag == 'replace':
+            row_prefix = "(changed) " if include_change_type_prefix else ''
+            g = [row_prefix + i for i in same_slicer(before, alo, ahi)]
+            row_prefix = "(into) " if include_change_type_prefix else ''
+            g += [row_prefix + i for i in same_slicer(after, blo, bhi)]
            yield g
-        elif tag == 'insert':
-            g = ["(added  ) " + i for i in same_slicer(after, blo, bhi)]
+        elif include_added and tag == 'insert':
+            row_prefix = "(added) " if include_change_type_prefix else ''
+            g = [row_prefix + i for i in same_slicer(after, blo, bhi)]
            yield g

 # only_differences - only return info about the differences, no context
-# line_feed_sep could be "<br/>" or "<li>" or "\n" etc
-def render_diff(previous_file, newest_file, include_equal=False, line_feed_sep="\n"):
-    with open(newest_file, 'r') as f:
-        newest_version_file_contents = f.read()
-        newest_version_file_contents = [line.rstrip() for line in newest_version_file_contents.splitlines()]
+# line_feed_sep could be "<br>" or "<li>" or "\n" etc
+def render_diff(previous_version_file_contents, newest_version_file_contents, include_equal=False, include_removed=True, include_added=True, include_replaced=True, line_feed_sep="\n", include_change_type_prefix=True):

-    if previous_file:
-        with open(previous_file, 'r') as f:
-            previous_version_file_contents = f.read()
+    newest_version_file_contents = [line.rstrip() for line in newest_version_file_contents.splitlines()]
+
+    if previous_version_file_contents:
            previous_version_file_contents = [line.rstrip() for line in previous_version_file_contents.splitlines()]
    else:
        previous_version_file_contents = ""

-    rendered_diff = customSequenceMatcher(previous_version_file_contents,
-                                          newest_version_file_contents,
-                                          include_equal)
+    rendered_diff = customSequenceMatcher(before=previous_version_file_contents,
+                                          after=newest_version_file_contents,
+                                          include_equal=include_equal,
+                                          include_removed=include_removed,
+                                          include_added=include_added,
+                                          include_replaced=include_replaced,
+                                          include_change_type_prefix=include_change_type_prefix)

    # Recursively join lists
    f = lambda L: line_feed_sep.join([f(x) if type(x) is list else x for x in L])
--- a/changedetectionio/fetchers/init.py
+++ b/changedetectionio/fetchers/init.py
@@ -0,0 +1,150 @@
+from abc import abstractmethod
+import os
+from . import exceptions
+
+visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary'
+
+
+class Fetcher():
+    browser_steps = None
+    browser_steps_screenshot_path = None
+    content = None
+    error = None
+    fetcher_description = "No description"
+    headers = None
+    status_code = None
+    webdriver_js_execute_code = None
+    xpath_data = None
+    xpath_element_js = ""
+    instock_data = None
+    instock_data_js = ""
+
+    # Will be needed in the future by the VisualSelector, always get this where possible.
+    screenshot = False
+    system_http_proxy = os.getenv('HTTP_PROXY')
+    system_https_proxy = os.getenv('HTTPS_PROXY')
+
+    # Time ONTOP of the system defined env minimum time
+    render_extract_delay = 0
+
+    def __init__(self):
+        from pkg_resources import resource_string
+        # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
+        self.xpath_element_js = resource_string(__name__, "../res/xpath_element_scraper.js").decode('utf-8')
+        self.instock_data_js = resource_string(__name__, "../res/stock-not-in-stock.js").decode('utf-8')
+
+
+    @abstractmethod
+    def get_error(self):
+        return self.error
+
+    @abstractmethod
+    def run(self,
+            url,
+            timeout,
+            request_headers,
+            request_body,
+            request_method,
+            ignore_status_codes=False,
+            current_include_filters=None,
+            is_binary=False):
+        # Should set self.error, self.status_code and self.content
+        pass
+
+    @abstractmethod
+    def quit(self):
+        return
+
+    @abstractmethod
+    def get_last_status_code(self):
+        return self.status_code
+
+    @abstractmethod
+    def screenshot_step(self, step_n):
+        return None
+
+    @abstractmethod
+    # Return true/false if this checker is ready to run, in the case it needs todo some special config check etc
+    def is_ready(self):
+        return True
+
+    def iterate_browser_steps(self):
+        from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
+        from playwright._impl._api_types import TimeoutError
+        from jinja2 import Environment
+        jinja2_env = Environment(extensions=['jinja2_time.TimeExtension'])
+
+        step_n = 0
+
+        if self.browser_steps is not None and len(self.browser_steps):
+            interface = steppable_browser_interface()
+            interface.page = self.page
+
+            valid_steps = filter(lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one' and s['operation'] != 'Goto site'), self.browser_steps)
+
+            for step in valid_steps:
+                step_n += 1
+                print(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
+                self.screenshot_step("before-"+str(step_n))
+                self.save_step_html("before-"+str(step_n))
+                try:
+                    optional_value = step['optional_value']
+                    selector = step['selector']
+                    # Support for jinja2 template in step values, with date module added
+                    if '{%' in step['optional_value'] or '{{' in step['optional_value']:
+                        optional_value = str(jinja2_env.from_string(step['optional_value']).render())
+                    if '{%' in step['selector'] or '{{' in step['selector']:
+                        selector = str(jinja2_env.from_string(step['selector']).render())
+
+                    getattr(interface, "call_action")(action_name=step['operation'],
+                                                      selector=selector,
+                                                      optional_value=optional_value)
+                    self.screenshot_step(step_n)
+                    self.save_step_html(step_n)
+                except TimeoutError:
+                    # Stop processing here
+                    raise exceptions.BrowserStepsStepTimout(step_n=step_n)
+
+
+
+    # It's always good to reset these
+    def delete_browser_steps_screenshots(self):
+        import glob
+        if self.browser_steps_screenshot_path is not None:
+            dest = os.path.join(self.browser_steps_screenshot_path, 'step_*.jpeg')
+            files = glob.glob(dest)
+            for f in files:
+                os.unlink(f)
+
+#   Maybe for the future, each fetcher provides its own diff output, could be used for text, image
+#   the current one would return javascript output (as we use JS to generate the diff)
+#
+
+
+def available_fetchers():
+    from . import playwright, html_requests, webdriver
+
+    p = []
+    p.append(tuple(['html_requests', html_requests.fetcher.fetcher_description]))
+
+    # Prefer playwright
+    if os.getenv('PLAYWRIGHT_DRIVER_URL', False):
+        p.append(tuple(['html_webdriver', playwright.fetcher.fetcher_description]))
+
+    elif os.getenv('WEBDRIVER_URL'):
+        p.append(tuple(['html_webdriver', webdriver.fetcher.fetcher_description]))
+
+
+    return p
+
+html_webdriver = None
+# Decide which is the 'real' HTML webdriver, this is more a system wide config rather than site-specific.
+use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
+if use_playwright_as_chrome_fetcher:
+    from . import playwright
+    html_webdriver = getattr(playwright, "fetcher")
+
+else:
+    from . import webdriver
+    html_webdriver = getattr(webdriver, "fetcher")
+
--- a/changedetectionio/fetchers/browserless.py
+++ b/changedetectionio/fetchers/browserless.py
@@ -0,0 +1,71 @@
+from . import Fetcher
+import os
+import requests
+
+
+# Exploit the debugging API to get screenshot and HTML without needing playwright
+# https://www.browserless.io/docs/scrape#debugging
+
+class fetcher(Fetcher):
+    fetcher_description = "Browserless Chrome/Javascript via '{}'".format(os.getenv("BROWSERLESS_DRIVER_URL"))
+
+    command_executor = ''
+    proxy = None
+
+    def __init__(self, proxy_override=None, command_executor=None):
+        super().__init__()
+        self.proxy = proxy_override
+
+    def run(self,
+            url,
+            timeout,
+            request_headers,
+            request_body,
+            request_method,
+            ignore_status_codes=False,
+            current_include_filters=None,
+            is_binary=False):
+        """POST `url` to BROWSERLESS_DRIVER_URL and, on HTTP 200, store the returned HTML, status code and screenshot on self."""
+        target_url = url  # sent untouched without a proxy: a bare trailing "?" would change the URL being fetched
+        if self.proxy:
+            target_url = f"{url}?--proxy-server={self.proxy}"  # NOTE(review): confirm browserless honours the flag here
+
+        import json
+        r = requests.request(method='POST',
+                             data=json.dumps({
+                                 "url": target_url,
+                                 "elements": [],
+                                 "debug": {
+                                     "screenshot": True,
+                                     "console": False,
+                                     "network": True,
+                                     "cookies": False,
+                                     "html": True
+                                 }
+                             }),
+                             url=os.getenv("BROWSERLESS_DRIVER_URL"),
+                             headers={'Content-Type': 'application/json'},
+                             timeout=timeout,
+                             verify=False)
+
+        # "waitFor": "() => document.querySelector('h1')"
+        #        extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
+        #        self.page.wait_for_timeout(extra_wait * 1000)
+
+        if r.status_code == 200:
+            # the basic request to browserless was OK, but how was the internal request to the site?
+            result = r.json()
+
+            if result['debug']['network'].get('inbound') and len(result['debug']['network']['inbound']):
+                self.status_code = result['debug']['network']['inbound'][0]['status']
+
+            self.content = result['debug']['html']
+
+            self.headers = {}
+            if result['debug'].get('screenshot'):
+                import base64
+                self.screenshot = base64.b64decode(result['debug']['screenshot'])
+
+    def is_ready(self):
+        # Try ping?
+        return os.getenv("BROWSERLESS_DRIVER_URL", False)
--- a/changedetectionio/fetchers/exceptions.py
+++ b/changedetectionio/fetchers/exceptions.py
@@ -0,0 +1,66 @@
+class Non200ErrorCodeReceived(Exception):
+    def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
+        # Set this so we can use it in other parts of the app
+        self.status_code = status_code
+        self.url = url
+        self.screenshot = screenshot
+        self.xpath_data = xpath_data
+        self.page_text = None
+
+        if page_html:
+            from changedetectionio import html_tools
+            self.page_text = html_tools.html_to_text(page_html)
+        return
+
+class checksumFromPreviousCheckWasTheSame(Exception):
+    def __init__(self):
+        return
+
+class JSActionExceptions(Exception):
+    def __init__(self, status_code, url, screenshot, message=''):
+        self.status_code = status_code
+        self.url = url
+        self.screenshot = screenshot
+        self.message = message
+        return
+
+class BrowserStepsStepTimout(Exception):
+    def __init__(self, step_n):
+        self.step_n = step_n
+        return
+
+
+class PageUnloadable(Exception):
+    def __init__(self, status_code, url, message, screenshot=False):
+        # Set this so we can use it in other parts of the app
+        self.status_code = status_code
+        self.url = url
+        self.screenshot = screenshot
+        self.message = message
+        return
+
+class EmptyReply(Exception):
+    def __init__(self, status_code, url, screenshot=None):
+        # Set this so we can use it in other parts of the app
+        self.status_code = status_code
+        self.url = url
+        self.screenshot = screenshot
+        return
+
+class ScreenshotUnavailable(Exception):
+    def __init__(self, status_code, url, page_html=None):
+        # Set these so other parts of the app can use them; page_text stays None when no HTML was supplied
+        self.status_code = status_code
+        self.url = url
+        self.page_text = None
+        if page_html:
+            from ..html_tools import html_to_text
+            self.page_text = html_to_text(page_html)
+
+class ReplyWithContentButNoText(Exception):
+    def __init__(self, status_code, url, screenshot=None):
+        # Set this so we can use it in other parts of the app
+        self.status_code = status_code
+        self.url = url
+        self.screenshot = screenshot
+        return
--- a/changedetectionio/fetchers/html_requests.py
+++ b/changedetectionio/fetchers/html_requests.py
@@ -0,0 +1,80 @@
+from . import Fetcher
+from . import exceptions
+
+
+# "html_requests" is listed as the default fetcher in store.py!
+class fetcher(Fetcher):
+    fetcher_description = "Basic fast Plaintext/HTTP Client"
+
+
+    def __init__(self, proxy_override=None):
+        self.proxy_override = proxy_override
+
+    def run(self,
+            url,
+            timeout,
+            request_headers,
+            request_body,
+            request_method,
+            ignore_status_codes=False,
+            current_include_filters=None,
+            is_binary=False):
+
+        import chardet
+        import hashlib
+        import os
+        import requests
+
+        # Make requests use a more modern looking user-agent
+        if not 'User-Agent' in request_headers:
+            request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
+                                                      'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')
+
+        proxies = {}
+
+        # Allows override the proxy on a per-request basis
+        if self.proxy_override:
+            proxies = {'http': self.proxy_override, 'https': self.proxy_override, 'ftp': self.proxy_override}
+        else:
+            if self.system_http_proxy:
+                proxies['http'] = self.system_http_proxy
+            if self.system_https_proxy:
+                proxies['https'] = self.system_https_proxy
+
+        r = requests.request(method=request_method,
+                             data=request_body,
+                             url=url,
+                             headers=request_headers,
+                             timeout=timeout,
+                             proxies=proxies,
+                             verify=False)
+
+        # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
+        # For example - some sites don't tell us it's utf-8, but return utf-8 content
+        # This seems to not occur when using webdriver/selenium, it seems to detect the text encoding more reliably.
+        # https://github.com/psf/requests/issues/1604 good info about requests encoding detection
+        if not is_binary:
+            # Don't run this for PDF (and requests identified as binary) takes a _long_ time
+            if not r.headers.get('content-type') or not 'charset=' in r.headers.get('content-type'):
+                encoding = chardet.detect(r.content)['encoding']
+                if encoding:
+                    r.encoding = encoding
+
+        if not r.content or not len(r.content):
+            raise exceptions.EmptyReply(url=url, status_code=r.status_code)
+
+        # @todo test this
+        # @todo maybe you really want to test zero-byte return pages?
+        if r.status_code != 200 and not ignore_status_codes:
+            # maybe check with content works?
+            raise exceptions.Non200ErrorCodeReceived(url=url, status_code=r.status_code, page_html=r.text)
+
+        self.status_code = r.status_code
+        if is_binary:
+            # Binary files just return their checksum until we add something smarter
+            self.content = hashlib.md5(r.content).hexdigest()
+        else:
+            self.content = r.text
+
+        self.headers = r.headers
+        self.raw_content = r.content
--- a/changedetectionio/fetchers/playwright.py
+++ b/changedetectionio/fetchers/playwright.py
@@ -0,0 +1,208 @@
+from . import Fetcher
+from . import exceptions
+from . import visualselector_xpath_selectors
+
+import os
+import logging
+import time
+
+class fetcher(Fetcher):
+    fetcher_description = "Playwright {}/Javascript".format(
+        os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
+    )
+    if os.getenv("PLAYWRIGHT_DRIVER_URL"):
+        fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL"))
+
+    browser_type = ''
+    command_executor = ''
+
+    # Configs for Proxy setup
+    # In the ENV vars, is prefixed with "playwright_proxy_", so it is for example "playwright_proxy_server"
+    playwright_proxy_settings_mappings = ['bypass', 'server', 'username', 'password']
+
+    proxy = None
+
+    def __init__(self, proxy_override=None):
+        super().__init__()
+        import json
+
+        # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
+        self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
+        self.command_executor = os.getenv(
+            "PLAYWRIGHT_DRIVER_URL",
+            'ws://playwright-chrome:3000'
+        ).strip('"')
+
+        # If any proxy settings are enabled, then we should setup the proxy object
+        proxy_args = {}
+        for k in self.playwright_proxy_settings_mappings:
+            v = os.getenv('playwright_proxy_' + k, False)
+            if v:
+                proxy_args[k] = v.strip('"')
+
+        if proxy_args:
+            self.proxy = proxy_args
+
+        # allow per-watch proxy selection override
+        if proxy_override:
+            self.proxy = {'server': proxy_override}
+
+        if self.proxy:
+            # Playwright needs separate username and password values
+            from urllib.parse import urlparse
+            parsed = urlparse(self.proxy.get('server'))
+            if parsed.username:
+                self.proxy['username'] = parsed.username
+                self.proxy['password'] = parsed.password
+
+    def screenshot_step(self, step_n=''):
+        screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=85)
+
+        if self.browser_steps_screenshot_path is not None:
+            destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
+            logging.debug("Saving step screenshot to {}".format(destination))
+            with open(destination, 'wb') as f:
+                f.write(screenshot)
+
+    def save_step_html(self, step_n):
+        if self.browser_steps_screenshot_path is not None:  # nowhere to write otherwise, same guard as screenshot_step()
+            destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
+            logging.debug("Saving step HTML to {}".format(destination))
+            with open(destination, 'w') as f:
+                f.write(self.page.content())
+
+    def run(self,
+            url,
+            timeout,
+            request_headers,
+            request_body,
+            request_method,
+            ignore_status_codes=False,
+            current_include_filters=None,
+            is_binary=False):
+        """Load `url` in the remote Playwright browser and fill content, status_code, headers, xpath_data, instock_data and screenshot; raises PageUnloadable, EmptyReply or ScreenshotUnavailable."""
+        from playwright.sync_api import sync_playwright
+        import playwright._impl._api_types
+        import json
+
+        self.delete_browser_steps_screenshots()
+        response = None
+        with sync_playwright() as p:
+            browser_type = getattr(p, self.browser_type)
+
+            # Seemed to cause a connection Exception even tho I can see it connect
+            # self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
+            # 60,000 connection timeout only
+            browser = browser_type.connect_over_cdp(self.command_executor, timeout=60000)
+
+            # Set user agent to prevent Cloudflare from blocking the browser
+            # Use the default one configured in the App.py model that's passed from fetch_site_status.py
+            context = browser.new_context(
+                user_agent=request_headers['User-Agent'] if request_headers.get('User-Agent') else 'Mozilla/5.0',
+                proxy=self.proxy,
+                # This is needed to enable JavaScript execution on GitHub and others
+                bypass_csp=True,
+                # Should be `allow` or `block` - sites like YouTube can transmit large amounts of data via Service Workers
+                service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'),
+                # Should never be needed
+                accept_downloads=False
+            )
+
+            self.page = context.new_page()
+            if len(request_headers):
+                context.set_extra_http_headers(request_headers)
+            # Timeouts and the console listener apply to every page, with or without extra request headers
+            self.page.set_default_navigation_timeout(90000)
+            self.page.set_default_timeout(90000)
+
+            # Listen for all console events and handle errors
+            self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
+
+            # Goto page
+            try:
+                # Wait_until = commit
+                # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
+                # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
+                # This seemed to solve nearly all 'TimeoutErrors'
+                response = self.page.goto(url, wait_until='commit')
+            except playwright._impl._api_types.Error as e:
+                # Retry once - https://github.com/browserless/chrome/issues/2485
+                # Sometimes errors related to invalid cert's and other can be random
+                print ("Content Fetcher > retrying request got error - ", str(e))
+                time.sleep(1)
+                response = self.page.goto(url, wait_until='commit')
+
+            except Exception as e:
+                print ("Content Fetcher > Other exception when page.goto", str(e))
+                context.close()
+                browser.close()
+                raise exceptions.PageUnloadable(url=url, status_code=None, message=str(e))
+
+            # Execute any browser steps
+            try:
+                extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
+                self.page.wait_for_timeout(extra_wait * 1000)
+
+                if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
+                    self.page.evaluate(self.webdriver_js_execute_code)
+
+            except playwright._impl._api_types.TimeoutError as e:
+                # This can be ok, we will try to grab what we could retrieve.
+                # Keep the context/browser open: the page is still read below
+                # (browser steps, content, xpath data and the screenshot).
+                pass
+            except Exception as e:
+                print ("Content Fetcher > Other exception when executing custom JS code", str(e))
+                context.close()
+                browser.close()
+                raise exceptions.PageUnloadable(url=url, status_code=None, message=str(e))
+
+            if response is None:
+                context.close()
+                browser.close()
+                print ("Content Fetcher > Response object was none")
+                raise exceptions.EmptyReply(url=url, status_code=None)
+
+            # Run Browser Steps here
+            self.iterate_browser_steps()
+
+            extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
+            time.sleep(extra_wait)
+
+            self.content = self.page.content()
+            self.status_code = response.status
+            if len(self.content.strip()) == 0:
+                context.close()
+                browser.close()
+                print ("Content Fetcher > Content was empty")
+                raise exceptions.EmptyReply(url=url, status_code=response.status)
+
+            self.status_code = response.status
+            self.headers = response.all_headers()
+
+            # So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
+            if current_include_filters is not None:
+                self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
+            else:
+                self.page.evaluate("var include_filters=''")
+
+            self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
+            self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}")
+
+            # Bug 3 in Playwright screenshot handling
+            # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
+            # JPEG is better here because the screenshots can be very very large
+
+            # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
+            # which will significantly increase the IO size between the server and client, it's recommended to use the lowest
+            # acceptable screenshot quality here
+            try:
+                # The actual screenshot
+                self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
+            except Exception as e:
+                context.close()
+                browser.close()
+                raise exceptions.ScreenshotUnavailable(url=url, status_code=None)
+
+            context.close()
+            browser.close()
--- a/changedetectionio/fetchers/webdriver.py
+++ b/changedetectionio/fetchers/webdriver.py
@@ -0,0 +1,103 @@
+from . import Fetcher
+import os
+import time
+
+class fetcher(Fetcher):
+    if os.getenv("WEBDRIVER_URL"):
+        fetcher_description = "WebDriver Chrome/Javascript via '{}'".format(os.getenv("WEBDRIVER_URL"))
+    else:
+        fetcher_description = "WebDriver Chrome/Javascript"
+
+    command_executor = ''
+
+    # Configs for Proxy setup
+    # In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy"
+    selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy',
+                                        'proxyAutoconfigUrl', 'sslProxy', 'autodetect',
+                                        'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
+    proxy = None
+
+    def __init__(self, proxy_override=None, command_executor=None):
+        super().__init__()
+        from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
+        # Resolve the remote WebDriver URL, then build the optional Selenium proxy object from the environment
+        # .strip('"') is going to save someone a lot of time when they accidentally wrap the env value
+        if command_executor:
+            self.command_executor = command_executor
+        else:
+            self.command_executor = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"')
+
+        # If any proxy settings are enabled, then we should setup the proxy object
+        proxy_args = {}
+        for k in self.selenium_proxy_settings_mappings:
+            v = os.getenv('webdriver_' + k, False)
+            if v:
+                proxy_args[k] = v.strip('"')
+
+        # Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy (proxy_args keys carry no "webdriver_" prefix)
+        if not proxy_args.get('httpProxy') and self.system_http_proxy:
+            proxy_args['httpProxy'] = self.system_http_proxy
+        if not proxy_args.get('sslProxy') and self.system_https_proxy:
+            proxy_args['sslProxy'] = self.system_https_proxy
+
+        # Allows override the proxy on a per-request basis
+        if proxy_override is not None:
+            proxy_args['httpProxy'] = proxy_override
+
+        if proxy_args:
+            self.proxy = SeleniumProxy(raw=proxy_args)
+
+    def run(self,
+            url,
+            timeout,
+            request_headers,
+            request_body,
+            request_method,
+            ignore_status_codes=False,
+            current_include_filters=None,
+            is_binary=False):
+        """Load `url` through the remote WebDriver session and store page source, a PNG screenshot and a fixed 200 status on self."""
+        from selenium import webdriver
+        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+        from selenium.common.exceptions import WebDriverException
+        # request_body, request_method unused for now, until some magic in the future happens.
+
+        # check env for WEBDRIVER_URL
+        self.driver = webdriver.Remote(
+            command_executor=self.command_executor,
+            desired_capabilities=DesiredCapabilities.CHROME,
+            proxy=self.proxy
+        )
+
+        try:
+            self.driver.get(url)
+        except WebDriverException as e:
+            # Be sure we close the session window (the inherited quit() is a no-op, so close the driver directly)
+            self.driver.quit()
+            raise
+
+        self.driver.set_window_size(1280, 1024)
+        self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
+
+        if self.webdriver_js_execute_code is not None:
+            self.driver.execute_script(self.webdriver_js_execute_code)
+            # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
+            self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
+
+        # @todo - how to check this? is it possible?
+        self.status_code = 200
+        # @todo somehow we should try to get this working for WebDriver
+        # raise EmptyReply(url=url, status_code=r.status_code)
+
+        # @todo - dom wait loaded?
+        time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
+        self.content = self.driver.page_source
+        self.headers = {}
+
+        self.screenshot = self.driver.get_screenshot_as_png()
+
+    # Try something with requests?
+    def is_ready(self):
+        return True
+
+
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@@ -21,7 +21,6 @@ from wtforms.validators import ValidationError
 # each select <option data-enabled="enabled-0-0"
 from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config

-from changedetectionio import content_fetcher
 from changedetectionio.notification import (
    valid_notification_formats,
 )
@@ -135,30 +134,31 @@ class ValidateContentFetcherIsReady(object):

    def __call__(self, form, field):
        import urllib3.exceptions
-        from changedetectionio import content_fetcher
+        import importlib

        # Better would be a radiohandler that keeps a reference to each class
        if field.data is not None and field.data != 'system':
-            klass = getattr(content_fetcher, field.data)
-            some_object = klass()
-            try:
-                ready = some_object.is_ready()
+            from . import fetchers
+            if fetchers.html_webdriver is not None:
+                try:
+                    driver = fetchers.html_webdriver()
+                    driver.is_ready()

-            except urllib3.exceptions.MaxRetryError as e:
-                driver_url = some_object.command_executor
-                message = field.gettext('Content fetcher \'%s\' did not respond.' % (field.data))
-                message += '<br/>' + field.gettext(
-                    'Be sure that the selenium/webdriver runner is running and accessible via network from this container/host.')
-                message += '<br/>' + field.gettext('Did you follow the instructions in the wiki?')
-                message += '<br/><br/>' + field.gettext('WebDriver Host: %s' % (driver_url))
-                message += '<br/><a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">Go here for more information</a>'
-                message += '<br/>'+field.gettext('Content fetcher did not respond properly, unable to use it.\n %s' % (str(e)))
+                except urllib3.exceptions.MaxRetryError as e:
+                    driver_url = fetchers.html_webdriver.command_executor
+                    message = field.gettext('Content fetcher \'%s\' did not respond.' % (field.data))
+                    message += '<br>' + field.gettext(
+                        'Be sure that the selenium/webdriver runner is running and accessible via network from this container/host.')
+                    message += '<br>' + field.gettext('Did you follow the instructions in the wiki?')
+                    message += '<br><br>' + field.gettext('WebDriver Host: %s' % (driver_url))
+                    message += '<br><a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">Go here for more information</a>'
+                    message += '<br>'+field.gettext('Content fetcher did not respond properly, unable to use it.\n %s' % (str(e)))

-                raise ValidationError(message)
+                    raise ValidationError(message)

-            except Exception as e:
-                message = field.gettext('Content fetcher \'%s\' did not respond properly, unable to use it.\n %s')
-                raise ValidationError(message % (field.data, e))
+                except Exception as e:
+                    message = field.gettext('Content fetcher \'%s\' did not respond properly, unable to use it.\n %s')
+                    raise ValidationError(message % (field.data, e))


 class ValidateNotificationBodyAndTitleWhenURLisSet(object):
@@ -232,12 +232,17 @@ class validateURL(object):

    def __call__(self, form, field):
        import validators
+
        try:
            validators.url(field.data.strip())
        except validators.ValidationFailure:
            message = field.gettext('\'%s\' is not a valid URL.' % (field.data.strip()))
            raise ValidationError(message)

+        from .model.Watch import is_safe_url
+        if not is_safe_url(field.data):
+            raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX')
+

 class ValidateListRegex(object):
    """
@@ -339,23 +344,30 @@ class ValidateCSSJSONXPATHInput(object):
                    raise ValidationError("A system-error occurred when validating your jq expression")

 class quickWatchForm(Form):
+    from . import processors
+
    url = fields.URLField('URL', validators=[validateURL()])
    tag = StringField('Group tag', [validators.Optional()])
    watch_submit_button = SubmitField('Watch', render_kw={"class": "pure-button pure-button-primary"})
+    processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff")
    edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})


-
 # Common to a single watch and the global settings
 class commonSettingsForm(Form):
+    from .fetchers import available_fetchers
    notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers()])
    notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
    notification_body = TextAreaField('Notification Body', default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()])
    notification_format = SelectField('Notification format', choices=valid_notification_formats.keys())
-    fetch_backend = RadioField(u'Fetch Method', choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
+    fetch_backend = RadioField(u'Fetch Method', choices=available_fetchers(), validators=[ValidateContentFetcherIsReady()])
    extract_title_as_title = BooleanField('Extract <title> from document and use as watch title', default=False)
    webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1,
                                                                                                                                    message="Should contain one or more seconds")])
+class importForm(Form):  # Form with a processor choice and a free-text 'urls' field
+    from . import processors
+    processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff")
+    urls = TextAreaField('URLs')

 class SingleBrowserStep(Form):

@@ -388,11 +400,19 @@ class watchForm(commonSettingsForm):
    body = TextAreaField('Request body', [validators.Optional()])
    method = SelectField('Request method', choices=valid_method, default=default_method)
    ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
-    check_unique_lines = BooleanField('Only trigger when new lines appear', default=False)
+    check_unique_lines = BooleanField('Only trigger when unique lines appear', default=False)
+
+    filter_text_added = BooleanField('Added lines', default=True)
+    filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
+    filter_text_removed = BooleanField('Removed lines', default=True)
+
+    # @todo this class could be moved to its own text_json_diff_watchForm and this goes to restock_diff_Watchform perhaps
+    in_stock_only = BooleanField('Only trigger when product goes BACK to in-stock', default=True)
+
    trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
    if os.getenv("PLAYWRIGHT_DRIVER_URL"):
        browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
-    text_should_not_be_present = StringListField('Block change-detection if text matches', [validators.Optional(), ValidateListRegex()])
+    text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
    webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])

    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
@@ -453,18 +473,18 @@ class globalSettingsRequestForm(Form):

 # datastore.data['settings']['application']..
 class globalSettingsApplicationForm(commonSettingsForm):
-
-    base_url = StringField('Base URL', validators=[validators.Optional()])
-    global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
-    global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
-    ignore_whitespace = BooleanField('Ignore whitespace')
-    removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
-    empty_pages_are_a_change =  BooleanField('Treat empty pages as a change?', default=False)
-    render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
-    fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
+    from .fetchers import available_fetchers
    api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
+    base_url = StringField('Base URL', validators=[validators.Optional()])
+    empty_pages_are_a_change =  BooleanField('Treat empty pages as a change?', default=False)
+    fetch_backend = RadioField('Fetch Method', default="html_requests", choices=available_fetchers(), validators=[ValidateContentFetcherIsReady()])
+    global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
+    global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
+    ignore_whitespace = BooleanField('Ignore whitespace')
    password = SaltyPasswordField()
-
+    removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
+    render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
+    shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
    filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
                                                                  render_kw={"style": "width: 5em;"},
                                                                  validators=[validators.NumberRange(min=0,
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -8,7 +8,7 @@ import json
 import re

 # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
-TEXT_FILTER_LIST_LINE_SUFFIX = "<br/>"
+TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"

 # 'price' , 'lowPrice', 'highPrice' are usually under here
 # all of those may or may not appear on different websites
@@ -287,3 +287,18 @@ def workarounds_for_obfuscations(content):
    content = re.sub('<!--\s+-->', '', content)

    return content
+
+
+def get_triggered_text(content, trigger_text):
+    """Return the lines of content that strip_ignore_text() flags for trigger_text."""
+    # In "line numbers" mode the result is used as a collection of 1-based line numbers
+    matched_line_numbers = strip_ignore_text(content=content,
+                                             wordlist=trigger_text,
+                                             mode="line numbers")
+
+    # Keep only the lines whose (1-based) position was reported as a match
+    return [
+        line
+        for line_number, line in enumerate(content.splitlines(), start=1)
+        if line_number in matched_line_numbers
+    ]
--- a/changedetectionio/importer.py
+++ b/changedetectionio/importer.py
@@ -29,6 +29,7 @@ class import_url_list(Importer):
            data,
            flash,
            datastore,
+            processor=None
            ):

        urls = data.split("\n")
@@ -52,7 +53,11 @@ class import_url_list(Importer):
            # Flask wtform validators wont work with basic auth, use validators package
            # Up to 5000 per batch so we dont flood the server
            if len(url) and validators.url(url.replace('source:', '')) and good < 5000:
-                new_uuid = datastore.add_watch(url=url.strip(), tag=tags, write_to_disk_now=False)
+                extras = None
+                if processor:
+                    extras = {'processor': processor}
+                new_uuid = datastore.add_watch(url=url.strip(), tag=tags, write_to_disk_now=False, extras=extras)
+
                if new_uuid:
                    # Straight into the queue.
                    self.new_uuids.append(new_uuid)
--- a/changedetectionio/model/App.py
+++ b/changedetectionio/model/App.py
@@ -40,6 +40,7 @@ class model(dict):
                    'notification_body': default_notification_body,
                    'notification_format': default_notification_format,
                    'schema_version' : 0,
+                    'shared_diff_access': False,
                    'webdriver_delay': None  # Extra delay in seconds before extracting text
                }
            }
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -1,9 +1,14 @@
 from distutils.util import strtobool
 import logging
 import os
+import re
 import time
 import uuid

+# Allowable protocols, protects against javascript: etc
+# file:// is further checked by ALLOW_FILE_URI
+SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
+
 minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
 mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}

@@ -18,12 +23,17 @@ base_config = {
    'consecutive_filter_failures': 0,  # Every time the CSS/xPath filter cannot be located, reset when all is fine.
    'extract_text': [],  # Extract text by regex after filters
    'extract_title_as_title': False,
-    'fetch_backend': None,
+    'fetch_backend': 'system', # plaintext, playwright etc
+    'processor': 'text_json_diff', # could be restock_diff or others from .processors
    'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
+    'filter_text_added': True,
+    'filter_text_replaced': True,
+    'filter_text_removed': True,
    'has_ldjson_price_data': None,
    'track_ldjson_price_data': None,
    'headers': {},  # Extra headers to send
    'ignore_text': [],  # List of text to ignore when calculating the comparison checksum
+    'in_stock_only' : True, # Only trigger change on going to instock from out-of-stock
    'include_filters': [],
    'last_checked': 0,
    'last_error': False,
@@ -55,6 +65,22 @@ base_config = {
    'webdriver_js_execute_code': None,  # Run before change-detection
 }

+
+def is_safe_url(test_url):
+    """Return True when test_url (minus an optional 'source:' prefix) starts with a permitted protocol."""
+    # See https://github.com/dgtlmoon/changedetection.io/issues/1358
+    if not isinstance(test_url, str):
+        return False
+
+    # 'source:' is a valid way to tell us to return the source, so drop it before checking.
+    # Only the leading prefix is dropped: removing it anywhere would let e.g. 'htsource:tp://'
+    # be rebuilt into an allowed protocol, so we dont get 'source:javascript:' etc
+    test_url = re.sub(r'^source:', '', test_url.strip(), flags=re.IGNORECASE)
+
+    # Allow-list check, overridable through the SAFE_PROTOCOL_REGEX environment variable
+    pattern = re.compile(os.getenv('SAFE_PROTOCOL_REGEX', SAFE_PROTOCOL_REGEX), re.IGNORECASE)
+    return bool(pattern.match(test_url.strip()))
+
 class model(dict):
    __newest_history_key = None
    __history_n = 0
@@ -93,7 +119,11 @@ class model(dict):

    @property
    def link(self):
+
        url = self.get('url', '')
+        if not is_safe_url(url):
+            return 'DISABLED'
+
        ready_url = url
        if '{%' in url or '{{' in url:
            from jinja2 import Environment
@@ -128,7 +158,9 @@ class model(dict):
    @property
    def is_pdf(self):
        # content_type field is set in the future
-        return '.pdf' in self.get('url', '').lower() or 'pdf' in self.get('content_type', '').lower()
+        # https://github.com/dgtlmoon/changedetection.io/issues/1392
+        # Not sure the best logic here
+        return self.get('url', '').lower().endswith('.pdf') or 'pdf' in self.get('content_type', '').lower()

    @property
    def label(self):
@@ -212,9 +244,32 @@ class model(dict):
        bump = self.history
        return self.__newest_history_key

+    def get_history_snapshot(self, timestamp):
+        """Return the text of the snapshot that self.history lists for `timestamp`."""
+        import brotli
+        filepath = self.history[timestamp]
+
+        # See if a brotli version exists and switch to that
+        if not filepath.endswith('.br') and os.path.isfile(f"{filepath}.br"):
+            filepath = f"{filepath}.br"
+
+        # OR in the backup case that the .br does not exist, but the plain one does.
+        # Trim only the trailing suffix: str.replace() would also hit a '.br' earlier in the path.
+        if filepath.endswith('.br') and not os.path.isfile(filepath) and os.path.isfile(filepath[:-3]):
+            filepath = filepath[:-3]
+
+        if filepath.endswith('.br'):
+            # Brotli (https://www.rfc-editor.org/rfc/rfc7932) doesnt have a fileheader to detect it, so we rely on filename
+            with open(filepath, 'rb') as f:
+                return brotli.decompress(f.read()).decode('utf-8')
+
+        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
+            return f.read()
+
    # Save some text file to the appropriate path and bump the history
    # result_obj from fetch_site_status.run()
-    def save_history_text(self, contents, timestamp):
+    def save_history_text(self, contents, timestamp, snapshot_id):
+        import brotli

        self.ensure_data_dir_exists()

@@ -223,13 +278,21 @@ class model(dict):
        if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
            time.sleep(timestamp - self.__newest_history_key)

-        snapshot_fname = "{}.txt".format(str(uuid.uuid4()))
+        threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
+        skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))

-        # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
-        # most sites are utf-8 and some are even broken utf-8
-        with open(os.path.join(self.watch_data_dir, snapshot_fname), 'wb') as f:
-            f.write(contents)
-            f.close()
+        if not skip_brotli and len(contents) > threshold:
+            snapshot_fname = f"{snapshot_id}.txt.br"
+            dest = os.path.join(self.watch_data_dir, snapshot_fname)
+            if not os.path.exists(dest):
+                with open(dest, 'wb') as f:
+                    f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
+        else:
+            snapshot_fname = f"{snapshot_id}.txt"
+            dest = os.path.join(self.watch_data_dir, snapshot_fname)
+            if not os.path.exists(dest):
+                with open(dest, 'wb') as f:
+                    f.write(contents)

        # Append to index
        # @todo check last char was \n
@@ -266,7 +329,8 @@ class model(dict):
        # Compare each lines (set) against each history text file (set) looking for something new..
        existing_history = set({})
        for k, v in self.history.items():
-            alist = set([line.decode('utf-8').strip().lower() for line in open(v, 'rb')])
+            content = self.get_history_snapshot(k)
+            alist = set([line.strip().lower() for line in content.splitlines()])
            existing_history = existing_history.union(alist)

        # Check that everything in local_lines(new stuff) already exists in existing_history - it should
@@ -281,17 +345,6 @@ class model(dict):
        # False is not an option for AppRise, must be type None
        return None

-    def get_screenshot_as_jpeg(self):
-
-        # Created by save_screenshot()
-        fname = os.path.join(self.watch_data_dir, "last-screenshot.jpg")
-        if os.path.isfile(fname):
-            return fname
-
-        # False is not an option for AppRise, must be type None
-        return None
-
-
    def __get_file_ctime(self, filename):
        fname = os.path.join(self.watch_data_dir, filename)
        if os.path.isfile(fname):
@@ -338,6 +391,7 @@ class model(dict):
            return fname
        return False

+
    def pause(self):
        self['paused'] = True

@@ -367,8 +421,8 @@ class model(dict):
        # self.history will be keyed with the full path
        for k, fname in self.history.items():
            if os.path.isfile(fname):
-                with open(fname, "r") as f:
-                    contents = f.read()
+                if True:
+                    contents = self.get_history_snapshot(k)
                    res = re.findall(regex, contents, re.MULTILINE)
                    if res:
                        if not csv_writer:
@@ -404,3 +458,38 @@ class model(dict):
    # Return list of tags, stripped and lowercase, used for searching
    def all_tags(self):
        return [s.strip().lower() for s in self.get('tag','').split(',')]
+
+    def has_special_diff_filter_options_set(self):
+        """True when some, but not all, of the added/replaced/removed line filters are switched off."""
+        enabled = (
+            self.get('filter_text_added', True),
+            self.get('filter_text_replaced', True),
+            self.get('filter_text_removed', True),
+        )
+
+        # All False - nothing would be done, so act like it's not processable.
+        # All True - no special option is set.
+        # Only a mix of the two counts as a special filter configuration.
+        return any(enabled) and not all(enabled)
+
+
+    def get_last_fetched_before_filters(self):
+        """Return the text stored in 'last-fetched.br', else the last history snapshot, else ''."""
+        import brotli
+        last_fetched_path = os.path.join(self.watch_data_dir, 'last-fetched.br')
+        if os.path.isfile(last_fetched_path):
+            with open(last_fetched_path, 'rb') as handle:
+                return brotli.decompress(handle.read()).decode('utf-8')
+
+        # No previous fetch stored yet: fall back to the last key of self.history
+        snapshot_keys = list(self.history.keys())
+        if not snapshot_keys:
+            return ''
+
+        return self.get_history_snapshot(snapshot_keys[-1])
+
+    def save_last_fetched_before_filters(self, contents):
+        """Brotli-compress contents and write it to 'last-fetched.br' in the watch data dir."""
+        import brotli
+        with open(os.path.join(self.watch_data_dir, 'last-fetched.br'), 'wb') as handle:
+            handle.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
--- a/changedetectionio/notification.py
+++ b/changedetectionio/notification.py
@@ -5,15 +5,18 @@ import json

 valid_tokens = {
    'base_url': '',
-    'watch_url': '',
-    'watch_uuid': '',
-    'watch_title': '',
-    'watch_tag': '',
+    'current_snapshot': '',
    'diff': '',
+    'diff_added': '',
    'diff_full': '',
+    'diff_removed': '',
    'diff_url': '',
    'preview_url': '',
-    'current_snapshot': ''
+    'triggered_text': '',
+    'watch_tag': '',
+    'watch_title': '',
+    'watch_url': '',
+    'watch_uuid': '',
 }

 default_notification_format_for_watch = 'System default'
@@ -120,10 +123,10 @@ def process_notification(n_object, datastore):
                    url += k + 'avatar_url=https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'

                if url.startswith('tgram://'):
-                    # Telegram only supports a limit subset of HTML, remove the '<br/>' we place in.
+                    # Telegram only supports a limit subset of HTML, remove the '<br>' we place in.
                    # re https://github.com/dgtlmoon/changedetection.io/issues/555
                    # @todo re-use an existing library we have already imported to strip all non-allowed tags
-                    n_body = n_body.replace('<br/>', '\n')
+                    n_body = n_body.replace('<br>', '\n')
                    n_body = n_body.replace('</br>', '\n')
                    # real limit is 4096, but minus some for extra metadata
                    payload_max_size = 3600
@@ -209,15 +212,18 @@ def create_notification_parameters(n_object, datastore):
    tokens.update(
        {
            'base_url': base_url if base_url is not None else '',
+            'current_snapshot': n_object['current_snapshot'] if 'current_snapshot' in n_object else '',
+            'diff': n_object.get('diff', ''),  # Null default in the case we use a test
+            'diff_added': n_object.get('diff_added', ''),  # Null default in the case we use a test
+            'diff_full': n_object.get('diff_full', ''),  # Null default in the case we use a test
+            'diff_removed': n_object.get('diff_removed', ''),  # Null default in the case we use a test
+            'diff_url': diff_url,
+            'preview_url': preview_url,
+            'triggered_text': n_object.get('triggered_text', ''),
+            'watch_tag': watch_tag if watch_tag is not None else '',
+            'watch_title': watch_title if watch_title is not None else '',
            'watch_url': watch_url,
            'watch_uuid': uuid,
-            'watch_title': watch_title if watch_title is not None else '',
-            'watch_tag': watch_tag if watch_tag is not None else '',
-            'diff_url': diff_url,
-            'diff': n_object.get('diff', ''),  # Null default in the case we use a test
-            'diff_full': n_object.get('diff_full', ''),  # Null default in the case we use a test
-            'preview_url': preview_url,
-            'current_snapshot': n_object['current_snapshot'] if 'current_snapshot' in n_object else ''
        })

    return tokens
--- a/changedetectionio/processors/README.md
+++ b/changedetectionio/processors/README.md
@@ -0,0 +1,11 @@
+# Change detection post-processors
+
+The concept here is to be able to switch between different domain specific problems to solve.
+
+- `text_json_diff` The traditional text and JSON comparison handler
+- `restock_diff` Only cares about detecting if a product looks like it has some text that suggests that it's out of stock, otherwise assumes that it's in stock.
+
+Some suggestions for the future
+
+- `graphical` 
+- `restock_and_price` - extract price AND stock text
--- a/changedetectionio/processors/__init__.py
+++ b/changedetectionio/processors/__init__.py
@@ -0,0 +1,24 @@
+from abc import ABC, abstractmethod
+import hashlib
+
+
+class difference_detection_processor(ABC):
+    """Base class for processors; deriving from ABC makes @abstractmethod enforce that run() is overridden."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    @abstractmethod
+    def run(self, uuid, skip_when_checksum_same=True):
+        """Template of the contract: return (changed_detected, update_obj, snapshot_bytes)."""
+        update_obj = {'last_notification_error': False, 'last_error': False}
+        update_obj["previous_md5"] = hashlib.md5('xxxxx'.encode('utf-8')).hexdigest()
+        changed_detected = False
+        return changed_detected, update_obj, ''.encode('utf-8')
+
+
+def available_processors():
+    """Return the (processor module name, display name) pairs offered as form choices."""
+    from . import restock_diff, text_json_diff
+    # @todo Make this smarter with introspection of sorts.
+    return [('text_json_diff', text_json_diff.name), ('restock_diff', restock_diff.name)]
--- a/changedetectionio/processors/restock_diff.py
+++ b/changedetectionio/processors/restock_diff.py
@@ -0,0 +1,126 @@
+
+import hashlib
+import os
+import re
+import urllib3
+from . import difference_detection_processor
+from copy import deepcopy
+from .. import fetchers
+
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+name = 'Re-stock detection for single product pages'
+description = 'Detects if the product goes back to in-stock'
+
+class perform_site_check(difference_detection_processor):
+    """Re-stock processor: fetches the page with the webdriver fetcher and compares its stock text."""
+    screenshot = None
+    xpath_data = None
+
+    def __init__(self, *args, datastore, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.datastore = datastore
+
+    def run(self, uuid, skip_when_checksum_same=True):
+        """Fetch the watch `uuid` once and report whether its stock text changed.
+        Returns (changed_detected, update_obj, snapshot_bytes). Raises Exception when the watch
+        is gone, file:// access is denied, or the resolved fetch backend is not 'html_webdriver'.
+        `skip_when_checksum_same` is accepted but not used by this processor.
+        """
+        # DeepCopy so we can be sure we don't accidentally change anything by reference
+        watch = deepcopy(self.datastore.data['watching'].get(uuid))
+
+        if not watch:
+            raise Exception("Watch no longer exists.")
+
+        # Protect against file:// access
+        if re.search(r'^file', watch.get('url', ''), re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
+            raise Exception(
+                "file:// type access is denied for security reasons."
+            )
+
+        # Unset any existing notification error
+        update_obj = {'last_notification_error': False, 'last_error': False}
+        extra_headers = watch.get('headers', [])
+
+        # Tweak the base config with the per-watch ones
+        request_headers = deepcopy(self.datastore.data['settings']['headers'])
+        request_headers.update(extra_headers)
+
+        # https://github.com/psf/requests/issues/4525
+        # Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
+        # do this by accident.
+        if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
+            request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
+
+        timeout = self.datastore.data['settings']['requests'].get('timeout')
+
+        url = watch.link
+
+        request_body = self.datastore.data['watching'][uuid].get('body')
+        request_method = self.datastore.data['watching'][uuid].get('method')
+        ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False)
+
+        # Pluggable content fetcher
+        prefer_backend = watch.get_fetch_backend
+        if not prefer_backend or prefer_backend == 'system':
+            prefer_backend = self.datastore.data['settings']['application']['fetch_backend']
+
+        # Could be removed if requests/plaintext could also return some info?
+        # Checked before any fetcher is built, so an unsupported backend fails without side effects.
+        if prefer_backend != 'html_webdriver':
+            raise Exception("Re-stock detection requires Chrome or compatible webdriver/playwright fetcher to work")
+        preferred_fetcher = fetchers.html_webdriver
+
+        proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=uuid)
+        proxy_url = None
+        if proxy_id:
+            proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
+            print("UUID {} Using proxy {}".format(uuid, proxy_url))
+
+        fetcher = preferred_fetcher(proxy_override=proxy_url)
+
+        # Configurable per-watch or global extra delay before extracting text (for webDriver types)
+        system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
+        if watch['webdriver_delay'] is not None:
+            fetcher.render_extract_delay = watch.get('webdriver_delay')
+        elif system_webdriver_delay is not None:
+            fetcher.render_extract_delay = system_webdriver_delay
+
+        if watch.get('webdriver_js_execute_code') is not None and watch.get('webdriver_js_execute_code').strip():
+            fetcher.webdriver_js_execute_code = watch.get('webdriver_js_execute_code')
+
+        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters'))
+        fetcher.quit()
+
+        self.screenshot = fetcher.screenshot
+        self.xpath_data = fetcher.xpath_data
+
+        # Track the content type
+        update_obj['content_type'] = fetcher.headers.get('Content-Type', '')
+        update_obj["last_check_status"] = fetcher.get_last_status_code()
+
+        # Main detection method
+        fetched_md5 = None
+        if fetcher.instock_data:
+            fetched_md5 = hashlib.md5(fetcher.instock_data.encode('utf-8')).hexdigest()
+            # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
+            update_obj["in_stock"] = fetcher.instock_data == 'Possibly in stock'
+
+        # The main thing that all this at the moment comes down to :)
+        changed_detected = False
+
+        if watch.get('previous_md5') and watch.get('previous_md5') != fetched_md5:
+            # Yes if we only care about it going to instock, AND we are in stock
+            # .get(): "in_stock" is only set above when the fetcher returned some stock text
+            if watch.get('in_stock_only') and update_obj.get("in_stock"):
+                changed_detected = True
+
+            if not watch.get('in_stock_only'):
+                # All cases
+                changed_detected = True
+
+        # Always record the new checksum
+        update_obj["previous_md5"] = fetched_md5
+
+        return changed_detected, update_obj, (fetcher.instock_data or '').encode('utf-8')  # instock_data may be empty
--- a/changedetectionio/processors/text_json_diff.py
+++ b/changedetectionio/processors/text_json_diff.py
@@ -1,3 +1,5 @@
+# HTML to TEXT/JSON DIFFERENCE FETCHER
+
 import hashlib
 import json
 import logging
@@ -5,13 +7,18 @@ import os
 import re
 import urllib3

-from changedetectionio import content_fetcher, html_tools
+from changedetectionio import html_tools
 from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
 from copy import deepcopy
+from . import difference_detection_processor
+from .. import fetchers

 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


+name =  'Webpage Text/HTML, JSON and PDF changes'
+description = 'Detects all text changes where possible'
+
 class FilterNotFoundInResponse(ValueError):
    def __init__(self, msg):
        ValueError.__init__(self, msg)
@@ -23,7 +30,7 @@ class PDFToHTMLToolNotFound(ValueError):

 # Some common stuff here that can be moved to a base class
 # (set_proxy_from_list)
-class perform_site_check():
+class perform_site_check(difference_detection_processor):
    screenshot = None
    xpath_data = None

@@ -53,7 +60,7 @@ class perform_site_check():
        watch = deepcopy(self.datastore.data['watching'].get(uuid))

        if not watch:
-            return
+            raise Exception("Watch no longer exists.")

        # Protect against file:// access
        if re.search(r'^file', watch.get('url', ''), re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
@@ -95,11 +102,12 @@ class perform_site_check():
        if not prefer_backend or prefer_backend == 'system':
            prefer_backend = self.datastore.data['settings']['application']['fetch_backend']

-        if hasattr(content_fetcher, prefer_backend):
-            klass = getattr(content_fetcher, prefer_backend)
+        if prefer_backend == 'html_webdriver':
+            preferred_fetcher = fetchers.html_webdriver
        else:
-            # If the klass doesnt exist, just use a default
-            klass = getattr(content_fetcher, "html_requests")
+            from ..fetchers import html_requests
+            preferred_fetcher = html_requests
+

        proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=uuid)
        proxy_url = None
@@ -107,7 +115,7 @@ class perform_site_check():
            proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
            print("UUID {} Using proxy {}".format(uuid, proxy_url))

-        fetcher = klass(proxy_override=proxy_url)
+        fetcher = preferred_fetcher(proxy_override=proxy_url)

        # Configurable per-watch or global extra delay before extracting text (for webDriver types)
        system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
@@ -141,7 +149,7 @@ class perform_site_check():
        update_obj['previous_md5_before_filters'] = hashlib.md5(fetcher.content.encode('utf-8')).hexdigest()
        if skip_when_checksum_same:
            if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
-                raise content_fetcher.checksumFromPreviousCheckWasTheSame()
+                raise fetchers.exceptions.checksumFromPreviousCheckWasTheSame()


        # Fetching complete, now filters
@@ -273,10 +281,38 @@ class perform_site_check():
        # Re #340 - return the content before the 'ignore text' was applied
        text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')

+
+        # @todo whitespace coming from missing rtrim()?
+        # stripped_text_from_html could be based on their preferences; replace the processed text with only that which they want to know about.
+        # Rewrites the processed text based only on the diff result they want to see
+        if watch.has_special_diff_filter_options_set() and len(watch.history.keys()):
+            # Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences
+            from .. import diff
+            # needs to not include (added) etc or it may get used twice
+            # Replace the processed text with the preferred result
+            rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_before_filters(),
+                                                       newest_version_file_contents=stripped_text_from_html,
+                                                       include_equal=False,  # not the same lines
+                                                       include_added=watch.get('filter_text_added', True),
+                                                       include_removed=watch.get('filter_text_removed', True),
+                                                       include_replaced=watch.get('filter_text_replaced', True),
+                                                       line_feed_sep="\n",
+                                                       include_change_type_prefix=False)
+
+            watch.save_last_fetched_before_filters(text_content_before_ignored_filter)
+
+            if not rendered_diff and stripped_text_from_html:
+                # We had some content, but no differences were found
+                # Store our new file as the MD5 so it will trigger in the future
+                c = hashlib.md5(text_content_before_ignored_filter.translate(None, b'\r\n\t ')).hexdigest()
+                return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8')
+            else:
+                stripped_text_from_html = rendered_diff
+
        # Treat pages with no renderable text content as a change? No by default
        empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
        if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
-            raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=fetcher.get_last_status_code(), screenshot=screenshot)
+            raise fetchers.exceptions.ReplyWithContentButNoText(url=url, status_code=fetcher.get_last_status_code(), screenshot=screenshot)

        # We rely on the actual text in the html output.. many sites have random script vars etc,
        # in the future we'll implement other mechanisms.
@@ -331,6 +367,7 @@ class perform_site_check():
            blocked = True
            # Filter and trigger works the same, so reuse it
            # It should return the line numbers that match
+            # Unblock flow if the trigger was found (some text remained after stripping what didn't match)
            result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
                                                  wordlist=trigger_text,
                                                  mode="line numbers")
--- a/changedetectionio/res/stock-not-in-stock.js
+++ b/changedetectionio/res/stock-not-in-stock.js
@@ -0,0 +1,97 @@
+// Scans the rendered, childless elements of the page and decides whether the item looks out of stock.
+// Returns the lower-cased text of the first element containing an out-of-stock phrase,
+// otherwise the string 'Possibly in stock'.
+function isItemInStock() {
+  // @todo Pass these in so the same list can be used in non-JS fetchers
+  const outOfStockTexts = [
+    '0 in stock',
+    'agotado',
+    'artikel zurzeit vergriffen',
+    'as soon as stock is available',
+    'available for back order',
+    'backordered',
+    'brak na stanie',
+    'brak w magazynie',
+    'coming soon',
+    'currently unavailable',
+    'en rupture de stock',
+    'item is no longer available',
+    'message if back in stock',
+    'nachricht bei',
+    'nicht auf lager',
+    'nicht lieferbar',
+    'nicht zur verfügung',
+    'no disponible temporalmente',
+    'no longer in stock',
+    'not available',
+    'not in stock',
+    'notify me when available',
+    'não estamos a aceitar encomendas',
+    'out of stock',
+    'out-of-stock',
+    'produkt niedostępny',
+    'sold out',
+    'temporarily out of stock',
+    'temporarily unavailable',
+    'we do not currently have an estimate of when this product will be back in stock.',
+    'zur zeit nicht an lager',
+  ];
+
+  // Patterns that are checked first and win over the out-of-stock phrases,
+  // e.g. '10 in stock' contains the out-of-stock phrase '0 in stock'.
+  // NOTE(review): a bare '0 in stock' also matches '[0-9] in stock' - confirm this is intended.
+  const negateOutOfStockRegexs = [
+      '[0-9] in stock'
+  ]
+  // Compile every pattern (index i, not 0). No 'g' flag: test() on a global regex is stateful via lastIndex.
+  const negateOutOfStockRegexs_r = [];
+  for (let i = 0; i < negateOutOfStockRegexs.length; i++) {
+    negateOutOfStockRegexs_r.push(new RegExp(negateOutOfStockRegexs[i]));
+  }
+
+  // Only elements without child elements are inspected.
+  const elementsWithZeroChildren = Array.from(document.getElementsByTagName('*')).filter(element => element.children.length === 0);
+
+  // REGEXS THAT REALLY MEAN IT'S IN STOCK
+  for (let i = elementsWithZeroChildren.length - 1; i >= 0; i--) {
+    const element = elementsWithZeroChildren[i];
+    // Skip elements that have no size and no client rects
+    if (element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0) {
+      // <input> elements are read via .value, everything else via .textContent
+      const elementText = (element.tagName.toLowerCase() === "input" ? element.value : element.textContent).toLowerCase();
+
+      if (elementText.length) {
+        // try which ones could mean its in stock
+        for (const negateRegex of negateOutOfStockRegexs_r) {
+          if (negateRegex.test(elementText)) {
+            return 'Possibly in stock';
+          }
+        }
+      }
+    }
+  }
+
+  // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
+  for (let i = elementsWithZeroChildren.length - 1; i >= 0; i--) {
+    const element = elementsWithZeroChildren[i];
+    // Skip elements that have no size and no client rects
+    if (element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0) {
+      // <input> elements are read via .value, everything else via .textContent
+      const elementText = (element.tagName.toLowerCase() === "input" ? element.value : element.textContent).toLowerCase();
+
+      if (elementText.length) {
+        // and these mean its out of stock
+        for (const outOfStockText of outOfStockTexts) {
+          if (elementText.includes(outOfStockText)) {
+            return elementText; // item is out of stock
+          }
+        }
+      }
+    }
+  }
+
+  return 'Possibly in stock'; // possibly in stock, cant decide otherwise.
+}
+
+// returns the element text that makes it think it's out of stock
+return isItemInStock();
--- a/changedetectionio/run_basic_tests.sh
+++ b/changedetectionio/run_basic_tests.sh
@@ -28,3 +28,11 @@ pytest tests/test_notification.py
 # Re-run with HIDE_REFERER set - could affect login
 export HIDE_REFERER=True
 pytest tests/test_access_control.py
+
+# Re-run a few tests that will trigger brotli based storage
+export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5
+pytest tests/test_access_control.py
+pytest tests/test_notification.py
+pytest tests/test_backend.py
+pytest tests/test_rss.py
+pytest tests/test_unique_lines.py
--- a/changedetectionio/static/styles/scss/styles.scss
+++ b/changedetectionio/static/styles/scss/styles.scss
@@ -241,6 +241,10 @@ body:before {
  font-size: 85%;
 }

+.button-xsmall {
+  font-size: 70%;
+}
+
 .fetch-error {
  padding-top: 1em;
  font-size: 80%;
@@ -889,6 +893,21 @@ body.full-width {
      font-size: .875em;
    }
  }
+  .text-filtering {  // own declarations first, nested rules after, so output order does not depend on Sass hoisting
+    border: 1px solid #ccc;
+    padding: 1rem;
+    border-radius: 5px;
+    margin-bottom: 1rem;
+    h3 {
+      margin-top: 0;
+    }
+    fieldset:last-of-type {
+      padding-bottom: 0;
+      .pure-control-group {
+        padding-bottom: 0;
+      }
+    }
+  }
 }

 ul {
@@ -1044,3 +1063,30 @@ ul {
  vertical-align: middle;
 }

+
+#quick-watch-processor-type {
+  color: #fff;
+  ul {
+    padding: 0.3rem;
+
+    li {
+      list-style: none;
+      font-size: 0.8rem;
+    }
+  }
+
+}
+
+.restock-label {  // own declarations first, state modifiers after, so output order does not depend on Sass hoisting
+  padding: 3px;
+  border-radius: 3px;
+  white-space: nowrap;
+  &.in-stock {
+    background-color: var(--color-background-button-green);
+    color: #fff;
+  }
+  &.not-in-stock {
+    background-color: var(--color-background-button-cancel);
+    color: #777;
+  }
+}
--- a/changedetectionio/static/styles/styles.css
+++ b/changedetectionio/static/styles/styles.css
@@ -432,6 +432,9 @@ body:before {
 .button-small {
  font-size: 85%; }

+.button-xsmall {
+  font-size: 70%; }
+
 .fetch-error {
  padding-top: 1em;
  font-size: 80%;
@@ -869,6 +872,17 @@ body.full-width .edit-form {
    color: var(--color-text-input-description); }
    .edit-form .pure-form-message-inline code {
      font-size: .875em; }
+  .edit-form .text-filtering {
+    border: 1px solid #ccc;
+    padding: 1rem;
+    border-radius: 5px;
+    margin-bottom: 1rem; }
+    .edit-form .text-filtering h3 {
+      margin-top: 0; }
+    .edit-form .text-filtering fieldset:last-of-type {
+      padding-bottom: 0; }
+      .edit-form .text-filtering fieldset:last-of-type .pure-control-group {
+        padding-bottom: 0; }

 ul {
  padding-left: 1em;
@@ -980,3 +994,22 @@ ul {
  display: inline-block;
  height: 0.8rem;
  vertical-align: middle; }
+
+#quick-watch-processor-type {
+  color: #fff; }
+  #quick-watch-processor-type ul {
+    padding: 0.3rem; }
+    #quick-watch-processor-type ul li {
+      list-style: none;
+      font-size: 0.8rem; }
+
+.restock-label {
+  padding: 3px;
+  border-radius: 3px;
+  white-space: nowrap; }
+  .restock-label.in-stock {
+    background-color: var(--color-background-button-green);
+    color: #fff; }
+  .restock-label.not-in-stock {
+    background-color: var(--color-background-button-cancel);
+    color: #777; }
--- a/changedetectionio/store.py
+++ b/changedetectionio/store.py
@@ -1,20 +1,20 @@
 from flask import (
    flash
 )
-import json
-import logging
-import os
-import threading
-import time
-import uuid as uuid_builder
+
+from . model import App, Watch
 from copy import deepcopy
 from os import path, unlink
 from threading import Lock
+import json
+import logging
+import os
 import re
 import requests
 import secrets
-
-from . model import App, Watch
+import threading
+import time
+import uuid as uuid_builder

 # Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
 # Open a github issue if you know something :)
@@ -192,27 +192,24 @@ class ChangeDetectionStore:
        tags.sort()
        return tags

-    def unlink_history_file(self, path):
-        try:
-            unlink(path)
-        except (FileNotFoundError, IOError):
-            pass
-
    # Delete a single watch by UUID
    def delete(self, uuid):
+        import pathlib
+        import shutil
+
        with self.lock:
            if uuid == 'all':
                self.__data['watching'] = {}

                # GitHub #30 also delete history records
                for uuid in self.data['watching']:
-                    for path in self.data['watching'][uuid].history.values():
-                        self.unlink_history_file(path)
+                    path = pathlib.Path(os.path.join(self.datastore_path, uuid))
+                    shutil.rmtree(path)
+                    self.needs_write_urgent = True

            else:
-                for path in self.data['watching'][uuid].history.values():
-                    self.unlink_history_file(path)
-
+                path = pathlib.Path(os.path.join(self.datastore_path, uuid))
+                shutil.rmtree(path)
                del self.data['watching'][uuid]

            self.needs_write_urgent = True
@@ -290,6 +287,7 @@ class ChangeDetectionStore:
                    'method',
                    'paused',
                    'previous_md5',
+                    'processor',
                    'subtractive_selectors',
                    'tag',
                    'text_should_not_be_present',
@@ -309,9 +307,12 @@ class ChangeDetectionStore:
                logging.error("Error fetching metadata for shared watch link", url, str(e))
                flash("Error fetching metadata for {}".format(url), 'error')
                return False
+        from .model.Watch import is_safe_url
+        if not is_safe_url(url):
+            flash('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX', 'error')
+            return None

        with self.lock:
-
            # #Re 569
            new_watch = Watch.model(datastore_path=self.datastore_path, default={
                'url': url,
@@ -360,11 +361,6 @@ class ChangeDetectionStore:
            f.write(screenshot)
            f.close()

-        # Make a JPEG that's used in notifications (due to being a smaller size) available
-        from PIL import Image
-        im1 = Image.open(target_path)
-        im1.convert('RGB').save(target_path.replace('.png','.jpg'), quality=int(os.getenv("NOTIFICATION_SCREENSHOT_JPG_QUALITY", 75)))
-

    def save_error_text(self, watch_uuid, contents):
        if not self.data['watching'].get(watch_uuid):
@@ -673,3 +669,13 @@ class ChangeDetectionStore:
                self.data['settings']['application']['notification_urls'][i] = re.sub(r, r'{{\1}}', url)

        return
+
+    # Some setups may have missed the correct default, so it shows the wrong config in the UI, although it will default to system-wide
+    def update_10(self):
+        for uuid, watch in self.data['watching'].items():
+            try:
+                if not watch.get('fetch_backend', ''):
+                    watch['fetch_backend'] = 'system'  # empty/missing value: fall back to the system-wide setting
+            except Exception:  # best-effort: skip a watch that cannot be read or updated, but let KeyboardInterrupt/SystemExit through
+                continue
+        return
--- a/changedetectionio/templates/_common_fields.jinja
+++ b/changedetectionio/templates/_common_fields.jinja
@@ -17,14 +17,15 @@
                                <li><code>tgram://</code> bots cant send messages to other bots, so you should specify chat ID of non-bot user.</li>
                                <li><code>tgram://</code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
                                <li><code>gets://</code>, <code>posts://</code>, <code>puts://</code>, <code>deletes://</code> for direct API calls (or omit the "<code>s</code>" for non-SSL ie <code>get://</code>)</li>
+                                  <li>Accepts the <code>{{ '{{token}}' }}</code> placeholders listed below</li>
                              </ul>
                            </div>
                            <div class="notifications-wrapper">
-                              <a id="send-test-notification" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Send test notification</a>
+                              <a id="send-test-notification" class="pure-button button-secondary button-xsmall" >Send test notification</a>
                            {% if emailprefix %}
-                              <a id="add-email-helper" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Add email</a>
+                              <a id="add-email-helper" class="pure-button button-secondary button-xsmall" >Add email</a>
                            {% endif %}
-                              <a href="{{url_for('notification_logs')}}" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Notification debug logs</a>
+                              <a href="{{url_for('notification_logs')}}" class="pure-button button-secondary button-xsmall" >Notification debug logs</a>
                            </div>
                        </div>
                        <div id="notification-customisation" class="pure-control-group">
@@ -55,48 +56,66 @@
                                    </thead>
                                    <tbody>
                                    <tr>
-                                        <td><code>{{ '{{ base_url }}' }}</code></td>
+                                        <td><code>{{ '{{base_url}}' }}</code></td>
                                        <td>The URL of the changedetection.io instance you are running.</td>
                                    </tr>
                                    <tr>
-                                        <td><code>{{ '{{ watch_url }}' }}</code></td>
+                                        <td><code>{{ '{{watch_url}}' }}</code></td>
                                        <td>The URL being watched.</td>
                                    </tr>
                                    <tr>
-                                        <td><code>{{ '{{ watch_uuid }}' }}</code></td>
+                                        <td><code>{{ '{{watch_uuid}}' }}</code></td>
                                        <td>The UUID of the watch.</td>
                                    </tr>
                                    <tr>
-                                        <td><code>{{ '{{ watch_title }}' }}</code></td>
+                                        <td><code>{{ '{{watch_title}}' }}</code></td>
                                        <td>The title of the watch.</td>
                                    </tr>
                                    <tr>
-                                        <td><code>{{ '{{ watch_tag }}' }}</code></td>
+                                        <td><code>{{ '{{watch_tag}}' }}</code></td>
                                        <td>The watch label / tag</td>
                                    </tr>
                                    <tr>
-                                        <td><code>{{ '{{ preview_url }}' }}</code></td>
+                                        <td><code>{{ '{{preview_url}}' }}</code></td>
                                        <td>The URL of the preview page generated by changedetection.io.</td>
                                    </tr>
                                    <tr>
-                                        <td><code>{{ '{{ diff_url }}' }}</code></td>
-                                        <td>The diff output - differences only</td>
+                                        <td><code>{{ '{{diff_url}}' }}</code></td>
+                                        <td>The URL of the diff output for the watch.</td>
+                                    </tr>
+                                    <tr>
+                                        <td><code>{{ '{{diff}}' }}</code></td>
+                                        <td>The diff output - only changes, additions, and removals</td>
+                                    </tr>
+                                    <tr>
+                                        <td><code>{{ '{{diff_added}}' }}</code></td>
+                                        <td>The diff output - only changes and additions</td>
+                                    </tr>
+                                    <tr>
+                                        <td><code>{{ '{{diff_removed}}' }}</code></td>
+                                        <td>The diff output - only changes and removals</td>
                                    </tr>
                                    <tr>
-                                        <td><code>{{ '{{ diff_full }}' }}</code></td>
+                                        <td><code>{{ '{{diff_full}}' }}</code></td>
                                        <td>The diff output - full difference output</td>
                                    </tr>
                                    <tr>
-                                        <td><code>{{ '{{ current_snapshot }}' }}</code></td>
+                                        <td><code>{{ '{{current_snapshot}}' }}</code></td>
                                        <td>The current snapshot value, useful when combined with JSON or CSS filters
                                        </td>
                                    </tr>
+                                    <tr>
+                                        <td><code>{{ '{{triggered_text}}' }}</code></td>
+                                        <td>Text that tripped the trigger from filters</td>
+                                    </tr>
                                    </tbody>
                                </table>
                                <div class="pure-form-message-inline">
                                    <br>
-                                    URLs generated by changedetection.io (such as <code>{{ '{{ diff_url }}' }}</code>) require the <code>BASE_URL</code> environment variable set.<br/>
+                                    URLs generated by changedetection.io (such as <code>{{ '{{diff_url}}' }}</code>) require the <code>BASE_URL</code> environment variable set.<br>
                                    Your <code>BASE_URL</code> var is currently "{{settings_application['current_base_url']}}"
+                                    <br>
+                                    Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. For example, an addition or removal could be perceived as a change in some cases. <a target="_new" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removal%7D%7D-notification-tokens">More Here</a> <br>
                                </div>
                            </div>
                        </div>
--- a/changedetectionio/templates/diff.html
+++ b/changedetectionio/templates/diff.html
@@ -76,8 +76,12 @@
    </div>

     <div class="tab-pane-inner" id="text">
-         <div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored.
-         </div>
+         <div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored.</div>
+
+         {% if password_enabled_and_share_is_off %}
+           <div class="tip">Pro-tip: You can enable <strong>"share access when password is enabled"</strong> from settings</div>
+         {% endif %}
+
         <div class="snapshot-age">{{watch_a.snapshot_text_ctime|format_timestamp_timeago}}</div>

         <table>
@@ -120,12 +124,12 @@
            <div class="pure-control-group">
                {{ render_field(extract_form.extract_regex) }}
                <span class="pure-form-message-inline">
-                    A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract.<br/>
+                    A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract.<br>

                    <p>
-                        For example, to extract only the numbers from text &dash;</br>
-                        <strong>Raw text</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code></br>
-                        <strong>RegEx to extract:</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br/>
+                        For example, to extract only the numbers from text &dash;<br>
+                        <strong>Raw text</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code><br>
+                        <strong>RegEx to extract:</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br>
                    </p>
                    <p>
                        <a href="https://RegExr.com/">Be sure to test your RegEx here.</a>
@@ -150,4 +154,4 @@
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-render.js')}}"></script>


-{% endblock %}
+{% endblock %}
--- a/changedetectionio/templates/edit.html
+++ b/changedetectionio/templates/edit.html
@@ -34,8 +34,15 @@
            {% if playwright_enabled %}
            <li class="tab"><a id="browsersteps-tab" href="#browser-steps">Browser Steps</a></li>
            {% endif %}
+
+            {% if watch['processor'] == 'text_json_diff' %}
            <li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
            <li class="tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
+            {% endif %}
+
+            {% if watch['processor'] == 'restock_diff' %}
+            <li class="tab"><a href="#restock">Restock Detection</a></li>
+            {% endif %}
            <li class="tab"><a href="#notifications">Notifications</a></li>
        </ul>
    </div>
@@ -49,8 +56,18 @@
                <fieldset>
                    <div class="pure-control-group">
                        {{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
-                        <span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br/>
-                        <span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br/>
+                        <span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br>
+                        <span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br>
+                        <span class="pure-form-message-inline">
+                        {% if watch['processor'] == 'text_json_diff' %}
+                            Current mode: <strong>Webpage Text/HTML, JSON and PDF changes.</strong><br>
+                          <a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=restock_diff" class="pure-button button-xsmall">Switch to re-stock detection mode.</a>
+                        {% else %}
+                        Current mode: <strong>Re-stock detection.</strong><br>
+                          <a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=text_json_diff" class="pure-button button-xsmall">Switch to Webpage Text/HTML, JSON and PDF changes mode.</a>
+                        {% endif %}
+                        </span>
+
                    </div>
                    <div class="pure-control-group">
                        {{ render_field(form.title, class="m-d") }}
@@ -106,10 +123,10 @@
                        {{ render_field(form.webdriver_delay) }}
                        <div class="pure-form-message-inline">
                            <strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
-                            <br/>
+                            <br>
                            This will wait <i>n</i> seconds before extracting the text.
                            {% if using_global_webdriver_wait %}
-                            <br/><strong>Using the current global default settings</strong>
+                            <br><strong>Using the current global default settings</strong>
                            {% endif %}
                        </div>
                    </div>
@@ -214,9 +231,10 @@ User-Agent: wonderbra 1.0") }}
                </fieldset>
            </div>

+            {% if watch['processor'] == 'text_json_diff' %}
            <div class="tab-pane-inner" id="filters-and-triggers">
                    <div class="pure-control-group">
-                            <strong>Pro-tips:</strong><br/>
+                            <strong>Pro-tips:</strong><br>
                            <ul>
                                <li>
                                    Use the preview page to see your filters and triggers highlighted.
@@ -226,12 +244,6 @@ User-Agent: wonderbra 1.0") }}
                                </li>
                            </ul>
                    </div>
-                    <fieldset>
-                        <div class="pure-control-group">
-                            {{ render_checkbox_field(form.check_unique_lines) }}
-                            <span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
-                        </div>
-                    </fieldset>
                    <div class="pure-control-group">
                        {% set field = render_field(form.include_filters,
                            rows=5,
@@ -241,9 +253,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
                        %}
                        {{ field }}
                        {% if '/text()' in  field %}
-                          <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br/>
+                          <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
                        {% endif %}
-                        <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br/>
+                        <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>

                    <ul>
                        <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
@@ -266,40 +278,42 @@ xpath://body/div/span[contains(@class, 'example-class')]",
                            </li>
                    </ul>
                    Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
-                                href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
+                                href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
                </span>
                    </div>
-                    <div class="pure-control-group">
-                      {{ render_field(form.subtractive_selectors, rows=5, placeholder="header
+                <fieldset class="pure-control-group">
+                    {{ render_field(form.subtractive_selectors, rows=5, placeholder="header
 footer
 nav
 .stockticker") }}
-                      <span class="pure-form-message-inline">
+                    <span class="pure-form-message-inline">
                        <ul>
                          <li> Remove HTML element(s) by CSS selector before text conversion. </li>
                          <li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
                        </ul>
                      </span>
-                    </div>
-                <fieldset class="pure-group">
-                    {{ render_field(form.ignore_text, rows=5, placeholder="Some text to ignore in a line
-/some.regex\d{2}/ for case-INsensitive regex
-                    ") }}
-                    <span class="pure-form-message-inline">
-                        <ul>
-                            <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
-                            <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
-                            <li>Changing this will affect the comparison checksum which may trigger an alert</li>
-                            <li>Use the preview/show current tab to see ignores</li>
-                        </ul>
-                </span>
+                </fieldset>
+                <div class="text-filtering">
+                <fieldset class="pure-group" id="text-filtering-type-options">
+                    <h3>Text filtering</h3>
+                        Limit trigger/ignore/block/extract to;<br>
+                        {{ render_checkbox_field(form.filter_text_added) }}
+                        {{ render_checkbox_field(form.filter_text_replaced) }}
+                        {{ render_checkbox_field(form.filter_text_removed) }}
+                    <span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span>
+                    <span class="pure-form-message-inline">So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
+                    <span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
+                </fieldset>

-            </fieldset>
+                <fieldset class="pure-control-group">
+                    {{ render_checkbox_field(form.check_unique_lines) }}
+                    <span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
+                </fieldset>
                <fieldset>
                    <div class="pure-control-group">
                        {{ render_field(form.trigger_text, rows=5, placeholder="Some text to wait for in a line
 /some.regex\d{2}/ for case-INsensitive regex
-                    ") }}
+") }}
                        <span class="pure-form-message-inline">
                    <ul>
                        <li>Text to wait for before triggering a change/notification, all text and regex are tested <i>case-insensitive</i>.</li>
@@ -310,6 +324,21 @@ nav
                        </span>
                    </div>
                </fieldset>
+                <fieldset class="pure-group">
+                    {{ render_field(form.ignore_text, rows=5, placeholder="Some text to ignore in a line
+/some.regex\d{2}/ for case-INsensitive regex
+") }}
+                    <span class="pure-form-message-inline">
+                        <ul>
+                            <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
+                            <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
+                            <li>Changing this will affect the comparison checksum which may trigger an alert</li>
+                            <li>Use the preview/show current tab to see ignores</li>
+                        </ul>
+                </span>
+
+                </fieldset>
+
                <fieldset>
                    <div class="pure-control-group">
                        {{ render_field(form.text_should_not_be_present, rows=5, placeholder="For example: Out of stock
@@ -334,7 +363,7 @@ Unavailable") }}
                        <li>Extracts text in the final output (line by line) after other filters using regular expressions;
                            <ul>
                                <li>Regular expression &dash; example <code>/reports.+?2022/i</code></li>
-                                <li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br/></li>
+                                <li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br></li>
                                <li>Keyword example &dash; example <code>Out of stock</code></li>
                                <li>Use groups to extract just that text &dash; example <code>/reports.+?(\d+)/i</code> returns a list of years only</li>
                            </ul>
@@ -344,8 +373,22 @@ Unavailable") }}
                        </span>
                    </div>
                </fieldset>
+                </div>
            </div>
+            {% endif %}

+            {% if watch['processor'] == 'restock_diff' %}
+            <div class="tab-pane-inner" id="restock">
+                    <fieldset>
+                        <div class="pure-control-group">
+                            {{ render_checkbox_field(form.in_stock_only) }}
+                            <span class="pure-form-message-inline">Only trigger notifications when page changes from <strong>out of stock</strong> to <strong>back in stock</strong></span>
+                        </div>
+                    </fieldset>
+            </div>
+            {% endif %}
+
+            {% if watch['processor'] == 'text_json_diff' %}
            <div class="tab-pane-inner visual-selector-ui" id="visualselector">
                <img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}">

@@ -353,7 +396,7 @@ Unavailable") }}
                    <div class="pure-control-group">
                        {% if visualselector_enabled %}
                            <span class="pure-form-message-inline">
-                                The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection &dash; after the <i>Browser Steps</i> has completed.<br/><br/>
+                                The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection &dash; after the <i>Browser Steps</i> has completed.<br><br>
                            </span>

                            <div id="selector-header">
@@ -378,6 +421,7 @@ Unavailable") }}
                    </div>
                </fieldset>
            </div>
+            {% endif %}

            <div id="actions">
                <div class="pure-control-group">
--- a/changedetectionio/templates/import.html
+++ b/changedetectionio/templates/import.html
@@ -1,5 +1,6 @@
 {% extends 'base.html' %}
 {% block content %}
+{% from '_helpers.jinja' import render_field %}
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
 <div class="edit-form monospaced-textarea">

@@ -14,7 +15,6 @@
        <form class="pure-form pure-form-aligned" action="{{url_for('import_page')}}" method="POST">
            <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
            <div class="tab-pane-inner" id="url-list">
-                <fieldset class="pure-group">
                    <legend>
                        Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma
                        (,):
@@ -23,7 +23,7 @@
                        <br>
                        URLs which do not pass validation will stay in the textarea.
                    </legend>
-
+                {{ render_field(form.processor, class="processor") }}

                    <textarea name="urls" class="pure-input-1-2" placeholder="https://"
                              style="width: 100%;
@@ -31,22 +31,24 @@
                                white-space: pre;
                                overflow-wrap: normal;
                                overflow-x: scroll;" rows="25">{{ import_url_list_remaining }}</textarea>
-                </fieldset>

+<div id="quick-watch-processor-type">
+
+                    </div>

            </div>

            <div class="tab-pane-inner" id="distill-io">


-                <fieldset class="pure-group">
+
                    <legend>
-                        Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.</br>
+                        Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.<br>
                        This is <i>experimental</i>, supported fields are <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, the rest (including <code>schedule</code>) are ignored.
-                        <br/>
+                        <br>
                        <p>
-                        How to export? <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br/>
-                        Be sure to set your default fetcher to Chrome if required.</br>
+                        How to export? <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br>
+                        Be sure to set your default fetcher to Chrome if required.<br>
                        </p>
                    </legend>

@@ -75,7 +77,7 @@
    ]
 }
 " rows="25">{{ original_distill_json }}</textarea>
-                </fieldset>
+
            </div>
            <button type="submit" class="pure-button pure-input-1-2 pure-button-primary">Import</button>
        </form>
--- a/changedetectionio/templates/preview.html
+++ b/changedetectionio/templates/preview.html
@@ -54,7 +54,7 @@
         <div class="tip">
             For now, Differences are performed on text, not graphically, only the latest screenshot is available.
         </div>
-         </br>
+         <br>
         {% if is_html_webdriver %}
           {% if screenshot %}
             <div class="snapshot-age">{{watch.snapshot_screenshot_ctime|format_timestamp_timeago}}</div>
@@ -67,4 +67,4 @@
         {% endif %}
     </div>
 </div>
-{% endblock %}
+{% endblock %}
--- a/changedetectionio/templates/settings.html
+++ b/changedetectionio/templates/settings.html
@@ -40,7 +40,7 @@
                    <div class="pure-control-group">
                        {{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }}
                        <span class="pure-form-message-inline">After this many consecutive times that the CSS/xPath filter is missing, send a notification
-                            <br/>
+                            <br>
                        Set to <strong>0</strong> to disable
                        </span>
                    </div>
@@ -57,11 +57,16 @@
                        {% endif %}
                    </div>

+                    <div class="pure-control-group">
+                        {{ render_checkbox_field(form.application.form.shared_diff_access, class="shared_diff_access") }}
+                        <span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
+                        </span>
+                    </div>
                    <div class="pure-control-group">
                        {{ render_field(form.application.form.base_url, placeholder="http://yoursite.com:5000/",
                        class="m-d") }}
                        <span class="pure-form-message-inline">
-                            Base URL used for the <code>{{ '{{ base_url }}' }}</code> token in notifications and RSS links.<br/>Default value is the ENV var 'BASE_URL' (Currently "{{settings_application['current_base_url']}}"),
+                            Base URL used for the <code>{{ '{{ base_url }}' }}</code> token in notifications and RSS links.<br>Default value is the ENV var 'BASE_URL' (Currently "{{settings_application['current_base_url']}}"),
                            <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">read more here</a>.
                        </span>
                    </div>
@@ -100,13 +105,13 @@
                        <p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
                        <p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
                    </span>
-                    <br/>
+                    <br>
                    Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using BrightData Proxies, find out more here.</a>
                </div>
                <fieldset class="pure-group" id="webdriver-override-options">
                    <div class="pure-form-message-inline">
                        <strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
-                        <br/>
+                        <br>
                        This will wait <i>n</i> seconds before extracting the text.
                    </div>
                    <div class="pure-control-group">
@@ -119,14 +124,14 @@

                    <fieldset class="pure-group">
                    {{ render_checkbox_field(form.application.form.ignore_whitespace) }}
-                    <span class="pure-form-message-inline">Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected.<br/>
+                    <span class="pure-form-message-inline">Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected.<br>
                    <i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc.
                    </span>
                    </fieldset>
                <fieldset class="pure-group">
                    {{ render_checkbox_field(form.application.form.render_anchor_tag_content) }}
                    <span class="pure-form-message-inline">Render anchor tag content, default disabled, when enabled renders links as <code>(link text)[https://somesite.com]</code>
-                        <br/>
+                        <br>
                    <i>Note:</i> Changing this could affect the content of your existing watches, possibly trigger alerts etc.
                    </span>
                    </fieldset>
@@ -146,7 +151,7 @@ nav
                    {{ render_field(form.application.form.global_ignore_text, rows=5, placeholder="Some text to ignore in a line
 /some.regex\d{2}/ for case-INsensitive regex
                    ") }}
-                    <span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br/>
+                    <span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br>
                    <span class="pure-form-message-inline">
                        <ul>
                            <li>Note: This is applied globally in addition to the per-watch rules.</li>
@@ -165,8 +170,8 @@ nav

                <div class="pure-control-group">
                    {{ render_checkbox_field(form.application.form.api_access_token_enabled) }}
-                    <div class="pure-form-message-inline">Restrict API access limit by using <code>x-api-key</code> header</div><br/>
-                    <div class="pure-form-message-inline"><br/>API Key <span id="api-key">{{api_key}}</span>
+                    <div class="pure-form-message-inline">Restrict API access limit by using <code>x-api-key</code> header</div><br>
+                    <div class="pure-form-message-inline"><br>API Key <span id="api-key">{{api_key}}</span>
                        <span style="display:none;" id="api-key-copy" >copy</span>
                    </div>
                </div>
@@ -176,7 +181,7 @@ nav
                <p><strong>Tip</strong>: You can connect to websites using <a href="https://brightdata.grsm.io/n0r16zf7eivq">BrightData</a> proxies, their service <strong>WebUnlocker</strong> will solve most CAPTCHAs, whilst their <strong>Residential Proxies</strong> may help to avoid CAPTCHA altogether. </p>
                <p>It may be easier to try <strong>WebUnlocker</strong> first, WebUnlocker also supports country selection.</p>
                <p>
-                    When you have <a href="https://brightdata.grsm.io/n0r16zf7eivq">registered</a>, enabled the required services, visit the <A href="https://brightdata.com/cp/api_example?">API example page</A>, then select <strong>Python</strong>, set the country you wish to use, then copy+paste the example URL below<br/>
+                    When you have <a href="https://brightdata.grsm.io/n0r16zf7eivq">registered</a>, enabled the required services, visit the <A href="https://brightdata.com/cp/api_example?">API example page</A>, then select <strong>Python</strong>, set the country you wish to use, then copy+paste the example URL below<br>
                    The Proxy URL with BrightData should start with <code>http://brd-customer...</code>
                </p>

--- a/changedetectionio/templates/watch-overview.html
+++ b/changedetectionio/templates/watch-overview.html
@@ -21,6 +21,10 @@
                    {{ render_simple_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
                </div>
            </div>
+            <div id="quick-watch-processor-type">
+                {{ render_simple_field(form.processor, title="Edit first then Watch") }}
+            </div>
+
        </fieldset>
        <span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" /> Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span>
    </form>
@@ -28,12 +32,12 @@
    <form class="pure-form" action="{{ url_for('form_watch_list_checkbox_operations') }}" method="POST" id="watch-list-form">
    <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
    <div id="checkbox-operations">
-        <button class="pure-button button-secondary button-xsmall" style="font-size: 70%"  name="op" value="pause">Pause</button>
-        <button class="pure-button button-secondary button-xsmall" style="font-size: 70%"  name="op" value="unpause">UnPause</button>
-        <button class="pure-button button-secondary button-xsmall" style="font-size: 70%"  name="op" value="mute">Mute</button>
-        <button class="pure-button button-secondary button-xsmall" style="font-size: 70%"  name="op" value="unmute">UnMute</button>
-        <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="recheck">Recheck</button>
-        <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="notification-default">Use default notification</button>
+        <button class="pure-button button-secondary button-xsmall"  name="op" value="pause">Pause</button>
+        <button class="pure-button button-secondary button-xsmall"  name="op" value="unpause">UnPause</button>
+        <button class="pure-button button-secondary button-xsmall"  name="op" value="mute">Mute</button>
+        <button class="pure-button button-secondary button-xsmall"  name="op" value="unmute">UnMute</button>
+        <button class="pure-button button-secondary button-xsmall" name="op" value="recheck">Recheck</button>
+        <button class="pure-button button-secondary button-xsmall" name="op" value="notification-default">Use default notification</button>
        <button class="pure-button button-secondary button-xsmall" style="background: #dd4242; font-size: 70%" name="op" value="delete">Delete</button>
    </div>
    <div>
@@ -57,9 +61,9 @@
                <th></th>
                {% set link_order = "desc" if sort_order else "asc" %}
                {% set arrow_span = "" %}
-                <th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('index', sort='label', order=link_order)}}">Website <span class='arrow {{link_order}}'></span></a></th>
-                <th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('index', sort='last_checked', order=link_order)}}">Last Checked <span class='arrow {{link_order}}'></span></a></th>
-                <th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('index', sort='last_changed', order=link_order)}}">Last Changed <span class='arrow {{link_order}}'></span></a></th>
+                <th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('index', sort='label', order=link_order, tag=active_tag)}}">Website <span class='arrow {{link_order}}'></span></a></th>
+                <th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('index', sort='last_checked', order=link_order, tag=active_tag)}}">Last Checked <span class='arrow {{link_order}}'></span></a></th>
+                <th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('index', sort='last_changed', order=link_order, tag=active_tag)}}">Last Changed <span class='arrow {{link_order}}'></span></a></th>
                <th></th>
            </tr>
            </thead>
@@ -72,7 +76,7 @@
              {% if not ( loop.index >= 3 and loop.index <=4) %}{% continue %}{% endif %} -->
             #}
            <tr id="{{ watch.uuid }}"
-                class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }}
+                class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} processor-{{ watch['processor'] }}
                {% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
                {% if watch.last_notification_error is defined and watch.last_notification_error != False %}error{% endif %}
                {% if watch.paused is defined and watch.paused != False %}paused{% endif %}
@@ -91,7 +95,12 @@
                    <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
                    <a class="link-spread" href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img class="status-icon"  src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" /></a>

-                    {%if watch.get_fetch_backend == "html_webdriver" %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" title="Using a chrome browser" />{% endif %}
+                    {% if watch.get_fetch_backend == "html_webdriver"
+                         or (  watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver'  )
+                    %}
+                    <img class="status-icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" title="Using a chrome browser" />
+                    {% endif %}
+
                    {%if watch.is_pdf  %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" />{% endif %}
                    {% if watch.last_error is defined and watch.last_error != False %}
                    <div class="fetch-error">{{ watch.last_error }}
@@ -108,12 +117,28 @@
                    {% if watch.last_notification_error is defined and watch.last_notification_error != False %}
                    <div class="fetch-error notification-error"><a href="{{url_for('notification_logs')}}">{{ watch.last_notification_error }}</a></div>
                    {% endif %}
-                    {% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data']  %}
-                    <div class="ldjson-price-track-offer">Embedded price data detected, follow only price data? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
+
+                    {% if watch['processor'] == 'text_json_diff'  %}
+                        {% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data']  %}
+                        <div class="ldjson-price-track-offer">Embedded price data detected, follow only price data? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
+                        {% endif %}
+                        {% if watch['track_ldjson_price_data'] == 'accepted' %}
+                        <span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}"  class="status-icon price-follow-tag-icon"/> Price</span>
+                        {% endif %}
                    {% endif %}
-                    {% if watch['track_ldjson_price_data'] == 'accepted' %}
-                    <span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}"  class="status-icon price-follow-tag-icon"/> Price</span>
+
+                    {% if watch['processor'] == 'restock_diff'  %}
+                    {# Stock status label; shows "Not yet checked" until the watch has a last_checked value #}
+                    <span class="restock-label {{'in-stock' if watch['in_stock'] else 'not-in-stock' }}" title="detecting restock conditions">
+                        <!-- maybe some object watch['processor'][restock_diff] or.. -->
+                        {% if watch['last_checked'] %}
+                            {% if watch['in_stock'] %} In stock {% else %} Not in stock {% endif %}
+                        {% else %}
+                            Not yet checked
+                        {% endif %}
+                    </span>
                    {% endif %}
+
                    {% if not active_tag %}
                    <span class="watch-tag-list">{{ watch.tag}}</span>
                    {% endif %}
--- a/changedetectionio/tests/restock/__init__.py
+++ b/changedetectionio/tests/restock/__init__.py
@@ -0,0 +1,2 @@
+"""Tests for the app."""
+
--- a/changedetectionio/tests/restock/conftest.py
+++ b/changedetectionio/tests/restock/conftest.py
@@ -0,0 +1,3 @@
+#!/usr/bin/python3
+
+from .. import conftest
--- a/changedetectionio/tests/restock/test_restock.py
+++ b/changedetectionio/tests/restock/test_restock.py
@@ -0,0 +1,106 @@
+#!/usr/bin/python3
+import os
+import time
+from flask import url_for
+from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
+from changedetectionio.notification import (
+    default_notification_body,
+    default_notification_format,
+    default_notification_title,
+    valid_notification_formats,
+)
+
+
+def set_original_response():
+    test_return_data = """<html>
+       <body>
+     Some initial text<br>
+     <p>Which is across multiple lines</p>
+     <br>
+     So let's see what happens.  <br>
+     <div>price: $10.99</div>
+     <div id="sametext">Out of stock</div>
+     </body>
+     </html>
+    """
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write(test_return_data)
+    return None
+
+
+
+def set_back_in_stock_response():
+    test_return_data = """<html>
+       <body>
+     Some initial text<br>
+     <p>Which is across multiple lines</p>
+     <br>
+     So let's see what happens.  <br>
+     <div>price: $10.99</div>
+     <div id="sametext">Available!</div>
+     </body>
+     </html>
+    """
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write(test_return_data)
+    return None
+
+# Add a watch using the restock_diff processor, then check the in-stock/not-in-stock status and that a notification only fires on OUT OF STOCK -> IN STOCK
+def test_restock_detection(client, live_server):
+
+    set_original_response()
+    #assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
+
+    time.sleep(1)
+    live_server_setup(live_server)
+    #####################
+    notification_url = url_for('test_notification_endpoint', _external=True).replace('http://localhost', 'http://changedet').replace('http', 'json')
+
+
+    #####################
+    # Set this up for when we remove the notification from the watch, it should fallback with these details
+    res = client.post(
+        url_for("settings_page"),
+        data={"application-notification_urls": notification_url,
+              "application-notification_title": "fallback-title "+default_notification_title,
+              "application-notification_body": "fallback-body "+default_notification_body,
+              "application-notification_format": default_notification_format,
+              "requests-time_between_check-minutes": 180,
+              'application-fetch_backend': "html_webdriver"},
+        follow_redirects=True
+    )
+    # Build the test URL with the 'changedet' hostname, because the docker container (playwright/selenium) won't be able to connect to our usual test url
+    test_url = url_for('test_endpoint', _external=True).replace('http://localhost', 'http://changedet')
+
+
+    client.post(
+        url_for("form_quick_watch_add"),
+        data={"url": test_url, "tag": '', 'processor': 'restock_diff'},
+        follow_redirects=True
+    )
+
+    # Is it correctly shown as NOT in stock?
+    wait_for_all_checks(client)
+    res = client.get(url_for("index"))
+    assert b'not-in-stock' in res.data
+
+    # Is it correctly shown as in stock
+    set_back_in_stock_response()
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    wait_for_all_checks(client)
+    res = client.get(url_for("index"))
+    assert b'not-in-stock' not in res.data
+
+    # We should have a notification
+    time.sleep(2)
+    assert os.path.isfile("test-datastore/notification.txt")
+    os.unlink("test-datastore/notification.txt")
+
+    # Default behaviour is to only fire notification when it goes OUT OF STOCK -> IN STOCK
+    # So here there should be no file, because we go IN STOCK -> OUT OF STOCK
+    set_original_response()
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    wait_for_all_checks(client)
+    assert not os.path.isfile("test-datastore/notification.txt")
--- a/changedetectionio/tests/test_access_control.py
+++ b/changedetectionio/tests/test_access_control.py
@@ -1,18 +1,34 @@
+from . util import live_server_setup, extract_UUID_from_client
 from flask import url_for
-from . util import live_server_setup
+import time

-def test_check_access_control(app, client):
+def test_check_access_control(app, client, live_server):
    # Still doesnt work, but this is closer.
+    live_server_setup(live_server)

    with app.test_client(use_cookies=True) as c:
        # Check we don't have any password protection enabled yet.
        res = c.get(url_for("settings_page"))
        assert b"Remove password" not in res.data

-        # Enable password check.
+        # add something that we can hit via diff page later
+        res = c.post(
+            url_for("import_page"),
+            data={"urls": url_for('test_random_content_endpoint', _external=True)},
+            follow_redirects=True
+        )
+
+        assert b"1 Imported" in res.data
+        time.sleep(2)
+        res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
+        assert b'1 watches queued for rechecking.' in res.data
+        time.sleep(2)
+
+        # Enable password check and diff page access bypass
        res = c.post(
            url_for("settings_page"),
            data={"application-password": "foobar",
+                  "application-shared_diff_access": "True",
                  "requests-time_between_check-minutes": 180,
                  'application-fetch_backend': "html_requests"},
            follow_redirects=True
@@ -22,9 +38,15 @@ def test_check_access_control(app, client):

        # Check we hit the login
        res = c.get(url_for("index"), follow_redirects=True)
-
+        # Should be logged out
        assert b"Login" in res.data

+        # The diff page should return something valid when logged out
+        res = client.get(url_for("diff_history_page", uuid="first"))
+        assert b'Random content' in res.data
+
+
+
        # Menu should not be available yet
        #        assert b"SETTINGS" not in res.data
        #        assert b"BACKUP" not in res.data
@@ -109,3 +131,25 @@ def test_check_access_control(app, client):

        assert b"Password protection enabled" not in res.data

+        # Now check the diff access again, this time without the bypass
+        # Enable password check but leave the diff page access bypass disabled
+        res = c.post(
+            url_for("settings_page"),
+            data={"application-password": "foobar",
+                  # Should be disabled
+#                  "application-shared_diff_access": "True",
+                  "requests-time_between_check-minutes": 180,
+                  'application-fetch_backend': "html_requests"},
+            follow_redirects=True
+        )
+
+        assert b"Password protection enabled." in res.data
+
+        # Check we hit the login
+        res = c.get(url_for("index"), follow_redirects=True)
+        # Should be logged out
+        assert b"Login" in res.data
+
+        # The diff page should NOT show the watch content when logged out and the bypass is disabled
+        res = client.get(url_for("diff_history_page", uuid="first"))
+        assert b'Random content' not in res.data
--- a/changedetectionio/tests/test_add_replace_remove_filter.py
+++ b/changedetectionio/tests/test_add_replace_remove_filter.py
@@ -0,0 +1,176 @@
+#!/usr/bin/python3
+
+import time
+from flask import url_for
+from .util import live_server_setup
+from changedetectionio import html_tools
+
+
+def set_original(excluding=None, add_line=None):
+    """Write the HTML test page to test-datastore/endpoint-content.txt.
+    add_line:  optional line inserted at index 5, i.e. directly before 'The golden line'.
+    excluding: optional substring; every line of the page containing it is dropped.
+    """
+    test_return_data = """<html>
+     <body>
+     <p>Some initial text</p>
+     <p>So let's see what happens.</p>
+     <p>and a new line!</p>
+     <p>The golden line</p>
+     <p>A BREAK TO MAKE THE TOP LINE STAY AS "REMOVED" OR IT WILL GET COUNTED AS "CHANGED INTO"</p>
+     <p>Something irrelevant</p>          
+     </body>
+     </html>
+    """
+
+    if add_line:
+        lines = test_return_data.splitlines()
+        lines.insert(5, add_line)
+        test_return_data = "\n".join(lines)
+
+    if excluding:
+        kept = (line for line in test_return_data.splitlines() if excluding not in line)
+        test_return_data = "".join(f"{line}\n" for line in kept)
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write(test_return_data)
+
+def test_setup(client, live_server):
+    live_server_setup(live_server)  # pass the live_server argument to the setup helper imported from .util
+
+def test_check_removed_line_contains_trigger(client, live_server):
+    """With 'filter_text_removed' set, the watch must only be flagged 'unviewed' when the trigger line is removed."""
+    sleep_time_for_fetch_thread = 3
+    # Give the endpoint time to spin up
+    time.sleep(1)
+    set_original()
+    # Add our URL to the import page
+    test_url = url_for('test_endpoint', _external=True)
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+
+    # Give the thread time to pick it up
+    time.sleep(sleep_time_for_fetch_thread)
+
+    # Go to the edit page and set the trigger text,
+    # with 'filter_text_removed' switched on for this watch
+    res = client.post(
+        url_for("edit_page", uuid="first"),
+        data={"trigger_text": 'The golden line',
+              "url": test_url,
+              'fetch_backend': "html_requests",
+              'filter_text_removed': 'y'},
+        follow_redirects=True
+    )
+    assert b"Updated watch." in res.data
+    time.sleep(sleep_time_for_fetch_thread)
+    set_original(excluding='Something irrelevant')
+
+    # Removing a line that's not the trigger should not trigger anything
+    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    assert b'1 watches queued for rechecking.' in res.data
+    time.sleep(sleep_time_for_fetch_thread)
+    res = client.get(url_for("index"))
+    assert b'unviewed' not in res.data
+
+    # The trigger line is REMOVED, this should trigger
+    set_original(excluding='The golden line')
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    time.sleep(sleep_time_for_fetch_thread)
+    res = client.get(url_for("index"))
+    assert b'unviewed' in res.data
+
+
+    # Now add it back (after marking everything viewed), and we should not get a trigger
+    client.get(url_for("mark_all_viewed"), follow_redirects=True)
+    set_original(excluding=None)
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    time.sleep(sleep_time_for_fetch_thread)
+    res = client.get(url_for("index"))
+    assert b'unviewed' not in res.data
+
+    # Remove it again, and we should get a trigger
+    set_original(excluding='The golden line')
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    time.sleep(sleep_time_for_fetch_thread)
+    res = client.get(url_for("index"))
+    assert b'unviewed' in res.data
+
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
+
+
+def test_check_add_line_contains_trigger(client, live_server):
+    """With 'filter_text_added' set, an added line containing the trigger text must flag the watch and be sent in the notification."""
+    sleep_time_for_fetch_thread = 3
+
+    # Give the endpoint time to spin up
+    time.sleep(1)
+    test_notification_url = url_for('test_notification_endpoint', _external=True).replace('http://', 'post://') + "?xxx={{ watch_url }}"
+
+    res = client.post(
+        url_for("settings_page"),
+        data={"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
+              "application-notification_body": 'triggered text was -{{triggered_text}}-',
+              # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
+              "application-notification_urls": test_notification_url,
+              "requests-time_between_check-minutes": 180,
+              "application-fetch_backend": "html_requests"
+              },
+        follow_redirects=True
+    )
+    assert b'Settings updated' in res.data
+
+    set_original()
+    # Add our URL to the import page
+    test_url = url_for('test_endpoint', _external=True)
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+
+    # Give the thread time to pick it up
+    time.sleep(sleep_time_for_fetch_thread)
+
+    # Go to the edit page and set the trigger text,
+    # with 'filter_text_added' switched on and 'filter_text_removed' left empty
+    res = client.post(
+        url_for("edit_page", uuid="first"),
+        data={"trigger_text": 'Oh yes please',
+              "url": test_url,
+              'fetch_backend': "html_requests",
+              'filter_text_removed': '',
+              'filter_text_added': 'y'},
+        follow_redirects=True
+    )
+    assert b"Updated watch." in res.data
+    time.sleep(sleep_time_for_fetch_thread)
+    set_original(excluding='Something irrelevant')
+
+    # A change that does not add the trigger line should not trigger anything
+    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    assert b'1 watches queued for rechecking.' in res.data
+    time.sleep(sleep_time_for_fetch_thread)
+    res = client.get(url_for("index"))
+    assert b'unviewed' not in res.data
+
+    # The trigger line is ADDED, this should trigger
+    set_original(add_line='<p>Oh yes please</p>')
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    time.sleep(sleep_time_for_fetch_thread)
+    res = client.get(url_for("index"))
+    assert b'unviewed' in res.data
+
+    with open("test-datastore/notification.txt", 'r') as f:
+        response = f.read()
+    assert '-Oh yes please-' in response
+
+    # The notification body must carry the triggered text; now clean up all watches
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
--- a/changedetectionio/tests/test_api.py
+++ b/changedetectionio/tests/test_api.py
@@ -11,10 +11,10 @@ import uuid
 def set_original_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div id="sametext">Some text thats the same</div>
     <div id="changetext">Some text that will change</div>
     </body>
@@ -29,10 +29,10 @@ def set_original_response():
 def set_modified_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>which has this one new line</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div id="sametext">Some text thats the same</div>
     <div id="changetext">Some text that changes</div>
     </body>
--- a/changedetectionio/tests/test_automatic_follow_ldjson_price.py
+++ b/changedetectionio/tests/test_automatic_follow_ldjson_price.py
@@ -7,10 +7,10 @@ from .util import live_server_setup, extract_UUID_from_client, extract_api_key_f
 def set_response_with_ldjson():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div class="sametext">Some text thats the same</div>
     <div class="changetext">Some text that will change</div>
     <script type="application/ld+json">
@@ -61,10 +61,10 @@ def set_response_with_ldjson():
 def set_response_without_ldjson():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div class="sametext">Some text thats the same</div>
     <div class="changetext">Some text that will change</div>     
     </body>
@@ -143,4 +143,4 @@ def test_check_ldjson_price_autodetect(client, live_server):
    assert b'ldjson-price-track-offer' not in res.data
    
    ##########################################################################################
-    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
--- a/changedetectionio/tests/test_backend.py
+++ b/changedetectionio/tests/test_backend.py
@@ -3,7 +3,7 @@
 import time
 from flask import url_for
 from urllib.request import urlopen
-from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
+from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI

 sleep_time_for_fetch_thread = 3

@@ -11,7 +11,7 @@ sleep_time_for_fetch_thread = 3
 # Basic test to check inscriptus is not adding return line chars, basically works etc
 def test_inscriptus():
    from inscriptis import get_text
-    html_content = "<html><body>test!<br/>ok man</body></html>"
+    html_content = "<html><body>test!<br>ok man</body></html>"
    stripped_text_from_html = get_text(html_content)
    assert stripped_text_from_html == 'test!\nok man'

@@ -76,12 +76,13 @@ def test_check_basic_change_detection_functionality(client, live_server):
    assert b'unviewed' in res.data

    # #75, and it should be in the RSS feed
-    res = client.get(url_for("rss"))
+    rss_token = extract_rss_token_from_UI(client)
+    res = client.get(url_for("rss", token=rss_token, _external=True))
    expected_url = url_for('test_endpoint', _external=True)
    assert b'<rss' in res.data

    # re #16 should have the diff in here too
-    assert b'(into   ) which has this one new line' in res.data
+    assert b'(into) which has this one new line' in res.data
    assert b'CDATA' in res.data

    assert expected_url.encode('utf-8') in res.data
--- a/changedetectionio/tests/test_block_while_text_present.py
+++ b/changedetectionio/tests/test_block_while_text_present.py
@@ -8,10 +8,10 @@ from changedetectionio import html_tools
 def set_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>

@@ -24,10 +24,10 @@ def set_original_ignore_response():
 def set_modified_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some NEW nice initial text</br>
+     Some NEW nice initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <p>new ignore stuff</p>
     <p>out of stock</p>
     <p>blah</p>
@@ -44,11 +44,11 @@ def set_modified_original_ignore_response():
 def set_modified_response_minus_block_text():
    test_return_data = """<html>
       <body>
-     Some NEW nice initial text</br>
+     Some NEW nice initial text<br>
     <p>Which is across multiple lines</p>
     <p>now on sale $2/p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <p>new ignore stuff</p>
     <p>blah</p>
     </body>
@@ -87,7 +87,10 @@ def test_check_block_changedetection_text_NOT_present(client, live_server):
    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
-        data={"text_should_not_be_present": ignore_text, "url": test_url, 'fetch_backend': "html_requests"},
+        data={"text_should_not_be_present": ignore_text,
+              "url": test_url,
+              'fetch_backend': "html_requests"
+              },
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
@@ -129,7 +132,6 @@ def test_check_block_changedetection_text_NOT_present(client, live_server):
    set_modified_response_minus_block_text()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    time.sleep(sleep_time_for_fetch_thread)
-
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data

--- a/changedetectionio/tests/test_css_selector.py
+++ b/changedetectionio/tests/test_css_selector.py
@@ -12,10 +12,10 @@ def test_setup(live_server):
 def set_original_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div id="sametext">Some text thats the same</div>
     <div id="changetext">Some text that will change</div>
     </body>
@@ -29,10 +29,10 @@ def set_original_response():
 def set_modified_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>which has this one new line</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div id="sametext">Some text thats the same</div>
     <div id="changetext">Some text that changes</div>
     </body>
--- a/changedetectionio/tests/test_element_removal.py
+++ b/changedetectionio/tests/test_element_removal.py
@@ -25,10 +25,10 @@ def set_original_response():
    </ul>
    </nav>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
    <div id="changetext">Some text that will change</div>
     </body>
    <footer>
@@ -54,10 +54,10 @@ def set_modified_response():
    </ul>
    </nav>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
    <div id="changetext">Some text that changes</div>
     </body>
    <footer>
@@ -71,7 +71,6 @@ def set_modified_response():


 def test_element_removal_output():
-    from changedetectionio import fetch_site_status
    from inscriptis import get_text

    # Check text with sub-parts renders correctly
@@ -85,7 +84,7 @@ def test_element_removal_output():
    </ul>
    </nav>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>across multiple lines</p>
     <div id="changetext">Some text that changes</div>
     </body>
--- a/changedetectionio/tests/test_errorhandling.py
+++ b/changedetectionio/tests/test_errorhandling.py
@@ -59,6 +59,8 @@ def test_http_error_handler(client, live_server):
    _runner_test_http_errors(client, live_server, 404, 'Page not found')
    _runner_test_http_errors(client, live_server, 500, '(Internal server Error) received')
    _runner_test_http_errors(client, live_server, 400, 'Error - Request returned a HTTP error code 400')
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data

 # Just to be sure error text is properly handled
 def test_DNS_errors(client, live_server):
@@ -81,4 +83,48 @@ def test_DNS_errors(client, live_server):
    assert found_name_resolution_error
    # Should always record that we tried
    assert bytes("just now".encode('utf-8')) in res.data
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data

+# Re 1513
+def test_low_level_errors_clear_correctly(client, live_server):
+    """A name-resolution error shown for a watch must disappear once the watch is pointed at a URL that works."""
+    #live_server_setup(live_server)
+    # Give the endpoint time to spin up
+    time.sleep(1)
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write("<html><body><div id=here>Hello world</div></body></html>")
+
+    # URL of the local test endpoint; the watch is switched to it further down
+    test_url = url_for('test_endpoint', _external=True)
+    # First import a watch on a hostname that is expected to fail name resolution
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": "https://dfkjasdkfjaidjfsdajfksdajfksdjfDOESNTEXIST.com"},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+    time.sleep(2)
+
+    # We should see the DNS error (either wording is accepted)
+    res = client.get(url_for("index"))
+    found_name_resolution_error = b"Temporary failure in name resolution" in res.data or b"Name or service not known" in res.data
+    assert found_name_resolution_error
+
+    # Update the watch with a URL that should work
+    client.post(
+        url_for("edit_page", uuid="first"),
+        data={
+            "url": test_url,
+            "fetch_backend": "html_requests"},
+        follow_redirects=True
+    )
+
+    # Now the error should be gone
+    time.sleep(2)
+    res = client.get(url_for("index"))
+    found_name_resolution_error = b"Temporary failure in name resolution" in res.data or b"Name or service not known" in res.data
+    assert not found_name_resolution_error
+
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
--- a/changedetectionio/tests/test_extract_regex.py
+++ b/changedetectionio/tests/test_extract_regex.py
@@ -10,10 +10,10 @@ from ..html_tools import *
 def set_original_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div id="sametext">Some text thats the same</div>
     <div class="changetext">Some text that will change</div>     
     </body>
@@ -28,12 +28,12 @@ def set_original_response():
 def set_modified_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>which has this one new line</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div id="sametext">Some text thats the same</div>
-     <div class="changetext">Some text that did change ( 1000 online <br/> 80 guests<br/>  2000 online )</div>
+     <div class="changetext">Some text that did change ( 1000 online <br> 80 guests<br>  2000 online )</div>
     <div class="changetext">SomeCase insensitive 3456</div>
     </body>
     </html>
@@ -49,8 +49,8 @@ def set_multiline_response():
    test_return_data = """<html>
       <body>
     
-     <p>Something <br/>
-        across 6 billion multiple<br/>
+     <p>Something <br>
+        across 6 billion multiple<br>
        lines
     </p>
     
--- a/changedetectionio/tests/test_filter_exist_changes.py
+++ b/changedetectionio/tests/test_filter_exist_changes.py
@@ -11,10 +11,10 @@ from changedetectionio.model import App
 def set_response_without_filter():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div id="nope-doesnt-exist">Some text thats the same</div>     
     </body>
     </html>
@@ -28,10 +28,10 @@ def set_response_without_filter():
 def set_response_with_filter():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div class="ticket-available">Ticket now on sale!</div>     
     </body>
     </html>
@@ -117,18 +117,3 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se

    assert 'Ticket now on sale' in notification
    os.unlink("test-datastore/notification.txt")
-
-
-    # Test that if it gets removed, then re-added, we get a notification
-    # Remove the target and re-add it, we should get a new notification
-    set_response_without_filter()
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-    time.sleep(3)
-    assert not os.path.isfile("test-datastore/notification.txt")
-
-    set_response_with_filter()
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-    time.sleep(3)
-    assert os.path.isfile("test-datastore/notification.txt")
-
-# Also test that the filter was updated after the first one was requested
--- a/changedetectionio/tests/test_filter_failure_notification.py
+++ b/changedetectionio/tests/test_filter_failure_notification.py
@@ -8,10 +8,10 @@ from changedetectionio.model import App
 def set_response_with_filter():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div id="nope-doesnt-exist">Some text thats the same</div>     
     </body>
     </html>
@@ -145,4 +145,4 @@ def test_check_xpath_filter_failure_notification(client, live_server):
    time.sleep(1)
    run_filter_test(client, '//*[@id="nope-doesnt-exist"]')

-# Test that notification is never sent
+# Test that notification is never sent
--- a/changedetectionio/tests/test_html_to_text.py
+++ b/changedetectionio/tests/test_html_to_text.py
@@ -6,11 +6,11 @@ from ..html_tools import html_to_text
 def test_html_to_text_func():
    test_html = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
     <a href="/first_link"> More Text </a>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <a href="second_link.com"> Even More Text </a>
     </body>
     </html>
@@ -21,7 +21,7 @@ def test_html_to_text_func():

    no_links_text = \
        "Some initial text\n\nWhich is across multiple " \
-        "lines\n\nMore Text So let's see what happens. Even More Text"
+        "lines\n\nMore Text\nSo let's see what happens.\nEven More Text"

    # check that no links are in the extracted text
    assert text_content == no_links_text
@@ -31,7 +31,7 @@ def test_html_to_text_func():

    links_text = \
        "Some initial text\n\nWhich is across multiple lines\n\n[ More Text " \
-        "](/first_link) So let's see what happens. [ Even More Text ]" \
+        "](/first_link)\nSo let's see what happens.\n[ Even More Text ]" \
        "(second_link.com)"

    # check that links are present in the extracted text
--- a/changedetectionio/tests/test_ignore_regex_text.py
+++ b/changedetectionio/tests/test_ignore_regex_text.py
@@ -1,7 +1,5 @@
 #!/usr/bin/python3

-import time
-from flask import url_for
 from . util import live_server_setup
 from changedetectionio import html_tools

@@ -11,7 +9,7 @@ def test_setup(live_server):
 # Unit test of the stripper
 # Always we are dealing in utf-8
 def test_strip_regex_text_func():
-    from changedetectionio import fetch_site_status
+    from ..processors import text_json_diff as fetch_site_status

    test_content = """
    but sometimes we want to remove the lines.
--- a/changedetectionio/tests/test_ignore_text.py
+++ b/changedetectionio/tests/test_ignore_text.py
@@ -11,7 +11,8 @@ def test_setup(live_server):
 # Unit test of the stripper
 # Always we are dealing in utf-8
 def test_strip_text_func():
-    from changedetectionio import fetch_site_status
+    from ..processors import text_json_diff as fetch_site_status
+

    test_content = """
    Some content
@@ -33,10 +34,10 @@ def test_strip_text_func():
 def set_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>

@@ -49,10 +50,10 @@ def set_original_ignore_response():
 def set_modified_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some NEW nice initial text</br>
+     Some NEW nice initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <p>new ignore stuff</p>
     <p>blah</p>
     </body>
@@ -68,11 +69,11 @@ def set_modified_original_ignore_response():
 def set_modified_ignore_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
     <P>ZZZZz</P>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>

--- a/changedetectionio/tests/test_ignorehyperlinks.py
+++ b/changedetectionio/tests/test_ignorehyperlinks.py
@@ -12,10 +12,10 @@ def test_setup(live_server):
 def set_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <a href="/original_link"> Some More Text </a>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>
    """
@@ -29,10 +29,10 @@ def set_original_ignore_response():
 def set_modified_ignore_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <a href="/modified_link"> Some More Text </a>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>
    """
--- a/changedetectionio/tests/test_ignorestatuscode.py
+++ b/changedetectionio/tests/test_ignorestatuscode.py
@@ -12,10 +12,10 @@ def test_setup(live_server):
 def set_original_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>
    """
@@ -27,10 +27,10 @@ def set_original_response():
 def set_some_changed_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines, and a new thing too.</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>
    """
--- a/changedetectionio/tests/test_ignorewhitespace.py
+++ b/changedetectionio/tests/test_ignorewhitespace.py
@@ -12,15 +12,15 @@ def test_setup(live_server):
 def set_original_ignore_response_but_with_whitespace():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>


     Which is across multiple lines</p>
     <br>
-     </br>
+     <br>

-         So let's see what happens.  </br>
+         So let's see what happens.  <br>


     </body>
@@ -34,10 +34,10 @@ def set_original_ignore_response_but_with_whitespace():
 def set_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>

--- a/changedetectionio/tests/test_jsonpath_jq_selector.py
+++ b/changedetectionio/tests/test_jsonpath_jq_selector.py
@@ -198,8 +198,8 @@ def test_check_json_without_filter(client, live_server):
    )

    # Should still see '"html": "<b>"'
-    assert b'&#34;&lt;b&gt;' in res.data
-    assert res.data.count(b'{\n') >= 2
+    assert b'&#34;html&#34;: &#34;&lt;b&gt;&#34;' in res.data
+    assert res.data.count(b'{') >= 2

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
--- a/changedetectionio/tests/test_notification.py
+++ b/changedetectionio/tests/test_notification.py
@@ -73,16 +73,12 @@ def test_check_notification(client, live_server):
    # We write the PNG to disk, but a JPEG should appear in the notification
    # Write the last screenshot png
    testimage_png = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII='
-    # This one is created when we save the screenshot from the webdriver/playwright session (converted from PNG)
-    testimage_jpg = '/9j/4AAQSkZJRgABAQEASABIAAD/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/wAALCAABAAEBAREA/8QAFAABAAAAAAAAAAAAAAAAAAAACf/EABQQAQAAAAAAAAAAAAAAAAAAAAD/2gAIAQEAAD8AKp//2Q=='


    uuid = extract_UUID_from_client(client)
    datastore = 'test-datastore'
    with open(os.path.join(datastore, str(uuid), 'last-screenshot.png'), 'wb') as f:
        f.write(base64.b64decode(testimage_png))
-    with open(os.path.join(datastore, str(uuid), 'last-screenshot.jpg'), 'wb') as f:
-        f.write(base64.b64decode(testimage_jpg))

    # Goto the edit page, add our ignore text
    # Add our URL to the import page
@@ -100,6 +96,8 @@ def test_check_notification(client, live_server):
                                                   "Diff URL: {{diff_url}}\n"
                                                   "Snapshot: {{current_snapshot}}\n"
                                                   "Diff: {{diff}}\n"
+                                                   "Diff Added: {{diff_added}}\n"
+                                                   "Diff Removed: {{diff_removed}}\n"
                                                   "Diff Full: {{diff_full}}\n"
                                                   ":-)",
                              "notification_screenshot": True,
@@ -147,7 +145,7 @@ def test_check_notification(client, live_server):
    assert ':-)' in notification_submission
    assert "Diff Full: Some initial text" in notification_submission
    assert "Diff: (changed) Which is across multiple lines" in notification_submission
-    assert "(into   ) which has this one new line" in notification_submission
+    assert "(into) which has this one new line" in notification_submission
    # Re #342 - check for accidental python byte encoding of non-utf8/string
    assert "b'" not in notification_submission
    assert re.search('Watch UUID: [0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}', notification_submission, re.IGNORECASE)
@@ -160,12 +158,12 @@ def test_check_notification(client, live_server):

    # Check the attachment was added, and that it is a JPEG from the original PNG
    notification_submission_object = json.loads(notification_submission)
-    assert notification_submission_object['attachments'][0]['filename'] == 'last-screenshot.jpg'
+    # We keep PNG screenshots for now
+    assert notification_submission_object['attachments'][0]['filename'] == 'last-screenshot.png'
    assert len(notification_submission_object['attachments'][0]['base64'])
-    assert notification_submission_object['attachments'][0]['mimetype'] == 'image/jpeg'
+    assert notification_submission_object['attachments'][0]['mimetype'] == 'image/png'
    jpeg_in_attachment = base64.b64decode(notification_submission_object['attachments'][0]['base64'])
-    assert b'JFIF' in jpeg_in_attachment
-    assert testimage_png not in notification_submission
+
    # Assert that the JPEG is readable (didn't get chewed up somewhere)
    from PIL import Image
    import io
@@ -297,7 +295,10 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server):
        follow_redirects=True
    )
    assert b'Settings updated' in res.data
-
+    client.get(
+        url_for("form_delete", uuid="all"),
+        follow_redirects=True
+    )
    # Add a watch and trigger a HTTP POST
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
--- a/changedetectionio/tests/test_rss.py
+++ b/changedetectionio/tests/test_rss.py
@@ -0,0 +1,39 @@
+#!/usr/bin/python3
+
+import time
+from flask import url_for
+from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI
+
+
+def test_rss_and_token(client, live_server):
+    """The RSS feed must reject a bad token and serve content for the token shown in the UI."""
+    set_original_response()
+    live_server_setup(live_server)
+    # Import a single watch pointing at the random-content test endpoint
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": url_for('test_random_content_endpoint', _external=True)},
+        follow_redirects=True
+    )
+
+    assert b"1 Imported" in res.data
+    rss_token = extract_rss_token_from_UI(client)
+    # Request a recheck, pausing before and after (presumably to let the worker fetch - TODO confirm)
+    time.sleep(2)
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    time.sleep(2)
+
+    # Request the feed with an invalid token, access must be refused
+    res = client.get(
+        url_for("rss", token="bad token", _external=True),
+        follow_redirects=True
+    )
+
+    assert b"Access denied, bad token" in res.data
+    # The token scraped from the UI must be accepted and the feed must carry the endpoint's text
+    res = client.get(
+        url_for("rss", token=rss_token, _external=True),
+        follow_redirects=True
+    )
+    assert b"Access denied, bad token" not in res.data
+    assert b"Random content" in res.data
--- a/changedetectionio/tests/test_security.py
+++ b/changedetectionio/tests/test_security.py
@@ -2,11 +2,9 @@ from flask import url_for
 from . util import set_original_response, set_modified_response, live_server_setup
 import time

-def test_setup(live_server):
+
+def test_bad_access(client, live_server):
    live_server_setup(live_server)
-
-def test_file_access(client, live_server):
-
    res = client.post(
        url_for("import_page"),
        data={"urls": 'https://localhost'},
@@ -19,18 +17,49 @@ def test_file_access(client, live_server):
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={
-              "url": 'file:///etc/passwd',
+              "url": 'javascript:alert(document.domain)',
              "tag": "",
              "method": "GET",
              "fetch_backend": "html_requests",
              "body": ""},
        follow_redirects=True
    )
-    time.sleep(3)

-    res = client.get(
-        url_for("index", uuid="first"),
+    assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
+
+    res = client.post(
+        url_for("form_quick_watch_add"),
+        data={"url": '            javascript:alert(123)', "tag": ''},
        follow_redirects=True
    )

-    assert b'denied for security reasons' in res.data
+    assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
+
+    res = client.post(
+        url_for("form_quick_watch_add"),
+        data={"url": '%20%20%20javascript:alert(123)%20%20', "tag": ''},
+        follow_redirects=True
+    )
+
+    assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
+
+
+    res = client.post(
+        url_for("form_quick_watch_add"),
+        data={"url": ' source:javascript:alert(document.domain)', "tag": ''},
+        follow_redirects=True
+    )
+
+    assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
+
+    # file:// is permitted by default, but it will be caught by ALLOW_FILE_URI
+
+    client.post(
+        url_for("form_quick_watch_add"),
+        data={"url": 'file:///tasty/disk/drive', "tag": ''},
+        follow_redirects=True
+    )
+    time.sleep(1)
+    res = client.get(url_for("index"))
+
+    assert b'file:// type access is denied for security reasons.' in res.data
--- a/changedetectionio/tests/test_trigger.py
+++ b/changedetectionio/tests/test_trigger.py
@@ -8,10 +8,10 @@ from . util import live_server_setup
 def set_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>

@@ -24,10 +24,10 @@ def set_original_ignore_response():
 def set_modified_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some NEW nice initial text</br>
+     Some NEW nice initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>

@@ -40,12 +40,12 @@ def set_modified_original_ignore_response():
 def set_modified_with_trigger_text_response():
    test_return_data = """<html>
       <body>
-     Some NEW nice initial text</br>
+     Some NEW nice initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
+     <br>
     Add to cart
-     <br/>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>

@@ -142,4 +142,4 @@ def test_trigger_functionality(client, live_server):
    res = client.get(url_for("preview_page", uuid="first"))

    # We should be able to see what we triggered on
-    assert b'<div class="triggered">Add to cart' in res.data
+    assert b'<div class="triggered">Add to cart' in res.data
--- a/changedetectionio/tests/test_trigger_regex.py
+++ b/changedetectionio/tests/test_trigger_regex.py
@@ -8,10 +8,10 @@ from . util import live_server_setup
 def set_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>

@@ -72,7 +72,7 @@ def test_trigger_regex_functionality(client, live_server):
    assert b'unviewed' not in res.data

    with open("test-datastore/endpoint-content.txt", "w") as f:
-        f.write("regex test123<br/>\nsomething 123")
+        f.write("regex test123<br>\nsomething 123")

    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    time.sleep(sleep_time_for_fetch_thread)
@@ -81,4 +81,4 @@ def test_trigger_regex_functionality(client, live_server):

    # Cleanup everything
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
+    assert b'Deleted' in res.data
--- a/changedetectionio/tests/test_trigger_regex_with_filter.py
+++ b/changedetectionio/tests/test_trigger_regex_with_filter.py
@@ -8,10 +8,10 @@ from . util import live_server_setup
 def set_original_ignore_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>

--- a/changedetectionio/tests/test_unique_lines.py
+++ b/changedetectionio/tests/test_unique_lines.py
@@ -94,7 +94,6 @@ def test_unique_lines_functionality(client, live_server):
    res = client.get(url_for("index"))
    assert b'unviewed' not in res.data

-
    # Now set the content which contains the new text and re-ordered existing text
    set_modified_with_trigger_text_response()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
--- a/changedetectionio/tests/test_xpath_selector.py
+++ b/changedetectionio/tests/test_xpath_selector.py
@@ -12,10 +12,10 @@ def test_setup(live_server):
 def set_original_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <div class="sametext">Some text thats the same</div>
     <div class="changetext">Some text that will change</div>
     </body>
@@ -29,10 +29,10 @@ def set_original_response():
 def set_modified_response():
    test_return_data = """<html>
       <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  THIS CHANGES AND SHOULDNT TRIGGER A CHANGE</br>
+     <br>
+     So let's see what happens.  THIS CHANGES AND SHOULDNT TRIGGER A CHANGE<br>
     <div class="sametext">Some text thats the same</div>
     <div class="changetext">Some new text</div>
     </body>
--- a/changedetectionio/tests/unit/test_notification_diff.py
+++ b/changedetectionio/tests/unit/test_notification_diff.py
@@ -13,18 +13,51 @@ class TestDiffBuilder(unittest.TestCase):

    def test_expected_diff_output(self):
        base_dir = os.path.dirname(__file__)
-        output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after.txt")
+        with open(base_dir + "/test-content/before.txt", 'r') as f:
+            previous_version_file_contents = f.read()
+
+        with open(base_dir + "/test-content/after.txt", 'r') as f:
+            newest_version_file_contents = f.read()
+
+        output = diff.render_diff(previous_version_file_contents=previous_version_file_contents,
+                                  newest_version_file_contents=newest_version_file_contents)
+
        output = output.split("\n")
+
+
        self.assertIn('(changed) ok', output)
-        self.assertIn('(into   ) xok', output)
-        self.assertIn('(into   ) next-x-ok', output)
-        self.assertIn('(added  ) and something new', output)
+        self.assertIn('(into) xok', output)
+        self.assertIn('(into) next-x-ok', output)
+        self.assertIn('(added) and something new', output)

-
-        output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after-2.txt")
+        with open(base_dir + "/test-content/after-2.txt", 'r') as f:
+            newest_version_file_contents = f.read()
+        output = diff.render_diff(previous_version_file_contents, newest_version_file_contents)
        output = output.split("\n")
        self.assertIn('(removed) for having learned computerese,', output)
        self.assertIn('(removed) I continue to examine bits, bytes and words', output)
+        
+        # diff_removed (before.txt -> after.txt): with include_added=False the pure addition must not appear
+        with open(base_dir + "/test-content/before.txt", 'r') as f:
+            previous_version_file_contents = f.read()
+
+        with open(base_dir + "/test-content/after.txt", 'r') as f:
+            newest_version_file_contents = f.read()
+        output = diff.render_diff(previous_version_file_contents, newest_version_file_contents, include_equal=False, include_removed=True, include_added=False)
+        output = output.split("\n")
+        self.assertIn('(changed) ok', output)
+        self.assertIn('(into) xok', output)
+        self.assertIn('(into) next-x-ok', output)
+        self.assertNotIn('(added) and something new', output)
+        
+        # diff_removed (before.txt -> after-2.txt): removed lines must still be reported
+        with open(base_dir + "/test-content/after-2.txt", 'r') as f:
+            newest_version_file_contents = f.read()
+        output = diff.render_diff(previous_version_file_contents, newest_version_file_contents, include_equal=False, include_removed=True, include_added=False)
+        output = output.split("\n")
+        self.assertIn('(removed) for having learned computerese,', output)
+        self.assertIn('(removed) I continue to examine bits, bytes and words', output)
+        

        # @todo test blocks of changed, blocks of added, blocks of removed

--- a/changedetectionio/tests/util.py
+++ b/changedetectionio/tests/util.py
@@ -9,10 +9,10 @@ def set_original_response():
    test_return_data = """<html>
    <head><title>head title</title></head>
    <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>Which is across multiple lines</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     <span class="foobar-detection" style='display:none'></span>
     </body>
     </html>
@@ -26,10 +26,10 @@ def set_modified_response():
    test_return_data = """<html>
    <head><title>modified head title</title></head>
    <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>which has this one new line</p>
-     </br>
-     So let's see what happens.  </br>
+     <br>
+     So let's see what happens.  <br>
     </body>
     </html>
    """
@@ -43,11 +43,11 @@ def set_more_modified_response():
    test_return_data = """<html>
    <head><title>modified head title</title></head>
    <body>
-     Some initial text</br>
+     Some initial text<br>
     <p>which has this one new line</p>
-     </br>
-     So let's see what happens.  </br>
-     Ohh yeah awesome<br/>
+     <br>
+     So let's see what happens.  <br>
+     Ohh yeah awesome<br>
     </body>
     </html>
    """
@@ -70,6 +70,15 @@ def extract_api_key_from_UI(client):
    api_key = m.group(1)
    return api_key.strip()

+# Scrape the RSS access token from the index page - kinda funky, but works for now
+def extract_rss_token_from_UI(client):
+    """Return the stripped RSS token from the first `token=...` match on the index page."""
+    import re
+    res = client.get(url_for("index"))
+    m = re.search('token=(.+?)"', str(res.data))
+    if m is None:
+        raise AssertionError("No RSS token found in the index page")
+    return m.group(1).strip()

 # kinda funky, but works for now
 def extract_UUID_from_client(client):
@@ -98,6 +107,12 @@ def wait_for_all_checks(client):

 def live_server_setup(live_server):

+    @live_server.app.route('/test-random-content-endpoint')
+    def test_random_content_endpoint():
+        # Body carries a freshly generated random hex token on every request
+        import secrets
+        random_hex = secrets.token_hex(64)
+        return "Random content - {}\n".format(random_hex)
    @live_server.app.route('/test-endpoint')
    def test_endpoint():
        ctype = request.args.get('content_type')
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@@ -3,9 +3,8 @@ import threading
 import queue
 import time

-from changedetectionio import content_fetcher
-from changedetectionio import queuedWatchMetaData
-from changedetectionio.fetch_site_status import FilterNotFoundInResponse
+from .processors.text_json_diff import FilterNotFoundInResponse
+from .fetchers import exceptions

 # A single update worker
 #
@@ -65,20 +64,32 @@ class update_worker(threading.Thread):
        if 'notification_urls' in n_object and n_object['notification_urls']:
            # HTML needs linebreak, but MarkDown and Text can use a linefeed
            if n_object['notification_format'] == 'HTML':
-                line_feed_sep = "</br>"
+                line_feed_sep = "<br>"
            else:
                line_feed_sep = "\n"

-            with open(watch_history[dates[-1]], 'rb') as f:
-                snapshot_contents = f.read()
+            # Add text that was triggered
+            snapshot_contents = watch.get_history_snapshot(dates[-1])
+            trigger_text = watch.get('trigger_text', [])
+            triggered_text = ''
+
+            if len(trigger_text):
+                from . import html_tools
+                triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text)
+                if triggered_text:
+                    triggered_text = line_feed_sep.join(triggered_text)
+

            n_object.update({
-                'watch_url': watch['url'],
+                'current_snapshot': snapshot_contents,
+                'diff': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), line_feed_sep=line_feed_sep),
+                'diff_added': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), include_removed=False, line_feed_sep=line_feed_sep),
+                'diff_full': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), include_equal=True, line_feed_sep=line_feed_sep),
+                'diff_removed': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), include_added=False, line_feed_sep=line_feed_sep),
+                'screenshot': watch.get_screenshot() if watch.get('notification_screenshot') else None,
+                'triggered_text': triggered_text,
                'uuid': watch_uuid,
-                'screenshot': watch.get_screenshot_as_jpeg() if watch.get('notification_screenshot') else None,
-                'current_snapshot': snapshot_contents.decode('utf-8'),
-                'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep),
-                'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], True, line_feed_sep=line_feed_sep)
+                'watch_url': watch['url'],
            })
            logging.info (">> SENDING NOTIFICATION")
            self.notification_q.put(n_object)
@@ -151,9 +162,8 @@ class update_worker(threading.Thread):
                os.unlink(full_path)

    def run(self):
-        from changedetectionio import fetch_site_status

-        update_handler = fetch_site_status.perform_site_check(datastore=self.datastore)
+        from .processors import text_json_diff, restock_diff

        while not self.app.config.exit.is_set():

@@ -169,14 +179,23 @@ class update_worker(threading.Thread):
                if uuid in list(self.datastore.data['watching'].keys()):
                    changed_detected = False
                    contents = b''
-                    screenshot = False
-                    update_obj= {}
-                    xpath_data = False
                    process_changedetection_results = True
-                    print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url']))
+                    update_obj = {}
+                    print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority,
+                                                                           self.datastore.data['watching'][uuid]['url']))
                    now = time.time()

                    try:
+                        processor = self.datastore.data['watching'][uuid].get('processor','text_json_diff')
+
+                        # @todo some way to switch by name
+                        update_handler = None
+                        if processor == 'restock_diff':
+                            update_handler = restock_diff.perform_site_check(datastore=self.datastore)
+                        else:
+                            # Used as a default and also by some tests
+                            update_handler = text_json_diff.perform_site_check(datastore=self.datastore)
+
                        changed_detected, update_obj, contents = update_handler.run(uuid, skip_when_checksum_same=queued_item_data.item.get('skip_when_checksum_same'))
                        # Re #342
                        # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
@@ -186,7 +205,7 @@ class update_worker(threading.Thread):
                    except PermissionError as e:
                        self.app.logger.error("File permission error updating", uuid, str(e))
                        process_changedetection_results = False
-                    except content_fetcher.ReplyWithContentButNoText as e:
+                    except exceptions.ReplyWithContentButNoText as e:
                        # Totally fine, it's by choice - just continue on, nothing more to care about
                        # Page had elements/content but no renderable text
                        # Backend (not filters) gave zero output
@@ -195,7 +214,7 @@ class update_worker(threading.Thread):
                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot)
                        process_changedetection_results = False

-                    except content_fetcher.Non200ErrorCodeReceived as e:
+                    except exceptions.Non200ErrorCodeReceived as e:
                        if e.status_code == 403:
                            err_text = "Error - 403 (Access denied) received"
                        elif e.status_code == 404:
@@ -212,9 +231,7 @@ class update_worker(threading.Thread):
                        if e.page_text:
                            self.datastore.save_error_text(watch_uuid=uuid, contents=e.page_text)

-                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
-                                                                           # So that we get a trigger when the content is added again
-                                                                           'previous_md5': ''})
+                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
                        process_changedetection_results = False

                    except FilterNotFoundInResponse as e:
@@ -222,9 +239,7 @@ class update_worker(threading.Thread):
                            continue

                        err_text = "Warning, no filters were found, no change detection ran."
-                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
-                                                                           # So that we get a trigger when the content is added again
-                                                                           'previous_md5': ''})
+                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})

                        # Only when enabled, send the notification
                        if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False):
@@ -241,21 +256,20 @@ class update_worker(threading.Thread):

                            self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})

-                        process_changedetection_results = True
+                        process_changedetection_results = False

-                    except content_fetcher.checksumFromPreviousCheckWasTheSame as e:
-                        # Yes fine, so nothing todo
-                        pass
+                    except exceptions.checksumFromPreviousCheckWasTheSame as e:
+                        # Yes fine, so nothing todo, don't continue to process.
+                        process_changedetection_results = False
+                        changed_detected = False

-                    except content_fetcher.BrowserStepsStepTimout as e:
+                    except exceptions.BrowserStepsStepTimout as e:

                        if not self.datastore.data['watching'].get(uuid):
                            continue

                        err_text = "Warning, browser step at position {} could not run, target not found, check the watch, add a delay if necessary.".format(e.step_n+1)
-                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
-                                                                           # So that we get a trigger when the content is added again
-                                                                           'previous_md5': ''})
+                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})


                        if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False):
@@ -271,25 +285,28 @@ class update_worker(threading.Thread):
                                c = 0

                            self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
+
                        process_changedetection_results = False

-                    except content_fetcher.EmptyReply as e:
+                    except exceptions.EmptyReply as e:
                        # Some kind of custom to-str handler in the exception handler that does this?
                        err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
-                    except content_fetcher.ScreenshotUnavailable as e:
+                        process_changedetection_results = False
+                    except exceptions.ScreenshotUnavailable as e:
                        err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'"
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
                        process_changedetection_results = False
-                    except content_fetcher.JSActionExceptions as e:
+                    except exceptions.JSActionExceptions as e:
                        err_text = "Error running JS Actions - Page request - "+e.message
                        if e.screenshot:
                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
-                    except content_fetcher.PageUnloadable as e:
+                        process_changedetection_results = False
+                    except exceptions.PageUnloadable as e:
                        err_text = "Page request from server didnt respond correctly"
                        if e.message:
                            err_text = "{} - {}".format(err_text, e.message)
@@ -299,6 +316,7 @@ class update_worker(threading.Thread):

                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
+                        process_changedetection_results = False
                    except Exception as e:
                        self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
@@ -315,19 +333,19 @@ class update_worker(threading.Thread):

                        self.cleanup_error_artifacts(uuid)

+                    #
                    # Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
                    if process_changedetection_results:
                        try:
-                            watch = self.datastore.data['watching'][uuid]
-                            fname = "" # Saved history text filename
-
-                            # For the FIRST time we check a site, or a change detected, save the snapshot.
-                            if changed_detected or not watch['last_checked']:
-                                # A change was detected
-                                watch.save_history_text(contents=contents, timestamp=str(round(time.time())))
-
+                            watch = self.datastore.data['watching'].get(uuid)
                            self.datastore.update_watch(uuid=uuid, update_obj=update_obj)

+                            # Also save the snapshot on the first time checked
+                            if changed_detected or not watch['last_checked']:
+                                watch.save_history_text(contents=contents,
+                                                        timestamp=str(round(time.time())),
+                                                        snapshot_id=update_obj.get('previous_md5', 'none'))
+
                            # A change was detected
                            if changed_detected:
                                print (">> Change detected in UUID {} - {}".format(uuid, watch['url']))
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -41,7 +41,6 @@ services:
  #
  #        Base URL of your changedetection.io install (Added to the notification alert)
  #      - BASE_URL=https://mysite.com
-
  #        Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
  #        More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory
  #      - USE_X_SETTINGS=1
@@ -95,7 +94,10 @@ services:
 #            - CHROME_REFRESH_TIME=600000
 #            - DEFAULT_BLOCK_ADS=true
 #            - DEFAULT_STEALTH=true
-
+#
+#             Ignore HTTPS errors, like for self-signed certs
+#            - DEFAULT_IGNORE_HTTPS_ERRORS=true
+#
 volumes:
  changedetection-data:

--- a/docs/api_v1/assets/main.bundle.js
+++ b/docs/api_v1/assets/main.bundle.js
--- a/docs/api_v1/assets/main.css
+++ b/docs/api_v1/assets/main.css
@@ -49,6 +49,7 @@ input[type="date"] {
  src: url('./glyphicons-halflings-regular.eot');
  src: url('./glyphicons-halflings-regular.eot?#iefix') format('embedded-opentype'),
    url('./glyphicons-halflings-regular.woff') format('woff'),
+    url('./glyphicons-halflings-regular.woff2') format('woff2'),
    url('./glyphicons-halflings-regular.ttf') format('truetype'),
    url('./glyphicons-halflings-regular.svg#glyphicons-halflingsregular') format('svg');
 }
--- a/docs/api_v1/index.html
+++ b/docs/api_v1/index.html
@@ -5,13 +5,13 @@
  <meta name="description" content="Manage your changedetection.io watches via API, requires the `x-api-key` header which is found in the settings UI.">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
-  <link href="assets/bootstrap.min.css" rel="stylesheet" media="screen">
-  <link href="assets/prism.css" rel="stylesheet" />
-  <link href="assets/main.css" rel="stylesheet" media="screen, print">
-  <link href="assets/favicon.ico" rel="icon" type="image/x-icon">
-  <link href="assets/apple-touch-icon.png" rel="apple-touch-icon" sizes="180x180">
-  <link href="assets/favicon-32x32.png" rel="icon" type="image/png" sizes="32x32">
-  <link href="assets/favicon-16x16.png"rel="icon" type="image/png" sizes="16x16">
+  <link href="assets/bootstrap.min.css?v=1677105736053" rel="stylesheet" media="screen">
+  <link href="assets/prism.css?v=1677105736053" rel="stylesheet" />
+  <link href="assets/main.css?v=1677105736053" rel="stylesheet" media="screen, print">
+  <link href="assets/favicon.ico?v=1677105736053" rel="icon" type="image/x-icon">
+  <link href="assets/apple-touch-icon.png?v=1677105736053" rel="apple-touch-icon" sizes="180x180">
+  <link href="assets/favicon-32x32.png?v=1677105736053" rel="icon" type="image/png" sizes="32x32">
+  <link href="assets/favicon-16x16.png?v=1677105736053" rel="icon" type="image/png" sizes="16x16">
 </head>

 <body class="container-fluid">
@@ -928,6 +928,6 @@
  </div>
 </div>

-<script src="assets/main.bundle.js"></script>
+<script src="assets/main.bundle.js?v=1677105736053"></script>
 </body>
 </html>
--- a/docs/apidoc.json
+++ b/docs/apidoc.json
@@ -3,5 +3,6 @@
  "version": "0.1.0",
  "description": "Manage your changedetection.io watches via API, requires the `x-api-key` header which is found in the settings UI.",
  "title": "changedetection.io API",
-  "url" : "https://changedetection.io/docs/api_v1/index.html"
+  "url" : "",
+  "sampleUrl" : false
 }
--- a/docs/package.json
+++ b/docs/package.json
@@ -1,5 +1,5 @@
 {
  "dependencies": {
-    "apidoc": "^0.53.1"
+    "apidoc": "^0.54.0"
  }
 }
--- a/docs/screenshot.png
+++ b/docs/screenshot.png
--- a/requirements.txt
+++ b/requirements.txt
@@ -31,7 +31,7 @@ dnspython<2.3.0
 # jq not available on Windows so must be installed manually

 # Notification library
-apprise~=1.2.1
+apprise~=1.3.0

 # apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
 paho-mqtt
@@ -42,7 +42,7 @@ paho-mqtt
 cryptography~=3.4

 # Used for CSS filtering
-bs4
+beautifulsoup4

 # XPath filtering, lxml is required by bs4 anyway, but put it here to be safe.
 lxml
@@ -68,5 +68,5 @@ pillow
 # playwright is installed at Dockerfile build time because it's not available on all platforms

 # Include pytest, so if theres a support issue we can ask them to run these tests on their setup
-pytest ~=6.2
+pytest ~=7.2
 pytest-flask ~=1.2
Author	SHA1	Message	Date
dgtlmoon	ec77b45e84	WIP	2023-04-08 21:14:03 +02:00
dgtlmoon	138f7fc59c	WIP	2023-04-08 20:35:13 +02:00
dgtlmoon	56b768d24f	WIP	2023-04-08 20:12:30 +02:00
dgtlmoon	a61d7b4284	Attempt to abstract out each fetch type (requests/playwright/webdriver etc)	2023-04-08 18:49:27 +02:00
dgtlmoon	9076ba6bd3	Tests - error test - be sure to clear results from other test parts	2023-04-06 16:12:18 +02:00
dgtlmoon	43af18e2bc	Update README.md	2023-04-06 15:26:06 +02:00
dgtlmoon	ad75e8cdd0	Tests - Add test to check that low level fetch errors are cleared on next check	2023-04-06 14:46:08 +02:00
dgtlmoon	f604643356	Restock alerts - adding extra detection texts	2023-04-06 13:51:33 +02:00
dgtlmoon	d5fd22f693	Restock monitor - Identify the cases where the product is also definitely in stock (#1489 )	2023-03-23 18:34:56 +01:00
dgtlmoon	1d9d11b3f5	Automated CI test for ensuring pypi package was built correctly (#1488 )	2023-03-23 12:20:18 +01:00
dgtlmoon	f49464f451	GitHub container build - 'provenance' was disabled	2023-03-22 10:40:49 +01:00
dgtlmoon	bc6bde4062	0.41.1	2023-03-21 23:16:01 +01:00
dgtlmoon	2863167f45	Fix for pip installations	2023-03-21 23:15:53 +01:00
dgtlmoon	ce3966c104	0.41	2023-03-21 20:30:21 +01:00
dgtlmoon	d5f574ca17	Notifications - Include triggered text token as `{{triggered_text}}` in notifications, so you can send just the content that matches. (#1485 )	2023-03-21 19:16:13 +01:00
dgtlmoon	c96ece170a	Notification tokens - add comment that the {{tokens}} can be used in the URLs also	2023-03-21 19:04:12 +01:00
dgtlmoon	1fb90bbddc	Quick add form - adjust font size and rename stock recheck	2023-03-20 20:19:32 +01:00
dgtlmoon	55b6ae86e8	Ability to set which text to process triggers on (added, removed, changed) according to the difference (#1483 )	2023-03-20 20:16:57 +01:00
dgtlmoon	66b892f770	Restock / stock / out of stock monitor - bumping detection texts	2023-03-20 15:01:52 +01:00
dgtlmoon	3b80bb2f0e	Use brotli for reducing the size of the text snapshots (#1482 )	2023-03-19 21:12:22 +01:00
dgtlmoon	e6d2d87b31	Notification screenshots - now PNG only for now to save disk space (no point creating two images) (#1481 )	2023-03-18 20:52:52 +01:00
dgtlmoon	6e71088cde	New feature - Restock / stock / out of stock monitor option/mode	2023-03-18 20:36:26 +01:00
dgtlmoon	2bc988dffc	UI - Clone/copy watch - A paused watch should not be checked when copied/cloned #1471 .	2023-03-17 23:58:15 +01:00
dgtlmoon	a578de36c5	Update README.md	2023-03-17 16:56:29 +01:00
dgtlmoon	4c74d39df0	Code - Abstract out the diff fetch types to make it easier to integrate new ones (#1467 )	2023-03-12 18:11:53 +01:00
dgtlmoon	c454cbb808	BrowserSteps - Adding `Goto URL` step	2023-03-12 17:22:56 +01:00
dgtlmoon	6f1eec0d5a	Fixing bad linebreak definition `</br>` in notifications and UI (#1465 )	2023-03-12 17:05:34 +01:00
reecespieces	0d05ee1586	Notification Improvements - New tokens `{{diff_added}}` and `{{diff_removed}}`, removed whitespace around `added` and `into` ( Issue #905 ) (#1454 )	2023-03-12 16:21:47 +01:00
dgtlmoon	23476f0e70	Update README.md	2023-03-01 23:13:35 +01:00
dgtlmoon	cf363971c1	Bug - False change alerts - code cleanups Re #962 (#1444 )	2023-02-28 18:04:58 +01:00
dgtlmoon	35409f79bf	Update README.md	2023-02-28 14:55:43 +01:00
dgtlmoon	fc88306805	Be sure that `process_changedetection_results` is off after PageUnloadable and EmptyReply exceptions from fetcher - Re #962 (#1439 )	2023-02-26 13:54:14 +01:00
dgtlmoon	8253074d56	False change alerts fix - Don't reset watch checksum when a fetch error happens, adjust test to not test for fluctuating filter (#1437 )	2023-02-25 22:14:47 +01:00
Fabian Affolter	5f9c8db3e1	Library update - Replace bs4 with beautifulsoup4 (#1433 )	2023-02-25 22:06:13 +01:00
dgtlmoon	abf234298c	API - Including `last_changed` timestamp in watch API info (#1436 )	2023-02-25 22:00:46 +01:00
Hmmbob	0e1032a36a	Update apprise to 1.3.0 (#1430 )	2023-02-25 21:06:12 +01:00
dgtlmoon	3b96e40464	API documentation - improving example for list watches	2023-02-22 23:43:14 +01:00
dgtlmoon	c747cf7ba8	API documentation - improving example for snapshot history	2023-02-22 23:40:16 +01:00
dgtlmoon	3e98c8ae4b	API - Adding current version to 'System Information' endpoint, bumping API docs, Re #1429	2023-02-22 23:34:36 +01:00
dgtlmoon	aaad71fc19	Further improving API documentation Re #1426	2023-02-22 21:30:02 +01:00
dgtlmoon	78f93113d8	Improving API documentation Re #1426	2023-02-22 20:57:01 +01:00
dgtlmoon	e9e586205a	Browser Steps - Adding "Wait for text" and "Wait for text in element" Re #1427	2023-02-22 20:10:21 +01:00
dgtlmoon	89f1ba58b6	Re #1382 - UI fix - sorting now works with selected tag	2023-02-17 20:39:18 +01:00
dgtlmoon	6f4fd011e3	Dont rewrite/resave snapshot when its the same data, just bump the history index, saves disk space. (#1414 )	2023-02-17 17:15:27 +01:00
dgtlmoon	900dc5ee78	Fetching - False alerts issue #962 - be sure to avoid triggering changedetection when checksums were the same (#1410 )	2023-02-17 16:59:03 +01:00
dgtlmoon	7b8b50138b	Deleting a watch now removes the entire watch storage directory (#1408 )	2023-02-11 14:10:54 +01:00
dgtlmoon	01af21f856	Use year/date in the backup snapshot zip filename instead of epoch seconds (#1377 #1407 )	2023-02-11 13:44:16 +01:00
dgtlmoon	f7f4ab314b	PDF text conversion - fix bug where it detected a site as a PDF file incorrectly Re #1392 #1393	2023-02-08 09:32:57 +01:00
dgtlmoon	ce0355c0ad	Remove unused code (#1394 )	2023-02-08 09:32:15 +01:00
dgtlmoon	0f43213d9d	UI - preview page - Fix bug where playwright/chrome was system default and [preview] didnt show snapshot	2023-02-07 16:55:34 +01:00
dgtlmoon	93c57d9fad	Adding example docker-compose.yml config to ignore errors from self-signed certs #1389	2023-02-06 17:24:12 +01:00
dgtlmoon	3cdd075baf	0.40.2	2023-02-03 19:20:13 +01:00
dgtlmoon	5c617e8530	Code cleanup - remove unused import	2023-02-03 18:35:58 +01:00
dgtlmoon	1a48965ba1	UI fix - Fix logic for showing screenshot on diff page (#1379 )	2023-02-03 11:23:48 +01:00
dgtlmoon	41856c4ed8	Re #1365 - Playwright - Browser "Service Workers" should be enabled by default but unset via env var PLAYWRIGHT_SERVICE_WORKERS=block (#1367 )	2023-02-01 20:50:40 +01:00
dgtlmoon	0ed897c50f	New setting to allow passwordless access to your 'diff' page - perfect for sharing your diff page securely, refactored login code (#1357 )	2023-01-29 22:36:55 +01:00
dgtlmoon	f8e587c415	Security - Possible stored XSS in watch list - Only permit HTTP/HTTP/FTP by default - override with env var `SAFE_PROTOCOL_REGEX` (#1359 )	2023-01-29 11:12:06 +01:00
dgtlmoon	d47a25eb6d	Playwright - Removing old bug fix where playwright needed screenshot called twice to make the full screen screenshot be actually fullscreen (#1356 )	2023-01-28 15:02:53 +01:00
dgtlmoon	9a0792d185	Fetch backend UI default fixes for VisualSelector and BrowserSteps (#1344 )	2023-01-25 19:47:54 +01:00
dgtlmoon	948ef7ade4	Fix fetch UI default fetch backend option icon (#1343 )	2023-01-25 18:07:44 +01:00
dgtlmoon	0ba139f8f9	Docker container build - docker container buildx version change causing errors with watchtower and others (#1336 )	2023-01-24 23:45:43 +01:00
dgtlmoon	a9431191fc	0.40.1.1	2023-01-22 13:03:15 +01:00
dgtlmoon	774451f256	Re #1328 - add `-6` flag to enable IPv6 (#1329 )	2023-01-22 11:10:25 +01:00