hmm

woops
try some matrix build
2025-05-10 10:38:10 +02:00 · 2025-05-10 10:37:27 +02:00 · 2025-05-10 10:36:41 +02:00 · 2025-05-10 10:28:02 +02:00 · 2025-05-09 23:29:21 +02:00 · 2025-05-09 14:35:35 +02:00
46 changed files with 1503 additions and 450 deletions
--- a/.github/workflows/test-container-build.yml
+++ b/.github/workflows/test-container-build.yml
@@ -1,10 +1,5 @@
 name: ChangeDetection.io Container Build Test
 # Triggers the workflow on push or pull request events
 # This line doesn't work, even though it is the documented one
 #on: [push, pull_request]
 on:
  push:
    paths:
@@ -20,51 +15,53 @@ on:
      - .github/workflows/*
      - .github/test/Dockerfile*
  # Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
  # @todo: some kind of path filter for requirements.txt and Dockerfile
 jobs:
  test-container-build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so the versions stay strings: an unquoted 3.10 would be loaded
        # as the float 3.1 and select the wrong interpreter.
        python_version: ['3.11', '3.12', '3.13']
    steps:
-        - uses: actions/checkout@v4
+      - uses: actions/checkout@v4
        - name: Set up Python 3.11
          uses: actions/setup-python@v5
          with:
            python-version: 3.11
-        # Just test that the build works, some libraries won't compile on ARM/rPi etc
+      - name: Set up Python ${{ matrix.python_version }}
-        - name: Set up QEMU
+        uses: actions/setup-python@v5
-          uses: docker/setup-qemu-action@v3
+        with:
-          with:
+          python-version: ${{ matrix.python_version }}
            image: tonistiigi/binfmt:latest
            platforms: all
-        - name: Set up Docker Buildx
+      # Set up QEMU
-          id: buildx
+      - name: Set up QEMU
-          uses: docker/setup-buildx-action@v3
+        uses: docker/setup-qemu-action@v3
-          with:
+        with:
-            install: true
+          image: tonistiigi/binfmt:latest
-            version: latest
+          platforms: all
            driver-opts: image=moby/buildkit:master
-        # https://github.com/dgtlmoon/changedetection.io/pull/1067
+      - name: Set up Docker Buildx
-        # Check we can still build under alpine/musl
+        id: buildx
-        - name: Test that the docker containers can build (musl via alpine check)
+        uses: docker/setup-buildx-action@v3
-          id: docker_build_musl
+        with:
-          uses: docker/build-push-action@v6
+          install: true
-          with:
+          version: latest
-            context: ./
+          driver-opts: image=moby/buildkit:master
            file: ./.github/test/Dockerfile-alpine
            platforms: linux/amd64,linux/arm64
-        - name: Test that the docker containers can build
+      # Musl (alpine) build test (runs once per matrix job, or can skip with conditional if needed)
-          id: docker_build
+      - name: Test that the docker containers can build (musl via alpine check)
-          uses: docker/build-push-action@v6
+        id: docker_build_musl
-          # https://github.com/docker/build-push-action#customizing
+        uses: docker/build-push-action@v6
-          with:
+        with:
-            context: ./
+          context: ./
-            file: ./Dockerfile
+          file: ./.github/test/Dockerfile-alpine
-            platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
+          platforms: linux/amd64,linux/arm64
            cache-from: type=local,src=/tmp/.buildx-cache
            cache-to: type=local,dest=/tmp/.buildx-cache
      # Main build with matrix Python version
      - name: Test that the docker containers can build (Python ${{ matrix.python_version }})
        id: docker_build
        uses: docker/build-push-action@v6
        with:
          context: ./
          file: ./Dockerfile
          platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8,linux/aarch64
          cache-from: type=local,src=/tmp/.buildx-cache
          cache-to: type=local,dest=/tmp/.buildx-cache
          # `build-args` is an input of docker/build-push-action, so it must live
          # under `with:` (one KEY=VALUE per line). A step-level `build:` key is
          # not part of the workflow step schema and never reaches the action.
          # NOTE(review): assumes ./Dockerfile declares `ARG PYTHON_VERSION` - confirm.
          build-args: |
            PYTHON_VERSION=${{ matrix.python_version }}
--- a/.github/workflows/test-only.yml
+++ b/.github/workflows/test-only.yml
@@ -8,13 +8,13 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
-      - name: Lint with flake8
+      - name: Lint with Ruff
        run: |
-          pip3 install flake8
+          pip install ruff
-          # stop the build if there are Python syntax errors or undefined names
+          # Check for syntax errors and undefined names
-          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+          ruff check . --select E9,F63,F7,F82
-          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+          # Complete check with errors treated as warnings
-          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+          ruff check . --exit-zero
  test-application-3-10:
    needs: lint-code
@@ -41,5 +41,4 @@ jobs:
    uses: ./.github/workflows/test-stack-reusable-workflow.yml
    with:
      python-version: '3.13'
-      skip-pypuppeteer: true
+      skip-pypuppeteer: true
--- a/.github/workflows/test-stack-reusable-workflow.yml
+++ b/.github/workflows/test-stack-reusable-workflow.yml
@@ -172,8 +172,8 @@ jobs:
          curl --retry-connrefused --retry 6  -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
          # Check whether TRACE log is enabled.
-          # Also, check whether TRACE is came from STDERR
+          # Also, check whether TRACE came from STDOUT
-          docker logs test-changedetectionio 2>&1 1>/dev/null | grep 'TRACE log is enabled' || exit 1
+          docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1
          # Check whether DEBUG came from STDOUT
          docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,7 @@ dist/
 .env
 .venv/
 venv/
 .python-version
 # IDEs
 .idea
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,9 @@
 repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.11.2
    hooks:
      # Lint (and apply safe fixes)
      - id: ruff
        args: [--fix]
      # Format
      - id: ruff-format
--- a/.ruff.toml
+++ b/.ruff.toml
@@ -0,0 +1,48 @@
 # Minimum supported version
 target-version = "py310"
 # Formatting options
 line-length = 100
 indent-width = 4
 exclude = [
    "__pycache__",
    ".eggs",
    ".git",
    ".tox",
    ".venv",
    "*.egg-info",
    "*.pyc",
 ]
 [lint]
 # https://docs.astral.sh/ruff/rules/
 select = [
    "B", # flake8-bugbear
    "B9",
    "C", 
    "E", # pycodestyle
    "F", # Pyflakes
    "I", # isort
    "N", # pep8-naming
    "UP", # pyupgrade
    "W", # pycodestyle
 ]
 ignore = [
    "B007", # unused-loop-control-variable
    "B909", # loop-iterator-mutation
    "E203", # whitespace-before-punctuation
    "E266", # multiple-leading-hashes-for-block-comment
    "E501", # line-too-long
    "F403", # undefined-local-with-import-star
    "N802", # invalid-function-name
    "N806", # non-lowercase-variable-in-function
    "N815", # mixed-case-variable-in-class-scope
 ]
 [lint.mccabe]
 max-complexity = 12
 [format]
 indent-style = "space"
 quote-style = "preserve"
--- a/2
+++ b/2
@@ -68,7 +68,7 @@ COPY changedetection.py /app/changedetection.py
 # Github Action test purpose(test-only.yml).
 # On production, it is effectively LOGGER_LEVEL=''.
 ARG LOGGER_LEVEL=''
-ENV LOGGER_LEVEL "$LOGGER_LEVEL"
+ENV LOGGER_LEVEL="$LOGGER_LEVEL"
 WORKDIR /app
 CMD ["python", "./changedetection.py", "-d", "/datastore"]
--- a/changedetection.py
+++ b/changedetection.py
@@ -3,4 +3,6 @@
 # Only exists for direct CLI usage
 import changedetectionio
-changedetectionio.main()
+
 if __name__ == '__main__':
    changedetectionio.main()
--- a/changedetectionio/PLUGIN_README.md
+++ b/changedetectionio/PLUGIN_README.md
@@ -0,0 +1,98 @@
 # Creating Plugins for changedetection.io
 This document describes how to create plugins for changedetection.io. Plugins can be used to extend the functionality of the application in various ways.
 ## Plugin Types
 ### UI Stats Tab Plugins
 These plugins can add content to the Stats tab in the Edit page. This is useful for adding custom statistics or visualizations about a watch.
 #### Creating a UI Stats Tab Plugin
 1. Create a Python file in a directory that will be loaded by the plugin system.
 2. Use the `global_hookimpl` decorator to implement the `ui_edit_stats_extras` hook:
 ```python
 import pluggy
 from loguru import logger
 global_hookimpl = pluggy.HookimplMarker("changedetectionio")
@global_hookimpl
 def ui_edit_stats_extras(watch):
    """Add custom content to the stats tab"""
    # Calculate or retrieve your stats
    my_stat = calculate_something(watch)
    # Return HTML content as a string
    html = f"""
    <div class="my-plugin-stats">
        <h4>My Plugin Statistics</h4>
        <p>My statistic: {my_stat}</p>
    </div>
    """
    return html
 ```
 3. The HTML you return will be included in the Stats tab.
 ## Plugin Loading
 Plugins can be loaded from:
 1. Built-in plugin directories in the codebase
 2. External packages using setuptools entry points
 To add a new plugin directory, modify the `plugin_dirs` dictionary in `pluggy_interface.py`.
 ## Example Plugin
 Here's a simple example of a plugin that adds a word count statistic to the Stats tab:
 ```python
 import pluggy
 from loguru import logger
 global_hookimpl = pluggy.HookimplMarker("changedetectionio")
 def count_words_in_history(watch):
    """Count words in the latest snapshot"""
    try:
        if not watch.history.keys():
            return 0
        latest_key = list(watch.history.keys())[-1]
        latest_content = watch.get_history_snapshot(latest_key)
        return len(latest_content.split())
    except Exception as e:
        logger.error(f"Error counting words: {str(e)}")
        return 0
@global_hookimpl
 def ui_edit_stats_extras(watch):
    """Add word count to the Stats tab"""
    word_count = count_words_in_history(watch)
    html = f"""
    <div class="word-count-stats">
        <h4>Content Analysis</h4>
        <table class="pure-table">
            <tbody>
                <tr>
                    <td>Word count (latest snapshot)</td>
                    <td>{word_count}</td>
                </tr>
            </tbody>
        </table>
    </div>
    """
    return html
 ```
 ## Testing Your Plugin
 1. Place your plugin in one of the directories scanned by the plugin system
 2. Restart changedetection.io
 3. Go to the Edit page of a watch and check the Stats tab to see your content
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@@ -2,7 +2,7 @@
 # Read more https://github.com/dgtlmoon/changedetection.io/wiki
-__version__ = '0.49.13'
+__version__ = '0.49.16'
 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
@@ -106,7 +106,7 @@ def main():
    # Without this, a logger will be duplicated
    logger.remove()
    try:
-        log_level_for_stdout = { 'DEBUG', 'SUCCESS' }
+        log_level_for_stdout = { 'TRACE', 'DEBUG', 'INFO', 'SUCCESS' }
        logger.configure(handlers=[
            {"sink": sys.stdout, "level": logger_level,
             "filter" : lambda record: record['level'].name in log_level_for_stdout},
--- a/changedetectionio/blueprint/browser_steps/init.py
+++ b/changedetectionio/blueprint/browser_steps/init.py
@@ -53,14 +53,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        a = "?" if not '?' in base_url else '&'
        base_url += a + f"timeout={keepalive_ms}"
-        try:
+        browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(base_url)
            browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(base_url)
        except Exception as e:
            if 'ECONNREFUSED' in str(e):
                return make_response('Unable to start the Playwright Browser session, is it running?', 401)
            else:
                # Other errors, bad URL syntax, bad reply etc
                return make_response(str(e), 401)
        proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
        proxy = None
@@ -109,7 +102,16 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        logger.debug("Starting connection with playwright")
        logger.debug("browser_steps.py connecting")
-        browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid)
+
        try:
            browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid)
        except Exception as e:
            if 'ECONNREFUSED' in str(e):
                return make_response('Unable to start the Playwright Browser session, is sockpuppetbrowser running? Network configuration is OK?', 401)
            else:
                # Other errors, bad URL syntax, bad reply etc
                return make_response(str(e), 401)
        logger.debug("Starting connection with playwright - done")
        return {'browsersteps_session_id': browsersteps_session_id}
@@ -166,9 +168,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
            step_optional_value = request.form.get('optional_value')
            is_last_step = strtobool(request.form.get('is_last_step'))
            # @todo try.. except.. nice errors not popups..
            try:
                browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(action_name=step_operation,
                                         selector=step_selector,
                                         optional_value=step_optional_value)
--- a/changedetectionio/blueprint/browser_steps/browser_steps.py
+++ b/changedetectionio/blueprint/browser_steps/browser_steps.py
@@ -1,6 +1,8 @@
 import os
 import time
 import re
 import sys
 import traceback
 from random import randint
 from loguru import logger
@@ -35,6 +37,7 @@ browser_step_ui_config = {'Choose one': '0 0',
                          'Make all child elements visible': '1 0',
                          'Press Enter': '0 0',
                          'Select by label': '1 1',
                          '<select> by option text': '1 1',
                          'Scroll down': '0 0',
                          'Uncheck checkbox': '1 0',
                          'Wait for seconds': '0 1',
@@ -54,7 +57,6 @@ browser_step_ui_config = {'Choose one': '0 0',
 class steppable_browser_interface():
    page = None
    start_url = None
    action_timeout = 10 * 1000
    def __init__(self, start_url):
@@ -62,6 +64,10 @@ class steppable_browser_interface():
    # Convert and perform "Click Button" for example
    def call_action(self, action_name, selector=None, optional_value=None):
        if self.page is None:
            logger.warning("Cannot call action on None page object")
            return
        now = time.time()
        call_action_name = re.sub('[^0-9a-zA-Z]+', '_', action_name.lower())
        if call_action_name == 'choose_one':
@@ -72,28 +78,33 @@ class steppable_browser_interface():
        if selector and selector.startswith('/') and not selector.startswith('//'):
            selector = "xpath=" + selector
        # Check if action handler exists
        if not hasattr(self, "action_" + call_action_name):
            logger.warning(f"Action handler for '{call_action_name}' not found")
            return
        action_handler = getattr(self, "action_" + call_action_name)
        # Support for Jinja2 variables in the value and selector
        if selector and ('{%' in selector or '{{' in selector):
            selector = jinja_render(template_str=selector)
        if optional_value and ('{%' in optional_value or '{{' in optional_value):
            optional_value = jinja_render(template_str=optional_value)
        action_handler(selector, optional_value)
        # Safely wait for timeout
        self.page.wait_for_timeout(1.5 * 1000)
        logger.debug(f"Call action done in {time.time()-now:.2f}s")
    def action_goto_url(self, selector=None, value=None):
-        # self.page.set_viewport_size({"width": 1280, "height": 5000})
+        if not value:
            logger.warning("No URL provided for goto_url action")
            return None
        now = time.time()
        response = self.page.goto(value, timeout=0, wait_until='load')
        # Should be the same as the puppeteer_fetch.js methods, means, load with no timeout set (skip timeout)
        #and also wait for seconds ?
        #await page.waitForTimeout(1000);
        #await page.waitForTimeout(extra_wait_ms);
        logger.debug(f"Time to goto URL {time.time()-now:.2f}s")
        return response
@@ -103,36 +114,40 @@ class steppable_browser_interface():
    def action_click_element_containing_text(self, selector=None, value=''):
        logger.debug("Clicking element containing text")
-        if not len(value.strip()):
+        if not value or not len(value.strip()):
            return
        elem = self.page.get_by_text(value)
        if elem.count():
            elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
    def action_click_element_containing_text_if_exists(self, selector=None, value=''):
        logger.debug("Clicking element containing text if exists")
-        if not len(value.strip()):
+        if not value or not len(value.strip()):
            return
        elem = self.page.get_by_text(value)
        logger.debug(f"Clicking element containing text - {elem.count()} elements found")
        if elem.count():
            elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
-        else:
+                
            return
    def action_enter_text_in_field(self, selector, value):
-        if not len(selector.strip()):
+        if not selector or not len(selector.strip()):
            return
        self.page.fill(selector, value, timeout=self.action_timeout)
    def action_execute_js(self, selector, value):
-        response = self.page.evaluate(value)
+        if not value:
-        return response
+            return None
        return self.page.evaluate(value)
    def action_click_element(self, selector, value):
        logger.debug("Clicking element")
-        if not len(selector.strip()):
+        if not selector or not len(selector.strip()):
            return
        self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500))
@@ -140,24 +155,38 @@ class steppable_browser_interface():
    def action_click_element_if_exists(self, selector, value):
        import playwright._impl._errors as _api_types
        logger.debug("Clicking element if exists")
-        if not len(selector.strip()):
+        if not selector or not len(selector.strip()):
            return
        try:
            self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500))
-        except _api_types.TimeoutError as e:
+        except _api_types.TimeoutError:
            return
-        except _api_types.Error as e:
+        except _api_types.Error:
            # Element was there, but page redrew and now its long long gone
            return
    def action_click_x_y(self, selector, value):
-        if not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value):
+        if not value or not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value):
-            raise Exception("'Click X,Y' step should be in the format of '100 , 90'")
+            logger.warning("'Click X,Y' step should be in the format of '100 , 90'")
            return
-        x, y = value.strip().split(',')
+        try:
-        x = int(float(x.strip()))
+            x, y = value.strip().split(',')
-        y = int(float(y.strip()))
+            x = int(float(x.strip()))
-        self.page.mouse.click(x=x, y=y, delay=randint(200, 500))
+            y = int(float(y.strip()))
            self.page.mouse.click(x=x, y=y, delay=randint(200, 500))
        except Exception as e:
            logger.error(f"Error parsing x,y coordinates: {str(e)}")
    def action__select_by_option_text(self, selector, value):
        """Choose the option labelled ``value`` in the element matched by ``selector``.

        The doubled underscore in the name comes from how ``call_action``
        normalises the UI label '<select> by option text' (non-alphanumerics
        become '_') before prefixing it with ``action_``.

        Does nothing when ``selector`` is empty or whitespace-only.
        """
        if not (selector and selector.strip()):
            return

        self.page.select_option(
            selector,
            label=value,
            timeout=self.action_timeout,
        )
    def action_scroll_down(self, selector, value):
        # Some sites this doesnt work on for some reason
@@ -165,23 +194,42 @@ class steppable_browser_interface():
        self.page.wait_for_timeout(1000)
    def action_wait_for_seconds(self, selector, value):
-        self.page.wait_for_timeout(float(value.strip()) * 1000)
+        try:
            seconds = float(value.strip()) if value else 1.0
            self.page.wait_for_timeout(seconds * 1000)
        except (ValueError, TypeError) as e:
            logger.error(f"Invalid value for wait_for_seconds: {str(e)}")
    def action_wait_for_text(self, selector, value):
        if not value:
            return
        import json
        v = json.dumps(value)
-        self.page.wait_for_function(f'document.querySelector("body").innerText.includes({v});', timeout=30000)
+        self.page.wait_for_function(
            f'document.querySelector("body").innerText.includes({v});',
            timeout=30000
        )
    def action_wait_for_text_in_element(self, selector, value):
        if not selector or not value:
            return
        import json
        s = json.dumps(selector)
        v = json.dumps(value)
-        self.page.wait_for_function(f'document.querySelector({s}).innerText.includes({v});', timeout=30000)
+        
        self.page.wait_for_function(
            f'document.querySelector({s}).innerText.includes({v});',
            timeout=30000
        )
    # @todo - in the future make some popout interface to capture what needs to be set
    # https://playwright.dev/python/docs/api/class-keyboard
    def action_press_enter(self, selector, value):
        """Press the Enter key on the page's keyboard; both arguments are unused."""
        keyboard = self.page.keyboard
        # Randomised 200-500 delay is passed straight through to keyboard.press()
        keyboard.press("Enter", delay=randint(200, 500))
    def action_press_page_up(self, selector, value):
        """Press the PageUp key on the page's keyboard; both arguments are unused."""
        keyboard = self.page.keyboard
        # Randomised 200-500 delay is passed straight through to keyboard.press()
        keyboard.press("PageUp", delay=randint(200, 500))
@@ -190,17 +238,30 @@ class steppable_browser_interface():
        self.page.keyboard.press("PageDown", delay=randint(200, 500))
    def action_check_checkbox(self, selector, value):
        """Tick the checkbox matched by ``selector``; ``value`` is unused.

        Does nothing when ``selector`` is empty or ``None``.
        """
        if selector:
            checkbox = self.page.locator(selector)
            checkbox.check(timeout=self.action_timeout)
    def action_uncheck_checkbox(self, selector, value):
        """Clear the checkbox matched by ``selector``; ``value`` is unused.

        Does nothing when ``selector`` is empty or ``None``.
        """
        if selector:
            checkbox = self.page.locator(selector)
            checkbox.uncheck(timeout=self.action_timeout)
    def action_remove_elements(self, selector, value):
        """Delete every DOM element that matches ``selector``; ``value`` is unused.

        Does nothing when ``selector`` is empty or ``None``.
        """
        if selector:
            matches = self.page.locator(selector)
            # The removal itself runs in the browser, over all matches at once
            matches.evaluate_all("els => els.forEach(el => el.remove())")
    def action_make_all_child_elements_visible(self, selector, value):
        """Recursively makes all child elements inside the given selector fully visible."""
        if not selector:
            return
        self.page.locator(selector).locator("*").evaluate_all("""
            els => els.forEach(el => {
                el.style.display = 'block';   // Forces it to be displayed
@@ -224,7 +285,9 @@ class browsersteps_live_ui(steppable_browser_interface):
    # bump and kill this if idle after X sec
    age_start = 0
    headers = {}
-
+    # Track if resources are properly cleaned up
    _is_cleaned_up = False
    # use a special driver, maybe locally etc
    command_executor = os.getenv(
        "PLAYWRIGHT_BROWSERSTEPS_DRIVER_URL"
@@ -243,9 +306,14 @@ class browsersteps_live_ui(steppable_browser_interface):
        self.age_start = time.time()
        self.playwright_browser = playwright_browser
        self.start_url = start_url
        self._is_cleaned_up = False
        if self.context is None:
            self.connect(proxy=proxy)
    def __del__(self):
        """Finalizer: release browser resources when this object is garbage collected."""
        # Ensure cleanup happens if object is garbage collected
        # cleanup() returns immediately when _is_cleaned_up is already set, so an
        # explicit cleanup followed by finalization does not repeat the work.
        self.cleanup()
    # Connect and setup a new context
    def connect(self, proxy=None):
        # Should only get called once - test that
@@ -264,31 +332,74 @@ class browsersteps_live_ui(steppable_browser_interface):
            user_agent=manage_user_agent(headers=self.headers),
        )
        self.page = self.context.new_page()
        # self.page.set_default_navigation_timeout(keep_open)
        self.page.set_default_timeout(keep_open)
-        # @todo probably this doesnt work
+        # Set event handlers
-        self.page.on(
+        self.page.on("close", self.mark_as_closed)
            "close",
            self.mark_as_closed,
        )
        # Listen for all console events and handle errors
        self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
        logger.debug(f"Time to browser setup {time.time()-now:.2f}s")
        self.page.wait_for_timeout(1 * 1000)
    def mark_as_closed(self):
        """Handler registered for the page's "close" event; releases this session's resources."""
        logger.debug("Page closed, cleaning up..")
        self.cleanup()
    def cleanup(self):
        """Release the Playwright page and context held by this session.

        Idempotent and re-entrancy safe: the ``_is_cleaned_up`` flag is set
        *before* any browser call is made, so a nested invocation (for example
        the page "close" handler firing while ``page.close()`` is running,
        should listener removal have failed) returns immediately instead of
        repeating the teardown.

        Every teardown step is best-effort: a failure is logged at debug level
        and the remaining steps still run. On return ``self.page`` and
        ``self.context`` are both ``None``.
        """
        if self._is_cleaned_up:
            return

        # Mark first so re-entrant calls triggered by the teardown itself are no-ops
        self._is_cleaned_up = True
        logger.debug("Cleaning up browser steps resources")

        # Clean up page
        page = getattr(self, 'page', None)
        if page is not None:
            # Order matters: ask for GC, detach our "close" handler, then close.
            # Lambdas defer attribute lookups so they fail inside the try below.
            page_steps = (
                ("Error during page garbage collection", lambda: page.request_gc()),
                ("Error removing event listeners", lambda: page.remove_listener("close", self.mark_as_closed)),
                ("Error closing page", lambda: page.close()),
            )
            for failure_prefix, step in page_steps:
                try:
                    step()
                except Exception as e:
                    logger.debug(f"{failure_prefix}: {str(e)}")

            self.page = None

        # Clean up context
        context = getattr(self, 'context', None)
        if context is not None:
            try:
                context.close()
            except Exception as e:
                logger.debug(f"Error closing context: {str(e)}")

            self.context = None

        logger.debug("Browser steps resources cleanup complete")
    @property
    def has_expired(self):
-        if not self.page:
+        if not self.page or self._is_cleaned_up:
            return True
-
+        
        # Check if session has expired based on age
        max_age_seconds = int(os.getenv("BROWSER_STEPS_MAX_AGE_SECONDS", 60 * 10))  # Default 10 minutes
        if (time.time() - self.age_start) > max_age_seconds:
            logger.debug(f"Browser steps session expired after {max_age_seconds} seconds")
            return True
        return False
    def get_current_state(self):
        """Return the screenshot and interactive elements mapping, generally always called after action_()"""
@@ -297,36 +408,55 @@ class browsersteps_live_ui(steppable_browser_interface):
        # because we for now only run browser steps in playwright mode (not puppeteer mode)
        from changedetectionio.content_fetchers.playwright import capture_full_page
        # Safety check - don't proceed if resources are cleaned up
        if self._is_cleaned_up or self.page is None:
            logger.warning("Attempted to get current state after cleanup")
            return (None, None)
        xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
        now = time.time()
        self.page.wait_for_timeout(1 * 1000)
-        screenshot = capture_full_page(page=self.page)
+        screenshot = None
        xpath_data = None
        try:
            # Get screenshot first
            screenshot = capture_full_page(page=self.page)
            logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
-        logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
+            # Then get interactive elements
            now = time.time()
            self.page.evaluate("var include_filters=''")
            self.page.request_gc()
-        now = time.time()
+            scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
        self.page.evaluate("var include_filters=''")
        # Go find the interactive elements
        # @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers?
-        self.page.request_gc()
+            MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
            xpath_data = json.loads(self.page.evaluate(xpath_element_js, {
                "visualselector_xpath_selectors": scan_elements,
                "max_height": MAX_TOTAL_HEIGHT
            }))
            self.page.request_gc()
-        scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
+            # Sort elements by size
-
+            xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
-        MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
+            logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s")
-        xpath_data = json.loads(self.page.evaluate(xpath_element_js, {
+            
-            "visualselector_xpath_selectors": scan_elements,
+        except Exception as e:
-            "max_height": MAX_TOTAL_HEIGHT
+            logger.error(f"Error getting current state: {str(e)}")
-        }))
+            # Attempt recovery - force garbage collection
-        self.page.request_gc()
+            try:
-
+                self.page.request_gc()
-        # So the JS will find the smallest one first
+            except:
-        xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
+                pass
-        logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s")
+        
-
+        # Request garbage collection one final time
-        # playwright._impl._api_types.Error: Browser closed.
+        try:
-        # @todo show some countdown timer?
+            self.page.request_gc()
        except:
            pass
        return (screenshot, xpath_data)
--- a/changedetectionio/blueprint/tags/init.py
+++ b/changedetectionio/blueprint/tags/init.py
@@ -104,6 +104,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
            uuid = list(datastore.data['settings']['application']['tags'].keys()).pop()
        default = datastore.data['settings']['application']['tags'].get(uuid)
        if not default:
            flash("Tag not found", "error")
            return redirect(url_for('watchlist.index'))
        form = group_restock_settings_form(
                                       formdata=request.form if request.method == 'POST' else None,
--- a/changedetectionio/blueprint/tags/templates/edit-tag.html
+++ b/changedetectionio/blueprint/tags/templates/edit-tag.html
@@ -66,7 +66,7 @@
                    <div  class="pure-control-group inline-radio">
                      {{ render_checkbox_field(form.notification_muted) }}
                    </div>
-                    {% if is_html_webdriver %}
+                    {% if 1 %}
                    <div class="pure-control-group inline-radio">
                      {{ render_checkbox_field(form.notification_screenshot) }}
                        <span class="pure-form-message-inline">
--- a/changedetectionio/blueprint/ui/edit.py
+++ b/changedetectionio/blueprint/ui/edit.py
@@ -213,9 +213,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
            if request.method == 'POST' and not form.validate():
                flash("An error occurred, please see below.", "error")
            visualselector_data_is_ready = datastore.visualselector_data_is_ready(uuid)
            # JQ is difficult to install on windows and must be manually added (outside requirements.txt)
            jq_support = True
            try:
@@ -225,16 +222,20 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
            watch = datastore.data['watching'].get(uuid)
            # if system or watch is configured to need a chrome type browser
            system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
-
+            watch_needs_selenium_or_playwright = False
            watch_uses_webdriver = False
            if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
-                watch_uses_webdriver = True
+                watch_needs_selenium_or_playwright = True
            from zoneinfo import available_timezones
            # Only works reliably with Playwright
            # Import the global plugin system
            from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras
            template_args = {
                'available_processors': processors.available_processors(),
                'available_timezones': sorted(available_timezones()),
@@ -247,14 +248,18 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
                'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False,
                'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0,
                'has_special_tag_options': _watch_has_tag_options_set(watch=watch),
                'watch_uses_webdriver': watch_uses_webdriver,
                'jq_support': jq_support,
                'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False),
                'settings_application': datastore.data['settings']['application'],
                'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'),
                'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'),
                'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch),
                'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid),
                'timezone_default_config': datastore.data['settings']['application'].get('timezone'),
                'using_global_webdriver_wait': not default['webdriver_delay'],
                'uuid': uuid,
-                'watch': watch
+                'watch': watch,
                'watch_needs_selenium_or_playwright': watch_needs_selenium_or_playwright,
            }
            included_content = None
--- a/changedetectionio/conditions/init.py
+++ b/changedetectionio/conditions/init.py
@@ -5,7 +5,7 @@ from json_logic.builtins import BUILTINS
 from .exceptions import EmptyConditionRuleRowNotUsable
 from .pluggy_interface import plugin_manager  # Import the pluggy plugin manager
 from . import default_plugin
-
+from loguru import logger
 # List of all supported JSON Logic operators
 operator_choices = [
    (None, "Choose one - Operator"),
@@ -94,20 +94,41 @@ def execute_ruleset_against_all_plugins(current_watch_uuid: str, application_dat
    EXECUTE_DATA = {}
    result = True
-    ruleset_settings = application_datastruct['watching'].get(current_watch_uuid)
+    watch = application_datastruct['watching'].get(current_watch_uuid)
-    if ruleset_settings.get("conditions"):
+    if watch and watch.get("conditions"):
-        logic_operator = "and" if ruleset_settings.get("conditions_match_logic", "ALL") == "ALL" else "or"
+        logic_operator = "and" if watch.get("conditions_match_logic", "ALL") == "ALL" else "or"
-        complete_rules = filter_complete_rules(ruleset_settings['conditions'])
+        complete_rules = filter_complete_rules(watch['conditions'])
        if complete_rules:
            # Give all plugins a chance to update the data dict again (that we will test the conditions against)
            for plugin in plugin_manager.get_plugins():
-                new_execute_data = plugin.add_data(current_watch_uuid=current_watch_uuid,
+                try:
-                                                   application_datastruct=application_datastruct,
+                    import concurrent.futures
-                                                   ephemeral_data=ephemeral_data)
+                    import time
                    with concurrent.futures.ThreadPoolExecutor() as executor:
                        future = executor.submit(
                            plugin.add_data,
                            current_watch_uuid=current_watch_uuid,
                            application_datastruct=application_datastruct,
                            ephemeral_data=ephemeral_data
                        )
                        logger.debug(f"Trying plugin {plugin}....")
-                if new_execute_data and isinstance(new_execute_data, dict):
+                        # Set a timeout of 10 seconds
-                    EXECUTE_DATA.update(new_execute_data)
+                        try:
                            new_execute_data = future.result(timeout=10)
                            if new_execute_data and isinstance(new_execute_data, dict):
                                EXECUTE_DATA.update(new_execute_data)
                        except concurrent.futures.TimeoutError:
                            # The plugin took too long, abort processing for this watch
                            raise Exception(f"Plugin {plugin.__class__.__name__} took more than 10 seconds to run.")
                except Exception as e:
                    # Log the error but continue with the next plugin
                    import logging
                    logging.error(f"Error executing plugin {plugin.__class__.__name__}: {str(e)}")
                    continue
            # Create the ruleset
            ruleset = convert_to_jsonlogic(logic_operator=logic_operator, rule_dict=complete_rules)
@@ -132,3 +153,18 @@ for plugin in plugin_manager.get_plugins():
    if isinstance(new_field_choices, list):
        field_choices.extend(new_field_choices)
 def collect_ui_edit_stats_extras(watch):
    """Collect and combine HTML content from all plugins that implement ui_edit_stats_extras.

    :param watch: passed through to every plugin hook as ``watch=``.
    :return: the truthy plugin results joined with newlines, or "" when no plugin returned content.
    """
    extras_content = []
    for plugin in plugin_manager.get_plugins():
        # Plugins are not required to implement this hook - skip those explicitly
        # rather than relying on an AttributeError being swallowed.
        hook = getattr(plugin, 'ui_edit_stats_extras', None)
        if not callable(hook):
            continue
        try:
            content = hook(watch=watch)
        except Exception as e:
            # A broken plugin should not take the caller down, but the failure must be visible
            logger.warning(f"Plugin {plugin} failed in ui_edit_stats_extras: {str(e)}")
            continue
        if content:
            extras_content.append(content)
    # join() of an empty list is already ""
    return "\n".join(extras_content)
--- a/changedetectionio/conditions/pluggy_interface.py
+++ b/changedetectionio/conditions/pluggy_interface.py
@@ -1,5 +1,8 @@
 import pluggy
-from . import default_plugin  # Import the default plugin
+import os
 import importlib
 import sys
 from . import default_plugin
 # ✅ Ensure that the namespace in HookspecMarker matches PluginManager
 PLUGIN_NAMESPACE = "changedetectionio_conditions"
@@ -30,6 +33,11 @@ class ConditionsSpec:
    def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
        """Add to the datadict"""
        pass
    @hookspec
    def ui_edit_stats_extras(watch):
        """Return HTML content to add to the stats tab in the edit view"""
        # Specification only - the body is intentionally empty, implementations live in the plugins
        pass
 # ✅ Set up Pluggy Plugin Manager
 plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
@@ -40,5 +48,27 @@ plugin_manager.add_hookspecs(ConditionsSpec)
 # ✅ Register built-in plugins manually
 plugin_manager.register(default_plugin, "default_plugin")
 # ✅ Load plugins from the plugins directory
 def load_plugins_from_directory():
    """Import every module in the sibling 'plugins' directory and register it with plugin_manager.

    Does nothing when the directory is missing. A module that fails to import or
    register is reported with print() and skipped, the remaining modules are still loaded.
    """
    plugins_dir = os.path.join(os.path.dirname(__file__), 'plugins')
    # isdir() rather than exists(): a plain file called 'plugins' would make listdir() raise
    if not os.path.isdir(plugins_dir):
        return
    # Get all Python files (excluding __init__.py).
    # listdir() order is arbitrary, sort so the registration order is the same on every run.
    for filename in sorted(os.listdir(plugins_dir)):
        if filename.endswith(".py") and filename != "__init__.py":
            module_name = filename[:-3]  # Remove .py extension
            module_path = f"changedetectionio.conditions.plugins.{module_name}"
            try:
                module = importlib.import_module(module_path)
                # Register the plugin with pluggy
                plugin_manager.register(module, module_name)
            except (ImportError, AttributeError, ValueError) as e:
                # ValueError: pluggy refuses a plugin/name that is already registered
                print(f"Error loading plugin {module_name}: {e}")
 # Load plugins from the plugins directory
 load_plugins_from_directory()
 # ✅ Discover installed plugins from external packages (if any)
 plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE)
--- a/changedetectionio/conditions/plugins/init.py
+++ b/changedetectionio/conditions/plugins/init.py
@@ -0,0 +1 @@
 # Import plugins package to make them discoverable
--- a/changedetectionio/conditions/plugins/levenshtein_plugin.py
+++ b/changedetectionio/conditions/plugins/levenshtein_plugin.py
@@ -0,0 +1,107 @@
 import pluggy
 from loguru import logger
 # Support both plugin systems
 conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
 global_hookimpl = pluggy.HookimplMarker("changedetectionio")
 def levenshtein_ratio_recent_history(watch, incoming_text=None):
    """Compare the latest saved snapshot of a watch against another text.

    :param watch: object providing ``history`` (keyed by snapshot timestamp) and
                  ``get_history_snapshot(timestamp=...)``.
    :param incoming_text: text to compare against the latest snapshot. When it is None
                          (called from ui_edit_stats_extras) or empty, the previous
                          snapshot is used instead.
    :return: dict with 'distance', 'ratio' and 'percent_similar', or '' when there is
             nothing to compare or the calculation failed.
    """
    try:
        from Levenshtein import ratio, distance
        k = list(watch.history.keys())
        a = None
        b = None
        # Needs at least one snapshot to compare against
        if k:
            a = watch.get_history_snapshot(timestamp=k[-1])  # Latest saved snapshot
            if incoming_text:
                b = incoming_text
            elif len(k) >= 2:
                # No usable incoming text - compare against the previous snapshot's text
                # (not the history key itself)
                b = watch.get_history_snapshot(timestamp=k[-2])  # Previous snapshot
        if a and b:
            distance_value = distance(a, b)
            ratio_value = ratio(a, b)
            return {
                'distance': distance_value,
                'ratio': ratio_value,
                'percent_similar': round(ratio_value * 100, 2)
            }
    except Exception as e:
        logger.warning(f"Unable to calc similarity: {str(e)}")
    return ''
@conditions_hookimpl
 def register_operators():
    pass
@conditions_hookimpl
 def register_operator_choices():
    pass
@conditions_hookimpl
 def register_field_choices():
    return [
        ("levenshtein_ratio", "Levenshtein - Text similarity ratio"),
        ("levenshtein_distance", "Levenshtein - Text change distance"),
    ]
@conditions_hookimpl
 def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
    res = {}
    watch = application_datastruct['watching'].get(current_watch_uuid)
    # ephemeral_data['text'] will be the current text after filters, they may have edited filters but not saved them yet etc
    if watch and 'text' in ephemeral_data:
        lev_data = levenshtein_ratio_recent_history(watch, ephemeral_data.get('text',''))
        if isinstance(lev_data, dict):
            res['levenshtein_ratio'] = lev_data.get('ratio', 0)
            res['levenshtein_similarity'] = lev_data.get('percent_similar', 0)
            res['levenshtein_distance'] = lev_data.get('distance', 0)
    return res
@global_hookimpl
 def ui_edit_stats_extras(watch):
    """Add Levenshtein stats to the UI using the global plugin system"""
    """Generate the HTML for Levenshtein stats - shared by both plugin systems"""
    if len(watch.history.keys()) < 2:
        return "<p>Not enough history to calculate Levenshtein metrics</p>"
    try:
        lev_data = levenshtein_ratio_recent_history(watch)
        if not lev_data or not isinstance(lev_data, dict):
            return "<p>Unable to calculate Levenshtein metrics</p>"
        html = f"""
        <div class="levenshtein-stats">
            <h4>Levenshtein Text Similarity Details</h4>
            <table class="pure-table">
                <tbody>
                    <tr>
                        <td>Raw distance (edits needed)</td>
                        <td>{lev_data['distance']}</td>
                    </tr>
                    <tr>
                        <td>Similarity ratio</td>
                        <td>{lev_data['ratio']:.4f}</td>
                    </tr>
                    <tr>
                        <td>Percent similar</td>
                        <td>{lev_data['percent_similar']}%</td>
                    </tr>
                </tbody>
            </table>
            <p style="font-size: 80%;">Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one into the other.</p>
        </div>
        """
        return html
    except Exception as e:
        logger.error(f"Error generating Levenshtein UI extras: {str(e)}")
        return "<p>Error calculating Levenshtein metrics</p>"
--- a/changedetectionio/conditions/plugins/wordcount_plugin.py
+++ b/changedetectionio/conditions/plugins/wordcount_plugin.py
@@ -0,0 +1,82 @@
 import pluggy
 from loguru import logger
 # Support both plugin systems
 conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
 global_hookimpl = pluggy.HookimplMarker("changedetectionio")
 def count_words_in_history(watch, incoming_text=None):
    """Count whitespace-separated words.

    Counts `incoming_text` when it is given, otherwise the most recent entry of
    the watch history. Returns 0 when there is no history or on any error.
    """
    try:
        if incoming_text is not None:
            # Called from add_data with the incoming text
            text = incoming_text
        elif watch.history.keys():
            # Called from the UI extras - use the latest snapshot
            newest = list(watch.history.keys())[-1]
            text = watch.get_history_snapshot(newest)
        else:
            return 0
        return len(text.split())
    except Exception as e:
        logger.error(f"Error counting words: {str(e)}")
        return 0
 # Implement condition plugin hooks
@conditions_hookimpl
 def register_operators():
    # No custom operators needed
    return {}
@conditions_hookimpl
 def register_operator_choices():
    # No custom operator choices needed
    return []
@conditions_hookimpl
 def register_field_choices():
    # Add a field that will be available in conditions
    return [
        ("word_count", "Word count of content"),
    ]
@conditions_hookimpl
 def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
    """Add word count data for conditions"""
    result = {}
    watch = application_datastruct['watching'].get(current_watch_uuid)
    if watch and 'text' in ephemeral_data:
        word_count = count_words_in_history(watch, ephemeral_data['text'])
        result['word_count'] = word_count
    return result
 def _generate_stats_html(watch):
    """Render the word count of the watch (via count_words_in_history) as an HTML fragment."""
    total_words = count_words_in_history(watch)
    return f"""
    <div class="word-count-stats">
        <h4>Content Analysis</h4>
        <table class="pure-table">
            <tbody>
                <tr>
                    <td>Word count (latest snapshot)</td>
                    <td>{total_words}</td>
                </tr>
            </tbody>
        </table>
        <p style="font-size: 80%;">Word count is a simple measure of content length, calculated by splitting text on whitespace.</p>
    </div>
    """
@conditions_hookimpl
 def ui_edit_stats_extras(watch):
    """Add word count stats to the UI through conditions plugin system"""
    return _generate_stats_html(watch)
@global_hookimpl
 def ui_edit_stats_extras(watch):
    """Add word count stats to the UI using the global plugin system"""
    return _generate_stats_html(watch)
--- a/changedetectionio/content_fetchers/playwright.py
+++ b/changedetectionio/content_fetchers/playwright.py
@@ -26,9 +26,11 @@ def capture_full_page(page):
    step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow
    screenshot_chunks = []
    y = 0
-    
+
    # If page height is larger than current viewport, use a larger viewport for better capturing
    if page_height > page.viewport_size['height']:
        if page_height < step_size:
            step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
        logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
        # Set viewport to a larger size to capture more content at once
        page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
@@ -59,7 +61,10 @@ def capture_full_page(page):
        p.join()
        logger.debug(
            f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
-
+        # Explicit cleanup
        del screenshot_chunks
        del p
        del parent_conn, child_conn
        screenshot_chunks = None
        return screenshot
@@ -189,7 +194,6 @@ class fetcher(Fetcher):
            browsersteps_interface.page = self.page
            response = browsersteps_interface.action_goto_url(value=url)
            self.headers = response.all_headers()
            if response is None:
                context.close()
@@ -197,6 +201,8 @@ class fetcher(Fetcher):
                logger.debug("Content Fetcher > Response object from the browser communication was none")
                raise EmptyReply(url=url, status_code=None)
            self.headers = response.all_headers()
            try:
                if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
                    browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
@@ -286,12 +292,28 @@ class fetcher(Fetcher):
                    pass
                # Clean up resources properly
-                context.close()
+                try:
-                context = None
+                    self.page.request_gc()
                except:
                    pass
-                self.page.close()
+                try:
                    self.page.close()
                except:
                    pass
                self.page = None
-                browser.close()
+                try:
-                borwser = None
+                    context.close()
                except:
                    pass
                context = None
                try:
                    browser.close()
                except:
                    pass
                browser = None
--- a/changedetectionio/content_fetchers/puppeteer.py
+++ b/changedetectionio/content_fetchers/puppeteer.py
@@ -46,9 +46,10 @@ async def capture_full_page(page):
    screenshot_chunks = []
    y = 0
    if page_height > page.viewport['height']:
        if page_height < step_size:
            step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
        await page.setViewport({'width': page.viewport['width'], 'height': step_size})
    while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
        await page.evaluate(f"window.scrollTo(0, {y})")
        screenshot_chunks.append(await page.screenshot(type_='jpeg',
@@ -146,7 +147,7 @@ class fetcher(Fetcher):
                         is_binary,
                         empty_pages_are_a_change
                         ):
-
+        import re
        self.delete_browser_steps_screenshots()
        extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
@@ -171,6 +172,17 @@ class fetcher(Fetcher):
        # headless - ask a new page
        self.page = (pages := await browser.pages) and len(pages) or await browser.newPage()
        if '--window-size' in self.browser_connection_url:
            # Be sure the viewport is always the window-size, this is often not the same thing
            match = re.search(r'--window-size=(\d+),(\d+)', self.browser_connection_url)
            if match:
                logger.debug(f"Setting viewport to same as --window-size in browser connection URL {int(match.group(1))},{int(match.group(2))}")
                await self.page.setViewport({
                    "width": int(match.group(1)),
                    "height": int(match.group(2))
                })
                logger.debug(f"Puppeteer viewport size {self.page.viewport}")
        try:
            from pyppeteerstealth import inject_evasions_into_page
        except ImportError:
@@ -217,7 +229,6 @@ class fetcher(Fetcher):
        response = await self.page.goto(url, waitUntil="load")
        if response is None:
            await self.page.close()
            await browser.close()
--- a/changedetectionio/content_fetchers/requests.py
+++ b/changedetectionio/content_fetchers/requests.py
@@ -28,6 +28,7 @@ class fetcher(Fetcher):
        import chardet
        import requests
        from requests.exceptions import ProxyError, ConnectionError, RequestException
        if self.browser_steps_get_valid_steps():
            raise BrowserStepsInUnsupportedFetcher(url=url)
@@ -52,14 +53,19 @@ class fetcher(Fetcher):
        if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
            from requests_file import FileAdapter
            session.mount('file://', FileAdapter())
-
+        try:
-        r = session.request(method=request_method,
+            r = session.request(method=request_method,
-                            data=request_body.encode('utf-8') if type(request_body) is str else request_body,
+                                data=request_body.encode('utf-8') if type(request_body) is str else request_body,
-                            url=url,
+                                url=url,
-                            headers=request_headers,
+                                headers=request_headers,
-                            timeout=timeout,
+                                timeout=timeout,
-                            proxies=proxies,
+                                proxies=proxies,
-                            verify=False)
+                                verify=False)
        except Exception as e:
            msg = str(e)
            if proxies and 'SOCKSHTTPSConnectionPool' in msg:
                msg = f"Proxy connection failed? {msg}"
            raise Exception(msg) from e
        # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
        # For example - some sites don't tell us it's utf-8, but return utf-8 content
--- a/changedetectionio/content_fetchers/res/stock-not-in-stock.js
+++ b/changedetectionio/content_fetchers/res/stock-not-in-stock.js
@@ -10,6 +10,7 @@ async () => {
            'article épuisé',
            'artikel zurzeit vergriffen',
            'as soon as stock is available',
            'aucune offre n\'est disponible',
            'ausverkauft', // sold out
            'available for back order',
            'awaiting stock',
@@ -25,9 +26,8 @@ async () => {
            'dieser artikel ist bald wieder verfügbar',
            'dostępne wkrótce',
            'en rupture',
            'en rupture de stock',
            'épuisé',
            'esgotado',
            'in kürze lieferbar',
            'indisponible',
            'indisponível',
            'isn\'t in stock right now',
@@ -50,10 +50,12 @@ async () => {
            'niet leverbaar',
            'niet op voorraad',
            'no disponible',
-            'non disponibile',
+            'no featured offers available',
-            'non disponible',
+            'no longer available',
            'no longer in stock',
            'no tickets available',
            'non disponibile',
            'non disponible',
            'not available',
            'not currently available',
            'not in stock',
@@ -89,13 +91,15 @@ async () => {
            'vergriffen',
            'vorbestellen',
            'vorbestellung ist bald möglich',
            'we don\'t currently have any',
            'we couldn\'t find any products that match',
            'we do not currently have an estimate of when this product will be back in stock.',
            'we don\'t currently have any',
            'we don\'t know when or if this item will be back in stock.',
            'we were not able to find a match',
            'when this arrives in stock',
            'when this item is available to order',
            'zur zeit nicht an lager',
            'épuisé',
            '品切れ',
            '已售',
            '已售完',
@@ -122,6 +126,20 @@ async () => {
        // so it's good to filter to just the 'above the fold' elements
        // and it should be at least 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist
        function elementIsInEyeBallRange(element) {
            // Decide from the element's bottom edge (in page coordinates) whether it is worth scanning.
            // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
            // Note: there's also an automated test that places the 'out of stock' text fairly low down
            const bottomOnPage = element.getBoundingClientRect().bottom + window.scrollY;

            // <= 300: could be in the header area or some weird text in the heading
            // >= 1300: much further down (like a list of "you may like" products that have 'sold out' in there)
            return !(bottomOnPage <= 300 || bottomOnPage >= 1300);
        }
 // @todo - if it's SVG or IMG, go into image diff mode
@@ -158,9 +176,7 @@ async () => {
        for (let i = elementsToScan.length - 1; i >= 0; i--) {
            const element = elementsToScan[i];
-            // outside the 'fold' or some weird text in the heading area
+            if (!elementIsInEyeBallRange(element)) {
            // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
            if (element.getBoundingClientRect().top + window.scrollY >= vh || element.getBoundingClientRect().top + window.scrollY <= 100) {
                continue
            }
@@ -174,11 +190,11 @@ async () => {
            } catch (e) {
                console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
            }
            if (elementText.length) {
                // try which ones could mean its in stock
                if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) {
                    console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`)
                    element.style.border = "2px solid green"; // highlight the element that was detected as in stock
                    return 'Possibly in stock';
                }
            }
@@ -187,10 +203,8 @@ async () => {
        // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
        for (let i = elementsToScan.length - 1; i >= 0; i--) {
            const element = elementsToScan[i];
-            // outside the 'fold' or some weird text in the heading area
+
-            // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
+            if (!elementIsInEyeBallRange(element)) {
            // Note: theres also an automated test that places the 'out of stock' text fairly low down
            if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
                continue
            }
            elementText = "";
@@ -205,6 +219,7 @@ async () => {
                for (const outOfStockText of outOfStockTexts) {
                    if (elementText.includes(outOfStockText)) {
                        console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
                        element.style.border = "2px solid red"; // highlight the element that was detected as out of stock
                        return outOfStockText; // item is out of stock
                    }
                }
--- a/changedetectionio/content_fetchers/res/xpath_element_scraper.js
+++ b/changedetectionio/content_fetchers/res/xpath_element_scraper.js
@@ -202,7 +202,6 @@ async (options) => {
        // Foreach filter, go and find it on the page and add it to the results so we can visualise it again
        for (const f of include_filters) {
            bbox = false;
            q = false;
            if (!f.length) {
                console.log("xpath_element_scraper: Empty filter, skipping");
@@ -255,7 +254,7 @@ async (options) => {
                            console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
                        } catch (e) {
                            console.log(e)
-                            console.log("xpath_element_scraper: error looking up q.ownerElement")
+                            console.log("xpath_element_scraper: error looking up node.ownerElement")
                        }
                    }
--- a/changedetectionio/content_fetchers/screenshot_handler.py
+++ b/changedetectionio/content_fetchers/screenshot_handler.py
@@ -31,33 +31,33 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
        # Draw caption on top (overlaid, not extending canvas)
        draw = ImageDraw.Draw(stitched)
-
+        if original_page_height > capture_height:
-        caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
+            caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
-        padding = 10
+            padding = 10
-        font_size = 35
+            font_size = 35
-        font_color = (255, 0, 0)
+            font_color = (255, 0, 0)
-        background_color = (255, 255, 255)
+            background_color = (255, 255, 255)
-        # Try to load a proper font
+            # Try to load a proper font
-        try:
+            try:
-            font = ImageFont.truetype("arial.ttf", font_size)
+                font = ImageFont.truetype("arial.ttf", font_size)
-        except IOError:
+            except IOError:
-            font = ImageFont.load_default()
+                font = ImageFont.load_default()
-        bbox = draw.textbbox((0, 0), caption_text, font=font)
+            bbox = draw.textbbox((0, 0), caption_text, font=font)
-        text_width = bbox[2] - bbox[0]
+            text_width = bbox[2] - bbox[0]
-        text_height = bbox[3] - bbox[1]
+            text_height = bbox[3] - bbox[1]
-        # Draw white rectangle background behind text
+            # Draw white rectangle background behind text
-        rect_top = 0
+            rect_top = 0
-        rect_bottom = text_height + 2 * padding
+            rect_bottom = text_height + 2 * padding
-        draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color)
+            draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color)
-        # Draw text centered horizontally, 10px padding from top of the rectangle
+            # Draw text centered horizontally, 10px padding from top of the rectangle
-        text_x = (max_width - text_width) // 2
+            text_x = (max_width - text_width) // 2
-        text_y = padding
+            text_y = padding
-        draw.text((text_x, text_y), caption_text, font=font, fill=font_color)
+            draw.text((text_x, text_y), caption_text, font=font, fill=font_color)
        # Encode and send image
        output = io.BytesIO()
--- a/changedetectionio/content_fetchers/webdriver_selenium.py
+++ b/changedetectionio/content_fetchers/webdriver_selenium.py
@@ -10,16 +10,13 @@ class fetcher(Fetcher):
    else:
        fetcher_description = "WebDriver Chrome/Javascript"
    # Configs for Proxy setup
    # In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy"
    selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy',
                                        'proxyAutoconfigUrl', 'sslProxy', 'autodetect',
                                        'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
    proxy = None
    proxy_url = None
    def __init__(self, proxy_override=None, custom_browser_connection_url=None):
        super().__init__()
-        from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
+        from urllib.parse import urlparse
        from selenium.webdriver.common.proxy import Proxy
        # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
        if not custom_browser_connection_url:
@@ -28,25 +25,27 @@ class fetcher(Fetcher):
            self.browser_connection_is_custom = True
            self.browser_connection_url = custom_browser_connection_url
        # If any proxy settings are enabled, then we should setup the proxy object
        proxy_args = {}
        for k in self.selenium_proxy_settings_mappings:
            v = os.getenv('webdriver_' + k, False)
            if v:
                proxy_args[k] = v.strip('"')
-        # Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy
+        ##### PROXY SETUP #####
        if not proxy_args.get('webdriver_httpProxy') and self.system_http_proxy:
            proxy_args['httpProxy'] = self.system_http_proxy
        if not proxy_args.get('webdriver_sslProxy') and self.system_https_proxy:
            proxy_args['httpsProxy'] = self.system_https_proxy
-        # Allows override the proxy on a per-request basis
+        proxy_sources = [
-        if proxy_override is not None:
+            self.system_http_proxy,
-            proxy_args['httpProxy'] = proxy_override
+            self.system_https_proxy,
            os.getenv('webdriver_proxySocks'),
            os.getenv('webdriver_socksProxy'),
            os.getenv('webdriver_proxyHttp'),
            os.getenv('webdriver_httpProxy'),
            os.getenv('webdriver_proxyHttps'),
            os.getenv('webdriver_httpsProxy'),
            os.getenv('webdriver_sslProxy'),
            proxy_override, # last one should override
        ]
        # The built in selenium proxy handling is super unreliable!!! so we just grab which ever proxy setting we can find and throw it in --proxy-server=
        for k in filter(None, proxy_sources):
            if not k:
                continue
            self.proxy_url = k.strip()
        if proxy_args:
            self.proxy = SeleniumProxy(raw=proxy_args)
    def run(self,
            url,
@@ -59,63 +58,77 @@ class fetcher(Fetcher):
            is_binary=False,
            empty_pages_are_a_change=False):
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options as ChromeOptions
        from selenium.common.exceptions import WebDriverException
        # request_body, request_method unused for now, until some magic in the future happens.
        options = ChromeOptions()
        options.add_argument("--headless")
        if self.proxy:
            options.proxy = self.proxy
-        self.driver = webdriver.Remote(
+        # Load Chrome options from env
-            command_executor=self.browser_connection_url,
+        CHROME_OPTIONS = [
-            options=options)
+            line.strip()
            for line in os.getenv("CHROME_OPTIONS", "").strip().splitlines()
            if line.strip()
        ]
        for opt in CHROME_OPTIONS:
            options.add_argument(opt)
        # 1. proxy_config /Proxy(proxy_config) selenium object is REALLY unreliable
        # 2. selenium-wire cant be used because the websocket version conflicts with pypeteer-ng
        # 3. selenium only allows ONE runner at a time by default!
        # 4. driver must use quit() or it will continue to block/hold the selenium process!!
        if self.proxy_url:
            options.add_argument(f'--proxy-server={self.proxy_url}')
        from selenium.webdriver.remote.remote_connection import RemoteConnection
        from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
        driver = None
        try:
            # Create the RemoteConnection and set timeout (e.g., 30 seconds)
            remote_connection = RemoteConnection(
                self.browser_connection_url,
            )
            remote_connection.set_timeout(30)  # seconds
            # Now create the driver with the RemoteConnection
            driver = RemoteWebDriver(
                command_executor=remote_connection,
                options=options
            )
            driver.set_page_load_timeout(int(os.getenv("WEBDRIVER_PAGELOAD_TIMEOUT", 45)))
        except Exception as e:
            if driver:
                driver.quit()
            raise e
        try:
-            self.driver.get(url)
+            driver.get(url)
        except WebDriverException as e:
            # Be sure we close the session window
            self.quit()
            raise
-        self.driver.set_window_size(1280, 1024)
+            if not "--window-size" in os.getenv("CHROME_OPTIONS", ""):
-        self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
+                driver.set_window_size(1280, 1024)
-        if self.webdriver_js_execute_code is not None:
+            driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
            self.driver.execute_script(self.webdriver_js_execute_code)
            # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
            self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
-        # @todo - how to check this? is it possible?
+            if self.webdriver_js_execute_code is not None:
-        self.status_code = 200
+                driver.execute_script(self.webdriver_js_execute_code)
-        # @todo somehow we should try to get this working for WebDriver
+                # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
-        # raise EmptyReply(url=url, status_code=r.status_code)
+                driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
-        # @todo - dom wait loaded?
+            # @todo - how to check this? is it possible?
-        time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
+            self.status_code = 200
-        self.content = self.driver.page_source
+            # @todo somehow we should try to get this working for WebDriver
-        self.headers = {}
+            # raise EmptyReply(url=url, status_code=r.status_code)
-        self.screenshot = self.driver.get_screenshot_as_png()
+            # @todo - dom wait loaded?
            time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
            self.content = driver.page_source
            self.headers = {}
            self.screenshot = driver.get_screenshot_as_png()
        except Exception as e:
            driver.quit()
            raise e
-    # Does the connection to the webdriver work? run a test connection.
+        driver.quit()
    def is_ready(self):
        """Check the WebDriver connection by opening and closing a test session.

        A remote session with default Chrome options is created against
        ``self.command_executor``, stored on ``self.driver`` and then shut
        down again through ``self.quit()``. Nothing is caught around the
        session creation, so connection errors reach the caller.

        :return: True once the session was created and the shutdown was attempted.
        """
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options as ChromeOptions

        # NOTE(review): the __init__ visible in this file stores the endpoint as
        # `browser_connection_url`; confirm `command_executor` is defined elsewhere.
        executor_endpoint = self.command_executor
        default_options = ChromeOptions()
        self.driver = webdriver.Remote(command_executor=executor_endpoint,
                                       options=default_options)

        # driver.quit() seems to cause better exceptions
        self.quit()
        return True
    def quit(self, watch=None):
        """Shut down the WebDriver session held in ``self.driver``, if there is one.

        Any exception raised by the driver while quitting is logged at debug
        level and not re-raised.

        :param watch: not used by the code visible here.
        """
        if self.driver:
            try:
                self.driver.quit()
            except Exception as e:
                logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}")
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@@ -224,27 +224,37 @@ class StringDictKeyValue(StringField):
    def _value(self):
        if self.data:
-            output = u''
+            output = ''
-            for k in self.data.keys():
+            for k, v in self.data.items():
-                output += "{}: {}\r\n".format(k, self.data[k])
+                output += f"{k}: {v}\r\n"
            return output
        else:
-            return u''
+            return ''
-    # incoming
+    # incoming data processing + validation
    def process_formdata(self, valuelist):
        self.data = {}
        errors = []
        if valuelist:
-            self.data = {}
+            # Remove empty strings (blank lines)
-            # Remove empty strings
+            cleaned = [line.strip() for line in valuelist[0].split("\n") if line.strip()]
-            cleaned = list(filter(None, valuelist[0].split("\n")))
+            for idx, s in enumerate(cleaned, start=1):
-            for s in cleaned:
+                if ':' not in s:
-                parts = s.strip().split(':', 1)
+                    errors.append(f"Line {idx} is missing a ':' separator.")
-                if len(parts) == 2:
+                    continue
-                    self.data.update({parts[0].strip(): parts[1].strip()})
+                parts = s.split(':', 1)
                key = parts[0].strip()
                value = parts[1].strip()
-        else:
+                if not key:
-            self.data = {}
+                    errors.append(f"Line {idx} has an empty key.")
                if not value:
                    errors.append(f"Line {idx} has an empty value.")
                self.data[key] = value
        if errors:
            raise ValidationError("Invalid input:\n" + "\n".join(errors))
 class ValidateContentFetcherIsReady(object):
    """
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -309,10 +309,10 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
        soup = BeautifulSoup(content, 'html.parser')
        if ensure_is_ldjson_info_type:
-            bs_result = soup.findAll('script', {"type": "application/ld+json"})
+            bs_result = soup.find_all('script', {"type": "application/ld+json"})
        else:
-            bs_result = soup.findAll('script')
+            bs_result = soup.find_all('script')
-        bs_result += soup.findAll('body')
+        bs_result += soup.find_all('body')
        bs_jsons = []
        for result in bs_result:
@@ -435,45 +435,29 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
    return re.sub(pattern, repl, html_content)
-def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False) -> str:
+
 # NOTE!! ANYTHING LIBXML, HTML5LIB ETC WILL CAUSE SOME SMALL MEMORY LEAK IN THE LOCAL "LIB" IMPLEMENTATION OUTSIDE PYTHON
 def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False, timeout=10) -> str:
    """Convert an HTML string to a string with just the text.

    When rendering of anchor tag content is enabled, the link targets are
    included in the text as well.

    :param html_content: string with html content
    :param render_anchor_tag_content: boolean flag indicating whether to extract
    hyperlinks (the anchor tag content) together with text. This refers to the
    'href' inside 'a' tags.
    Anchor tag content is rendered in the following manner:
    '[ text ](anchor tag content)'
    :param is_rss: when true, ``<title>`` tags are rewritten to ``<h1>`` before
    conversion so they are rendered as regular block elements.
    :param timeout: accepted in the signature but not used by this function body.
    :return: extracted text from the HTML
    """
    # Imported lazily, after the docstring, so that the docstring is the first
    # statement and is actually exposed as html_to_text.__doc__
    from inscriptis import get_text
    from inscriptis.model.config import ParserConfig

    #  if anchor tag content flag is set to True define a config for
    #  extracting this content
    if render_anchor_tag_content:
        parser_config = ParserConfig(
            annotation_rules={"a": ["hyperlink"]},
            display_links=True
        )
    # otherwise set config to None/default
    else:
        parser_config = None

    # RSS Mode - Inscriptis will treat `title` as something else.
    # Make it as a regular block display element (//item/title)
    # This is a bit of a hack - the real way it to use XSLT to convert it to HTML #1874
    if is_rss:
        html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
        html_content = re.sub(r'</title>', r'</h1>', html_content)

    text_content = get_text(html_content, config=parser_config)
    return text_content
 # Does LD+JSON exist with a @type=='product' and a .price set anywhere?
 def has_ldjson_product_info(content):
    try:
--- a/changedetectionio/pluggy_interface.py
+++ b/changedetectionio/pluggy_interface.py
@@ -0,0 +1,82 @@
 import pluggy
 import os
 import importlib
 import sys
 # Global plugin namespace for changedetection.io; the two markers here and the
 # plugin manager created further down are all built from this same name.
 PLUGIN_NAMESPACE = "changedetectionio"
 # Marker used as the @hookspec decorator on hook specifications
 hookspec = pluggy.HookspecMarker(PLUGIN_NAMESPACE)
 # Marker for hook implementations (not used within the code visible here)
 hookimpl = pluggy.HookimplMarker(PLUGIN_NAMESPACE)
 class ChangeDetectionSpec:
    """Hook specifications that plugins may implement to extend changedetection.io."""
    @hookspec
    def ui_edit_stats_extras(watch):
        """Supply extra HTML for the stats tab of the watch edit view.

        Args:
            watch: The watch object being edited

        Returns:
            str: HTML content to be inserted in the stats tab
        """
 # Set up the module-level Plugin Manager (created when this module is imported)
 plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
 # Register hookspecs so the manager knows the hooks declared on ChangeDetectionSpec
 plugin_manager.add_hookspecs(ChangeDetectionSpec)
 # Load plugins from subdirectories
 def load_plugins_from_directories():
    """Import and register the plugin modules found in the bundled plugin directories.

    Every ``*.py`` file (except ``__init__.py``) inside each configured
    directory is imported as ``changedetectionio.<dir_name>.plugins.<module>``
    and registered with ``plugin_manager`` under its module name.

    Files are processed in sorted name order so the registration order does not
    depend on the order in which the filesystem happens to list them.
    ImportError/AttributeError raised while loading a plugin are printed and
    the plugin is skipped; other exceptions propagate.
    """
    # Dictionary of directories to scan for plugins
    plugin_dirs = {
        'conditions': os.path.join(os.path.dirname(__file__), 'conditions', 'plugins'),
        # Add more plugin directories here as needed
    }
    # Note: Removed the direct import of example_word_count_plugin as it's now in the conditions/plugins directory
    for dir_name, dir_path in plugin_dirs.items():
        # isdir() rather than exists(): a plain file at this path would make
        # os.listdir() raise instead of simply being skipped
        if not os.path.isdir(dir_path):
            continue
        # os.listdir() returns entries in arbitrary order, so sort for a stable
        # registration order
        for filename in sorted(os.listdir(dir_path)):
            # Only Python files, excluding the package's __init__.py
            if not filename.endswith(".py") or filename == "__init__.py":
                continue
            module_name = filename[:-3]  # Remove .py extension
            module_path = f"changedetectionio.{dir_name}.plugins.{module_name}"
            try:
                module = importlib.import_module(module_path)
                # Register the plugin with pluggy
                plugin_manager.register(module, module_name)
            except (ImportError, AttributeError) as e:
                print(f"Error loading plugin {module_name}: {e}")
 # Load the bundled plugins from the plugin directories; this runs at import time
 load_plugins_from_directories()
 # Discover installed plugins from external packages (if any), looked up under
 # the same namespace string as their entry point group
 plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE)
 # Helper function to collect UI stats extras from all plugins
 def collect_ui_edit_stats_extras(watch):
    """Gather the stats-tab HTML contributed by every ui_edit_stats_extras hook.

    Args:
        watch: The watch object being edited; passed through to each hook

    Returns:
        str: The non-empty hook results joined with newlines, or an empty
        string when no plugin returned anything
    """
    hook_results = plugin_manager.hook.ui_edit_stats_extras(watch=watch)
    # Drop empty/None results; joining an empty list already yields ""
    html_fragments = [fragment for fragment in (hook_results or []) if fragment]
    return "\n".join(html_fragments)
--- a/changedetectionio/run_basic_tests.sh
+++ b/changedetectionio/run_basic_tests.sh
@@ -38,6 +38,9 @@ pytest tests/test_backend.py
 pytest tests/test_rss.py
 pytest tests/test_unique_lines.py
 # Try high concurrency
 FETCH_WORKERS=130 pytest  tests/test_history_consistency.py -v -l
 # Check file:// will pickup a file when enabled
 echo "Hello world" > /tmp/test-file.txt
 ALLOW_FILE_URI=yes pytest tests/test_security.py
--- a/changedetectionio/run_proxy_tests.sh
+++ b/changedetectionio/run_proxy_tests.sh
@@ -82,3 +82,25 @@ done
 docker kill squid-one squid-two squid-custom
 # Test that the UI is returning the correct error message when a proxy is not available
 # Requests
 docker run --network changedet-network \
  test-changedetectionio \
  bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py'
 # Playwright
 docker run --network changedet-network \
  test-changedetectionio \
  bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py'
 # Puppeteer fast
 docker run --network changedet-network \
  test-changedetectionio \
  bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py'
 # Selenium
 docker run --network changedet-network \
  test-changedetectionio \
  bash -c 'cd changedetectionio && WEBDRIVER_URL=http://selenium:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py'
--- a/changedetectionio/static/js/browser-steps.js
+++ b/changedetectionio/static/js/browser-steps.js
@@ -211,7 +211,14 @@ $(document).ready(function () {
                    $('input[type=text]', first_available).first().val(x['xpath']);
                    $('input[placeholder="Value"]', first_available).addClass('ok').click().focus();
                    found_something = true;
-                } else {
+                }
                else if (x['tagName'] === 'select') {
                    $('select', first_available).val('<select> by option text').change();
                    $('input[type=text]', first_available).first().val(x['xpath']);
                    $('input[placeholder="Value"]', first_available).addClass('ok').click().focus();
                    found_something = true;
                }
                else {
                    // There's no good way (that I know) to find if this
                    // see https://stackoverflow.com/questions/446892/how-to-find-event-listeners-on-a-dom-node-in-javascript-or-in-debugging
                    // https://codepen.io/azaslavsky/pen/DEJVWv
@@ -251,6 +258,10 @@ $(document).ready(function () {
                400: function () {
                    // More than likely the CSRF token was lost when the server restarted
                    alert("There was a problem processing the request, please reload the page.");
                },
                401: function (err) {
                    // This will be a custom error
                    alert(err.responseText);
                }
            }
        }).done(function (data) {
--- a/changedetectionio/templates/_helpers.html
+++ b/changedetectionio/templates/_helpers.html
@@ -98,15 +98,13 @@
 {% macro playwright_warning() %}
-    <p><strong>Error - Playwright support for Chrome based fetching is not enabled.</strong> Alternatively try our <a href="https://changedetection.io">very affordable subscription based service which has all this setup for you</a>.</p>
+    <p><strong>Error - This watch needs Chrome (with playwright/sockpuppetbrowser), but Chrome based fetching is not enabled.</strong> Alternatively try our <a href="https://changedetection.io">very affordable subscription based service which has all this setup for you</a>.</p>
    <p>You may need to <a href="https://github.com/dgtlmoon/changedetection.io/blob/09ebc6ec6338545bdd694dc6eee57f2e9d2b8075/docker-compose.yml#L31">Enable playwright environment variable</a> and uncomment the <strong>sockpuppetbrowser</strong> in the <a href="https://github.com/dgtlmoon/changedetection.io/blob/master/docker-compose.yml">docker-compose.yml</a> file.</p>
    <br>
    <p>(Also Selenium/WebDriver can not extract full page screenshots reliably so Playwright is recommended here)</p>
 {% endmacro %}
-{% macro only_webdriver_type_watches_warning() %}
+{% macro only_playwright_type_watches_warning() %}
-    <p><strong>Sorry, this functionality only works with Playwright/Chrome enabled watches.<br>You need to <a href="#request">Set the fetch method to Playwright/Chrome mode and resave</a> and have the Playwright connection enabled.</strong></p><br>
+    <p><strong>Sorry, this functionality only works with Playwright/Chrome enabled watches.<br>You need to <a href="#request">Set the fetch method to Playwright/Chrome mode and resave</a> and have the SockpuppetBrowser/Playwright or Selenium enabled.</strong></p><br>
 {% endmacro %}
 {% macro render_time_schedule_form(form, available_timezones, timezone_default_config) %}
--- a/changedetectionio/templates/edit.html
+++ b/changedetectionio/templates/edit.html
@@ -1,6 +1,6 @@
 {% extends 'base.html' %}
 {% block content %}
-{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_webdriver_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %}
+{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %}
 {% from '_common_fields.html' import render_common_settings_form %}
 <script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
@@ -204,7 +204,9 @@ Math: {{ 1 + 1 }}") }}
            </div>
            <div class="tab-pane-inner" id="browser-steps">
-            {% if playwright_enabled and watch_uses_webdriver %}
+            {% if watch_needs_selenium_or_playwright %}
                {# Only works with playwright #}
                {% if system_has_playwright_configured %}
                <img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
                <fieldset>
                    <div class="pure-control-group">
@@ -223,7 +225,6 @@ Math: {{ 1 + 1 }}") }}
                        <div class="flex-wrapper" >
                            <div id="browser-steps-ui" class="noselect">
                                <div class="noselect"  id="browsersteps-selector-wrapper" style="width: 100%">
                                    <span class="loader" >
                                        <span id="browsersteps-click-start">
@@ -245,15 +246,16 @@ Math: {{ 1 + 1 }}") }}
                    </div>
                </fieldset>
                {% else %}
-                    <span class="pure-form-message-inline">
+                    {# it's configured to use selenium or chrome but system says its not configured #}
-                        {% if not watch_uses_webdriver %}
+                    {{ playwright_warning() }}
-                            {{ only_webdriver_type_watches_warning() }}
+                    {% if system_has_webdriver_configured %}
-                        {% endif %}
+                        <strong>Selenium/Webdriver cant be used here because it wont fetch screenshots reliably.</strong>
-                        {%  if not playwright_enabled %}
+                    {% endif %}
                            {{ playwright_warning() }}
                        {% endif %}
                    </span>
                {% endif %}
            {% else %}
                {# "This functionality needs chrome.." #}
                {{ only_playwright_type_watches_warning() }}
            {% endif %}
            </div>
@@ -262,7 +264,7 @@ Math: {{ 1 + 1 }}") }}
                    <div  class="pure-control-group inline-radio">
                      {{ render_checkbox_field(form.notification_muted) }}
                    </div>
-                    {% if watch_uses_webdriver %}
+                    {% if watch_needs_selenium_or_playwright %}
                    <div class="pure-control-group inline-radio">
                      {{ render_checkbox_field(form.notification_screenshot) }}
                        <span class="pure-form-message-inline">
@@ -379,13 +381,15 @@ Math: {{ 1 + 1 }}") }}
                <fieldset>
                    <div class="pure-control-group">
-                        {% if playwright_enabled and watch_uses_webdriver %}
+                        {% if watch_needs_selenium_or_playwright %}
                            {% if system_has_playwright_configured %}
                            <span class="pure-form-message-inline" id="visual-selector-heading">
                                The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items.
                            </span>
                            <div id="selector-header">
                                <a id="clear-selector" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Clear selection</a>
                                <!-- visual selector IMG will try to load, it will either replace this or on error replace it with some handy text -->
                                <i class="fetching-update-notice" style="font-size: 80%;">One moment, fetching screenshot and element information..</i>
                            </div>
                            <div id="selector-wrapper" style="display: none">
@@ -397,13 +401,16 @@ Math: {{ 1 + 1 }}") }}
                            </div>
                            <div id="selector-current-xpath" style="overflow-x: hidden"><strong>Currently:</strong>&nbsp;<span class="text">Loading...</span></div>
                        {% else %}
-                            {% if not watch_uses_webdriver %}
+                            {# The watch needed chrome but system says that playwright is not ready #}
-                                {{ only_webdriver_type_watches_warning() }}
+                            {{ playwright_warning() }}
                            {% endif %}
                            {% if not playwright_enabled %}
                                {{ playwright_warning() }}
                            {% endif %}
                        {% endif %}
                            {% if system_has_webdriver_configured %}
                                <strong>Selenium/Webdriver cant be used here because it wont fetch screenshots reliably.</strong>
                            {% endif %}
                    {% else %}
                        {# "This functionality needs chrome.." #}
                        {{ only_playwright_type_watches_warning() }}
                    {% endif %}
                    </div>
                </fieldset>
            </div>
@@ -443,6 +450,13 @@ Math: {{ 1 + 1 }}") }}
                        </tr>
                        </tbody>
                    </table>
                    {% if ui_edit_stats_extras %}
                    <div class="plugin-stats-extras"> <!-- from pluggy plugin -->
                        {{ ui_edit_stats_extras|safe }}
                    </div>
                    {% endif %}
                    {% if watch.history_n %}
                        <p>
                             <a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">Download latest HTML snapshot</a>
--- a/changedetectionio/tests/proxy_list/test_proxy_noconnect.py
+++ b/changedetectionio/tests/proxy_list/test_proxy_noconnect.py
@@ -0,0 +1,68 @@
 #!/usr/bin/env python3
 from flask import url_for
 from ..util import live_server_setup, wait_for_all_checks
 import os
 from ... import strtobool
 # Just to be sure the UI outputs the right error message on proxy connection failed
 # docker run -p 4444:4444 --rm --shm-size="2g"  selenium/standalone-chrome:4
 # PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 pytest tests/proxy_list/test_proxy_noconnect.py
 # FAST_PUPPETEER_CHROME_FETCHER=True PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 pytest tests/proxy_list/test_proxy_noconnect.py
 # WEBDRIVER_URL=http://127.0.0.1:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py
 def test_proxy_noconnect_custom(client, live_server, measure_memory_usage):
    """Check that the watch list shows the proxy connection error for a custom proxy.

    A custom proxy is added in the settings and selected on a new watch; after
    the check has run, the overview page must contain the fetcher-specific
    "could not connect to proxy" message.
    """
    live_server_setup(live_server)

    # Use the browser based fetcher when a browser endpoint is configured in the
    # environment, otherwise plain requests. Computed once so the global setting
    # and the watch always use the same backend.
    browser_endpoint_configured = os.getenv('PLAYWRIGHT_DRIVER_URL') or os.getenv("WEBDRIVER_URL")
    fetch_backend = 'html_webdriver' if browser_endpoint_configured else 'html_requests'

    # Goto settings, add our custom one
    res = client.post(
        url_for("settings.settings_page"),
        data={
            "requests-time_between_check-minutes": 180,
            "application-ignore_whitespace": "y",
            "application-fetch_backend": fetch_backend,
            "requests-extra_proxies-0-proxy_name": "custom-test-proxy",
            # The test expects connecting through this proxy to fail
            "requests-extra_proxies-0-proxy_url": "http://127.0.0.1:3128",
        },
        follow_redirects=True
    )
    assert b"Settings updated." in res.data

    test_url = "https://changedetection.io"
    res = client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
        data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
        follow_redirects=True
    )
    assert b"Watch added in Paused state, saving will unpause" in res.data

    options = {
        "url": test_url,
        "fetch_backend": fetch_backend,
        "proxy": "ui-0custom-test-proxy",
    }
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid="first", unpause_on_save=1),
        data=options,
        follow_redirects=True
    )
    assert b"unpaused" in res.data

    wait_for_all_checks(client)

    # Requests default
    check_string = b'Cannot connect to proxy'
    # Browser based fetchers report the Chrome network error code instead
    if os.getenv('PLAYWRIGHT_DRIVER_URL') or strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')) or os.getenv("WEBDRIVER_URL"):
        check_string = b'ERR_PROXY_CONNECTION_FAILED'

    res = client.get(url_for("watchlist.index"))
    #with open("/tmp/debug.html", 'wb') as f:
    #    f.write(res.data)
    assert check_string in res.data
--- a/changedetectionio/tests/restock/test_restock.py
+++ b/changedetectionio/tests/restock/test_restock.py
@@ -14,6 +14,8 @@ from changedetectionio.notification import (
 def set_original_response():
    test_return_data = """<html>
       <body>
       <section id=header style="padding: 50px; height: 350px">This is the header which should be ignored always - <span>add to cart</span></section>
       <!-- stock-not-in-stock.js will ignore text in the first 300px, see elementIsInEyeBallRange(), sometimes "add to cart" and other junk is here -->
     Some initial text<br>
     <p>Which is across multiple lines</p>
     <br>
@@ -52,8 +54,6 @@ def test_restock_detection(client, live_server, measure_memory_usage):
    set_original_response()
    #assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
    time.sleep(1)
    live_server_setup(live_server)
    #####################
    notification_url = url_for('test_notification_endpoint', _external=True).replace('http://localhost', 'http://changedet').replace('http', 'json')
@@ -84,7 +84,8 @@ def test_restock_detection(client, live_server, measure_memory_usage):
    # Is it correctly show as NOT in stock?
    wait_for_all_checks(client)
    res = client.get(url_for("watchlist.index"))
-    assert b'not-in-stock' in res.data
+    assert b'processor-restock_diff' in res.data # Should have saved in restock mode
    assert b'not-in-stock' in res.data # should be out of stock
    # Is it correctly shown as in stock
    set_back_in_stock_response()
--- a/changedetectionio/tests/test_conditions.py
+++ b/changedetectionio/tests/test_conditions.py
@@ -45,11 +45,15 @@ def set_number_out_of_range_response(number="150"):
        f.write(test_return_data)
 def test_setup(client, live_server):
    """Start the live test server up front (the next test in this file has its own live_server_setup() call commented out)."""
    live_server_setup(live_server)
 def test_conditions_with_text_and_number(client, live_server):
    """Test that both text and number conditions work together with AND logic."""
    set_original_response("50")
-    live_server_setup(live_server)
+    #live_server_setup(live_server)
    test_url = url_for('test_endpoint', _external=True)
@@ -192,6 +196,150 @@ def test_condition_validate_rule_row(client, live_server):
    )
    assert res.status_code == 200
    assert b'false' in res.data
    # cleanup for the next
    client.get(
        url_for("ui.form_delete", uuid="all"),
        follow_redirects=True
    )
 # The word-count conditions plugin should show the snapshot's word count (13 for this page) on the edit page
 def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage):
    #live_server_setup(live_server)
    html_body = """<html>
       <body>
     Some initial text<br>
     <p>Which is across multiple lines</p>
     <br>
     So let's see what happens.  <br>
     </body>
     </html>
    """
    # Serve the page above from the test endpoint
    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(html_body)
    # Import the test endpoint as a new watch
    test_url = url_for('test_endpoint', _external=True)
    import_response = client.post(url_for("imports.import_page"), data={"urls": test_url}, follow_redirects=True)
    assert b"1 Imported" in import_response.data
    # Let the first check finish so there is a snapshot to count words in
    wait_for_all_checks(client)
    # The edit page should report 13 words for the snapshot
    edit_page = client.get(url_for("ui.ui_edit.edit_page", uuid="first"))
    assert b'<td>13</td>' in edit_page.data
    # Remove every watch so the next test starts clean
    client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
 # With a "levenshtein_ratio < 0.8" condition, a small edit must not be flagged as a change, a large edit must be
 def test_lev_conditions_plugin(client, live_server, measure_memory_usage):
    #live_server_setup(live_server)
    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write("""<html>
       <body>
     Some initial text<br>
     <p>Which is across multiple lines</p>
     <br>
     So let's see what happens.  <br>
     </body>
     </html>
    """)
    # Add the watch through the quick-add form using "Edit > Watch", which creates it in a paused state
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
        data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
        follow_redirects=True
    )
    assert b"Watch added in Paused state, saving will unpause" in res.data
    # Only one watch exists at this point, so the first key is its UUID
    uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
    # Give the thread time to pick it up
    wait_for_all_checks(client)
    # Save the watch with the Levenshtein condition; unpause_on_save=1 unpauses it on save
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1),
        data={
            "url": test_url,
            "fetch_backend": "html_requests",
            "conditions_match_logic": "ALL",  # ALL = AND logic
            "conditions-0-field": "levenshtein_ratio",
            "conditions-0-operator": "<",
            "conditions-0-value": "0.8" # needs to be more of a diff to trigger a change
        },
        follow_redirects=True
    )
    assert b"unpaused" in res.data
    wait_for_all_checks(client)
    # The first snapshot must not be marked as an unviewed change
    res = client.get(url_for("watchlist.index"))
    assert b'unviewed' not in res.data
    # Check the content saved initially, even though a condition was set - this is the first snapshot so it shouldn't be affected by conditions
    res = client.get(
        url_for("ui.ui_views.preview_page", uuid=uuid),
        follow_redirects=True
    )
    assert b'Which is across multiple lines' in res.data
    ############### Now change it a LITTLE bit...
    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write("""<html>
       <body>
     Some initial text<br>
     <p>Which is across multiple lines</p>
     <br>
     So let's see what happenxxxxxxxxx.  <br>
     </body>
     </html>
    """)
    res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    assert b'Queued 1 watch for rechecking.' in res.data
    wait_for_all_checks(client)
    res = client.get(url_for("watchlist.index"))
    assert b'unviewed' not in res.data # because the similarity ratio will be around 0.90, which is not below the 0.8 threshold
    ############### Now change it A LOT MORE
    test_return_data = """<html>
       <body>
     Some sxxxx<br>
     <p>Which is across a lines</p>
     <br>
     ok.  <br>
     </body>
     </html>
    """
    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(test_return_data)
    res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    assert b'Queued 1 watch for rechecking.' in res.data
    wait_for_all_checks(client)
    # This time the condition is expected to match, so the watch shows as changed
    res = client.get(url_for("watchlist.index"))
    assert b'unviewed' in res.data
    # Clean up for the next test
    client.get(
        url_for("ui.form_delete", uuid="all"),
        follow_redirects=True
    )
--- a/changedetectionio/tests/test_history_consistency.py
+++ b/changedetectionio/tests/test_history_consistency.py
@@ -10,8 +10,8 @@ from urllib.parse import urlparse, parse_qs
 def test_consistent_history(client, live_server, measure_memory_usage):
    live_server_setup(live_server)
-
+    workers = int(os.getenv("FETCH_WORKERS", 10))
-    r = range(1, 30)
+    r = range(1, 10+workers)
    for one in r:
        test_url = url_for('test_endpoint', content_type="text/html", content=str(one), _external=True)
@@ -46,9 +46,10 @@ def test_consistent_history(client, live_server, measure_memory_usage):
    # assert the right amount of watches was found in the JSON
    assert len(json_obj['watching']) == len(r), "Correct number of watches was found in the JSON"
-
+    i=0
    # each one should have a history.txt containing just one line
    for w in json_obj['watching'].keys():
        i+=1
        history_txt_index_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, 'history.txt')
        assert os.path.isfile(history_txt_index_file), f"History.txt should exist where I expect it at {history_txt_index_file}"
@@ -58,8 +59,8 @@ def test_consistent_history(client, live_server, measure_memory_usage):
            assert len(tmp_history) == 1, "History.txt should contain 1 line"
        # Should be three files: the history.txt, the html.br snapshot and the extracted text snapshot
-        files_in_watch_dir = os.listdir(os.path.join(live_server.app.config['DATASTORE'].datastore_path,
+        files_in_watch_dir = os.listdir(os.path.join(live_server.app.config['DATASTORE'].datastore_path, w))
-                                                     w))
+
        # Find the snapshot one
        for fname in files_in_watch_dir:
            if fname != 'history.txt' and 'html' not in fname:
@@ -75,7 +76,6 @@ def test_consistent_history(client, live_server, measure_memory_usage):
        assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot"
    json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')
    with open(json_db_file, 'r') as f:
        assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved"
--- a/changedetectionio/tests/test_ignore_text.py
+++ b/changedetectionio/tests/test_ignore_text.py
@@ -32,13 +32,14 @@ def test_strip_text_func():
    stripped_content = html_tools.strip_ignore_text(test_content, ignore)
    assert stripped_content == "Some initial text\n\nWhich is across multiple lines\n\n\n\nSo let's see what happens."
-def set_original_ignore_response():
+def set_original_ignore_response(ver_stamp="123"):
-    test_return_data = """<html>
+    test_return_data = f"""<html>
       <body>
     Some initial text<br>
     <p>Which is across multiple lines</p>
     <br>
     So let's see what happens.  <br>
     <link href="https://www.somesite/wp-content/themes/cooltheme/style2.css?v={ver_stamp}" rel="stylesheet"/>
     </body>
     </html>
@@ -48,13 +49,14 @@ def set_original_ignore_response():
        f.write(test_return_data)
-def set_modified_original_ignore_response():
+def set_modified_original_ignore_response(ver_stamp="123"):
-    test_return_data = """<html>
+    test_return_data = f"""<html>
       <body>
     Some NEW nice initial text<br>
     <p>Which is across multiple lines</p>
     <br>
     So let's see what happens.  <br>
     <link href="https://www.somesite/wp-content/themes/cooltheme/style2.css?v={ver_stamp}" rel="stylesheet"/>
     <p>new ignore stuff</p>
     <p>blah</p>
     </body>
@@ -67,14 +69,15 @@ def set_modified_original_ignore_response():
 # Is the same but includes ZZZZZ, 'ZZZZZ' is the last line in ignore_text
-def set_modified_ignore_response():
+def set_modified_ignore_response(ver_stamp="123"):
-    test_return_data = """<html>
+    test_return_data = f"""<html>
       <body>
     Some initial text<br>
     <p>Which is across multiple lines</p>
     <P>ZZZZz</P>
     <br>
     So let's see what happens.  <br>
     <link href="https://www.somesite/wp-content/themes/cooltheme/style2.css?v={ver_stamp}" rel="stylesheet"/>
     </body>
     </html>
@@ -165,9 +168,9 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa
    assert b'Deleted' in res.data
 # When adding some ignore text, it should not trigger a change, even if something else on that line changes
-def test_check_global_ignore_text_functionality(client, live_server, measure_memory_usage):
+def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
-    #live_server_setup(live_server)
+    ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ\r\n"+extra_ignore
-    ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ"
+
    set_original_ignore_response()
    # Goto the settings page, add our ignore text
@@ -186,6 +189,10 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    if as_source:
        # Switch to source mode so we can test that too!
        test_url = "source:"+test_url
    res = client.post(
        url_for("imports.import_page"),
        data={"urls": test_url},
@@ -203,12 +210,15 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
-
+    wait_for_all_checks(client)
    # Check it saved
    res = client.get(
        url_for("settings.settings_page"),
    )
-    assert bytes(ignore_text.encode('utf-8')) in res.data
+
    for i in ignore_text.splitlines():
        assert bytes(i.encode('utf-8')) in res.data
    # Trigger a check
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
@@ -221,7 +231,8 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
    # Make a change which includes the ignore text, it should be ignored and no 'change' triggered
    # It adds text with "ZZZZzzzz" and "ZZZZ" is in the ignore list
-    set_modified_ignore_response()
+    # And tweaks the ver_stamp which should be picked up by global regex ignore
    set_modified_ignore_response(ver_stamp=time.time())
    # Trigger a check
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
@@ -243,3 +254,11 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
 def test_check_global_ignore_text_functionality(client, live_server):
    """Run the shared global ignore-text scenario against a normally imported watch (no "source:" prefix)."""
    #live_server_setup(live_server)
    _run_test_global_ignore(client=client, as_source=False)
 def test_check_global_ignore_text_functionality_as_source(client, live_server):
    """Run the shared global ignore-text scenario against a "source:" watch, with an extra regex ignore rule for the ?v= stamp."""
    #live_server_setup(live_server)
    # Raw string: '\?' and '\d' are not valid Python string escapes, so a plain literal
    # triggers an invalid-escape warning (SyntaxWarning on Python 3.12+); the value is unchanged.
    _run_test_global_ignore(client, as_source=True, extra_ignore=r'/\?v=\d/')
--- a/changedetectionio/tests/test_request.py
+++ b/changedetectionio/tests/test_request.py
@@ -424,3 +424,27 @@ def test_headers_textfile_in_request(client, live_server, measure_memory_usage):
    # unlink headers.txt on start/stop
    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
 def test_headers_validation(client, live_server):
    """Saving a watch with malformed custom request headers should report per-line validation errors."""
    #live_server_setup(live_server)
    headers_endpoint = url_for('test_headers', _external=True)
    import_result = client.post(url_for("imports.import_page"), data={"urls": headers_endpoint}, follow_redirects=True)
    assert b"1 Imported" in import_result.data
    # Line 1 has no ':' separator, line 3 (":foobar") has nothing before the ':'
    malformed_headers = "User-AGent agent-from-watch\r\nsadfsadfsadfsdaf\r\n:foobar"
    edit_form = {
        "url": headers_endpoint,
        "fetch_backend": 'html_requests',
        "headers": malformed_headers,
    }
    save_result = client.post(url_for("ui.ui_edit.edit_page", uuid="first"), data=edit_form, follow_redirects=True)
    # The quote characters arrive HTML-escaped (&#39;) in the rendered page
    for expected_error in (b"Line 1 is missing a &#39;:&#39; separator.", b"Line 3 has an empty key."):
        assert expected_error in save_result.data
--- a/changedetectionio/tests/util.py
+++ b/changedetectionio/tests/util.py
@@ -126,18 +126,51 @@ def extract_UUID_from_client(client):
    uuid = m.group(1)
    return uuid.strip()
-def wait_for_all_checks(client):
+
-    # actually this is not entirely true, it can still be 'processing' but not in the queue
+def wait_for_all_checks(client=None):
-    # Loop waiting until done..
+    """
-    attempt=0
+    Waits until the queue is empty and remains empty for at least `required_empty_duration` seconds,
-    # because sub-second rechecks are problematic in testing, use lots of delays
+    and also ensures no running threads have `current_uuid` set.
-    time.sleep(1)
+    Retries for up to `max_attempts` times, sleeping `wait_between_attempts` seconds between checks.
-    while attempt < 60:
+    """
-        res = client.get(url_for("watchlist.index"))
+    from changedetectionio.flask_app import update_q as global_update_q, running_update_threads
-        if not b'Checking now' in res.data:
+
-            break
+    # Configuration
-        logging.getLogger().info("Waiting for watch-list to not say 'Checking now'.. {}".format(attempt))
+    attempt = 0
-        time.sleep(1)
+    i=0
    max_attempts = 60
    wait_between_attempts = 2
    required_empty_duration = 2
    logger = logging.getLogger()
    time.sleep(1.2)
    empty_since = None
    while attempt < max_attempts:
        q_length = global_update_q.qsize()
        # Check if any threads are still processing
        time.sleep(1.2)
        any_threads_busy = any(t.current_uuid for t in running_update_threads)
        if q_length == 0 and not any_threads_busy:
            if empty_since is None:
                empty_since = time.time()
                logger.info(f"Queue empty and no active threads at attempt {attempt}, starting empty timer...")
            elif time.time() - empty_since >= required_empty_duration:
                logger.info(f"Queue has been empty and threads idle for {required_empty_duration} seconds. Done waiting.")
                break
            else:
                logger.info(f"Still waiting: queue empty and no active threads, but not yet {required_empty_duration} seconds...")
        else:
            if q_length != 0:
                logger.info(f"Queue not empty (size={q_length}), resetting timer.")
            if any_threads_busy:
                busy_threads = [t.name for t in running_update_threads if t.current_uuid]
                logger.info(f"Threads still busy: {busy_threads}, resetting timer.")
            empty_since = None
        attempt += 1
    time.sleep(1)
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -9,15 +9,20 @@ services:
 #        - ./proxies.json:/datastore/proxies.json
  #    environment:
-  #        Default listening port, can also be changed with the -p option
+  #        Default listening port, can also be changed with the -p option (not to be confused with ports: below)
  #      - PORT=5000
  #
  #        Log levels are in descending order. (TRACE is the most detailed one)
  #        Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
  #      - LOGGER_LEVEL=TRACE
  #
-  #       Alternative WebDriver/selenium URL, do not use "'s or 's!
+  #
-  #      - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub
+  #       Uncomment below and the "browser-sockpuppet-chrome" service to use a real Chrome browser (It uses the "playwright" protocol)
  #      - PLAYWRIGHT_DRIVER_URL=ws://browser-sockpuppet-chrome:3000
  #
  #
  #       Alternative WebDriver/selenium URL, do not use "'s or 's! (old, deprecated, does not support screenshots very well)
  #      - WEBDRIVER_URL=http://browser-selenium-chrome:4444/wd/hub
  #
  #       WebDriver proxy settings webdriver_proxyType, webdriver_ftpProxy, webdriver_noProxy,
  #                                webdriver_proxyAutoconfigUrl, webdriver_autodetect,
@@ -25,9 +30,6 @@ services:
  #
  #             https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
  #
  #       Alternative target "Chrome" Playwright URL, do not use "'s or 's!
  #       "Playwright" is a driver/library that allows changedetection to talk to a Chrome or similar browser.
  #      - PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000
  #
  #       Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
  #
@@ -43,7 +45,7 @@ services:
  #        Base URL of your changedetection.io install (Added to the notification alert)
  #      - BASE_URL=https://mysite.com
  #        Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
-  #        More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory
+  #        More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy
  #      - USE_X_SETTINGS=1
  #
  #        Hides the `Referer` header so that monitored websites can't see the changedetection.io hostname.
@@ -70,7 +72,7 @@ services:
      # Comment out ports: when using behind a reverse proxy , enable networks: etc.
      ports:
-        - 5000:5000
+        - 127.0.0.1:5000:5000
      restart: unless-stopped
     # Used for fetching pages via WebDriver+Chrome where you need Javascript support.
@@ -80,14 +82,14 @@ services:
     # If WEBDRIVER or PLAYWRIGHT are enabled, changedetection container depends on that
     # and must wait before starting (substitute "browser-sockpuppet-chrome" with "browser-selenium-chrome" if the latter is used)
 #      depends_on:
-#          sockpuppetbrowser:
+#          browser-sockpuppet-chrome:
 #              condition: service_started
     # Sockpuppetbrowser is basically chrome wrapped in an API for allowing fast fetching of web-pages.
     # RECOMMENDED FOR FETCHING PAGES WITH CHROME, be sure to enable the "PLAYWRIGHT_DRIVER_URL" env variable in the main changedetection container
-#    sockpuppetbrowser:
+#    browser-sockpuppet-chrome:
-#        hostname: sockpuppetbrowser
+#        hostname: browser-sockpuppet-chrome
 #        image: dgtlmoon/sockpuppetbrowser:latest
 #        cap_add:
 #            - SYS_ADMIN
@@ -102,14 +104,18 @@ services:
     # Used for fetching pages via WebDriver (Selenium)+Chrome where you need Javascript support.
     # Note: Works well but is deprecated, does not fetch full page screenshots (doesnt work with Visual Selector)
     #       Does not report status codes (200, 404, 403) and other issues
-#    browser-chrome:
+#    browser-selenium-chrome:
-#        hostname: browser-chrome
+#        hostname: browser-selenium-chrome
 #        image: selenium/standalone-chrome:4
 #        environment:
 #            - VNC_NO_PASSWORD=1
 #            - SCREEN_WIDTH=1920
 #            - SCREEN_HEIGHT=1080
 #            - SCREEN_DEPTH=24
 #            - |
 #              CHROME_OPTIONS=--window-size=1280,1024
 #              --headless
 #              --disable-gpu
 #        volumes:
 #            # Workaround to avoid the browser crashing inside a docker container
 #            # See https://github.com/SeleniumHQ/docker-selenium#quick-start
--- a/docs/api_v1/assets/main.bundle.js
+++ b/docs/api_v1/assets/main.bundle.js
--- a/docs/api_v1/index.html
+++ b/docs/api_v1/index.html
@@ -5,13 +5,13 @@
  <meta name="description" content="Manage your changedetection.io watches via API, requires the `x-api-key` header which is found in the settings UI.">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
-  <link href="assets/bootstrap.min.css?v=1701595483622" rel="stylesheet" media="screen">
+  <link href="assets/bootstrap.min.css?v=1744573753999" rel="stylesheet" media="screen">
-  <link href="assets/prism.css?v=1701595483622" rel="stylesheet" />
+  <link href="assets/prism.css?v=1744573753999" rel="stylesheet" />
-  <link href="assets/main.css?v=1701595483622" rel="stylesheet" media="screen, print">
+  <link href="assets/main.css?v=1744573753999" rel="stylesheet" media="screen, print">
-  <link href="assets/favicon.ico?v=1701595483622" rel="icon" type="image/x-icon">
+  <link href="assets/favicon.ico?v=1744573753999" rel="icon" type="image/x-icon">
-  <link href="assets/apple-touch-icon.png?v=1701595483622" rel="apple-touch-icon" sizes="180x180">
+  <link href="assets/apple-touch-icon.png?v=1744573753999" rel="apple-touch-icon" sizes="180x180">
-  <link href="assets/favicon-32x32.png?v=1701595483622" rel="icon" type="image/png" sizes="32x32">
+  <link href="assets/favicon-32x32.png?v=1744573753999" rel="icon" type="image/png" sizes="32x32">
-  <link href="assets/favicon-16x16.png?v=1701595483622" rel="icon" type="image/png" sizes="16x16">
+  <link href="assets/favicon-16x16.png?v=1744573753999" rel="icon" type="image/png" sizes="16x16">
 </head>
 <body class="container-fluid">
@@ -928,6 +928,6 @@
  </div>
 </div>
-<script src="assets/main.bundle.js?v=1701595483622"></script>
+<script src="assets/main.bundle.js?v=1744573753999"></script>
 </body>
 </html>
--- a/requirements.txt
+++ b/requirements.txt
@@ -42,7 +42,7 @@ paho-mqtt!=2.0.*
 cryptography~=42.0.8
 # Used for CSS filtering
-beautifulsoup4
+beautifulsoup4>=4.0.0
 # XPath filtering, lxml is required by bs4 anyway, but put it here to be safe.
 # #2328 - 5.2.0 and 5.2.1 had extra CPU flag CFLAGS set which was not compatible on older hardware
@@ -53,7 +53,7 @@ lxml >=4.8.0,<6,!=5.2.0,!=5.2.1
 # XPath 2.0-3.1 support - 4.2.0 broke something?
 elementpath==4.1.5
-selenium~=4.14.0
+selenium~=4.31.0
 # https://github.com/pallets/werkzeug/issues/2985
 # Maybe related to pytest?
@@ -70,7 +70,7 @@ jq~=1.3; python_version >= "3.8" and sys_platform == "linux"
 # playwright is installed at Dockerfile build time because it's not available on all platforms
-pyppeteer-ng==2.0.0rc9
+pyppeteer-ng==2.0.0rc10
 pyppeteerstealth>=0.0.4
@@ -90,6 +90,8 @@ extruct
 # For cleaning up unknown currency formats
 babel
 levenshtein
 # Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
 greenlet >= 3.0.3
@@ -110,3 +112,6 @@ pluggy ~= 1.5
 # Needed for testing, cross-platform for process and system monitoring
 psutil==7.0.0
 ruff >= 0.11.2
 pre_commit >= 4.2.0
Author	SHA1	Message	Date
dgtlmoon	e23e099d4a	hmm	2025-05-10 10:38:10 +02:00
dgtlmoon	bff6eb3bcf	woops	2025-05-10 10:37:27 +02:00
dgtlmoon	10fc5d669b	try some matrix build	2025-05-10 10:36:41 +02:00
dgtlmoon	89be3c50d5	build test with 3.12	2025-05-10 10:28:02 +02:00
dgtlmoon	3a1f1a9a5a	Try linux/aarch64	2025-05-09 23:29:21 +02:00
Emmanuel Ferdman	bb7f7f473b	Resolve warnings of bs4 library (#3187 )	2025-05-09 14:35:35 +02:00
dgtlmoon	a9ca511004	Revert memory strategy change for html_to_text (Was hanging under high concurrency setups)	2025-05-09 09:44:02 +02:00
dgtlmoon	8df61f5eaa	0.49.16	2025-05-03 16:43:04 +02:00
dgtlmoon	162f573967	Fixes to ensure proxy errors are handled correctly (#3168 )	2025-05-03 16:05:40 +02:00
dgtlmoon	eada0ef08d	UI - Custom headers should have validation (#3172 )	2025-05-03 13:57:42 +02:00
dgtlmoon	f57bc10973	Update selenium library (#3170 )	2025-05-02 14:05:23 +02:00
dgtlmoon	d2e8f822d6	Restock detection - adding new string	2025-05-01 17:58:36 +02:00
dgtlmoon	5fd8200fd9	Conditions - Levenshtein text similarity plugin - adding test, fixing import, fixing check for watches with 1 snapshot history (#3161 )	2025-04-30 16:47:23 +02:00
dgtlmoon	d0da8c9825	Restock detection - Use cleaner logic for limiting elements to scan, refactor, improve tests (#3158 )	2025-04-30 10:57:33 +02:00
dgtlmoon	fd7574d21b	pyppeteer fast puppeteer fetch - be sure viewport is set to --window-size if --window-size is set (#3157 )	2025-04-29 17:23:37 +02:00
dgtlmoon	c70706a27b	Improved global ignore test (#3140 )	2025-04-29 11:20:21 +02:00
silversub	968c364999	Update docker-compose.yml (#3149 ) Co-authored-by: silversub <silversub@gmail.com>	2025-04-29 11:20:00 +02:00
dgtlmoon	031cb76b7d	Small fix for xpath element scraper (#3145 )	2025-04-25 17:58:04 +02:00
dgtlmoon	af568d064c	Plugins for conditions (and include Similarity / Levenshtein, wordcount conditions) Re #3108	2025-04-22 18:19:56 +02:00
dgtlmoon	a75f57de43	Browser Steps - <Select> by Option Text - #1224 , #1228 (#3138 )	2025-04-22 14:33:35 +02:00
dgtlmoon	72a1c3dda1	Browser Steps - error reporting and session shutdown improvements (#3137 )	2025-04-22 12:18:51 +02:00
dgtlmoon	ffde79ecac	0.49.15	2025-04-18 14:57:28 +02:00
dgtlmoon	66ad43b2df	Visual Selector & Browser Steps - Always recheck if the data/screenshot is ready under "Visual Selector" tab after using Browser Steps (#3130 )	2025-04-18 10:31:43 +02:00
Dror Levin	6b0e56ca80	App logs - Send TRACE and INFO logs to stdout (#3051 )	2025-04-18 10:00:09 +02:00
Luca	5a2d84d8b4	Development: introduce Ruff as linter/formatter (#3039 )	2025-04-18 09:59:18 +02:00
dgtlmoon	a941156f26	Updating restock texts (#3124 )	2025-04-17 10:44:32 +02:00
dgtlmoon	a1fdeeaa29	Only add screenshot warning if capture was greater than trim size (#3123 )	2025-04-17 00:11:20 +02:00
dgtlmoon	40ea2604a7	0.49.14	2025-04-16 23:23:18 +02:00
dgtlmoon	ceda526093	Small fix for multiprocessing start on Mac OS (#3121 #3115 )	2025-04-16 22:52:03 +02:00
Justin Goette	4197254c53	docs: Update reference URL (#3119 )	2025-04-16 21:37:50 +02:00
dgtlmoon	a0b7efb436	UI - Fix to edit and groups template	2025-04-16 18:40:30 +02:00
dgtlmoon	5f5e8ede6c	Updating API documentation	2025-04-13 21:51:17 +02:00
dgtlmoon	52ca855a29	Undo forced selenium headless mode, small refactor (#3112 )	2025-04-12 19:26:17 +02:00
dgtlmoon	079efd0a85	Playwright + Puppeteer fix for when page is taller than viewport but less than screenshot step_size (#3113 )	2025-04-12 18:37:59 +02:00
dgtlmoon	3a583a4e5d	Memory management - Run HTML to text in sub process, a few more cleanups to playwright (#3110 )	2025-04-11 18:18:29 +02:00
dgtlmoon	cfb4decf67	UI Edit/Stats - Add levenshtein distance info, explains how "different" the last two snapshot are (#3109 )	2025-04-11 17:36:29 +02:00
		`@@ -0,0 +1 @@`
							`# Import plugins package to make them discoverable`