UI - Adding thumbnails to lister page

0.49.17
Resolve warnings of bs4 library (#3187)
2025-05-14 10:32:45 +02:00 · 2025-05-12 10:47:27 +02:00 · 2025-05-09 14:35:35 +02:00 · 2025-05-09 09:44:02 +02:00 · 2025-05-03 16:43:04 +02:00 · 2025-05-03 16:05:40 +02:00
26 changed files with 663 additions and 323 deletions
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@@ -2,7 +2,7 @@

 # Read more https://github.com/dgtlmoon/changedetection.io/wiki

-__version__ = '0.49.15'
+__version__ = '0.49.17'

 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
--- a/changedetectionio/blueprint/browser_steps/init.py
+++ b/changedetectionio/blueprint/browser_steps/init.py
@@ -168,9 +168,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
            step_optional_value = request.form.get('optional_value')
            is_last_step = strtobool(request.form.get('is_last_step'))

-            # @todo try.. accept.. nice errors not popups..
            try:
-
                browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(action_name=step_operation,
                                         selector=step_selector,
                                         optional_value=step_optional_value)
--- a/changedetectionio/blueprint/browser_steps/browser_steps.py
+++ b/changedetectionio/blueprint/browser_steps/browser_steps.py
@@ -61,23 +61,6 @@ class steppable_browser_interface():

    def __init__(self, start_url):
        self.start_url = start_url
-        
-    def safe_page_operation(self, operation_fn, default_return=None):
-        """Safely execute a page operation with error handling"""
-        if self.page is None:
-            logger.warning("Attempted operation on None page object")
-            return default_return
-            
-        try:
-            return operation_fn()
-        except Exception as e:
-            logger.debug(f"Page operation failed: {str(e)}")
-            # Try to reclaim memory if possible
-            try:
-                self.page.request_gc()
-            except:
-                pass
-            return default_return

    # Convert and perform "Click Button" for example
    def call_action(self, action_name, selector=None, optional_value=None):
@@ -109,20 +92,11 @@ class steppable_browser_interface():
        if optional_value and ('{%' in optional_value or '{{' in optional_value):
            optional_value = jinja_render(template_str=optional_value)

-        try:
-            action_handler(selector, optional_value)
-            # Safely wait for timeout
-            def wait_timeout():
-                self.page.wait_for_timeout(1.5 * 1000)
-            self.safe_page_operation(wait_timeout)
-            logger.debug(f"Call action done in {time.time()-now:.2f}s")
-        except Exception as e:
-            logger.error(f"Error executing action '{call_action_name}': {str(e)}")
-            # Request garbage collection to free up resources after error
-            try:
-                self.page.request_gc()
-            except:
-                pass
+
+        action_handler(selector, optional_value)
+        # Safely wait for timeout
+        self.page.wait_for_timeout(1.5 * 1000)
+        logger.debug(f"Call action done in {time.time()-now:.2f}s")

    def action_goto_url(self, selector=None, value=None):
        if not value:
@@ -130,11 +104,7 @@ class steppable_browser_interface():
            return None
            
        now = time.time()
-        
-        def goto_operation():
-            return self.page.goto(value, timeout=0, wait_until='load')
-            
-        response = self.safe_page_operation(goto_operation)
+        response = self.page.goto(value, timeout=0, wait_until='load')
        logger.debug(f"Time to goto URL {time.time()-now:.2f}s")
        return response

@@ -147,53 +117,40 @@ class steppable_browser_interface():
        if not value or not len(value.strip()):
            return
            
-        def click_operation():
-            elem = self.page.get_by_text(value)
-            if elem.count():
-                elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
-                
-        self.safe_page_operation(click_operation)
+        elem = self.page.get_by_text(value)
+        if elem.count():
+            elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
+

    def action_click_element_containing_text_if_exists(self, selector=None, value=''):
        logger.debug("Clicking element containing text if exists")
        if not value or not len(value.strip()):
            return
            
-        def click_if_exists_operation():
-            elem = self.page.get_by_text(value)
-            logger.debug(f"Clicking element containing text - {elem.count()} elements found")
-            if elem.count():
-                elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
+        elem = self.page.get_by_text(value)
+        logger.debug(f"Clicking element containing text - {elem.count()} elements found")
+        if elem.count():
+            elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
                
-        self.safe_page_operation(click_if_exists_operation)

    def action_enter_text_in_field(self, selector, value):
        if not selector or not len(selector.strip()):
            return

-        def fill_operation():
-            self.page.fill(selector, value, timeout=self.action_timeout)
-            
-        self.safe_page_operation(fill_operation)
+        self.page.fill(selector, value, timeout=self.action_timeout)

    def action_execute_js(self, selector, value):
        if not value:
            return None
            
-        def evaluate_operation():
-            return self.page.evaluate(value)
-            
-        return self.safe_page_operation(evaluate_operation)
+        return self.page.evaluate(value)

    def action_click_element(self, selector, value):
        logger.debug("Clicking element")
        if not selector or not len(selector.strip()):
            return

-        def click_operation():
-            self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500))
-            
-        self.safe_page_operation(click_operation)
+        self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500))

    def action_click_element_if_exists(self, selector, value):
        import playwright._impl._errors as _api_types
@@ -201,16 +158,14 @@ class steppable_browser_interface():
        if not selector or not len(selector.strip()):
            return
            
-        def click_if_exists_operation():
-            try:
-                self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500))
-            except _api_types.TimeoutError:
-                return
-            except _api_types.Error:
-                # Element was there, but page redrew and now its long long gone
-                return
+        try:
+            self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500))
+        except _api_types.TimeoutError:
+            return
+        except _api_types.Error:
+            # Element was there, but page redrew and now its long long gone
+            return
                
-        self.safe_page_operation(click_if_exists_operation)

    def action_click_x_y(self, selector, value):
        if not value or not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value):
@@ -222,10 +177,8 @@ class steppable_browser_interface():
            x = int(float(x.strip()))
            y = int(float(y.strip()))
            
-            def click_xy_operation():
-                self.page.mouse.click(x=x, y=y, delay=randint(200, 500))
+            self.page.mouse.click(x=x, y=y, delay=randint(200, 500))
                
-            self.safe_page_operation(click_xy_operation)
        except Exception as e:
            logger.error(f"Error parsing x,y coordinates: {str(e)}")

@@ -233,27 +186,17 @@ class steppable_browser_interface():
        if not selector or not len(selector.strip()):
            return

-        def select_operation():
-            self.page.select_option(selector, label=value, timeout=self.action_timeout)
-
-        self.safe_page_operation(select_operation)
+        self.page.select_option(selector, label=value, timeout=self.action_timeout)

    def action_scroll_down(self, selector, value):
-        def scroll_operation():
-            # Some sites this doesnt work on for some reason
-            self.page.mouse.wheel(0, 600)
-            self.page.wait_for_timeout(1000)
-            
-        self.safe_page_operation(scroll_operation)
+        # Some sites this doesnt work on for some reason
+        self.page.mouse.wheel(0, 600)
+        self.page.wait_for_timeout(1000)

    def action_wait_for_seconds(self, selector, value):
        try:
            seconds = float(value.strip()) if value else 1.0
-            
-            def wait_operation():
-                self.page.wait_for_timeout(seconds * 1000)
-                
-            self.safe_page_operation(wait_operation)
+            self.page.wait_for_timeout(seconds * 1000)
        except (ValueError, TypeError) as e:
            logger.error(f"Invalid value for wait_for_seconds: {str(e)}")

@@ -263,14 +206,11 @@ class steppable_browser_interface():
            
        import json
        v = json.dumps(value)
-        
-        def wait_for_text_operation():
-            self.page.wait_for_function(
-                f'document.querySelector("body").innerText.includes({v});', 
-                timeout=30000
-            )
+        self.page.wait_for_function(
+            f'document.querySelector("body").innerText.includes({v});',
+            timeout=30000
+        )
            
-        self.safe_page_operation(wait_for_text_operation)

    def action_wait_for_text_in_element(self, selector, value):
        if not selector or not value:
@@ -280,82 +220,60 @@ class steppable_browser_interface():
        s = json.dumps(selector)
        v = json.dumps(value)
        
-        def wait_for_text_in_element_operation():
-            self.page.wait_for_function(
-                f'document.querySelector({s}).innerText.includes({v});', 
-                timeout=30000
-            )
-            
-        self.safe_page_operation(wait_for_text_in_element_operation)
+        self.page.wait_for_function(
+            f'document.querySelector({s}).innerText.includes({v});',
+            timeout=30000
+        )

    # @todo - in the future make some popout interface to capture what needs to be set
    # https://playwright.dev/python/docs/api/class-keyboard
    def action_press_enter(self, selector, value):
-        def press_operation():
-            self.page.keyboard.press("Enter", delay=randint(200, 500))
+        self.page.keyboard.press("Enter", delay=randint(200, 500))
            
-        self.safe_page_operation(press_operation)

    def action_press_page_up(self, selector, value):
-        def press_operation():
-            self.page.keyboard.press("PageUp", delay=randint(200, 500))
-            
-        self.safe_page_operation(press_operation)
+        self.page.keyboard.press("PageUp", delay=randint(200, 500))

    def action_press_page_down(self, selector, value):
-        def press_operation():
-            self.page.keyboard.press("PageDown", delay=randint(200, 500))
-            
-        self.safe_page_operation(press_operation)
+        self.page.keyboard.press("PageDown", delay=randint(200, 500))

    def action_check_checkbox(self, selector, value):
        if not selector:
            return
-            
-        def check_operation():
-            self.page.locator(selector).check(timeout=self.action_timeout)
-            
-        self.safe_page_operation(check_operation)
+
+        self.page.locator(selector).check(timeout=self.action_timeout)

    def action_uncheck_checkbox(self, selector, value):
        if not selector:
            return
            
-        def uncheck_operation():
-            self.page.locator(selector).uncheck(timeout=self.action_timeout)
+        self.page.locator(selector).uncheck(timeout=self.action_timeout)
            
-        self.safe_page_operation(uncheck_operation)

    def action_remove_elements(self, selector, value):
        """Removes all elements matching the given selector from the DOM."""
        if not selector:
            return
            
-        def remove_operation():
-            self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())")
-            
-        self.safe_page_operation(remove_operation)
+        self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())")

    def action_make_all_child_elements_visible(self, selector, value):
        """Recursively makes all child elements inside the given selector fully visible."""
        if not selector:
            return
            
-        def make_visible_operation():
-            self.page.locator(selector).locator("*").evaluate_all("""
-                els => els.forEach(el => {
-                    el.style.display = 'block';   // Forces it to be displayed
-                    el.style.visibility = 'visible';   // Ensures it's not hidden
-                    el.style.opacity = '1';   // Fully opaque
-                    el.style.position = 'relative';   // Avoids 'absolute' hiding
-                    el.style.height = 'auto';   // Expands collapsed elements
-                    el.style.width = 'auto';   // Ensures full visibility
-                    el.removeAttribute('hidden');   // Removes hidden attribute
-                    el.classList.remove('hidden', 'd-none');  // Removes common CSS hidden classes
-                })
-            """)
-            
-        self.safe_page_operation(make_visible_operation)
+        self.page.locator(selector).locator("*").evaluate_all("""
+            els => els.forEach(el => {
+                el.style.display = 'block';   // Forces it to be displayed
+                el.style.visibility = 'visible';   // Ensures it's not hidden
+                el.style.opacity = '1';   // Fully opaque
+                el.style.position = 'relative';   // Avoids 'absolute' hiding
+                el.style.height = 'auto';   // Expands collapsed elements
+                el.style.width = 'auto';   // Ensures full visibility
+                el.removeAttribute('hidden');   // Removes hidden attribute
+                el.classList.remove('hidden', 'd-none');  // Removes common CSS hidden classes
+            })
+        """)

 # Responsible for maintaining a live 'context' with the chrome CDP
 # @todo - how long do contexts live for anyway?
--- a/changedetectionio/blueprint/watchlist/templates/watch-overview.html
+++ b/changedetectionio/blueprint/watchlist/templates/watch-overview.html
@@ -122,7 +122,11 @@
                    {% set mute_label = 'UnMute notification' if watch.notification_muted else 'Mute notification' %}
                    <a class="link-mute state-{{'on' if watch.notification_muted else 'off'}}" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ mute_label }}" title="{{ mute_label }}" class="icon icon-mute" ></a>
                </td>
-                <td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
+                <td class="title-col inline">
+                    {% if watch.get_screenshot() %}
+                    <img class="thumbnail" src="{{url_for('static_content', group='thumbnail', filename=watch.uuid)}}" alt="thumbnail screenshot" title="thumbnail screenshot" >
+                    {% endif %}
+                    <span>{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}</span>
                    <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
                    <a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>

@@ -137,13 +141,11 @@
                    {% if watch.has_browser_steps %}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" title="Browser Steps is enabled" >{% endif %}
                    {% if watch.last_error is defined and watch.last_error != False %}
                    <div class="fetch-error">{{ watch.last_error }}
-
                        {% if '403' in watch.last_error %}
                            {% if has_proxies %}
                                <a href="{{ url_for('settings.settings_page', uuid=watch.uuid) }}#proxies">Try other proxies/location</a>&nbsp;
                            {% endif %}
                            <a href="{{ url_for('settings.settings_page', uuid=watch.uuid) }}#proxies">Try adding external proxies/locations</a>
-                        
                        {% endif %}
                        {% if 'empty result or contain only an image' in watch.last_error %}
                            <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Detecting-changes-in-images">more help here</a>.
@@ -166,7 +168,7 @@
                      <span class="watch-tag-list">{{ watch_tag.title }}</span>
                    {% endfor %}
                </td>
-            <!-- @todo make it so any watch handler obj can expose this --->
+
 {% if any_has_restock_price_processor %}
                <td class="restock-and-price">
                    {% if watch['processor'] == 'restock_diff'  %}
--- a/changedetectionio/conditions/init.py
+++ b/changedetectionio/conditions/init.py
@@ -1,11 +1,9 @@
-from flask import Blueprint
-
 from json_logic.builtins import BUILTINS

 from .exceptions import EmptyConditionRuleRowNotUsable
 from .pluggy_interface import plugin_manager  # Import the pluggy plugin manager
 from . import default_plugin
-
+from loguru import logger
 # List of all supported JSON Logic operators
 operator_choices = [
    (None, "Choose one - Operator"),
@@ -113,12 +111,14 @@ def execute_ruleset_against_all_plugins(current_watch_uuid: str, application_dat
                            application_datastruct=application_datastruct,
                            ephemeral_data=ephemeral_data
                        )
-                        
+                        logger.debug(f"Trying plugin {plugin}....")
+
                        # Set a timeout of 10 seconds
                        try:
                            new_execute_data = future.result(timeout=10)
                            if new_execute_data and isinstance(new_execute_data, dict):
                                EXECUTE_DATA.update(new_execute_data)
+
                        except concurrent.futures.TimeoutError:
                            # The plugin took too long, abort processing for this watch
                            raise Exception(f"Plugin {plugin.__class__.__name__} took more than 10 seconds to run.")
--- a/changedetectionio/conditions/plugins/levenshtein_plugin.py
+++ b/changedetectionio/conditions/plugins/levenshtein_plugin.py
@@ -9,15 +9,20 @@ def levenshtein_ratio_recent_history(watch, incoming_text=None):
    try:
        from Levenshtein import ratio, distance
        k = list(watch.history.keys())
-        if len(k) >= 2:
-            # When called from ui_edit_stats_extras, we don't have incoming_text
-            if incoming_text is None:
-                a = watch.get_history_snapshot(timestamp=k[-1])  # Latest snapshot
-                b = watch.get_history_snapshot(timestamp=k[-2])  # Previous snapshot
-            else:
-                a = watch.get_history_snapshot(timestamp=k[-2]) # Second newest, incoming_text will be "newest"
-                b = incoming_text
-            
+        a = None
+        b = None
+
+        # When called from ui_edit_stats_extras, we don't have incoming_text
+        if incoming_text is None:
+            a = watch.get_history_snapshot(timestamp=k[-1])  # Latest snapshot
+            b = watch.get_history_snapshot(timestamp=k[-2])  # Previous snapshot
+
+        # Needs atleast one snapshot
+        elif len(k) >= 1: # Should be atleast one snapshot to compare against
+            a = watch.get_history_snapshot(timestamp=k[-1]) # Latest saved snapshot
+            b = incoming_text if incoming_text else k[-2]
+
+        if a and b:
            distance_value = distance(a, b)
            ratio_value = ratio(a, b)
            return {
@@ -53,7 +58,7 @@ def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
    # ephemeral_data['text'] will be the current text after filters, they may have edited filters but not saved them yet etc

    if watch and 'text' in ephemeral_data:
-        lev_data = levenshtein_ratio_recent_history(watch, ephemeral_data['text'])
+        lev_data = levenshtein_ratio_recent_history(watch, ephemeral_data.get('text',''))
        if isinstance(lev_data, dict):
            res['levenshtein_ratio'] = lev_data.get('ratio', 0)
            res['levenshtein_similarity'] = lev_data.get('percent_similar', 0)
--- a/changedetectionio/content_fetchers/playwright.py
+++ b/changedetectionio/content_fetchers/playwright.py
@@ -194,7 +194,6 @@ class fetcher(Fetcher):
            browsersteps_interface.page = self.page

            response = browsersteps_interface.action_goto_url(value=url)
-            self.headers = response.all_headers()

            if response is None:
                context.close()
@@ -202,6 +201,8 @@ class fetcher(Fetcher):
                logger.debug("Content Fetcher > Response object from the browser communication was none")
                raise EmptyReply(url=url, status_code=None)

+            self.headers = response.all_headers()
+
            try:
                if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
                    browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
--- a/changedetectionio/content_fetchers/requests.py
+++ b/changedetectionio/content_fetchers/requests.py
@@ -28,6 +28,7 @@ class fetcher(Fetcher):

        import chardet
        import requests
+        from requests.exceptions import ProxyError, ConnectionError, RequestException

        if self.browser_steps_get_valid_steps():
            raise BrowserStepsInUnsupportedFetcher(url=url)
@@ -52,14 +53,19 @@ class fetcher(Fetcher):
        if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
            from requests_file import FileAdapter
            session.mount('file://', FileAdapter())
-
-        r = session.request(method=request_method,
-                            data=request_body.encode('utf-8') if type(request_body) is str else request_body,
-                            url=url,
-                            headers=request_headers,
-                            timeout=timeout,
-                            proxies=proxies,
-                            verify=False)
+        try:
+            r = session.request(method=request_method,
+                                data=request_body.encode('utf-8') if type(request_body) is str else request_body,
+                                url=url,
+                                headers=request_headers,
+                                timeout=timeout,
+                                proxies=proxies,
+                                verify=False)
+        except Exception as e:
+            msg = str(e)
+            if proxies and 'SOCKSHTTPSConnectionPool' in msg:
+                msg = f"Proxy connection failed? {msg}"
+            raise Exception(msg) from e

        # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
        # For example - some sites don't tell us it's utf-8, but return utf-8 content
--- a/changedetectionio/content_fetchers/res/stock-not-in-stock.js
+++ b/changedetectionio/content_fetchers/res/stock-not-in-stock.js
@@ -51,6 +51,7 @@ async () => {
            'niet op voorraad',
            'no disponible',
            'no featured offers available',
+            'no longer available',
            'no longer in stock',
            'no tickets available',
            'non disponibile',
@@ -125,6 +126,20 @@ async () => {
        // so it's good to filter to just the 'above the fold' elements
        // and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist

+        function elementIsInEyeBallRange(element) {
+            // outside the 'fold' or some weird text in the heading area
+            // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
+            // Note: theres also an automated test that places the 'out of stock' text fairly low down
+            // Skip text that could be in the header area
+            if (element.getBoundingClientRect().bottom + window.scrollY <= 300 ) {
+                return false;
+            }
+            // Skip text that could be much further down (like a list of "you may like" products that have 'sold out' in there
+            if (element.getBoundingClientRect().bottom + window.scrollY >= 1300 ) {
+                return false;
+            }
+            return true;
+        }

 // @todo - if it's SVG or IMG, go into image diff mode

@@ -161,9 +176,7 @@ async () => {
        for (let i = elementsToScan.length - 1; i >= 0; i--) {
            const element = elementsToScan[i];

-            // outside the 'fold' or some weird text in the heading area
-            // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
-            if (element.getBoundingClientRect().top + window.scrollY >= vh || element.getBoundingClientRect().top + window.scrollY <= 100) {
+            if (!elementIsInEyeBallRange(element)) {
                continue
            }

@@ -177,11 +190,11 @@ async () => {
            } catch (e) {
                console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
            }
-
            if (elementText.length) {
                // try which ones could mean its in stock
                if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) {
                    console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`)
+                    element.style.border = "2px solid green"; // highlight the element that was detected as in stock
                    return 'Possibly in stock';
                }
            }
@@ -190,10 +203,8 @@ async () => {
        // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
        for (let i = elementsToScan.length - 1; i >= 0; i--) {
            const element = elementsToScan[i];
-            // outside the 'fold' or some weird text in the heading area
-            // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
-            // Note: theres also an automated test that places the 'out of stock' text fairly low down
-            if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
+
+            if (!elementIsInEyeBallRange(element)) {
                continue
            }
            elementText = "";
@@ -208,6 +219,7 @@ async () => {
                for (const outOfStockText of outOfStockTexts) {
                    if (elementText.includes(outOfStockText)) {
                        console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
+                        element.style.border = "2px solid red"; // highlight the element that was detected as out of stock
                        return outOfStockText; // item is out of stock
                    }
                }
--- a/changedetectionio/content_fetchers/webdriver_selenium.py
+++ b/changedetectionio/content_fetchers/webdriver_selenium.py
@@ -10,16 +10,13 @@ class fetcher(Fetcher):
    else:
        fetcher_description = "WebDriver Chrome/Javascript"

-    # Configs for Proxy setup
-    # In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy"
-    selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy',
-                                        'proxyAutoconfigUrl', 'sslProxy', 'autodetect',
-                                        'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
    proxy = None
+    proxy_url = None

    def __init__(self, proxy_override=None, custom_browser_connection_url=None):
        super().__init__()
-        from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
+        from urllib.parse import urlparse
+        from selenium.webdriver.common.proxy import Proxy

        # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
        if not custom_browser_connection_url:
@@ -28,25 +25,27 @@ class fetcher(Fetcher):
            self.browser_connection_is_custom = True
            self.browser_connection_url = custom_browser_connection_url

-        # If any proxy settings are enabled, then we should setup the proxy object
-        proxy_args = {}
-        for k in self.selenium_proxy_settings_mappings:
-            v = os.getenv('webdriver_' + k, False)
-            if v:
-                proxy_args[k] = v.strip('"')

-        # Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy
-        if not proxy_args.get('webdriver_httpProxy') and self.system_http_proxy:
-            proxy_args['httpProxy'] = self.system_http_proxy
-        if not proxy_args.get('webdriver_sslProxy') and self.system_https_proxy:
-            proxy_args['httpsProxy'] = self.system_https_proxy
+        ##### PROXY SETUP #####

-        # Allows override the proxy on a per-request basis
-        if proxy_override is not None:
-            proxy_args['httpProxy'] = proxy_override
+        proxy_sources = [
+            self.system_http_proxy,
+            self.system_https_proxy,
+            os.getenv('webdriver_proxySocks'),
+            os.getenv('webdriver_socksProxy'),
+            os.getenv('webdriver_proxyHttp'),
+            os.getenv('webdriver_httpProxy'),
+            os.getenv('webdriver_proxyHttps'),
+            os.getenv('webdriver_httpsProxy'),
+            os.getenv('webdriver_sslProxy'),
+            proxy_override, # last one should override
+        ]
+        # The built in selenium proxy handling is super unreliable!!! so we just grab which ever proxy setting we can find and throw it in --proxy-server=
+        for k in filter(None, proxy_sources):
+            if not k:
+                continue
+            self.proxy_url = k.strip()

-        if proxy_args:
-            self.proxy = SeleniumProxy(raw=proxy_args)

    def run(self,
            url,
@@ -59,9 +58,7 @@ class fetcher(Fetcher):
            is_binary=False,
            empty_pages_are_a_change=False):

-        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options as ChromeOptions
-        from selenium.common.exceptions import WebDriverException
        # request_body, request_method unused for now, until some magic in the future happens.

        options = ChromeOptions()
@@ -76,59 +73,62 @@ class fetcher(Fetcher):
        for opt in CHROME_OPTIONS:
            options.add_argument(opt)

-        if self.proxy:
-            options.proxy = self.proxy
+        # 1. The selenium proxy_config / Proxy(proxy_config) object is REALLY unreliable
+        # 2. selenium-wire can't be used because its websocket version conflicts with pyppeteer-ng
+        # 3. selenium only allows ONE runner at a time by default!
+        # 4. The driver must call quit() or it will continue to block/hold the selenium process!!

-        self.driver = webdriver.Remote(
-            command_executor=self.browser_connection_url,
-            options=options)
+        if self.proxy_url:
+            options.add_argument(f'--proxy-server={self.proxy_url}')
+
+        from selenium.webdriver.remote.remote_connection import RemoteConnection
+        from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
+        driver = None
+        try:
+            # Create the RemoteConnection and set timeout (e.g., 30 seconds)
+            remote_connection = RemoteConnection(
+                self.browser_connection_url,
+            )
+            remote_connection.set_timeout(30)  # seconds
+
+            # Now create the driver with the RemoteConnection
+            driver = RemoteWebDriver(
+                command_executor=remote_connection,
+                options=options
+            )
+
+            driver.set_page_load_timeout(int(os.getenv("WEBDRIVER_PAGELOAD_TIMEOUT", 45)))
+        except Exception as e:
+            if driver:
+                driver.quit()
+            raise e

        try:
-            self.driver.get(url)
-        except WebDriverException as e:
-            # Be sure we close the session window
-            self.quit()
-            raise
+            driver.get(url)

-        if not "--window-size" in os.getenv("CHROME_OPTIONS", ""):
-            self.driver.set_window_size(1280, 1024)
+            if not "--window-size" in os.getenv("CHROME_OPTIONS", ""):
+                driver.set_window_size(1280, 1024)

-        self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
+            driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))

-        if self.webdriver_js_execute_code is not None:
-            self.driver.execute_script(self.webdriver_js_execute_code)
-            # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
-            self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
+            if self.webdriver_js_execute_code is not None:
+                driver.execute_script(self.webdriver_js_execute_code)
+                # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
+                driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))

+            # @todo - how to check this? is it possible?
+            self.status_code = 200
+            # @todo somehow we should try to get this working for WebDriver
+            # raise EmptyReply(url=url, status_code=r.status_code)

-        # @todo - how to check this? is it possible?
-        self.status_code = 200
-        # @todo somehow we should try to get this working for WebDriver
-        # raise EmptyReply(url=url, status_code=r.status_code)
+            # @todo - dom wait loaded?
+            time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
+            self.content = driver.page_source
+            self.headers = {}
+            self.screenshot = driver.get_screenshot_as_png()
+        except Exception as e:
+            driver.quit()
+            raise e

-        # @todo - dom wait loaded?
-        time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
-        self.content = self.driver.page_source
-        self.headers = {}
+        driver.quit()

-        self.screenshot = self.driver.get_screenshot_as_png()
-
-    # Does the connection to the webdriver work? run a test connection.
-    def is_ready(self):
-        from selenium import webdriver
-        from selenium.webdriver.chrome.options import Options as ChromeOptions
-
-        self.driver = webdriver.Remote(
-            command_executor=self.command_executor,
-            options=ChromeOptions())
-
-        # driver.quit() seems to cause better exceptions
-        self.quit()
-        return True
-
-    def quit(self, watch=None):
-        if self.driver:
-            try:
-                self.driver.quit()
-            except Exception as e:
-                logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}")
--- a/changedetectionio/flask_app.py
+++ b/changedetectionio/flask_app.py
@@ -378,6 +378,42 @@ def changedetection_app(config=None, datastore_o=None):
            except FileNotFoundError:
                abort(404)

+        if group == 'thumbnail':
+            # Could be sensitive, follow password requirements
+            if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated:
+                abort(403)
+
+            # Get the watch object
+            watch = datastore.data['watching'].get(filename)
+            if not watch:
+                abort(404)
+
+            # Generate thumbnail if needed
+            max_age = int(request.args.get('max_age', '3200'))
+            thumbnail_path = watch.get_screenshot_as_thumbnail(max_age=max_age)
+
+            if not thumbnail_path:
+                abort(404)
+
+            try:
+                # Get file modification time for ETag
+                file_mtime = int(os.path.getmtime(thumbnail_path))
+                etag = f'"{file_mtime}"'
+
+                # Check if browser has valid cached version
+                if request.if_none_match and etag in request.if_none_match:
+                    return "", 304  # Not Modified
+
+                # Set up response with appropriate cache headers
+                response = make_response(send_from_directory(os.path.dirname(thumbnail_path), os.path.basename(thumbnail_path)))
+                response.headers['Content-type'] = 'image/jpeg'
+                response.headers['ETag'] = etag
+                response.headers['Cache-Control'] = 'max-age=300, must-revalidate'  # Cache for 5 minutes, then revalidate
+                return response
+
+            except FileNotFoundError:
+                abort(404)
+

        if group == 'visual_selector_data':
            # Could be sensitive, follow password requirements
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@@ -224,27 +224,37 @@ class StringDictKeyValue(StringField):

    def _value(self):
        if self.data:
-            output = u''
-            for k in self.data.keys():
-                output += "{}: {}\r\n".format(k, self.data[k])
-
+            output = ''
+            for k, v in self.data.items():
+                output += f"{k}: {v}\r\n"
            return output
        else:
-            return u''
+            return ''

-    # incoming
+    # incoming data processing + validation
    def process_formdata(self, valuelist):
+        self.data = {}
+        errors = []
        if valuelist:
-            self.data = {}
-            # Remove empty strings
-            cleaned = list(filter(None, valuelist[0].split("\n")))
-            for s in cleaned:
-                parts = s.strip().split(':', 1)
-                if len(parts) == 2:
-                    self.data.update({parts[0].strip(): parts[1].strip()})
+            # Remove empty strings (blank lines)
+            cleaned = [line.strip() for line in valuelist[0].split("\n") if line.strip()]
+            for idx, s in enumerate(cleaned, start=1):
+                if ':' not in s:
+                    errors.append(f"Line {idx} is missing a ':' separator.")
+                    continue
+                parts = s.split(':', 1)
+                key = parts[0].strip()
+                value = parts[1].strip()

-        else:
-            self.data = {}
+                if not key:
+                    errors.append(f"Line {idx} has an empty key.")
+                if not value:
+                    errors.append(f"Line {idx} has an empty value.")
+
+                self.data[key] = value
+
+        if errors:
+            raise ValidationError("Invalid input:\n" + "\n".join(errors))

 class ValidateContentFetcherIsReady(object):
    """
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -309,10 +309,10 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
        soup = BeautifulSoup(content, 'html.parser')

        if ensure_is_ldjson_info_type:
-            bs_result = soup.findAll('script', {"type": "application/ld+json"})
+            bs_result = soup.find_all('script', {"type": "application/ld+json"})
        else:
-            bs_result = soup.findAll('script')
-        bs_result += soup.findAll('body')
+            bs_result = soup.find_all('script')
+        bs_result += soup.find_all('body')

        bs_jsons = []
        for result in bs_result:
@@ -436,55 +436,27 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
    return re.sub(pattern, repl, html_content)


-def html_to_text_sub_worker(conn, html_content: str, render_anchor_tag_content=False, is_rss=False):
+# NOTE!! ANYTHING LIBXML, HTML5LIB ETC WILL CAUSE SOME SMALL MEMORY LEAK IN THE LOCAL "LIB" IMPLEMENTATION OUTSIDE PYTHON

+
+def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False, timeout=10) -> str:
    from inscriptis import get_text
    from inscriptis.model.config import ParserConfig

-    """Converts html string to a string with just the text. If ignoring
-    rendering anchor tag content is enable, anchor tag content are also
-    included in the text
-
-    :param html_content: string with html content
-    :param render_anchor_tag_content: boolean flag indicating whether to extract
-    hyperlinks (the anchor tag content) together with text. This refers to the
-    'href' inside 'a' tags.
-    Anchor tag content is rendered in the following manner:
-    '[ text ](anchor tag content)'
-    :return: extracted text from the HTML
-    """
-    #  if anchor tag content flag is set to True define a config for
-    #  extracting this content
    if render_anchor_tag_content:
        parser_config = ParserConfig(
            annotation_rules={"a": ["hyperlink"]},
            display_links=True
        )
-    # otherwise set config to None/default
    else:
        parser_config = None

-    # RSS Mode - Inscriptis will treat `title` as something else.
-    # Make it as a regular block display element (//item/title)
-    # This is a bit of a hack - the real way it to use XSLT to convert it to HTML #1874
    if is_rss:
        html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
        html_content = re.sub(r'</title>', r'</h1>', html_content)

    text_content = get_text(html_content, config=parser_config)
-    conn.send(text_content)
-    conn.close()
-
-# NOTE!! ANYTHING LIBXML, HTML5LIB ETC WILL CAUSE SOME SMALL MEMORY LEAK IN THE LOCAL "LIB" IMPLEMENTATION OUTSIDE PYTHON
-def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False):
-    from multiprocessing import Process, Pipe
-
-    parent_conn, child_conn = Pipe()
-    p = Process(target=html_to_text_sub_worker, args=(child_conn, html_content, render_anchor_tag_content, is_rss))
-    p.start()
-    text = parent_conn.recv()
-    p.join()
-    return text
+    return text_content

 # Does LD+JSON exist with a @type=='product' and a .price set anywhere?
 def has_ldjson_product_info(content):
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -401,6 +401,70 @@ class model(watch_base):
        # False is not an option for AppRise, must be type None
        return None

+    def get_screenshot_as_thumbnail(self, max_age=3200):
+        """Return path to a square thumbnail of the most recent screenshot.
+
+        Creates a 150x150 pixel thumbnail from the top portion of the screenshot.
+
+        Args:
+            max_age: Maximum age in seconds before recreating thumbnail
+
+        Returns:
+            Path to thumbnail or None if no screenshot exists
+        """
+        import os
+        import time
+
+        thumbnail_path = os.path.join(self.watch_data_dir, "thumbnail.jpeg")
+        top_trim = 500  # Pixels from top of screenshot to use
+
+        screenshot_path = self.get_screenshot()
+        if not screenshot_path:
+            return None
+
+        # Reuse thumbnail if it's fresh and screenshot hasn't changed
+        if os.path.isfile(thumbnail_path):
+            thumbnail_mtime = os.path.getmtime(thumbnail_path)
+            screenshot_mtime = os.path.getmtime(screenshot_path)
+
+            if screenshot_mtime <= thumbnail_mtime and time.time() - thumbnail_mtime < max_age:
+                return thumbnail_path
+
+        try:
+            from PIL import Image
+
+            with Image.open(screenshot_path) as img:
+                # Crop top portion first (full width, top_trim height)
+                top_crop_height = min(top_trim, img.height)
+                img = img.crop((0, 0, img.width, top_crop_height))
+
+                # Create a smaller intermediate image (to reduce memory usage)
+                aspect = img.width / img.height
+                interim_width = min(top_trim, img.width)
+                interim_height = int(interim_width / aspect) if aspect > 0 else top_trim
+                img = img.resize((interim_width, interim_height), Image.NEAREST)
+
+                # Convert to RGB if needed
+                if img.mode != 'RGB':
+                    img = img.convert('RGB')
+
+                # Crop to square from top center
+                square_size = min(img.width, img.height)
+                left = (img.width - square_size) // 2
+                img = img.crop((left, 0, left + square_size, square_size))
+
+                # Final resize to exact thumbnail size with better filter
+                img = img.resize((150, 150), Image.BILINEAR)
+
+                # Save with optimized settings
+                img.save(thumbnail_path, "JPEG", quality=75, optimize=True)
+
+            return thumbnail_path
+
+        except Exception as e:
+            logger.error(f"Error creating thumbnail for {self.get('uuid')}: {str(e)}")
+            return None
+
    def __get_file_ctime(self, filename):
        fname = os.path.join(self.watch_data_dir, filename)
        if os.path.isfile(fname):
--- a/changedetectionio/run_basic_tests.sh
+++ b/changedetectionio/run_basic_tests.sh
@@ -38,6 +38,9 @@ pytest tests/test_backend.py
 pytest tests/test_rss.py
 pytest tests/test_unique_lines.py

+# Try high concurrency
+FETCH_WORKERS=130 pytest  tests/test_history_consistency.py -v -l
+
 # Check file:// will pickup a file when enabled
 echo "Hello world" > /tmp/test-file.txt
 ALLOW_FILE_URI=yes pytest tests/test_security.py
--- a/changedetectionio/run_proxy_tests.sh
+++ b/changedetectionio/run_proxy_tests.sh
@@ -82,3 +82,25 @@ done


 docker kill squid-one squid-two squid-custom
+
+# Test that the UI is returning the correct error message when a proxy is not available
+
+# Requests
+docker run --network changedet-network \
+  test-changedetectionio \
+  bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py'
+
+# Playwright
+docker run --network changedet-network \
+  test-changedetectionio \
+  bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py'
+
+# Puppeteer fast
+docker run --network changedet-network \
+  test-changedetectionio \
+  bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py'
+
+# Selenium
+docker run --network changedet-network \
+  test-changedetectionio \
+  bash -c 'cd changedetectionio && WEBDRIVER_URL=http://selenium:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py'
--- a/changedetectionio/static/styles/scss/parts/_lister_extra.scss
+++ b/changedetectionio/static/styles/scss/parts/_lister_extra.scss
@@ -0,0 +1,30 @@
+.watch-table {
+
+
+  td,
+  th {
+    vertical-align: middle;
+
+  }
+
+  td.inline.title-col {
+    display: flex;
+    align-items: center;
+    gap: 0.5em;
+    flex-wrap: wrap;
+
+    * {
+      display: inline-block;
+    }
+
+    img.thumbnail {
+      width: 32px;
+      object-fit: cover; /* crop/fill if needed */
+      border-radius: 8px; /* subtle rounded corners */
+      box-shadow: 0 2px 6px rgba(0, 0, 0, 0.15); /* soft shadow */
+      border: 1px solid #ddd; /* light border for contrast */
+      filter: contrast(1.05) saturate(1.1) drop-shadow(0 0 0.5px rgba(0, 0, 0, 0.2));
+      background-color: #fff; /* fallback bg for SVGs without bg */
+    }
+  }
+}
--- a/changedetectionio/static/styles/scss/styles.scss
+++ b/changedetectionio/static/styles/scss/styles.scss
@@ -15,6 +15,7 @@
@import "parts/preview_text_filter";
@import "parts/_edit";
@import "parts/_conditions_table";
+@import "parts/_lister_extra";

 body {
  color: var(--color-text);
--- a/changedetectionio/static/styles/styles.css
+++ b/changedetectionio/static/styles/styles.css
@@ -623,6 +623,31 @@ ul#conditions_match_logic {
  .fieldlist_formfields .addRuleRow:hover, .fieldlist_formfields .removeRuleRow:hover, .fieldlist_formfields .verifyRuleRow:hover {
    background-color: #999; }

+.watch-table td,
+.watch-table th {
+  vertical-align: middle; }
+
+.watch-table td.inline.title-col {
+  display: flex;
+  align-items: center;
+  gap: 0.5em;
+  flex-wrap: wrap; }
+  .watch-table td.inline.title-col * {
+    display: inline-block; }
+  .watch-table td.inline.title-col img.thumbnail {
+    width: 32px;
+    object-fit: cover;
+    /* crop/fill if needed */
+    border-radius: 8px;
+    /* subtle rounded corners */
+    box-shadow: 0 2px 6px rgba(0, 0, 0, 0.15);
+    /* soft shadow */
+    border: 1px solid #ddd;
+    /* light border for contrast */
+    filter: contrast(1.05) saturate(1.1) drop-shadow(0 0 0.5px rgba(0, 0, 0, 0.2));
+    background-color: #fff;
+    /* fallback bg for SVGs without bg */ }
+
 body {
  color: var(--color-text);
  background: var(--color-background-page);
--- a/changedetectionio/tests/proxy_list/test_proxy_noconnect.py
+++ b/changedetectionio/tests/proxy_list/test_proxy_noconnect.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+
+from flask import url_for
+from ..util import live_server_setup, wait_for_all_checks
+import os
+from ... import strtobool
+
+
+# Just to be sure the UI outputs the right error message when the proxy connection fails
+# docker run -p 4444:4444 --rm --shm-size="2g"  selenium/standalone-chrome:4
+# PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 pytest tests/proxy_list/test_proxy_noconnect.py
+# FAST_PUPPETEER_CHROME_FETCHER=True PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 pytest tests/proxy_list/test_proxy_noconnect.py
+# WEBDRIVER_URL=http://127.0.0.1:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py
+
+def test_proxy_noconnect_custom(client, live_server, measure_memory_usage):
+    live_server_setup(live_server)
+
+    # Goto settings, add our custom one
+    res = client.post(
+        url_for("settings.settings_page"),
+        data={
+            "requests-time_between_check-minutes": 180,
+            "application-ignore_whitespace": "y",
+            "application-fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') or os.getenv("WEBDRIVER_URL") else 'html_requests',
+            "requests-extra_proxies-0-proxy_name": "custom-test-proxy",
+            # test:awesome is set in tests/proxy_list/squid-passwords.txt
+            "requests-extra_proxies-0-proxy_url": "http://127.0.0.1:3128",
+        },
+        follow_redirects=True
+    )
+
+    assert b"Settings updated." in res.data
+
+    test_url = "https://changedetection.io"
+    res = client.post(
+        url_for("ui.ui_views.form_quick_watch_add"),
+        data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
+        follow_redirects=True
+    )
+
+    assert b"Watch added in Paused state, saving will unpause" in res.data
+
+    options = {
+        "url": test_url,
+        "fetch_backend": "html_webdriver" if os.getenv('PLAYWRIGHT_DRIVER_URL') or os.getenv("WEBDRIVER_URL") else "html_requests",
+        "proxy": "ui-0custom-test-proxy",
+    }
+
+    res = client.post(
+        url_for("ui.ui_edit.edit_page", uuid="first", unpause_on_save=1),
+        data=options,
+        follow_redirects=True
+    )
+    assert b"unpaused" in res.data
+    import time
+    wait_for_all_checks(client)
+
+    # Requests default
+    check_string = b'Cannot connect to proxy'
+
+    if os.getenv('PLAYWRIGHT_DRIVER_URL') or strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')) or os.getenv("WEBDRIVER_URL"):
+        check_string = b'ERR_PROXY_CONNECTION_FAILED'
+
+
+    res = client.get(url_for("watchlist.index"))
+    #with open("/tmp/debug.html", 'wb') as f:
+    #    f.write(res.data)
+    assert check_string in res.data
--- a/changedetectionio/tests/restock/test_restock.py
+++ b/changedetectionio/tests/restock/test_restock.py
@@ -14,6 +14,8 @@ from changedetectionio.notification import (
 def set_original_response():
    test_return_data = """<html>
       <body>
+       <section id=header style="padding: 50px; height: 350px">This is the header which should be ignored always - <span>add to cart</span></section>
+       <!-- stock-not-in-stock.js will ignore text in the first 300px, see elementIsInEyeBallRange(), sometimes "add to cart" and other junk is here -->
     Some initial text<br>
     <p>Which is across multiple lines</p>
     <br>
@@ -52,8 +54,6 @@ def test_restock_detection(client, live_server, measure_memory_usage):

    set_original_response()
    #assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
-
-    time.sleep(1)
    live_server_setup(live_server)
    #####################
    notification_url = url_for('test_notification_endpoint', _external=True).replace('http://localhost', 'http://changedet').replace('http', 'json')
@@ -84,7 +84,8 @@ def test_restock_detection(client, live_server, measure_memory_usage):
    # Is it correctly show as NOT in stock?
    wait_for_all_checks(client)
    res = client.get(url_for("watchlist.index"))
-    assert b'not-in-stock' in res.data
+    assert b'processor-restock_diff' in res.data # Should have saved in restock mode
+    assert b'not-in-stock' in res.data # should be out of stock

    # Is it correctly shown as in stock
    set_back_in_stock_response()
--- a/changedetectionio/tests/test_conditions.py
+++ b/changedetectionio/tests/test_conditions.py
@@ -196,7 +196,11 @@ def test_condition_validate_rule_row(client, live_server):
    )
    assert res.status_code == 200
    assert b'false' in res.data
-
+    # cleanup for the next
+    client.get(
+        url_for("ui.form_delete", uuid="all"),
+        follow_redirects=True
+    )



@@ -235,4 +239,107 @@ def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage):
    )

    # Assert the word count is counted correctly
-    assert b'<td>13</td>' in res.data
+    assert b'<td>13</td>' in res.data
+
+    # cleanup for the next
+    client.get(
+        url_for("ui.form_delete", uuid="all"),
+        follow_redirects=True
+    )
+
+# A change should only be detected once the Levenshtein ratio condition (< 0.8) is met; smaller edits must not trigger it
+def test_lev_conditions_plugin(client, live_server, measure_memory_usage):
+    #live_server_setup(live_server)
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write("""<html>
+       <body>
+     Some initial text<br>
+     <p>Which is across multiple lines</p>
+     <br>
+     So let's see what happens.  <br>
+     </body>
+     </html>
+    """)
+
+    # Add our URL to the import page
+    test_url = url_for('test_endpoint', _external=True)
+    res = client.post(
+        url_for("ui.ui_views.form_quick_watch_add"),
+        data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
+        follow_redirects=True
+    )
+    assert b"Watch added in Paused state, saving will unpause" in res.data
+
+    uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
+    # Give the thread time to pick it up
+    wait_for_all_checks(client)
+    res = client.post(
+        url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1),
+        data={
+            "url": test_url,
+            "fetch_backend": "html_requests",
+            "conditions_match_logic": "ALL",  # ALL = AND logic
+            "conditions-0-field": "levenshtein_ratio",
+            "conditions-0-operator": "<",
+            "conditions-0-value": "0.8" # needs to be more of a diff to trigger a change
+        },
+        follow_redirects=True
+    )
+
+    assert b"unpaused" in res.data
+
+    wait_for_all_checks(client)
+    res = client.get(url_for("watchlist.index"))
+    assert b'unviewed' not in res.data
+
+    # Check the content was saved initially, even though a condition was set - this is the first snapshot so it shouldn't be affected by conditions
+    res = client.get(
+        url_for("ui.ui_views.preview_page", uuid=uuid),
+        follow_redirects=True
+    )
+    assert b'Which is across multiple lines' in res.data
+
+
+    ############### Now change it a LITTLE bit...
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write("""<html>
+       <body>
+     Some initial text<br>
+     <p>Which is across multiple lines</p>
+     <br>
+     So let's see what happenxxxxxxxxx.  <br>
+     </body>
+     </html>
+    """)
+
+    res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    assert b'Queued 1 watch for rechecking.' in res.data
+    wait_for_all_checks(client)
+
+    res = client.get(url_for("watchlist.index"))
+    assert b'unviewed' not in res.data #because this will be like 0.90 not 0.8 threshold
+
+    ############### Now change it by MORE THAN 50%
+    test_return_data = """<html>
+       <body>
+     Some sxxxx<br>
+     <p>Which is across a lines</p>
+     <br>
+     ok.  <br>
+     </body>
+     </html>
+    """
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write(test_return_data)
+    res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    assert b'Queued 1 watch for rechecking.' in res.data
+    wait_for_all_checks(client)
+    res = client.get(url_for("watchlist.index"))
+    assert b'unviewed' in res.data
+    # cleanup for the next
+    client.get(
+        url_for("ui.form_delete", uuid="all"),
+        follow_redirects=True
+    )
--- a/changedetectionio/tests/test_history_consistency.py
+++ b/changedetectionio/tests/test_history_consistency.py
@@ -10,8 +10,8 @@ from urllib.parse import urlparse, parse_qs

 def test_consistent_history(client, live_server, measure_memory_usage):
    live_server_setup(live_server)
-
-    r = range(1, 30)
+    workers = int(os.getenv("FETCH_WORKERS", 10))
+    r = range(1, 10+workers)

    for one in r:
        test_url = url_for('test_endpoint', content_type="text/html", content=str(one), _external=True)
@@ -46,9 +46,10 @@ def test_consistent_history(client, live_server, measure_memory_usage):

    # assert the right amount of watches was found in the JSON
    assert len(json_obj['watching']) == len(r), "Correct number of watches was found in the JSON"
-
+    i=0
    # each one should have a history.txt containing just one line
    for w in json_obj['watching'].keys():
+        i+=1
        history_txt_index_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, 'history.txt')
        assert os.path.isfile(history_txt_index_file), f"History.txt should exist where I expect it at {history_txt_index_file}"

@@ -58,8 +59,8 @@ def test_consistent_history(client, live_server, measure_memory_usage):
            assert len(tmp_history) == 1, "History.txt should contain 1 line"

        # Should be two files,. the history.txt , and the snapshot.txt
-        files_in_watch_dir = os.listdir(os.path.join(live_server.app.config['DATASTORE'].datastore_path,
-                                                     w))
+        files_in_watch_dir = os.listdir(os.path.join(live_server.app.config['DATASTORE'].datastore_path, w))
+
        # Find the snapshot one
        for fname in files_in_watch_dir:
            if fname != 'history.txt' and 'html' not in fname:
@@ -75,7 +76,6 @@ def test_consistent_history(client, live_server, measure_memory_usage):

        assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot"

-
    json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')
    with open(json_db_file, 'r') as f:
        assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved"
--- a/changedetectionio/tests/test_request.py
+++ b/changedetectionio/tests/test_request.py
@@ -424,3 +424,27 @@ def test_headers_textfile_in_request(client, live_server, measure_memory_usage):
    # unlink headers.txt on start/stop
    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
+
+def test_headers_validation(client, live_server):
+    #live_server_setup(live_server)
+
+    test_url = url_for('test_headers', _external=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+
+    res = client.post(
+        url_for("ui.ui_edit.edit_page", uuid="first"),
+        data={
+            "url": test_url,
+            "fetch_backend": 'html_requests',
+            "headers": "User-AGent agent-from-watch\r\nsadfsadfsadfsdaf\r\n:foobar"},
+        follow_redirects=True
+    )
+
+    assert b"Line 1 is missing a &#39;:&#39; separator." in res.data
+    assert b"Line 3 has an empty key." in res.data
+
--- a/changedetectionio/tests/util.py
+++ b/changedetectionio/tests/util.py
@@ -126,18 +126,51 @@ def extract_UUID_from_client(client):
    uuid = m.group(1)
    return uuid.strip()

-def wait_for_all_checks(client):
-    # actually this is not entirely true, it can still be 'processing' but not in the queue
-    # Loop waiting until done..
-    attempt=0
-    # because sub-second rechecks are problematic in testing, use lots of delays
-    time.sleep(1)
-    while attempt < 60:
-        res = client.get(url_for("watchlist.index"))
-        if not b'Checking now' in res.data:
-            break
-        logging.getLogger().info("Waiting for watch-list to not say 'Checking now'.. {}".format(attempt))
-        time.sleep(1)
+
+def wait_for_all_checks(client=None):
+    """
+    Waits until the queue is empty and remains empty for at least `required_empty_duration` seconds,
+    and also ensures no running threads have `current_uuid` set.
+    Retries up to `max_attempts` times; each attempt currently sleeps a fixed 1.2 seconds (`wait_between_attempts` is not used by the loop).
+    """
+    from changedetectionio.flask_app import update_q as global_update_q, running_update_threads
+
+    # Configuration
+    attempt = 0
+    i=0
+    max_attempts = 60
+    wait_between_attempts = 2
+    required_empty_duration = 2
+
+    logger = logging.getLogger()
+    time.sleep(1.2)
+
+    empty_since = None
+
+    while attempt < max_attempts:
+        q_length = global_update_q.qsize()
+
+        # Check if any threads are still processing
+        time.sleep(1.2)
+        any_threads_busy = any(t.current_uuid for t in running_update_threads)
+
+
+        if q_length == 0 and not any_threads_busy:
+            if empty_since is None:
+                empty_since = time.time()
+                logger.info(f"Queue empty and no active threads at attempt {attempt}, starting empty timer...")
+            elif time.time() - empty_since >= required_empty_duration:
+                logger.info(f"Queue has been empty and threads idle for {required_empty_duration} seconds. Done waiting.")
+                break
+            else:
+                logger.info(f"Still waiting: queue empty and no active threads, but not yet {required_empty_duration} seconds...")
+        else:
+            if q_length != 0:
+                logger.info(f"Queue not empty (size={q_length}), resetting timer.")
+            if any_threads_busy:
+                busy_threads = [t.name for t in running_update_threads if t.current_uuid]
+                logger.info(f"Threads still busy: {busy_threads}, resetting timer.")
+            empty_since = None
        attempt += 1

    time.sleep(1)
--- a/requirements.txt
+++ b/requirements.txt
@@ -42,7 +42,7 @@ paho-mqtt!=2.0.*
 cryptography~=42.0.8

 # Used for CSS filtering
-beautifulsoup4
+beautifulsoup4>=4.0.0

 # XPath filtering, lxml is required by bs4 anyway, but put it here to be safe.
 # #2328 - 5.2.0 and 5.2.1 had extra CPU flag CFLAGS set which was not compatible on older hardware
@@ -53,7 +53,7 @@ lxml >=4.8.0,<6,!=5.2.0,!=5.2.1
 # XPath 2.0-3.1 support - 4.2.0 broke something?
 elementpath==4.1.5

-selenium~=4.14.0
+selenium~=4.31.0

 # https://github.com/pallets/werkzeug/issues/2985
 # Maybe related to pytest?
@@ -70,7 +70,7 @@ jq~=1.3; python_version >= "3.8" and sys_platform == "linux"

 # playwright is installed at Dockerfile build time because it's not available on all platforms

-pyppeteer-ng==2.0.0rc9
+pyppeteer-ng==2.0.0rc10

 pyppeteerstealth>=0.0.4

@@ -90,6 +90,8 @@ extruct
 # For cleaning up unknown currency formats
 babel

+levenshtein
+
 # Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
 greenlet >= 3.0.3
Author	SHA1	Message	Date
dgtlmoon	ccd9419c88	UI - Adding thumbnails to lister page	2025-05-14 10:32:45 +02:00
dgtlmoon	c162ec9d52	0.49.17	2025-05-12 10:47:27 +02:00
Emmanuel Ferdman	bb7f7f473b	Resolve warnings of bs4 library (#3187 )	2025-05-09 14:35:35 +02:00
dgtlmoon	a9ca511004	Revert memory strategy change for html_to_text (Was hanging under high concurrency setups)	2025-05-09 09:44:02 +02:00
dgtlmoon	8df61f5eaa	0.49.16	2025-05-03 16:43:04 +02:00
dgtlmoon	162f573967	Fixes to ensure proxy errors are handled correctly (#3168 )	2025-05-03 16:05:40 +02:00
dgtlmoon	eada0ef08d	UI - Custom headers should have validation (#3172 )	2025-05-03 13:57:42 +02:00
dgtlmoon	f57bc10973	Update selenium library (#3170 )	2025-05-02 14:05:23 +02:00
dgtlmoon	d2e8f822d6	Restock detection - adding new string	2025-05-01 17:58:36 +02:00
dgtlmoon	5fd8200fd9	Conditions - Levenshtein text similarity plugin - adding test, fixing import, fixing check for watches with 1 snapshot history (#3161 )	2025-04-30 16:47:23 +02:00
dgtlmoon	d0da8c9825	Restock detection - Use cleaner logic for limiting elements to scan, refactor, improve tests (#3158 )	2025-04-30 10:57:33 +02:00