Attempt retry

Better messages
Provide more information on PageUnloadable
2022-12-16 17:29:16 +01:00 · 2022-12-16 17:04:12 +01:00 · 2022-12-15 09:13:09 +01:00 · 2022-12-14 19:06:49 +01:00 · 2022-12-14 15:08:34 +01:00 · 2022-12-14 12:16:04 +01:00
22 changed files with 527 additions and 125 deletions
--- a/5
+++ b/5
@@ -1,7 +1,7 @@
 # pip dependencies install stage
 FROM python:3.8-slim as builder

-# rustc compiler would be needed on ARM type devices but theres an issue with some deps not building..
+# See `cryptography` pin comment in requirements.txt
 ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1

 RUN apt-get update && apt-get install -y --no-install-recommends \
@@ -31,8 +31,7 @@ RUN pip install --target=/dependencies playwright~=1.27.1 \
 # Final image stage
 FROM python:3.8-slim

-# Actual packages needed at runtime, usually due to the notification (apprise) backend
-# rustc compiler would be needed on ARM type devices but theres an issue with some deps not building..
+# See `cryptography` pin comment in requirements.txt
 ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1

 # Re #93, #73, excluding rustc (adds another 430Mb~)
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,9 +1,10 @@
 recursive-include changedetectionio/api *
-recursive-include changedetectionio/templates *
-recursive-include changedetectionio/static *
+recursive-include changedetectionio/blueprint *
 recursive-include changedetectionio/model *
-recursive-include changedetectionio/tests *
 recursive-include changedetectionio/res *
+recursive-include changedetectionio/static *
+recursive-include changedetectionio/templates *
+recursive-include changedetectionio/tests *
 prune changedetectionio/static/package-lock.json
 prune changedetectionio/static/styles/node_modules
 prune changedetectionio/static/styles/package-lock.json
--- a/README.md
+++ b/README.md
@@ -187,11 +187,29 @@ When you enable a `json:` or `jq:` filter, you can even automatically extract an
 <html>
 ...
 <script type="application/ld+json">
-  {"@context":"http://schema.org","@type":"Product","name":"Nan Optipro Stage 1 Baby Formula  800g","price": 23.50 }
+
+{
+   "@context":"http://schema.org/",
+   "@type":"Product",
+   "offers":{
+      "@type":"Offer",
+      "availability":"http://schema.org/InStock",
+      "price":"3949.99",
+      "priceCurrency":"USD",
+      "url":"https://www.newegg.com/p/3D5-000D-001T1"
+   },
+   "description":"Cobratype King Cobra Hero Desktop Gaming PC",
+   "name":"Cobratype King Cobra Hero Desktop Gaming PC",
+   "sku":"3D5-000D-001T1",
+   "itemCondition":"NewCondition"
+}
 </script>
 ```  

-`json:$.price` or `jq:.price` would give `23.50`, or you can extract the whole structure
+`json:$..price` or `jq:.offers.price` would give `3949.99`, or you can extract the whole structure (use a JSONPath test website to validate your filter)
+
+The application also supports notifying you that it can follow this information automatically
+

 ## Proxy Configuration

--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@@ -10,6 +10,7 @@ import threading
 import time
 import timeago

+from changedetectionio import queuedWatchMetaData
 from copy import deepcopy
 from distutils.util import strtobool
 from feedgen.feed import FeedGenerator
@@ -35,7 +36,7 @@ from flask_wtf import CSRFProtect
 from changedetectionio import html_tools
 from changedetectionio.api import api_v1

-__version__ = '0.39.22.1'
+__version__ = '0.40.0.2'

 datastore = None

@@ -404,7 +405,6 @@ def changedetection_app(config=None, datastore_o=None):
                sorted_watches.append(watch)

        existing_tags = datastore.get_all_tags()
-
        form = forms.quickWatchForm(request.form)
        output = render_template("watch-overview.html",
                                 form=form,
@@ -416,7 +416,7 @@ def changedetection_app(config=None, datastore_o=None):
                                 # Don't link to hosting when we're on the hosting environment
                                 hosted_sticky=os.getenv("SALTED_PASS", False) == False,
                                 guid=datastore.data['app_guid'],
-                                 queued_uuids=[uuid for p,uuid in update_q.queue])
+                                 queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue])


        if session.get('share-link'):
@@ -596,25 +596,16 @@ def changedetection_app(config=None, datastore_o=None):
                    using_default_check_time = False
                    break

-            # Use the default if its the same as system wide
+            # Use the default if it's the same as system-wide.
            if form.fetch_backend.data == datastore.data['settings']['application']['fetch_backend']:
                extra_update_obj['fetch_backend'] = None


+
             # Ignore text
            form_ignore_text = form.ignore_text.data
            datastore.data['watching'][uuid]['ignore_text'] = form_ignore_text

-            # Reset the previous_md5 so we process a new snapshot including stripping ignore text.
-            if form_ignore_text:
-                if len(datastore.data['watching'][uuid].history):
-                    extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)
-
-            # Reset the previous_md5 so we process a new snapshot including stripping ignore text.
-            if form.include_filters.data != datastore.data['watching'][uuid].get('include_filters', []):
-                if len(datastore.data['watching'][uuid].history):
-                    extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)
-
            # Be sure proxy value is None
            if datastore.proxy_list is not None and form.data['proxy'] == '':
                extra_update_obj['proxy'] = None
@@ -632,7 +623,7 @@ def changedetection_app(config=None, datastore_o=None):
            datastore.needs_write_urgent = True

            # Queue the watch for immediate recheck, with a higher priority
-            update_q.put((1, uuid))
+            update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))

            # Diff page [edit] link should go back to diff page
            if request.args.get("next") and request.args.get("next") == 'diff':
@@ -773,7 +764,7 @@ def changedetection_app(config=None, datastore_o=None):
                importer = import_url_list()
                importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore)
                for uuid in importer.new_uuids:
-                    update_q.put((1, uuid))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))

                if len(importer.remaining_data) == 0:
                    return redirect(url_for('index'))
@@ -786,7 +777,7 @@ def changedetection_app(config=None, datastore_o=None):
                d_importer = import_distill_io_json()
                d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
                for uuid in d_importer.new_uuids:
-                    update_q.put((1, uuid))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))



@@ -1151,7 +1142,7 @@ def changedetection_app(config=None, datastore_o=None):

        if not add_paused and new_uuid:
            # Straight into the queue.
-            update_q.put((1, new_uuid))
+            update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
            flash("Watch added.")

        if add_paused:
@@ -1188,7 +1179,7 @@ def changedetection_app(config=None, datastore_o=None):
            uuid = list(datastore.data['watching'].keys()).pop()

        new_uuid = datastore.clone(uuid)
-        update_q.put((5, new_uuid))
+        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
        flash('Cloned.')

        return redirect(url_for('index'))
@@ -1196,7 +1187,7 @@ def changedetection_app(config=None, datastore_o=None):
    @app.route("/api/checknow", methods=['GET'])
    @login_required
    def form_watch_checknow():
-
+        # Forced recheck will skip the 'skip if content is the same' rule (items are queued with 'skip_when_checksum_same': False)
        tag = request.args.get('tag')
        uuid = request.args.get('uuid')
        i = 0
@@ -1205,11 +1196,9 @@ def changedetection_app(config=None, datastore_o=None):
        for t in running_update_threads:
            running_uuids.append(t.current_uuid)

-        # @todo check thread is running and skip
-
        if uuid:
            if uuid not in running_uuids:
-                update_q.put((1, uuid))
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
            i = 1

        elif tag != None:
@@ -1217,14 +1206,14 @@ def changedetection_app(config=None, datastore_o=None):
            for watch_uuid, watch in datastore.data['watching'].items():
                if (tag != None and tag in watch['tag']):
                    if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
-                        update_q.put((1, watch_uuid))
+                        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
                        i += 1

        else:
            # No tag, no uuid, add everything.
            for watch_uuid, watch in datastore.data['watching'].items():
                if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
-                    update_q.put((1, watch_uuid))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
                    i += 1
        flash("{} watches are queued for rechecking.".format(i))
        return redirect(url_for('index', tag=tag))
@@ -1343,6 +1332,10 @@ def changedetection_app(config=None, datastore_o=None):
    import changedetectionio.blueprint.browser_steps as browser_steps
    app.register_blueprint(browser_steps.construct_blueprint(datastore), url_prefix='/browser-steps')

+    import changedetectionio.blueprint.price_data_follower as price_data_follower
+    app.register_blueprint(price_data_follower.construct_blueprint(datastore, update_q), url_prefix='/price_data_follower')
+
+
    # @todo handle ctrl break
    ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()
    threading.Thread(target=notification_runner).start()
@@ -1488,7 +1481,7 @@ def ticker_thread_check_time_launch_checks():
            seconds_since_last_recheck = now - watch['last_checked']

            if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
-                if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]:
+                if not uuid in running_uuids and uuid not in [q_uuid.item['uuid'] for q_uuid in update_q.queue]:

                    # Proxies can be set to have a limit on seconds between which they can be called
                    watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid)
@@ -1519,8 +1512,9 @@ def ticker_thread_check_time_launch_checks():
                            priority,
                            watch.jitter_seconds,
                            now - watch['last_checked']))
+
                    # Into the queue with you
-                    update_q.put((priority, uuid))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True}))

                    # Reset for next time
                    watch.jitter_seconds = 0
--- a/changedetectionio/api/api_v1.py
+++ b/changedetectionio/api/api_v1.py
@@ -1,3 +1,4 @@
+from changedetectionio import queuedWatchMetaData
 from flask_restful import abort, Resource
 from flask import request, make_response
 import validators
@@ -24,7 +25,7 @@ class Watch(Resource):
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))

        if request.args.get('recheck'):
-            self.update_q.put((1, uuid))
+            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
            return "OK", 200

        # Return without history, get that via another API call
@@ -100,7 +101,7 @@ class CreateWatch(Resource):
        extras = {'title': json_data['title'].strip()} if json_data.get('title') else {}

        new_uuid = self.datastore.add_watch(url=json_data['url'].strip(), tag=tag, extras=extras)
-        self.update_q.put((1, new_uuid))
+        self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
        return {'uuid': new_uuid}, 201

    # Return concise list of available watches and some very basic info
@@ -118,7 +119,7 @@ class CreateWatch(Resource):

        if request.args.get('recheck_all'):
            for uuid in self.datastore.data['watching'].keys():
-                self.update_q.put((1, uuid))
+                self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
            return {'status': "OK"}, 200

        return list, 200
--- a/changedetectionio/blueprint/browser_steps/browser_steps.py
+++ b/changedetectionio/blueprint/browser_steps/browser_steps.py
@@ -75,15 +75,13 @@ class steppable_browser_interface():
    def action_goto_url(self, url, optional_value):
        # self.page.set_viewport_size({"width": 1280, "height": 5000})
        now = time.time()
-        response = self.page.goto(url, timeout=0, wait_until='domcontentloaded')
-        print("Time to goto URL", time.time() - now)
+        response = self.page.goto(url, timeout=0, wait_until='commit')

        # Wait_until = commit
        # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
        # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
        # This seemed to solve nearly all 'TimeoutErrors'
-        extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))
-        self.page.wait_for_timeout(extra_wait * 1000)
+        print("Time to goto URL ", time.time() - now)

    def action_click_element_containing_text(self, selector=None, value=''):
        if not len(value.strip()):
--- a/changedetectionio/blueprint/price_data_follower/init.py
+++ b/changedetectionio/blueprint/price_data_follower/init.py
@@ -0,0 +1,33 @@
+
+from distutils.util import strtobool
+from flask import Blueprint, flash, redirect, url_for
+from flask_login import login_required
+from changedetectionio.store import ChangeDetectionStore
+from changedetectionio import queuedWatchMetaData
+from queue import PriorityQueue
+
+PRICE_DATA_TRACK_ACCEPT = 'accepted'
+PRICE_DATA_TRACK_REJECT = 'rejected'
+
+def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue):
+    """Build the blueprint exposing the login-protected accept/reject routes for LD+JSON price tracking."""
+    price_data_follower_blueprint = Blueprint('price_data_follower', __name__)
+
+    # route() must be the outermost decorator, otherwise Flask registers the view without login_required applied
+    @price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET'])
+    @login_required
+    def accept(uuid):
+        datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
+        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
+        return redirect(url_for("form_watch_checknow", uuid=uuid))
+
+    # Same ordering as accept(): route() outermost so login_required actually guards the view
+    @price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET'])
+    @login_required
+    def reject(uuid):
+        datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT
+        return redirect(url_for("index"))
+
+    return price_data_follower_blueprint
+
+
--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@@ -23,6 +23,9 @@ class Non200ErrorCodeReceived(Exception):
            self.page_text = html_tools.html_to_text(page_html)
        return

+class checksumFromPreviousCheckWasTheSame(Exception):
+    def __init__(self):
+        return

 class JSActionExceptions(Exception):
    def __init__(self, status_code, url, screenshot, message=''):
@@ -39,7 +42,7 @@ class BrowserStepsStepTimout(Exception):


 class PageUnloadable(Exception):
-    def __init__(self, status_code, url, screenshot=False, message=False):
+    def __init__(self, status_code, url, message, screenshot=False):
        # Set this so we can use it in other parts of the app
        self.status_code = status_code
        self.url = url
@@ -286,6 +289,8 @@ class base_html_playwright(Fetcher):
                proxy=self.proxy,
                # This is needed to enable JavaScript execution on GitHub and others
                bypass_csp=True,
+                # Can't think why we need the service workers for our use case?
+                service_workers='block',
                # Should never be needed
                accept_downloads=False
            )
@@ -294,24 +299,34 @@ class base_html_playwright(Fetcher):
            if len(request_headers):
                context.set_extra_http_headers(request_headers)

-            try:
                self.page.set_default_navigation_timeout(90000)
                self.page.set_default_timeout(90000)

                # Listen for all console events and handle errors
                self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))

-                # Bug - never set viewport size BEFORE page.goto
-
-
-                # Waits for the next navigation. Using Python context manager
-                # prevents a race condition between clicking and waiting for a navigation.
-                with self.page.expect_navigation():
-                    response = self.page.goto(url, wait_until='load')
+            # Goto page
+            try:
                # Wait_until = commit
                # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
                # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
                # This seemed to solve nearly all 'TimeoutErrors'
+                response = self.page.goto(url, wait_until='commit')
+            except playwright._impl._api_types.Error as e:
+                # Retry once - https://github.com/browserless/chrome/issues/2485
+                # Sometimes errors related to invalid certs and others can be random
+                print ("Content Fetcher > retrying request got error - ", str(e))
+                time.sleep(1)
+                response = self.page.goto(url, wait_until='commit')
+
+            except Exception as e:
+                print ("Content Fetcher > Other exception when page.goto", str(e))
+                context.close()
+                browser.close()
+                raise PageUnloadable(url=url, status_code=None, message=str(e))
+
+            # Execute any browser steps
+            try:
                extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
                self.page.wait_for_timeout(extra_wait * 1000)

@@ -324,17 +339,15 @@ class base_html_playwright(Fetcher):
                # This can be ok, we will try to grab what we could retrieve
                pass
            except Exception as e:
-                print ("other exception when page.goto")
-                print (str(e))
+                print ("Content Fetcher > Other exception when executing custom JS code", str(e))
                context.close()
                browser.close()
-                raise PageUnloadable(url=url, status_code=None)
-
+                raise PageUnloadable(url=url, status_code=None, message=str(e))

            if response is None:
                context.close()
                browser.close()
-                print ("response object was none")
+                print ("Content Fetcher > Response object was none")
                raise EmptyReply(url=url, status_code=None)

            # Bug 2(?) Set the viewport size AFTER loading the page
@@ -353,7 +366,7 @@ class base_html_playwright(Fetcher):
            if len(self.page.content().strip()) == 0:
                context.close()
                browser.close()
-                print ("Content was empty")
+                print ("Content Fetcher > Content was empty")
                raise EmptyReply(url=url, status_code=None)

            # Bug 2(?) Set the viewport size AFTER loading the page
@@ -498,7 +511,7 @@ class base_html_webdriver(Fetcher):
            try:
                self.driver.quit()
            except Exception as e:
-                print("Exception in chrome shutdown/quit" + str(e))
+                print("Content Fetcher > Exception in chrome shutdown/quit" + str(e))


 # "html_requests" is listed as the default fetcher in store.py!
--- a/changedetectionio/fetch_site_status.py
+++ b/changedetectionio/fetch_site_status.py
@@ -1,11 +1,13 @@
 import hashlib
+import json
 import logging
 import os
 import re
-import time
 import urllib3

 from changedetectionio import content_fetcher, html_tools
+from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
+from copy import deepcopy

 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

@@ -38,8 +40,7 @@ class perform_site_check():

        return regex

-    def run(self, uuid):
-        from copy import deepcopy
+    def run(self, uuid, skip_when_checksum_same=True):
        changed_detected = False
        screenshot = False  # as bytes
        stripped_text_from_html = ""
@@ -122,6 +123,14 @@ class perform_site_check():
        self.screenshot = fetcher.screenshot
        self.xpath_data = fetcher.xpath_data

+        # Watches added automatically in the queue manager will skip if its the same checksum as the previous run
+        # Saves a lot of CPU
+        update_obj['previous_md5_before_filters'] = hashlib.md5(fetcher.content.encode('utf-8')).hexdigest()
+        if skip_when_checksum_same:
+            if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
+                raise content_fetcher.checksumFromPreviousCheckWasTheSame()
+
+
        # Fetching complete, now filters
        # @todo move to class / maybe inside of fetcher abstract base?

@@ -140,7 +149,7 @@ class perform_site_check():
            is_html = False
            is_json = False

-        include_filters_rule = watch.get('include_filters', [])
+        include_filters_rule = deepcopy(watch.get('include_filters', []))
        # include_filters_rule = watch['include_filters']
        subtractive_selectors = watch.get(
            "subtractive_selectors", []
@@ -148,6 +157,10 @@ class perform_site_check():
            "global_subtractive_selectors", []
        )

+        # Inject a virtual LD+JSON price tracker rule
+        if watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT:
+            include_filters_rule.append(html_tools.LD_JSON_PRODUCT_OFFER_SELECTOR)
+
        has_filter_rule = include_filters_rule and len("".join(include_filters_rule).strip())
        has_subtractive_selectors = subtractive_selectors and len(subtractive_selectors[0].strip())

@@ -155,6 +168,14 @@ class perform_site_check():
            include_filters_rule.append("json:$")
            has_filter_rule = True

+        if is_json:
+            # Sort the JSON so we dont get false alerts when the content is just re-ordered
+            try:
+                fetcher.content = json.dumps(json.loads(fetcher.content), sort_keys=True)
+            except Exception as e:
+                # Might have just been a snippet, or otherwise bad JSON, continue
+                pass
+
        if has_filter_rule:
            json_filter_prefixes = ['json:', 'jq:']
            for filter in include_filters_rule:
@@ -162,6 +183,8 @@ class perform_site_check():
                    stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter)
                    is_html = False

+
+
        if is_html or is_source:

            # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
@@ -173,9 +196,13 @@ class perform_site_check():
                # Don't run get_text or xpath/css filters on plaintext
                stripped_text_from_html = html_content
            else:
+                # Does it have some ld+json price data? used for easier monitoring
+                update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(fetcher.content)
+
                # Then we assume HTML
                if has_filter_rule:
                    html_content = ""
+
                    for filter_rule in include_filters_rule:
                        # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
                        if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -10,6 +10,10 @@ import re
 # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
 TEXT_FILTER_LIST_LINE_SUFFIX = "<br/>"

+# 'price' , 'lowPrice', 'highPrice' are usually under here
+# all of those may or may not appear on different websites
+LD_JSON_PRODUCT_OFFER_SELECTOR = "json:$..offers"
+
 class JSONNotFound(ValueError):
    def __init__(self, msg):
        ValueError.__init__(self, msg)
@@ -127,8 +131,10 @@ def _get_stripped_text_from_json_match(match):

    return stripped_text_from_html

-def extract_json_as_string(content, json_filter):
-
+# content - json
+# json_filter - ie json:$..price
+# ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector)
+def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
    stripped_text_from_html = False

    # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded <script type=ldjson>
@@ -139,7 +145,12 @@ def extract_json_as_string(content, json_filter):
        # Foreach <script json></script> blob.. just return the first that matches json_filter
        s = []
        soup = BeautifulSoup(content, 'html.parser')
-        bs_result = soup.findAll('script')
+
+        if ensure_is_ldjson_info_type:
+            bs_result = soup.findAll('script', {"type": "application/ld+json"})
+        else:
+            bs_result = soup.findAll('script')
+

        if not bs_result:
            raise JSONNotFound("No parsable JSON found in this document")
@@ -156,7 +167,14 @@ def extract_json_as_string(content, json_filter):
                continue
            else:
                stripped_text_from_html = _parse_json(json_data, json_filter)
-                if stripped_text_from_html:
+                if ensure_is_ldjson_info_type:
+                    # Could sometimes be list, string or something else random
+                    if isinstance(json_data, dict):
+                        # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
+                        # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
+                        if json_data.get('@type', False) and json_data.get('@type','').lower() == ensure_is_ldjson_info_type.lower() and stripped_text_from_html:
+                            break
+                elif stripped_text_from_html:
                    break

    if not stripped_text_from_html:
@@ -243,6 +261,18 @@ def html_to_text(html_content: str, render_anchor_tag_content=False) -> str:

    return text_content

+
+# Does LD+JSON exist with a @type=='product' and a .price set anywhere?
+def has_ldjson_product_info(content):
+    # True when an ld+json 'product' blob yields something for the offers selector
+    try:
+        offers_text = extract_json_as_string(content=content, json_filter=LD_JSON_PRODUCT_OFFER_SELECTOR, ensure_is_ldjson_info_type="product")
+    except JSONNotFound:
+        # No parsable JSON in the document is an expected outcome, not an error
+        return False
+    return bool(offers_text)
+
+
 def workarounds_for_obfuscations(content):
    """
    Some sites are using sneaky tactics to make prices and other information un-renderable by Inscriptis
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -14,49 +14,52 @@ from changedetectionio.notification import (

 class model(dict):
    __newest_history_key = None
-    __history_n=0
+    __history_n = 0
    __base_config = {
-            #'history': {},  # Dict of timestamp and output stripped filename (removed)
-            #'newest_history_key': 0, (removed, taken from history.txt index)
-            'body': None,
-            'check_unique_lines': False, # On change-detected, compare against all history if its something new
-            'check_count': 0,
-            'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
-            'extract_text': [],  # Extract text by regex after filters
-            'extract_title_as_title': False,
-            'fetch_backend': None,
-            'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
-            'headers': {},  # Extra headers to send
-            'ignore_text': [],  # List of text to ignore when calculating the comparison checksum
-            'include_filters': [],
-            'last_checked': 0,
-            'last_error': False,
-            'last_viewed': 0,  # history key value of the last viewed via the [diff] link
-            'method': 'GET',
-             # Custom notification content
-            'notification_body': None,
-            'notification_format': default_notification_format_for_watch,
-            'notification_muted': False,
-            'notification_title': None,
-            'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL
-            'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise)
-            'paused': False,
-            'previous_md5': False,
-            'proxy': None, # Preferred proxy connection
-            'subtractive_selectors': [],
-            'tag': None,
-            'text_should_not_be_present': [], # Text that should not present
-            # Re #110, so then if this is set to None, we know to use the default value instead
-            # Requires setting to None on submit if it's the same as the default
-            # Should be all None by default, so we use the system default in this case.
-            'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
-            'title': None,
-            'trigger_text': [],  # List of text or regex to wait for until a change is detected
-            'url': None,
-            'uuid': str(uuid.uuid4()),
-            'webdriver_delay': None,
-            'webdriver_js_execute_code': None, # Run before change-detection
-        }
+        # 'history': {},  # Dict of timestamp and output stripped filename (removed)
+        # 'newest_history_key': 0, (removed, taken from history.txt index)
+        'body': None,
+        'check_unique_lines': False,  # On change-detected, compare against all history if its something new
+        'check_count': 0,
+        'consecutive_filter_failures': 0,  # Every time the CSS/xPath filter cannot be located, reset when all is fine.
+        'extract_text': [],  # Extract text by regex after filters
+        'extract_title_as_title': False,
+        'fetch_backend': None,
+        'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
+        'has_ldjson_price_data': None,
+        'track_ldjson_price_data': None,
+        'headers': {},  # Extra headers to send
+        'ignore_text': [],  # List of text to ignore when calculating the comparison checksum
+        'include_filters': [],
+        'last_checked': 0,
+        'last_error': False,
+        'last_viewed': 0,  # history key value of the last viewed via the [diff] link
+        'method': 'GET',
+        # Custom notification content
+        'notification_body': None,
+        'notification_format': default_notification_format_for_watch,
+        'notification_muted': False,
+        'notification_title': None,
+        'notification_screenshot': False,  # Include the latest screenshot if available and supported by the apprise URL
+        'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise)
+        'paused': False,
+        'previous_md5': False,
+        'previous_md5_before_filters': False,  # Used for skipping changedetection entirely
+        'proxy': None,  # Preferred proxy connection
+        'subtractive_selectors': [],
+        'tag': None,
+        'text_should_not_be_present': [],  # Text that should not present
+        # Re #110, so then if this is set to None, we know to use the default value instead
+        # Requires setting to None on submit if it's the same as the default
+        # Should be all None by default, so we use the system default in this case.
+        'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
+        'title': None,
+        'trigger_text': [],  # List of text or regex to wait for until a change is detected
+        'url': None,
+        'uuid': str(uuid.uuid4()),
+        'webdriver_delay': None,
+        'webdriver_js_execute_code': None,  # Run before change-detection
+    }
    jitter_seconds = 0

    def __init__(self, *arg, **kw):
--- a/changedetectionio/queuedWatchMetaData.py
+++ b/changedetectionio/queuedWatchMetaData.py
@@ -0,0 +1,10 @@
+from dataclasses import dataclass, field
+from typing import Any
+
+# So that we can queue some metadata in `item`
+# https://docs.python.org/3/library/queue.html#queue.PriorityQueue
+#
+@dataclass(order=True)  # order=True generates the ordering methods from the fields that take part in comparison
+class PrioritizedItem:  # A queue entry: an integer priority plus an arbitrary payload
+    priority: int  # the only field used when two entries are compared
+    item: Any=field(compare=False)  # payload; compare=False keeps it out of the generated comparison methods
--- a/changedetectionio/res/xpath_element_scraper.js
+++ b/changedetectionio/res/xpath_element_scraper.js
@@ -81,6 +81,14 @@ var bbox;
 for (var i = 0; i < elements.length; i++) {
    bbox = elements[i].getBoundingClientRect();

+    // Exclude items that are not interactable or visible
+    if(elements[i].style.opacity === "0") {
+        continue
+    }
+    if(elements[i].style.display === "none" || elements[i].style.pointerEvents === "none" ) {
+        continue
+    }
+
    // Forget really small ones
    if (bbox['width'] < 10 && bbox['height'] < 10) {
        continue;
@@ -166,10 +174,23 @@ if (include_filters.length) {
        }

        if (q) {
-            bbox = q.getBoundingClientRect();
-            console.log("xpath_element_scraper: Got filter element, scroll from top was "+scroll_y)
-        } else {
-            console.log("xpath_element_scraper: filter element "+f+" was not found");
+            // #1231 - When an XPath attribute filter is applied, q may be an attribute node rather than an element, so we have to traverse up and find the owning element.
+            if (typeof q.getBoundingClientRect === 'function') { // the method is inherited from the prototype, so hasOwnProperty() never matched it
+                bbox = q.getBoundingClientRect();
+                console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y)
+            } else {
+                try {
+                    // Try and see if we can find its ownerElement
+                    bbox = q.ownerElement.getBoundingClientRect();
+                    console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
+                } catch (e) {
+                    console.log("xpath_element_scraper: error looking up ownerElement")
+                }
+            }
+        }
+        
+        if(!q) {
+            console.log("xpath_element_scraper: filter element " + f + " was not found");
        }

        if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
--- a/changedetectionio/static/images/price-tag-icon.svg
+++ b/changedetectionio/static/images/price-tag-icon.svg
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="83.39" height="89.648" enable-background="new 0 0 122.406 122.881" version="1.1" viewBox="0 0 83.39 89.648" xml:space="preserve" xmlns="http://www.w3.org/2000/svg"><g transform="translate(5e-4 -33.234)"><path d="m44.239 42.946-39.111 39.896 34.908 34.91 39.09-39.876-1.149-34.931zm-0.91791 42.273c0.979-0.979 1.507-1.99 1.577-3.027 0.077-1.043-0.248-2.424-0.967-4.135-0.725-1.717-1.348-3.346-1.87-4.885s-0.814-3.014-0.897-4.432c-0.07-1.42 0.134-2.768 0.624-4.045 0.477-1.279 1.348-2.545 2.607-3.804 2.099-2.099 4.535-3.123 7.314-3.065 2.773 0.063 5.457 1.158 8.04 3.294l2.881 3.034c1.946 2.607 2.799 5.33 2.557 8.166-0.235 2.83-1.532 5.426-3.893 7.785l-6.296-6.297c1.291-1.291 2.035-2.531 2.238-3.727 0.191-1.197-0.165-2.252-1.081-3.168-0.821-0.82-1.717-1.195-2.69-1.139-0.967 0.064-1.908 0.547-2.817 1.457-0.922 0.922-1.393 1.914-1.412 2.977s0.306 2.416 0.973 4.064c0.661 1.652 1.24 3.25 1.736 4.801 0.496 1.553 0.782 3.035 0.858 4.445 0.076 1.426-0.127 2.787-0.591 4.104-0.477 1.316-1.336 2.596-2.588 3.848-2.125 2.125-4.522 3.186-7.212 3.18s-5.311-1.063-7.855-3.16l-3.747 3.746-2.964-2.965 3.766-3.764c-2.423-2.996-3.568-5.998-3.447-9.02 0.127-3.014 1.476-5.813 4.045-8.383l6.278 6.277c-1.412 1.412-2.175 2.799-2.277 4.16-0.108 1.367 0.414 2.627 1.571 3.783 0.839 0.84 1.755 1.26 2.741 1.242 0.985-0.017 1.92-0.47 2.798-1.347zm21.127-46.435h17.457c-0.0269 2.2368 0.69936 16.025 0.69936 16.025l0.785 23.858c0.019 0.609-0.221 1.164-0.619 1.564l5e-3 4e-3 -41.236 42.022c-0.82213 0.8378-2.175 0.83-3.004 0l-37.913-37.91c-0.83-0.83-0.83-2.176 0-3.006l41.236-42.021c0.39287-0.42671 1.502-0.53568 1.502-0.53568zm18.011 11.59c-59.392-29.687-29.696-14.843 0 0z"/></g></svg>
--- a/changedetectionio/static/styles/scss/styles.scss
+++ b/changedetectionio/static/styles/scss/styles.scss
@@ -1009,3 +1009,30 @@ ul {
  border-radius: 5px;
  color: var(--color-warning);
 }
+
+/* automatic price following helpers */
+.tracking-ldjson-price-data {
+  background-color: var(--color-background-button-green);
+  color: #000;
+  padding: 3px;
+  border-radius: 3px;
+  white-space: nowrap;
+}
+
+.ldjson-price-track-offer {
+  a.pure-button {
+    border-radius: 3px;
+    padding: 3px;
+    background-color: var(--color-background-button-green);
+  }
+
+  font-weight: bold;
+  font-style: italic;
+}
+
+.price-follow-tag-icon {
+  display: inline-block;
+  height: 0.8rem;
+  vertical-align: middle;
+}
+
--- a/changedetectionio/static/styles/styles.css
+++ b/changedetectionio/static/styles/styles.css
@@ -945,3 +945,24 @@ ul {
    display: inline;
    height: 26px;
    vertical-align: middle; }
+
+/* automatic price following helpers */
+.tracking-ldjson-price-data {
+  background-color: var(--color-background-button-green);
+  color: #000;
+  padding: 3px;
+  border-radius: 3px;
+  white-space: nowrap; }
+
+.ldjson-price-track-offer {
+  font-weight: bold;
+  font-style: italic; }
+  .ldjson-price-track-offer a.pure-button {
+    border-radius: 3px;
+    padding: 3px;
+    background-color: var(--color-background-button-green); }
+
+.price-follow-tag-icon {
+  display: inline-block;
+  height: 0.8rem;
+  vertical-align: middle; }
--- a/changedetectionio/store.py
+++ b/changedetectionio/store.py
@@ -250,12 +250,15 @@ class ChangeDetectionStore:
    def clear_watch_history(self, uuid):
        import pathlib

-        self.__data['watching'][uuid].update(
-            {'last_checked': 0,
-             'last_viewed': 0,
-             'previous_md5': False,
-             'last_notification_error': False,
-             'last_error': False})
+        self.__data['watching'][uuid].update({
+                'last_checked': 0,
+                'has_ldjson_price_data': None,
+                'last_error': False,
+                'last_notification_error': False,
+                'last_viewed': 0,
+                'previous_md5': False,
+                'track_ldjson_price_data': None,
+            })

        # JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
        for item in pathlib.Path(os.path.join(self.datastore_path, uuid)).rglob("*.*"):
--- a/changedetectionio/templates/watch-overview.html
+++ b/changedetectionio/templates/watch-overview.html
@@ -88,9 +88,9 @@
                </td>
                <td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
                    <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
-                    <a class="link-spread" href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="icon icon-spread" /></a>
+                    <a class="link-spread" href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="icon icon-spread" title="Create a link to share watch config with others" /></a>

-                    {%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" />{% endif %}
+                    {%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" title="Using a chrome browser" />{% endif %}

                    {% if watch.last_error is defined and watch.last_error != False %}
                    <div class="fetch-error">{{ watch.last_error }}</div>
@@ -98,6 +98,12 @@
                    {% if watch.last_notification_error is defined and watch.last_notification_error != False %}
                    <div class="fetch-error notification-error"><a href="{{url_for('notification_logs')}}">{{ watch.last_notification_error }}</a></div>
                    {% endif %}
+                    {% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data']  %}
+                    <div class="ldjson-price-track-offer">Embedded price data detected, follow only price data? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
+                    {% endif %}
+                    {% if watch['track_ldjson_price_data'] == 'accepted' %}
+                    <span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}"  class="price-follow-tag-icon"/> Price</span>
+                    {% endif %}
                    {% if not active_tag %}
                    <span class="watch-tag-list">{{ watch.tag}}</span>
                    {% endif %}
--- a/changedetectionio/tests/test_automatic_follow_ldjson_price.py
+++ b/changedetectionio/tests/test_automatic_follow_ldjson_price.py
@@ -0,0 +1,146 @@
+#!/usr/bin/python3
+
+import time
+from flask import url_for
+from .util import live_server_setup, extract_UUID_from_client, extract_api_key_from_UI
+
+def set_response_with_ldjson():  # Write an HTML page containing a schema.org Product ld+json block to the endpoint content file
+    test_return_data = """<html>
+       <body>
+     Some initial text</br>
+     <p>Which is across multiple lines</p>
+     </br>
+     So let's see what happens.  </br>
+     <div class="sametext">Some text thats the same</div>
+     <div class="changetext">Some text that will change</div>
+     <script type="application/ld+json">
+        {
+           "@context":"https://schema.org/",
+           "@type":"Product",
+           "@id":"https://www.some-virtual-phone-shop.com/celular-iphone-14/p",
+           "name":"Celular Iphone 14 Pro Max 256Gb E Sim A16 Bionic",
+           "brand":{
+              "@type":"Brand",
+              "name":"APPLE"
+           },
+           "image":"https://www.some-virtual-phone-shop.com/15509426/image.jpg",
+           "description":"You dont need it",
+           "mpn":"111111",
+           "sku":"22222",
+           "offers":{
+              "@type":"AggregateOffer",
+              "lowPrice":8097000,
+              "highPrice":8099900,
+              "priceCurrency":"COP",
+              "offers":[
+                 {
+                    "@type":"Offer",
+                    "price":8097000,
+                    "priceCurrency":"COP",
+                    "availability":"http://schema.org/InStock",
+                    "sku":"102375961",
+                    "itemCondition":"http://schema.org/NewCondition",
+                    "seller":{
+                       "@type":"Organization",
+                       "name":"ajax"
+                    }
+                 }
+              ],
+              "offerCount":1
+           }
+        }
+       </script>
+     </body>
+     </html>
+"""
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:  # "w" mode replaces whatever an earlier call wrote
+        f.write(test_return_data)
+    return None  # nothing to hand back; the written file is the result
+
+def set_response_without_ldjson():  # Write the same page body, but with no ld+json script block, to the endpoint content file
+    test_return_data = """<html>
+       <body>
+     Some initial text</br>
+     <p>Which is across multiple lines</p>
+     </br>
+     So let's see what happens.  </br>
+     <div class="sametext">Some text thats the same</div>
+     <div class="changetext">Some text that will change</div>     
+     </body>
+     </html>
+"""
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:  # "w" mode replaces whatever an earlier call wrote
+        f.write(test_return_data)
+    return None  # nothing to hand back; the written file is the result
+
+# actually only really used by the distill.io importer, but could be handy too
+def test_check_ldjson_price_autodetect(client, live_server):  # The price-follow offer appears only for pages with ld+json data, and accepting it narrows the snapshot to that data
+    live_server_setup(live_server)
+
+    # Give the endpoint time to spin up
+    time.sleep(1)
+
+    set_response_with_ldjson()
+
+    # Add our URL to the import page
+    test_url = url_for('test_endpoint', _external=True)
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+    time.sleep(3)
+
+    # Should get a notice that it's available
+    res = client.get(url_for("index"))
+    assert b'ldjson-price-track-offer' in res.data
+
+    # Accept it (follow_redirects belongs to client.get(); given to url_for() it only becomes a query-string argument)
+    uuid = extract_UUID_from_client(client)
+
+    client.get(url_for('price_data_follower.accept', uuid=uuid), follow_redirects=True)
+    time.sleep(2)
+
+    # Trigger a check
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    time.sleep(2)
+    # Offer should be gone
+    res = client.get(url_for("index"))
+    assert b'Embedded price data' not in res.data
+    assert b'tracking-ldjson-price-data' in res.data
+
+    # and last snapshot (via API) should be just the price
+    api_key = extract_api_key_from_UI(client)
+    res = client.get(
+        url_for("watchsinglehistory", uuid=uuid, timestamp='latest'),
+        headers={'x-api-key': api_key},
+    )
+
+    # Should see this (dont know where the whitespace came from)
+    assert b'"highPrice": 8099900' in res.data
+    # And not this cause its not the ld-json
+    assert b"So let's see what happens" not in res.data
+
+    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+
+    ##########################################################################################
+    # And we shouldnt see the offer
+    set_response_without_ldjson()
+
+    # Add our URL to the import page
+    test_url = url_for('test_endpoint', _external=True)
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+    time.sleep(3)
+    res = client.get(url_for("index"))
+    assert b'ldjson-price-track-offer' not in res.data
+    
+    ##########################################################################################
+    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
--- a/changedetectionio/tests/test_jsonpath_jq_selector.py
+++ b/changedetectionio/tests/test_jsonpath_jq_selector.py
@@ -394,6 +394,48 @@ def check_json_ext_filter(json_filter, client, live_server):
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

+def test_ignore_json_order(client, live_server):
+    # Re-ordering the keys of a JSON document must not flag the watch as changed ('unviewed'); a changed value still must
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write('{"hello" : 123, "world": 123}')
+
+
+    # Add our URL to the import page
+    test_url = url_for('test_endpoint', content_type="application/json", _external=True)
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+
+    time.sleep(2)
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write('{"world" : 123, "hello": 123}')  # same keys and values as before, only the key order differs
+
+    # Trigger a check
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    time.sleep(2)
+
+    res = client.get(url_for("index"))
+    assert b'unviewed' not in res.data
+
+    # Just to be sure it still works: an actual value change (123 -> 124) has to be picked up
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write('{"world" : 123, "hello": 124}')
+
+    # Trigger a check
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    time.sleep(2)
+
+    res = client.get(url_for("index"))
+    assert b'unviewed' in res.data
+
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
+
 def test_check_jsonpath_ext_filter(client, live_server):
    check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)

--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@@ -4,6 +4,7 @@ import queue
 import time

 from changedetectionio import content_fetcher
+from changedetectionio import queuedWatchMetaData
 from changedetectionio.fetch_site_status import FilterNotFoundInResponse

 # A single update worker
@@ -157,11 +158,12 @@ class update_worker(threading.Thread):
        while not self.app.config.exit.is_set():

            try:
-                priority, uuid = self.q.get(block=False)
+                queued_item_data = self.q.get(block=False)
            except queue.Empty:
                pass

            else:
+                uuid = queued_item_data.item.get('uuid')
                self.current_uuid = uuid

                if uuid in list(self.datastore.data['watching'].keys()):
@@ -171,11 +173,11 @@ class update_worker(threading.Thread):
                    update_obj= {}
                    xpath_data = False
                    process_changedetection_results = True
-                    print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, self.datastore.data['watching'][uuid]['url']))
+                    print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url']))
                    now = time.time()

                    try:
-                        changed_detected, update_obj, contents = update_handler.run(uuid)
+                        changed_detected, update_obj, contents = update_handler.run(uuid, skip_when_checksum_same=queued_item_data.item.get('skip_when_checksum_same'))
                        # Re #342
                        # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
                        # We then convert/.decode('utf-8') for the notification etc
@@ -241,6 +243,10 @@ class update_worker(threading.Thread):

                        process_changedetection_results = True

+                    except content_fetcher.checksumFromPreviousCheckWasTheSame as e:
+                        # Yes fine, so nothing todo
+                        pass
+
                    except content_fetcher.BrowserStepsStepTimout as e:

                        if not self.datastore.data['watching'].get(uuid):
--- a/requirements.txt
+++ b/requirements.txt
@@ -29,8 +29,9 @@ apprise~=1.2.0
 # apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
 paho-mqtt

-# Pinned version of cryptography otherwise
-# ERROR: Could not build wheels for cryptography which use PEP 517 and cannot be installed directly
+# This mainly affects some ARM builds, which unlike the other builds ignores "ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1"
+# so without this pinning, the newer versions on ARM will forcefully try to build rust, which results in "rust compiler not found"
+# (introduced once apprise became a dep)
 cryptography~=3.4

 # Used for CSS filtering
Author	SHA1	Message	Date
dgtlmoon	6788796788	Attempt retry BEtter messages	2022-12-16 17:29:16 +01:00
dgtlmoon	efafc9bef8	Provide more information on PageUnloadable	2022-12-16 17:04:12 +01:00
dgtlmoon	b7a2501d64	Fetching - Always sort the key order of JSON content for less false alerts (May cause an alert on upgrade, but will be better going forwards) #1219	2022-12-15 09:13:09 +01:00
dgtlmoon	e970fef991	Fetcher + VisualSelector - xPath filter with attribute filter was breaking the element finder	2022-12-14 19:06:49 +01:00
dgtlmoon	b76148a0f4	Fetcher - CPU usage - Skip processing if the previous checksum and the just fetched one was the same (#925 )	2022-12-14 15:08:34 +01:00
dgtlmoon	93cc30437f	Playwright+BrowserSteps - Fetch changes - Fetch simply after page starts rendering + delay seconds, disable service workers	2022-12-14 12:16:04 +01:00
dgtlmoon	6562d6e0d4	Improve ARM/rust build comment	2022-12-13 12:28:20 +01:00
dgtlmoon	6c217cc3b6	README.md - Improving JSONPath example for LD+JSON product data	2022-12-11 11:14:52 +01:00
dgtlmoon	f30cdf0674	0.40.0.2	2022-12-08 22:36:59 +01:00
dgtlmoon	14da0646a7	Price follower - Dont scan for ldjson data when 'no' was clicked on the suggestion (#1207 )	2022-12-08 22:35:37 +01:00
dgtlmoon	b413cdecc7	Adding missing parts for pip build Re #1206	2022-12-08 21:54:55 +01:00
dgtlmoon	7bf52d9275	0.40.0	2022-12-08 20:09:42 +01:00
dgtlmoon	09e6624afd	VisualSelector - Exclude items that are not interactable or visible	2022-12-08 20:08:41 +01:00
dgtlmoon	b58fd995b5	Automatically offer to track LD+JSON product price data (#1204 )	2022-12-08 19:28:20 +01:00