tidy up methods

2023-11-07 14:08:25 +01:00
parent 97e591fa24
commit 68d1e2736c
5 changed files with 32 additions and 22 deletions
--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@@ -667,6 +667,7 @@ class html_requests(Fetcher):
    fetcher_description = "Basic fast Plaintext/HTTP Client"

    def __init__(self, proxy_override=None):
+        super().__init__()
        self.proxy_override = proxy_override

    def run(self,
--- a/changedetectionio/processors/init.py
+++ b/changedetectionio/processors/init.py
@@ -15,15 +15,18 @@ class difference_detection_processor():
    def __init__(self, *args, datastore, watch_uuid, **kwargs):
        super().__init__(*args, **kwargs)
        self.datastore = datastore
+        self.watch = self.datastore.data['watching'].get(watch_uuid)

-        watch = self.datastore.data['watching'].get(watch_uuid)
-        url = watch.link
+
+    def call_browser(self):
+
+        url = self.watch.link

        # Requests, playwright, other browser via wss:// etc, fetch_extra_something
-        prefer_fetch_backend = watch.get('fetch_backend', 'system')
+        prefer_fetch_backend = self.watch.get('fetch_backend', 'system')

        # Proxy ID "key"
-        preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
+        preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))

        # Pluggable content self.fetcher
        if not prefer_fetch_backend or prefer_fetch_backend == 'system':
@@ -47,14 +50,14 @@ class difference_detection_processor():
                                   #browser_url_extra/configurable browser url=...
                                   )

-        if watch.has_browser_steps:
-            self.fetcher.browser_steps = watch.get('browser_steps', [])
-            self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, watch_uuid)
+        if self.watch.has_browser_steps:
+            self.fetcher.browser_steps = self.watch.get('browser_steps', [])
+            self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))

        # Tweak the base config with the per-watch ones
-        request_headers = watch.get('headers', [])
+        request_headers = self.watch.get('headers', [])
        request_headers.update(self.datastore.get_all_base_headers())
-        request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=watch_uuid))
+        request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))

        # https://github.com/psf/requests/issues/4525
        # Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
@@ -64,32 +67,32 @@ class difference_detection_processor():

        timeout = self.datastore.data['settings']['requests'].get('timeout')

-        request_body = watch.get('body')
-        request_method = watch.get('method')
-        ignore_status_codes = watch.get('ignore_status_codes', False)
+        request_body = self.watch.get('body')
+        request_method = self.watch.get('method')
+        ignore_status_codes = self.watch.get('ignore_status_codes', False)

        # Configurable per-watch or global extra delay before extracting text (for webDriver types)
        system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
-        if watch['webdriver_delay'] is not None:
-            self.fetcher.render_extract_delay = watch.get('webdriver_delay')
+        if self.watch.get('webdriver_delay'):
+            self.fetcher.render_extract_delay = self.watch.get('webdriver_delay')
        elif system_webdriver_delay is not None:
            self.fetcher.render_extract_delay = system_webdriver_delay

-        if watch.get('webdriver_js_execute_code') is not None and watch.get('webdriver_js_execute_code').strip():
-            self.fetcher.webdriver_js_execute_code = watch.get('webdriver_js_execute_code')
+        if self.watch.get('webdriver_js_execute_code') is not None and self.watch.get('webdriver_js_execute_code').strip():
+            self.fetcher.webdriver_js_execute_code = self.watch.get('webdriver_js_execute_code')

        # Requests for PDF's, images etc should be passwd the is_binary flag
-        is_binary = watch.is_pdf
+        is_binary = self.watch.is_pdf

        # And here we go! call the right browser with browser-specific settings
-        self.fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters'),
+        self.fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, self.watch.get('include_filters'),
                    is_binary=is_binary)
        self.fetcher.quit()

        # After init, call run() which will do the actual change-detection

    @abstractmethod
-    def run(self, uuid, skip_when_checksum_same=True):
+    def run_changedetection(self, uuid, skip_when_checksum_same=True):
        update_obj = {'last_notification_error': False, 'last_error': False}
        some_data = 'xxxxx'
        update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
--- a/changedetectionio/processors/restock_diff.py
+++ b/changedetectionio/processors/restock_diff.py
@@ -19,7 +19,7 @@ class perform_site_check(difference_detection_processor):
    screenshot = None
    xpath_data = None

-    def run(self, uuid, skip_when_checksum_same=True):
+    def run_changedetection(self, uuid, skip_when_checksum_same=True):

        # DeepCopy so we can be sure we don't accidently change anything by reference
        watch = deepcopy(self.datastore.data['watching'].get(uuid))
--- a/changedetectionio/processors/text_json_diff.py
+++ b/changedetectionio/processors/text_json_diff.py
@@ -33,8 +33,9 @@ class PDFToHTMLToolNotFound(ValueError):
 # (set_proxy_from_list)
 class perform_site_check(difference_detection_processor):

-    def run(self, uuid, skip_when_checksum_same=True):
+    def run_changedetection(self, uuid, skip_when_checksum_same=True):
        changed_detected = False
+        html_content = ""
        screenshot = False  # as bytes
        stripped_text_from_html = ""

--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@@ -209,6 +209,7 @@ class update_worker(threading.Thread):
        from .processors import text_json_diff, restock_diff

        while not self.app.config.exit.is_set():
+            update_handler = None

            try:
                queued_item_data = self.q.get(block=False)
@@ -253,7 +254,9 @@ class update_worker(threading.Thread):
                        # Clear last errors (move to preflight func?)
                        self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None

-                        changed_detected, update_obj, contents = update_handler.run(uuid,
+                        update_handler.call_browser()
+
+                        changed_detected, update_obj, contents = update_handler.run_changedetection(uuid,
                                                                                    skip_when_checksum_same=skip_when_same_checksum,
                                                                                    )

@@ -407,6 +410,8 @@ class update_worker(threading.Thread):
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
                        # Other serious error
                        process_changedetection_results = False
+
+                        # the thread is still running??
                    else:
                        # Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
                        if not self.datastore.data['watching'].get(uuid):