Compare commits

...

36 Commits

Author SHA1 Message Date
dgtlmoon
e23e099d4a hmm 2025-05-10 10:38:10 +02:00
dgtlmoon
bff6eb3bcf woops 2025-05-10 10:37:27 +02:00
dgtlmoon
10fc5d669b try some matrix build 2025-05-10 10:36:41 +02:00
dgtlmoon
89be3c50d5 build test with 3.12 2025-05-10 10:28:02 +02:00
dgtlmoon
3a1f1a9a5a Try linux/aarch64 2025-05-09 23:29:21 +02:00
Emmanuel Ferdman
bb7f7f473b Resolve warnings of bs4 library (#3187) 2025-05-09 14:35:35 +02:00
dgtlmoon
a9ca511004 Revert memory strategy change for html_to_text (Was hanging under high concurrency setups) 2025-05-09 09:44:02 +02:00
dgtlmoon
8df61f5eaa 0.49.16 2025-05-03 16:43:04 +02:00
dgtlmoon
162f573967 Fixes to ensure proxy errors are handled correctly (#3168) 2025-05-03 16:05:40 +02:00
dgtlmoon
eada0ef08d UI - Custom headers should have validation (#3172) 2025-05-03 13:57:42 +02:00
dgtlmoon
f57bc10973 Update selenium library (#3170) 2025-05-02 14:05:23 +02:00
dgtlmoon
d2e8f822d6 Restock detection - adding new string 2025-05-01 17:58:36 +02:00
dgtlmoon
5fd8200fd9 Conditions - Levenshtein text similarity plugin - adding test, fixing import, fixing check for watches with 1 snapshot history (#3161) 2025-04-30 16:47:23 +02:00
dgtlmoon
d0da8c9825 Restock detection - Use cleaner logic for limiting elements to scan, refactor, improve tests (#3158) 2025-04-30 10:57:33 +02:00
dgtlmoon
fd7574d21b pyppeteer fast puppeteer fetch - be sure viewport is set to --window-size if --window-size is set (#3157) 2025-04-29 17:23:37 +02:00
dgtlmoon
c70706a27b Improved global ignore test (#3140) 2025-04-29 11:20:21 +02:00
silversub
968c364999 Update docker-compose.yml (#3149)
Co-authored-by: silversub <silversub@gmail.com>
2025-04-29 11:20:00 +02:00
dgtlmoon
031cb76b7d Small fix for xpath element scraper (#3145) 2025-04-25 17:58:04 +02:00
dgtlmoon
af568d064c Plugins for conditions (and include Similarity / Levenshtein, wordcount conditions) Re #3108 2025-04-22 18:19:56 +02:00
dgtlmoon
a75f57de43 Browser Steps - <Select> by Option Text - #1224, #1228 (#3138) 2025-04-22 14:33:35 +02:00
dgtlmoon
72a1c3dda1 Browser Steps - error reporting and session shutdown improvements (#3137) 2025-04-22 12:18:51 +02:00
dgtlmoon
ffde79ecac 0.49.15 2025-04-18 14:57:28 +02:00
dgtlmoon
66ad43b2df Visual Selector & Browser Steps - Always recheck if the data/screenshot is ready under "Visual Selector" tab after using Browser Steps (#3130) 2025-04-18 10:31:43 +02:00
Dror Levin
6b0e56ca80 App logs - Send TRACE and INFO logs to stdout (#3051) 2025-04-18 10:00:09 +02:00
Luca
5a2d84d8b4 Development: introduce Ruff as linter/formatter (#3039) 2025-04-18 09:59:18 +02:00
dgtlmoon
a941156f26 Updating restock texts (#3124) 2025-04-17 10:44:32 +02:00
dgtlmoon
a1fdeeaa29 Only add screenshot warning if capture was greater than trim size (#3123) 2025-04-17 00:11:20 +02:00
dgtlmoon
40ea2604a7 0.49.14 2025-04-16 23:23:18 +02:00
dgtlmoon
ceda526093 Small fix for multiprocessing start on Mac OS (#3121 #3115) 2025-04-16 22:52:03 +02:00
Justin Goette
4197254c53 docs: Update reference URL (#3119) 2025-04-16 21:37:50 +02:00
dgtlmoon
a0b7efb436 UI - Fix to edit and groups template 2025-04-16 18:40:30 +02:00
dgtlmoon
5f5e8ede6c Updating API documentation 2025-04-13 21:51:17 +02:00
dgtlmoon
52ca855a29 Undo forced selenium headless mode, small refactor (#3112) 2025-04-12 19:26:17 +02:00
dgtlmoon
079efd0a85 Playwright + Puppeteer fix for when page is taller than viewport but less than screenshot step_size (#3113) 2025-04-12 18:37:59 +02:00
dgtlmoon
3a583a4e5d Memory management - Run HTML to text in sub process, a few more cleanups to playwright (#3110) 2025-04-11 18:18:29 +02:00
dgtlmoon
cfb4decf67 UI Edit/Stats - Add levenshtein distance info, explains how "different" the last two snapshot are (#3109) 2025-04-11 17:36:29 +02:00
46 changed files with 1503 additions and 450 deletions

View File

@@ -1,10 +1,5 @@
name: ChangeDetection.io Container Build Test name: ChangeDetection.io Container Build Test
# Triggers the workflow on push or pull request events
# This line doesnt work, even tho it is the documented one
#on: [push, pull_request]
on: on:
push: push:
paths: paths:
@@ -20,51 +15,53 @@ on:
- .github/workflows/* - .github/workflows/*
- .github/test/Dockerfile* - .github/test/Dockerfile*
# Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
# @todo: some kind of path filter for requirements.txt and Dockerfile
jobs: jobs:
test-container-build: test-container-build:
runs-on: ubuntu-latest runs-on: ubuntu-latest
strategy:
matrix:
python_version: [3.11, 3.12, 3.13]
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python 3.11
uses: actions/setup-python@v5
with:
python-version: 3.11
# Just test that the build works, some libraries won't compile on ARM/rPi etc - name: Set up Python ${{ matrix.python_version }}
- name: Set up QEMU uses: actions/setup-python@v5
uses: docker/setup-qemu-action@v3 with:
with: python-version: ${{ matrix.python_version }}
image: tonistiigi/binfmt:latest
platforms: all
- name: Set up Docker Buildx # Set up QEMU
id: buildx - name: Set up QEMU
uses: docker/setup-buildx-action@v3 uses: docker/setup-qemu-action@v3
with: with:
install: true image: tonistiigi/binfmt:latest
version: latest platforms: all
driver-opts: image=moby/buildkit:master
# https://github.com/dgtlmoon/changedetection.io/pull/1067 - name: Set up Docker Buildx
# Check we can still build under alpine/musl id: buildx
- name: Test that the docker containers can build (musl via alpine check) uses: docker/setup-buildx-action@v3
id: docker_build_musl with:
uses: docker/build-push-action@v6 install: true
with: version: latest
context: ./ driver-opts: image=moby/buildkit:master
file: ./.github/test/Dockerfile-alpine
platforms: linux/amd64,linux/arm64
- name: Test that the docker containers can build # Musl (alpine) build test (runs once per matrix job, or can skip with conditional if needed)
id: docker_build - name: Test that the docker containers can build (musl via alpine check)
uses: docker/build-push-action@v6 id: docker_build_musl
# https://github.com/docker/build-push-action#customizing uses: docker/build-push-action@v6
with: with:
context: ./ context: ./
file: ./Dockerfile file: ./.github/test/Dockerfile-alpine
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8 platforms: linux/amd64,linux/arm64
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
# Main build with matrix Python version
- name: Test that the docker containers can build (Python ${{ matrix.python_version }})
id: docker_build
uses: docker/build-push-action@v6
with:
context: ./
file: ./Dockerfile
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8,linux/aarch64
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
build: |
args=PYTHON_VERSION=${{ matrix.python_version }}

View File

@@ -8,13 +8,13 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Lint with flake8 - name: Lint with Ruff
run: | run: |
pip3 install flake8 pip install ruff
# stop the build if there are Python syntax errors or undefined names # Check for syntax errors and undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics ruff check . --select E9,F63,F7,F82
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide # Complete check with errors treated as warnings
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics ruff check . --exit-zero
test-application-3-10: test-application-3-10:
needs: lint-code needs: lint-code
@@ -41,5 +41,4 @@ jobs:
uses: ./.github/workflows/test-stack-reusable-workflow.yml uses: ./.github/workflows/test-stack-reusable-workflow.yml
with: with:
python-version: '3.13' python-version: '3.13'
skip-pypuppeteer: true skip-pypuppeteer: true

View File

@@ -172,8 +172,8 @@ jobs:
curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
# Check whether TRACE log is enabled. # Check whether TRACE log is enabled.
# Also, check whether TRACE is came from STDERR # Also, check whether TRACE came from STDOUT
docker logs test-changedetectionio 2>&1 1>/dev/null | grep 'TRACE log is enabled' || exit 1 docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1
# Check whether DEBUG is came from STDOUT # Check whether DEBUG is came from STDOUT
docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1 docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1

1
.gitignore vendored
View File

@@ -16,6 +16,7 @@ dist/
.env .env
.venv/ .venv/
venv/ venv/
.python-version
# IDEs # IDEs
.idea .idea

9
.pre-commit-config.yaml Normal file
View File

@@ -0,0 +1,9 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.11.2
hooks:
# Lint (and apply safe fixes)
- id: ruff
args: [--fix]
# Fomrat
- id: ruff-format

48
.ruff.toml Normal file
View File

@@ -0,0 +1,48 @@
# Minimum supported version
target-version = "py310"
# Formatting options
line-length = 100
indent-width = 4
exclude = [
"__pycache__",
".eggs",
".git",
".tox",
".venv",
"*.egg-info",
"*.pyc",
]
[lint]
# https://docs.astral.sh/ruff/rules/
select = [
"B", # flake8-bugbear
"B9",
"C",
"E", # pycodestyle
"F", # Pyflakes
"I", # isort
"N", # pep8-naming
"UP", # pyupgrade
"W", # pycodestyle
]
ignore = [
"B007", # unused-loop-control-variable
"B909", # loop-iterator-mutation
"E203", # whitespace-before-punctuation
"E266", # multiple-leading-hashes-for-block-comment
"E501", # redundant-backslash
"F403", # undefined-local-with-import-star
"N802", # invalid-function-name
"N806", # non-lowercase-variable-in-function
"N815", # mixed-case-variable-in-class-scope
]
[lint.mccabe]
max-complexity = 12
[format]
indent-style = "space"
quote-style = "preserve"

View File

@@ -68,7 +68,7 @@ COPY changedetection.py /app/changedetection.py
# Github Action test purpose(test-only.yml). # Github Action test purpose(test-only.yml).
# On production, it is effectively LOGGER_LEVEL=''. # On production, it is effectively LOGGER_LEVEL=''.
ARG LOGGER_LEVEL='' ARG LOGGER_LEVEL=''
ENV LOGGER_LEVEL "$LOGGER_LEVEL" ENV LOGGER_LEVEL="$LOGGER_LEVEL"
WORKDIR /app WORKDIR /app
CMD ["python", "./changedetection.py", "-d", "/datastore"] CMD ["python", "./changedetection.py", "-d", "/datastore"]

View File

@@ -3,4 +3,6 @@
# Only exists for direct CLI usage # Only exists for direct CLI usage
import changedetectionio import changedetectionio
changedetectionio.main()
if __name__ == '__main__':
changedetectionio.main()

View File

@@ -0,0 +1,98 @@
# Creating Plugins for changedetection.io
This document describes how to create plugins for changedetection.io. Plugins can be used to extend the functionality of the application in various ways.
## Plugin Types
### UI Stats Tab Plugins
These plugins can add content to the Stats tab in the Edit page. This is useful for adding custom statistics or visualizations about a watch.
#### Creating a UI Stats Tab Plugin
1. Create a Python file in a directory that will be loaded by the plugin system.
2. Use the `global_hookimpl` decorator to implement the `ui_edit_stats_extras` hook:
```python
import pluggy
from loguru import logger
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
@global_hookimpl
def ui_edit_stats_extras(watch):
"""Add custom content to the stats tab"""
# Calculate or retrieve your stats
my_stat = calculate_something(watch)
# Return HTML content as a string
html = f"""
<div class="my-plugin-stats">
<h4>My Plugin Statistics</h4>
<p>My statistic: {my_stat}</p>
</div>
"""
return html
```
3. The HTML you return will be included in the Stats tab.
## Plugin Loading
Plugins can be loaded from:
1. Built-in plugin directories in the codebase
2. External packages using setuptools entry points
To add a new plugin directory, modify the `plugin_dirs` dictionary in `pluggy_interface.py`.
## Example Plugin
Here's a simple example of a plugin that adds a word count statistic to the Stats tab:
```python
import pluggy
from loguru import logger
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
def count_words_in_history(watch):
"""Count words in the latest snapshot"""
try:
if not watch.history.keys():
return 0
latest_key = list(watch.history.keys())[-1]
latest_content = watch.get_history_snapshot(latest_key)
return len(latest_content.split())
except Exception as e:
logger.error(f"Error counting words: {str(e)}")
return 0
@global_hookimpl
def ui_edit_stats_extras(watch):
"""Add word count to the Stats tab"""
word_count = count_words_in_history(watch)
html = f"""
<div class="word-count-stats">
<h4>Content Analysis</h4>
<table class="pure-table">
<tbody>
<tr>
<td>Word count (latest snapshot)</td>
<td>{word_count}</td>
</tr>
</tbody>
</table>
</div>
"""
return html
```
## Testing Your Plugin
1. Place your plugin in one of the directories scanned by the plugin system
2. Restart changedetection.io
3. Go to the Edit page of a watch and check the Stats tab to see your content

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki # Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.49.13' __version__ = '0.49.16'
from changedetectionio.strtobool import strtobool from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError from json.decoder import JSONDecodeError
@@ -106,7 +106,7 @@ def main():
# Without this, a logger will be duplicated # Without this, a logger will be duplicated
logger.remove() logger.remove()
try: try:
log_level_for_stdout = { 'DEBUG', 'SUCCESS' } log_level_for_stdout = { 'TRACE', 'DEBUG', 'INFO', 'SUCCESS' }
logger.configure(handlers=[ logger.configure(handlers=[
{"sink": sys.stdout, "level": logger_level, {"sink": sys.stdout, "level": logger_level,
"filter" : lambda record: record['level'].name in log_level_for_stdout}, "filter" : lambda record: record['level'].name in log_level_for_stdout},

View File

@@ -53,14 +53,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
a = "?" if not '?' in base_url else '&' a = "?" if not '?' in base_url else '&'
base_url += a + f"timeout={keepalive_ms}" base_url += a + f"timeout={keepalive_ms}"
try: browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(base_url)
browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(base_url)
except Exception as e:
if 'ECONNREFUSED' in str(e):
return make_response('Unable to start the Playwright Browser session, is it running?', 401)
else:
# Other errors, bad URL syntax, bad reply etc
return make_response(str(e), 401)
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid) proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
proxy = None proxy = None
@@ -109,7 +102,16 @@ def construct_blueprint(datastore: ChangeDetectionStore):
logger.debug("Starting connection with playwright") logger.debug("Starting connection with playwright")
logger.debug("browser_steps.py connecting") logger.debug("browser_steps.py connecting")
browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid)
try:
browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid)
except Exception as e:
if 'ECONNREFUSED' in str(e):
return make_response('Unable to start the Playwright Browser session, is sockpuppetbrowser running? Network configuration is OK?', 401)
else:
# Other errors, bad URL syntax, bad reply etc
return make_response(str(e), 401)
logger.debug("Starting connection with playwright - done") logger.debug("Starting connection with playwright - done")
return {'browsersteps_session_id': browsersteps_session_id} return {'browsersteps_session_id': browsersteps_session_id}
@@ -166,9 +168,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
step_optional_value = request.form.get('optional_value') step_optional_value = request.form.get('optional_value')
is_last_step = strtobool(request.form.get('is_last_step')) is_last_step = strtobool(request.form.get('is_last_step'))
# @todo try.. accept.. nice errors not popups..
try: try:
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(action_name=step_operation, browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(action_name=step_operation,
selector=step_selector, selector=step_selector,
optional_value=step_optional_value) optional_value=step_optional_value)

View File

@@ -1,6 +1,8 @@
import os import os
import time import time
import re import re
import sys
import traceback
from random import randint from random import randint
from loguru import logger from loguru import logger
@@ -35,6 +37,7 @@ browser_step_ui_config = {'Choose one': '0 0',
'Make all child elements visible': '1 0', 'Make all child elements visible': '1 0',
'Press Enter': '0 0', 'Press Enter': '0 0',
'Select by label': '1 1', 'Select by label': '1 1',
'<select> by option text': '1 1',
'Scroll down': '0 0', 'Scroll down': '0 0',
'Uncheck checkbox': '1 0', 'Uncheck checkbox': '1 0',
'Wait for seconds': '0 1', 'Wait for seconds': '0 1',
@@ -54,7 +57,6 @@ browser_step_ui_config = {'Choose one': '0 0',
class steppable_browser_interface(): class steppable_browser_interface():
page = None page = None
start_url = None start_url = None
action_timeout = 10 * 1000 action_timeout = 10 * 1000
def __init__(self, start_url): def __init__(self, start_url):
@@ -62,6 +64,10 @@ class steppable_browser_interface():
# Convert and perform "Click Button" for example # Convert and perform "Click Button" for example
def call_action(self, action_name, selector=None, optional_value=None): def call_action(self, action_name, selector=None, optional_value=None):
if self.page is None:
logger.warning("Cannot call action on None page object")
return
now = time.time() now = time.time()
call_action_name = re.sub('[^0-9a-zA-Z]+', '_', action_name.lower()) call_action_name = re.sub('[^0-9a-zA-Z]+', '_', action_name.lower())
if call_action_name == 'choose_one': if call_action_name == 'choose_one':
@@ -72,28 +78,33 @@ class steppable_browser_interface():
if selector and selector.startswith('/') and not selector.startswith('//'): if selector and selector.startswith('/') and not selector.startswith('//'):
selector = "xpath=" + selector selector = "xpath=" + selector
# Check if action handler exists
if not hasattr(self, "action_" + call_action_name):
logger.warning(f"Action handler for '{call_action_name}' not found")
return
action_handler = getattr(self, "action_" + call_action_name) action_handler = getattr(self, "action_" + call_action_name)
# Support for Jinja2 variables in the value and selector # Support for Jinja2 variables in the value and selector
if selector and ('{%' in selector or '{{' in selector): if selector and ('{%' in selector or '{{' in selector):
selector = jinja_render(template_str=selector) selector = jinja_render(template_str=selector)
if optional_value and ('{%' in optional_value or '{{' in optional_value): if optional_value and ('{%' in optional_value or '{{' in optional_value):
optional_value = jinja_render(template_str=optional_value) optional_value = jinja_render(template_str=optional_value)
action_handler(selector, optional_value) action_handler(selector, optional_value)
# Safely wait for timeout
self.page.wait_for_timeout(1.5 * 1000) self.page.wait_for_timeout(1.5 * 1000)
logger.debug(f"Call action done in {time.time()-now:.2f}s") logger.debug(f"Call action done in {time.time()-now:.2f}s")
def action_goto_url(self, selector=None, value=None): def action_goto_url(self, selector=None, value=None):
# self.page.set_viewport_size({"width": 1280, "height": 5000}) if not value:
logger.warning("No URL provided for goto_url action")
return None
now = time.time() now = time.time()
response = self.page.goto(value, timeout=0, wait_until='load') response = self.page.goto(value, timeout=0, wait_until='load')
# Should be the same as the puppeteer_fetch.js methods, means, load with no timeout set (skip timeout)
#and also wait for seconds ?
#await page.waitForTimeout(1000);
#await page.waitForTimeout(extra_wait_ms);
logger.debug(f"Time to goto URL {time.time()-now:.2f}s") logger.debug(f"Time to goto URL {time.time()-now:.2f}s")
return response return response
@@ -103,36 +114,40 @@ class steppable_browser_interface():
def action_click_element_containing_text(self, selector=None, value=''): def action_click_element_containing_text(self, selector=None, value=''):
logger.debug("Clicking element containing text") logger.debug("Clicking element containing text")
if not len(value.strip()): if not value or not len(value.strip()):
return return
elem = self.page.get_by_text(value) elem = self.page.get_by_text(value)
if elem.count(): if elem.count():
elem.first.click(delay=randint(200, 500), timeout=self.action_timeout) elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
def action_click_element_containing_text_if_exists(self, selector=None, value=''): def action_click_element_containing_text_if_exists(self, selector=None, value=''):
logger.debug("Clicking element containing text if exists") logger.debug("Clicking element containing text if exists")
if not len(value.strip()): if not value or not len(value.strip()):
return return
elem = self.page.get_by_text(value) elem = self.page.get_by_text(value)
logger.debug(f"Clicking element containing text - {elem.count()} elements found") logger.debug(f"Clicking element containing text - {elem.count()} elements found")
if elem.count(): if elem.count():
elem.first.click(delay=randint(200, 500), timeout=self.action_timeout) elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
else:
return
def action_enter_text_in_field(self, selector, value): def action_enter_text_in_field(self, selector, value):
if not len(selector.strip()): if not selector or not len(selector.strip()):
return return
self.page.fill(selector, value, timeout=self.action_timeout) self.page.fill(selector, value, timeout=self.action_timeout)
def action_execute_js(self, selector, value): def action_execute_js(self, selector, value):
response = self.page.evaluate(value) if not value:
return response return None
return self.page.evaluate(value)
def action_click_element(self, selector, value): def action_click_element(self, selector, value):
logger.debug("Clicking element") logger.debug("Clicking element")
if not len(selector.strip()): if not selector or not len(selector.strip()):
return return
self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500)) self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500))
@@ -140,24 +155,38 @@ class steppable_browser_interface():
def action_click_element_if_exists(self, selector, value): def action_click_element_if_exists(self, selector, value):
import playwright._impl._errors as _api_types import playwright._impl._errors as _api_types
logger.debug("Clicking element if exists") logger.debug("Clicking element if exists")
if not len(selector.strip()): if not selector or not len(selector.strip()):
return return
try: try:
self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500)) self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500))
except _api_types.TimeoutError as e: except _api_types.TimeoutError:
return return
except _api_types.Error as e: except _api_types.Error:
# Element was there, but page redrew and now its long long gone # Element was there, but page redrew and now its long long gone
return return
def action_click_x_y(self, selector, value): def action_click_x_y(self, selector, value):
if not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value): if not value or not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value):
raise Exception("'Click X,Y' step should be in the format of '100 , 90'") logger.warning("'Click X,Y' step should be in the format of '100 , 90'")
return
x, y = value.strip().split(',') try:
x = int(float(x.strip())) x, y = value.strip().split(',')
y = int(float(y.strip())) x = int(float(x.strip()))
self.page.mouse.click(x=x, y=y, delay=randint(200, 500)) y = int(float(y.strip()))
self.page.mouse.click(x=x, y=y, delay=randint(200, 500))
except Exception as e:
logger.error(f"Error parsing x,y coordinates: {str(e)}")
def action__select_by_option_text(self, selector, value):
if not selector or not len(selector.strip()):
return
self.page.select_option(selector, label=value, timeout=self.action_timeout)
def action_scroll_down(self, selector, value): def action_scroll_down(self, selector, value):
# Some sites this doesnt work on for some reason # Some sites this doesnt work on for some reason
@@ -165,23 +194,42 @@ class steppable_browser_interface():
self.page.wait_for_timeout(1000) self.page.wait_for_timeout(1000)
def action_wait_for_seconds(self, selector, value): def action_wait_for_seconds(self, selector, value):
self.page.wait_for_timeout(float(value.strip()) * 1000) try:
seconds = float(value.strip()) if value else 1.0
self.page.wait_for_timeout(seconds * 1000)
except (ValueError, TypeError) as e:
logger.error(f"Invalid value for wait_for_seconds: {str(e)}")
def action_wait_for_text(self, selector, value): def action_wait_for_text(self, selector, value):
if not value:
return
import json import json
v = json.dumps(value) v = json.dumps(value)
self.page.wait_for_function(f'document.querySelector("body").innerText.includes({v});', timeout=30000) self.page.wait_for_function(
f'document.querySelector("body").innerText.includes({v});',
timeout=30000
)
def action_wait_for_text_in_element(self, selector, value): def action_wait_for_text_in_element(self, selector, value):
if not selector or not value:
return
import json import json
s = json.dumps(selector) s = json.dumps(selector)
v = json.dumps(value) v = json.dumps(value)
self.page.wait_for_function(f'document.querySelector({s}).innerText.includes({v});', timeout=30000)
self.page.wait_for_function(
f'document.querySelector({s}).innerText.includes({v});',
timeout=30000
)
# @todo - in the future make some popout interface to capture what needs to be set # @todo - in the future make some popout interface to capture what needs to be set
# https://playwright.dev/python/docs/api/class-keyboard # https://playwright.dev/python/docs/api/class-keyboard
def action_press_enter(self, selector, value): def action_press_enter(self, selector, value):
self.page.keyboard.press("Enter", delay=randint(200, 500)) self.page.keyboard.press("Enter", delay=randint(200, 500))
def action_press_page_up(self, selector, value): def action_press_page_up(self, selector, value):
self.page.keyboard.press("PageUp", delay=randint(200, 500)) self.page.keyboard.press("PageUp", delay=randint(200, 500))
@@ -190,17 +238,30 @@ class steppable_browser_interface():
self.page.keyboard.press("PageDown", delay=randint(200, 500)) self.page.keyboard.press("PageDown", delay=randint(200, 500))
def action_check_checkbox(self, selector, value): def action_check_checkbox(self, selector, value):
if not selector:
return
self.page.locator(selector).check(timeout=self.action_timeout) self.page.locator(selector).check(timeout=self.action_timeout)
def action_uncheck_checkbox(self, selector, value): def action_uncheck_checkbox(self, selector, value):
if not selector:
return
self.page.locator(selector).uncheck(timeout=self.action_timeout) self.page.locator(selector).uncheck(timeout=self.action_timeout)
def action_remove_elements(self, selector, value): def action_remove_elements(self, selector, value):
"""Removes all elements matching the given selector from the DOM.""" """Removes all elements matching the given selector from the DOM."""
if not selector:
return
self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())") self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())")
def action_make_all_child_elements_visible(self, selector, value): def action_make_all_child_elements_visible(self, selector, value):
"""Recursively makes all child elements inside the given selector fully visible.""" """Recursively makes all child elements inside the given selector fully visible."""
if not selector:
return
self.page.locator(selector).locator("*").evaluate_all(""" self.page.locator(selector).locator("*").evaluate_all("""
els => els.forEach(el => { els => els.forEach(el => {
el.style.display = 'block'; // Forces it to be displayed el.style.display = 'block'; // Forces it to be displayed
@@ -224,7 +285,9 @@ class browsersteps_live_ui(steppable_browser_interface):
# bump and kill this if idle after X sec # bump and kill this if idle after X sec
age_start = 0 age_start = 0
headers = {} headers = {}
# Track if resources are properly cleaned up
_is_cleaned_up = False
# use a special driver, maybe locally etc # use a special driver, maybe locally etc
command_executor = os.getenv( command_executor = os.getenv(
"PLAYWRIGHT_BROWSERSTEPS_DRIVER_URL" "PLAYWRIGHT_BROWSERSTEPS_DRIVER_URL"
@@ -243,9 +306,14 @@ class browsersteps_live_ui(steppable_browser_interface):
self.age_start = time.time() self.age_start = time.time()
self.playwright_browser = playwright_browser self.playwright_browser = playwright_browser
self.start_url = start_url self.start_url = start_url
self._is_cleaned_up = False
if self.context is None: if self.context is None:
self.connect(proxy=proxy) self.connect(proxy=proxy)
def __del__(self):
# Ensure cleanup happens if object is garbage collected
self.cleanup()
# Connect and setup a new context # Connect and setup a new context
def connect(self, proxy=None): def connect(self, proxy=None):
# Should only get called once - test that # Should only get called once - test that
@@ -264,31 +332,74 @@ class browsersteps_live_ui(steppable_browser_interface):
user_agent=manage_user_agent(headers=self.headers), user_agent=manage_user_agent(headers=self.headers),
) )
self.page = self.context.new_page() self.page = self.context.new_page()
# self.page.set_default_navigation_timeout(keep_open) # self.page.set_default_navigation_timeout(keep_open)
self.page.set_default_timeout(keep_open) self.page.set_default_timeout(keep_open)
# @todo probably this doesnt work # Set event handlers
self.page.on( self.page.on("close", self.mark_as_closed)
"close",
self.mark_as_closed,
)
# Listen for all console events and handle errors # Listen for all console events and handle errors
self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}")) self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
logger.debug(f"Time to browser setup {time.time()-now:.2f}s") logger.debug(f"Time to browser setup {time.time()-now:.2f}s")
self.page.wait_for_timeout(1 * 1000) self.page.wait_for_timeout(1 * 1000)
def mark_as_closed(self): def mark_as_closed(self):
logger.debug("Page closed, cleaning up..") logger.debug("Page closed, cleaning up..")
self.cleanup()
def cleanup(self):
"""Properly clean up all resources to prevent memory leaks"""
if self._is_cleaned_up:
return
logger.debug("Cleaning up browser steps resources")
# Clean up page
if hasattr(self, 'page') and self.page is not None:
try:
# Force garbage collection before closing
self.page.request_gc()
except Exception as e:
logger.debug(f"Error during page garbage collection: {str(e)}")
try:
# Remove event listeners before closing
self.page.remove_listener("close", self.mark_as_closed)
except Exception as e:
logger.debug(f"Error removing event listeners: {str(e)}")
try:
self.page.close()
except Exception as e:
logger.debug(f"Error closing page: {str(e)}")
self.page = None
# Clean up context
if hasattr(self, 'context') and self.context is not None:
try:
self.context.close()
except Exception as e:
logger.debug(f"Error closing context: {str(e)}")
self.context = None
self._is_cleaned_up = True
logger.debug("Browser steps resources cleanup complete")
@property @property
def has_expired(self): def has_expired(self):
if not self.page: if not self.page or self._is_cleaned_up:
return True return True
# Check if session has expired based on age
max_age_seconds = int(os.getenv("BROWSER_STEPS_MAX_AGE_SECONDS", 60 * 10)) # Default 10 minutes
if (time.time() - self.age_start) > max_age_seconds:
logger.debug(f"Browser steps session expired after {max_age_seconds} seconds")
return True
return False
def get_current_state(self): def get_current_state(self):
"""Return the screenshot and interactive elements mapping, generally always called after action_()""" """Return the screenshot and interactive elements mapping, generally always called after action_()"""
@@ -297,36 +408,55 @@ class browsersteps_live_ui(steppable_browser_interface):
# because we for now only run browser steps in playwright mode (not puppeteer mode) # because we for now only run browser steps in playwright mode (not puppeteer mode)
from changedetectionio.content_fetchers.playwright import capture_full_page from changedetectionio.content_fetchers.playwright import capture_full_page
# Safety check - don't proceed if resources are cleaned up
if self._is_cleaned_up or self.page is None:
logger.warning("Attempted to get current state after cleanup")
return (None, None)
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text() xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
now = time.time() now = time.time()
self.page.wait_for_timeout(1 * 1000) self.page.wait_for_timeout(1 * 1000)
screenshot = capture_full_page(page=self.page) screenshot = None
xpath_data = None
try:
# Get screenshot first
screenshot = capture_full_page(page=self.page)
logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s") # Then get interactive elements
now = time.time()
self.page.evaluate("var include_filters=''")
self.page.request_gc()
now = time.time() scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
self.page.evaluate("var include_filters=''")
# Go find the interactive elements
# @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers?
self.page.request_gc() MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
xpath_data = json.loads(self.page.evaluate(xpath_element_js, {
"visualselector_xpath_selectors": scan_elements,
"max_height": MAX_TOTAL_HEIGHT
}))
self.page.request_gc()
scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span' # Sort elements by size
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT)) logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s")
xpath_data = json.loads(self.page.evaluate(xpath_element_js, {
"visualselector_xpath_selectors": scan_elements, except Exception as e:
"max_height": MAX_TOTAL_HEIGHT logger.error(f"Error getting current state: {str(e)}")
})) # Attempt recovery - force garbage collection
self.page.request_gc() try:
self.page.request_gc()
# So the JS will find the smallest one first except:
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True) pass
logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s")
# Request garbage collection one final time
# playwright._impl._api_types.Error: Browser closed. try:
# @todo show some countdown timer? self.page.request_gc()
except:
pass
return (screenshot, xpath_data) return (screenshot, xpath_data)

View File

@@ -104,6 +104,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
uuid = list(datastore.data['settings']['application']['tags'].keys()).pop() uuid = list(datastore.data['settings']['application']['tags'].keys()).pop()
default = datastore.data['settings']['application']['tags'].get(uuid) default = datastore.data['settings']['application']['tags'].get(uuid)
if not default:
flash("Tag not found", "error")
return redirect(url_for('watchlist.index'))
form = group_restock_settings_form( form = group_restock_settings_form(
formdata=request.form if request.method == 'POST' else None, formdata=request.form if request.method == 'POST' else None,

View File

@@ -66,7 +66,7 @@
<div class="pure-control-group inline-radio"> <div class="pure-control-group inline-radio">
{{ render_checkbox_field(form.notification_muted) }} {{ render_checkbox_field(form.notification_muted) }}
</div> </div>
{% if is_html_webdriver %} {% if 1 %}
<div class="pure-control-group inline-radio"> <div class="pure-control-group inline-radio">
{{ render_checkbox_field(form.notification_screenshot) }} {{ render_checkbox_field(form.notification_screenshot) }}
<span class="pure-form-message-inline"> <span class="pure-form-message-inline">

View File

@@ -213,9 +213,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
if request.method == 'POST' and not form.validate(): if request.method == 'POST' and not form.validate():
flash("An error occurred, please see below.", "error") flash("An error occurred, please see below.", "error")
visualselector_data_is_ready = datastore.visualselector_data_is_ready(uuid)
# JQ is difficult to install on windows and must be manually added (outside requirements.txt) # JQ is difficult to install on windows and must be manually added (outside requirements.txt)
jq_support = True jq_support = True
try: try:
@@ -225,16 +222,20 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
watch = datastore.data['watching'].get(uuid) watch = datastore.data['watching'].get(uuid)
# if system or watch is configured to need a chrome type browser
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
watch_needs_selenium_or_playwright = False
watch_uses_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'): if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
watch_uses_webdriver = True watch_needs_selenium_or_playwright = True
from zoneinfo import available_timezones from zoneinfo import available_timezones
# Only works reliably with Playwright # Only works reliably with Playwright
# Import the global plugin system
from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras
template_args = { template_args = {
'available_processors': processors.available_processors(), 'available_processors': processors.available_processors(),
'available_timezones': sorted(available_timezones()), 'available_timezones': sorted(available_timezones()),
@@ -247,14 +248,18 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False, 'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False,
'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0, 'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0,
'has_special_tag_options': _watch_has_tag_options_set(watch=watch), 'has_special_tag_options': _watch_has_tag_options_set(watch=watch),
'watch_uses_webdriver': watch_uses_webdriver,
'jq_support': jq_support, 'jq_support': jq_support,
'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False), 'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False),
'settings_application': datastore.data['settings']['application'], 'settings_application': datastore.data['settings']['application'],
'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'),
'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'),
'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch),
'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid),
'timezone_default_config': datastore.data['settings']['application'].get('timezone'), 'timezone_default_config': datastore.data['settings']['application'].get('timezone'),
'using_global_webdriver_wait': not default['webdriver_delay'], 'using_global_webdriver_wait': not default['webdriver_delay'],
'uuid': uuid, 'uuid': uuid,
'watch': watch 'watch': watch,
'watch_needs_selenium_or_playwright': watch_needs_selenium_or_playwright,
} }
included_content = None included_content = None

View File

@@ -5,7 +5,7 @@ from json_logic.builtins import BUILTINS
from .exceptions import EmptyConditionRuleRowNotUsable from .exceptions import EmptyConditionRuleRowNotUsable
from .pluggy_interface import plugin_manager # Import the pluggy plugin manager from .pluggy_interface import plugin_manager # Import the pluggy plugin manager
from . import default_plugin from . import default_plugin
from loguru import logger
# List of all supported JSON Logic operators # List of all supported JSON Logic operators
operator_choices = [ operator_choices = [
(None, "Choose one - Operator"), (None, "Choose one - Operator"),
@@ -94,20 +94,41 @@ def execute_ruleset_against_all_plugins(current_watch_uuid: str, application_dat
EXECUTE_DATA = {} EXECUTE_DATA = {}
result = True result = True
ruleset_settings = application_datastruct['watching'].get(current_watch_uuid) watch = application_datastruct['watching'].get(current_watch_uuid)
if ruleset_settings.get("conditions"): if watch and watch.get("conditions"):
logic_operator = "and" if ruleset_settings.get("conditions_match_logic", "ALL") == "ALL" else "or" logic_operator = "and" if watch.get("conditions_match_logic", "ALL") == "ALL" else "or"
complete_rules = filter_complete_rules(ruleset_settings['conditions']) complete_rules = filter_complete_rules(watch['conditions'])
if complete_rules: if complete_rules:
# Give all plugins a chance to update the data dict again (that we will test the conditions against) # Give all plugins a chance to update the data dict again (that we will test the conditions against)
for plugin in plugin_manager.get_plugins(): for plugin in plugin_manager.get_plugins():
new_execute_data = plugin.add_data(current_watch_uuid=current_watch_uuid, try:
application_datastruct=application_datastruct, import concurrent.futures
ephemeral_data=ephemeral_data) import time
with concurrent.futures.ThreadPoolExecutor() as executor:
future = executor.submit(
plugin.add_data,
current_watch_uuid=current_watch_uuid,
application_datastruct=application_datastruct,
ephemeral_data=ephemeral_data
)
logger.debug(f"Trying plugin {plugin}....")
if new_execute_data and isinstance(new_execute_data, dict): # Set a timeout of 10 seconds
EXECUTE_DATA.update(new_execute_data) try:
new_execute_data = future.result(timeout=10)
if new_execute_data and isinstance(new_execute_data, dict):
EXECUTE_DATA.update(new_execute_data)
except concurrent.futures.TimeoutError:
# The plugin took too long, abort processing for this watch
raise Exception(f"Plugin {plugin.__class__.__name__} took more than 10 seconds to run.")
except Exception as e:
# Log the error but continue with the next plugin
import logging
logging.error(f"Error executing plugin {plugin.__class__.__name__}: {str(e)}")
continue
# Create the ruleset # Create the ruleset
ruleset = convert_to_jsonlogic(logic_operator=logic_operator, rule_dict=complete_rules) ruleset = convert_to_jsonlogic(logic_operator=logic_operator, rule_dict=complete_rules)
@@ -132,3 +153,18 @@ for plugin in plugin_manager.get_plugins():
if isinstance(new_field_choices, list): if isinstance(new_field_choices, list):
field_choices.extend(new_field_choices) field_choices.extend(new_field_choices)
def collect_ui_edit_stats_extras(watch):
"""Collect and combine HTML content from all plugins that implement ui_edit_stats_extras"""
extras_content = []
for plugin in plugin_manager.get_plugins():
try:
content = plugin.ui_edit_stats_extras(watch=watch)
if content:
extras_content.append(content)
except Exception as e:
# Skip plugins that don't implement the hook or have errors
pass
return "\n".join(extras_content) if extras_content else ""

View File

@@ -1,5 +1,8 @@
import pluggy import pluggy
from . import default_plugin # Import the default plugin import os
import importlib
import sys
from . import default_plugin
# ✅ Ensure that the namespace in HookspecMarker matches PluginManager # ✅ Ensure that the namespace in HookspecMarker matches PluginManager
PLUGIN_NAMESPACE = "changedetectionio_conditions" PLUGIN_NAMESPACE = "changedetectionio_conditions"
@@ -30,6 +33,11 @@ class ConditionsSpec:
def add_data(current_watch_uuid, application_datastruct, ephemeral_data): def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
"""Add to the datadict""" """Add to the datadict"""
pass pass
@hookspec
def ui_edit_stats_extras(watch):
"""Return HTML content to add to the stats tab in the edit view"""
pass
# ✅ Set up Pluggy Plugin Manager # ✅ Set up Pluggy Plugin Manager
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE) plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
@@ -40,5 +48,27 @@ plugin_manager.add_hookspecs(ConditionsSpec)
# ✅ Register built-in plugins manually # ✅ Register built-in plugins manually
plugin_manager.register(default_plugin, "default_plugin") plugin_manager.register(default_plugin, "default_plugin")
# ✅ Load plugins from the plugins directory
def load_plugins_from_directory():
plugins_dir = os.path.join(os.path.dirname(__file__), 'plugins')
if not os.path.exists(plugins_dir):
return
# Get all Python files (excluding __init__.py)
for filename in os.listdir(plugins_dir):
if filename.endswith(".py") and filename != "__init__.py":
module_name = filename[:-3] # Remove .py extension
module_path = f"changedetectionio.conditions.plugins.{module_name}"
try:
module = importlib.import_module(module_path)
# Register the plugin with pluggy
plugin_manager.register(module, module_name)
except (ImportError, AttributeError) as e:
print(f"Error loading plugin {module_name}: {e}")
# Load plugins from the plugins directory
load_plugins_from_directory()
# ✅ Discover installed plugins from external packages (if any) # ✅ Discover installed plugins from external packages (if any)
plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE) plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE)

View File

@@ -0,0 +1 @@
# Import plugins package to make them discoverable

View File

@@ -0,0 +1,107 @@
import pluggy
from loguru import logger
# Support both plugin systems
conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
def levenshtein_ratio_recent_history(watch, incoming_text=None):
try:
from Levenshtein import ratio, distance
k = list(watch.history.keys())
a = None
b = None
# When called from ui_edit_stats_extras, we don't have incoming_text
if incoming_text is None:
a = watch.get_history_snapshot(timestamp=k[-1]) # Latest snapshot
b = watch.get_history_snapshot(timestamp=k[-2]) # Previous snapshot
# Needs atleast one snapshot
elif len(k) >= 1: # Should be atleast one snapshot to compare against
a = watch.get_history_snapshot(timestamp=k[-1]) # Latest saved snapshot
b = incoming_text if incoming_text else k[-2]
if a and b:
distance_value = distance(a, b)
ratio_value = ratio(a, b)
return {
'distance': distance_value,
'ratio': ratio_value,
'percent_similar': round(ratio_value * 100, 2)
}
except Exception as e:
logger.warning(f"Unable to calc similarity: {str(e)}")
return ''
@conditions_hookimpl
def register_operators():
pass
@conditions_hookimpl
def register_operator_choices():
pass
@conditions_hookimpl
def register_field_choices():
return [
("levenshtein_ratio", "Levenshtein - Text similarity ratio"),
("levenshtein_distance", "Levenshtein - Text change distance"),
]
@conditions_hookimpl
def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
res = {}
watch = application_datastruct['watching'].get(current_watch_uuid)
# ephemeral_data['text'] will be the current text after filters, they may have edited filters but not saved them yet etc
if watch and 'text' in ephemeral_data:
lev_data = levenshtein_ratio_recent_history(watch, ephemeral_data.get('text',''))
if isinstance(lev_data, dict):
res['levenshtein_ratio'] = lev_data.get('ratio', 0)
res['levenshtein_similarity'] = lev_data.get('percent_similar', 0)
res['levenshtein_distance'] = lev_data.get('distance', 0)
return res
@global_hookimpl
def ui_edit_stats_extras(watch):
"""Add Levenshtein stats to the UI using the global plugin system"""
"""Generate the HTML for Levenshtein stats - shared by both plugin systems"""
if len(watch.history.keys()) < 2:
return "<p>Not enough history to calculate Levenshtein metrics</p>"
try:
lev_data = levenshtein_ratio_recent_history(watch)
if not lev_data or not isinstance(lev_data, dict):
return "<p>Unable to calculate Levenshtein metrics</p>"
html = f"""
<div class="levenshtein-stats">
<h4>Levenshtein Text Similarity Details</h4>
<table class="pure-table">
<tbody>
<tr>
<td>Raw distance (edits needed)</td>
<td>{lev_data['distance']}</td>
</tr>
<tr>
<td>Similarity ratio</td>
<td>{lev_data['ratio']:.4f}</td>
</tr>
<tr>
<td>Percent similar</td>
<td>{lev_data['percent_similar']}%</td>
</tr>
</tbody>
</table>
<p style="font-size: 80%;">Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one into the other.</p>
</div>
"""
return html
except Exception as e:
logger.error(f"Error generating Levenshtein UI extras: {str(e)}")
return "<p>Error calculating Levenshtein metrics</p>"

View File

@@ -0,0 +1,82 @@
import pluggy
from loguru import logger
# Support both plugin systems
conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
def count_words_in_history(watch, incoming_text=None):
"""Count words in snapshot text"""
try:
if incoming_text is not None:
# When called from add_data with incoming text
return len(incoming_text.split())
elif watch.history.keys():
# When called from UI extras to count latest snapshot
latest_key = list(watch.history.keys())[-1]
latest_content = watch.get_history_snapshot(latest_key)
return len(latest_content.split())
return 0
except Exception as e:
logger.error(f"Error counting words: {str(e)}")
return 0
# Implement condition plugin hooks
@conditions_hookimpl
def register_operators():
# No custom operators needed
return {}
@conditions_hookimpl
def register_operator_choices():
# No custom operator choices needed
return []
@conditions_hookimpl
def register_field_choices():
# Add a field that will be available in conditions
return [
("word_count", "Word count of content"),
]
@conditions_hookimpl
def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
"""Add word count data for conditions"""
result = {}
watch = application_datastruct['watching'].get(current_watch_uuid)
if watch and 'text' in ephemeral_data:
word_count = count_words_in_history(watch, ephemeral_data['text'])
result['word_count'] = word_count
return result
def _generate_stats_html(watch):
"""Generate the HTML content for the stats tab"""
word_count = count_words_in_history(watch)
html = f"""
<div class="word-count-stats">
<h4>Content Analysis</h4>
<table class="pure-table">
<tbody>
<tr>
<td>Word count (latest snapshot)</td>
<td>{word_count}</td>
</tr>
</tbody>
</table>
<p style="font-size: 80%;">Word count is a simple measure of content length, calculated by splitting text on whitespace.</p>
</div>
"""
return html
@conditions_hookimpl
def ui_edit_stats_extras(watch):
"""Add word count stats to the UI through conditions plugin system"""
return _generate_stats_html(watch)
@global_hookimpl
def ui_edit_stats_extras(watch):
"""Add word count stats to the UI using the global plugin system"""
return _generate_stats_html(watch)

View File

@@ -26,9 +26,11 @@ def capture_full_page(page):
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow
screenshot_chunks = [] screenshot_chunks = []
y = 0 y = 0
# If page height is larger than current viewport, use a larger viewport for better capturing
if page_height > page.viewport_size['height']: if page_height > page.viewport_size['height']:
if page_height < step_size:
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
# Set viewport to a larger size to capture more content at once # Set viewport to a larger size to capture more content at once
page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size}) page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
@@ -59,7 +61,10 @@ def capture_full_page(page):
p.join() p.join()
logger.debug( logger.debug(
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s") f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
# Explicit cleanup
del screenshot_chunks
del p
del parent_conn, child_conn
screenshot_chunks = None screenshot_chunks = None
return screenshot return screenshot
@@ -189,7 +194,6 @@ class fetcher(Fetcher):
browsersteps_interface.page = self.page browsersteps_interface.page = self.page
response = browsersteps_interface.action_goto_url(value=url) response = browsersteps_interface.action_goto_url(value=url)
self.headers = response.all_headers()
if response is None: if response is None:
context.close() context.close()
@@ -197,6 +201,8 @@ class fetcher(Fetcher):
logger.debug("Content Fetcher > Response object from the browser communication was none") logger.debug("Content Fetcher > Response object from the browser communication was none")
raise EmptyReply(url=url, status_code=None) raise EmptyReply(url=url, status_code=None)
self.headers = response.all_headers()
try: try:
if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code): if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None) browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
@@ -286,12 +292,28 @@ class fetcher(Fetcher):
pass pass
# Clean up resources properly # Clean up resources properly
context.close() try:
context = None self.page.request_gc()
except:
pass
self.page.close() try:
self.page.close()
except:
pass
self.page = None self.page = None
browser.close() try:
borwser = None context.close()
except:
pass
context = None
try:
browser.close()
except:
pass
browser = None

View File

@@ -46,9 +46,10 @@ async def capture_full_page(page):
screenshot_chunks = [] screenshot_chunks = []
y = 0 y = 0
if page_height > page.viewport['height']: if page_height > page.viewport['height']:
if page_height < step_size:
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
await page.setViewport({'width': page.viewport['width'], 'height': step_size}) await page.setViewport({'width': page.viewport['width'], 'height': step_size})
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT): while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
await page.evaluate(f"window.scrollTo(0, {y})") await page.evaluate(f"window.scrollTo(0, {y})")
screenshot_chunks.append(await page.screenshot(type_='jpeg', screenshot_chunks.append(await page.screenshot(type_='jpeg',
@@ -146,7 +147,7 @@ class fetcher(Fetcher):
is_binary, is_binary,
empty_pages_are_a_change empty_pages_are_a_change
): ):
import re
self.delete_browser_steps_screenshots() self.delete_browser_steps_screenshots()
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
@@ -171,6 +172,17 @@ class fetcher(Fetcher):
# headless - ask a new page # headless - ask a new page
self.page = (pages := await browser.pages) and len(pages) or await browser.newPage() self.page = (pages := await browser.pages) and len(pages) or await browser.newPage()
if '--window-size' in self.browser_connection_url:
# Be sure the viewport is always the window-size, this is often not the same thing
match = re.search(r'--window-size=(\d+),(\d+)', self.browser_connection_url)
if match:
logger.debug(f"Setting viewport to same as --window-size in browser connection URL {int(match.group(1))},{int(match.group(2))}")
await self.page.setViewport({
"width": int(match.group(1)),
"height": int(match.group(2))
})
logger.debug(f"Puppeteer viewport size {self.page.viewport}")
try: try:
from pyppeteerstealth import inject_evasions_into_page from pyppeteerstealth import inject_evasions_into_page
except ImportError: except ImportError:
@@ -217,7 +229,6 @@ class fetcher(Fetcher):
response = await self.page.goto(url, waitUntil="load") response = await self.page.goto(url, waitUntil="load")
if response is None: if response is None:
await self.page.close() await self.page.close()
await browser.close() await browser.close()

View File

@@ -28,6 +28,7 @@ class fetcher(Fetcher):
import chardet import chardet
import requests import requests
from requests.exceptions import ProxyError, ConnectionError, RequestException
if self.browser_steps_get_valid_steps(): if self.browser_steps_get_valid_steps():
raise BrowserStepsInUnsupportedFetcher(url=url) raise BrowserStepsInUnsupportedFetcher(url=url)
@@ -52,14 +53,19 @@ class fetcher(Fetcher):
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'): if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
from requests_file import FileAdapter from requests_file import FileAdapter
session.mount('file://', FileAdapter()) session.mount('file://', FileAdapter())
try:
r = session.request(method=request_method, r = session.request(method=request_method,
data=request_body.encode('utf-8') if type(request_body) is str else request_body, data=request_body.encode('utf-8') if type(request_body) is str else request_body,
url=url, url=url,
headers=request_headers, headers=request_headers,
timeout=timeout, timeout=timeout,
proxies=proxies, proxies=proxies,
verify=False) verify=False)
except Exception as e:
msg = str(e)
if proxies and 'SOCKSHTTPSConnectionPool' in msg:
msg = f"Proxy connection failed? {msg}"
raise Exception(msg) from e
# If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks. # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
# For example - some sites don't tell us it's utf-8, but return utf-8 content # For example - some sites don't tell us it's utf-8, but return utf-8 content

View File

@@ -10,6 +10,7 @@ async () => {
'article épuisé', 'article épuisé',
'artikel zurzeit vergriffen', 'artikel zurzeit vergriffen',
'as soon as stock is available', 'as soon as stock is available',
'aucune offre n\'est disponible',
'ausverkauft', // sold out 'ausverkauft', // sold out
'available for back order', 'available for back order',
'awaiting stock', 'awaiting stock',
@@ -25,9 +26,8 @@ async () => {
'dieser artikel ist bald wieder verfügbar', 'dieser artikel ist bald wieder verfügbar',
'dostępne wkrótce', 'dostępne wkrótce',
'en rupture', 'en rupture',
'en rupture de stock',
'épuisé',
'esgotado', 'esgotado',
'in kürze lieferbar',
'indisponible', 'indisponible',
'indisponível', 'indisponível',
'isn\'t in stock right now', 'isn\'t in stock right now',
@@ -50,10 +50,12 @@ async () => {
'niet leverbaar', 'niet leverbaar',
'niet op voorraad', 'niet op voorraad',
'no disponible', 'no disponible',
'non disponibile', 'no featured offers available',
'non disponible', 'no longer available',
'no longer in stock', 'no longer in stock',
'no tickets available', 'no tickets available',
'non disponibile',
'non disponible',
'not available', 'not available',
'not currently available', 'not currently available',
'not in stock', 'not in stock',
@@ -89,13 +91,15 @@ async () => {
'vergriffen', 'vergriffen',
'vorbestellen', 'vorbestellen',
'vorbestellung ist bald möglich', 'vorbestellung ist bald möglich',
'we don\'t currently have any',
'we couldn\'t find any products that match', 'we couldn\'t find any products that match',
'we do not currently have an estimate of when this product will be back in stock.', 'we do not currently have an estimate of when this product will be back in stock.',
'we don\'t currently have any',
'we don\'t know when or if this item will be back in stock.', 'we don\'t know when or if this item will be back in stock.',
'we were not able to find a match', 'we were not able to find a match',
'when this arrives in stock', 'when this arrives in stock',
'when this item is available to order',
'zur zeit nicht an lager', 'zur zeit nicht an lager',
'épuisé',
'品切れ', '品切れ',
'已售', '已售',
'已售完', '已售完',
@@ -122,6 +126,20 @@ async () => {
// so it's good to filter to just the 'above the fold' elements // so it's good to filter to just the 'above the fold' elements
// and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist // and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist
function elementIsInEyeBallRange(element) {
// outside the 'fold' or some weird text in the heading area
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
// Note: theres also an automated test that places the 'out of stock' text fairly low down
// Skip text that could be in the header area
if (element.getBoundingClientRect().bottom + window.scrollY <= 300 ) {
return false;
}
// Skip text that could be much further down (like a list of "you may like" products that have 'sold out' in there
if (element.getBoundingClientRect().bottom + window.scrollY >= 1300 ) {
return false;
}
return true;
}
// @todo - if it's SVG or IMG, go into image diff mode // @todo - if it's SVG or IMG, go into image diff mode
@@ -158,9 +176,7 @@ async () => {
for (let i = elementsToScan.length - 1; i >= 0; i--) { for (let i = elementsToScan.length - 1; i >= 0; i--) {
const element = elementsToScan[i]; const element = elementsToScan[i];
// outside the 'fold' or some weird text in the heading area if (!elementIsInEyeBallRange(element)) {
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
if (element.getBoundingClientRect().top + window.scrollY >= vh || element.getBoundingClientRect().top + window.scrollY <= 100) {
continue continue
} }
@@ -174,11 +190,11 @@ async () => {
} catch (e) { } catch (e) {
console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e); console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
} }
if (elementText.length) { if (elementText.length) {
// try which ones could mean its in stock // try which ones could mean its in stock
if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) { if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) {
console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`) console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`)
element.style.border = "2px solid green"; // highlight the element that was detected as in stock
return 'Possibly in stock'; return 'Possibly in stock';
} }
} }
@@ -187,10 +203,8 @@ async () => {
// OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
for (let i = elementsToScan.length - 1; i >= 0; i--) { for (let i = elementsToScan.length - 1; i >= 0; i--) {
const element = elementsToScan[i]; const element = elementsToScan[i];
// outside the 'fold' or some weird text in the heading area
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden if (!elementIsInEyeBallRange(element)) {
// Note: theres also an automated test that places the 'out of stock' text fairly low down
if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
continue continue
} }
elementText = ""; elementText = "";
@@ -205,6 +219,7 @@ async () => {
for (const outOfStockText of outOfStockTexts) { for (const outOfStockText of outOfStockTexts) {
if (elementText.includes(outOfStockText)) { if (elementText.includes(outOfStockText)) {
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`) console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
element.style.border = "2px solid red"; // highlight the element that was detected as out of stock
return outOfStockText; // item is out of stock return outOfStockText; // item is out of stock
} }
} }

View File

@@ -202,7 +202,6 @@ async (options) => {
// Foreach filter, go and find it on the page and add it to the results so we can visualise it again // Foreach filter, go and find it on the page and add it to the results so we can visualise it again
for (const f of include_filters) { for (const f of include_filters) {
bbox = false; bbox = false;
q = false;
if (!f.length) { if (!f.length) {
console.log("xpath_element_scraper: Empty filter, skipping"); console.log("xpath_element_scraper: Empty filter, skipping");
@@ -255,7 +254,7 @@ async (options) => {
console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y) console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
} catch (e) { } catch (e) {
console.log(e) console.log(e)
console.log("xpath_element_scraper: error looking up q.ownerElement") console.log("xpath_element_scraper: error looking up node.ownerElement")
} }
} }

View File

@@ -31,33 +31,33 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
# Draw caption on top (overlaid, not extending canvas) # Draw caption on top (overlaid, not extending canvas)
draw = ImageDraw.Draw(stitched) draw = ImageDraw.Draw(stitched)
if original_page_height > capture_height:
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long" caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
padding = 10 padding = 10
font_size = 35 font_size = 35
font_color = (255, 0, 0) font_color = (255, 0, 0)
background_color = (255, 255, 255) background_color = (255, 255, 255)
# Try to load a proper font # Try to load a proper font
try: try:
font = ImageFont.truetype("arial.ttf", font_size) font = ImageFont.truetype("arial.ttf", font_size)
except IOError: except IOError:
font = ImageFont.load_default() font = ImageFont.load_default()
bbox = draw.textbbox((0, 0), caption_text, font=font) bbox = draw.textbbox((0, 0), caption_text, font=font)
text_width = bbox[2] - bbox[0] text_width = bbox[2] - bbox[0]
text_height = bbox[3] - bbox[1] text_height = bbox[3] - bbox[1]
# Draw white rectangle background behind text # Draw white rectangle background behind text
rect_top = 0 rect_top = 0
rect_bottom = text_height + 2 * padding rect_bottom = text_height + 2 * padding
draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color) draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color)
# Draw text centered horizontally, 10px padding from top of the rectangle # Draw text centered horizontally, 10px padding from top of the rectangle
text_x = (max_width - text_width) // 2 text_x = (max_width - text_width) // 2
text_y = padding text_y = padding
draw.text((text_x, text_y), caption_text, font=font, fill=font_color) draw.text((text_x, text_y), caption_text, font=font, fill=font_color)
# Encode and send image # Encode and send image
output = io.BytesIO() output = io.BytesIO()

View File

@@ -10,16 +10,13 @@ class fetcher(Fetcher):
else: else:
fetcher_description = "WebDriver Chrome/Javascript" fetcher_description = "WebDriver Chrome/Javascript"
# Configs for Proxy setup
# In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy"
selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy',
'proxyAutoconfigUrl', 'sslProxy', 'autodetect',
'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
proxy = None proxy = None
proxy_url = None
def __init__(self, proxy_override=None, custom_browser_connection_url=None): def __init__(self, proxy_override=None, custom_browser_connection_url=None):
super().__init__() super().__init__()
from selenium.webdriver.common.proxy import Proxy as SeleniumProxy from urllib.parse import urlparse
from selenium.webdriver.common.proxy import Proxy
# .strip('"') is going to save someone a lot of time when they accidently wrap the env value # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
if not custom_browser_connection_url: if not custom_browser_connection_url:
@@ -28,25 +25,27 @@ class fetcher(Fetcher):
self.browser_connection_is_custom = True self.browser_connection_is_custom = True
self.browser_connection_url = custom_browser_connection_url self.browser_connection_url = custom_browser_connection_url
# If any proxy settings are enabled, then we should setup the proxy object
proxy_args = {}
for k in self.selenium_proxy_settings_mappings:
v = os.getenv('webdriver_' + k, False)
if v:
proxy_args[k] = v.strip('"')
# Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy ##### PROXY SETUP #####
if not proxy_args.get('webdriver_httpProxy') and self.system_http_proxy:
proxy_args['httpProxy'] = self.system_http_proxy
if not proxy_args.get('webdriver_sslProxy') and self.system_https_proxy:
proxy_args['httpsProxy'] = self.system_https_proxy
# Allows override the proxy on a per-request basis proxy_sources = [
if proxy_override is not None: self.system_http_proxy,
proxy_args['httpProxy'] = proxy_override self.system_https_proxy,
os.getenv('webdriver_proxySocks'),
os.getenv('webdriver_socksProxy'),
os.getenv('webdriver_proxyHttp'),
os.getenv('webdriver_httpProxy'),
os.getenv('webdriver_proxyHttps'),
os.getenv('webdriver_httpsProxy'),
os.getenv('webdriver_sslProxy'),
proxy_override, # last one should override
]
# The built in selenium proxy handling is super unreliable!!! so we just grab which ever proxy setting we can find and throw it in --proxy-server=
for k in filter(None, proxy_sources):
if not k:
continue
self.proxy_url = k.strip()
if proxy_args:
self.proxy = SeleniumProxy(raw=proxy_args)
def run(self, def run(self,
url, url,
@@ -59,63 +58,77 @@ class fetcher(Fetcher):
is_binary=False, is_binary=False,
empty_pages_are_a_change=False): empty_pages_are_a_change=False):
from selenium import webdriver
from selenium.webdriver.chrome.options import Options as ChromeOptions from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.common.exceptions import WebDriverException
# request_body, request_method unused for now, until some magic in the future happens. # request_body, request_method unused for now, until some magic in the future happens.
options = ChromeOptions() options = ChromeOptions()
options.add_argument("--headless")
if self.proxy:
options.proxy = self.proxy
self.driver = webdriver.Remote( # Load Chrome options from env
command_executor=self.browser_connection_url, CHROME_OPTIONS = [
options=options) line.strip()
for line in os.getenv("CHROME_OPTIONS", "").strip().splitlines()
if line.strip()
]
for opt in CHROME_OPTIONS:
options.add_argument(opt)
# 1. proxy_config /Proxy(proxy_config) selenium object is REALLY unreliable
# 2. selenium-wire cant be used because the websocket version conflicts with pypeteer-ng
# 3. selenium only allows ONE runner at a time by default!
# 4. driver must use quit() or it will continue to block/hold the selenium process!!
if self.proxy_url:
options.add_argument(f'--proxy-server={self.proxy_url}')
from selenium.webdriver.remote.remote_connection import RemoteConnection
from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
driver = None
try:
# Create the RemoteConnection and set timeout (e.g., 30 seconds)
remote_connection = RemoteConnection(
self.browser_connection_url,
)
remote_connection.set_timeout(30) # seconds
# Now create the driver with the RemoteConnection
driver = RemoteWebDriver(
command_executor=remote_connection,
options=options
)
driver.set_page_load_timeout(int(os.getenv("WEBDRIVER_PAGELOAD_TIMEOUT", 45)))
except Exception as e:
if driver:
driver.quit()
raise e
try: try:
self.driver.get(url) driver.get(url)
except WebDriverException as e:
# Be sure we close the session window
self.quit()
raise
self.driver.set_window_size(1280, 1024) if not "--window-size" in os.getenv("CHROME_OPTIONS", ""):
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) driver.set_window_size(1280, 1024)
if self.webdriver_js_execute_code is not None: driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
self.driver.execute_script(self.webdriver_js_execute_code)
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
# @todo - how to check this? is it possible? if self.webdriver_js_execute_code is not None:
self.status_code = 200 driver.execute_script(self.webdriver_js_execute_code)
# @todo somehow we should try to get this working for WebDriver # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
# raise EmptyReply(url=url, status_code=r.status_code) driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
# @todo - dom wait loaded? # @todo - how to check this? is it possible?
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay) self.status_code = 200
self.content = self.driver.page_source # @todo somehow we should try to get this working for WebDriver
self.headers = {} # raise EmptyReply(url=url, status_code=r.status_code)
self.screenshot = self.driver.get_screenshot_as_png() # @todo - dom wait loaded?
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
self.content = driver.page_source
self.headers = {}
self.screenshot = driver.get_screenshot_as_png()
except Exception as e:
driver.quit()
raise e
# Does the connection to the webdriver work? run a test connection. driver.quit()
def is_ready(self):
from selenium import webdriver
from selenium.webdriver.chrome.options import Options as ChromeOptions
self.driver = webdriver.Remote(
command_executor=self.command_executor,
options=ChromeOptions())
# driver.quit() seems to cause better exceptions
self.quit()
return True
def quit(self, watch=None):
if self.driver:
try:
self.driver.quit()
except Exception as e:
logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}")

View File

@@ -224,27 +224,37 @@ class StringDictKeyValue(StringField):
def _value(self): def _value(self):
if self.data: if self.data:
output = u'' output = ''
for k in self.data.keys(): for k, v in self.data.items():
output += "{}: {}\r\n".format(k, self.data[k]) output += f"{k}: {v}\r\n"
return output return output
else: else:
return u'' return ''
# incoming # incoming data processing + validation
def process_formdata(self, valuelist): def process_formdata(self, valuelist):
self.data = {}
errors = []
if valuelist: if valuelist:
self.data = {} # Remove empty strings (blank lines)
# Remove empty strings cleaned = [line.strip() for line in valuelist[0].split("\n") if line.strip()]
cleaned = list(filter(None, valuelist[0].split("\n"))) for idx, s in enumerate(cleaned, start=1):
for s in cleaned: if ':' not in s:
parts = s.strip().split(':', 1) errors.append(f"Line {idx} is missing a ':' separator.")
if len(parts) == 2: continue
self.data.update({parts[0].strip(): parts[1].strip()}) parts = s.split(':', 1)
key = parts[0].strip()
value = parts[1].strip()
else: if not key:
self.data = {} errors.append(f"Line {idx} has an empty key.")
if not value:
errors.append(f"Line {idx} has an empty value.")
self.data[key] = value
if errors:
raise ValidationError("Invalid input:\n" + "\n".join(errors))
class ValidateContentFetcherIsReady(object): class ValidateContentFetcherIsReady(object):
""" """

View File

@@ -309,10 +309,10 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
soup = BeautifulSoup(content, 'html.parser') soup = BeautifulSoup(content, 'html.parser')
if ensure_is_ldjson_info_type: if ensure_is_ldjson_info_type:
bs_result = soup.findAll('script', {"type": "application/ld+json"}) bs_result = soup.find_all('script', {"type": "application/ld+json"})
else: else:
bs_result = soup.findAll('script') bs_result = soup.find_all('script')
bs_result += soup.findAll('body') bs_result += soup.find_all('body')
bs_jsons = [] bs_jsons = []
for result in bs_result: for result in bs_result:
@@ -435,45 +435,29 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
return re.sub(pattern, repl, html_content) return re.sub(pattern, repl, html_content)
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False) -> str:
# NOTE!! ANYTHING LIBXML, HTML5LIB ETC WILL CAUSE SOME SMALL MEMORY LEAK IN THE LOCAL "LIB" IMPLEMENTATION OUTSIDE PYTHON
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False, timeout=10) -> str:
from inscriptis import get_text from inscriptis import get_text
from inscriptis.model.config import ParserConfig from inscriptis.model.config import ParserConfig
"""Converts html string to a string with just the text. If ignoring
rendering anchor tag content is enable, anchor tag content are also
included in the text
:param html_content: string with html content
:param render_anchor_tag_content: boolean flag indicating whether to extract
hyperlinks (the anchor tag content) together with text. This refers to the
'href' inside 'a' tags.
Anchor tag content is rendered in the following manner:
'[ text ](anchor tag content)'
:return: extracted text from the HTML
"""
# if anchor tag content flag is set to True define a config for
# extracting this content
if render_anchor_tag_content: if render_anchor_tag_content:
parser_config = ParserConfig( parser_config = ParserConfig(
annotation_rules={"a": ["hyperlink"]}, annotation_rules={"a": ["hyperlink"]},
display_links=True display_links=True
) )
# otherwise set config to None/default
else: else:
parser_config = None parser_config = None
# RSS Mode - Inscriptis will treat `title` as something else.
# Make it as a regular block display element (//item/title)
# This is a bit of a hack - the real way it to use XSLT to convert it to HTML #1874
if is_rss: if is_rss:
html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content) html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
html_content = re.sub(r'</title>', r'</h1>', html_content) html_content = re.sub(r'</title>', r'</h1>', html_content)
text_content = get_text(html_content, config=parser_config) text_content = get_text(html_content, config=parser_config)
return text_content return text_content
# Does LD+JSON exist with a @type=='product' and a .price set anywhere? # Does LD+JSON exist with a @type=='product' and a .price set anywhere?
def has_ldjson_product_info(content): def has_ldjson_product_info(content):
try: try:

View File

@@ -0,0 +1,82 @@
import pluggy
import os
import importlib
import sys
# Global plugin namespace for changedetection.io
PLUGIN_NAMESPACE = "changedetectionio"
hookspec = pluggy.HookspecMarker(PLUGIN_NAMESPACE)
hookimpl = pluggy.HookimplMarker(PLUGIN_NAMESPACE)
class ChangeDetectionSpec:
"""Hook specifications for extending changedetection.io functionality."""
@hookspec
def ui_edit_stats_extras(watch):
"""Return HTML content to add to the stats tab in the edit view.
Args:
watch: The watch object being edited
Returns:
str: HTML content to be inserted in the stats tab
"""
pass
# Set up Plugin Manager
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
# Register hookspecs
plugin_manager.add_hookspecs(ChangeDetectionSpec)
# Load plugins from subdirectories
def load_plugins_from_directories():
# Dictionary of directories to scan for plugins
plugin_dirs = {
'conditions': os.path.join(os.path.dirname(__file__), 'conditions', 'plugins'),
# Add more plugin directories here as needed
}
# Note: Removed the direct import of example_word_count_plugin as it's now in the conditions/plugins directory
for dir_name, dir_path in plugin_dirs.items():
if not os.path.exists(dir_path):
continue
# Get all Python files (excluding __init__.py)
for filename in os.listdir(dir_path):
if filename.endswith(".py") and filename != "__init__.py":
module_name = filename[:-3] # Remove .py extension
module_path = f"changedetectionio.{dir_name}.plugins.{module_name}"
try:
module = importlib.import_module(module_path)
# Register the plugin with pluggy
plugin_manager.register(module, module_name)
except (ImportError, AttributeError) as e:
print(f"Error loading plugin {module_name}: {e}")
# Load plugins
load_plugins_from_directories()
# Discover installed plugins from external packages (if any)
plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE)
# Helper function to collect UI stats extras from all plugins
def collect_ui_edit_stats_extras(watch):
"""Collect and combine HTML content from all plugins that implement ui_edit_stats_extras"""
extras_content = []
# Get all plugins that implement the ui_edit_stats_extras hook
results = plugin_manager.hook.ui_edit_stats_extras(watch=watch)
# If we have results, add them to our content
if results:
for result in results:
if result: # Skip empty results
extras_content.append(result)
return "\n".join(extras_content) if extras_content else ""

View File

@@ -38,6 +38,9 @@ pytest tests/test_backend.py
pytest tests/test_rss.py pytest tests/test_rss.py
pytest tests/test_unique_lines.py pytest tests/test_unique_lines.py
# Try high concurrency
FETCH_WORKERS=130 pytest tests/test_history_consistency.py -v -l
# Check file:// will pickup a file when enabled # Check file:// will pickup a file when enabled
echo "Hello world" > /tmp/test-file.txt echo "Hello world" > /tmp/test-file.txt
ALLOW_FILE_URI=yes pytest tests/test_security.py ALLOW_FILE_URI=yes pytest tests/test_security.py

View File

@@ -82,3 +82,25 @@ done
docker kill squid-one squid-two squid-custom docker kill squid-one squid-two squid-custom
# Test that the UI is returning the correct error message when a proxy is not available
# Requests
docker run --network changedet-network \
test-changedetectionio \
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py'
# Playwright
docker run --network changedet-network \
test-changedetectionio \
bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py'
# Puppeteer fast
docker run --network changedet-network \
test-changedetectionio \
bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py'
# Selenium
docker run --network changedet-network \
test-changedetectionio \
bash -c 'cd changedetectionio && WEBDRIVER_URL=http://selenium:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py'

View File

@@ -211,7 +211,14 @@ $(document).ready(function () {
$('input[type=text]', first_available).first().val(x['xpath']); $('input[type=text]', first_available).first().val(x['xpath']);
$('input[placeholder="Value"]', first_available).addClass('ok').click().focus(); $('input[placeholder="Value"]', first_available).addClass('ok').click().focus();
found_something = true; found_something = true;
} else { }
else if (x['tagName'] === 'select') {
$('select', first_available).val('<select> by option text').change();
$('input[type=text]', first_available).first().val(x['xpath']);
$('input[placeholder="Value"]', first_available).addClass('ok').click().focus();
found_something = true;
}
else {
// There's no good way (that I know) to find if this // There's no good way (that I know) to find if this
// see https://stackoverflow.com/questions/446892/how-to-find-event-listeners-on-a-dom-node-in-javascript-or-in-debugging // see https://stackoverflow.com/questions/446892/how-to-find-event-listeners-on-a-dom-node-in-javascript-or-in-debugging
// https://codepen.io/azaslavsky/pen/DEJVWv // https://codepen.io/azaslavsky/pen/DEJVWv
@@ -251,6 +258,10 @@ $(document).ready(function () {
400: function () { 400: function () {
// More than likely the CSRF token was lost when the server restarted // More than likely the CSRF token was lost when the server restarted
alert("There was a problem processing the request, please reload the page."); alert("There was a problem processing the request, please reload the page.");
},
401: function (err) {
// This will be a custom error
alert(err.responseText);
} }
} }
}).done(function (data) { }).done(function (data) {

View File

@@ -98,15 +98,13 @@
{% macro playwright_warning() %} {% macro playwright_warning() %}
<p><strong>Error - Playwright support for Chrome based fetching is not enabled.</strong> Alternatively try our <a href="https://changedetection.io">very affordable subscription based service which has all this setup for you</a>.</p> <p><strong>Error - This watch needs Chrome (with playwright/sockpuppetbrowser), but Chrome based fetching is not enabled.</strong> Alternatively try our <a href="https://changedetection.io">very affordable subscription based service which has all this setup for you</a>.</p>
<p>You may need to <a href="https://github.com/dgtlmoon/changedetection.io/blob/09ebc6ec6338545bdd694dc6eee57f2e9d2b8075/docker-compose.yml#L31">Enable playwright environment variable</a> and uncomment the <strong>sockpuppetbrowser</strong> in the <a href="https://github.com/dgtlmoon/changedetection.io/blob/master/docker-compose.yml">docker-compose.yml</a> file.</p> <p>You may need to <a href="https://github.com/dgtlmoon/changedetection.io/blob/09ebc6ec6338545bdd694dc6eee57f2e9d2b8075/docker-compose.yml#L31">Enable playwright environment variable</a> and uncomment the <strong>sockpuppetbrowser</strong> in the <a href="https://github.com/dgtlmoon/changedetection.io/blob/master/docker-compose.yml">docker-compose.yml</a> file.</p>
<br> <br>
<p>(Also Selenium/WebDriver can not extract full page screenshots reliably so Playwright is recommended here)</p>
{% endmacro %} {% endmacro %}
{% macro only_webdriver_type_watches_warning() %} {% macro only_playwright_type_watches_warning() %}
<p><strong>Sorry, this functionality only works with Playwright/Chrome enabled watches.<br>You need to <a href="#request">Set the fetch method to Playwright/Chrome mode and resave</a> and have the Playwright connection enabled.</strong></p><br> <p><strong>Sorry, this functionality only works with Playwright/Chrome enabled watches.<br>You need to <a href="#request">Set the fetch method to Playwright/Chrome mode and resave</a> and have the SockpuppetBrowser/Playwright or Selenium enabled.</strong></p><br>
{% endmacro %} {% endmacro %}
{% macro render_time_schedule_form(form, available_timezones, timezone_default_config) %} {% macro render_time_schedule_form(form, available_timezones, timezone_default_config) %}

View File

@@ -1,6 +1,6 @@
{% extends 'base.html' %} {% extends 'base.html' %}
{% block content %} {% block content %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_webdriver_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %} {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %}
{% from '_common_fields.html' import render_common_settings_form %} {% from '_common_fields.html' import render_common_settings_form %}
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script> <script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script> <script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
@@ -204,7 +204,9 @@ Math: {{ 1 + 1 }}") }}
</div> </div>
<div class="tab-pane-inner" id="browser-steps"> <div class="tab-pane-inner" id="browser-steps">
{% if playwright_enabled and watch_uses_webdriver %} {% if watch_needs_selenium_or_playwright %}
{# Only works with playwright #}
{% if system_has_playwright_configured %}
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality"> <img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
<fieldset> <fieldset>
<div class="pure-control-group"> <div class="pure-control-group">
@@ -223,7 +225,6 @@ Math: {{ 1 + 1 }}") }}
<div class="flex-wrapper" > <div class="flex-wrapper" >
<div id="browser-steps-ui" class="noselect"> <div id="browser-steps-ui" class="noselect">
<div class="noselect" id="browsersteps-selector-wrapper" style="width: 100%"> <div class="noselect" id="browsersteps-selector-wrapper" style="width: 100%">
<span class="loader" > <span class="loader" >
<span id="browsersteps-click-start"> <span id="browsersteps-click-start">
@@ -245,15 +246,16 @@ Math: {{ 1 + 1 }}") }}
</div> </div>
</fieldset> </fieldset>
{% else %} {% else %}
<span class="pure-form-message-inline"> {# it's configured to use selenium or chrome but system says its not configured #}
{% if not watch_uses_webdriver %} {{ playwright_warning() }}
{{ only_webdriver_type_watches_warning() }} {% if system_has_webdriver_configured %}
{% endif %} <strong>Selenium/Webdriver cant be used here because it wont fetch screenshots reliably.</strong>
{% if not playwright_enabled %} {% endif %}
{{ playwright_warning() }}
{% endif %}
</span>
{% endif %} {% endif %}
{% else %}
{# "This functionality needs chrome.." #}
{{ only_playwright_type_watches_warning() }}
{% endif %}
</div> </div>
@@ -262,7 +264,7 @@ Math: {{ 1 + 1 }}") }}
<div class="pure-control-group inline-radio"> <div class="pure-control-group inline-radio">
{{ render_checkbox_field(form.notification_muted) }} {{ render_checkbox_field(form.notification_muted) }}
</div> </div>
{% if watch_uses_webdriver %} {% if watch_needs_selenium_or_playwright %}
<div class="pure-control-group inline-radio"> <div class="pure-control-group inline-radio">
{{ render_checkbox_field(form.notification_screenshot) }} {{ render_checkbox_field(form.notification_screenshot) }}
<span class="pure-form-message-inline"> <span class="pure-form-message-inline">
@@ -379,13 +381,15 @@ Math: {{ 1 + 1 }}") }}
<fieldset> <fieldset>
<div class="pure-control-group"> <div class="pure-control-group">
{% if playwright_enabled and watch_uses_webdriver %} {% if watch_needs_selenium_or_playwright %}
{% if system_has_playwright_configured %}
<span class="pure-form-message-inline" id="visual-selector-heading"> <span class="pure-form-message-inline" id="visual-selector-heading">
The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items. The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items.
</span> </span>
<div id="selector-header"> <div id="selector-header">
<a id="clear-selector" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Clear selection</a> <a id="clear-selector" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Clear selection</a>
<!-- visual selector IMG will try to load, it will either replace this or on error replace it with some handy text -->
<i class="fetching-update-notice" style="font-size: 80%;">One moment, fetching screenshot and element information..</i> <i class="fetching-update-notice" style="font-size: 80%;">One moment, fetching screenshot and element information..</i>
</div> </div>
<div id="selector-wrapper" style="display: none"> <div id="selector-wrapper" style="display: none">
@@ -397,13 +401,16 @@ Math: {{ 1 + 1 }}") }}
</div> </div>
<div id="selector-current-xpath" style="overflow-x: hidden"><strong>Currently:</strong>&nbsp;<span class="text">Loading...</span></div> <div id="selector-current-xpath" style="overflow-x: hidden"><strong>Currently:</strong>&nbsp;<span class="text">Loading...</span></div>
{% else %} {% else %}
{% if not watch_uses_webdriver %} {# The watch needed chrome but system says that playwright is not ready #}
{{ only_webdriver_type_watches_warning() }} {{ playwright_warning() }}
{% endif %}
{% if not playwright_enabled %}
{{ playwright_warning() }}
{% endif %}
{% endif %} {% endif %}
{% if system_has_webdriver_configured %}
<strong>Selenium/Webdriver cant be used here because it wont fetch screenshots reliably.</strong>
{% endif %}
{% else %}
{# "This functionality needs chrome.." #}
{{ only_playwright_type_watches_warning() }}
{% endif %}
</div> </div>
</fieldset> </fieldset>
</div> </div>
@@ -443,6 +450,13 @@ Math: {{ 1 + 1 }}") }}
</tr> </tr>
</tbody> </tbody>
</table> </table>
{% if ui_edit_stats_extras %}
<div class="plugin-stats-extras"> <!-- from pluggy plugin -->
{{ ui_edit_stats_extras|safe }}
</div>
{% endif %}
{% if watch.history_n %} {% if watch.history_n %}
<p> <p>
<a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">Download latest HTML snapshot</a> <a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">Download latest HTML snapshot</a>

View File

@@ -0,0 +1,68 @@
#!/usr/bin/env python3
from flask import url_for
from ..util import live_server_setup, wait_for_all_checks
import os
from ... import strtobool
# Just to be sure the UI outputs the right error message on proxy connection failed
# docker run -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
# PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 pytest tests/proxy_list/test_proxy_noconnect.py
# FAST_PUPPETEER_CHROME_FETCHER=True PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 pytest tests/proxy_list/test_proxy_noconnect.py
# WEBDRIVER_URL=http://127.0.0.1:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py
def test_proxy_noconnect_custom(client, live_server, measure_memory_usage):
live_server_setup(live_server)
# Goto settings, add our custom one
res = client.post(
url_for("settings.settings_page"),
data={
"requests-time_between_check-minutes": 180,
"application-ignore_whitespace": "y",
"application-fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') or os.getenv("WEBDRIVER_URL") else 'html_requests',
"requests-extra_proxies-0-proxy_name": "custom-test-proxy",
# test:awesome is set in tests/proxy_list/squid-passwords.txt
"requests-extra_proxies-0-proxy_url": "http://127.0.0.1:3128",
},
follow_redirects=True
)
assert b"Settings updated." in res.data
test_url = "https://changedetection.io"
res = client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
follow_redirects=True
)
assert b"Watch added in Paused state, saving will unpause" in res.data
options = {
"url": test_url,
"fetch_backend": "html_webdriver" if os.getenv('PLAYWRIGHT_DRIVER_URL') or os.getenv("WEBDRIVER_URL") else "html_requests",
"proxy": "ui-0custom-test-proxy",
}
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first", unpause_on_save=1),
data=options,
follow_redirects=True
)
assert b"unpaused" in res.data
import time
wait_for_all_checks(client)
# Requests default
check_string = b'Cannot connect to proxy'
if os.getenv('PLAYWRIGHT_DRIVER_URL') or strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')) or os.getenv("WEBDRIVER_URL"):
check_string = b'ERR_PROXY_CONNECTION_FAILED'
res = client.get(url_for("watchlist.index"))
#with open("/tmp/debug.html", 'wb') as f:
# f.write(res.data)
assert check_string in res.data

View File

@@ -14,6 +14,8 @@ from changedetectionio.notification import (
def set_original_response(): def set_original_response():
test_return_data = """<html> test_return_data = """<html>
<body> <body>
<section id=header style="padding: 50px; height: 350px">This is the header which should be ignored always - <span>add to cart</span></section>
<!-- stock-not-in-stock.js will ignore text in the first 300px, see elementIsInEyeBallRange(), sometimes "add to cart" and other junk is here -->
Some initial text<br> Some initial text<br>
<p>Which is across multiple lines</p> <p>Which is across multiple lines</p>
<br> <br>
@@ -52,8 +54,6 @@ def test_restock_detection(client, live_server, measure_memory_usage):
set_original_response() set_original_response()
#assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" #assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
time.sleep(1)
live_server_setup(live_server) live_server_setup(live_server)
##################### #####################
notification_url = url_for('test_notification_endpoint', _external=True).replace('http://localhost', 'http://changedet').replace('http', 'json') notification_url = url_for('test_notification_endpoint', _external=True).replace('http://localhost', 'http://changedet').replace('http', 'json')
@@ -84,7 +84,8 @@ def test_restock_detection(client, live_server, measure_memory_usage):
# Is it correctly show as NOT in stock? # Is it correctly show as NOT in stock?
wait_for_all_checks(client) wait_for_all_checks(client)
res = client.get(url_for("watchlist.index")) res = client.get(url_for("watchlist.index"))
assert b'not-in-stock' in res.data assert b'processor-restock_diff' in res.data # Should have saved in restock mode
assert b'not-in-stock' in res.data # should be out of stock
# Is it correctly shown as in stock # Is it correctly shown as in stock
set_back_in_stock_response() set_back_in_stock_response()

View File

@@ -45,11 +45,15 @@ def set_number_out_of_range_response(number="150"):
f.write(test_return_data) f.write(test_return_data)
def test_setup(client, live_server):
"""Test that both text and number conditions work together with AND logic."""
live_server_setup(live_server)
def test_conditions_with_text_and_number(client, live_server): def test_conditions_with_text_and_number(client, live_server):
"""Test that both text and number conditions work together with AND logic.""" """Test that both text and number conditions work together with AND logic."""
set_original_response("50") set_original_response("50")
live_server_setup(live_server) #live_server_setup(live_server)
test_url = url_for('test_endpoint', _external=True) test_url = url_for('test_endpoint', _external=True)
@@ -192,6 +196,150 @@ def test_condition_validate_rule_row(client, live_server):
) )
assert res.status_code == 200 assert res.status_code == 200
assert b'false' in res.data assert b'false' in res.data
# cleanup for the next
client.get(
url_for("ui.form_delete", uuid="all"),
follow_redirects=True
)
# If there was only a change in the whitespacing, then we shouldnt have a change detected
def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
test_return_data = """<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Give the thread time to pick it up
wait_for_all_checks(client)
# Check it saved
res = client.get(
url_for("ui.ui_edit.edit_page", uuid="first"),
)
# Assert the word count is counted correctly
assert b'<td>13</td>' in res.data
# cleanup for the next
client.get(
url_for("ui.form_delete", uuid="all"),
follow_redirects=True
)
# If there was only a change in the whitespacing, then we shouldnt have a change detected
def test_lev_conditions_plugin(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("""<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
</body>
</html>
""")
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
follow_redirects=True
)
assert b"Watch added in Paused state, saving will unpause" in res.data
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
# Give the thread time to pick it up
wait_for_all_checks(client)
res = client.post(
url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1),
data={
"url": test_url,
"fetch_backend": "html_requests",
"conditions_match_logic": "ALL", # ALL = AND logic
"conditions-0-field": "levenshtein_ratio",
"conditions-0-operator": "<",
"conditions-0-value": "0.8" # needs to be more of a diff to trigger a change
},
follow_redirects=True
)
assert b"unpaused" in res.data
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
# Check the content saved initially, even tho a condition was set - this is the first snapshot so shouldnt be affected by conditions
res = client.get(
url_for("ui.ui_views.preview_page", uuid=uuid),
follow_redirects=True
)
assert b'Which is across multiple lines' in res.data
############### Now change it a LITTLE bit...
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("""<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happenxxxxxxxxx. <br>
</body>
</html>
""")
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
assert b'Queued 1 watch for rechecking.' in res.data
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data #because this will be like 0.90 not 0.8 threshold
############### Now change it a MORE THAN 50%
test_return_data = """<html>
<body>
Some sxxxx<br>
<p>Which is across a lines</p>
<br>
ok. <br>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
assert b'Queued 1 watch for rechecking.' in res.data
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
# cleanup for the next
client.get(
url_for("ui.form_delete", uuid="all"),
follow_redirects=True
)

View File

@@ -10,8 +10,8 @@ from urllib.parse import urlparse, parse_qs
def test_consistent_history(client, live_server, measure_memory_usage): def test_consistent_history(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
workers = int(os.getenv("FETCH_WORKERS", 10))
r = range(1, 30) r = range(1, 10+workers)
for one in r: for one in r:
test_url = url_for('test_endpoint', content_type="text/html", content=str(one), _external=True) test_url = url_for('test_endpoint', content_type="text/html", content=str(one), _external=True)
@@ -46,9 +46,10 @@ def test_consistent_history(client, live_server, measure_memory_usage):
# assert the right amount of watches was found in the JSON # assert the right amount of watches was found in the JSON
assert len(json_obj['watching']) == len(r), "Correct number of watches was found in the JSON" assert len(json_obj['watching']) == len(r), "Correct number of watches was found in the JSON"
i=0
# each one should have a history.txt containing just one line # each one should have a history.txt containing just one line
for w in json_obj['watching'].keys(): for w in json_obj['watching'].keys():
i+=1
history_txt_index_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, 'history.txt') history_txt_index_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, 'history.txt')
assert os.path.isfile(history_txt_index_file), f"History.txt should exist where I expect it at {history_txt_index_file}" assert os.path.isfile(history_txt_index_file), f"History.txt should exist where I expect it at {history_txt_index_file}"
@@ -58,8 +59,8 @@ def test_consistent_history(client, live_server, measure_memory_usage):
assert len(tmp_history) == 1, "History.txt should contain 1 line" assert len(tmp_history) == 1, "History.txt should contain 1 line"
# Should be two files,. the history.txt , and the snapshot.txt # Should be two files,. the history.txt , and the snapshot.txt
files_in_watch_dir = os.listdir(os.path.join(live_server.app.config['DATASTORE'].datastore_path, files_in_watch_dir = os.listdir(os.path.join(live_server.app.config['DATASTORE'].datastore_path, w))
w))
# Find the snapshot one # Find the snapshot one
for fname in files_in_watch_dir: for fname in files_in_watch_dir:
if fname != 'history.txt' and 'html' not in fname: if fname != 'history.txt' and 'html' not in fname:
@@ -75,7 +76,6 @@ def test_consistent_history(client, live_server, measure_memory_usage):
assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot" assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot"
json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json') json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')
with open(json_db_file, 'r') as f: with open(json_db_file, 'r') as f:
assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved" assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved"

View File

@@ -32,13 +32,14 @@ def test_strip_text_func():
stripped_content = html_tools.strip_ignore_text(test_content, ignore) stripped_content = html_tools.strip_ignore_text(test_content, ignore)
assert stripped_content == "Some initial text\n\nWhich is across multiple lines\n\n\n\nSo let's see what happens." assert stripped_content == "Some initial text\n\nWhich is across multiple lines\n\n\n\nSo let's see what happens."
def set_original_ignore_response(): def set_original_ignore_response(ver_stamp="123"):
test_return_data = """<html> test_return_data = f"""<html>
<body> <body>
Some initial text<br> Some initial text<br>
<p>Which is across multiple lines</p> <p>Which is across multiple lines</p>
<br> <br>
So let's see what happens. <br> So let's see what happens. <br>
<link href="https://www.somesite/wp-content/themes/cooltheme/style2.css?v={ver_stamp}" rel="stylesheet"/>
</body> </body>
</html> </html>
@@ -48,13 +49,14 @@ def set_original_ignore_response():
f.write(test_return_data) f.write(test_return_data)
def set_modified_original_ignore_response(): def set_modified_original_ignore_response(ver_stamp="123"):
test_return_data = """<html> test_return_data = f"""<html>
<body> <body>
Some NEW nice initial text<br> Some NEW nice initial text<br>
<p>Which is across multiple lines</p> <p>Which is across multiple lines</p>
<br> <br>
So let's see what happens. <br> So let's see what happens. <br>
<link href="https://www.somesite/wp-content/themes/cooltheme/style2.css?v={ver_stamp}" rel="stylesheet"/>
<p>new ignore stuff</p> <p>new ignore stuff</p>
<p>blah</p> <p>blah</p>
</body> </body>
@@ -67,14 +69,15 @@ def set_modified_original_ignore_response():
# Is the same but includes ZZZZZ, 'ZZZZZ' is the last line in ignore_text # Is the same but includes ZZZZZ, 'ZZZZZ' is the last line in ignore_text
def set_modified_ignore_response(): def set_modified_ignore_response(ver_stamp="123"):
test_return_data = """<html> test_return_data = f"""<html>
<body> <body>
Some initial text<br> Some initial text<br>
<p>Which is across multiple lines</p> <p>Which is across multiple lines</p>
<P>ZZZZz</P> <P>ZZZZz</P>
<br> <br>
So let's see what happens. <br> So let's see what happens. <br>
<link href="https://www.somesite/wp-content/themes/cooltheme/style2.css?v={ver_stamp}" rel="stylesheet"/>
</body> </body>
</html> </html>
@@ -165,9 +168,9 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa
assert b'Deleted' in res.data assert b'Deleted' in res.data
# When adding some ignore text, it should not trigger a change, even if something else on that line changes # When adding some ignore text, it should not trigger a change, even if something else on that line changes
def test_check_global_ignore_text_functionality(client, live_server, measure_memory_usage): def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
#live_server_setup(live_server) ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ\r\n"+extra_ignore
ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ"
set_original_ignore_response() set_original_ignore_response()
# Goto the settings page, add our ignore text # Goto the settings page, add our ignore text
@@ -186,6 +189,10 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_endpoint', _external=True) test_url = url_for('test_endpoint', _external=True)
if as_source:
# Switch to source mode so we can test that too!
test_url = "source:"+test_url
res = client.post( res = client.post(
url_for("imports.import_page"), url_for("imports.import_page"),
data={"urls": test_url}, data={"urls": test_url},
@@ -203,12 +210,15 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
follow_redirects=True follow_redirects=True
) )
assert b"Updated watch." in res.data assert b"Updated watch." in res.data
wait_for_all_checks(client)
# Check it saved # Check it saved
res = client.get( res = client.get(
url_for("settings.settings_page"), url_for("settings.settings_page"),
) )
assert bytes(ignore_text.encode('utf-8')) in res.data
for i in ignore_text.splitlines():
assert bytes(i.encode('utf-8')) in res.data
# Trigger a check # Trigger a check
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
@@ -221,7 +231,8 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
# Make a change which includes the ignore text, it should be ignored and no 'change' triggered # Make a change which includes the ignore text, it should be ignored and no 'change' triggered
# It adds text with "ZZZZzzzz" and "ZZZZ" is in the ignore list # It adds text with "ZZZZzzzz" and "ZZZZ" is in the ignore list
set_modified_ignore_response() # And tweaks the ver_stamp which should be picked up by global regex ignore
set_modified_ignore_response(ver_stamp=time.time())
# Trigger a check # Trigger a check
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
@@ -243,3 +254,11 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_check_global_ignore_text_functionality(client, live_server):
#live_server_setup(live_server)
_run_test_global_ignore(client, as_source=False)
def test_check_global_ignore_text_functionality_as_source(client, live_server):
#live_server_setup(live_server)
_run_test_global_ignore(client, as_source=True, extra_ignore='/\?v=\d/')

View File

@@ -424,3 +424,27 @@ def test_headers_textfile_in_request(client, live_server, measure_memory_usage):
# unlink headers.txt on start/stop # unlink headers.txt on start/stop
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_headers_validation(client, live_server):
#live_server_setup(live_server)
test_url = url_for('test_headers', _external=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),
data={
"url": test_url,
"fetch_backend": 'html_requests',
"headers": "User-AGent agent-from-watch\r\nsadfsadfsadfsdaf\r\n:foobar"},
follow_redirects=True
)
assert b"Line 1 is missing a &#39;:&#39; separator." in res.data
assert b"Line 3 has an empty key." in res.data

View File

@@ -126,18 +126,51 @@ def extract_UUID_from_client(client):
uuid = m.group(1) uuid = m.group(1)
return uuid.strip() return uuid.strip()
def wait_for_all_checks(client):
# actually this is not entirely true, it can still be 'processing' but not in the queue def wait_for_all_checks(client=None):
# Loop waiting until done.. """
attempt=0 Waits until the queue is empty and remains empty for at least `required_empty_duration` seconds,
# because sub-second rechecks are problematic in testing, use lots of delays and also ensures no running threads have `current_uuid` set.
time.sleep(1) Retries for up to `max_attempts` times, sleeping `wait_between_attempts` seconds between checks.
while attempt < 60: """
res = client.get(url_for("watchlist.index")) from changedetectionio.flask_app import update_q as global_update_q, running_update_threads
if not b'Checking now' in res.data:
break # Configuration
logging.getLogger().info("Waiting for watch-list to not say 'Checking now'.. {}".format(attempt)) attempt = 0
time.sleep(1) i=0
max_attempts = 60
wait_between_attempts = 2
required_empty_duration = 2
logger = logging.getLogger()
time.sleep(1.2)
empty_since = None
while attempt < max_attempts:
q_length = global_update_q.qsize()
# Check if any threads are still processing
time.sleep(1.2)
any_threads_busy = any(t.current_uuid for t in running_update_threads)
if q_length == 0 and not any_threads_busy:
if empty_since is None:
empty_since = time.time()
logger.info(f"Queue empty and no active threads at attempt {attempt}, starting empty timer...")
elif time.time() - empty_since >= required_empty_duration:
logger.info(f"Queue has been empty and threads idle for {required_empty_duration} seconds. Done waiting.")
break
else:
logger.info(f"Still waiting: queue empty and no active threads, but not yet {required_empty_duration} seconds...")
else:
if q_length != 0:
logger.info(f"Queue not empty (size={q_length}), resetting timer.")
if any_threads_busy:
busy_threads = [t.name for t in running_update_threads if t.current_uuid]
logger.info(f"Threads still busy: {busy_threads}, resetting timer.")
empty_since = None
attempt += 1 attempt += 1
time.sleep(1) time.sleep(1)

View File

@@ -9,15 +9,20 @@ services:
# - ./proxies.json:/datastore/proxies.json # - ./proxies.json:/datastore/proxies.json
# environment: # environment:
# Default listening port, can also be changed with the -p option # Default listening port, can also be changed with the -p option (not to be confused with ports: below)
# - PORT=5000 # - PORT=5000
# #
# Log levels are in descending order. (TRACE is the most detailed one) # Log levels are in descending order. (TRACE is the most detailed one)
# Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL # Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
# - LOGGER_LEVEL=TRACE # - LOGGER_LEVEL=TRACE
# #
# Alternative WebDriver/selenium URL, do not use "'s or 's! #
# - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub # Uncomment below and the "sockpuppetbrowser" to use a real Chrome browser (It uses the "playwright" protocol)
# - PLAYWRIGHT_DRIVER_URL=ws://browser-sockpuppet-chrome:3000
#
#
# Alternative WebDriver/selenium URL, do not use "'s or 's! (old, deprecated, does not support screenshots very well)
# - WEBDRIVER_URL=http://browser-selenium-chrome:4444/wd/hub
# #
# WebDriver proxy settings webdriver_proxyType, webdriver_ftpProxy, webdriver_noProxy, # WebDriver proxy settings webdriver_proxyType, webdriver_ftpProxy, webdriver_noProxy,
# webdriver_proxyAutoconfigUrl, webdriver_autodetect, # webdriver_proxyAutoconfigUrl, webdriver_autodetect,
@@ -25,9 +30,6 @@ services:
# #
# https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy # https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
# #
# Alternative target "Chrome" Playwright URL, do not use "'s or 's!
# "Playwright" is a driver/librarythat allows changedetection to talk to a Chrome or similar browser.
# - PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000
# #
# Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password # Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
# #
@@ -43,7 +45,7 @@ services:
# Base URL of your changedetection.io install (Added to the notification alert) # Base URL of your changedetection.io install (Added to the notification alert)
# - BASE_URL=https://mysite.com # - BASE_URL=https://mysite.com
# Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;` # Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
# More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory # More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy
# - USE_X_SETTINGS=1 # - USE_X_SETTINGS=1
# #
# Hides the `Referer` header so that monitored websites can't see the changedetection.io hostname. # Hides the `Referer` header so that monitored websites can't see the changedetection.io hostname.
@@ -70,7 +72,7 @@ services:
# Comment out ports: when using behind a reverse proxy , enable networks: etc. # Comment out ports: when using behind a reverse proxy , enable networks: etc.
ports: ports:
- 5000:5000 - 127.0.0.1:5000:5000
restart: unless-stopped restart: unless-stopped
# Used for fetching pages via WebDriver+Chrome where you need Javascript support. # Used for fetching pages via WebDriver+Chrome where you need Javascript support.
@@ -80,14 +82,14 @@ services:
# If WEBDRIVER or PLAYWRIGHT are enabled, changedetection container depends on that # If WEBDRIVER or PLAYWRIGHT are enabled, changedetection container depends on that
# and must wait before starting (substitute "browser-chrome" with "playwright-chrome" if last one is used) # and must wait before starting (substitute "browser-chrome" with "playwright-chrome" if last one is used)
# depends_on: # depends_on:
# sockpuppetbrowser: # browser-sockpuppet-chrome:
# condition: service_started # condition: service_started
# Sockpuppetbrowser is basically chrome wrapped in an API for allowing fast fetching of web-pages. # Sockpuppetbrowser is basically chrome wrapped in an API for allowing fast fetching of web-pages.
# RECOMMENDED FOR FETCHING PAGES WITH CHROME, be sure to enable the "PLAYWRIGHT_DRIVER_URL" env variable in the main changedetection container # RECOMMENDED FOR FETCHING PAGES WITH CHROME, be sure to enable the "PLAYWRIGHT_DRIVER_URL" env variable in the main changedetection container
# sockpuppetbrowser: # browser-sockpuppet-chrome:
# hostname: sockpuppetbrowser # hostname: browser-sockpuppet-chrome
# image: dgtlmoon/sockpuppetbrowser:latest # image: dgtlmoon/sockpuppetbrowser:latest
# cap_add: # cap_add:
# - SYS_ADMIN # - SYS_ADMIN
@@ -102,14 +104,18 @@ services:
# Used for fetching pages via Playwright+Chrome where you need Javascript support. # Used for fetching pages via Playwright+Chrome where you need Javascript support.
# Note: Works well but is deprecated, does not fetch full page screenshots (doesnt work with Visual Selector) # Note: Works well but is deprecated, does not fetch full page screenshots (doesnt work with Visual Selector)
# Does not report status codes (200, 404, 403) and other issues # Does not report status codes (200, 404, 403) and other issues
# browser-chrome: # browser-selenium-chrome:
# hostname: browser-chrome # hostname: browser-selenium-chrome
# image: selenium/standalone-chrome:4 # image: selenium/standalone-chrome:4
# environment: # environment:
# - VNC_NO_PASSWORD=1 # - VNC_NO_PASSWORD=1
# - SCREEN_WIDTH=1920 # - SCREEN_WIDTH=1920
# - SCREEN_HEIGHT=1080 # - SCREEN_HEIGHT=1080
# - SCREEN_DEPTH=24 # - SCREEN_DEPTH=24
# CHROME_OPTIONS: |
# --window-size=1280,1024
# --headless
# --disable-gpu
# volumes: # volumes:
# # Workaround to avoid the browser crashing inside a docker container # # Workaround to avoid the browser crashing inside a docker container
# # See https://github.com/SeleniumHQ/docker-selenium#quick-start # # See https://github.com/SeleniumHQ/docker-selenium#quick-start

File diff suppressed because one or more lines are too long

View File

@@ -5,13 +5,13 @@
<meta name="description" content="Manage your changedetection.io watches via API, requires the `x-api-key` header which is found in the settings UI."> <meta name="description" content="Manage your changedetection.io watches via API, requires the `x-api-key` header which is found in the settings UI.">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<link href="assets/bootstrap.min.css?v=1701595483622" rel="stylesheet" media="screen"> <link href="assets/bootstrap.min.css?v=1744573753999" rel="stylesheet" media="screen">
<link href="assets/prism.css?v=1701595483622" rel="stylesheet" /> <link href="assets/prism.css?v=1744573753999" rel="stylesheet" />
<link href="assets/main.css?v=1701595483622" rel="stylesheet" media="screen, print"> <link href="assets/main.css?v=1744573753999" rel="stylesheet" media="screen, print">
<link href="assets/favicon.ico?v=1701595483622" rel="icon" type="image/x-icon"> <link href="assets/favicon.ico?v=1744573753999" rel="icon" type="image/x-icon">
<link href="assets/apple-touch-icon.png?v=1701595483622" rel="apple-touch-icon" sizes="180x180"> <link href="assets/apple-touch-icon.png?v=1744573753999" rel="apple-touch-icon" sizes="180x180">
<link href="assets/favicon-32x32.png?v=1701595483622" rel="icon" type="image/png" sizes="32x32"> <link href="assets/favicon-32x32.png?v=1744573753999" rel="icon" type="image/png" sizes="32x32">
<link href="assets/favicon-16x16.png?v=1701595483622" rel="icon" type="image/png" sizes="16x16"> <link href="assets/favicon-16x16.png?v=1744573753999" rel="icon" type="image/png" sizes="16x16">
</head> </head>
<body class="container-fluid"> <body class="container-fluid">
@@ -928,6 +928,6 @@
</div> </div>
</div> </div>
<script src="assets/main.bundle.js?v=1701595483622"></script> <script src="assets/main.bundle.js?v=1744573753999"></script>
</body> </body>
</html> </html>

View File

@@ -42,7 +42,7 @@ paho-mqtt!=2.0.*
cryptography~=42.0.8 cryptography~=42.0.8
# Used for CSS filtering # Used for CSS filtering
beautifulsoup4 beautifulsoup4>=4.0.0
# XPath filtering, lxml is required by bs4 anyway, but put it here to be safe. # XPath filtering, lxml is required by bs4 anyway, but put it here to be safe.
# #2328 - 5.2.0 and 5.2.1 had extra CPU flag CFLAGS set which was not compatible on older hardware # #2328 - 5.2.0 and 5.2.1 had extra CPU flag CFLAGS set which was not compatible on older hardware
@@ -53,7 +53,7 @@ lxml >=4.8.0,<6,!=5.2.0,!=5.2.1
# XPath 2.0-3.1 support - 4.2.0 broke something? # XPath 2.0-3.1 support - 4.2.0 broke something?
elementpath==4.1.5 elementpath==4.1.5
selenium~=4.14.0 selenium~=4.31.0
# https://github.com/pallets/werkzeug/issues/2985 # https://github.com/pallets/werkzeug/issues/2985
# Maybe related to pytest? # Maybe related to pytest?
@@ -70,7 +70,7 @@ jq~=1.3; python_version >= "3.8" and sys_platform == "linux"
# playwright is installed at Dockerfile build time because it's not available on all platforms # playwright is installed at Dockerfile build time because it's not available on all platforms
pyppeteer-ng==2.0.0rc9 pyppeteer-ng==2.0.0rc10
pyppeteerstealth>=0.0.4 pyppeteerstealth>=0.0.4
@@ -90,6 +90,8 @@ extruct
# For cleaning up unknown currency formats # For cleaning up unknown currency formats
babel babel
levenshtein
# Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096 # Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
greenlet >= 3.0.3 greenlet >= 3.0.3
@@ -110,3 +112,6 @@ pluggy ~= 1.5
# Needed for testing, cross-platform for process and system monitoring # Needed for testing, cross-platform for process and system monitoring
psutil==7.0.0 psutil==7.0.0
ruff >= 0.11.2
pre_commit >= 4.2.0