Compare commits
46 Commits
0.39.19.1
...
fetchers-a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
00ac8645f7 | ||
|
|
bef4b40d7f | ||
|
|
fff32cef0d | ||
|
|
6dd26226bc | ||
|
|
779e9c1780 | ||
|
|
d9ed04ee56 | ||
|
|
0f63dca9f7 | ||
|
|
8fb146f3e4 | ||
|
|
770b0faa45 | ||
|
|
f6faa90340 | ||
|
|
669fd3ae0b | ||
|
|
17d37fb626 | ||
|
|
dfa7fc3a81 | ||
|
|
cd467df97a | ||
|
|
71bc2fed82 | ||
|
|
738fcfe01c | ||
|
|
3ebb2ab9ba | ||
|
|
ac98bc9144 | ||
|
|
3705ce6681 | ||
|
|
da7f613e9f | ||
|
|
bb03879aad | ||
|
|
d23a39a7d8 | ||
|
|
f7ea99412f | ||
|
|
d4715e2bc8 | ||
|
|
8567a83c47 | ||
|
|
77fdf59ae3 | ||
|
|
0e194aa4b4 | ||
|
|
2ba55bb477 | ||
|
|
4c759490da | ||
|
|
58a52c1f60 | ||
|
|
338b4dacd0 | ||
|
|
c0fcae0076 | ||
|
|
0e0bd93234 | ||
|
|
c5b0c19836 | ||
|
|
c00459e18f | ||
|
|
41db6652fe | ||
|
|
20869a13b3 | ||
|
|
97c2cd633d | ||
|
|
9244e2fb9c | ||
|
|
a86cbd8b7a | ||
|
|
f35d91e4fb | ||
|
|
687cf9beb4 | ||
|
|
f59b198ffb | ||
|
|
518bdf5a3f | ||
|
|
dcd09359eb | ||
|
|
425f8ea632 |
46
.github/workflows/test-container-build.yml
vendored
Normal file
46
.github/workflows/test-container-build.yml
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
name: ChangeDetection.io Container Build Test
|
||||
|
||||
# Triggers the workflow on push or pull request events
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- requirements.txt
|
||||
- Dockerfile
|
||||
|
||||
# Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
|
||||
# @todo: some kind of path filter for requirements.txt and Dockerfile
|
||||
jobs:
|
||||
test-container-build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python 3.9
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.9
|
||||
|
||||
# Just test that the build works, some libraries won't compile on ARM/rPi etc
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v1
|
||||
with:
|
||||
image: tonistiigi/binfmt:latest
|
||||
platforms: all
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v1
|
||||
with:
|
||||
install: true
|
||||
version: latest
|
||||
driver-opts: image=moby/buildkit:master
|
||||
|
||||
- name: Test that the docker containers can build
|
||||
id: docker_build
|
||||
uses: docker/build-push-action@v2
|
||||
# https://github.com/docker/build-push-action#customizing
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
platforms: linux/arm/v7,linux/arm/v6,linux/amd64,linux/arm64,
|
||||
cache-from: type=local,src=/tmp/.buildx-cache
|
||||
cache-to: type=local,dest=/tmp/.buildx-cache
|
||||
12
.github/workflows/test-only.yml
vendored
12
.github/workflows/test-only.yml
vendored
@@ -1,28 +1,25 @@
|
||||
name: ChangeDetection.io Test
|
||||
name: ChangeDetection.io App Test
|
||||
|
||||
# Triggers the workflow on push or pull request events
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
test-build:
|
||||
test-application:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python 3.9
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.9
|
||||
|
||||
- name: Show env vars
|
||||
run: set
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install flake8 pytest
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
|
||||
|
||||
- name: Lint with flake8
|
||||
run: |
|
||||
# stop the build if there are Python syntax errors or undefined names
|
||||
@@ -39,7 +36,4 @@ jobs:
|
||||
# Each test is totally isolated and performs its own cleanup/reset
|
||||
cd changedetectionio; ./run_all_tests.sh
|
||||
|
||||
# https://github.com/docker/build-push-action/blob/master/docs/advanced/test-before-push.md ?
|
||||
# https://github.com/docker/buildx/issues/59 ? Needs to be one platform?
|
||||
|
||||
# https://github.com/docker/buildx/issues/495#issuecomment-918925854
|
||||
|
||||
12
Dockerfile
12
Dockerfile
@@ -5,13 +5,15 @@ FROM python:3.8-slim as builder
|
||||
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libssl-dev \
|
||||
libffi-dev \
|
||||
cmake \
|
||||
g++ \
|
||||
gcc \
|
||||
libc-dev \
|
||||
libffi-dev \
|
||||
libssl-dev \
|
||||
libxslt-dev \
|
||||
zlib1g-dev \
|
||||
g++
|
||||
make \
|
||||
zlib1g-dev
|
||||
|
||||
RUN mkdir /install
|
||||
WORKDIR /install
|
||||
@@ -22,7 +24,7 @@ RUN pip install --target=/dependencies -r /requirements.txt
|
||||
|
||||
# Playwright is an alternative to Selenium
|
||||
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
|
||||
RUN pip install --target=/dependencies playwright~=1.24 \
|
||||
RUN pip install --target=/dependencies playwright~=1.26 \
|
||||
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
|
||||
|
||||
# Final image stage
|
||||
|
||||
@@ -33,7 +33,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
|
||||
#### Key Features
|
||||
|
||||
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
|
||||
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules
|
||||
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JSONPath or jq
|
||||
- Switch between fast non-JS and Chrome JS based "fetchers"
|
||||
- Easily specify how often a site should be checked
|
||||
- Execute JS before extracting text (Good for logging in, see examples in the UI!)
|
||||
|
||||
70
README.md
70
README.md
@@ -12,11 +12,14 @@ Know when important content changes, we support notifications via Discord, Teleg
|
||||
|
||||
[**Don't have time? Let us host it for you! try our $6.99/month subscription - use our proxies and support!**](https://lemonade.changedetection.io/start) , _half the price of other website change monitoring services and comes with unlimited watches & checks!_
|
||||
|
||||
- Chrome browser included.
|
||||
- Super fast, no registration needed setup.
|
||||
- Start watching and receiving change notifications instantly.
|
||||
|
||||
|
||||
- Automatic Updates, Automatic Backups, No Heroku "paused application", don't miss a change!
|
||||
- Javascript browser included
|
||||
- Unlimited checks and watches!
|
||||
Easily see what changed, examine by word, line, or individual character.
|
||||
|
||||
<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot-diff.png" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Self-hosted web page change monitoring context difference " />
|
||||
|
||||
|
||||
#### Example use cases
|
||||
@@ -44,22 +47,18 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
|
||||
#### Key Features
|
||||
|
||||
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
|
||||
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules
|
||||
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JSONPath or jq
|
||||
- Switch between fast non-JS and Chrome JS based "fetchers"
|
||||
- Easily specify how often a site should be checked
|
||||
- Execute JS before extracting text (Good for logging in, see examples in the UI!)
|
||||
- Override Request Headers, Specify `POST` or `GET` and other methods
|
||||
- Use the "Visual Selector" to help target specific elements
|
||||
- Configurable [proxy per watch](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration)
|
||||
|
||||
We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link.
|
||||
|
||||
## Screenshots
|
||||
|
||||
### Examine differences in content.
|
||||
|
||||
Easily see what changed, examine by word, line, or individual character.
|
||||
|
||||
<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot-diff.png" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Self-hosted web page change monitoring context difference " />
|
||||
|
||||
Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
|
||||
|
||||
### Filter by elements using the Visual Selector tool.
|
||||
@@ -122,7 +121,7 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io
|
||||
|
||||
|
||||
## Filters
|
||||
XPath, JSONPath and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
|
||||
XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
|
||||
|
||||
(We support LXML `re:test`, `re:math` and `re:replace`.)
|
||||
|
||||
@@ -152,7 +151,7 @@ Now you can also customise your notification content!
|
||||
|
||||
## JSON API Monitoring
|
||||
|
||||
Detect changes and monitor data in JSON API's by using the built-in JSONPath selectors as a filter / selector.
|
||||
Detect changes and monitor data in JSON API's by using either JSONPath or jq to filter, parse, and restructure JSON as needed.
|
||||
|
||||

|
||||
|
||||
@@ -160,9 +159,52 @@ This will re-parse the JSON and apply formatting to the text, making it super ea
|
||||
|
||||

|
||||
|
||||
### JSONPath or jq?
|
||||
|
||||
For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more information on jq.
|
||||
|
||||
The example below adds the price in dollars to each item in the JSON data, and then filters to only show items that are greater than 10.
|
||||
|
||||
#### Sample input data from API
|
||||
```
|
||||
{
|
||||
"items": [
|
||||
{
|
||||
"name": "Product A",
|
||||
"priceInCents": 2500
|
||||
},
|
||||
{
|
||||
"name": "Product B",
|
||||
"priceInCents": 500
|
||||
},
|
||||
{
|
||||
"name": "Product C",
|
||||
"priceInCents": 2000
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample jq
|
||||
`jq:.items[] | . + { "priceInDollars": (.priceInCents / 100) } | select(.priceInDollars > 10)`
|
||||
|
||||
#### Sample output data
|
||||
```
|
||||
{
|
||||
"name": "Product A",
|
||||
"priceInCents": 2500,
|
||||
"priceInDollars": 25
|
||||
}
|
||||
{
|
||||
"name": "Product C",
|
||||
"priceInCents": 2000,
|
||||
"priceInDollars": 20
|
||||
}
|
||||
```
|
||||
|
||||
### Parse JSON embedded in HTML!
|
||||
|
||||
When you enable a `json:` filter, you can even automatically extract and parse embedded JSON inside a HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites.
|
||||
When you enable a `json:` or `jq:` filter, you can even automatically extract and parse embedded JSON inside a HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites.
|
||||
|
||||
```
|
||||
<html>
|
||||
@@ -172,7 +214,7 @@ When you enable a `json:` filter, you can even automatically extract and parse e
|
||||
</script>
|
||||
```
|
||||
|
||||
`json:$.price` would give `23.50`, or you can extract the whole structure
|
||||
`json:$.price` or `jq:.price` would give `23.50`, or you can extract the whole structure
|
||||
|
||||
## Proxy configuration
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ from flask_wtf import CSRFProtect
|
||||
from changedetectionio import html_tools
|
||||
from changedetectionio.api import api_v1
|
||||
|
||||
__version__ = '0.39.19.1'
|
||||
__version__ = '0.39.20.1'
|
||||
|
||||
datastore = None
|
||||
|
||||
@@ -396,18 +396,20 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
existing_tags = datastore.get_all_tags()
|
||||
|
||||
form = forms.quickWatchForm(request.form)
|
||||
webdriver_enabled = True if os.getenv('PLAYWRIGHT_DRIVER_URL', False) or os.getenv('PLAYWRIGHT_DRIVER_URL', False) else False
|
||||
|
||||
output = render_template("watch-overview.html",
|
||||
form=form,
|
||||
watches=sorted_watches,
|
||||
tags=existing_tags,
|
||||
active_tag=limit_tag,
|
||||
app_rss_token=datastore.data['settings']['application']['rss_access_token'],
|
||||
has_unviewed=datastore.has_unviewed,
|
||||
# Don't link to hosting when we're on the hosting environment
|
||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||
form=form,
|
||||
guid=datastore.data['app_guid'],
|
||||
queued_uuids=[uuid for p,uuid in update_q.queue])
|
||||
|
||||
has_unviewed=datastore.has_unviewed,
|
||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||
queued_uuids=[uuid for p, uuid in update_q.queue],
|
||||
tags=existing_tags,
|
||||
watches=sorted_watches,
|
||||
webdriver_enabled=webdriver_enabled
|
||||
)
|
||||
|
||||
if session.get('share-link'):
|
||||
del(session['share-link'])
|
||||
@@ -489,7 +491,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
import hashlib
|
||||
|
||||
from changedetectionio import fetch_site_status
|
||||
from .fetch_processor import json_html_plaintext
|
||||
|
||||
# Get the most recent one
|
||||
newest_history_key = datastore.data['watching'][uuid].get('newest_history_key')
|
||||
@@ -503,7 +505,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
encoding='utf-8') as file:
|
||||
raw_content = file.read()
|
||||
|
||||
handler = fetch_site_status.perform_site_check(datastore=datastore)
|
||||
handler = json_html_plaintext.perform_site_check(datastore=datastore)
|
||||
stripped_content = html_tools.strip_ignore_text(raw_content,
|
||||
datastore.data['watching'][uuid]['ignore_text'])
|
||||
|
||||
@@ -547,6 +549,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
# Defaults for proxy choice
|
||||
if datastore.proxy_list is not None: # When enabled
|
||||
# @todo
|
||||
# Radio needs '' not None, or incase that the chosen one no longer exists
|
||||
if default['proxy'] is None or not any(default['proxy'] in tup for tup in datastore.proxy_list):
|
||||
default['proxy'] = ''
|
||||
@@ -560,7 +563,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
|
||||
del form.proxy
|
||||
else:
|
||||
form.proxy.choices = [('', 'Default')] + datastore.proxy_list
|
||||
form.proxy.choices = [('', 'Default')]
|
||||
for p in datastore.proxy_list:
|
||||
form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))
|
||||
|
||||
|
||||
if request.method == 'POST' and form.validate():
|
||||
extra_update_obj = {}
|
||||
@@ -632,20 +638,31 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# Only works reliably with Playwright
|
||||
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver'
|
||||
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
|
||||
# Which tabs to show/hide ?
|
||||
enabled_tabs = []
|
||||
if watch.get('fetch_processor') == 'json_html_plaintext' or not watch.get('fetch_processor'):
|
||||
enabled_tabs.append('visual-selector')
|
||||
enabled_tabs.append('text-filters-and-triggers')
|
||||
|
||||
if watch.get('fetch_processor') == 'image':
|
||||
enabled_tabs.append('visual-selector')
|
||||
|
||||
output = render_template("edit.html",
|
||||
uuid=uuid,
|
||||
watch=datastore.data['watching'][uuid],
|
||||
form=form,
|
||||
has_empty_checktime=using_default_check_time,
|
||||
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
|
||||
using_global_webdriver_wait=default['webdriver_delay'] is None,
|
||||
current_base_url=datastore.data['settings']['application']['base_url'],
|
||||
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||
enabled_tabs = enabled_tabs,
|
||||
form=form,
|
||||
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
|
||||
has_empty_checktime=using_default_check_time,
|
||||
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
|
||||
settings_application=datastore.data['settings']['application'],
|
||||
using_global_webdriver_wait=default['webdriver_delay'] is None,
|
||||
uuid=uuid,
|
||||
visualselector_data_is_ready=visualselector_data_is_ready,
|
||||
visualselector_enabled=visualselector_enabled,
|
||||
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False)
|
||||
watch=watch,
|
||||
)
|
||||
|
||||
return output
|
||||
@@ -657,15 +674,16 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
default = deepcopy(datastore.data['settings'])
|
||||
if datastore.proxy_list is not None:
|
||||
available_proxies = list(datastore.proxy_list.keys())
|
||||
# When enabled
|
||||
system_proxy = datastore.data['settings']['requests']['proxy']
|
||||
# In the case it doesnt exist anymore
|
||||
if not any([system_proxy in tup for tup in datastore.proxy_list]):
|
||||
if not system_proxy in available_proxies:
|
||||
system_proxy = None
|
||||
|
||||
default['requests']['proxy'] = system_proxy if system_proxy is not None else datastore.proxy_list[0][0]
|
||||
default['requests']['proxy'] = system_proxy if system_proxy is not None else available_proxies[0]
|
||||
# Used by the form handler to keep or remove the proxy settings
|
||||
default['proxy_list'] = datastore.proxy_list
|
||||
default['proxy_list'] = available_proxies[0]
|
||||
|
||||
|
||||
# Don't use form.data on POST so that it doesnt overrid the checkbox status from the POST status
|
||||
@@ -680,7 +698,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
|
||||
del form.requests.form.proxy
|
||||
else:
|
||||
form.requests.form.proxy.choices = datastore.proxy_list
|
||||
form.requests.form.proxy.choices = []
|
||||
for p in datastore.proxy_list:
|
||||
form.requests.form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))
|
||||
|
||||
|
||||
if request.method == 'POST':
|
||||
# Password unset is a GET, but we can lock the session to a salted env password to always need the password
|
||||
@@ -774,6 +795,86 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
return redirect(url_for('index'))
|
||||
|
||||
|
||||
@app.route("/diff/image/<string:uuid>", methods=['GET'])
|
||||
@login_required
|
||||
def diff_image_history_page(uuid):
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
flash("No history found for the specified link, bad link?", "error")
|
||||
return redirect(url_for('index'))
|
||||
|
||||
history = watch.history
|
||||
dates = list(history.keys())
|
||||
|
||||
if len(dates) < 2:
|
||||
flash("Not enough saved change detection snapshots to produce a report.", "error")
|
||||
return redirect(url_for('index'))
|
||||
|
||||
previous_version = dates[-2]
|
||||
|
||||
datastore.set_last_viewed(uuid, time.time())
|
||||
|
||||
output = render_template("diff-image.html",
|
||||
watch=watch,
|
||||
extra_stylesheets=extra_stylesheets,
|
||||
versions=dates[:-1], # All except current/last
|
||||
uuid=uuid,
|
||||
newest_version_timestamp=dates[-1],
|
||||
current_previous_version=str(previous_version),
|
||||
current_diff_url=watch['url'],
|
||||
extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']),
|
||||
left_sticky=True,
|
||||
last_error=watch['last_error'],
|
||||
last_error_text=watch.get_error_text(),
|
||||
last_error_screenshot=watch.get_error_snapshot()
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
@app.route("/preview/image/<string:uuid>", methods=['GET'])
|
||||
@login_required
|
||||
def preview_image_history_page(uuid):
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
flash("No history found for the specified link, bad link?", "error")
|
||||
return redirect(url_for('index'))
|
||||
|
||||
history = watch.history
|
||||
dates = list(history.keys())
|
||||
|
||||
if len(dates) < 1:
|
||||
flash("Not enough saved change detection snapshots to produce a report.", "error")
|
||||
return redirect(url_for('index'))
|
||||
|
||||
output = render_template("preview-image.html",
|
||||
watch=watch,
|
||||
extra_stylesheets=extra_stylesheets,
|
||||
uuid=uuid,
|
||||
current_diff_url=watch['url'],
|
||||
newest_history_key = watch.newest_history_key,
|
||||
extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']),
|
||||
left_sticky=True,
|
||||
last_error=watch['last_error'],
|
||||
last_error_text=watch.get_error_text(),
|
||||
last_error_screenshot=watch.get_error_snapshot()
|
||||
)
|
||||
return output
|
||||
|
||||
@app.route("/diff/<string:uuid>", methods=['GET'])
|
||||
@login_required
|
||||
def diff_history_page(uuid):
|
||||
@@ -939,6 +1040,67 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
return output
|
||||
|
||||
@app.route("/preview/image/<string:uuid>/<string:history_timestamp>")
|
||||
def render_single_image(uuid, history_timestamp):
|
||||
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
dates = list(watch.history.keys())
|
||||
|
||||
|
||||
if not history_timestamp or history_timestamp == 'None':
|
||||
history_timestamp = dates[-2]
|
||||
|
||||
|
||||
filename = watch.history[history_timestamp]
|
||||
with open(filename, 'rb') as f:
|
||||
img = f.read()
|
||||
|
||||
response = make_response(img)
|
||||
|
||||
response.headers['Content-type'] = 'image/png'
|
||||
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
|
||||
response.headers['Pragma'] = 'no-cache'
|
||||
response.headers['Expires'] = 0
|
||||
|
||||
return response
|
||||
|
||||
|
||||
|
||||
# Diff renderer for images
|
||||
# Renders the diff which includes the red box around what changes
|
||||
# We always compare the newest against whatever compare_date we are given
|
||||
@app.route("/diff/image/<string:uuid>/<string:compare_date>")
|
||||
def render_diff_image(uuid, compare_date):
|
||||
from changedetectionio import image_diff
|
||||
|
||||
from flask import make_response
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
|
||||
dates = list(watch.history.keys())
|
||||
if len(dates) < 2:
|
||||
flash("Not enough saved change detection snapshots to produce a report.", "error")
|
||||
return redirect(url_for('index'))
|
||||
|
||||
if not compare_date or compare_date == 'None':
|
||||
compare_date = dates[-2]
|
||||
|
||||
new_img = watch.history[watch.newest_history_key]
|
||||
prev_img = watch.history[compare_date]
|
||||
|
||||
try:
|
||||
img = image_diff.render_diff(new_img, prev_img)
|
||||
except ValueError as e:
|
||||
print ("EXCEPTION: Diff image - got exception {} reverting to raw image without rendering difference".format(str(e)))
|
||||
with open(new_img, 'rb') as f:
|
||||
img = f.read()
|
||||
|
||||
|
||||
resp = make_response(img)
|
||||
resp.headers['Content-Type'] = 'image/jpeg'
|
||||
return resp
|
||||
|
||||
|
||||
|
||||
@app.route("/settings/notification-logs", methods=['GET'])
|
||||
@login_required
|
||||
def notification_logs():
|
||||
@@ -1087,12 +1249,24 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
return redirect(url_for('index'))
|
||||
|
||||
url = request.form.get('url').strip()
|
||||
|
||||
if datastore.url_exists(url):
|
||||
flash('The URL {} already exists'.format(url), "error")
|
||||
return redirect(url_for('index'))
|
||||
|
||||
add_paused = request.form.get('edit_and_watch_submit_button') != None
|
||||
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused})
|
||||
fetch_processor = request.form.get('fetch_processor')
|
||||
|
||||
extras = {'paused': add_paused}
|
||||
if fetch_processor:
|
||||
extras['fetch_processor']=fetch_processor
|
||||
if fetch_processor == 'image':
|
||||
extras['fetch_backend'] = 'html_webdriver'
|
||||
|
||||
new_uuid = datastore.add_watch(url=url,
|
||||
tag=request.form.get('tag').strip(),
|
||||
extras=extras
|
||||
)
|
||||
|
||||
|
||||
if not add_paused and new_uuid:
|
||||
@@ -1368,6 +1542,8 @@ def ticker_thread_check_time_launch_checks():
|
||||
import random
|
||||
from changedetectionio import update_worker
|
||||
|
||||
proxy_last_called_time = {}
|
||||
|
||||
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20))
|
||||
print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds)
|
||||
|
||||
@@ -1428,10 +1604,30 @@ def ticker_thread_check_time_launch_checks():
|
||||
if watch.jitter_seconds == 0:
|
||||
watch.jitter_seconds = random.uniform(-abs(jitter), jitter)
|
||||
|
||||
|
||||
seconds_since_last_recheck = now - watch['last_checked']
|
||||
|
||||
if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
|
||||
if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]:
|
||||
|
||||
# Proxies can be set to have a limit on seconds between which they can be called
|
||||
watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid)
|
||||
if watch_proxy and watch_proxy in list(datastore.proxy_list.keys()):
|
||||
# Proxy may also have some threshold minimum
|
||||
proxy_list_reuse_time_minimum = int(datastore.proxy_list.get(watch_proxy, {}).get('reuse_time_minimum', 0))
|
||||
if proxy_list_reuse_time_minimum:
|
||||
proxy_last_used_time = proxy_last_called_time.get(watch_proxy, 0)
|
||||
time_since_proxy_used = int(time.time() - proxy_last_used_time)
|
||||
if time_since_proxy_used < proxy_list_reuse_time_minimum:
|
||||
# Not enough time difference reached, skip this watch
|
||||
print("> Skipped UUID {} using proxy '{}', not enough time between proxy requests {}s/{}s".format(uuid,
|
||||
watch_proxy,
|
||||
time_since_proxy_used,
|
||||
proxy_list_reuse_time_minimum))
|
||||
continue
|
||||
else:
|
||||
# Record the last used time
|
||||
proxy_last_called_time[watch_proxy] = int(time.time())
|
||||
|
||||
# Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
|
||||
priority = int(time.time())
|
||||
print(
|
||||
|
||||
@@ -21,7 +21,6 @@ class Non200ErrorCodeReceived(Exception):
|
||||
self.page_text = html_tools.html_to_text(page_html)
|
||||
return
|
||||
|
||||
|
||||
class JSActionExceptions(Exception):
|
||||
def __init__(self, status_code, url, screenshot, message=''):
|
||||
self.status_code = status_code
|
||||
@@ -66,13 +65,14 @@ class ReplyWithContentButNoText(Exception):
|
||||
return
|
||||
|
||||
class Fetcher():
|
||||
error = None
|
||||
status_code = None
|
||||
content = None
|
||||
headers = None
|
||||
|
||||
error = None
|
||||
fetcher_description = "No description"
|
||||
headers = None
|
||||
raw_content = None
|
||||
status_code = None
|
||||
webdriver_js_execute_code = None
|
||||
|
||||
xpath_element_js = """
|
||||
// Include the getXpath script directly, easier than fetching
|
||||
!function(e,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(e=e||self).getXPath=n()}(this,function(){return function(e){var n=e;if(n&&n.id)return'//*[@id="'+n.id+'"]';for(var o=[];n&&Node.ELEMENT_NODE===n.nodeType;){for(var i=0,r=!1,d=n.previousSibling;d;)d.nodeType!==Node.DOCUMENT_TYPE_NODE&&d.nodeName===n.nodeName&&i++,d=d.previousSibling;for(d=n.nextSibling;d;){if(d.nodeName===n.nodeName){r=!0;break}d=d.nextSibling}o.push((n.prefix?n.prefix+":":"")+n.localName+(i||r?"["+(i+1)+"]":"")),n=n.parentNode}return o.length?"/"+o.reverse().join("/"):""}});
|
||||
@@ -202,6 +202,7 @@ class Fetcher():
|
||||
|
||||
# Will be needed in the future by the VisualSelector, always get this where possible.
|
||||
screenshot = False
|
||||
element_screenshot = None
|
||||
system_http_proxy = os.getenv('HTTP_PROXY')
|
||||
system_https_proxy = os.getenv('HTTPS_PROXY')
|
||||
|
||||
@@ -310,12 +311,14 @@ class base_html_playwright(Fetcher):
|
||||
request_body,
|
||||
request_method,
|
||||
ignore_status_codes=False,
|
||||
current_css_filter=None):
|
||||
current_css_filter=None
|
||||
):
|
||||
|
||||
from playwright.sync_api import sync_playwright
|
||||
import playwright._impl._api_types
|
||||
from playwright._impl._api_types import Error, TimeoutError
|
||||
response = None
|
||||
|
||||
with sync_playwright() as p:
|
||||
browser_type = getattr(p, self.browser_type)
|
||||
|
||||
@@ -373,8 +376,11 @@ class base_html_playwright(Fetcher):
|
||||
print("response object was none")
|
||||
raise EmptyReply(url=url, status_code=None)
|
||||
|
||||
# Bug 2(?) Set the viewport size AFTER loading the page
|
||||
page.set_viewport_size({"width": 1280, "height": 1024})
|
||||
|
||||
# Removed browser-set-size, seemed to be needed to make screenshots work reliably in older playwright versions
|
||||
# Was causing exceptions like 'waiting for page but content is changing' etc
|
||||
# https://www.browserstack.com/docs/automate/playwright/change-browser-window-size 1280x720 should be the default
|
||||
|
||||
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
||||
time.sleep(extra_wait)
|
||||
|
||||
@@ -398,12 +404,26 @@ class base_html_playwright(Fetcher):
|
||||
|
||||
raise JSActionExceptions(status_code=response.status, screenshot=error_screenshot, message=str(e), url=url)
|
||||
|
||||
else:
|
||||
# JS eval was run, now we also wait some time if possible to let the page settle
|
||||
if self.render_extract_delay:
|
||||
page.wait_for_timeout(self.render_extract_delay * 1000)
|
||||
|
||||
page.wait_for_timeout(500)
|
||||
|
||||
self.content = page.content()
|
||||
self.raw_content = page.content()
|
||||
|
||||
self.status_code = response.status
|
||||
self.headers = response.all_headers()
|
||||
|
||||
if current_css_filter is not None:
|
||||
if current_css_filter is not None and len(current_css_filter):
|
||||
page.evaluate("var css_filter={}".format(json.dumps(current_css_filter)))
|
||||
|
||||
el = page.locator(current_css_filter)
|
||||
if el:
|
||||
el.scroll_into_view_if_needed()
|
||||
self.element_screenshot = el.screenshot()
|
||||
else:
|
||||
page.evaluate("var css_filter=''")
|
||||
|
||||
@@ -418,9 +438,9 @@ class base_html_playwright(Fetcher):
|
||||
# acceptable screenshot quality here
|
||||
try:
|
||||
# Quality set to 1 because it's not used, just used as a work-around for a bug, no need to change this.
|
||||
page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}, quality=1)
|
||||
#page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}, quality=1)
|
||||
# The actual screenshot
|
||||
self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
|
||||
self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 82)))
|
||||
except Exception as e:
|
||||
context.close()
|
||||
browser.close()
|
||||
@@ -514,8 +534,6 @@ class base_html_webdriver(Fetcher):
|
||||
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again
|
||||
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
||||
|
||||
self.screenshot = self.driver.get_screenshot_as_png()
|
||||
|
||||
# @todo - how to check this? is it possible?
|
||||
self.status_code = 200
|
||||
# @todo somehow we should try to get this working for WebDriver
|
||||
@@ -524,8 +542,11 @@ class base_html_webdriver(Fetcher):
|
||||
# @todo - dom wait loaded?
|
||||
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
|
||||
self.content = self.driver.page_source
|
||||
self.raw_content = self.driver.page_source
|
||||
self.headers = {}
|
||||
|
||||
self.screenshot = self.driver.get_screenshot_as_png()
|
||||
|
||||
# Does the connection to the webdriver work? run a test connection.
|
||||
def is_ready(self):
|
||||
from selenium import webdriver
|
||||
@@ -564,6 +585,11 @@ class html_requests(Fetcher):
|
||||
ignore_status_codes=False,
|
||||
current_css_filter=None):
|
||||
|
||||
# Make requests use a more modern looking user-agent
|
||||
if not 'User-Agent' in request_headers:
|
||||
request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')
|
||||
|
||||
proxies = {}
|
||||
|
||||
# Allows override the proxy on a per-request basis
|
||||
@@ -603,6 +629,7 @@ class html_requests(Fetcher):
|
||||
|
||||
self.status_code = r.status_code
|
||||
self.content = r.text
|
||||
self.raw_content = r.content
|
||||
self.headers = r.headers
|
||||
|
||||
|
||||
|
||||
12
changedetectionio/fetch_processor/__init__.py
Normal file
12
changedetectionio/fetch_processor/__init__.py
Normal file
@@ -0,0 +1,12 @@
|
||||
available_fetchers = [('json_html_plaintext', 'JSON/HTML/Text'), ('image', 'Graphically by image or web-page')]
|
||||
|
||||
class fetch_processor():
|
||||
contents = b''
|
||||
screenshot = None
|
||||
datastore = None
|
||||
|
||||
"""
|
||||
base class for all fetch processors
|
||||
- json_html_plaintext
|
||||
- image (future)
|
||||
"""
|
||||
130
changedetectionio/fetch_processor/image.py
Normal file
130
changedetectionio/fetch_processor/image.py
Normal file
@@ -0,0 +1,130 @@
|
||||
import hashlib
|
||||
import imagehash
|
||||
from PIL import Image
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import urllib3
|
||||
|
||||
# fetch processor for requesting and comparing a single image
|
||||
# can use both requests and playwright/selenium
|
||||
|
||||
# - imagehash for change detection (or https://github.com/dgtlmoon/changedetection.io/pull/419/files#diff-7d3854710a6c0faead783f75850100a4c4b69409309200d3a83692dc9783bf6eR17 ?)
|
||||
# - skimage.metrics import structural_similarity for viewing the diff
|
||||
|
||||
|
||||
from changedetectionio import content_fetcher, html_tools
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
from . import fetch_processor
|
||||
|
||||
|
||||
# Some common stuff here that can be moved to a base class
|
||||
# (set_proxy_from_list)
|
||||
class perform_site_check(fetch_processor):
|
||||
xpath_data = None
|
||||
|
||||
def __init__(self, *args, datastore, **kwargs):
|
||||
self.datastore = datastore
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def run(self, uuid):
|
||||
changed_detected = False
|
||||
screenshot = False # as bytes
|
||||
stripped_text_from_html = ""
|
||||
|
||||
watch = self.datastore.data['watching'].get(uuid)
|
||||
|
||||
|
||||
if watch.get('fetch_backend') != 'html_webdriver':
|
||||
raise Exception(
|
||||
"Requires a Chrome compatible fetcher enabled."
|
||||
)
|
||||
|
||||
# Protect against file:// access
|
||||
if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
|
||||
raise Exception(
|
||||
"file:// type access is denied for security reasons."
|
||||
)
|
||||
|
||||
# Unset any existing notification error
|
||||
update_obj = {'last_notification_error': False, 'last_error': False}
|
||||
|
||||
extra_headers = self.datastore.data['watching'][uuid].get('headers')
|
||||
|
||||
# Tweak the base config with the per-watch ones
|
||||
request_headers = self.datastore.data['settings']['headers'].copy()
|
||||
request_headers.update(extra_headers)
|
||||
|
||||
# https://github.com/psf/requests/issues/4525
|
||||
# Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
|
||||
# do this by accident.
|
||||
if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
|
||||
request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
|
||||
|
||||
timeout = self.datastore.data['settings']['requests']['timeout']
|
||||
url = watch.get('url')
|
||||
request_body = self.datastore.data['watching'][uuid].get('body')
|
||||
request_method = self.datastore.data['watching'][uuid].get('method')
|
||||
ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False)
|
||||
|
||||
prefer_backend = watch['fetch_backend']
|
||||
if hasattr(content_fetcher, prefer_backend):
|
||||
klass = getattr(content_fetcher, prefer_backend)
|
||||
else:
|
||||
# If the klass doesnt exist, just use a default
|
||||
klass = getattr(content_fetcher, "html_requests")
|
||||
|
||||
proxy_args = self.datastore.get_preferred_proxy_for_watch(uuid)
|
||||
fetcher = klass(proxy_override=proxy_args)
|
||||
|
||||
fetcher.run(
|
||||
ignore_status_codes=ignore_status_codes,
|
||||
request_body=request_body,
|
||||
request_headers=request_headers,
|
||||
request_method=request_method,
|
||||
current_css_filter=watch.get('css_filter'),
|
||||
timeout=timeout,
|
||||
url=url
|
||||
)
|
||||
|
||||
fetcher.quit()
|
||||
|
||||
# if not image/foobar in mimetype
|
||||
# raise content_fecther.NotAnImage(mimetype) ?
|
||||
# or better to try load with PIL and catch exception?
|
||||
|
||||
update_obj["last_check_status"] = fetcher.get_last_status_code()
|
||||
|
||||
if 'image' in fetcher.headers['content-type']:
|
||||
self.contents = fetcher.raw_content
|
||||
else:
|
||||
self.contents = fetcher.element_screenshot if fetcher.element_screenshot else fetcher.screenshot
|
||||
|
||||
# Used for visual-selector
|
||||
self.xpath_data = fetcher.xpath_data
|
||||
self.screenshot = fetcher.screenshot
|
||||
|
||||
now = time.time()
|
||||
image = Image.open(io.BytesIO(self.contents))
|
||||
|
||||
# @todo different choice?
|
||||
# https://github.com/JohannesBuchner/imagehash#references
|
||||
fetched_hash = str(imagehash.average_hash(image))
|
||||
print(uuid, "Time to image hash", time.time() - now)
|
||||
|
||||
# The main thing that all this at the moment comes down to :)
|
||||
if watch['previous_md5'] != fetched_hash:
|
||||
changed_detected = True
|
||||
|
||||
# Always record the new checksum
|
||||
update_obj["previous_md5"] = fetched_hash
|
||||
|
||||
# On the first run of a site, watch['previous_md5'] will be None, set it the current one.
|
||||
if not watch.get('previous_md5'):
|
||||
watch['previous_md5'] = fetched_hash
|
||||
|
||||
return changed_detected, update_obj
|
||||
@@ -9,44 +9,18 @@ from changedetectionio import content_fetcher, html_tools
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
from . import fetch_processor
|
||||
|
||||
# Some common stuff here that can be moved to a base class
|
||||
# (set_proxy_from_list)
|
||||
class perform_site_check():
|
||||
class perform_site_check(fetch_processor):
|
||||
screenshot = None
|
||||
xpath_data = None
|
||||
|
||||
def __init__(self, *args, datastore, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.datastore = datastore
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
# If there was a proxy list enabled, figure out what proxy_args/which proxy to use
|
||||
# if watch.proxy use that
|
||||
# fetcher.proxy_override = watch.proxy or main config proxy
|
||||
# Allows override the proxy on a per-request basis
|
||||
# ALWAYS use the first one is nothing selected
|
||||
|
||||
def set_proxy_from_list(self, watch):
|
||||
proxy_args = None
|
||||
if self.datastore.proxy_list is None:
|
||||
return None
|
||||
|
||||
# If its a valid one
|
||||
if any([watch['proxy'] in p for p in self.datastore.proxy_list]):
|
||||
proxy_args = watch['proxy']
|
||||
|
||||
# not valid (including None), try the system one
|
||||
else:
|
||||
system_proxy = self.datastore.data['settings']['requests']['proxy']
|
||||
# Is not None and exists
|
||||
if any([system_proxy in p for p in self.datastore.proxy_list]):
|
||||
proxy_args = system_proxy
|
||||
|
||||
# Fallback - Did not resolve anything, use the first available
|
||||
if proxy_args is None:
|
||||
proxy_args = self.datastore.proxy_list[0][0]
|
||||
|
||||
return proxy_args
|
||||
|
||||
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
||||
# So convert it to inline flag "foobar(?i)" type configuration
|
||||
@@ -68,6 +42,8 @@ class perform_site_check():
|
||||
stripped_text_from_html = ""
|
||||
|
||||
watch = self.datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
return
|
||||
|
||||
# Protect against file:// access
|
||||
if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
|
||||
@@ -90,7 +66,7 @@ class perform_site_check():
|
||||
if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
|
||||
request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
|
||||
|
||||
timeout = self.datastore.data['settings']['requests']['timeout']
|
||||
timeout = self.datastore.data['settings']['requests'].get('timeout')
|
||||
url = watch.get('url')
|
||||
request_body = self.datastore.data['watching'][uuid].get('body')
|
||||
request_method = self.datastore.data['watching'][uuid].get('method')
|
||||
@@ -110,9 +86,13 @@ class perform_site_check():
|
||||
# If the klass doesnt exist, just use a default
|
||||
klass = getattr(content_fetcher, "html_requests")
|
||||
|
||||
proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=uuid)
|
||||
proxy_url = None
|
||||
if proxy_id:
|
||||
proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
|
||||
print ("UUID {} Using proxy {}".format(uuid, proxy_url))
|
||||
|
||||
proxy_args = self.set_proxy_from_list(watch)
|
||||
fetcher = klass(proxy_override=proxy_args)
|
||||
fetcher = klass(proxy_override=proxy_url)
|
||||
|
||||
# Configurable per-watch or global extra delay before extracting text (for webDriver types)
|
||||
system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
|
||||
@@ -163,8 +143,9 @@ class perform_site_check():
|
||||
has_filter_rule = True
|
||||
|
||||
if has_filter_rule:
|
||||
if 'json:' in css_filter_rule:
|
||||
stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, jsonpath_filter=css_filter_rule)
|
||||
json_filter_prefixes = ['json:', 'jq:']
|
||||
if any(prefix in css_filter_rule for prefix in json_filter_prefixes):
|
||||
stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, json_filter=css_filter_rule)
|
||||
is_html = False
|
||||
|
||||
if is_html or is_source:
|
||||
@@ -315,4 +296,6 @@ class perform_site_check():
|
||||
if not watch.get('previous_md5'):
|
||||
watch['previous_md5'] = fetched_md5
|
||||
|
||||
return changed_detected, update_obj, text_content_before_ignored_filter
|
||||
self.contents = text_content_before_ignored_filter
|
||||
|
||||
return changed_detected, update_obj
|
||||
@@ -304,10 +304,28 @@ class ValidateCSSJSONXPATHInput(object):
|
||||
# Re #265 - maybe in the future fetch the page and offer a
|
||||
# warning/notice that its possible the rule doesnt yet match anything?
|
||||
|
||||
if 'jq:' in line:
|
||||
if not self.allow_json:
|
||||
raise ValidationError("jq not permitted in this field!")
|
||||
|
||||
import jq
|
||||
input = line.replace('jq:', '')
|
||||
|
||||
try:
|
||||
jq.compile(input)
|
||||
except (ValueError) as e:
|
||||
message = field.gettext('\'%s\' is not a valid jq expression. (%s)')
|
||||
raise ValidationError(message % (input, str(e)))
|
||||
except:
|
||||
raise ValidationError("A system-error occurred when validating your jq expression")
|
||||
|
||||
|
||||
class quickWatchForm(Form):
|
||||
from . import fetch_processor
|
||||
|
||||
url = fields.URLField('URL', validators=[validateURL()])
|
||||
tag = StringField('Group tag', [validators.Optional()])
|
||||
fetch_processor = RadioField(u'Compare as', choices=fetch_processor.available_fetchers, default=fetch_processor.available_fetchers[0][0])
|
||||
watch_submit_button = SubmitField('Watch', render_kw={"class": "pure-button pure-button-primary"})
|
||||
edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ from typing import List
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from jsonpath_ng.ext import parse
|
||||
import jq
|
||||
import re
|
||||
from inscriptis import get_text
|
||||
from inscriptis.model.config import ParserConfig
|
||||
@@ -79,19 +80,26 @@ def extract_element(find='title', html_content=''):
|
||||
return element_text
|
||||
|
||||
#
|
||||
def _parse_json(json_data, jsonpath_filter):
|
||||
s=[]
|
||||
jsonpath_expression = parse(jsonpath_filter.replace('json:', ''))
|
||||
match = jsonpath_expression.find(json_data)
|
||||
def _parse_json(json_data, json_filter):
|
||||
if 'json:' in json_filter:
|
||||
jsonpath_expression = parse(json_filter.replace('json:', ''))
|
||||
match = jsonpath_expression.find(json_data)
|
||||
return _get_stripped_text_from_json_match(match)
|
||||
if 'jq:' in json_filter:
|
||||
jq_expression = jq.compile(json_filter.replace('jq:', ''))
|
||||
match = jq_expression.input(json_data).all()
|
||||
return _get_stripped_text_from_json_match(match)
|
||||
|
||||
def _get_stripped_text_from_json_match(match):
|
||||
s = []
|
||||
# More than one result, we will return it as a JSON list.
|
||||
if len(match) > 1:
|
||||
for i in match:
|
||||
s.append(i.value)
|
||||
s.append(i.value if hasattr(i, 'value') else i)
|
||||
|
||||
# Single value, use just the value, as it could be later used in a token in notifications.
|
||||
if len(match) == 1:
|
||||
s = match[0].value
|
||||
s = match[0].value if hasattr(match[0], 'value') else match[0]
|
||||
|
||||
# Re #257 - Better handling where it does not exist, in the case the original 's' value was False..
|
||||
if not match:
|
||||
@@ -103,16 +111,16 @@ def _parse_json(json_data, jsonpath_filter):
|
||||
|
||||
return stripped_text_from_html
|
||||
|
||||
def extract_json_as_string(content, jsonpath_filter):
|
||||
def extract_json_as_string(content, json_filter):
|
||||
|
||||
stripped_text_from_html = False
|
||||
|
||||
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded <script type=ldjson>
|
||||
try:
|
||||
stripped_text_from_html = _parse_json(json.loads(content), jsonpath_filter)
|
||||
stripped_text_from_html = _parse_json(json.loads(content), json_filter)
|
||||
except json.JSONDecodeError:
|
||||
|
||||
# Foreach <script json></script> blob.. just return the first that matches jsonpath_filter
|
||||
# Foreach <script json></script> blob.. just return the first that matches json_filter
|
||||
s = []
|
||||
soup = BeautifulSoup(content, 'html.parser')
|
||||
bs_result = soup.findAll('script')
|
||||
@@ -131,7 +139,7 @@ def extract_json_as_string(content, jsonpath_filter):
|
||||
# Just skip it
|
||||
continue
|
||||
else:
|
||||
stripped_text_from_html = _parse_json(json_data, jsonpath_filter)
|
||||
stripped_text_from_html = _parse_json(json_data, json_filter)
|
||||
if stripped_text_from_html:
|
||||
break
|
||||
|
||||
|
||||
44
changedetectionio/image_diff.py
Normal file
44
changedetectionio/image_diff.py
Normal file
@@ -0,0 +1,44 @@
|
||||
from skimage.metrics import structural_similarity as compare_ssim
|
||||
import argparse
|
||||
import imutils
|
||||
import cv2
|
||||
|
||||
# From https://www.pyimagesearch.com/2017/06/19/image-difference-with-opencv-and-python/
|
||||
def render_diff(fpath_imageA, fpath_imageB):
|
||||
|
||||
import time
|
||||
now = time.time()
|
||||
|
||||
imageA = cv2.imread(fpath_imageA)
|
||||
imageB = cv2.imread(fpath_imageB)
|
||||
|
||||
# convert the images to grayscale
|
||||
grayA = cv2.cvtColor(imageA, cv2.COLOR_BGR2GRAY)
|
||||
grayB = cv2.cvtColor(imageB, cv2.COLOR_BGR2GRAY)
|
||||
|
||||
# compute the Structural Similarity Index (SSIM) between the two
|
||||
# images, ensuring that the difference image is returned
|
||||
(score, diff) = compare_ssim(grayA, grayB, full=True)
|
||||
diff = (diff * 255).astype("uint8")
|
||||
print("SSIM: {}".format(score))
|
||||
|
||||
# threshold the difference image, followed by finding contours to
|
||||
# obtain the regions of the two input images that differ
|
||||
thresh = cv2.threshold(diff, 0, 255,
|
||||
cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
|
||||
cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
|
||||
cv2.CHAIN_APPROX_SIMPLE)
|
||||
cnts = imutils.grab_contours(cnts)
|
||||
|
||||
# loop over the contours
|
||||
for c in cnts:
|
||||
# compute the bounding box of the contour and then draw the
|
||||
# bounding box on both input images to represent where the two
|
||||
# images differ
|
||||
(x, y, w, h) = cv2.boundingRect(c)
|
||||
cv2.rectangle(imageA, (x, y), (x + w, y + h), (0, 0, 255), 1)
|
||||
cv2.rectangle(imageB, (x, y), (x + w, y + h), (0, 0, 255), 1)
|
||||
|
||||
#return cv2.imencode('.jpg', imageB)[1].tobytes()
|
||||
print ("Image comparison processing time", time.time()-now)
|
||||
return cv2.imencode('.jpg', imageA)[1].tobytes()
|
||||
@@ -13,10 +13,6 @@ class model(dict):
|
||||
'watching': {},
|
||||
'settings': {
|
||||
'headers': {
|
||||
'User-Agent': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'),
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
|
||||
'Accept-Encoding': 'gzip, deflate', # No support for brolti in python requests yet.
|
||||
'Accept-Language': 'en-GB,en-US;q=0.9,en;'
|
||||
},
|
||||
'requests': {
|
||||
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
|
||||
|
||||
@@ -14,42 +14,43 @@ class model(dict):
|
||||
__newest_history_key = None
|
||||
__history_n=0
|
||||
__base_config = {
|
||||
'url': None,
|
||||
'tag': None,
|
||||
'last_checked': 0,
|
||||
'paused': False,
|
||||
'last_viewed': 0, # history key value of the last viewed via the [diff] link
|
||||
#'newest_history_key': 0,
|
||||
'title': None,
|
||||
'previous_md5': False,
|
||||
'uuid': str(uuid_builder.uuid4()),
|
||||
'headers': {}, # Extra headers to send
|
||||
'body': None,
|
||||
'method': 'GET',
|
||||
#'history': {}, # Dict of timestamp and output stripped filename
|
||||
'ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||
# Custom notification content
|
||||
'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)
|
||||
'notification_title': None,
|
||||
'notification_body': None,
|
||||
'notification_format': default_notification_format_for_watch,
|
||||
'notification_muted': False,
|
||||
'css_filter': '',
|
||||
'last_error': False,
|
||||
'extract_text': [], # Extract text by regex after filters
|
||||
'subtractive_selectors': [],
|
||||
'trigger_text': [], # List of text or regex to wait for until a change is detected
|
||||
'text_should_not_be_present': [], # Text that should not present
|
||||
'fetch_backend': None,
|
||||
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
|
||||
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
|
||||
'extract_title_as_title': False,
|
||||
'check_unique_lines': False, # On change-detected, compare against all history if its something new
|
||||
'proxy': None, # Preferred proxy connection
|
||||
# Re #110, so then if this is set to None, we know to use the default value instead
|
||||
# Requires setting to None on submit if it's the same as the default
|
||||
# Should be all None by default, so we use the system default in this case.
|
||||
#'history': {}, # Dict of timestamp and output stripped filename
|
||||
#'newest_history_key': 0,
|
||||
'body': None,
|
||||
'check_unique_lines': False, # On change-detected, compare against all history if its something new
|
||||
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
|
||||
'css_filter': '',
|
||||
'extract_text': [], # Extract text by regex after filters
|
||||
'extract_title_as_title': False,
|
||||
'fetch_backend': None,
|
||||
'fetch_processor': 'json_html_plaintext', # json_html_plaintext, image, rendered_webpage
|
||||
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
|
||||
'headers': {}, # Extra headers to send
|
||||
'ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||
'last_checked': 0,
|
||||
'last_error': False,
|
||||
'last_viewed': 0, # history key value of the last viewed via the [diff] link
|
||||
'method': 'GET',
|
||||
'notification_body': None,
|
||||
'notification_format': default_notification_format_for_watch,
|
||||
'notification_muted': False,
|
||||
'notification_title': None,
|
||||
'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)
|
||||
'paused': False,
|
||||
'previous_md5': False,
|
||||
'proxy': None, # Preferred proxy connection
|
||||
'subtractive_selectors': [],
|
||||
'tag': None,
|
||||
'text_should_not_be_present': [], # Text that should not present
|
||||
'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
|
||||
'title': None,
|
||||
'trigger_text': [], # List of text or regex to wait for until a change is detected
|
||||
'url': None,
|
||||
'uuid': str(uuid_builder.uuid4()),
|
||||
'webdriver_delay': None,
|
||||
'webdriver_js_execute_code': None, # Run before change-detection
|
||||
}
|
||||
@@ -145,18 +146,25 @@ class model(dict):
|
||||
bump = self.history
|
||||
return self.__newest_history_key
|
||||
|
||||
# Save some text file to the appropriate path and bump the history
|
||||
# result_obj from fetch_site_status.run()
|
||||
def save_history_text(self, contents, timestamp):
|
||||
def save_history_artifact(self, contents: bytes, timestamp):
|
||||
import uuid
|
||||
import logging
|
||||
import magic
|
||||
import re
|
||||
suffix = 'bin'
|
||||
# detect extension type
|
||||
mtype = magic.from_buffer(contents, mime=True)
|
||||
if mtype:
|
||||
r = re.search(r'image/(\w+)', mtype, re.IGNORECASE)
|
||||
if r:
|
||||
suffix = r.group(1)
|
||||
|
||||
output_path = "{}/{}".format(self.__datastore_path, self['uuid'])
|
||||
|
||||
self.ensure_data_dir_exists()
|
||||
|
||||
snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
|
||||
logging.debug("Saving history text {}".format(snapshot_fname))
|
||||
snapshot_fname = "{}/{}.{}".format(output_path, uuid.uuid4(), suffix)
|
||||
logging.debug("Saving history artifact {}".format(snapshot_fname))
|
||||
|
||||
with open(snapshot_fname, 'wb') as f:
|
||||
f.write(contents)
|
||||
|
||||
@@ -48,4 +48,48 @@ pytest tests/test_errorhandling.py
|
||||
pytest tests/visualselector/test_fetch_data.py
|
||||
|
||||
unset PLAYWRIGHT_DRIVER_URL
|
||||
docker kill $$-test_browserless
|
||||
docker kill $$-test_browserless
|
||||
|
||||
# Test proxy list handling, starting two squids on different ports
|
||||
# Each squid adds a different header to the response, which is the main thing we test for.
|
||||
docker run -d --name $$-squid-one --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf -p 3128:3128 ubuntu/squid:4.13-21.10_edge
|
||||
docker run -d --name $$-squid-two --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf -p 3129:3128 ubuntu/squid:4.13-21.10_edge
|
||||
|
||||
|
||||
# So, basic HTTP as env var test
|
||||
export HTTP_PROXY=http://localhost:3128
|
||||
export HTTPS_PROXY=http://localhost:3128
|
||||
pytest tests/proxy_list/test_proxy.py
|
||||
docker logs $$-squid-one 2>/dev/null|grep one.changedetection.io
|
||||
if [ $? -ne 0 ]
|
||||
then
|
||||
echo "Did not see a request to one.changedetection.io in the squid logs (while checking env vars HTTP_PROXY/HTTPS_PROXY)"
|
||||
fi
|
||||
unset HTTP_PROXY
|
||||
unset HTTPS_PROXY
|
||||
|
||||
|
||||
# 2nd test actually choose the preferred proxy from proxies.json
|
||||
cp tests/proxy_list/proxies.json-example ./test-datastore/proxies.json
|
||||
# Makes a watch use a preferred proxy
|
||||
pytest tests/proxy_list/test_multiple_proxy.py
|
||||
|
||||
# Should be a request in the default "first" squid
|
||||
docker logs $$-squid-one 2>/dev/null|grep chosen.changedetection.io
|
||||
if [ $? -ne 0 ]
|
||||
then
|
||||
echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy)"
|
||||
fi
|
||||
|
||||
# And one in the 'second' squid (user selects this as preferred)
|
||||
docker logs $$-squid-two 2>/dev/null|grep chosen.changedetection.io
|
||||
if [ $? -ne 0 ]
|
||||
then
|
||||
echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy)"
|
||||
fi
|
||||
|
||||
# @todo - test system override proxy selection and watch defaults, setup a 3rd squid?
|
||||
docker kill $$-squid-one
|
||||
docker kill $$-squid-two
|
||||
|
||||
|
||||
|
||||
149
changedetectionio/static/images/picture-frame.svg
Normal file
149
changedetectionio/static/images/picture-frame.svg
Normal file
@@ -0,0 +1,149 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Generator: Adobe Illustrator 19.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
|
||||
|
||||
<svg
|
||||
version="1.1"
|
||||
id="Layer_1"
|
||||
x="0px"
|
||||
y="0px"
|
||||
viewBox="0 0 20.745352 20.745251"
|
||||
xml:space="preserve"
|
||||
width="20.745352"
|
||||
height="20.745251"
|
||||
sodipodi:docname="picture-frame.svg"
|
||||
inkscape:version="1.1.1 (1:1.1+202109281949+c3084ef5ed)"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"><sodipodi:namedview
|
||||
id="namedview31"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pagecheckerboard="0"
|
||||
showgrid="false"
|
||||
fit-margin-top="0"
|
||||
fit-margin-left="0"
|
||||
fit-margin-right="0"
|
||||
fit-margin-bottom="0"
|
||||
inkscape:zoom="24.215073"
|
||||
inkscape:cx="11.810825"
|
||||
inkscape:cy="10.158962"
|
||||
inkscape:window-width="1920"
|
||||
inkscape:window-height="1056"
|
||||
inkscape:window-x="1920"
|
||||
inkscape:window-y="0"
|
||||
inkscape:window-maximized="1"
|
||||
inkscape:current-layer="g1325" /><defs
|
||||
id="defs57">
|
||||
|
||||
|
||||
</defs>
|
||||
<g
|
||||
id="g22"
|
||||
transform="translate(-141.68664,-143.32441)">
|
||||
|
||||
|
||||
|
||||
|
||||
<g
|
||||
id="g986"
|
||||
transform="matrix(0.09174031,0,0,0.09174031,139.41786,139.41786)"><g
|
||||
id="g1313" /><g
|
||||
id="g18">
|
||||
|
||||
|
||||
<g
|
||||
id="g1325"
|
||||
transform="matrix(1.0989302,0,0,1.0989302,-30.889712,-13.037446)"><g
|
||||
id="g1413"><rect
|
||||
x="58.112999"
|
||||
y="58.112999"
|
||||
style="fill:#95e1d3"
|
||||
width="190.77765"
|
||||
height="190.77299"
|
||||
id="rect4"
|
||||
rx="0"
|
||||
ry="0" /><polygon
|
||||
style="fill:#eaffd0"
|
||||
points="117.389,248.887 183.138,135.007 248.887,248.887 "
|
||||
id="polygon6" /><polygon
|
||||
style="fill:#eaffd0"
|
||||
points="100.26,175.887 58.113,248.887 117.389,248.887 129.898,227.221 "
|
||||
id="polygon8" /><circle
|
||||
style="fill:#fce38a"
|
||||
cx="141.82001"
|
||||
cy="119.433"
|
||||
r="16.547001"
|
||||
id="circle10" /><path
|
||||
style="fill:#414042"
|
||||
d="M 248.887,50.613 H 58.113 c -4.142,0 -7.5,3.357 -7.5,7.5 v 190.773 c 0,4.118 3.362,7.5 7.5,7.5 h 59.276 131.498 c 4.06,0 7.5,-3.304 7.5,-7.5 V 58.113 c 0,-4.142 -3.358,-7.5 -7.5,-7.5 z m -7.5,15 v 155.283 l -51.754,-89.64 c -2.886,-4.998 -10.11,-4.988 -12.99,0 l -46.745,80.965 -23.143,-40.085 c -2.886,-4.998 -10.11,-4.988 -12.99,0 l -28.151,48.76 V 65.613 Z m -141.127,125.274 20.978,36.335 -7.823,13.549 -0.356,0.616 H 71.103 Z m 30.12,50.5 6.013,-10.415 c 0.001,-0.002 0.002,-0.004 0.003,-0.006 l 46.742,-80.959 52.759,91.38 z"
|
||||
id="path14" /><path
|
||||
style="fill:#414042"
|
||||
d="m 141.82,143.48 c 13.259,0 24.046,-10.787 24.046,-24.047 0,-13.26 -10.787,-24.047 -24.046,-24.047 -13.259,0 -24.046,10.787 -24.046,24.047 0,13.26 10.786,24.047 24.046,24.047 z m 0,-33.093 c 4.988,0 9.046,4.059 9.046,9.047 0,4.988 -4.058,9.047 -9.046,9.047 -4.988,0 -9.046,-4.059 -9.046,-9.047 -0.001,-4.989 4.057,-9.047 9.046,-9.047 z"
|
||||
id="path16" /></g></g>
|
||||
</g></g>
|
||||
</g>
|
||||
<g
|
||||
id="g24"
|
||||
transform="translate(-141.68664,-143.32441)">
|
||||
</g>
|
||||
<g
|
||||
id="g26"
|
||||
transform="translate(-141.68664,-143.32441)">
|
||||
</g>
|
||||
<g
|
||||
id="g28"
|
||||
transform="translate(-141.68664,-143.32441)">
|
||||
</g>
|
||||
<g
|
||||
id="g30"
|
||||
transform="translate(-141.68664,-143.32441)">
|
||||
</g>
|
||||
<g
|
||||
id="g32"
|
||||
transform="translate(-141.68664,-143.32441)">
|
||||
</g>
|
||||
<g
|
||||
id="g34"
|
||||
transform="translate(-141.68664,-143.32441)">
|
||||
</g>
|
||||
<g
|
||||
id="g36"
|
||||
transform="translate(-141.68664,-143.32441)">
|
||||
</g>
|
||||
<g
|
||||
id="g38"
|
||||
transform="translate(-141.68664,-143.32441)">
|
||||
</g>
|
||||
<g
|
||||
id="g40"
|
||||
transform="translate(-141.68664,-143.32441)">
|
||||
</g>
|
||||
<g
|
||||
id="g42"
|
||||
transform="translate(-141.68664,-143.32441)">
|
||||
</g>
|
||||
<g
|
||||
id="g44"
|
||||
transform="translate(-141.68664,-143.32441)">
|
||||
</g>
|
||||
<g
|
||||
id="g46"
|
||||
transform="translate(-141.68664,-143.32441)">
|
||||
</g>
|
||||
<g
|
||||
id="g48"
|
||||
transform="translate(-141.68664,-143.32441)">
|
||||
</g>
|
||||
<g
|
||||
id="g50"
|
||||
transform="translate(-141.68664,-143.32441)">
|
||||
</g>
|
||||
<g
|
||||
id="g52"
|
||||
transform="translate(-141.68664,-143.32441)">
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 4.1 KiB |
@@ -578,3 +578,15 @@ ul {
|
||||
display: inline;
|
||||
height: 26px;
|
||||
vertical-align: middle; }
|
||||
|
||||
#quickwatch-fetch-processor {
|
||||
color: #fff;
|
||||
font-size: 80%; }
|
||||
#quickwatch-fetch-processor ul {
|
||||
padding: 0px;
|
||||
list-style-type: none; }
|
||||
#quickwatch-fetch-processor ul li {
|
||||
display: inline-block;
|
||||
margin-right: 1em; }
|
||||
#quickwatch-fetch-processor ul li label:hover {
|
||||
cursor: pointer; }
|
||||
|
||||
@@ -803,4 +803,24 @@ ul {
|
||||
padding: 0.5rem;
|
||||
border-radius: 5px;
|
||||
color: #ff3300;
|
||||
}
|
||||
}
|
||||
|
||||
#quickwatch-fetch-processor {
|
||||
color: #fff;
|
||||
font-size: 80%;
|
||||
|
||||
ul {
|
||||
padding: 0px;
|
||||
list-style-type: none;
|
||||
li {
|
||||
display: inline-block;
|
||||
margin-right: 1em;
|
||||
label {
|
||||
&:hover {
|
||||
cursor: pointer;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -81,8 +81,6 @@ class ChangeDetectionStore:
|
||||
except (FileNotFoundError, json.decoder.JSONDecodeError):
|
||||
if include_default_watches:
|
||||
print("Creating JSON store at", self.datastore_path)
|
||||
|
||||
self.add_watch(url='http://www.quotationspage.com/random.php', tag='test')
|
||||
self.add_watch(url='https://news.ycombinator.com/', tag='Tech news')
|
||||
self.add_watch(url='https://changedetection.io/CHANGELOG.txt', tag='changedetection.io')
|
||||
|
||||
@@ -113,9 +111,7 @@ class ChangeDetectionStore:
|
||||
self.__data['settings']['application']['api_access_token'] = secret
|
||||
|
||||
# Proxy list support - available as a selection in settings when text file is imported
|
||||
# CSV list
|
||||
# "name, address", or just "name"
|
||||
proxy_list_file = "{}/proxies.txt".format(self.datastore_path)
|
||||
proxy_list_file = "{}/proxies.json".format(self.datastore_path)
|
||||
if path.isfile(proxy_list_file):
|
||||
self.import_proxy_list(proxy_list_file)
|
||||
|
||||
@@ -437,20 +433,42 @@ class ChangeDetectionStore:
|
||||
unlink(item)
|
||||
|
||||
def import_proxy_list(self, filename):
|
||||
import csv
|
||||
with open(filename, newline='') as f:
|
||||
reader = csv.reader(f, skipinitialspace=True)
|
||||
# @todo This loop can could be improved
|
||||
l = []
|
||||
for row in reader:
|
||||
if len(row):
|
||||
if len(row)>=2:
|
||||
l.append(tuple(row[:2]))
|
||||
else:
|
||||
l.append(tuple([row[0], row[0]]))
|
||||
self.proxy_list = l if len(l) else None
|
||||
with open(filename) as f:
|
||||
self.proxy_list = json.load(f)
|
||||
print ("Registered proxy list", list(self.proxy_list.keys()))
|
||||
|
||||
|
||||
def get_preferred_proxy_for_watch(self, uuid):
|
||||
"""
|
||||
Returns the preferred proxy by ID key
|
||||
:param uuid: UUID
|
||||
:return: proxy "key" id
|
||||
"""
|
||||
|
||||
proxy_id = None
|
||||
if self.proxy_list is None:
|
||||
return None
|
||||
|
||||
# If its a valid one
|
||||
watch = self.data['watching'].get(uuid)
|
||||
|
||||
if watch.get('proxy') and watch.get('proxy') in list(self.proxy_list.keys()):
|
||||
return watch.get('proxy')
|
||||
|
||||
# not valid (including None), try the system one
|
||||
else:
|
||||
system_proxy_id = self.data['settings']['requests'].get('proxy')
|
||||
# Is not None and exists
|
||||
if self.proxy_list.get(system_proxy_id):
|
||||
return system_proxy_id
|
||||
|
||||
# Fallback - Did not resolve anything, use the first available
|
||||
if system_proxy_id is None:
|
||||
first_default = list(self.proxy_list)[0]
|
||||
return first_default
|
||||
|
||||
return None
|
||||
|
||||
# Run all updates
|
||||
# IMPORTANT - Each update could be run even when they have a new install and the schema is correct
|
||||
# So therefor - each `update_n` should be very careful about checking if it needs to actually run
|
||||
@@ -557,3 +575,11 @@ class ChangeDetectionStore:
|
||||
continue
|
||||
return
|
||||
|
||||
|
||||
# We incorrectly used common header overrides that should only apply to Requests
|
||||
# These are now handled in content_fetcher::html_requests and shouldnt be passed to Playwright/Selenium
|
||||
def update_7(self):
|
||||
# These were hard-coded in early versions
|
||||
for v in ['User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language']:
|
||||
if self.data['settings']['headers'].get(v):
|
||||
del self.data['settings']['headers'][v]
|
||||
|
||||
64
changedetectionio/templates/diff-image.html
Normal file
64
changedetectionio/templates/diff-image.html
Normal file
@@ -0,0 +1,64 @@
|
||||
{% extends 'base.html' %}
|
||||
|
||||
{% block content %}
|
||||
|
||||
<div id="settings">
|
||||
<h1>Differences</h1>
|
||||
<form class="pure-form " action="" method="GET">
|
||||
<fieldset>
|
||||
{% if versions|length >= 1 %}
|
||||
<label for="diff-version">Compare newest (<span id="current-v-date"></span>) with</label>
|
||||
<select id="diff-version" name="previous_version">
|
||||
{% for version in versions %}
|
||||
<option value="{{version}}" {% if version== current_previous_version %} selected="" {% endif %}>
|
||||
{{version}}
|
||||
</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<button type="submit" class="pure-button pure-button-primary">Go</button>
|
||||
{% endif %}
|
||||
</fieldset>
|
||||
</form>
|
||||
|
||||
</div>
|
||||
|
||||
<div id="diff-ui">
|
||||
<script
|
||||
defer
|
||||
src="https://unpkg.com/img-comparison-slider@7/dist/index.js"
|
||||
></script>
|
||||
<link
|
||||
rel="stylesheet"
|
||||
href="https://unpkg.com/img-comparison-slider@7/dist/styles.css"
|
||||
/>
|
||||
|
||||
<img-comparison-slider>
|
||||
<img slot="first" src="{{ url_for('render_diff_image', uuid=uuid, compare_date=current_previous_version) }}" />
|
||||
<img slot="second" src="{{ url_for('render_single_image', uuid=uuid, history_timestamp=current_previous_version) }}" />
|
||||
|
||||
</img-comparison-slider>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff.js')}}"></script>
|
||||
|
||||
<script defer="">
|
||||
window.onload = function() {
|
||||
/* Set current version date as local time in the browser also */
|
||||
var current_v = document.getElementById("current-v-date");
|
||||
var dateObject = new Date({{ newest_version_timestamp }}*1000);
|
||||
current_v.innerHTML=dateObject.toLocaleString();
|
||||
|
||||
/* Convert what is options from UTC time.time() to local browser time */
|
||||
var diffList=document.getElementById("diff-version");
|
||||
if (typeof(diffList) != 'undefined' && diffList != null) {
|
||||
for (var option of diffList.options) {
|
||||
var dateObject = new Date(option.value*1000);
|
||||
option.label=dateObject.toLocaleString();
|
||||
}
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
{% endblock %}
|
||||
@@ -25,7 +25,9 @@
|
||||
<ul>
|
||||
<li class="tab" id=""><a href="#general">General</a></li>
|
||||
<li class="tab"><a href="#request">Request</a></li>
|
||||
{% if 'visual-selector' in enabled_tabs %}
|
||||
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
|
||||
{%endif%}
|
||||
<li class="tab"><a href="#filters-and-triggers">Filters & Triggers</a></li>
|
||||
<li class="tab"><a href="#notifications">Notifications</a></li>
|
||||
</ul>
|
||||
@@ -77,6 +79,7 @@
|
||||
<span class="pure-form-message-inline">
|
||||
<p>Use the <strong>Basic</strong> method (default) where your watched site doesn't need Javascript to render.</p>
|
||||
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
|
||||
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using BrightData Proxies, find out more here.</a>
|
||||
</span>
|
||||
</div>
|
||||
{% if form.proxy %}
|
||||
@@ -154,6 +157,7 @@ User-Agent: wonderbra 1.0") }}
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="filters-and-triggers">
|
||||
{% if 'text-filters-and-triggers' in enabled_tabs %}
|
||||
<div class="pure-control-group">
|
||||
<strong>Pro-tips:</strong><br/>
|
||||
<ul>
|
||||
@@ -165,12 +169,14 @@ User-Agent: wonderbra 1.0") }}
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.check_unique_lines) }}
|
||||
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
|
||||
</div>
|
||||
</fieldset>
|
||||
{% endif %}
|
||||
<div class="pure-control-group">
|
||||
{% set field = render_field(form.css_filter,
|
||||
placeholder=".class-name or #some-id, or other CSS selector rule.",
|
||||
@@ -183,8 +189,12 @@ User-Agent: wonderbra 1.0") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
|
||||
<li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <code>"json:"</code>, use <code>json:$</code> to force re-formatting if required, <a
|
||||
href="https://jsonpath.com/" target="new">test your JSONPath here</a></li>
|
||||
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a>.
|
||||
<ul>
|
||||
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
|
||||
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash,
|
||||
<ul>
|
||||
<li>Example: <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a
|
||||
@@ -193,10 +203,13 @@ User-Agent: wonderbra 1.0") }}
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
Please be sure that you thoroughly understand how to write CSS or JSONPath, XPath selector rules before filing an issue on GitHub! <a
|
||||
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath, or jq selector rules before filing an issue on GitHub! <a
|
||||
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{% if 'text-filters-and-triggers' in enabled_tabs %}
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.subtractive_selectors, rows=5, placeholder="header
|
||||
footer
|
||||
@@ -272,6 +285,8 @@ Unavailable") }}
|
||||
</span>
|
||||
</div>
|
||||
</fieldset>
|
||||
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
|
||||
|
||||
11
changedetectionio/templates/preview-image.html
Normal file
11
changedetectionio/templates/preview-image.html
Normal file
@@ -0,0 +1,11 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
<div id="settings">
|
||||
<h1>Preview</h1>
|
||||
</div>
|
||||
|
||||
<div id="diff-ui">
|
||||
<img style="max-width: 100%" src="{{ url_for('render_single_image', uuid=uuid, history_timestamp=newest_history_key) }}" />
|
||||
</div>
|
||||
|
||||
{% endblock %}
|
||||
@@ -99,6 +99,8 @@
|
||||
<p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
|
||||
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
|
||||
</span>
|
||||
<br/>
|
||||
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using BrightData Proxies, find out more here.</a>
|
||||
</div>
|
||||
<fieldset class="pure-group" id="webdriver-override-options">
|
||||
<div class="pure-form-message-inline">
|
||||
|
||||
@@ -15,10 +15,18 @@
|
||||
<div>
|
||||
{{ render_simple_field(form.url, placeholder="https://...", required=true) }}
|
||||
{{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }}
|
||||
<span>
|
||||
{{ render_simple_field(form.watch_submit_button, title="Watch this URL!" ) }}
|
||||
{{ render_simple_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
|
||||
</span>
|
||||
{% if webdriver_enabled %}
|
||||
<div id="quickwatch-fetch-processor">
|
||||
{{ render_field(form.fetch_processor) }}
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div>
|
||||
{{ render_simple_field(form.watch_submit_button, title="Watch this URL!" ) }}
|
||||
{{ render_simple_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</fieldset>
|
||||
@@ -87,11 +95,11 @@
|
||||
<a class="state-{{'on' if watch.notification_muted}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications"/></a>
|
||||
</td>
|
||||
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.url.replace('source:','') }}"></a>
|
||||
<a href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /></a>
|
||||
|
||||
{%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" />{% endif %}
|
||||
<a class="external" title="Open in new window" target="_blank" rel="noopener" href="{{ watch.url.replace('source:','') }}"></a>
|
||||
<a href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" alt="Share" title="Share"/></a>
|
||||
|
||||
{%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" alt="Fetching with Chrome" title="Fetching with Chrome"/>{% endif %}
|
||||
{%if watch.fetch_processor == "image" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='picture-frame.svg')}}" alt="Comparing graphically" title="Comparing graphically"/>{% endif %}
|
||||
{% if watch.last_error is defined and watch.last_error != False %}
|
||||
<div class="fetch-error">{{ watch.last_error }}</div>
|
||||
{% endif %}
|
||||
@@ -114,10 +122,20 @@
|
||||
class="recheck pure-button button-small pure-button-primary">{% if watch.uuid in queued_uuids %}Queued{% else %}Recheck{% endif %}</a>
|
||||
<a href="{{ url_for('edit_page', uuid=watch.uuid)}}" class="pure-button button-small pure-button-primary">Edit</a>
|
||||
{% if watch.history_n >= 2 %}
|
||||
<a href="{{ url_for('diff_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Diff</a>
|
||||
{% if watch.fetch_processor == "image" or watch.fetch_processor == "rendered_webpage" %}
|
||||
<a href="{{ url_for('diff_image_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Diff</a>
|
||||
{% else %}
|
||||
<a href="{{ url_for('diff_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Diff</a>
|
||||
{% endif %}
|
||||
|
||||
{% else %}
|
||||
{% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%}
|
||||
<a href="{{ url_for('preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary">Preview</a>
|
||||
|
||||
{% if watch.fetch_processor == "image" or watch.fetch_processor == "rendered_webpage" %}
|
||||
<a href="{{ url_for('preview_image_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Preview</a>
|
||||
{% else %}
|
||||
<a href="{{ url_for('preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary">Preview</a>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</td>
|
||||
|
||||
2
changedetectionio/tests/proxy_list/__init__.py
Normal file
2
changedetectionio/tests/proxy_list/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""Tests for the app."""
|
||||
|
||||
14
changedetectionio/tests/proxy_list/conftest.py
Normal file
14
changedetectionio/tests/proxy_list/conftest.py
Normal file
@@ -0,0 +1,14 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
from .. import conftest
|
||||
|
||||
#def pytest_addoption(parser):
|
||||
# parser.addoption("--url_suffix", action="store", default="identifier for request")
|
||||
|
||||
|
||||
#def pytest_generate_tests(metafunc):
|
||||
# # This is called for every test. Only get/set command line arguments
|
||||
# # if the argument is specified in the list of test "fixturenames".
|
||||
# option_value = metafunc.config.option.url_suffix
|
||||
# if 'url_suffix' in metafunc.fixturenames and option_value is not None:
|
||||
# metafunc.parametrize("url_suffix", [option_value])
|
||||
10
changedetectionio/tests/proxy_list/proxies.json-example
Normal file
10
changedetectionio/tests/proxy_list/proxies.json-example
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"proxy-one": {
|
||||
"label": "One",
|
||||
"url": "http://127.0.0.1:3128"
|
||||
},
|
||||
"proxy-two": {
|
||||
"label": "two",
|
||||
"url": "http://127.0.0.1:3129"
|
||||
}
|
||||
}
|
||||
41
changedetectionio/tests/proxy_list/squid.conf
Normal file
41
changedetectionio/tests/proxy_list/squid.conf
Normal file
@@ -0,0 +1,41 @@
|
||||
acl localnet src 0.0.0.1-0.255.255.255 # RFC 1122 "this" network (LAN)
|
||||
acl localnet src 10.0.0.0/8 # RFC 1918 local private network (LAN)
|
||||
acl localnet src 100.64.0.0/10 # RFC 6598 shared address space (CGN)
|
||||
acl localnet src 169.254.0.0/16 # RFC 3927 link-local (directly plugged) machines
|
||||
acl localnet src 172.16.0.0/12 # RFC 1918 local private network (LAN)
|
||||
acl localnet src 192.168.0.0/16 # RFC 1918 local private network (LAN)
|
||||
acl localnet src fc00::/7 # RFC 4193 local private network range
|
||||
acl localnet src fe80::/10 # RFC 4291 link-local (directly plugged) machines
|
||||
acl localnet src 159.65.224.174
|
||||
acl SSL_ports port 443
|
||||
acl Safe_ports port 80 # http
|
||||
acl Safe_ports port 21 # ftp
|
||||
acl Safe_ports port 443 # https
|
||||
acl Safe_ports port 70 # gopher
|
||||
acl Safe_ports port 210 # wais
|
||||
acl Safe_ports port 1025-65535 # unregistered ports
|
||||
acl Safe_ports port 280 # http-mgmt
|
||||
acl Safe_ports port 488 # gss-http
|
||||
acl Safe_ports port 591 # filemaker
|
||||
acl Safe_ports port 777 # multiling http
|
||||
acl CONNECT method CONNECT
|
||||
|
||||
http_access deny !Safe_ports
|
||||
http_access deny CONNECT !SSL_ports
|
||||
http_access allow localhost manager
|
||||
http_access deny manager
|
||||
http_access allow localhost
|
||||
http_access allow localnet
|
||||
http_access deny all
|
||||
http_port 3128
|
||||
coredump_dir /var/spool/squid
|
||||
refresh_pattern ^ftp: 1440 20% 10080
|
||||
refresh_pattern ^gopher: 1440 0% 1440
|
||||
refresh_pattern -i (/cgi-bin/|\?) 0 0% 0
|
||||
refresh_pattern \/(Packages|Sources)(|\.bz2|\.gz|\.xz)$ 0 0% 0 refresh-ims
|
||||
refresh_pattern \/Release(|\.gpg)$ 0 0% 0 refresh-ims
|
||||
refresh_pattern \/InRelease$ 0 0% 0 refresh-ims
|
||||
refresh_pattern \/(Translation-.*)(|\.bz2|\.gz|\.xz)$ 0 0% 0 refresh-ims
|
||||
refresh_pattern . 0 20% 4320
|
||||
logfile_rotate 0
|
||||
|
||||
38
changedetectionio/tests/proxy_list/test_multiple_proxy.py
Normal file
38
changedetectionio/tests/proxy_list/test_multiple_proxy.py
Normal file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from ..util import live_server_setup
|
||||
|
||||
def test_preferred_proxy(client, live_server):
|
||||
time.sleep(1)
|
||||
live_server_setup(live_server)
|
||||
time.sleep(1)
|
||||
url = "http://chosen.changedetection.io"
|
||||
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
# Because a URL wont show in squid/proxy logs due it being SSLed
|
||||
# Use plain HTTP or a specific domain-name here
|
||||
data={"urls": url},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b"1 Imported" in res.data
|
||||
|
||||
time.sleep(2)
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first"),
|
||||
data={
|
||||
"css_filter": "",
|
||||
"fetch_backend": "html_requests",
|
||||
"headers": "",
|
||||
"proxy": "proxy-two",
|
||||
"tag": "",
|
||||
"url": url,
|
||||
},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated watch." in res.data
|
||||
time.sleep(2)
|
||||
# Now the request should appear in the second-squid logs
|
||||
19
changedetectionio/tests/proxy_list/test_proxy.py
Normal file
19
changedetectionio/tests/proxy_list/test_proxy.py
Normal file
@@ -0,0 +1,19 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||
|
||||
# just make a request, we will grep in the docker logs to see it actually got called
|
||||
def test_check_basic_change_detection_functionality(client, live_server):
|
||||
live_server_setup(live_server)
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
# Because a URL wont show in squid/proxy logs due it being SSLed
|
||||
# Use plain HTTP or a specific domain-name here
|
||||
data={"urls": "http://one.changedetection.io"},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b"1 Imported" in res.data
|
||||
time.sleep(3)
|
||||
@@ -47,7 +47,6 @@ def set_modified_response():
|
||||
|
||||
# Test that the CSS extraction works how we expect, important here is the right placing of new lines \n's
|
||||
def test_css_filter_output():
|
||||
from changedetectionio import fetch_site_status
|
||||
from inscriptis import get_text
|
||||
|
||||
# Check text with sub-parts renders correctly
|
||||
|
||||
@@ -71,7 +71,6 @@ def set_modified_response():
|
||||
|
||||
|
||||
def test_element_removal_output():
|
||||
from changedetectionio import fetch_site_status
|
||||
from inscriptis import get_text
|
||||
|
||||
# Check text with sub-parts renders correctly
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from . util import live_server_setup
|
||||
from changedetectionio import html_tools
|
||||
|
||||
@@ -11,7 +9,7 @@ def test_setup(live_server):
|
||||
# Unit test of the stripper
|
||||
# Always we are dealing in utf-8
|
||||
def test_strip_regex_text_func():
|
||||
from changedetectionio import fetch_site_status
|
||||
from ..fetch_processor import json_html_plaintext
|
||||
|
||||
test_content = """
|
||||
but sometimes we want to remove the lines.
|
||||
@@ -23,7 +21,7 @@ def test_strip_regex_text_func():
|
||||
|
||||
ignore_lines = ["sometimes", "/\s\d{2,3}\s/", "/ignore-case text/"]
|
||||
|
||||
fetcher = fetch_site_status.perform_site_check(datastore=False)
|
||||
fetcher = json_html_plaintext.perform_site_check(datastore=False)
|
||||
stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
|
||||
|
||||
assert b"but 1 lines" in stripped_content
|
||||
|
||||
@@ -11,7 +11,7 @@ def test_setup(live_server):
|
||||
# Unit test of the stripper
|
||||
# Always we are dealing in utf-8
|
||||
def test_strip_text_func():
|
||||
from changedetectionio import fetch_site_status
|
||||
from ..fetch_processor import json_html_plaintext
|
||||
|
||||
test_content = """
|
||||
Some content
|
||||
@@ -23,7 +23,9 @@ def test_strip_text_func():
|
||||
|
||||
ignore_lines = ["sometimes"]
|
||||
|
||||
fetcher = fetch_site_status.perform_site_check(datastore=False)
|
||||
from ..fetch_processor import json_html_plaintext
|
||||
|
||||
fetcher = json_html_plaintext.perform_site_check(datastore=False)
|
||||
stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
|
||||
|
||||
assert b"sometimes" not in stripped_content
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# coding=utf-8
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from flask import url_for, escape
|
||||
from . util import live_server_setup
|
||||
import pytest
|
||||
|
||||
@@ -36,16 +36,26 @@ and it can also be repeated
|
||||
from .. import html_tools
|
||||
|
||||
# See that we can find the second <script> one, which is not broken, and matches our filter
|
||||
text = html_tools.extract_json_as_string(content, "$.offers.price")
|
||||
text = html_tools.extract_json_as_string(content, "json:$.offers.price")
|
||||
assert text == "23.5"
|
||||
|
||||
text = html_tools.extract_json_as_string('{"id":5}', "$.id")
|
||||
# also check for jq
|
||||
text = html_tools.extract_json_as_string(content, "jq:.offers.price")
|
||||
assert text == "23.5"
|
||||
|
||||
text = html_tools.extract_json_as_string('{"id":5}', "json:$.id")
|
||||
assert text == "5"
|
||||
|
||||
text = html_tools.extract_json_as_string('{"id":5}', "jq:.id")
|
||||
assert text == "5"
|
||||
|
||||
# When nothing at all is found, it should throw JSONNOTFound
|
||||
# Which is caught and shown to the user in the watch-overview table
|
||||
with pytest.raises(html_tools.JSONNotFound) as e_info:
|
||||
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "$.id")
|
||||
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "json:$.id")
|
||||
|
||||
with pytest.raises(html_tools.JSONNotFound) as e_info:
|
||||
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")
|
||||
|
||||
def set_original_ext_response():
|
||||
data = """
|
||||
@@ -66,6 +76,7 @@ def set_original_ext_response():
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write(data)
|
||||
return None
|
||||
|
||||
def set_modified_ext_response():
|
||||
data = """
|
||||
@@ -86,6 +97,7 @@ def set_modified_ext_response():
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write(data)
|
||||
return None
|
||||
|
||||
def set_original_response():
|
||||
test_return_data = """
|
||||
@@ -184,10 +196,10 @@ def test_check_json_without_filter(client, live_server):
|
||||
assert b'"<b>' in res.data
|
||||
assert res.data.count(b'{\n') >= 2
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
def test_check_json_filter(client, live_server):
|
||||
json_filter = 'json:boss.name'
|
||||
|
||||
def check_json_filter(json_filter, client, live_server):
|
||||
set_original_response()
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
@@ -226,7 +238,7 @@ def test_check_json_filter(client, live_server):
|
||||
res = client.get(
|
||||
url_for("edit_page", uuid="first"),
|
||||
)
|
||||
assert bytes(json_filter.encode('utf-8')) in res.data
|
||||
assert bytes(escape(json_filter).encode('utf-8')) in res.data
|
||||
|
||||
# Trigger a check
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
@@ -252,10 +264,16 @@ def test_check_json_filter(client, live_server):
|
||||
# And #462 - check we see the proper utf-8 string there
|
||||
assert "Örnsköldsvik".encode('utf-8') in res.data
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
def test_check_json_filter_bool_val(client, live_server):
|
||||
json_filter = "json:$['available']"
|
||||
def test_check_jsonpath_filter(client, live_server):
|
||||
check_json_filter('json:boss.name', client, live_server)
|
||||
|
||||
def test_check_jq_filter(client, live_server):
|
||||
check_json_filter('jq:.boss.name', client, live_server)
|
||||
|
||||
def check_json_filter_bool_val(json_filter, client, live_server):
|
||||
set_original_response()
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
@@ -304,14 +322,21 @@ def test_check_json_filter_bool_val(client, live_server):
|
||||
# But the change should be there, tho its hard to test the change was detected because it will show old and new versions
|
||||
assert b'false' in res.data
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
def test_check_jsonpath_filter_bool_val(client, live_server):
|
||||
check_json_filter_bool_val("json:$['available']", client, live_server)
|
||||
|
||||
def test_check_jq_filter_bool_val(client, live_server):
|
||||
check_json_filter_bool_val("jq:.available", client, live_server)
|
||||
|
||||
# Re #265 - Extended JSON selector test
|
||||
# Stuff to consider here
|
||||
# - Selector should be allowed to return empty when it doesnt match (people might wait for some condition)
|
||||
# - The 'diff' tab could show the old and new content
|
||||
# - Form should let us enter a selector that doesnt (yet) match anything
|
||||
def test_check_json_ext_filter(client, live_server):
|
||||
json_filter = 'json:$[?(@.status==Sold)]'
|
||||
|
||||
def check_json_ext_filter(json_filter, client, live_server):
|
||||
set_original_ext_response()
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
@@ -350,7 +375,7 @@ def test_check_json_ext_filter(client, live_server):
|
||||
res = client.get(
|
||||
url_for("edit_page", uuid="first"),
|
||||
)
|
||||
assert bytes(json_filter.encode('utf-8')) in res.data
|
||||
assert bytes(escape(json_filter).encode('utf-8')) in res.data
|
||||
|
||||
# Trigger a check
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
@@ -376,3 +401,11 @@ def test_check_json_ext_filter(client, live_server):
|
||||
assert b'ForSale' not in res.data
|
||||
assert b'Sold' in res.data
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
def test_check_jsonpath_ext_filter(client, live_server):
|
||||
check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)
|
||||
|
||||
def test_check_jq_ext_filter(client, live_server):
|
||||
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
|
||||
@@ -92,7 +92,6 @@ def wait_for_all_checks(client):
|
||||
if not b'Checking now' in res.data:
|
||||
break
|
||||
logging.getLogger().info("Waiting for watch-list to not say 'Checking now'.. {}".format(attempt))
|
||||
|
||||
attempt += 1
|
||||
|
||||
def live_server_setup(live_server):
|
||||
|
||||
@@ -13,9 +13,9 @@ def test_visual_selector_content_ready(client, live_server):
|
||||
live_server_setup(live_server)
|
||||
time.sleep(1)
|
||||
|
||||
# Add our URL to the import page, maybe better to use something we control?
|
||||
# We use an external URL because the docker container is too difficult to setup to connect back to the pytest socket
|
||||
test_url = 'https://news.ycombinator.com'
|
||||
# Add our URL to the import page, because the docker container (playwright/selenium) wont be able to connect to our usual test url
|
||||
test_url = "https://changedetection.io/ci-test/test-runjs.html"
|
||||
|
||||
res = client.post(
|
||||
url_for("form_quick_watch_add"),
|
||||
data={"url": test_url, "tag": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
|
||||
@@ -25,13 +25,27 @@ def test_visual_selector_content_ready(client, live_server):
|
||||
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first", unpause_on_save=1),
|
||||
data={"css_filter": ".does-not-exist", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_webdriver"},
|
||||
data={
|
||||
"url": test_url,
|
||||
"tag": "",
|
||||
"headers": "",
|
||||
'fetch_backend': "html_webdriver",
|
||||
'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();'
|
||||
},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"unpaused" in res.data
|
||||
time.sleep(1)
|
||||
wait_for_all_checks(client)
|
||||
uuid = extract_UUID_from_client(client)
|
||||
|
||||
# Check the JS execute code before extract worked
|
||||
res = client.get(
|
||||
url_for("preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b'I smell JavaScript' in res.data
|
||||
|
||||
assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist"
|
||||
assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist"
|
||||
|
||||
|
||||
@@ -120,10 +120,6 @@ class update_worker(threading.Thread):
|
||||
os.unlink(full_path)
|
||||
|
||||
def run(self):
|
||||
from changedetectionio import fetch_site_status
|
||||
|
||||
update_handler = fetch_site_status.perform_site_check(datastore=self.datastore)
|
||||
|
||||
while not self.app.config.exit.is_set():
|
||||
|
||||
try:
|
||||
@@ -135,21 +131,34 @@ class update_worker(threading.Thread):
|
||||
self.current_uuid = uuid
|
||||
|
||||
if uuid in list(self.datastore.data['watching'].keys()):
|
||||
update_handler = None # Interface object
|
||||
changed_detected = False
|
||||
contents = b''
|
||||
screenshot = False
|
||||
update_obj= {}
|
||||
xpath_data = False
|
||||
update_obj = {}
|
||||
process_changedetection_results = True
|
||||
print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, self.datastore.data['watching'][uuid]['url']))
|
||||
watch = self.datastore.data['watching'].get(uuid)
|
||||
print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, watch.get('url')))
|
||||
now = time.time()
|
||||
|
||||
try:
|
||||
changed_detected, update_obj, contents = update_handler.run(uuid)
|
||||
update_handler = None
|
||||
|
||||
if watch.get('fetch_processor') == 'image':
|
||||
from .fetch_processor import image as processor_image
|
||||
update_handler = processor_image.perform_site_check(datastore=self.datastore)
|
||||
elif watch.get('fetch_processor') == 'rendered_webpage':
|
||||
from .fetch_processor import image as processor_rendered_webpage
|
||||
update_handler = processor_rendered_webpage.perform_site_check(datastore=self.datastore)
|
||||
else:
|
||||
# Anything else for now will be `json_html_plaintext`
|
||||
from .fetch_processor import json_html_plaintext as processor_json_html_plaintext
|
||||
update_handler = processor_json_html_plaintext.perform_site_check(datastore=self.datastore)
|
||||
|
||||
changed_detected, update_obj = update_handler.run(uuid)
|
||||
|
||||
# Re #342
|
||||
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
|
||||
# We then convert/.decode('utf-8') for the notification etc
|
||||
if not isinstance(contents, (bytes, bytearray)):
|
||||
if not isinstance(update_handler.contents, (bytes, bytearray)):
|
||||
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
|
||||
except PermissionError as e:
|
||||
self.app.logger.error("File permission error updating", uuid, str(e))
|
||||
@@ -256,13 +265,12 @@ class update_worker(threading.Thread):
|
||||
# Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
|
||||
if process_changedetection_results:
|
||||
try:
|
||||
watch = self.datastore.data['watching'][uuid]
|
||||
fname = "" # Saved history text filename
|
||||
watch = self.datastore.data['watching'].get(uuid)
|
||||
|
||||
# For the FIRST time we check a site, or a change detected, save the snapshot.
|
||||
if changed_detected or not watch['last_checked']:
|
||||
# A change was detected
|
||||
watch.save_history_text(contents=contents, timestamp=str(round(time.time())))
|
||||
watch.save_history_artifact(contents=update_handler.contents, timestamp=str(round(time.time())))
|
||||
|
||||
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
||||
|
||||
|
||||
@@ -6,6 +6,8 @@ services:
|
||||
hostname: changedetection
|
||||
volumes:
|
||||
- changedetection-data:/datastore
|
||||
# Configurable proxy list support, see https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#proxy-list-support
|
||||
# - ./proxies.json:/datastore/proxies.json
|
||||
|
||||
# environment:
|
||||
# Default listening port, can also be changed with the -p option
|
||||
|
||||
BIN
docs/proxy-example.jpg
Normal file
BIN
docs/proxy-example.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 46 KiB |
@@ -10,15 +10,19 @@ flask_restful
|
||||
pytz
|
||||
|
||||
# Set these versions together to avoid a RequestsDependencyWarning
|
||||
requests[socks] ~= 2.26
|
||||
# >= 2.26 also adds Brotli support if brotli is installed
|
||||
brotli ~= 1.0
|
||||
requests[socks] ~= 2.28
|
||||
|
||||
urllib3 > 1.26
|
||||
chardet > 2.3.0
|
||||
|
||||
wtforms ~= 3.0
|
||||
jsonpath-ng ~= 1.5.3
|
||||
jq ~= 1.3.0
|
||||
|
||||
# Notification library
|
||||
apprise ~= 1.0.0
|
||||
apprise ~= 1.1.0
|
||||
|
||||
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
|
||||
paho-mqtt
|
||||
@@ -42,3 +46,11 @@ selenium ~= 4.1.0
|
||||
werkzeug ~= 2.0.0
|
||||
|
||||
# playwright is installed at Dockerfile build time because it's not available on all platforms
|
||||
|
||||
|
||||
imagehash ~= 4.3.0
|
||||
pillow
|
||||
scikit-image
|
||||
imutils
|
||||
opencv-python
|
||||
python-magic
|
||||
|
||||
Reference in New Issue
Block a user