Compare commits
24 Commits
0.39.10
...
ticket-16-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
487967de29 | ||
|
|
304ef746e4 | ||
|
|
082634f851 | ||
|
|
b9222e3243 | ||
|
|
1d92d9461a | ||
|
|
334010025f | ||
|
|
81aa8fa16b | ||
|
|
c79d6824e3 | ||
|
|
946377d2be | ||
|
|
5db9a30ad4 | ||
|
|
1d060225e1 | ||
|
|
7e0f0d0fd8 | ||
|
|
8b2afa2220 | ||
|
|
f55ffa0f62 | ||
|
|
942c3f021f | ||
|
|
5483f5d694 | ||
|
|
f2fa638480 | ||
|
|
82d1a7f73e | ||
|
|
9fc291fb63 | ||
|
|
3e8a15456a | ||
|
|
2a03f3f57e | ||
|
|
ffad5cca97 | ||
|
|
60a9a786e0 | ||
|
|
165e950e55 |
20
.github/workflows/containers.yml
vendored
20
.github/workflows/containers.yml
vendored
@@ -2,16 +2,20 @@ name: Build and push containers
|
||||
|
||||
on:
|
||||
# Automatically triggered by a testing workflow passing, but this is only checked when it lands in the `master`/default branch
|
||||
workflow_run:
|
||||
workflows: ["ChangeDetection.io Test"]
|
||||
branches: [master]
|
||||
tags: ['0.*']
|
||||
types: [completed]
|
||||
# workflow_run:
|
||||
# workflows: ["ChangeDetection.io Test"]
|
||||
# branches: [master]
|
||||
# tags: ['0.*']
|
||||
# types: [completed]
|
||||
|
||||
# Or a new tagged release
|
||||
release:
|
||||
types: [published, edited]
|
||||
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
jobs:
|
||||
metadata:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -91,8 +95,7 @@ jobs:
|
||||
file: ./Dockerfile
|
||||
push: true
|
||||
tags: |
|
||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest
|
||||
ghcr.io/${{ github.repository }}:latest
|
||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest,ghcr.io/${{ github.repository }}:latest
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
|
||||
cache-from: type=local,src=/tmp/.buildx-cache
|
||||
cache-to: type=local,dest=/tmp/.buildx-cache
|
||||
@@ -107,8 +110,7 @@ jobs:
|
||||
file: ./Dockerfile
|
||||
push: true
|
||||
tags: |
|
||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:${{ github.event.release.tag_name }}
|
||||
ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }}
|
||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:${{ github.event.release.tag_name }},ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }}
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
|
||||
cache-from: type=local,src=/tmp/.buildx-cache
|
||||
cache-to: type=local,dest=/tmp/.buildx-cache
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -7,4 +7,6 @@ __pycache__
|
||||
.pytest_cache
|
||||
build
|
||||
dist
|
||||
venv
|
||||
*.egg-info*
|
||||
.vscode/settings.json
|
||||
|
||||
@@ -2,5 +2,5 @@ recursive-include changedetectionio/templates *
|
||||
recursive-include changedetectionio/static *
|
||||
include changedetection.py
|
||||
global-exclude *.pyc
|
||||
global-exclude *node_modules*
|
||||
global-exclude node_modules
|
||||
global-exclude venv
|
||||
@@ -9,10 +9,10 @@ _Know when web pages change! Stay ontop of new information!_
|
||||
|
||||
Live your data-life *pro-actively* instead of *re-actively*.
|
||||
|
||||
Open source web page monitoring, notification and change detection.
|
||||
Free, Open-source web page monitoring, notification and change detection. Don't have time? [Try our $6.99/month plan - unlimited checks and watches!](https://lemonade.changedetection.io/start)
|
||||
|
||||
|
||||
<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />
|
||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start)
|
||||
|
||||
|
||||
**Get your own private instance now! Let us host it for you!**
|
||||
@@ -163,9 +163,9 @@ See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configura
|
||||
|
||||
Raspberry Pi and linux/arm/v6 linux/arm/v7 arm64 devices are supported! See the wiki for [details](https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver)
|
||||
|
||||
## Windows native support?
|
||||
## Windows support?
|
||||
|
||||
Sorry not yet :( https://github.com/dgtlmoon/changedetection.io/labels/windows
|
||||
YES! See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Microsoft-Windows
|
||||
|
||||
## Support us
|
||||
|
||||
|
||||
@@ -1,110 +1,11 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
# Launch as a eventlet.wsgi server instance.
|
||||
|
||||
import getopt
|
||||
import os
|
||||
import sys
|
||||
|
||||
import eventlet
|
||||
import eventlet.wsgi
|
||||
import changedetectionio
|
||||
|
||||
from changedetectionio import store
|
||||
|
||||
def main():
|
||||
ssl_mode = False
|
||||
host = ''
|
||||
port = os.environ.get('PORT') or 5000
|
||||
do_cleanup = False
|
||||
|
||||
# Must be absolute so that send_from_directory doesnt try to make it relative to backend/
|
||||
datastore_path = os.path.join(os.getcwd(), "datastore")
|
||||
|
||||
try:
|
||||
opts, args = getopt.getopt(sys.argv[1:], "Ccsd:h:p:", "port")
|
||||
except getopt.GetoptError:
|
||||
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path]')
|
||||
sys.exit(2)
|
||||
|
||||
create_datastore_dir = False
|
||||
|
||||
for opt, arg in opts:
|
||||
# if opt == '--purge':
|
||||
# Remove history, the actual files you need to delete manually.
|
||||
# for uuid, watch in datastore.data['watching'].items():
|
||||
# watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None})
|
||||
|
||||
if opt == '-s':
|
||||
ssl_mode = True
|
||||
|
||||
if opt == '-h':
|
||||
host = arg
|
||||
|
||||
if opt == '-p':
|
||||
port = int(arg)
|
||||
|
||||
if opt == '-d':
|
||||
datastore_path = arg
|
||||
|
||||
# Cleanup (remove text files that arent in the index)
|
||||
if opt == '-c':
|
||||
do_cleanup = True
|
||||
|
||||
# Create the datadir if it doesnt exist
|
||||
if opt == '-C':
|
||||
create_datastore_dir = True
|
||||
|
||||
# isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
|
||||
app_config = {'datastore_path': datastore_path}
|
||||
|
||||
if not os.path.isdir(app_config['datastore_path']):
|
||||
if create_datastore_dir:
|
||||
os.mkdir(app_config['datastore_path'])
|
||||
else:
|
||||
print ("ERROR: Directory path for the datastore '{}' does not exist, cannot start, please make sure the directory exists.\n"
|
||||
"Alternatively, use the -C parameter.".format(app_config['datastore_path']),file=sys.stderr)
|
||||
sys.exit(2)
|
||||
|
||||
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=changedetectionio.__version__)
|
||||
app = changedetectionio.changedetection_app(app_config, datastore)
|
||||
|
||||
# Go into cleanup mode
|
||||
if do_cleanup:
|
||||
datastore.remove_unused_snapshots()
|
||||
|
||||
app.config['datastore_path'] = datastore_path
|
||||
|
||||
|
||||
@app.context_processor
|
||||
def inject_version():
|
||||
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
|
||||
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
|
||||
has_password=datastore.data['settings']['application']['password'] != False
|
||||
)
|
||||
|
||||
# Proxy sub-directory support
|
||||
# Set environment var USE_X_SETTINGS=1 on this script
|
||||
# And then in your proxy_pass settings
|
||||
#
|
||||
# proxy_set_header Host "localhost";
|
||||
# proxy_set_header X-Forwarded-Prefix /app;
|
||||
|
||||
if os.getenv('USE_X_SETTINGS'):
|
||||
print ("USE_X_SETTINGS is ENABLED\n")
|
||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
|
||||
|
||||
if ssl_mode:
|
||||
# @todo finalise SSL config, but this should get you in the right direction if you need it.
|
||||
eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port)),
|
||||
certfile='cert.pem',
|
||||
keyfile='privkey.pem',
|
||||
server_side=True), app)
|
||||
|
||||
else:
|
||||
eventlet.wsgi.server(eventlet.listen((host, int(port))), app)
|
||||
# Entry-point for running from the CLI when not installed via Pip, Pip will handle the console_scripts entry_points's from setup.py
|
||||
# It's recommended to use `pip3 install changedetection.io` and start with `changedetection.py` instead, it will be linkd to your global path.
|
||||
# or Docker.
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
|
||||
from changedetectionio import changedetection
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
changedetection.main()
|
||||
|
||||
1
changedetectionio/.gitignore
vendored
Normal file
1
changedetectionio/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
test-datastore
|
||||
@@ -35,10 +35,11 @@ from flask import (
|
||||
url_for,
|
||||
)
|
||||
from flask_login import login_required
|
||||
from flask_wtf import CSRFProtect
|
||||
|
||||
from changedetectionio import html_tools
|
||||
|
||||
__version__ = '0.39.10'
|
||||
__version__ = '0.39.11'
|
||||
|
||||
datastore = None
|
||||
|
||||
@@ -52,11 +53,10 @@ update_q = queue.Queue()
|
||||
|
||||
notification_q = queue.Queue()
|
||||
|
||||
# Needs to be set this way because we also build and publish via pip
|
||||
base_path = os.path.dirname(os.path.realpath(__file__))
|
||||
app = Flask(__name__,
|
||||
static_url_path="{}/static".format(base_path),
|
||||
template_folder="{}/templates".format(base_path))
|
||||
static_url_path="",
|
||||
static_folder="static",
|
||||
template_folder="templates")
|
||||
|
||||
# Stop browser caching of assets
|
||||
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0
|
||||
@@ -72,6 +72,9 @@ app.config['LOGIN_DISABLED'] = False
|
||||
# Disables caching of the templates
|
||||
app.config['TEMPLATES_AUTO_RELOAD'] = True
|
||||
|
||||
csrf = CSRFProtect()
|
||||
csrf.init_app(app)
|
||||
|
||||
notification_debug_log=[]
|
||||
|
||||
def init_app_secret(datastore_path):
|
||||
@@ -269,7 +272,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
@app.route("/rss", methods=['GET'])
|
||||
@login_required
|
||||
def rss():
|
||||
|
||||
from . import diff
|
||||
limit_tag = request.args.get('tag')
|
||||
|
||||
# Sort by last_changed and add the uuid which is usually the key..
|
||||
@@ -298,6 +301,15 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
fg.link(href='https://changedetection.io')
|
||||
|
||||
for watch in sorted_watches:
|
||||
|
||||
dates = list(watch['history'].keys())
|
||||
# Convert to int, sort and back to str again
|
||||
# @todo replace datastore getter that does this automatically
|
||||
dates = [int(i) for i in dates]
|
||||
dates.sort(reverse=True)
|
||||
dates = [str(i) for i in dates]
|
||||
prev_fname = watch['history'][dates[1]]
|
||||
|
||||
if not watch['viewed']:
|
||||
# Re #239 - GUID needs to be individual for each event
|
||||
# @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
|
||||
@@ -313,12 +325,16 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
diff_link = {'href': "{}{}".format(base_url, url_for('diff_history_page', uuid=watch['uuid']))}
|
||||
|
||||
# @todo use title if it exists
|
||||
fe.link(link=diff_link)
|
||||
fe.title(title=watch['url'])
|
||||
|
||||
# @todo in the future <description><![CDATA[<html><body>Any code html is valid.</body></html>]]></description>
|
||||
fe.description(description=watch['url'])
|
||||
# @todo watch should be a getter - watch.get('title') (internally if URL else..)
|
||||
|
||||
watch_title = watch.get('title') if watch.get('title') else watch.get('url')
|
||||
fe.title(title=watch_title)
|
||||
latest_fname = watch['history'][dates[0]]
|
||||
|
||||
html_diff = diff.render_diff(prev_fname, latest_fname, include_equal=False, line_feed_sep="</br>")
|
||||
fe.description(description="<![CDATA[<html><body><h4>{}</h4>{}</body></html>".format(watch_title, html_diff))
|
||||
|
||||
fe.guid(guid, permalink=False)
|
||||
dt = datetime.datetime.fromtimestamp(int(watch['newest_history_key']))
|
||||
@@ -504,13 +520,13 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
'headers': form.headers.data,
|
||||
'body': form.body.data,
|
||||
'method': form.method.data,
|
||||
'ignore_status_codes': form.ignore_status_codes.data,
|
||||
'fetch_backend': form.fetch_backend.data,
|
||||
'trigger_text': form.trigger_text.data,
|
||||
'notification_title': form.notification_title.data,
|
||||
'notification_body': form.notification_body.data,
|
||||
'notification_format': form.notification_format.data,
|
||||
'extract_title_as_title': form.extract_title_as_title.data
|
||||
|
||||
'extract_title_as_title': form.extract_title_as_title.data,
|
||||
}
|
||||
|
||||
# Notification URLs
|
||||
@@ -610,16 +626,15 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
form.notification_format.data = datastore.data['settings']['application']['notification_format']
|
||||
form.base_url.data = datastore.data['settings']['application']['base_url']
|
||||
|
||||
# Password unset is a GET, but we can lock the session to always need the password
|
||||
if not os.getenv("SALTED_PASS", False) and request.values.get('removepassword') == 'yes':
|
||||
from pathlib import Path
|
||||
if request.method == 'POST' and form.data.get('removepassword_button') == True:
|
||||
# Password unset is a GET, but we can lock the session to a salted env password to always need the password
|
||||
if not os.getenv("SALTED_PASS", False):
|
||||
datastore.data['settings']['application']['password'] = False
|
||||
flash("Password protection removed.", 'notice')
|
||||
flask_login.logout_user()
|
||||
return redirect(url_for('settings_page'))
|
||||
|
||||
if request.method == 'POST' and form.validate():
|
||||
|
||||
datastore.data['settings']['application']['notification_urls'] = form.notification_urls.data
|
||||
datastore.data['settings']['requests']['minutes_between_check'] = form.minutes_between_check.data
|
||||
datastore.data['settings']['application']['extract_title_as_title'] = form.extract_title_as_title.data
|
||||
|
||||
114
changedetectionio/changedetection.py
Executable file
114
changedetectionio/changedetection.py
Executable file
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
# Launch as a eventlet.wsgi server instance.
|
||||
|
||||
import getopt
|
||||
import os
|
||||
import sys
|
||||
|
||||
import eventlet
|
||||
import eventlet.wsgi
|
||||
from . import store, changedetection_app
|
||||
from . import __version__
|
||||
|
||||
def main():
|
||||
ssl_mode = False
|
||||
host = ''
|
||||
port = os.environ.get('PORT') or 5000
|
||||
do_cleanup = False
|
||||
datastore_path = None
|
||||
|
||||
# On Windows, create and use a default path.
|
||||
if os.name == 'nt':
|
||||
datastore_path = os.path.expandvars(r'%APPDATA%\changedetection.io')
|
||||
os.makedirs(datastore_path, exist_ok=True)
|
||||
else:
|
||||
# Must be absolute so that send_from_directory doesnt try to make it relative to backend/
|
||||
datastore_path = os.path.join(os.getcwd(), "../datastore")
|
||||
|
||||
try:
|
||||
opts, args = getopt.getopt(sys.argv[1:], "Ccsd:h:p:", "port")
|
||||
except getopt.GetoptError:
|
||||
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path]')
|
||||
sys.exit(2)
|
||||
|
||||
create_datastore_dir = False
|
||||
|
||||
for opt, arg in opts:
|
||||
# if opt == '--purge':
|
||||
# Remove history, the actual files you need to delete manually.
|
||||
# for uuid, watch in datastore.data['watching'].items():
|
||||
# watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None})
|
||||
|
||||
if opt == '-s':
|
||||
ssl_mode = True
|
||||
|
||||
if opt == '-h':
|
||||
host = arg
|
||||
|
||||
if opt == '-p':
|
||||
port = int(arg)
|
||||
|
||||
if opt == '-d':
|
||||
datastore_path = arg
|
||||
|
||||
# Cleanup (remove text files that arent in the index)
|
||||
if opt == '-c':
|
||||
do_cleanup = True
|
||||
|
||||
# Create the datadir if it doesnt exist
|
||||
if opt == '-C':
|
||||
create_datastore_dir = True
|
||||
|
||||
# isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
|
||||
app_config = {'datastore_path': datastore_path}
|
||||
|
||||
if not os.path.isdir(app_config['datastore_path']):
|
||||
if create_datastore_dir:
|
||||
os.mkdir(app_config['datastore_path'])
|
||||
else:
|
||||
print(
|
||||
"ERROR: Directory path for the datastore '{}' does not exist, cannot start, please make sure the directory exists or specify a directory with the -d option.\n"
|
||||
"Or use the -C parameter to create the directory.".format(app_config['datastore_path']), file=sys.stderr)
|
||||
sys.exit(2)
|
||||
|
||||
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__)
|
||||
app = changedetection_app(app_config, datastore)
|
||||
|
||||
# Go into cleanup mode
|
||||
if do_cleanup:
|
||||
datastore.remove_unused_snapshots()
|
||||
|
||||
app.config['datastore_path'] = datastore_path
|
||||
|
||||
|
||||
@app.context_processor
|
||||
def inject_version():
|
||||
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
|
||||
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
|
||||
has_password=datastore.data['settings']['application']['password'] != False
|
||||
)
|
||||
|
||||
# Proxy sub-directory support
|
||||
# Set environment var USE_X_SETTINGS=1 on this script
|
||||
# And then in your proxy_pass settings
|
||||
#
|
||||
# proxy_set_header Host "localhost";
|
||||
# proxy_set_header X-Forwarded-Prefix /app;
|
||||
|
||||
if os.getenv('USE_X_SETTINGS'):
|
||||
print ("USE_X_SETTINGS is ENABLED\n")
|
||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
|
||||
|
||||
if ssl_mode:
|
||||
# @todo finalise SSL config, but this should get you in the right direction if you need it.
|
||||
eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port)),
|
||||
certfile='cert.pem',
|
||||
keyfile='privkey.pem',
|
||||
server_side=True), app)
|
||||
|
||||
else:
|
||||
eventlet.wsgi.server(eventlet.listen((host, int(port))), app)
|
||||
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
import os
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
import chardet
|
||||
import os
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
||||
from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
|
||||
from selenium.common.exceptions import WebDriverException
|
||||
import requests
|
||||
import time
|
||||
import urllib3.exceptions
|
||||
|
||||
|
||||
@@ -20,7 +22,7 @@ class EmptyReply(Exception):
|
||||
class Fetcher():
|
||||
error = None
|
||||
status_code = None
|
||||
content = None # Should always be bytes.
|
||||
content = None
|
||||
headers = None
|
||||
|
||||
fetcher_description ="No description"
|
||||
@@ -30,7 +32,13 @@ class Fetcher():
|
||||
return self.error
|
||||
|
||||
@abstractmethod
|
||||
def run(self, url, timeout, request_headers, request_body, request_method):
|
||||
def run(self,
|
||||
url,
|
||||
timeout,
|
||||
request_headers,
|
||||
request_body,
|
||||
request_method,
|
||||
ignore_status_codes=False):
|
||||
# Should set self.error, self.status_code and self.content
|
||||
pass
|
||||
|
||||
@@ -97,7 +105,13 @@ class html_webdriver(Fetcher):
|
||||
if proxy_args:
|
||||
self.proxy = SeleniumProxy(raw=proxy_args)
|
||||
|
||||
def run(self, url, timeout, request_headers, request_body, request_method):
|
||||
def run(self,
|
||||
url,
|
||||
timeout,
|
||||
request_headers,
|
||||
request_body,
|
||||
request_method,
|
||||
ignore_status_codes=False):
|
||||
|
||||
# request_body, request_method unused for now, until some magic in the future happens.
|
||||
|
||||
@@ -145,8 +159,13 @@ class html_webdriver(Fetcher):
|
||||
class html_requests(Fetcher):
|
||||
fetcher_description = "Basic fast Plaintext/HTTP Client"
|
||||
|
||||
def run(self, url, timeout, request_headers, request_body, request_method):
|
||||
import requests
|
||||
def run(self,
|
||||
url,
|
||||
timeout,
|
||||
request_headers,
|
||||
request_body,
|
||||
request_method,
|
||||
ignore_status_codes=False):
|
||||
|
||||
r = requests.request(method=request_method,
|
||||
data=request_body,
|
||||
@@ -155,16 +174,21 @@ class html_requests(Fetcher):
|
||||
timeout=timeout,
|
||||
verify=False)
|
||||
|
||||
# https://stackoverflow.com/questions/44203397/python-requests-get-returns-improperly-decoded-text-instead-of-utf-8
|
||||
# Return bytes here
|
||||
html = r.text
|
||||
# If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
|
||||
# For example - some sites don't tell us it's utf-8, but return utf-8 content
|
||||
# This seems to not occur when using webdriver/selenium, it seems to detect the text encoding more reliably.
|
||||
# https://github.com/psf/requests/issues/1604 good info about requests encoding detection
|
||||
if not r.headers.get('content-type') or not 'charset=' in r.headers.get('content-type'):
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
|
||||
# @todo test this
|
||||
# @todo maybe you really want to test zero-byte return pages?
|
||||
if not r or not html or not len(html):
|
||||
if (not ignore_status_codes and not r) or not r.content or not len(r.content):
|
||||
raise EmptyReply(url=url, status_code=r.status_code)
|
||||
|
||||
self.status_code = r.status_code
|
||||
self.content = html
|
||||
self.content = r.text
|
||||
self.headers = r.headers
|
||||
|
||||
|
||||
@@ -2,22 +2,31 @@
|
||||
|
||||
import difflib
|
||||
|
||||
|
||||
def same_slicer(l, a, b):
|
||||
if a == b:
|
||||
return [l[a]]
|
||||
else:
|
||||
return l[a:b]
|
||||
|
||||
# like .compare but a little different output
|
||||
def customSequenceMatcher(before, after, include_equal=False):
|
||||
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \\t", a=before, b=after)
|
||||
|
||||
# @todo Line-by-line mode instead of buncghed, including `after` that is not in `before` (maybe unset?)
|
||||
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
|
||||
if include_equal and tag == 'equal':
|
||||
g = before[alo:ahi]
|
||||
yield g
|
||||
elif tag == 'delete':
|
||||
g = "(removed) {}".format(before[alo])
|
||||
g = ["(removed) " + i for i in same_slicer(before, alo, ahi)]
|
||||
yield g
|
||||
elif tag == 'replace':
|
||||
g = ["(changed) {}".format(before[alo]), "(-> into) {}".format(after[blo])]
|
||||
g = ["(changed) " + i for i in same_slicer(before, alo, ahi)]
|
||||
g += ["(into ) " + i for i in same_slicer(after, blo, bhi)]
|
||||
yield g
|
||||
elif tag == 'insert':
|
||||
g = "(added) {}".format(after[blo])
|
||||
g = ["(added ) " + i for i in same_slicer(after, blo, bhi)]
|
||||
yield g
|
||||
|
||||
# only_differences - only return info about the differences, no context
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
|
||||
import urllib3
|
||||
from inscriptis import get_text
|
||||
|
||||
from inscriptis import get_text
|
||||
from changedetectionio import content_fetcher, html_tools
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
@@ -24,8 +24,14 @@ class perform_site_check():
|
||||
stripped_text_from_html = ""
|
||||
|
||||
watch = self.datastore.data['watching'][uuid]
|
||||
# Unset any existing notification error
|
||||
|
||||
# Protect against file:// access
|
||||
if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
|
||||
raise Exception(
|
||||
"file:// type access is denied for security reasons."
|
||||
)
|
||||
|
||||
# Unset any existing notification error
|
||||
update_obj = {'last_notification_error': False, 'last_error': False}
|
||||
|
||||
extra_headers = self.datastore.get_val(uuid, 'headers')
|
||||
@@ -47,6 +53,7 @@ class perform_site_check():
|
||||
url = self.datastore.get_val(uuid, 'url')
|
||||
request_body = self.datastore.get_val(uuid, 'body')
|
||||
request_method = self.datastore.get_val(uuid, 'method')
|
||||
ignore_status_code = self.datastore.get_val(uuid, 'ignore_status_codes')
|
||||
|
||||
# Pluggable content fetcher
|
||||
prefer_backend = watch['fetch_backend']
|
||||
@@ -58,7 +65,7 @@ class perform_site_check():
|
||||
|
||||
|
||||
fetcher = klass()
|
||||
fetcher.run(url, timeout, request_headers, request_body, request_method)
|
||||
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code)
|
||||
# Fetching complete, now filters
|
||||
# @todo move to class / maybe inside of fetcher abstract base?
|
||||
|
||||
|
||||
@@ -325,6 +325,7 @@ class watchForm(commonSettingsForm):
|
||||
headers = StringDictKeyValue('Request Headers')
|
||||
body = TextAreaField('Request Body', [validators.Optional()])
|
||||
method = SelectField('Request Method', choices=valid_method, default=default_method)
|
||||
ignore_status_codes = BooleanField('Ignore Status Codes (process non-2xx status codes as normal)', default=False)
|
||||
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
|
||||
|
||||
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
|
||||
@@ -350,6 +351,8 @@ class globalSettingsForm(commonSettingsForm):
|
||||
[validators.NumberRange(min=1)])
|
||||
extract_title_as_title = BooleanField('Extract <title> from document and use as watch title')
|
||||
base_url = StringField('Base URL', validators=[validators.Optional()])
|
||||
global_subtractive_selectors = StringListField('Ignore elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
|
||||
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
|
||||
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
|
||||
ignore_whitespace = BooleanField('Ignore whitespace')
|
||||
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
|
||||
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
|
||||
@@ -78,7 +78,8 @@ def _parse_json(json_data, jsonpath_filter):
|
||||
# Re 265 - Just return an empty string when filter not found
|
||||
return ''
|
||||
|
||||
stripped_text_from_html = json.dumps(s, indent=4)
|
||||
# Ticket #462 - allow the original encoding through, usually it's UTF-8 or similar
|
||||
stripped_text_from_html = json.dumps(s, indent=4, ensure_ascii=False)
|
||||
|
||||
return stripped_text_from_html
|
||||
|
||||
|
||||
@@ -37,6 +37,9 @@ section.content {
|
||||
align-items: center;
|
||||
justify-content: center; }
|
||||
|
||||
code {
|
||||
background: #eee; }
|
||||
|
||||
/* table related */
|
||||
.watch-table {
|
||||
width: 100%;
|
||||
@@ -241,7 +244,7 @@ footer {
|
||||
.sticky-tab {
|
||||
position: absolute;
|
||||
top: 60px;
|
||||
font-size: 8px;
|
||||
font-size: 65%;
|
||||
background: #fff;
|
||||
padding: 10px; }
|
||||
.sticky-tab#left-sticky {
|
||||
@@ -307,14 +310,23 @@ footer {
|
||||
#nav-menu {
|
||||
overflow-x: scroll; } }
|
||||
|
||||
/*
|
||||
@media only screen and (max-width: 760px), (min-device-width: 768px) and (max-device-width: 800px) {
|
||||
div.sticky-tab#hosted-sticky {
|
||||
top: 60px;
|
||||
left: 0px;
|
||||
right: auto; }
|
||||
section.content {
|
||||
padding-top: 110px; }
|
||||
div.tabs ul li {
|
||||
display: block;
|
||||
border-radius: 0px; }
|
||||
input[type='text'] {
|
||||
width: 100%; }
|
||||
/*
|
||||
Max width before this PARTICULAR table gets nasty
|
||||
This query will take effect for any screen smaller than 760px
|
||||
and also iPads specifically.
|
||||
*/
|
||||
@media only screen and (max-width: 760px), (min-device-width: 768px) and (max-device-width: 1024px) {
|
||||
input[type='text'] {
|
||||
width: 100%; }
|
||||
.watch-table {
|
||||
/* Force table to not be like tables anymore */
|
||||
/* Force table to not be like tables anymore */
|
||||
|
||||
@@ -42,6 +42,10 @@ section.content {
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
code {
|
||||
background: #eee;
|
||||
}
|
||||
|
||||
/* table related */
|
||||
.watch-table {
|
||||
width: 100%;
|
||||
@@ -318,7 +322,7 @@ footer {
|
||||
.sticky-tab {
|
||||
position: absolute;
|
||||
top: 60px;
|
||||
font-size: 8px;
|
||||
font-size: 65%;
|
||||
background: #fff;
|
||||
padding: 10px;
|
||||
&#left-sticky {
|
||||
@@ -418,18 +422,35 @@ footer {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@media only screen and (max-width: 760px), (min-device-width: 768px) and (max-device-width: 800px) {
|
||||
|
||||
div.sticky-tab#hosted-sticky {
|
||||
top: 60px;
|
||||
left: 0px;
|
||||
right: auto;
|
||||
}
|
||||
|
||||
section.content {
|
||||
padding-top: 110px;
|
||||
}
|
||||
|
||||
// Make the tabs easier to hit, they will be all nice and horizontal
|
||||
div.tabs ul li {
|
||||
display: block;
|
||||
border-radius: 0px;
|
||||
}
|
||||
|
||||
input[type='text'] {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
/*
|
||||
Max width before this PARTICULAR table gets nasty
|
||||
This query will take effect for any screen smaller than 760px
|
||||
and also iPads specifically.
|
||||
*/
|
||||
|
||||
@media only screen and (max-width: 760px), (min-device-width: 768px) and (max-device-width: 1024px) {
|
||||
|
||||
input[type='text'] {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.watch-table {
|
||||
/* Force table to not be like tables anymore */
|
||||
thead, tbody, th, td, tr {
|
||||
|
||||
@@ -400,7 +400,7 @@ class ChangeDetectionStore:
|
||||
# system was out of memory, out of RAM etc
|
||||
with open(self.json_store_path+".tmp", 'w') as json_file:
|
||||
json.dump(data, json_file, indent=4)
|
||||
os.rename(self.json_store_path+".tmp", self.json_store_path)
|
||||
os.replace(self.json_store_path+".tmp", self.json_store_path)
|
||||
except Exception as e:
|
||||
logging.error("Error writing JSON!! (Main JSON file save was skipped) : %s", str(e))
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
<div class="box-wrap inner">
|
||||
<form class="pure-form pure-form-stacked"
|
||||
action="{{ url_for('edit_page', uuid=uuid, next = request.args.get('next') ) }}" method="POST">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
|
||||
|
||||
<div class="tab-pane-inner" id="general">
|
||||
<fieldset>
|
||||
@@ -80,6 +81,9 @@ User-Agent: wonderbra 1.0") }}
|
||||
\"car\":null
|
||||
}") }}
|
||||
</div>
|
||||
<div>
|
||||
{{ render_field(form.ignore_status_codes) }}
|
||||
</div>
|
||||
</fieldset>
|
||||
<br/>
|
||||
</div>
|
||||
@@ -113,9 +117,9 @@ User-Agent: wonderbra 1.0") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
|
||||
<li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <b>"json:"</b>, <a
|
||||
<li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <code>"json:"</code>, use <code>json:$</code> to force re-formatting if required, <a
|
||||
href="https://jsonpath.com/" target="new">test your JSONPath here</a></li>
|
||||
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash, example <b>//*[contains(@class, 'sametext')]</b>, <a
|
||||
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash, example <code>//*[contains(@class, 'sametext')]</code>, <a
|
||||
href="http://xpather.com/" target="new">test your XPath here</a></li>
|
||||
</ul>
|
||||
Please be sure that you thoroughly understand how to write CSS or JSONPath, XPath selector rules before filing an issue on GitHub! <a
|
||||
@@ -142,7 +146,7 @@ nav
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
|
||||
<li>Regular Expression support, wrap the line in forward slash <b>/regex/</b></li>
|
||||
<li>Regular Expression support, wrap the line in forward slash <code>/regex/</code></li>
|
||||
<li>Changing this will affect the comparison checksum which may trigger an alert</li>
|
||||
<li>Use the preview/show current tab to see ignores</li>
|
||||
</ul>
|
||||
@@ -158,8 +162,8 @@ nav
|
||||
<ul>
|
||||
<li>Text to wait for before triggering a change/notification, all text and regex are tested <i>case-insensitive</i>.</li>
|
||||
<li>Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li>
|
||||
<li>Each line is process separately (think of each line as "OR")</li>
|
||||
<li>Note: Wrap in forward slash / to use regex example: <span style="font-family: monospace; background: #eee">/foo\d/</span></li>
|
||||
<li>Each line is processed separately (think of each line as "OR")</li>
|
||||
<li>Note: Wrap in forward slash / to use regex example: <code>/foo\d/</code></li>
|
||||
</ul>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
<div class="edit-form">
|
||||
<div class="inner">
|
||||
<form class="pure-form pure-form-aligned" action="{{url_for('import_page')}}" method="POST">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
|
||||
<fieldset class="pure-group">
|
||||
<legend>
|
||||
Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,):
|
||||
|
||||
@@ -4,11 +4,12 @@
|
||||
<div class="login-form">
|
||||
<div class="inner">
|
||||
<form class="pure-form pure-form-stacked" action="{{url_for('login')}}" method="POST">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
<label for="password">Password</label>
|
||||
<input type="password" id="password" required="" name="password" value=""
|
||||
size="15"/>
|
||||
size="15" autofocus />
|
||||
<input type="hidden" id="email" name="email" value="defaultuser@changedetection.io" />
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
<div class="edit-form">
|
||||
<div class="box-wrap inner">
|
||||
<form class="pure-form pure-form-stacked" action="{{url_for('scrub_page')}}" method="POST">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
This will remove all version snapshots/data, but keep your list of URLs. <br/>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{% extends 'base.html' %}
|
||||
|
||||
{% block content %}
|
||||
{% from '_helpers.jinja' import render_field %}
|
||||
{% from '_helpers.jinja' import render_field, render_button %}
|
||||
{% from '_common_fields.jinja' import render_common_settings_form %}
|
||||
|
||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='settings.js')}}" defer></script>
|
||||
@@ -18,6 +18,7 @@
|
||||
</div>
|
||||
<div class="box-wrap inner">
|
||||
<form class="pure-form pure-form-stacked settings" action="{{url_for('settings_page')}}" method="POST">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
|
||||
<div class="tab-pane-inner" id="general">
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
@@ -27,8 +28,7 @@
|
||||
<div class="pure-control-group">
|
||||
{% if not hide_remove_pass %}
|
||||
{% if current_user.is_authenticated %}
|
||||
<a href="{{url_for('settings_page', removepassword='yes')}}"
|
||||
class="pure-button pure-button-primary">Remove password</a>
|
||||
{{ render_button(form.removepassword_button) }}
|
||||
{% else %}
|
||||
{{ render_field(form.password) }}
|
||||
<span class="pure-form-message-inline">Password protection for your changedetection.io application.</span>
|
||||
@@ -104,7 +104,7 @@ nav
|
||||
<ul>
|
||||
<li>Note: This is applied globally in addition to the per-watch rules.</li>
|
||||
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
|
||||
<li>Regular Expression support, wrap the line in forward slash <b>/regex/</b></li>
|
||||
<li>Regular Expression support, wrap the line in forward slash <code>/regex/</code></li>
|
||||
<li>Changing this will affect the comparison checksum which may trigger an alert</li>
|
||||
<li>Use the preview/show current tab to see ignores</li>
|
||||
</ul>
|
||||
@@ -114,11 +114,9 @@ nav
|
||||
|
||||
<div id="actions">
|
||||
<div class="pure-control-group">
|
||||
<button type="submit" class="pure-button pure-button-primary">Save</button>
|
||||
<a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a>
|
||||
<a href="{{url_for('scrub_page')}}" class="pure-button button-small button-cancel">Delete
|
||||
History
|
||||
Snapshot Data</a>
|
||||
{{ render_button(form.save_button) }}
|
||||
<a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a>
|
||||
<a href="{{url_for('scrub_page')}}" class="pure-button button-small button-cancel">Delete History Snapshot Data</a>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
<div class="box">
|
||||
|
||||
<form class="pure-form" action="{{ url_for('api_watch_add') }}" method="POST" id="new-watch-form">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
|
||||
<fieldset>
|
||||
<legend>Add a new change detection watch</legend>
|
||||
{{ render_simple_field(form.url, placeholder="https://...", required=true) }}
|
||||
|
||||
@@ -42,6 +42,9 @@ def app(request):
|
||||
cleanup(app_config['datastore_path'])
|
||||
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], include_default_watches=False)
|
||||
app = changedetection_app(app_config, datastore)
|
||||
|
||||
# Disable CSRF while running tests
|
||||
app.config['WTF_CSRF_ENABLED'] = False
|
||||
app.config['STOP_THREADS'] = True
|
||||
|
||||
def teardown():
|
||||
|
||||
@@ -4,8 +4,8 @@ from flask import url_for
|
||||
def test_check_access_control(app, client):
|
||||
# Still doesnt work, but this is closer.
|
||||
|
||||
with app.test_client() as c:
|
||||
# Check we dont have any password protection enabled yet.
|
||||
with app.test_client(use_cookies=True) as c:
|
||||
# Check we don't have any password protection enabled yet.
|
||||
res = c.get(url_for("settings_page"))
|
||||
assert b"Remove password" not in res.data
|
||||
|
||||
@@ -46,15 +46,20 @@ def test_check_access_control(app, client):
|
||||
assert b"BACKUP" in res.data
|
||||
assert b"IMPORT" in res.data
|
||||
assert b"LOG OUT" in res.data
|
||||
assert b"minutes_between_check" in res.data
|
||||
assert b"fetch_backend" in res.data
|
||||
|
||||
# Now remove the password so other tests function, @todo this should happen before each test automatically
|
||||
res = c.get(url_for("settings_page", removepassword="yes"),
|
||||
follow_redirects=True)
|
||||
assert b"Password protection removed." in res.data
|
||||
|
||||
res = c.get(url_for("index"))
|
||||
assert b"LOG OUT" not in res.data
|
||||
|
||||
res = c.post(
|
||||
url_for("settings_page"),
|
||||
data={
|
||||
"minutes_between_check": 180,
|
||||
"tag": "",
|
||||
"headers": "",
|
||||
"fetch_backend": "html_webdriver",
|
||||
"removepassword_button": "Remove password"
|
||||
},
|
||||
follow_redirects=True,
|
||||
)
|
||||
|
||||
# There was a bug where saving the settings form would submit a blank password
|
||||
def test_check_access_control_no_blank_password(app, client):
|
||||
@@ -71,8 +76,7 @@ def test_check_access_control_no_blank_password(app, client):
|
||||
data={"password": "",
|
||||
"minutes_between_check": 180,
|
||||
'fetch_backend': "html_requests"},
|
||||
|
||||
follow_redirects=True
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b"Password protection enabled." not in res.data
|
||||
@@ -91,7 +95,8 @@ def test_check_access_no_remote_access_to_remove_password(app, client):
|
||||
# Enable password check.
|
||||
res = c.post(
|
||||
url_for("settings_page"),
|
||||
data={"password": "password", "minutes_between_check": 180,
|
||||
data={"password": "password",
|
||||
"minutes_between_check": 180,
|
||||
'fetch_backend': "html_requests"},
|
||||
follow_redirects=True
|
||||
)
|
||||
@@ -99,8 +104,17 @@ def test_check_access_no_remote_access_to_remove_password(app, client):
|
||||
assert b"Password protection enabled." in res.data
|
||||
assert b"Login" in res.data
|
||||
|
||||
res = c.get(url_for("settings_page", removepassword="yes"),
|
||||
follow_redirects=True)
|
||||
res = c.post(
|
||||
url_for("settings_page"),
|
||||
data={
|
||||
"minutes_between_check": 180,
|
||||
"tag": "",
|
||||
"headers": "",
|
||||
"fetch_backend": "html_webdriver",
|
||||
"removepassword_button": "Remove password"
|
||||
},
|
||||
follow_redirects=True,
|
||||
)
|
||||
assert b"Password protection removed." not in res.data
|
||||
|
||||
res = c.get(url_for("index"),
|
||||
|
||||
@@ -25,6 +25,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
|
||||
data={"urls": url_for('test_endpoint', _external=True)},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b"1 Imported" in res.data
|
||||
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
@@ -69,6 +70,11 @@ def test_check_basic_change_detection_functionality(client, live_server):
|
||||
res = client.get(url_for("rss"))
|
||||
expected_url = url_for('test_endpoint', _external=True)
|
||||
assert b'<rss' in res.data
|
||||
|
||||
# re #16 should have the diff in here too
|
||||
assert b'(into ) which has this one new line' in res.data
|
||||
assert b'CDATA' in res.data
|
||||
|
||||
assert expected_url.encode('utf-8') in res.data
|
||||
|
||||
# Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times
|
||||
|
||||
87
changedetectionio/tests/test_encoding.py
Normal file
87
changedetectionio/tests/test_encoding.py
Normal file
@@ -0,0 +1,87 @@
|
||||
#!/usr/bin/python3
|
||||
# coding=utf-8
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import live_server_setup
|
||||
import pytest
|
||||
|
||||
|
||||
def test_setup(live_server):
|
||||
live_server_setup(live_server)
|
||||
|
||||
|
||||
def set_html_response():
|
||||
test_return_data = """
|
||||
<html><body><span class="nav_second_img_text">
|
||||
铸大国重器,挺制造脊梁,致力能源未来,赋能美好生活。
|
||||
</span>
|
||||
</body></html>
|
||||
"""
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write(test_return_data)
|
||||
return None
|
||||
|
||||
|
||||
# In the case the server does not issue a charset= or doesnt have content_type header set
|
||||
def test_check_encoding_detection(client, live_server):
|
||||
set_html_response()
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', content_type="text/html", _external=True)
|
||||
client.post(
|
||||
url_for("import_page"),
|
||||
data={"urls": test_url},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
# Trigger a check
|
||||
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(2)
|
||||
|
||||
res = client.get(
|
||||
url_for("preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
# Should see the proper string
|
||||
assert "铸大国重".encode('utf-8') in res.data
|
||||
# Should not see the failed encoding
|
||||
assert b'\xc2\xa7' not in res.data
|
||||
|
||||
|
||||
# In the case the server does not issue a charset= or doesnt have content_type header set
|
||||
def test_check_encoding_detection_missing_content_type_header(client, live_server):
|
||||
set_html_response()
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
client.post(
|
||||
url_for("import_page"),
|
||||
data={"urls": test_url},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
# Trigger a check
|
||||
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(2)
|
||||
|
||||
res = client.get(
|
||||
url_for("preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
# Should see the proper string
|
||||
assert "铸大国重".encode('utf-8') in res.data
|
||||
# Should not see the failed encoding
|
||||
assert b'\xc2\xa7' not in res.data
|
||||
@@ -1,6 +1,7 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import time
|
||||
|
||||
from flask import url_for
|
||||
from . util import live_server_setup
|
||||
|
||||
@@ -17,7 +18,9 @@ def test_error_handler(client, live_server):
|
||||
time.sleep(1)
|
||||
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint_403_error', _external=True)
|
||||
test_url = url_for('test_endpoint',
|
||||
status_code=403,
|
||||
_external=True)
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={"urls": test_url},
|
||||
|
||||
190
changedetectionio/tests/test_ignorestatuscode.py
Normal file
190
changedetectionio/tests/test_ignorestatuscode.py
Normal file
@@ -0,0 +1,190 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from . util import live_server_setup
|
||||
|
||||
|
||||
def test_setup(live_server):
|
||||
live_server_setup(live_server)
|
||||
|
||||
|
||||
def set_original_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write(test_return_data)
|
||||
|
||||
|
||||
def set_some_changed_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
<p>Which is across multiple lines, and a new thing too.</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write(test_return_data)
|
||||
|
||||
|
||||
def test_normal_page_check_works_with_ignore_status_code(client, live_server):
|
||||
sleep_time_for_fetch_thread = 3
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
|
||||
set_original_response()
|
||||
|
||||
# Goto the settings page, add our ignore text
|
||||
res = client.post(
|
||||
url_for("settings_page"),
|
||||
data={
|
||||
"minutes_between_check": 180,
|
||||
"ignore_status_codes": "y",
|
||||
'fetch_backend': "html_requests"
|
||||
},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Settings updated." in res.data
|
||||
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={"urls": test_url},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
# Trigger a check
|
||||
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||
|
||||
set_some_changed_response()
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
# Trigger a check
|
||||
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
|
||||
# It should report nothing found (no new 'unviewed' class)
|
||||
res = client.get(url_for("index"))
|
||||
assert b'unviewed' in res.data
|
||||
assert b'/test-endpoint' in res.data
|
||||
|
||||
|
||||
# Tests the whole stack works with staus codes ignored
|
||||
def test_403_page_check_works_with_ignore_status_code(client, live_server):
|
||||
sleep_time_for_fetch_thread = 3
|
||||
|
||||
set_original_response()
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', status_code=403, _external=True)
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={"urls": test_url},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
|
||||
# Trigger a check
|
||||
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
|
||||
# Goto the edit page, check our ignore option
|
||||
# Add our URL to the import page
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first"),
|
||||
data={"ignore_status_codes": "y", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated watch." in res.data
|
||||
|
||||
# Trigger a check
|
||||
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
# Make a change
|
||||
set_some_changed_response()
|
||||
|
||||
# Trigger a check
|
||||
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
|
||||
# It should have 'unviewed' still
|
||||
# Because it should be looking at only that 'sametext' id
|
||||
res = client.get(url_for("index"))
|
||||
assert b'unviewed' in res.data
|
||||
|
||||
|
||||
# Tests the whole stack works with staus codes ignored
|
||||
def test_403_page_check_fails_without_ignore_status_code(client, live_server):
|
||||
sleep_time_for_fetch_thread = 3
|
||||
|
||||
set_original_response()
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', status_code=403, _external=True)
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={"urls": test_url},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
|
||||
# Trigger a check
|
||||
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
|
||||
# Goto the edit page, check our ignore option
|
||||
# Add our URL to the import page
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first"),
|
||||
data={"url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated watch." in res.data
|
||||
|
||||
# Trigger a check
|
||||
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
# Make a change
|
||||
set_some_changed_response()
|
||||
|
||||
# Trigger a check
|
||||
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
|
||||
# It should have 'unviewed' still
|
||||
# Because it should be looking at only that 'sametext' id
|
||||
res = client.get(url_for("index"))
|
||||
assert b'Status Code 403' in res.data
|
||||
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/python3
|
||||
# coding=utf-8
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
@@ -142,7 +143,7 @@ def set_modified_response():
|
||||
}
|
||||
],
|
||||
"boss": {
|
||||
"name": "Foobar"
|
||||
"name": "Örnsköldsvik"
|
||||
},
|
||||
"available": false
|
||||
}
|
||||
@@ -246,8 +247,10 @@ def test_check_json_filter(client, live_server):
|
||||
|
||||
# Should not see this, because its not in the JSONPath we entered
|
||||
res = client.get(url_for("diff_history_page", uuid="first"))
|
||||
|
||||
# But the change should be there, tho its hard to test the change was detected because it will show old and new versions
|
||||
assert b'Foobar' in res.data
|
||||
# And #462 - check we see the proper utf-8 string there
|
||||
assert "Örnsköldsvik".encode('utf-8') in res.data
|
||||
|
||||
|
||||
def test_check_json_filter_bool_val(client, live_server):
|
||||
|
||||
@@ -125,7 +125,7 @@ def test_check_notification(client, live_server):
|
||||
# Diff was correctly executed
|
||||
assert "Diff Full: Some initial text" in notification_submission
|
||||
assert "Diff: (changed) Which is across multiple lines" in notification_submission
|
||||
assert "(-> into) which has this one new line" in notification_submission
|
||||
assert "(into ) which has this one new line" in notification_submission
|
||||
|
||||
|
||||
if env_base_url:
|
||||
|
||||
36
changedetectionio/tests/test_security.py
Normal file
36
changedetectionio/tests/test_security.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from flask import url_for
|
||||
from . util import set_original_response, set_modified_response, live_server_setup
|
||||
import time
|
||||
|
||||
def test_setup(live_server):
|
||||
live_server_setup(live_server)
|
||||
|
||||
def test_file_access(client, live_server):
|
||||
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={"urls": 'https://localhost'},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b"1 Imported" in res.data
|
||||
|
||||
# Attempt to add a body with a GET method
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first"),
|
||||
data={
|
||||
"url": 'file:///etc/passwd',
|
||||
"tag": "",
|
||||
"method": "GET",
|
||||
"fetch_backend": "html_requests",
|
||||
"body": ""},
|
||||
follow_redirects=True
|
||||
)
|
||||
time.sleep(3)
|
||||
|
||||
res = client.get(
|
||||
url_for("index", uuid="first"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b'denied for security reasons' in res.data
|
||||
3
changedetectionio/tests/unit/test-content/after-2.txt
Normal file
3
changedetectionio/tests/unit/test-content/after-2.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
After twenty years, as cursed as I may be
|
||||
ok
|
||||
and insure that I'm one of those computer nerds.
|
||||
@@ -2,5 +2,6 @@ After twenty years, as cursed as I may be
|
||||
for having learned computerese,
|
||||
I continue to examine bits, bytes and words
|
||||
xok
|
||||
next-x-ok
|
||||
and insure that I'm one of those computer nerds.
|
||||
and something new
|
||||
@@ -12,12 +12,19 @@ from changedetectionio import diff
|
||||
class TestDiffBuilder(unittest.TestCase):
|
||||
|
||||
def test_expected_diff_output(self):
|
||||
base_dir=os.path.dirname(__file__)
|
||||
output = diff.render_diff(base_dir+"/test-content/before.txt", base_dir+"/test-content/after.txt")
|
||||
base_dir = os.path.dirname(__file__)
|
||||
output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after.txt")
|
||||
output = output.split("\n")
|
||||
self.assertIn("(changed) ok", output)
|
||||
self.assertIn("(-> into) xok", output)
|
||||
self.assertIn("(added) and something new", output)
|
||||
self.assertIn('(changed) ok', output)
|
||||
self.assertIn('(into ) xok', output)
|
||||
self.assertIn('(into ) next-x-ok', output)
|
||||
self.assertIn('(added ) and something new', output)
|
||||
|
||||
|
||||
output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after-2.txt")
|
||||
output = output.split("\n")
|
||||
self.assertIn('(removed) for having learned computerese,', output)
|
||||
self.assertIn('(removed) I continue to examine bits, bytes and words', output)
|
||||
|
||||
# @todo test blocks of changed, blocks of added, blocks of removed
|
||||
|
||||
|
||||
@@ -38,21 +38,19 @@ def set_modified_response():
|
||||
|
||||
def live_server_setup(live_server):
|
||||
|
||||
|
||||
@live_server.app.route('/test-endpoint')
|
||||
def test_endpoint():
|
||||
ctype = request.args.get('content_type')
|
||||
status_code = request.args.get('status_code')
|
||||
|
||||
# Tried using a global var here but didn't seem to work, so reading from a file instead.
|
||||
with open("test-datastore/endpoint-content.txt", "r") as f:
|
||||
resp = make_response(f.read())
|
||||
resp.headers['Content-Type'] = ctype if ctype else 'text/html'
|
||||
return resp
|
||||
|
||||
@live_server.app.route('/test-403')
|
||||
def test_endpoint_403_error():
|
||||
resp = make_response('', 403)
|
||||
return resp
|
||||
try:
|
||||
# Tried using a global var here but didn't seem to work, so reading from a file instead.
|
||||
with open("test-datastore/endpoint-content.txt", "r") as f:
|
||||
resp = make_response(f.read(), status_code)
|
||||
resp.headers['Content-Type'] = ctype if ctype else 'text/html'
|
||||
return resp
|
||||
except FileNotFoundError:
|
||||
return make_response('', status_code)
|
||||
|
||||
# Just return the headers in the request
|
||||
@live_server.app.route('/test-headers')
|
||||
|
||||
@@ -42,7 +42,6 @@ class update_worker(threading.Thread):
|
||||
now = time.time()
|
||||
|
||||
try:
|
||||
|
||||
changed_detected, update_obj, contents = update_handler.run(uuid)
|
||||
|
||||
# Re #342
|
||||
@@ -50,8 +49,6 @@ class update_worker(threading.Thread):
|
||||
# We then convert/.decode('utf-8') for the notification etc
|
||||
if not isinstance(contents, (bytes, bytearray)):
|
||||
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
|
||||
|
||||
|
||||
except PermissionError as e:
|
||||
self.app.logger.error("File permission error updating", uuid, str(e))
|
||||
except content_fetcher.EmptyReply as e:
|
||||
|
||||
@@ -2,7 +2,7 @@ version: '2'
|
||||
services:
|
||||
changedetection:
|
||||
image: ghcr.io/dgtlmoon/changedetection.io
|
||||
container_name: changedetection.io
|
||||
container_name: changedetection
|
||||
hostname: changedetection
|
||||
volumes:
|
||||
- changedetection-data:/datastore
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
flask~= 2.0
|
||||
|
||||
flask_wtf
|
||||
eventlet>=0.31.0
|
||||
validators
|
||||
timeago ~=1.0
|
||||
@@ -35,3 +35,8 @@ lxml
|
||||
# 3.141 was missing socksVersion, 3.150 was not in pypi, so we try 4.1.0
|
||||
selenium ~= 4.1.0
|
||||
|
||||
# https://stackoverflow.com/questions/71652965/importerror-cannot-import-name-safe-str-cmp-from-werkzeug-security/71653849#71653849
|
||||
# ImportError: cannot import name 'safe_str_cmp' from 'werkzeug.security'
|
||||
# need to revisit flask login versions
|
||||
werkzeug ~= 2.0.0
|
||||
|
||||
|
||||
6
setup.py
6
setup.py
@@ -32,11 +32,11 @@ setup(
|
||||
long_description_content_type='text/markdown',
|
||||
keywords='website change monitor for changes notification change detection '
|
||||
'alerts tracking website tracker change alert website and monitoring',
|
||||
zip_safe=False,
|
||||
entry_points={"console_scripts": ["changedetection.io=changedetection:main"]},
|
||||
entry_points={"console_scripts": ["changedetection.io=changedetectionio.changedetection:main"]},
|
||||
zip_safe=True,
|
||||
scripts=["changedetection.py"],
|
||||
author='dgtlmoon',
|
||||
url='https://changedetection.io',
|
||||
scripts=['changedetection.py'],
|
||||
packages=['changedetectionio'],
|
||||
include_package_data=True,
|
||||
install_requires=install_requires,
|
||||
|
||||
Reference in New Issue
Block a user