Massive improvements to error handling - show separate output for non HTTP 200 status replies

This commit is contained in:
dgtlmoon
2022-08-15 18:56:53 +02:00
committed by GitHub
parent 1eb5726cbf
commit 9942107016
19 changed files with 394 additions and 198 deletions

View File

@@ -115,18 +115,19 @@ def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"):
return timeago.format(int(watch_obj['last_checked']), time.time()) return timeago.format(int(watch_obj['last_checked']), time.time())
# @app.context_processor
# def timeago():
# def _timeago(lower_time, now):
# return timeago.format(lower_time, now)
# return dict(timeago=_timeago)
@app.template_filter('format_timestamp_timeago') @app.template_filter('format_timestamp_timeago')
def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"): def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"):
if timestamp == False:
return 'Not yet'
return timeago.format(timestamp, time.time()) return timeago.format(timestamp, time.time())
# return timeago.format(timestamp, time.time())
# return datetime.datetime.utcfromtimestamp(timestamp).strftime(format) @app.template_filter('format_seconds_ago')
def _jinja2_filter_seconds_precise(timestamp):
if timestamp == False:
return 'Not yet'
return format(int(time.time()-timestamp), ',d')
# When nobody is logged in Flask-Login's current_user is set to an AnonymousUser object. # When nobody is logged in Flask-Login's current_user is set to an AnonymousUser object.
class User(flask_login.UserMixin): class User(flask_login.UserMixin):
@@ -830,7 +831,7 @@ def changedetection_app(config=None, datastore_o=None):
previous_version_file_contents = "Unable to read {}.\n".format(previous_file) previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
screenshot_url = datastore.get_screenshot(uuid) screenshot_url = watch.get_screenshot()
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
@@ -850,7 +851,11 @@ def changedetection_app(config=None, datastore_o=None):
extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']), extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']),
left_sticky=True, left_sticky=True,
screenshot=screenshot_url, screenshot=screenshot_url,
is_html_webdriver=is_html_webdriver) is_html_webdriver=is_html_webdriver,
last_error=watch['last_error'],
last_error_text=watch.get_error_text(),
last_error_screenshot=watch.get_error_snapshot()
)
return output return output
@@ -865,20 +870,34 @@ def changedetection_app(config=None, datastore_o=None):
if uuid == 'first': if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop() uuid = list(datastore.data['watching'].keys()).pop()
# Normally you would never reach this, because the 'preview' button is not available when there's no history
# However they may try to clear snapshots and reload the page
if datastore.data['watching'][uuid].history_n == 0:
flash("Preview unavailable - No fetch/check completed or triggers not reached", "error")
return redirect(url_for('index'))
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
try: try:
watch = datastore.data['watching'][uuid] watch = datastore.data['watching'][uuid]
except KeyError: except KeyError:
flash("No history found for the specified link, bad link?", "error") flash("No history found for the specified link, bad link?", "error")
return redirect(url_for('index')) return redirect(url_for('index'))
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or (
watch.get('fetch_backend', None) is None and system_uses_webdriver) else False
# Never requested successfully, but we detected a fetch error
if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
flash("Preview unavailable - No fetch/check completed or triggers not reached", "error")
output = render_template("preview.html",
content=content,
history_n=watch.history_n,
extra_stylesheets=extra_stylesheets,
# current_diff_url=watch['url'],
watch=watch,
uuid=uuid,
is_html_webdriver=is_html_webdriver,
last_error=watch['last_error'],
last_error_text=watch.get_error_text(),
last_error_screenshot=watch.get_error_snapshot())
return output
timestamp = list(watch.history.keys())[-1] timestamp = list(watch.history.keys())[-1]
filename = watch.history[timestamp] filename = watch.history[timestamp]
@@ -913,23 +932,20 @@ def changedetection_app(config=None, datastore_o=None):
except Exception as e: except Exception as e:
content.append({'line': "File doesnt exist or unable to read file {}".format(filename), 'classes': ''}) content.append({'line': "File doesnt exist or unable to read file {}".format(filename), 'classes': ''})
screenshot_url = datastore.get_screenshot(uuid)
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or (
watch.get('fetch_backend', None) is None and system_uses_webdriver) else False
output = render_template("preview.html", output = render_template("preview.html",
content=content, content=content,
history_n=watch.history_n,
extra_stylesheets=extra_stylesheets, extra_stylesheets=extra_stylesheets,
ignored_line_numbers=ignored_line_numbers, ignored_line_numbers=ignored_line_numbers,
triggered_line_numbers=trigger_line_numbers, triggered_line_numbers=trigger_line_numbers,
current_diff_url=watch['url'], current_diff_url=watch['url'],
screenshot=screenshot_url, screenshot=watch.get_screenshot(),
watch=watch, watch=watch,
uuid=uuid, uuid=uuid,
is_html_webdriver=is_html_webdriver) is_html_webdriver=is_html_webdriver,
last_error=watch['last_error'],
last_error_text=watch.get_error_text(),
last_error_screenshot=watch.get_error_snapshot())
return output return output
@@ -1029,11 +1045,12 @@ def changedetection_app(config=None, datastore_o=None):
if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated: if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated:
abort(403) abort(403)
screenshot_filename = "last-screenshot.png" if not request.args.get('error_screenshot') else "last-error-screenshot.png"
# These files should be in our subdirectory # These files should be in our subdirectory
try: try:
# set nocache, set content-type # set nocache, set content-type
watch_dir = datastore_o.datastore_path + "/" + filename response = make_response(send_from_directory(os.path.join(datastore_o.datastore_path, filename), screenshot_filename))
response = make_response(send_from_directory(filename="last-screenshot.png", directory=watch_dir, path=watch_dir + "/last-screenshot.png"))
response.headers['Content-type'] = 'image/png' response.headers['Content-type'] = 'image/png'
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
response.headers['Pragma'] = 'no-cache' response.headers['Pragma'] = 'no-cache'

View File

@@ -6,38 +6,63 @@ import requests
import time import time
import sys import sys
class PageUnloadable(Exception):
def __init__(self, status_code, url): class Non200ErrorCodeReceived(Exception):
def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
# Set this so we can use it in other parts of the app # Set this so we can use it in other parts of the app
self.status_code = status_code self.status_code = status_code
self.url = url self.url = url
self.screenshot = screenshot
self.xpath_data = xpath_data
self.page_text = None
if page_html:
from changedetectionio import html_tools
self.page_text = html_tools.html_to_text(page_html)
return
class JSActionExceptions(Exception):
    """Error carrying the details of a failed JavaScript action on a fetched page.

    Attributes:
        status_code: status code of the page response the action ran against.
        url: address of the page being processed.
        screenshot: screenshot captured after the failure (the caller passes
            ``False`` when none could be taken).
        message: text of the underlying error.
    """

    def __init__(self, status_code, url, screenshot, message=''):
        # Kept as plain attributes so other parts of the app can read them
        self.message = message
        self.screenshot = screenshot
        self.url = url
        self.status_code = status_code
class PageUnloadable(Exception):
def __init__(self, status_code, url, screenshot=False):
# Set this so we can use it in other parts of the app
self.status_code = status_code
self.url = url
self.screenshot = screenshot
return return
pass
class EmptyReply(Exception): class EmptyReply(Exception):
def __init__(self, status_code, url): def __init__(self, status_code, url, screenshot=None):
# Set this so we can use it in other parts of the app # Set this so we can use it in other parts of the app
self.status_code = status_code self.status_code = status_code
self.url = url self.url = url
self.screenshot = screenshot
return return
pass
class ScreenshotUnavailable(Exception):
    """Error signalling that no screenshot could be obtained for a fetched page.

    Attributes:
        status_code: status code reported for the request (may be ``None``).
        url: address of the page being processed.
        page_text: plain-text rendering of ``page_html`` when it was supplied,
            otherwise ``None``.
    """

    def __init__(self, status_code, url, page_html=None):
        # Set this so we can use it in other parts of the app
        self.status_code = status_code
        self.url = url
        # Always define page_text so handlers can read it without an AttributeError
        self.page_text = None
        if page_html:
            # Imported lazily, using the same package path as Non200ErrorCodeReceived
            from changedetectionio import html_tools
            self.page_text = html_tools.html_to_text(page_html)
class ReplyWithContentButNoText(Exception): class ReplyWithContentButNoText(Exception):
def __init__(self, status_code, url): def __init__(self, status_code, url, screenshot=None):
# Set this so we can use it in other parts of the app # Set this so we can use it in other parts of the app
self.status_code = status_code self.status_code = status_code
self.url = url self.url = url
self.screenshot = screenshot
return return
pass
class Fetcher(): class Fetcher():
error = None error = None
@@ -180,7 +205,7 @@ class Fetcher():
system_https_proxy = os.getenv('HTTPS_PROXY') system_https_proxy = os.getenv('HTTPS_PROXY')
# Time ONTOP of the system defined env minimum time # Time ONTOP of the system defined env minimum time
render_extract_delay=0 render_extract_delay = 0
@abstractmethod @abstractmethod
def get_error(self): def get_error(self):
@@ -325,9 +350,10 @@ class base_html_playwright(Fetcher):
browser.close() browser.close()
# This can be ok, we will try to grab what we could retrieve # This can be ok, we will try to grab what we could retrieve
pass pass
except Exception as e: except Exception as e:
print ("other exception when page.goto") print("other exception when page.goto")
print (str(e)) print(str(e))
context.close() context.close()
browser.close() browser.close()
raise PageUnloadable(url=url, status_code=None) raise PageUnloadable(url=url, status_code=None)
@@ -335,7 +361,7 @@ class base_html_playwright(Fetcher):
if response is None: if response is None:
context.close() context.close()
browser.close() browser.close()
print ("response object was none") print("response object was none")
raise EmptyReply(url=url, status_code=None) raise EmptyReply(url=url, status_code=None)
# Bug 2(?) Set the viewport size AFTER loading the page # Bug 2(?) Set the viewport size AFTER loading the page
@@ -344,18 +370,27 @@ class base_html_playwright(Fetcher):
time.sleep(extra_wait) time.sleep(extra_wait)
if self.webdriver_js_execute_code is not None: if self.webdriver_js_execute_code is not None:
try:
page.evaluate(self.webdriver_js_execute_code) page.evaluate(self.webdriver_js_execute_code)
time.sleep(2) except Exception as e:
# Is it possible to get a screenshot?
error_screenshot = False
try:
page.screenshot(type='jpeg',
clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024},
quality=1)
# The actual screenshot
error_screenshot = page.screenshot(type='jpeg',
full_page=True,
quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
except Exception as s:
pass
raise JSActionExceptions(status_code=response.status, screenshot=error_screenshot, message=str(e), url=url)
self.content = page.content() self.content = page.content()
self.status_code = response.status self.status_code = response.status
if len(self.content.strip()) == 0:
context.close()
browser.close()
print ("Content was empty")
raise EmptyReply(url=url, status_code=None)
self.headers = response.all_headers() self.headers = response.all_headers()
if current_css_filter is not None: if current_css_filter is not None:
@@ -382,9 +417,17 @@ class base_html_playwright(Fetcher):
browser.close() browser.close()
raise ScreenshotUnavailable(url=url, status_code=None) raise ScreenshotUnavailable(url=url, status_code=None)
if len(self.content.strip()) == 0:
context.close()
browser.close()
print("Content was empty")
raise EmptyReply(url=url, status_code=None, screenshot=self.screenshot)
context.close() context.close()
browser.close() browser.close()
if not ignore_status_codes and self.status_code!=200:
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, page_html=self.content, screenshot=self.screenshot)
class base_html_webdriver(Fetcher): class base_html_webdriver(Fetcher):
if os.getenv("WEBDRIVER_URL"): if os.getenv("WEBDRIVER_URL"):
@@ -512,7 +555,7 @@ class html_requests(Fetcher):
ignore_status_codes=False, ignore_status_codes=False,
current_css_filter=None): current_css_filter=None):
proxies={} proxies = {}
# Allows override the proxy on a per-request basis # Allows override the proxy on a per-request basis
if self.proxy_override: if self.proxy_override:
@@ -540,10 +583,14 @@ class html_requests(Fetcher):
if encoding: if encoding:
r.encoding = encoding r.encoding = encoding
if not r.content or not len(r.content):
raise EmptyReply(url=url, status_code=r.status_code)
# @todo test this # @todo test this
# @todo maybe you really want to test zero-byte return pages? # @todo maybe you really want to test zero-byte return pages?
if (not ignore_status_codes and not r) or not r.content or not len(r.content): if r.status_code != 200 and not ignore_status_codes:
raise EmptyReply(url=url, status_code=r.status_code) # maybe check with content works?
raise Non200ErrorCodeReceived(url=url, status_code=r.status_code, page_html=r.text)
self.status_code = r.status_code self.status_code = r.status_code
self.content = r.text self.content = r.text

View File

@@ -94,7 +94,7 @@ class perform_site_check():
url = self.datastore.get_val(uuid, 'url') url = self.datastore.get_val(uuid, 'url')
request_body = self.datastore.get_val(uuid, 'body') request_body = self.datastore.get_val(uuid, 'body')
request_method = self.datastore.get_val(uuid, 'method') request_method = self.datastore.get_val(uuid, 'method')
ignore_status_code = self.datastore.get_val(uuid, 'ignore_status_codes') ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False)
# source: support # source: support
is_source = False is_source = False
@@ -124,7 +124,7 @@ class perform_site_check():
if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip(): if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip():
fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code'] fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code']
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code, watch['css_filter']) fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['css_filter'])
fetcher.quit() fetcher.quit()
# Fetching complete, now filters # Fetching complete, now filters
@@ -210,7 +210,7 @@ class perform_site_check():
# Treat pages with no renderable text content as a change? No by default # Treat pages with no renderable text content as a change? No by default
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False) empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0: if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=200) raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=fetcher.get_last_status_code(), screenshot=screenshot)
# We rely on the actual text in the html output.. many sites have random script vars etc, # We rely on the actual text in the html output.. many sites have random script vars etc,
# in the future we'll implement other mechanisms. # in the future we'll implement other mechanisms.

View File

@@ -38,6 +38,7 @@ class model(dict):
'notification_format': default_notification_format, 'notification_format': default_notification_format,
'notification_muted': False, 'notification_muted': False,
'css_filter': '', 'css_filter': '',
'last_error': False,
'extract_text': [], # Extract text by regex after filters 'extract_text': [], # Extract text by regex after filters
'subtractive_selectors': [], 'subtractive_selectors': [],
'trigger_text': [], # List of text or regex to wait for until a change is detected 'trigger_text': [], # List of text or regex to wait for until a change is detected
@@ -122,19 +123,17 @@ class model(dict):
bump = self.history bump = self.history
return self.__newest_history_key return self.__newest_history_key
# Save some text file to the appropriate path and bump the history # Save some text file to the appropriate path and bump the history
# result_obj from fetch_site_status.run() # result_obj from fetch_site_status.run()
def save_history_text(self, contents, timestamp): def save_history_text(self, contents, timestamp):
import uuid import uuid
from os import mkdir, path, unlink
import logging import logging
output_path = "{}/{}".format(self.__datastore_path, self['uuid']) output_path = "{}/{}".format(self.__datastore_path, self['uuid'])
# Incase the operator deleted it, check and create. # Incase the operator deleted it, check and create.
if not os.path.isdir(output_path): if not os.path.isdir(output_path):
mkdir(output_path) os.mkdir(output_path)
snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4()) snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
logging.debug("Saving history text {}".format(snapshot_fname)) logging.debug("Saving history text {}".format(snapshot_fname))
@@ -172,7 +171,7 @@ class model(dict):
return seconds return seconds
# Iterate over all history texts and see if something new exists # Iterate over all history texts and see if something new exists
def lines_contain_something_unique_compared_to_history(self, lines=[]): def lines_contain_something_unique_compared_to_history(self, lines: list):
local_lines = set([l.decode('utf-8').strip().lower() for l in lines]) local_lines = set([l.decode('utf-8').strip().lower() for l in lines])
# Compare each lines (set) against each history text file (set) looking for something new.. # Compare each lines (set) against each history text file (set) looking for something new..
@@ -184,3 +183,51 @@ class model(dict):
# Check that everything in local_lines(new stuff) already exists in existing_history - it should # Check that everything in local_lines(new stuff) already exists in existing_history - it should
# if not, something new happened # if not, something new happened
return not local_lines.issubset(existing_history) return not local_lines.issubset(existing_history)
def get_screenshot(self):
    """Return the path of this watch's 'last-screenshot.png', or False when the file is absent."""
    screenshot_path = os.path.join(self.__datastore_path, self['uuid'], "last-screenshot.png")
    return screenshot_path if os.path.isfile(screenshot_path) else False
def __get_file_ctime(self, filename):
fname = os.path.join(self.__datastore_path, self['uuid'], filename)
if os.path.isfile(fname):
return int(os.path.getmtime(fname))
return False
@property
def error_text_ctime(self):
return self.__get_file_ctime('last-error.txt')
@property
def snapshot_text_ctime(self):
if self.history_n==0:
return False
timestamp = list(self.history.keys())[-1]
return int(timestamp)
@property
def snapshot_screenshot_ctime(self):
return self.__get_file_ctime('last-screenshot.png')
@property
def snapshot_error_screenshot_ctime(self):
return self.__get_file_ctime('last-error-screenshot.png')
def get_error_text(self):
    """Return the text saved from a previous request that resulted in a non-200 error.

    The text is read from 'last-error.txt' in this watch's data directory;
    False is returned when that file does not exist.
    """
    error_file = os.path.join(self.__datastore_path, self['uuid'], "last-error.txt")
    if not os.path.isfile(error_file):
        return False

    with open(error_file, 'r') as handle:
        return handle.read()
def get_error_snapshot(self):
    """Return path to the screenshot that resulted in a non-200 error, or False when none is stored."""
    error_screenshot = os.path.join(self.__datastore_path, self['uuid'], "last-error-screenshot.png")
    return error_screenshot if os.path.isfile(error_screenshot) else False

View File

@@ -32,6 +32,7 @@ docker run -d --name $$-test_selenium -p 4444:4444 --rm --shm-size="2g" seleni
sleep 5 sleep 5
export WEBDRIVER_URL=http://localhost:4444/wd/hub export WEBDRIVER_URL=http://localhost:4444/wd/hub
pytest tests/fetchers/test_content.py pytest tests/fetchers/test_content.py
pytest tests/test_errorhandling.py
unset WEBDRIVER_URL unset WEBDRIVER_URL
docker kill $$-test_selenium docker kill $$-test_selenium
@@ -43,5 +44,7 @@ docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-siz
sleep 5 sleep 5
export PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 export PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000
pytest tests/fetchers/test_content.py pytest tests/fetchers/test_content.py
pytest tests/test_errorhandling.py
unset PLAYWRIGHT_DRIVER_URL unset PLAYWRIGHT_DRIVER_URL
docker kill $$-test_browserless docker kill $$-test_browserless

View File

@@ -10,7 +10,13 @@ $(document).ready(function () {
if (hash_name === '#screenshot') { if (hash_name === '#screenshot') {
$("img#screenshot-img").attr('src', screenshot_url); $("img#screenshot-img").attr('src', screenshot_url);
$("#settings").hide(); $("#settings").hide();
} else { } else if (hash_name === '#error-screenshot') {
$("img#error-screenshot-img").attr('src', error_screenshot_url);
$("#settings").hide();
}
else {
$("#settings").show(); $("#settings").show();
} }
} }

View File

@@ -1,12 +1,6 @@
// Rewrite this is a plugin.. is all this JS really 'worth it?' // Rewrite this is a plugin.. is all this JS really 'worth it?'
window.addEventListener('hashchange', function () {
if(!window.location.hash) {
var tab=document.querySelectorAll("#default-tab a");
tab[0].click();
}
window.addEventListener('hashchange', function() {
var tabs = document.getElementsByClassName('active'); var tabs = document.getElementsByClassName('active');
while (tabs[0]) { while (tabs[0]) {
tabs[0].classList.remove('active') tabs[0].classList.remove('active')
@@ -14,11 +8,10 @@ window.addEventListener('hashchange', function() {
set_active_tab(); set_active_tab();
}, false); }, false);
var has_errors=document.querySelectorAll(".messages .error"); var has_errors = document.querySelectorAll(".messages .error");
if (!has_errors.length) { if (!has_errors.length) {
if (document.location.hash == "" ) { if (document.location.hash == "") {
document.location.hash = "#general"; document.querySelector(".tabs ul li:first-child a").click();
document.getElementById("default-tab").className = "active";
} else { } else {
set_active_tab(); set_active_tab();
} }
@@ -27,9 +20,9 @@ if (!has_errors.length) {
} }
function set_active_tab() { function set_active_tab() {
var tab=document.querySelectorAll("a[href='"+location.hash+"']"); var tab = document.querySelectorAll("a[href='" + location.hash + "']");
if (tab.length) { if (tab.length) {
tab[0].parentElement.className="active"; tab[0].parentElement.className = "active";
} }
// hash could move the page down // hash could move the page down
window.scrollTo(0, 0); window.scrollTo(0, 0);
@@ -38,12 +31,12 @@ function set_active_tab() {
function focus_error_tab() { function focus_error_tab() {
// time to use jquery or vuejs really, // time to use jquery or vuejs really,
// activate the tab with the error // activate the tab with the error
var tabs = document.querySelectorAll('.tabs li a'),i; var tabs = document.querySelectorAll('.tabs li a'), i;
for (i = 0; i < tabs.length; ++i) { for (i = 0; i < tabs.length; ++i) {
var tab_name=tabs[i].hash.replace('#',''); var tab_name = tabs[i].hash.replace('#', '');
var pane_errors=document.querySelectorAll('#'+tab_name+' .error') var pane_errors = document.querySelectorAll('#' + tab_name + ' .error')
if (pane_errors.length) { if (pane_errors.length) {
document.location.hash = '#'+tab_name; document.location.hash = '#' + tab_name;
return true; return true;
} }
} }

View File

@@ -539,3 +539,13 @@ ul {
100% { 100% {
-webkit-transform: rotate(360deg); -webkit-transform: rotate(360deg);
transform: rotate(360deg); } } transform: rotate(360deg); } }
.snapshot-age {
padding: 4px;
background-color: #dfdfdf;
border-radius: 3px;
font-weight: bold;
margin-bottom: 4px; }
.snapshot-age.error {
background-color: #ff0000;
color: #fff; }

View File

@@ -771,3 +771,15 @@ ul {
} }
} }
.snapshot-age {
padding: 4px;
background-color: #dfdfdf;
border-radius: 3px;
font-weight: bold;
margin-bottom: 4px;
&.error {
background-color: #ff0000;
color: #fff;
}
}

View File

@@ -336,14 +336,6 @@ class ChangeDetectionStore:
self.sync_to_json() self.sync_to_json()
return new_uuid return new_uuid
def get_screenshot(self, watch_uuid):
output_path = "{}/{}".format(self.datastore_path, watch_uuid)
fname = "{}/last-screenshot.png".format(output_path)
if path.isfile(fname):
return fname
return False
def visualselector_data_is_ready(self, watch_uuid): def visualselector_data_is_ready(self, watch_uuid):
output_path = "{}/{}".format(self.datastore_path, watch_uuid) output_path = "{}/{}".format(self.datastore_path, watch_uuid)
screenshot_filename = "{}/last-screenshot.png".format(output_path) screenshot_filename = "{}/last-screenshot.png".format(output_path)
@@ -354,17 +346,32 @@ class ChangeDetectionStore:
return False return False
# Save as PNG, PNG is larger but better for doing visual diff in the future # Save as PNG, PNG is larger but better for doing visual diff in the future
def save_screenshot(self, watch_uuid, screenshot: bytes, as_error=False):
    """Write the raw screenshot bytes into the watch's data directory.

    Args:
        watch_uuid: identifier of the watch; also the name of its sub-directory
            under ``self.datastore_path``.
        screenshot: image bytes to store.
        as_error: when true the bytes go to 'last-error-screenshot.png' so the
            regular 'last-screenshot.png' is left untouched.
    """
    filename = "last-error-screenshot.png" if as_error else "last-screenshot.png"
    target_path = os.path.join(self.datastore_path, watch_uuid, filename)

    # The context manager closes the file; no explicit close() is needed
    with open(target_path, 'wb') as f:
        f.write(screenshot)
def save_error_text(self, watch_uuid, contents):
    """Store ``contents`` as 'last-error.txt' inside the watch's data directory."""
    error_file = os.path.join(self.datastore_path, watch_uuid, "last-error.txt")
    with open(error_file, 'w') as handle:
        handle.write(contents)
def save_xpath_data(self, watch_uuid, data, as_error=False):
    """Serialise ``data`` as JSON into the watch's data directory.

    Args:
        watch_uuid: identifier of the watch; also the name of its sub-directory
            under ``self.datastore_path``.
        data: JSON-serialisable element data.
        as_error: when true the data goes to 'elements-error.json'; otherwise it
            goes to the regular 'elements.json'.
    """
    # The branches were previously swapped: a default call must keep writing
    # 'elements.json', and only error runs use the separate '-error' file.
    filename = "elements-error.json" if as_error else "elements.json"
    target_path = os.path.join(self.datastore_path, watch_uuid, filename)

    # The context manager closes the file; no explicit close() is needed
    with open(target_path, 'w') as f:
        f.write(json.dumps(data))

View File

@@ -3,6 +3,9 @@
{% block content %} {% block content %}
<script> <script>
const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}"; const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
{% if last_error_screenshot %}
const error_screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid, error_screenshot=1) }}";
{% endif %}
</script> </script>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script> <script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script>
@@ -43,15 +46,31 @@
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script> <script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<div class="tabs"> <div class="tabs">
<ul> <ul>
<li class="tab" id="default-tab"><a href="#text">Text</a></li> {% if last_error_text %}<li class="tab" id="error-text-tab"><a href="#error-text">Error Text</a></li> {% endif %}
{% if last_error_screenshot %}<li class="tab" id="error-screenshot-tab"><a href="#error-screenshot">Error Screenshot</a></li> {% endif %}
<li class="tab" id=""><a href="#text">Text</a></li>
<li class="tab" id="screenshot-tab"><a href="#screenshot">Screenshot</a></li> <li class="tab" id="screenshot-tab"><a href="#screenshot">Screenshot</a></li>
</ul> </ul>
</div> </div>
<div id="diff-ui"> <div id="diff-ui">
<div class="tab-pane-inner" id="error-text">
<div class="snapshot-age error">{{watch_a.error_text_ctime|format_seconds_ago}} seconds ago</div>
<pre>
{{ last_error_text }}
</pre>
</div>
<div class="tab-pane-inner" id="error-screenshot">
<div class="snapshot-age error">{{watch_a.snapshot_error_screenshot_ctime|format_seconds_ago}} seconds ago</div>
<img id="error-screenshot-img" style="max-width: 80%" alt="Current error-ing screenshot from most recent request"/>
</div>
<div class="tab-pane-inner" id="text"> <div class="tab-pane-inner" id="text">
<div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored. <div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored.
</div> </div>
<div class="snapshot-age">{{watch_a.snapshot_text_ctime|format_timestamp_timeago}}</div>
<table> <table>
<tbody> <tbody>
<tr> <tr>
@@ -70,9 +89,9 @@
<div class="tip"> <div class="tip">
For now, Differences are performed on text, not graphically, only the latest screenshot is available. For now, Differences are performed on text, not graphically, only the latest screenshot is available.
</div> </div>
</br>
{% if is_html_webdriver %} {% if is_html_webdriver %}
{% if screenshot %} {% if screenshot %}
<div class="snapshot-age">{{watch_a.snapshot_screenshot_ctime|format_timestamp_timeago}}</div>
<img style="max-width: 80%" id="screenshot-img" alt="Current screenshot from most recent request"/> <img style="max-width: 80%" id="screenshot-img" alt="Current screenshot from most recent request"/>
{% else %} {% else %}
No screenshot available just yet! Try rechecking the page. No screenshot available just yet! Try rechecking the page.
@@ -88,7 +107,6 @@
<script defer=""> <script defer="">
var a = document.getElementById('a'); var a = document.getElementById('a');
var b = document.getElementById('b'); var b = document.getElementById('b');
var result = document.getElementById('result'); var result = document.getElementById('result');

View File

@@ -23,7 +23,7 @@
<div class="tabs collapsable"> <div class="tabs collapsable">
<ul> <ul>
<li class="tab" id="default-tab"><a href="#general">General</a></li> <li class="tab" id=""><a href="#general">General</a></li>
<li class="tab"><a href="#request">Request</a></li> <li class="tab"><a href="#request">Request</a></li>
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li> <li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
<li class="tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li> <li class="tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
@@ -87,6 +87,9 @@
</span> </span>
</div> </div>
{% endif %} {% endif %}
<div class="pure-control-group inline-radio">
{{ render_checkbox_field(form.ignore_status_codes) }}
</div>
<fieldset id="webdriver-override-options"> <fieldset id="webdriver-override-options">
<div class="pure-control-group"> <div class="pure-control-group">
{{ render_field(form.webdriver_delay) }} {{ render_field(form.webdriver_delay) }}
@@ -128,11 +131,7 @@ User-Agent: wonderbra 1.0") }}
\"car\":null \"car\":null
}") }} }") }}
</div> </div>
<div id="ignore-status-codes-option">
{{ render_checkbox_field(form.ignore_status_codes) }}
</div>
</fieldset> </fieldset>
<br/>
</div> </div>
<div class="tab-pane-inner" id="notifications"> <div class="tab-pane-inner" id="notifications">

View File

@@ -5,7 +5,7 @@
<div class="tabs collapsable"> <div class="tabs collapsable">
<ul> <ul>
<li class="tab" id="default-tab"><a href="#url-list">URL List</a></li> <li class="tab" id=""><a href="#url-list">URL List</a></li>
<li class="tab"><a href="#distill-io">Distill.io</a></li> <li class="tab"><a href="#distill-io">Distill.io</a></li>
</ul> </ul>
</div> </div>

View File

@@ -3,23 +3,39 @@
{% block content %} {% block content %}
<script> <script>
const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}"; const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
{% if last_error_screenshot %}
const error_screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid, error_screenshot=1) }}";
{% endif %}
</script> </script>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script> <script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script>
<div id="settings">
<h1>Current - {{watch.last_checked|format_timestamp_timeago}}</h1>
</div>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script> <script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<div class="tabs"> <div class="tabs">
<ul> <ul>
<li class="tab" id="default-tab"><a href="#text">Text</a></li> {% if last_error_text %}<li class="tab" id="error-text-tab"><a href="#error-text">Error Text</a></li> {% endif %}
{% if last_error_screenshot %}<li class="tab" id="error-screenshot-tab"><a href="#error-screenshot">Error Screenshot</a></li> {% endif %}
{% if history_n > 0 %}
<li class="tab" id="text-tab"><a href="#text">Text</a></li>
<li class="tab" id="screenshot-tab"><a href="#screenshot">Screenshot</a></li> <li class="tab" id="screenshot-tab"><a href="#screenshot">Screenshot</a></li>
{% endif %}
</ul> </ul>
</div> </div>
<div id="diff-ui"> <div id="diff-ui">
<div class="tab-pane-inner" id="error-text">
<div class="snapshot-age error">{{watch.error_text_ctime|format_seconds_ago}} seconds ago</div>
<pre>
{{ last_error_text }}
</pre>
</div>
<div class="tab-pane-inner" id="error-screenshot">
<div class="snapshot-age error">{{watch.snapshot_error_screenshot_ctime|format_seconds_ago}} seconds ago</div>
<img id="error-screenshot-img" style="max-width: 80%" alt="Current erroring screenshot from most recent request"/>
</div>
<div class="tab-pane-inner" id="text"> <div class="tab-pane-inner" id="text">
<div class="snapshot-age">{{watch.snapshot_text_ctime|format_timestamp_timeago}}</div>
<span class="ignored">Grey lines are ignored</span> <span class="triggered">Blue lines are triggers</span> <span class="ignored">Grey lines are ignored</span> <span class="triggered">Blue lines are triggers</span>
<table> <table>
<tbody> <tbody>
@@ -33,6 +49,7 @@
</tbody> </tbody>
</table> </table>
</div> </div>
<div class="tab-pane-inner" id="screenshot"> <div class="tab-pane-inner" id="screenshot">
<div class="tip"> <div class="tip">
For now, Differences are performed on text, not graphically, only the latest screenshot is available. For now, Differences are performed on text, not graphically, only the latest screenshot is available.

View File

@@ -16,7 +16,7 @@
<div class="edit-form"> <div class="edit-form">
<div class="tabs collapsable"> <div class="tabs collapsable">
<ul> <ul>
<li class="tab" id="default-tab"><a href="#general">General</a></li> <li class="tab" id=""><a href="#general">General</a></li>
<li class="tab"><a href="#notifications">Notifications</a></li> <li class="tab"><a href="#notifications">Notifications</a></li>
<li class="tab"><a href="#fetching">Fetching</a></li> <li class="tab"><a href="#fetching">Fetching</a></li>
<li class="tab"><a href="#filters">Global Filters</a></li> <li class="tab"><a href="#filters">Global Filters</a></li>

View File

@@ -90,7 +90,7 @@
{% if watch.history_n >= 2 %} {% if watch.history_n >= 2 %}
<a href="{{ url_for('diff_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Diff</a> <a href="{{ url_for('diff_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Diff</a>
{% else %} {% else %}
{% if watch.history_n == 1 %} {% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%}
<a href="{{ url_for('preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary">Preview</a> <a href="{{ url_for('preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary">Preview</a>
{% endif %} {% endif %}
{% endif %} {% endif %}

View File

@@ -11,16 +11,17 @@ def test_setup(live_server):
live_server_setup(live_server) live_server_setup(live_server)
def test_error_handler(client, live_server): def _runner_test_http_errors(client, live_server, http_code, expected_text):
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("Now you going to get a {} error code\n".format(http_code))
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_endpoint', test_url = url_for('test_endpoint',
status_code=403, status_code=http_code,
_external=True) _external=True)
res = client.post( res = client.post(
url_for("import_page"), url_for("import_page"),
data={"urls": test_url}, data={"urls": test_url},
@@ -29,15 +30,38 @@ def test_error_handler(client, live_server):
assert b"1 Imported" in res.data assert b"1 Imported" in res.data
# Give the thread time to pick it up # Give the thread time to pick it up
time.sleep(3) time.sleep(2)
res = client.get(url_for("index")) res = client.get(url_for("index"))
# no change
assert b'unviewed' not in res.data assert b'unviewed' not in res.data
assert b'Status Code 403' in res.data assert bytes(expected_text.encode('utf-8')) in res.data
assert bytes("just now".encode('utf-8')) in res.data
# Error viewing tabs should appear
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b'Error Text' in res.data
# 'Error Screenshot' only when in playwright mode
#assert b'Error Screenshot' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_http_error_handler(client, live_server):
    """Run the shared HTTP-error scenario once per status code under test."""
    # (status code served by the test endpoint, text expected in the index page)
    scenarios = (
        (403, 'Access denied'),
        (404, 'Page not found'),
        (500, '(Internal server Error) received'),
        (400, 'Error - Request returned a HTTP error code 400'),
    )
    for status_code, message in scenarios:
        _runner_test_http_errors(client, live_server, status_code, message)
# Just to be sure error text is properly handled # Just to be sure error text is properly handled
def test_error_text_handler(client, live_server): def test_DNS_errors(client, live_server):
# Give the endpoint time to spin up # Give the endpoint time to spin up
time.sleep(1) time.sleep(1)
@@ -54,5 +78,6 @@ def test_error_text_handler(client, live_server):
res = client.get(url_for("index")) res = client.get(url_for("index"))
assert b'Name or service not known' in res.data assert b'Name or service not known' in res.data
# Should always record that we tried
assert bytes("just now".encode('utf-8')) in res.data assert bytes("just now".encode('utf-8')) in res.data

View File

@@ -137,54 +137,3 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server):
res = client.get(url_for("index")) res = client.get(url_for("index"))
assert b'unviewed' in res.data assert b'unviewed' in res.data
# Tests the whole stack reports the HTTP error when status codes are NOT ignored
def test_403_page_check_fails_without_ignore_status_code(client, live_server):
sleep_time_for_fetch_thread = 3
set_original_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL (an endpoint that replies with status code 403) via the import page
test_url = url_for('test_endpoint', status_code=403, _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# Save the watch through the edit page. 'ignore_status_codes' is absent from
# the submitted data - presumably this leaves the option switched off (confirm against the form)
res = client.post(
url_for("edit_page", uuid="first"),
data={"url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# Make a change
set_some_changed_response()
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# The index page should report the 403 status for this watch
res = client.get(url_for("index"))
assert b'Status Code 403' in res.data

View File

@@ -1,3 +1,4 @@
import os
import threading import threading
import queue import queue
import time import time
@@ -107,6 +108,14 @@ class update_worker(threading.Thread):
self.notification_q.put(n_object) self.notification_q.put(n_object)
print("Sent filter not found notification for {}".format(watch_uuid)) print("Sent filter not found notification for {}".format(watch_uuid))
def cleanup_error_artifacts(self, uuid):
    """Remove the error artifacts left over from an earlier failed check.

    Deletes ``last-error-screenshot.png`` and ``last-error.txt`` from the
    directory ``<datastore_path>/<uuid>`` when they exist as regular files.
    Any other file in that directory is left untouched, and a missing
    directory or missing artifact is not an error.

    :param uuid: UUID of the watch whose error artifacts should be removed.
    """
    # All went fine, remove error artifacts
    watch_dir = os.path.join(self.datastore.datastore_path, uuid)
    for filename in ("last-error-screenshot.png", "last-error.txt"):
        full_path = os.path.join(watch_dir, filename)
        if not os.path.isfile(full_path):
            continue
        try:
            os.unlink(full_path)
        except FileNotFoundError:
            # The file disappeared between the isfile() check and the unlink
            # (for example the watch was deleted while the check was still
            # running) - there is nothing left to clean up.
            pass
def run(self): def run(self):
from changedetectionio import fetch_site_status from changedetectionio import fetch_site_status
@@ -146,7 +155,31 @@ class update_worker(threading.Thread):
# Totally fine, it's by choice - just continue on, nothing more to care about # Totally fine, it's by choice - just continue on, nothing more to care about
# Page had elements/content but no renderable text # Page had elements/content but no renderable text
# Backend (not filters) gave zero output # Backend (not filters) gave zero output
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."}) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (With {} reply code).".format(e.status_code)})
if e.screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot)
process_changedetection_results = False
except content_fetcher.Non200ErrorCodeReceived as e:
if e.status_code == 403:
err_text = "Error - 403 (Access denied) received"
elif e.status_code == 404:
err_text = "Error - 404 (Page not found) received"
elif e.status_code == 500:
err_text = "Error - 500 (Internal server Error) received"
else:
err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code))
if e.screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
if e.xpath_data:
self.datastore.save_xpath_data(watch_uuid=uuid, data=e.xpath_data, as_error=True)
if e.page_text:
self.datastore.save_error_text(watch_uuid=uuid, contents=e.page_text)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
# So that we get a trigger when the content is added again
'previous_md5': ''})
process_changedetection_results = False process_changedetection_results = False
except FilterNotFoundInResponse as e: except FilterNotFoundInResponse as e:
@@ -182,8 +215,17 @@ class update_worker(threading.Thread):
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code}) 'last_check_status': e.status_code})
process_changedetection_results = False process_changedetection_results = False
except content_fetcher.JSActionExceptions as e:
err_text = "Error running JS Actions - Page request - "+e.message
if e.screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
except content_fetcher.PageUnloadable as e: except content_fetcher.PageUnloadable as e:
err_text = "Page request from server didnt respond correctly" err_text = "Page request from server didnt respond correctly"
if e.screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code}) 'last_check_status': e.status_code})
except Exception as e: except Exception as e:
@@ -192,9 +234,13 @@ class update_worker(threading.Thread):
# Other serious error # Other serious error
process_changedetection_results = False process_changedetection_results = False
else: else:
# Mark that we never had any failures # Mark that we never had any failures
if not self.datastore.data['watching'][uuid].get('ignore_status_codes'):
update_obj['consecutive_filter_failures'] = 0 update_obj['consecutive_filter_failures'] = 0
self.cleanup_error_artifacts(uuid)
# Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc) # Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
if not self.datastore.data['watching'].get(uuid): if not self.datastore.data['watching'].get(uuid):
continue continue