* Re #342 - check for accidental python byte encoding of non-utf8/string, check return type of fetcher and fix encoding of notification content
This commit is contained in:
@@ -14,7 +14,7 @@ class EmptyReply(Exception):
|
|||||||
class Fetcher():
|
class Fetcher():
|
||||||
error = None
|
error = None
|
||||||
status_code = None
|
status_code = None
|
||||||
content = None # Should be bytes?
|
content = None # Should always be bytes.
|
||||||
|
|
||||||
fetcher_description ="No description"
|
fetcher_description ="No description"
|
||||||
|
|
||||||
@@ -129,7 +129,6 @@ class html_webdriver(Fetcher):
|
|||||||
# driver.quit() seems to cause better exceptions
|
# driver.quit() seems to cause better exceptions
|
||||||
driver.quit()
|
driver.quit()
|
||||||
|
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# "html_requests" is listed as the default fetcher in store.py!
|
# "html_requests" is listed as the default fetcher in store.py!
|
||||||
@@ -146,6 +145,8 @@ class html_requests(Fetcher):
|
|||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
verify=False)
|
verify=False)
|
||||||
|
|
||||||
|
# https://stackoverflow.com/questions/44203397/python-requests-get-returns-improperly-decoded-text-instead-of-utf-8
|
||||||
|
# Return bytes here
|
||||||
html = r.text
|
html = r.text
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -367,6 +367,10 @@ class ChangeDetectionStore:
|
|||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
output_path = "{}/{}".format(self.datastore_path, watch_uuid)
|
output_path = "{}/{}".format(self.datastore_path, watch_uuid)
|
||||||
|
# In case the operator deleted it, check and create.
|
||||||
|
if not os.path.isdir(output_path):
|
||||||
|
mkdir(output_path)
|
||||||
|
|
||||||
fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
|
fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
|
||||||
with open(fname, 'wb') as f:
|
with open(fname, 'wb') as f:
|
||||||
f.write(contents)
|
f.write(contents)
|
||||||
|
|||||||
@@ -159,6 +159,9 @@ def test_check_notification(client, live_server):
|
|||||||
|
|
||||||
with open("test-datastore/notification.txt", "r") as f:
|
with open("test-datastore/notification.txt", "r") as f:
|
||||||
notification_submission = f.read()
|
notification_submission = f.read()
|
||||||
|
print ("Notification submission was:", notification_submission)
|
||||||
|
# Re #342 - check for accidental python byte encoding of non-utf8/string
|
||||||
|
assert "b'" not in notification_submission
|
||||||
|
|
||||||
assert re.search('Watch UUID: [0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}', notification_submission, re.IGNORECASE)
|
assert re.search('Watch UUID: [0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}', notification_submission, re.IGNORECASE)
|
||||||
assert "Watch title: my title" in notification_submission
|
assert "Watch title: my title" in notification_submission
|
||||||
|
|||||||
@@ -2,7 +2,12 @@ import threading
|
|||||||
import queue
|
import queue
|
||||||
import time
|
import time
|
||||||
|
|
||||||
# Requests for checking on the site use a pool of thread Workers managed by a Queue.
|
# A single update worker
|
||||||
|
#
|
||||||
|
# Requests for checking on a single site(watch) from a queue of watches
|
||||||
|
# (another process inserts watches into the queue that are time-ready for checking)
|
||||||
|
|
||||||
|
|
||||||
class update_worker(threading.Thread):
|
class update_worker(threading.Thread):
|
||||||
current_uuid = None
|
current_uuid = None
|
||||||
|
|
||||||
@@ -39,6 +44,13 @@ class update_worker(threading.Thread):
|
|||||||
now = time.time()
|
now = time.time()
|
||||||
changed_detected, update_obj, contents = update_handler.run(uuid)
|
changed_detected, update_obj, contents = update_handler.run(uuid)
|
||||||
|
|
||||||
|
# Re #342
|
||||||
|
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
|
||||||
|
# We then convert it with .decode('utf-8') for the notification, etc.
|
||||||
|
if not isinstance(contents, (bytes, bytearray)):
|
||||||
|
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
|
||||||
|
|
||||||
|
|
||||||
# Always record that we at least tried
|
# Always record that we at least tried
|
||||||
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3)})
|
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3)})
|
||||||
|
|
||||||
@@ -111,7 +123,7 @@ class update_worker(threading.Thread):
|
|||||||
n_object.update({
|
n_object.update({
|
||||||
'watch_url': watch['url'],
|
'watch_url': watch['url'],
|
||||||
'uuid': uuid,
|
'uuid': uuid,
|
||||||
'current_snapshot': str(contents),
|
'current_snapshot': contents.decode('utf-8'),
|
||||||
'diff_full': diff.render_diff(prev_fname, fname, line_feed_sep=line_feed_sep),
|
'diff_full': diff.render_diff(prev_fname, fname, line_feed_sep=line_feed_sep),
|
||||||
'diff': diff.render_diff(prev_fname, fname, True, line_feed_sep=line_feed_sep)
|
'diff': diff.render_diff(prev_fname, fname, True, line_feed_sep=line_feed_sep)
|
||||||
})
|
})
|
||||||
|
|||||||
Reference in New Issue
Block a user