[new filter] Filter option - Trigger only when NEW content (lines) are detected ( compared to earlier text snapshots ) (#685)
This commit is contained in:
@@ -1,4 +1,5 @@
|
|||||||
import hashlib
|
import hashlib
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
@@ -262,6 +263,16 @@ class perform_site_check():
|
|||||||
if not watch['title'] or not len(watch['title']):
|
if not watch['title'] or not len(watch['title']):
|
||||||
update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
|
update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
|
||||||
|
|
||||||
|
if changed_detected:
|
||||||
|
if watch.get('check_unique_lines', False):
|
||||||
|
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
|
||||||
|
# One or more lines? unsure?
|
||||||
|
if not has_unique_lines:
|
||||||
|
logging.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid))
|
||||||
|
changed_detected = False
|
||||||
|
else:
|
||||||
|
logging.debug("check_unique_lines: UUID {} had unique content".format(uuid))
|
||||||
|
|
||||||
# Always record the new checksum
|
# Always record the new checksum
|
||||||
update_obj["previous_md5"] = fetched_md5
|
update_obj["previous_md5"] = fetched_md5
|
||||||
|
|
||||||
|
|||||||
@@ -340,6 +340,7 @@ class watchForm(commonSettingsForm):
|
|||||||
body = TextAreaField('Request body', [validators.Optional()])
|
body = TextAreaField('Request body', [validators.Optional()])
|
||||||
method = SelectField('Request method', choices=valid_method, default=default_method)
|
method = SelectField('Request method', choices=valid_method, default=default_method)
|
||||||
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
|
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
|
||||||
|
check_unique_lines = BooleanField('Only trigger when new lines appear', default=False)
|
||||||
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
|
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
|
||||||
text_should_not_be_present = StringListField('Block change-detection if text matches', [validators.Optional(), ValidateListRegex()])
|
text_should_not_be_present = StringListField('Block change-detection if text matches', [validators.Optional(), ValidateListRegex()])
|
||||||
|
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ class model(dict):
|
|||||||
'text_should_not_be_present': [], # Text that should not present
|
'text_should_not_be_present': [], # Text that should not present
|
||||||
'fetch_backend': None,
|
'fetch_backend': None,
|
||||||
'extract_title_as_title': False,
|
'extract_title_as_title': False,
|
||||||
|
'check_unique_lines': False, # On change-detected, compare against all history if its something new
|
||||||
'proxy': None, # Preferred proxy connection
|
'proxy': None, # Preferred proxy connection
|
||||||
# Re #110, so then if this is set to None, we know to use the default value instead
|
# Re #110, so then if this is set to None, we know to use the default value instead
|
||||||
# Requires setting to None on submit if it's the same as the default
|
# Requires setting to None on submit if it's the same as the default
|
||||||
@@ -163,3 +164,16 @@ class model(dict):
|
|||||||
if x:
|
if x:
|
||||||
seconds += x * n
|
seconds += x * n
|
||||||
return seconds
|
return seconds
|
||||||
|
|
||||||
|
# Iterate over all history texts and see if something new exists
|
||||||
|
def lines_contain_something_unique_compared_to_history(self, lines=[]):
|
||||||
|
local_lines = [l.decode('utf-8').strip().lower() for l in lines]
|
||||||
|
|
||||||
|
# Compare each lines (set) against each history text file (set) looking for something new..
|
||||||
|
for k, v in self.history.items():
|
||||||
|
alist = [line.decode('utf-8').strip().lower() for line in open(v, 'rb')]
|
||||||
|
res = set(alist) != set(local_lines)
|
||||||
|
if res:
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|||||||
@@ -147,6 +147,12 @@ User-Agent: wonderbra 1.0") }}
|
|||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
|
<fieldset>
|
||||||
|
<div class="pure-control-group">
|
||||||
|
{{ render_checkbox_field(form.check_unique_lines) }}
|
||||||
|
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
|
||||||
|
</div>
|
||||||
|
</fieldset>
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
{{ render_field(form.css_filter, placeholder=".class-name or #some-id, or other CSS selector rule.",
|
{{ render_field(form.css_filter, placeholder=".class-name or #some-id, or other CSS selector rule.",
|
||||||
class="m-d") }}
|
class="m-d") }}
|
||||||
|
|||||||
104
changedetectionio/tests/test_unique_lines.py
Normal file
104
changedetectionio/tests/test_unique_lines.py
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import time
|
||||||
|
from flask import url_for
|
||||||
|
from .util import live_server_setup
|
||||||
|
|
||||||
|
|
||||||
|
def set_original_ignore_response():
|
||||||
|
test_return_data = """<html>
|
||||||
|
<body>
|
||||||
|
<p>Some initial text</p>
|
||||||
|
<p>Which is across multiple lines</p>
|
||||||
|
<p>So let's see what happens.</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||||
|
f.write(test_return_data)
|
||||||
|
|
||||||
|
|
||||||
|
# The same but just re-ordered the text
|
||||||
|
def set_modified_swapped_lines():
|
||||||
|
# Re-ordered and with some whitespacing, should get stripped() too.
|
||||||
|
test_return_data = """<html>
|
||||||
|
<body>
|
||||||
|
<p>Some initial text</p>
|
||||||
|
<p> So let's see what happens.</p>
|
||||||
|
<p> Which is across multiple lines</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||||
|
f.write(test_return_data)
|
||||||
|
|
||||||
|
|
||||||
|
def set_modified_with_trigger_text_response():
|
||||||
|
test_return_data = """<html>
|
||||||
|
<body>
|
||||||
|
<p>Some initial text</p>
|
||||||
|
<p>So let's see what happens.</p>
|
||||||
|
<p>and a new line!</p>
|
||||||
|
<p>Which is across multiple lines</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||||
|
f.write(test_return_data)
|
||||||
|
|
||||||
|
|
||||||
|
def test_unique_lines_functionality(client, live_server):
|
||||||
|
live_server_setup(live_server)
|
||||||
|
|
||||||
|
sleep_time_for_fetch_thread = 3
|
||||||
|
|
||||||
|
set_original_ignore_response()
|
||||||
|
# Give the endpoint time to spin up
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
# Add our URL to the import page
|
||||||
|
test_url = url_for('test_endpoint', _external=True)
|
||||||
|
res = client.post(
|
||||||
|
url_for("import_page"),
|
||||||
|
data={"urls": test_url},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"1 Imported" in res.data
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
|
||||||
|
# Add our URL to the import page
|
||||||
|
res = client.post(
|
||||||
|
url_for("edit_page", uuid="first"),
|
||||||
|
data={"check_unique_lines": "y",
|
||||||
|
"url": test_url,
|
||||||
|
"fetch_backend": "html_requests"},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"Updated watch." in res.data
|
||||||
|
assert b'unviewed' not in res.data
|
||||||
|
|
||||||
|
# Make a change
|
||||||
|
set_modified_swapped_lines()
|
||||||
|
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
# Trigger a check
|
||||||
|
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
|
# Give the thread time to pick it up
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
|
||||||
|
# It should report nothing found (no new 'unviewed' class)
|
||||||
|
res = client.get(url_for("index"))
|
||||||
|
assert b'unviewed' not in res.data
|
||||||
|
|
||||||
|
|
||||||
|
# Now set the content which contains the new text and re-ordered existing text
|
||||||
|
set_modified_with_trigger_text_response()
|
||||||
|
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
res = client.get(url_for("index"))
|
||||||
|
assert b'unviewed' in res.data
|
||||||
|
|
||||||
Reference in New Issue
Block a user