Re #133 Option for ignoring whitespacing (#345)

* Global setting option to ignore whitespace when detecting a change
This commit is contained in:
dgtlmoon
2022-01-02 22:28:34 +01:00
committed by GitHub
parent 489671dcca
commit b5c1fce136
9 changed files with 196 additions and 76 deletions

View File

@@ -18,7 +18,8 @@ def cleanup(datastore_path):
'url-watches.json',
'notification.txt',
'count.txt',
'endpoint-content.txt']
'endpoint-content.txt'
]
for file in files:
try:
os.unlink("{}/{}".format(datastore_path, file))

View File

@@ -235,4 +235,4 @@ def test_check_global_ignore_text_functionality(client, live_server):
assert b'unviewed' in res.data
res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
assert b'Deleted' in res.data

View File

@@ -0,0 +1,96 @@
#!/usr/bin/python3
import time
from flask import url_for
from . util import live_server_setup
def test_setup(live_server):
live_server_setup(live_server)
# Should be the same as set_original_ignore_response() but with a little more whitespacing
def set_original_ignore_response_but_with_whitespace():
test_return_data = """<html>
<body>
Some initial text</br>
<p>
Which is across multiple lines</p>
<br>
</br>
So let's see what happens. </br>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def set_original_ignore_response():
test_return_data = """<html>
<body>
Some initial text</br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
# If there was only a change in the whitespacing, then we shouldnt have a change detected
def test_check_ignore_whitespace(client, live_server):
sleep_time_for_fetch_thread = 3
# Give the endpoint time to spin up
time.sleep(1)
set_original_ignore_response()
# Goto the settings page, add our ignore text
res = client.post(
url_for("settings_page"),
data={
"minutes_between_check": 180,
"ignore_whitespace": "y",
'fetch_backend': "html_requests"
},
follow_redirects=True
)
assert b"Settings updated." in res.data
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
set_original_ignore_response_but_with_whitespace()
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data