New filter - Block change-detection if text matches - for example, block change-detection while the text "out of stock" is on the page, know when the text is no longer on the page (#698)

This commit is contained in:
dgtlmoon
2022-06-15 22:56:43 +02:00
parent bb732d3d2e
commit 7da32f9ac3
9 changed files with 198 additions and 20 deletions

View File

@@ -225,25 +225,40 @@ class perform_site_check():
fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest()
############ Blocking rules, after checksum #################
blocked_by_not_found_trigger_text = False
blocked = False
if len(watch['trigger_text']):
# Yeah, lets block first until something matches
blocked_by_not_found_trigger_text = True
# Assume blocked
blocked = True
# Filter and trigger works the same, so reuse it
# It should return the line numbers that match
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
wordlist=watch['trigger_text'],
mode="line numbers")
# If it returned any lines that matched..
# Unblock if the trigger was found
if result:
blocked_by_not_found_trigger_text = False
blocked = False
if not blocked_by_not_found_trigger_text and watch['previous_md5'] != fetched_md5:
if len(watch['text_should_not_be_present']):
# If anything matched, then we should block a change from happening
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
wordlist=watch['text_should_not_be_present'],
mode="line numbers")
if result:
blocked = True
# The main thing that all this at the moment comes down to :)
if watch['previous_md5'] != fetched_md5:
changed_detected = True
# Looks like something changed, but did it match all the rules?
if blocked:
changed_detected = False
else:
update_obj["last_changed"] = timestamp
# Extract title as title
if is_html:
if self.datastore.data['settings']['application']['extract_title_as_title'] or watch['extract_title_as_title']:
@@ -257,5 +272,4 @@ class perform_site_check():
if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5
return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot, fetcher.xpath_data