Option to control whether pages with no renderable content count as a change (example: JS web apps that sometimes don't render any text) (#608)

This commit is contained in:
dgtlmoon
2022-05-17 22:22:00 +02:00
committed by GitHub
parent 16809b48f8
commit 6734fb91a2
7 changed files with 125 additions and 2 deletions

View File

@@ -184,6 +184,11 @@ class perform_site_check():
# Re #340 - return the content before the 'ignore text' was applied
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
# Should pages with no renderable text content be treated as a change? Defaults to no.
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=200)
# We rely on the actual text in the HTML output; many sites have random script vars etc.
# In the future we'll implement other mechanisms.