Add filter to remove elements by CSS rule from HTML before change detection is run (#445)
This commit is contained in:
@@ -1,11 +1,11 @@
|
||||
import time
|
||||
from changedetectionio import content_fetcher
|
||||
from changedetectionio import html_tools
|
||||
import hashlib
|
||||
from inscriptis import get_text
|
||||
import urllib3
|
||||
from . import html_tools
|
||||
import re
|
||||
import time
|
||||
|
||||
import urllib3
|
||||
from inscriptis import get_text
|
||||
|
||||
from changedetectionio import content_fetcher, html_tools
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
@@ -72,8 +72,15 @@ class perform_site_check():
|
||||
is_json = 'application/json' in fetcher.headers.get('Content-Type', '')
|
||||
is_html = not is_json
|
||||
css_filter_rule = watch['css_filter']
|
||||
subtractive_selectors = watch.get(
|
||||
"subtractive_selectors", []
|
||||
) + self.datastore.data["settings"]["application"].get(
|
||||
"global_subtractive_selectors", []
|
||||
)
|
||||
|
||||
has_filter_rule = css_filter_rule and len(css_filter_rule.strip())
|
||||
has_subtractive_selectors = subtractive_selectors and len(subtractive_selectors[0].strip())
|
||||
|
||||
if is_json and not has_filter_rule:
|
||||
css_filter_rule = "json:$"
|
||||
has_filter_rule = True
|
||||
@@ -100,11 +107,11 @@ class perform_site_check():
|
||||
else:
|
||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||
html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content)
|
||||
|
||||
if has_subtractive_selectors:
|
||||
html_content = html_tools.element_removal(subtractive_selectors, html_content)
|
||||
# get_text() via inscriptis
|
||||
stripped_text_from_html = get_text(html_content)
|
||||
|
||||
|
||||
# Re #340 - return the content before the 'ignore text' was applied
|
||||
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
|
||||
|
||||
|
||||
Reference in New Issue
Block a user