Compare commits

...

13 Commits

41 changed files with 257 additions and 232 deletions

View File

@@ -49,6 +49,7 @@ Requires Playwright to be enabled.
- Governmental department updates (changes are often only on their websites)
- New software releases, security advisories when you're not on their mailing list.
- Festivals with changes
- Discogs restock alerts and monitoring
- Real estate listing changes
- Know when your favourite whiskey is on sale, or other special deals are announced before anyone else
- COVID related news from government websites
@@ -63,6 +64,7 @@ Requires Playwright to be enabled.
- You have a very sensitive list of URLs to watch and you do _not_ want to use the paid alternatives. (Remember, _you_ are the product)
- Get notified when certain keywords appear in Twitter search results
- Proactively search for jobs, get notified when companies update their careers page, search job portals for keywords.
- Get alerts when new job positions are open on Bamboo HR and other job platforms
_Need an actual Chrome runner with Javascript support? We support fetching via WebDriver and Playwright!_
@@ -100,6 +102,8 @@ $ docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/d
`:latest` tag is our latest stable release, `:dev` tag is our bleeding edge `master` branch.
Alternative docker repository over at ghcr - [ghcr.io/dgtlmoon/changedetection.io](https://ghcr.io/dgtlmoon/changedetection.io)
### Windows
See the install instructions at the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Microsoft-Windows

View File

@@ -36,7 +36,7 @@ from flask import (
from changedetectionio import html_tools
from changedetectionio.api import api_v1
__version__ = '0.40.2'
__version__ = '0.40.3'
datastore = None
@@ -361,7 +361,7 @@ def changedetection_app(config=None, datastore_o=None):
fe.title(title=watch_title)
latest_fname = watch.history[dates[-1]]
html_diff = diff.render_diff(prev_fname, latest_fname, include_equal=False, line_feed_sep="</br>")
html_diff = diff.render_diff(prev_fname, latest_fname, include_equal=False, line_feed_sep="<br>")
fe.content(content="<html><body><h4>{}</h4>{}</body></html>".format(watch_title, html_diff),
type='CDATA')
@@ -1168,7 +1168,8 @@ def changedetection_app(config=None, datastore_o=None):
new_uuid = datastore.clone(uuid)
if new_uuid:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
if not datastore.data['watching'].get(uuid).get('paused'):
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
flash('Cloned.')
return redirect(url_for('index'))

View File

@@ -106,8 +106,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
if step_operation == 'Goto site':
step_operation = 'goto_url'
step_optional_value = None
step_selector = datastore.data['watching'][uuid].get('url')
step_optional_value = datastore.data['watching'][uuid].get('url')
step_selector = None
# @todo try.. except.. nice errors not popups..
try:

View File

@@ -25,6 +25,7 @@ browser_step_ui_config = {'Choose one': '0 0',
'Execute JS': '0 1',
# 'Extract text and use as filter': '1 0',
'Goto site': '0 0',
'Goto URL': '0 1',
'Press Enter': '0 0',
'Select by label': '1 1',
'Scroll down': '0 0',
@@ -54,7 +55,7 @@ class steppable_browser_interface():
print("> action calling", call_action_name)
# https://playwright.dev/python/docs/selectors#xpath-selectors
if selector.startswith('/') and not selector.startswith('//'):
if selector and selector.startswith('/') and not selector.startswith('//'):
selector = "xpath=" + selector
action_handler = getattr(self, "action_" + call_action_name)
@@ -73,10 +74,10 @@ class steppable_browser_interface():
self.page.wait_for_timeout(3 * 1000)
print("Call action done in", time.time() - now)
def action_goto_url(self, url, optional_value):
def action_goto_url(self, selector, value):
# self.page.set_viewport_size({"width": 1280, "height": 5000})
now = time.time()
response = self.page.goto(url, timeout=0, wait_until='commit')
response = self.page.goto(value, timeout=0, wait_until='commit')
# Wait_until = commit
# - `'commit'` - consider operation to be finished when network response is received and the document started loading.

View File

@@ -10,7 +10,7 @@ def same_slicer(l, a, b):
return l[a:b]
# like .compare but a little different output
def customSequenceMatcher(before, after, include_equal=False):
def customSequenceMatcher(before, after, include_equal=False, include_removed=True, include_added=True):
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \\t", a=before, b=after)
# @todo Line-by-line mode instead of bunched, including `after` that is not in `before` (maybe unset?)
@@ -18,20 +18,20 @@ def customSequenceMatcher(before, after, include_equal=False):
if include_equal and tag == 'equal':
g = before[alo:ahi]
yield g
elif tag == 'delete':
elif include_removed and tag == 'delete':
g = ["(removed) " + i for i in same_slicer(before, alo, ahi)]
yield g
elif tag == 'replace':
g = ["(changed) " + i for i in same_slicer(before, alo, ahi)]
g += ["(into ) " + i for i in same_slicer(after, blo, bhi)]
g += ["(into) " + i for i in same_slicer(after, blo, bhi)]
yield g
elif tag == 'insert':
g = ["(added ) " + i for i in same_slicer(after, blo, bhi)]
elif include_added and tag == 'insert':
g = ["(added) " + i for i in same_slicer(after, blo, bhi)]
yield g
# only_differences - only return info about the differences, no context
# line_feed_sep could be "<br/>" or "<li>" or "\n" etc
def render_diff(previous_file, newest_file, include_equal=False, line_feed_sep="\n"):
# line_feed_sep could be "<br>" or "<li>" or "\n" etc
def render_diff(previous_file, newest_file, include_equal=False, include_removed=True, include_added=True, line_feed_sep="\n"):
with open(newest_file, 'r') as f:
newest_version_file_contents = f.read()
newest_version_file_contents = [line.rstrip() for line in newest_version_file_contents.splitlines()]
@@ -45,7 +45,7 @@ def render_diff(previous_file, newest_file, include_equal=False, line_feed_sep="
rendered_diff = customSequenceMatcher(previous_version_file_contents,
newest_version_file_contents,
include_equal)
include_equal, include_removed, include_added)
# Recursively join lists
f = lambda L: line_feed_sep.join([f(x) if type(x) is list else x for x in L])

View File

View File

@@ -1,3 +1,5 @@
# HTML to TEXT/JSON DIFFERENCE FETCHER
import hashlib
import json
import logging

View File

@@ -147,12 +147,12 @@ class ValidateContentFetcherIsReady(object):
except urllib3.exceptions.MaxRetryError as e:
driver_url = some_object.command_executor
message = field.gettext('Content fetcher \'%s\' did not respond.' % (field.data))
message += '<br/>' + field.gettext(
message += '<br>' + field.gettext(
'Be sure that the selenium/webdriver runner is running and accessible via network from this container/host.')
message += '<br/>' + field.gettext('Did you follow the instructions in the wiki?')
message += '<br/><br/>' + field.gettext('WebDriver Host: %s' % (driver_url))
message += '<br/><a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">Go here for more information</a>'
message += '<br/>'+field.gettext('Content fetcher did not respond properly, unable to use it.\n %s' % (str(e)))
message += '<br>' + field.gettext('Did you follow the instructions in the wiki?')
message += '<br><br>' + field.gettext('WebDriver Host: %s' % (driver_url))
message += '<br><a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">Go here for more information</a>'
message += '<br>'+field.gettext('Content fetcher did not respond properly, unable to use it.\n %s' % (str(e)))
raise ValidationError(message)

View File

@@ -8,7 +8,7 @@ import json
import re
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
TEXT_FILTER_LIST_LINE_SUFFIX = "<br/>"
TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
# 'price' , 'lowPrice', 'highPrice' are usually under here
# all of those may or may not appear on different websites

View File

@@ -311,17 +311,6 @@ class model(dict):
# False is not an option for AppRise, must be type None
return None
def get_screenshot_as_jpeg(self):
# Created by save_screenshot()
fname = os.path.join(self.watch_data_dir, "last-screenshot.jpg")
if os.path.isfile(fname):
return fname
# False is not an option for AppRise, must be type None
return None
def __get_file_ctime(self, filename):
fname = os.path.join(self.watch_data_dir, filename)
if os.path.isfile(fname):

View File

@@ -10,6 +10,8 @@ valid_tokens = {
'watch_title': '',
'watch_tag': '',
'diff': '',
'diff_added': '',
'diff_removed': '',
'diff_full': '',
'diff_url': '',
'preview_url': '',
@@ -120,10 +122,10 @@ def process_notification(n_object, datastore):
url += k + 'avatar_url=https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'
if url.startswith('tgram://'):
# Telegram only supports a limit subset of HTML, remove the '<br/>' we place in.
# Telegram only supports a limited subset of HTML, remove the '<br>' we place in.
# re https://github.com/dgtlmoon/changedetection.io/issues/555
# @todo re-use an existing library we have already imported to strip all non-allowed tags
n_body = n_body.replace('<br/>', '\n')
n_body = n_body.replace('<br>', '\n')
n_body = n_body.replace('</br>', '\n')
# real limit is 4096, but minus some for extra metadata
payload_max_size = 3600
@@ -215,6 +217,8 @@ def create_notification_parameters(n_object, datastore):
'watch_tag': watch_tag if watch_tag is not None else '',
'diff_url': diff_url,
'diff': n_object.get('diff', ''), # Null default in the case we use a test
'diff_added': n_object.get('diff_added', ''), # Null default in the case we use a test
'diff_removed': n_object.get('diff_removed', ''), # Null default in the case we use a test
'diff_full': n_object.get('diff_full', ''), # Null default in the case we use a test
'preview_url': preview_url,
'current_snapshot': n_object['current_snapshot'] if 'current_snapshot' in n_object else ''

View File

@@ -360,11 +360,6 @@ class ChangeDetectionStore:
f.write(screenshot)
f.close()
# Make a JPEG that's used in notifications (due to being a smaller size) available
from PIL import Image
im1 = Image.open(target_path)
im1.convert('RGB').save(target_path.replace('.png','.jpg'), quality=int(os.getenv("NOTIFICATION_SCREENSHOT_JPG_QUALITY", 75)))
def save_error_text(self, watch_uuid, contents):
if not self.data['watching'].get(watch_uuid):

View File

@@ -55,39 +55,51 @@
</thead>
<tbody>
<tr>
<td><code>{{ '{{ base_url }}' }}</code></td>
<td><code>{{ '{{base_url}}' }}</code></td>
<td>The URL of the changedetection.io instance you are running.</td>
</tr>
<tr>
<td><code>{{ '{{ watch_url }}' }}</code></td>
<td><code>{{ '{{watch_url}}' }}</code></td>
<td>The URL being watched.</td>
</tr>
<tr>
<td><code>{{ '{{ watch_uuid }}' }}</code></td>
<td><code>{{ '{{watch_uuid}}' }}</code></td>
<td>The UUID of the watch.</td>
</tr>
<tr>
<td><code>{{ '{{ watch_title }}' }}</code></td>
<td><code>{{ '{{watch_title}}' }}</code></td>
<td>The title of the watch.</td>
</tr>
<tr>
<td><code>{{ '{{ watch_tag }}' }}</code></td>
<td><code>{{ '{{watch_tag}}' }}</code></td>
<td>The watch label / tag</td>
</tr>
<tr>
<td><code>{{ '{{ preview_url }}' }}</code></td>
<td><code>{{ '{{preview_url}}' }}</code></td>
<td>The URL of the preview page generated by changedetection.io.</td>
</tr>
<tr>
<td><code>{{ '{{ diff_url }}' }}</code></td>
<td>The diff output - differences only</td>
<td><code>{{ '{{diff_url}}' }}</code></td>
<td>The URL of the diff output for the watch.</td>
</tr>
<tr>
<td><code>{{ '{{diff}}' }}</code></td>
<td>The diff output - only changes, additions, and removals</td>
</tr>
<tr>
<td><code>{{ '{{diff_added}}' }}</code></td>
<td>The diff output - only changes and additions</td>
</tr>
<tr>
<td><code>{{ '{{diff_removed}}' }}</code></td>
<td>The diff output - only changes and removals</td>
</tr>
<tr>
<td><code>{{ '{{ diff_full }}' }}</code></td>
<td><code>{{ '{{diff_full}}' }}</code></td>
<td>The diff output - full difference output</td>
</tr>
<tr>
<td><code>{{ '{{ current_snapshot }}' }}</code></td>
<td><code>{{ '{{current_snapshot}}' }}</code></td>
<td>The current snapshot value, useful when combined with JSON or CSS filters
</td>
</tr>
@@ -95,8 +107,10 @@
</table>
<div class="pure-form-message-inline">
<br>
URLs generated by changedetection.io (such as <code>{{ '{{ diff_url }}' }}</code>) require the <code>BASE_URL</code> environment variable set.<br/>
URLs generated by changedetection.io (such as <code>{{ '{{diff_url}}' }}</code>) require the <code>BASE_URL</code> environment variable set.<br>
Your <code>BASE_URL</code> var is currently "{{settings_application['current_base_url']}}"
<br>
Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. For example, an addition or removal could be perceived as a change in some cases. <a target="_new" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removal%7D%7D-notification-tokens">More Here</a> <br>
</div>
</div>
</div>

View File

@@ -124,12 +124,12 @@
<div class="pure-control-group">
{{ render_field(extract_form.extract_regex) }}
<span class="pure-form-message-inline">
A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract.<br/>
A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract.<br>
<p>
For example, to extract only the numbers from text &dash;</br>
<strong>Raw text</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code></br>
<strong>RegEx to extract:</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br/>
For example, to extract only the numbers from text &dash;<br>
<strong>Raw text</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code><br>
<strong>RegEx to extract:</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br>
</p>
<p>
<a href="https://RegExr.com/">Be sure to test your RegEx here.</a>
@@ -154,4 +154,4 @@
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-render.js')}}"></script>
{% endblock %}
{% endblock %}

View File

@@ -49,8 +49,8 @@
<fieldset>
<div class="pure-control-group">
{{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
<span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br/>
<span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br/>
<span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br>
<span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br>
</div>
<div class="pure-control-group">
{{ render_field(form.title, class="m-d") }}
@@ -106,10 +106,10 @@
{{ render_field(form.webdriver_delay) }}
<div class="pure-form-message-inline">
<strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
<br/>
<br>
This will wait <i>n</i> seconds before extracting the text.
{% if using_global_webdriver_wait %}
<br/><strong>Using the current global default settings</strong>
<br><strong>Using the current global default settings</strong>
{% endif %}
</div>
</div>
@@ -216,7 +216,7 @@ User-Agent: wonderbra 1.0") }}
<div class="tab-pane-inner" id="filters-and-triggers">
<div class="pure-control-group">
<strong>Pro-tips:</strong><br/>
<strong>Pro-tips:</strong><br>
<ul>
<li>
Use the preview page to see your filters and triggers highlighted.
@@ -241,9 +241,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
%}
{{ field }}
{% if '/text()' in field %}
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br/>
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %}
<span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br/>
<span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
<ul>
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
@@ -266,7 +266,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
</li>
</ul>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
</span>
</div>
<div class="pure-control-group">
@@ -334,7 +334,7 @@ Unavailable") }}
<li>Extracts text in the final output (line by line) after other filters using regular expressions;
<ul>
<li>Regular expression &dash; example <code>/reports.+?2022/i</code></li>
<li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br/></li>
<li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br></li>
<li>Keyword example &dash; example <code>Out of stock</code></li>
<li>Use groups to extract just that text &dash; example <code>/reports.+?(\d+)/i</code> returns a list of years only</li>
</ul>
@@ -353,7 +353,7 @@ Unavailable") }}
<div class="pure-control-group">
{% if visualselector_enabled %}
<span class="pure-form-message-inline">
The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection &dash; after the <i>Browser Steps</i> has completed.<br/><br/>
The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection &dash; after the <i>Browser Steps</i> has completed.<br><br>
</span>
<div id="selector-header">

View File

@@ -41,12 +41,12 @@
<fieldset class="pure-group">
<legend>
Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.</br>
Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.<br>
This is <i>experimental</i>, supported fields are <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, the rest (including <code>schedule</code>) are ignored.
<br/>
<br>
<p>
How to export? <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br/>
Be sure to set your default fetcher to Chrome if required.</br>
How to export? <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br>
Be sure to set your default fetcher to Chrome if required.<br>
</p>
</legend>

View File

@@ -54,7 +54,7 @@
<div class="tip">
For now, Differences are performed on text, not graphically, only the latest screenshot is available.
</div>
</br>
<br>
{% if is_html_webdriver %}
{% if screenshot %}
<div class="snapshot-age">{{watch.snapshot_screenshot_ctime|format_timestamp_timeago}}</div>
@@ -67,4 +67,4 @@
{% endif %}
</div>
</div>
{% endblock %}
{% endblock %}

View File

@@ -40,7 +40,7 @@
<div class="pure-control-group">
{{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }}
<span class="pure-form-message-inline">After this many consecutive times that the CSS/xPath filter is missing, send a notification
<br/>
<br>
Set to <strong>0</strong> to disable
</span>
</div>
@@ -66,7 +66,7 @@
{{ render_field(form.application.form.base_url, placeholder="http://yoursite.com:5000/",
class="m-d") }}
<span class="pure-form-message-inline">
Base URL used for the <code>{{ '{{ base_url }}' }}</code> token in notifications and RSS links.<br/>Default value is the ENV var 'BASE_URL' (Currently "{{settings_application['current_base_url']}}"),
Base URL used for the <code>{{ '{{ base_url }}' }}</code> token in notifications and RSS links.<br>Default value is the ENV var 'BASE_URL' (Currently "{{settings_application['current_base_url']}}"),
<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">read more here</a>.
</span>
</div>
@@ -105,13 +105,13 @@
<p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
</span>
<br/>
<br>
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using BrightData Proxies, find out more here.</a>
</div>
<fieldset class="pure-group" id="webdriver-override-options">
<div class="pure-form-message-inline">
<strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
<br/>
<br>
This will wait <i>n</i> seconds before extracting the text.
</div>
<div class="pure-control-group">
@@ -124,14 +124,14 @@
<fieldset class="pure-group">
{{ render_checkbox_field(form.application.form.ignore_whitespace) }}
<span class="pure-form-message-inline">Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected.<br/>
<span class="pure-form-message-inline">Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected.<br>
<i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc.
</span>
</fieldset>
<fieldset class="pure-group">
{{ render_checkbox_field(form.application.form.render_anchor_tag_content) }}
<span class="pure-form-message-inline">Render anchor tag content, default disabled, when enabled renders links as <code>(link text)[https://somesite.com]</code>
<br/>
<br>
<i>Note:</i> Changing this could affect the content of your existing watches, possibly trigger alerts etc.
</span>
</fieldset>
@@ -151,7 +151,7 @@ nav
{{ render_field(form.application.form.global_ignore_text, rows=5, placeholder="Some text to ignore in a line
/some.regex\d{2}/ for case-INsensitive regex
") }}
<span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br/>
<span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br>
<span class="pure-form-message-inline">
<ul>
<li>Note: This is applied globally in addition to the per-watch rules.</li>
@@ -170,8 +170,8 @@ nav
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.api_access_token_enabled) }}
<div class="pure-form-message-inline">Restrict API access limit by using <code>x-api-key</code> header</div><br/>
<div class="pure-form-message-inline"><br/>API Key <span id="api-key">{{api_key}}</span>
<div class="pure-form-message-inline">Restrict API access limit by using <code>x-api-key</code> header</div><br>
<div class="pure-form-message-inline"><br>API Key <span id="api-key">{{api_key}}</span>
<span style="display:none;" id="api-key-copy" >copy</span>
</div>
</div>
@@ -181,7 +181,7 @@ nav
<p><strong>Tip</strong>: You can connect to websites using <a href="https://brightdata.grsm.io/n0r16zf7eivq">BrightData</a> proxies, their service <strong>WebUnlocker</strong> will solve most CAPTCHAs, whilst their <strong>Residential Proxies</strong> may help to avoid CAPTCHA altogether. </p>
<p>It may be easier to try <strong>WebUnlocker</strong> first, WebUnlocker also supports country selection.</p>
<p>
When you have <a href="https://brightdata.grsm.io/n0r16zf7eivq">registered</a>, enabled the required services, visit the <A href="https://brightdata.com/cp/api_example?">API example page</A>, then select <strong>Python</strong>, set the country you wish to use, then copy+paste the example URL below<br/>
When you have <a href="https://brightdata.grsm.io/n0r16zf7eivq">registered</a>, enabled the required services, visit the <A href="https://brightdata.com/cp/api_example?">API example page</A>, then select <strong>Python</strong>, set the country you wish to use, then copy+paste the example URL below<br>
The Proxy URL with BrightData should start with <code>http://brd-customer...</code>
</p>

View File

@@ -11,10 +11,10 @@ import uuid
def set_original_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<div id="sametext">Some text thats the same</div>
<div id="changetext">Some text that will change</div>
</body>
@@ -29,10 +29,10 @@ def set_original_response():
def set_modified_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>which has this one new line</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<div id="sametext">Some text thats the same</div>
<div id="changetext">Some text that changes</div>
</body>

View File

@@ -7,10 +7,10 @@ from .util import live_server_setup, extract_UUID_from_client, extract_api_key_f
def set_response_with_ldjson():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<div class="sametext">Some text thats the same</div>
<div class="changetext">Some text that will change</div>
<script type="application/ld+json">
@@ -61,10 +61,10 @@ def set_response_with_ldjson():
def set_response_without_ldjson():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<div class="sametext">Some text thats the same</div>
<div class="changetext">Some text that will change</div>
</body>
@@ -143,4 +143,4 @@ def test_check_ldjson_price_autodetect(client, live_server):
assert b'ldjson-price-track-offer' not in res.data
##########################################################################################
client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
client.get(url_for("form_delete", uuid="all"), follow_redirects=True)

View File

@@ -11,7 +11,7 @@ sleep_time_for_fetch_thread = 3
# Basic test to check inscriptis is not adding return line chars, basically works etc
def test_inscriptus():
from inscriptis import get_text
html_content = "<html><body>test!<br/>ok man</body></html>"
html_content = "<html><body>test!<br>ok man</body></html>"
stripped_text_from_html = get_text(html_content)
assert stripped_text_from_html == 'test!\nok man'
@@ -82,7 +82,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
assert b'<rss' in res.data
# re #16 should have the diff in here too
assert b'(into ) which has this one new line' in res.data
assert b'(into) which has this one new line' in res.data
assert b'CDATA' in res.data
assert expected_url.encode('utf-8') in res.data

View File

@@ -8,10 +8,10 @@ from changedetectionio import html_tools
def set_original_ignore_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
</body>
</html>
@@ -24,10 +24,10 @@ def set_original_ignore_response():
def set_modified_original_ignore_response():
test_return_data = """<html>
<body>
Some NEW nice initial text</br>
Some NEW nice initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<p>new ignore stuff</p>
<p>out of stock</p>
<p>blah</p>
@@ -44,11 +44,11 @@ def set_modified_original_ignore_response():
def set_modified_response_minus_block_text():
test_return_data = """<html>
<body>
Some NEW nice initial text</br>
Some NEW nice initial text<br>
<p>Which is across multiple lines</p>
<p>now on sale $2/p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<p>new ignore stuff</p>
<p>blah</p>
</body>

View File

@@ -12,10 +12,10 @@ def test_setup(live_server):
def set_original_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<div id="sametext">Some text thats the same</div>
<div id="changetext">Some text that will change</div>
</body>
@@ -29,10 +29,10 @@ def set_original_response():
def set_modified_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>which has this one new line</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<div id="sametext">Some text thats the same</div>
<div id="changetext">Some text that changes</div>
</body>

View File

@@ -25,10 +25,10 @@ def set_original_response():
</ul>
</nav>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<div id="changetext">Some text that will change</div>
</body>
<footer>
@@ -54,10 +54,10 @@ def set_modified_response():
</ul>
</nav>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<div id="changetext">Some text that changes</div>
</body>
<footer>
@@ -71,7 +71,6 @@ def set_modified_response():
def test_element_removal_output():
from changedetectionio import fetch_site_status
from inscriptis import get_text
# Check text with sub-parts renders correctly
@@ -85,7 +84,7 @@ def test_element_removal_output():
</ul>
</nav>
<body>
Some initial text</br>
Some initial text<br>
<p>across multiple lines</p>
<div id="changetext">Some text that changes</div>
</body>

View File

@@ -10,10 +10,10 @@ from ..html_tools import *
def set_original_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<div id="sametext">Some text thats the same</div>
<div class="changetext">Some text that will change</div>
</body>
@@ -28,12 +28,12 @@ def set_original_response():
def set_modified_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>which has this one new line</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<div id="sametext">Some text thats the same</div>
<div class="changetext">Some text that did change ( 1000 online <br/> 80 guests<br/> 2000 online )</div>
<div class="changetext">Some text that did change ( 1000 online <br> 80 guests<br> 2000 online )</div>
<div class="changetext">SomeCase insensitive 3456</div>
</body>
</html>
@@ -49,8 +49,8 @@ def set_multiline_response():
test_return_data = """<html>
<body>
<p>Something <br/>
across 6 billion multiple<br/>
<p>Something <br>
across 6 billion multiple<br>
lines
</p>

View File

@@ -11,10 +11,10 @@ from changedetectionio.model import App
def set_response_without_filter():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<div id="nope-doesnt-exist">Some text thats the same</div>
</body>
</html>
@@ -28,10 +28,10 @@ def set_response_without_filter():
def set_response_with_filter():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<div class="ticket-available">Ticket now on sale!</div>
</body>
</html>

View File

@@ -8,10 +8,10 @@ from changedetectionio.model import App
def set_response_with_filter():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<div id="nope-doesnt-exist">Some text thats the same</div>
</body>
</html>
@@ -145,4 +145,4 @@ def test_check_xpath_filter_failure_notification(client, live_server):
time.sleep(1)
run_filter_test(client, '//*[@id="nope-doesnt-exist"]')
# Test that notification is never sent
# Test that notification is never sent

View File

@@ -6,11 +6,11 @@ from ..html_tools import html_to_text
def test_html_to_text_func():
test_html = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
<a href="/first_link"> More Text </a>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<a href="second_link.com"> Even More Text </a>
</body>
</html>
@@ -21,7 +21,7 @@ def test_html_to_text_func():
no_links_text = \
"Some initial text\n\nWhich is across multiple " \
"lines\n\nMore Text So let's see what happens. Even More Text"
"lines\n\nMore Text\nSo let's see what happens.\nEven More Text"
# check that no links are in the extracted text
assert text_content == no_links_text
@@ -31,7 +31,7 @@ def test_html_to_text_func():
links_text = \
"Some initial text\n\nWhich is across multiple lines\n\n[ More Text " \
"](/first_link) So let's see what happens. [ Even More Text ]" \
"](/first_link)\nSo let's see what happens.\n[ Even More Text ]" \
"(second_link.com)"
# check that links are present in the extracted text

View File

@@ -1,7 +1,5 @@
#!/usr/bin/python3
import time
from flask import url_for
from . util import live_server_setup
from changedetectionio import html_tools
@@ -11,7 +9,7 @@ def test_setup(live_server):
# Unit test of the stripper
# We are always dealing in UTF-8
def test_strip_regex_text_func():
from changedetectionio import fetch_site_status
from ..fetchers import text_json_diff as fetch_site_status
test_content = """
but sometimes we want to remove the lines.

View File

@@ -11,7 +11,7 @@ def test_setup(live_server):
# Unit test of the stripper
# We are always dealing in UTF-8
def test_strip_text_func():
from changedetectionio import fetch_site_status
from ..fetchers import text_json_diff as fetch_site_status
test_content = """
Some content
@@ -33,10 +33,10 @@ def test_strip_text_func():
def set_original_ignore_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
</body>
</html>
@@ -49,10 +49,10 @@ def set_original_ignore_response():
def set_modified_original_ignore_response():
test_return_data = """<html>
<body>
Some NEW nice initial text</br>
Some NEW nice initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<p>new ignore stuff</p>
<p>blah</p>
</body>
@@ -68,11 +68,11 @@ def set_modified_original_ignore_response():
def set_modified_ignore_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
<P>ZZZZz</P>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
</body>
</html>

View File

@@ -12,10 +12,10 @@ def test_setup(live_server):
def set_original_ignore_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<a href="/original_link"> Some More Text </a>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
</body>
</html>
"""
@@ -29,10 +29,10 @@ def set_original_ignore_response():
def set_modified_ignore_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<a href="/modified_link"> Some More Text </a>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
</body>
</html>
"""

View File

@@ -12,10 +12,10 @@ def test_setup(live_server):
def set_original_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
</body>
</html>
"""
@@ -27,10 +27,10 @@ def set_original_response():
def set_some_changed_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines, and a new thing too.</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
</body>
</html>
"""

View File

@@ -12,15 +12,15 @@ def test_setup(live_server):
def set_original_ignore_response_but_with_whitespace():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>
Which is across multiple lines</p>
<br>
</br>
<br>
So let's see what happens. </br>
So let's see what happens. <br>
</body>
@@ -34,10 +34,10 @@ def set_original_ignore_response_but_with_whitespace():
def set_original_ignore_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
</body>
</html>

View File

@@ -73,16 +73,12 @@ def test_check_notification(client, live_server):
# We write the PNG to disk, but a JPEG should appear in the notification
# Write the last screenshot png
testimage_png = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII='
# This one is created when we save the screenshot from the webdriver/playwright session (converted from PNG)
testimage_jpg = '/9j/4AAQSkZJRgABAQEASABIAAD/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/wAALCAABAAEBAREA/8QAFAABAAAAAAAAAAAAAAAAAAAACf/EABQQAQAAAAAAAAAAAAAAAAAAAAD/2gAIAQEAAD8AKp//2Q=='
uuid = extract_UUID_from_client(client)
datastore = 'test-datastore'
with open(os.path.join(datastore, str(uuid), 'last-screenshot.png'), 'wb') as f:
f.write(base64.b64decode(testimage_png))
with open(os.path.join(datastore, str(uuid), 'last-screenshot.jpg'), 'wb') as f:
f.write(base64.b64decode(testimage_jpg))
# Goto the edit page, add our ignore text
# Add our URL to the import page
@@ -100,6 +96,8 @@ def test_check_notification(client, live_server):
"Diff URL: {{diff_url}}\n"
"Snapshot: {{current_snapshot}}\n"
"Diff: {{diff}}\n"
"Diff Added: {{diff_added}}\n"
"Diff Removed: {{diff_removed}}\n"
"Diff Full: {{diff_full}}\n"
":-)",
"notification_screenshot": True,
@@ -147,7 +145,7 @@ def test_check_notification(client, live_server):
assert ':-)' in notification_submission
assert "Diff Full: Some initial text" in notification_submission
assert "Diff: (changed) Which is across multiple lines" in notification_submission
assert "(into ) which has this one new line" in notification_submission
assert "(into) which has this one new line" in notification_submission
# Re #342 - check for accidental python byte encoding of non-utf8/string
assert "b'" not in notification_submission
assert re.search('Watch UUID: [0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}', notification_submission, re.IGNORECASE)
@@ -160,12 +158,12 @@ def test_check_notification(client, live_server):
# Check the attachment was added, and that it is a JPEG from the original PNG
notification_submission_object = json.loads(notification_submission)
assert notification_submission_object['attachments'][0]['filename'] == 'last-screenshot.jpg'
# We keep PNG screenshots for now
assert notification_submission_object['attachments'][0]['filename'] == 'last-screenshot.png'
assert len(notification_submission_object['attachments'][0]['base64'])
assert notification_submission_object['attachments'][0]['mimetype'] == 'image/jpeg'
assert notification_submission_object['attachments'][0]['mimetype'] == 'image/png'
jpeg_in_attachment = base64.b64decode(notification_submission_object['attachments'][0]['base64'])
assert b'JFIF' in jpeg_in_attachment
assert testimage_png not in notification_submission
# Assert that the JPEG is readable (didn't get chewed up somewhere)
from PIL import Image
import io
@@ -297,7 +295,10 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server):
follow_redirects=True
)
assert b'Settings updated' in res.data
client.get(
url_for("form_delete", uuid="all"),
follow_redirects=True
)
# Add a watch and trigger a HTTP POST
test_url = url_for('test_endpoint', _external=True)
res = client.post(

View File

@@ -8,10 +8,10 @@ from . util import live_server_setup
def set_original_ignore_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
</body>
</html>
@@ -24,10 +24,10 @@ def set_original_ignore_response():
def set_modified_original_ignore_response():
test_return_data = """<html>
<body>
Some NEW nice initial text</br>
Some NEW nice initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
</body>
</html>
@@ -40,12 +40,12 @@ def set_modified_original_ignore_response():
def set_modified_with_trigger_text_response():
test_return_data = """<html>
<body>
Some NEW nice initial text</br>
Some NEW nice initial text<br>
<p>Which is across multiple lines</p>
</br>
<br>
Add to cart
<br/>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
</body>
</html>
@@ -142,4 +142,4 @@ def test_trigger_functionality(client, live_server):
res = client.get(url_for("preview_page", uuid="first"))
# We should be able to see what we triggered on
assert b'<div class="triggered">Add to cart' in res.data
assert b'<div class="triggered">Add to cart' in res.data

View File

@@ -8,10 +8,10 @@ from . util import live_server_setup
def set_original_ignore_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
</body>
</html>
@@ -72,7 +72,7 @@ def test_trigger_regex_functionality(client, live_server):
assert b'unviewed' not in res.data
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("regex test123<br/>\nsomething 123")
f.write("regex test123<br>\nsomething 123")
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(sleep_time_for_fetch_thread)
@@ -81,4 +81,4 @@ def test_trigger_regex_functionality(client, live_server):
# Cleanup everything
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
assert b'Deleted' in res.data

View File

@@ -8,10 +8,10 @@ from . util import live_server_setup
def set_original_ignore_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
</body>
</html>

View File

@@ -12,10 +12,10 @@ def test_setup(live_server):
def set_original_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<div class="sametext">Some text thats the same</div>
<div class="changetext">Some text that will change</div>
</body>
@@ -29,10 +29,10 @@ def set_original_response():
def set_modified_response():
test_return_data = """<html>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. THIS CHANGES AND SHOULDNT TRIGGER A CHANGE</br>
<br>
So let's see what happens. THIS CHANGES AND SHOULDNT TRIGGER A CHANGE<br>
<div class="sametext">Some text thats the same</div>
<div class="changetext">Some new text</div>
</body>

View File

@@ -16,15 +16,30 @@ class TestDiffBuilder(unittest.TestCase):
output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after.txt")
output = output.split("\n")
self.assertIn('(changed) ok', output)
self.assertIn('(into ) xok', output)
self.assertIn('(into ) next-x-ok', output)
self.assertIn('(added ) and something new', output)
self.assertIn('(into) xok', output)
self.assertIn('(into) next-x-ok', output)
self.assertIn('(added) and something new', output)
output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after-2.txt")
output = output.split("\n")
self.assertIn('(removed) for having learned computerese,', output)
self.assertIn('(removed) I continue to examine bits, bytes and words', output)
# diff_removed (include_added=False): changed lines are still reported, pure additions are not
output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after.txt", include_equal=False, include_removed=True, include_added=False)
output = output.split("\n")
self.assertIn('(changed) ok', output)
self.assertIn('(into) xok', output)
self.assertIn('(into) next-x-ok', output)
self.assertNotIn('(added) and something new', output)
# diff_removed (include_added=False) against after-2.txt: removed lines are still reported
output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after-2.txt", include_equal=False, include_removed=True, include_added=False)
output = output.split("\n")
self.assertIn('(removed) for having learned computerese,', output)
self.assertIn('(removed) I continue to examine bits, bytes and words', output)
# @todo test blocks of changed, blocks of added, blocks of removed

View File

@@ -9,10 +9,10 @@ def set_original_response():
test_return_data = """<html>
<head><title>head title</title></head>
<body>
Some initial text</br>
Some initial text<br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
<span class="foobar-detection" style='display:none'></span>
</body>
</html>
@@ -26,10 +26,10 @@ def set_modified_response():
test_return_data = """<html>
<head><title>modified head title</title></head>
<body>
Some initial text</br>
Some initial text<br>
<p>which has this one new line</p>
</br>
So let's see what happens. </br>
<br>
So let's see what happens. <br>
</body>
</html>
"""
@@ -43,11 +43,11 @@ def set_more_modified_response():
test_return_data = """<html>
<head><title>modified head title</title></head>
<body>
Some initial text</br>
Some initial text<br>
<p>which has this one new line</p>
</br>
So let's see what happens. </br>
Ohh yeah awesome<br/>
<br>
So let's see what happens. <br>
Ohh yeah awesome<br>
</body>
</html>
"""

View File

@@ -4,8 +4,7 @@ import queue
import time
from changedetectionio import content_fetcher
from changedetectionio import queuedWatchMetaData
from changedetectionio.fetch_site_status import FilterNotFoundInResponse
from .fetchers.text_json_diff import FilterNotFoundInResponse
# A single update worker
#
@@ -65,7 +64,7 @@ class update_worker(threading.Thread):
if 'notification_urls' in n_object and n_object['notification_urls']:
# HTML needs a line-break tag, but Markdown and plain text can use a linefeed
if n_object['notification_format'] == 'HTML':
line_feed_sep = "</br>"
line_feed_sep = "<br>"
else:
line_feed_sep = "\n"
@@ -75,10 +74,12 @@ class update_worker(threading.Thread):
n_object.update({
'watch_url': watch['url'],
'uuid': watch_uuid,
'screenshot': watch.get_screenshot_as_jpeg() if watch.get('notification_screenshot') else None,
'screenshot': watch.get_screenshot() if watch.get('notification_screenshot') else None,
'current_snapshot': snapshot_contents.decode('utf-8'),
'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep),
'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], True, line_feed_sep=line_feed_sep)
'diff_added': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], include_removed=False, line_feed_sep=line_feed_sep),
'diff_removed': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], include_added=False, line_feed_sep=line_feed_sep),
'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], include_equal=True, line_feed_sep=line_feed_sep)
})
logging.info (">> SENDING NOTIFICATION")
self.notification_q.put(n_object)
@@ -151,7 +152,7 @@ class update_worker(threading.Thread):
os.unlink(full_path)
def run(self):
from changedetectionio import fetch_site_status
from .fetchers import text_json_diff as fetch_site_status
update_handler = fetch_site_status.perform_site_check(datastore=self.datastore)
@@ -169,10 +170,8 @@ class update_worker(threading.Thread):
if uuid in list(self.datastore.data['watching'].keys()):
changed_detected = False
contents = b''
screenshot = False
update_obj= {}
xpath_data = False
process_changedetection_results = True
update_obj= {}
print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url']))
now = time.time()
@@ -274,6 +273,7 @@ class update_worker(threading.Thread):
err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
except content_fetcher.ScreenshotUnavailable as e:
err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'"
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
@@ -285,6 +285,7 @@ class update_worker(threading.Thread):
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
except content_fetcher.PageUnloadable as e:
err_text = "Page request from server didnt respond correctly"
if e.message:
@@ -295,6 +296,7 @@ class update_worker(threading.Thread):
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
except Exception as e:
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})