Compare commits
13 Commits
ticket-962
...
dont-creat
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
51fd45624c | ||
|
|
8e5ea2cc93 | ||
|
|
9f6dc6cd04 | ||
|
|
2bc988dffc | ||
|
|
a578de36c5 | ||
|
|
4c74d39df0 | ||
|
|
c454cbb808 | ||
|
|
6f1eec0d5a | ||
|
|
0d05ee1586 | ||
|
|
23476f0e70 | ||
|
|
cf363971c1 | ||
|
|
35409f79bf | ||
|
|
fc88306805 |
@@ -49,6 +49,7 @@ Requires Playwright to be enabled.
|
||||
- Governmental department updates (changes are often only on their websites)
|
||||
- New software releases, security advisories when you're not on their mailing list.
|
||||
- Festivals with changes
|
||||
- Discogs restock alerts and monitoring
|
||||
- Realestate listing changes
|
||||
- Know when your favourite whiskey is on sale, or other special deals are announced before anyone else
|
||||
- COVID related news from government websites
|
||||
@@ -63,6 +64,7 @@ Requires Playwright to be enabled.
|
||||
- You have a very sensitive list of URLs to watch and you do _not_ want to use the paid alternatives. (Remember, _you_ are the product)
|
||||
- Get notified when certain keywords appear in Twitter search results
|
||||
- Proactively search for jobs, get notified when companies update their careers page, search job portals for keywords.
|
||||
- Get alerts when new job positions are open on Bamboo HR and other job platforms
|
||||
|
||||
_Need an actual Chrome runner with Javascript support? We support fetching via WebDriver and Playwright!_
|
||||
|
||||
@@ -100,6 +102,8 @@ $ docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/d
|
||||
|
||||
`:latest` tag is our latest stable release, `:dev` tag is our bleeding edge `master` branch.
|
||||
|
||||
Alternative docker repository over at ghcr - [ghcr.io/dgtlmoon/changedetection.io](https://ghcr.io/dgtlmoon/changedetection.io)
|
||||
|
||||
### Windows
|
||||
|
||||
See the install instructions at the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Microsoft-Windows
|
||||
|
||||
@@ -36,7 +36,7 @@ from flask import (
|
||||
from changedetectionio import html_tools
|
||||
from changedetectionio.api import api_v1
|
||||
|
||||
__version__ = '0.40.2'
|
||||
__version__ = '0.40.3'
|
||||
|
||||
datastore = None
|
||||
|
||||
@@ -361,7 +361,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
fe.title(title=watch_title)
|
||||
latest_fname = watch.history[dates[-1]]
|
||||
|
||||
html_diff = diff.render_diff(prev_fname, latest_fname, include_equal=False, line_feed_sep="</br>")
|
||||
html_diff = diff.render_diff(prev_fname, latest_fname, include_equal=False, line_feed_sep="<br>")
|
||||
fe.content(content="<html><body><h4>{}</h4>{}</body></html>".format(watch_title, html_diff),
|
||||
type='CDATA')
|
||||
|
||||
@@ -1168,7 +1168,8 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
new_uuid = datastore.clone(uuid)
|
||||
if new_uuid:
|
||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
|
||||
if not datastore.data['watching'].get(uuid).get('paused'):
|
||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
|
||||
flash('Cloned.')
|
||||
|
||||
return redirect(url_for('index'))
|
||||
|
||||
@@ -106,8 +106,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
if step_operation == 'Goto site':
|
||||
step_operation = 'goto_url'
|
||||
step_optional_value = None
|
||||
step_selector = datastore.data['watching'][uuid].get('url')
|
||||
step_optional_value = datastore.data['watching'][uuid].get('url')
|
||||
step_selector = None
|
||||
|
||||
# @todo try.. except.. nice errors not popups..
|
||||
try:
|
||||
|
||||
@@ -25,6 +25,7 @@ browser_step_ui_config = {'Choose one': '0 0',
|
||||
'Execute JS': '0 1',
|
||||
# 'Extract text and use as filter': '1 0',
|
||||
'Goto site': '0 0',
|
||||
'Goto URL': '0 1',
|
||||
'Press Enter': '0 0',
|
||||
'Select by label': '1 1',
|
||||
'Scroll down': '0 0',
|
||||
@@ -54,7 +55,7 @@ class steppable_browser_interface():
|
||||
|
||||
print("> action calling", call_action_name)
|
||||
# https://playwright.dev/python/docs/selectors#xpath-selectors
|
||||
if selector.startswith('/') and not selector.startswith('//'):
|
||||
if selector and selector.startswith('/') and not selector.startswith('//'):
|
||||
selector = "xpath=" + selector
|
||||
|
||||
action_handler = getattr(self, "action_" + call_action_name)
|
||||
@@ -73,10 +74,10 @@ class steppable_browser_interface():
|
||||
self.page.wait_for_timeout(3 * 1000)
|
||||
print("Call action done in", time.time() - now)
|
||||
|
||||
def action_goto_url(self, url, optional_value):
|
||||
def action_goto_url(self, selector, value):
|
||||
# self.page.set_viewport_size({"width": 1280, "height": 5000})
|
||||
now = time.time()
|
||||
response = self.page.goto(url, timeout=0, wait_until='commit')
|
||||
response = self.page.goto(value, timeout=0, wait_until='commit')
|
||||
|
||||
# Wait_until = commit
|
||||
# - `'commit'` - consider operation to be finished when network response is received and the document started loading.
|
||||
|
||||
@@ -10,7 +10,7 @@ def same_slicer(l, a, b):
|
||||
return l[a:b]
|
||||
|
||||
# like .compare but a little different output
|
||||
def customSequenceMatcher(before, after, include_equal=False):
|
||||
def customSequenceMatcher(before, after, include_equal=False, include_removed=True, include_added=True):
|
||||
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \\t", a=before, b=after)
|
||||
|
||||
# @todo Line-by-line mode instead of bunched, including `after` that is not in `before` (maybe unset?)
|
||||
@@ -18,20 +18,20 @@ def customSequenceMatcher(before, after, include_equal=False):
|
||||
if include_equal and tag == 'equal':
|
||||
g = before[alo:ahi]
|
||||
yield g
|
||||
elif tag == 'delete':
|
||||
elif include_removed and tag == 'delete':
|
||||
g = ["(removed) " + i for i in same_slicer(before, alo, ahi)]
|
||||
yield g
|
||||
elif tag == 'replace':
|
||||
g = ["(changed) " + i for i in same_slicer(before, alo, ahi)]
|
||||
g += ["(into ) " + i for i in same_slicer(after, blo, bhi)]
|
||||
g += ["(into) " + i for i in same_slicer(after, blo, bhi)]
|
||||
yield g
|
||||
elif tag == 'insert':
|
||||
g = ["(added ) " + i for i in same_slicer(after, blo, bhi)]
|
||||
elif include_added and tag == 'insert':
|
||||
g = ["(added) " + i for i in same_slicer(after, blo, bhi)]
|
||||
yield g
|
||||
|
||||
# only_differences - only return info about the differences, no context
|
||||
# line_feed_sep could be "<br/>" or "<li>" or "\n" etc
|
||||
def render_diff(previous_file, newest_file, include_equal=False, line_feed_sep="\n"):
|
||||
# line_feed_sep could be "<br>" or "<li>" or "\n" etc
|
||||
def render_diff(previous_file, newest_file, include_equal=False, include_removed=True, include_added=True, line_feed_sep="\n"):
|
||||
with open(newest_file, 'r') as f:
|
||||
newest_version_file_contents = f.read()
|
||||
newest_version_file_contents = [line.rstrip() for line in newest_version_file_contents.splitlines()]
|
||||
@@ -45,7 +45,7 @@ def render_diff(previous_file, newest_file, include_equal=False, line_feed_sep="
|
||||
|
||||
rendered_diff = customSequenceMatcher(previous_version_file_contents,
|
||||
newest_version_file_contents,
|
||||
include_equal)
|
||||
include_equal, include_removed, include_added)
|
||||
|
||||
# Recursively join lists
|
||||
f = lambda L: line_feed_sep.join([f(x) if type(x) is list else x for x in L])
|
||||
|
||||
0
changedetectionio/fetchers/__init__.py
Normal file
0
changedetectionio/fetchers/__init__.py
Normal file
@@ -1,3 +1,5 @@
|
||||
# HTML to TEXT/JSON DIFFERENCE FETCHER
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
@@ -147,12 +147,12 @@ class ValidateContentFetcherIsReady(object):
|
||||
except urllib3.exceptions.MaxRetryError as e:
|
||||
driver_url = some_object.command_executor
|
||||
message = field.gettext('Content fetcher \'%s\' did not respond.' % (field.data))
|
||||
message += '<br/>' + field.gettext(
|
||||
message += '<br>' + field.gettext(
|
||||
'Be sure that the selenium/webdriver runner is running and accessible via network from this container/host.')
|
||||
message += '<br/>' + field.gettext('Did you follow the instructions in the wiki?')
|
||||
message += '<br/><br/>' + field.gettext('WebDriver Host: %s' % (driver_url))
|
||||
message += '<br/><a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">Go here for more information</a>'
|
||||
message += '<br/>'+field.gettext('Content fetcher did not respond properly, unable to use it.\n %s' % (str(e)))
|
||||
message += '<br>' + field.gettext('Did you follow the instructions in the wiki?')
|
||||
message += '<br><br>' + field.gettext('WebDriver Host: %s' % (driver_url))
|
||||
message += '<br><a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">Go here for more information</a>'
|
||||
message += '<br>'+field.gettext('Content fetcher did not respond properly, unable to use it.\n %s' % (str(e)))
|
||||
|
||||
raise ValidationError(message)
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ import json
|
||||
import re
|
||||
|
||||
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
|
||||
TEXT_FILTER_LIST_LINE_SUFFIX = "<br/>"
|
||||
TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
|
||||
|
||||
# 'price' , 'lowPrice', 'highPrice' are usually under here
|
||||
# all of those may or may not appear on different websites
|
||||
|
||||
@@ -311,17 +311,6 @@ class model(dict):
|
||||
# False is not an option for AppRise, must be type None
|
||||
return None
|
||||
|
||||
def get_screenshot_as_jpeg(self):
|
||||
|
||||
# Created by save_screenshot()
|
||||
fname = os.path.join(self.watch_data_dir, "last-screenshot.jpg")
|
||||
if os.path.isfile(fname):
|
||||
return fname
|
||||
|
||||
# False is not an option for AppRise, must be type None
|
||||
return None
|
||||
|
||||
|
||||
def __get_file_ctime(self, filename):
|
||||
fname = os.path.join(self.watch_data_dir, filename)
|
||||
if os.path.isfile(fname):
|
||||
|
||||
@@ -10,6 +10,8 @@ valid_tokens = {
|
||||
'watch_title': '',
|
||||
'watch_tag': '',
|
||||
'diff': '',
|
||||
'diff_added': '',
|
||||
'diff_removed': '',
|
||||
'diff_full': '',
|
||||
'diff_url': '',
|
||||
'preview_url': '',
|
||||
@@ -120,10 +122,10 @@ def process_notification(n_object, datastore):
|
||||
url += k + 'avatar_url=https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'
|
||||
|
||||
if url.startswith('tgram://'):
|
||||
# Telegram only supports a limit subset of HTML, remove the '<br/>' we place in.
|
||||
# Telegram only supports a limited subset of HTML, remove the '<br>' we place in.
|
||||
# re https://github.com/dgtlmoon/changedetection.io/issues/555
|
||||
# @todo re-use an existing library we have already imported to strip all non-allowed tags
|
||||
n_body = n_body.replace('<br/>', '\n')
|
||||
n_body = n_body.replace('<br>', '\n')
|
||||
n_body = n_body.replace('</br>', '\n')
|
||||
# real limit is 4096, but minus some for extra metadata
|
||||
payload_max_size = 3600
|
||||
@@ -215,6 +217,8 @@ def create_notification_parameters(n_object, datastore):
|
||||
'watch_tag': watch_tag if watch_tag is not None else '',
|
||||
'diff_url': diff_url,
|
||||
'diff': n_object.get('diff', ''), # Null default in the case we use a test
|
||||
'diff_added': n_object.get('diff_added', ''), # Null default in the case we use a test
|
||||
'diff_removed': n_object.get('diff_removed', ''), # Null default in the case we use a test
|
||||
'diff_full': n_object.get('diff_full', ''), # Null default in the case we use a test
|
||||
'preview_url': preview_url,
|
||||
'current_snapshot': n_object['current_snapshot'] if 'current_snapshot' in n_object else ''
|
||||
|
||||
@@ -360,11 +360,6 @@ class ChangeDetectionStore:
|
||||
f.write(screenshot)
|
||||
f.close()
|
||||
|
||||
# Make a JPEG that's used in notifications (due to being a smaller size) available
|
||||
from PIL import Image
|
||||
im1 = Image.open(target_path)
|
||||
im1.convert('RGB').save(target_path.replace('.png','.jpg'), quality=int(os.getenv("NOTIFICATION_SCREENSHOT_JPG_QUALITY", 75)))
|
||||
|
||||
|
||||
def save_error_text(self, watch_uuid, contents):
|
||||
if not self.data['watching'].get(watch_uuid):
|
||||
|
||||
@@ -55,39 +55,51 @@
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><code>{{ '{{ base_url }}' }}</code></td>
|
||||
<td><code>{{ '{{base_url}}' }}</code></td>
|
||||
<td>The URL of the changedetection.io instance you are running.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{ watch_url }}' }}</code></td>
|
||||
<td><code>{{ '{{watch_url}}' }}</code></td>
|
||||
<td>The URL being watched.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{ watch_uuid }}' }}</code></td>
|
||||
<td><code>{{ '{{watch_uuid}}' }}</code></td>
|
||||
<td>The UUID of the watch.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{ watch_title }}' }}</code></td>
|
||||
<td><code>{{ '{{watch_title}}' }}</code></td>
|
||||
<td>The title of the watch.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{ watch_tag }}' }}</code></td>
|
||||
<td><code>{{ '{{watch_tag}}' }}</code></td>
|
||||
<td>The watch label / tag</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{ preview_url }}' }}</code></td>
|
||||
<td><code>{{ '{{preview_url}}' }}</code></td>
|
||||
<td>The URL of the preview page generated by changedetection.io.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{ diff_url }}' }}</code></td>
|
||||
<td>The diff output - differences only</td>
|
||||
<td><code>{{ '{{diff_url}}' }}</code></td>
|
||||
<td>The URL of the diff output for the watch.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff}}' }}</code></td>
|
||||
<td>The diff output - only changes, additions, and removals</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_added}}' }}</code></td>
|
||||
<td>The diff output - only changes and additions</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_removed}}' }}</code></td>
|
||||
<td>The diff output - only changes and removals</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{ diff_full }}' }}</code></td>
|
||||
<td><code>{{ '{{diff_full}}' }}</code></td>
|
||||
<td>The diff output - full difference output</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{ current_snapshot }}' }}</code></td>
|
||||
<td><code>{{ '{{current_snapshot}}' }}</code></td>
|
||||
<td>The current snapshot value, useful when combined with JSON or CSS filters
|
||||
</td>
|
||||
</tr>
|
||||
@@ -95,8 +107,10 @@
|
||||
</table>
|
||||
<div class="pure-form-message-inline">
|
||||
<br>
|
||||
URLs generated by changedetection.io (such as <code>{{ '{{ diff_url }}' }}</code>) require the <code>BASE_URL</code> environment variable set.<br/>
|
||||
URLs generated by changedetection.io (such as <code>{{ '{{diff_url}}' }}</code>) require the <code>BASE_URL</code> environment variable set.<br>
|
||||
Your <code>BASE_URL</code> var is currently "{{settings_application['current_base_url']}}"
|
||||
<br>
|
||||
Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. For example, an addition or removal could be perceived as a change in some cases. <a target="_new" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removal%7D%7D-notification-tokens">More Here</a> <br>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -124,12 +124,12 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(extract_form.extract_regex) }}
|
||||
<span class="pure-form-message-inline">
|
||||
A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract.<br/>
|
||||
A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract.<br>
|
||||
|
||||
<p>
|
||||
For example, to extract only the numbers from text ‐</br>
|
||||
<strong>Raw text</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code></br>
|
||||
<strong>RegEx to extract:</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br/>
|
||||
For example, to extract only the numbers from text ‐<br>
|
||||
<strong>Raw text</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code><br>
|
||||
<strong>RegEx to extract:</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br>
|
||||
</p>
|
||||
<p>
|
||||
<a href="https://RegExr.com/">Be sure to test your RegEx here.</a>
|
||||
@@ -154,4 +154,4 @@
|
||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-render.js')}}"></script>
|
||||
|
||||
|
||||
{% endblock %}
|
||||
{% endblock %}
|
||||
|
||||
@@ -49,8 +49,8 @@
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
|
||||
<span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br/>
|
||||
<span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br/>
|
||||
<span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br>
|
||||
<span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.title, class="m-d") }}
|
||||
@@ -106,10 +106,10 @@
|
||||
{{ render_field(form.webdriver_delay) }}
|
||||
<div class="pure-form-message-inline">
|
||||
<strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
|
||||
<br/>
|
||||
<br>
|
||||
This will wait <i>n</i> seconds before extracting the text.
|
||||
{% if using_global_webdriver_wait %}
|
||||
<br/><strong>Using the current global default settings</strong>
|
||||
<br><strong>Using the current global default settings</strong>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
@@ -216,7 +216,7 @@ User-Agent: wonderbra 1.0") }}
|
||||
|
||||
<div class="tab-pane-inner" id="filters-and-triggers">
|
||||
<div class="pure-control-group">
|
||||
<strong>Pro-tips:</strong><br/>
|
||||
<strong>Pro-tips:</strong><br>
|
||||
<ul>
|
||||
<li>
|
||||
Use the preview page to see your filters and triggers highlighted.
|
||||
@@ -241,9 +241,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
|
||||
%}
|
||||
{{ field }}
|
||||
{% if '/text()' in field %}
|
||||
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br/>
|
||||
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br>
|
||||
{% endif %}
|
||||
<span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br/>
|
||||
<span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
|
||||
|
||||
<ul>
|
||||
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
|
||||
@@ -266,7 +266,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
|
||||
</li>
|
||||
</ul>
|
||||
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
|
||||
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
|
||||
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
@@ -334,7 +334,7 @@ Unavailable") }}
|
||||
<li>Extracts text in the final output (line by line) after other filters using regular expressions;
|
||||
<ul>
|
||||
<li>Regular expression ‐ example <code>/reports.+?2022/i</code></li>
|
||||
<li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br/></li>
|
||||
<li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br></li>
|
||||
<li>Keyword example ‐ example <code>Out of stock</code></li>
|
||||
<li>Use groups to extract just that text ‐ example <code>/reports.+?(\d+)/i</code> returns a list of years only</li>
|
||||
</ul>
|
||||
@@ -353,7 +353,7 @@ Unavailable") }}
|
||||
<div class="pure-control-group">
|
||||
{% if visualselector_enabled %}
|
||||
<span class="pure-form-message-inline">
|
||||
The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection ‐ after the <i>Browser Steps</i> has completed.<br/><br/>
|
||||
The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection ‐ after the <i>Browser Steps</i> has completed.<br><br>
|
||||
</span>
|
||||
|
||||
<div id="selector-header">
|
||||
|
||||
@@ -41,12 +41,12 @@
|
||||
|
||||
<fieldset class="pure-group">
|
||||
<legend>
|
||||
Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.</br>
|
||||
Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.<br>
|
||||
This is <i>experimental</i>, supported fields are <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, the rest (including <code>schedule</code>) are ignored.
|
||||
<br/>
|
||||
<br>
|
||||
<p>
|
||||
How to export? <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br/>
|
||||
Be sure to set your default fetcher to Chrome if required.</br>
|
||||
How to export? <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br>
|
||||
Be sure to set your default fetcher to Chrome if required.<br>
|
||||
</p>
|
||||
</legend>
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@
|
||||
<div class="tip">
|
||||
For now, Differences are performed on text, not graphically, only the latest screenshot is available.
|
||||
</div>
|
||||
</br>
|
||||
<br>
|
||||
{% if is_html_webdriver %}
|
||||
{% if screenshot %}
|
||||
<div class="snapshot-age">{{watch.snapshot_screenshot_ctime|format_timestamp_timeago}}</div>
|
||||
@@ -67,4 +67,4 @@
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
{% endblock %}
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }}
|
||||
<span class="pure-form-message-inline">After this many consecutive times that the CSS/xPath filter is missing, send a notification
|
||||
<br/>
|
||||
<br>
|
||||
Set to <strong>0</strong> to disable
|
||||
</span>
|
||||
</div>
|
||||
@@ -66,7 +66,7 @@
|
||||
{{ render_field(form.application.form.base_url, placeholder="http://yoursite.com:5000/",
|
||||
class="m-d") }}
|
||||
<span class="pure-form-message-inline">
|
||||
Base URL used for the <code>{{ '{{ base_url }}' }}</code> token in notifications and RSS links.<br/>Default value is the ENV var 'BASE_URL' (Currently "{{settings_application['current_base_url']}}"),
|
||||
Base URL used for the <code>{{ '{{ base_url }}' }}</code> token in notifications and RSS links.<br>Default value is the ENV var 'BASE_URL' (Currently "{{settings_application['current_base_url']}}"),
|
||||
<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">read more here</a>.
|
||||
</span>
|
||||
</div>
|
||||
@@ -105,13 +105,13 @@
|
||||
<p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
|
||||
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
|
||||
</span>
|
||||
<br/>
|
||||
<br>
|
||||
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using BrightData Proxies, find out more here.</a>
|
||||
</div>
|
||||
<fieldset class="pure-group" id="webdriver-override-options">
|
||||
<div class="pure-form-message-inline">
|
||||
<strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
|
||||
<br/>
|
||||
<br>
|
||||
This will wait <i>n</i> seconds before extracting the text.
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
@@ -124,14 +124,14 @@
|
||||
|
||||
<fieldset class="pure-group">
|
||||
{{ render_checkbox_field(form.application.form.ignore_whitespace) }}
|
||||
<span class="pure-form-message-inline">Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected.<br/>
|
||||
<span class="pure-form-message-inline">Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected.<br>
|
||||
<i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc.
|
||||
</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group">
|
||||
{{ render_checkbox_field(form.application.form.render_anchor_tag_content) }}
|
||||
<span class="pure-form-message-inline">Render anchor tag content, default disabled, when enabled renders links as <code>(link text)[https://somesite.com]</code>
|
||||
<br/>
|
||||
<br>
|
||||
<i>Note:</i> Changing this could affect the content of your existing watches, possibly trigger alerts etc.
|
||||
</span>
|
||||
</fieldset>
|
||||
@@ -151,7 +151,7 @@ nav
|
||||
{{ render_field(form.application.form.global_ignore_text, rows=5, placeholder="Some text to ignore in a line
|
||||
/some.regex\d{2}/ for case-INsensitive regex
|
||||
") }}
|
||||
<span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br/>
|
||||
<span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br>
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>Note: This is applied globally in addition to the per-watch rules.</li>
|
||||
@@ -170,8 +170,8 @@ nav
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.api_access_token_enabled) }}
|
||||
<div class="pure-form-message-inline">Restrict API access limit by using <code>x-api-key</code> header</div><br/>
|
||||
<div class="pure-form-message-inline"><br/>API Key <span id="api-key">{{api_key}}</span>
|
||||
<div class="pure-form-message-inline">Restrict API access limit by using <code>x-api-key</code> header</div><br>
|
||||
<div class="pure-form-message-inline"><br>API Key <span id="api-key">{{api_key}}</span>
|
||||
<span style="display:none;" id="api-key-copy" >copy</span>
|
||||
</div>
|
||||
</div>
|
||||
@@ -181,7 +181,7 @@ nav
|
||||
<p><strong>Tip</strong>: You can connect to websites using <a href="https://brightdata.grsm.io/n0r16zf7eivq">BrightData</a> proxies, their service <strong>WebUnlocker</strong> will solve most CAPTCHAs, whilst their <strong>Residential Proxies</strong> may help to avoid CAPTCHA altogether. </p>
|
||||
<p>It may be easier to try <strong>WebUnlocker</strong> first, WebUnlocker also supports country selection.</p>
|
||||
<p>
|
||||
When you have <a href="https://brightdata.grsm.io/n0r16zf7eivq">registered</a>, enabled the required services, visit the <A href="https://brightdata.com/cp/api_example?">API example page</A>, then select <strong>Python</strong>, set the country you wish to use, then copy+paste the example URL below<br/>
|
||||
When you have <a href="https://brightdata.grsm.io/n0r16zf7eivq">registered</a>, enabled the required services, visit the <A href="https://brightdata.com/cp/api_example?">API example page</A>, then select <strong>Python</strong>, set the country you wish to use, then copy+paste the example URL below<br>
|
||||
The Proxy URL with BrightData should start with <code>http://brd-customer...</code>
|
||||
</p>
|
||||
|
||||
|
||||
@@ -11,10 +11,10 @@ import uuid
|
||||
def set_original_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<div id="sametext">Some text thats the same</div>
|
||||
<div id="changetext">Some text that will change</div>
|
||||
</body>
|
||||
@@ -29,10 +29,10 @@ def set_original_response():
|
||||
def set_modified_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>which has this one new line</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<div id="sametext">Some text thats the same</div>
|
||||
<div id="changetext">Some text that changes</div>
|
||||
</body>
|
||||
|
||||
@@ -7,10 +7,10 @@ from .util import live_server_setup, extract_UUID_from_client, extract_api_key_f
|
||||
def set_response_with_ldjson():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<div class="sametext">Some text thats the same</div>
|
||||
<div class="changetext">Some text that will change</div>
|
||||
<script type="application/ld+json">
|
||||
@@ -61,10 +61,10 @@ def set_response_with_ldjson():
|
||||
def set_response_without_ldjson():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<div class="sametext">Some text thats the same</div>
|
||||
<div class="changetext">Some text that will change</div>
|
||||
</body>
|
||||
@@ -143,4 +143,4 @@ def test_check_ldjson_price_autodetect(client, live_server):
|
||||
assert b'ldjson-price-track-offer' not in res.data
|
||||
|
||||
##########################################################################################
|
||||
client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
|
||||
@@ -11,7 +11,7 @@ sleep_time_for_fetch_thread = 3
|
||||
# Basic test to check inscriptis is not adding return line chars, basically works etc
|
||||
def test_inscriptus():
|
||||
from inscriptis import get_text
|
||||
html_content = "<html><body>test!<br/>ok man</body></html>"
|
||||
html_content = "<html><body>test!<br>ok man</body></html>"
|
||||
stripped_text_from_html = get_text(html_content)
|
||||
assert stripped_text_from_html == 'test!\nok man'
|
||||
|
||||
@@ -82,7 +82,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
|
||||
assert b'<rss' in res.data
|
||||
|
||||
# re #16 should have the diff in here too
|
||||
assert b'(into ) which has this one new line' in res.data
|
||||
assert b'(into) which has this one new line' in res.data
|
||||
assert b'CDATA' in res.data
|
||||
|
||||
assert expected_url.encode('utf-8') in res.data
|
||||
|
||||
@@ -8,10 +8,10 @@ from changedetectionio import html_tools
|
||||
def set_original_ignore_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -24,10 +24,10 @@ def set_original_ignore_response():
|
||||
def set_modified_original_ignore_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some NEW nice initial text</br>
|
||||
Some NEW nice initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<p>new ignore stuff</p>
|
||||
<p>out of stock</p>
|
||||
<p>blah</p>
|
||||
@@ -44,11 +44,11 @@ def set_modified_original_ignore_response():
|
||||
def set_modified_response_minus_block_text():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some NEW nice initial text</br>
|
||||
Some NEW nice initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
<p>now on sale $2/p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<p>new ignore stuff</p>
|
||||
<p>blah</p>
|
||||
</body>
|
||||
|
||||
@@ -12,10 +12,10 @@ def test_setup(live_server):
|
||||
def set_original_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<div id="sametext">Some text thats the same</div>
|
||||
<div id="changetext">Some text that will change</div>
|
||||
</body>
|
||||
@@ -29,10 +29,10 @@ def set_original_response():
|
||||
def set_modified_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>which has this one new line</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<div id="sametext">Some text thats the same</div>
|
||||
<div id="changetext">Some text that changes</div>
|
||||
</body>
|
||||
|
||||
@@ -25,10 +25,10 @@ def set_original_response():
|
||||
</ul>
|
||||
</nav>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<div id="changetext">Some text that will change</div>
|
||||
</body>
|
||||
<footer>
|
||||
@@ -54,10 +54,10 @@ def set_modified_response():
|
||||
</ul>
|
||||
</nav>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<div id="changetext">Some text that changes</div>
|
||||
</body>
|
||||
<footer>
|
||||
@@ -71,7 +71,6 @@ def set_modified_response():
|
||||
|
||||
|
||||
def test_element_removal_output():
|
||||
from changedetectionio import fetch_site_status
|
||||
from inscriptis import get_text
|
||||
|
||||
# Check text with sub-parts renders correctly
|
||||
@@ -85,7 +84,7 @@ def test_element_removal_output():
|
||||
</ul>
|
||||
</nav>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>across multiple lines</p>
|
||||
<div id="changetext">Some text that changes</div>
|
||||
</body>
|
||||
|
||||
@@ -10,10 +10,10 @@ from ..html_tools import *
|
||||
def set_original_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<div id="sametext">Some text thats the same</div>
|
||||
<div class="changetext">Some text that will change</div>
|
||||
</body>
|
||||
@@ -28,12 +28,12 @@ def set_original_response():
|
||||
def set_modified_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>which has this one new line</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<div id="sametext">Some text thats the same</div>
|
||||
<div class="changetext">Some text that did change ( 1000 online <br/> 80 guests<br/> 2000 online )</div>
|
||||
<div class="changetext">Some text that did change ( 1000 online <br> 80 guests<br> 2000 online )</div>
|
||||
<div class="changetext">SomeCase insensitive 3456</div>
|
||||
</body>
|
||||
</html>
|
||||
@@ -49,8 +49,8 @@ def set_multiline_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
|
||||
<p>Something <br/>
|
||||
across 6 billion multiple<br/>
|
||||
<p>Something <br>
|
||||
across 6 billion multiple<br>
|
||||
lines
|
||||
</p>
|
||||
|
||||
|
||||
@@ -11,10 +11,10 @@ from changedetectionio.model import App
|
||||
def set_response_without_filter():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<div id="nope-doesnt-exist">Some text thats the same</div>
|
||||
</body>
|
||||
</html>
|
||||
@@ -28,10 +28,10 @@ def set_response_without_filter():
|
||||
def set_response_with_filter():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<div class="ticket-available">Ticket now on sale!</div>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -8,10 +8,10 @@ from changedetectionio.model import App
|
||||
def set_response_with_filter():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<div id="nope-doesnt-exist">Some text thats the same</div>
|
||||
</body>
|
||||
</html>
|
||||
@@ -145,4 +145,4 @@ def test_check_xpath_filter_failure_notification(client, live_server):
|
||||
time.sleep(1)
|
||||
run_filter_test(client, '//*[@id="nope-doesnt-exist"]')
|
||||
|
||||
# Test that notification is never sent
|
||||
# Test that notification is never sent
|
||||
|
||||
@@ -6,11 +6,11 @@ from ..html_tools import html_to_text
|
||||
def test_html_to_text_func():
|
||||
test_html = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
<a href="/first_link"> More Text </a>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<a href="second_link.com"> Even More Text </a>
|
||||
</body>
|
||||
</html>
|
||||
@@ -21,7 +21,7 @@ def test_html_to_text_func():
|
||||
|
||||
no_links_text = \
|
||||
"Some initial text\n\nWhich is across multiple " \
|
||||
"lines\n\nMore Text So let's see what happens. Even More Text"
|
||||
"lines\n\nMore Text\nSo let's see what happens.\nEven More Text"
|
||||
|
||||
# check that no links are in the extracted text
|
||||
assert text_content == no_links_text
|
||||
@@ -31,7 +31,7 @@ def test_html_to_text_func():
|
||||
|
||||
links_text = \
|
||||
"Some initial text\n\nWhich is across multiple lines\n\n[ More Text " \
|
||||
"](/first_link) So let's see what happens. [ Even More Text ]" \
|
||||
"](/first_link)\nSo let's see what happens.\n[ Even More Text ]" \
|
||||
"(second_link.com)"
|
||||
|
||||
# check that links are present in the extracted text
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from . util import live_server_setup
|
||||
from changedetectionio import html_tools
|
||||
|
||||
@@ -11,7 +9,7 @@ def test_setup(live_server):
|
||||
# Unit test of the stripper
|
||||
# Always we are dealing in utf-8
|
||||
def test_strip_regex_text_func():
|
||||
from changedetectionio import fetch_site_status
|
||||
from ..fetchers import text_json_diff as fetch_site_status
|
||||
|
||||
test_content = """
|
||||
but sometimes we want to remove the lines.
|
||||
|
||||
@@ -11,7 +11,7 @@ def test_setup(live_server):
|
||||
# Unit test of the stripper
|
||||
# Always we are dealing in utf-8
|
||||
def test_strip_text_func():
|
||||
from changedetectionio import fetch_site_status
|
||||
from ..fetchers import text_json_diff as fetch_site_status
|
||||
|
||||
test_content = """
|
||||
Some content
|
||||
@@ -33,10 +33,10 @@ def test_strip_text_func():
|
||||
def set_original_ignore_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -49,10 +49,10 @@ def set_original_ignore_response():
|
||||
def set_modified_original_ignore_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some NEW nice initial text</br>
|
||||
Some NEW nice initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<p>new ignore stuff</p>
|
||||
<p>blah</p>
|
||||
</body>
|
||||
@@ -68,11 +68,11 @@ def set_modified_original_ignore_response():
|
||||
def set_modified_ignore_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
<P>ZZZZz</P>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
||||
@@ -12,10 +12,10 @@ def test_setup(live_server):
|
||||
def set_original_ignore_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<a href="/original_link"> Some More Text </a>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
@@ -29,10 +29,10 @@ def set_original_ignore_response():
|
||||
def set_modified_ignore_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<a href="/modified_link"> Some More Text </a>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
@@ -12,10 +12,10 @@ def test_setup(live_server):
|
||||
def set_original_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
@@ -27,10 +27,10 @@ def set_original_response():
|
||||
def set_some_changed_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines, and a new thing too.</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
@@ -12,15 +12,15 @@ def test_setup(live_server):
|
||||
def set_original_ignore_response_but_with_whitespace():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>
|
||||
|
||||
|
||||
Which is across multiple lines</p>
|
||||
<br>
|
||||
</br>
|
||||
<br>
|
||||
|
||||
So let's see what happens. </br>
|
||||
So let's see what happens. <br>
|
||||
|
||||
|
||||
</body>
|
||||
@@ -34,10 +34,10 @@ def set_original_ignore_response_but_with_whitespace():
|
||||
def set_original_ignore_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
||||
@@ -73,16 +73,12 @@ def test_check_notification(client, live_server):
|
||||
# We write the PNG to disk, but a JPEG should appear in the notification
|
||||
# Write the last screenshot png
|
||||
testimage_png = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII='
|
||||
# This one is created when we save the screenshot from the webdriver/playwright session (converted from PNG)
|
||||
testimage_jpg = '/9j/4AAQSkZJRgABAQEASABIAAD/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/wAALCAABAAEBAREA/8QAFAABAAAAAAAAAAAAAAAAAAAACf/EABQQAQAAAAAAAAAAAAAAAAAAAAD/2gAIAQEAAD8AKp//2Q=='
|
||||
|
||||
|
||||
uuid = extract_UUID_from_client(client)
|
||||
datastore = 'test-datastore'
|
||||
with open(os.path.join(datastore, str(uuid), 'last-screenshot.png'), 'wb') as f:
|
||||
f.write(base64.b64decode(testimage_png))
|
||||
with open(os.path.join(datastore, str(uuid), 'last-screenshot.jpg'), 'wb') as f:
|
||||
f.write(base64.b64decode(testimage_jpg))
|
||||
|
||||
# Goto the edit page, add our ignore text
|
||||
# Add our URL to the import page
|
||||
@@ -100,6 +96,8 @@ def test_check_notification(client, live_server):
|
||||
"Diff URL: {{diff_url}}\n"
|
||||
"Snapshot: {{current_snapshot}}\n"
|
||||
"Diff: {{diff}}\n"
|
||||
"Diff Added: {{diff_added}}\n"
|
||||
"Diff Removed: {{diff_removed}}\n"
|
||||
"Diff Full: {{diff_full}}\n"
|
||||
":-)",
|
||||
"notification_screenshot": True,
|
||||
@@ -147,7 +145,7 @@ def test_check_notification(client, live_server):
|
||||
assert ':-)' in notification_submission
|
||||
assert "Diff Full: Some initial text" in notification_submission
|
||||
assert "Diff: (changed) Which is across multiple lines" in notification_submission
|
||||
assert "(into ) which has this one new line" in notification_submission
|
||||
assert "(into) which has this one new line" in notification_submission
|
||||
# Re #342 - check for accidental python byte encoding of non-utf8/string
|
||||
assert "b'" not in notification_submission
|
||||
assert re.search('Watch UUID: [0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}', notification_submission, re.IGNORECASE)
|
||||
@@ -160,12 +158,12 @@ def test_check_notification(client, live_server):
|
||||
|
||||
# Check the attachment was added, and that it is a JPEG from the original PNG
|
||||
notification_submission_object = json.loads(notification_submission)
|
||||
assert notification_submission_object['attachments'][0]['filename'] == 'last-screenshot.jpg'
|
||||
# We keep PNG screenshots for now
|
||||
assert notification_submission_object['attachments'][0]['filename'] == 'last-screenshot.png'
|
||||
assert len(notification_submission_object['attachments'][0]['base64'])
|
||||
assert notification_submission_object['attachments'][0]['mimetype'] == 'image/jpeg'
|
||||
assert notification_submission_object['attachments'][0]['mimetype'] == 'image/png'
|
||||
jpeg_in_attachment = base64.b64decode(notification_submission_object['attachments'][0]['base64'])
|
||||
assert b'JFIF' in jpeg_in_attachment
|
||||
assert testimage_png not in notification_submission
|
||||
|
||||
# Assert that the JPEG is readable (didn't get chewed up somewhere)
|
||||
from PIL import Image
|
||||
import io
|
||||
@@ -297,7 +295,10 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server):
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b'Settings updated' in res.data
|
||||
|
||||
client.get(
|
||||
url_for("form_delete", uuid="all"),
|
||||
follow_redirects=True
|
||||
)
|
||||
# Add a watch and trigger a HTTP POST
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
|
||||
@@ -8,10 +8,10 @@ from . util import live_server_setup
|
||||
def set_original_ignore_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -24,10 +24,10 @@ def set_original_ignore_response():
|
||||
def set_modified_original_ignore_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some NEW nice initial text</br>
|
||||
Some NEW nice initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -40,12 +40,12 @@ def set_modified_original_ignore_response():
|
||||
def set_modified_with_trigger_text_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some NEW nice initial text</br>
|
||||
Some NEW nice initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
<br>
|
||||
Add to cart
|
||||
<br/>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -142,4 +142,4 @@ def test_trigger_functionality(client, live_server):
|
||||
res = client.get(url_for("preview_page", uuid="first"))
|
||||
|
||||
# We should be able to see what we triggered on
|
||||
assert b'<div class="triggered">Add to cart' in res.data
|
||||
assert b'<div class="triggered">Add to cart' in res.data
|
||||
|
||||
@@ -8,10 +8,10 @@ from . util import live_server_setup
|
||||
def set_original_ignore_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -72,7 +72,7 @@ def test_trigger_regex_functionality(client, live_server):
|
||||
assert b'unviewed' not in res.data
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write("regex test123<br/>\nsomething 123")
|
||||
f.write("regex test123<br>\nsomething 123")
|
||||
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
@@ -81,4 +81,4 @@ def test_trigger_regex_functionality(client, live_server):
|
||||
|
||||
# Cleanup everything
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
@@ -8,10 +8,10 @@ from . util import live_server_setup
|
||||
def set_original_ignore_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
||||
@@ -12,10 +12,10 @@ def test_setup(live_server):
|
||||
def set_original_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<div class="sametext">Some text thats the same</div>
|
||||
<div class="changetext">Some text that will change</div>
|
||||
</body>
|
||||
@@ -29,10 +29,10 @@ def set_original_response():
|
||||
def set_modified_response():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. THIS CHANGES AND SHOULDNT TRIGGER A CHANGE</br>
|
||||
<br>
|
||||
So let's see what happens. THIS CHANGES AND SHOULDNT TRIGGER A CHANGE<br>
|
||||
<div class="sametext">Some text thats the same</div>
|
||||
<div class="changetext">Some new text</div>
|
||||
</body>
|
||||
|
||||
@@ -16,15 +16,30 @@ class TestDiffBuilder(unittest.TestCase):
|
||||
output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after.txt")
|
||||
output = output.split("\n")
|
||||
self.assertIn('(changed) ok', output)
|
||||
self.assertIn('(into ) xok', output)
|
||||
self.assertIn('(into ) next-x-ok', output)
|
||||
self.assertIn('(added ) and something new', output)
|
||||
self.assertIn('(into) xok', output)
|
||||
self.assertIn('(into) next-x-ok', output)
|
||||
self.assertIn('(added) and something new', output)
|
||||
|
||||
|
||||
output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after-2.txt")
|
||||
output = output.split("\n")
|
||||
self.assertIn('(removed) for having learned computerese,', output)
|
||||
self.assertIn('(removed) I continue to examine bits, bytes and words', output)
|
||||
|
||||
#diff_removed
|
||||
output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after.txt", include_equal=False, include_removed=True, include_added=False)
|
||||
output = output.split("\n")
|
||||
self.assertIn('(changed) ok', output)
|
||||
self.assertIn('(into) xok', output)
|
||||
self.assertIn('(into) next-x-ok', output)
|
||||
self.assertNotIn('(added) and something new', output)
|
||||
|
||||
#diff_removed
|
||||
output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after-2.txt", include_equal=False, include_removed=True, include_added=False)
|
||||
output = output.split("\n")
|
||||
self.assertIn('(removed) for having learned computerese,', output)
|
||||
self.assertIn('(removed) I continue to examine bits, bytes and words', output)
|
||||
|
||||
|
||||
# @todo test blocks of changed, blocks of added, blocks of removed
|
||||
|
||||
|
||||
@@ -9,10 +9,10 @@ def set_original_response():
|
||||
test_return_data = """<html>
|
||||
<head><title>head title</title></head>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<span class="foobar-detection" style='display:none'></span>
|
||||
</body>
|
||||
</html>
|
||||
@@ -26,10 +26,10 @@ def set_modified_response():
|
||||
test_return_data = """<html>
|
||||
<head><title>modified head title</title></head>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>which has this one new line</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
@@ -43,11 +43,11 @@ def set_more_modified_response():
|
||||
test_return_data = """<html>
|
||||
<head><title>modified head title</title></head>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
Some initial text<br>
|
||||
<p>which has this one new line</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
Ohh yeah awesome<br/>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
Ohh yeah awesome<br>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
@@ -4,8 +4,7 @@ import queue
|
||||
import time
|
||||
|
||||
from changedetectionio import content_fetcher
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio.fetch_site_status import FilterNotFoundInResponse
|
||||
from .fetchers.text_json_diff import FilterNotFoundInResponse
|
||||
|
||||
# A single update worker
|
||||
#
|
||||
@@ -65,7 +64,7 @@ class update_worker(threading.Thread):
|
||||
if 'notification_urls' in n_object and n_object['notification_urls']:
|
||||
# HTML needs linebreak, but MarkDown and Text can use a linefeed
|
||||
if n_object['notification_format'] == 'HTML':
|
||||
line_feed_sep = "</br>"
|
||||
line_feed_sep = "<br>"
|
||||
else:
|
||||
line_feed_sep = "\n"
|
||||
|
||||
@@ -75,10 +74,12 @@ class update_worker(threading.Thread):
|
||||
n_object.update({
|
||||
'watch_url': watch['url'],
|
||||
'uuid': watch_uuid,
|
||||
'screenshot': watch.get_screenshot_as_jpeg() if watch.get('notification_screenshot') else None,
|
||||
'screenshot': watch.get_screenshot() if watch.get('notification_screenshot') else None,
|
||||
'current_snapshot': snapshot_contents.decode('utf-8'),
|
||||
'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep),
|
||||
'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], True, line_feed_sep=line_feed_sep)
|
||||
'diff_added': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], include_removed=False, line_feed_sep=line_feed_sep),
|
||||
'diff_removed': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], include_added=False, line_feed_sep=line_feed_sep),
|
||||
'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], include_equal=True, line_feed_sep=line_feed_sep)
|
||||
})
|
||||
logging.info (">> SENDING NOTIFICATION")
|
||||
self.notification_q.put(n_object)
|
||||
@@ -151,7 +152,7 @@ class update_worker(threading.Thread):
|
||||
os.unlink(full_path)
|
||||
|
||||
def run(self):
|
||||
from changedetectionio import fetch_site_status
|
||||
from .fetchers import text_json_diff as fetch_site_status
|
||||
|
||||
update_handler = fetch_site_status.perform_site_check(datastore=self.datastore)
|
||||
|
||||
@@ -169,10 +170,8 @@ class update_worker(threading.Thread):
|
||||
if uuid in list(self.datastore.data['watching'].keys()):
|
||||
changed_detected = False
|
||||
contents = b''
|
||||
screenshot = False
|
||||
update_obj= {}
|
||||
xpath_data = False
|
||||
process_changedetection_results = True
|
||||
update_obj= {}
|
||||
print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url']))
|
||||
now = time.time()
|
||||
|
||||
@@ -274,6 +273,7 @@ class update_worker(threading.Thread):
|
||||
err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
process_changedetection_results = False
|
||||
except content_fetcher.ScreenshotUnavailable as e:
|
||||
err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'"
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
@@ -285,6 +285,7 @@ class update_worker(threading.Thread):
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
process_changedetection_results = False
|
||||
except content_fetcher.PageUnloadable as e:
|
||||
err_text = "Page request from server didnt respond correctly"
|
||||
if e.message:
|
||||
@@ -295,6 +296,7 @@ class update_worker(threading.Thread):
|
||||
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
process_changedetection_results = False
|
||||
except Exception as e:
|
||||
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||
|
||||
Reference in New Issue
Block a user