Move history data to a textfile, improves memory handling (#638)

This commit is contained in:
dgtlmoon
2022-05-31 23:43:50 +02:00
committed by GitHub
parent dbb5468cdc
commit aa3c8a9370
14 changed files with 330 additions and 158 deletions

View File

@@ -178,6 +178,10 @@ def changedetection_app(config=None, datastore_o=None):
global datastore
datastore = datastore_o
# so far just for read-only via tests, but this will be moved eventually to be the main source
# (instead of the global var)
app.config['DATASTORE']=datastore_o
#app.config.update(config or {})
login_manager = flask_login.LoginManager(app)
@@ -317,25 +321,19 @@ def changedetection_app(config=None, datastore_o=None):
for watch in sorted_watches:
dates = list(watch['history'].keys())
dates = list(watch.history.keys())
# Re #521 - Don't bother processing this one if theres less than 2 snapshots, means we never had a change detected.
if len(dates) < 2:
continue
# Convert to int, sort and back to str again
# @todo replace datastore getter that does this automatically
dates = [int(i) for i in dates]
dates.sort(reverse=True)
dates = [str(i) for i in dates]
prev_fname = watch['history'][dates[1]]
prev_fname = watch.history[dates[-2]]
if not watch['viewed']:
if not watch.viewed:
# Re #239 - GUID needs to be individual for each event
# @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
guid = "{}/{}".format(watch['uuid'], watch['last_changed'])
fe = fg.add_entry()
# Include a link to the diff page, they will have to login here to see if password protection is enabled.
# Description is the page you watch, link takes you to the diff JS UI page
base_url = datastore.data['settings']['application']['base_url']
@@ -350,13 +348,13 @@ def changedetection_app(config=None, datastore_o=None):
watch_title = watch.get('title') if watch.get('title') else watch.get('url')
fe.title(title=watch_title)
latest_fname = watch['history'][dates[0]]
latest_fname = watch.history[dates[-1]]
html_diff = diff.render_diff(prev_fname, latest_fname, include_equal=False, line_feed_sep="</br>")
fe.description(description="<![CDATA[<html><body><h4>{}</h4>{}</body></html>".format(watch_title, html_diff))
fe.guid(guid, permalink=False)
dt = datetime.datetime.fromtimestamp(int(watch['newest_history_key']))
dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key))
dt = dt.replace(tzinfo=pytz.UTC)
fe.pubDate(dt)
@@ -491,10 +489,10 @@ def changedetection_app(config=None, datastore_o=None):
# 0 means that theres only one, so that there should be no 'unviewed' history available
if newest_history_key == 0:
newest_history_key = list(datastore.data['watching'][uuid]['history'].keys())[0]
newest_history_key = list(datastore.data['watching'][uuid].history.keys())[0]
if newest_history_key:
with open(datastore.data['watching'][uuid]['history'][newest_history_key],
with open(datastore.data['watching'][uuid].history[newest_history_key],
encoding='utf-8') as file:
raw_content = file.read()
@@ -588,12 +586,12 @@ def changedetection_app(config=None, datastore_o=None):
# Reset the previous_md5 so we process a new snapshot including stripping ignore text.
if form_ignore_text:
if len(datastore.data['watching'][uuid]['history']):
if len(datastore.data['watching'][uuid].history):
extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)
# Reset the previous_md5 so we process a new snapshot including stripping ignore text.
if form.css_filter.data.strip() != datastore.data['watching'][uuid]['css_filter']:
if len(datastore.data['watching'][uuid]['history']):
if len(datastore.data['watching'][uuid].history):
extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)
# Be sure proxy value is None
@@ -754,7 +752,7 @@ def changedetection_app(config=None, datastore_o=None):
# Save the current newest history as the most recently viewed
for watch_uuid, watch in datastore.data['watching'].items():
datastore.set_last_viewed(watch_uuid, watch['newest_history_key'])
datastore.set_last_viewed(watch_uuid, watch.newest_history_key)
flash("Cleared all statuses.")
return redirect(url_for('index'))
@@ -774,20 +772,17 @@ def changedetection_app(config=None, datastore_o=None):
flash("No history found for the specified link, bad link?", "error")
return redirect(url_for('index'))
dates = list(watch['history'].keys())
# Convert to int, sort and back to str again
# @todo replace datastore getter that does this automatically
dates = [int(i) for i in dates]
dates.sort(reverse=True)
dates = [str(i) for i in dates]
history = watch.history
dates = list(history.keys())
if len(dates) < 2:
flash("Not enough saved change detection snapshots to produce a report.", "error")
return redirect(url_for('index'))
# Save the current newest history as the most recently viewed
datastore.set_last_viewed(uuid, dates[0])
newest_file = watch['history'][dates[0]]
datastore.set_last_viewed(uuid, time.time())
newest_file = history[dates[-1]]
try:
with open(newest_file, 'r') as f:
@@ -797,10 +792,10 @@ def changedetection_app(config=None, datastore_o=None):
previous_version = request.args.get('previous_version')
try:
previous_file = watch['history'][previous_version]
previous_file = history[previous_version]
except KeyError:
# Not present, use a default value, the second one in the sorted list.
previous_file = watch['history'][dates[1]]
previous_file = history[dates[-2]]
try:
with open(previous_file, 'r') as f:
@@ -817,7 +812,7 @@ def changedetection_app(config=None, datastore_o=None):
extra_stylesheets=extra_stylesheets,
versions=dates[1:],
uuid=uuid,
newest_version_timestamp=dates[0],
newest_version_timestamp=dates[-1],
current_previous_version=str(previous_version),
current_diff_url=watch['url'],
extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']),
@@ -845,9 +840,9 @@ def changedetection_app(config=None, datastore_o=None):
flash("No history found for the specified link, bad link?", "error")
return redirect(url_for('index'))
if len(watch['history']):
timestamps = sorted(watch['history'].keys(), key=lambda x: int(x))
filename = watch['history'][timestamps[-1]]
if watch.history_n >0:
timestamps = sorted(watch.history.keys(), key=lambda x: int(x))
filename = watch.history[timestamps[-1]]
try:
with open(filename, 'r') as f:
tmp = f.readlines()
@@ -1141,6 +1136,7 @@ def changedetection_app(config=None, datastore_o=None):
# copy it to memory as trim off what we dont need (history)
watch = deepcopy(datastore.data['watching'][uuid])
# For older versions that are not a @property
if (watch.get('history')):
del (watch['history'])
@@ -1249,6 +1245,7 @@ def notification_runner():
# Thread runner to check every minute, look for new watches to feed into the Queue.
def ticker_thread_check_time_launch_checks():
from changedetectionio import update_worker
import logging
# Spin up Workers that do the fetching
# Can be overriden by ENV or use the default settings
@@ -1267,9 +1264,10 @@ def ticker_thread_check_time_launch_checks():
running_uuids.append(t.current_uuid)
# Re #232 - Deepcopy the data incase it changes while we're iterating through it all
watch_uuid_list = []
while True:
try:
copied_datastore = deepcopy(datastore)
watch_uuid_list = datastore.data['watching'].keys()
except RuntimeError as e:
# RuntimeError: dictionary changed size during iteration
time.sleep(0.1)
@@ -1286,7 +1284,12 @@ def ticker_thread_check_time_launch_checks():
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
recheck_time_system_seconds = datastore.threshold_seconds
for uuid, watch in copied_datastore.data['watching'].items():
for uuid in watch_uuid_list:
watch = datastore.data['watching'].get(uuid)
if not watch:
logging.error("Watch: {} no longer present.".format(uuid))
continue
# No need todo further processing if it's paused
if watch['paused']: