Don't rewrite/resave the snapshot when it's the same data; just bump the history index, which saves disk space. (#1414)
This commit is contained in:
@@ -241,7 +241,7 @@ class model(dict):
|
|||||||
|
|
||||||
# Save some text file to the appropriate path and bump the history
|
# Save some text file to the appropriate path and bump the history
|
||||||
# result_obj from fetch_site_status.run()
|
# result_obj from fetch_site_status.run()
|
||||||
def save_history_text(self, contents, timestamp):
|
def save_history_text(self, contents, timestamp, snapshot_id):
|
||||||
|
|
||||||
self.ensure_data_dir_exists()
|
self.ensure_data_dir_exists()
|
||||||
|
|
||||||
@@ -250,13 +250,16 @@ class model(dict):
|
|||||||
if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
|
if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
|
||||||
time.sleep(timestamp - self.__newest_history_key)
|
time.sleep(timestamp - self.__newest_history_key)
|
||||||
|
|
||||||
snapshot_fname = "{}.txt".format(str(uuid.uuid4()))
|
snapshot_fname = f"{snapshot_id}.txt"
|
||||||
|
|
||||||
# in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
|
# Only write if it does not already exist, so that we don't bother re-saving the same data (identified by checksum) under different filenames.
|
||||||
# most sites are utf-8 and some are even broken utf-8
|
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||||
with open(os.path.join(self.watch_data_dir, snapshot_fname), 'wb') as f:
|
if not os.path.exists(dest):
|
||||||
f.write(contents)
|
# in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
|
||||||
f.close()
|
# most sites are utf-8 and some are even broken utf-8
|
||||||
|
with open(dest, 'wb') as f:
|
||||||
|
f.write(contents)
|
||||||
|
f.close()
|
||||||
|
|
||||||
# Append to index
|
# Append to index
|
||||||
# @todo check last char was \n
|
# @todo check last char was \n
|
||||||
|
|||||||
@@ -319,16 +319,15 @@ class update_worker(threading.Thread):
|
|||||||
# Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
|
# Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
|
||||||
if process_changedetection_results:
|
if process_changedetection_results:
|
||||||
try:
|
try:
|
||||||
watch = self.datastore.data['watching'][uuid]
|
watch = self.datastore.data['watching'].get(uuid)
|
||||||
fname = "" # Saved history text filename
|
|
||||||
|
|
||||||
# For the FIRST time we check a site, or a change detected, save the snapshot.
|
|
||||||
if changed_detected or not watch['last_checked']:
|
|
||||||
# A change was detected
|
|
||||||
watch.save_history_text(contents=contents, timestamp=str(round(time.time())))
|
|
||||||
|
|
||||||
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
||||||
|
|
||||||
|
# Also save the snapshot on the first time checked
|
||||||
|
if changed_detected or not watch['last_checked']:
|
||||||
|
watch.save_history_text(contents=contents,
|
||||||
|
timestamp=str(round(time.time())),
|
||||||
|
snapshot_id=update_obj.get('previous_md5', 'none'))
|
||||||
|
|
||||||
# A change was detected
|
# A change was detected
|
||||||
if changed_detected:
|
if changed_detected:
|
||||||
print (">> Change detected in UUID {} - {}".format(uuid, watch['url']))
|
print (">> Change detected in UUID {} - {}".format(uuid, watch['url']))
|
||||||
|
|||||||
Reference in New Issue
Block a user