Import speed improvements, and adding an import URL batch size of 5,000 to stop accidental CPU overload (#549)
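The batching change is the heart of the fix: a single POST used to enqueue every pasted URL at once, so a paste of tens of thousands of lines could pin the CPU. The diff below caps each import at 5,000 URLs and reports the rest back as skipped. A minimal sketch of that cap-and-report idea (the `split_batch` helper and `IMPORT_BATCH_LIMIT` constant are illustrative, not part of the codebase, which hard-codes 5000):

```python
IMPORT_BATCH_LIMIT = 5000  # illustrative constant; the commit hard-codes 5000

def split_batch(urls):
    """Accept the first batch now; everything else is reported back as skipped."""
    return urls[:IMPORT_BATCH_LIMIT], urls[IMPORT_BATCH_LIMIT:]

accepted, skipped = split_batch(["https://example.com/%d" % i for i in range(6000)])
assert len(accepted) == 5000 and len(skipped) == 1000
```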
@@ -661,13 +661,19 @@ def changedetection_app(config=None, datastore_o=None):
         good = 0

         if request.method == 'POST':
+            now=time.time()
             urls = request.values.get('urls').split("\n")

+            if (len(urls) > 5000):
+                flash("Importing 5,000 of the first URLs from your list, the rest can be imported again.")
+
             for url in urls:
                 url = url.strip()
                 url, *tags = url.split(" ")
                 # Flask wtform validators wont work with basic auth, use validators package
-                if len(url) and validators.url(url.replace('source:', '')):
-                    new_uuid = datastore.add_watch(url=url.strip(), tag=" ".join(tags))
+                # Up to 5000 per batch so we dont flood the server
+                if len(url) and validators.url(url.replace('source:', '')) and good < 5000:
+                    new_uuid = datastore.add_watch(url=url.strip(), tag=" ".join(tags), write_to_disk_now=False)
                     # Straight into the queue.
                     update_q.put(new_uuid)
                     good += 1
@@ -675,7 +681,8 @@ def changedetection_app(config=None, datastore_o=None):
                 if len(url):
                     remaining_urls.append(url)

-        flash("{} Imported, {} Skipped.".format(good, len(remaining_urls)))
+        flash("{} Imported in {:.2f}s, {} Skipped.".format(good, time.time()-now,len(remaining_urls)))
+        datastore.needs_write = True

         if len(remaining_urls) == 0:
             # Looking good, redirect to index.
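Taken together, the handler changes do three things: time the whole request (`now=time.time()`), stop adding watches once `good` reaches 5,000, and defer the JSON write until after the loop by setting `datastore.needs_write` once. A self-contained sketch of that loop shape, using a stand-in store and a plain list for the queue (neither is the app's real API):

```python
import time

class FakeStore:  # stand-in for ChangeDetectionStore, illustration only
    def __init__(self):
        self.watching, self.needs_write = {}, False
    def add_watch(self, url, tag="", write_to_disk_now=True):
        uuid = str(len(self.watching))
        self.watching[uuid] = {'url': url, 'tag': tag}
        # the real store would sync to disk here when write_to_disk_now is True
        return uuid

datastore, update_q, remaining_urls, good = FakeStore(), [], [], 0
now = time.time()
urls = ["https://example.com/page/%d" % i for i in range(6000)]

for url in urls:
    if good < 5000:  # the batch cap added by this commit
        update_q.append(datastore.add_watch(url=url, write_to_disk_now=False))
        good += 1
    else:
        remaining_urls.append(url)

datastore.needs_write = True  # one deferred write instead of thousands
print("{} Imported in {:.2f}s, {} Skipped.".format(good, time.time() - now, len(remaining_urls)))
```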
@@ -13,7 +13,6 @@ from changedetectionio.notification import (

 class model(dict):
     def __init__(self, *arg, **kw):
-        super(model, self).__init__(*arg, **kw)
         self.update({
             'url': None,
             'tag': None,
@@ -45,6 +44,9 @@ class model(dict):
             # Should be all None by default, so we use the system default in this case.
             'minutes_between_check': None
         })
+        # goes at the end so we update the default object with the initialiser
+        super(model, self).__init__(*arg, **kw)
+

     @property
     def has_empty_checktime(self):
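The `model` change is subtle: `dict.__init__` behaves like `update()`, so calling it *before* `self.update({...defaults...})` let the defaults clobber whatever the caller passed in. Moving the `super()` call to the end reverses that. A toy dict subclass showing the ordering effect (the `Defaults` name is illustrative):

```python
class Defaults(dict):
    def __init__(self, *arg, **kw):
        self.update({'url': None, 'tag': None})  # defaults go in first
        # dict.__init__ acts like update(), so the caller's values now
        # override the defaults instead of being overwritten by them.
        super(Defaults, self).__init__(*arg, **kw)

w = Defaults(url='https://example.com')
assert w['url'] == 'https://example.com' and w['tag'] is None
```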
@@ -272,15 +272,14 @@ class ChangeDetectionStore:
         self.needs_write = True
         return changes_removed

-    def add_watch(self, url, tag="", extras=None):
+    def add_watch(self, url, tag="", extras=None, write_to_disk_now=True):
         if extras is None:
             extras = {}

         with self.lock:
             # @todo use a common generic version of this
             new_uuid = str(uuid_builder.uuid4())
-            _blank = deepcopy(self.generic_definition)
-            _blank.update({
+            new_watch = Watch.model({
                 'url': url,
                 'tag': tag
             })
@@ -291,9 +290,8 @@ class ChangeDetectionStore:
             if k in apply_extras:
                 del apply_extras[k]

-            _blank.update(apply_extras)
-
-            self.data['watching'][new_uuid] = _blank
+            new_watch.update(apply_extras)
+            self.__data['watching'][new_uuid]=new_watch

             # Get the directory ready
             output_path = "{}/{}".format(self.datastore_path, new_uuid)
@@ -302,6 +300,7 @@ class ChangeDetectionStore:
             except FileExistsError:
                 print(output_path, "already exists.")

-            self.sync_to_json()
+            if write_to_disk_now:
+                self.sync_to_json()
             return new_uuid

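`write_to_disk_now` defaults to `True`, so a single add from the UI still persists immediately; only the bulk importer opts out, setting `datastore.needs_write` so the data is flushed once afterwards. A minimal sketch of the deferred-write pattern with a toy store (`MiniStore` and its temp-file path are illustrative, not the project's code):

```python
import json, os, tempfile

class MiniStore:  # illustration only, not ChangeDetectionStore
    def __init__(self, path):
        self.path, self.watching = path, {}
    def add_watch(self, url, write_to_disk_now=True):
        self.watching[str(len(self.watching))] = {'url': url}
        if write_to_disk_now:
            self.sync_to_json()
    def sync_to_json(self):
        with open(self.path, 'w') as f:
            json.dump(self.watching, f)

store = MiniStore(os.path.join(tempfile.gettempdir(), "watches.json"))
for i in range(1000):
    store.add_watch("https://example.com/%d" % i, write_to_disk_now=False)
store.sync_to_json()  # one disk write instead of a thousand
```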
@@ -2,6 +2,7 @@ import threading
 import queue
 import time

+from changedetectionio import content_fetcher
 # A single update worker
 #
 # Requests for checking on a single site(watch) from a queue of watches
@@ -32,7 +33,6 @@ class update_worker(threading.Thread):

             else:
                 self.current_uuid = uuid
-                from changedetectionio import content_fetcher

                 if uuid in list(self.datastore.data['watching'].keys()):

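The last two hunks hoist `from changedetectionio import content_fetcher` out of the per-watch loop to module scope. A repeated `import` of an already-loaded module is not free: Python still runs the import machinery's fast path (a `sys.modules` lookup plus a name binding) on every pass, which adds up across thousands of queued watches. A quick way to see the difference, using a stdlib module as a stand-in:

```python
import timeit

# Re-importing inside the loop body vs. importing once up front.
in_loop = timeit.timeit("import json", number=100000)
hoisted = timeit.timeit("pass", setup="import json", number=100000)
print("in-loop: %.3fs, hoisted: %.3fs" % (in_loop, hoisted))
```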