From cfb4decf676223fb6d6e593e2af5e9ce459254ef Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Fri, 11 Apr 2025 17:36:29 +0200 Subject: [PATCH] UI Edit/Stats - Add levenshtein distance info, explains how "different" the last two snapshot are (#3109) --- changedetectionio/blueprint/ui/edit.py | 19 +++++++++++++++++-- changedetectionio/templates/edit.html | 4 ++++ changedetectionio/tests/test_backend.py | 5 +++++ requirements.txt | 2 ++ 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/changedetectionio/blueprint/ui/edit.py b/changedetectionio/blueprint/ui/edit.py index d4bd51ab..0f6ac006 100644 --- a/changedetectionio/blueprint/ui/edit.py +++ b/changedetectionio/blueprint/ui/edit.py @@ -19,6 +19,20 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe if tag_uuid in watch.get('tags', []) and (tag.get('include_filters') or tag.get('subtractive_selectors')): return True + def levenshtein_ratio_recent_history(watch): + try: + from Levenshtein import ratio, distance + k = list(watch.history.keys()) + if len(k) >= 2: + a = watch.get_history_snapshot(timestamp=k[0]) + b = watch.get_history_snapshot(timestamp=k[1]) + distance = distance(a, b) + return distance + except Exception as e: + logger.warning("Unable to calc similarity", e) + return "Unable to calc similarity" + return '' + @edit_blueprint.route("/edit/", methods=['GET', 'POST']) @login_optionally_required # https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists @@ -247,14 +261,15 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe 'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False, 'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0, 'has_special_tag_options': _watch_has_tag_options_set(watch=watch), - 'watch_uses_webdriver': watch_uses_webdriver, 'jq_support': jq_support, + 'lev_info': 
levenshtein_ratio_recent_history(watch), 'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False), 'settings_application': datastore.data['settings']['application'], 'timezone_default_config': datastore.data['settings']['application'].get('timezone'), 'using_global_webdriver_wait': not default['webdriver_delay'], 'uuid': uuid, - 'watch': watch + 'watch': watch, + 'watch_uses_webdriver': watch_uses_webdriver, } included_content = None diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html index 47f4d8af..925af81d 100644 --- a/changedetectionio/templates/edit.html +++ b/changedetectionio/templates/edit.html @@ -443,6 +443,10 @@ Math: {{ 1 + 1 }}") }} + +

Text similarity

+

Levenshtein Distance - Last 2 snapshots: {{ lev_info }}

+

Levenshtein Distance: the minimum number of single-character insertions, deletions, and substitutions required to change one text into the other.

{% if watch.history_n %}

Download latest HTML snapshot diff --git a/changedetectionio/tests/test_backend.py b/changedetectionio/tests/test_backend.py index b67bd179..aac59be1 100644 --- a/changedetectionio/tests/test_backend.py +++ b/changedetectionio/tests/test_backend.py @@ -74,6 +74,11 @@ def test_check_basic_change_detection_functionality(client, live_server, measure res = client.get(url_for("ui.ui_edit.watch_get_latest_html", uuid=uuid)) assert b'which has this one new line' in res.data + # Check the 'levenshtein' distance calc showed something useful + res = client.get(url_for("ui.ui_edit.edit_page", uuid=uuid)) + assert b'Last 2 snapshots: 17' in res.data + + # Now something should be ready, indicated by having a 'unviewed' class res = client.get(url_for("watchlist.index")) assert b'unviewed' in res.data diff --git a/requirements.txt b/requirements.txt index 745503f5..68ae6ede 100644 --- a/requirements.txt +++ b/requirements.txt @@ -68,6 +68,8 @@ openpyxl jq~=1.3; python_version >= "3.8" and sys_platform == "darwin" jq~=1.3; python_version >= "3.8" and sys_platform == "linux" +levenshtein + # playwright is installed at Dockerfile build time because it's not available on all platforms pyppeteer-ng==2.0.0rc9