Memory fixes for large playwright screenshots (#3092)

2025-04-09 08:02:44 -07:00
parent 4e6e680d79
commit 9f326783e5
4 changed files with 88 additions and 64 deletions
--- a/changedetectionio/blueprint/browser_steps/browser_steps.py
+++ b/changedetectionio/blueprint/browser_steps/browser_steps.py
@@ -4,7 +4,7 @@ import re
 from random import randint
 from loguru import logger
-from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
+from changedetectionio.content_fetchers.helpers import capture_full_page
 from changedetectionio.content_fetchers.base import manage_user_agent
 from changedetectionio.safe_jinja import render as jinja_render
@@ -298,14 +298,7 @@ class browsersteps_live_ui(steppable_browser_interface):
        now = time.time()
        self.page.wait_for_timeout(1 * 1000)
-
+        screenshot = capture_full_page(self.page)
        full_height = self.page.evaluate("document.documentElement.scrollHeight")
        if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
            logger.warning(f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
            screenshot = capture_stitched_together_full_page(self.page)
        else:
            screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40)
        logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
--- a/changedetectionio/content_fetchers/helpers.py
+++ b/changedetectionio/content_fetchers/helpers.py
@@ -1,79 +1,107 @@
 # Pages with a vertical height longer than this will use the 'stitch together' method.
 # - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
 # - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits.
 # - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.
 # The size at which we will switch to stitching method
 SCREENSHOT_SIZE_STITCH_THRESHOLD=8000
 from loguru import logger
-def capture_stitched_together_full_page(page):
+def capture_full_page(page):
    import io
    import os
    import time
    from PIL import Image, ImageDraw, ImageFont
-    MAX_TOTAL_HEIGHT = SCREENSHOT_SIZE_STITCH_THRESHOLD*4  # Maximum total height for the final image (When in stitch mode)
+    # Maximum total height for the final image (When in stitch mode).
-    MAX_CHUNK_HEIGHT = 4000  # Height per screenshot chunk
+    # We limit this to 16000px due to the huge amount of RAM that was being used
    # Example: 16000 × 1400 × 3 = 67,200,000 bytes ≈ 64.1 MB (not including buffers in PIL etc)
    MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", 16000))
    # The page height at which we switch to the stitching method. Pages shorter
    # than this (and shorter than MAX_TOTAL_HEIGHT, which can be set by a user)
    # use the default screenshot method.
    SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000
    WARNING_TEXT_HEIGHT = 20  # Height of the warning text overlay
    # Save the original viewport size
    original_viewport = page.viewport_size
-    now = time.time()
+    start = time.time()
    stitched_image = None
    try:
-        viewport = page.viewport_size
+        viewport_width = original_viewport["width"]
        viewport_height = original_viewport["height"]
        page_height = page.evaluate("document.documentElement.scrollHeight")
        # Optimization: skip stitching when it is not needed.
        # Use the default screenshot method for smaller pages to take advantage
        # of GPU and native playwright screenshot optimizations
        if (
            page_height < SCREENSHOT_SIZE_STITCH_THRESHOLD
            and page_height < MAX_TOTAL_HEIGHT
        ):
            logger.debug("Using default screenshot method")
            screenshot = page.screenshot(
                type="jpeg",
                quality=int(os.getenv("SCREENSHOT_QUALITY", 30)),
                full_page=True,
            )
            logger.debug(f"Screenshot captured in {time.time() - start:.2f}s")
            return screenshot
        logger.debug(
            "Using stitching method for large screenshot because page height exceeds threshold"
        )
        # Limit the total capture height
        capture_height = min(page_height, MAX_TOTAL_HEIGHT)
-        images = []
+        # Calculate number of chunks needed using ORIGINAL viewport height
-        total_captured_height = 0
+        num_chunks = (capture_height + viewport_height - 1) // viewport_height
-        for offset in range(0, capture_height, MAX_CHUNK_HEIGHT):
+        # Create the final image upfront to avoid holding all chunks in memory
-            # Ensure we do not exceed the total height limit
+        stitched_image = Image.new("RGB", (viewport_width, capture_height))
            chunk_height = min(MAX_CHUNK_HEIGHT, MAX_TOTAL_HEIGHT - total_captured_height)
-            # Adjust viewport size for this chunk
+        # Track cumulative paste position
            page.set_viewport_size({"width": viewport["width"], "height": chunk_height})
            # Scroll to the correct position
            page.evaluate(f"window.scrollTo(0, {offset})")
            # Capture screenshot chunk
            screenshot_bytes = page.screenshot(type='jpeg', quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
            images.append(Image.open(io.BytesIO(screenshot_bytes)))
            total_captured_height += chunk_height
            # Stop if we reached the maximum total height
            if total_captured_height >= MAX_TOTAL_HEIGHT:
                break
        # Create the final stitched image
        stitched_image = Image.new('RGB', (viewport["width"], total_captured_height))
        y_offset = 0
-        # Stitch the screenshot chunks together
+        for _ in range(num_chunks):
-        for img in images:
+            # Scroll to position (no viewport resizing)
            page.evaluate(f"window.scrollTo(0, {y_offset})")
            # Capture only the visible area using clip
            with io.BytesIO(
                page.screenshot(
                    type="jpeg",
                    clip={
                        "x": 0,
                        "y": 0,
                        "width": viewport_width,
                        "height": min(viewport_height, capture_height - y_offset),
                    },
                    quality=int(os.getenv("SCREENSHOT_QUALITY", 30)),
                )
            ) as buf:
                with Image.open(buf) as img:
                    img.load()
                    stitched_image.paste(img, (0, y_offset))
                    y_offset += img.height
-        logger.debug(f"Screenshot stitched together in {time.time()-now:.2f}s")
+        logger.debug(f"Screenshot stitched together in {time.time() - start:.2f}s")
        # Overlay warning text if the screenshot was trimmed
-        if page_height > MAX_TOTAL_HEIGHT:
+        if capture_height < page_height:
            draw = ImageDraw.Draw(stitched_image)
            warning_text = f"WARNING: Screenshot was {page_height}px but trimmed to {MAX_TOTAL_HEIGHT}px because it was too long"
            # Load font (default system font if Arial is unavailable)
            try:
-                font = ImageFont.truetype("arial.ttf", WARNING_TEXT_HEIGHT)  # Arial (Windows/Mac)
+                font = ImageFont.truetype(
                    "arial.ttf", WARNING_TEXT_HEIGHT
                )  # Arial (Windows/Mac)
            except IOError:
                font = ImageFont.load_default()  # Default font if Arial not found
@@ -83,22 +111,28 @@ def capture_stitched_together_full_page(page):
            text_height = text_bbox[3] - text_bbox[1]  # Calculate text height
            # Define background rectangle (top of the image)
-            draw.rectangle([(0, 0), (viewport["width"], WARNING_TEXT_HEIGHT)], fill="white")
+            draw.rectangle(
                [(0, 0), (viewport_width, WARNING_TEXT_HEIGHT)], fill="white"
            )
            # Center text horizontally within the warning area
-            text_x = (viewport["width"] - text_width) // 2
+            text_x = (viewport_width - text_width) // 2
            text_y = (WARNING_TEXT_HEIGHT - text_height) // 2
            # Draw the warning text in red
            draw.text((text_x, text_y), warning_text, fill="red", font=font)
-        # Save or return the final image
+        # Save final image
-        output = io.BytesIO()
+        with io.BytesIO() as output:
-        stitched_image.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
+            stitched_image.save(
                output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30))
            )
            screenshot = output.getvalue()
    finally:
        # Restore the original viewport size
        page.set_viewport_size(original_viewport)
        if stitched_image is not None:
            stitched_image.close()
    return screenshot
--- a/changedetectionio/content_fetchers/playwright.py
+++ b/changedetectionio/content_fetchers/playwright.py
@@ -4,7 +4,7 @@ from urllib.parse import urlparse
 from loguru import logger
-from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
+from changedetectionio.content_fetchers.helpers import capture_full_page
 from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
 from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
@@ -204,14 +204,7 @@ class fetcher(Fetcher):
            # acceptable screenshot quality here
            try:
                # The actual screenshot - this is always base64 and needs decoding! horrible! huge CPU usage
-                full_height = self.page.evaluate("document.documentElement.scrollHeight")
+                self.screenshot = capture_full_page(self.page)
                if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
                    logger.warning(
                        f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
                    self.screenshot = capture_stitched_together_full_page(self.page)
                else:
                    self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
            except Exception as e:
                # It's likely the screenshot was too long/big and something crashed
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -63,6 +63,10 @@ services:
  #
  #        A valid timezone name to run as (for scheduling watch checking) see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
  #      - TZ=America/Los_Angeles
  #
  #        Maximum height of screenshots (default is 16000 px); taller screenshots will be clipped to this height.
  #        RAM usage will be higher if you increase this.
  #      - SCREENSHOT_MAX_HEIGHT=16000
      # Comment out ports: when using behind a reverse proxy, enable networks: etc.
      ports: