Memory fixes for large playwright screenshots (#3092)
This commit is contained in:
@@ -4,7 +4,7 @@ import re
|
||||
from random import randint
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
|
||||
from changedetectionio.content_fetchers.helpers import capture_full_page
|
||||
from changedetectionio.content_fetchers.base import manage_user_agent
|
||||
from changedetectionio.safe_jinja import render as jinja_render
|
||||
|
||||
@@ -298,14 +298,7 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||
now = time.time()
|
||||
self.page.wait_for_timeout(1 * 1000)
|
||||
|
||||
|
||||
full_height = self.page.evaluate("document.documentElement.scrollHeight")
|
||||
|
||||
if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
|
||||
logger.warning(f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
|
||||
screenshot = capture_stitched_together_full_page(self.page)
|
||||
else:
|
||||
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40)
|
||||
screenshot = capture_full_page(self.page)
|
||||
|
||||
logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
|
||||
|
||||
|
||||
@@ -1,79 +1,107 @@
|
||||
|
||||
# Pages with a vertical height longer than this will use the 'stitch together' method.
|
||||
|
||||
# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
|
||||
# - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits.
|
||||
# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.
|
||||
|
||||
|
||||
# The size at which we will switch to stitching method
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD=8000
|
||||
|
||||
from loguru import logger
|
||||
|
||||
def capture_stitched_together_full_page(page):
|
||||
def capture_full_page(page):
|
||||
import io
|
||||
import os
|
||||
import time
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
|
||||
MAX_TOTAL_HEIGHT = SCREENSHOT_SIZE_STITCH_THRESHOLD*4 # Maximum total height for the final image (When in stitch mode)
|
||||
MAX_CHUNK_HEIGHT = 4000 # Height per screenshot chunk
|
||||
# Maximum total height for the final image (When in stitch mode).
|
||||
# We limit this to 16000px due to the huge amount of RAM that was being used
|
||||
# Example: 16000 × 1400 × 3 = 67,200,000 bytes ≈ 64.1 MB (not including buffers in PIL etc)
|
||||
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", 16000))
|
||||
|
||||
# The size at which we will switch to the stitching method. When the page is
|
||||
# shorter than this (and shorter than MAX_TOTAL_HEIGHT, which can be set by
|
||||
# the user) we will use the default screenshot method.
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000
|
||||
|
||||
WARNING_TEXT_HEIGHT = 20 # Height of the warning text overlay
|
||||
|
||||
# Save the original viewport size
|
||||
original_viewport = page.viewport_size
|
||||
now = time.time()
|
||||
start = time.time()
|
||||
|
||||
stitched_image = None
|
||||
|
||||
try:
|
||||
viewport = page.viewport_size
|
||||
viewport_width = original_viewport["width"]
|
||||
viewport_height = original_viewport["height"]
|
||||
|
||||
page_height = page.evaluate("document.documentElement.scrollHeight")
|
||||
|
||||
# Optimization: skip stitching when it is not needed
|
||||
# Use the default screenshot method for smaller pages to take advantage
|
||||
# of GPU and native playwright screenshot optimizations
|
||||
if (
|
||||
page_height < SCREENSHOT_SIZE_STITCH_THRESHOLD
|
||||
and page_height < MAX_TOTAL_HEIGHT
|
||||
):
|
||||
logger.debug("Using default screenshot method")
|
||||
screenshot = page.screenshot(
|
||||
type="jpeg",
|
||||
quality=int(os.getenv("SCREENSHOT_QUALITY", 30)),
|
||||
full_page=True,
|
||||
)
|
||||
logger.debug(f"Screenshot captured in {time.time() - start:.2f}s")
|
||||
return screenshot
|
||||
|
||||
logger.debug(
|
||||
"Using stitching method for large screenshot because page height exceeds threshold"
|
||||
)
|
||||
|
||||
# Limit the total capture height
|
||||
capture_height = min(page_height, MAX_TOTAL_HEIGHT)
|
||||
|
||||
images = []
|
||||
total_captured_height = 0
|
||||
# Calculate number of chunks needed using ORIGINAL viewport height
|
||||
num_chunks = (capture_height + viewport_height - 1) // viewport_height
|
||||
|
||||
for offset in range(0, capture_height, MAX_CHUNK_HEIGHT):
|
||||
# Ensure we do not exceed the total height limit
|
||||
chunk_height = min(MAX_CHUNK_HEIGHT, MAX_TOTAL_HEIGHT - total_captured_height)
|
||||
# Create the final image upfront to avoid holding all chunks in memory
|
||||
stitched_image = Image.new("RGB", (viewport_width, capture_height))
|
||||
|
||||
# Adjust viewport size for this chunk
|
||||
page.set_viewport_size({"width": viewport["width"], "height": chunk_height})
|
||||
|
||||
# Scroll to the correct position
|
||||
page.evaluate(f"window.scrollTo(0, {offset})")
|
||||
|
||||
# Capture screenshot chunk
|
||||
screenshot_bytes = page.screenshot(type='jpeg', quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
|
||||
images.append(Image.open(io.BytesIO(screenshot_bytes)))
|
||||
|
||||
total_captured_height += chunk_height
|
||||
|
||||
# Stop if we reached the maximum total height
|
||||
if total_captured_height >= MAX_TOTAL_HEIGHT:
|
||||
break
|
||||
|
||||
# Create the final stitched image
|
||||
stitched_image = Image.new('RGB', (viewport["width"], total_captured_height))
|
||||
# Track cumulative paste position
|
||||
y_offset = 0
|
||||
|
||||
# Stitch the screenshot chunks together
|
||||
for img in images:
|
||||
for _ in range(num_chunks):
|
||||
# Scroll to position (no viewport resizing)
|
||||
page.evaluate(f"window.scrollTo(0, {y_offset})")
|
||||
|
||||
# Capture only the visible area using clip
|
||||
with io.BytesIO(
|
||||
page.screenshot(
|
||||
type="jpeg",
|
||||
clip={
|
||||
"x": 0,
|
||||
"y": 0,
|
||||
"width": viewport_width,
|
||||
"height": min(viewport_height, capture_height - y_offset),
|
||||
},
|
||||
quality=int(os.getenv("SCREENSHOT_QUALITY", 30)),
|
||||
)
|
||||
) as buf:
|
||||
with Image.open(buf) as img:
|
||||
img.load()
|
||||
stitched_image.paste(img, (0, y_offset))
|
||||
y_offset += img.height
|
||||
|
||||
logger.debug(f"Screenshot stitched together in {time.time()-now:.2f}s")
|
||||
logger.debug(f"Screenshot stitched together in {time.time() - start:.2f}s")
|
||||
|
||||
# Overlay warning text if the screenshot was trimmed
|
||||
if page_height > MAX_TOTAL_HEIGHT:
|
||||
if capture_height < page_height:
|
||||
draw = ImageDraw.Draw(stitched_image)
|
||||
warning_text = f"WARNING: Screenshot was {page_height}px but trimmed to {MAX_TOTAL_HEIGHT}px because it was too long"
|
||||
|
||||
# Load font (default system font if Arial is unavailable)
|
||||
try:
|
||||
font = ImageFont.truetype("arial.ttf", WARNING_TEXT_HEIGHT) # Arial (Windows/Mac)
|
||||
font = ImageFont.truetype(
|
||||
"arial.ttf", WARNING_TEXT_HEIGHT
|
||||
) # Arial (Windows/Mac)
|
||||
except IOError:
|
||||
font = ImageFont.load_default() # Default font if Arial not found
|
||||
|
||||
@@ -83,22 +111,28 @@ def capture_stitched_together_full_page(page):
|
||||
text_height = text_bbox[3] - text_bbox[1] # Calculate text height
|
||||
|
||||
# Define background rectangle (top of the image)
|
||||
draw.rectangle([(0, 0), (viewport["width"], WARNING_TEXT_HEIGHT)], fill="white")
|
||||
draw.rectangle(
|
||||
[(0, 0), (viewport_width, WARNING_TEXT_HEIGHT)], fill="white"
|
||||
)
|
||||
|
||||
# Center text horizontally within the warning area
|
||||
text_x = (viewport["width"] - text_width) // 2
|
||||
text_x = (viewport_width - text_width) // 2
|
||||
text_y = (WARNING_TEXT_HEIGHT - text_height) // 2
|
||||
|
||||
# Draw the warning text in red
|
||||
draw.text((text_x, text_y), warning_text, fill="red", font=font)
|
||||
|
||||
# Save or return the final image
|
||||
output = io.BytesIO()
|
||||
stitched_image.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
|
||||
# Save final image
|
||||
with io.BytesIO() as output:
|
||||
stitched_image.save(
|
||||
output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30))
|
||||
)
|
||||
screenshot = output.getvalue()
|
||||
|
||||
finally:
|
||||
# Restore the original viewport size
|
||||
page.set_viewport_size(original_viewport)
|
||||
if stitched_image is not None:
|
||||
stitched_image.close()
|
||||
|
||||
return screenshot
|
||||
|
||||
@@ -4,7 +4,7 @@ from urllib.parse import urlparse
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
|
||||
from changedetectionio.content_fetchers.helpers import capture_full_page
|
||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
||||
|
||||
@@ -204,14 +204,7 @@ class fetcher(Fetcher):
|
||||
# acceptable screenshot quality here
|
||||
try:
|
||||
# The actual screenshot - this is always base64 and needs decoding! horrible! huge CPU usage
|
||||
full_height = self.page.evaluate("document.documentElement.scrollHeight")
|
||||
|
||||
if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
|
||||
logger.warning(
|
||||
f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
|
||||
self.screenshot = capture_stitched_together_full_page(self.page)
|
||||
else:
|
||||
self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
|
||||
self.screenshot = capture_full_page(self.page)
|
||||
|
||||
except Exception as e:
|
||||
# It's likely the screenshot was too long/big and something crashed
|
||||
|
||||
@@ -63,6 +63,10 @@ services:
|
||||
#
|
||||
# A valid timezone name to run as (for scheduling watch checking) see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
|
||||
# - TZ=America/Los_Angeles
|
||||
#
|
||||
# Maximum height of screenshots; the default is 16000 px. Screenshots will be clipped to this height if exceeded.
|
||||
# RAM usage will be higher if you increase this.
|
||||
# - SCREENSHOT_MAX_HEIGHT=16000
|
||||
|
||||
# Comment out ports: when using behind a reverse proxy, enable networks: etc.
|
||||
ports:
|
||||
|
||||
Reference in New Issue
Block a user