Memory fixes for large playwright screenshots (#3092)
This commit is contained in:
@@ -4,7 +4,7 @@ import re
|
|||||||
from random import randint
|
from random import randint
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
|
from changedetectionio.content_fetchers.helpers import capture_full_page
|
||||||
from changedetectionio.content_fetchers.base import manage_user_agent
|
from changedetectionio.content_fetchers.base import manage_user_agent
|
||||||
from changedetectionio.safe_jinja import render as jinja_render
|
from changedetectionio.safe_jinja import render as jinja_render
|
||||||
|
|
||||||
@@ -298,14 +298,7 @@ class browsersteps_live_ui(steppable_browser_interface):
|
|||||||
now = time.time()
|
now = time.time()
|
||||||
self.page.wait_for_timeout(1 * 1000)
|
self.page.wait_for_timeout(1 * 1000)
|
||||||
|
|
||||||
|
screenshot = capture_full_page(self.page)
|
||||||
full_height = self.page.evaluate("document.documentElement.scrollHeight")
|
|
||||||
|
|
||||||
if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
|
|
||||||
logger.warning(f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
|
|
||||||
screenshot = capture_stitched_together_full_page(self.page)
|
|
||||||
else:
|
|
||||||
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40)
|
|
||||||
|
|
||||||
logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
|
logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
|
||||||
|
|
||||||
|
|||||||
@@ -1,79 +1,107 @@
|
|||||||
|
|
||||||
# Pages with a vertical height longer than this will use the 'stitch together' method.
|
# Pages with a vertical height longer than this will use the 'stitch together' method.
|
||||||
|
|
||||||
# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
|
# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
|
||||||
# - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits.
|
# - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits.
|
||||||
# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.
|
# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.
|
||||||
|
|
||||||
|
|
||||||
# The size at which we will switch to stitching method
|
|
||||||
SCREENSHOT_SIZE_STITCH_THRESHOLD=8000
|
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
def capture_stitched_together_full_page(page):
|
def capture_full_page(page):
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from PIL import Image, ImageDraw, ImageFont
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
|
|
||||||
MAX_TOTAL_HEIGHT = SCREENSHOT_SIZE_STITCH_THRESHOLD*4 # Maximum total height for the final image (When in stitch mode)
|
# Maximum total height for the final image (When in stitch mode).
|
||||||
MAX_CHUNK_HEIGHT = 4000 # Height per screenshot chunk
|
# We limit this to 16000px due to the huge amount of RAM that was being used
|
||||||
|
# Example: 16000 × 1400 × 3 = 67,200,000 bytes ≈ 64.1 MiB (not including buffers in PIL etc)
|
||||||
|
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", 16000))
|
||||||
|
|
||||||
|
# Height (px) at which we switch to the stitching method. Pages shorter than this (and
|
||||||
|
# shorter than MAX_TOTAL_HEIGHT, which can be set by a user) use the default
|
||||||
|
# screenshot method.
|
||||||
|
SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000
|
||||||
|
|
||||||
WARNING_TEXT_HEIGHT = 20 # Height of the warning text overlay
|
WARNING_TEXT_HEIGHT = 20 # Height of the warning text overlay
|
||||||
|
|
||||||
# Save the original viewport size
|
# Save the original viewport size
|
||||||
original_viewport = page.viewport_size
|
original_viewport = page.viewport_size
|
||||||
now = time.time()
|
start = time.time()
|
||||||
|
|
||||||
|
stitched_image = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
viewport = page.viewport_size
|
viewport_width = original_viewport["width"]
|
||||||
|
viewport_height = original_viewport["height"]
|
||||||
|
|
||||||
page_height = page.evaluate("document.documentElement.scrollHeight")
|
page_height = page.evaluate("document.documentElement.scrollHeight")
|
||||||
|
|
||||||
|
# Optimization to avoid unnecessary stitching if we can avoid it
|
||||||
|
# Use the default screenshot method for smaller pages to take advantage
|
||||||
|
# of GPU and native playwright screenshot optimizations
|
||||||
|
if (
|
||||||
|
page_height < SCREENSHOT_SIZE_STITCH_THRESHOLD
|
||||||
|
and page_height < MAX_TOTAL_HEIGHT
|
||||||
|
):
|
||||||
|
logger.debug("Using default screenshot method")
|
||||||
|
screenshot = page.screenshot(
|
||||||
|
type="jpeg",
|
||||||
|
quality=int(os.getenv("SCREENSHOT_QUALITY", 30)),
|
||||||
|
full_page=True,
|
||||||
|
)
|
||||||
|
logger.debug(f"Screenshot captured in {time.time() - start:.2f}s")
|
||||||
|
return screenshot
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
"Using stitching method for large screenshot because page height exceeds threshold"
|
||||||
|
)
|
||||||
|
|
||||||
# Limit the total capture height
|
# Limit the total capture height
|
||||||
capture_height = min(page_height, MAX_TOTAL_HEIGHT)
|
capture_height = min(page_height, MAX_TOTAL_HEIGHT)
|
||||||
|
|
||||||
images = []
|
# Calculate number of chunks needed using ORIGINAL viewport height
|
||||||
total_captured_height = 0
|
num_chunks = (capture_height + viewport_height - 1) // viewport_height
|
||||||
|
|
||||||
for offset in range(0, capture_height, MAX_CHUNK_HEIGHT):
|
# Create the final image upfront to avoid holding all chunks in memory
|
||||||
# Ensure we do not exceed the total height limit
|
stitched_image = Image.new("RGB", (viewport_width, capture_height))
|
||||||
chunk_height = min(MAX_CHUNK_HEIGHT, MAX_TOTAL_HEIGHT - total_captured_height)
|
|
||||||
|
|
||||||
# Adjust viewport size for this chunk
|
# Track cumulative paste position
|
||||||
page.set_viewport_size({"width": viewport["width"], "height": chunk_height})
|
|
||||||
|
|
||||||
# Scroll to the correct position
|
|
||||||
page.evaluate(f"window.scrollTo(0, {offset})")
|
|
||||||
|
|
||||||
# Capture screenshot chunk
|
|
||||||
screenshot_bytes = page.screenshot(type='jpeg', quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
|
|
||||||
images.append(Image.open(io.BytesIO(screenshot_bytes)))
|
|
||||||
|
|
||||||
total_captured_height += chunk_height
|
|
||||||
|
|
||||||
# Stop if we reached the maximum total height
|
|
||||||
if total_captured_height >= MAX_TOTAL_HEIGHT:
|
|
||||||
break
|
|
||||||
|
|
||||||
# Create the final stitched image
|
|
||||||
stitched_image = Image.new('RGB', (viewport["width"], total_captured_height))
|
|
||||||
y_offset = 0
|
y_offset = 0
|
||||||
|
|
||||||
# Stitch the screenshot chunks together
|
for _ in range(num_chunks):
|
||||||
for img in images:
|
# Scroll to position (no viewport resizing)
|
||||||
stitched_image.paste(img, (0, y_offset))
|
page.evaluate(f"window.scrollTo(0, {y_offset})")
|
||||||
y_offset += img.height
|
|
||||||
|
|
||||||
logger.debug(f"Screenshot stitched together in {time.time()-now:.2f}s")
|
# Capture only the visible area using clip
|
||||||
|
with io.BytesIO(
|
||||||
|
page.screenshot(
|
||||||
|
type="jpeg",
|
||||||
|
clip={
|
||||||
|
"x": 0,
|
||||||
|
"y": 0,
|
||||||
|
"width": viewport_width,
|
||||||
|
"height": min(viewport_height, capture_height - y_offset),
|
||||||
|
},
|
||||||
|
quality=int(os.getenv("SCREENSHOT_QUALITY", 30)),
|
||||||
|
)
|
||||||
|
) as buf:
|
||||||
|
with Image.open(buf) as img:
|
||||||
|
img.load()
|
||||||
|
stitched_image.paste(img, (0, y_offset))
|
||||||
|
y_offset += img.height
|
||||||
|
|
||||||
|
logger.debug(f"Screenshot stitched together in {time.time() - start:.2f}s")
|
||||||
|
|
||||||
# Overlay warning text if the screenshot was trimmed
|
# Overlay warning text if the screenshot was trimmed
|
||||||
if page_height > MAX_TOTAL_HEIGHT:
|
if capture_height < page_height:
|
||||||
draw = ImageDraw.Draw(stitched_image)
|
draw = ImageDraw.Draw(stitched_image)
|
||||||
warning_text = f"WARNING: Screenshot was {page_height}px but trimmed to {MAX_TOTAL_HEIGHT}px because it was too long"
|
warning_text = f"WARNING: Screenshot was {page_height}px but trimmed to {MAX_TOTAL_HEIGHT}px because it was too long"
|
||||||
|
|
||||||
# Load font (default system font if Arial is unavailable)
|
# Load font (default system font if Arial is unavailable)
|
||||||
try:
|
try:
|
||||||
font = ImageFont.truetype("arial.ttf", WARNING_TEXT_HEIGHT) # Arial (Windows/Mac)
|
font = ImageFont.truetype(
|
||||||
|
"arial.ttf", WARNING_TEXT_HEIGHT
|
||||||
|
) # Arial (Windows/Mac)
|
||||||
except IOError:
|
except IOError:
|
||||||
font = ImageFont.load_default() # Default font if Arial not found
|
font = ImageFont.load_default() # Default font if Arial not found
|
||||||
|
|
||||||
@@ -83,22 +111,28 @@ def capture_stitched_together_full_page(page):
|
|||||||
text_height = text_bbox[3] - text_bbox[1] # Calculate text height
|
text_height = text_bbox[3] - text_bbox[1] # Calculate text height
|
||||||
|
|
||||||
# Define background rectangle (top of the image)
|
# Define background rectangle (top of the image)
|
||||||
draw.rectangle([(0, 0), (viewport["width"], WARNING_TEXT_HEIGHT)], fill="white")
|
draw.rectangle(
|
||||||
|
[(0, 0), (viewport_width, WARNING_TEXT_HEIGHT)], fill="white"
|
||||||
|
)
|
||||||
|
|
||||||
# Center text horizontally within the warning area
|
# Center text horizontally within the warning area
|
||||||
text_x = (viewport["width"] - text_width) // 2
|
text_x = (viewport_width - text_width) // 2
|
||||||
text_y = (WARNING_TEXT_HEIGHT - text_height) // 2
|
text_y = (WARNING_TEXT_HEIGHT - text_height) // 2
|
||||||
|
|
||||||
# Draw the warning text in red
|
# Draw the warning text in red
|
||||||
draw.text((text_x, text_y), warning_text, fill="red", font=font)
|
draw.text((text_x, text_y), warning_text, fill="red", font=font)
|
||||||
|
|
||||||
# Save or return the final image
|
# Save final image
|
||||||
output = io.BytesIO()
|
with io.BytesIO() as output:
|
||||||
stitched_image.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
|
stitched_image.save(
|
||||||
screenshot = output.getvalue()
|
output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30))
|
||||||
|
)
|
||||||
|
screenshot = output.getvalue()
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# Restore the original viewport size
|
# Restore the original viewport size
|
||||||
page.set_viewport_size(original_viewport)
|
page.set_viewport_size(original_viewport)
|
||||||
|
if stitched_image is not None:
|
||||||
|
stitched_image.close()
|
||||||
|
|
||||||
return screenshot
|
return screenshot
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ from urllib.parse import urlparse
|
|||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
|
from changedetectionio.content_fetchers.helpers import capture_full_page
|
||||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
||||||
|
|
||||||
@@ -204,14 +204,7 @@ class fetcher(Fetcher):
|
|||||||
# acceptable screenshot quality here
|
# acceptable screenshot quality here
|
||||||
try:
|
try:
|
||||||
# The actual screenshot - this is always base64 and needs decoding! horrible! huge CPU usage
|
# The actual screenshot - this is always base64 and needs decoding! horrible! huge CPU usage
|
||||||
full_height = self.page.evaluate("document.documentElement.scrollHeight")
|
self.screenshot = capture_full_page(self.page)
|
||||||
|
|
||||||
if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
|
|
||||||
logger.warning(
|
|
||||||
f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
|
|
||||||
self.screenshot = capture_stitched_together_full_page(self.page)
|
|
||||||
else:
|
|
||||||
self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# It's likely the screenshot was too long/big and something crashed
|
# It's likely the screenshot was too long/big and something crashed
|
||||||
|
|||||||
@@ -63,6 +63,10 @@ services:
|
|||||||
#
|
#
|
||||||
# A valid timezone name to run as (for scheduling watch checking) see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
|
# A valid timezone name to run as (for scheduling watch checking) see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
|
||||||
# - TZ=America/Los_Angeles
|
# - TZ=America/Los_Angeles
|
||||||
|
#
|
||||||
|
# Maximum height of screenshots in pixels (default 16000); taller screenshots are clipped to this height.
|
||||||
|
# RAM usage will be higher if you increase this.
|
||||||
|
# - SCREENSHOT_MAX_HEIGHT=16000
|
||||||
|
|
||||||
# Comment out ports: when using behind a reverse proxy, enable networks: etc.
|
# Comment out ports: when using behind a reverse proxy, enable networks: etc.
|
||||||
ports:
|
ports:
|
||||||
|
|||||||
Reference in New Issue
Block a user