Fetching - Small memory-handling improvement when detecting price information (saves ~10 MB)
This commit is contained in:
@@ -477,8 +477,10 @@ def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=Fals
|
|||||||
# Does LD+JSON exist with a @type=='product' and a .price set anywhere?
|
# Does LD+JSON exist with a @type=='product' and a .price set anywhere?
|
||||||
def has_ldjson_product_info(content):
|
def has_ldjson_product_info(content):
|
||||||
try:
|
try:
|
||||||
lc = content.lower()
|
# Better than .lower() which can use a lot of ram
|
||||||
if 'application/ld+json' in lc and lc.count('"price"') == 1 and '"pricecurrency"' in lc:
|
if (re.search(r'application/ld\+json', content, re.IGNORECASE) and
|
||||||
|
re.search(r'"price"', content, re.IGNORECASE) and
|
||||||
|
re.search(r'"pricecurrency"', content, re.IGNORECASE)):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# On some pages this is really terribly expensive when they dont really need it
|
# On some pages this is really terribly expensive when they dont really need it
|
||||||
|
|||||||
Reference in New Issue
Block a user