VisualSelector & BrowserSteps - Scraper improvements, remove duplicate code
This commit is contained in:
@@ -257,12 +257,10 @@ class browsersteps_live_ui(steppable_browser_interface):
|
|||||||
self.page.evaluate("var include_filters=''")
|
self.page.evaluate("var include_filters=''")
|
||||||
from pkg_resources import resource_string
|
from pkg_resources import resource_string
|
||||||
# The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
|
# The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
|
||||||
# @todo dont duplicate these selectors, or just let them both use the same data?
|
|
||||||
xpath_element_js = resource_string(__name__, "../../res/xpath_element_scraper.js").decode('utf-8')
|
xpath_element_js = resource_string(__name__, "../../res/xpath_element_scraper.js").decode('utf-8')
|
||||||
xpath_element_js = xpath_element_js.replace('%ELEMENTS%',
|
from changedetectionio.content_fetcher import visualselector_xpath_selectors
|
||||||
'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section')
|
xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors)
|
||||||
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
|
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
|
||||||
|
|
||||||
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
|
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
|
||||||
|
|
||||||
return (screenshot, xpath_data)
|
return (screenshot, xpath_data)
|
||||||
|
|||||||
@@ -7,6 +7,8 @@ import requests
|
|||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
# Comma-separated list of element selectors shared by the VisualSelector and
# BrowserSteps scrapers. It is substituted for the '%ELEMENTS%' placeholder in
# the xpath element scraper JavaScript before that script is evaluated in the
# page. Each tag is listed exactly once ('section' previously appeared twice).
visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, summary'
|
||||||
|
|
||||||
class Non200ErrorCodeReceived(Exception):
|
class Non200ErrorCodeReceived(Exception):
|
||||||
def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
|
def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
|
||||||
# Set this so we can use it in other parts of the app
|
# Set this so we can use it in other parts of the app
|
||||||
@@ -367,7 +369,7 @@ class base_html_playwright(Fetcher):
|
|||||||
else:
|
else:
|
||||||
self.page.evaluate("var include_filters=''")
|
self.page.evaluate("var include_filters=''")
|
||||||
|
|
||||||
self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary') + "}")
|
self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
|
||||||
|
|
||||||
# Bug 3 in Playwright screenshot handling
|
# Bug 3 in Playwright screenshot handling
|
||||||
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
||||||
|
|||||||
@@ -116,7 +116,7 @@ for (var i = 0; i < elements.length; i++) {
|
|||||||
left: Math.floor(bbox['left']),
|
left: Math.floor(bbox['left']),
|
||||||
top: Math.floor(bbox['top']),
|
top: Math.floor(bbox['top']),
|
||||||
tagName: (elements[i].tagName) ? elements[i].tagName.toLowerCase() : '',
|
tagName: (elements[i].tagName) ? elements[i].tagName.toLowerCase() : '',
|
||||||
tagtype: (elements[i].type) ? elements[i].type.toLowerCase() : ''
|
tagtype: (elements[i].tagName == 'INPUT' && elements[i].type) ? elements[i].type.toLowerCase() : ''
|
||||||
});
|
});
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user