from playwright.async_api import async_playwright
import os
import pytesseract
from PIL import Image
import asyncio
# Proxy server URL handed to the browser launch options in download().
proxy = "http://abc.com.cn:80"
async def download():
    """Load the target page through the proxy and OCR its consent element.

    Launches a non-headless Chrome via Playwright, configured with the
    module-level ``proxy``, opens ``https://ddd.com/``, waits until the
    network is idle, saves a screenshot of the
    ``div#transcend-consent-manager`` element to ``shadow_host.png`` and
    runs Tesseract (``chi_sim+eng``) on that image.

    Returns:
        The dictionary produced by ``pytesseract.image_to_data`` with
        ``Output.DICT`` for the element screenshot.

    Raises:
        RuntimeError: If the consent-manager element is not found on the
            page, so there is nothing to screenshot.
    """
    screenshot_path = 'shadow_host.png'

    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=False,
            channel="chrome",
            proxy={"server": proxy},  # route browser traffic through the proxy
        )
        try:
            context = await browser.new_context()
            page = await context.new_page()
            # Playwright timeouts are in milliseconds: allow up to 3 minutes.
            await page.goto("https://ddd.com/", timeout=180000)
            await page.wait_for_load_state("networkidle")

            shadow_host = await page.query_selector('div#transcend-consent-manager')
            if shadow_host is None:
                # query_selector yields None when nothing matches; fail with a
                # clear message instead of an AttributeError on None.
                raise RuntimeError(
                    "element 'div#transcend-consent-manager' not found on page"
                )
            await shadow_host.screenshot(path=screenshot_path)
        finally:
            # Always release the browser, even if navigation or the
            # screenshot failed.
            await browser.close()

    # The screenshot is on disk by now, so OCR does not need the browser.
    with Image.open(screenshot_path) as image:
        return pytesseract.image_to_data(
            image,
            lang='chi_sim+eng',
            output_type=pytesseract.Output.DICT,
        )
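# Blog page footer captured along with the snippet (not part of the script):
# "No comments:" / "Post a comment"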