教程详情

bash
pip install selenium beautifulsoup4
然后使用以下代码:
python
import asyncio
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
async def main():
    """Print every link found on https://www.example.com.

    Starts a headless Chrome session through Selenium, loads the page,
    parses the rendered HTML with BeautifulSoup and prints the ``href``
    value of every ``<a>`` tag that has one.

    Selenium's WebDriver API is synchronous, so nothing is awaited here;
    the function remains a coroutine only so that the existing
    ``asyncio.run(main())`` entry point keeps working.
    """
    # Imported locally so this block is self-contained: Selenium 4 expects
    # the chromedriver path to be supplied through a Service object, and
    # recent releases no longer accept the ``executable_path`` keyword on
    # ``webdriver.Chrome`` itself.
    from selenium.webdriver.chrome.service import Service

    options = Options()
    # Headless mode: no browser window is shown.
    options.add_argument('--headless')
    service = Service(executable_path='path/to/chromedriver')
    driver = webdriver.Chrome(service=service, options=options)
    try:
        driver.get('https://www.example.com')
        # page_source holds the HTML of the page as currently rendered.
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        # href=True skips anchors that have no href attribute at all.
        urls = [a['href'] for a in soup.find_all('a', href=True)]
    finally:
        # quit() ends the whole browser session (close() only closes the
        # current window); run it even if loading or parsing fails.
        driver.quit()

    for url in urls:
        print(url)
# Run the scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    asyncio.run(main())
将`path/to/chromedriver`替换为你的ChromeDriver可执行文件的实际路径。运行此代码后,它将以无头模式启动Chrome浏览器(不会显示浏览器窗口),访问指定的网站,提取页面中的所有链接,并在控制台中打印出来。