php-webdriver + docker-selenium + Linux 实现网络爬虫

linux上需要安装docker服务,如果没有安装请看前面的文章
#拉取docker 镜像 
docker pull selenium/standalone-chrome:4.0.0-alpha-7-prerelease-20200826

#创建selenium docker容器
docker run -d -p 4444:4444 --name=selenium -v /dev/shm:/dev/shm selenium/standalone-chrome:4.0.0-alpha-7-prerelease-20200826

#查看容器状态
docker ps

搭建 PHP 环境、安装 Composer 此处不赘述

composer require php-webdriver/webdriver
<?php
/**
 * Created by PhpStorm.
 * User: lizhiguo
 * Date: 2020/8/31
 * Time: 10:05
 */
require __DIR__ . '/vendor/autoload.php';
use \Facebook\WebDriver\Remote\RemoteWebDriver;
use \Facebook\WebDriver\Remote\DesiredCapabilities;
use \Facebook\WebDriver\Chrome\ChromeOptions;
// Crawl Amazon search-result pages for "keyboard" through a remote Selenium
// Chrome session and save each page's HTML to "<page>.html" in the current
// working directory.

// Address of the Selenium server (the setup commands at the top of this file
// publish the container on port 4444).
$host = 'http://127.0.0.1:4444';
$desiredCapabilities = DesiredCapabilities::chrome();

// Disable accepting SSL certificates
$desiredCapabilities->setCapability('acceptSslCerts', false);

// Run Chrome headless, with the sandbox disabled.
$chromeOptions = new ChromeOptions();
$chromeOptions->addArguments(['--no-sandbox', '--headless']);

$desiredCapabilities->setCapability(ChromeOptions::CAPABILITY_W3C, $chromeOptions);

$driver = RemoteWebDriver::create($host, $desiredCapabilities);

// try/finally guarantees the remote browser session is closed even when a
// page load throws; otherwise the session stays open on the Selenium server.
try {
    for ($i = 1; $i <= 14; $i++) {
        // qid carries the current Unix timestamp; the ref value is fixed.
        $url = "https://www.amazon.com/s?k=keyboard&page=" . $i . "&qid=" . time() . "&ref=sr_pg_3";
        echo $url;
        $driver->get($url);

        // Cookies, if needed, can be read with $driver->manage()->getCookies().
        $source = $driver->getPageSource();
        print_r($source);

        // file_put_contents() returns false on failure; report it and keep
        // crawling the remaining pages instead of failing silently.
        if (file_put_contents($i . '.html', $source) === false) {
            error_log("Failed to write page {$i} to {$i}.html");
        }
    }
} finally {
    $driver->quit();
}
// php-webdriver API reference: https://php-webdriver.github.io/php-webdriver/latest/Facebook/WebDriver/Chrome/ChromeDriver.html