企业网站 三网系统,建网站手机软件,佛山外贸网站建设特色,关键词歌词简谱 日后，网络爬虫也好，数据采集也好，自动化必然是主流。因此，笔者未雨绸缪，在此研究各类自动化源码，希望能够赶上时代，做出一套实用的自动化框架。
这里先研究传统的webdriver中转来进行浏览器自动…日后，网络爬虫也好，数据采集也好，自动化必然是主流。因此，笔者未雨绸缪，在此研究各类自动化源码，希望能够赶上时代，做出一套实用的自动化框架。
这里先研究传统的webdriver中转来进行浏览器自动化的源码。
webdriver官方位于这里WebDriver
用过selenium的同学应该都知道：需要有selenium这个自动化库来写脚本，需要一个webdriver.exe，还需要一个浏览器。流程基本如下：
编写脚本-----发送消息给webdriver------webdriver发消息给浏览器
通过这样一个流程就完成了自动化。
脚本简单，资料一大堆；浏览器也只是个执行者。所以关键在于webdriver如何接受、处理、发送消息，所以重点源码：webdriver源码。
WebDriverMain.cpp启动函数 #include config.h#include LogInitialization.h
#include WebDriverService.h
#include wtf/MainThread.h
#include wtf/Threading.h#if OS(ANDROID)
__attribute__((visibility(default)))
int WebKit::WebDriverProcessMain(int argc, char** argv)
#else
int main(int argc, char** argv)
#endif
{WebDriver::WebDriverService::platformInit();WTF::initializeMainThread();
#if !LOG_DISABLED || !RELEASE_LOG_DISABLEDWebDriver::logChannels().initializeLogChannelsIfNecessary(WebDriver::logLevelString());
#endifWebDriver::WebDriverService service;return service.run(argc, argv);
}步骤 1平台初始化platformInit源码里空白无实现应该是等之后更新。
步骤 2：WTF（Web Template Framework）主线程初始化
步骤 3：日志通道初始化
步骤 4：创建并运行 WebDriver 服务
WebDriverService.cpp：解析命令参数，进入监听loop循环。
if (const char* targetEnvVar getenv(WEBDRIVER_TARGET_ADDR))targetString String::fromLatin1(targetEnvVar);
先获取一个名为WEBDRIVER_TARGET_ADDR的环境变量，这个变量是用于连接已经开启的浏览器。是的，需要先把浏览器打开，然后webdriver会去连接，而不是先运行webdriver。
然后解析命令行参数一大堆没什么可看的 if (equalSpans(arg, -h_span) || equalSpans(arg, --help_span)) {printUsageStatement(argv[0]);return EXIT_SUCCESS;}if (equalSpans(arg, -p_span) portString.isNull()) {if (i argc) {printUsageStatement(argv[0]);return EXIT_FAILURE;}portString String::fromLatin1(argv[i]);continue;}static constexpr auto portArgument --port_span;if (spanHasPrefix(arg, portArgument) portString.isNull()) {portString arg.subspan(portArgument.size());continue;}static constexpr auto hostArgument --host_span;if (spanHasPrefix(arg, hostArgument) !host) {host arg.subspan(hostArgument.size());continue;}#if ENABLE(WEBDRIVER_BIDI)static constexpr auto bidiPortArgument --bidi-port_span;if (spanHasPrefix(arg, bidiPortArgument) bidiPortString.isNull()) {bidiPortString arg.subspan(bidiPortArgument.size());continue;}
#endifif (equalSpans(arg, -t_span) targetString.isNull()) {if (i argc) {printUsageStatement(argv[0]);return EXIT_FAILURE;}targetString String::fromLatin1(argv[i]);continue;}static constexpr auto targetArgument --target_span;if (spanHasPrefix(arg, targetArgument) targetString.isNull()) {targetString arg.subspan(targetArgument.size());continue;}if (equalSpans(arg, --replace-on-new-session_span)) {m_replaceOnNewSession true;continue;}}if (portString.isNull()) {printUsageStatement(argv[0]);return EXIT_FAILURE;}if (!targetString.isEmpty()) {auto position targetString.reverseFind(:);if (position ! notFound) {m_targetAddress targetString.left(position);m_targetPort parseIntegerAllowingTrailingJunkuint16_t(StringView { targetString }.substring(position 1)).value_or(0);}}auto port parseIntegeruint16_t(portString);if (!port) {fprintf(stderr, Invalid port %s provided\n, portString.utf8().data());return EXIT_FAILURE;}#if ENABLE(WEBDRIVER_BIDI)auto bidiPort parseIntegeruint16_t(bidiPortString);if (!bidiPort) {const int16_t bidiPortIncrement *port std::numeric_limitsuint16_t::max() ? -1 : 1;bidiPort { *port bidiPortIncrement };fprintf(stderr, Invalid WebSocket BiDi port %s provided. Defaulting to %d.\n, bidiPortString.utf8().data(), *bidiPort);}
#endif
最后是进入主循环：这里先线程初始化，然后看是websocket还是http模式，前者双向，后者单向。之后如果监听listen成功，就进入loop的循环，不断接受消息： WTF::initializeMainThread();const char* hostStr host host-utf8().data() ? host-utf8().data() : local;
#if ENABLE(WEBDRIVER_BIDI)if (!m_bidiServer.listen(host ? *host : nullString(), *bidiPort)) {fprintf(stderr, FATAL: Unable to listen for WebSocket BiDi server at host %s and port %d.\n, hostStr, *bidiPort);return EXIT_FAILURE;}RELEASE_LOG(WebDriverBiDi, Started WebSocket BiDi server with host %s and port %d, hostStr, *bidiPort);
#endif // ENABLE(WEBDRIVER_BIDI)if (!m_server.listen(host, *port)) {fprintf(stderr, FATAL: Unable to listen for HTTP server at host %s and port %d.\n, hostStr, *port);return EXIT_FAILURE;}RELEASE_LOG(WebDriverClassic, Started HTTP server with host %s and port %d, hostStr, *port);RunLoop::run();#if ENABLE(WEBDRIVER_BIDI)m_bidiServer.disconnect();
#endifm_server.disconnect();return EXIT_SUCCESS;
其中的监听代码如下
bool HTTPServer::listen(const std::optionalString host, unsigned port)
{auto endpoint RemoteInspectorSocketEndpoint::singleton();if (auto id endpoint.listenInet(host ? host.value().utf8().data() : , port, *this)) {m_server id;return true;}return false;
}
这里先用了个设计模式——单例模式，为了可复用，利用同一个监听端点。
然后是很常规的server模式：打开一个TCP端口监听。
std::optionalConnectionID RemoteInspectorSocketEndpoint::listenInet(const char* address, uint16_t port, Listener listener)
{Locker locker { m_connectionsLock };auto id generateConnectionID();auto connection makeUniqueListenerConnection(id, listener, address, port);if (!connection-isListening())return std::nullopt;m_listeners.add(id, WTFMove(connection));wakeupWorkerThread();return id;
}bool RemoteInspectorSocketEndpoint::isListening(ConnectionID id)
{Locker locker { m_connectionsLock };if (m_listeners.contains(id))return true;return false;
}
主循环代码如下，典型的一个GUI的事件消息处理机制，熟悉win32的同学应该很懂。而且这里就是纯粹的win32的函数接口：获取消息，把消息转化为字符消息，分发消息。
void RunLoop::run()
{MSG message;while (BOOL result ::GetMessage(message, nullptr, 0, 0)) {if (result -1)break;::TranslateMessage(message);::DispatchMessage(message);}
}