Project repository: https://gitee.com/wyu_001/myscrapy

Next, let's look at how to run multiple spider scripts with multiple threads. The project root contains a batch.py file, which is the script for batch-running spiders: it uses a thread pool to run the spider classes under the spider directory concurrently. You can also configure which spider files to run in setting.py:
```python
# batch
# Batch run: by default, run every subclass of MySpider found under the spider directory
# Number of concurrent threads per batch
BATCH_THREADS = 10

# batch run files in list
# Optionally restrict the run to specific script files under the spider directory
BATCH_FILES = ['dxyqueryhospital.py', 'haodfqueryhospital.py']
```

Below is the batch.py script:
```python
import inspect
import importlib
from os import listdir, getcwd
from os.path import isfile, join
from concurrent.futures import ThreadPoolExecutor, as_completed

from config.setting import BATCH_THREADS
from config.setting import BATCH_FILES

crawls = []

# Collect every file in the spider directory
lib_dir = 'spider'
file_path = join(getcwd(), lib_dir)
crawl_files = [f for f in listdir(file_path) if isfile(join(file_path, f))]

# If BATCH_FILES is non-empty, only run the files listed there
crawls_sets = set(crawl_files)
batch_sets = set(BATCH_FILES)
if len(batch_sets):
    crawl_files = list(crawls_sets.intersection(batch_sets))

# Import each module and instantiate every class whose direct base
# class is named MySpider
for file in crawl_files:
    if file != '__init__.py':
        file = f".{file.split('.')[0]}"
        module = importlib.import_module(file, lib_dir)
        for name, obj in inspect.getmembers(module, inspect.isclass):
            if obj.__base__.__name__ == 'MySpider':
                crawls.append(obj())

# Submit spiders to the thread pool in batches of BATCH_THREADS,
# waiting for each batch to finish before submitting the next
thread_num = 0
tasks = []
with ThreadPoolExecutor(max_workers=BATCH_THREADS) as tp:
    while len(crawls):
        task = tp.submit(crawls.pop().start_request)
        tasks.append(task)
        thread_num += 1
        if thread_num >= BATCH_THREADS:
            for future in as_completed(tasks):
                finish = future.result()
            thread_num = 0
    # Wait for any spiders left over from the last partial batch
    for future in as_completed(tasks):
        finish = future.result()
```
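Note that batch.py only picks up classes whose direct base class is named MySpider, and it runs each instance's start_request method on a worker thread. A minimal sketch of a spider file that this discovery logic would find (the file name, class name, and the import path of MySpider here are assumptions for illustration, not taken from the repository):

```python
# spider/examplespider.py -- hypothetical file name, for illustration only
from core.myspider import MySpider  # assumed import path for the project's base class


class ExampleSpider(MySpider):
    """A toy spider; batch.py finds it because its direct base is MySpider."""

    def start_request(self):
        # Entry point submitted to the thread pool by batch.py;
        # a real spider would fetch and parse pages here.
        print(f'{self.__class__.__name__} running')
```

To run the batch, execute `python batch.py` from the project root. With BATCH_FILES left empty, every MySpider subclass under the spider directory is scheduled; otherwise only the listed files are run.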