import json
from multiprocessing.pool import Pool

from commons.constants.material_task import MaterialTaskType
from commons.models.data_fujian import DataFujian
from commons.models.data_guangdong import DataGuangdong
from commons.models.data_zhejiang import DataZhejiang
from commons.models.oil import Oil
from spiders import run_spider
from spiders.data_fujian import DataFujianSpider
from spiders.data_guangdong import DataGuangdongSpider
from spiders.date_zhejiang import DataZhejiangSpider
from spiders.oil import OilSpider


class DataService:
    """Run the spider registered for a task type and persist what it scraped.

    ``spiders`` and ``models`` are keyed by the same ``MaterialTaskType``
    members: the first gives the spider class to run, the second the model
    class used to store each scraped item.  A ``None`` entry means no spider
    or model is registered for that task type.
    """

    # Task type -> spider class handed to ``run_spider``.
    spiders = {
        MaterialTaskType.OTHER_ZHEJIANG: DataZhejiangSpider,
        MaterialTaskType.OTHER_GUANGZHOU: DataGuangdongSpider,
        MaterialTaskType.OTHER_YUNNAN: None,
        MaterialTaskType.FUJIAN_DEPARTMENT: DataFujianSpider,
        MaterialTaskType.OIL: OilSpider,
    }

    # Task type -> model class instantiated from each scraped item.
    models = {
        MaterialTaskType.OTHER_ZHEJIANG: DataZhejiang,
        MaterialTaskType.OTHER_GUANGZHOU: DataGuangdong,
        MaterialTaskType.OTHER_YUNNAN: None,
        MaterialTaskType.FUJIAN_DEPARTMENT: DataFujian,
        MaterialTaskType.OIL: Oil,
    }

    @classmethod
    def get_content(cls, type):
        """Scrape the data for *type*, upsert every item and return the items.

        The parameter name ``type`` shadows the builtin; it is kept because
        callers may pass it by keyword.

        :param type: key into ``cls.spiders`` / ``cls.models``.
        :return: ``[]`` when no spider is registered for *type*, otherwise
            the decoded JSON content of the file produced by the spider.
        :raises Exception: whatever ``run_spider`` raised in the worker
            process is re-raised here by ``result.get()``.
        """
        spider_class = cls.spiders.get(type)
        if not spider_class:
            return []

        # The spider runs in a one-off child process rather than in-process.
        # NOTE(review): presumably because the crawler cannot be started twice
        # in the same interpreter -- confirm against ``run_spider``.
        pool = Pool(processes=1)
        try:
            result = pool.apply_async(run_spider, (spider_class,))
            pool.close()
            pool.join()
        finally:
            # Harmless after a clean close/join; releases the worker if any
            # of the calls above raised.
            pool.terminate()
        file_path = result.get()

        # ``run_spider`` hands back the path of a JSON file; close it
        # deterministically instead of leaking the handle.
        with open(file_path, 'r', encoding='utf-8') as scraped_file:
            content = json.load(scraped_file)

        # NOTE(review): a task type with a spider but no model would raise
        # TypeError on the first item here; the two maps currently agree.
        model_class = cls.models.get(type)
        for item in content:
            model_class(**item).upsert()
        return content