import json

import scrapy
from scrapy import Request


class AsphaltDomesticSpider(scrapy.Spider):
    """Spider for Baiinfo domestic asphalt prices.

    Scrapes two price groups from the Baiinfo price API:
    refinery ex-factory prices and market wholesale prices.
    """

    name = "asphalt_domestic"

    # (source label, API endpoint, POST payload) triples.
    # start_requests() iterates these itself, so this deliberately does not
    # follow scrapy's usual URL-only start_urls convention.
    start_urls = [
        ('国内炼厂重交沥青出厂价格',
         "http://www.baiinfo.com/api/website/price/priceInfo/getPriceList",
         {"channelId": "18", "pricesGroupId": 526}),
        ('国内市场沥青批发价格汇总',
         "http://www.baiinfo.com/api/website/price/priceInfo/getPriceList",
         {"channelId": "18", "pricesGroupId": 530}),
    ]

    # Injected onto the class by the __main__ driver before the crawl starts.
    cookie = None
    user_agent = None
    _token = None

    @property
    def token(self):
        """Lazily extract and cache the auth token from the injected cookie.

        BUGFIX: the original read the bare module-level name ``cookie``,
        which only exists when the file is run as a script; it must read
        ``self.cookie`` so the class also works when imported.
        """
        if self._token is None:
            self._token = json.loads(self.cookie['user'])['token']
        return self._token

    def start_requests(self):
        """Yield one authenticated JSON POST request per configured price group."""
        for source, url, data in self.start_urls:
            yield Request(
                method='POST',
                body=json.dumps(data),
                url=url,
                headers={
                    'User-Agent': self.user_agent,
                    'Content-Type': 'application/json',
                    'Baiinfo-Auth': self.token,
                },
                # Carry the human-readable source label through to parse().
                meta={'source': source},
            )

    def parse(self, response, **kwargs):
        """Flatten the API response into one item per (target, date) price.

        Prices that cannot be parsed as integers are recorded as 0 rather
        than dropped, so every date in ``priceData`` yields an item.
        """
        ret = json.loads(response.text)
        if not (ret.get('success') and ret.get('data')):
            return
        source = response.meta['source']
        for item in ret['data']:
            for date, raw_price in item['priceData'].items():
                try:
                    price = int(raw_price)
                except (ValueError, TypeError):
                    # Non-numeric placeholder (e.g. "-" or null) -> 0.
                    price = 0
                yield {
                    'name': item['targetName'],
                    'price': price,
                    'date': date,
                    'from_': source,
                }


if __name__ == '__main__':
    from spiders import run_spider, BaiinfoCookieTools
    from commons.models.asphalt_domestic import AsphaltDomestic
    from core.factory import ClientApp

    # Read the stored session cookie and inject credentials into the spider.
    cookie = BaiinfoCookieTools.get_cookies()
    AsphaltDomesticSpider.cookie = cookie
    AsphaltDomesticSpider.user_agent = BaiinfoCookieTools.user_agent

    # Crawl; run_spider writes the scraped items to a JSON file.
    file_path = run_spider(AsphaltDomesticSpider)

    # Load the scraped rows and upsert each into the database.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    with ClientApp().app_context():
        for item in data:
            print(item)
            AsphaltDomestic(**item).upsert()