# Files
# material-api/web/spiders/asphalt_domestic.py
# 2024-05-29 10:21:31 +08:00
#
# 80 lines
# 2.6 KiB
# Python
import json

import scrapy
from scrapy import Request
class AsphaltDomesticSpider(scrapy.Spider):
    """Scrape domestic asphalt prices from the baiinfo.com price-list API.

    ``cookie`` and ``user_agent`` must be injected on the class before the
    crawl starts (see the ``__main__`` driver at the bottom of this file).
    """

    name = "asphalt_domestic"

    # (human-readable source label, API endpoint, POST payload) per price group.
    start_urls = [
        ('国内炼厂重交沥青出厂价格', "http://www.baiinfo.com/api/website/price/priceInfo/getPriceList",
         {"channelId": "18", "pricesGroupId": 526}),
        ('国内市场沥青批发价格汇总', "http://www.baiinfo.com/api/website/price/priceInfo/getPriceList",
         {"channelId": "18", "pricesGroupId": 530}),
    ]

    cookie = None       # cookie mapping; must contain a JSON 'user' entry holding a 'token'
    user_agent = None   # UA string matching the session the cookie came from
    _token = None       # lazily-extracted auth token cache

    @property
    def token(self):
        """Return the auth token, extracting it from the cookie on first access.

        FIX: the original read the bare module-global ``cookie``, which only
        exists when this file is run as a script; use ``self.cookie`` so the
        spider also works when scheduled by the framework.
        """
        if not self._token:
            self._token = json.loads(self.cookie['user'])['token']
        return self._token

    def start_requests(self):
        """Issue one authenticated JSON POST per configured price group."""
        for source, url, data in self.start_urls:
            yield Request(
                method='POST',
                body=json.dumps(data),
                url=url,
                headers={
                    'User-Agent': self.user_agent,
                    'Content-Type': 'application/json',
                    'Baiinfo-Auth': self.token,
                },
                meta={'source': source},
            )

    def parse(self, response, **kwargs):
        """Yield one item per (target, date) price point in the API response.

        Prices that are not parseable as integers (e.g. '-' placeholders or
        null values) are recorded as 0 rather than dropped, matching the
        original best-effort behaviour.
        """
        ret = json.loads(response.text)
        if ret.get('success') and ret.get('data'):
            for item in ret['data']:
                item['source'] = response.meta['source']
                for date, raw_price in item['priceData'].items():
                    try:
                        price = int(raw_price)
                    except (ValueError, TypeError):
                        # FIX: also catch TypeError — a null price would
                        # previously crash the whole parse callback.
                        price = 0
                    yield {
                        'name': item['targetName'],
                        'price': price,
                        'date': date,
                        'from_': response.meta['source'],
                    }
if __name__ == '__main__':
    # Script entry point: crawl baiinfo asphalt prices and upsert them
    # into the database. Imports are local because these project modules
    # are only needed (and importable) in the script context.
    import json

    from spiders import run_spider, BaiinfoCookieTools
    from commons.models.asphalt_domestic import AsphaltDomestic
    from core.factory import ClientApp

    # Load the authenticated baiinfo session cookie + matching user agent
    # and inject them on the spider class before crawling.
    cookie = BaiinfoCookieTools.get_cookies()
    AsphaltDomesticSpider.cookie = cookie
    AsphaltDomesticSpider.user_agent = BaiinfoCookieTools.user_agent

    # Crawl; run_spider writes the scraped items to a JSON file and
    # returns its path.
    file_path = run_spider(AsphaltDomesticSpider)

    # FIX: read the results with a context manager so the file handle is
    # closed promptly (the original leaked it via open(...).read()).
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Upsert every scraped item inside one application context.
    with ClientApp().app_context():
        for item in data:
            print(item)
            AsphaltDomestic(**item).upsert()