80 lines
2.6 KiB
Python
80 lines
2.6 KiB
Python
import json

import scrapy
from scrapy import Request
|
|
|
|
|
|
|
|
class AsphaltDomesticSpider(scrapy.Spider):
    """Spider that fetches domestic asphalt price lists from the baiinfo price API.

    For every entry in ``start_urls`` a JSON POST request is sent to the
    ``getPriceList`` endpoint, authenticated through the ``Baiinfo-Auth``
    header. Each (target, date) price found in the response is yielded as
    a flat dict.

    ``cookie`` and ``user_agent`` default to ``None`` and are expected to be
    assigned (on the class or the instance) before the crawl starts.
    """

    name = "asphalt_domestic"
    # Not plain URLs: each entry is (source label, endpoint URL, JSON payload),
    # unpacked by start_requests().
    start_urls = [
        ('国内炼厂重交沥青出厂价格', "http://www.baiinfo.com/api/website/price/priceInfo/getPriceList",
         {"channelId": "18", "pricesGroupId": 526}),
        ('国内市场沥青批发价格汇总', "http://www.baiinfo.com/api/website/price/priceInfo/getPriceList",
         {"channelId": "18", "pricesGroupId": 530}),
    ]
    # Mapping of cookie name -> value; the 'user' entry holds a JSON string
    # containing the auth token.
    cookie = None
    # Value sent as the User-Agent request header.
    user_agent = None
    # Cache for the token extracted from the cookie.
    _token = None

    @property
    def token(self):
        """Return the auth token parsed from the ``user`` cookie, caching it.

        The token is read from ``self.cookie`` (not from a module-level
        global), so the spider works regardless of how it is launched.
        """
        if not self._token:
            self._token = json.loads(self.cookie['user'])['token']
        return self._token

    def start_requests(self):
        """Yield one authenticated JSON POST request per ``start_urls`` entry.

        The source label travels in ``meta['source']`` so that ``parse`` can
        tag the resulting records.
        """
        for source, url, data in self.start_urls:
            yield Request(
                method='POST',
                body=json.dumps(data),
                url=url,
                headers={
                    'User-Agent': self.user_agent,
                    'Content-Type': 'application/json',
                    'Baiinfo-Auth': self.token,
                },
                meta={'source': source},
            )

    def parse(self, response, **kwargs):
        """Yield one record per (target, date) price in the JSON response.

        Nothing is yielded unless the payload has truthy ``success`` and
        ``data`` fields. Prices that cannot be converted to ``int`` are
        recorded as 0.

        Yields:
            dict with keys ``name``, ``price``, ``date`` and ``from_``.
        """
        ret = json.loads(response.text)
        if not (ret.get('success') and ret.get('data')):
            return

        source = response.meta['source']
        for item in ret['data']:
            for date, raw_price in item['priceData'].items():
                try:
                    price = int(raw_price)
                except (TypeError, ValueError):
                    # TypeError covers null prices; without it a single
                    # missing value would abort the whole response.
                    price = 0
                yield {
                    'name': item['targetName'],
                    'price': price,
                    'date': date,
                    'from_': source,
                }
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: crawl with AsphaltDomesticSpider, then upsert every
    # scraped record through the AsphaltDomestic model.
    import json

    from spiders import run_spider, BaiinfoCookieTools
    from commons.models.asphalt_domestic import AsphaltDomestic
    from core.factory import ClientApp

    # Read the cookies.
    cookie = BaiinfoCookieTools.get_cookies()

    # Crawl: hand the cookies and user agent to the spider class first.
    AsphaltDomesticSpider.cookie = cookie
    AsphaltDomesticSpider.user_agent = BaiinfoCookieTools.user_agent
    file_path = run_spider(AsphaltDomesticSpider)

    # Store: load the JSON file at the path returned by run_spider. The
    # context manager closes the file handle deterministically.
    with open(file_path, 'r', encoding='utf-8') as result_file:
        data = json.load(result_file)

    with ClientApp().app_context():
        for item in data:
            print(item)
            AsphaltDomestic(**item).upsert()
|
|
|