fix:
This commit is contained in:
@@ -15,8 +15,6 @@ class Reber400Calculator(Calculator):
|
||||
unit = "t"
|
||||
spec = ""
|
||||
|
||||
# todo price result 加 year 字段
|
||||
|
||||
def __init__(self, year, month):
|
||||
self.year = year
|
||||
self.month = month
|
||||
|
@@ -27,6 +27,7 @@ class PricePublish(db.Model, Model, OperationTrackMixin, BaseModelMixin):
|
||||
price_longyan = Column('PRICE_LONGYAN', Numeric(16, 4), default=0, comment='龙岩价格')
|
||||
price_ningde = Column('PRICE_NINGDE', Numeric(16, 4), default=0, comment='宁德价格')
|
||||
price_pintan = Column('PRICE_PINTAN', Numeric(16, 4), default=0, comment='平潭价格')
|
||||
price_zhangzhoukfq = Column('PRICE_ZHANGZHOUKFQ', Numeric(16, 4), default=0, comment='漳州开发区价格')
|
||||
tax = Column('TAX', Numeric(4, 2), default=0, comment='税率')
|
||||
status = Column('STATUS', Integer, default=0, comment='状态')
|
||||
type = Column('TYPE', Integer, default=0, comment='类型')
|
||||
|
83
web/spiders/data_jiangxi.py
Normal file
83
web/spiders/data_jiangxi.py
Normal file
@@ -0,0 +1,83 @@
|
||||
import scrapy
|
||||
from lxml import html, etree
|
||||
from scrapy import Request
|
||||
|
||||
|
||||
class DataJiangxiSpider(scrapy.Spider):
    """Spider that collects attachment links from jt.jiangxi.gov.cn listings.

    The crawl has two stages, selected by ``response.meta['type']``:

    * ``'home'`` - a paginated index page whose article list is embedded in
      a ``<script>`` element; matching articles are followed.
    * ``'list'`` - an article page; every attachment link in its content
      area is emitted as an item with ``url``, ``name``, ``source`` and
      ``date`` keys.
    """

    name = "data_jiangxi"
    # (source label, index page URL) pairs; the label means "material prices".
    start_urls = [
        ('材料价格', "http://jt.jiangxi.gov.cn/col/col70716/index.html?uid=339408&pageNum=1"),
        ('材料价格', "http://jt.jiangxi.gov.cn/col/col70716/index.html?uid=339408&pageNum=2"),
        ('材料价格', "http://jt.jiangxi.gov.cn/col/col70716/index.html?uid=339408&pageNum=3"),
    ]

    # Browser-like User-Agent sent with every request made by this spider.
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
    }
    # Wrapper markup removed from the embedded script text so that the
    # remaining payload can be parsed as plain HTML.
    _WRAPPER_TOKENS = (
        '<![CDATA[', ']]>',
        '</record>', '<record>',
        '</recordset>', '<recordset>',
        '</datastore>', '<datastore>',
        '</nextgroup>', '<nextgroup>',
    )
    # Only index entries whose title contains this text are followed
    # (literally "engineering material price information").
    _TITLE_KEYWORD = '工程材料价格信息'

    def start_requests(self):
        """Yield one 'home' request per configured index page."""
        for source, url in self.start_urls:
            yield Request(
                method='GET',
                url=url,
                headers=dict(self._HEADERS),
                meta={'source': source, 'type': 'home'},
            )

    def parse(self, response, **kwargs):
        """Dispatch the response to the handler named by ``meta['type']``.

        Responses with any other type produce nothing.
        """
        page_type = response.meta['type']
        if page_type == 'home':
            yield from self.parse_home(response)
        elif page_type == 'list':
            yield from self.parse_list(response)

    def parse_home(self, response):
        """Follow every matching article link found on an index page.

        The article list lives inside a script element under the node with
        id ``339408``. Its wrapper tags are stripped and the rest is parsed
        as HTML. Pages without that payload, and anchors lacking an href or
        text, are skipped instead of raising.
        """
        payload = response.xpath('//*[@id="339408"]/script/text()').get()
        if not payload:
            self.logger.warning('No embedded article list found on %s', response.url)
            return

        for token in self._WRAPPER_TOKENS:
            payload = payload.replace(token, '')
        if not payload.strip():
            self.logger.warning('Embedded article list is empty on %s', response.url)
            return

        # Named ``tree`` so the imported ``lxml.html`` module is not shadowed.
        tree = etree.HTML(payload)
        if tree is None:
            return

        for anchor in tree.xpath('//li/a'):
            hrefs = anchor.xpath('@href')
            titles = anchor.xpath('text()')
            if not hrefs or not titles:
                continue
            uri, name = hrefs[0], titles[0]
            self.logger.debug('index entry: %s %s', uri, name)
            if self._TITLE_KEYWORD not in name:
                continue
            yield Request(
                method='GET',
                # urljoin leaves absolute links untouched and resolves
                # relative ones, which Request would otherwise reject.
                url=response.urljoin(uri),
                headers=dict(self._HEADERS),
                meta={'source': name, 'type': 'list'},
            )

    def parse_list(self, response):
        """Yield one item per attachment link found on an article page.

        ``date`` is the text before the first space in the third header
        entry, and ``source`` is the text of the first header entry; either
        is ``None`` when the page lacks that element. Anchors without an
        href are skipped.
        """
        date_text = response.xpath('//*[@id="content"]/div[1]/ul/li[3]/span/text()').get()
        date = date_text.split(' ')[0] if date_text else None
        source = response.xpath('//*[@id="content"]/div[1]/ul/li[1]/span/text()').get()

        for anchor in response.xpath('//*[@id="div_content"]/span/p/a'):
            uri = anchor.xpath('@href').get()
            name = anchor.xpath('text()').get()
            if not uri:
                continue
            self.logger.debug('attachment: %s %s', uri, name)
            yield {
                # Resolved against the page URL rather than a hard-coded host.
                'url': response.urljoin(uri),
                'name': name,
                'source': source,
                'date': date,
            }
|
||||
|
||||
# TODO: persist the Jiangxi cost-station data to the database
|
||||
|
||||
if __name__ == '__main__':
    import json

    from spiders import run_spider
    from commons.models.data_zhejiang import DataZhejiang
    from core.factory import ClientApp

    # Crawl: run_spider's return value is used below as the path of a
    # JSON file holding the scraped items.
    file_path = run_spider(DataJiangxiSpider)

    # Load the crawl output; the context manager closes the file handle.
    with open(file_path, 'r', encoding='utf-8') as result_file:
        data = json.load(result_file)

    with ClientApp().app_context():
        for item in data:
            print(item)
            # TODO: store each item once a Jiangxi model exists,
            # e.g. DataJiangxi(**item).upsert()
|
@@ -23,6 +23,7 @@ def create_last_month_publish_data():
|
||||
# 检查是否生成上月数据
|
||||
query = PriceResult.get_query(year=year, month=month)
|
||||
result = PriceResult.get_list(query)
|
||||
# todo 有修改记录的忽略,其余都重新生成
|
||||
if not result:
|
||||
calculate(year=year, month=month)
|
||||
|
||||
|
@@ -15,6 +15,14 @@ from calculators.asphalt_imported_modifier import AsphaltImportedModifierCalcula
|
||||
|
||||
|
||||
def calculate(year=2023, month=8):
|
||||
"""
|
||||
计算生成趋势表
|
||||
"""
|
||||
# todo-1 价格计算触发器
|
||||
# todo-1 十一地市 交通局 计算调整
|
||||
# todo-1 十一地市 公路局 计算调整
|
||||
# todo-1 十一地市 调查表 计算调整
|
||||
# todo-1 十一地市 网络价格 计算调整
|
||||
for Calculator in [
|
||||
AsphaltDomesticCalculator,
|
||||
AsphaltImportedCalculator,
|
||||
|
@@ -2,6 +2,9 @@ from collectors import Collector
|
||||
|
||||
|
||||
def collect(year=2023, month=11):
    """Collate the published prices.

    Builds a ``Collector`` for the given ``year`` and ``month`` and runs it.
    """
    Collector(year, month).run()
|
||||
|
||||
|
30
web/tasks/once/init_db.py
Normal file
30
web/tasks/once/init_db.py
Normal file
@@ -0,0 +1,30 @@
|
||||
from core.factory import ClientApp
|
||||
|
||||
if __name__ == '__main__':
    import importlib

    from core.extensions import db

    # Model modules imported purely for their import-time side effects
    # (presumably registering their tables with ``db`` - confirm).
    # Importing by name replaces the wildcard imports, so the script
    # namespace is not flooded with unused names.
    MODEL_MODULES = (
        'commons.models.asphalt_domestic',
        'commons.models.asphalt_imported',
        'commons.models.asphalt_modifier',
        'commons.models.budget',
        'commons.models.budget_item',
        'commons.models.cement',
        'commons.models.data_fujian',
        'commons.models.data_guangdong',
        'commons.models.data_zhejiang',
        'commons.models.fujian_survey',
        'commons.models.fuzhou_highway_bureau',
        'commons.models.fuzhou_transportation_bureau',
        'commons.models.local_material',
        'commons.models.material',
        'commons.models.material_task',
        'commons.models.oil',
        'commons.models.price_publish',
        'commons.models.price_result',
        'commons.models.sanming_steel',
        'commons.models.steel_plate',
        'commons.models.steel_rebar',
        'commons.models.steel_section',
        'commons.models.steel_strand',
    )
    for module_name in MODEL_MODULES:
        importlib.import_module(module_name)

    # Create the tables inside an application context.
    with ClientApp().app_context():
        db.create_all()
|
Reference in New Issue
Block a user