Files
material-api/web/scripts/unpack_history_data.py

358 lines
16 KiB
Python
Raw Normal View History

2024-07-11 09:13:30 +08:00
import datetime
2024-07-10 16:08:47 +08:00
import os
import zipfile
from pathlib import Path
2024-07-11 09:13:30 +08:00
from dateutil.relativedelta import relativedelta
from openpyxl import load_workbook
from xls2xlsx import XLS2XLSX
2024-07-10 16:08:47 +08:00
2024-07-11 09:13:30 +08:00
from commons.models.asphalt_modifier import AsphaltModifier
from commons.models.data_network import DataNetwork
from commons.models.fujian_survey import FujianSurvey
from commons.models.fuzhou_highway_bureau import FuzhouHighwayBureau
from commons.models.fuzhou_transportation_bureau import FuzhouTransportationBureau
from commons.models.local_material import LocalMaterial
from commons.models.sanming_steel import SanmingSteel
def unpack(zip_path=r'C:\Users\Administrator\Desktop\材料管理系统模版\造价站近两年价格数据.zip'):
2024-07-10 16:08:47 +08:00
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
for file_info in zip_ref.infolist():
filename = file_info.filename.encode('cp437').decode('gbk')
is_excel = filename.endswith('.xlsx') or filename.endswith('.xls')
is_fuzhou = '福州' in filename
# 地材
2024-07-11 09:13:30 +08:00
if is_excel and is_fuzhou and '地材汇总/' in filename and '福州市交通工程地方材料价格信息汇总表(数据处理)' not in filename\
and '混凝土价格' not in filename:
year, month = get_month(filename)
print(year, month, filename)
yield zip_ref, file_info, f'./地材-福州-{year}-{int(month):02d}{Path(filename).suffix}'
2024-07-10 16:08:47 +08:00
# 调查表
2024-07-11 09:13:30 +08:00
elif is_excel and is_fuzhou and '11地市调查表/' in filename and '福建省' not in filename and '通闽公司' not in filename and '-' not in filename:
2024-07-10 16:08:47 +08:00
year, month = get_month(filename)
print(year, month, filename)
2024-07-11 09:13:30 +08:00
yield zip_ref, file_info, f'./调查表-福州-{year}-{int(month):02d}{Path(filename).suffix}'
2024-07-10 16:08:47 +08:00
# 公路局
2024-07-11 09:13:30 +08:00
elif is_excel and is_fuzhou and '4.福州公路' in filename:
year, month = get_month(filename)
print(year, month, filename)
yield zip_ref, file_info, f'./公路局-福州-{year}-{int(month):02d}{Path(filename).suffix}'
2024-07-10 16:08:47 +08:00
# 交通局
2024-07-11 09:13:30 +08:00
elif is_excel and is_fuzhou and '1.福州交通局' in filename:
year, month = get_month(filename)
print(year, month, filename)
yield zip_ref, file_info, f'./交通局-福州-{year}-{int(month):02d}{Path(filename).suffix}'
2024-07-10 16:08:47 +08:00
# 网络价格
2024-07-11 09:13:30 +08:00
elif is_excel and is_fuzhou and '网络价格(' in filename and '通闽公司' not in filename and '5.福州' not in filename:
year, month = get_month(filename)
print(year, month, filename)
yield zip_ref, file_info, f'./网络价格-福州-{year}-{int(month):02d}{Path(filename).suffix}'
2024-07-10 16:08:47 +08:00
# 改性剂
2024-07-11 09:13:30 +08:00
elif is_excel and '改性剂和沙钢' in filename :
year, month = get_month(filename)
print(year, month, filename)
yield zip_ref, file_info, f'./改性剂-福州-{year}-{int(month):02d}{Path(filename).suffix}'
2024-07-10 16:08:47 +08:00
# 三明钢铁
2024-07-11 09:13:30 +08:00
elif is_excel and '2.三明钢铁' in filename:
year, month = get_month(filename)
print(year, month, filename)
yield zip_ref, file_info, f'./三明钢铁-福州-{year}-{int(month):02d}{Path(filename).suffix}'
def main():
for zip_ref, file_info, target_file_path in unpack():
with open(target_file_path, 'wb') as f:
f.write(zip_ref.read(file_info))
if target_file_path.endswith('.xls'):
x2x = XLS2XLSX(target_file_path)
x2x.to_xlsx(target_file_path.replace('.xls', '.xlsx'))
class Cleaner:
city = '福州市'
@classmethod
def clean_gxj(cls, table=r'E:\Project\material_api\web\scripts\改性剂-福州-2022-12.xlsx'):
"""
改性剂
"""
wb = load_workbook(file_path)
ws = wb.active
_, _, year, month, *_ = file.split('.')[0].split('-')
name = None
for row in ws.iter_rows(values_only=True):
for k, cell in enumerate(row):
if not name and cell == '日期':
name = row[k+1]
if isinstance(cell, datetime.datetime):
start = datetime.datetime(int(year), int(month), 1)
end = start + relativedelta(months=1)
if start <= cell < end:
print(year, month, name, cell, row[k+1])
yield {
'name': name,
'spec': '',
'price': row[k+1],
'date': cell,
}
@classmethod
def clean_ss(cls, table=r'E:\Project\material_api\web\scripts\三明钢铁-福州-2022-12.xlsx'):
"""
三明钢铁
"""
wb = load_workbook(file_path)
ws = wb.active
_, _, year, month, *_ = file.split('.')[0].split('-')
result = []
for i, row in enumerate(ws.iter_rows(values_only=True)):
if isinstance(row[0], str) and ('材料编号' in row[0] or '三明钢铁' in row[0]):
continue
material, name, spec, _, _, price, *_ = row
if not name:
name = result[-1][3]
# if not spec:
# spec = result[-1][4]
item = (year, month, material, name, spec, price)
result.append(item)
if not item[5]:
continue
print(item)
yield {
'name': name.replace(' ', '') if name else '',
'spec': spec.replace(' ', '') if spec else '',
'material': material,
'price': price,
'fluctuating': None,
'date': datetime.datetime(int(year), int(month), 1),
}
@classmethod
def clean_jtj(cls, table=r'E:\Project\material_api\web\scripts\交通局-福州-2022-12.xlsx'):
"""
交通局
"""
wb = load_workbook(file_path)
ws = wb.active
_, _, year, month, *_ = file.split('.')[0].split('-')
result = []
for i, row in enumerate(ws.iter_rows(values_only=True)):
if isinstance(row[0], str) and ('福建' in row[0] or '' in row[0] or '材料' in row[0] or '电话' in row[0]):
continue
if not row[0] and not row[1] and not row[2]:
continue
material, name, spec, unit, brand, price, *_ = row
# if not name:
# name = result[-1][3]
# if not spec:
# spec = result[-1][4]
item = (year, month, material, name, spec, unit, brand, price)
result.append(item)
if not item[7] or item[7] == '-':
continue
print(item)
yield {
'name': name.replace(' ', '') if name else '',
'spec': spec.replace(' ', '') if spec else '',
'price': price,
'date': datetime.datetime(int(year), int(month), 1),
'material_id': material,
'unit': unit,
'brand': brand,
'region': cls.city.replace('', ''),
}
@classmethod
def clean_glj(cls, table=r'E:\Project\material_api\web\scripts\公路局-福州-2022-12.xlsx'):
"""
公路局
"""
wb = load_workbook(file_path)
ws = wb.active
_, _, year, month, *_ = file.split('.')[0].split('-')
result = []
for i, row in enumerate(ws.iter_rows(values_only=True)):
if isinstance(row[0], str) and ('福建' in row[0] or '' in row[0] or '材料' in row[0] or '电话' in row[0]
or '附件' in row[0] or '填报' in row[0]):
continue
if not row[0] and not row[1] and not row[2]:
continue
material, name, spec, unit, brand, price, source, remark, *_ = row
if not name:
name = result[-1][3]
# if not spec:
# spec = result[-1][4]
item = (year, month, material, name, spec, unit, brand, price)
result.append(item)
if not item[7] or item[7] == '-':
continue
print(item)
yield {
'name': name.replace(' ', '') if name else '',
'spec': spec.replace(' ', '') if spec else '',
'price': price,
'date': datetime.datetime(int(year), int(month), 1),
'material_id': material,
'unit': unit,
'brand': brand,
'region': cls.city.replace('', ''),
}
@classmethod
def clean_dcb(cls, table=r'E:\Project\material_api\web\scripts\调查表-福州-2022-12.xlsx'):
"""
调查表
"""
wb = load_workbook(file_path)
ws = wb.active
_, _, year, month, *_ = file.split('.')[0].split('-')
result = []
for i, row in enumerate(ws.iter_rows(values_only=True)):
print(row)
if isinstance(row[0], str) and ('福建' in row[0] or '' in row[0] or '材料' in row[0] or '电话' in row[0]
or '附件' in row[0] or '填报' in row[0]):
continue
if not row[0] and not row[1] and not row[2]:
continue
material, name, spec, unit, brand, price, price_without_tax, tax, source, remark, *_ = row
# if not name:
# name = result[-1][3]
# # if not spec:
# # spec = result[-1][4]
item = (year, month, material, name, spec, unit, brand, price)
result.append(item)
if not price or price == '-':
continue
print(item)
yield {
'name': name.replace(' ', '') if name else '',
'spec': spec.replace(' ', '') if spec else '',
'price': price,
'date': datetime.datetime(int(year), int(month), 1),
'material_id': material,
'unit': unit,
'brand': brand,
'tax': tax,
'region': cls.city.replace('', ''),
}
@classmethod
def clean_network(cls, table=r'E:\Project\material_api\web\scripts\网络价格-福州-2022-12.xlsx'):
"""
网络价格
"""
wb = load_workbook(file_path)
ws = wb.active
_, _, year, month, *_ = file.split('.')[0].split('-')
result = []
for i, row in enumerate(ws.iter_rows(values_only=True)):
if isinstance(row[0], str) and ('网络' in row[0] or '' in row[0] or '材料' in row[0] or '电话' in row[0]
or '附件' in row[0] or '填报' in row[0]):
continue
if not row[0] and not row[1] and not row[2]:
continue
material, name, spec, unit, brand, price, _, remark, source, *_ = row
# # if not name:
# # name = result[-1][3]
# # # if not spec:
# # # spec = result[-1][4]
item = (year, month, material, name, spec, unit, brand, price, source, remark)
result.append(item)
if not item[7] or item[7] == '-':
continue
print(item)
yield {
'material_id': material,
'spec': spec.replace(' ', '') if spec else '',
'unit': unit,
'brand': brand,
'name': name.replace(' ', '') if name else '',
'price': price,
'source': source,
'remark': remark,
'date': datetime.datetime(int(year), int(month), 1),
'region': cls.city.replace('', ''),
}
@classmethod
def clean_local(cls, table=r'E:\Project\material_api\web\scripts\地材-福州-2022-12.xlsx'):
"""
地材
"""
wb = load_workbook(file_path)
ws = wb.active
_, _, year, month, *_ = file.split('.')[0].split('-')
result = []
for i, row in enumerate(ws.iter_rows(values_only=True)):
if isinstance(row[0], str) and ('汇总表' in row[0] or '' in row[0] or '材料' in row[0] or '电话' in row[0]
or '附件' in row[0] or '填报' in row[0] or '单位' in row[0] or '序号' in row[0]):
continue
if not row[0] and not row[1] and not row[2]:
continue
_, county, name, spec, unit, price, price_without_tax, position, remark, *_ = row
if not county:
county = result[-1][2]
if not name:
name = result[-1][3]
item = (year, month, county, name, spec, unit, price, price_without_tax, position, remark)
result.append(item)
if not price or price in ('-', '', '/'):
continue
if not price_without_tax or price_without_tax in ('-', '', '/'):
price_without_tax = 0
print(item)
yield {
'name': name.replace(' ', '') if name else '',
'city': cls.city,
'county': county,
'material_id': '',
'spec': spec.replace(' ', '') if spec else '',
'unit': unit,
'price': price,
'price_without_tax': price_without_tax,
'date': datetime.datetime(int(year), int(month), 1),
'position': position,
'remark': remark,
}
2024-07-10 16:08:47 +08:00
def get_month(filename):
if '年信息价/' in filename:
year = filename.split('年信息价')[0].split('/')[-1]
month = filename.split('月发布)')[0].split('')[-1]
return year, month
elif '月份地材汇总/' in filename:
year = filename.split('年地材/')[0].split('/')[-1]
month = filename.split('月份地材汇总')[0].split('')[-1]
return year, month
if __name__ == '__main__':
2024-07-11 09:13:30 +08:00
from core.factory import ClientApp
with ClientApp().app_context():
for root, dirs, files in os.walk(r'E:\Project\material_api\web\scripts'):
for file in files:
file_path = os.path.join(root, file)
if '改性剂' in file and '.xlsx' in file:
for item in Cleaner.clean_gxj(file_path):
AsphaltModifier(**item).upsert()
if '三明钢铁' in file and '.xlsx' in file:
for item in Cleaner.clean_ss(file_path):
SanmingSteel(**item).upsert()
if '交通局' in file and '.xlsx' in file:
for item in Cleaner.clean_jtj(file_path):
FuzhouTransportationBureau(**item).upsert()
if '公路局' in file and '.xlsx' in file:
for item in Cleaner.clean_glj(file_path):
FuzhouHighwayBureau(**item).upsert()
if '调查表' in file and '.xlsx' in file:
for item in Cleaner.clean_dcb(file_path):
FujianSurvey(**item).upsert()
if '网络价格' in file and '.xlsx' in file:
for item in Cleaner.clean_network(file_path):
DataNetwork(**item).upsert()
if '地材' in file and '.xlsx' in file:
for item in Cleaner.clean_local(file_path):
LocalMaterial(**item).upsert()