Files
material-api/web/scripts/unpack_history_data.py
2024-07-11 09:13:30 +08:00

358 lines
16 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import datetime
import os
import zipfile
from pathlib import Path
from dateutil.relativedelta import relativedelta
from openpyxl import load_workbook
from xls2xlsx import XLS2XLSX
from commons.models.asphalt_modifier import AsphaltModifier
from commons.models.data_network import DataNetwork
from commons.models.fujian_survey import FujianSurvey
from commons.models.fuzhou_highway_bureau import FuzhouHighwayBureau
from commons.models.fuzhou_transportation_bureau import FuzhouTransportationBureau
from commons.models.local_material import LocalMaterial
from commons.models.sanming_steel import SanmingSteel
def unpack(zip_path=r'C:\Users\Administrator\Desktop\材料管理系统模版\造价站近两年价格数据.zip'):
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
for file_info in zip_ref.infolist():
filename = file_info.filename.encode('cp437').decode('gbk')
is_excel = filename.endswith('.xlsx') or filename.endswith('.xls')
is_fuzhou = '福州' in filename
# 地材
if is_excel and is_fuzhou and '地材汇总/' in filename and '福州市交通工程地方材料价格信息汇总表(数据处理)' not in filename\
and '混凝土价格' not in filename:
year, month = get_month(filename)
print(year, month, filename)
yield zip_ref, file_info, f'./地材-福州-{year}-{int(month):02d}{Path(filename).suffix}'
# 调查表
elif is_excel and is_fuzhou and '11地市调查表/' in filename and '福建省' not in filename and '通闽公司' not in filename and '-' not in filename:
year, month = get_month(filename)
print(year, month, filename)
yield zip_ref, file_info, f'./调查表-福州-{year}-{int(month):02d}{Path(filename).suffix}'
# 公路局
elif is_excel and is_fuzhou and '4.福州公路' in filename:
year, month = get_month(filename)
print(year, month, filename)
yield zip_ref, file_info, f'./公路局-福州-{year}-{int(month):02d}{Path(filename).suffix}'
# 交通局
elif is_excel and is_fuzhou and '1.福州交通局' in filename:
year, month = get_month(filename)
print(year, month, filename)
yield zip_ref, file_info, f'./交通局-福州-{year}-{int(month):02d}{Path(filename).suffix}'
# 网络价格
elif is_excel and is_fuzhou and '网络价格(' in filename and '通闽公司' not in filename and '5.福州' not in filename:
year, month = get_month(filename)
print(year, month, filename)
yield zip_ref, file_info, f'./网络价格-福州-{year}-{int(month):02d}{Path(filename).suffix}'
# 改性剂
elif is_excel and '改性剂和沙钢' in filename :
year, month = get_month(filename)
print(year, month, filename)
yield zip_ref, file_info, f'./改性剂-福州-{year}-{int(month):02d}{Path(filename).suffix}'
# 三明钢铁
elif is_excel and '2.三明钢铁' in filename:
year, month = get_month(filename)
print(year, month, filename)
yield zip_ref, file_info, f'./三明钢铁-福州-{year}-{int(month):02d}{Path(filename).suffix}'
def main():
for zip_ref, file_info, target_file_path in unpack():
with open(target_file_path, 'wb') as f:
f.write(zip_ref.read(file_info))
if target_file_path.endswith('.xls'):
x2x = XLS2XLSX(target_file_path)
x2x.to_xlsx(target_file_path.replace('.xls', '.xlsx'))
class Cleaner:
city = '福州市'
@classmethod
def clean_gxj(cls, table=r'E:\Project\material_api\web\scripts\改性剂-福州-2022-12.xlsx'):
"""
改性剂
"""
wb = load_workbook(file_path)
ws = wb.active
_, _, year, month, *_ = file.split('.')[0].split('-')
name = None
for row in ws.iter_rows(values_only=True):
for k, cell in enumerate(row):
if not name and cell == '日期':
name = row[k+1]
if isinstance(cell, datetime.datetime):
start = datetime.datetime(int(year), int(month), 1)
end = start + relativedelta(months=1)
if start <= cell < end:
print(year, month, name, cell, row[k+1])
yield {
'name': name,
'spec': '',
'price': row[k+1],
'date': cell,
}
@classmethod
def clean_ss(cls, table=r'E:\Project\material_api\web\scripts\三明钢铁-福州-2022-12.xlsx'):
"""
三明钢铁
"""
wb = load_workbook(file_path)
ws = wb.active
_, _, year, month, *_ = file.split('.')[0].split('-')
result = []
for i, row in enumerate(ws.iter_rows(values_only=True)):
if isinstance(row[0], str) and ('材料编号' in row[0] or '三明钢铁' in row[0]):
continue
material, name, spec, _, _, price, *_ = row
if not name:
name = result[-1][3]
# if not spec:
# spec = result[-1][4]
item = (year, month, material, name, spec, price)
result.append(item)
if not item[5]:
continue
print(item)
yield {
'name': name.replace(' ', '') if name else '',
'spec': spec.replace(' ', '') if spec else '',
'material': material,
'price': price,
'fluctuating': None,
'date': datetime.datetime(int(year), int(month), 1),
}
@classmethod
def clean_jtj(cls, table=r'E:\Project\material_api\web\scripts\交通局-福州-2022-12.xlsx'):
"""
交通局
"""
wb = load_workbook(file_path)
ws = wb.active
_, _, year, month, *_ = file.split('.')[0].split('-')
result = []
for i, row in enumerate(ws.iter_rows(values_only=True)):
if isinstance(row[0], str) and ('福建' in row[0] or '' in row[0] or '材料' in row[0] or '电话' in row[0]):
continue
if not row[0] and not row[1] and not row[2]:
continue
material, name, spec, unit, brand, price, *_ = row
# if not name:
# name = result[-1][3]
# if not spec:
# spec = result[-1][4]
item = (year, month, material, name, spec, unit, brand, price)
result.append(item)
if not item[7] or item[7] == '-':
continue
print(item)
yield {
'name': name.replace(' ', '') if name else '',
'spec': spec.replace(' ', '') if spec else '',
'price': price,
'date': datetime.datetime(int(year), int(month), 1),
'material_id': material,
'unit': unit,
'brand': brand,
'region': cls.city.replace('', ''),
}
@classmethod
def clean_glj(cls, table=r'E:\Project\material_api\web\scripts\公路局-福州-2022-12.xlsx'):
"""
公路局
"""
wb = load_workbook(file_path)
ws = wb.active
_, _, year, month, *_ = file.split('.')[0].split('-')
result = []
for i, row in enumerate(ws.iter_rows(values_only=True)):
if isinstance(row[0], str) and ('福建' in row[0] or '' in row[0] or '材料' in row[0] or '电话' in row[0]
or '附件' in row[0] or '填报' in row[0]):
continue
if not row[0] and not row[1] and not row[2]:
continue
material, name, spec, unit, brand, price, source, remark, *_ = row
if not name:
name = result[-1][3]
# if not spec:
# spec = result[-1][4]
item = (year, month, material, name, spec, unit, brand, price)
result.append(item)
if not item[7] or item[7] == '-':
continue
print(item)
yield {
'name': name.replace(' ', '') if name else '',
'spec': spec.replace(' ', '') if spec else '',
'price': price,
'date': datetime.datetime(int(year), int(month), 1),
'material_id': material,
'unit': unit,
'brand': brand,
'region': cls.city.replace('', ''),
}
@classmethod
def clean_dcb(cls, table=r'E:\Project\material_api\web\scripts\调查表-福州-2022-12.xlsx'):
"""
调查表
"""
wb = load_workbook(file_path)
ws = wb.active
_, _, year, month, *_ = file.split('.')[0].split('-')
result = []
for i, row in enumerate(ws.iter_rows(values_only=True)):
print(row)
if isinstance(row[0], str) and ('福建' in row[0] or '' in row[0] or '材料' in row[0] or '电话' in row[0]
or '附件' in row[0] or '填报' in row[0]):
continue
if not row[0] and not row[1] and not row[2]:
continue
material, name, spec, unit, brand, price, price_without_tax, tax, source, remark, *_ = row
# if not name:
# name = result[-1][3]
# # if not spec:
# # spec = result[-1][4]
item = (year, month, material, name, spec, unit, brand, price)
result.append(item)
if not price or price == '-':
continue
print(item)
yield {
'name': name.replace(' ', '') if name else '',
'spec': spec.replace(' ', '') if spec else '',
'price': price,
'date': datetime.datetime(int(year), int(month), 1),
'material_id': material,
'unit': unit,
'brand': brand,
'tax': tax,
'region': cls.city.replace('', ''),
}
@classmethod
def clean_network(cls, table=r'E:\Project\material_api\web\scripts\网络价格-福州-2022-12.xlsx'):
"""
网络价格
"""
wb = load_workbook(file_path)
ws = wb.active
_, _, year, month, *_ = file.split('.')[0].split('-')
result = []
for i, row in enumerate(ws.iter_rows(values_only=True)):
if isinstance(row[0], str) and ('网络' in row[0] or '' in row[0] or '材料' in row[0] or '电话' in row[0]
or '附件' in row[0] or '填报' in row[0]):
continue
if not row[0] and not row[1] and not row[2]:
continue
material, name, spec, unit, brand, price, _, remark, source, *_ = row
# # if not name:
# # name = result[-1][3]
# # # if not spec:
# # # spec = result[-1][4]
item = (year, month, material, name, spec, unit, brand, price, source, remark)
result.append(item)
if not item[7] or item[7] == '-':
continue
print(item)
yield {
'material_id': material,
'spec': spec.replace(' ', '') if spec else '',
'unit': unit,
'brand': brand,
'name': name.replace(' ', '') if name else '',
'price': price,
'source': source,
'remark': remark,
'date': datetime.datetime(int(year), int(month), 1),
'region': cls.city.replace('', ''),
}
@classmethod
def clean_local(cls, table=r'E:\Project\material_api\web\scripts\地材-福州-2022-12.xlsx'):
"""
地材
"""
wb = load_workbook(file_path)
ws = wb.active
_, _, year, month, *_ = file.split('.')[0].split('-')
result = []
for i, row in enumerate(ws.iter_rows(values_only=True)):
if isinstance(row[0], str) and ('汇总表' in row[0] or '' in row[0] or '材料' in row[0] or '电话' in row[0]
or '附件' in row[0] or '填报' in row[0] or '单位' in row[0] or '序号' in row[0]):
continue
if not row[0] and not row[1] and not row[2]:
continue
_, county, name, spec, unit, price, price_without_tax, position, remark, *_ = row
if not county:
county = result[-1][2]
if not name:
name = result[-1][3]
item = (year, month, county, name, spec, unit, price, price_without_tax, position, remark)
result.append(item)
if not price or price in ('-', '', '/'):
continue
if not price_without_tax or price_without_tax in ('-', '', '/'):
price_without_tax = 0
print(item)
yield {
'name': name.replace(' ', '') if name else '',
'city': cls.city,
'county': county,
'material_id': '',
'spec': spec.replace(' ', '') if spec else '',
'unit': unit,
'price': price,
'price_without_tax': price_without_tax,
'date': datetime.datetime(int(year), int(month), 1),
'position': position,
'remark': remark,
}
def get_month(filename):
if '年信息价/' in filename:
year = filename.split('年信息价')[0].split('/')[-1]
month = filename.split('月发布)')[0].split('')[-1]
return year, month
elif '月份地材汇总/' in filename:
year = filename.split('年地材/')[0].split('/')[-1]
month = filename.split('月份地材汇总')[0].split('')[-1]
return year, month
if __name__ == '__main__':
from core.factory import ClientApp
with ClientApp().app_context():
for root, dirs, files in os.walk(r'E:\Project\material_api\web\scripts'):
for file in files:
file_path = os.path.join(root, file)
if '改性剂' in file and '.xlsx' in file:
for item in Cleaner.clean_gxj(file_path):
AsphaltModifier(**item).upsert()
if '三明钢铁' in file and '.xlsx' in file:
for item in Cleaner.clean_ss(file_path):
SanmingSteel(**item).upsert()
if '交通局' in file and '.xlsx' in file:
for item in Cleaner.clean_jtj(file_path):
FuzhouTransportationBureau(**item).upsert()
if '公路局' in file and '.xlsx' in file:
for item in Cleaner.clean_glj(file_path):
FuzhouHighwayBureau(**item).upsert()
if '调查表' in file and '.xlsx' in file:
for item in Cleaner.clean_dcb(file_path):
FujianSurvey(**item).upsert()
if '网络价格' in file and '.xlsx' in file:
for item in Cleaner.clean_network(file_path):
DataNetwork(**item).upsert()
if '地材' in file and '.xlsx' in file:
for item in Cleaner.clean_local(file_path):
LocalMaterial(**item).upsert()