import datetime import os import zipfile from pathlib import Path from dateutil.relativedelta import relativedelta from openpyxl import load_workbook from xls2xlsx import XLS2XLSX from commons.models.asphalt_modifier import AsphaltModifier from commons.models.data_network import DataNetwork from commons.models.fujian_survey import FujianSurvey from commons.models.fuzhou_highway_bureau import FuzhouHighwayBureau from commons.models.fuzhou_transportation_bureau import FuzhouTransportationBureau from commons.models.local_material import LocalMaterial from commons.models.sanming_steel import SanmingSteel def unpack(zip_path=r'C:\Users\Administrator\Desktop\材料管理系统模版\造价站近两年价格数据.zip'): with zipfile.ZipFile(zip_path, 'r') as zip_ref: for file_info in zip_ref.infolist(): filename = file_info.filename.encode('cp437').decode('gbk') is_excel = filename.endswith('.xlsx') or filename.endswith('.xls') is_fuzhou = '福州' in filename # 地材 if is_excel and is_fuzhou and '地材汇总/' in filename and '福州市交通工程地方材料价格信息汇总表(数据处理)' not in filename\ and '混凝土价格' not in filename: year, month = get_month(filename) print(year, month, filename) yield zip_ref, file_info, f'./地材-福州-{year}-{int(month):02d}{Path(filename).suffix}' # 调查表 elif is_excel and is_fuzhou and '11地市调查表/' in filename and '福建省' not in filename and '通闽公司' not in filename and '-' not in filename: year, month = get_month(filename) print(year, month, filename) yield zip_ref, file_info, f'./调查表-福州-{year}-{int(month):02d}{Path(filename).suffix}' # 公路局 elif is_excel and is_fuzhou and '4.福州公路' in filename: year, month = get_month(filename) print(year, month, filename) yield zip_ref, file_info, f'./公路局-福州-{year}-{int(month):02d}{Path(filename).suffix}' # 交通局 elif is_excel and is_fuzhou and '1.福州交通局' in filename: year, month = get_month(filename) print(year, month, filename) yield zip_ref, file_info, f'./交通局-福州-{year}-{int(month):02d}{Path(filename).suffix}' # 网络价格 elif is_excel and is_fuzhou and '网络价格(' in filename and '通闽公司' not in filename and '5.福州' not in filename: year, month = get_month(filename) print(year, month, filename) yield zip_ref, file_info, f'./网络价格-福州-{year}-{int(month):02d}{Path(filename).suffix}' # 改性剂 elif is_excel and '改性剂和沙钢' in filename : year, month = get_month(filename) print(year, month, filename) yield zip_ref, file_info, f'./改性剂-福州-{year}-{int(month):02d}{Path(filename).suffix}' # 三明钢铁 elif is_excel and '2.三明钢铁' in filename: year, month = get_month(filename) print(year, month, filename) yield zip_ref, file_info, f'./三明钢铁-福州-{year}-{int(month):02d}{Path(filename).suffix}' def main(): for zip_ref, file_info, target_file_path in unpack(): with open(target_file_path, 'wb') as f: f.write(zip_ref.read(file_info)) if target_file_path.endswith('.xls'): x2x = XLS2XLSX(target_file_path) x2x.to_xlsx(target_file_path.replace('.xls', '.xlsx')) class Cleaner: city = '福州市' @classmethod def clean_gxj(cls, table=r'E:\Project\material_api\web\scripts\改性剂-福州-2022-12.xlsx'): """ 改性剂 """ wb = load_workbook(file_path) ws = wb.active _, _, year, month, *_ = file.split('.')[0].split('-') name = None for row in ws.iter_rows(values_only=True): for k, cell in enumerate(row): if not name and cell == '日期': name = row[k+1] if isinstance(cell, datetime.datetime): start = datetime.datetime(int(year), int(month), 1) end = start + relativedelta(months=1) if start <= cell < end: print(year, month, name, cell, row[k+1]) yield { 'name': name, 'spec': '', 'price': row[k+1], 'date': cell, } @classmethod def clean_ss(cls, table=r'E:\Project\material_api\web\scripts\三明钢铁-福州-2022-12.xlsx'): """ 三明钢铁 """ wb = load_workbook(file_path) ws = wb.active _, _, year, month, *_ = file.split('.')[0].split('-') result = [] for i, row in enumerate(ws.iter_rows(values_only=True)): if isinstance(row[0], str) and ('材料编号' in row[0] or '三明钢铁' in row[0]): continue material, name, spec, _, _, price, *_ = row if not name: name = result[-1][3] # if not spec: # spec = result[-1][4] item = (year, month, material, name, spec, price) result.append(item) if not item[5]: continue print(item) yield { 'name': name.replace(' ', '') if name else '', 'spec': spec.replace(' ', '') if spec else '', 'material': material, 'price': price, 'fluctuating': None, 'date': datetime.datetime(int(year), int(month), 1), } @classmethod def clean_jtj(cls, table=r'E:\Project\material_api\web\scripts\交通局-福州-2022-12.xlsx'): """ 交通局 """ wb = load_workbook(file_path) ws = wb.active _, _, year, month, *_ = file.split('.')[0].split('-') result = [] for i, row in enumerate(ws.iter_rows(values_only=True)): if isinstance(row[0], str) and ('福建' in row[0] or '、' in row[0] or '材料' in row[0] or '电话' in row[0]): continue if not row[0] and not row[1] and not row[2]: continue material, name, spec, unit, brand, price, *_ = row # if not name: # name = result[-1][3] # if not spec: # spec = result[-1][4] item = (year, month, material, name, spec, unit, brand, price) result.append(item) if not item[7] or item[7] == '-': continue print(item) yield { 'name': name.replace(' ', '') if name else '', 'spec': spec.replace(' ', '') if spec else '', 'price': price, 'date': datetime.datetime(int(year), int(month), 1), 'material_id': material, 'unit': unit, 'brand': brand, 'region': cls.city.replace('市', ''), } @classmethod def clean_glj(cls, table=r'E:\Project\material_api\web\scripts\公路局-福州-2022-12.xlsx'): """ 公路局 """ wb = load_workbook(file_path) ws = wb.active _, _, year, month, *_ = file.split('.')[0].split('-') result = [] for i, row in enumerate(ws.iter_rows(values_only=True)): if isinstance(row[0], str) and ('福建' in row[0] or '、' in row[0] or '材料' in row[0] or '电话' in row[0] or '附件' in row[0] or '填报' in row[0]): continue if not row[0] and not row[1] and not row[2]: continue material, name, spec, unit, brand, price, source, remark, *_ = row if not name: name = result[-1][3] # if not spec: # spec = result[-1][4] item = (year, month, material, name, spec, unit, brand, price) result.append(item) if not item[7] or item[7] == '-': continue print(item) yield { 'name': name.replace(' ', '') if name else '', 'spec': spec.replace(' ', '') if spec else '', 'price': price, 'date': datetime.datetime(int(year), int(month), 1), 'material_id': material, 'unit': unit, 'brand': brand, 'region': cls.city.replace('市', ''), } @classmethod def clean_dcb(cls, table=r'E:\Project\material_api\web\scripts\调查表-福州-2022-12.xlsx'): """ 调查表 """ wb = load_workbook(file_path) ws = wb.active _, _, year, month, *_ = file.split('.')[0].split('-') result = [] for i, row in enumerate(ws.iter_rows(values_only=True)): print(row) if isinstance(row[0], str) and ('福建' in row[0] or '、' in row[0] or '材料' in row[0] or '电话' in row[0] or '附件' in row[0] or '填报' in row[0]): continue if not row[0] and not row[1] and not row[2]: continue material, name, spec, unit, brand, price, price_without_tax, tax, source, remark, *_ = row # if not name: # name = result[-1][3] # # if not spec: # # spec = result[-1][4] item = (year, month, material, name, spec, unit, brand, price) result.append(item) if not price or price == '-': continue print(item) yield { 'name': name.replace(' ', '') if name else '', 'spec': spec.replace(' ', '') if spec else '', 'price': price, 'date': datetime.datetime(int(year), int(month), 1), 'material_id': material, 'unit': unit, 'brand': brand, 'tax': tax, 'region': cls.city.replace('市', ''), } @classmethod def clean_network(cls, table=r'E:\Project\material_api\web\scripts\网络价格-福州-2022-12.xlsx'): """ 网络价格 """ wb = load_workbook(file_path) ws = wb.active _, _, year, month, *_ = file.split('.')[0].split('-') result = [] for i, row in enumerate(ws.iter_rows(values_only=True)): if isinstance(row[0], str) and ('网络' in row[0] or '、' in row[0] or '材料' in row[0] or '电话' in row[0] or '附件' in row[0] or '填报' in row[0]): continue if not row[0] and not row[1] and not row[2]: continue material, name, spec, unit, brand, price, _, remark, source, *_ = row # # if not name: # # name = result[-1][3] # # # if not spec: # # # spec = result[-1][4] item = (year, month, material, name, spec, unit, brand, price, source, remark) result.append(item) if not item[7] or item[7] == '-': continue print(item) yield { 'material_id': material, 'spec': spec.replace(' ', '') if spec else '', 'unit': unit, 'brand': brand, 'name': name.replace(' ', '') if name else '', 'price': price, 'source': source, 'remark': remark, 'date': datetime.datetime(int(year), int(month), 1), 'region': cls.city.replace('市', ''), } @classmethod def clean_local(cls, table=r'E:\Project\material_api\web\scripts\地材-福州-2022-12.xlsx'): """ 地材 """ wb = load_workbook(file_path) ws = wb.active _, _, year, month, *_ = file.split('.')[0].split('-') result = [] for i, row in enumerate(ws.iter_rows(values_only=True)): if isinstance(row[0], str) and ('汇总表' in row[0] or '、' in row[0] or '材料' in row[0] or '电话' in row[0] or '附件' in row[0] or '填报' in row[0] or '单位' in row[0] or '序号' in row[0]): continue if not row[0] and not row[1] and not row[2]: continue _, county, name, spec, unit, price, price_without_tax, position, remark, *_ = row if not county: county = result[-1][2] if not name: name = result[-1][3] item = (year, month, county, name, spec, unit, price, price_without_tax, position, remark) result.append(item) if not price or price in ('-', '∕', '/'): continue if not price_without_tax or price_without_tax in ('-', '∕', '/'): price_without_tax = 0 print(item) yield { 'name': name.replace(' ', '') if name else '', 'city': cls.city, 'county': county, 'material_id': '', 'spec': spec.replace(' ', '') if spec else '', 'unit': unit, 'price': price, 'price_without_tax': price_without_tax, 'date': datetime.datetime(int(year), int(month), 1), 'position': position, 'remark': remark, } def get_month(filename): if '年信息价/' in filename: year = filename.split('年信息价')[0].split('/')[-1] month = filename.split('月发布)')[0].split('(')[-1] return year, month elif '月份地材汇总/' in filename: year = filename.split('年地材/')[0].split('/')[-1] month = filename.split('月份地材汇总')[0].split('年')[-1] return year, month if __name__ == '__main__': from core.factory import ClientApp with ClientApp().app_context(): for root, dirs, files in os.walk(r'E:\Project\material_api\web\scripts'): for file in files: file_path = os.path.join(root, file) if '改性剂' in file and '.xlsx' in file: for item in Cleaner.clean_gxj(file_path): AsphaltModifier(**item).upsert() if '三明钢铁' in file and '.xlsx' in file: for item in Cleaner.clean_ss(file_path): SanmingSteel(**item).upsert() if '交通局' in file and '.xlsx' in file: for item in Cleaner.clean_jtj(file_path): FuzhouTransportationBureau(**item).upsert() if '公路局' in file and '.xlsx' in file: for item in Cleaner.clean_glj(file_path): FuzhouHighwayBureau(**item).upsert() if '调查表' in file and '.xlsx' in file: for item in Cleaner.clean_dcb(file_path): FujianSurvey(**item).upsert() if '网络价格' in file and '.xlsx' in file: for item in Cleaner.clean_network(file_path): DataNetwork(**item).upsert() if '地材' in file and '.xlsx' in file: for item in Cleaner.clean_local(file_path): LocalMaterial(**item).upsert()