diff --git a/requirements.txt b/requirements.txt
index 316d6d9..0e2dbe5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -24,4 +24,7 @@
 gunicorn
 scrapy
 Twisted==22.10.0
-selenium
\ No newline at end of file
+selenium
+
+openpyxl
+xls2xlsx
\ No newline at end of file
diff --git a/web/commons/models/asphalt_modifier.py b/web/commons/models/asphalt_modifier.py
index 6561037..8beba97 100644
--- a/web/commons/models/asphalt_modifier.py
+++ b/web/commons/models/asphalt_modifier.py
@@ -3,10 +3,12 @@ import datetime
 from dateutil.relativedelta import relativedelta
 from sqlalchemy import Column, Integer, String, Numeric, Date, UniqueConstraint
 
+from commons.models.mixin.base import BaseModelMixin
+from commons.models.model import Model
 from core.extensions import db
 
 
-class AsphaltModifier(db.Model):
+class AsphaltModifier(db.Model, Model, BaseModelMixin):
     __tablename__ = 'ASPHALT_MODIFIER'
     id = Column('ID', Integer, primary_key=True)
     name = Column('NAME', String(128), default='', comment='名称')
@@ -19,6 +21,15 @@ class AsphaltModifier(db.Model):
         {'comment': '沥青改性剂'},
     )
 
+    def find_by_key(self):
+        """Return the stored row with the same name, spec and date as this instance, or None."""
+        query = AsphaltModifier.query
+        query = query.filter(AsphaltModifier.name == self.name)
+        query = query.filter(AsphaltModifier.spec == self.spec)
+        query = query.filter(AsphaltModifier.date == self.date)
+        result = query.one_or_none()
+        return result
+
     @classmethod
     def get_query(cls, year=None, month=None, name=None, spec=None, name_in=None):
         query = cls.query
diff --git a/web/commons/models/fujian_survey.py b/web/commons/models/fujian_survey.py
index 7083a01..9af2807 100644
--- a/web/commons/models/fujian_survey.py
+++ b/web/commons/models/fujian_survey.py
@@ -3,10 +3,12 @@ import datetime
 from dateutil.relativedelta import relativedelta
 from sqlalchemy import Column, Integer, String, Date, UniqueConstraint, Numeric
 
+from commons.models.mixin.base import BaseModelMixin
+from commons.models.model import Model
 from core.extensions import db
 
 
-class FujianSurvey(db.Model):
+class FujianSurvey(db.Model, Model, BaseModelMixin):
     __tablename__ = 'FUJIAN_SURVEY'
     id = Column('ID', Integer, primary_key=True)
     name = Column('NAME', String(128), default='', comment='名称')
@@ -20,10 +22,20 @@ class FujianSurvey(db.Model):
     region = Column('REGION', String(128), comment='地区')
 
     __table_args__ = (
-        UniqueConstraint(name, spec, date, name='Idx_key'),
+        UniqueConstraint(name, spec, date, region, name='Idx_key'),
         {'comment': '福建省交通工程材料调查表'},
     )
 
+    def find_by_key(self):
+        """Return the stored row with the same name, spec, region and date as this instance, or None."""
+        query = FujianSurvey.query
+        query = query.filter(FujianSurvey.name == self.name)
+        query = query.filter(FujianSurvey.spec == self.spec)
+        query = query.filter(FujianSurvey.region == self.region)
+        query = query.filter(FujianSurvey.date == self.date)
+        result = query.one_or_none()
+        return result
+
     @classmethod
     def get_query(cls, year=None, month=None, name=None, spec=None, name_in=None, region='福州'):
         query = cls.query
diff --git a/web/commons/models/fuzhou_highway_bureau.py b/web/commons/models/fuzhou_highway_bureau.py
index c7b8f57..d9f7a64 100644
--- a/web/commons/models/fuzhou_highway_bureau.py
+++ b/web/commons/models/fuzhou_highway_bureau.py
@@ -3,10 +3,12 @@ import datetime
 from dateutil.relativedelta import relativedelta
 from sqlalchemy import Column, Integer, String, Date, UniqueConstraint, Numeric
 
+from commons.models.mixin.base import BaseModelMixin
+from commons.models.model import Model
 from core.extensions import db
 
 
-class FuzhouHighwayBureau(db.Model):
+class FuzhouHighwayBureau(db.Model, Model, BaseModelMixin):
     __tablename__ = 'FUZHOU_HIGHWAY_BUREAU'
     id = Column('ID', Integer, primary_key=True)
     name = Column('NAME', String(128), default='', comment='名称')
@@ -23,6 +25,16 @@ class FuzhouHighwayBureau(db.Model):
         {'comment': '福州公路局'},
     )
 
+    def find_by_key(self):
+        """Return the stored row with the same name, spec, region and date as this instance, or None."""
+        query = FuzhouHighwayBureau.query
+        query = query.filter(FuzhouHighwayBureau.name == self.name)
+        query = query.filter(FuzhouHighwayBureau.spec == self.spec)
+        query = query.filter(FuzhouHighwayBureau.region == self.region)
+        query = query.filter(FuzhouHighwayBureau.date == self.date)
+        result = query.one_or_none()
+        return result
+
     @classmethod
     def get_query(cls, year, month, name, spec=None, region='福州'):
         start_date = datetime.date(year, month, 1)
diff --git a/web/commons/models/fuzhou_transportation_bureau.py b/web/commons/models/fuzhou_transportation_bureau.py
index 7a5a348..2db9cdf 100644
--- a/web/commons/models/fuzhou_transportation_bureau.py
+++ b/web/commons/models/fuzhou_transportation_bureau.py
@@ -3,10 +3,12 @@ import datetime
 from dateutil.relativedelta import relativedelta
 from sqlalchemy import Column, Integer, String, Date, UniqueConstraint, Numeric
 
+from commons.models.mixin.base import BaseModelMixin
+from commons.models.model import Model
 from core.extensions import db
 
 
-class FuzhouTransportationBureau(db.Model):
+class FuzhouTransportationBureau(db.Model, Model, BaseModelMixin):
     __tablename__ = 'FUZHOU_TRANSPORTATION_BUREAU'
     id = Column('ID', Integer, primary_key=True)
     name = Column('NAME', String(128), default='', comment='名称')
@@ -23,6 +25,16 @@ class FuzhouTransportationBureau(db.Model):
         {'comment': '福州交通局'},
     )
 
+    def find_by_key(self):
+        """Return the stored row with the same name, spec, region and date as this instance, or None."""
+        query = FuzhouTransportationBureau.query
+        query = query.filter(FuzhouTransportationBureau.name == self.name)
+        query = query.filter(FuzhouTransportationBureau.spec == self.spec)
+        query = query.filter(FuzhouTransportationBureau.region == self.region)
+        query = query.filter(FuzhouTransportationBureau.date == self.date)
+        result = query.one_or_none()
+        return result
+
     @classmethod
     def get_query(cls, year, month, name, region='福州'):
         start_date = datetime.date(year, month, 1)
diff --git a/web/commons/models/local_material.py b/web/commons/models/local_material.py
index 4f3f7e6..6c83e50 100644
--- a/web/commons/models/local_material.py
+++ b/web/commons/models/local_material.py
@@ -1,9 +1,11 @@
 from sqlalchemy import Column, Integer, String, Date, UniqueConstraint, Numeric, Text
 
+from commons.models.mixin.base import BaseModelMixin
+from commons.models.model import Model
 from core.extensions import db
 
 
-class LocalMaterial(db.Model):
+class LocalMaterial(db.Model, Model, BaseModelMixin):
     __tablename__ = 'LOCAL_MATERIAL'
     id = Column('ID', Integer, primary_key=True)
     name = Column('NAME', String(128), default='', comment='名称')
@@ -22,3 +24,16 @@ class LocalMaterial(db.Model):
         UniqueConstraint(name, spec, city, county, date, name='Idx_key'),
         {'comment': '地材'},
     )
+
+    def find_by_key(self):
+        """Return the stored row with the same date, spec, name, city and county as this instance, or None."""
+        cls = self.__class__
+        query = cls.query
+        query = query.filter(cls.date == self.date)
+        query = query.filter(cls.spec == self.spec)
+        query = query.filter(cls.name == self.name)
+        query = query.filter(cls.city == self.city)
+        query = query.filter(cls.county == self.county)
+        result = query.one_or_none()
+        return result
+
diff --git a/web/commons/models/sanming_steel.py b/web/commons/models/sanming_steel.py
index 69e95fb..0593579 100644
--- a/web/commons/models/sanming_steel.py
+++ b/web/commons/models/sanming_steel.py
@@ -3,10 +3,12 @@ import datetime
 from dateutil.relativedelta import relativedelta
 from sqlalchemy import Column, Integer, String, Numeric, Date, UniqueConstraint
 
+from commons.models.mixin.base import BaseModelMixin
+from commons.models.model import Model
 from core.extensions import db
 
 
-class SanmingSteel(db.Model):
+class SanmingSteel(db.Model, Model, BaseModelMixin):
     __tablename__ = 'SANMING_STEEL'
     id = Column('ID', Integer, primary_key=True)
     name = Column('NAME', String(128), default='', comment='名称')
@@ -17,10 +19,19 @@ class SanmingSteel(db.Model):
     date = Column('DATE', Date, comment='日期')
 
     __table_args__ = (
-        UniqueConstraint(name, spec, material, date, name='Idx_key'),
+        UniqueConstraint(name, spec, date, name='Idx_key'),
         {'comment': '三明钢铁'},
     )
 
+    def find_by_key(self):
+        """Return the stored row with the same name, spec and date as this instance, or None."""
+        query = SanmingSteel.query
+        query = query.filter(SanmingSteel.name == self.name)
+        query = query.filter(SanmingSteel.spec == self.spec)
+        query = query.filter(SanmingSteel.date == self.date)
+        result = query.one_or_none()
+        return result
+
     @classmethod
     def get_query(cls, year, month, name, spec):
         start_date = datetime.date(year, month, 1)
diff --git a/web/scripts/unpack_history_data.py b/web/scripts/unpack_history_data.py
index d27da5d..b10c198 100644
--- a/web/scripts/unpack_history_data.py
+++ b/web/scripts/unpack_history_data.py
@@ -1,41 +1,336 @@
+import datetime
 import os
 import zipfile
 from pathlib import Path
 
+from dateutil.relativedelta import relativedelta
+from openpyxl import load_workbook
+from xls2xlsx import XLS2XLSX
 
-def main(zip_path=r'C:\Users\Administrator\Desktop\材料管理系统模版\造价站近两年价格数据.zip'):
+from commons.models.asphalt_modifier import AsphaltModifier
+from commons.models.data_network import DataNetwork
+from commons.models.fujian_survey import FujianSurvey
+from commons.models.fuzhou_highway_bureau import FuzhouHighwayBureau
+from commons.models.fuzhou_transportation_bureau import FuzhouTransportationBureau
+from commons.models.local_material import LocalMaterial
+from commons.models.sanming_steel import SanmingSteel
+
+
+def unpack(zip_path=r'C:\Users\Administrator\Desktop\材料管理系统模版\造价站近两年价格数据.zip'):
+    """
+    Yield ``(zip_ref, file_info, target_path)`` for every wanted Excel member of the archive.
+
+    A member is kept when its decoded name matches one of the data categories below;
+    ``target_path`` is built from the category, the year and the zero-padded month.
+    ``zip_ref`` is only usable while this generator is still running.
+    """
     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
         for file_info in zip_ref.infolist():
             filename = file_info.filename.encode('cp437').decode('gbk')
             is_excel = filename.endswith('.xlsx') or filename.endswith('.xls')
             is_fuzhou = '福州' in filename
             # 地材
-            # if is_excel and is_fuzhou and '地材汇总/' in filename and '福州市交通工程地方材料价格信息汇总表(数据处理)' not in filename\
-            #         and '混凝土价格' not in filename:
-            #     year, month = get_month(filename)
-            #     print(year, month, filename)
-            # 调查表
-            if is_excel and is_fuzhou and '11地市调查表/' in filename and '福建省' not in filename and '通闽公司' not in filename and '-' not in filename:
+            if is_excel and is_fuzhou and '地材汇总/' in filename and '福州市交通工程地方材料价格信息汇总表(数据处理)' not in filename\
+                    and '混凝土价格' not in filename:
                 year, month = get_month(filename)
                 print(year, month, filename)
-                target_file_path = f'./{year}-{month}-调查表-福州{Path(filename).suffix}'
-                with open(target_file_path, 'wb') as f:
-                    f.write(zip_ref.read(file_info))
+                yield zip_ref, file_info, f'./地材-福州-{year}-{int(month):02d}{Path(filename).suffix}'
+            # 调查表 (survey sheets)
+            elif is_excel and is_fuzhou and '11地市调查表/' in filename and '福建省' not in filename and '通闽公司' not in filename and '-' not in filename:
+                year, month = get_month(filename)
+                print(year, month, filename)
+                yield zip_ref, file_info, f'./调查表-福州-{year}-{int(month):02d}{Path(filename).suffix}'
             # 公路局
-            # if is_excel and is_fuzhou and '4.福州公路' in filename:
-            #     print(filename)
+            elif is_excel and is_fuzhou and '4.福州公路' in filename:
+                year, month = get_month(filename)
+                print(year, month, filename)
+                yield zip_ref, file_info, f'./公路局-福州-{year}-{int(month):02d}{Path(filename).suffix}'
             # 交通局
-            # if is_excel and is_fuzhou and '1.福州交通局' in filename:
-            #     print(filename)
+            elif is_excel and is_fuzhou and '1.福州交通局' in filename:
+                year, month = get_month(filename)
+                print(year, month, filename)
+                yield zip_ref, file_info, f'./交通局-福州-{year}-{int(month):02d}{Path(filename).suffix}'
             # 网络价格
-            # if is_excel and is_fuzhou and '网络价格(' in filename and '通闽公司' not in filename and '5.福州' not in filename:
-            #     print(filename)
+            elif is_excel and is_fuzhou and '网络价格(' in filename and '通闽公司' not in filename and '5.福州' not in filename:
+                year, month = get_month(filename)
+                print(year, month, filename)
+                yield zip_ref, file_info, f'./网络价格-福州-{year}-{int(month):02d}{Path(filename).suffix}'
             # 改性剂
-            # if is_excel and '改性剂和沙钢' in filename :
-            #     print(filename)
+            elif is_excel and '改性剂和沙钢' in filename:
+                year, month = get_month(filename)
+                print(year, month, filename)
+                yield zip_ref, file_info, f'./改性剂-福州-{year}-{int(month):02d}{Path(filename).suffix}'
             # 三明钢铁
-            # if is_excel and '2.三明钢铁' in filename:
-            #     print(filename)
+            elif is_excel and '2.三明钢铁' in filename:
+                year, month = get_month(filename)
+                print(year, month, filename)
+                yield zip_ref, file_info, f'./三明钢铁-福州-{year}-{int(month):02d}{Path(filename).suffix}'
+
+
+def main():
+    """
+    Write every member selected by ``unpack`` to disk and convert ``.xls`` copies to ``.xlsx``.
+    """
+    for zip_ref, file_info, target_file_path in unpack():
+        with open(target_file_path, 'wb') as f:
+            f.write(zip_ref.read(file_info))
+        if target_file_path.endswith('.xls'):
+            x2x = XLS2XLSX(target_file_path)
+            x2x.to_xlsx(target_file_path + 'x')  # only the suffix changes: '.xls' -> '.xlsx'
+
+
+class Cleaner:
+    """
+    Generators that turn the extracted workbooks into keyword dicts for the models.
+
+    Each ``clean_*`` method reads the workbook at ``table`` and takes the year and month
+    from the third and fourth dash-separated parts of its file name.
+    """
+    city = '福州市'
+
+    @classmethod
+    def clean_gxj(cls, table=r'E:\Project\material_api\web\scripts\改性剂-福州-2022-12.xlsx'):
+        """
+        Yield asphalt-modifier (改性剂) price rows dated inside the month named by the file.
+        """
+        wb = load_workbook(table)
+        ws = wb.active
+        _, _, year, month, *_ = Path(table).stem.split('-')
+        start = datetime.datetime(int(year), int(month), 1)
+        end = start + relativedelta(months=1)
+        name = None
+        for row in ws.iter_rows(values_only=True):
+            for k, cell in enumerate(row):
+                if not name and cell == '日期':
+                    name = row[k+1]
+                if isinstance(cell, datetime.datetime):
+                    if start <= cell < end:
+                        print(year, month, name, cell, row[k+1])
+                        yield {
+                            'name': name,
+                            'spec': '',
+                            'price': row[k+1],
+                            'date': cell,
+                        }
+
+    @classmethod
+    def clean_ss(cls, table=r'E:\Project\material_api\web\scripts\三明钢铁-福州-2022-12.xlsx'):
+        """
+        Yield Sanming Steel (三明钢铁) price rows, skipping header rows and rows without a price.
+        """
+        wb = load_workbook(table)
+        ws = wb.active
+        _, _, year, month, *_ = Path(table).stem.split('-')
+        result = []
+        for i, row in enumerate(ws.iter_rows(values_only=True)):
+            if isinstance(row[0], str) and ('材料编号' in row[0] or '三明钢铁' in row[0]):
+                continue
+            material, name, spec, _, _, price, *_ = row
+            if not name and result:  # blank name: reuse the previous row's name, if there is one
+                name = result[-1][3]
+            # if not spec:
+            #     spec = result[-1][4]
+            item = (year, month, material, name, spec, price)
+            result.append(item)
+            if not item[5]:
+                continue
+            print(item)
+            yield {
+                'name': name.replace(' ', '') if name else '',
+                'spec': spec.replace(' ', '') if spec else '',
+                'material': material,
+                'price': price,
+                'fluctuating': None,
+                'date': datetime.datetime(int(year), int(month), 1),
+            }
+
+    @classmethod
+    def clean_jtj(cls, table=r'E:\Project\material_api\web\scripts\交通局-福州-2022-12.xlsx'):
+        """
+        Yield Fuzhou transportation bureau (交通局) rows that carry a price.
+        """
+        wb = load_workbook(table)
+        ws = wb.active
+        _, _, year, month, *_ = Path(table).stem.split('-')
+        result = []
+        for i, row in enumerate(ws.iter_rows(values_only=True)):
+            if isinstance(row[0], str) and ('福建' in row[0] or '、' in row[0] or '材料' in row[0] or '电话' in row[0]):
+                continue
+            if not row[0] and not row[1] and not row[2]:
+                continue
+            material, name, spec, unit, brand, price, *_ = row
+            # if not name:
+            #     name = result[-1][3]
+            # if not spec:
+            #     spec = result[-1][4]
+            item = (year, month, material, name, spec, unit, brand, price)
+            result.append(item)
+            if not item[7] or item[7] == '-':
+                continue
+            print(item)
+            yield {
+                'name': name.replace(' ', '') if name else '',
+                'spec': spec.replace(' ', '') if spec else '',
+                'price': price,
+                'date': datetime.datetime(int(year), int(month), 1),
+                'material_id': material,
+                'unit': unit,
+                'brand': brand,
+                'region': cls.city.replace('市', ''),
+            }
+
+    @classmethod
+    def clean_glj(cls, table=r'E:\Project\material_api\web\scripts\公路局-福州-2022-12.xlsx'):
+        """
+        Yield Fuzhou highway bureau (公路局) rows that carry a price.
+        """
+        wb = load_workbook(table)
+        ws = wb.active
+        _, _, year, month, *_ = Path(table).stem.split('-')
+        result = []
+        for i, row in enumerate(ws.iter_rows(values_only=True)):
+            if isinstance(row[0], str) and ('福建' in row[0] or '、' in row[0] or '材料' in row[0] or '电话' in row[0]
+                                            or '附件' in row[0] or '填报' in row[0]):
+                continue
+            if not row[0] and not row[1] and not row[2]:
+                continue
+            material, name, spec, unit, brand, price, source, remark, *_ = row
+            if not name and result:  # blank name: reuse the previous row's name, if there is one
+                name = result[-1][3]
+            # if not spec:
+            #     spec = result[-1][4]
+            item = (year, month, material, name, spec, unit, brand, price)
+            result.append(item)
+            if not item[7] or item[7] == '-':
+                continue
+            print(item)
+            yield {
+                'name': name.replace(' ', '') if name else '',
+                'spec': spec.replace(' ', '') if spec else '',
+                'price': price,
+                'date': datetime.datetime(int(year), int(month), 1),
+                'material_id': material,
+                'unit': unit,
+                'brand': brand,
+                'region': cls.city.replace('市', ''),
+            }
+
+    @classmethod
+    def clean_dcb(cls, table=r'E:\Project\material_api\web\scripts\调查表-福州-2022-12.xlsx'):
+        """
+        Yield survey-sheet (调查表) rows that carry a price.
+        """
+        wb = load_workbook(table)
+        ws = wb.active
+        _, _, year, month, *_ = Path(table).stem.split('-')
+        result = []
+        for i, row in enumerate(ws.iter_rows(values_only=True)):
+            print(row)
+            if isinstance(row[0], str) and ('福建' in row[0] or '、' in row[0] or '材料' in row[0] or '电话' in row[0]
+                                            or '附件' in row[0] or '填报' in row[0]):
+                continue
+            if not row[0] and not row[1] and not row[2]:
+                continue
+            material, name, spec, unit, brand, price, price_without_tax, tax, source, remark, *_ = row
+            # if not name:
+            #     name = result[-1][3]
+            # # if not spec:
+            # #     spec = result[-1][4]
+            item = (year, month, material, name, spec, unit, brand, price)
+            result.append(item)
+            if not price or price == '-':
+                continue
+            print(item)
+            yield {
+                'name': name.replace(' ', '') if name else '',
+                'spec': spec.replace(' ', '') if spec else '',
+                'price': price,
+                'date': datetime.datetime(int(year), int(month), 1),
+                'material_id': material,
+                'unit': unit,
+                'brand': brand,
+                'tax': tax,
+                'region': cls.city.replace('市', ''),
+            }
+
+    @classmethod
+    def clean_network(cls, table=r'E:\Project\material_api\web\scripts\网络价格-福州-2022-12.xlsx'):
+        """
+        Yield network-price (网络价格) rows that carry a price.
+        """
+        wb = load_workbook(table)
+        ws = wb.active
+        _, _, year, month, *_ = Path(table).stem.split('-')
+        result = []
+        for i, row in enumerate(ws.iter_rows(values_only=True)):
+            if isinstance(row[0], str) and ('网络' in row[0] or '、' in row[0] or '材料' in row[0] or '电话' in row[0]
+                                            or '附件' in row[0] or '填报' in row[0]):
+                continue
+            if not row[0] and not row[1] and not row[2]:
+                continue
+            material, name, spec, unit, brand, price, _, remark, source, *_ = row
+            # # if not name:
+            # #     name = result[-1][3]
+            # # # if not spec:
+            # # #     spec = result[-1][4]
+            item = (year, month, material, name, spec, unit, brand, price, source, remark)
+            result.append(item)
+            if not item[7] or item[7] == '-':
+                continue
+            print(item)
+            yield {
+                'material_id': material,
+                'spec': spec.replace(' ', '') if spec else '',
+                'unit': unit,
+                'brand': brand,
+                'name': name.replace(' ', '') if name else '',
+                'price': price,
+                'source': source,
+                'remark': remark,
+                'date': datetime.datetime(int(year), int(month), 1),
+                'region': cls.city.replace('市', ''),
+            }
+
+    @classmethod
+    def clean_local(cls, table=r'E:\Project\material_api\web\scripts\地材-福州-2022-12.xlsx'):
+        """
+        Yield local-material (地材) rows that carry a price; county and name are forward-filled.
+        """
+        wb = load_workbook(table)
+        ws = wb.active
+        _, _, year, month, *_ = Path(table).stem.split('-')
+        result = []
+        for i, row in enumerate(ws.iter_rows(values_only=True)):
+            if isinstance(row[0], str) and ('汇总表' in row[0] or '、' in row[0] or '材料' in row[0] or '电话' in row[0]
+                                            or '附件' in row[0] or '填报' in row[0] or '单位' in row[0] or '序号' in row[0]):
+                continue
+            if not row[0] and not row[1] and not row[2]:
+                continue
+            _, county, name, spec, unit, price, price_without_tax, position, remark, *_ = row
+            if not county and result:  # blank county: reuse the previous row's county, if there is one
+                county = result[-1][2]
+            if not name and result:  # blank name: reuse the previous row's name, if there is one
+                name = result[-1][3]
+            item = (year, month, county, name, spec, unit, price, price_without_tax, position, remark)
+            result.append(item)
+            if not price or price in ('-', '∕', '/'):
+                continue
+            if not price_without_tax or price_without_tax in ('-', '∕', '/'):
+                price_without_tax = 0
+            print(item)
+            yield {
+                'name': name.replace(' ', '') if name else '',
+                'city': cls.city,
+                'county': county,
+                'material_id': '',
+                'spec': spec.replace(' ', '') if spec else '',
+                'unit': unit,
+                'price': price,
+                'price_without_tax': price_without_tax,
+                'date': datetime.datetime(int(year), int(month), 1),
+                'position': position,
+                'remark': remark,
+            }
 
 
 def get_month(filename):
@@ -50,4 +345,29 @@ def get_month(filename):
 
 
 if __name__ == '__main__':
-    main()
+    from core.factory import ClientApp
+    with ClientApp().app_context():
+        for root, dirs, files in os.walk(r'E:\Project\material_api\web\scripts'):
+            for file in files:
+                file_path = os.path.join(root, file)
+                if '改性剂' in file and file.endswith('.xlsx'):
+                    for item in Cleaner.clean_gxj(file_path):
+                        AsphaltModifier(**item).upsert()
+                if '三明钢铁' in file and file.endswith('.xlsx'):
+                    for item in Cleaner.clean_ss(file_path):
+                        SanmingSteel(**item).upsert()
+                if '交通局' in file and file.endswith('.xlsx'):
+                    for item in Cleaner.clean_jtj(file_path):
+                        FuzhouTransportationBureau(**item).upsert()
+                if '公路局' in file and file.endswith('.xlsx'):
+                    for item in Cleaner.clean_glj(file_path):
+                        FuzhouHighwayBureau(**item).upsert()
+                if '调查表' in file and file.endswith('.xlsx'):
+                    for item in Cleaner.clean_dcb(file_path):
+                        FujianSurvey(**item).upsert()
+                if '网络价格' in file and file.endswith('.xlsx'):
+                    for item in Cleaner.clean_network(file_path):
+                        DataNetwork(**item).upsert()
+                if '地材' in file and file.endswith('.xlsx'):
+                    for item in Cleaner.clean_local(file_path):
+                        LocalMaterial(**item).upsert()