44 lines
1.3 KiB
Python
44 lines
1.3 KiB
Python
# Crawler: fetches content for every material task type and records the result.
|
|
import datetime
|
|
import json
|
|
import traceback
|
|
|
|
from commons.constants.material_task import MaterialTaskType, MaterialTaskStatus
|
|
from commons.models.material_task import MaterialTask
|
|
from commons.services.data import DataService
|
|
|
|
|
|
def crawler():
    """Run the content crawler once for every material task type.

    For each value in ``MaterialTaskType.values`` a ``MaterialTask`` row is
    created for the current year and month with status ``DOING``, inside its
    own ``MaterialTask.atomic()`` block. The content returned by
    ``DataService.get_content`` is stored on the row as JSON text and the
    status is set to ``DONE``. If anything in that step raises, the traceback
    is printed and the status is set to ``FAILED`` instead, so the loop
    continues with the next task type.
    """
    run_date = datetime.date.today()
    year, month = run_date.year, run_date.month

    for task_type in MaterialTaskType.values:
        with MaterialTask.atomic() as session:
            # Create the task record, marked as in progress.
            fields = dict(
                name=None,
                status=MaterialTaskStatus.DOING,
                file=None,
                type=task_type,
                year=year,
                month=month,
                content=None,
            )
            task = MaterialTask(**fields)
            session.add(task)
            session.flush()

            # Run the crawler and record the outcome on the same record.
            try:
                payload = DataService.get_content(type=task_type)
                # ensure_ascii=False keeps non-ASCII text readable in storage.
                task.content = json.dumps(payload, ensure_ascii=False)
                task.status = MaterialTaskStatus.DONE
                session.flush()
            except Exception:
                # Best effort: report the error and mark this task as failed.
                traceback.print_exc()
                task.status = MaterialTaskStatus.FAILED
                session.flush()
|
|
|
|
|
|
if __name__ == '__main__':
    # Imported here so the application factory is only loaded when this file
    # is executed as a script.
    from core.factory import ClientApp

    # Run the crawler inside the application context.
    client_app = ClientApp()
    with client_app.app_context():
        crawler()
|