This commit is contained in:
han0
2024-05-29 10:21:31 +08:00
commit 54ac29d27b
119 changed files with 6817 additions and 0 deletions

43
web/tasks/once/crawler.py Normal file
View File

@@ -0,0 +1,43 @@
# Crawler
import datetime
import json
import traceback
from commons.constants.material_task import MaterialTaskType, MaterialTaskStatus
from commons.models.material_task import MaterialTask
from commons.services.data import DataService
def crawler():
    """Run the crawler once for every material task type.

    For each value in ``MaterialTaskType.values`` a ``MaterialTask`` for the
    current year and month is added, with status ``DOING``, inside its own
    ``MaterialTask.atomic()`` block and flushed. The result of
    ``DataService.get_content`` is then stored on the task as a JSON string
    and the status is set to ``DONE``. If that step raises, the traceback is
    printed and the status is set to ``FAILED`` instead.
    """
    current_date = datetime.date.today()
    year, month = current_date.year, current_date.month

    for task_type in MaterialTaskType.values:
        with MaterialTask.atomic() as session:
            # Create the task record and flush it before the crawl starts.
            task = MaterialTask(
                name=None,
                status=MaterialTaskStatus.DOING,
                file=None,
                type=task_type,
                year=year,
                month=month,
                content=None,
            )
            session.add(task)
            session.flush()

            # Run the crawler and record the outcome on the task.
            try:
                payload = DataService.get_content(type=task_type)
                task.content = json.dumps(payload, ensure_ascii=False)
                task.status = MaterialTaskStatus.DONE
                session.flush()
            except Exception:
                # Any failure above (including the flush) marks the task as
                # failed rather than aborting the remaining task types.
                traceback.print_exc()
                task.status = MaterialTaskStatus.FAILED
                session.flush()
if __name__ == '__main__':
    # Imported here so the application factory is only loaded when this
    # module is executed directly as a script.
    from core.factory import ClientApp

    # crawler() runs inside the application context provided by ClientApp.
    app = ClientApp()
    with app.app_context():
        crawler()