# Note: this code has been sanitized; replace the placeholder values with your own.
# @Time : 2023/10/8 14:43
# @Author : Lan
# @File : niukespider.py
# @Software: PyCharm
import time
import requests
def get_category(catalog='10klpm', timeout=10):
    """Fetch the catalog index for ``catalog`` and return the decoded JSON.

    Args:
        catalog: Catalog identifier interpolated into the index URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` blocks indefinitely on a stalled connection.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On timeouts or connection failures.
    """
    url = f'https://www.lanol.cn.com/content/zhuanlan/index/catalog/{catalog}'
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()
# HTML page skeleton wrapped around each saved article. The literal
# ``{{content}}`` marker is swapped for the article body with str.replace()
# in the script entry point before the page is written to disk.
c = """
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Document</title>
</head>
<body>
{{content}}
</body>
</html>
"""
def get_content(catalog, entity, timeout=10):
    """Fetch the detail record for one entity of a catalog as decoded JSON.

    Args:
        catalog: Catalog identifier interpolated into the detail URL.
        entity: Entity identifier interpolated into the detail URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` blocks indefinitely on a stalled connection.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On timeouts or connection failures.
    """
    # Current time in milliseconds, sent as the ``_`` query parameter
    # (presumably a cache-buster; confirm against the API).
    timestamp_ms = int(time.time() * 1000)
    url = f'https://www.lanol.cn.com/content/zhuanlan/index/detail/{catalog}/{entity}?_={timestamp_ms}'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()
if __name__ == '__main__':
    # Script entry point: download every entry listed in the catalog index
    # and save each one as a standalone HTML page under ./docs.
    from pathlib import Path

    catalog = 'Gj5x2m'
    output_dir = Path('./docs')
    # open(..., 'w') does not create missing directories, so make sure the
    # target directory exists before writing the first file.
    output_dir.mkdir(parents=True, exist_ok=True)
    for entry in get_category(catalog)['data']['catalog']:
        content = get_content(catalog, entry['uuid'])['data']
        # '/' in a title would be read as a path separator; swap it for '-'.
        file_name = entry['title'].replace('/', '-') + '.html'
        with open(output_dir / file_name, 'w', encoding='utf-8') as f:
            f.write(c.replace('{{content}}', content['content']))
# Comments (0)