import urllib.request as request
from bs4 import BeautifulSoup
import os
def downloadImg(url: str, name):
    """Download every slide image found on a listing detail page.

    Fetches ``url``, selects the ``div.content__article__slide__item > img``
    elements and saves the target of each element's ``data-src`` attribute
    under ``./result/images/<name>/`` (any ``/`` in ``name`` is replaced
    by ``-``).

    Args:
        url: Address of the detail page to scrape.
        name: String used as the sub-directory name for the saved images.
    """
    with request.urlopen(url) as response:
        soup = BeautifulSoup(str(response.read(), encoding='utf-8'), features='html.parser')
    savePath = f"./result/images/{name.replace('/', '-')}"
    # makedirs also creates missing parent directories and tolerates an
    # already existing target, which a plain mkdir does not.
    os.makedirs(savePath, exist_ok=True)
    for img in soup.select('div.content__article__slide__item > img'):
        src = img.get('data-src')
        if not src:
            # Tag carries no data-src attribute: nothing to download.
            continue
        # File name: last path segment, cut at the first '!' and then '?'.
        fileName = src.split('/')[-1].split('!')[0].split('?')[0]
        if not fileName:
            continue
        # Fetch before opening the output file so that a failed request
        # does not leave an empty file behind.
        with request.urlopen(src) as response:
            content = response.read()
        with open(f"{savePath}/{fileName}", 'wb') as f:
            f.write(content)
def a():
    """Scrape listing pages 1-10 of cs.zu.ke.com and record each listing.

    For every ``div.content__list--item`` element found, prints the
    thumbnail URL, title, description, price and tags, downloads the images
    of the linked detail page through ``downloadImg``, and writes one
    ``title,des,price,tags`` line to ``./result/list.txt`` (UTF-8; the file
    is overwritten on each run).

    Items that lack one of the expected elements, a title or a detail link
    are skipped instead of aborting the whole run with ``AttributeError``.
    """
    # The output file lives in ./result; make sure the directory is there.
    os.makedirs('./result', exist_ok=True)
    with open('./result/list.txt', 'w', encoding='utf-8') as f:
        for i in range(1, 11):
            url = f'https://cs.zu.ke.com/zufang/pg{i}/#contentList'
            with request.urlopen(url) as response:
                data = str(response.read(), encoding='utf-8')
            soup = BeautifulSoup(data, features='html.parser')
            for item in soup.find_all('div', attrs={'class': 'content__list--item'}):
                imgTag = item.find('img')
                linkTag = item.find('a', attrs={'class': 'content__list--item--aside'})
                desTag = item.find('p', attrs={'class': 'content__list--item--des'})
                priceTag = item.find('span', attrs={'class': "content__list--item-price"})
                tagsTag = item.find('p', attrs={'class': 'content__list--item--bottom oneline'})
                # find() returns None for a missing element; skip incomplete
                # items before any download is started for them.
                if any(tag is None for tag in (imgTag, linkTag, desTag, priceTag, tagsTag)):
                    continue
                title = imgTag.get('alt')
                href = linkTag.get('href')
                if title is None or href is None:
                    # The title names the image directory and the href forms
                    # the detail URL; without them nothing can be downloaded.
                    continue
                imgUrl = imgTag.get('data-src')
                print(imgUrl)
                print(title)
                downloadImg(f"https://cs.zu.ke.com{href}", title)
                des = desTag.get_text().replace('\n', '').replace(' ', '')
                print(des)
                price = priceTag.get_text()
                print(price)
                tags = [tag.get_text() for tag in tagsTag.select('p > i')]
                print(tags)
                f.write(f'{title},{des},{price},{tags}\n')
                print('*' * 30)
if __name__ == '__main__':
    # Output goes to ./result (relative to the working directory), not to
    # /result at the filesystem root. Creating ./result/images here also
    # provides the parent directory for the per-listing image folders, and
    # exist_ok keeps repeated runs from failing.
    os.makedirs('./result/images', exist_ok=True)
    a()
# Awesome! (ノ°ο°)ノ