本文最后更新于 2024-07-26T20:49:51+08:00
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
"""Download the lazy-loaded images from sc.chinaz.com listing pages.

Each listing page is fetched with urllib, parsed with lxml, and every
``<img class="lazy">`` element found on it is saved into ``./img/``.
"""
import os
import urllib.request

from lxml import etree

# Directory the images are written to; created on demand by download_imag().
IMG_DIR = "./img/"


def creat_request(page):
    """Build a request for *page* that carries a desktop-browser User-Agent.

    Args:
        page: Absolute URL of the page to fetch.

    Returns:
        A ``urllib.request.Request`` ready to be passed to ``get_content``.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    return urllib.request.Request(url=page, headers=header)


def get_content(req):
    """Open *req* and return the response body decoded as UTF-8.

    Args:
        req: A ``urllib.request.Request`` (or URL string) to open.

    Returns:
        The decoded response body as ``str``.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(req) as response:
        return response.read().decode('utf-8')


def download_imag(content):
    """Save every ``<img class="lazy">`` found in *content* into ``./img/``.

    The file name comes from the image's ``alt`` attribute and the download
    URL from its ``data-original`` attribute. Downloading is best-effort: a
    failed image is reported and skipped, and the remaining ones are still
    attempted.

    Args:
        content: HTML source of a listing page.
    """
    tree = etree.HTML(content)
    # urlretrieve does not create missing directories.
    os.makedirs(IMG_DIR, exist_ok=True)

    # Read both attributes from the same element so a name can never be
    # paired with another image's URL when one of the attributes is absent.
    for img in tree.xpath('//img[@class="lazy"]'):
        name = img.get('alt')
        src = img.get('data-original')
        if name is None or src is None:
            continue

        # Drop the last six characters of the address and append ".jpg";
        # presumably this strips a thumbnail suffix such as "_s.jpg" --
        # TODO confirm against the site's current markup.
        url = 'https:' + src[:-6] + ".jpg"
        print(url)

        # Path separators in the alt text would point outside IMG_DIR.
        safe_name = name.replace('/', '_').replace('\\', '_')
        try:
            urllib.request.urlretrieve(url=url, filename=IMG_DIR + safe_name + ".jpg")
        except Exception as exc:
            # Deliberately broad: one bad image must not abort the batch,
            # but the failure is reported instead of being swallowed.
            print(f"下载失败: {url} ({exc})")
            continue
        print("下载完成")


def main(start_num=1, end_num=10):
    """Download the images of listing pages *start_num* through *end_num*.

    Args:
        start_num: First page number to fetch (inclusive).
        end_num: Last page number to fetch (inclusive).
    """
    for i in range(start_num, end_num + 1):
        # The first page has no numeric suffix in its URL.
        if i == 1:
            page = 'https://sc.chinaz.com/tupian/rentiyishu.html'
        else:
            page = 'https://sc.chinaz.com/tupian/rentiyishu_' + str(i) + '.html'
        req = creat_request(page)
        content = get_content(req)
        download_imag(content)


if __name__ == "__main__":
    main()
|
urllib_站长素材
https://ianwusb.blog/2024/07/26/urllib_站长素材/