使用BeautifulSoup与requests爬取美图:
r = requests.get(url, headers=headers, timeout = 30)
以请求头访问指定的URL;
r.encoding = r.apparent_encoding
网页的编码方式;
return r.text
返回网页的文字;
soup = BeautifulSoup(html, 'lxml')
img_ul = soup.find_all('img', {"class": "progressive__img progressive--not-loaded"})
上面代码用来选择网页的元素;
with open('路径\img\%s' % (image_name), 'wb') as f: #不知为啥必须是绝对路径且不能访问未创建的文件夹
for chunk in r.iter_content(chunk_size=128): #将网页内容可迭代
f.write(chunk)
上面用来写入图片;
import requests
from bs4 import BeautifulSoup
import os
def gethtmltext(url):
    """Fetch *url* and return the decoded page text, or None on failure.

    Sends a desktop Chrome User-Agent so the site serves the normal page,
    and uses apparent_encoding so non-UTF-8 Chinese pages decode correctly.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                             'Chrome/51.0.2704.63 Safari/537.36'}
    try:
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()  # turn HTTP 4xx/5xx into an exception
        r.encoding = r.apparent_encoding  # guess the real encoding from the body
        return r.text
    except requests.RequestException:
        # Only network/HTTP errors are caught (the original bare `except:`
        # also swallowed KeyboardInterrupt etc.). Keep the original contract:
        # print a message and return None so the caller can skip this page.
        print("错误")
        return None
# Crawl pages 11-20 of the Bing wallpaper mirror and save every image found.
for i in range(11, 21):
    page_url = "https://bing.wilii.cn/index.asp?page=%s" % i
    html = gethtmltext(page_url)
    if html is None:
        # gethtmltext returns None on network/HTTP errors; skip this page
        # instead of crashing inside BeautifulSoup.
        continue
    soup = BeautifulSoup(html, 'lxml')
    # Target images carry both progressive-loading CSS classes.
    img_ul = soup.find_all('img', {"class": "progressive__img progressive--not-loaded"})
    for img in img_ul:
        url = "https://bing.wilii.cn" + img['src']
        print(url)
        r = requests.get(url, stream=True, timeout=30)
        r.raise_for_status()
        image_name = url.split('/')[-1]
        # Raw string avoids accidental backslash escapes in the Windows path;
        # makedirs creates the folder on first run (the original failed when
        # the directory did not already exist).
        save_dir = r'F:\MIUI_data\python_test\img'
        os.makedirs(save_dir, exist_ok=True)
        with open(os.path.join(save_dir, image_name), 'wb') as f:
            # Stream the body in small chunks so large images never sit
            # fully in memory.
            for chunk in r.iter_content(chunk_size=128):
                f.write(chunk)
        print('Saved %s' % image_name)
随想
- 文件批量重命名脚本
list
与str
的相互转化;
list
的截取; - 文件复制
shutil.copyfile
shutil.copytree
阅读量
loading...