1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
import random
import re

import requests
from lxml import etree
def ip_proxy():
    """Fetch the current proxy list from the httpdaili API.

    Prints the HTTP status code of the API response and the list of
    addresses extracted from its body.

    Returns:
        list[str]: every ``host:port`` entry found in the response body, in
        order of appearance. Empty when the body contains none.

    Raises:
        requests.RequestException: if the API cannot be reached or does not
            answer within the timeout.
    """
    url = "http://proxy.httpdaili.com/apinew.asp?ddbh=1454554059906534893"
    # Bound the wait so a stalled proxy API cannot hang the caller forever;
    # 10 s mirrors the timeout used for the proxied request in ip_data.
    r = requests.get(url, timeout=10)
    print("代理ip状态码:", r.status_code)
    # Match each dotted-quad plus port individually. A greedy pattern would
    # merge several entries that share a line into one bogus "address" and
    # would truncate or skip ports shorter than three digits.
    # NOTE(review): assumes the API returns entries as host:port - confirm.
    ips = re.findall(r'[0-9]{1,3}(?:\.[0-9]{1,3}){3}:[0-9]{1,5}', r.text)
    print("当前代理ip地有:", ips)
    return ips
def ip_data():
    """Query https://tool.lu/ip/ repeatedly, each time through a fresh proxy.

    Runs 999 attempts. Each attempt asks ``ip_proxy()`` for the current proxy
    list, picks one entry at random, fetches the page through it and prints
    the proxy mapping, the response object and the text nodes of the first
    ``<p>`` under the element with id ``main_form``.

    An attempt that fails at the network level (proxy list unavailable, dead
    proxy, timeout) or that yields no proxies is reported and skipped, so one
    bad proxy does not abort the remaining attempts.

    Returns:
        None
    """
    # Loop-invariant request settings, built once.
    url = 'https://tool.lu/ip/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36'
    }

    for _ in range(999):
        try:
            ips = ip_proxy()
        except requests.RequestException as exc:
            print("proxy list request failed:", exc)
            continue

        if not ips:
            # Nothing to route through; indexing an empty list would raise.
            print("no proxy returned, skipping this attempt")
            continue

        # random.choice works for any non-empty list, including a single
        # entry, and considers every returned proxy rather than the first two.
        ipp = {
            'https': 'http://' + random.choice(ips)
        }
        print(ipp)

        try:
            req = requests.get(url, headers=headers, proxies=ipp, timeout=10)
        except requests.RequestException as exc:
            # Rotating proxies are frequently dead; move on to the next one.
            print("request through proxy failed:", exc)
            continue
        print(req)

        html_ip = etree.HTML(req.text)
        data = html_ip.xpath('//*[@id="main_form"]/p[1]/text()')
        print(data)
# Start the scraping loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    ip_data()
|