寻找建设银行的挂马网站
通过 Google 的搜索结果查找建设银行的挂马网站。
以建设银行首页的 title 作为关键字进行检索,然后对结果进行人工排除。
#!/usr/bin/env python
import urllib2
import re
# Google advanced-search query for pages whose title (as_occt=title) contains
# the phrase "欢迎访问中国建设银行网站", requesting 100 results (num=100).
URL = "http://www.google.cn/search?as_q=%E6%AC%A2%E8%BF%8E%E8%AE%BF%E9%97%AE%E4%B8%AD%E5%9B%BD%E5%BB%BA%E8%AE%BE%E9%93%B6%E8%A1%8C%E7%BD%91%E7%AB%99&complete=1&hl=zh-CN&newwindow=1&num=100&btnG=Google+%E6%90%9C%E7%B4%A2&as_epq=&as_oq=&as_eq=&lr=&cr=&as_ft=i&as_filetype=&as_qdr=all&as_occt=title&as_dt=i&as_sitesearch=&as_rights="

# Captures the href value of every <a href="..." target...> link.
LINK_PATTERN = re.compile(r'<a href="(.+?)" target')
# Captures the host of the last "http://host/" occurrence inside an href
# (the leading greedy ".*" skips any prefix in front of the URL).
HOST_PATTERN = re.compile(r'.*http://(.+?)/')


def fetch_page(url):
    """Download *url* and return the raw response body.

    The request is sent with a 'Mozilla/5.0' User-agent header. The
    connection is closed even if reading the body fails.
    """
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    sock = opener.open(url)
    try:
        return sock.read()
    finally:
        sock.close()


def extract_hosts(htmlsource):
    """Return the unique hosts linked from *htmlsource*.

    Hosts are returned in the order they first appear. Links whose href
    does not contain an "http://host/" part are skipped instead of
    raising AttributeError on a failed match.
    """
    hosts = []
    seen = set()
    for href in LINK_PATTERN.findall(htmlsource):
        match = HOST_PATTERN.search(href)
        if match is None:
            continue
        host = match.group(1)
        if host not in seen:
            seen.add(host)
            hosts.append(host)
    return hosts


def write_blacklist(hosts, path='blacklist.txt'):
    """Write *hosts* to *path*, one host per line, replacing the file."""
    with open(path, 'w') as outfile:
        outfile.writelines(host + '\n' for host in hosts)


def main():
    """Fetch the search results and store the linked hosts for review."""
    write_blacklist(extract_hosts(fetch_page(URL)))


if __name__ == '__main__':
    main()