python使用google和baidu进行网页search
python使用google和baidu进行网页search
$ cat google.py
#!/usr/bin/env python
#-*- encoding:utf-8 -*-
import sys
import re
import httplib
def request_and_save(conn, query_str, f):
    """Fetch one result page and write its matching link snippets to ``f``.

    Sends ``GET query_str`` on ``conn``, reads the whole response body and
    scans it for spans that run from an item-start match to the end of the
    next item-end match.  Only spans containing the text ``class=l`` are
    kept; each kept span is written to ``f`` followed by ``"<br>\\n"``.

    Parameters:
        conn: object providing ``request(method, url)`` and
            ``getresponse()`` (an ``httplib.HTTPConnection`` in this script).
        query_str: request path, including the query string.
        f: writable file-like object that receives the extracted snippets.

    Returns:
        None.  Output goes to ``f``.
    """
    # NOTE(review): this listing was recovered from an HTML page whose tags
    # were stripped.  The start pattern had become an empty string and the
    # end pattern (``p2``) was never defined, so the function raised
    # NameError.  The two patterns below are a reconstruction (one anchor
    # element per item) -- confirm against the original script.
    item_start = re.compile(r"<a\s")
    item_end = re.compile(r"</a>")

    conn.request("GET", query_str)
    # Assumes read() returns text (Python 2 httplib ``str``) -- TODO confirm
    # if this is ever ported to Python 3, where the body is ``bytes``.
    page = conn.getresponse().read()

    # Walk the page by position instead of re-slicing it after every match;
    # the spans found are the same, without copying the remainder each time.
    pos = 0
    while True:
        start = item_start.search(page, pos)
        if start is None:
            break
        end = item_end.search(page, start.start())
        if end is None:
            # An unterminated item at the tail of the page is dropped.
            break
        item = page[start.start():end.end()]
        pos = end.end()
        # Presumably "class=l" marks the search-result links on the page --
        # verify; everything else (navigation, ads) is skipped.
        if "class=l" not in item:
            continue
        f.write(item)
        # NOTE(review): the separator literal was split across two lines by
        # the tag stripping; "<br>" is the reconstructed tag.
        f.write("<br>\n")
# Script body: search Google for the keyword given on the command line and
# save the extracted result links of several result pages into
# "<keyword>-search.html".
#
# NOTE(review): this listing was recovered from an HTML page and everything
# between "<" and ">" was stripped.  That fused the argument check with the
# first html_header assignment, removed the definitions of query_str ..
# query_str7, and removed the HTML tags from the header/footer strings.  The
# usage message, the tags and the URL format below are reconstructions --
# confirm against the original script.
if len(sys.argv) < 2:
    print("Usage: " + sys.argv[0] + " <keyword>")
    sys.exit(1)

keyword = sys.argv[1]
out_name = keyword + "-search.html"

html_header = "<html><head><title>kf701 python search tool</title></head>\n"
html_header += "<body><h1>kf701 python search tool</h1>"
html_end = "</body></html>"

# The original issued seven requests (query_str .. query_str7); presumably
# one per result page.  Assumes 10 results per page via "start" -- verify.
query_strs = [
    "/search?q=" + keyword + "&start=" + str(page * 10)
    for page in range(7)
]

conn = httplib.HTTPConnection("www.google.cn")
print('Search ' + keyword + ', Save result in ' + out_name)
try:
    f = open(out_name, "w")
    try:
        f.write(html_header)
        for query_str in query_strs:
            request_and_save(conn, query_str, f)
        f.write(html_end)
    finally:
        # Close the output file even if a request or write fails.
        f.close()
finally:
    conn.close()
==================
$ cat baidu.py
#!/usr/bin/env python
#-*- encoding:utf-8 -*-
import sys
import re
import httplib
def request_and_save(conn, query_str, f):
    """Fetch one result page and write every extracted snippet to ``f``.

    Sends ``GET query_str`` on ``conn``, reads the whole response body and
    scans it for spans that run from an item-start match to the end of the
    next item-end match.  Each span is written to ``f`` followed by
    ``"<br>\\n"``; no further filtering is applied.

    Parameters:
        conn: object providing ``request(method, url)`` and
            ``getresponse()`` (an ``httplib.HTTPConnection`` in this script).
        query_str: request path, including the query string.
        f: writable file-like object that receives the extracted snippets.

    Returns:
        None.  Output goes to ``f``.
    """
    # NOTE(review): this listing was recovered from an HTML page whose tags
    # were stripped.  The start pattern had become an empty string and the
    # end pattern (``p2``) was never defined, so the function raised
    # NameError.  The two patterns below are a reconstruction (one anchor
    # element per item) and may be broader than the original, which could
    # have targeted Baidu's result markup specifically -- confirm.
    item_start = re.compile(r"<a\s")
    item_end = re.compile(r"</a>")

    conn.request("GET", query_str)
    # Assumes read() returns text (Python 2 httplib ``str``) -- TODO confirm
    # if this is ever ported to Python 3, where the body is ``bytes``.
    page = conn.getresponse().read()

    # Walk the page by position instead of re-slicing it after every match;
    # the spans found are the same, without copying the remainder each time.
    pos = 0
    while True:
        start = item_start.search(page, pos)
        if start is None:
            break
        end = item_end.search(page, start.start())
        if end is None:
            # An unterminated item at the tail of the page is dropped.
            break
        f.write(page[start.start():end.end()])
        # NOTE(review): the separator literal was split across two lines by
        # the tag stripping; "<br>" is the reconstructed tag.
        f.write("<br>\n")
        pos = end.end()
# Script body: search Baidu for the keyword given on the command line and
# save the extracted snippets of several result pages into
# "<keyword>-search.html".
#
# NOTE(review): this listing was recovered from an HTML page and everything
# between "<" and ">" was stripped.  That fused the argument check with the
# first html_header assignment, removed the definitions of query_str ..
# query_str8, and removed the HTML tags from the header/footer strings.  The
# usage message, the tags and the URL format below are reconstructions --
# confirm against the original script.
if len(sys.argv) < 2:
    print("Usage: " + sys.argv[0] + " <keyword>")
    sys.exit(1)

keyword = sys.argv[1]
out_name = keyword + "-search.html"

html_header = "<html><head><title>kf701 python search tool</title></head>\n"
html_header += "<body><h1>kf701 python search tool</h1>"
html_end = "</body></html>"

# The original issued eight requests (query_str .. query_str8); presumably
# one per result page.  Assumes 10 results per page via "pn" -- verify.
query_strs = [
    "/s?wd=" + keyword + "&pn=" + str(page * 10)
    for page in range(8)
]

conn = httplib.HTTPConnection("www.baidu.com")
print('Search ' + keyword + ', Save result in ' + out_name)
try:
    f = open(out_name, "w")
    try:
        f.write(html_header)
        for query_str in query_strs:
            request_and_save(conn, query_str, f)
        f.write(html_end)
    finally:
        # Close the output file even if a request or write fails.
        f.close()
finally:
    conn.close()