Python 多线程抓取新浪天气数据的代码
# -*- coding: utf-8 -*-
import os
import re
import subprocess
import sys
import time
from threading import Thread
class DownloadWeather(Thread):
    """Thread that downloads one weather data file by running curl.

    The download is performed by the external ``curl`` binary; the thread
    stays alive until curl exits, so ``join()`` waits for the download.
    """

    # Absolute path of the curl binary used for the download.
    CURL_BIN = "/usr/local/bin/curl"

    def __init__(self, path, url, num_of_workers=5, timeout=2):
        """Store the download target.

        Args:
            path: File that curl writes the response to (``-o``).
            url: Whitespace-separated tokens placed after curl's ``-e``
                option; the caller passes ``"<referer> <target-url>"``.
            num_of_workers: Accepted for backward compatibility; unused.
            timeout: Accepted for backward compatibility; unused.
        """
        Thread.__init__(self)
        self.path = path
        self.url = url

    def _build_command(self):
        """Return the curl argument vector for this download."""
        # The url string is split on whitespace exactly as the shell used to
        # do, but every token is passed verbatim: characters such as '&' in
        # the query string are no longer interpreted by a shell.
        return [
            self.CURL_BIN,
            "--limit-rate", "200k",
            "--compressed",
            "-s",
            "-o", self.path,
            "-e",
        ] + self.url.split()

    def run(self):
        """Run curl and block until it has finished."""
        try:
            subprocess.call(self._build_command())
        except OSError as exc:
            # Best effort: a missing or non-executable curl must not raise
            # out of the worker thread; report it and carry on.
            sys.stderr.write("curl failed for %s: %s\n" % (self.path, exc))
# Download the weather data of every city listed in city.txt, one thread per
# city, then print the elapsed wall-clock time in seconds.
time1 = time.time()
workers = []
# NOTE: per the note accompanying this script, city.txt is expected to be
# GB2312-encoded so that the city name ends up correctly encoded in the URL.
with open("/var/www/weather/city.txt") as f:
    for city in f:
        city = city.strip()
        if not city:
            # A blank line would yield a bogus output path and request.
            continue
        # Whole seconds since the epoch, sent as the "time" query parameter.
        wtime = str(int(time.time()))
        path = '/var/www/weather/data/' + city
        # "<referer> <target-url>": both tokens follow curl's -e option.
        url = ('http://weather.sina.com.cn '
               'http://php.weather.sina.com.cn/js2.php?city='
               + city + '&time=' + wtime)
        # an instance of class DownloadWeather
        mydownload = DownloadWeather(path, url)
        mydownload.daemon = True
        mydownload.start()
        workers.append(mydownload)
# Wait for every download: the threads are daemonic, so any thread that is
# not joined would be killed when the interpreter exits.
for worker in workers:
    worker.join()
print (time.time() - time1)
程序读取 /var/www/weather/city.txt 文件,其中每行是一个城市的名称(城市列表同样是从天气页面下载的)。注意:该文件必须使用 GB2312 编码,否则 URL 中的城市名编码不正确,curl 无法取得正确的数据。