Python 多线程抓取新浪天气数据的代码


                               
               
                # -*- coding: utf-8 -*-
import os,sys,time,re
from threading import Thread
class DownloadWeather(Thread):
    """Worker thread that downloads one URL to a local file using ``curl``.

    Parameters:
        path: Destination file that curl writes the response body to (``-o``).
        url: A single string of the form ``"<referer> <target-url>"``. The
            first whitespace-separated token becomes the value of curl's
            ``-e`` (referer) option and the remaining token(s) are the URL(s)
            to fetch.
        num_of_workers: Accepted for backward compatibility; not used.
        timeout: Accepted for backward compatibility; not used.

    Attributes:
        returncode: ``None`` until ``run()`` finishes, then curl's exit
            status. Stays ``None`` if curl could not be started.
    """

    # Absolute path of the curl executable used for downloads.
    CURL = "/usr/local/bin/curl"

    def __init__(self, path, url, num_of_workers=5, timeout = 2):
        Thread.__init__(self)
        self.path = path
        self.url = url
        self.returncode = None

    def _build_command(self):
        """Return the curl argument vector for this download.

        The command is built as a list so it can be executed without a
        shell. Interpolating the URL into a shell string is unsafe here:
        the ``&`` in the query string would background the command and drop
        every parameter after it, and ``?`` is a glob character.
        """
        return [
            self.CURL,
            "--limit-rate", "200k",
            "--compressed",
            "-s",
            "-o", self.path,
            "-e",
        ] + self.url.split()

    def run(self):
        """Run curl and block until it exits, so ``join()`` means "done"."""
        # Imported locally so the block does not depend on a file-level import.
        import subprocess
        try:
            self.returncode = subprocess.call(self._build_command())
        except OSError as exc:
            # curl is missing or not executable: report it and let the thread
            # end instead of dumping a traceback.
            sys.stderr.write(
                "could not run curl for %s: %s\n" % (self.path, exc)
            )
# Driver: start one download thread per city listed in city.txt, wait for all
# of them to finish, then print the elapsed wall-clock time in seconds.
time1 = time.time()
threads = []
# "with" closes the city list even if creating or starting a thread raises.
with open("/var/www/weather/city.txt") as f:
    for city in f:
        city = city.strip()
        if not city:
            # A blank line would trigger a request for an empty city name.
            continue
        # Whole seconds since the epoch, sent as the "time" query parameter.
        wtime = str(int(time.time()))
        path = '/var/www/weather/data/' + city
        # One string holding "<referer> <target-url>", as DownloadWeather
        # passes it on to curl's -e option followed by the URL to fetch.
        url = 'http://weather.sina.com.cn http://php.weather.sina.com.cn/js2.php?city=' + city + '&time=' + wtime
        mydownload = DownloadWeather(path, url)
        mydownload.daemon = True
        mydownload.start()
        threads.append(mydownload)
# Join every worker, not just the last one started: the threads are daemonic,
# so any that are still running when the main thread exits would be killed.
# Iterating a list also makes an empty city file a no-op, not a NameError.
for mydownload in threads:
    mydownload.join()
print (time.time() - time1)
程序读取 /var/www/weather/city.txt 文件,其中每行是一个城市的名称(这份城市列表同样是从天气页面下载的)。注意:这个文件必须是 GB2312 编码,否则 curl 无法取回正确的数据,原因在于城市名会直接拼进 URL,而服务器要求 URL 中的城市名使用 GB2312 编码。