如何用Perl读取一个WEB页面的内容?

如何用Perl读取一个WEB页面的内容?

刚学Perl,想用Perl定期读取一个WEB页的内容(就是在WEB页上查看源码看到的东西,这个WEB页是允许这个操作的),我完全不知道要用到哪些模块,怎么做,可否请大家指点一下,如果有这样的例子最好了,先谢了
LWP
我在前两天问了一个问题,贴的代码对你有启示作用

另外版里搜搜吧,有些有用的资料~~
我先搜索过的,不过大家的问题都是挺复杂的,其实我就是简单的获取全部内容,不分什么头还是什么的,是要用到LWP模块吗?我先去找一下这个模块的资料
LWP::Simple 或者LWP::UserAgent
自己perldoc下快多了
这种问题google一下比这边发帖快无数倍
发个我自己写的代码,是取GOOGLE的天气的情况的,包括解析网页的.你应该可以拿来参考参考.还可以下图片的.

[Copy to clipboard] [ - ]
CODE:
#!/usr/bin/perl
use strict;
use Cwd;
use Socket qw(:DEFAULT :crlf);

# Left-pad a number with a single "0" when it is below 10 (5 -> "05").
# Values of 10 or more are returned unchanged.
sub fillzero
{
    my ($value) = @_;
    return $value < 10 ? "0" . $value : $value;
}

# Shared state for the download and parsing stages further down:
#   $socket / $buffer - TCP handle and read buffer for the raw HTTP exchanges
#   %image            - icon file names collected while parsing the pages
my ($socket,$buffer,%image);

# Everything is written below the current working directory; icons go into
# its "image" subdirectory.
my ($path)=getcwd()."/";
my ($image_path)=$path."image/";

# TCP endpoint every request is sent to.
my ($host,$port)=("www.google.com",80);

# Today's date, zero-padded; it names the XML report (YYYYMMDD.xml).
my($sec,$min,$hour,$day,$month,$year,$wday,$yday,$isdst) = localtime();
$year+=1900;
$month=fillzero($month+1);      # localtime() counts months from 0
$day=fillzero($day);

# Cities to query.
my @city=("南宁",);

# Request template for the search page; {city} is replaced per city.
# The two trailing empty elements make the request end with exactly one
# blank line: a client must not send an extra CRLF after the header block.
# NOTE(review): the Cookie value is a hard-coded literal - confirm it is
# still needed.
my $head=join("\r\n",
    'GET http://www.google.cn/search?hl=zh-CN&q={city}+tq&btnG=Google+%E6%90%9C%E7%B4%A2&meta=&aq=f HTTP/1.0',
    'Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*',
    'Referer: http://www.google.cn',
    'Accept-Language: zh-cn',
    'Proxy-Connection: Keep-Alive',
    'User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Maxthon)',
    'Host: www.google.cn',
    'Cookie: SS=Q0=55S35aWz5Lqk6YWN5Zu-; PREF=ID=555c912d17d9d47b:NW=1:TM=1209093843:LM=1209093843:S=Ee0KCVmzEj-c50qs',
    '', '');

# Request template for a weather icon; {file} is replaced per icon.
my $image_head=join("\r\n",
    'GET http://www.google.com/images/weather/{file} HTTP/1.0',
    'Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*',
    'Accept-Language: zh-cn',
    'User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Maxthon)',
    'Host: www.google.com',
    'Proxy-Connection: Keep-Alive',
    '', '');

# Switch both paths to backslash separators (the script targets Windows).
$path=~s/\//\\/g;$image_path=~s/\//\\/g;
print $path."\n";
print $image_path."\n";

# Delete XML reports left over from earlier runs. This is done with unlink
# instead of a shell "del" so that a working directory containing spaces or
# shell metacharacters cannot break (or alter) the command.
if(opendir(my $dir,$path))
{
    foreach my $entry (readdir($dir))
    {
        next unless($entry=~/\.xml\z/i && -f "$path$entry");
        unlink("$path$entry") or warn "cannot delete $path$entry: $!\n";
    }
    closedir($dir);
}
else
{
    warn "cannot read directory $path: $!\n";
}
#############################################
##
## Download the search result page of every city
##
#############################################
foreach my $city (@city)
{
    # Fill the {city} placeholder of the request template.
    # NOTE(review): the city name is inserted as-is, without percent-encoding.
    my $shead=$head;
    $shead=~s/\{city\}/$city/;
    print $shead;

    my $ip=inet_aton($host) or die "cannot resolve host \"$host\"\n";
    socket(my $sock, AF_INET, SOCK_STREAM, getprotobyname('tcp')) or die "problem creating socket: $!\n";
    connect($sock, sockaddr_in($port, $ip)) or die "problem connecting to \"$host\", port $port: $!\n";

    my $page_file="${path}${city}-google.htm";
    open(my $out, '>', $page_file) or die "cannot write $page_file: $!\n";
    defined(syswrite($sock,$shead)) or die "cannot send request to \"$host\": $!\n";

    # Copy the response (headers included) to the file in 1024-byte chunks.
    # $tail keeps the end of the data seen so far so that a closing tag split
    # across two chunks is still recognised.
    my $tail="";
    while(1)
    {
        my $chunk="";
        my $got=sysread($sock,$chunk,1024);
        if(!defined($got))
        {
            warn "read error from \"$host\": $!\n";
            last;
        }
        last if($got==0);               # server closed the connection

        # Put adjacent tags on separate lines; the parser works line by line.
        $chunk=~s/\>\s*\</>\n</g;
        print $out $chunk;

        # Stop early once the document's closing tag ends the data.
        my $probe=$tail.$chunk;
        last if($probe=~/<\/html>\s*$/i);
        $tail=length($probe)>32 ? substr($probe,-32) : $probe;
    }
    close($out) or die "cannot finish writing $page_file: $!\n";
    close($sock);
    #last;
}
#############################################
##
## Parse the saved pages and write the weather data as XML
##
#############################################
# The report is named after today's date: <path>YYYYMMDD.xml
# NOTE(review): captured text is copied into the XML without escaping.
my $xml_file="$path$year$month$day.xml";
open(my $xml, '>', $xml_file) or die "cannot write $xml_file: $!\n";
print $xml qq{<?xml version="1.0" encoding="utf-8"?>\n<root>\n};
foreach my $city (@city)
{
    my $page_file="${path}${city}-google.htm";
    open(my $html, '<', $page_file) or die "cannot read $page_file: $!\n";

    # Each pattern below recognises one fragment of the weather box; the
    # fragments are emitted in the order in which they occur in the page.
    while(my $line=<$html>)
    {
        # City name: opens a <region> element.
        if($line=~/\<FONT color\=\#cc0033\>(\S+)\<\/FONT\>/)
        {
            print $xml "\t<region city=\"$1\">\n";
        }
        # Two <br>-separated values, written as wind and humidity.
        if($line=~/\<br\>(.+)\<br\>(.+)\<\/div\>/)
        {
            print $xml "\t\t<wind>$1</wind>\n\t\t<humidity>$2</humidity>\n";
        }
        # Start of a forecast cell: opens a <temperature> tag, left unclosed
        # so that the attributes below can be appended to it.
        if($line=~/\<div align=center style=\"padding:\d+px;float:left;font-size:\d+\%\"\>(.+)\<br\>/)
        {
            print $xml "\t\t<temperature day=\"$1\" ";
        }
        # Icon: remember the file name for the download stage and add the
        # img/weather attributes.
        if($line=~/\<img style=\".+\" src=\"(\/\w+)+\/(.+)\" alt=\".+\"\s+title=\"(.+)\"/)
        {
            $image{$2}=0 if(!defined($image{$2}));
            print $xml "img=\"$2\" weather=\"$3\" ";
        }
        # <nobr> text: closes the tag and becomes the element content.
        if($line=~/\<nobr\>(.+)\<\/nobr\>/)
        {
            print $xml ">$1</temperature>\n";
        }
        # Trailing block: writes an empty <level> and closes the region.
        if($line=~/\<div style=\"float:left;font-size:\d+\%;color:\#\w+\"\>(.+)\<\/div\>/)
        {
            print $xml "\t\t<level></level>\n";
            print $xml "\t</region>\n";
        }
    }
    close($html);
    #last;
}
print $xml "</root>\n";
close($xml) or die "cannot finish writing $xml_file: $!\n";
######################################
##
##  Download the weather icons
##
######################################
# Only the keys of %image are file names (the values are just markers).
foreach my $file (sort keys %image)
{
    next unless($file=~/gif/);

    # The name was taken from downloaded HTML. Refuse anything that could
    # leave the image directory or corrupt the request line.
    if($file=~m{[/\\:\s]|[\x00-\x1f]})
    {
        warn "skipping suspicious image name \"$file\"\n";
        next;
    }

    # Fill the {file} placeholder of the request template.
    my $simage_head=$image_head;
    $simage_head=~s/\{file\}/$file/;
    print $simage_head;

    my $ip=inet_aton($host) or die "cannot resolve host \"$host\"\n";
    socket(my $sock, AF_INET, SOCK_STREAM, getprotobyname('tcp')) or die "problem creating socket: $!\n";
    connect($sock, sockaddr_in($port, $ip)) or die "problem connecting to \"$host\", port $port: $!\n";
    defined(syswrite($sock,$simage_head)) or die "cannot send request to \"$host\": $!\n";

    # Read the whole response until the server closes the connection.
    my $response="";
    while(1)
    {
        my $chunk="";
        my $got=sysread($sock,$chunk,1024);
        if(!defined($got))
        {
            warn "read error from \"$host\": $!\n";
            last;
        }
        last if($got==0);
        $response.=$chunk;
    }
    close($sock);

    # The image starts at the "GIF" signature. Search after the HTTP header
    # block when one is present, so header text cannot be mistaken for it.
    my $body_start=index($response,"\r\n\r\n");
    $body_start=$body_start<0 ? 0 : $body_start+4;
    my $gif_at=index($response,"GIF",$body_start);
    if($gif_at<0)
    {
        warn "no GIF data in the response for \"$file\", nothing saved\n";
        next;
    }

    # Create the file only once there is image data to put into it.
    my $target="$image_path$file";
    open(my $out, '>', $target) or die "cannot write $target: $!\n";
    binmode($out);
    print $out substr($response,$gif_at);
    close($out) or die "cannot finish writing $target: $!\n";
}
######################################
##
## Remove the temporary HTML pages
##
# unlink is used instead of a shell "del" so that a working directory
# containing spaces or shell metacharacters cannot break the command.
if(opendir(my $dir,$path))
{
    foreach my $entry (readdir($dir))
    {
        next unless($entry=~/\.htm\z/i && -f "$path$entry");
        unlink("$path$entry") or warn "cannot delete $path$entry: $!\n";
    }
    closedir($dir);
}
else
{
    warn "cannot read directory $path: $!\n";
}