[Copy to clipboard] [ - ]
CODE:
#
# 本脚本是一个辅助你开发 LWP & HTML::TreeBuilder 应用的
# 工具。使用它可以轻易地搜寻到你所要分析的 HTML 页面元素
# 的路径,并且自动生成代码。因此你无须仔细地分析 HTML 页
# 面便可以精确地定位页面元素的位置。
#
use strict;
use warnings;
use HTML::TreeBuilder;
use Win32::Clipboard;
use Data::Dumper;
use LWP;
my $url = shift;
our $url_or_file = 'X';
our $tree = new HTML::TreeBuilder();
our $ua = new LWP::UserAgent;
our $html;
if ( $url ){
if ( -r $url ){
$tree->parse_file( $url );
$url_or_file = 'file';
}
else{
&geturl( $url );
}
}
my $keyword = shift;
if ( $keyword ){
&work( $keyword );
exit 1;
}
my $prompt = "输入一个关键字或者执行一个命令,输入 '/help' 查看帮助:\n";
$Data::Dumper::Sortkeys = \&my_filter;
print $prompt;
while(<>){
print "\n";
chomp;
next unless $_;
if ( $_ eq '/help' ){
print '
/help : 查看本文本
/saveto <file name> : 保存刚刚下载到的页面到文件
/get <url> : 再次获取一个新的 URL
/open <file name> : 打开分析磁盘上的文件
/newperl <file name> : 生成一个新的 LWP 应用框架
';
}
elsif ( $_ =~ m{^/saveto} ){
if ( $' =~ /^\s+(\S+)/ ){
&saveto( $1 );
}
else{
print "用法:/saveto <file name>, 缺少文件名\n";
}
next;
}
elsif( $_ =~ m{^/get} ){
if ( $' =~ /^\s+(\S+)/ ){
&geturl( $1 );
}
else{
print "用法:/get <url>, 缺少 URL\n";
}
next;
}
elsif( $_ =~ m{^/open} ){
if ( $' =~ /^\s+(\S+)/ ){
&getfile( $1 );
}
else{
print "用法:/open <file name>, 缺少文件名\n";
}
next;
}
elsif( $_ =~ m{^/newperl} ){
if ( $' =~ /^\s+(\S+)/ ){
&newperl( $1 );
}
else{
print "用法:/newperl <file name>, 缺少文件名\n";
}
next;
}
else{
if ( $url_or_file eq 'X' ){
print "你必须先执行一下 /open 命令或者 /get 命令。\n";
next;
}
&work( $_ );
}
}continue{
print "\n"x2;
print $prompt;
}
exit 1;
sub saveto($){
my $fileName = shift;
if ( $url_or_file ne 'url' ){
print "当前正在处理的不是 URL\n";
}
else{
if( open FH, ">$fileName" ){
print FH $html;
close FH;
print "已经保存到 $fileName\n";
}
else{
print "创建文件失败!\n";
}
}
}
sub geturl($){
my $url = shift;
print "正在联系 Web 服务器……\n";
my $res = $ua->get( $url );
print "收到回应,正在解析……";
unless ( $res->is_success ){
print "请求失败![", $res->status_line, "]\n";
return;
}
$html = $res->content;
$url_or_file = 'url';
$tree->parse( $html );
print "解析完毕\n";
}
sub getfile($){
my $fileName = shift;
unless ( -r $fileName ){
print "文件不可读\n";
return;
}
$tree->parse_file( $fileName ) or print "无法解析文件。\n$!\n";
$url_or_file = 'file';
}
sub newperl($){
my $fileName = shift;
local $/;
my $str = <DATA>;
if( open FH, ">$fileName" ){
print FH $str;
close FH;
print "已经保存到 $fileName\n";
}
else{
print "创建文件失败!\n";
}
}
sub work($){
my $keyword = shift;
my @result = ();
&findText( \@result, $tree, $keyword, '->' );
if ( @result == 1 ){
print "找到了: '$result[0]->{text}', 它的位置是:\n\$tree->$result[0]->{path}\n";
print "\n再没有更多的结果了。\n";
my $ret = &check_it( $result[0] );
if ( $ret eq 'ok' ){
return;
}
}
else{
print "找到了很多匹配“$keyword”的字符串,下面挨个儿辨认一下。\n";
foreach my $item (@result){
my $ret = &check_it( $item );
if ( $ret eq 'ok' ){
return;
}
print "\n"x3;
print '-'x60, "\n";
print "看来不是这个,那就接着看下一个搜索结果。\n";
}
}
print "\n"x3;
print "奇怪了,怎么会没有呢?你确定你没有看错?哈哈!肯定是你摁错键了!\n";
}
sub check_it{
my $item = shift;
my $path = $item->{path};
while(1){
last unless ( $path =~ s/((\[\d+?\])|(\{[^\{\}]+?\}))$// );
print "-"x60, "\n";
eval "print \$tree$path->as_HTML()";
eval "print Dumper \$tree$path";
}continue{
print "-"x60, "\n";
print "是这个吗?是(Y)/不是(N),看不清楚的话,请直接按回车放大再看。";
my $input = <STDIN>;
chomp $input;
if ( $input =~ /^Y$/i ){
my $clip = Win32::Clipboard();
$clip->Set( "\$tree$item->{path}" );
print "\n"x2;
print "已经将路径放入 windows 剪切板。\n";
print "\n"x2;
return 'ok';
}
elsif( $input =~ /^N$/i ){
last;
}
}
return 'not';
}
sub findText($$$$){
my ( $result, $node, $keyword, $path ) = @_;
return undef unless defined $node;
my $type = ref $node;
if ( $type ){
if ( $type !~ /^(SCALAR|ARRAY|HASH|)$/ ){
"$node" =~ /=(\w+)/;
$type = $1;
}
if ( $type eq 'HASH' ){
for my $key ( keys %$node ){
next if $key eq '_parent';
next if $key eq '_body'; # ????
&findText( $result, $node->{$key}, $keyword, $path . "{$key}" );
}
}
elsif( $type eq 'ARRAY' ){
for my $index ( 0..$#$node ){
&findText( $result, $node->[$index], $keyword, $path . "[$index]" );
}
}
elsif( $type eq 'SCALAR' ){
&findText( $result, $$node, $keyword, $path );
}
}
else{
if ( index( $node, $keyword ) != -1 ){
push @$result, { 'path' => $path, 'text' => $node };
}
}
return undef;
}
sub my_filter {
my $hash = shift;
return [
grep { $_ ne '_parent' } keys %$hash
];
}
__DATA__
use strict;
use warnings;
use Data::Dumper;
use LWP;
use HTML::TreeBuilder;
my $tree = new HTML::TreeBuilder;
my $ua = new LWP::UserAgent;
my $res = $ua->get( '__URL__' );
my $html = $res->content;
$tree->parse( $html );
$Data::Dumper::Sortkeys = \&my_filter;
print $tree->{_content}[1]{_content}[$j]{_content}[2]{_content}[$i]{_content}[1]{_content}[0]{href};
sub my_filter {
my $hash = shift;
return [
grep { $_ ne '_parent' } keys %$hash
];
}