Wednesday, June 2, 2010

Grab all hrefs from an HTML page whose link text contains "View"

Quick and dirty Perl program to grab all links from a webpage which have anchors with the text "View" in them:

#!/usr/bin/perl
#
# Print the href of every <a> element whose visible text contains "View".
#
# Usage:  perl this_script.pl FILE.html
#
# Output: one href per line on STDOUT, in the order the anchors are returned
#         by look_down. Anchors that have no href attribute are skipped.
# Errors: dies with a message if no file argument is given or the file
#         cannot be opened.

use strict;
use warnings;

# Boolean constants kept from the original script; nothing below uses them.
use constant false => 0;
use constant true => 1;

use HTML::TreeBuilder;
use HTML::FormatText;    # loaded by the original script; not called below

my $file = $ARGV[0];
die "Usage: $0 FILE.html\n" unless defined $file;

# Open the file ourselves so that a missing or unreadable file is reported
# instead of silently producing an empty tree.
open my $fh, '<', $file or die "Cannot open '$file': $!\n";
binmode $fh;

my $html = HTML::TreeBuilder->new();
$html->parse_file($fh);
close $fh;

my @anchors = $html->look_down( '_tag', 'a' );

for my $anchor (@anchors) {
    my $target = $anchor->attr('href');

    # An <a name="..."> has no href, so there is nothing to print for it.
    next unless defined $target;

    # as_text concatenates the text of all descendants, so "View" is found
    # even when it is wrapped in <b>, <span>, etc., or follows an <img>.
    # Looking only at the first child would miss those anchors.
    my $text = $anchor->as_text();

    # string contains View
    if ( $text =~ m/View/ ) {
        print $target . "\n";
    }
}

# Release the parse tree explicitly; harmless where it is not required.
$html->delete();



1 comment:

Niall Haslam said...

Oh god NO. NOT PERL.