#!/usr/bin/perl -w
# xurl - extract unique, sorted list of links from URL

use HTML::LinkExtor;
use LWP::Simple;

$base_url = shift or die "Usage: $0 URL\n";

# HTML::LinkExtor uses HTML::Parser; passing undef as the callback
# tells it to accumulate links for later retrieval with links().
$parser = HTML::LinkExtor->new(undef, $base_url);
$parser->parse(get($base_url));

# The links() method clears the link list, so you can call it only
# once per parsed document.  It returns a list of array references,
# each with the element (tag) name at the front followed by pairs of
# attribute names and attribute values.
# The HTML code:
#
#   <A HREF="http://www.perl.org">
#   <IMG SRC="images/big.gif" LOWSRC="images/big-lowres.gif">
#   </A>
#
# would result in a data structure like this:
#
#   ( [ a,   href,   "http://www.perl.org" ],
#     [ img, src,    "images/big.gif",
#            lowsrc, "images/big-lowres.gif" ] )
@links = $parser->links;

foreach $linkarray (@links) {
    my @element  = @$linkarray;
    my $elt_type = shift @element;                  # element (tag) name
    while (@element) {
        # pull off the next attribute name and its value
        my ($attr_name, $attr_value) = splice(@element, 0, 2);
        # print "$attr_name -> $attr_value\n";
        $seen{$attr_value}++;
    }
}

foreach (sort keys %seen) { print $_, "\n" }
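
# A minimal alternative sketch, not part of the original recipe: instead of
# passing undef, HTML::LinkExtor->new() also accepts a callback, which is
# invoked with the tag name and attribute/value pairs for each link-bearing
# element as it is parsed, so %seen can be filled without collecting the
# whole link list first.  The subroutine name "collect" is only illustrative.
# When a callback is installed, links() returns an empty list, so the loop
# above would be dropped in that variant.
#
#   sub collect {
#       my ($tag, %attrs) = @_;
#       $seen{$_}++ for values %attrs;
#   }
#   $parser = HTML::LinkExtor->new(\&collect, $base_url);
#   $parser->parse(get($base_url));
#
# Example run (output depends on the page fetched):
#   % xurl http://www.perl.org
#   ... one unique absolute URL per line, sorted ...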