[lug] Perl question

Tkil tkil at scrye.com
Mon Jun 25 22:39:43 MDT 2001


having said all that, i noticed that it was screwing up a few more
test cases.  the use of \S on an unquoted value was grabbing the
closing angle bracket as well, which was bad.  the revised code is:
=============================================================================
#!/usr/bin/perl -w

use strict;

# my $doc = do { local $/; <> };
my $doc = do { local $/; <DATA> };

# split up the document into alternating tags and normal text.
my @chunks = split /(<(?:[^>\'\"]+|"[^\"]+"|'[^\']+')+>)/s, $doc;

my $pos = 0;

CHUNK:
foreach (@chunks)
{
    # print STDERR "|$_|\n";

    # skip everything but image tags.
    goto DONE unless /^<img/i;

    my $orig = $_;

    # split out the SRC= attribute value.
    my ($pre, $q_src, $qq_src, $naked_src, $post) = 
      m{ ^(<img.*?src\s*=\s*)
         (?:'([^\']+)'|"([^\"]+)"|([^\s>]+))
         (.*)$ }isx;

    # get rid of quotes.  the condition is necessary, since "0" is a
    # perfectly valid name for a file.
    my $src = ( $q_src  && $q_src  ne '' ? $q_src  :
                $qq_src && $qq_src ne '' ? $qq_src :
                $naked_src );
    unless ($src) 
    {
        print STDERR "no \"src\" attribute at position $pos\n";
        goto DONE;
    }

    # now do whatever you want to $src
    $src = "MODIFIED(\"$src\")";

    # make it fancy.
    $src =~ s/\"/"/g;
    $src = "\"$src\"";
    
    # and reconstruct the tag.
    $_ = $pre . $src . $post;

    # print STDERR
    #   "orig: $orig\n",
    #   "new:  $_\n";

  DONE:
    $pos += length;
    print;
}

exit 0;

__DATA__
<!-- some test data -->
<img src="alpha">
<img
  src="beta">
<img src = "gamma">
<img src = delta>
<img src='epsilon'>
<img align="left">
<img src='zeta eta'>
<link src="iota">
<IMG SRC=kappa.gif>




More information about the LUG mailing list