[lug] Perl question
Tkil
tkil at scrye.com
Mon Jun 25 19:11:50 MDT 2001
>>>>> "Chip" == Chip Atkinson <catkinson at circadence.com> writes:
Chip> I'm writing a perl script to rewrite <img src=imgfile.gif> tags
Chip> (as well as some other stuff) in html. I want to change
Chip> imgfile.gif into a name derived by calculating the md5sum of
Chip> imgfile.gif.
Here's yet another over-engineered script.
=============================================================================
#!/usr/bin/perl -w
# Rewrite the SRC attribute of every <img> tag in an HTML document.
#
# The document is read from the files named on the command line (or from
# STDIN) and written to STDOUT.  Every image source is passed through
# transform_src(); tags without a usable SRC are copied through unchanged
# and reported on STDERR.

use strict;

# Hook: compute the replacement for one image source.
# Takes the unquoted SRC value and returns the new value (also unquoted);
# quoting and escaping are handled by the caller.
sub transform_src {
    my ($src) = @_;

    # now do whatever you want to $src
    return "MODIFIED(\"$src\")";
}

# Rewrite a single chunk of the document.
#   $chunk - either one complete tag or a run of text between tags
#   $pos   - offset of the chunk in the input, used only for diagnostics
# Returns the chunk unchanged unless it is an <img> tag with a non-empty
# SRC, in which case the tag is returned with its SRC replaced by the
# double-quoted result of transform_src().
sub rewrite_img_tag {
    my ($chunk, $pos) = @_;

    # skip everything but image tags.
    return $chunk unless $chunk =~ /^<img\b/i;

    # split out the SRC= attribute value.  Quoted values of earlier
    # attributes are skipped as a unit so that "src" inside them is not
    # mistaken for the attribute, and a bare value stops at ">" so the
    # end of the tag is left in $post.
    my ($pre, $q_src, $qq_src, $naked_src, $post) =
        $chunk =~ m{ ^(<img\b (?: [^'"] | "[^"]*" | '[^']*' )*? \s src\s*=\s*)
                     (?: '([^']*)' | "([^"]*)" | ([^\s>]+) )
                     (.*)$ }isx;

    # get rid of quotes.  test with defined/length rather than truth,
    # since "0" is a perfectly valid name for a file.
    my ($src) = grep { defined $_ } ($q_src, $qq_src, $naked_src);
    unless (defined $src && length $src) {
        print STDERR "no \"src\" attribute at position $pos\n";
        return $chunk;
    }

    $src = transform_src($src);

    # the value is always emitted inside double quotes, so any double
    # quote it contains has to be written as an entity.
    $src =~ s/"/&quot;/g;

    # and reconstruct the tag.
    return $pre . '"' . $src . '"' . $post;
}

# Read the whole document, rewrite it chunk by chunk, print the result.
# Returns the process exit status.
sub main {
    my $doc = do { local $/; <> };
    # my $doc = do { local $/; <DATA> };
    $doc = '' unless defined $doc;

    # split up the document into alternating tags and normal text.
    # quoted attribute values may contain ">" and may be empty (alt="").
    my $tag_re = qr/(<(?:[^>'"]+|"[^"]*"|'[^']*')+>)/s;

    my $pos = 0;
    foreach my $chunk (split $tag_re, $doc) {
        my $out = rewrite_img_tag($chunk, $pos);
        print $out;

        # advance by the length of the *input* chunk so that reported
        # positions stay in step with the original document.
        $pos += length $chunk;
    }

    return 0;
}

# run only when executed as a script, so the subs above can be loaded
# and exercised without consuming STDIN.
exit main() unless caller;

1;
__DATA__
<!-- some test data -->
<img src="alpha">
<img
src="beta">
<img src = "gamma">
<img src = delta>
<img src='epsilon'>
<img align="left">
More information about the LUG
mailing list