Tag Archive for filter

Tail requests for HTML files

# Follow the access log and show only GET requests for .html files.
# The pattern is single-quoted so the embedded " does not terminate the shell
# string, and the dot is escaped so it matches a literal "." only.
# --line-buffered makes grep emit each match immediately, even when its output
# is piped onward.
tail -f access.log | grep --line-buffered -P 'GET[^"]*\.html'

source

filterXMLAssocArray

// Filters specified tags from a supplied array of XML nodes by tag name.
//
// filterNodeName  Tag name to keep; compared with == against each node's nodeName.
// xmlNodes        Array of nodes; each entry is read for .nodeName and .attributes.
//
// Returns a new Array holding, in source order, the attributes object of every
// matching node. The attributes objects are referenced, not copied.
private function filterXMLAssocArray(filterNodeName:String, xmlNodes:Array):Array{

	var tmpArray:Array = new Array();

	// Nothing to filter: hand back the empty result instead of touching .length on null.
	if (xmlNodes == null){
		return tmpArray;
	}

	for (var i:Number = 0; i<xmlNodes.length; i++){
		if (xmlNodes[i].nodeName == filterNodeName){
			// Append directly. The previous version first stored a throwaway
			// Array in this slot and overwrote it on the very next line.
			tmpArray.push(xmlNodes[i].attributes);
		}
	}
	return tmpArray;
};

source

replace content tags with callback via regex

/**
*  Replace all tags matching a regexp with value of callback function
*  (passes grouped subpatterns to callback as params)
*
*  @param string $content   Text to search and rewrite.
*  @param string $tag       PCRE pattern (including delimiters) describing one tag.
*  @param mixed  $function  Callback identifier; handed unchanged to my_wp_plugin_buffer_func().
*  @param mixed  $args      Not used by this function; kept so existing callers keep working.
*  @return string  $content with each distinct matched tag replaced by the callback output.
*/
function my_wp_plugin_tag_action($content,$tag,$function,$args = FALSE) {
	// PREG_SET_ORDER yields one entry per match: [0] is the full tag text and
	// [1..] are its subpatterns. The default PREG_PATTERN_ORDER groups by
	// subpattern instead, which the loop below cannot work with.
	if (!preg_match_all($tag,$content,$matches,PREG_SET_ORDER)) {
		// 0 matches or a pattern error (FALSE): nothing to replace
		return $content;
	}
	// full tag text => replacement text; keying by tag text filters duplicates
	$replacements = array();
	foreach ($matches as $match) {
		// split the full tag off, leaving only the grouped subpatterns as args
		$full_tag = array_shift($match);
		if ($full_tag === '' || array_key_exists($full_tag, $replacements)) {
			continue;
		}
		// call function once per distinct tag, remembering its output
		$replacements[$full_tag] = my_wp_plugin_buffer_func($function,$match);
	}
	// replace all tags with corresponding text
	return str_replace(array_keys($replacements),array_values($replacements),$content);
}

source

capitalize words

# Capitalize the first letter of every word in a string.
#   $line - text to transform (handed back untouched when undefined)
# Returns the transformed copy; the caller's variable is not modified.
sub wordcaps {
    my $line = shift;
    return $line unless defined $line;

    # Upper-case a word character only where a word starts. The lookbehind
    # also rejects a preceding apostrophe, so "don't" becomes "Don't" rather
    # than "Don'T".
    $line =~ s/(?<![\w'])(\w)/\u$1/g;
    return $line;
}

source

Truncate filenames but keep file extensions intact

#! /usr/bin/perl -w
use strict;
use File::Copy;

# For every .mp3 file named on the command line, make a copy whose file name
# is cut down to its first 27 characters plus the ".mp3" extension.
# Names that are already short enough (or do not end in .mp3) are skipped.
foreach my $mp3file (@ARGV) {
    my $oldname = $mp3file;

    # Shorten the file name only; a leading directory part is carried over as is.
    my ($dir, $name) = $oldname =~ m{\A(.*/)?([^/]*)\z}s;
    $dir = '' unless defined $dir;

    # Anchored at both ends, with a literal dot, so only a real ".mp3"
    # suffix counts as the extension.
    (my $shortname = $name) =~ s/\A(.{0,27}).*(\.mp3)\z/$1$2/is;

    if ($shortname eq $name) {
        print "Skipping $oldname: name is OK for Mac.\n";
        next;
    }

    my $newname = $dir . $shortname;

    # Two long names can share their first 27 characters; never clobber a file.
    if (-e $newname) {
        warn "Skipping $oldname: $newname already exists.\n";
        next;
    }

    # Report a failed copy and move on instead of announcing success anyway.
    unless (copy($oldname, $newname)) {
        warn "Cannot copy $oldname to $newname: $!\n";
        next;
    }
    print "$oldname copied to $newname\n";
}

source

Grab linked files from a list of web pages

#!/usr/local/bin/perl    -w
use strict;
use LWP::Simple;
use LWP::UserAgent;
use HTML::LinkExtor;
use URI::URL;
#MAIN----------------------------------------------------------------------------
#Ask which file types to grab, then read URLs (one per line) from the files
#named on the command line, or from STDIN, and grab the matching linked files.
my $tag_type;             #Extract info from this type of tag ONLY (list_links compares it as a plain string, so give one tag name)
my $local_directory;      #Save files here
my $extensions;           #Only save files with these extensions (ok to use | here)
$extensions = &grab_what();  #Let the user choose what type of files to grab
#Need to prefix the sub with "&" here or perl thinks its a call to a prototype and gives a warning
# $ARGV[0] = "c:/windows/desktop/list.txt";  #List of urls to search for files
$local_directory = "c:/windows/desktop/grabit/";  #Store grabbed files here (the trailing slash is required: file names are appended directly)
$tag_type = "a";  #Look in <A> tags for file URIs
#
die "
*******************
ERROR
*******************
Please create the directory:

$local_directory

" unless -d $local_directory; #unless local really is a directory...
#
while (my $line = <>) { #Assume we are reading a file with one URL on each line
    chomp(my $url = $line);
    $url =~ s/\A\s+|\s+\z//g;  #tolerate stray blanks or a CR around the URL
    next if $url eq "";

    #Hand over the cleaned-up URL, not the raw input line with its newline
    grabit($url, $tag_type, $local_directory, $extensions);
    print $url . "\n";
    #print " ";  #delete urls from list file once they've been grabbed
}
#GRABIT--------------------------------------------------------------------------
#Thin pass-through to grab_hyperlinked.
#Gives the main loop a short name to call once per URL.
#Arguments, in order: url, tag_type, local_directory, extensions.
sub grabit {
    # Forward exactly the four expected arguments (missing ones become undef).
    my @grab_args = @_[0 .. 3];

    return grab_hyperlinked(@grab_args);
}
#GRAB_HYPERLINKED----------------------------------------------------------------
#Search the file at URL for tags of type TAG_TYPE and grab those targets that end with arbitrarily chosen EXTENSIONS
#  $url             - page to scan for links
#  $tag_type        - tag name handed on to list_links
#  $local_directory - directory prefix; file names are appended directly, so it must end with a slash
#  $extensions      - regex alternation of extensions without dots, e.g. "jpg|gif"
sub grab_hyperlinked {
    my ($url, $tag_type, $local_directory, $extensions) = @_;
    my @links = list_links($url, $tag_type);

    #@links = @links[0 .. 7];  #only get the first X images (or comment this out to get all)

    # Built once per call (the old /o flag froze the first $extensions for the
    # whole run). The dot is escaped so that only a literal "." separates the
    # extension, and the alternation is grouped so the anchor applies to every
    # alternative.
    my $wanted = qr{\.(?:$extensions)\z}i;

    foreach my $image_uri (@links) {
        next if !defined $image_uri or $image_uri eq "";
        next unless $image_uri =~ $wanted;  #Only save files with the specified extensions

        # Local file name = everything after the last slash of the URI
        my $image_name = "$image_uri";
        $image_name =~ s{.*/}{};

        $image_name = smart_save($image_name, $local_directory);  #Don't overwrite files with same name
        save_image($image_uri, $local_directory . $image_name);
        #print $image_uri;
    }
    return;
}
#SMART SAVE------------------------------------------------------------------------------
#Checks whether the file FILE_NAME already exists in DIRECTORY and, if so,
#adds an integer to the end of the file's name, before the extension.
#ie, if there are 2 files named foo.bar, then the second one to be saved will be renamed foo-1.bar
#  $file_name - bare file name (no directory part)
#  $directory - directory prefix; concatenated directly, so it must end with a slash
#The RETURN VALUE is the name to save under (unchanged when it is still free).
#Nothing is created or renamed here; only existence is tested.
sub smart_save {
    my ($file_name, $directory) = @_;

    # The requested name is free: use it as is.
    return $file_name unless -e $directory . $file_name;

    # Split at the FIRST dot: "foo.tar.gz" gives stem "foo" and extension ".tar.gz".
    my ($stem, $ext) = $file_name =~ m{\A([^.]*)(.*)\z}s;

    # Count from 1 so the first duplicate is foo-1.bar, as described above.
    my $int = 1;
    $int++ while -e $directory . $stem . "-" . $int . $ext;

    return $stem . "-" . $int . $ext;
}

#SAVE IMAGE------------------------------------------------------------------------------
#Fetch the resource at URL $file and store the response body in the local file $download.
#Example:
#  my $file = 'http://localhost/libraries/images/oiltower/top_boom.jpg';   #This is the name of the image on the server
#  my $download = 'c:/windows/desktop/grabbed.jpg';   #This is where the image will be saved locally
#  save_image($file, $download);
#Returns the response object, so callers can inspect the outcome.
sub save_image {  #copy web FILE to local DOWNLOAD location
    my ($file, $download) = @_;

    my $user_agent = LWP::UserAgent->new;
    my $request = HTTP::Request->new('GET', $file);

    # A file name as second argument makes the user agent write the content to that file
    my $response = $user_agent->request($request, $download);

    # Say so when a download fails instead of silently leaving no file behind
    warn "Could not save $file to $download: " . $response->status_line . "\n"
        unless $response->is_success;

    return $response;
}
#LIST LINKS---------------------------------------------------------------------
#Extract the link attribute values from every tag of type TAG_TYPE on the page at URL.
#Returns a list with the absolute form of each link, resolved against the response's base.
#No filtering by file extension happens here.
#This code is adapted from the HTML::LinkExtor docs
#Example:
#  my $url = "http://localhost/lwp/pics.html";  # for instance
#  my @links = list_links($url, "a");
sub list_links {
    my ($url, $tag_type) = @_;
    my $user_agent = LWP::UserAgent->new;
    #$user_agent->agent("MSIE/5.5 " . $user_agent->agent);

    # Filled by the parser callback below
    my @images = ();

    # Make the parser.  Unfortunately, we don't know the base yet
    # (it might be different from $url), so links are collected as found
    # and made absolute afterwards.
    my $p = HTML::LinkExtor->new(
        sub {
            my ($tag, %attributes) = @_;
            return if $tag ne $tag_type;  # we only look closer at the tags specified by TAG_TYPE
            push(@images, values %attributes);
        }
    );

    # Request document and parse it as it arrives
    my $response = $user_agent->request(HTTP::Request->new(GET => $url),
        sub { $p->parse($_[0]) });

    warn "Could not fetch $url: " . $response->status_line . "\n"
        unless $response->is_success;

    # Expand all collected URLs to absolute ones
    my $base = $response->base;
    @images = map { url($_, $base)->abs } @images;

    # Print them out
    #print join("\n", @images), "\n";
    return @images;
}

#******************************************************************************
#*                                                                            *
#*                       USER-QUERY FUNCTIONS:                                *
#*                                                                            *
#******************************************************************************

#GRAB WHAT?--------------------------------------------------------------------
#Let the user choose what type(s) of files to grab
#Prints a numbered menu on STDOUT, reads one line from STDIN and returns the
#chosen entry (a regex alternation of extensions such as "jpg|gif|mpg").
#Dies when the reply is not the number of one of the listed options.
sub grab_what(){
    my @extensions = qw(
        jpg|gif|mpg
        wav|zip
        zip
        wav
        mp3
    );

    print "Welcome to Grabit by Noah Sussman\n\n";
    my $option_id = 1;
    foreach my $ext (@extensions){
        print "$option_id) $ext\n";
        $option_id++;
    }
    print "\nWhat type(s) of files would you like to grab?\n";

    my $selection = <STDIN>;
    $selection = "" unless defined $selection;  # end of input counts as no answer
    $selection =~ s/\A\s+|\s+\z//g;

    # Accept only a whole number within the menu range. Without the range
    # check, 0 would wrap around to the last entry and a too-large number
    # would return undef.
    die "You must enter a number corresponding to an option!!"
        unless ($selection =~ /\A[0-9]+\z/
            && $selection >= 1
            && $selection <= scalar @extensions);

    my $chosen = $extensions[$selection - 1];
    print "Extension set to \"$chosen\".\nGrabbing...\n";
    return $chosen;
}

##############################
##############################
##############################
##############################
##############################
##############################
##############################
##########END#################
##############################
##############################
##############################
##############################
##############################
##############################
##############################

source

Remove duplicate lines from a text file with Perl

#!/usr/bin/perl -w
use strict;
use File::Basename qw(fileparse);

# Write a cleaned copy of a text file: surrounding whitespace is stripped from
# every line, blank lines are dropped, and only the first occurrence of each
# remaining line is kept (original order preserved).
# Usage: <script> FILE    -> writes no_dupes_FILE into FILE's directory
my $origfile = shift;
die "Usage: $0 FILE\n" unless defined $origfile && length $origfile;

# Prefix the file name only, so an input like "dir/list.txt" yields
# "dir/no_dupes_list.txt" rather than a path into a directory that does not exist.
my ($name, $dir) = fileparse($origfile);
my $outfile = $dir . "no_dupes_" . $name;
my %hTmp;   # line text => number of times seen so far

open(my $in,  '<', $origfile) or die "Couldn't open input file: $!";
open(my $out, '>', $outfile)  or die "Couldn't open output file: $!";

while (my $sLine = <$in>) {
    $sLine =~ s/^\s+//;            #strip leading/trailing whitespace (including the newline)
    $sLine =~ s/\s+$//;
    next if $sLine eq "";          #remove empty lines
    print {$out} "$sLine\n" unless $hTmp{$sLine}++;
}

# Buffered write errors only surface when the handle is closed
close $out or die "Couldn't finish writing output file: $!";
close $in;

source

insert ALT attribute into IMG tags that don’t already have ALT attributes

#!/usr/local/bin/perl    -w
use strict;

############################################################
#                                                          #
#                                                          #
#                                                          #
#                      NOAH SUSSMAN                        #
#                                                          #
#                       insert_alt                         #
#                                                          #
#                Created 5/11/01 at 01:38 PM               #
#                                                          #
# Insert ALT element into IMG tags that lack it.  The alt  #
# text inserted is identical to the contents of the <TITLE>#
# tag -- or not.                                           #
#                                                          #
#                                                          #
############################################################

=item THIS NEEDS TO BE ADJUSTED SO IT WORKS WITH JSP
(02:35:50) VERSUSearth: I'll have to adjust my insert_alt script to take JSP into account next time
(02:36:35) mitiege: yep- I'm guessing you are looking for the first closing sign and inserting before that..
(02:36:47) mitiege: a simple fix would be to put the alt first in the img tag...
(02:37:06) VERSUSearth: yeah that's probably a good idea
=cut

# Edit the files named on the command line in place, keeping FILE.bk backups
$^I=".bk";

#undef $/;           # read in whole file, not just one line

my $text = "" ;    #Insert blank alt attribute

# One IMG tag: "<img", then everything up to and including the closing ">".
# A JSP expression (<% ... %>) inside the tag is consumed as a unit so that
# its own ">" is not mistaken for the end of the tag.
my $img_tag = qr{(<img\b)((?:<%.*?%>|[^>])*>)}is;

# Return the tag unchanged when it already carries an alt attribute; otherwise
# put alt right after "<img", as suggested in the note above. Inserting at the
# front also keeps the attribute clear of a trailing "/" in <img ... />.
sub add_alt_if_missing {
    my ($open, $rest, $alt_text) = @_;
    return $open . $rest if $rest =~ m{(?<![\w-])alt\s*=}i;
    return $open . qq{ alt="$alt_text"} . $rest;
}

# Files are processed line by line, so a tag split across lines is not handled.
while (<>) {

    #m{<title>(.*?)</title>}ix;
    #my $text = $1;    #Use the document title as the ALT text

    # Decide tag by tag, so one IMG with alt no longer shields the others on its line
    s{$img_tag}{ add_alt_if_missing($1, $2, $text) }ge;
    print;

}

source

Gray out (dim) an img with cross-browser CSS

<!-- HTML -->
<!-- The span is the overlay box that the CSS lays over the image.
     alt is required on img; it is left empty here, so fill in real text
     when the thumbnail conveys content. -->
<a class="a-thumb"><img src="/img/proxy-thumb.gif" alt="" /><span> </span></a>

/* CSS */

/* Wrapper link: bordered, and the positioning context for the overlay span */
a.a-thumb
{
    border: 1px solid black;
    position: relative;
}

/* Thumbnail image: fixed 60x60 box without its own border */
a.a-thumb img
{
    width: 60px;
    height: 60px;
    border: 0;
}

/* Overlay: a black 60x60 box pinned to the top-left corner of the link,
   stacked above the image and made 20% opaque */
a.a-thumb span
{
    background-color: #000000;
    position: absolute;
    top: 0;
    left: 0;
    width: 60px;
    height: 60px;
    z-index: 100;
    /* the same 20% opacity, spelled three ways for different browsers */
    filter: alpha(opacity=20);
    -moz-opacity: 0.2;
    opacity: 0.2;
}

source