/// <summary>
/// Collects the name/value pairs of the &lt;input&gt; elements found in a page
/// and joins them as "name1=value1&amp;name2=value2".
/// </summary>
/// <param name="strPage">HTML text to scan. Null or empty yields an empty result.</param>
/// <returns>
/// The pairs in document order, separated by '&amp;', or an empty string when
/// no input element with both a quoted name and a quoted value is found.
/// Values are copied verbatim; they are not URL-encoded.
/// </returns>
public static string GetInputControlsNameAndValueInPage(string strPage)
{
    if (string.IsNullOrEmpty(strPage))
    {
        return string.Empty;
    }

    // Verbatim string: backslashes are literal and "" stands for one double quote.
    // Only matches inputs whose name attribute precedes the value attribute and
    // whose attribute values are double-quoted.
    const string strRegExPatten =
        @"<\s*input.*?name\s*=\s*""(?<Name>.*?)"".*?value\s*=\s*""(?<Value>.*?)"".*?>";
    Regex reg = new Regex(strRegExPatten, RegexOptions.Multiline);

    System.Text.StringBuilder pairs = new System.Text.StringBuilder();
    foreach (Match m in reg.Matches(strPage))
    {
        // Put the separator in front of every pair but the first, so there is
        // no trailing '&' to strip (stripping it used to throw on zero matches).
        if (pairs.Length > 0)
        {
            pairs.Append('&');
        }
        pairs.Append(m.Groups["Name"].Value)
             .Append('=')
             .Append(m.Groups["Value"].Value);
    }
    return pairs.ToString();
}
Tag Archive for regex
Convert whitespace to underscores
Find (id=")(\w+)(\s) Replace $1$2_
Mexican RFC RegEx
^[A-Za-z]{4}-\d{6}(?:-[A-Za-z\d]{3})?$
Category: Uncategorized |
Tags: contribuyentes, Expression, federal, Mexico, regex, registro, regular, rfc
Tail requests for HTML files
tail -f access.log|grep -P "GET[^\"]*\.html"
Category: Uncategorized |
Tags: apache, Bash, commandline, debugging, filter, html, log, one-liners, perl, regex, request, tail
check_email_address() email validator
/**
 * Check that a string has the overall shape of an e-mail address.
 *
 * The address must contain exactly one "@", with 1-64 characters before it
 * and 1-255 after it. Every dot-separated piece of the local part must be
 * either an unquoted atom or a double-quoted string. The domain must be
 * either an (optionally bracketed) run of digits and dots, or at least two
 * dot-separated labels made of letters, digits and inner hyphens.
 *
 * Uses preg_match(): the ereg() family was removed in PHP 7.
 *
 * @param string $email Address to check.
 * @return bool True when the address passes every check, false otherwise.
 */
function check_email_address($email) {
    // The D modifier makes "$" match only at the very end of the subject,
    // so a trailing newline is not silently accepted.

    // First, check that there is exactly one @ symbol and the lengths are right.
    if (!preg_match('/^[^@]{1,64}@[^@]{1,255}$/D', $email)) {
        // Wrong number of characters in one section, or wrong number of @ symbols.
        return false;
    }

    // Split it into sections to make life easier.
    list($local_part, $domain) = explode('@', $email);

    // An unquoted atom, or a quoted string without ( ) | \ or " inside.
    $local_piece = '/^(([A-Za-z0-9!#$%&\'*+\/=?^_`{|}~-][A-Za-z0-9!#$%&\'*+\/=?^_`{|}~.-]{0,63})|("[^(\\\\|")]{0,62}"))$/D';
    foreach (explode('.', $local_part) as $piece) {
        if (!preg_match($local_piece, $piece)) {
            return false;
        }
    }

    // If the domain looks like an IP literal it is accepted as-is;
    // otherwise it has to be a valid domain name.
    if (!preg_match('/^\[?[0-9.]+\]?$/D', $domain)) {
        $labels = explode('.', $domain);
        if (count($labels) < 2) {
            return false; // Not enough parts to domain
        }
        foreach ($labels as $label) {
            if (!preg_match('/^(([A-Za-z0-9][A-Za-z0-9-]{0,61}[A-Za-z0-9])|([A-Za-z0-9]+))$/D', $label)) {
                return false;
            }
        }
    }
    return true;
}
Test for (in)valid regex
# Check whether a string compiles as a Perl regular expression.
#
# The candidate pattern is fetched with shift and compiled inside an eval
# block; when compilation fails the script dies and reports the compiler's
# message.
#
# (you may need to disable warnings)
my $pattern = shift;
my $regex   = eval { qr/$pattern/ };
if ($@) {
    die "Your regex doesn't seem valid : $@";
}
Category: Uncategorized |
Tags: regex
Kruse’s hasClass
/*
** Matt Kruse's hasClass, with slight modification
** Determine if an object or class string contains a given class.
**
** obj       - either a class string ("foo bar") or an object with a
**             className property
** className - the class to look for; it is matched literally, as a whole
**             whitespace-delimited token
**
** Returns true when the class is present, false otherwise (including when
** obj is undefined/null or is neither a string nor an object with className).
*/
function hasClass (obj, className) {
    if (typeof obj == 'undefined' || obj == null || !RegExp) { return false; }
    // Escape regex metacharacters so the class name is matched literally.
    var escaped = String(className).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // "\\s" (not "s"): the class must be delimited by whitespace or by the
    // ends of the string, not by a literal letter "s".
    var re = new RegExp("(^|\\s)" + escaped + "(\\s|$)");
    if (typeof(obj) == "string") {
        return re.test(obj);
    }
    else if (typeof(obj) == "object" && obj.className) {
        return re.test(obj.className);
    }
    return false;
}
Category: Uncategorized |
Tags: className, css, DOM, hasclass, icanhasclass, mattkruse, regex, selector, utilities, whitespace
search and replace across multiple files with Perl
# Print the result of a search-and-replace to the terminal (the file is untouched).
perl -pe 's/bart/milhouse/g' test.html
# Search-and-replace in place, keeping a backup in test.html.bak.
# Leave the suffix off of -i to overwrite without a backup.
perl -i.bak -pe 's/bart/milhouse/g' test.html
# Echo the number of words in a file.
# Each line's \w+ matches are counted into $t; "+ 0" prints 0 for an empty file.
perl -lne 'END { print $t + 0 } @w = /(\w+)/g; $t += @w' test.html
# cat file with line numbers
# -p prints $_ each iteration; $. is the current input line number
perl -pe '$_ = "$. = $_"' test.html
# Recursive search-and-replace; only on shells that expand the ** glob.
# /g replaces every occurrence on a line, like the commands above.
perl -i.bak -pe 's{bart}{milhouse}g' **/*html
Category: Uncategorized |
Tags: Bash, batch, code, commandline, diamond, edit, editing, files, FileSystem, hacking, one-liners, perl, productivity, regex, replace, search, text, tips, tricks, wrapper
scraper
#! /usr/bin/perl -w
use strict;
#use LWP::Simple;
use LWP::UserAgent;
package NoFace;
#****************************************************************
# SYNOPSIS: *
#****************************************************************
# NOFACE is a script to grab strings off arbitrary web pages based on a regular expression
#****************************************************************
# get_facts FILE
#****************************************************************
#HEADLINES currently cant be called as a method, only internally as a sub
#which is fine.
# headlines FACT, REGEX, URI, QUERY_STRING;
# EXAMPLE:
# my $monster = new NoFace;
# print $monster -> headlines ('high', '.*<b CLASS=obsTempTextA>(.\d*°F)</b>.*', 'http://www.weather.com/weather/local/11215', 'x=19&lswe=11215&lswa=WeatherLocalUndeclared&GO=GO&whatprefs=&y=7');
#****************************************************************
# METHODS *
#****************************************************************
################################################################
#if this doesn't make sense, look at the headlines() method
# Read a fact-definition file and look up every fact it describes.
#
# Called as a method: the invocant is ignored and the next argument is the
# path of the definition file. Each non-blank line holds a tab-delimited
# list whose fields are passed, in order, to headlines(); the first field
# is the fact name.
#
# Returns a flat key/value list (fact name => headlines() result). When no
# file is given or it cannot be opened, a warning is issued and an empty
# list is returned.
sub get_facts {
    my (undef, $file) = @_;
    my %headline;

    if (!defined $file) {
        warn "get_facts: no file name given\n";
        return;
    }

    # A lexical handle and three-arg open replace the old
    # "$ARGV[0] = ...; while (<>)" trick, which clobbered @ARGV and $_,
    # also read any other files named on the command line, and went
    # through magic two-arg open.
    open my $fh, '<', $file or do {
        warn "Can't open $file: $!\n";
        return;
    };

    while (my $line = <$fh>) {
        $line =~ s/\r?\n\z//;        # strip the line ending (LF or CRLF)
        next if $line !~ /\S/;       # skip blank lines

        # Split on tabs only, so fields (e.g. a regex) may contain spaces.
        my @fact = split /\t/, $line;
        $headline{ $fact[0] } = headlines(@fact);
    }
    close $fh;

    return %headline;
}
################################################################
# POST a request to a URI and reduce the returned page with a regex.
#
# May be called as a plain function or as an object method; a leading
# object reference is discarded.
#
# Arguments:
#   fact name    - first field; not used in the body
#   $regex       - pattern applied to the page with the /i, /g and /s flags;
#                  every match is replaced by its first capture group
#   $uri         - address the request is POSTed to
#   $query_string - request body
#
# Returns the page content after the substitution, or a "REGEX ERROR" /
# "BROWSER ERROR" message string when the pattern does not match or the
# request is not successful.
sub headlines {
    my @args = @_;

    # A plain call starts with the fact name (a string); $obj->headlines(...)
    # puts the object first, so drop a leading reference.
    shift @args if @args && ref $args[0];
    my (undef, $regex, $uri, $query_string) = @args;

    my $ua = LWP::UserAgent->new;
    $ua->agent("MSIE/6.0 " . $ua->agent);

    # Create a request
    my $req = HTTP::Request->new(POST => $uri);
    #$req->content_type('application/x-www-form-urlencoded');
    $req->content($query_string);

    # Pass request to the user agent and get a response back
    my $res = $ua->request($req);

    # Check the outcome of the response
    if (!$res->is_success) {
        return "BROWSER ERROR:\n$uri\nnot found!\n";
    }

    my $page = $res->content;
    if ($page =~ s{$regex}{$1}igs) {
        return $page;
    }
    return "REGEX ERROR:\n$uri\ndid not match regex:\n$regex\n";
}
################################################################
# Constructor: returns a fresh, empty hash-based object blessed into the
# class it was invoked on.
sub new {
    my ($class) = @_;
    return bless {}, $class;
}
# End the file with a true value so require/use succeed. A bare "1;" also
# works when the file is run directly, where a top-level "return" dies with
# "Can't return outside a subroutine".
1;