Tag Archive for RegularExpression

Look Up a Company’s 401k Plan Rating from BrightScope

import re

from urllib import urlopen
from urllib import urlencode

"""
Author: bing.jian@gmail.com
Date:  2009/07/25
Example Usage:

>>> import brightscope
>>> o = brightscope.get_directory()
>>> brightscope.query_score(o,'southwest')
Found Southwest Airlines Co. (3)
BrightScope Rating: 86
Found Southwest Electric Company (2)
BrightScope Rating: 55
Found Southwestern Energy Company
BrightScope Rating: 65
Found Southwest Water Company (2)
BrightScope Rating: 66
Found Southwest Gas Corporation
BrightScope Rating: 69
Found Southwestern/Great American, Inc.
BrightScope Rating: 52

"""

# Base URLs of the BrightScope site.
home_url = 'http://www.brightscope.com/'
rating_base_url = 'http://www.brightscope.com/401k-rating/'
directory_url = 'http://www.brightscope.com/ratings/'

# Regular directory entry:
#   <li ><a href="/401k-rating/<link>/" ><name></a></li>
# \s+ matches the whitespace between the closing quote of href and '>'.
# (The previous "[s]+" matched the literal letter "s", so nothing matched.)
company_string = r'<li ><a href="/401k-rating/(?P<link>.+?)/"\s+>(?P<name>.+?)</a></li>'
company_pattern = re.compile(company_string, re.MULTILINE)

# Highlighted ("top...") directory entry; it carries an extra title attribute:
#   <li class="top..."><a href="/401k-rating/<link>/" title="..." ><name></a></li>
top_company_string = r'<li class="top.+?"><a href="/401k-rating/(?P<link>.+?)/"\s+title=".+?"\s+>(?P<name>.+?)</a></li>'
top_company_pattern = re.compile(top_company_string, re.MULTILINE)

# The numeric rating shown on a company's rating page.
score_string = r'<span id="your_plan_rating">(?P<score>[0-9]+)</span>'
score_pattern = re.compile(score_string)

# HTML entities that are decoded in company names.
amp_pattern = re.compile(r'&amp;')
prime_pattern = re.compile(r'&#39;')

def get_directory():
    """Download the ratings directory page and index companies by name.

    Fetches ``directory_url`` and scans the page with ``company_pattern``
    and ``top_company_pattern``. In each company name, ``&amp;`` is
    replaced by ``&`` and ``&#39;`` by ``'``.

    Returns:
        A dict mapping the decoded company name to the link fragment
        captured from its ``/401k-rating/<link>/`` URL. When two entries
        decode to the same name, the later one wins.
    """
    doc = urlopen(directory_url).read()
    res = {}
    for pattern in (company_pattern, top_company_pattern):
        for item in pattern.finditer(doc):
            link, company = item.group('link', 'name')
            # sub() returns the string unchanged when nothing matches, so
            # no separate search() is needed before substituting.
            company = amp_pattern.sub('&', company)
            company = prime_pattern.sub("'", company)
            res[company] = link
    return res

def query_score(directory, key):
    """Print the BrightScope rating of every company whose name contains *key*.

    Args:
        directory: Mapping of company name to rating-page link fragment,
            as returned by ``get_directory()``.
        key: Substring to look for in company names; the comparison is
            case-insensitive.

    For each matching company, a "Found ..." line is printed and the
    company's rating page is downloaded. The rating line is printed only
    when ``score_pattern`` finds a score on that page. Returns None.
    """
    # Lowercase the search key once instead of on every iteration.
    needle = key.lower()
    for company in directory:
        if needle not in company.lower():
            continue
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print("Found %s" % company)
        url = rating_base_url + directory[company]
        doc = urlopen(url).read()
        found = score_pattern.search(doc)
        if found:
            print("BrightScope Rating: %d" % int(found.group('score')))

source

Replace Spaces with Underscores

// Replace every whitespace character in the string with an underscore.
var str = "This string has too many spaces.";
// \s matches a whitespace character; without the backslash the pattern
// would replace the letter "s" instead. The "g" flag replaces all matches.
var result = str.replace(/\s/g, "_");
document.write(result);

source

Parse html via RegularExpression and place results in Array

using System.Text.RegularExpressions;

// Look for image paths in the page HTML with a regular expression.
string nl = ""; // placeholder: assign the page HTML to scan here
// Verbatim string: "" is a literal double quote, and \. matches the dot
// in front of the file extension.
string sRegExp = @"src=(?:""|')?(?<imgSrc>[^>]*[^/]\.(?:jpg|bmp|gif|png))(?:""|')?";
MatchCollection mc = Regex.Matches(nl, sRegExp);

for (int j = 0; j < mc.Count; j++)
{
    // The pattern also accepts single-quoted and unquoted src values, so
    // split on both quote characters and fall back to the named group
    // when the match contains no quote at all (path[1] would not exist).
    string[] path = mc[j].Value.Split('"', '\'');
    string src = path.Length > 1 ? path[1] : mc[j].Groups["imgSrc"].Value;
    Response.Write(src.Trim() + "<br />");
}

source

Validate string via regular expression

// Regex.IsMatch returns true when the pattern finds a match anywhere in
// someString; anchor the pattern with ^ and $ to validate the whole string.
System.Text.RegularExpressions.Regex.IsMatch(someString, someRegularExpression)

source

Email Address Format Validation

''' <summary>
''' Checks whether the supplied e-mail address matches the configured format pattern.
''' </summary>
''' <param name="s">The e-mail address to check; may be Nothing or empty.</param>
''' <returns>
''' True when <paramref name="s"/> is non-empty and the pattern finds a match in it;
''' otherwise False.
''' </returns>
''' <remarks>The pattern is read from My.Settings.Email_Validation_Pattern.</remarks>
Private Function IsValidEmailFormat(ByVal s As String) As Boolean
    ' Nothing and the empty string are never valid; the setting is not read for them.
    If String.IsNullOrEmpty(s) Then
        Return False
    End If

    ' Sample pattern (the backslashes before the dots appear to have been lost in
    ' the original comment and are restored here - verify before use):
    '^[_a-zA-Z0-9-]+(\.[_a-zA-Z0-9-]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*\.(([0-9]{1,3})|([a-zA-Z]{2,3})|(aero|coop|info|museum|name))$
    Dim emailPattern As String = My.Settings.Email_Validation_Pattern

    Return RegularExpressions.Regex.IsMatch(s, emailPattern)
End Function

source

Convert whitespace to underscores

Find
(id=")(\w+)(\s)
Replace
$1$2_

source

Regular Expression to Grab the Content of an XML Element

(?<=Message>)[\s\S]*?(?=</Message)

source

Regular Expression to Grab an Object Type Name from a C# Code File.

(?<=new\s)\s*\S*?\s*(?=[[(])

source

Regular Expression to Grab an Object Reference Name from a C# Code File.

\S*\s*(?==\s*\S*\s*new\s)

source

Regular Expression to Grab a Method Name from a C# Code File.

\S*\s*(?=\()

source