import os,re,sys
# python script.py file.html
# Matches (greedily, within one line) an anchor tag that carries an href
# attribute, e.g. <a href="...">.  Written as a triple-quoted raw string
# because the pattern contains both ' and "; the previous single-quoted
# literal was cut short by the embedded ' and did not parse.
links = re.compile(r"""[<].?[Aa].*[Hh][Rr][Ee][Ff].*=.*["']?.*["']?.?[>]""")

# Matches from "http:" through the last "zip" (any letter case) in the text.
# The previous pattern used "-*" (zero or more hyphens) between the two
# parts, which can never match a real URL such as http://host/file.zip.
# Compiled once here instead of on every matching line.
links2 = re.compile(r'http:.*[Zz][Ii][Pp]')


def find_zip_link(line):
    """Return the zip link found inside an anchor tag on *line*, or None.

    The line must first match ``links`` (an anchor tag with an href); the
    matched tag text is then searched with ``links2``.  At most one result
    is produced per line, and because both patterns are greedy the result
    runs up to the last "zip" inside the matched tag text.
    """
    anchor = links.search(line)
    if anchor is None:
        return None
    target = links2.search(anchor.group(0))
    if target is None:
        return None
    return target.group(0)


def main(argv):
    """Print every zip link found in the HTML file named by ``argv[1]``.

    Prints one link per matching line, then the final marker 'FATTO'.
    Exits with a usage message when no file name is given.
    """
    if len(argv) < 2:
        sys.exit('usage: python script.py file.html')

    # Iterate the file object directly: it stops at end of file on its own.
    # The old loop counted down the on-disk size by len(line), which never
    # reaches zero when decoded text is shorter than the raw bytes (newline
    # translation, multi-byte characters) and then spins forever at EOF.
    # The with-block also guarantees the file is closed.
    with open(argv[1], 'r') as html_file:
        for line in html_file:
            found = find_zip_link(line)
            if found is not None:
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print(found)
    print('FATTO')


if __name__ == '__main__':
    main(sys.argv)
Python – Cattura tutti i links
Category: Uncategorized |
Tags: cli, count, dangerous, default, delicious, dictionary, documentation, hello, python, regex, world
Leave a Reply
You must be logged in to post a comment.