check linked pages for Tidy validation errors, on the command line

# Pipeline: list the links found in the saved results page, keep the A-tag
# links that point at MY_DOMAIN, fetch each linked page, run it through tidy,
# and keep only tidy's "Error" / "DOCTYPE" lines. Each page that produced such
# lines is written to report.txt as a blank line, "URL:", then the tidy lines;
# pages with no matching tidy output contribute nothing.
# The regex escapes (\s, \w, \.) and the \n escapes are written out explicitly
# so the whole command stays on one line.
# NOTE(review): the perl program is in double quotes, so this assumes a shell
# that does not expand $1 / $_ itself (e.g. Windows cmd.exe); under a POSIX
# shell wrap the perl program in single quotes instead - confirm target shell.
lwp-request -o links file:///SAVED_GOOGLE_RESULTS.htm | grep -P "A\s*http://\w*\.MY_DOMAIN" | perl -pe "m#A\s*(.*)#; $notify = qq{\n$1:\n}; $_ = qx{lwp-request $1|tidy -eq 2>&1|grep -e Error -e DOCTYPE}; $_ = $notify . $_ if $_" > report.txt

#Alternate: print out just the HTTP response code for linked pages that have my domain in the link

# For every link on the page: strip the leading tag name and whitespace so only
# the URL remains, discard links that do not mention onemorebug.com, then append
# a tab and the output of `lwp-request -ds URL` (status only, no body) to the URL.
# NOTE(review): double-quoted perl program assumes the shell leaves $_ alone
# (e.g. Windows cmd.exe); use single quotes under a POSIX shell - confirm.
lwp-request -o links http://onemorebug.com | perl -pe "chomp; $_ =~ s#\w*\s*##; undef $_ unless m/onemorebug\.com/; $_ .= qq{\t} . qx{lwp-request -ds $_} if $_"

#old version

# Older variant, kept for reference: same link filtering as the main command,
# but it only keeps tidy's DOCTYPE lines and prints to standard output. For each
# page with such lines it prints a tab, "URL: ", and then the matching lines.
# The inner quotes around DOCTYPE were dropped so they cannot terminate the
# double-quoted perl program early; grep receives the same argument.
# NOTE(review): double-quoted perl program assumes the shell leaves $1 / $_
# alone (e.g. Windows cmd.exe); use single quotes under a POSIX shell - confirm.
lwp-request -o links file:///C:/SAVED_GOOGLE_RESULTS.htm | grep -P "A\s*http://\w*\.MY_DOMAIN" | perl -pe "m#A\s*(.*)#; $notify = qq{\t$1: }; $_ = qx{lwp-request $1|tidy -e 2>&1 | grep DOCTYPE}; print $notify if $_"

source

Leave a Reply