#!/usr/bin/perl - w use strict; my $protocol='(?:(?=[FfHh])(?i:http(?>s?)|ftp)://)'; my $host=<<HOST; (?>[A-Za-z0-9]{1,63}\\.) (?>[A-Za-z0-9] (?>[-A-Za-z0-9]{0,62})\\. )* HOST my $subdom=<<SUBDOM; (?: (?>[A-Za-z0-9] (?:[-A-Za-z0-9]{0,61}[A-Za-z0-9])? )\\. )+ SUBDOM my $wb='(?![A-Za-z0-9])'; my $zone=<<ZONE; (?i:(?(?=[a-z]{3}$wb)(?>com|net|org|edu|biz|gov|int|mil)| (?(?=[a-z]{2}$wb)[a-z]{2}| (?(?=[a-z]{4}$wb)(?>info|aero|name)| (?(?=[a-z]{6}$wb)museum|(?!) ) ) ) ) (?>\\.[a-z]{2}$wb)? ) ZONE my $port="(?::\\d{1,5}$wb)"; my $tail=<<TAIL; (?:[/?] (?>[^.,"'<>()[\\]{}\\s\\x7F-\\xFF]*) (?:(?>[.,?]+) (?:[^"'<>()[\\]{}\\s\\x7F-\\xFF]+) )* (?<![,.?!-]) ) TAIL my $re=<<RE; ( (?>($protocol)(?(2)(?>$host$zone)|$host$zone) (?![A-Za-z0-9])| (?<![A-Za-z0-9_\\\@-]) (?<!\\.(?!(?i:www))) $subdom$zone(?![A-Za-z0-9_.-]*\\\@) ) (?>(?>$port?(?>\\\@$host$zone(?![A-Za-z0-9_.-]*\\\@))?)?) ) ($tail?) RE my $text=<<TEXT; URLs: Ftp://a.com/AAa Look at:aaa.Museum. http://www.proxy.com:80\@www.site.com/ http://proxy.com:80\@site.com/ http://proxy.com\@site.com/ aAaa.com.au.rr.ggg Zwww.Yabcd.co.uk Фforum.abcd.de www.Abc.eu П123.123.123.1234.com/?q=aaa http://Abc.Tk Ahttp://www.Abc.pt/AAa http://abc.au/query/vid.cam.dig/sony.dcrhc15.htm#full_image Ф.Www.old-avto.tk NOT URLs: aaa.museumm http://aaa.museumm, http://-aaa.com www._aaa.com www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com TEXT $text =~ s!$re!<a href="${\($2 ? '' : 'http://')}\L$1\E$3" target="_blank">$1$3</a>!gx; print $text; |
Листинг 8.1. |
Закрыть окно |