如何提高 Perl 的执行效率?
代码 1:按理说,这段代码应该比使用正则的版本运行得更快。
- Perl code
# format_line($line)
#
# Split one access-log line into its fields by locating the space
# separators with index()/substr() instead of a regular expression.
#
# Parameter:
#   $line - one raw log line laid out as
#           ip - - [time zone] "METHOD url PROTOCOL" status size ...
#
# Returns the list ($ip, $time, $url, $status, $size), where $time is the
# value returned by str2stamp() rounded down to a multiple of 300
# (presumably a 5-minute bucket if str2stamp() yields epoch seconds --
# confirm against its definition).
# Returns an empty list for an undefined or empty line and for a line that
# has too few space-separated fields.
#
# Side effect: adds $size to $hour{$time}.
#
# str2stamp(), $log_tm_fmt and %hour are not defined in this block; the
# surrounding script must provide them.
sub format_line {
    my $line = shift;
    return unless defined $line && length $line;
    chomp $line;

    # Record the offsets of the first ten spaces only.  Nothing after the
    # size field (referer, user agent) is ever read, so scanning the rest of
    # the line would be wasted work on every call.
    my @space_at;
    my $pos = index($line, ' ');
    while ($pos != -1) {
        push @space_at, $pos;
        last if @space_at == 10;
        $pos = index($line, ' ', $pos + 1);
    }

    # Nine separators are the minimum needed to delimit the status field.
    # Without this guard a short line would feed undefined offsets to
    # substr() and add a garbage size to %hour.
    return if @space_at < 9;

    # With no tenth space the size is the last field and runs to the end of
    # the (chomped) line.
    my $size_stop = @space_at > 9 ? $space_at[9] : length $line;

    my $ip = substr($line, 0, $space_at[0]);

    # The fourth token is taken verbatim, so for a "[31/Oct/2011:00:02:37"
    # style field the leading '[' is passed on to str2stamp() unchanged.
    my $time_from = $space_at[2] + 1;
    my $raw_time  = substr($line, $time_from, $space_at[3] - $time_from);

    my $url_from = $space_at[5] + 1;
    my $url      = substr($line, $url_from, $space_at[6] - $url_from);

    my $status_from = $space_at[7] + 1;
    my $status      = substr($line, $status_from, $space_at[8] - $status_from);

    my $size_from = $space_at[8] + 1;
    my $size      = substr($line, $size_from, $size_stop - $size_from);

    # Round the timestamp down to a multiple of 300 and accumulate the
    # transferred size under that key.
    my $stamp = str2stamp($raw_time, $log_tm_fmt);
    my $time  = $stamp - ($stamp % 300);
    $hour{$time} += $size;

    return ($ip, $time, $url, $status, $size);
}
# Timing reported by the original poster: the code above took 57 seconds
# to process a 390 MB log file.
使用正则代码:
- Perl code
# Access-log line pattern, compiled once.  Captures, in order:
#   1. client IP address
#   2. request time (without the surrounding brackets or the time zone)
#   3. HTTP status code
#   4. response size
# It is written as qr{} rather than a double-quoted string so that "\."
# stays an escaped, literal dot; the protocol version class is [01] so it
# no longer accepts a stray '|'.
my $line_fmt = qr{^([^ ]+) [^ ]+ [^ ]+ \[([^ ]+) [^ ]+\] "[^ ]+ [^ ]+ HTTP/1\.[01]?" (\d+) (\d+)};

# Total response size keyed by timestamp rounded down to a multiple of 300.
my %hour;

# str2stamp() and $log_tm_fmt are not defined in this block; the
# surrounding script must provide them.
while (my $line = <>) {
    chomp $line;

    # Skip lines that do not match the expected format instead of passing
    # undefined fields to the filters and to str2stamp().
    my ($ip, $stamp, $status, $size) = $line =~ $line_fmt
        or next;

    # Ignore loopback and 192.168.x.x clients.
    next if $ip eq '127.0.0.1' || $ip =~ /^192\.168\.\d{1,3}\.\d{1,3}/;

    # Ignore responses whose status starts with 40, 30 or 50.
    next if $status =~ /^(?:40|30|50)/;

    # NOTE(review): the capture already excludes the leading '[', so
    # substr(..., 1) drops the first character of the day of month
    # ("31/Oct/..." becomes "1/Oct/...").  This looks like an off-by-one,
    # but it is kept unchanged because str2stamp() and $log_tm_fmt are not
    # visible here -- confirm which input format they expect.
    my $timestr = str2stamp(substr($stamp, 1), $log_tm_fmt);

    my $time = $timestr - ($timestr % 300);
    $hour{$time} += $size;
}
# Timing reported by the original poster: this version took 40 seconds to
# process a 390 MB log file.
麻烦高手们,看看怎么优化一下.
日志格式,
122.77.179.249 - - [31/Oct/2011:00:02:37 +0800] "GET http://flv1.jstv.com/feichengwr/201110/fcwr20111022_1.flv HTTP/1.1" 200 4805478 "http://jayce.com" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1"
[解决办法]
要是觉得效率低,你就用C来写吧。我觉得效率,满足要求就行,更珍贵的是人的开发效率。
[解决办法]