use strict;
use warnings;
use Time::Local;

# Reads access-log records from STDIN (or the files named on the command
# line), pulls selected query parameters out of each record's URL, converts
# the record's timestamp to epoch seconds and prints one semicolon-separated
# record per input line:
#
#   user;ip;day;month;year;time;url;reference;searchterm;textfield;facet;
#   sort;expand;more;page;record;follow;refine;seconds;visit_num;elapsed;
#   visit_step
#
# Each input line must split into seven whitespace-separated fields:
#   user ipaddress datetime url reference visit_num visit_step
# NOTE(review): every literal space is rewritten to '+' before the split, so
# the fields are presumably tab-separated -- confirm against the real logs.
#
# Lines with fewer than seven fields, or with a timestamp that cannot be
# parsed, are reported on STDERR and skipped.

# Three-letter month name (as it appears in the timestamp) => month number.
my %MONTH_NUMBER = (
    Jan => 1, Feb => 2,  Mar => 3,  Apr => 4,
    May => 5, Jun => 6,  Jul => 7,  Aug => 8,
    Sep => 9, Oct => 10, Nov => 11, Dec => 12,
);

# Ordered [pattern, replacement] rules. Order matters: %20 is decoded to a
# space first and only then is every space turned into '+'.
my @DECODE_RULES = (
    [ qr/%2B/i, '+'  ],
    [ qr/%20/i, ' '  ],
    [ qr/ /,    '+'  ],
    [ qr/%2C/i, ','  ],
    [ qr/%22/i, '"'  ],
    [ qr/%27/i, q{'} ],
    [ qr/%3D/i, '='  ],
    [ qr/%26/i, '&'  ],
    [ qr/%3F/i, '?'  ],
    [ qr/%2F/i, '/'  ],
    [ qr/%3A/i, ':'  ],
    [ qr/%7C/i, '|'  ],
);

# Decode the handled percent-escapes, capitalise the known parameter keys
# and strip quotes, backslashes and spaces. Returns the cleaned-up line.
sub normalize_line {
    my ($line) = @_;

    for my $rule (@DECODE_RULES) {
        my ($pattern, $replacement) = @{$rule};
        $line =~ s/$pattern/$replacement/g;
    }

    # ntt= -> Ntt=, n= -> N=, r= -> R=, ...
    # NOTE(review): the keys are not anchored to a parameter boundary, so
    # e.g. "plan=" becomes "plaN=" -- kept as in the original behaviour.
    $line =~ s/(ntt|ntk|ns|ne|no|nf|n|r)=/\u$1=/g;

    # Drop single quotes, double quotes, backslashes and spaces. After the
    # space-to-'+' rule above no spaces remain; kept for fidelity.
    $line =~ tr/'"\\ //d;

    return $line;
}

# Value of "$key=" in $url, or '-' when the key is absent or has an empty
# value anywhere in the URL. The value runs from the first "$key=" up to the
# next '&' or the end of the URL.
sub extract_param {
    my ($url, $key) = @_;

    return '-' if $url =~ /\Q$key\E=(?:&|\z)/;
    return $1  if $url =~ /\Q$key\E=([^&]*)/;
    return '-';
}

# Value of the "N=" parameter, or '-' when it is absent, empty or exactly
# "0". The patterns are tried in this fixed order.
sub extract_facet {
    my ($url) = @_;

    my $facet = '-';
    if    ($url =~ /N=(?:&|\z)/)     { $facet = '-'; }
    elsif ($url =~ /N=([^0]\d+?)\z/) { $facet = $1;  }
    elsif ($url =~ /N=([0-9+#]*)\z/) { $facet = $1;  }
    elsif ($url =~ /N=([^0].*?)&/)   { $facet = $1;  }
    elsif ($url =~ /N=(0)/)          { $facet = $1;  }

    # String comparison on purpose: a numeric '==' treats every non-numeric
    # value (e.g. "abc" or "#5") as 0 and would wrongly blank it out.
    return $facet eq '0' ? '-' : $facet;
}

# Value reported in the "more" column: '-' when "more=" is empty, otherwise
# everything after "closed=" if the URL contains it, otherwise the value of
# "more=", otherwise '-'.
sub extract_more {
    my ($url) = @_;

    return '-' if $url =~ /more=(?:&|\z)/;
    return $1  if $url =~ /closed=(\S*?)\z/;
    return $1  if $url =~ /more=([^&]*)/;
    return '-';
}

# Everything following "follow=" in the URL (possibly empty), or '-' when
# the URL has no "follow=".
sub extract_follow {
    my ($url) = @_;

    return $url =~ /follow=(\S*)/ ? $1 : '-';
}

# Parse a "[dd/Mon/yyyy:hh:mm:ss..." timestamp.
# Returns ($day, $month_number, $year, $time, $epoch_seconds), or an empty
# list when the text does not match, the month name is unknown, or
# timelocal() rejects the date. The epoch value is computed with timelocal(),
# i.e. in the local time zone of the machine running the script.
sub parse_timestamp {
    my ($datetime) = @_;

    my ($day, $month_name, $year, $time, $hour, $min, $sec) =
        $datetime =~ m{\[(\d+)/(\w{3})/(\d{4}):((\d+):(\d+):(\d+))}
        or return;

    my $month = $MONTH_NUMBER{$month_name}
        or return;

    # A four-digit year is passed as-is; timelocal() treats years above 999
    # as the actual year, which avoids the two-digit-year guessing that an
    # offset from 1900 can trigger.
    my $seconds = eval { timelocal($sec, $min, $hour, $day, $month - 1, $year) };
    return unless defined $seconds;

    return ($day, $month, $year, $time, $seconds);
}

# Epoch seconds of the previously printed record; used for $elapsed.
my $last_seconds;

while (my $line = <>) {
    $line = normalize_line($line);

    my ($user, $ipaddress, $datetime, $url, $reference, $visit_num, $visit_step)
        = split ' ', $line;

    if (!defined $visit_step) {
        warn "line $.: expected 7 fields, skipping\n";
        next;
    }

    my @stamp = parse_timestamp($datetime);
    if (!@stamp) {
        warn "line $.: cannot parse timestamp '$datetime', skipping\n";
        next;
    }
    my ($day, $month, $year, $time, $seconds) = @stamp;

    my $searchterm = extract_param($url, 'Ntt');
    my $textfield  = extract_param($url, 'Ntk');
    my $facet      = extract_facet($url);
    my $sort       = extract_param($url, 'Ns');
    my $expand     = extract_param($url, 'Ne');
    my $more       = extract_more($url);
    my $page       = extract_param($url, 'No');   # printed in the "page" column
    my $record     = extract_param($url, 'R');
    my $follow     = extract_follow($url);
    my $refine     = extract_param($url, 'Nf');

    # Seconds since the previous record; the first step of a visit is 0.
    # With no previous record the previous time counts as 0.
    my $previous = defined $last_seconds ? $last_seconds : 0;
    my $elapsed  = $visit_step == 1 ? 0 : $seconds - $previous;

    print join(';',
        $user, $ipaddress, $day, $month, $year, $time, $url, $reference,
        $searchterm, $textfield, $facet, $sort, $expand, $more, $page,
        $record, $follow, $refine, $seconds, $visit_num, $elapsed,
        $visit_step), "\n";

    $last_seconds = $seconds;
}