This script determines the longest repeating sequence in a file which contains only one line.
#!/usr/bin/perl
#Name: text_longest_repeating_v1.pl
#
#Written by: Balazs, Lendvay (ITFanatic :) )
#
#Purpose: this script finds the longest repeating sequence in the only line in $file
#(the script was created to analyze a continuous character string)
#
#A "repeating sequence" here is a substring of at least two characters that
#occurs again, without overlapping, somewhere later in the line.
#
#The script can be downloaded from "ITFanatic.com" and it is absolutely FREE,
#you can redistribute it and/or modify it under the same terms as Perl itself. (I think:) )
#This script comes with NO WARRANTY of any kind. (I am sure! :) )
#I cannot guarantee that this works in your environment, and I am not responsible
#for any harm it may cause on your computer. (I hope it won't cause harm at all :) )

use strict;
use warnings;

#name of the input file which holds the string to analyze
my $file = "test.txt";

#run the analysis only when executed as a script, so that the subs below
#can be loaded (require/do) without touching $file
main($file) unless caller;

#main($path): read the string from $path, report its length, search it and
#print the longest repeating sequence found.
#Dies if $path cannot be opened.
sub main {
    my ($path) = @_;

    my $line = read_last_line($path);
    print "Line: $line\n";
    print "Length:" . length($line) . "\n";

    my ($maxstr, $maxrep) = find_longest_repeat($line);
    print "Maximum string: \"$maxstr\" repeated: \"$maxrep\" \n";

    return;
}

#read_last_line($path): return the last line of the file $path without its
#trailing newline (the file is expected to contain a single line).
#Returns "" for an empty file. Dies if the file cannot be opened.
sub read_last_line {
    my ($path) = @_;

    #three-argument open with a lexical handle: the file name can never be
    #interpreted as an open mode
    open my $fh, '<', $path or die "Cannot open $path for read :$!";

    my $line = "";
    while (my $current = <$fh>) {
        $line = $current;
    }
    close $fh;

    #the line terminator is not part of the sequence to analyze
    chomp $line;

    return $line;
}

#find_longest_repeat($line): search $line for the longest substring (minimum
#two characters) which occurs again, non-overlapping, later in $line.
#Returns the list ($maxstr, $maxrep): the substring found and 1, or ("", 0)
#when nothing repeats.
#Prints every new maximum and, every 1000th start position, a progress line.
sub find_longest_repeat {
    my ($line) = @_;

    my $len     = length $line;
    my $starter = 0;     #start position of the current pattern
    my $maxi    = 2;     #length of the current pattern, it never shrinks
    my $maxstr  = "";
    my $maxrep  = 0;

    #repeat while a pattern of $maxi characters still fits into the line
    while ($starter + $maxi <= $len) {
        #cut the string which is the pattern to search
        my $str = substr($line, $starter, $maxi);

        #literal search in the part of the line behind the pattern; index()
        #is used instead of a regex so characters like "." or "(" coming
        #from the file are not treated as regex metacharacters
        if (index($line, $str, $starter + $maxi) >= 0) {
            #$maxi grows after every hit, so each hit is longer than the
            #previous maximum: record string and count
            $maxrep = 1;
            $maxstr = $str;

            #print the new longest string
            print "New max found: \"$maxstr\" , count: \"$maxrep\"\n";

            #try a longer pattern at the same start position
            $maxi++;
        }
        else {
            #if match not found, increment $starter, leave maxi at previous value
            $starter++;

            #just to see the process, print every 1000th loop the starter position
            if ($starter % 1000 == 0) {
                print "Starter++ mod 1000: $starter\n";
            }
        }
    }

    return ($maxstr, $maxrep);
}