#!/usr/bin/perl -w
#
use strict;
use warnings;

# Report the most frequent fixed-length subsequences (k-mers) found in the
# reads of a FASTQ file.
#
# Usage: <script> <fastq file>
#
# The input may be plain text, gzip-compressed (name ending in ".gz", which is
# decompressed through an external "gunzip -c"), or "-" to read from STDIN.
# Only the first $numSeqsToCheck reads are examined.  For every length in
# @lens the $numToReport most common subsequences are printed to STDOUT as
# "<subsequence>\t<count>"; progress is written to STDERR.

if (@ARGV < 1) {
	print STDERR "<fastq file>\n";
	exit;
}

my $numSeqsToCheck=1e5;    # maximum number of reads to examine
my @lens = (10,20,25,30);  # subsequence lengths to tabulate
my $numToReport = 10;      # how many top subsequences to print per length

# %seqs maps length -> { subsequence => number of occurrences }
my %seqs = map { $_ => {} } @lens;
my $numSeqs = 0;

my $file = $ARGV[0];
my $in;
if ($file eq '-') {
	# Keep the historical meaning of "-" (2-arg open treated it as STDIN).
	$in = \*STDIN;
} elsif ($file =~ /\.gz$/) {
	# List-form pipe open: the file name is handed to gunzip as one argument
	# and never passes through a shell, so spaces or metacharacters in the
	# name cannot break the command or inject another one.
	open($in, '-|', 'gunzip', '-c', $file) or die "Could not open $file: $!\n";
} else {
	# 3-arg open: the name is always a path to read, never a mode or command.
	open($in, '<', $file) or die "Could not open $file: $!\n";
}

my $lineNum = 0;
while (my $line = <$in>) {
	$lineNum++;
	# The script treats the 2nd line of every 4-line group as the read sequence.
	next unless ($lineNum % 4 == 2);

	# Stop before counting the read that would exceed the limit, so that
	# $numSeqs is exactly the number of reads that were analysed.
	last if ($numSeqs >= $numSeqsToCheck);
	$numSeqs++;
	print STDERR "\t$numSeqs\n" if ($numSeqs % 1e5 == 0);

	# Strip the line terminator (LF or CRLF) so it can never end up inside a
	# subsequence and so $len is the true read length.
	$line =~ s/[\r\n]+$//;
	my $len = length($line);
	foreach my $L (@lens) {
		# A read of length $len contains $len-$L+1 windows of length $L,
		# starting at offsets 0 .. $len-$L (none when the read is shorter).
		for (my $i=0;$i<=$len-$L;$i++) {
			$seqs{$L}->{substr($line,$i,$L)}++;
		}
	}
}
# The status of close is deliberately ignored: when we stop reading early,
# gunzip may be terminated by a broken pipe, which is not an error here.
close $in;

foreach my $L (@lens) {
	my $counts = $seqs{$L};
	# Most frequent first; ties are broken alphabetically so that the output
	# is reproducible from run to run.
	my @ranked = sort { $counts->{$b} <=> $counts->{$a} || $a cmp $b } keys %$counts;
	print "\nLength = $L (total=$numSeqs)\n";
	# Print at most $numToReport rows, fewer if there are not that many
	# distinct subsequences (an empty range prints nothing).
	my $last = $#ranked < $numToReport-1 ? $#ranked : $numToReport-1;
	foreach my $m (@ranked[0..$last]) {
		print "$m\t$counts->{$m}\n";
	}
}

