#!/usr/bin/env perl
#
# Character-level Markov ("Markoff") text generator.
#
# Reads training text from STDIN and counts, for every history of n-1
# characters, which character followed it.  It then prints <length>
# characters sampled from those counts, starting either from the
# all-hard-returns start state or from the tail of an optional seed string.
#
# Usage: cat <file> | perl markov.pl <n> <length> [<seed>]

use strict;
use warnings;

# Check usage:
usage() if @ARGV != 2 && @ARGV != 3;
my ($n, $length, $seedArg) = @ARGV;
usage() unless $n =~ /\A[1-9][0-9]*\z/ && $length =~ /\A[0-9]+\z/;

# Number of characters of context kept for an n-character model.
my $order = $n - 1;

# Variables to store histories.  Start state is all hard returns:
my %HistoryNext  = ();    # history => { next char => times seen }
my %HistoryCount = ();    # history => total times seen
my $history      = "\n" x $order;
my $charCount    = 0;

# Load in the file and store character-based histories:
while (my $line = <STDIN>) {
    for my $char (split //, $line) {
        # Add a count for this character given the current history:
        $HistoryNext{$history}{$char}++;
        $HistoryCount{$history}++;
        $charCount++;

        # Increment history:
        $history = last_chars($history . $char, $order);

        # Silly test:
        # if ($charCount % 100000 == 0) {
        #     my $testHist = substr("in the middle of the day", 0, $order);
        #     print "$testHist + a\t" . $HistoryNext{$testHist}{'a'} . "\n";
        #     print "$testHist + e\t" . $HistoryNext{$testHist}{'e'} . "\n";
        #     print "$testHist + t\t" . $HistoryNext{$testHist}{'t'} . "\n";
        #     print "$testHist + y\t" . $HistoryNext{$testHist}{'y'} . "\n";
        #     print "\n";
        # }
    }
}
print "Loaded histories for $charCount characters.\n";

# Set starting seed, if any.  The seed is left-padded with the start-state
# hard returns so that it is always exactly $order characters long, even when
# the supplied string is shorter than the model's history.
my $seed = "\n" x $order;
if (defined $seedArg) {
    $seed = last_chars($seed . $seedArg, $order);
}

# Seed test:
# print "$seed + A\t" . $HistoryNext{$seed}{'A'} . "\n";
# print "$seed + E\t" . $HistoryNext{$seed}{'E'} . "\n";
# print "$seed + T\t" . $HistoryNext{$seed}{'T'} . "\n";
# print "$seed + W\t" . $HistoryNext{$seed}{'W'} . "\n";
# print "\n";

# Generate the specified length:
print "--------\n";
for my $i (1 .. $length) {
    # Probabilistically decide a continuation, based on the counts:
    my $next = pick_next($HistoryNext{$seed}, $HistoryCount{$seed});

    # A history never seen in training has no continuation and never will,
    # so there is nothing more to generate.
    if (!defined $next) {
        print STDERR "No continuation known for the current history; "
            . "stopping after " . ($i - 1) . " characters.\n";
        last;
    }

    # Output that character and add it to the history:
    print $next;
    $seed = last_chars($seed . $next, $order);
}
print "\n--------\n";

# Print the usage message to STDERR and exit with a failure status.
sub usage {
    print STDERR "Usage: cat <file> | perl $0 <n> <length> [<seed>]\n";
    print STDERR " <n> = length of character Markoff model\n";
    print STDERR " <length> = how many characters to generate\n";
    print STDERR " <seed> = starting string for generation\n";
    exit 1;
}

# Return the last $count characters of $string (all of it if it is shorter).
# A $count of 0 yields the empty string; it is handled separately because
# substr($string, -0) would return the whole string.
sub last_chars {
    my ($string, $count) = @_;
    return "" if $count == 0;
    return $string if length($string) <= $count;
    return substr($string, -$count);
}

# Sample one key of the hash referenced by $counts with probability
# proportional to its value.  $total must be the sum of all the values.
# Returns nothing (undef in scalar context) when there is nothing to pick.
sub pick_next {
    my ($counts, $total) = @_;
    return unless $total;

    # Uniform integer in 0 .. $total-1.  Each key owns exactly as many of
    # those integers as its count, so no key is favoured.
    my $random = int(rand($total));
    #print STDERR "Picked $random of $total\n"; # TEMP

    my $accum = 0;
    for my $k (keys %{$counts}) {
        $accum += $counts->{$k};
        #print STDERR "After $k, now have $accum\n"; # TEMP
        return $k if $accum > $random;
    }
    return;
}