# Mirror of https://github.com/MrDetonia/Maki.git
# (synced 2024-11-14 00:46:23 -05:00; 54 lines, 1.5 KiB, Python)
import random
|
|
|
|
class Markov(object):
    """Second-order Markov chain text generator built from a word corpus.

    The corpus is read once at construction time.  Every pair of adjacent
    words is mapped to the list of words observed directly after that pair,
    and ``generate_text`` walks this table picking followers at random.
    """

    def __init__(self, open_file):
        """Read the corpus from ``open_file`` and build the transition table.

        Args:
            open_file: Object providing ``seek(0)`` and ``read()``.  The
                value returned by ``read()`` is split on whitespace to
                obtain the corpus words.
        """
        # Maps a (word, next_word) tuple to the list of every word seen
        # immediately after that pair (repeats kept, so frequent followers
        # are proportionally more likely to be chosen).
        self.cache = {}
        self.open_file = open_file
        self.words = self.file_to_words()
        self.word_size = len(self.words)
        self.database()

    def file_to_words(self):
        """Return the full content of the file as a list of words.

        The file is rewound first so the whole content is read regardless
        of the current position.
        """
        self.open_file.seek(0)
        return self.open_file.read().split()

    def doubles(self):
        """Yield each pair of adjacent words as a ``(w1, w2)`` tuple.

        Yields nothing when the corpus has fewer than two words.
        """
        # zip stops at the shortest input, so short corpora need no guard.
        for pair in zip(self.words, self.words[1:]):
            yield pair

    def triples(self):
        """Yield each run of three adjacent words as ``(w1, w2, w3)``.

        Yields nothing when the corpus has fewer than three words.
        """
        for triple in zip(self.words, self.words[1:], self.words[2:]):
            yield triple

    def database(self):
        """Fill ``self.cache`` with the followers of every word pair."""
        for w1, w2, w3 in self.triples():
            self.cache.setdefault((w1, w2), []).append(w3)

    def generate_text(self, size=25):
        """Return randomly generated text as a space-joined string.

        Generation starts from a random pair of adjacent corpus words and
        then takes up to ``size`` steps, each appending a randomly chosen
        follower of the current pair.  It stops early when the current
        pair has no recorded follower.

        Args:
            size: Maximum number of words generated after the seed pair.

        Returns:
            A string of at most ``size + 2`` words.  When the corpus has
            fewer than three words no chain can be built, so the corpus
            words themselves are returned (an empty string for an empty
            corpus).
        """
        if self.word_size < 3:
            # randint(0, word_size - 3) would raise ValueError here.
            return ' '.join(self.words)

        # The seed index leaves room for at least one following word, so
        # the seed pair is always present in the cache.
        seed = random.randint(0, self.word_size - 3)
        w1, w2 = self.words[seed], self.words[seed + 1]
        gen_words = [w1, w2]
        for _ in range(size):
            followers = self.cache.get((w1, w2))
            if not followers:
                # Dead end: this pair only occurs at the end of the corpus.
                break
            w1, w2 = w2, random.choice(followers)
            gen_words.append(w2)
        return ' '.join(gen_words)
|