2016-04-03 11:03:40 -04:00
|
|
|
import random
|
|
|
|
|
|
|
|
|
2018-08-23 16:30:26 -04:00
|
|
|
class Markov(object):
    """Word-level Markov chain text generator.

    The source text is read from an already-open file object and split on
    whitespace. Every pair of consecutive words is mapped to the list of
    words observed immediately after that pair; generation walks this
    mapping, picking a random follower at each step.
    """

    def __init__(self, open_file):
        """Read ``open_file`` and build the transition table.

        Args:
            open_file: A readable, seekable file-like object. It is rewound
                with ``seek(0)`` before being read in full.
        """
        # (w1, w2) -> list of words seen right after that pair. Repeats are
        # kept so that random.choice() is weighted by frequency.
        self.cache = {}
        self.open_file = open_file
        self.words = self.file_to_words()
        self.word_size = len(self.words)
        self.database()

    def file_to_words(self):
        """Return the whole file content as a list of whitespace-split words."""
        self.open_file.seek(0)
        return self.open_file.read().split()

    def doubles(self):
        """Yield every pair of consecutive words as a 2-tuple.

        Yields nothing when there are fewer than two words.
        """
        # zip() stops at the shortest input, so short word lists need no
        # explicit length guard.
        for pair in zip(self.words, self.words[1:]):
            yield pair

    def triples(self):
        """Yield every run of three consecutive words as a 3-tuple.

        Yields nothing when there are fewer than three words.
        """
        for triple in zip(self.words, self.words[1:], self.words[2:]):
            yield triple

    def database(self):
        """Populate ``self.cache`` from the word triples.

        For each triple ``(w1, w2, w3)`` the word ``w3`` is appended to the
        list stored under the key ``(w1, w2)``.
        """
        for w1, w2, w3 in self.triples():
            self.cache.setdefault((w1, w2), []).append(w3)

    def generate_text(self, size=25):
        """Generate random text by walking the chain from a random start.

        Args:
            size: Maximum number of transitions to take. The result holds
                at most ``size + 2`` words (the two seed words plus one
                word per transition).

        Returns:
            The generated words joined by single spaces. When the source
            has fewer than three words no transition exists, so the source
            words themselves are returned joined by spaces.
        """
        if self.word_size < 3:
            # randint(0, word_size - 3) would raise ValueError on an empty
            # range; fall back to whatever words are available.
            return ' '.join(self.words)

        # Seed with a pair that starts a triple, so it has a follower.
        seed = random.randint(0, self.word_size - 3)
        w1, w2 = self.words[seed], self.words[seed + 1]
        gen_words = [w1, w2]
        for _ in range(size):
            followers = self.cache.get((w1, w2))
            if not followers:
                # The current pair only occurs at the very end of the
                # source text, so the chain cannot continue.
                break
            w1, w2 = w2, random.choice(followers)
            gen_words.append(w2)
        return ' '.join(gen_words)
|