-
Notifications
You must be signed in to change notification settings - Fork 0
/
generation.py
195 lines (170 loc) · 7.47 KB
/
generation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
"""
## This module allows you to **build** the matrix from the processed dictionary and to **generate** words.
"""
from random import choices
from itertools import product
###########################################
# N dimensions matrix, N-1 letters before #
###########################################
# Candidate separator characters, tried in this order by `find_separator()`.
separator_list = [' ', '\t', '-', '_', ',', ';', ':', '|']


def find_separator(alphabet):
    """
    `find_separator()` returns the first character of `separator_list` that is not in the alphabet.

    * **alphabet** (*list*): the used alphabet (from input file or from dictionary)
    * **return** (*char*): the first separator that is not in the alphabet
    * **raises** (*ValueError*): when every candidate of `separator_list` is already in the alphabet
    """
    for candidate in separator_list:
        if candidate not in alphabet:
            return candidate
    # ValueError is a subclass of Exception, so callers catching Exception keep working.
    raise ValueError(f"no separator available: all characters in {separator_list} are in the alphabet, maybe try to add one manually in the code")
def build_ND_matrix(dictionary, alphabet, N):
    """
    `build_ND_matrix()` initiates and fills an N dimension matrix (dict of dict object) by browsing the dictionary.

    The outer keys are every string of `N-1` characters of the alphabet (the previous letters),
    the inner keys are the characters of the alphabet (the next letter) and the values count how many
    times that next letter follows those previous letters in the dictionary.
    The last element of the alphabet is used as the separator: it pads the beginning of each word
    and is counted as the "next letter" at the end of each word.

    * **dictionary** (*list*): the input dictionary (after processing)
    * **alphabet** (*list*): the used alphabet (from input file or from dictionary), its last element is the separator
    * **N** (*int*): the dimension of the matrix (at least 1)
    * **return** (*dict*): the matrix counting how often letters chain each other
    * **raises** (*ValueError*): when N is lower than 1
    * **raises** (*KeyError*): when a word contains a character that is not in the alphabet
    """
    if N < 1:
        raise ValueError(f"N must be at least 1, got {N}")

    separator = alphabet[-1]
    context_size = N - 1

    # initiate the matrix: one independent row of zeroed counters per possible context
    matrix = {
        ''.join(context): dict.fromkeys(alphabet, 0)
        for context in product(alphabet, repeat=context_size)
    }

    # N-1 trailing separators flush the word out of the context window;
    # at least one is always counted so that a 1D matrix also records word endings
    trailing_separators = max(1, context_size)

    # fill matrix with dictionary
    for word in dictionary:
        previous_letters = context_size * separator
        for current_letter in word:
            matrix[previous_letters][current_letter] += 1
            # append first, then drop the oldest character: the window keeps
            # exactly N-1 characters, including the empty window of N == 1
            previous_letters = (previous_letters + current_letter)[1:]
        for _ in range(trailing_separators):
            matrix[previous_letters][separator] += 1
            previous_letters = (previous_letters + separator)[1:]
    return matrix
def generate_word_ND(matrix, alphabet, prefix, N):
    """
    `generate_word_ND()` generates a word by repeatedly drawing the next letter with `random.choices()`,
    weighted by the row of the ND matrix selected by the `N-1` previous letters.

    The last element of the alphabet is used as the separator: it pads a prefix shorter than `N-1`
    characters, and drawing it ends the word (it is not appended to the result).

    * **matrix** (*dict*): the matrix counting how often letters chain each other
    * **alphabet** (*list*): the used alphabet (from input file or from dictionary), its last element is the separator
    * **prefix** (*str*): the prefix requested for the generated words
    * **N** (*int*): the dimension of the matrix
    * **return** (*str*): the generated word (length variable), always starting with the prefix
    """
    separator = alphabet[-1]

    # the starting context is the last N-1 characters of the separator-padded prefix
    previous_letters = ((N - 1) * separator + prefix)[len(prefix):]

    pieces = [prefix]
    while True:
        row = matrix[previous_letters]
        # look the weights up letter by letter so they stay aligned with
        # `alphabet` whatever the key order of the row is
        weights = [row[letter] for letter in alphabet]
        new_letter = choices(population=alphabet, weights=weights, k=1)[0]
        if new_letter == separator:
            break
        pieces.append(new_letter)
        # append first, then drop the oldest character: the window keeps
        # exactly N-1 characters, including the empty window of N == 1
        previous_letters = (previous_letters + new_letter)[1:]
    return ''.join(pieces)
#################
# /!\ DEAD CODE #
#################
# def build_2D_matrix(dictionary, alphabet):
# """
# `build_2D_matrix()` initiate and fill a 2D matrix (dict of dict object) by browsing the dictionary.
# * **dictionary** (*list*): the input dictionary (after processing)
# * **alphabet** (*list*): the used alphabet (from input file or from dictionary)
# * **return** (*dict*): the matrix representing the probability of letter chaining each other
# """
# # initiate matrix
# matrix = dict()
# for letter in alphabet:
# matrix[letter] = dict()
# for other_letter in alphabet:
# matrix[letter][other_letter] = 0
# # fill matrix with dictionary
# for word in dictionary:
# previous_letter = ''
# for current_letter in word:
# matrix[previous_letter][current_letter] += 1
# previous_letter = current_letter
# matrix[word[len(word)-1]][''] +=1
# return matrix
# # def plot_2D_matrix(matrix, alphabet):
# # print (alphabet)
# # for line in matrix:
# # print (line, matrix[line])
# # print ('')
# def generate_word_2D(matrix, alphabet, prefix):
# """
# `generate_word_2D()` generates a word using the `random.choices()` method upon the 2D matrix in the last letter column.
# * **matrix** (*dict*): the matrix representing the probability of letter chaining each other
# * **alphabet** (*list*): the used alphabet (from input file or from dictionary)
# * **prefix** (*str*): the prefix requested for the generated words
# * **return** (*str*): the generated word (length variable)
# """
# if len(prefix) == 0:
# word = ''
# previous_letter = ''
# else:
# word = prefix
# previous_letter = prefix[-1]
# new_letter = None
# while new_letter != '':
# new_letter = choices(population=alphabet, weights=matrix[previous_letter].values(), k=1)[0]
# word = word+new_letter
# previous_letter = new_letter
# return (word)
# def build_3D_matrix(dictionary, alphabet):
# """
# `build_3D_matrix()` initiate and fill a 3D matrix (dict of dict of dict object) by browsing the dictionary.
# * **dictionary** (*list*): the input dictionary (after processing)
# * **alphabet** (*list*): the used alphabet (from input file or from dictionary)
# * **return** (*dict*): the matrix representing the probability of letter chaining each other
# """
# # initiate matrix
# matrix = dict()
# for letter1 in alphabet:
# matrix[letter1] = dict()
# for letter2 in alphabet:
# matrix[letter1][letter2] = dict()
# for letter3 in alphabet:
# matrix[letter1][letter2][letter3] = 0
# # fill matrix with dictionary
# for word in dictionary:
# previous_letter1 = ''
# previous_letter2 = ''
# for current_letter in word:
# matrix[previous_letter1][previous_letter2][current_letter] += 1
# previous_letter1 = previous_letter2
# previous_letter2 = current_letter
# matrix[word[len(word)-2]][word[len(word)-1]][''] +=1
# matrix[word[len(word)-1]][''][''] +=1
# return matrix
# def generate_word_3D(matrix, alphabet, prefix):
# """
# `generate_word_3D()` generates a word using the `random.choices()` method upon the 3D matrix in the last letter column.
# * **matrix** (*dict*): the matrix representing the probability of letter chaining each other
# * **alphabet** (*list*): the used alphabet (from input file or from dictionary)
# * **prefix** (*str*): the prefix requested for the generated words
# * **return** (*str*): the generated word (length variable)
# """
# if len(prefix) == 0:
# word = ''
# previous_letter1 = ''
# previous_letter2 = ''
# elif len(prefix) == 1:
# word = prefix
# previous_letter1 = ''
# previous_letter2 = prefix[-1]
# else:
# word = prefix
# previous_letter1 = prefix[-2]
# previous_letter2 = prefix[-1]
# new_letter = None
# while new_letter != '':
# new_letter = choices(population=alphabet, weights=matrix[previous_letter1][previous_letter2].values(), k=1)[0]
# word = word+new_letter
# previous_letter1 = previous_letter2
# previous_letter2 = new_letter
# return (word)