-
Notifications
You must be signed in to change notification settings - Fork 0
/
wordsmith.py
58 lines (47 loc) · 1.72 KB
/
wordsmith.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python
# wordsmith module
# spellchecking and the like...
import pickle
from collections import Counter
class wordsmith:
    """Spell-checking and keyword-frequency helpers for a list of words.

    ``wordlst`` holds the words to analyse; ``vocab`` is the collection of
    known words unpickled from the vocabulary file.
    """

    def __init__(self, wordlst, vocabfile='vocab.pickle'):
        """Store ``wordlst`` and load the vocabulary.

        wordlst   -- iterable of word strings to analyse.
        vocabfile -- path of the pickled vocabulary. Defaults to
                     'vocab.pickle' (a relative path, so it is looked up
                     in the current working directory).
        """
        self.wordlst = wordlst
        self.vocab = self.loadpickle(vocabfile)

    def loadpickle(self, filename):
        """Return the object unpickled from ``filename``.

        SECURITY: unpickling can execute arbitrary code, so only load
        vocabulary files that come from a trusted source.
        """
        with open(filename, 'rb') as f:
            return pickle.load(f)

    def isnumber(self, string):
        """Return True if ``string`` represents a number, False otherwise.

        Anything ``float()`` rejects (including ``None``) is not a number.
        """
        try:
            float(string)
        except (TypeError, ValueError):
            return False
        # float() also parses the words "inf", "infinity" and "nan".
        # Requiring at least one digit keeps such real words from being
        # discarded as numbers by invocab.
        return any(ch.isdigit() for ch in str(string))

    def invocab(self, validate=True):
        """Match the words in ``self.wordlst`` against ``self.vocab``.

        Words are lower-cased before the lookup. With ``validate=True``
        (the default) the words found in the vocabulary are returned; with
        ``validate=False`` the words NOT found are returned, which may
        indicate misspelt words. Empty strings, numbers and duplicates are
        left out, and the order of first occurrence is preserved.
        """
        words = []
        seen = set()  # O(1) duplicate check instead of rescanning ``words``
        for word in self.wordlst:
            word = word.lower()
            if word in seen:
                continue
            seen.add(word)
            if word and (word in self.vocab) == validate \
                    and not self.isnumber(word):
                words.append(word)
        return words

    def keyword_density(self):
        """Return every distinct word in ``self.wordlst`` with its frequency.

        Words are lower-cased before counting and nothing is filtered out
        (words occurring once, numbers and empty strings are all counted).
        The result is a list of ``[count, word]`` lists ordered by
        descending count; on Python 3.7+ words with equal counts keep the
        order in which they first appear in ``self.wordlst``.
        """
        counts = Counter(word.lower() for word in self.wordlst)
        return [[count, word] for word, count in counts.most_common()]