Skip to content

Commit

Permalink
Merge pull request #26 from vaaaaanquish/individual
Browse files Browse the repository at this point in the history
selective individual word when parse
  • Loading branch information
vaaaaanquish authored May 8, 2020
2 parents a0f645a + e6189d0 commit 9ed680a
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 16 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ Cloudia(
stop_words=STOPWORDS, # words excluded from counting, default is wordcloud.STOPWORDS
extract_postags=['名詞', '英単語', 'ローマ字文'], # part of speech for japanese
parse_func=None, # split text function, example: lambda x: x.split(',')
multiprocess=True # Flag for using multiprocessing
multiprocess=True, # Flag for using multiprocessing
individual=False # if False, each list of words is joined with ' ' and parsed once; if True, each word is parsed individually
)
```

Expand Down Expand Up @@ -126,6 +127,7 @@ DataFrame.wc.plot(
extract_postags=['名詞', '英単語', 'ローマ字文'], # part of speech for japanese
parse_func=None, # split text function, example: lambda x: x.split(',')
multiprocess=True, # Flag for using multiprocessing
individual=False, # if False, each list of words is joined with ' ' and parsed once; if True, each word is parsed individually
dark_theme=False, # color theme
title_size=12, # title text size
row_num=3, # for example, 12 wordcloud, row_num=3 -> 4*3image
Expand Down
3 changes: 2 additions & 1 deletion cloudia/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@ def __init__(self,
extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
parse_func: Any = default_parse_func,
multiprocess: bool = True,
individual: bool = False,
**args):
args.update(dict(single_words=single_words, stop_words=stop_words, extract_postags=extract_postags))
self.wd = WordData(data, parse_func, multiprocess, **args)
self.wd = WordData(data, parse_func, multiprocess, individual, **args)

def make_wordcloud(self, dark_theme: bool, rate: int) -> List[Tuple[str, WordCloud]]:
wordcloud_list = []
Expand Down
14 changes: 10 additions & 4 deletions cloudia/pandas_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@ def plot(self,
title_size: int = 12,
row_num: int = 3,
figsize_rate: int = 2,
multiprocess: bool = True):
Cloudia(self.df, single_words, stop_words, extract_postags, parse_func, multiprocess).plot(dark_theme, title_size, row_num, figsize_rate)
multiprocess: bool = True,
individual: bool = False,
**args):
Cloudia(self.df, single_words, stop_words, extract_postags, parse_func, multiprocess, individual,
**args).plot(dark_theme, title_size, row_num, figsize_rate)

def save(self, fig_path: str, dark_theme: bool, **args: Any):
self.plot(**args)
Expand All @@ -44,8 +47,11 @@ def plot(self,
title_size: int = 12,
row_num: int = 3,
figsize_rate: int = 2,
multiprocess: bool = True):
Cloudia(self.series, single_words, stop_words, extract_postags, parse_func, multiprocess).plot(dark_theme, title_size, row_num, figsize_rate)
multiprocess: bool = True,
individual: bool = False,
**args):
Cloudia(self.series, single_words, stop_words, extract_postags, parse_func, multiprocess, individual,
**args).plot(dark_theme, title_size, row_num, figsize_rate)

def save(self, fig_path: str, dark_theme: bool, **args: Any):
self.plot(**args)
Expand Down
18 changes: 11 additions & 7 deletions cloudia/word_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,22 @@


class WordData:
def __init__(self, data: Any, parse_func: Callable[..., List[str]], multiprocess: bool, **args):
def __init__(self, data: Any, parse_func: Callable[..., List[str]], multiprocess: bool, individual: bool, **args):
words, self.names = self._init_data(data)
self.counter_list = self.parse(words, parse_func, multiprocess, **args)
self.counter_list = self.parse(words, parse_func, multiprocess, individual, **args)
self.words = [self.convert_weight(x) for x in self.counter_list]

def parse(self, words, parse_func: Callable[..., List[str]], multiprocess: bool, **args) -> List[Counter]:
def parse(self, words, parse_func: Callable[..., List[str]], multiprocess: bool, individual: bool, **args) -> List[Counter]:
if isinstance(words[0], list):
word_list_length = len(words[0])
words = list(chain.from_iterable(words))
words = self._parse(words, parse_func, multiprocess, **args)
words = list(zip_longest(*[iter(words)] * word_list_length))
words = [sum(w, Counter()) for w in words]
if individual:
words = list(chain.from_iterable(words))
words = self._parse(words, parse_func, multiprocess, **args)
words = list(zip_longest(*[iter(words)] * word_list_length))
words = [sum(w, Counter()) for w in words]
else:
words = [' '.join(x) for x in words]
words = self._parse(words, parse_func, multiprocess, **args)
else:
words = self._parse(words, parse_func, multiprocess, **args)
return words
Expand Down
6 changes: 3 additions & 3 deletions test/unit_test/test_word_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

class TestWordData(unittest.TestCase):
def setUp(self):
self.cls = WordData('test', lambda x: [x], True)
self.cls = WordData('test', lambda x: [x], True, False)

def assertSortTextEqual(self, data, target):
"""for random sample list."""
Expand Down Expand Up @@ -60,15 +60,15 @@ def _parse(x, y, z, **args):
return x

with patch('cloudia.word_data.WordData._parse', side_effect=_parse):
output = self.cls.parse(['hoge hoge', 'piyo'], None, None)
output = self.cls.parse(['hoge hoge', 'piyo'], None, None, False)
self.assertListEqual(output, ['hoge hoge', 'piyo'])

def test_parse_list_case(self):
def _parse(x, y, z, **args):
return [Counter(w.split(' ')) for w in x]

with patch('cloudia.word_data.WordData._parse', side_effect=_parse):
output = self.cls.parse([['hoge hoge', 'piyo'], ['fuga', 'fuga']], None, None)
output = self.cls.parse([['hoge hoge', 'piyo'], ['fuga', 'fuga']], None, None, False)
target = [Counter({'hoge': 2, 'piyo': 1}), Counter({'fuga': 2})]
for o, t in zip(output, target):
self.assertEqual(type(o), type(t))
Expand Down

0 comments on commit 9ed680a

Please sign in to comment.