From 3ba0f0f6b87772fef832cd5e0c5b9533b4a35e54 Mon Sep 17 00:00:00 2001 From: 6syun9 <6syun9@gmail.com> Date: Sat, 9 May 2020 08:15:28 +0900 Subject: [PATCH 1/2] add individual flag --- README.md | 4 +++- cloudia/main.py | 3 ++- cloudia/pandas_accessor.py | 14 ++++++++++---- cloudia/word_data.py | 18 +++++++++++------- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index c308bf8..6ddcc35 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,8 @@ Cloudia( stop_words=STOPWORDS, # not count words, default is wordcloud.STOPWORDS extract_postags=['名詞', '英単語', 'ローマ字文'], # part of speech for japanese parse_func=None, # split text function, example: lambda x: x.split(',') - multiprocess=True # Flag for using multiprocessing + multiprocess=True, # Flag for using multiprocessing + individual=False # flag for ' '.join(word) with parse ) ``` @@ -126,6 +127,7 @@ DataFrame.wc.plot( extract_postags=['名詞', '英単語', 'ローマ字文'], # part of speech for japanese parse_func=None, # split text function, example: lambda x: x.split(',') multiprocess=True, # Flag for using multiprocessing + individual=False, # flag for ' '.join(word) with parse dark_theme=False, # color theme title_size=12, # title text size row_num=3, # for example, 12 wordcloud, row_num=3 -> 4*3image diff --git a/cloudia/main.py b/cloudia/main.py index 61b0e35..5882c93 100644 --- a/cloudia/main.py +++ b/cloudia/main.py @@ -16,9 +16,10 @@ def __init__(self, extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'], parse_func: Any = default_parse_func, multiprocess: bool = True, + individual: bool = False, **args): args.update(dict(single_words=single_words, stop_words=stop_words, extract_postags=extract_postags)) - self.wd = WordData(data, parse_func, multiprocess, **args) + self.wd = WordData(data, parse_func, multiprocess, individual, **args) def make_wordcloud(self, dark_theme: bool, rate: int) -> List[Tuple[str, WordCloud]]: wordcloud_list = [] diff --git a/cloudia/pandas_accessor.py b/cloudia/pandas_accessor.py index 371cd14..0c9a26c 100644 --- a/cloudia/pandas_accessor.py +++ b/cloudia/pandas_accessor.py @@ -22,8 +22,11 @@ def plot(self, title_size: int = 12, row_num: int = 3, figsize_rate: int = 2, - multiprocess: bool = True): - Cloudia(self.df, single_words, stop_words, extract_postags, parse_func, multiprocess).plot(dark_theme, title_size, row_num, figsize_rate) + multiprocess: bool = True, + individual: bool = False, + **args): + Cloudia(self.df, single_words, stop_words, extract_postags, parse_func, multiprocess, individual, + **args).plot(dark_theme, title_size, row_num, figsize_rate) def save(self, fig_path: str, dark_theme: bool, **args: Any): self.plot(**args) @@ -44,8 +47,11 @@ def plot(self, title_size: int = 12, row_num: int = 3, figsize_rate: int = 2, - multiprocess: bool = True): - Cloudia(self.series, single_words, stop_words, extract_postags, parse_func, multiprocess).plot(dark_theme, title_size, row_num, figsize_rate) + multiprocess: bool = True, + individual: bool = False, + **args): + Cloudia(self.series, single_words, stop_words, extract_postags, parse_func, multiprocess, individual, + **args).plot(dark_theme, title_size, row_num, figsize_rate) def save(self, fig_path: str, dark_theme: bool, **args: Any): self.plot(**args) diff --git a/cloudia/word_data.py b/cloudia/word_data.py index 7a2543b..2e4641d 100644 --- a/cloudia/word_data.py +++ b/cloudia/word_data.py @@ -9,18 +9,22 @@ class WordData: - def __init__(self, data: Any, parse_func: Callable[..., List[str]], multiprocess: bool, **args): + def __init__(self, data: Any, parse_func: Callable[..., List[str]], multiprocess: bool, individual: bool, **args): words, self.names = self._init_data(data) - self.counter_list = self.parse(words, parse_func, multiprocess, **args) + self.counter_list = self.parse(words, parse_func, multiprocess, individual, **args) self.words = [self.convert_weight(x) for x in self.counter_list] - def parse(self, words, parse_func: Callable[..., List[str]], multiprocess: bool, **args) -> List[Counter]: + def parse(self, words, parse_func: Callable[..., List[str]], multiprocess: bool, individual: bool, **args) -> List[Counter]: if isinstance(words[0], list): word_list_length = len(words[0]) - words = list(chain.from_iterable(words)) - words = self._parse(words, parse_func, multiprocess, **args) - words = list(zip_longest(*[iter(words)] * word_list_length)) - words = [sum(w, Counter()) for w in words] + if individual: + words = list(chain.from_iterable(words)) + words = self._parse(words, parse_func, multiprocess, **args) + words = list(zip_longest(*[iter(words)] * word_list_length)) + words = [sum(w, Counter()) for w in words] + else: + words = [' '.join(x) for x in words] + words = self._parse(words, parse_func, multiprocess, **args) else: words = self._parse(words, parse_func, multiprocess, **args) return words From e6189d0b95d20eecdd64f78309ec77f3c1bf909f Mon Sep 17 00:00:00 2001 From: 6syun9 <6syun9@gmail.com> Date: Sat, 9 May 2020 08:19:56 +0900 Subject: [PATCH 2/2] fix ut --- test/unit_test/test_word_data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/unit_test/test_word_data.py b/test/unit_test/test_word_data.py index c95e48c..2c9a978 100644 --- a/test/unit_test/test_word_data.py +++ b/test/unit_test/test_word_data.py @@ -7,7 +7,7 @@ class TestWordData(unittest.TestCase): def setUp(self): - self.cls = WordData('test', lambda x: [x], True) + self.cls = WordData('test', lambda x: [x], True, False) def assertSortTextEqual(self, data, target): """for random sample list.""" @@ -60,7 +60,7 @@ def _parse(x, y, z, **args): return x with patch('cloudia.word_data.WordData._parse', side_effect=_parse): - output = self.cls.parse(['hoge hoge', 'piyo'], None, None) + output = self.cls.parse(['hoge hoge', 'piyo'], None, None, False) self.assertListEqual(output, ['hoge hoge', 'piyo']) def test_parse_list_case(self): @@ -68,7 +68,7 @@ def _parse(x, y, z, **args): return [Counter(w.split(' ')) for w in x] with patch('cloudia.word_data.WordData._parse', side_effect=_parse): - output = self.cls.parse([['hoge hoge', 'piyo'], ['fuga', 'fuga']], None, None) + output = self.cls.parse([['hoge hoge', 'piyo'], ['fuga', 'fuga']], None, None, False) target = [Counter({'hoge': 2, 'piyo': 1}), Counter({'fuga': 2})] for o, t in zip(output, target): self.assertEqual(type(o), type(t))