From 1021035fba35237826372365fe372b3b707470b2 Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Thu, 7 May 2020 01:34:30 +0900
Subject: [PATCH 01/11] add sample

---
 cloudia/word_data.py | 75 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 57 insertions(+), 18 deletions(-)
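
Note: this commit hard-codes a timing experiment into WordData.__init__ —
the same texts are parsed once through a joblib process pool and once
through the existing single-thread path, with print() timestamps around
each. A minimal self-contained sketch of the comparison being made; the
tokenize() helper is a hypothetical stand-in for the nagisa-based parse,
so the numbers are illustrative only:

    import time
    from itertools import repeat

    from joblib import Parallel, delayed


    def tokenize(text, stop_words):
        # stand-in for the expensive morphological-analysis step
        return [w for w in text.lower().split() if w not in stop_words]


    texts = ['neural networks parse text'] * 1000
    stop_words = {'a', 'the'}

    start = time.time()
    parallel = Parallel(n_jobs=-1)([
        delayed(tokenize)(t, s) for t, s in zip(texts, repeat(stop_words))
    ])
    print('joblib end', time.time() - start)

    start = time.time()
    single = [tokenize(t, stop_words) for t in texts]
    print('single end', time.time() - start)

    assert parallel == single  # same result, different wall time
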
diff --git a/cloudia/word_data.py b/cloudia/word_data.py
index 12aacd9..b0889a1 100644
--- a/cloudia/word_data.py
+++ b/cloudia/word_data.py
@@ -10,6 +10,26 @@
     import nagisa
 
 
+def count(words: List[str], stop_words, word_num) -> Dict[str, float]:
+    c = Counter(words).most_common()
+    _max_count = c[0][1]
+    weight = {k: v / _max_count for k, v in c if k not in stop_words}
+    weight = {k: weight[k] for k in list(weight.keys())[:word_num]}
+    return weight
+
+
+def parse(text: str, single_words, extract_postags, num_regex) -> List[str]:
+    parser = nagisa.Tagger(single_word_list=single_words)
+    for x in ['"', ';', ',', '(', ')', '\u3000']:
+        text = text.replace(x, ' ')
+    text = text.lower()
+    return [x for x in parser.extract(text, extract_postags=extract_postags).words if len(x) > 1 and not num_regex.match(x)]
+
+
+def process(text, stop_words, word_num, single_words, extract_postags, num_regex):
+    return count(parse(text, single_words, extract_postags, num_regex), stop_words, word_num)
+
+
 class WordData:
     def __init__(self, data: Any, single_words: List[str], stop_words: List[str], extract_postags: List[str], word_num: int, parser: Any, parse_func: Any):
         words, self.names = self._init_data(data)
@@ -17,12 +37,44 @@ def __init__(self, data: Any, single_words: List[str], stop_words: List[str], ex
         self.single_words = single_words
         self.extract_postags = extract_postags
         self.stop_words = stop_words
-        self.parser = nagisa.Tagger(single_word_list=self.single_words) if not parser else parser
+        # self.parser = nagisa.Tagger(single_word_list=self.single_words) if not parser else parser
+        num_regex = re.compile('^[0-9]+$')
         self.num_regex = re.compile('^[0-9]+$')
-        if parse_func:
-            self.words = [self.count(parse_func(x)) for x in words]
-        else:
-            self.words = [self.count(self.parse(x)) for x in words]
+        # if parse_func:
+        #     self.words = [self.count(parse_func(x)) for x in words]
+        # else:
+        #     self.words = [self.count(self.parse(x)) for x in words]
+        import time
+        from joblib import Parallel, delayed
+        from itertools import repeat
+        a = time.time()
+        print('joblib start', a)
+        self.words = Parallel(n_jobs=-1)([
+            delayed(process)(a, b, c, d, e, f)
+            for a, b, c, d, e, f in zip(words, repeat(stop_words), repeat(word_num), repeat(single_words), repeat(extract_postags), repeat(num_regex))
+        ])
+        b = time.time()
+        print('joblib end', b, b - a)
+
+        a = time.time()
+        print('start', a)
+        self.parser = nagisa.Tagger(single_word_list=self.single_words) if not parser else parser
+        self.words = [self.count(self.parse(x)) for x in words]
+        b = time.time()
+        print(' end', b, b - a)
+
+    def count(self, words: List[str]) -> Dict[str, float]:
+        c = Counter(words).most_common()
+        _max_count = c[0][1]
+        weight = {k: v / _max_count for k, v in c if k not in self.stop_words}
+        weight = {k: weight[k] for k in list(weight.keys())[:self.word_num]}
+        return weight
+
+    def parse(self, text: str) -> List[str]:
+        for x in ['"', ';', ',', '(', ')', '\u3000']:
+            text = text.replace(x, ' ')
+        text = text.lower()
+        return [x for x in self.parser.extract(text, extract_postags=self.extract_postags).words if len(x) > 1 and not self.num_regex.match(x)]
 
     def _init_data(self, data: Any) -> Tuple[List[str], List[str]]:
         words, names = [], []
@@ -55,19 +107,6 @@ def _init_data(self, data: Any) -> Tuple[List[str], List[str]]:
 
         return words, names
 
-    def count(self, words: List[str]) -> Dict[str, float]:
-        c = Counter(words).most_common()
-        _max_count = c[0][1]
-        weight = {k: v / _max_count for k, v in c if k not in self.stop_words}
-        weight = {k: weight[k] for k in list(weight.keys())[:self.word_num]}
-        return weight
-
-    def parse(self, text: str) -> List[str]:
-        for x in ['"', ';', ',', '(', ')', '\u3000']:
-            text = text.replace(x, ' ')
-        text = text.lower()
-        return [x for x in self.parser.extract(text, extract_postags=self.extract_postags).words if len(x) > 1 and not self.num_regex.match(x)]
-
     def __iter__(self):
         for n, w in zip(self.names, self.words):
             yield n, w

From 9f1d2af942c7993b82e11f7be60327dffd4d6ba0 Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 04:02:38 +0900
Subject: [PATCH 02/11] add multiprocessing parse

---
 cloudia/__init__.py        |   1 +
 cloudia/main.py            |  17 +++---
 cloudia/pandas_accessor.py |  38 ++++---------
 cloudia/word_data.py       | 114 ++++++++++++++-----------------------
 4 files changed, 61 insertions(+), 109 deletions(-)
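
Note: the ordering trick this commit introduces, in isolation. Each parse
call is wrapped so it returns (Counter, submission index); after the joblib
pool returns, results are sorted by that index to restore input order. A
minimal sketch mirroring the function_wrapper helper that word_data.py now
imports (the split lambda is a toy parser, not the library default):

    from collections import Counter

    from joblib import Parallel, delayed


    def function_wrapper(func):
        def _f(text, **kwargs):
            index = kwargs.pop('_index')
            return Counter(func(text, **kwargs)), index

        return _f


    split = function_wrapper(lambda t: t.split(' '))
    results = Parallel(n_jobs=-1)([
        delayed(split)(t, _index=i) for i, t in enumerate(['hoge hoge', 'piyo'])
    ])
    results.sort(key=lambda pair: pair[1])  # restore submission order
    print([c for c, _ in results])          # [Counter({'hoge': 2}), Counter({'piyo': 1})]
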
diff --git a/cloudia/__init__.py b/cloudia/__init__.py
index 072de49..6254286 100644
--- a/cloudia/__init__.py
+++ b/cloudia/__init__.py
@@ -1,3 +1,4 @@
 from .main import Cloudia
 from .pandas_accessor import CloudiaDataFrame, CloudiaSeries
 from .word_data import WordData
+from .utils import function_wrapper, process
diff --git a/cloudia/main.py b/cloudia/main.py
index 1557067..ed38a0a 100644
--- a/cloudia/main.py
+++ b/cloudia/main.py
@@ -3,7 +3,9 @@
 import matplotlib.pyplot as plt
 import japanize_matplotlib
 from wordcloud import WordCloud, STOPWORDS
+
 from cloudia.word_data import WordData
+from cloudia.utils import process
 
 
 class CloudiaBase:
@@ -12,16 +14,11 @@ def __init__(self,
                  single_words: List[str] = [],
                  stop_words: List[str] = STOPWORDS,
                  extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
-                 word_num: int = 100,
-                 parser: Any = None,
-                 parse_func: Any = None):
-        self.wd = WordData(data=data,
-                           single_words=single_words,
-                           stop_words=stop_words,
-                           extract_postags=extract_postags,
-                           word_num=word_num,
-                           parser=parser,
-                           parse_func=parse_func)
+                 parse_func: Any = process,
+                 multiprocess: bool = True,
+                 **args):
+        args.update(dict(single_words=single_words, stop_words=stop_words, extract_postags=extract_postags))
+        self.wd = WordData(data, parse_func, multiprocess, **args)
 
     def make_wordcloud(self, dark_theme: bool, rate: int) -> List[Tuple[str, WordCloud]]:
         wordcloud_list = []
diff --git a/cloudia/pandas_accessor.py b/cloudia/pandas_accessor.py
index b6e03fd..24bb7b1 100644
--- a/cloudia/pandas_accessor.py
+++ b/cloudia/pandas_accessor.py
@@ -1,10 +1,12 @@
 from typing import Any, List
 
-from cloudia.main import CloudiaBase, Cloudia
 import matplotlib.pyplot as plt
 from wordcloud import STOPWORDS
 import pandas as pd
 
+from cloudia.main import CloudiaBase, Cloudia
+from cloudia.utils import process
+
 
 @pd.api.extensions.register_dataframe_accessor('wc')
 class CloudiaDataFrame(CloudiaBase):
@@ -15,22 +17,13 @@ def plot(self,
              single_words: List[str] = [],
              stop_words: List[str] = STOPWORDS,
              extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
-             word_num: int = 100,
-             parser: Any = None,
-             parse_func: Any = None,
+             parse_func: Any = process,
              dark_theme: bool = False,
             title_size: int = 12,
             row_num: int = 3,
-             figsize_rate: int = 2):
-        Cloudia(
-            self.df,
-            single_words,
-            stop_words,
-            extract_postags,
-            word_num,
-            parser,
-            parse_func,
-        ).plot(dark_theme, title_size, row_num, figsize_rate)
+             figsize_rate: int = 2,
+             multiprocess: bool = True):
+        Cloudia(self.df, single_words, stop_words, extract_postags, parse_func, multiprocess).plot(dark_theme, title_size, row_num, figsize_rate)
 
     def save(self, fig_path: str, dark_theme: bool, **args: Any):
         self.plot(**args)
@@ -46,22 +39,13 @@ def plot(self,
              single_words: List[str] = [],
              stop_words: List[str] = STOPWORDS,
              extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
-             word_num: int = 100,
-             parser: Any = None,
-             parse_func: Any = None,
+             parse_func: Any = process,
             dark_theme: bool = False,
             title_size: int = 12,
             row_num: int = 3,
-             figsize_rate: int = 2):
-        Cloudia(
-            self.series,
-            single_words,
-            stop_words,
-            extract_postags,
-            word_num,
-            parser,
-            parse_func,
-        ).plot(dark_theme, title_size, row_num, figsize_rate)
+             figsize_rate: int = 2,
+             multiprocess: bool = True):
+        Cloudia(self.series, single_words, stop_words, extract_postags, parse_func, multiprocess).plot(dark_theme, title_size, row_num, figsize_rate)
 
     def save(self, fig_path: str, dark_theme: bool, **args: Any):
         self.plot(**args)
diff --git a/cloudia/word_data.py b/cloudia/word_data.py
index b0889a1..6b40642 100644
--- a/cloudia/word_data.py
+++ b/cloudia/word_data.py
@@ -1,87 +1,58 @@
 from typing import Any, List, Tuple, Dict
-import re
-
+from itertools import repeat, chain, zip_longest
 from collections import Counter
-import pandas as pd
-from wurlitzer import pipes
-
-with pipes() as (out, err):
-    # https://github.com/clab/dynet/issues/1528
-    import nagisa
-
-
-def count(words: List[str], stop_words, word_num) -> Dict[str, float]:
-    c = Counter(words).most_common()
-    _max_count = c[0][1]
-    weight = {k: v / _max_count for k, v in c if k not in stop_words}
-    weight = {k: weight[k] for k in list(weight.keys())[:word_num]}
-    return weight
-
-
-def parse(text: str, single_words, extract_postags, num_regex) -> List[str]:
-    parser = nagisa.Tagger(single_word_list=single_words)
-    for x in ['"', ';', ',', '(', ')', '\u3000']:
-        text = text.replace(x, ' ')
-    text = text.lower()
-    return [x for x in parser.extract(text, extract_postags=extract_postags).words if len(x) > 1 and not num_regex.match(x)]
+from joblib import Parallel, delayed
+import pandas as pd
 
-def process(text, stop_words, word_num, single_words, extract_postags, num_regex):
-    return count(parse(text, single_words, extract_postags, num_regex), stop_words, word_num)
+from cloudia.utils import function_wrapper
 
 
 class WordData:
-    def __init__(self, data: Any, single_words: List[str], stop_words: List[str], extract_postags: List[str], word_num: int, parser: Any, parse_func: Any):
-        words, self.names = self._init_data(data)
-        self.word_num = word_num
-        self.single_words = single_words
-        self.extract_postags = extract_postags
-        self.stop_words = stop_words
-        # self.parser = nagisa.Tagger(single_word_list=self.single_words) if not parser else parser
-        num_regex = re.compile('^[0-9]+$')
-        self.num_regex = re.compile('^[0-9]+$')
-        # if parse_func:
-        #     self.words = [self.count(parse_func(x)) for x in words]
-        # else:
-        #     self.words = [self.count(self.parse(x)) for x in words]
-        import time
-        from joblib import Parallel, delayed
-        from itertools import repeat
-        a = time.time()
-        print('joblib start', a)
-        self.words = Parallel(n_jobs=-1)([
-            delayed(process)(a, b, c, d, e, f)
-            for a, b, c, d, e, f in zip(words, repeat(stop_words), repeat(word_num), repeat(single_words), repeat(extract_postags), repeat(num_regex))
-        ])
-        b = time.time()
-        print('joblib end', b, b - a)
-
-        a = time.time()
-        print('start', a)
-        self.parser = nagisa.Tagger(single_word_list=self.single_words) if not parser else parser
-        self.words = [self.count(self.parse(x)) for x in words]
-        b = time.time()
-        print(' end', b, b - a)
-
-    def count(self, words: List[str]) -> Dict[str, float]:
-        c = Counter(words).most_common()
+    def __init__(self, data: Any, parse_func: Any, multiprocess: bool, **args):
+        self.words, self.names = self._init_data(data)
+        self.words = self._process_parse(function_wrapper(parse_func), multiprocess, **args)
+        self.words = [self._convert_weight(x) for x in self.words]
+
+    def _process_parse(self, parse_func: Any, multiprocess: bool, **args) -> List[List[str]]:
+        """flatten -> parse words -> chunked"""
+        if isinstance(self.words[0], list):
+            word_list_length = len(self.words[0])
+            words = list(chain.from_iterable(self.words))
+            words = self._parse(words, parse_func, multiprocess, **args)
+            words = list(zip_longest(*[iter(words)] * word_list_length))
+            words = [sum(w, Counter()) for w in words]
+        else:
+            words = self._parse(self.words, parse_func, multiprocess, **args)
+        return words
+
+    def _convert_weight(self, c: Counter) -> Dict[str, float]:
+        c = c.most_common()
         _max_count = c[0][1]
-        weight = {k: v / _max_count for k, v in c if k not in self.stop_words}
-        weight = {k: weight[k] for k in list(weight.keys())[:self.word_num]}
+        weight = {k: v / _max_count for k, v in c}
+        weight = {k: weight[k] for k in list(weight.keys())}
         return weight
 
-    def parse(self, text: str) -> List[str]:
-        for x in ['"', ';', ',', '(', ')', '\u3000']:
-            text = text.replace(x, ' ')
-        text = text.lower()
-        return [x for x in self.parser.extract(text, extract_postags=self.extract_postags).words if len(x) > 1 and not self.num_regex.match(x)]
+    def _parse(self, words: List[str], parse_func: Any, multiprocess: bool, **args) -> List[str]:
+        if multiprocess:
+            return self._parallel_parse(words, parse_func, **args)
+        return self._single_thread_parse(words, parse_func, **args)
+
+    def _single_thread_parse(self, words: List[str], parse_func: Any, **args) -> List[str]:
+        return [parse_func(x, **args) for x in words]
+
+    def _parallel_parse(self, words: List[str], parse_func: Any, **args) -> List[str]:
+        words = Parallel(n_jobs=-1)([delayed(parse_func)(w, **dict(**a, **{'_index': i})) for i, (w, a) in enumerate(zip(words, repeat(args)))])
+        words.sort(key=lambda x: x[1])
+        words = [t[0] for t in words]
+        return words
 
     def _init_data(self, data: Any) -> Tuple[List[str], List[str]]:
         words, names = [], []
         if isinstance(data, list):
             if isinstance(data[0], tuple):
                 if isinstance(data[0][1], pd.Series):
-                    words = [' '.join(d.values.tolist()) for n, d in data]
+                    words = [d.values.tolist() for n, d in data]
                     names = [n for n, d in data]
                 else:
                     words = [w for n, w in data]
@@ -90,7 +61,7 @@ def _init_data(self, data: Any) -> Tuple[List[str], List[str]]:
             words = data
             names = [f'word cloud {i+1}' for i in range(len(data))]
         elif isinstance(data[0], pd.Series):
-            words = [' '.join(d.values.tolist()) for d in data]
+            words = [d.values.tolist() for d in data]
             names = [d.name for d in data]
         elif isinstance(data, str):
             words = [data]
@@ -100,11 +71,10 @@ def _init_data(self, data: Any) -> Tuple[List[str], List[str]]:
             names = [data[0]]
         elif isinstance(data, pd.DataFrame):
             names = data.columns.tolist()
-            words = [' '.join(data[x].values.tolist()) for x in names]
+            words = [data[x].values.tolist() for x in names]
         elif isinstance(data, pd.Series):
-            words = [' '.join(data.values.tolist())]
+            words = [data.values.tolist()]
             names = [data.name]
-
         return words, names
 
     def __iter__(self):
         for n, w in zip(self.names, self.words):
             yield n, w

From a1bf76b9308620da6fef43ab6be2a24a0454c81d Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 04:22:01 +0900
Subject: [PATCH 03/11] fix ut

---
 test/unit_test/test_word_data.py | 44 ++++++++++++++++----------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/test/unit_test/test_word_data.py b/test/unit_test/test_word_data.py
index f25c3b5..69f0ba3 100644
--- a/test/unit_test/test_word_data.py
+++ b/test/unit_test/test_word_data.py
@@ -5,12 +5,12 @@
 
 class TestCloudia(unittest.TestCase):
     def setUp(self):
-        self.cls = WordData('test', [], [], None, None, None, lambda x: [x])
+        self.cls = WordData('test', lambda x: [x], True)
 
     def assertSortTextEqual(self, data, target):
         """for random sample list."""
-        data = [sorted(t.split(' ')) for t in data]
-        target = [sorted(t.split(' ')) for t in target]
+        data = [sorted(t.split(' ')) if isinstance(t, str) else sorted(t) for t in data]
+        target = [sorted(t.split(' ')) if isinstance(t, str) else sorted(t) for t in target]
         for x, y in zip(data, target):
             self.assertListEqual(x, y)
 
@@ -38,7 +38,7 @@ def test_init_data_list_tuple_series(self):
         test_1 = pd.Series(['test1 test2', 'test3'], name='wc1')
         test_2 = pd.Series(['test4', 'test5', 'test6'], name='wc2')
         words, name = self.cls._init_data([('name1', test_1), ('name2', test_2)])
-        self.assertSortTextEqual(words, ['test1 test2 test3', 'test4 test5 test6'])
+        self.assertSortTextEqual(words, [['test1 test2', 'test3'], 'test4 test5 test6'])
         self.assertListEqual(name, ['name1', 'name2'])
 
     def test_init_data_dataframe(self):
@@ -53,22 +53,22 @@ def test_init_data_series(self):
         self.assertSortTextEqual(words, ['test1 test2'])
         self.assertListEqual(name, ['wc'])
 
-    def test_count(self):
-        self.cls.word_num = 2
-        self.cls.stop_words = 'test'
-        words = ['hoge', 'hoge', 'hoge', 'test', 'test', 'piyo', 'piyo', 'fuga']
-        output = self.cls.count(words)
-        self.assertDictEqual(output, {'hoge': 1.0, 'piyo': 0.6666666666666666})
+    # def test_count(self):
+    #     self.cls.word_num = 2
+    #     self.cls.stop_words = 'test'
+    #     words = ['hoge', 'hoge', 'hoge', 'test', 'test', 'piyo', 'piyo', 'fuga']
+    #     output = self.cls.count(words)
+    #     self.assertDictEqual(output, {'hoge': 1.0, 'piyo': 0.6666666666666666})
 
-    def test_parse(self):
-        class MockData:
-            def __init__(self, d):
-                self.words = d
-
-        class MockParser:
-            def extract(self, text, extract_postags):
-                return MockData(text.split(' '))
-
-        self.cls.parser = MockParser()
-        output = self.cls.parse("It's a sample text; samples 1,2 face;) ")
-        self.assertListEqual(output, ["it's", 'sample', 'text', 'samples', 'face'])
+    # def test_parse(self):
+    #     class MockData:
+    #         def __init__(self, d):
+    #             self.words = d
+    #
+    #     class MockParser:
+    #         def extract(self, text, extract_postags):
+    #             return MockData(text.split(' '))
+    #
+    #     self.cls.parser = MockParser()
+    #     output = self.cls.parse("It's a sample text; samples 1,2 face;) ")
+    #     self.assertListEqual(output, ["it's", 'sample', 'text', 'samples', 'face'])

From 990af564fed31fae5dfe86a0b2fbc1b55354440e Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 04:43:24 +0900
Subject: [PATCH 04/11] refact

---
 cloudia/__init__.py        |  1 -
 cloudia/main.py            |  4 ++--
 cloudia/pandas_accessor.py |  6 +++---
 cloudia/word_data.py       | 21 ++++++++++-----------
 4 files changed, 15 insertions(+), 17 deletions(-)
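
Note: the refactored parse() keeps the flatten -> parse -> re-chunk scheme
from the previous commit. The idiom zip_longest(*[iter(xs)] * n) regroups
the flat results into chunks of n, and sum(chunk, Counter()) merges each
chunk back into one Counter per word cloud. Stand-alone sketch, with
text.split standing in for the real _parse:

    from collections import Counter
    from itertools import chain, zip_longest

    per_cloud = [['hoge hoge', 'piyo'], ['fuga', 'fuga']]
    chunk_size = len(per_cloud[0])

    flat = list(chain.from_iterable(per_cloud))           # parse once, flat
    parsed = [Counter(text.split(' ')) for text in flat]  # stand-in for _parse
    chunks = list(zip_longest(*[iter(parsed)] * chunk_size))
    merged = [sum(chunk, Counter()) for chunk in chunks]
    print(merged)  # [Counter({'hoge': 2, 'piyo': 1}), Counter({'fuga': 2})]
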
diff --git a/cloudia/__init__.py b/cloudia/__init__.py
index 6254286..072de49 100644
--- a/cloudia/__init__.py
+++ b/cloudia/__init__.py
@@ -1,4 +1,3 @@
 from .main import Cloudia
 from .pandas_accessor import CloudiaDataFrame, CloudiaSeries
 from .word_data import WordData
-from .utils import function_wrapper, process
diff --git a/cloudia/main.py b/cloudia/main.py
index ed38a0a..9c4a121 100644
--- a/cloudia/main.py
+++ b/cloudia/main.py
@@ -5,7 +5,7 @@
 from wordcloud import WordCloud, STOPWORDS
 
 from cloudia.word_data import WordData
-from cloudia.utils import process
+from cloudia.utils import defalt_parse_func
 
 
 class CloudiaBase:
@@ -14,7 +14,7 @@ def __init__(self,
                  single_words: List[str] = [],
                  stop_words: List[str] = STOPWORDS,
                  extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
-                 parse_func: Any = process,
+                 parse_func: Any = defalt_parse_func,
                  multiprocess: bool = True,
                  **args):
         args.update(dict(single_words=single_words, stop_words=stop_words, extract_postags=extract_postags))
diff --git a/cloudia/pandas_accessor.py b/cloudia/pandas_accessor.py
index 24bb7b1..267a4aa 100644
--- a/cloudia/pandas_accessor.py
+++ b/cloudia/pandas_accessor.py
@@ -5,7 +5,7 @@
 import pandas as pd
 
 from cloudia.main import CloudiaBase, Cloudia
-from cloudia.utils import process
+from cloudia.utils import defalt_parse_func
 
 
 @pd.api.extensions.register_dataframe_accessor('wc')
@@ -17,7 +17,7 @@ def plot(self,
              single_words: List[str] = [],
             stop_words: List[str] = STOPWORDS,
             extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
-             parse_func: Any = process,
+             parse_func: Any = defalt_parse_func,
             dark_theme: bool = False,
             title_size: int = 12,
             row_num: int = 3,
@@ -39,7 +39,7 @@ def plot(self,
              single_words: List[str] = [],
             stop_words: List[str] = STOPWORDS,
             extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
-             parse_func: Any = process,
+             parse_func: Any = defalt_parse_func,
             dark_theme: bool = False,
             title_size: int = 12,
             row_num: int = 3,
diff --git a/cloudia/word_data.py b/cloudia/word_data.py
index 6b40642..d23ba4c 100644
--- a/cloudia/word_data.py
+++ b/cloudia/word_data.py
@@ -10,23 +10,22 @@ class WordData:
     def __init__(self, data: Any, parse_func: Any, multiprocess: bool, **args):
-        self.words, self.names = self._init_data(data)
-        self.words = self._process_parse(function_wrapper(parse_func), multiprocess, **args)
-        self.words = [self._convert_weight(x) for x in self.words]
+        words, self.names = self._init_data(data)
+        self.counter_list = self.parse(words, parse_func, multiprocess, **args)
+        self.words = [self.convert_weight(x) for x in self.counter_list]
 
-    def _process_parse(self, parse_func: Any, multiprocess: bool, **args) -> List[List[str]]:
-        """flatten -> parse words -> chunked"""
-        if isinstance(self.words[0], list):
-            word_list_length = len(self.words[0])
-            words = list(chain.from_iterable(self.words))
+    def parse(self, words, parse_func: Any, multiprocess: bool, **args) -> List[List[str]]:
+        if isinstance(words[0], list):
+            word_list_length = len(words[0])
+            words = list(chain.from_iterable(words))
             words = self._parse(words, parse_func, multiprocess, **args)
             words = list(zip_longest(*[iter(words)] * word_list_length))
             words = [sum(w, Counter()) for w in words]
         else:
-            words = self._parse(self.words, parse_func, multiprocess, **args)
+            words = self._parse(words, parse_func, multiprocess, **args)
         return words
 
-    def _convert_weight(self, c: Counter) -> Dict[str, float]:
+    def convert_weight(self, c: Counter) -> Dict[str, float]:
         c = c.most_common()
         _max_count = c[0][1]
         weight = {k: v / _max_count for k, v in c}
@@ -35,7 +34,7 @@ def _parse(self, words: List[str], parse_func: Any, multiprocess: bool, **args) -> List[str]:
         if multiprocess:
-            return self._parallel_parse(words, parse_func, **args)
+            return self._parallel_parse(words, function_wrapper(parse_func), **args)
         return self._single_thread_parse(words, parse_func, **args)
 
     def _single_thread_parse(self, words: List[str], parse_func: Any, **args) -> List[str]:

From 701a6a165975a971a27355737bc3b0f742cf8303 Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 05:27:45 +0900
Subject: [PATCH 05/11] fix ut

---
 cloudia/main.py                  |  4 +-
 cloudia/pandas_accessor.py       |  6 +--
 cloudia/word_data.py             |  2 +-
 test/unit_test/test_word_data.py | 68 ++++++++++++++++++++++----------
 4 files changed, 54 insertions(+), 26 deletions(-)
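
Note: the deleted tests covered methods that no longer exist (count, and
parse on a parser object); the replacements patch the private _parse hook
so the public parse() can be exercised without nagisa or a process pool. A
minimal sketch of that style, assuming the cloudia package from this branch
is importable:

    import unittest
    from collections import Counter
    from unittest.mock import patch

    from cloudia.word_data import WordData


    class ParseTest(unittest.TestCase):
        def test_parse_merges_list_input(self):
            def fake_parse(words, parse_func, multiprocess, **args):
                # replaces WordData._parse: split instead of nagisa
                return [Counter(w.split(' ')) for w in words]

            with patch('cloudia.word_data.WordData._parse', side_effect=fake_parse):
                wd = WordData('test', lambda x: [x], True)
                out = wd.parse([['hoge hoge', 'piyo'], ['fuga', 'fuga']], None, None)
            self.assertEqual(out[0], Counter({'hoge': 2, 'piyo': 1}))


    if __name__ == '__main__':
        unittest.main()
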
diff --git a/cloudia/main.py b/cloudia/main.py
index 9c4a121..61b0e35 100644
--- a/cloudia/main.py
+++ b/cloudia/main.py
@@ -5,7 +5,7 @@
 from wordcloud import WordCloud, STOPWORDS
 
 from cloudia.word_data import WordData
-from cloudia.utils import defalt_parse_func
+from cloudia.utils import default_parse_func
 
 
 class CloudiaBase:
@@ -14,7 +14,7 @@ def __init__(self,
                  single_words: List[str] = [],
                  stop_words: List[str] = STOPWORDS,
                  extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
-                 parse_func: Any = defalt_parse_func,
+                 parse_func: Any = default_parse_func,
                  multiprocess: bool = True,
                  **args):
         args.update(dict(single_words=single_words, stop_words=stop_words, extract_postags=extract_postags))
diff --git a/cloudia/pandas_accessor.py b/cloudia/pandas_accessor.py
index 267a4aa..371cd14 100644
--- a/cloudia/pandas_accessor.py
+++ b/cloudia/pandas_accessor.py
@@ -5,7 +5,7 @@
 import pandas as pd
 
 from cloudia.main import CloudiaBase, Cloudia
-from cloudia.utils import defalt_parse_func
+from cloudia.utils import default_parse_func
 
 
 @pd.api.extensions.register_dataframe_accessor('wc')
@@ -17,7 +17,7 @@ def plot(self,
              single_words: List[str] = [],
             stop_words: List[str] = STOPWORDS,
             extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
-             parse_func: Any = defalt_parse_func,
+             parse_func: Any = default_parse_func,
             dark_theme: bool = False,
             title_size: int = 12,
             row_num: int = 3,
@@ -39,7 +39,7 @@ def plot(self,
              single_words: List[str] = [],
             stop_words: List[str] = STOPWORDS,
             extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
-             parse_func: Any = defalt_parse_func,
+             parse_func: Any = default_parse_func,
             dark_theme: bool = False,
             title_size: int = 12,
             row_num: int = 3,
diff --git a/cloudia/word_data.py b/cloudia/word_data.py
index d23ba4c..6c45e08 100644
--- a/cloudia/word_data.py
+++ b/cloudia/word_data.py
@@ -38,7 +38,7 @@ def _parse(self, words: List[str], parse_func: Any, multiprocess: bool, **args)
         return self._single_thread_parse(words, parse_func, **args)
 
     def _single_thread_parse(self, words: List[str], parse_func: Any, **args) -> List[str]:
-        return [parse_func(x, **args) for x in words]
+        return [Counter(parse_func(x, **args)) for x in words]
diff --git a/test/unit_test/test_word_data.py b/test/unit_test/test_word_data.py
index 69f0ba3..c95e48c 100644
--- a/test/unit_test/test_word_data.py
+++ b/test/unit_test/test_word_data.py
@@ -1,9 +1,11 @@
 from cloudia.word_data import WordData
 import unittest
+from unittest.mock import patch
 
 import pandas as pd
+from collections import Counter
 
 
-class TestCloudia(unittest.TestCase):
+class TestWordData(unittest.TestCase):
     def setUp(self):
         self.cls = WordData('test', lambda x: [x], True)
@@ -53,22 +55,48 @@ def test_init_data_series(self):
         self.assertSortTextEqual(words, ['test1 test2'])
         self.assertListEqual(name, ['wc'])
 
-    # def test_count(self):
-    #     self.cls.word_num = 2
-    #     self.cls.stop_words = 'test'
-    #     words = ['hoge', 'hoge', 'hoge', 'test', 'test', 'piyo', 'piyo', 'fuga']
-    #     output = self.cls.count(words)
-    #     self.assertDictEqual(output, {'hoge': 1.0, 'piyo': 0.6666666666666666})
-
-    # def test_parse(self):
-    #     class MockData:
-    #         def __init__(self, d):
-    #             self.words = d
-    #
-    #     class MockParser:
-    #         def extract(self, text, extract_postags):
-    #             return MockData(text.split(' '))
-    #
-    #     self.cls.parser = MockParser()
-    #     output = self.cls.parse("It's a sample text; samples 1,2 face;) ")
-    #     self.assertListEqual(output, ["it's", 'sample', 'text', 'samples', 'face'])
+    def test_parse(self):
+        def _parse(x, y, z, **args):
+            return x
+
+        with patch('cloudia.word_data.WordData._parse', side_effect=_parse):
+            output = self.cls.parse(['hoge hoge', 'piyo'], None, None)
+        self.assertListEqual(output, ['hoge hoge', 'piyo'])
+
+    def test_parse_list_case(self):
+        def _parse(x, y, z, **args):
+            return [Counter(w.split(' ')) for w in x]
+
+        with patch('cloudia.word_data.WordData._parse', side_effect=_parse):
+            output = self.cls.parse([['hoge hoge', 'piyo'], ['fuga', 'fuga']], None, None)
+        target = [Counter({'hoge': 2, 'piyo': 1}), Counter({'fuga': 2})]
+        for o, t in zip(output, target):
+            self.assertEqual(type(o), type(t))
+            self.assertEqual(o.most_common(), t.most_common())
+
+    def test_convert_weight(self):
+        output = self.cls.convert_weight(Counter(['hoge', 'hoge', 'piyo']))
+        self.assertDictEqual(output, {'hoge': 1, 'piyo': 0.5})
+
+    def test_single_thread_parse(self):
+        def f(x):
+            return x.split(' ')
+
+        output = self.cls._single_thread_parse(['hoge hoge', 'piyo'], f)
+        target = [Counter(['hoge', 'hoge']), Counter(['piyo'])]
+        for o, t in zip(output, target):
+            self.assertEqual(type(o), type(t))
+            self.assertEqual(o.most_common(), t.most_common())
+
+    def test_parallel_parse(self):
+        def f(x, _index):
+            return Counter(x.split(' ')), _index
+
+        output = self.cls._parallel_parse(['hoge hoge', 'piyo'], f, **{})
+        target = [
+            Counter(['hoge', 'hoge']),
+            Counter(['piyo']),
+        ]
+        for o, t in zip(output, target):
+            self.assertEqual(type(o), type(t))
+            self.assertEqual(o.most_common(), t.most_common())

From 74b91f44b6c91bf693d21c1a664deb160e4dfedb Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 06:10:39 +0900
Subject: [PATCH 06/11] add unittest, integration test

---
 .github/workflows/python_test.yml          | 15 +++++++++-
 cloudia/utils.py                           | 28 +++++++++++++++++++
 cloudia/word_data.py                       |  1 +
 .../test_cloudia_pandas_plot.py            | 17 +++++++++++
 test/integration_test/test_cloudia_plot.py | 21 ++++++++++++++
 test/unit_test/test_utils.py               | 21 ++++++++++++++
 6 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 cloudia/utils.py
 create mode 100644 test/integration_test/test_cloudia_pandas_plot.py
 create mode 100644 test/integration_test/test_cloudia_plot.py
 create mode 100644 test/unit_test/test_utils.py
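
Note: the integration tests are deliberately plain scripts rather than
unittest cases - each exits non-zero on any exception so the workflow steps
below can chain them with shell exit-code checks. The template they follow,
shown in its final form after the follow-ups in PATCH 07/08 and trimmed to
a single plot call for illustration:

    import sys
    import traceback

    import pandas as pd

    if __name__ == '__main__':
        import cloudia  # noqa - imported only to register the .wc accessor

        try:
            pd.Series(['hoge'], name='test').wc.plot()
        except Exception:
            traceback.print_exc()
            sys.exit(1)
        sys.exit(0)
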
diff --git a/.github/workflows/python_test.yml b/.github/workflows/python_test.yml
index fb65aa4..e595937 100644
--- a/.github/workflows/python_test.yml
+++ b/.github/workflows/python_test.yml
@@ -44,4 +44,17 @@
           exit 1
         fi
         echo "pass mypy"
-
+    - name: integration test
+      run: |
+        poetry run python ./test/integration_test/test_cloudia_plot.py
+        if [ $? != 0 ]; then
+          echo "failed: cloudia_plot"
+          exit 1
+        fi
+        echo "pass cloudia_plot"
+        poetry run python ./test/integration_test/test_cloudia_pandas_plot.py
+        if [ $? != 0 ]; then
+          echo "failed: cloudia_pandas_plot"
+          exit 1
+        fi
+        echo "pass cloudia_pandas_plot"
diff --git a/cloudia/utils.py b/cloudia/utils.py
new file mode 100644
index 0000000..77e01ae
--- /dev/null
+++ b/cloudia/utils.py
@@ -0,0 +1,28 @@
+from collections import Counter
+from typing import List
+import re
+
+from wurlitzer import pipes
+
+with pipes() as (out, err):
+    # https://github.com/clab/dynet/issues/1528
+    import nagisa
+
+NUM_REGEX = re.compile('^[0-9]+$')
+
+
+def default_parse_func(text: str, single_words: List[str], extract_postags: List[str], stop_words: List[str]) -> List[str]:
+    parser = nagisa.Tagger(single_word_list=single_words)
+    for x in ['"', ';', ',', '(', ')', '\u3000']:
+        text = text.replace(x, ' ')
+    text = text.lower()
+    return [x for x in parser.extract(text, extract_postags=extract_postags).words if len(x) > 1 and not NUM_REGEX.match(x) and x not in stop_words]
+
+
+def function_wrapper(func):
+    def _f(t, **kwargs):
+        i = kwargs.pop('_index')
+        d = Counter(func(t, **kwargs))
+        return d, i
+
+    return _f
diff --git a/cloudia/word_data.py b/cloudia/word_data.py
index 6c45e08..3b75090 100644
--- a/cloudia/word_data.py
+++ b/cloudia/word_data.py
@@ -47,6 +47,7 @@ def _init_data(self, data: Any) -> Tuple[List[str], List[str]]:
+        # TODO: set assert
         words, names = [], []
         if isinstance(data, list):
             if isinstance(data[0], tuple):
diff --git a/test/integration_test/test_cloudia_pandas_plot.py b/test/integration_test/test_cloudia_pandas_plot.py
new file mode 100644
index 0000000..3c9b991
--- /dev/null
+++ b/test/integration_test/test_cloudia_pandas_plot.py
@@ -0,0 +1,17 @@
+import sys
+import pathlib
+import pandas as pd
+
+if __name__ == '__main__':
+    current_dir = pathlib.Path(__file__).resolve().parent
+    sys.path.append(str(current_dir.parents[1]))
+    from cloudia.main import Cloudia
+
+    try:
+        pd.DataFrame({'test': ['hoge']}).wc.plot()
+        pd.DataFrame({'test': ['hoge']})['test'].wc.plot()
+        pd.Series(['hoge']).wc.plot()
+        raise
+    except:
+        sys.exit(1)
+    sys.exit(0)
diff --git a/test/integration_test/test_cloudia_plot.py b/test/integration_test/test_cloudia_plot.py
new file mode 100644
index 0000000..d204ff4
--- /dev/null
+++ b/test/integration_test/test_cloudia_plot.py
@@ -0,0 +1,21 @@
+import sys
+import pathlib
+import pandas as pd
+
+if __name__ == '__main__':
+    current_dir = pathlib.Path(__file__).resolve().parent
+    sys.path.append(str(current_dir.parents[1]))
+    from cloudia.main import Cloudia
+
+    try:
+        Cloudia([('test', pd.Series(['hoge']))]).plot()
+        Cloudia([('test', 'hoge')]).plot()
+        Cloudia(['hoge']).plot()
+        Cloudia([pd.Series(['hoge'])]).plot()
+        Cloudia('hoge').plot()
+        Cloudia(('test', 'hoge')).plot()
+        Cloudia(pd.DataFrame({'test': ['hoge']})).plot()
+        Cloudia(pd.Series(['hoge'])).plot()
+    except:
+        sys.exit(1)
+    sys.exit(0)
diff --git a/test/unit_test/test_utils.py b/test/unit_test/test_utils.py
new file mode 100644
index 0000000..d07a4b6
--- /dev/null
+++ b/test/unit_test/test_utils.py
@@ -0,0 +1,21 @@
+from cloudia.utils import default_parse_func, function_wrapper
+import unittest
+from collections import Counter
+
+
+class TestUtils(unittest.TestCase):
+    def test_default_parse_func(self):
+        output = default_parse_func('This is a simple test.', ['simple test'], ['英単語'], ['is'])
+        self.assertListEqual(output, ['this', 'simple\u3000test'])
+
+    def test_function_wrapper(self):
+        def test(x):
+            return [x + '_']
+
+        wf = function_wrapper(test)
+        output = [wf(x, _index=i) for i, x in enumerate(['hoge', 'piyo'])]
+        target = [(Counter({'hoge_': 1}), 0), (Counter({'piyo_': 1}), 1)]
+        for o, t in zip(output, target):
+            self.assertEqual(type(o), type(t))
+            self.assertEqual(o[1], t[1])
+            self.assertEqual(o[0].most_common(), t[0].most_common())

From b3a303501e2dceb3ae2875fdbabb81bfa779d9d0 Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 06:13:26 +0900
Subject: [PATCH 07/11] fix it

---
 test/integration_test/test_cloudia_pandas_plot.py | 4 ++--
 test/integration_test/test_cloudia_plot.py        | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)
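
Note: two small hygiene fixes. `import cloudia  # noqa` keeps the linter
quiet about an import that exists only to register the pandas accessors,
and the bare excepts become `except Exception:`. The latter matters because
a bare except also catches SystemExit; toy demonstration:

    import sys

    try:
        sys.exit(1)
    except:            # noqa: E722 - bare except swallows SystemExit
        print('exit swallowed, script keeps running')

    try:
        sys.exit(0)
    except Exception:  # SystemExit is not an Exception subclass
        print('never reached')
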
diff --git a/test/integration_test/test_cloudia_pandas_plot.py b/test/integration_test/test_cloudia_pandas_plot.py
index 3c9b991..a522a47 100644
--- a/test/integration_test/test_cloudia_pandas_plot.py
+++ b/test/integration_test/test_cloudia_pandas_plot.py
@@ -5,13 +5,13 @@
 if __name__ == '__main__':
     current_dir = pathlib.Path(__file__).resolve().parent
     sys.path.append(str(current_dir.parents[1]))
-    from cloudia.main import Cloudia
+    import cloudia  # noqa
 
     try:
         pd.DataFrame({'test': ['hoge']}).wc.plot()
         pd.DataFrame({'test': ['hoge']})['test'].wc.plot()
         pd.Series(['hoge']).wc.plot()
         raise
-    except:
+    except Exception:
         sys.exit(1)
     sys.exit(0)
diff --git a/test/integration_test/test_cloudia_plot.py b/test/integration_test/test_cloudia_plot.py
index d204ff4..b172ac1 100644
--- a/test/integration_test/test_cloudia_plot.py
+++ b/test/integration_test/test_cloudia_plot.py
@@ -16,6 +16,6 @@
         Cloudia(('test', 'hoge')).plot()
         Cloudia(pd.DataFrame({'test': ['hoge']})).plot()
         Cloudia(pd.Series(['hoge'])).plot()
-    except:
+    except Exception:
         sys.exit(1)
     sys.exit(0)

From 05b89c92c7400947b3b9e3f54ab0896dcb283ab0 Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 06:22:52 +0900
Subject: [PATCH 08/11] add joblib

---
 .github/workflows/python_test.yml          | 10 +++++-----
 poetry.lock                                | 18 +++++++++++++++---
 pyproject.toml                             |  1 +
 .../test_cloudia_pandas_plot.py            |  2 ++
 test/integration_test/test_cloudia_plot.py |  2 ++
 5 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/python_test.yml b/.github/workflows/python_test.yml
index e595937..8f3382a 100644
--- a/.github/workflows/python_test.yml
+++ b/.github/workflows/python_test.yml
@@ -33,10 +33,7 @@
           exit 1
         fi
         echo "pass yapf"
-    - name: Test with pytest
-      run: |
-        poetry run python -m unittest discover -s ./test/unit_test/
-    - name: Test with mypy
+    - name: Lint with mypy
       run: |
         mypy --ignore-missing-imports ./cloudia/
         if [ $? != 0 ]; then
@@ -44,7 +41,10 @@
           exit 1
         fi
         echo "pass mypy"
-    - name: integration test
+    - name: Unit Test
+      run: |
+        poetry run python -m unittest discover -s ./test/unit_test/
+    - name: Integration Test
       run: |
         poetry run python ./test/integration_test/test_cloudia_plot.py
         if [ $? != 0 ]; then
diff --git a/poetry.lock b/poetry.lock
index ece8bbf..5c5137d 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -35,11 +35,19 @@
 description = "matplotlibのフォント設定を自動で日本語化する"
 name = "japanize-matplotlib"
 optional = false
 python-versions = "*"
-version = "1.1.1"
+version = "1.1.2"
 
 [package.dependencies]
 matplotlib = "*"
 
+[[package]]
+category = "main"
+description = "Lightweight pipelining: using Python functions as pipeline jobs."
+name = "joblib"
+optional = false
+python-versions = "*"
+version = "0.14.1"
+
 [[package]]
 category = "main"
 description = "A fast implementation of the Cassowary constraint solver"
@@ -165,7 +173,7 @@
 python-versions = ">=2.7"
 version = "2.0.0"
 
 [metadata]
-content-hash = "aa467e1fc61ce35041b8b0bf14d477d5c0fb0d72e8fbb0e7fcef9e48b85b9203"
+content-hash = "6e1e02815b139ab5162d0a2b90ec531ae6ed13fe54c692fde83cdf3ab5ffd970"
 python-versions = "^3.6"
 
 [metadata.files]
@@ -232,7 +240,11 @@
 dynet = [
     {file = "dyNET-2.1.tar.gz", hash = "sha256:d0f58aaf3926da24baba6e3e76cb3d090c8b6d359196ce138b11faa291b2ec07"},
 ]
 japanize-matplotlib = [
-    {file = "japanize-matplotlib-1.1.1.tar.gz", hash = "sha256:763cae497fd2884c5b3ce40ddb1eb5160ee02b6e1d5a4397af416584edb3b0e7"},
+    {file = "japanize-matplotlib-1.1.2.tar.gz", hash = "sha256:e2d9bb5ac2f2c37baf0991d43981fc63c2c5a3722e8e41dd6c31030220237b01"},
+]
+joblib = [
+    {file = "joblib-0.14.1-py2.py3-none-any.whl", hash = "sha256:bdb4fd9b72915ffb49fde2229ce482dd7ae79d842ed8c2b4c932441495af1403"},
+    {file = "joblib-0.14.1.tar.gz", hash = "sha256:0630eea4f5664c463f23fbf5dcfc54a2bc6168902719fa8e19daf033022786c8"},
 ]
 kiwisolver = [
     {file = "kiwisolver-1.2.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:443c2320520eda0a5b930b2725b26f6175ca4453c61f739fef7a5847bd262f74"},
diff --git a/pyproject.toml b/pyproject.toml
index 960b0c1..a544ce0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,6 +21,7 @@ wordcloud = "*"
 pandas = "*"
 matplotlib = "*"
 wurlitzer = "*"
+joblib = "*"
 japanize_matplotlib = "^1.1.1"
 
 [tool.poetry.scripts]
diff --git a/test/integration_test/test_cloudia_pandas_plot.py b/test/integration_test/test_cloudia_pandas_plot.py
index a522a47..de70979 100644
--- a/test/integration_test/test_cloudia_pandas_plot.py
+++ b/test/integration_test/test_cloudia_pandas_plot.py
@@ -1,5 +1,6 @@
 import sys
 import pathlib
+import traceback
 import pandas as pd
 
 if __name__ == '__main__':
@@ -13,5 +14,6 @@
         pd.Series(['hoge']).wc.plot()
         raise
     except Exception:
+        traceback.print_exc()
         sys.exit(1)
     sys.exit(0)
diff --git a/test/integration_test/test_cloudia_plot.py b/test/integration_test/test_cloudia_plot.py
index b172ac1..9468dc2 100644
--- a/test/integration_test/test_cloudia_plot.py
+++ b/test/integration_test/test_cloudia_plot.py
@@ -1,5 +1,6 @@
 import sys
 import pathlib
+import traceback
 import pandas as pd
 
 if __name__ == '__main__':
@@ -17,5 +18,6 @@
         Cloudia(pd.DataFrame({'test': ['hoge']})).plot()
         Cloudia(pd.Series(['hoge'])).plot()
     except Exception:
+        traceback.print_exc()
         sys.exit(1)
     sys.exit(0)

From 795b15cc64c7687cb2c221ee0e40e2121e4baf8a Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 06:39:50 +0900
Subject: [PATCH 09/11] fix mypy

---
 cloudia/word_data.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)
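
Note: mypy-driven changes only, no behavior change. parse_func tightens
from Any to Callable[..., List[str]], and variables whose type changed
mid-function (c, words) get distinct names instead. Minimal illustration of
the annotation style, with toy functions rather than library code:

    from typing import Callable, List


    def apply_parse(parse_func: Callable[..., List[str]], text: str) -> List[str]:
        return parse_func(text)


    def split_words(text: str, sep: str = ' ') -> List[str]:
        return text.split(sep)


    print(apply_parse(split_words, 'hoge piyo'))   # ['hoge', 'piyo']
    print(apply_parse(lambda t: [t], 'one item'))  # ['one item']
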
diff --git a/cloudia/word_data.py b/cloudia/word_data.py
index 3b75090..7a2543b 100644
--- a/cloudia/word_data.py
+++ b/cloudia/word_data.py
@@ -1,4 +1,4 @@
-from typing import Any, List, Tuple, Dict
+from typing import Any, List, Tuple, Dict, Callable, Union
 from itertools import repeat, chain, zip_longest
 from collections import Counter
 
@@ -9,12 +9,12 @@
 
 
 class WordData:
-    def __init__(self, data: Any, parse_func: Any, multiprocess: bool, **args):
+    def __init__(self, data: Any, parse_func: Callable[..., List[str]], multiprocess: bool, **args):
         words, self.names = self._init_data(data)
         self.counter_list = self.parse(words, parse_func, multiprocess, **args)
         self.words = [self.convert_weight(x) for x in self.counter_list]
 
-    def parse(self, words, parse_func: Any, multiprocess: bool, **args) -> List[List[str]]:
+    def parse(self, words, parse_func: Callable[..., List[str]], multiprocess: bool, **args) -> List[Counter]:
         if isinstance(words[0], list):
             word_list_length = len(words[0])
             words = list(chain.from_iterable(words))
@@ -26,25 +26,25 @@ def parse(self, words, parse_func: Any, multiprocess: bool, **args) -> List[List
         return words
 
     def convert_weight(self, c: Counter) -> Dict[str, float]:
-        c = c.most_common()
-        _max_count = c[0][1]
-        weight = {k: v / _max_count for k, v in c}
+        most_common = c.most_common()
+        _max_count = most_common[0][1]
+        weight = {k: v / _max_count for k, v in most_common}
         weight = {k: weight[k] for k in list(weight.keys())}
         return weight
 
-    def _parse(self, words: List[str], parse_func: Any, multiprocess: bool, **args) -> List[str]:
+    def _parse(self, words: List[str], parse_func: Callable[..., List[str]], multiprocess: bool, **args) -> Union[List[Counter], List[List[Counter]]]:
         if multiprocess:
             return self._parallel_parse(words, function_wrapper(parse_func), **args)
         return self._single_thread_parse(words, parse_func, **args)
 
-    def _single_thread_parse(self, words: List[str], parse_func: Any, **args) -> List[str]:
+    def _single_thread_parse(self, words: List[str], parse_func: Callable[..., List[str]], **args) -> List[Counter]:
         return [Counter(parse_func(x, **args)) for x in words]
 
-    def _parallel_parse(self, words: List[str], parse_func: Any, **args) -> List[str]:
-        words = Parallel(n_jobs=-1)([delayed(parse_func)(w, **dict(**a, **{'_index': i})) for i, (w, a) in enumerate(zip(words, repeat(args)))])
-        words.sort(key=lambda x: x[1])
-        words = [t[0] for t in words]
-        return words
+    def _parallel_parse(self, words: List[str], parse_func: Callable, **args) -> List[List[Counter]]:
+        parsed_words = Parallel(n_jobs=-1)([delayed(parse_func)(w, **dict(**a, **{'_index': i})) for i, (w, a) in enumerate(zip(words, repeat(args)))])
+        parsed_words.sort(key=lambda x: x[1])
+        parsed_words = [t[0] for t in parsed_words]
+        return parsed_words

From 932dfd3debfed90c81194c2c074031d9264e2f71 Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 06:47:43 +0900
Subject: [PATCH 10/11] remove raise

---
 test/integration_test/test_cloudia_pandas_plot.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/integration_test/test_cloudia_pandas_plot.py b/test/integration_test/test_cloudia_pandas_plot.py
index de70979..82062e7 100644
--- a/test/integration_test/test_cloudia_pandas_plot.py
+++ b/test/integration_test/test_cloudia_pandas_plot.py
@@ -12,7 +12,6 @@
         pd.DataFrame({'test': ['hoge']}).wc.plot()
         pd.DataFrame({'test': ['hoge']})['test'].wc.plot()
         pd.Series(['hoge']).wc.plot()
-        raise
     except Exception:
         traceback.print_exc()
         sys.exit(1)

From 22bb40ed28d05ae52f65e25c152855d393e1116a Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 06:55:43 +0900
Subject: [PATCH 11/11] update readme, update it

---
 README.md                                  |  6 ++----
 .../test_cloudia_pandas_plot.py            |  7 ++++---
 test/integration_test/test_cloudia_plot.py | 17 +++++++++--------
 3 files changed, 15 insertions(+), 15 deletions(-)
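
Note: the README now documents the reworked surface - word_num and parser
are gone, parse_func defaults to the nagisa-based tokenizer, and
multiprocess toggles the joblib path. End-to-end usage sketch with made-up
data; a custom parse_func must accept the keyword arguments Cloudia
forwards (single_words, stop_words, extract_postags), hence the **args:

    import pandas as pd

    from cloudia.main import Cloudia

    df = pd.DataFrame({'sample': ['hoge hoge piyo', 'piyo fuga']})
    Cloudia(df, parse_func=lambda x, **args: x.split(' '), multiprocess=False).plot()
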
diff --git a/README.md b/README.md
index c76b5c8..c308bf8 100644
--- a/README.md
+++ b/README.md
@@ -91,9 +91,8 @@ Cloudia(
     single_words=[], # It's not split word list, example: ["neural network"]
     stop_words=STOPWORDS, # not count words, default is wordcloud.STOPWORDS
     extract_postags=['名詞', '英単語', 'ローマ字文'], # part of speech for japanese
-    word_num=100, # max word num
-    parser=None, # morphological analysis instance for japanese
     parse_func=None, # split text function, example: lambda x: x.split(',')
+    multiprocess=True # Flag for using multiprocessing
 )
 ```
@@ -125,9 +124,8 @@ DataFrame.wc.plot(
     single_words=[], # It's not split word list, example: ["neural network"]
     stop_words=STOPWORDS, # not count words, default is wordcloud.STOPWORDS
     extract_postags=['名詞', '英単語', 'ローマ字文'], # part of speech for japanese
-    word_num=100, # max word num
-    parser=None, # morphological analysis instance for japanese
     parse_func=None, # split text function, example: lambda x: x.split(',')
+    multiprocess=True, # Flag for using multiprocessing
     dark_theme=False, # color theme
     title_size=12, # title text size
     row_num=3, # for example, 12 wordcloud, row_num=3 -> 4*3image
diff --git a/test/integration_test/test_cloudia_pandas_plot.py b/test/integration_test/test_cloudia_pandas_plot.py
index 82062e7..0a1028a 100644
--- a/test/integration_test/test_cloudia_pandas_plot.py
+++ b/test/integration_test/test_cloudia_pandas_plot.py
@@ -9,9 +9,10 @@
     import cloudia  # noqa
 
     try:
-        pd.DataFrame({'test': ['hoge']}).wc.plot()
-        pd.DataFrame({'test': ['hoge']})['test'].wc.plot()
-        pd.Series(['hoge']).wc.plot()
+        for multiprocess in [True, False]:
+            pd.DataFrame({'test': ['hoge']}).wc.plot(multiprocess=multiprocess)
+            pd.DataFrame({'test': ['hoge']})['test'].wc.plot(multiprocess=multiprocess)
+            pd.Series(['hoge']).wc.plot(multiprocess=multiprocess)
     except Exception:
         traceback.print_exc()
         sys.exit(1)
diff --git a/test/integration_test/test_cloudia_plot.py b/test/integration_test/test_cloudia_plot.py
index 9468dc2..7a3fad8 100644
--- a/test/integration_test/test_cloudia_plot.py
+++ b/test/integration_test/test_cloudia_plot.py
@@ -9,14 +9,15 @@
     from cloudia.main import Cloudia
 
     try:
-        Cloudia([('test', pd.Series(['hoge']))]).plot()
-        Cloudia([('test', 'hoge')]).plot()
-        Cloudia(['hoge']).plot()
-        Cloudia([pd.Series(['hoge'])]).plot()
-        Cloudia('hoge').plot()
-        Cloudia(('test', 'hoge')).plot()
-        Cloudia(pd.DataFrame({'test': ['hoge']})).plot()
-        Cloudia(pd.Series(['hoge'])).plot()
+        for multiprocess in [True, False]:
+            Cloudia([('test', pd.Series(['hoge']))], multiprocess=multiprocess).plot()
+            Cloudia([('test', 'hoge')], multiprocess=multiprocess).plot()
+            Cloudia(['hoge'], multiprocess=multiprocess).plot()
+            Cloudia([pd.Series(['hoge'])], multiprocess=multiprocess).plot()
+            Cloudia('hoge', multiprocess=multiprocess).plot()
+            Cloudia(('test', 'hoge'), multiprocess=multiprocess).plot()
+            Cloudia(pd.DataFrame({'test': ['hoge']}), multiprocess=multiprocess).plot()
+            Cloudia(pd.Series(['hoge']), multiprocess=multiprocess).plot()
     except Exception:
         traceback.print_exc()
         sys.exit(1)