From 1021035fba35237826372365fe372b3b707470b2 Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Thu, 7 May 2020 01:34:30 +0900
Subject: [PATCH 01/11] add sample

---
 cloudia/word_data.py | 75 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 57 insertions(+), 18 deletions(-)
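
Note: this commit hard-codes a timing experiment into WordData.__init__ —
the same texts are parsed once through a joblib process pool and once
through the existing single-thread path, with print() timestamps around
each. A minimal self-contained sketch of the comparison being made; the
tokenize() helper is a hypothetical stand-in for the nagisa-based parse,
so the numbers are illustrative only:

    import time
    from itertools import repeat

    from joblib import Parallel, delayed


    def tokenize(text, stop_words):
        # stand-in for the expensive morphological-analysis step
        return [w for w in text.lower().split() if w not in stop_words]


    texts = ['neural networks parse text'] * 1000
    stop_words = {'a', 'the'}

    start = time.time()
    parallel = Parallel(n_jobs=-1)([
        delayed(tokenize)(t, s) for t, s in zip(texts, repeat(stop_words))
    ])
    print('joblib end', time.time() - start)

    start = time.time()
    single = [tokenize(t, stop_words) for t in texts]
    print('single end', time.time() - start)

    assert parallel == single  # same result, different wall time
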
diff --git a/cloudia/word_data.py b/cloudia/word_data.py
index 12aacd9..b0889a1 100644
--- a/cloudia/word_data.py
+++ b/cloudia/word_data.py
@@ -10,6 +10,26 @@
     import nagisa
 
 
+def count(words: List[str], stop_words, word_num) -> Dict[str, float]:
+    c = Counter(words).most_common()
+    _max_count = c[0][1]
+    weight = {k: v / _max_count for k, v in c if k not in stop_words}
+    weight = {k: weight[k] for k in list(weight.keys())[:word_num]}
+    return weight
+
+
+def parse(text: str, single_words, extract_postags, num_regex) -> List[str]:
+    parser = nagisa.Tagger(single_word_list=single_words)
+    for x in ['"', ';', ',', '(', ')', '\u3000']:
+        text = text.replace(x, ' ')
+    text = text.lower()
+    return [x for x in parser.extract(text, extract_postags=extract_postags).words if len(x) > 1 and not num_regex.match(x)]
+
+
+def process(text, stop_words, word_num, single_words, extract_postags, num_regex):
+    return count(parse(text, single_words, extract_postags, num_regex), stop_words, word_num)
+
+
 class WordData:
     def __init__(self, data: Any, single_words: List[str], stop_words: List[str], extract_postags: List[str], word_num: int, parser: Any, parse_func: Any):
         words, self.names = self._init_data(data)
@@ -17,12 +37,44 @@ def __init__(self, data: Any, single_words: List[str], stop_words: List[str], ex
         self.single_words = single_words
         self.extract_postags = extract_postags
         self.stop_words = stop_words
-        self.parser = nagisa.Tagger(single_word_list=self.single_words) if not parser else parser
+        # self.parser = nagisa.Tagger(single_word_list=self.single_words) if not parser else parser
+        num_regex = re.compile('^[0-9]+$')
         self.num_regex = re.compile('^[0-9]+$')
-        if parse_func:
-            self.words = [self.count(parse_func(x)) for x in words]
-        else:
-            self.words = [self.count(self.parse(x)) for x in words]
+        # if parse_func:
+        #     self.words = [self.count(parse_func(x)) for x in words]
+        # else:
+        #     self.words = [self.count(self.parse(x)) for x in words]
+        import time
+        from joblib import Parallel, delayed
+        from itertools import repeat
+        a = time.time()
+        print('joblib start', a)
+        self.words = Parallel(n_jobs=-1)([
+            delayed(process)(a, b, c, d, e, f)
+            for a, b, c, d, e, f in zip(words, repeat(stop_words), repeat(word_num), repeat(single_words), repeat(extract_postags), repeat(num_regex))
+        ])
+        b = time.time()
+        print('joblib end', b, b - a)
+
+        a = time.time()
+        print('start', a)
+        self.parser = nagisa.Tagger(single_word_list=self.single_words) if not parser else parser
+        self.words = [self.count(self.parse(x)) for x in words]
+        b = time.time()
+        print(' end', b, b - a)
+
+    def count(self, words: List[str]) -> Dict[str, float]:
+        c = Counter(words).most_common()
+        _max_count = c[0][1]
+        weight = {k: v / _max_count for k, v in c if k not in self.stop_words}
+        weight = {k: weight[k] for k in list(weight.keys())[:self.word_num]}
+        return weight
+
+    def parse(self, text: str) -> List[str]:
+        for x in ['"', ';', ',', '(', ')', '\u3000']:
+            text = text.replace(x, ' ')
+        text = text.lower()
+        return [x for x in self.parser.extract(text, extract_postags=self.extract_postags).words if len(x) > 1 and not self.num_regex.match(x)]
 
     def _init_data(self, data: Any) -> Tuple[List[str], List[str]]:
         words, names = [], []
@@ -55,19 +107,6 @@ def _init_data(self, data: Any) -> Tuple[List[str], List[str]]:
 
         return words, names
 
-    def count(self, words: List[str]) -> Dict[str, float]:
-        c = Counter(words).most_common()
-        _max_count = c[0][1]
-        weight = {k: v / _max_count for k, v in c if k not in self.stop_words}
-        weight = {k: weight[k] for k in list(weight.keys())[:self.word_num]}
-        return weight
-
-    def parse(self, text: str) -> List[str]:
-        for x in ['"', ';', ',', '(', ')', '\u3000']:
-            text = text.replace(x, ' ')
-        text = text.lower()
-        return [x for x in self.parser.extract(text, extract_postags=self.extract_postags).words if len(x) > 1 and not self.num_regex.match(x)]
-
     def __iter__(self):
         for n, w in zip(self.names, self.words):
             yield n, w

From 9f1d2af942c7993b82e11f7be60327dffd4d6ba0 Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 04:02:38 +0900
Subject: [PATCH 02/11] add multiprocessing parse

---
 cloudia/__init__.py        |   1 +
 cloudia/main.py            |  17 +++---
 cloudia/pandas_accessor.py |  38 ++++---------
 cloudia/word_data.py       | 114 ++++++++++++++-----------------------
 4 files changed, 61 insertions(+), 109 deletions(-)
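
Note: the ordering trick this commit introduces, in isolation. Each parse
call is wrapped so it returns (Counter, submission index); after the joblib
pool returns, results are sorted by that index to restore input order. A
minimal sketch mirroring the function_wrapper helper that word_data.py now
imports (the split lambda is a toy parser, not the library default):

    from collections import Counter

    from joblib import Parallel, delayed


    def function_wrapper(func):
        def _f(text, **kwargs):
            index = kwargs.pop('_index')
            return Counter(func(text, **kwargs)), index

        return _f


    split = function_wrapper(lambda t: t.split(' '))
    results = Parallel(n_jobs=-1)([
        delayed(split)(t, _index=i) for i, t in enumerate(['hoge hoge', 'piyo'])
    ])
    results.sort(key=lambda pair: pair[1])  # restore submission order
    print([c for c, _ in results])          # [Counter({'hoge': 2}), Counter({'piyo': 1})]
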
diff --git a/cloudia/__init__.py b/cloudia/__init__.py
index 072de49..6254286 100644
--- a/cloudia/__init__.py
+++ b/cloudia/__init__.py
@@ -1,3 +1,4 @@
 from .main import Cloudia
 from .pandas_accessor import CloudiaDataFrame, CloudiaSeries
 from .word_data import WordData
+from .utils import function_wrapper, process
diff --git a/cloudia/main.py b/cloudia/main.py
index 1557067..ed38a0a 100644
--- a/cloudia/main.py
+++ b/cloudia/main.py
@@ -3,7 +3,9 @@
 import matplotlib.pyplot as plt
 import japanize_matplotlib
 from wordcloud import WordCloud, STOPWORDS
+
 from cloudia.word_data import WordData
+from cloudia.utils import process
 
 
 class CloudiaBase:
@@ -12,16 +14,11 @@ def __init__(self,
                  single_words: List[str] = [],
                  stop_words: List[str] = STOPWORDS,
                  extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
-                 word_num: int = 100,
-                 parser: Any = None,
-                 parse_func: Any = None):
-        self.wd = WordData(data=data,
-                           single_words=single_words,
-                           stop_words=stop_words,
-                           extract_postags=extract_postags,
-                           word_num=word_num,
-                           parser=parser,
-                           parse_func=parse_func)
+                 parse_func: Any = process,
+                 multiprocess: bool = True,
+                 **args):
+        args.update(dict(single_words=single_words, stop_words=stop_words, extract_postags=extract_postags))
+        self.wd = WordData(data, parse_func, multiprocess, **args)
 
     def make_wordcloud(self, dark_theme: bool, rate: int) -> List[Tuple[str, WordCloud]]:
         wordcloud_list = []
diff --git a/cloudia/pandas_accessor.py b/cloudia/pandas_accessor.py
index b6e03fd..24bb7b1 100644
--- a/cloudia/pandas_accessor.py
+++ b/cloudia/pandas_accessor.py
@@ -1,10 +1,12 @@
 from typing import Any, List
 
-from cloudia.main import CloudiaBase, Cloudia
 import matplotlib.pyplot as plt
 from wordcloud import STOPWORDS
 import pandas as pd
 
+from cloudia.main import CloudiaBase, Cloudia
+from cloudia.utils import process
+
 
 @pd.api.extensions.register_dataframe_accessor('wc')
 class CloudiaDataFrame(CloudiaBase):
@@ -15,22 +17,13 @@ def plot(self,
              single_words: List[str] = [],
              stop_words: List[str] = STOPWORDS,
              extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
-             word_num: int = 100,
-             parser: Any = None,
-             parse_func: Any = None,
+             parse_func: Any = process,
              dark_theme: bool = False,
             title_size: int = 12,
             row_num: int = 3,
-             figsize_rate: int = 2):
-        Cloudia(
-            self.df,
-            single_words,
-            stop_words,
-            extract_postags,
-            word_num,
-            parser,
-            parse_func,
-        ).plot(dark_theme, title_size, row_num, figsize_rate)
+             figsize_rate: int = 2,
+             multiprocess: bool = True):
+        Cloudia(self.df, single_words, stop_words, extract_postags, parse_func, multiprocess).plot(dark_theme, title_size, row_num, figsize_rate)
 
     def save(self, fig_path: str, dark_theme: bool, **args: Any):
         self.plot(**args)
@@ -46,22 +39,13 @@ def plot(self,
              single_words: List[str] = [],
              stop_words: List[str] = STOPWORDS,
              extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
-             word_num: int = 100,
-             parser: Any = None,
-             parse_func: Any = None,
+             parse_func: Any = process,
             dark_theme: bool = False,
             title_size: int = 12,
             row_num: int = 3,
-             figsize_rate: int = 2):
-        Cloudia(
-            self.series,
-            single_words,
-            stop_words,
-            extract_postags,
-            word_num,
-            parser,
-            parse_func,
-        ).plot(dark_theme, title_size, row_num, figsize_rate)
+             figsize_rate: int = 2,
+             multiprocess: bool = True):
+        Cloudia(self.series, single_words, stop_words, extract_postags, parse_func, multiprocess).plot(dark_theme, title_size, row_num, figsize_rate)
 
     def save(self, fig_path: str, dark_theme: bool, **args: Any):
         self.plot(**args)
diff --git a/cloudia/word_data.py b/cloudia/word_data.py
index b0889a1..6b40642 100644
--- a/cloudia/word_data.py
+++ b/cloudia/word_data.py
@@ -1,87 +1,58 @@
 from typing import Any, List, Tuple, Dict
-import re
-
+from itertools import repeat, chain, zip_longest
 from collections import Counter
-import pandas as pd
-from wurlitzer import pipes
-
-with pipes() as (out, err):
-    # https://github.com/clab/dynet/issues/1528
-    import nagisa
-
-
-def count(words: List[str], stop_words, word_num) -> Dict[str, float]:
-    c = Counter(words).most_common()
-    _max_count = c[0][1]
-    weight = {k: v / _max_count for k, v in c if k not in stop_words}
-    weight = {k: weight[k] for k in list(weight.keys())[:word_num]}
-    return weight
-
-
-def parse(text: str, single_words, extract_postags, num_regex) -> List[str]:
-    parser = nagisa.Tagger(single_word_list=single_words)
-    for x in ['"', ';', ',', '(', ')', '\u3000']:
-        text = text.replace(x, ' ')
-    text = text.lower()
-    return [x for x in parser.extract(text, extract_postags=extract_postags).words if len(x) > 1 and not num_regex.match(x)]
+from joblib import Parallel, delayed
+import pandas as pd
 
-def process(text, stop_words, word_num, single_words, extract_postags, num_regex):
-    return count(parse(text, single_words, extract_postags, num_regex), stop_words, word_num)
+from cloudia.utils import function_wrapper
 
 
 class WordData:
-    def __init__(self, data: Any, single_words: List[str], stop_words: List[str], extract_postags: List[str], word_num: int, parser: Any, parse_func: Any):
-        words, self.names = self._init_data(data)
-        self.word_num = word_num
-        self.single_words = single_words
-        self.extract_postags = extract_postags
-        self.stop_words = stop_words
-        # self.parser = nagisa.Tagger(single_word_list=self.single_words) if not parser else parser
-        num_regex = re.compile('^[0-9]+$')
-        self.num_regex = re.compile('^[0-9]+$')
-        # if parse_func:
-        #     self.words = [self.count(parse_func(x)) for x in words]
-        # else:
-        #     self.words = [self.count(self.parse(x)) for x in words]
-        import time
-        from joblib import Parallel, delayed
-        from itertools import repeat
-        a = time.time()
-        print('joblib start', a)
-        self.words = Parallel(n_jobs=-1)([
-            delayed(process)(a, b, c, d, e, f)
-            for a, b, c, d, e, f in zip(words, repeat(stop_words), repeat(word_num), repeat(single_words), repeat(extract_postags), repeat(num_regex))
-        ])
-        b = time.time()
-        print('joblib end', b, b - a)
-
-        a = time.time()
-        print('start', a)
-        self.parser = nagisa.Tagger(single_word_list=self.single_words) if not parser else parser
-        self.words = [self.count(self.parse(x)) for x in words]
-        b = time.time()
-        print(' end', b, b - a)
-
-    def count(self, words: List[str]) -> Dict[str, float]:
-        c = Counter(words).most_common()
+    def __init__(self, data: Any, parse_func: Any, multiprocess: bool, **args):
+        self.words, self.names = self._init_data(data)
+        self.words = self._process_parse(function_wrapper(parse_func), multiprocess, **args)
+        self.words = [self._convert_weight(x) for x in self.words]
+
+    def _process_parse(self, parse_func: Any, multiprocess: bool, **args) -> List[List[str]]:
+        """flatten -> parse words -> chunked"""
+        if isinstance(self.words[0], list):
+            word_list_length = len(self.words[0])
+            words = list(chain.from_iterable(self.words))
+            words = self._parse(words, parse_func, multiprocess, **args)
+            words = list(zip_longest(*[iter(words)] * word_list_length))
+            words = [sum(w, Counter()) for w in words]
+        else:
+            words = self._parse(self.words, parse_func, multiprocess, **args)
+        return words
+
+    def _convert_weight(self, c: Counter) -> Dict[str, float]:
+        c = c.most_common()
         _max_count = c[0][1]
-        weight = {k: v / _max_count for k, v in c if k not in self.stop_words}
-        weight = {k: weight[k] for k in list(weight.keys())[:self.word_num]}
+        weight = {k: v / _max_count for k, v in c}
+        weight = {k: weight[k] for k in list(weight.keys())}
         return weight
 
-    def parse(self, text: str) -> List[str]:
-        for x in ['"', ';', ',', '(', ')', '\u3000']:
-            text = text.replace(x, ' ')
-        text = text.lower()
-        return [x for x in self.parser.extract(text, extract_postags=self.extract_postags).words if len(x) > 1 and not self.num_regex.match(x)]
+    def _parse(self, words: List[str], parse_func: Any, multiprocess: bool, **args) -> List[str]:
+        if multiprocess:
+            return self._parallel_parse(words, parse_func, **args)
+        return self._single_thread_parse(words, parse_func, **args)
+
+    def _single_thread_parse(self, words: List[str], parse_func: Any, **args) -> List[str]:
+        return [parse_func(x, **args) for x in words]
+
+    def _parallel_parse(self, words: List[str], parse_func: Any, **args) -> List[str]:
+        words = Parallel(n_jobs=-1)([delayed(parse_func)(w, **dict(**a, **{'_index': i})) for i, (w, a) in enumerate(zip(words, repeat(args)))])
+        words.sort(key=lambda x: x[1])
+        words = [t[0] for t in words]
+        return words
 
     def _init_data(self, data: Any) -> Tuple[List[str], List[str]]:
         words, names = [], []
         if isinstance(data, list):
             if isinstance(data[0], tuple):
                 if isinstance(data[0][1], pd.Series):
-                    words = [' '.join(d.values.tolist()) for n, d in data]
+                    words = [d.values.tolist() for n, d in data]
                     names = [n for n, d in data]
                 else:
                     words = [w for n, w in data]
@@ -90,7 +61,7 @@ def _init_data(self, data: Any) -> Tuple[List[str], List[str]]:
             words = data
             names = [f'word cloud {i+1}' for i in range(len(data))]
         elif isinstance(data[0], pd.Series):
-            words = [' '.join(d.values.tolist()) for d in data]
+            words = [d.values.tolist() for d in data]
             names = [d.name for d in data]
         elif isinstance(data, str):
             words = [data]
@@ -100,11 +71,10 @@ def _init_data(self, data: Any) -> Tuple[List[str], List[str]]:
             names = [data[0]]
         elif isinstance(data, pd.DataFrame):
             names = data.columns.tolist()
-            words = [' '.join(data[x].values.tolist()) for x in names]
+            words = [data[x].values.tolist() for x in names]
         elif isinstance(data, pd.Series):
-            words = [' '.join(data.values.tolist())]
+            words = [data.values.tolist()]
             names = [data.name]
-
         return words, names
 
     def __iter__(self):
         for n, w in zip(self.names, self.words):
             yield n, w

From a1bf76b9308620da6fef43ab6be2a24a0454c81d Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 04:22:01 +0900
Subject: [PATCH 03/11] fix ut

---
 test/unit_test/test_word_data.py | 44 ++++++++++++++++----------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/test/unit_test/test_word_data.py b/test/unit_test/test_word_data.py
index f25c3b5..69f0ba3 100644
--- a/test/unit_test/test_word_data.py
+++ b/test/unit_test/test_word_data.py
@@ -5,12 +5,12 @@
 
 class TestCloudia(unittest.TestCase):
     def setUp(self):
-        self.cls = WordData('test', [], [], None, None, None, lambda x: [x])
+        self.cls = WordData('test', lambda x: [x], True)
 
     def assertSortTextEqual(self, data, target):
         """for random sample list."""
-        data = [sorted(t.split(' ')) for t in data]
-        target = [sorted(t.split(' ')) for t in target]
+        data = [sorted(t.split(' ')) if isinstance(t, str) else sorted(t) for t in data]
+        target = [sorted(t.split(' ')) if isinstance(t, str) else sorted(t) for t in target]
         for x, y in zip(data, target):
             self.assertListEqual(x, y)
 
@@ -38,7 +38,7 @@ def test_init_data_list_tuple_series(self):
         test_1 = pd.Series(['test1 test2', 'test3'], name='wc1')
         test_2 = pd.Series(['test4', 'test5', 'test6'], name='wc2')
         words, name = self.cls._init_data([('name1', test_1), ('name2', test_2)])
-        self.assertSortTextEqual(words, ['test1 test2 test3', 'test4 test5 test6'])
+        self.assertSortTextEqual(words, [['test1 test2', 'test3'], 'test4 test5 test6'])
         self.assertListEqual(name, ['name1', 'name2'])
 
     def test_init_data_dataframe(self):
@@ -53,22 +53,22 @@ def test_init_data_series(self):
         self.assertSortTextEqual(words, ['test1 test2'])
         self.assertListEqual(name, ['wc'])
 
-    def test_count(self):
-        self.cls.word_num = 2
-        self.cls.stop_words = 'test'
-        words = ['hoge', 'hoge', 'hoge', 'test', 'test', 'piyo', 'piyo', 'fuga']
-        output = self.cls.count(words)
-        self.assertDictEqual(output, {'hoge': 1.0, 'piyo': 0.6666666666666666})
+    # def test_count(self):
+    #     self.cls.word_num = 2
+    #     self.cls.stop_words = 'test'
+    #     words = ['hoge', 'hoge', 'hoge', 'test', 'test', 'piyo', 'piyo', 'fuga']
+    #     output = self.cls.count(words)
+    #     self.assertDictEqual(output, {'hoge': 1.0, 'piyo': 0.6666666666666666})
 
-    def test_parse(self):
-        class MockData:
-            def __init__(self, d):
-                self.words = d
-
-        class MockParser:
-            def extract(self, text, extract_postags):
-                return MockData(text.split(' '))
-
-        self.cls.parser = MockParser()
-        output = self.cls.parse("It's a sample text; samples 1,2 face;) ")
-        self.assertListEqual(output, ["it's", 'sample', 'text', 'samples', 'face'])
+    # def test_parse(self):
+    #     class MockData:
+    #         def __init__(self, d):
+    #             self.words = d
+    #
+    #     class MockParser:
+    #         def extract(self, text, extract_postags):
+    #             return MockData(text.split(' '))
+    #
+    #     self.cls.parser = MockParser()
+    #     output = self.cls.parse("It's a sample text; samples 1,2 face;) ")
+    #     self.assertListEqual(output, ["it's", 'sample', 'text', 'samples', 'face'])

From 990af564fed31fae5dfe86a0b2fbc1b55354440e Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 04:43:24 +0900
Subject: [PATCH 04/11] refact

---
 cloudia/__init__.py        |  1 -
 cloudia/main.py            |  4 ++--
 cloudia/pandas_accessor.py |  6 +++---
 cloudia/word_data.py       | 21 ++++++++++-----------
 4 files changed, 15 insertions(+), 17 deletions(-)
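
Note: the refactored parse() keeps the flatten -> parse -> re-chunk scheme
from the previous commit. The idiom zip_longest(*[iter(xs)] * n) regroups
the flat results into chunks of n, and sum(chunk, Counter()) merges each
chunk back into one Counter per word cloud. Stand-alone sketch, with
text.split standing in for the real _parse:

    from collections import Counter
    from itertools import chain, zip_longest

    per_cloud = [['hoge hoge', 'piyo'], ['fuga', 'fuga']]
    chunk_size = len(per_cloud[0])

    flat = list(chain.from_iterable(per_cloud))           # parse once, flat
    parsed = [Counter(text.split(' ')) for text in flat]  # stand-in for _parse
    chunks = list(zip_longest(*[iter(parsed)] * chunk_size))
    merged = [sum(chunk, Counter()) for chunk in chunks]
    print(merged)  # [Counter({'hoge': 2, 'piyo': 1}), Counter({'fuga': 2})]
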
diff --git a/cloudia/__init__.py b/cloudia/__init__.py
index 6254286..072de49 100644
--- a/cloudia/__init__.py
+++ b/cloudia/__init__.py
@@ -1,4 +1,3 @@
 from .main import Cloudia
 from .pandas_accessor import CloudiaDataFrame, CloudiaSeries
 from .word_data import WordData
-from .utils import function_wrapper, process
diff --git a/cloudia/main.py b/cloudia/main.py
index ed38a0a..9c4a121 100644
--- a/cloudia/main.py
+++ b/cloudia/main.py
@@ -5,7 +5,7 @@
 from wordcloud import WordCloud, STOPWORDS
 
 from cloudia.word_data import WordData
-from cloudia.utils import process
+from cloudia.utils import defalt_parse_func
 
 
 class CloudiaBase:
@@ -14,7 +14,7 @@ def __init__(self,
                  single_words: List[str] = [],
                  stop_words: List[str] = STOPWORDS,
                  extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
-                 parse_func: Any = process,
+                 parse_func: Any = defalt_parse_func,
                  multiprocess: bool = True,
                  **args):
         args.update(dict(single_words=single_words, stop_words=stop_words, extract_postags=extract_postags))
diff --git a/cloudia/pandas_accessor.py b/cloudia/pandas_accessor.py
index 24bb7b1..267a4aa 100644
--- a/cloudia/pandas_accessor.py
+++ b/cloudia/pandas_accessor.py
@@ -5,7 +5,7 @@
 import pandas as pd
 
 from cloudia.main import CloudiaBase, Cloudia
-from cloudia.utils import process
+from cloudia.utils import defalt_parse_func
 
 
 @pd.api.extensions.register_dataframe_accessor('wc')
@@ -17,7 +17,7 @@ def plot(self,
              single_words: List[str] = [],
             stop_words: List[str] = STOPWORDS,
             extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
-             parse_func: Any = process,
+             parse_func: Any = defalt_parse_func,
             dark_theme: bool = False,
             title_size: int = 12,
             row_num: int = 3,
@@ -39,7 +39,7 @@ def plot(self,
              single_words: List[str] = [],
             stop_words: List[str] = STOPWORDS,
             extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
-             parse_func: Any = process,
+             parse_func: Any = defalt_parse_func,
             dark_theme: bool = False,
             title_size: int = 12,
             row_num: int = 3,
diff --git a/cloudia/word_data.py b/cloudia/word_data.py
index 6b40642..d23ba4c 100644
--- a/cloudia/word_data.py
+++ b/cloudia/word_data.py
@@ -10,23 +10,22 @@ class WordData:
     def __init__(self, data: Any, parse_func: Any, multiprocess: bool, **args):
-        self.words, self.names = self._init_data(data)
-        self.words = self._process_parse(function_wrapper(parse_func), multiprocess, **args)
-        self.words = [self._convert_weight(x) for x in self.words]
+        words, self.names = self._init_data(data)
+        self.counter_list = self.parse(words, parse_func, multiprocess, **args)
+        self.words = [self.convert_weight(x) for x in self.counter_list]
 
-    def _process_parse(self, parse_func: Any, multiprocess: bool, **args) -> List[List[str]]:
-        """flatten -> parse words -> chunked"""
-        if isinstance(self.words[0], list):
-            word_list_length = len(self.words[0])
-            words = list(chain.from_iterable(self.words))
+    def parse(self, words, parse_func: Any, multiprocess: bool, **args) -> List[List[str]]:
+        if isinstance(words[0], list):
+            word_list_length = len(words[0])
+            words = list(chain.from_iterable(words))
             words = self._parse(words, parse_func, multiprocess, **args)
             words = list(zip_longest(*[iter(words)] * word_list_length))
             words = [sum(w, Counter()) for w in words]
         else:
-            words = self._parse(self.words, parse_func, multiprocess, **args)
+            words = self._parse(words, parse_func, multiprocess, **args)
         return words
 
-    def _convert_weight(self, c: Counter) -> Dict[str, float]:
+    def convert_weight(self, c: Counter) -> Dict[str, float]:
         c = c.most_common()
         _max_count = c[0][1]
         weight = {k: v / _max_count for k, v in c}
@@ -35,7 +34,7 @@ def _parse(self, words: List[str], parse_func: Any, multiprocess: bool, **args) -> List[str]:
         if multiprocess:
-            return self._parallel_parse(words, parse_func, **args)
+            return self._parallel_parse(words, function_wrapper(parse_func), **args)
         return self._single_thread_parse(words, parse_func, **args)
 
     def _single_thread_parse(self, words: List[str], parse_func: Any, **args) -> List[str]:

From 701a6a165975a971a27355737bc3b0f742cf8303 Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 05:27:45 +0900
Subject: [PATCH 05/11] fix ut

---
 cloudia/main.py                  |  4 +-
 cloudia/pandas_accessor.py       |  6 +--
 cloudia/word_data.py             |  2 +-
 test/unit_test/test_word_data.py | 68 ++++++++++++++++++++++----------
 4 files changed, 54 insertions(+), 26 deletions(-)
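
Note: the deleted tests covered methods that no longer exist (count, and
parse on a parser object); the replacements patch the private _parse hook
so the public parse() can be exercised without nagisa or a process pool. A
minimal sketch of that style, assuming the cloudia package from this branch
is importable:

    import unittest
    from collections import Counter
    from unittest.mock import patch

    from cloudia.word_data import WordData


    class ParseTest(unittest.TestCase):
        def test_parse_merges_list_input(self):
            def fake_parse(words, parse_func, multiprocess, **args):
                # replaces WordData._parse: split instead of nagisa
                return [Counter(w.split(' ')) for w in words]

            with patch('cloudia.word_data.WordData._parse', side_effect=fake_parse):
                wd = WordData('test', lambda x: [x], True)
                out = wd.parse([['hoge hoge', 'piyo'], ['fuga', 'fuga']], None, None)
            self.assertEqual(out[0], Counter({'hoge': 2, 'piyo': 1}))


    if __name__ == '__main__':
        unittest.main()
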
diff --git a/cloudia/main.py b/cloudia/main.py
index 9c4a121..61b0e35 100644
--- a/cloudia/main.py
+++ b/cloudia/main.py
@@ -5,7 +5,7 @@
 from wordcloud import WordCloud, STOPWORDS
 
 from cloudia.word_data import WordData
-from cloudia.utils import defalt_parse_func
+from cloudia.utils import default_parse_func
 
 
 class CloudiaBase:
@@ -14,7 +14,7 @@ def __init__(self,
                  single_words: List[str] = [],
                  stop_words: List[str] = STOPWORDS,
                  extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
-                 parse_func: Any = defalt_parse_func,
+                 parse_func: Any = default_parse_func,
                  multiprocess: bool = True,
                  **args):
         args.update(dict(single_words=single_words, stop_words=stop_words, extract_postags=extract_postags))
diff --git a/cloudia/pandas_accessor.py b/cloudia/pandas_accessor.py
index 267a4aa..371cd14 100644
--- a/cloudia/pandas_accessor.py
+++ b/cloudia/pandas_accessor.py
@@ -5,7 +5,7 @@
 import pandas as pd
 
 from cloudia.main import CloudiaBase, Cloudia
-from cloudia.utils import defalt_parse_func
+from cloudia.utils import default_parse_func
 
 
 @pd.api.extensions.register_dataframe_accessor('wc')
@@ -17,7 +17,7 @@ def plot(self,
              single_words: List[str] = [],
             stop_words: List[str] = STOPWORDS,
             extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
-             parse_func: Any = defalt_parse_func,
+             parse_func: Any = default_parse_func,
             dark_theme: bool = False,
             title_size: int = 12,
             row_num: int = 3,
@@ -39,7 +39,7 @@ def plot(self,
              single_words: List[str] = [],
             stop_words: List[str] = STOPWORDS,
             extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
-             parse_func: Any = defalt_parse_func,
+             parse_func: Any = default_parse_func,
             dark_theme: bool = False,
             title_size: int = 12,
             row_num: int = 3,
diff --git a/cloudia/word_data.py b/cloudia/word_data.py
index d23ba4c..6c45e08 100644
--- a/cloudia/word_data.py
+++ b/cloudia/word_data.py
@@ -38,7 +38,7 @@ def _parse(self, words: List[str], parse_func: Any, multiprocess: bool, **args)
         return self._single_thread_parse(words, parse_func, **args)
 
     def _single_thread_parse(self, words: List[str], parse_func: Any, **args) -> List[str]:
-        return [parse_func(x, **args) for x in words]
+        return [Counter(parse_func(x, **args)) for x in words]
diff --git a/test/unit_test/test_word_data.py b/test/unit_test/test_word_data.py
index 69f0ba3..c95e48c 100644
--- a/test/unit_test/test_word_data.py
+++ b/test/unit_test/test_word_data.py
@@ -1,9 +1,11 @@
 from cloudia.word_data import WordData
 import unittest
+from unittest.mock import patch
 
 import pandas as pd
+from collections import Counter
 
 
-class TestCloudia(unittest.TestCase):
+class TestWordData(unittest.TestCase):
     def setUp(self):
         self.cls = WordData('test', lambda x: [x], True)
@@ -53,22 +55,48 @@ def test_init_data_series(self):
         self.assertSortTextEqual(words, ['test1 test2'])
         self.assertListEqual(name, ['wc'])
 
-    # def test_count(self):
-    #     self.cls.word_num = 2
-    #     self.cls.stop_words = 'test'
-    #     words = ['hoge', 'hoge', 'hoge', 'test', 'test', 'piyo', 'piyo', 'fuga']
-    #     output = self.cls.count(words)
-    #     self.assertDictEqual(output, {'hoge': 1.0, 'piyo': 0.6666666666666666})
-
-    # def test_parse(self):
-    #     class MockData:
-    #         def __init__(self, d):
-    #             self.words = d
-    #
-    #     class MockParser:
-    #         def extract(self, text, extract_postags):
-    #             return MockData(text.split(' '))
-    #
-    #     self.cls.parser = MockParser()
-    #     output = self.cls.parse("It's a sample text; samples 1,2 face;) ")
-    #     self.assertListEqual(output, ["it's", 'sample', 'text', 'samples', 'face'])
+    def test_parse(self):
+        def _parse(x, y, z, **args):
+            return x
+
+        with patch('cloudia.word_data.WordData._parse', side_effect=_parse):
+            output = self.cls.parse(['hoge hoge', 'piyo'], None, None)
+        self.assertListEqual(output, ['hoge hoge', 'piyo'])
+
+    def test_parse_list_case(self):
+        def _parse(x, y, z, **args):
+            return [Counter(w.split(' ')) for w in x]
+
+        with patch('cloudia.word_data.WordData._parse', side_effect=_parse):
+            output = self.cls.parse([['hoge hoge', 'piyo'], ['fuga', 'fuga']], None, None)
+        target = [Counter({'hoge': 2, 'piyo': 1}), Counter({'fuga': 2})]
+        for o, t in zip(output, target):
+            self.assertEqual(type(o), type(t))
+            self.assertEqual(o.most_common(), t.most_common())
+
+    def test_convert_weight(self):
+        output = self.cls.convert_weight(Counter(['hoge', 'hoge', 'piyo']))
+        self.assertDictEqual(output, {'hoge': 1, 'piyo': 0.5})
+
+    def test_single_thread_parse(self):
+        def f(x):
+            return x.split(' ')
+
+        output = self.cls._single_thread_parse(['hoge hoge', 'piyo'], f)
+        target = [Counter(['hoge', 'hoge']), Counter(['piyo'])]
+        for o, t in zip(output, target):
+            self.assertEqual(type(o), type(t))
+            self.assertEqual(o.most_common(), t.most_common())
+
+    def test_parallel_parse(self):
+        def f(x, _index):
+            return Counter(x.split(' ')), _index
+
+        output = self.cls._parallel_parse(['hoge hoge', 'piyo'], f, **{})
+        target = [
+            Counter(['hoge', 'hoge']),
+            Counter(['piyo']),
+        ]
+        for o, t in zip(output, target):
+            self.assertEqual(type(o), type(t))
+            self.assertEqual(o.most_common(), t.most_common())

From 74b91f44b6c91bf693d21c1a664deb160e4dfedb Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 06:10:39 +0900
Subject: [PATCH 06/11] add unittest, integration test

---
 .github/workflows/python_test.yml          | 15 +++++++++-
 cloudia/utils.py                           | 28 +++++++++++++++++++
 cloudia/word_data.py                       |  1 +
 .../test_cloudia_pandas_plot.py            | 17 +++++++++++
 test/integration_test/test_cloudia_plot.py | 21 ++++++++++++++
 test/unit_test/test_utils.py               | 21 ++++++++++++++
 6 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 cloudia/utils.py
 create mode 100644 test/integration_test/test_cloudia_pandas_plot.py
 create mode 100644 test/integration_test/test_cloudia_plot.py
 create mode 100644 test/unit_test/test_utils.py
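
Note: the integration tests are deliberately plain scripts rather than
unittest cases - each exits non-zero on any exception so the workflow steps
below can chain them with shell exit-code checks. The template they follow,
shown in its final form after the follow-ups in PATCH 07/08 and trimmed to
a single plot call for illustration:

    import sys
    import traceback

    import pandas as pd

    if __name__ == '__main__':
        import cloudia  # noqa - imported only to register the .wc accessor

        try:
            pd.Series(['hoge'], name='test').wc.plot()
        except Exception:
            traceback.print_exc()
            sys.exit(1)
        sys.exit(0)
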
diff --git a/.github/workflows/python_test.yml b/.github/workflows/python_test.yml
index fb65aa4..e595937 100644
--- a/.github/workflows/python_test.yml
+++ b/.github/workflows/python_test.yml
@@ -44,4 +44,17 @@
           exit 1
         fi
         echo "pass mypy"
-
+    - name: integration test
+      run: |
+        poetry run python ./test/integration_test/test_cloudia_plot.py
+        if [ $? != 0 ]; then
+          echo "failed: cloudia_plot"
+          exit 1
+        fi
+        echo "pass cloudia_plot"
+        poetry run python ./test/integration_test/test_cloudia_pandas_plot.py
+        if [ $? != 0 ]; then
+          echo "failed: cloudia_pandas_plot"
+          exit 1
+        fi
+        echo "pass cloudia_pandas_plot"
diff --git a/cloudia/utils.py b/cloudia/utils.py
new file mode 100644
index 0000000..77e01ae
--- /dev/null
+++ b/cloudia/utils.py
@@ -0,0 +1,28 @@
+from collections import Counter
+from typing import List
+import re
+
+from wurlitzer import pipes
+
+with pipes() as (out, err):
+    # https://github.com/clab/dynet/issues/1528
+    import nagisa
+
+NUM_REGEX = re.compile('^[0-9]+$')
+
+
+def default_parse_func(text: str, single_words: List[str], extract_postags: List[str], stop_words: List[str]) -> List[str]:
+    parser = nagisa.Tagger(single_word_list=single_words)
+    for x in ['"', ';', ',', '(', ')', '\u3000']:
+        text = text.replace(x, ' ')
+    text = text.lower()
+    return [x for x in parser.extract(text, extract_postags=extract_postags).words if len(x) > 1 and not NUM_REGEX.match(x) and x not in stop_words]
+
+
+def function_wrapper(func):
+    def _f(t, **kwargs):
+        i = kwargs.pop('_index')
+        d = Counter(func(t, **kwargs))
+        return d, i
+
+    return _f
diff --git a/cloudia/word_data.py b/cloudia/word_data.py
index 6c45e08..3b75090 100644
--- a/cloudia/word_data.py
+++ b/cloudia/word_data.py
@@ -47,6 +47,7 @@ def _init_data(self, data: Any) -> Tuple[List[str], List[str]]:
+        # TODO: set assert
         words, names = [], []
         if isinstance(data, list):
             if isinstance(data[0], tuple):
diff --git a/test/integration_test/test_cloudia_pandas_plot.py b/test/integration_test/test_cloudia_pandas_plot.py
new file mode 100644
index 0000000..3c9b991
--- /dev/null
+++ b/test/integration_test/test_cloudia_pandas_plot.py
@@ -0,0 +1,17 @@
+import sys
+import pathlib
+import pandas as pd
+
+if __name__ == '__main__':
+    current_dir = pathlib.Path(__file__).resolve().parent
+    sys.path.append(str(current_dir.parents[1]))
+    from cloudia.main import Cloudia
+
+    try:
+        pd.DataFrame({'test': ['hoge']}).wc.plot()
+        pd.DataFrame({'test': ['hoge']})['test'].wc.plot()
+        pd.Series(['hoge']).wc.plot()
+        raise
+    except:
+        sys.exit(1)
+    sys.exit(0)
diff --git a/test/integration_test/test_cloudia_plot.py b/test/integration_test/test_cloudia_plot.py
new file mode 100644
index 0000000..d204ff4
--- /dev/null
+++ b/test/integration_test/test_cloudia_plot.py
@@ -0,0 +1,21 @@
+import sys
+import pathlib
+import pandas as pd
+
+if __name__ == '__main__':
+    current_dir = pathlib.Path(__file__).resolve().parent
+    sys.path.append(str(current_dir.parents[1]))
+    from cloudia.main import Cloudia
+
+    try:
+        Cloudia([('test', pd.Series(['hoge']))]).plot()
+        Cloudia([('test', 'hoge')]).plot()
+        Cloudia(['hoge']).plot()
+        Cloudia([pd.Series(['hoge'])]).plot()
+        Cloudia('hoge').plot()
+        Cloudia(('test', 'hoge')).plot()
+        Cloudia(pd.DataFrame({'test': ['hoge']})).plot()
+        Cloudia(pd.Series(['hoge'])).plot()
+    except:
+        sys.exit(1)
+    sys.exit(0)
diff --git a/test/unit_test/test_utils.py b/test/unit_test/test_utils.py
new file mode 100644
index 0000000..d07a4b6
--- /dev/null
+++ b/test/unit_test/test_utils.py
@@ -0,0 +1,21 @@
+from cloudia.utils import default_parse_func, function_wrapper
+import unittest
+from collections import Counter
+
+
+class TestUtils(unittest.TestCase):
+    def test_default_parse_func(self):
+        output = default_parse_func('This is a simple test.', ['simple test'], ['英単語'], ['is'])
+        self.assertListEqual(output, ['this', 'simple\u3000test'])
+
+    def test_function_wrapper(self):
+        def test(x):
+            return [x + '_']
+
+        wf = function_wrapper(test)
+        output = [wf(x, _index=i) for i, x in enumerate(['hoge', 'piyo'])]
+        target = [(Counter({'hoge_': 1}), 0), (Counter({'piyo_': 1}), 1)]
+        for o, t in zip(output, target):
+            self.assertEqual(type(o), type(t))
+            self.assertEqual(o[1], t[1])
+            self.assertEqual(o[0].most_common(), t[0].most_common())

From b3a303501e2dceb3ae2875fdbabb81bfa779d9d0 Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 06:13:26 +0900
Subject: [PATCH 07/11] fix it

---
 test/integration_test/test_cloudia_pandas_plot.py | 4 ++--
 test/integration_test/test_cloudia_plot.py        | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)
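
Note: two small hygiene fixes. `import cloudia  # noqa` keeps the linter
quiet about an import that exists only to register the pandas accessors,
and the bare excepts become `except Exception:`. The latter matters because
a bare except also catches SystemExit; toy demonstration:

    import sys

    try:
        sys.exit(1)
    except:            # noqa: E722 - bare except swallows SystemExit
        print('exit swallowed, script keeps running')

    try:
        sys.exit(0)
    except Exception:  # SystemExit is not an Exception subclass
        print('never reached')
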
diff --git a/test/integration_test/test_cloudia_pandas_plot.py b/test/integration_test/test_cloudia_pandas_plot.py
index 3c9b991..a522a47 100644
--- a/test/integration_test/test_cloudia_pandas_plot.py
+++ b/test/integration_test/test_cloudia_pandas_plot.py
@@ -5,13 +5,13 @@
 if __name__ == '__main__':
     current_dir = pathlib.Path(__file__).resolve().parent
     sys.path.append(str(current_dir.parents[1]))
-    from cloudia.main import Cloudia
+    import cloudia  # noqa
 
     try:
         pd.DataFrame({'test': ['hoge']}).wc.plot()
         pd.DataFrame({'test': ['hoge']})['test'].wc.plot()
         pd.Series(['hoge']).wc.plot()
         raise
-    except:
+    except Exception:
         sys.exit(1)
     sys.exit(0)
diff --git a/test/integration_test/test_cloudia_plot.py b/test/integration_test/test_cloudia_plot.py
index d204ff4..b172ac1 100644
--- a/test/integration_test/test_cloudia_plot.py
+++ b/test/integration_test/test_cloudia_plot.py
@@ -16,6 +16,6 @@
         Cloudia(('test', 'hoge')).plot()
         Cloudia(pd.DataFrame({'test': ['hoge']})).plot()
         Cloudia(pd.Series(['hoge'])).plot()
-    except:
+    except Exception:
         sys.exit(1)
     sys.exit(0)

From 05b89c92c7400947b3b9e3f54ab0896dcb283ab0 Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 06:22:52 +0900
Subject: [PATCH 08/11] add joblib

---
 .github/workflows/python_test.yml          | 10 +++++-----
 poetry.lock                                | 18 +++++++++++++++---
 pyproject.toml                             |  1 +
 .../test_cloudia_pandas_plot.py            |  2 ++
 test/integration_test/test_cloudia_plot.py |  2 ++
 5 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/python_test.yml b/.github/workflows/python_test.yml
index e595937..8f3382a 100644
--- a/.github/workflows/python_test.yml
+++ b/.github/workflows/python_test.yml
@@ -33,10 +33,7 @@
           exit 1
         fi
         echo "pass yapf"
-    - name: Test with pytest
-      run: |
-        poetry run python -m unittest discover -s ./test/unit_test/
-    - name: Test with mypy
+    - name: Lint with mypy
       run: |
         mypy --ignore-missing-imports ./cloudia/
         if [ $? != 0 ]; then
@@ -44,7 +41,10 @@
           exit 1
         fi
         echo "pass mypy"
-    - name: integration test
+    - name: Unit Test
+      run: |
+        poetry run python -m unittest discover -s ./test/unit_test/
+    - name: Integration Test
       run: |
         poetry run python ./test/integration_test/test_cloudia_plot.py
         if [ $? != 0 ]; then
diff --git a/poetry.lock b/poetry.lock
index ece8bbf..5c5137d 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -35,11 +35,19 @@
 description = "matplotlibのフォント設定を自動で日本語化する"
 name = "japanize-matplotlib"
 optional = false
 python-versions = "*"
-version = "1.1.1"
+version = "1.1.2"
 
 [package.dependencies]
 matplotlib = "*"
 
+[[package]]
+category = "main"
+description = "Lightweight pipelining: using Python functions as pipeline jobs."
+name = "joblib"
+optional = false
+python-versions = "*"
+version = "0.14.1"
+
 [[package]]
 category = "main"
 description = "A fast implementation of the Cassowary constraint solver"
@@ -165,7 +173,7 @@
 python-versions = ">=2.7"
 version = "2.0.0"
 
 [metadata]
-content-hash = "aa467e1fc61ce35041b8b0bf14d477d5c0fb0d72e8fbb0e7fcef9e48b85b9203"
+content-hash = "6e1e02815b139ab5162d0a2b90ec531ae6ed13fe54c692fde83cdf3ab5ffd970"
 python-versions = "^3.6"
 
 [metadata.files]
@@ -232,7 +240,11 @@
 dynet = [
     {file = "dyNET-2.1.tar.gz", hash = "sha256:d0f58aaf3926da24baba6e3e76cb3d090c8b6d359196ce138b11faa291b2ec07"},
 ]
 japanize-matplotlib = [
-    {file = "japanize-matplotlib-1.1.1.tar.gz", hash = "sha256:763cae497fd2884c5b3ce40ddb1eb5160ee02b6e1d5a4397af416584edb3b0e7"},
+    {file = "japanize-matplotlib-1.1.2.tar.gz", hash = "sha256:e2d9bb5ac2f2c37baf0991d43981fc63c2c5a3722e8e41dd6c31030220237b01"},
+]
+joblib = [
+    {file = "joblib-0.14.1-py2.py3-none-any.whl", hash = "sha256:bdb4fd9b72915ffb49fde2229ce482dd7ae79d842ed8c2b4c932441495af1403"},
+    {file = "joblib-0.14.1.tar.gz", hash = "sha256:0630eea4f5664c463f23fbf5dcfc54a2bc6168902719fa8e19daf033022786c8"},
 ]
 kiwisolver = [
     {file = "kiwisolver-1.2.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:443c2320520eda0a5b930b2725b26f6175ca4453c61f739fef7a5847bd262f74"},
diff --git a/pyproject.toml b/pyproject.toml
index 960b0c1..a544ce0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,6 +21,7 @@ wordcloud = "*"
 pandas = "*"
 matplotlib = "*"
 wurlitzer = "*"
+joblib = "*"
 japanize_matplotlib = "^1.1.1"
 
 [tool.poetry.scripts]
diff --git a/test/integration_test/test_cloudia_pandas_plot.py b/test/integration_test/test_cloudia_pandas_plot.py
index a522a47..de70979 100644
--- a/test/integration_test/test_cloudia_pandas_plot.py
+++ b/test/integration_test/test_cloudia_pandas_plot.py
@@ -1,5 +1,6 @@
 import sys
 import pathlib
+import traceback
 import pandas as pd
 
 if __name__ == '__main__':
@@ -13,5 +14,6 @@
         pd.Series(['hoge']).wc.plot()
         raise
     except Exception:
+        traceback.print_exc()
         sys.exit(1)
     sys.exit(0)
diff --git a/test/integration_test/test_cloudia_plot.py b/test/integration_test/test_cloudia_plot.py
index b172ac1..9468dc2 100644
--- a/test/integration_test/test_cloudia_plot.py
+++ b/test/integration_test/test_cloudia_plot.py
@@ -1,5 +1,6 @@
 import sys
 import pathlib
+import traceback
 import pandas as pd
 
 if __name__ == '__main__':
@@ -17,5 +18,6 @@
         Cloudia(pd.DataFrame({'test': ['hoge']})).plot()
         Cloudia(pd.Series(['hoge'])).plot()
     except Exception:
+        traceback.print_exc()
         sys.exit(1)
     sys.exit(0)

From 795b15cc64c7687cb2c221ee0e40e2121e4baf8a Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 06:39:50 +0900
Subject: [PATCH 09/11] fix mypy

---
 cloudia/word_data.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)
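
Note: mypy-driven changes only, no behavior change. parse_func tightens
from Any to Callable[..., List[str]], and variables whose type changed
mid-function (c, words) get distinct names instead. Minimal illustration of
the annotation style, with toy functions rather than library code:

    from typing import Callable, List


    def apply_parse(parse_func: Callable[..., List[str]], text: str) -> List[str]:
        return parse_func(text)


    def split_words(text: str, sep: str = ' ') -> List[str]:
        return text.split(sep)


    print(apply_parse(split_words, 'hoge piyo'))   # ['hoge', 'piyo']
    print(apply_parse(lambda t: [t], 'one item'))  # ['one item']
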
diff --git a/cloudia/word_data.py b/cloudia/word_data.py
index 3b75090..7a2543b 100644
--- a/cloudia/word_data.py
+++ b/cloudia/word_data.py
@@ -1,4 +1,4 @@
-from typing import Any, List, Tuple, Dict
+from typing import Any, List, Tuple, Dict, Callable, Union
 from itertools import repeat, chain, zip_longest
 from collections import Counter
 
@@ -9,12 +9,12 @@
 
 
 class WordData:
-    def __init__(self, data: Any, parse_func: Any, multiprocess: bool, **args):
+    def __init__(self, data: Any, parse_func: Callable[..., List[str]], multiprocess: bool, **args):
         words, self.names = self._init_data(data)
         self.counter_list = self.parse(words, parse_func, multiprocess, **args)
         self.words = [self.convert_weight(x) for x in self.counter_list]
 
-    def parse(self, words, parse_func: Any, multiprocess: bool, **args) -> List[List[str]]:
+    def parse(self, words, parse_func: Callable[..., List[str]], multiprocess: bool, **args) -> List[Counter]:
         if isinstance(words[0], list):
             word_list_length = len(words[0])
             words = list(chain.from_iterable(words))
@@ -26,25 +26,25 @@ def parse(self, words, parse_func: Any, multiprocess: bool, **args) -> List[List
         return words
 
     def convert_weight(self, c: Counter) -> Dict[str, float]:
-        c = c.most_common()
-        _max_count = c[0][1]
-        weight = {k: v / _max_count for k, v in c}
+        most_common = c.most_common()
+        _max_count = most_common[0][1]
+        weight = {k: v / _max_count for k, v in most_common}
         weight = {k: weight[k] for k in list(weight.keys())}
         return weight
 
-    def _parse(self, words: List[str], parse_func: Any, multiprocess: bool, **args) -> List[str]:
+    def _parse(self, words: List[str], parse_func: Callable[..., List[str]], multiprocess: bool, **args) -> Union[List[Counter], List[List[Counter]]]:
         if multiprocess:
             return self._parallel_parse(words, function_wrapper(parse_func), **args)
         return self._single_thread_parse(words, parse_func, **args)
 
-    def _single_thread_parse(self, words: List[str], parse_func: Any, **args) -> List[str]:
+    def _single_thread_parse(self, words: List[str], parse_func: Callable[..., List[str]], **args) -> List[Counter]:
         return [Counter(parse_func(x, **args)) for x in words]
 
-    def _parallel_parse(self, words: List[str], parse_func: Any, **args) -> List[str]:
-        words = Parallel(n_jobs=-1)([delayed(parse_func)(w, **dict(**a, **{'_index': i})) for i, (w, a) in enumerate(zip(words, repeat(args)))])
-        words.sort(key=lambda x: x[1])
-        words = [t[0] for t in words]
-        return words
+    def _parallel_parse(self, words: List[str], parse_func: Callable, **args) -> List[List[Counter]]:
+        parsed_words = Parallel(n_jobs=-1)([delayed(parse_func)(w, **dict(**a, **{'_index': i})) for i, (w, a) in enumerate(zip(words, repeat(args)))])
+        parsed_words.sort(key=lambda x: x[1])
+        parsed_words = [t[0] for t in parsed_words]
+        return parsed_words

From 932dfd3debfed90c81194c2c074031d9264e2f71 Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 06:47:43 +0900
Subject: [PATCH 10/11] remove raise

---
 test/integration_test/test_cloudia_pandas_plot.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/integration_test/test_cloudia_pandas_plot.py b/test/integration_test/test_cloudia_pandas_plot.py
index de70979..82062e7 100644
--- a/test/integration_test/test_cloudia_pandas_plot.py
+++ b/test/integration_test/test_cloudia_pandas_plot.py
@@ -12,7 +12,6 @@
         pd.DataFrame({'test': ['hoge']}).wc.plot()
         pd.DataFrame({'test': ['hoge']})['test'].wc.plot()
         pd.Series(['hoge']).wc.plot()
-        raise
     except Exception:
         traceback.print_exc()
         sys.exit(1)

From 22bb40ed28d05ae52f65e25c152855d393e1116a Mon Sep 17 00:00:00 2001
From: 6syun9 <6syun9@gmail.com>
Date: Sat, 9 May 2020 06:55:43 +0900
Subject: [PATCH 11/11] update readme, update it

---
 README.md                                  |  6 ++----
 .../test_cloudia_pandas_plot.py            |  7 ++++---
 test/integration_test/test_cloudia_plot.py | 17 +++++++++--------
 3 files changed, 15 insertions(+), 15 deletions(-)
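
Note: the README now documents the reworked surface - word_num and parser
are gone, parse_func defaults to the nagisa-based tokenizer, and
multiprocess toggles the joblib path. End-to-end usage sketch with made-up
data; a custom parse_func must accept the keyword arguments Cloudia
forwards (single_words, stop_words, extract_postags), hence the **args:

    import pandas as pd

    from cloudia.main import Cloudia

    df = pd.DataFrame({'sample': ['hoge hoge piyo', 'piyo fuga']})
    Cloudia(df, parse_func=lambda x, **args: x.split(' '), multiprocess=False).plot()
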
diff --git a/README.md b/README.md
index c76b5c8..c308bf8 100644
--- a/README.md
+++ b/README.md
@@ -91,9 +91,8 @@ Cloudia(
     single_words=[], # It's not split word list, example: ["neural network"]
     stop_words=STOPWORDS, # not count words, default is wordcloud.STOPWORDS
     extract_postags=['名詞', '英単語', 'ローマ字文'], # part of speech for japanese
-    word_num=100, # max word num
-    parser=None, # morphological analysis instance for japanese
     parse_func=None, # split text function, example: lambda x: x.split(',')
+    multiprocess=True # Flag for using multiprocessing
 )
 ```
@@ -125,9 +124,8 @@ DataFrame.wc.plot(
     single_words=[], # It's not split word list, example: ["neural network"]
     stop_words=STOPWORDS, # not count words, default is wordcloud.STOPWORDS
     extract_postags=['名詞', '英単語', 'ローマ字文'], # part of speech for japanese
-    word_num=100, # max word num
-    parser=None, # morphological analysis instance for japanese
     parse_func=None, # split text function, example: lambda x: x.split(',')
+    multiprocess=True, # Flag for using multiprocessing
     dark_theme=False, # color theme
     title_size=12, # title text size
     row_num=3, # for example, 12 wordcloud, row_num=3 -> 4*3image
diff --git a/test/integration_test/test_cloudia_pandas_plot.py b/test/integration_test/test_cloudia_pandas_plot.py
index 82062e7..0a1028a 100644
--- a/test/integration_test/test_cloudia_pandas_plot.py
+++ b/test/integration_test/test_cloudia_pandas_plot.py
@@ -9,9 +9,10 @@
     import cloudia  # noqa
 
     try:
-        pd.DataFrame({'test': ['hoge']}).wc.plot()
-        pd.DataFrame({'test': ['hoge']})['test'].wc.plot()
-        pd.Series(['hoge']).wc.plot()
+        for multiprocess in [True, False]:
+            pd.DataFrame({'test': ['hoge']}).wc.plot(multiprocess=multiprocess)
+            pd.DataFrame({'test': ['hoge']})['test'].wc.plot(multiprocess=multiprocess)
+            pd.Series(['hoge']).wc.plot(multiprocess=multiprocess)
     except Exception:
         traceback.print_exc()
         sys.exit(1)
diff --git a/test/integration_test/test_cloudia_plot.py b/test/integration_test/test_cloudia_plot.py
index 9468dc2..7a3fad8 100644
--- a/test/integration_test/test_cloudia_plot.py
+++ b/test/integration_test/test_cloudia_plot.py
@@ -9,14 +9,15 @@
     from cloudia.main import Cloudia
 
     try:
-        Cloudia([('test', pd.Series(['hoge']))]).plot()
-        Cloudia([('test', 'hoge')]).plot()
-        Cloudia(['hoge']).plot()
-        Cloudia([pd.Series(['hoge'])]).plot()
-        Cloudia('hoge').plot()
-        Cloudia(('test', 'hoge')).plot()
-        Cloudia(pd.DataFrame({'test': ['hoge']})).plot()
-        Cloudia(pd.Series(['hoge'])).plot()
+        for multiprocess in [True, False]:
+            Cloudia([('test', pd.Series(['hoge']))], multiprocess=multiprocess).plot()
+            Cloudia([('test', 'hoge')], multiprocess=multiprocess).plot()
+            Cloudia(['hoge'], multiprocess=multiprocess).plot()
+            Cloudia([pd.Series(['hoge'])], multiprocess=multiprocess).plot()
+            Cloudia('hoge', multiprocess=multiprocess).plot()
+            Cloudia(('test', 'hoge'), multiprocess=multiprocess).plot()
+            Cloudia(pd.DataFrame({'test': ['hoge']}), multiprocess=multiprocess).plot()
+            Cloudia(pd.Series(['hoge']), multiprocess=multiprocess).plot()
     except Exception:
         traceback.print_exc()
         sys.exit(1)