From 9e996590ba85b1aa30159f9ba9b785628c6343f3 Mon Sep 17 00:00:00 2001 From: 6syun9 <6syun9@gmail.com> Date: Wed, 6 May 2020 15:06:02 +0900 Subject: [PATCH 1/4] auto calc fig size from rate --- cloudia/main.py | 33 ++++++++++++++++++++++++--------- cloudia/pandas_accessor.py | 14 ++++++-------- 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/cloudia/main.py b/cloudia/main.py index c584cf6..fe29eb6 100644 --- a/cloudia/main.py +++ b/cloudia/main.py @@ -23,17 +23,18 @@ def __init__(self, parse_func=parse_func, sampling_rate=sampling_rate) - def plot(self, dark_theme=False, figsize=(7.2, 4.8), wcsize=(720, 480), title_size=12, row_num=3): - wc = self.make_wordcloud(dark_theme, wcsize) - self.make_fig(wc, dark_theme, figsize, title_size, row_num) + def plot(self, dark_theme=False, title_size=12, row_num=3, figsize_rate=2): + wc = self.make_wordcloud(dark_theme, figsize_rate) + self.make_fig(wc, dark_theme, title_size, row_num, figsize_rate) - def save(self, fig_path, dark_theme=False, figsize=(7.2, 4.8), wcsize=(720, 480), title_size=12, row_num=3): - wc = self.make_wordcloud(dark_theme, wcsize) - self.make_fig(wc, dark_theme, figsize, title_size, row_num) + def save(self, fig_path, dark_theme=False, title_size=12, row_num=3, figsize_rate=2): + wc = self.make_wordcloud(dark_theme, figsize_rate) + self.make_fig(wc, dark_theme, title_size, row_num, figsize_rate) plt.savefig(fig_path, facecolor=self._color(dark_theme), pad_inches=0.0, bbox_inches="tight") - def make_wordcloud(self, dark_theme, wcsize): + def make_wordcloud(self, dark_theme, rate): wordcloud_list = [] + wcsize = self._calc_wc_size(rate) for name, words in self.wd: wordcloud = WordCloud(font_path=japanize_matplotlib.get_font_ttf_path(), background_color=self._color(dark_theme), @@ -43,8 +44,8 @@ def make_wordcloud(self, dark_theme, wcsize): wordcloud_list.append((name, wordcloud)) return wordcloud_list - def make_fig(self, wordcloud_list, dark_theme, figsize, title_size, row_num): - fig = plt.figure(facecolor=self._color(dark_theme), figsize=figsize) + def make_fig(self, wordcloud_list, dark_theme, title_size, row_num, rate): + fig = plt.figure(facecolor=self._color(dark_theme), figsize=self._calc_fig_size(row_num, len(wordcloud_list), rate)) w, h = self._calc_sub_plot_dimensions(len(wordcloud_list), row_num) for i, (title, wc) in enumerate(wordcloud_list): ax = fig.add_subplot(w, h, i + 1) @@ -52,6 +53,20 @@ def make_fig(self, wordcloud_list, dark_theme, figsize, title_size, row_num): ax.set_title(title, color=self._color(dark_theme, True), fontsize=title_size) ax.axis('off') + @staticmethod + def _calc_fig_size(row_num, item_num, rate): + if row_num == 1 and item_num == 1: + return rate * 5 * 2, rate * 3 * 2 + if item_num <= row_num: + return rate * 5 * item_num, rate * 3 * item_num + elif item_num // row_num + 1 < row_num: + return rate * 5 * row_num, rate * 3 * ((item_num // row_num + 1) % row_num) + return rate * 5 * row_num, rate * 3 * (row_num + ((item_num // row_num + 1) - row_num)) + + @staticmethod + def _calc_wc_size(rate): + return rate * 5 * 100, rate * 3 * 100 + @staticmethod def _calc_sub_plot_dimensions(l, row_num): return (l // row_num) + 1, row_num if l > row_num else l diff --git a/cloudia/pandas_accessor.py b/cloudia/pandas_accessor.py index 06edd54..a82f4e1 100644 --- a/cloudia/pandas_accessor.py +++ b/cloudia/pandas_accessor.py @@ -18,12 +18,11 @@ def plot(self, parse_func=None, sampling_rate=1.0, dark_theme=False, - figsize=(7.2, 4.8), - wcsize=(720, 480), title_size=12, - row_num=3): + row_num=3, + figsize_rate=2): Cloudia(self.df, single_words, stop_words, extract_postags, word_num, parser, parse_func, - sampling_rate).plot(dark_theme, figsize, wcsize, title_size, row_num) + sampling_rate).plot(dark_theme, title_size, row_num, figsize_rate) def save(self, fig_path, dark_theme, **args): self.plot(args) @@ -44,12 +43,11 @@ def plot(self, parse_func=None, sampling_rate=1.0, dark_theme=False, - figsize=(7.2, 4.8), - wcsize=(720, 480), title_size=12, - row_num=3): + row_num=3, + figsize_rate=2): Cloudia(self.series, single_words, stop_words, extract_postags, word_num, parser, parse_func, - sampling_rate).plot(dark_theme, figsize, wcsize, title_size, row_num) + sampling_rate).plot(dark_theme, title_size, row_num, figsize_rate) def save(self, fig_path, dark_theme, **args): self.plot(args) From de96d2958e1cb890271e9ea70493cb56fd6eeaaf Mon Sep 17 00:00:00 2001 From: 6syun9 <6syun9@gmail.com> Date: Wed, 6 May 2020 15:49:20 +0900 Subject: [PATCH 2/4] add unittest --- test/unit_test/test_main.py | 38 +++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/test/unit_test/test_main.py b/test/unit_test/test_main.py index f3eb5f8..81a0a9c 100644 --- a/test/unit_test/test_main.py +++ b/test/unit_test/test_main.py @@ -3,9 +3,47 @@ class TestCloudia(unittest.TestCase): + # TODO: split test case def setUp(self): self.cls = Cloudia('test') + def test_calc_fig_size(self): + # row_num==item_num==1 + output = self.cls._calc_fig_size(1, 1, 1) + self.assertTupleEqual(output, (10, 6)) + + # rate + output = self.cls._calc_fig_size(1, 1, 2) + self.assertTupleEqual(output, (20, 12)) + + # item_num<=row_num + output = self.cls._calc_fig_size(1, 2, 1) + self.assertTupleEqual(output, (5, 9)) + + output = self.cls._calc_fig_size(1, 2, 2) + self.assertTupleEqual(output, (10, 18)) + + # item_num // row_num + 1 < row_num + output = self.cls._calc_fig_size(2, 3, 1) + self.assertTupleEqual(output, (10, 6)) + + output = self.cls._calc_fig_size(2, 3, 2) + self.assertTupleEqual(output, (20, 12)) + + # else + output = self.cls._calc_fig_size(3, 10, 1) + self.assertTupleEqual(output, (15, 12)) + + output = self.cls._calc_fig_size(3, 10, 2) + self.assertTupleEqual(output, (30, 24)) + + def test_calc_wc_size(self): + output = self.cls._calc_wc_size(1) + self.assertTupleEqual(output, (500, 300)) + + output = self.cls._calc_wc_size(2) + self.assertTupleEqual(output, (1000, 600)) + def test_calc_sub_plot_dimensions(self): output = self.cls._calc_sub_plot_dimensions(10, 3) self.assertTupleEqual(output, (4, 3)) From da48f194ea981bbc8bc58e170c83baf97d76218b Mon Sep 17 00:00:00 2001 From: 6syun9 <6syun9@gmail.com> Date: Wed, 6 May 2020 15:55:11 +0900 Subject: [PATCH 3/4] update readme --- README.md | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 5cb8a37..1bb0138 100644 --- a/README.md +++ b/README.md @@ -103,10 +103,9 @@ plot method args. ``` Cloudia().plot( dark_theme=False, # color theme - figsize=(7.2, 4.8), # plt figure size - wcsize=(720, 480), # one wordcloud ax size title_size=12, # title text size row_num=3, # for example, 12 wordcloud, row_num=3 -> 4*3image + figsize_rate=2 # figure size rate ) ``` @@ -115,10 +114,9 @@ save method args. Cloudia().save( file_path, # save figure image path dark_theme=False, - figsize=(7.2, 4.8), - wcsize=(720, 480), title_size=12, - row_num=3 + row_num=3, + figsize_rate=2 ) ``` @@ -133,10 +131,9 @@ DataFrame.wc.plot( parse_func=None, # split text function, example: lambda x: x.split(',') sampling_rate=sampling_rate # pandas.DataFrame.sample.frac dark_theme=False, # color theme - figsize=(7.2, 4.8), # plt figure size - wcsize=(720, 480), # one wordcloud ax size title_size=12, # title text size row_num=3, # for example, 12 wordcloud, row_num=3 -> 4*3image + figsize_rate=2 # figure size rate ) ``` If we use wc.save, setting file_path args. From deb9ad20dcffb5db270fa07e92ffadf8037da99c Mon Sep 17 00:00:00 2001 From: 6syun9 <6syun9@gmail.com> Date: Wed, 6 May 2020 17:12:27 +0900 Subject: [PATCH 4/4] add mypy --- .github/workflows/python_test.yml | 12 +++++-- cloudia/main.py | 52 ++++++++++++++------------- cloudia/pandas_accessor.py | 60 ++++++++++++++++--------------- cloudia/word_data.py | 19 +++++----- test/unit_test/test_main.py | 4 +-- 5 files changed, 82 insertions(+), 65 deletions(-) diff --git a/.github/workflows/python_test.yml b/.github/workflows/python_test.yml index c72ff5f..fb65aa4 100644 --- a/.github/workflows/python_test.yml +++ b/.github/workflows/python_test.yml @@ -23,9 +23,8 @@ jobs: - name: Install poetry dependencies run: | python -m pip install --upgrade pip - pip install poetry yapf poetry-dynamic-versioning + pip install poetry yapf mypy poetry install - poetry run python -m pip install git+https://github.com/vaaaaanquish/japanize-matplotlib - name: Lint with yapf run: | diff=$(yapf -dr --style=./yapf.ini ./cloudia/) @@ -37,3 +36,12 @@ jobs: - name: Test with pytest run: | poetry run python -m unittest discover -s ./test/unit_test/ + - name: Test with mypy + run: | + mypy --ignore-missing-imports ./cloudia/ + if [ $? != 0 ]; then + echo "failed: mypy" + exit 1 + fi + echo "pass mypy" + diff --git a/cloudia/main.py b/cloudia/main.py index fe29eb6..a4f4a71 100644 --- a/cloudia/main.py +++ b/cloudia/main.py @@ -1,19 +1,21 @@ +from typing import Any, List, Tuple + import matplotlib.pyplot as plt import japanize_matplotlib from wordcloud import WordCloud, STOPWORDS from cloudia.word_data import WordData -class Cloudia: +class CloudiaBase: def __init__(self, - data, - single_words=[], - stop_words=STOPWORDS, - extract_postags=['名詞', '英単語', 'ローマ字文'], - word_num=100, - parser=None, - parse_func=None, - sampling_rate=1.0): + data: Any, + single_words: List[str] = [], + stop_words: List[str] = STOPWORDS, + extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'], + word_num: int = 100, + parser: Any = None, + parse_func: Any = None, + sampling_rate: float = 1.0): self.wd = WordData(data=data, single_words=single_words, stop_words=stop_words, @@ -23,16 +25,7 @@ def __init__(self, parse_func=parse_func, sampling_rate=sampling_rate) - def plot(self, dark_theme=False, title_size=12, row_num=3, figsize_rate=2): - wc = self.make_wordcloud(dark_theme, figsize_rate) - self.make_fig(wc, dark_theme, title_size, row_num, figsize_rate) - - def save(self, fig_path, dark_theme=False, title_size=12, row_num=3, figsize_rate=2): - wc = self.make_wordcloud(dark_theme, figsize_rate) - self.make_fig(wc, dark_theme, title_size, row_num, figsize_rate) - plt.savefig(fig_path, facecolor=self._color(dark_theme), pad_inches=0.0, bbox_inches="tight") - - def make_wordcloud(self, dark_theme, rate): + def make_wordcloud(self, dark_theme: bool, rate: int) -> List[Tuple[str, WordCloud]]: wordcloud_list = [] wcsize = self._calc_wc_size(rate) for name, words in self.wd: @@ -44,7 +37,7 @@ def make_wordcloud(self, dark_theme, rate): wordcloud_list.append((name, wordcloud)) return wordcloud_list - def make_fig(self, wordcloud_list, dark_theme, title_size, row_num, rate): + def make_fig(self, wordcloud_list: List[Tuple[str, WordCloud]], dark_theme: bool, title_size: int, row_num: int, rate: int): fig = plt.figure(facecolor=self._color(dark_theme), figsize=self._calc_fig_size(row_num, len(wordcloud_list), rate)) w, h = self._calc_sub_plot_dimensions(len(wordcloud_list), row_num) for i, (title, wc) in enumerate(wordcloud_list): @@ -54,7 +47,7 @@ def make_fig(self, wordcloud_list, dark_theme, title_size, row_num, rate): ax.axis('off') @staticmethod - def _calc_fig_size(row_num, item_num, rate): + def _calc_fig_size(row_num: int, item_num: int, rate: int) -> Tuple[int, int]: if row_num == 1 and item_num == 1: return rate * 5 * 2, rate * 3 * 2 if item_num <= row_num: @@ -64,15 +57,26 @@ def _calc_fig_size(row_num, item_num, rate): return rate * 5 * row_num, rate * 3 * (row_num + ((item_num // row_num + 1) - row_num)) @staticmethod - def _calc_wc_size(rate): + def _calc_wc_size(rate: int) -> Tuple[int, int]: return rate * 5 * 100, rate * 3 * 100 @staticmethod - def _calc_sub_plot_dimensions(l, row_num): + def _calc_sub_plot_dimensions(l: int, row_num: int) -> Tuple[int, int]: return (l // row_num) + 1, row_num if l > row_num else l @staticmethod - def _color(dark_theme, text=False): + def _color(dark_theme: bool, text: bool = False) -> str: if text: return 'white' if dark_theme else 'black' return 'black' if dark_theme else 'white' + + +class Cloudia(CloudiaBase): + def plot(self, dark_theme: bool = False, title_size: int = 12, row_num: int = 3, figsize_rate: int = 2): + wc = self.make_wordcloud(dark_theme, figsize_rate) + self.make_fig(wc, dark_theme, title_size, row_num, figsize_rate) + + def save(self, fig_path: str, dark_theme: bool = False, title_size: int = 12, row_num: int = 3, figsize_rate: int = 2): + wc = self.make_wordcloud(dark_theme, figsize_rate) + self.make_fig(wc, dark_theme, title_size, row_num, figsize_rate) + plt.savefig(fig_path, facecolor=self._color(dark_theme), pad_inches=0.0, bbox_inches="tight") diff --git a/cloudia/pandas_accessor.py b/cloudia/pandas_accessor.py index a82f4e1..a4e97ce 100644 --- a/cloudia/pandas_accessor.py +++ b/cloudia/pandas_accessor.py @@ -1,54 +1,56 @@ -from cloudia.main import Cloudia +from typing import Any, List + +from cloudia.main import CloudiaBase, Cloudia import matplotlib.pyplot as plt from wordcloud import STOPWORDS import pandas as pd @pd.api.extensions.register_dataframe_accessor('wc') -class CloudiaDataFrame(Cloudia): +class CloudiaDataFrame(CloudiaBase): def __init__(self, df): self.df = df def plot(self, - single_words=[], - stop_words=STOPWORDS, - extract_postags=['名詞', '英単語', 'ローマ字文'], - word_num=100, - parser=None, - parse_func=None, - sampling_rate=1.0, - dark_theme=False, - title_size=12, - row_num=3, - figsize_rate=2): + single_words: List[str] = [], + stop_words: List[str] = STOPWORDS, + extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'], + word_num: int = 100, + parser: Any = None, + parse_func: Any = None, + sampling_rate: float = 1.0, + dark_theme: bool = False, + title_size: int = 12, + row_num: int = 3, + figsize_rate: int = 2): Cloudia(self.df, single_words, stop_words, extract_postags, word_num, parser, parse_func, sampling_rate).plot(dark_theme, title_size, row_num, figsize_rate) - def save(self, fig_path, dark_theme, **args): - self.plot(args) + def save(self, fig_path: str, dark_theme: bool, **args: Any): + self.plot(**args) plt.savefig(fig_path, facecolor=self._color(dark_theme), pad_inches=0.0, bbox_inches="tight") @pd.api.extensions.register_series_accessor('wc') -class CloudiaSeries(Cloudia): +class CloudiaSeries(CloudiaBase): def __init__(self, series): self.series = series def plot(self, - single_words=[], - stop_words=STOPWORDS, - extract_postags=['名詞', '英単語', 'ローマ字文'], - word_num=100, - parser=None, - parse_func=None, - sampling_rate=1.0, - dark_theme=False, - title_size=12, - row_num=3, - figsize_rate=2): + single_words: List[str] = [], + stop_words: List[str] = STOPWORDS, + extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'], + word_num: int = 100, + parser: Any = None, + parse_func: Any = None, + sampling_rate: float = 1.0, + dark_theme: bool = False, + title_size: int = 12, + row_num: int = 3, + figsize_rate: int = 2): Cloudia(self.series, single_words, stop_words, extract_postags, word_num, parser, parse_func, sampling_rate).plot(dark_theme, title_size, row_num, figsize_rate) - def save(self, fig_path, dark_theme, **args): - self.plot(args) + def save(self, fig_path: str, dark_theme: bool, **args: Any): + self.plot(**args) plt.savefig(fig_path, facecolor=self._color(dark_theme), pad_inches=0.0, bbox_inches="tight") diff --git a/cloudia/word_data.py b/cloudia/word_data.py index 3e2a8a9..d817070 100644 --- a/cloudia/word_data.py +++ b/cloudia/word_data.py @@ -1,6 +1,8 @@ +from typing import Any, List, Tuple, Dict +import re + from collections import Counter import pandas as pd -import re from wurlitzer import pipes with pipes() as (out, err): @@ -9,8 +11,9 @@ class WordData: - def __init__(self, data, single_words, stop_words, extract_postags, word_num, parser, parse_func, sampling_rate): - self.words, self.names = self._init_data(data, sampling_rate) + def __init__(self, data: Any, single_words: List[str], stop_words: List[str], extract_postags: List[str], word_num: int, parser: Any, parse_func: Any, + sampling_rate: float): + words, self.names = self._init_data(data, sampling_rate) self.word_num = word_num self.single_words = single_words self.extract_postags = extract_postags @@ -18,11 +21,11 @@ def __init__(self, data, single_words, stop_words, extract_postags, word_num, pa self.parser = nagisa.Tagger(single_word_list=self.single_words) if not parser else parser self.num_regex = re.compile('^[0-9]+$') if parse_func: - self.words = [self.count(parse_func(x)) for x in self.words] + self.words = [self.count(parse_func(x)) for x in words] else: - self.words = [self.count(self.parse(x)) for x in self.words] + self.words = [self.count(self.parse(x)) for x in words] - def _init_data(self, data, sampling_rate): + def _init_data(self, data: Any, sampling_rate: float) -> Tuple[List[str], List[str]]: words, names = [], [] if isinstance(data, list): if isinstance(data[0], tuple): @@ -53,14 +56,14 @@ def _init_data(self, data, sampling_rate): return words, names - def count(self, words): + def count(self, words: List[str]) -> Dict[str, float]: c = Counter(words).most_common() _max_count = c[0][1] weight = {k: v / _max_count for k, v in c if k not in self.stop_words} weight = {k: weight[k] for k in list(weight.keys())[:self.word_num]} return weight - def parse(self, text): + def parse(self, text: str) -> List[str]: for x in ['"', ';', ',', '(', ')', '\u3000']: text = text.replace(x, ' ') text = text.lower() diff --git a/test/unit_test/test_main.py b/test/unit_test/test_main.py index 81a0a9c..241067b 100644 --- a/test/unit_test/test_main.py +++ b/test/unit_test/test_main.py @@ -1,11 +1,11 @@ -from cloudia.main import Cloudia +from cloudia.main import CloudiaBase import unittest class TestCloudia(unittest.TestCase): # TODO: split test case def setUp(self): - self.cls = Cloudia('test') + self.cls = CloudiaBase('test') def test_calc_fig_size(self): # row_num==item_num==1