Merge pull request #11 from vaaaaanquish/calc_auto_figsize

Calc auto figsize
vaaaaanquish · May 6, 2020 · 08aeab4 · 08aeab4
2 parents 3af064f + daaae54
commit 08aeab4
Show file tree

Hide file tree

Showing 6 changed files with 140 additions and 75 deletions.
diff --git a/.github/workflows/python_test.yml b/.github/workflows/python_test.yml
@@ -23,9 +23,8 @@ jobs:
     - name: Install poetry dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install poetry yapf poetry-dynamic-versioning
+        pip install poetry yapf mypy
         poetry install
-        poetry run python -m pip install git+https://github.com/vaaaaanquish/japanize-matplotlib
     - name: Lint with yapf
       run: |
         diff=$(yapf -dr --style=./yapf.ini ./cloudia/)
@@ -37,3 +36,12 @@ jobs:
     - name: Test with pytest
       run: |
         poetry run python -m unittest discover -s ./test/unit_test/
+    - name: Test with mypy
+      run: |
+        mypy --ignore-missing-imports ./cloudia/
+        if [ $? != 0 ]; then
+            echo "failed: mypy"
+            exit 1
+        fi
+        echo "pass mypy"
+
diff --git a/README.md b/README.md
@@ -103,10 +103,9 @@ plot method args.
 ```
 Cloudia().plot(
     dark_theme=False,    # color theme
-    figsize=(7.2, 4.8),    # plt figure size
-    wcsize=(720, 480),    # one wordcloud ax size
     title_size=12,     # title text size
     row_num=3,    # for example, 12 wordcloud, row_num=3 -> 4*3image
+    figsize_rate=2    # scale factor applied to the automatically calculated figure size
 )
 ```
 
@@ -115,10 +114,9 @@ save method args.
 Cloudia().save(
     file_path,    # save figure image path
     dark_theme=False,
-    figsize=(7.2, 4.8),
-    wcsize=(720, 480),
     title_size=12, 
-    row_num=3
+    row_num=3,
+    figsize_rate=2
 )
 ```
 
@@ -133,10 +131,9 @@ DataFrame.wc.plot(
   parse_func=None,    # split text function, example: lambda x: x.split(',')
   sampling_rate=sampling_rate    # pandas.DataFrame.sample.frac
   dark_theme=False,    # color theme
-  figsize=(7.2, 4.8),    # plt figure size
-  wcsize=(720, 480),    # one wordcloud ax size
   title_size=12,     # title text size
   row_num=3,    # for example, 12 wordcloud, row_num=3 -> 4*3image
+  figsize_rate=2    # scale factor applied to the automatically calculated figure size
 )
 ```
 When using wc.save, also pass the file_path argument.

diff --git a/cloudia/main.py b/cloudia/main.py
@@ -1,19 +1,21 @@
+from typing import Any, List, Tuple
+
 import matplotlib.pyplot as plt
 import japanize_matplotlib
 from wordcloud import WordCloud, STOPWORDS
 from cloudia.word_data import WordData
 
 
-class Cloudia:
+class CloudiaBase:
     def __init__(self,
-                 data,
-                 single_words=[],
-                 stop_words=STOPWORDS,
-                 extract_postags=['名詞', '英単語', 'ローマ字文'],
-                 word_num=100,
-                 parser=None,
-                 parse_func=None,
-                 sampling_rate=1.0):
+                 data: Any,
+                 single_words: List[str] = [],
+                 stop_words: List[str] = STOPWORDS,
+                 extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
+                 word_num: int = 100,
+                 parser: Any = None,
+                 parse_func: Any = None,
+                 sampling_rate: float = 1.0):
         self.wd = WordData(data=data,
                            single_words=single_words,
                            stop_words=stop_words,
@@ -23,17 +25,9 @@ def __init__(self,
                            parse_func=parse_func,
                            sampling_rate=sampling_rate)
 
-    def plot(self, dark_theme=False, figsize=(7.2, 4.8), wcsize=(720, 480), title_size=12, row_num=3):
-        wc = self.make_wordcloud(dark_theme, wcsize)
-        self.make_fig(wc, dark_theme, figsize, title_size, row_num)
-
-    def save(self, fig_path, dark_theme=False, figsize=(7.2, 4.8), wcsize=(720, 480), title_size=12, row_num=3):
-        wc = self.make_wordcloud(dark_theme, wcsize)
-        self.make_fig(wc, dark_theme, figsize, title_size, row_num)
-        plt.savefig(fig_path, facecolor=self._color(dark_theme), pad_inches=0.0, bbox_inches="tight")
-
-    def make_wordcloud(self, dark_theme, wcsize):
+    def make_wordcloud(self, dark_theme: bool, rate: int) -> List[Tuple[str, WordCloud]]:
         wordcloud_list = []
+        wcsize = self._calc_wc_size(rate)
         for name, words in self.wd:
             wordcloud = WordCloud(font_path=japanize_matplotlib.get_font_ttf_path(),
                                   background_color=self._color(dark_theme),
@@ -43,8 +37,8 @@ def make_wordcloud(self, dark_theme, wcsize):
             wordcloud_list.append((name, wordcloud))
         return wordcloud_list
 
-    def make_fig(self, wordcloud_list, dark_theme, figsize, title_size, row_num):
-        fig = plt.figure(facecolor=self._color(dark_theme), figsize=figsize)
+    def make_fig(self, wordcloud_list: List[Tuple[str, WordCloud]], dark_theme: bool, title_size: int, row_num: int, rate: int):
+        fig = plt.figure(facecolor=self._color(dark_theme), figsize=self._calc_fig_size(row_num, len(wordcloud_list), rate))
         w, h = self._calc_sub_plot_dimensions(len(wordcloud_list), row_num)
         for i, (title, wc) in enumerate(wordcloud_list):
             ax = fig.add_subplot(w, h, i + 1)
@@ -53,11 +47,36 @@ def make_fig(self, wordcloud_list, dark_theme, figsize, title_size, row_num):
             ax.axis('off')
 
     @staticmethod
-    def _calc_sub_plot_dimensions(l, row_num):
+    def _calc_fig_size(row_num: int, item_num: int, rate: int) -> Tuple[int, int]:
+        if row_num == 1 and item_num == 1:
+            return rate * 5 * 2, rate * 3 * 2
+        if item_num <= row_num:
+            return rate * 5 * item_num, rate * 3 * item_num
+        elif item_num // row_num + 1 < row_num:
+            return rate * 5 * row_num, rate * 3 * ((item_num // row_num + 1) % row_num)
+        return rate * 5 * row_num, rate * 3 * (row_num + ((item_num // row_num + 1) - row_num))
+
+    @staticmethod
+    def _calc_wc_size(rate: int) -> Tuple[int, int]:
+        return rate * 5 * 100, rate * 3 * 100
+
+    @staticmethod
+    def _calc_sub_plot_dimensions(l: int, row_num: int) -> Tuple[int, int]:
         return (l // row_num) + 1, row_num if l > row_num else l
 
     @staticmethod
-    def _color(dark_theme, text=False):
+    def _color(dark_theme: bool, text: bool = False) -> str:
         if text:
             return 'white' if dark_theme else 'black'
         return 'black' if dark_theme else 'white'
+
+
+class Cloudia(CloudiaBase):
+    def plot(self, dark_theme: bool = False, title_size: int = 12, row_num: int = 3, figsize_rate: int = 2):
+        wc = self.make_wordcloud(dark_theme, figsize_rate)
+        self.make_fig(wc, dark_theme, title_size, row_num, figsize_rate)
+
+    def save(self, fig_path: str, dark_theme: bool = False, title_size: int = 12, row_num: int = 3, figsize_rate: int = 2):
+        wc = self.make_wordcloud(dark_theme, figsize_rate)
+        self.make_fig(wc, dark_theme, title_size, row_num, figsize_rate)
+        plt.savefig(fig_path, facecolor=self._color(dark_theme), pad_inches=0.0, bbox_inches="tight")
diff --git a/cloudia/pandas_accessor.py b/cloudia/pandas_accessor.py
@@ -1,56 +1,56 @@
-from cloudia.main import Cloudia
+from typing import Any, List
+
+from cloudia.main import CloudiaBase, Cloudia
 import matplotlib.pyplot as plt
 from wordcloud import STOPWORDS
 import pandas as pd
 
 
 @pd.api.extensions.register_dataframe_accessor('wc')
-class CloudiaDataFrame(Cloudia):
+class CloudiaDataFrame(CloudiaBase):
     def __init__(self, df):
         self.df = df
 
     def plot(self,
-             single_words=[],
-             stop_words=STOPWORDS,
-             extract_postags=['名詞', '英単語', 'ローマ字文'],
-             word_num=100,
-             parser=None,
-             parse_func=None,
-             sampling_rate=1.0,
-             dark_theme=False,
-             figsize=(7.2, 4.8),
-             wcsize=(720, 480),
-             title_size=12,
-             row_num=3):
+             single_words: List[str] = [],
+             stop_words: List[str] = STOPWORDS,
+             extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
+             word_num: int = 100,
+             parser: Any = None,
+             parse_func: Any = None,
+             sampling_rate: float = 1.0,
+             dark_theme: bool = False,
+             title_size: int = 12,
+             row_num: int = 3,
+             figsize_rate: int = 2):
         Cloudia(self.df, single_words, stop_words, extract_postags, word_num, parser, parse_func,
-                sampling_rate).plot(dark_theme, figsize, wcsize, title_size, row_num)
+                sampling_rate).plot(dark_theme, title_size, row_num, figsize_rate)
 
-    def save(self, fig_path, dark_theme, **args):
-        self.plot(args)
+    def save(self, fig_path: str, dark_theme: bool, **args: Any):
+        self.plot(**args)
         plt.savefig(fig_path, facecolor=self._color(dark_theme), pad_inches=0.0, bbox_inches="tight")
 
 
 @pd.api.extensions.register_series_accessor('wc')
-class CloudiaSeries(Cloudia):
+class CloudiaSeries(CloudiaBase):
     def __init__(self, series):
         self.series = series
 
     def plot(self,
-             single_words=[],
-             stop_words=STOPWORDS,
-             extract_postags=['名詞', '英単語', 'ローマ字文'],
-             word_num=100,
-             parser=None,
-             parse_func=None,
-             sampling_rate=1.0,
-             dark_theme=False,
-             figsize=(7.2, 4.8),
-             wcsize=(720, 480),
-             title_size=12,
-             row_num=3):
+             single_words: List[str] = [],
+             stop_words: List[str] = STOPWORDS,
+             extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
+             word_num: int = 100,
+             parser: Any = None,
+             parse_func: Any = None,
+             sampling_rate: float = 1.0,
+             dark_theme: bool = False,
+             title_size: int = 12,
+             row_num: int = 3,
+             figsize_rate: int = 2):
         Cloudia(self.series, single_words, stop_words, extract_postags, word_num, parser, parse_func,
-                sampling_rate).plot(dark_theme, figsize, wcsize, title_size, row_num)
+                sampling_rate).plot(dark_theme, title_size, row_num, figsize_rate)
 
-    def save(self, fig_path, dark_theme, **args):
-        self.plot(args)
+    def save(self, fig_path: str, dark_theme: bool, **args: Any):
+        self.plot(**args)
         plt.savefig(fig_path, facecolor=self._color(dark_theme), pad_inches=0.0, bbox_inches="tight")
diff --git a/cloudia/word_data.py b/cloudia/word_data.py
@@ -1,6 +1,8 @@
+from typing import Any, List, Tuple, Dict
+import re
+
 from collections import Counter
 import pandas as pd
-import re
 from wurlitzer import pipes
 
 with pipes() as (out, err):
@@ -9,20 +11,21 @@
 
 
 class WordData:
-    def __init__(self, data, single_words, stop_words, extract_postags, word_num, parser, parse_func, sampling_rate):
-        self.words, self.names = self._init_data(data, sampling_rate)
+    def __init__(self, data: Any, single_words: List[str], stop_words: List[str], extract_postags: List[str], word_num: int, parser: Any, parse_func: Any,
+                 sampling_rate: float):
+        words, self.names = self._init_data(data, sampling_rate)
         self.word_num = word_num
         self.single_words = single_words
         self.extract_postags = extract_postags
         self.stop_words = stop_words
         self.parser = nagisa.Tagger(single_word_list=self.single_words) if not parser else parser
         self.num_regex = re.compile('^[0-9]+$')
         if parse_func:
-            self.words = [self.count(parse_func(x)) for x in self.words]
+            self.words = [self.count(parse_func(x)) for x in words]
         else:
-            self.words = [self.count(self.parse(x)) for x in self.words]
+            self.words = [self.count(self.parse(x)) for x in words]
 
-    def _init_data(self, data, sampling_rate):
+    def _init_data(self, data: Any, sampling_rate: float) -> Tuple[List[str], List[str]]:
         words, names = [], []
         if isinstance(data, list):
             if isinstance(data[0], tuple):
@@ -53,14 +56,14 @@ def _init_data(self, data, sampling_rate):
 
         return words, names
 
-    def count(self, words):
+    def count(self, words: List[str]) -> Dict[str, float]:
         c = Counter(words).most_common()
         _max_count = c[0][1]
         weight = {k: v / _max_count for k, v in c if k not in self.stop_words}
         weight = {k: weight[k] for k in list(weight.keys())[:self.word_num]}
         return weight
 
-    def parse(self, text):
+    def parse(self, text: str) -> List[str]:
         for x in ['"', ';', ',', '(', ')', '\u3000']:
             text = text.replace(x, ' ')
         text = text.lower()

diff --git a/test/unit_test/test_main.py b/test/unit_test/test_main.py
@@ -1,10 +1,48 @@
-from cloudia.main import Cloudia
+from cloudia.main import CloudiaBase
 import unittest
 
 
 class TestCloudia(unittest.TestCase):
+    # TODO: split test case
     def setUp(self):
-        self.cls = Cloudia('test')
+        self.cls = CloudiaBase('test')
+
+    def test_calc_fig_size(self):
+        # row_num==item_num==1
+        output = self.cls._calc_fig_size(1, 1, 1)
+        self.assertTupleEqual(output, (10, 6))
+
+        # rate
+        output = self.cls._calc_fig_size(1, 1, 2)
+        self.assertTupleEqual(output, (20, 12))
+
+        # row_num == 1 with item_num > row_num (falls through to the final return)
+        output = self.cls._calc_fig_size(1, 2, 1)
+        self.assertTupleEqual(output, (5, 9))
+
+        output = self.cls._calc_fig_size(1, 2, 2)
+        self.assertTupleEqual(output, (10, 18))
+
+        # item_num > row_num and item_num // row_num + 1 == row_num (final return)
+        output = self.cls._calc_fig_size(2, 3, 1)
+        self.assertTupleEqual(output, (10, 6))
+
+        output = self.cls._calc_fig_size(2, 3, 2)
+        self.assertTupleEqual(output, (20, 12))
+
+        # else
+        output = self.cls._calc_fig_size(3, 10, 1)
+        self.assertTupleEqual(output, (15, 12))
+
+        output = self.cls._calc_fig_size(3, 10, 2)
+        self.assertTupleEqual(output, (30, 24))
+
+    def test_calc_wc_size(self):
+        output = self.cls._calc_wc_size(1)
+        self.assertTupleEqual(output, (500, 300))
+
+        output = self.cls._calc_wc_size(2)
+        self.assertTupleEqual(output, (1000, 600))
 
     def test_calc_sub_plot_dimensions(self):
         output = self.cls._calc_sub_plot_dimensions(10, 3)