Skip to content

Commit

Permalink
Merge pull request #11 from vaaaaanquish/calc_auto_figsize
Browse files Browse the repository at this point in the history
Calc auto figsize
  • Loading branch information
vaaaaanquish authored May 6, 2020
2 parents 3af064f + daaae54 commit 08aeab4
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 75 deletions.
12 changes: 10 additions & 2 deletions .github/workflows/python_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ jobs:
- name: Install poetry dependencies
run: |
python -m pip install --upgrade pip
pip install poetry yapf poetry-dynamic-versioning
pip install poetry yapf mypy
poetry install
poetry run python -m pip install git+https://github.com/vaaaaanquish/japanize-matplotlib
- name: Lint with yapf
run: |
diff=$(yapf -dr --style=./yapf.ini ./cloudia/)
Expand All @@ -37,3 +36,12 @@ jobs:
- name: Test with pytest
run: |
poetry run python -m unittest discover -s ./test/unit_test/
- name: Test with mypy
run: |
mypy --ignore-missing-imports ./cloudia/
if [ $? != 0 ]; then
echo "failed: mypy"
exit 1
fi
echo "pass mypy"
11 changes: 4 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,9 @@ plot method args.
```
Cloudia().plot(
dark_theme=False, # color theme
figsize=(7.2, 4.8), # plt figure size
wcsize=(720, 480), # one wordcloud ax size
title_size=12, # title text size
row_num=3, # for example, 12 word clouds with row_num=3 -> 4*3 image grid
figsize_rate=2 # scale factor for the figure size
)
```

Expand All @@ -115,10 +114,9 @@ save method args.
Cloudia().save(
file_path, # save figure image path
dark_theme=False,
figsize=(7.2, 4.8),
wcsize=(720, 480),
title_size=12,
row_num=3
row_num=3,
figsize_rate=2
)
```

Expand All @@ -133,10 +131,9 @@ DataFrame.wc.plot(
parse_func=None, # split text function, example: lambda x: x.split(',')
sampling_rate=sampling_rate, # sampling fraction (pandas.DataFrame.sample frac)
dark_theme=False, # color theme
figsize=(7.2, 4.8), # plt figure size
wcsize=(720, 480), # one wordcloud ax size
title_size=12, # title text size
row_num=3, # for example, 12 word clouds with row_num=3 -> 4*3 image grid
figsize_rate=2 # scale factor for the figure size
)
```
When using wc.save, also pass the output file path as the first argument (the `fig_path` parameter).
Expand Down
65 changes: 42 additions & 23 deletions cloudia/main.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
from typing import Any, List, Tuple

import matplotlib.pyplot as plt
import japanize_matplotlib
from wordcloud import WordCloud, STOPWORDS
from cloudia.word_data import WordData


class Cloudia:
class CloudiaBase:
def __init__(self,
data,
single_words=[],
stop_words=STOPWORDS,
extract_postags=['名詞', '英単語', 'ローマ字文'],
word_num=100,
parser=None,
parse_func=None,
sampling_rate=1.0):
data: Any,
single_words: List[str] = [],
stop_words: List[str] = STOPWORDS,
extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
word_num: int = 100,
parser: Any = None,
parse_func: Any = None,
sampling_rate: float = 1.0):
self.wd = WordData(data=data,
single_words=single_words,
stop_words=stop_words,
Expand All @@ -23,17 +25,9 @@ def __init__(self,
parse_func=parse_func,
sampling_rate=sampling_rate)

def plot(self, dark_theme=False, figsize=(7.2, 4.8), wcsize=(720, 480), title_size=12, row_num=3):
wc = self.make_wordcloud(dark_theme, wcsize)
self.make_fig(wc, dark_theme, figsize, title_size, row_num)

def save(self, fig_path, dark_theme=False, figsize=(7.2, 4.8), wcsize=(720, 480), title_size=12, row_num=3):
wc = self.make_wordcloud(dark_theme, wcsize)
self.make_fig(wc, dark_theme, figsize, title_size, row_num)
plt.savefig(fig_path, facecolor=self._color(dark_theme), pad_inches=0.0, bbox_inches="tight")

def make_wordcloud(self, dark_theme, wcsize):
def make_wordcloud(self, dark_theme: bool, rate: int) -> List[Tuple[str, WordCloud]]:
wordcloud_list = []
wcsize = self._calc_wc_size(rate)
for name, words in self.wd:
wordcloud = WordCloud(font_path=japanize_matplotlib.get_font_ttf_path(),
background_color=self._color(dark_theme),
Expand All @@ -43,8 +37,8 @@ def make_wordcloud(self, dark_theme, wcsize):
wordcloud_list.append((name, wordcloud))
return wordcloud_list

def make_fig(self, wordcloud_list, dark_theme, figsize, title_size, row_num):
fig = plt.figure(facecolor=self._color(dark_theme), figsize=figsize)
def make_fig(self, wordcloud_list: List[Tuple[str, WordCloud]], dark_theme: bool, title_size: int, row_num: int, rate: int):
fig = plt.figure(facecolor=self._color(dark_theme), figsize=self._calc_fig_size(row_num, len(wordcloud_list), rate))
w, h = self._calc_sub_plot_dimensions(len(wordcloud_list), row_num)
for i, (title, wc) in enumerate(wordcloud_list):
ax = fig.add_subplot(w, h, i + 1)
Expand All @@ -53,11 +47,36 @@ def make_fig(self, wordcloud_list, dark_theme, figsize, title_size, row_num):
ax.axis('off')

@staticmethod
def _calc_sub_plot_dimensions(l, row_num):
def _calc_fig_size(row_num: int, item_num: int, rate: int) -> Tuple[int, int]:
if row_num == 1 and item_num == 1:
return rate * 5 * 2, rate * 3 * 2
if item_num <= row_num:
return rate * 5 * item_num, rate * 3 * item_num
elif item_num // row_num + 1 < row_num:
return rate * 5 * row_num, rate * 3 * ((item_num // row_num + 1) % row_num)
return rate * 5 * row_num, rate * 3 * (row_num + ((item_num // row_num + 1) - row_num))

@staticmethod
def _calc_wc_size(rate: int) -> Tuple[int, int]:
return rate * 5 * 100, rate * 3 * 100

@staticmethod
def _calc_sub_plot_dimensions(l: int, row_num: int) -> Tuple[int, int]:
return (l // row_num) + 1, row_num if l > row_num else l

@staticmethod
def _color(dark_theme, text=False):
def _color(dark_theme: bool, text: bool = False) -> str:
if text:
return 'white' if dark_theme else 'black'
return 'black' if dark_theme else 'white'


class Cloudia(CloudiaBase):
    """User-facing entry point adding ``plot`` and ``save`` on top of ``CloudiaBase``."""

    def plot(self, dark_theme: bool = False, title_size: int = 12, row_num: int = 3, figsize_rate: int = 2):
        """Build the word clouds and draw them on a new matplotlib figure.

        Args:
            dark_theme: colour theme switch.
            title_size: passed through to ``make_fig`` as the title size.
            row_num: grid layout option passed to ``make_fig``.
            figsize_rate: size multiplier for both the word clouds and the figure.
        """
        clouds = self.make_wordcloud(dark_theme, figsize_rate)
        self.make_fig(clouds, dark_theme, title_size, row_num, figsize_rate)

    def save(self, fig_path: str, dark_theme: bool = False, title_size: int = 12, row_num: int = 3, figsize_rate: int = 2):
        """Draw the figure exactly like ``plot`` and write it to ``fig_path``.

        The saved image uses the theme's background colour as its face colour.
        """
        clouds = self.make_wordcloud(dark_theme, figsize_rate)
        self.make_fig(clouds, dark_theme, title_size, row_num, figsize_rate)
        background = self._color(dark_theme)
        plt.savefig(fig_path, facecolor=background, pad_inches=0.0, bbox_inches="tight")
66 changes: 33 additions & 33 deletions cloudia/pandas_accessor.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,56 @@
from cloudia.main import Cloudia
from typing import Any, List

from cloudia.main import CloudiaBase, Cloudia
import matplotlib.pyplot as plt
from wordcloud import STOPWORDS
import pandas as pd


@pd.api.extensions.register_dataframe_accessor('wc')
class CloudiaDataFrame(Cloudia):
class CloudiaDataFrame(CloudiaBase):
def __init__(self, df):
self.df = df

def plot(self,
single_words=[],
stop_words=STOPWORDS,
extract_postags=['名詞', '英単語', 'ローマ字文'],
word_num=100,
parser=None,
parse_func=None,
sampling_rate=1.0,
dark_theme=False,
figsize=(7.2, 4.8),
wcsize=(720, 480),
title_size=12,
row_num=3):
single_words: List[str] = [],
stop_words: List[str] = STOPWORDS,
extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
word_num: int = 100,
parser: Any = None,
parse_func: Any = None,
sampling_rate: float = 1.0,
dark_theme: bool = False,
title_size: int = 12,
row_num: int = 3,
figsize_rate: int = 2):
Cloudia(self.df, single_words, stop_words, extract_postags, word_num, parser, parse_func,
sampling_rate).plot(dark_theme, figsize, wcsize, title_size, row_num)
sampling_rate).plot(dark_theme, title_size, row_num, figsize_rate)

def save(self, fig_path, dark_theme, **args):
self.plot(args)
def save(self, fig_path: str, dark_theme: bool = False, **args: Any):
    """Plot the word clouds for the wrapped DataFrame and save the figure.

    Args:
        fig_path: destination path handed to ``plt.savefig``.
        dark_theme: colour theme, applied to both the plot and the saved
            figure's face colour. Defaults to ``False`` like ``Cloudia.save``.
        **args: remaining keyword arguments, forwarded to ``plot``.
    """
    # Forward the theme to plot() as well: otherwise the clouds and titles
    # are drawn with plot()'s default light theme while only the saved
    # background follows ``dark_theme``.
    self.plot(dark_theme=dark_theme, **args)
    plt.savefig(fig_path, facecolor=self._color(dark_theme), pad_inches=0.0, bbox_inches="tight")


@pd.api.extensions.register_series_accessor('wc')
class CloudiaSeries(Cloudia):
class CloudiaSeries(CloudiaBase):
def __init__(self, series):
self.series = series

def plot(self,
single_words=[],
stop_words=STOPWORDS,
extract_postags=['名詞', '英単語', 'ローマ字文'],
word_num=100,
parser=None,
parse_func=None,
sampling_rate=1.0,
dark_theme=False,
figsize=(7.2, 4.8),
wcsize=(720, 480),
title_size=12,
row_num=3):
single_words: List[str] = [],
stop_words: List[str] = STOPWORDS,
extract_postags: List[str] = ['名詞', '英単語', 'ローマ字文'],
word_num: int = 100,
parser: Any = None,
parse_func: Any = None,
sampling_rate: float = 1.0,
dark_theme: bool = False,
title_size: int = 12,
row_num: int = 3,
figsize_rate: int = 2):
Cloudia(self.series, single_words, stop_words, extract_postags, word_num, parser, parse_func,
sampling_rate).plot(dark_theme, figsize, wcsize, title_size, row_num)
sampling_rate).plot(dark_theme, title_size, row_num, figsize_rate)

def save(self, fig_path, dark_theme, **args):
self.plot(args)
def save(self, fig_path: str, dark_theme: bool = False, **args: Any):
    """Plot the word clouds for the wrapped Series and save the figure.

    Args:
        fig_path: destination path handed to ``plt.savefig``.
        dark_theme: colour theme, applied to both the plot and the saved
            figure's face colour. Defaults to ``False`` like ``Cloudia.save``.
        **args: remaining keyword arguments, forwarded to ``plot``.
    """
    # Forward the theme to plot() as well: otherwise the clouds and titles
    # are drawn with plot()'s default light theme while only the saved
    # background follows ``dark_theme``.
    self.plot(dark_theme=dark_theme, **args)
    plt.savefig(fig_path, facecolor=self._color(dark_theme), pad_inches=0.0, bbox_inches="tight")
19 changes: 11 additions & 8 deletions cloudia/word_data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from typing import Any, List, Tuple, Dict
import re

from collections import Counter
import pandas as pd
import re
from wurlitzer import pipes

with pipes() as (out, err):
Expand All @@ -9,20 +11,21 @@


class WordData:
def __init__(self, data, single_words, stop_words, extract_postags, word_num, parser, parse_func, sampling_rate):
self.words, self.names = self._init_data(data, sampling_rate)
def __init__(self, data: Any, single_words: List[str], stop_words: List[str], extract_postags: List[str], word_num: int, parser: Any, parse_func: Any,
sampling_rate: float):
words, self.names = self._init_data(data, sampling_rate)
self.word_num = word_num
self.single_words = single_words
self.extract_postags = extract_postags
self.stop_words = stop_words
self.parser = nagisa.Tagger(single_word_list=self.single_words) if not parser else parser
self.num_regex = re.compile('^[0-9]+$')
if parse_func:
self.words = [self.count(parse_func(x)) for x in self.words]
self.words = [self.count(parse_func(x)) for x in words]
else:
self.words = [self.count(self.parse(x)) for x in self.words]
self.words = [self.count(self.parse(x)) for x in words]

def _init_data(self, data, sampling_rate):
def _init_data(self, data: Any, sampling_rate: float) -> Tuple[List[str], List[str]]:
words, names = [], []
if isinstance(data, list):
if isinstance(data[0], tuple):
Expand Down Expand Up @@ -53,14 +56,14 @@ def _init_data(self, data, sampling_rate):

return words, names

def count(self, words):
def count(self, words: List[str]) -> Dict[str, float]:
c = Counter(words).most_common()
_max_count = c[0][1]
weight = {k: v / _max_count for k, v in c if k not in self.stop_words}
weight = {k: weight[k] for k in list(weight.keys())[:self.word_num]}
return weight

def parse(self, text):
def parse(self, text: str) -> List[str]:
for x in ['"', ';', ',', '(', ')', '\u3000']:
text = text.replace(x, ' ')
text = text.lower()
Expand Down
42 changes: 40 additions & 2 deletions test/unit_test/test_main.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,48 @@
from cloudia.main import Cloudia
from cloudia.main import CloudiaBase
import unittest


class TestCloudia(unittest.TestCase):
# TODO: split test case
def setUp(self):
self.cls = Cloudia('test')
self.cls = CloudiaBase('test')

def test_calc_fig_size(self):
    """Check ``_calc_fig_size(row_num, item_num, rate)`` on every branch, at rate 1 and 2."""
    cases = [
        # row_num == item_num == 1
        ((1, 1, 1), (10, 6)),
        ((1, 1, 2), (20, 12)),
        # item_num <= row_num
        ((3, 2, 1), (10, 6)),
        ((3, 2, 2), (20, 12)),
        # item_num > row_num and item_num // row_num + 1 < row_num
        ((4, 5, 1), (20, 6)),
        ((4, 5, 2), (40, 12)),
        # item_num > row_num, final branch
        ((1, 2, 1), (5, 9)),
        ((1, 2, 2), (10, 18)),
        ((2, 3, 1), (10, 6)),
        ((2, 3, 2), (20, 12)),
        ((3, 10, 1), (15, 12)),
        ((3, 10, 2), (30, 24)),
    ]
    for args, expected in cases:
        with self.subTest(args=args):
            self.assertTupleEqual(self.cls._calc_fig_size(*args), expected)

def test_calc_wc_size(self):
    """Check that the word cloud size is 500 x 300 multiplied by the rate."""
    expectations = (
        (1, (500, 300)),
        (2, (1000, 600)),
    )
    for rate, expected in expectations:
        self.assertTupleEqual(self.cls._calc_wc_size(rate), expected)

def test_calc_sub_plot_dimensions(self):
output = self.cls._calc_sub_plot_dimensions(10, 3)
Expand Down

0 comments on commit 08aeab4

Please sign in to comment.