-
-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #15 from vaaaaanquish/joblib_sample
using multiprocessing
- Loading branch information
Showing
12 changed files
with
239 additions
and
105 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
from collections import Counter | ||
from typing import List | ||
import re | ||
|
||
from wurlitzer import pipes | ||
|
||
# Import nagisa while stdout/stderr are captured by wurlitzer, so output
# emitted during the import is not shown to the user.
# See https://github.com/clab/dynet/issues/1528
with pipes() as (out, err):
    import nagisa

# Matches tokens made up solely of the digits 0-9.
NUM_REGEX = re.compile('^[0-9]+$')
|
||
|
||
def default_parse_func(text: str, single_words: List[str], extract_postags: List[str], stop_words: List[str]) -> List[str]:
    """Tokenize ``text`` with nagisa and return the filtered words.

    Args:
        text: Raw input text.
        single_words: Forwarded to ``nagisa.Tagger`` as ``single_word_list``.
        extract_postags: Forwarded to ``Tagger.extract`` as ``extract_postags``.
        stop_words: Words to leave out of the result.

    Returns:
        The extracted words in their original order, skipping words of
        length one or less, words matched by ``NUM_REGEX`` (digits only),
        and words listed in ``stop_words``.
    """
    tagger = nagisa.Tagger(single_word_list=single_words)

    # Turn quote/punctuation characters and the ideographic space (\u3000)
    # into plain spaces in a single pass, then lowercase the text.
    to_space = str.maketrans({ch: ' ' for ch in ('"', ';', ',', '(', ')', '\u3000')})
    normalized = text.translate(to_space).lower()

    extracted = tagger.extract(normalized, extract_postags=extract_postags).words

    kept = []
    for word in extracted:
        if len(word) <= 1:
            continue
        if NUM_REGEX.match(word):
            continue
        if word in stop_words:
            continue
        kept.append(word)
    return kept
|
||
|
||
def function_wrapper(func):
    """Adapt ``func`` so its result is counted and tagged with an index.

    The returned callable accepts the text ``t`` plus keyword arguments.
    The ``_index`` keyword is required: it is removed from the keyword
    arguments and handed back untouched, while the remaining keyword
    arguments are forwarded to ``func``.

    Args:
        func: Callable invoked as ``func(t, **kwargs)`` that returns an
            iterable of hashable items.

    Returns:
        A callable returning ``(Counter(func(t, **kwargs)), _index)``.

    Raises:
        KeyError: From the returned callable when ``_index`` is not given.
    """
    def _f(t, **kwargs):
        # Strip the bookkeeping key before delegating; ``func`` never sees it.
        position = kwargs.pop('_index')
        return Counter(func(t, **kwargs)), position

    return _f
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import sys | ||
import pathlib | ||
import traceback | ||
import pandas as pd | ||
|
||
if __name__ == '__main__':
    # Make the directory two levels above this script's folder importable
    # so that ``cloudia`` can be imported from the source tree.
    script_dir = pathlib.Path(__file__).resolve().parent
    sys.path.append(str(script_dir.parents[1]))
    import cloudia  # noqa  (imported for its side effects; presumably provides the ``.wc`` accessor used below)

    # Smoke test: call ``.wc.plot`` on a DataFrame, a DataFrame column and a
    # Series, with multiprocessing on and then off. Any exception is printed
    # and reported through a non-zero exit status.
    exit_status = 0
    try:
        for multiprocess in (True, False):
            frame = pd.DataFrame({'test': ['hoge']})
            frame.wc.plot(multiprocess=multiprocess)

            column = pd.DataFrame({'test': ['hoge']})['test']
            column.wc.plot(multiprocess=multiprocess)

            series = pd.Series(['hoge'])
            series.wc.plot(multiprocess=multiprocess)
    except Exception:
        traceback.print_exc()
        exit_status = 1
    sys.exit(exit_status)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import sys | ||
import pathlib | ||
import traceback | ||
import pandas as pd | ||
|
||
if __name__ == '__main__':
    # Make the directory two levels above this script's folder importable
    # so that ``cloudia`` can be imported from the source tree.
    script_dir = pathlib.Path(__file__).resolve().parent
    sys.path.append(str(script_dir.parents[1]))
    from cloudia.main import Cloudia

    # Smoke test: build a Cloudia from each input shape exercised here and
    # plot it, with multiprocessing on and then off. Any exception is printed
    # and reported through a non-zero exit status.
    exit_status = 0
    try:
        for multiprocess in (True, False):
            # Inputs are rebuilt on every pass so each run gets fresh objects.
            inputs = [
                [('test', pd.Series(['hoge']))],
                [('test', 'hoge')],
                ['hoge'],
                [pd.Series(['hoge'])],
                'hoge',
                ('test', 'hoge'),
                pd.DataFrame({'test': ['hoge']}),
                pd.Series(['hoge']),
            ]
            for data in inputs:
                Cloudia(data, multiprocess=multiprocess).plot()
    except Exception:
        traceback.print_exc()
        exit_status = 1
    sys.exit(exit_status)
Oops, something went wrong.