Source code for simplechinese.visualization

import pandas as pd

from wordcloud import WordCloud
from collections import Counter

from matplotlib.colors import LinearSegmentedColormap as lsg
import matplotlib.pyplot as plt

from .nlp import extract_words

def wordcloud(
    x: pd.Series,
    font_path: str = None,
    width: int = 400,
    height: int = 200,
    max_words=200,
    mask=None,
    contour_width=0,
    contour_color="white",
    background_color="white",
    relative_scaling="auto",
    colormap=None,
    return_figure=False,
):
    """Draw a word cloud of the words found in a text Series.

    The Series is passed through ``extract_words(x, token=" ")``, the
    resulting strings are joined with single spaces and split on
    whitespace, and the per-word counts are handed to
    ``WordCloud.generate_from_frequencies``. The image is drawn on a new
    matplotlib figure with the axes hidden.

    Parameters
    ----------
    x : pd.Series
        Input texts.
    font_path : str, optional
        Forwarded to ``WordCloud(font_path=...)``.
        NOTE(review): a CJK-capable font is presumably needed for Chinese
        glyphs to render -- confirm against the wordcloud defaults.
    width, height : int
        Forwarded to ``WordCloud``. The matplotlib figure itself is always
        created with ``figsize=(20, 10)``.
    max_words, mask, contour_width, contour_color, background_color,
    relative_scaling :
        Forwarded unchanged to ``WordCloud``.
    colormap : optional
        Forwarded to ``WordCloud``. When ``None``, a five-colour
        "texthero" ``LinearSegmentedColormap`` is built and used instead.
    return_figure : bool
        When true, return the matplotlib figure; otherwise return ``None``.

    Returns
    -------
    matplotlib.figure.Figure or None
        The figure holding the word cloud if ``return_figure`` is true.
    """
    s = extract_words(x, token=" ")

    if colormap is None:
        # Custom palette.
        # TODO move it under tools.
        # NOTE(review): components are scaled by 256 rather than 255; kept
        # as-is so the rendered colours do not change.
        corn = (255.0 / 256, 242.0 / 256, 117.0 / 256)
        mango_tango = (255.0 / 256, 140.0 / 256, 66.0 / 256)
        crayola = (63.0 / 256, 136.0 / 256, 197.0 / 256)
        crimson = (215.0 / 256, 38.0 / 256, 61.0 / 256)
        oxford_blue = (2.0 / 256, 24.0 / 256, 43.0 / 256)
        colormap = lsg.from_list(
            "texthero", [corn, mango_tango, crayola, crimson, oxford_blue]
        )

    # Join once and split on whitespace; the counts drive the word sizes.
    words = s.str.cat(sep=" ").split()
    frequencies = dict(Counter(words))

    # TODO: stopwords / normalize_plurals are not configured here because
    # the cloud is generated from precomputed frequencies.
    cloud = WordCloud(
        font_path=font_path,
        width=width,
        height=height,
        max_words=max_words,
        mask=mask,
        contour_width=contour_width,
        contour_color=contour_color,
        background_color=background_color,
        relative_scaling=relative_scaling,
        colormap=colormap,
    ).generate_from_frequencies(frequencies)

    fig, ax = plt.subplots(figsize=(20, 10))
    ax.imshow(cloud, interpolation="bilinear")
    ax.axis("off")

    if return_figure:
        return fig