Source code for simplechinese.visualization
import pandas as pd
from wordcloud import WordCloud
from collections import Counter
from matplotlib.colors import LinearSegmentedColormap as lsg
import matplotlib.pyplot as plt
from .nlp import extract_words
[docs]def wordcloud(
x: pd.Series,
font_path: str = None,
width: int = 400,
height: int = 200,
max_words=200,
mask=None,
contour_width=0,
contour_color="white",
background_color="white",
relative_scaling="auto",
colormap=None,
return_figure=False,
):
s = extract_words(x, token=" ")
text = s.str.cat(sep=" ")
if colormap is None:
# Custom palette.
# TODO move it under tools.
corn = (255.0 / 256, 242.0 / 256, 117.0 / 256)
mango_tango = (255.0 / 256, 140.0 / 256, 66.0 / 256)
crayola = (63.0 / 256, 136.0 / 256, 197.0 / 256)
crimson = (215.0 / 256, 38.0 / 256, 61.0 / 256)
oxford_blue = (2.0 / 256, 24.0 / 256, 43.0 / 256)
texthero_cm = lsg.from_list(
"texthero", [corn, mango_tango, crayola, crimson, oxford_blue]
)
colormap = texthero_cm
words = s.str.cat(sep=" ").split()
wordcloud = WordCloud(
font_path=font_path,
width=width,
height=height,
max_words=max_words,
mask=mask,
contour_width=contour_width,
contour_color=contour_color,
background_color=background_color,
relative_scaling=relative_scaling,
colormap=colormap,
# stopwords=[], # TODO. Will use generate from frequencies.
# normalize_plurals=False, # TODO.
).generate_from_frequencies(dict(Counter(words)))
# fig = px.imshow(wordcloud)
# fig.show()
fig, ax = plt.subplots(figsize=(20, 10))
ax.imshow(wordcloud, interpolation="bilinear")
ax.axis("off")
if return_figure:
return fig