Skip to content

Latest commit

 

History

History
79 lines (60 loc) · 2.13 KB

Word_cloud_Using_R.md

File metadata and controls

79 lines (60 loc) · 2.13 KB

Word Cloud in R

Define the data

# Sample text for the word cloud. It deliberately mixes punctuation ("@", "?"),
# an upper-case letter ("Stat") and a repeated word ("habib") so the cleaning
# steps and the frequency count further down have something to act on.
dat <- "habib reza @ ? 
        negin raziye mohammad Stat mehdi nazanin habib"
library(tm)
Loading required package: NLP
library(wordcloud)
Loading required package: RColorBrewer
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ ggplot2::annotate() masks NLP::annotate()
✖ dplyr::filter()     masks stats::filter()
✖ dplyr::lag()        masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Wrap the raw text in a tm corpus, then lower-case it so the same word written
# in different cases is counted as a single term.
# NOTE(review): each tm_map() call on this corpus reports "transformation drops
# documents"; building it with VCorpus() is the commonly suggested way to avoid
# that warning -- confirm before changing.
coo <- VectorSource(dat) |>
  Corpus()
coo <- coo |>
  tm_map(content_transformer(tolower))
Warning in tm_map.SimpleCorpus(coo, content_transformer(tolower)):
transformation drops documents
# Strip punctuation characters from the corpus text.
coo <- coo |>
  tm_map(removePunctuation)
Warning in tm_map.SimpleCorpus(coo, removePunctuation): transformation drops
documents
# Drop digits so that numbers do not show up as terms.
coo <- coo |>
  tm_map(removeNumbers)
Warning in tm_map.SimpleCorpus(coo, removeNumbers): transformation drops
documents
# Remove the English stop words supplied by tm from the corpus.
coo <- coo |>
  tm_map(
    removeWords,
    stopwords("english")
  )
Warning in tm_map.SimpleCorpus(coo, removeWords, stopwords("english")):
transformation drops documents
# Build the term-document matrix from the cleaned corpus and convert it to a
# plain matrix so the per-term counts can be summed row by row.
tdm <- TermDocumentMatrix(coo)
m <- as.matrix(tdm)

# Total count of each term, most frequent first.
# `TRUE`/`FALSE` are written out in full: `T` and `F` are ordinary variables
# that can be reassigned, whereas `TRUE` and `FALSE` are reserved words.
word_freqs <- sort(rowSums(m), decreasing = TRUE)
df <- data.frame(word = names(word_freqs), freq = word_freqs)

# Draw the cloud. `min.freq = 1` keeps every term, `random.order = FALSE`
# turns off random plotting order, and `rot.per = 0.35` is the proportion of
# words drawn rotated.
wordcloud(
  words = df$word, freq = df$freq, min.freq = 1,
  max.words = 200, random.order = FALSE,
  rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)