
Merge branch 'cleanup-2019' of janwey/ilfs-data into master

Branch: master
janwey, 7 months ago
commit 29332f7e18
5 changed files with 313 additions and 403 deletions
  1. collecto.R            (+0, -399)
  2. scripts/collecto.R    (+241, -0)
  3. scripts/functions.R   (+70, -0)
  4. scripts/plotte.R      (+2, -4)
  5. scripts/word_cloud.py (+0, -0)

collecto.R (+0, -399)

@@ -1,399 +0,0 @@
################################################################################
# Copyright (c) 2018 Free Software Foundation Europe e.V. <contact@fsfe.org>
# Author 2018 Jan Weymeirsch <janwey@fsfe.org>
# Author 2018 Vincent Lequertier <vincent@fsfe.org>
# SPDX-License-Identifier: GPL-3.0
################################################################################

### Loading Packages {{{ ----
#### Twitter
install.packages("rtweet")
library("rtweet")
# had to install "httr" via packagemanager

#### Fediverse (eg: mastodon)
install.packages("curl")
library("curl")
install.packages("rjson")
library(rjson)

### Reddit
install.packages("RedditExtractoR")
library("RedditExtractoR")
# }}}

## Twitter Collector {{{ ----

### Authenticate to Twitter
#### Manual input (uncomment if needed)
#twitter_consumerkey <- readline("[Twitter] Enter your consumer API key.")
#twitter_consumerpri <- readline("[Twitter] Enter your consumer API secret.")
#twitter_tokenaccess <- readline("[Twitter] Enter your Access Token.")
#twitter_tokensecret <- readline("[Twitter] Enter your Token Secret.")
#### Saved credentials
twitter_api_cred <- read.table(file = "./twitter_api.txt", header = TRUE, sep = ";")
twitter_consumerkey <- as.character(twitter_api_cred$consumer_key)
twitter_consumerpri <- as.character(twitter_api_cred$consumer_private)
twitter_appname <- as.character(twitter_api_cred$appname)

twitter_token <- create_token(app = twitter_appname,
consumer_key = twitter_consumerkey,
consumer_secret = twitter_consumerpri)

# Note -------------------------------------------------------------------------
# Please refer to the Documentation on where to receive your API credentials.
# ------------------------------------------------------------------------------

### Collecting Tweets
twitter_tw <- search_tweets(q = "#ilovefs",
n = 9999,
include_rts = FALSE)

twitter_number <- length(twitter_tw$text)
text <- twitter_tw$text
user <- twitter_tw$screen_name
clnt <- twitter_tw$source
favs <- twitter_tw$favorite_count
retw <- twitter_tw$retweet_count
lang <- twitter_tw$lang
fdat <- twitter_tw$created_at

link <- vector(mode = "character", length = twitter_number)
murl <- vector(mode = "character", length = twitter_number)
for(i in 1:twitter_number){
link[i] <- twitter_tw$urls_expanded_url[[i]][1]
murl[i] <- twitter_tw$media_expanded_url[[i]][1]
}

### Forming variables for dataframe
time <- sub(pattern = ".* ", x = fdat, replace = "")
time <- gsub(pattern = ":", x = time, replace = "")
date <- sub(pattern = " .*", x = fdat, replace = "")
date <- gsub(pattern = "-", x = date, replace = "")

### Only include Tweets from this year's campaign week
twitter_exclude <- which(as.numeric(date) > 20180216 | as.numeric(date) < 20180210)
date <- date[-twitter_exclude]
time <- time[-twitter_exclude]
fdat <- fdat[-twitter_exclude]
retw <- retw[-twitter_exclude]
favs <- favs[-twitter_exclude]
text <- text[-twitter_exclude]
lang <- lang[-twitter_exclude]
murl <- murl[-twitter_exclude]
link <- link[-twitter_exclude]
clnt <- clnt[-twitter_exclude]
user <- user[-twitter_exclude]

### Creating dataframe
twitter <- data.frame(cbind(date, time, fdat, retw, favs, text, lang, murl, link, clnt, user))

#### Clean-Up
rm(list = c("date", "time", "fdat", "retw", "favs", "text", "link", "murl", "lang", "clnt", "user"))

twitter <- within(data = twitter, expr = {
date <- as.character(date);
time <- as.character(time);
fdat <- as.character(fdat);
retw <- as.character(retw);
favs <- as.character(favs);
text <- as.character(text);
link <- as.character(link);
murl <- as.character(murl);
lang <- as.character(lang);
clnt <- as.character(clnt);
user <- as.character(user);
})
# }}}

## Mastodon Collector {{{ ----

mastodon.fetchdata <- function(data){

tmp_datetime <- c()
tmp_lang <- c()
tmp_inst <- c()
tmp_link <- c()
tmp_text <- c()
tmp_reto <- c()
tmp_favs <- c()
tmp_murl <- c()
tmp_acct <- c()
for(i in 1:length(data)){

#### Time and Date of Toot
if(length(data[[i]]$created_at) > 0){
tmp_datetime[i] <- data[[i]]$created_at
} else {
# insert empty value, if it does not exist
tmp_datetime[i] <- NA
}

#### Language of Toot
if(length(data[[i]]$language) > 0){
tmp_lang[i] <- data[[i]]$language
} else {
# insert empty value, if it does not exist
tmp_lang[i] <- NA
}

#### Instance of Toot
if(length(data[[i]]$uri) > 0){
tmp_inst[i] <- data[[i]]$uri
} else {
# insert empty value, if it does not exist
tmp_inst[i] <- NA
}

#### URL of Toot
if(length(data[[i]]$url) > 0){
tmp_link[i] <- data[[i]]$url
} else {
# insert empty value, if it does not exist
tmp_link[i] <- NA
}

#### Text/Content of Toot
if(length(data[[i]]$content) > 0){
tmp_text[i] <- data[[i]]$content
} else {
# insert empty value, if it does not exist
tmp_text[i] <- NA
}

#### Number of Retoots
if(length(data[[i]]$reblogs_count) > 0){
tmp_reto[i] <- data[[i]]$reblogs_count
} else {
# insert empty value, if it does not exist
tmp_reto[i] <- NA
}

#### Number of Favorites
if(length(data[[i]]$favourites_count) > 0){
tmp_favs[i] <- data[[i]]$favourites_count
} else {
# insert empty value, if it does not exist
tmp_favs[i] <- NA
}

#### Media-URL of Toot
if(length(data[[i]]$media_attachments) > 0){
tmp_murl[i] <- data[[i]]$media_attachments[[1]]$url
} else {
# insert empty value, if it does not exist
tmp_murl[i] <- NA
}

#### Account of Tooter
if(length(data[[i]]$account) > 0){
tmp_acct[i] <- data[[i]]$account$acct
} else {
# insert empty value, if it does not exist
tmp_acct[i] <- NA
}

}
return(data.frame(cbind(tmp_datetime,
tmp_lang,
tmp_inst,
tmp_text,
tmp_link,
tmp_reto,
tmp_favs,
tmp_murl,
tmp_acct)))
}

datetime <- c()
lang <- c()
inst <- c()
link <- c()
text <- c()
reto <- c()
favs <- c()
murl <- c()
acct <- c()
mastodon_instance <- "https://mastodon.social"
mastodon_hashtag <- "ilovefs"
mastodon_url <- paste0(mastodon_instance,
"/api/v1/timelines/tag/",
mastodon_hashtag,
"?limit=40")
for(i in 1:999){
mastodon_reqres <- curl_fetch_memory(mastodon_url)

mastodon_rawjson <- rawToChar(mastodon_reqres$content)
toots <- fromJSON(mastodon_rawjson)

if(length(toots) > 0){
tmp_mastodon_df <- mastodon.fetchdata(data = toots)
datetime <- c(datetime, as.character(tmp_mastodon_df$tmp_datetime))
lang <- c(lang, as.character(tmp_mastodon_df$tmp_lang))
inst <- c(inst, as.character(tmp_mastodon_df$tmp_inst))
link <- c(link, as.character(tmp_mastodon_df$tmp_link))
text <- c(text, as.character(tmp_mastodon_df$tmp_text))
reto <- c(reto, as.character(tmp_mastodon_df$tmp_reto))
favs <- c(favs, as.character(tmp_mastodon_df$tmp_favs))
murl <- c(murl, as.character(tmp_mastodon_df$tmp_murl))
acct <- c(acct, as.character(tmp_mastodon_df$tmp_acct))
} else {
break
}

# Update the URL for the next iteration of the for loop so we can download
# the next toots.
mastodon_lheader <- parse_headers(mastodon_reqres$headers)[11]
mastodon_next <- sub(x = mastodon_lheader, pattern = ".*link:\ <", replace = "")
mastodon_url <- sub(x = mastodon_next, pattern = ">;\ rel=\"next\".*", replace = "")
}

### Time of post
#### date
date <- sub(pattern = "T.*", x = datetime, replacement = "")
date <- gsub(pattern = "-", x = date, replacement = "")
#### time
time <- sub(pattern = ".*T", x = datetime, replacement = "")
time <- sub(pattern = "\\..*", x = time, replacement = "")
time <- gsub(pattern = ":", x = time, replacement = "")
#### full time
fdat <- strptime(x = paste0(date, time), format = "%Y%m%d%H%M%S", tz = "CET")
fdat <- as.character(fdat)

### Removing HTML-Tags from Toots
text <- gsub(pattern = "<.*?>", x = text, replacement = "")
text <- gsub(pattern = "&nbsp;", x = text, replacement = "")

### Cleaning Instance-String
#### GNUsocial
inst <- sub(pattern = "tag:", x = inst, replacement = "")
inst <- sub(pattern = ",\\d+.*", x = inst, replacement = "")
#### Mastodon
inst <- sub(pattern = "https:\\/\\/", x = inst, replacement = "")
inst <- sub(pattern = "\\/.*", x = inst, replacement = "")

### Only include Toots from this year
mastodon_exclude <- which(as.numeric(date) < 20180210 | as.numeric(date) > 20180216)
date <- date[-mastodon_exclude]
time <- time[-mastodon_exclude]
fdat <- fdat[-mastodon_exclude]
lang <- lang[-mastodon_exclude]
inst <- inst[-mastodon_exclude]
text <- text[-mastodon_exclude]
link <- link[-mastodon_exclude]
reto <- reto[-mastodon_exclude]
favs <- favs[-mastodon_exclude]
murl <- murl[-mastodon_exclude]
acct <- acct[-mastodon_exclude]

### Creating dataframe
mastodon <- data.frame(cbind(date, time, fdat, lang, inst, text, link, reto, favs, murl, acct))

#### Clean-Up
rm(list = c("date", "time", "fdat", "lang", "inst", "text", "link", "favs", "reto", "murl", "datetime", "acct"))

mastodon <- within(data = mastodon, expr = {
date <- as.character(date);
time <- as.character(time);
fdat <- as.character(fdat);
text <- as.character(text);
link <- as.character(link);
murl <- as.character(murl);
})
# }}}

## Reddit Collector {{{ ----

### Authentication at Reddit
# no authentication necessary, hence we can directly start scraping

### Get posts on Reddit
reddit_post_dirty <- reddit_urls(search_terms = "ilovefs",
#subreddit = "freesoftware linux opensource",
cn_threshold = 0,
page_threshold = 99999,
sort_by = "new",
wait_time = 5)

### Only use posts from the current year
reddit_searchinyear <- 18 # has to have format "YY", eg "18" for "2018"
reddit_post_year <- gsub(x = reddit_post_dirty$date,
pattern = "\\d.-\\d.-",
replace = "")
reddit_post <- reddit_post_dirty[which(reddit_post_year == reddit_searchinyear),]

### Extracting relevant variables
comt <- c() # Comments / Replies
subr <- c() # Subreddit
ptns <- c() # Points / Score
ttle <- c() # Title
text <- c() # Text / Content
link <- c() # Linked to Website
date <- c() # Date
rurl <- c() # Reddit-URL of post
acct <- c() # Author of Post
for(i in c(1:length(reddit_post$URL))){
comt[i] <- reddit_post$num_comments[i]
ttle[i] <- reddit_post$title[i]
rurl[i] <- reddit_post$URL[i]
date[i] <- gsub(x = reddit_post$date[i], pattern = "-", replace = "")
subr[i] <- reddit_post$subreddit[i]

Sys.sleep(2)
reddit_content <- reddit_content(URL = reddit_post$URL[i], wait_time = 0)
ptns[i] <- reddit_content$post_score[1]
text[i] <- reddit_content$post_text[1]
link[i] <- reddit_content$link[1]
acct[i] <- reddit_content$author[1]
}

### Creating dataframe
reddit <- data.frame(cbind(date, rurl, link, text, ttle, ptns, subr, comt, acct))

#### Clean-Up
rm(list = c("date", "rurl", "link", "text", "ttle", "ptns", "subr", "comt", "acct"))

reddit <- within(data = reddit, expr = {
date <- as.character(date);
rurl <- as.character(rurl);
link <- as.character(link);
text <- as.character(text);
ttle <- as.character(ttle);
ptns <- as.numeric(as.character(ptns));
subr <- as.character(subr);
comt <- as.numeric(as.character(comt));
})
# }}}

### Exporting data {{{ ----

time_of_saving <- sub(x = Sys.time(), pattern = " CET", replace = "")
time_of_saving <- sub(x = time_of_saving, pattern = " ", replace = "_")
time_of_saving <- gsub(x = time_of_saving, pattern = ":", replace = "-")

#### RData
save_path <- paste0("./data/ilovefs-all_", time_of_saving, ".RData")
save(list = c("twitter", "mastodon", "reddit"), file = save_path)

#### Text
##### Fediverse
save_path_fed_t <- paste0("./data/ilovefs-fediverse_", time_of_saving, ".txt")
write.table(mastodon, file = save_path_fed_t)
##### Twitter
save_path_twitter_t <- paste0("./data/ilovefs-twitter_", time_of_saving, ".txt")
write.table(twitter, file = save_path_twitter_t)
##### Reddit
save_path_reddit_t <- paste0("./data/ilovefs-reddit_", time_of_saving, ".txt")
write.table(reddit, file = save_path_reddit_t)

#### CSV
##### Fediverse
save_path_fed_c <- paste0("./data/ilovefs-fediverse_", time_of_saving, ".csv")
write.csv(mastodon, file = save_path_fed_c)
##### Twitter
save_path_twitter_c <- paste0("./data/ilovefs-twitter_", time_of_saving, ".csv")
write.csv(twitter, file = save_path_twitter_c)
##### Reddit
save_path_reddit_c <- paste0("./data/ilovefs-reddit_", time_of_saving, ".csv")
write.csv(reddit, file = save_path_reddit_c)
# }}}

scripts/collecto.R (+241, -0)

@@ -0,0 +1,241 @@
################################################################################
# Copyright (c) 2018 Free Software Foundation Europe e.V. <contact@fsfe.org>
# Author 2018 Jan Weymeirsch <janwey@fsfe.org>
# Author 2018 Vincent Lequertier <vincent@fsfe.org>
# SPDX-License-Identifier: GPL-3.0
################################################################################

# Loading Packages and Functions -----------------------------------------------

# Twitter ------------------------------
if(!require("rtweet")){ install.packages("rtweet"); library("rtweet") }
# had to install "httr" via packagemanager

# Fediverse (eg: mastodon) -------------
if(!require("curl")){ install.packages("curl"); library("curl") }
if(!require("rjson")){ install.packages("rjson"); library("rjson") }

# Reddit -------------------------------
if(!require("RedditExtractoR")){
install.packages("RedditExtractoR")
library("RedditExtractoR")
}

# Export as ODS ------------------------
if(!require("readODS")){ install.packages("readODS"); library("readODS") }

# Read helper functions ----------------
source("./functions.R")

# Twitter Collector ------------------------------------------------------------


# Reading stored API credentials -------
tw_cred <- read.table(file = "../twitter_api.txt", header = TRUE, sep = ";",
colClasses = "character")

# Create Twitter Token -----------------
twitter_token <- create_token(app = tw_cred$appname,
consumer_key = tw_cred$consumer_key,
consumer_secret = tw_cred$consumer_private)

# Note
# Please refer to the Documentation on where to receive your API credentials.
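# A minimal sketch of the expected credential file (values are placeholders):
# read.table() above expects "../twitter_api.txt" to be a semicolon-separated
# table with a header row naming the three columns accessed here, e.g.
#
#   appname;consumer_key;consumer_private
#   my-ilovefs-app;XXXXXXXXXXXXXXXXXXXXXXXXX;YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY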


# Collecting Tweets ------------------------------------------------------------

# Scrape Tweets ------------------------
tweets <- search_tweets(q = "#ilovefs",
n = 9999,
include_rts = FALSE)[,
# include only relevant information
c("user_id", "created_at", "text", "source", "favorite_count",
"retweet_count", "hashtags", "urls_expanded_url", "media_expanded_url",
"ext_media_expanded_url", "lang", "location", "status_url", "protected")]

# Recoding -----------------------------
tweets <- within(data = tweets, expr = {
# replace global user ID by index only unique to this dataset
user <- as.numeric(as.factor(user_id))
rm("user_id")

# extract date and time
time <- sub(pattern = ".*\\s", x = created_at, replace = "")
date <- sub(pattern = "\\s.*", x = created_at, replace = "")
rm("created_at")

# extract "clean" text (without URLs or lineabreaks)
ctxt <- gsub(pattern = "http.?://.+($|\\s)", x = text, replace = "") %>%
gsub(pattern = "\n", x = text, replace = "")

# Client data
clnt <- as.factor(source)
rm("source")

# Favorites and Retweets
favs <- favorite_count
retw <- retweet_count
rm(list = c("favorite_count", "retweet_count"))

# List Hashtags in single Variable
htag <- list2vec(x = hashtags)
rm("hashtags")

# URLs and Media
link <- status_url
urls <- list2vec(urls_expanded_url)
murl <- list2vec(media_expanded_url)
mext <- list2vec(ext_media_expanded_url)
rm(list = c("urls_expanded_url", "media_expanded_url",
"ext_media_expanded_url", "status_url"))

# Location
posi <- location
rm("location")
})

# Exclusion ----------------------------
# before 2019-01-01, after 2019-02-17, protected tweets
tweets <- tweets[(as.Date(tweets$date) > as.Date("2019-01-01") &
as.Date(tweets$date) < as.Date("2019-02-17")),]
tweets <- tweets[!tweets$protected,]

# Mastodon Collector -----------------------------------------------------------

# Set search parameters ----------------
toots <- c()
mastodon_iterations <- 999
mastodon_instance <- "https://mastodon.social"
mastodon_hashtag <- "ilovefs"
mastodon_url <- paste0(mastodon_instance, "/api/v1/timelines/tag/",
mastodon_hashtag, "?limit=40")

# Scrape Mastodon ----------------------
for(i in 1:mastodon_iterations){

# Download and extract Posts
mastodon_reqres <- curl_fetch_memory(mastodon_url)
mastodon_rawjson <- rawToChar(mastodon_reqres$content)
raw_toots <- fromJSON(mastodon_rawjson)

# If Post-Data is present, extract it. Else break the loop
if(length(raw_toots) > 0){
tmp_toots <- mastodon.extract(data = raw_toots)
toots <- rbind(toots, tmp_toots)
} else {
break
}

# Update the URL for the next iteration of the for loop so we can download
# the next toots.
mastodon_lheader <- parse_headers(mastodon_reqres$headers)[11]
mastodon_url <- gsub(x = mastodon_lheader,
pattern = "(.*link: <)|(>; rel=\"next\".*)",
replace = "")
}
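
# Example of the pagination handled above, using a hypothetical Link header
# (placeholder URL): parse_headers() yields a line such as
#   link: <https://mastodon.social/api/v1/timelines/tag/ilovefs?limit=40&max_id=101>; rel="next", <...>; rel="prev"
# which the gsub() call reduces to the bare URL of the next page,
#   https://mastodon.social/api/v1/timelines/tag/ilovefs?limit=40&max_id=101
# fetched on the next iteration until no further toots are returned.
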
# adding variable-names (again)
names(toots) <- c("time", "date", "lang", "inst", "link", "text",
"rebl", "favs", "acct", "murl")

# Simple(!) anonymization --------------
toots$acct <- as.numeric(toots$acct) # unique only to this dataframe
toots$link <- as.numeric(toots$link) # unique only to this dataframe
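# e.g. two toots by the same account receive the same integer, while the
# original account and post URLs are dropped from the dataset. Note that this
# relies on the columns being factors (the stringsAsFactors default of R < 4.0);
# with plain character columns, as.numeric() would yield NA instead.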

# Cleanup ------------------------------
toots <- within(data = toots, expr = {

# Time Variables
time <- as.character(time)
date <- as.character(date)
fdat <- strptime(x = paste(date, time), format = "%Y-%m-%d %H:%M:%S",
tz = "CET")

# Instances
inst <- gsub(pattern = "(tag:)|(,\\d+.*)|(https:\\/\\/)|(\\/.*)",
x = inst, replacement = "")
})

# Exclusion ----------------------------
# Only include Toots from this year
mst_exclude <- which(as.Date(toots$date) < as.Date("2019-01-01") |
as.Date(toots$date) > as.Date("2019-02-17"))
if(length(mst_exclude) > 0){ toots <- toots[-mst_exclude,] }

# Reddit Collector -------------------------------------------------------------

# Get posts on Reddit ------------------
reddit_post_dirty <- reddit_urls(search_terms = "ilovefs",
#subreddit = "freesoftware linux opensource",
cn_threshold = 0,
page_threshold = 99999,
sort_by = "new",
wait_time = 5)

# Extract relevant information ---------
reddit <- within(data = reddit_post_dirty, expr = {

# extract year
year <- paste0(20, gsub(x = date, pattern = ".*-", replacement = ""))
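# e.g. a date of "14-02-19" (DD-MM-YY, as delivered by reddit_urls())
# becomes the year "2019"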

# rename relevant variables
cmts <- num_comments
name <- title
subr <- subreddit
link <- URL

# Cleanup
rm(list = c("num_comments", "title", "subreddit", "URL"))
})

# Exclude ------------------------------
# Limit to this year only
reddit_exclude <- which(as.numeric(reddit$year) < 2019)
if(length(reddit_exclude) > 0){ reddit <- reddit[-reddit_exclude,] }

# Additional Information ---------------
# for all remaining posts, additional information may be gathered
rdcnt <- lapply(X = reddit$link, FUN = function(x){
reddit_content(URL = x, wait_time = 30)
})

# merge additional information into main dataset
reddit$ptns <- reddit$text <- reddit$user <- NA
for(i in 1:length(rdcnt)){
reddit$ptns[i] <- rdcnt[[i]]$post_score[1]
reddit$text[i] <- rdcnt[[i]]$post_text[1]
reddit$user[i] <- rdcnt[[i]]$author[1]
}

# Exporting data ---------------------------------------------------------------

# Create timestamp ---------------------
time_of_saving <- sub(x = Sys.time(), pattern = " CET", replace = "") %>%
sub(pattern = " ", replace = "_") %>%
gsub(pattern = ":", replace = "-")
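# e.g. a Sys.time() of "2019-02-18 12:30:05 CET" becomes "2019-02-18_12-30-05",
# so the RData file below would be written as (example values only)
# "../data/ilovefs-all_2019-02-18_12-30-05.RData"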

# Save as RData ------------------------
save_path <- paste0("../data/ilovefs-all_", time_of_saving, ".RData")
save(list = c("tweets", "toots", "reddit"), file = save_path)

# Save as Text -------------------------
save_path <- paste0("../data/ilovefs-", c("fediverse_", "twitter_", "reddit_"),
time_of_saving, ".txt")
write.table(x = toots, file = save_path[1])
write.table(x = tweets, file = save_path[2])
write.table(x = reddit, file = save_path[3])

# Save as CSV --------------------------
save_path <- paste0("../data/ilovefs-", c("fediverse_", "twitter_", "reddit_"),
time_of_saving, ".csv")
write.csv(x = toots, file = save_path[1])
write.csv(x = tweets, file = save_path[2])
write.csv(x = reddit, file = save_path[3])

# Save as ODS --------------------------
save_path <- paste0("../data/ilovefs-", c("fediverse_", "twitter_", "reddit_"),
time_of_saving, ".ods")
write_ods(x = toots, path = save_path[1])
write_ods(x = tweets, path = save_path[2])
write_ods(x = reddit, path = save_path[3])

scripts/functions.R (+70, -0)

@@ -0,0 +1,70 @@
################################################################################
# Copyright (c) 2018 Free Software Foundation Europe e.V. <contact@fsfe.org>
# Author 2019 Jan Weymeirsch <janwey@fsfe.org>
# SPDX-License-Identifier: GPL-3.0
################################################################################

# Helper Functions -------------------------------------------------------------
# to be sourced from the main script.

# List2Vec ---------------------------------------------------------------------
# transforms a list() object x into a character vector, collapsing each list
# element into a single string whose parts are divided by a separator
list2vec <- function(x, sep = ","){
sapply(X = x, FUN = function(y) paste(unlist(y), collapse = sep))
}
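# Example:
#   list2vec(list(c("ilovefs", "fsfe"), character(0), "freesoftware"))
# returns
#   c("ilovefs,fsfe", "", "freesoftware")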

# ValIfExst --------------------------------------------------------------------
# tests whether an object contains values. If yes, it returns that value, else
# returns NA
valifexst <- function(x) ifelse(test = length(x) > 0, yes = x, no = NA)
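# Example: valifexst(character(0)) yields NA, valifexst("de") yields "de".
# Note that ifelse() returns a single element here, so inputs longer than one
# element are reduced to their first value.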

# Mastodon Extractor -----------------------------------------------------------
# extracts a set of information from a nested list item containing all
# information on a single post within one upper list, as returned by the
# "tags" mastodon-API v1
mastodon.extract <- function(data){

# Within each post
data <- sapply(X = data, FUN = function(x){

# time and date
time <- gsub(x = x$created_at, pattern = ".*T|\\..*", replacement = "")
date <- sub(x = x$created_at, pattern = "T.*", replacement = "")

# simple extraction, return NA if value does not exist
lang <- valifexst(x$language) # language
inst <- valifexst(x$uri) # instance name
link <- valifexst(x$url) # post URL
rebl <- valifexst(x$reblogs_count) # number of reblogs
favs <- valifexst(x$favourites_count) # number of favorites
acct <- valifexst(x$account$url) # account url (unique)

# sanitizing text (removing HTML tags and whitespace)
text <- gsub(pattern = "<.*?>|\\s{2,}", x = x$content, replacement = "")

# media URL (multiple possible)
murl <- valifexst(
list2vec(
sapply(X = x$media_attachments, FUN = function(y){
y$url
})
)
)

# return extracted data only
return(data.frame(
rbind(time, date, lang, inst, link, text, rebl, favs, acct, murl)
))
})

# transform "clean" list object into dataframe
data <- as.data.frame(
t(matrix(data = unlist(data), nrow = length(data[[1]])))
)

# return data.frame object
return(data)
}
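# Called from scripts/collecto.R roughly as:
#   raw_toots <- fromJSON(rawToChar(curl_fetch_memory(mastodon_url)$content))
#   toots     <- rbind(toots, mastodon.extract(data = raw_toots))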

# EOF functions.R

plotte.R → scripts/plotte.R (+2, -4)

@@ -6,11 +6,9 @@

## Loading Packages {{{ ----
### ggplot2 for plots
install.packages("ggplot2")
library("ggplot2")
if(!require("ggplot2")){ install.packages("ggplot2"); library("ggplot2") }
### arrange ggplot2 plots
install.packages("gridExtra")
library("gridExtra")
if(!require("gridExtra")){ install.packages("gridExtra"); library("gridExtra") }
# }}}

## Loading Data {{{ ----

word_cloud.py → scripts/word_cloud.py (+0, -0)

