Browse Source

Edit: implement changes by Vincent in a similar style to the rest of the code

pull/4/head
janwey 1 year ago
parent
commit
0c5b89c921
1 changed files with 118 additions and 95 deletions
  1. 118
    95
      collecto.R

+ 118
- 95
collecto.R View File

@@ -12,10 +12,10 @@ library("twitteR")
# had to install "httr" via the system package manager

#### Fediverse (eg: mastodon)
install.packages("devtools")
# requires libssl-dev
devtools::install_github("ThomasChln/mastodon", force = TRUE)
library("mastodon")
install.packages("curl")
library("curl")
install.packages("rjson")
library(rjson)

### Reddit
install.packages("RedditExtractoR")
@@ -156,121 +156,144 @@ twitter <- within(data = twitter, expr = {

## Mastodon Collector with curl {{{ ----

library(curl)
library(rjson)
toot_raw_json <- rawToChar(curl_fetch_memory("https://mastodon.social/api/v1/timelines/tag/ilovefs?limit=40")$content)
toots = fromJSON(toot_raw_json)
toots_mat = matrix(ncol = 4, nrow = length(toots))
### Request the public hashtag timeline from the chosen instance
#### Instance and hashtag are kept in their own variables so that the
#### endpoint URL is assembled from them in one place.
mastodon_instance <- "https://mastodon.social"
mastodon_hashtag <- "ilovefs"
mastodon_apiurl <- sprintf("%s/api/v1/timelines/tag/%s?limit=40",
                           mastodon_instance,
                           mastodon_hashtag)

#### Fetch the response into memory; the full response object is kept
#### because its headers are read later on.
mastodon_reqres <- curl_fetch_memory(url = mastodon_apiurl)

#### The body arrives as raw bytes: turn it into a string, then parse the JSON
toot_raw_json <- rawToChar(mastodon_reqres$content)
toots <- fromJSON(json_str = toot_raw_json)

### Extract the URL of the next result page from the "Link" response header
#### The header is looked up by name (case-insensitively) instead of by a fixed
#### position, because the order and number of response headers can differ
#### between servers and requests.
mastodon_headers <- parse_headers(mastodon_reqres$headers)
mastodon_lheader <- grep(pattern = "^link:", x = mastodon_headers,
                         ignore.case = TRUE, value = TRUE)

#### Assumes the header has the form
####   link: <URL>; rel="next", <URL>; rel="prev"
#### (as described in the Mastodon pagination docs -- TODO confirm for other
#### Fediverse software). Only the URL directly followed by rel="next" is kept.
#### mastodon_next stays NA if there is no Link header or no "next" relation.
mastodon_next <- NA_character_
if (length(mastodon_lheader) > 0) {
  mastodon_lmatch <- regmatches(
    x = mastodon_lheader[1],
    m = regexpr(pattern = "<[^<>]*>;[[:space:]]*rel=\"next\"",
                text = mastodon_lheader[1])
  )
  if (length(mastodon_lmatch) > 0) {
    # strip the angle brackets and the trailing relation, leaving the bare URL
    mastodon_next <- sub(pattern = "^<([^<>]*)>.*$", replacement = "\\1",
                         x = mastodon_lmatch)
  }
}

### Empty accumulators, filled per toot in the loop below
#### c() evaluates to NULL, so each variable starts out as NULL and grows
#### by indexed assignment.
datetime <- lang <- inst <- link <- NULL
text <- reto <- favs <- murl <- NULL
for(i in 1:length(toots)){
toots_mat[i,] <- c(toots[[i]]$created_at, toots[[i]]$url, toots[[i]]$content, toots[[i]]$language)
}
toots_df = data.frame(toots_mat)
names(toots_df) = c ('created_at', 'url', 'content', 'language')
toots_df$content <- gsub(pattern = "<.*?>", x = toots_df$content, replacement = "")
toots_df$content <- gsub(pattern = " ", x = toots_df$content, replacement = "")

## Mastodon Collector {{{ ----
#### Time and Date of Toot
if(length(toots[[i]]$created_at) > 0){
datetime[i] <- toots[[i]]$created_at
} else {
# insert empty value, if it does not exist
datetime[i] <- NA
}

### Authenticate to the Fediverse (here: Mastodon)
#### Language of Toot
if(length(toots[[i]]$language) > 0){
lang[i] <- toots[[i]]$language
} else {
# insert empty value, if it does not exist
lang[i] <- NA
}

#### Manual input (uncomment if needed)
#mastodon_auth_insta <- readline("[Mastodon] Enter your Instance-URL.")
#mastodon_auth_login <- readline("[Mastodon] Enter your registered mail.")
#mastodon_auth_passw <- readline("[Mastodon] Enter your password.")
#### Saved credentials
mastodon_api_cred <- read.table(file = "./fediverse_mastodon_api.txt", header = TRUE, sep = ";")
mastodon_auth_insta <- as.character(mastodon_api_cred$instance)
mastodon_auth_login <- as.character(mastodon_api_cred$mail)
mastodon_auth_passw <- as.character(mastodon_api_cred$password)
#### Instance of Toot
if(length(toots[[i]]$uri) > 0){
inst[i] <- toots[[i]]$uri
} else {
# insert empty value, if it does not exist
inst[i] <- NA
}

#### Authentication process
mastodon_auth <- mastodon::login(instance = mastodon_auth_insta,
user = mastodon_auth_login,
pass = mastodon_auth_passw)
#### URL of Toot
if(length(toots[[i]]$url) > 0){
link[i] <- toots[[i]]$url
} else {
# insert empty value, if it does not exist
link[i] <- NA
}

### Get posts from mastodon
mastodon_toot <- mastodon::get_hashtag(token = mastodon_auth,
hashtag = "ilovefs",
local = FALSE,
n = 20)
#### Text/Content of Toot
if(length(toots[[i]]$content) > 0){
text[i] <- toots[[i]]$content
} else {
# insert empty value, if it does not exist
text[i] <- NA
}

### public and non-public posts
mastodon_priv <- mastodon_toot[[7]]
#### Number of Retoots
if(length(toots[[i]]$reblogs_count) > 0){
reto[i] <- toots[[i]]$reblogs_count
} else {
# insert empty value, if it does not exist
reto[i] <- NA
}

### Time of post
#### date (as numeric value)
mastodon_date <- sub(pattern = "T.*", x = mastodon_toot[[2]], replacement = "")
mastodon_date <- gsub(pattern = "-", x = mastodon_date, replacement = "")
mastodon_date <- as.numeric(mastodon_date)
#### time (as numeric value)
mastodon_time <- sub(pattern = ".*T", x = mastodon_toot[[2]], replacement = "")
mastodon_time <- sub(pattern = "\\..*", x = mastodon_time, replacement = "")
mastodon_time <- gsub(pattern = ":", x = mastodon_time, replacement = "")
mastodon_time <- as.numeric(mastodon_time)

### Language of post
mastodon_lang <- mastodon_toot[[8]]

### Instance of post
mastodon_insta <- sub(pattern = "tag:", x = mastodon_toot[[9]], replacement = "")
mastodon_insta <- sub(pattern = ",\\d+.*", x = mastodon_insta, replacement = "")
#### in case the instance name is a full url
mastodon_insta <- sub(pattern = ".*://", x = mastodon_insta, replacement = "")
mastodon_insta <- sub(pattern = "/.*", x = mastodon_insta, replacement = "")

### Text of post
#### exclude all HTML
mastodon_txt <- gsub(pattern = "<.*?>", x = mastodon_toot[[10]], replacement = "")
mastodon_txt <- gsub(pattern = " ", x = mastodon_txt, replacement = "")

### URL of post
mastodon_url <- mastodon_toot[[11]]

### Favorites of posts
mastodon_fav <- mastodon_toot[[13]]

### Information about posters
mastodon_pers <- mastodon_toot[[19]]
mastodon_bot <- c()
for(i in 1:length(mastodon_pers)){
if(mastodon_pers[[i]]$username == "TrendingBot"){
mastodon_bot[i] <- TRUE
#### Number of Favorites
if(length(toots[[i]]$favourites_count) > 0){
favs[i] <- toots[[i]]$favourites_count
} else {
mastodon_bot[i] <- FALSE
# insert empty value, if it does not exist
favs[i] <- NA
}
}

### images of post
mastodon_img <- c()
for(i in 1:length(mastodon_toot[[20]])){
mastodon_img[i] <- length(mastodon_toot[[20]][[i]])
#### URL of the first media attachment
if(length(toots[[i]]$media_attachments) > 0){
murl[i] <- toots[[i]]$media_attachments[[1]]$url
} else {
# insert empty value, if it does not exist
murl[i] <- NA
}
}

### Cleaning data (removal of excluded posts)
mastodon_exclude <- c(which(mastodon_bot),
which(mastodon_date < 20180101),
which(mastodon_priv != "public"))
### Time of post
#### Both values are derived from the raw timestamp in `datetime`, split at
#### its "T" separator (assumes a form like 2018-02-14T12:34:56.000Z -- TODO
#### confirm against the API response).
#### date (as numeric value): part before the "T", dashes removed
date <- as.numeric(
  gsub(pattern = "-", replacement = "",
       x = sub(pattern = "T.*", replacement = "", x = datetime))
)
#### time (as numeric value): part after the "T", without the fractional
#### seconds (everything from the first "." on) and without colons
time <- sub(pattern = "\\..*", replacement = "",
            x = sub(pattern = ".*T", replacement = "", x = datetime))
time <- as.numeric(gsub(pattern = ":", replacement = "", x = time))

### Removing HTML-Tags from Toots
text <- gsub(pattern = "<.*?>", x = text, replacement = "")
text <- gsub(pattern = " ", x = text, replacement = "")

### Cleaning Instance-String
#### The patterns are applied one after another, each removing its first
#### match from every element of `inst`:
####   1-2 (GNUsocial): the "tag:" prefix, then everything from ",<digits>" on
####   3-4 (Mastodon):  the "https://" scheme, then everything from the first "/" on
inst <- Reduce(
  f = function(cleaned, pat) {
    sub(pattern = pat, replacement = "", x = cleaned)
  },
  x = c("tag:", ",\\d+.*", "https:\\/\\/", "\\/.*"),
  init = inst
)

### Only include Toots from this year
#### mastodon_exclude holds the positions of toots dated before 2018-01-01.
#### The vectors are filtered with a logical keep-mask instead of negative
#### indices: x[-which(cond)] yields an EMPTY vector when which() matches
#### nothing, which would silently drop every toot if none is older than 2018.
#### Toots whose date is NA are kept, exactly as with the which()-based
#### selection.
mastodon_exclude <- which(date < 20180101)
mastodon_keep <- !(seq_along(date) %in% mastodon_exclude)
date <- date[mastodon_keep]
time <- time[mastodon_keep]
lang <- lang[mastodon_keep]
inst <- inst[mastodon_keep]
text <- text[mastodon_keep]
link <- link[mastodon_keep]
reto <- reto[mastodon_keep]
favs <- favs[mastodon_keep]
murl <- murl[mastodon_keep]

date <- mastodon_date[-mastodon_exclude]
time <- mastodon_time[-mastodon_exclude]
lang <- mastodon_lang[-mastodon_exclude]
inst <- mastodon_insta[-mastodon_exclude]
text <- mastodon_txt[-mastodon_exclude]
link <- mastodon_url[-mastodon_exclude]
favs <- mastodon_fav[-mastodon_exclude]
imag <- mastodon_img[-mastodon_exclude]
## Mastodon Collector {{{ ----

### Creating dataframe
mastodon <- data.frame(cbind(date, time, lang, inst, text, link, favs, imag))
mastodon <- data.frame(cbind(date, time, lang, inst, text, link, reto, favs, murl))

#### Clean-Up
rm(list = c("date", "time", "lang", "inst", "text", "link", "favs", "imag"))
rm(list = c("date", "time", "lang", "inst", "text", "link", "favs", "reto", "murl", "datetime"))

#### Restore column types of the data frame
#### date/time go through as.character() first, so that a factor column is
#### converted by its labels and not by its internal integer codes.
mastodon[c("date", "time")] <- lapply(
  X = mastodon[c("date", "time")],
  FUN = function(column) as.numeric(as.character(column))
)
#### free-text and URL columns become plain character vectors
mastodon[c("text", "link", "murl")] <- lapply(
  X = mastodon[c("text", "link", "murl")],
  FUN = as.character
)
# }}}


Loading…
Cancel
Save