浏览代码

Add: initial Mastodon (fediverse) support

pull/1/head
janwey 1年前
父节点
当前提交
ee8a88d618
共有 3 个文件被更改,包括 185 次插入13 次删除
  1. 1
    0
      .gitignore
  2. 182
    13
      collecto.R
  3. 2
    0
      fediverse_mastodon_api_example.txt

+ 1
- 0
.gitignore 查看文件

@@ -1,3 +1,4 @@
.httr-oauth
twitter_api.txt
facebook_api.txt
fediverse_mastodon_api.txt

+ 182
- 13
collecto.R 查看文件

@@ -6,12 +6,20 @@
################################################################################

### Loading Packages {{{ ----
#### Twitter
# Packages are installed only when they are missing, so that re-running the
# script does not download and reinstall them every time.
if (!requireNamespace("twitteR", quietly = TRUE)) {
  install.packages("twitteR")
}
library("twitteR")
# had to install "httr" via packagemanager

#### Facebook
if (!requireNamespace("Rfacebook", quietly = TRUE)) {
  install.packages("Rfacebook")
}
library("Rfacebook")

#### Fediverse (eg: mastodon)
# The mastodon package is installed from GitHub, which needs devtools
# requires libssl-dev
if (!requireNamespace("mastodon", quietly = TRUE)) {
  if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools")
  }
  devtools::install_github("ThomasChln/mastodon")
}
library("mastodon")
# }}}

## Twitter Collector {{{ ----
@@ -51,13 +59,13 @@ twitter_tw <- strip_retweets(tweets = twitter_tw_dirty,
strip_mt = FALSE)

### Time of tweet
# Creation timestamp of every tweet as a character string. Tweets without a
# "created" value get NA. vapply() replaces the index loop so that an empty
# tweet list yields an empty vector instead of failing on 1:0.
twitter_timedate <- vapply(twitter_tw, function(tweet) {
  created <- tweet$created
  if (length(created) > 0) {
    as.character(created)[1]
  } else {
    # insert empty value, if it does not exist
    NA_character_
  }
}, character(1), USE.NAMES = FALSE)
# Both names held identical values before; keep the second one available
twitter_time <- twitter_timedate

@@ -130,13 +138,35 @@ for(i in 1:length(twitter_tw)){
}

# Control output, uncomment if needed
#twitter_time
#twitter_client
#twitter_name
#twitter_rts
#twitter_fav
#twitter_url
#twitter_txt
twitter_timedate
twitter_client
twitter_name
twitter_rts
twitter_fav
twitter_url
twitter_txt

# Prepare the columns of the twitter data frame.
# NOTE(review): assumes twitter_timedate looks like "<date> <time>" with "-"
# and ":" as separators -- confirm against the twitteR output.
# time: everything after the last space, separators removed, as number
time <- sub(pattern = ".* ", replacement = "", x = twitter_timedate)
time <- as.numeric(gsub(pattern = ":", replacement = "", x = time))
# date: everything before the first space, separators removed, as number
date <- sub(pattern = " .*", replacement = "", x = twitter_timedate)
date <- as.numeric(gsub(pattern = "-", replacement = "", x = date))
# retweet and favorite counts are stored as factors, url and text as strings
retw <- as.factor(twitter_rts)
favs <- as.factor(twitter_fav)
link <- as.character(twitter_url)
text <- as.character(twitter_txt)

### Creating dataframe
# The columns are passed to data.frame() directly. Going through cbind()
# first would turn everything into one character matrix and replace the
# factors (retw, favs) by their internal integer codes, losing the counts.
# date and time stay numeric, text and link stay character, so no
# conversion afterwards is needed.
twitter <- data.frame(date = date,
                      time = time,
                      retw = retw,
                      favs = favs,
                      text = text,
                      link = link,
                      stringsAsFactors = FALSE)

#### Clean-Up
# remove the temporary column vectors (they shadow base functions like date())
rm(list = c("date", "time", "retw", "favs", "text", "link"))
# }}}

## Facebook Collector [WIP] {{{ ----
@@ -150,14 +180,153 @@ facebook_api_cred <- read.table(file = "./facebook_api.txt", header = TRUE, sep
facebook_app_id <- as.character(facebook_api_cred$app_id)
facebook_secret <- as.character(facebook_api_cred$app_secret)

# Request a token for the Facebook API with the app credentials read above
# (the app id is stored in facebook_app_id).
facebook_auth <- fbOAuth(app_id = facebook_app_id,
                         app_secret = facebook_secret)

### Get posts from FSFE
# Posts of the page "thefsfe" from 2018-01-01 until 2018-12-31
# (dates are written year-month-day)
facebook_fsfe_posts <- Rfacebook::getPage(page = "thefsfe",
                                          token = facebook_auth,
                                          since = "2018-01-01",
                                          until = "2018-12-31")
# }}}

## Mastodon Collector {{{ ----

### Authenticate to the Fediverse (here: Mastodon)

# Note -------------------------------------------------------------------------
# It is sub-optimal to use clear-text credentials for the authentication
# process, but the mastodon package does not (yet) support OAuth
# ------------------------------------------------------------------------------

#### Manual input (uncomment if needed)
#mastodon_auth_insta <- readline("[Mastodon] Enter your Instance-URL.")
#mastodon_auth_login <- readline("[Mastodon] Enter your registered mail.")
#mastodon_auth_passw <- readline("[Mastodon] Enter your password.")
#### Saved credentials
# Read instance URL, mail address and password from the semicolon-separated
# credentials file (one header line, one data line).
mastodon_api_cred <- read.table(file = "./fediverse_mastodon_api.txt", header = TRUE, sep = ";")
mastodon_auth_insta <- as.character(mastodon_api_cred$instance)
mastodon_auth_login <- as.character(mastodon_api_cred$mail)
mastodon_auth_passw <- as.character(mastodon_api_cred$password)

#### Authentication process
# Use the mastodon_auth_* values read above; the shorter names
# (mastodon_insta, ...) are not defined at this point of the script.
mastodon_auth <- mastodon::login(instance = mastodon_auth_insta,
                                 user = mastodon_auth_login,
                                 pass = mastodon_auth_passw)

### Get posts from mastodon
# Fetch up to 100 posts tagged "ilovefs", not restricted to the local instance
mastodon_toot <- mastodon::get_hashtag(token = mastodon_auth,
                                       hashtag = "ilovefs",
                                       local = FALSE,
                                       n = 100)

# Note -------------------------------------------------------------------------
# Documentation is really poor, so here is a guess of the variables in the
# list() item
# 1. id
# 2. time
# 3.
# 4.
# 5.
# 6.
# 7. public/private
# 8. language
# 9. user-agent
# 10. post-text (html)
# 11. url of post
# 12.
# 13. favorites
# 14.
# 15.
# 16.
# 17.
# 18.
# 19. poster-information
# 20. image in post
# 21.
# 22. information about searched hashtag
# 23.
# ------------------------------------------------------------------------------

### Sort out non-public posts
# Positions of all posts whose visibility (item 7) is not "public"
mastodon_priv <- which(mastodon_toot[[7]] != "public")
# Drop those positions from every item of the result, if there are any
if (length(mastodon_priv) > 0) {
  for (i in seq_along(mastodon_toot)) {
    mastodon_toot[[i]] <- mastodon_toot[[i]][-mastodon_priv]
  }
}

### Time of post
#### date (as numeric value)
# Item 2 holds the timestamp; the part before "T" is the date. Cut the rest
# off, remove the dashes and convert to a number.
mastodon_date <- as.numeric(gsub("-", "", sub("T.*", "", mastodon_toot[[2]])))
#### time (as numeric value)
# The part after "T" is the time. Cut off everything from the first "." on,
# remove the colons and convert to a number.
mastodon_time <- sub(".*T", "", mastodon_toot[[2]])
mastodon_time <- as.numeric(gsub(":", "", sub("\\..*", "", mastodon_time)))

### Language of post
mastodon_lang <- mastodon_toot[[8]]

### Instance of post
# Item 9 is reduced to the instance name by removing, one after another:
# the "tag:" prefix, everything from ",<digits>" on, a leading "scheme://"
# (in case the instance name is a full url) and everything from the first
# "/" on.
mastodon_insta <- Reduce(
  function(uri, pat) sub(pattern = pat, replacement = "", x = uri),
  c("tag:", ",\\d+.*", ".*://", "/.*"),
  mastodon_toot[[9]]
)

### Text of post
#### exclude all HTML
# Remove every HTML tag (shortest match between "<" and ">") from item 10
mastodon_txt <- gsub(pattern = "<.*?>", x = mastodon_toot[[10]], replacement = "")
# NOTE(review): as written this removes every space from the text -- confirm
# that this is intended and the pattern was not meant to match something
# narrower (e.g. repeated or non-breaking spaces).
mastodon_txt <- gsub(pattern = " ", x = mastodon_txt, replacement = "")

### URL of post
# taken unchanged from item 11
mastodon_url <- mastodon_toot[[11]]

### Favorites of posts
# taken unchanged from item 13
mastodon_fav <- mastodon_toot[[13]]

### Information about posters
# Item 19 holds one entry per post with information about its author
mastodon_pers <- mastodon_toot[[19]]
# TRUE for every post written by the account "TrendingBot". vapply() always
# returns a logical vector, also for an empty result list (where a loop over
# 1:length() would fail), and isTRUE() treats a missing username as
# "not the bot" instead of stopping with an error.
mastodon_bot <- vapply(mastodon_pers, function(person) {
  isTRUE(person$username == "TrendingBot")
}, logical(1), USE.NAMES = FALSE)

### images of post
# Length of each post's entry in item 20, i.e. one number per post.
# lengths() does this in one step and returns an empty vector for an empty
# result instead of failing on 1:0.
mastodon_img <- lengths(mastodon_toot[[20]], use.names = FALSE)

### Cleaning data (removal of excluded posts)
# Positions of posts to drop: posts by the bot and posts dated before
# 2018-01-01 (dates are numbers of the form YYYYMMDD).
mastodon_exclude <- c(which(mastodon_bot),
                      which(mastodon_date < 20180101))

# Remove the excluded positions from a vector. When nothing is excluded the
# vector is returned unchanged: indexing with -integer(0) would select
# nothing and silently throw away every post.
mastodon_drop_excluded <- function(x) {
  if (length(mastodon_exclude) > 0) {
    x[-mastodon_exclude]
  } else {
    x
  }
}

date <- mastodon_drop_excluded(mastodon_date)
time <- mastodon_drop_excluded(mastodon_time)
lang <- mastodon_drop_excluded(mastodon_lang)
inst <- mastodon_drop_excluded(mastodon_insta)
text <- mastodon_drop_excluded(mastodon_txt)
link <- mastodon_drop_excluded(mastodon_url)
favs <- mastodon_drop_excluded(mastodon_fav)
imag <- mastodon_drop_excluded(mastodon_img)

### Creating dataframe
# Bind the cleaned vectors column-wise and turn the result into a data frame
mastodon <- data.frame(cbind(date, time, lang, inst, text, link, favs, imag))

#### Clean-Up
# the temporary vectors are no longer needed once the data frame exists
rm(date, time, lang, inst, text, link, favs, imag)

# Convert date and time back to numbers and text and link to plain strings;
# the remaining columns stay as data.frame() created them.
mastodon$date <- as.numeric(as.character(mastodon$date))
mastodon$time <- as.numeric(as.character(mastodon$time))
mastodon$text <- as.character(mastodon$text)
mastodon$link <- as.character(mastodon$link)
# }}}

+ 2
- 0
fediverse_mastodon_api_example.txt 查看文件

@@ -0,0 +1,2 @@
instance;mail;password
https://mastodon.social;me@localhost;ThisIsNotASafePassword

正在加载...
取消
保存