Browse Source

Add: initial Mastodon (fediverse) support

janwey 10 months ago
parent
commit
ee8a88d618
3 changed files with 185 additions and 13 deletions
  1. 1
    0
      .gitignore
  2. 182
    13
      collecto.R
  3. 2
    0
      fediverse_mastodon_api_example.txt

+ 1
- 0
.gitignore View File

@@ -1,3 +1,4 @@
1 1
 .httr-oauth
2 2
 twitter_api.txt
3 3
 facebook_api.txt
4
+fediverse_mastodon_api.txt

+ 182
- 13
collecto.R View File

@@ -6,12 +6,20 @@
6 6
 ################################################################################
7 7
 
8 8
 ### Loading Packages {{{ ----
9
+#### Twitter
9 10
 install.packages("twitteR")
10 11
 library("twitteR")
11 12
 # had to install "httr" via packagemanager
12 13
 
14
+#### Facebook
13 15
 install.packages("Rfacebook")
14 16
 library("Rfacebook")
17
+
18
+#### Fediverse (e.g. Mastodon)
19
+install.packages("devtools")
20
+# requires libssl-dev
21
+devtools::install_github("ThomasChln/mastodon")
22
+library("mastodon")
15 23
 # }}}
16 24
 
17 25
 ## Twitter Collector {{{ ----
@@ -51,13 +59,13 @@ twitter_tw <- strip_retweets(tweets = twitter_tw_dirty,
51 59
 			     strip_mt = FALSE)
52 60
 
53 61
 ### Time of tweet
54
-twitter_time <- c()
62
+twitter_timedate <- c()
55 63
 for(i in 1:length(twitter_tw)){
56 64
   if(length(twitter_tw[[i]]$created) > 0){
57
-    twitter_time[i] <- as.character(twitter_tw[[i]]$created)
65
+    twitter_timedate[i] <- as.character(twitter_tw[[i]]$created)
58 66
   } else {
59 67
   # insert empty value, if it does not exist
60
-    twitter_time[i] <- NA
68
+    twitter_timedate[i] <- NA
61 69
   }
62 70
 }
63 71
 
@@ -130,13 +138,35 @@ for(i in 1:length(twitter_tw)){
130 138
 }
131 139
 
132 140
 # Control output, uncomment if needed
133
-#twitter_time
134
-#twitter_client
135
-#twitter_name
136
-#twitter_rts
137
-#twitter_fav
138
-#twitter_url
139
-#twitter_txt
141
+twitter_timedate
142
+twitter_client
143
+twitter_name
144
+twitter_rts
145
+twitter_fav
146
+twitter_url
147
+twitter_txt
148
+
149
+time <- sub(pattern = ".* ", x = twitter_timedate, replace = "")
150
+time <- as.numeric(gsub(pattern = ":", x = time, replace = ""))
151
+date <- sub(pattern = " .*", x = twitter_timedate, replace = "")
152
+date <- as.numeric(gsub(pattern = "-", x = date, replace = ""))
153
+retw <- as.factor(twitter_rts)
154
+favs <- as.factor(twitter_fav)
155
+link <- as.character(twitter_url)
156
+text <- as.character(twitter_txt)
157
+
158
+### Creating dataframe
159
+twitter <- data.frame(cbind(date, time, retw, favs, text, link))
160
+
161
+#### Clean-Up
162
+rm(list = c("date", "time", "retw", "favs", "text", "link"))
163
+
164
+twitter <- within(data = twitter, expr = {
165
+		     date <- as.numeric(as.character(date));
166
+		     time <- as.numeric(as.character(time));
167
+		     text <- as.character(text);
168
+		     link <- as.character(link);
169
+		  })
140 170
 # }}}
141 171
 
142 172
 ## Facebook Collector [WIP] {{{ ----
@@ -150,14 +180,153 @@ facebook_api_cred <- read.table(file = "./facebook_api.txt", header = TRUE, sep
150 180
 facebook_app_id <- as.character(facebook_api_cred$app_id)
151 181
 facebook_secret <- as.character(facebook_api_cred$app_secret)
152 182
 
153
-facebook_oath <- fbOAuth(app_id = facebook_api_id,
183
+facebook_auth <- fbOAuth(app_id = facebook_app_id,  # id read from facebook_api.txt
154 184
 			 app_secret = facebook_secret)
155 185
 
156 186
 ### Get posts from FSFE
157 187
 facebook_fsfe_posts <- Rfacebook::getPage(page = "thefsfe",
158
-					  token = facebook_oauth,
188
+					  token = facebook_auth,
159 189
 					  since = "2018-01-01",
160
-					  until = "2018-31-12",
190
+					  until = "2018-12-31")  # YYYY-MM-DD, same format as `since`
191
+# }}}
192
+
193
+## Mastodon Collector {{{ ----
194
+
195
+### Authenticate to the Fediverse (here: Mastodon)
196
+
197
+# Note -------------------------------------------------------------------------
198
+# It is sub-optimal to use clear-text credentials for the authentication
199
+# process, but the mastodon-package does not (yet) support OAuth
200
+# ------------------------------------------------------------------------------
201
+
202
+#### Manual input (uncomment if needed)
203
+#mastodon_auth_insta <- readline("[Mastodon] Enter your Instance-URL.")
204
+#mastodon_auth_login <- readline("[Mastodon] Enter your registered mail.")
205
+#mastodon_auth_passw <- readline("[Mastodon] Enter your password.")
206
+#### Saved credentials
207
+mastodon_api_cred <- read.table(file = "./fediverse_mastodon_api.txt", header = TRUE, sep = ";")
208
+mastodon_auth_insta <- as.character(mastodon_api_cred$instance)
209
+mastodon_auth_login <- as.character(mastodon_api_cred$mail)
210
+mastodon_auth_passw <- as.character(mastodon_api_cred$password)
211
+
212
+#### Authentication process
213
+mastodon_auth <- mastodon::login(instance = mastodon_auth_insta,  # credentials loaded above
214
+				 user = mastodon_auth_login,
215
+				 pass = mastodon_auth_passw)
216
+
217
+### Get posts from mastodon
218
+mastodon_toot <- mastodon::get_hashtag(token = mastodon_auth,
219
+				       hashtag = "ilovefs",
220
+				       local = FALSE,
221
+				       n = 100)
222
+
223
+# Note -------------------------------------------------------------------------
224
+# Documentation is really poor, so here is a guess of the variables in the
225
+# list() item
226
+#  1. id
227
+#  2. time
228
+#  3. 
229
+#  4. 
230
+#  5. 
231
+#  6.
232
+#  7. public/private
233
+#  8. language
234
+#  9. user-agent
235
+# 10. post-text (html)
236
+# 11. url of post
237
+# 12. 
238
+# 13. favorites
239
+# 14.
240
+# 15. 
241
+# 16.
242
+# 17.
243
+# 18.
244
+# 19. poster-information
245
+# 20. image in post
246
+# 21.
247
+# 22. information about searched hashtag
248
+# 23.
249
+# ------------------------------------------------------------------------------
250
+
251
+### Sort out non-public posts
252
+mastodon_priv <- which(mastodon_toot[[7]] != "public")
253
+if(length(mastodon_priv) > 0){
254
+  for(i in 1:length(mastodon_toot)){
255
+    mastodon_toot[[i]] <- mastodon_toot[[i]][-c(mastodon_priv)]
256
+  }
257
+}
258
+
259
+### Time of post
260
+#### date (as numeric value)
261
+mastodon_date <- sub(pattern = "T.*", x = mastodon_toot[[2]], replacement = "")
262
+mastodon_date <- gsub(pattern = "-", x = mastodon_date, replacement = "")
263
+mastodon_date <- as.numeric(mastodon_date)
264
+#### time (as numeric value)
265
+mastodon_time <- sub(pattern = ".*T", x = mastodon_toot[[2]], replacement = "")
266
+mastodon_time <- sub(pattern = "\\..*", x = mastodon_time, replacement = "")
267
+mastodon_time <- gsub(pattern = ":", x = mastodon_time, replacement = "")
268
+mastodon_time <- as.numeric(mastodon_time)
269
+
270
+### Language of post
271
+mastodon_lang <- mastodon_toot[[8]]
272
+
273
+### Instance of post
274
+mastodon_insta <- sub(pattern = "tag:", x = mastodon_toot[[9]], replacement = "")
275
+mastodon_insta <- sub(pattern = ",\\d+.*", x = mastodon_insta, replacement = "")
276
+#### in case the instance name is a full url
277
+mastodon_insta <- sub(pattern = ".*://", x = mastodon_insta, replacement = "")
278
+mastodon_insta <- sub(pattern = "/.*", x = mastodon_insta, replacement = "")
279
+
280
+### Text of post
281
+#### exclude all HTML
282
+mastodon_txt <- gsub(pattern = "<.*?>", x = mastodon_toot[[10]], replacement = "")
283
+mastodon_txt <- gsub(pattern = "  ", x = mastodon_txt, replacement = "")
284
+
285
+### URL of post
286
+mastodon_url <- mastodon_toot[[11]]
287
+
288
+### Favorites of posts
289
+mastodon_fav <- mastodon_toot[[13]]
290
+
291
+### Information about posters
292
+mastodon_pers <- mastodon_toot[[19]]
293
+mastodon_bot <- c()
294
+for(i in 1:length(mastodon_pers)){
295
+  if(mastodon_pers[[i]]$username == "TrendingBot"){
296
+    mastodon_bot[i] <- TRUE
297
+  } else {
298
+    mastodon_bot[i] <- FALSE
299
+  }
300
+}
301
+
302
+### images of post
303
+mastodon_img <- c()
304
+for(i in 1:length(mastodon_toot[[20]])){
305
+  mastodon_img[i] <- length(mastodon_toot[[20]][[i]])
306
+}
307
+
308
+### Cleaning data (removal of excluded posts)
309
+mastodon_exclude <- seq_along(mastodon_date) %in% c(which(mastodon_bot),
310
+		      which(mastodon_date < 20180101))  # logical mask: TRUE = drop (bot post or older than 2018)
311
+date <- mastodon_date[!mastodon_exclude]  # a mask keeps all rows when nothing is excluded; x[-integer(0)] would drop everything
312
+time <- mastodon_time[!mastodon_exclude]
313
+lang <- mastodon_lang[!mastodon_exclude]
314
+inst <- mastodon_insta[!mastodon_exclude]
315
+text <- mastodon_txt[!mastodon_exclude]
316
+link <- mastodon_url[!mastodon_exclude]
317
+favs <- mastodon_fav[!mastodon_exclude]
318
+imag <- mastodon_img[!mastodon_exclude]
319
+
320
+### Creating dataframe
321
+mastodon <- data.frame(cbind(date, time, lang, inst, text, link, favs, imag))
161 322
 
323
+#### Clean-Up
324
+rm(list = c("date", "time", "lang", "inst", "text", "link", "favs", "imag"))
162 325
 
326
+mastodon <- within(data = mastodon, expr = {
327
+		     date <- as.numeric(as.character(date));
328
+		     time <- as.numeric(as.character(time));
329
+		     text <- as.character(text);
330
+		     link <- as.character(link);
331
+		  })
163 332
 # }}}

+ 2
- 0
fediverse_mastodon_api_example.txt View File

@@ -0,0 +1,2 @@
1
+instance;mail;password
2
+https://mastodon.social;me@localhost;ThisIsNotASafePassword