|
@@ -6,12 +6,20 @@
|
6
|
6
|
################################################################################
|
7
|
7
|
|
8
|
8
|
### Loading Packages {{{ ----
|
|
9
|
+#### Twitter
|
9
|
10
|
install.packages("twitteR")
|
10
|
11
|
library("twitteR")
|
11
|
12
|
# had to install "httr" via the package manager
|
12
|
13
|
|
|
14
|
+#### Facebook
|
13
|
15
|
install.packages("Rfacebook")
|
14
|
16
|
library("Rfacebook")
|
|
17
|
+
|
|
18
|
+#### Fediverse (eg: mastodon)
|
|
19
|
+install.packages("devtools")
|
|
20
|
+# requires libssl-dev
|
|
21
|
+devtools::install_github("ThomasChln/mastodon")
|
|
22
|
+library("mastodon")
|
15
|
23
|
# }}}
|
16
|
24
|
|
17
|
25
|
## Twitter Collector {{{ ----
|
|
@@ -51,13 +59,13 @@ twitter_tw <- strip_retweets(tweets = twitter_tw_dirty,
|
51
|
59
|
strip_mt = FALSE)
|
52
|
60
|
|
53
|
61
|
### Time of tweet
|
54
|
|
-twitter_time <- c()
|
|
62
|
+twitter_timedate <- c()
|
55
|
63
|
for(i in 1:length(twitter_tw)){
|
56
|
64
|
if(length(twitter_tw[[i]]$created) > 0){
|
57
|
|
- twitter_time[i] <- as.character(twitter_tw[[i]]$created)
|
|
65
|
+ twitter_timedate[i] <- as.character(twitter_tw[[i]]$created)
|
58
|
66
|
} else {
|
59
|
67
|
# insert empty value, if it does not exist
|
60
|
|
- twitter_time[i] <- NA
|
|
68
|
+ twitter_timedate[i] <- NA
|
61
|
69
|
}
|
62
|
70
|
}
|
63
|
71
|
|
|
@@ -130,13 +138,35 @@ for(i in 1:length(twitter_tw)){
|
130
|
138
|
}
|
131
|
139
|
|
132
|
140
|
# Control output, uncomment if needed
|
133
|
|
-#twitter_time
|
134
|
|
-#twitter_client
|
135
|
|
-#twitter_name
|
136
|
|
-#twitter_rts
|
137
|
|
-#twitter_fav
|
138
|
|
-#twitter_url
|
139
|
|
-#twitter_txt
|
|
141
|
+twitter_timedate
|
|
142
|
+twitter_client
|
|
143
|
+twitter_name
|
|
144
|
+twitter_rts
|
|
145
|
+twitter_fav
|
|
146
|
+twitter_url
|
|
147
|
+twitter_txt
|
|
148
|
+
|
|
149
|
+time <- sub(pattern = ".* ", x = twitter_timedate, replace = "")
|
|
150
|
+time <- as.numeric(gsub(pattern = ":", x = time, replace = ""))
|
|
151
|
+date <- sub(pattern = " .*", x = twitter_timedate, replace = "")
|
|
152
|
+date <- as.numeric(gsub(pattern = "-", x = date, replace = ""))
|
|
153
|
+retw <- as.factor(twitter_rts)
|
|
154
|
+favs <- as.factor(twitter_fav)
|
|
155
|
+link <- as.character(twitter_url)
|
|
156
|
+text <- as.character(twitter_txt)
|
|
157
|
+
|
|
158
|
+### Creating dataframe
|
|
159
|
+twitter <- data.frame(cbind(date, time, retw, favs, text, link))
|
|
160
|
+
|
|
161
|
+#### Clean-Up
|
|
162
|
+rm(list = c("date", "time", "retw", "favs", "text", "link"))
|
|
163
|
+
|
|
164
|
+twitter <- within(data = twitter, expr = {
|
|
165
|
+ date <- as.numeric(as.character(date));
|
|
166
|
+ time <- as.numeric(as.character(time));
|
|
167
|
+ text <- as.character(text);
|
|
168
|
+ link <- as.character(link);
|
|
169
|
+ })
|
140
|
170
|
# }}}
|
141
|
171
|
|
142
|
172
|
## Facebook Collector [WIP] {{{ ----
|
|
@@ -150,14 +180,153 @@ facebook_api_cred <- read.table(file = "./facebook_api.txt", header = TRUE, sep
|
150
|
180
|
facebook_app_id <- as.character(facebook_api_cred$app_id)
|
151
|
181
|
facebook_secret <- as.character(facebook_api_cred$app_secret)
|
152
|
182
|
|
153
|
|
-facebook_oath <- fbOAuth(app_id = facebook_api_id,
|
|
183
|
+facebook_auth <- fbOAuth(app_id = facebook_api_id,
|
154
|
184
|
app_secret = facebook_secret)
|
155
|
185
|
|
156
|
186
|
### Get posts from FSFE
|
157
|
187
|
facebook_fsfe_posts <- Rfacebook::getPage(page = "thefsfe",
|
158
|
|
- token = facebook_oauth,
|
|
188
|
+ token = facebook_auth,
|
159
|
189
|
since = "2018-01-01",
|
160
|
|
- until = "2018-31-12",
|
|
190
|
+ until = "2018-31-12")
|
|
191
|
+# }}}
|
|
192
|
+
|
|
193
|
+## Mastodon Collector {{{ ----
|
|
194
|
+
|
|
195
|
+### Authenticate to the Fediverse (here: Mastodon)
|
|
196
|
+
|
|
197
|
+# Note -------------------------------------------------------------------------
|
|
198
|
+# It is sub-optimal to use clear-text credentials for the authentication
|
|
199
|
+# process, but the mastodon-package does not (yet) support OAuth
|
|
200
|
+# ------------------------------------------------------------------------------
|
|
201
|
+
|
|
202
|
+#### Manual input (uncomment if needed)
|
|
203
|
+#mastodon_auth_insta <- readline("[Mastodon] Enter your Instance-URL.")
|
|
204
|
+#mastodon_auth_login <- readline("[Mastodon] Enter your registered mail.")
|
|
205
|
+#mastodon_auth_passw <- readline("[Mastodon] Enter your password.")
|
|
206
|
+#### Saved credentials
|
|
207
|
+mastodon_api_cred <- read.table(file = "./fediverse_mastodon_api.txt", header = TRUE, sep = ";")
|
|
208
|
+mastodon_auth_insta <- as.character(mastodon_api_cred$instance)
|
|
209
|
+mastodon_auth_login <- as.character(mastodon_api_cred$mail)
|
|
210
|
+mastodon_auth_passw <- as.character(mastodon_api_cred$password)
|
|
211
|
+
|
|
212
|
+#### Authentication process
|
|
213
|
+mastodon_auth <- mastodon::login(instance = mastodon_insta,
|
|
214
|
+ user = mastodon_login,
|
|
215
|
+ pass = mastodon_passw)
|
|
216
|
+
|
|
217
|
+### Get posts from mastodon
|
|
218
|
+mastodon_toot <- mastodon::get_hashtag(token = mastodon_auth,
|
|
219
|
+ hashtag = "ilovefs",
|
|
220
|
+ local = FALSE,
|
|
221
|
+ n = 100)
|
|
222
|
+
|
|
223
|
+# Note -------------------------------------------------------------------------
|
|
224
|
+# Documentation is really poor, so here is a guess of the variables in the
|
|
225
|
+# list() item
|
|
226
|
+# 1. id
|
|
227
|
+# 2. time
|
|
228
|
+# 3.
|
|
229
|
+# 4.
|
|
230
|
+# 5.
|
|
231
|
+# 6.
|
|
232
|
+# 7. public/private
|
|
233
|
+# 8. language
|
|
234
|
+# 9. user-agent
|
|
235
|
+# 10. post-text (html)
|
|
236
|
+# 11. url of post
|
|
237
|
+# 12.
|
|
238
|
+# 13. favorites
|
|
239
|
+# 14.
|
|
240
|
+# 15.
|
|
241
|
+# 16.
|
|
242
|
+# 17.
|
|
243
|
+# 18.
|
|
244
|
+# 19. poster-information
|
|
245
|
+# 20. image in post
|
|
246
|
+# 21.
|
|
247
|
+# 22. information about searched hashtag
|
|
248
|
+# 23.
|
|
249
|
+# ------------------------------------------------------------------------------
|
|
250
|
+
|
|
251
|
+### Sort out non-public posts
|
|
252
|
+mastodon_priv <- which(mastodon_toot[[7]] != "public")
|
|
253
|
+if(length(mastodon_priv) > 0){
|
|
254
|
+ for(i in 1:length(mastodon_toot)){
|
|
255
|
+ mastodon_toot[[i]] <- mastodon_toot[[i]][-c(mastodon_priv)]
|
|
256
|
+ }
|
|
257
|
+}
|
|
258
|
+
|
|
259
|
+### Time of post
|
|
260
|
+#### date (as numeric value)
|
|
261
|
+mastodon_date <- sub(pattern = "T.*", x = mastodon_toot[[2]], replacement = "")
|
|
262
|
+mastodon_date <- gsub(pattern = "-", x = mastodon_date, replacement = "")
|
|
263
|
+mastodon_date <- as.numeric(mastodon_date)
|
|
264
|
+#### time (as numeric value)
|
|
265
|
+mastodon_time <- sub(pattern = ".*T", x = mastodon_toot[[2]], replacement = "")
|
|
266
|
+mastodon_time <- sub(pattern = "\\..*", x = mastodon_time, replacement = "")
|
|
267
|
+mastodon_time <- gsub(pattern = ":", x = mastodon_time, replacement = "")
|
|
268
|
+mastodon_time <- as.numeric(mastodon_time)
|
|
269
|
+
|
|
270
|
+### Language of post
|
|
271
|
+mastodon_lang <- mastodon_toot[[8]]
|
|
272
|
+
|
|
273
|
+### Instance of post
|
|
274
|
+mastodon_insta <- sub(pattern = "tag:", x = mastodon_toot[[9]], replacement = "")
|
|
275
|
+mastodon_insta <- sub(pattern = ",\\d+.*", x = mastodon_insta, replacement = "")
|
|
276
|
+#### in case the instance name is a full url
|
|
277
|
+mastodon_insta <- sub(pattern = ".*://", x = mastodon_insta, replacement = "")
|
|
278
|
+mastodon_insta <- sub(pattern = "/.*", x = mastodon_insta, replacement = "")
|
|
279
|
+
|
|
280
|
+### Text of post
|
|
281
|
+#### exclude all HTML
|
|
282
|
+mastodon_txt <- gsub(pattern = "<.*?>", x = mastodon_toot[[10]], replacement = "")
|
|
283
|
+mastodon_txt <- gsub(pattern = " ", x = mastodon_txt, replacement = "")
|
|
284
|
+
|
|
285
|
+### URL of post
|
|
286
|
+mastodon_url <- mastodon_toot[[11]]
|
|
287
|
+
|
|
288
|
+### Favorites of posts
|
|
289
|
+mastodon_fav <- mastodon_toot[[13]]
|
|
290
|
+
|
|
291
|
+### Information about posters
|
|
292
|
+mastodon_pers <- mastodon_toot[[19]]
|
|
293
|
+mastodon_bot <- c()
|
|
294
|
+for(i in 1:length(mastodon_pers)){
|
|
295
|
+ if(mastodon_pers[[i]]$username == "TrendingBot"){
|
|
296
|
+ mastodon_bot[i] <- TRUE
|
|
297
|
+ } else {
|
|
298
|
+ mastodon_bot[i] <- FALSE
|
|
299
|
+ }
|
|
300
|
+}
|
|
301
|
+
|
|
302
|
+### images of post
|
|
303
|
+mastodon_img <- c()
|
|
304
|
+for(i in 1:length(mastodon_toot[[20]])){
|
|
305
|
+ mastodon_img[i] <- length(mastodon_toot[[20]][[i]])
|
|
306
|
+}
|
|
307
|
+
|
|
308
|
+### Cleaning data (removal of excluded posts)
|
|
309
|
+mastodon_exclude <- c(which(mastodon_bot),
|
|
310
|
+ which(mastodon_date < 20180101))
|
|
311
|
+date <- mastodon_date[-mastodon_exclude]
|
|
312
|
+time <- mastodon_time[-mastodon_exclude]
|
|
313
|
+lang <- mastodon_lang[-mastodon_exclude]
|
|
314
|
+inst <- mastodon_insta[-mastodon_exclude]
|
|
315
|
+text <- mastodon_txt[-mastodon_exclude]
|
|
316
|
+link <- mastodon_url[-mastodon_exclude]
|
|
317
|
+favs <- mastodon_fav[-mastodon_exclude]
|
|
318
|
+imag <- mastodon_img[-mastodon_exclude]
|
|
319
|
+
|
|
320
|
+### Creating dataframe
|
|
321
|
+mastodon <- data.frame(cbind(date, time, lang, inst, text, link, favs, imag))
|
161
|
322
|
|
|
323
|
+#### Clean-Up
|
|
324
|
+rm(list = c("date", "time", "lang", "inst", "text", "link", "favs", "imag"))
|
162
|
325
|
|
|
326
|
+mastodon <- within(data = mastodon, expr = {
|
|
327
|
+ date <- as.numeric(as.character(date));
|
|
328
|
+ time <- as.numeric(as.character(time));
|
|
329
|
+ text <- as.character(text);
|
|
330
|
+ link <- as.character(link);
|
|
331
|
+ })
|
163
|
332
|
# }}}
|