Collecting, Analyzing and Presenting data about the participation in #ilovefs day
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

functions.R 2.6KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. ################################################################################
  2. # Copyright (c) 2018 Free Software Foundation Europe e.V. <contact@fsfe.org>
  3. # Author 2019 Jan Weymeirsch <janwey@fsfe.org>
  4. # SPDX-License-Identifier: GPL-3.0
  5. ################################################################################
  6. # Helper Functions -------------------------------------------------------------
  7. # to be sourced from the main script.
  8. # List2Vec ---------------------------------------------------------------------
  9. # transforms a list() object x into a vector with single strings, divided by a
  10. # seperator
  11. list2vec <- function(x, sep = ","){
  12. sapply(X = x, FUN = function(y) paste(unlist(y), collapse = sep))
  13. }
  14. # ValIfExst --------------------------------------------------------------------
  15. # tests whether an object contains values. If yes, it returns that value, else
  16. # returns NA
  17. valifexst <- function(x) ifelse(test = length(x) > 0, yes = x, no = NA)
  18. # Mastodon Extractor -----------------------------------------------------------
  19. # extracts a set of information from a nested list-item containing every single
  20. # information on a single post within one upper list, as returned by the
  21. # "tags" mastodon-API v1
  22. mastodon.extract <- function(data){
  23. # Within each post
  24. data <- sapply(X = data, FUN = function(x){
  25. # time and date
  26. time <- gsub(x = x$created_at, pattern = ".*T|\\..*", replacement = "")
  27. date <- sub(x = x$created_at, pattern = "T.*", replacement = "")
  28. # simple extraction, return NA if value does not exist
  29. lang <- valifexst(x$language) # language
  30. inst <- valifexst(x$uri) # instance name
  31. link <- valifexst(x$url) # post URL
  32. rebl <- valifexst(x$reblogs_count) # number of reblogs
  33. favs <- valifexst(x$favourites_count) # number of favorites
  34. acct <- valifexst(x$account$url) # account url (unique)
  35. # sanitizing text (removing HTML tags and whitespace)
  36. text <- gsub(pattern = "<.*?>|\\s{2,}", x = x$content, replacement = "")
  37. # media URL (multiple possible)
  38. murl <- valifexst(
  39. list2vec(
  40. sapply(X = x$media_attachements, FUN = function(y){
  41. y$url
  42. })
  43. )
  44. )
  45. # return extracted data only
  46. return(data.frame(
  47. rbind(time, date, lang, inst, link, text, rebl, favs, acct, murl)
  48. ))
  49. })
  50. # transform "clean" list object into dataframe
  51. data <- as.data.frame(
  52. t(matrix(data = unlist(data), nrow = length(data[[1]])))
  53. )
  54. # return data.frame object
  55. return(data)
  56. }
  57. # EOF functions.R