Browse Source

WIP: reworking plots

master
JayVii 7 months ago
parent
commit
7d2ecc107f
3 changed files with 170 additions and 93 deletions
  1. 2
    3
      scripts/collecto.R
  2. 6
    4
      scripts/functions.R
  3. 162
    86
      scripts/plotte.R

+ 2
- 3
scripts/collecto.R View File

@@ -75,8 +75,7 @@ tweets <- within(data = tweets, expr = {

# extract mentioned accounts
ment <- list2vec(stri_extract_all(regex = "@\\S+", str = text)) %>%
gsub(pattern = "[\\?!;:\\.]|(,$)", replacement = "")
gsub(pattern = "[\\?!;:\\.]|(,$)|\\(|\\)", replacement = "")

# Client data
clnt <- as.factor(source)
@@ -145,7 +144,7 @@ for(i in 1:mastodon_iterations){
}
# adding variable-names (again)
names(toots) <- c("time", "date", "lang", "inst", "link", "text",
"rebl", "favs", "acct", "murl")
"rebl", "favs", "acct", "murl", "htag", "ment")

# Simple(!) anonymization --------------
toots$acct <- as.numeric(toots$acct) # unique only to this dataframe

+ 6
- 4
scripts/functions.R View File

@@ -45,12 +45,13 @@ mastodon.extract <- function(data){

# extract hashtags
htag <- list2vec(
stri_extract_all(str = mastodon$text, regex = "(#|!)[A-Z,a-z,0-9]+")
stri_extract_all(str = text, regex = "(#|!)[A-Z,a-z,0-9]+")
)

# extract mentions
ment <- list2vec(stri_extract_all(regex = "@\\S+", str = mastodon$text)) %>%
gsub(pattern = "[\\?!;:]|(,$)", replacement = "")
ment <- list2vec(stri_extract_all(regex = "@\\S+", str = text)) %>%
gsub(pattern = "(@twitter\\.com)|[\\?!;:\\.]|(,$)|\\(|\\)",
replacement = "")

# media URL (multiple possible)
murl <- valifexst(
@@ -63,7 +64,8 @@ mastodon.extract <- function(data){

# return extracted data only
return(data.frame(
rbind(time, date, lang, inst, link, text, rebl, favs, acct, murl)
rbind(time, date, lang, inst, link, text, rebl, favs, acct, murl,
htag, ment)
))
})


+ 162
- 86
scripts/plotte.R View File

@@ -4,98 +4,166 @@
# SPDX-License-Identifier: GPL-3.0
################################################################################

## Loading Packages {{{ ----
### ggplot2 for plots
# Loading Packages -------------------------------------------------------------

# Plotting -----------------------------
if(!require("ggplot2")){ install.packages("ggplot2"); library("ggplot2") }
### arrange ggplot2 plots
if(!require("gridExtra")){ install.packages("gridExtra"); library("gridExtra") }
# }}}

## Loading Data {{{ ----
### Restore the objects saved in the collected-data snapshot into the session.
load(file = "./data/ilovefs-all_2018-02-20_14-57-16.RData")
### The following requires you to extract the hashtags first (see further down).
### The result can later be imported again from this CSV file, which is read
### as comma-separated and without a header row.
hash_ment <- read.csv2(file = "./data/tags_mentions.csv", sep = ",", header = FALSE)
# }}}
# Text Manipulation --------------------
if(!require("stringi")){ install.packages("stringi"); library("stringi") }

## Extract Hashtags {{{ ----
### Characters stripped from the extracted tokens. Each pattern removes at most
### its first occurrence per token, because sub() is used rather than gsub().
hashtag_noise <- c(",", "\\:", "\\?", "\\!", "…")

### Fediverse
### stri_extract_all() yields one character vector per post. unlist() flattens
### them in post order instead of growing a vector element by element, and it
### also copes with an empty list, where looping over 1:length() would fail.
mastodon_hashtags_l <- stri_extract_all(str = mastodon$text, regex = "#\\w+")
mastodon_hashtags <- unlist(mastodon_hashtags_l, use.names = FALSE)
for (noise in hashtag_noise) {
  mastodon_hashtags <- sub(
    x = mastodon_hashtags, pattern = noise, replacement = ""
  )
}
unique(mastodon_hashtags)

### Twitter
twitter_hashtags_l <- stri_extract_all(str = twitter$text, regex = "#\\w+")
twitter_hashtags <- unlist(twitter_hashtags_l, use.names = FALSE)
for (noise in hashtag_noise) {
  twitter_hashtags <- sub(
    x = twitter_hashtags, pattern = noise, replacement = ""
  )
}
unique(twitter_hashtags)
# }}}
# Loading Data -----------------------------------------------------------------

## Extract Mentions {{{ ----
### Characters stripped from the extracted tokens. Each pattern removes at most
### its first occurrence per token, because sub() is used rather than gsub().
mention_noise <- c(",", "\\:", "\\?", "\\!", "…")

### Fediverse
### stri_extract_all() yields one character vector per post. unlist() flattens
### them in post order instead of growing a vector element by element, and it
### also copes with an empty list, where looping over 1:length() would fail.
mastodon_mentions_l <- stri_extract_all(str = mastodon$text, regex = "\\@\\S+")
mastodon_mentions <- unlist(mastodon_mentions_l, use.names = FALSE)
for (noise in mention_noise) {
  mastodon_mentions <- sub(
    x = mastodon_mentions, pattern = noise, replacement = ""
  )
}
unique(mastodon_mentions)

### Twitter
twitter_mentions_l <- stri_extract_all(str = twitter$text, regex = "\\@\\S+")
twitter_mentions <- unlist(twitter_mentions_l, use.names = FALSE)
for (noise in mention_noise) {
  twitter_mentions <- sub(
    x = twitter_mentions, pattern = noise, replacement = ""
  )
}
unique(twitter_mentions)
# }}}

### Participation per Platform {{{ ----
# Full Dataset -------------------------
load(file = "../data/ilovefs-all_2018-02-20_14-57-16.RData")

#### Calculating Platform numbers
##### Platform (Twitter/Fediverse)
##### One label per collected post, so that tabulating `platform` counts posts.
twitter_number <- rep_len("twitter", length(twitter$text))
fediver_number <- rep_len("fediverse", length(mastodon$text))
platform <- factor(
  c(twitter_number, fediver_number),
  levels = c("fediverse", "twitter")
)
##### Instances (Fediverse)
##### Everything up to and including the last "@" of the account is dropped;
##### accounts without any "@" are assigned to mastodon.social.
fediverse_accounts <- as.character(mastodon$acct)
msoc <- grep(pattern = "@", x = fediverse_accounts, invert = TRUE)
instances <- sub(pattern = ".*\\@", replacement = "", x = fediverse_accounts)
instances[msoc] <- "mastodon.social"
instances <- as.factor(instances)
# Extracted "mentioned projects" -------
# Read the CSV only when exactly this file is present. Matching the directory
# listing against the name as a regular expression would also accept files
# that merely contain a similar name, and reading would then fail.
hash_ment_file <- "../data/tags_mentions.csv"
if (file.exists(hash_ment_file)) {
  hash_ment <- read.csv2(file = hash_ment_file, sep = ",", header = FALSE)
}

#### Plotting the results
## Extract Mentions ------------------------------------------------------------

# Split a column of comma-separated tokens into one factor of single tokens.
#   x      column with comma-separated tokens; coerced with as.character() so
#          that factor columns are accepted as well
#   strip  regular expression of the marker characters removed from each token
#   drop   value that stands for "nothing extracted" once markers are removed
# Tokens equal to `drop` and missing values are discarded before the factor is
# built.
split_tokens <- function(x, strip, drop) {
  tokens <- unlist(strsplit(x = as.character(x), split = ","))
  tokens <- gsub(x = tokens, pattern = strip, replacement = "")
  # %in% never yields NA, so missing tokens cannot end up in the index
  tokens[tokens %in% drop] <- NA
  as.factor(na.omit(tokens))
}

# Fediverse ----------------------------
mst_htag <- split_tokens(toots$htag, strip = "#|!", drop = "")
mst_ment <- split_tokens(toots$ment, strip = "@", drop = "NA")

# Twitter ------------------------------
twt_htag <- split_tokens(tweets$htag, strip = "#", drop = "")
twt_ment <- split_tokens(tweets$ment, strip = "@", drop = "NA")

# Participation per Platform ---------------------------------------------------

# Calculating Platform numbers ---------
# One label per collected item, so that table(platform) counts the items.
twt_num <- rep(x = "twitter", times = length(tweets$text))
mst_num <- rep(x = "fediverse", times = length(toots$text))
rdt_num <- rep(x = "reddit", times = length(reddit$name))
platform <- factor(c(twt_num, mst_num, rdt_num),
                   levels = c("twitter", "fediverse", "reddit"))

# Cleaning Instances -------------------
# Written as plain function calls: this script attaches only ggplot2,
# gridExtra and stringi, none of which provides the %>% operator.
# First drop the "urn:X-dfrn:" prefix, then everything from the first ":" on.
instances <- sub(x = toots$inst, pattern = "urn:X-dfrn:", replacement = "")
instances <- sub(x = instances, pattern = ":.*", replacement = "")
instances <- as.factor(instances)

# Preparing Plot -----------------------
# Every category becomes a segment from ymin to ymax of the cumulative share;
# pos is the middle of that segment and is used to place the count label.
part1_df <- data.frame(count = as.numeric(table(platform)),
                       category = levels(platform))
part1_df <- within(data = part1_df, expr = {
  fraction <- prop.table(count)
  ymax <- cumsum(fraction)
  ymin <- c(0, head(ymax, n = -1))
  colors <- c("#4D922199", "#D7302799", "#4575B499")
  pos <- (cumsum(fraction) - fraction / 2)
  # empty categories get no label instead of a "0"
  count[count == 0] <- NA
})

part2_df <- data.frame(count = as.numeric(table(instances)),
                       category = levels(instances))
part2_df <- within(data = part2_df, expr = {
  fraction <- prop.table(count)
  ymax <- cumsum(fraction)
  ymin <- c(0, head(ymax, n = -1))
  # one rainbow colour per instance, with the opaque alpha "FF" replaced by "80"
  colors <- sub(x = rainbow(length(count)), pattern = "FF$", replacement = "80")
  pos <- (cumsum(fraction) - fraction / 2)
  count[count == 0] <- NA
})

# Plotting the results -----------------
# Ring chart of the participation per platform: every platform is drawn as a
# rectangle covering its cumulative share (ymin to ymax) on a fixed x band.
part1_plot <- ggplot(
  part1_df,
  aes(
    fill = category,
    ymax = ymax,
    ymin = ymin,
    xmax = 4,
    xmin = 3
  )
) +
  geom_rect() +
  # map y onto the angle, which bends the stacked rectangles into a ring
  coord_polar(theta = "y") +
  # x starts at 0, so the band from 3 to 4 leaves room in the middle
  xlim(c(0, 4)) +
  theme_minimal() +
  theme(
    legend.position = "right",
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    text = element_text(size = 14),
    # margin order: top, right, bottom, left
    # (no trailing comma: it would pass an empty argument to theme())
    plot.margin = unit(c(-0.5, -0.5, -0.5, -0.5), "in")
  ) +
  # title placed at the centre of the ring
  annotate("text", x = 0, y = 0, fontface = 2, size = 8,
           label = "Participation in #iLoveFS\nper Platform") +
  geom_text(
    aes(
      x = 3.5, # xmax is set to 4, 3.5 is barely within the circle
      y = pos,
      label = count
    ),
    size = 8
  ) +
  labs(title = "") +
  scale_fill_manual(
    name = NULL,
    breaks = part1_df$category,
    values = part1_df$colors
  )

# Ring chart of the participation per Fediverse instance. The counts are shown
# in the legend labels instead of inside the ring.
part2_plot <- ggplot(
  part2_df,
  aes(
    fill = category,
    ymax = ymax,
    ymin = ymin,
    xmax = 4,
    xmin = 3
  )
) +
  geom_rect() +
  # map y onto the angle, which bends the stacked rectangles into a ring
  coord_polar(theta = "y") +
  # x starts at 0, so the band from 3 to 4 leaves room in the middle
  xlim(c(0, 4)) +
  theme_minimal() +
  theme(
    legend.position = "right",
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    text = element_text(size = 13),
    # margin order: top, right, bottom, left
    # (no trailing comma: it would pass an empty argument to theme())
    plot.margin = unit(c(-1, 0, -1, 0), "in")
  ) +
  # title placed at the centre of the ring
  annotate("text", x = 0, y = 0, fontface = 2, size = 8,
           label = "Participation in #iLoveFS\nin the Fediverse") +
  labs(title = "") +
  scale_fill_manual(
    name = NULL,
    labels = paste0(as.character(part2_df$category), " (", part2_df$count,
                    ")"),
    breaks = part2_df$category,
    values = part2_df$colors
  )



# Plotting the results -----------------
pdf(file = "./plots/participation_platform.pdf", height = 10, width = 20)
par(mfrow = c(1,2))

@@ -145,6 +213,14 @@ mastodon_plot <- ggplot(data = mastodon, aes(x=mastodon_time)) +

#### Export / Save plots as PDF
pdf(file="./plots/ilfs-participation-by-date.pdf", width=14, height=7)
grid.arrange(twitter_plot, mastodon_plot, ncol = 2)
grid.arrange(twitter_plot, mastodon_plot, nrow = 2)
dev.off()
# }}}

# Test exports of the two participation plots. A ggplot object is only drawn
# when it is printed; calling print() explicitly keeps the PDFs from coming
# out empty when this script is run through source().
pdf(file = "test_platform.pdf", height = 7, width = 9)
print(part1_plot)
dev.off()

pdf(file = "test_instance.pdf", height = 7, width = 12)
print(part2_plot)
dev.off()

Loading…
Cancel
Save