Browse Source

extract mentions and hashtags

master
JayVii 8 months ago
parent
commit
f7918503d3
2 changed files with 17 additions and 0 deletions
  1. 8
    0
      scripts/collecto.R
  2. 9
    0
      scripts/functions.R

+ 8
- 0
scripts/collecto.R View File

@@ -24,6 +24,9 @@ if(!require("RedditExtractoR")){
# Export as ODS ------------------------
if(!require("readODS")){ install.packages("readODS"); library("readODS") }

# Text Manipulation --------------------
if(!require("stringi")){ install.packages("stringi"); library("stringi") }

# Read helper functions ----------------
source("./functions.R")

@@ -70,6 +73,11 @@ tweets <- within(data = tweets, expr = {
ctxt <- gsub(pattern = "http.?://.+($|\\s)", x = text, replace = "") %>%
gsub(pattern = "\n", x = text, replace = " ")

# extract mentioned accounts ("@" followed by non-whitespace), then strip
# the characters ? ! ; : . and a trailing comma
ment <- list2vec(stri_extract_all(regex = "@\\S+", str = text)) %>%
gsub(pattern = "[\\?!;:\\.]|(,$)", replacement = "")

# Client data
clnt <- as.factor(source)
rm("source")

+ 9
- 0
scripts/functions.R View File

@@ -43,6 +43,15 @@ mastodon.extract <- function(data){
# sanitizing text (removing HTML tags and runs of two or more whitespace characters)
text <- gsub(pattern = "<.*?>|\\s{2,}", x = x$content, replacement = "")

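# extract hashtags (tokens introduced by "#" or "!")
# NOTE(review): [A-Z,a-z,0-9] also matches a literal comma; confirm intent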
htag <- list2vec(
stri_extract_all(str = mastodon$text, regex = "(#|!)[A-Z,a-z,0-9]+")
)

# extract mentions ("@" followed by non-whitespace), then strip the
# characters ? ! ; : and a trailing comma
ment <- list2vec(stri_extract_all(regex = "@\\S+", str = mastodon$text)) %>%
gsub(pattern = "[\\?!;:]|(,$)", replacement = "")

# media URL (multiple possible)
murl <- valifexst(
list2vec(

Loading…
Cancel
Save