Browse Source

Edit: Performance Fixes for Twitter-Section

pull/3/head
janwey 1 year ago
parent
commit
1e90ca2f28
1 changed file with 21 additions and 27 deletions
  1. 21
    27
      collecto.R

+ 21
- 27
collecto.R View File

@@ -57,85 +57,79 @@ twitter_tw <- strip_retweets(tweets = twitter_tw_dirty,
strip_manual = FALSE,
strip_mt = FALSE)

### Time of tweet
### Extract relevant data from dataset
# NOTE(review): this listing looks like a diff rendered without its +/-
# markers, so statements from the old and the new revision may be
# interleaved here -- confirm against the actual collecto.R before relying
# on the exact statement order.
#
# One result vector per extracted field, each starting out empty (c() is
# NULL). The loops in this section fill them by index, which grows the
# vector one element at a time.
twitter_timedate <- c()
twitter_client <- c()
twitter_name <- c()
twitter_rts <- c()
twitter_fav <- c()
twitter_url <- c()
twitter_txt <- c()
# NOTE(review): 1:length(x) evaluates to c(1, 0) when x is empty, so the
# body would still run for i = 1 and i = 0 on an empty twitter_tw;
# seq_along(twitter_tw) would avoid that.
for(i in 1:length(twitter_tw)){

#### Time of tweet
# Store the `created` field of element i as a character string; the
# length() test guards against the field being absent/empty.
if(length(twitter_tw[[i]]$created) > 0){
twitter_timedate[i] <- as.character(twitter_tw[[i]]$created)
} else {
# field missing for this element: record NA so positions stay aligned
twitter_timedate[i] <- NA
}
}

### Client used
# NOTE(review): twitter_client is also assigned c() earlier in this listing;
# this assignment resets it to empty again before the loop refills it.
twitter_client <- c()
# NOTE(review): 1:length(x) is c(1, 0) for an empty x; seq_along() is the
# safe form.
for(i in 1:length(twitter_tw)){
#### Client used
# Store the `statusSource` field of element i as a character string.
if(length(twitter_tw[[i]]$statusSource) > 0){
twitter_client[i] <- as.character(twitter_tw[[i]]$statusSource)
} else {
# field missing for this element: record NA so positions stay aligned
twitter_client[i] <- NA
}
}
# Strip markup from the client string: the first sub() drops everything up
# to and including the last `">` (the `.*` is greedy), the second drops the
# first `</a>`. Presumably statusSource is an HTML anchor such as
# <a href="...">Client name</a> -- TODO confirm.
# NOTE(review): `replace =` works only through partial matching of sub()'s
# `replacement` argument.
twitter_client <- sub(pattern = ".*\">", replace = "", x = twitter_client)
twitter_client <- sub(pattern = "</a>", replace = "", x = twitter_client)

### Screen names / Twitter Handles
# NOTE(review): twitter_name is also assigned c() earlier in this listing;
# this assignment resets it to empty again before the loop refills it.
twitter_name <- c()
# NOTE(review): 1:length(x) is c(1, 0) for an empty x; seq_along() is the
# safe form.
for(i in 1:length(twitter_tw)){
#### Screen names / Twitter Handles
# Store the `screenName` field of element i as a character string.
if(length(twitter_tw[[i]]$screenName) > 0){
twitter_name[i] <- as.character(twitter_tw[[i]]$screenName)
} else {
# field missing for this element: record NA so positions stay aligned
twitter_name[i] <- NA
}
}

### Number of retweets
# NOTE(review): twitter_rts is also assigned c() earlier in this listing;
# this assignment resets it to empty again before the loop refills it.
twitter_rts <- c()
# NOTE(review): 1:length(x) is c(1, 0) for an empty x; seq_along() is the
# safe form.
for(i in 1:length(twitter_tw)){
#### Number of retweets
# Store the `retweetCount` field of element i. as.character() means the
# count is kept as text, not as a number.
if(length(twitter_tw[[i]]$retweetCount) > 0){
twitter_rts[i] <- as.character(twitter_tw[[i]]$retweetCount)
} else {
# field missing for this element: record NA so positions stay aligned
twitter_rts[i] <- NA
}
}

### Number of favorites
# NOTE(review): twitter_fav is also assigned c() earlier in this listing;
# this assignment resets it to empty again before the loop refills it.
twitter_fav <- c()
# NOTE(review): 1:length(x) is c(1, 0) for an empty x; seq_along() is the
# safe form.
for(i in 1:length(twitter_tw)){
#### Number of favorites
# Store the `favoriteCount` field of element i. as.character() means the
# count is kept as text, not as a number.
if(length(twitter_tw[[i]]$favoriteCount) > 0){
twitter_fav[i] <- as.character(twitter_tw[[i]]$favoriteCount)
} else {
# field missing for this element: record NA so positions stay aligned
twitter_fav[i] <- NA
}
}

### URLs posted about
# NOTE(review): twitter_url is also assigned c() earlier in this listing;
# this assignment resets it to empty again before the loop refills it.
twitter_url <- c()
# NOTE(review): 1:length(x) is c(1, 0) for an empty x; seq_along() is the
# safe form.
for(i in 1:length(twitter_tw)){
#### URLs posted about
# Store `urls$expanded_url` of element i as a character string.
# NOTE(review): if expanded_url holds more than one value, assigning it to
# the single slot [i] keeps only the first value and R emits a
# "number of items to replace" warning -- confirm whether dropping the
# remaining URLs is intended.
if(length(twitter_tw[[i]]$urls$expanded_url) > 0){
twitter_url[i] <- as.character(twitter_tw[[i]]$urls$expanded_url)
} else {
# no URL for this element: record NA so positions stay aligned
twitter_url[i] <- NA
}
}

### actual tweet/text
# NOTE(review): twitter_txt is also assigned c() earlier in this listing;
# this assignment resets it to empty again before the loop refills it.
twitter_txt <- c()
# NOTE(review): 1:length(x) is c(1, 0) for an empty x; seq_along() is the
# safe form.
for(i in 1:length(twitter_tw)){
#### actual tweet/text
# Store the `text` field of element i as a character string.
if(length(twitter_tw[[i]]$text) > 0){
twitter_txt[i] <- as.character(twitter_tw[[i]]$text)
} else {
# field missing for this element: record NA so positions stay aligned
twitter_txt[i] <- NA
}

}

### Removing HTML-Tags from Client-info
# The first sub() drops everything up to and including the last `">` (the
# `.*` is greedy); the second drops the first `</a>`.
# NOTE(review): the same two sub() calls on twitter_client also appear
# earlier in this listing; once the markup is gone a second pass matches
# nothing, unless the remaining text still contains `">` or `</a>`.
# NOTE(review): `replace =` works only through partial matching of sub()'s
# `replacement` argument.
twitter_client <- sub(pattern = ".*\">", replace = "", x = twitter_client)
twitter_client <- sub(pattern = "</a>", replace = "", x = twitter_client)

### Forming variables for dataframe
# Split each timestamp string at its space. Presumably the strings look like
# "YYYY-MM-DD HH:MM:SS" -- TODO confirm the format produced upstream.
#   time: keep what follows the last space, remove the colons and convert
#         to a number (e.g. "12:34:56" becomes 123456).
#   date: keep what precedes the first space.
# NOTE(review): `time` and `date` share their names with existing R
# functions (stats::time, base::date); calls to those functions still
# resolve, but the names are easy to confuse.
time <- sub(pattern = ".* ", x = twitter_timedate, replace = "")
time <- as.numeric(gsub(pattern = ":", x = time, replace = ""))
date <- sub(pattern = " .*", x = twitter_timedate, replace = "")

Loading…
Cancel
Save