Browse Source

Edit: Performance Fixes for Twitter-Section

janwey 1 year ago
parent
commit
1e90ca2f28
1 changed file with 21 additions and 27 deletions
  1. 21
    27
      collecto.R

+ 21
- 27
collecto.R View File

@@ -57,85 +57,79 @@ twitter_tw <- strip_retweets(tweets = twitter_tw_dirty,
57 57
 			     strip_manual = FALSE,
58 58
 			     strip_mt = FALSE)
59 59
 
60
-### Time of tweet
60
+### Extract relevant data from dataset
61 61
 twitter_timedate <- c()
62
+twitter_client <- c()
63
+twitter_name <- c()
64
+twitter_rts <- c()
65
+twitter_fav <- c()
66
+twitter_url <- c()
67
+twitter_txt <- c()
62 68
 for(i in 1:length(twitter_tw)){
69
+
70
+  #### Time of tweet
63 71
   if(length(twitter_tw[[i]]$created) > 0){
64 72
     twitter_timedate[i] <- as.character(twitter_tw[[i]]$created)
65 73
   } else {
66 74
   # insert empty value, if it does not exist
67 75
     twitter_timedate[i] <- NA
68 76
   }
69
-}
70 77
 
71
-### Client used
72
-twitter_client <- c()
73
-for(i in 1:length(twitter_tw)){
78
+  #### Client used
74 79
   if(length(twitter_tw[[i]]$statusSource) > 0){
75 80
     twitter_client[i] <- as.character(twitter_tw[[i]]$statusSource)
76 81
   } else {
77 82
   # insert empty value, if it does not exist
78 83
     twitter_client[i] <- NA
79 84
   }
80
-}
81
-twitter_client <- sub(pattern = ".*\">", replace = "", x = twitter_client)
82
-twitter_client <- sub(pattern = "</a>", replace = "", x = twitter_client)
83 85
 
84
-### Screen names / Twitter Handles
85
-twitter_name <- c()
86
-for(i in 1:length(twitter_tw)){
86
+  #### Screen names / Twitter Handles
87 87
   if(length(twitter_tw[[i]]$screenName) > 0){
88 88
     twitter_name[i] <- as.character(twitter_tw[[i]]$screenName)
89 89
   } else {
90 90
     # insert empty value, if it does not exist
91 91
     twitter_name[i] <- NA
92 92
   }
93
-}
94 93
 
95
-### Number of retweets
96
-twitter_rts <- c()
97
-for(i in 1:length(twitter_tw)){
94
+  #### Number of retweets
98 95
   if(length(twitter_tw[[i]]$retweetCount) > 0){
99 96
     twitter_rts[i] <- as.character(twitter_tw[[i]]$retweetCount)
100 97
   } else {
101 98
   # insert empty value, if it does not exist
102 99
     twitter_rts[i] <- NA
103 100
   }
104
-}
105 101
 
106
-### Number of favorites
107
-twitter_fav <- c()
108
-for(i in 1:length(twitter_tw)){
102
+  #### Number of favorites
109 103
   if(length(twitter_tw[[i]]$favoriteCount) > 0){
110 104
     twitter_fav[i] <- as.character(twitter_tw[[i]]$favoriteCount)
111 105
   } else {
112 106
   # insert empty value, if it does not exist
113 107
     twitter_fav[i] <- NA
114 108
   }
115
-}
116 109
 
117
-### URLs posted about
118
-twitter_url <- c()
119
-for(i in 1:length(twitter_tw)){
110
+  #### URLs posted about
120 111
   if(length(twitter_tw[[i]]$urls$expanded_url) > 0){
121 112
     twitter_url[i] <- as.character(twitter_tw[[i]]$urls$expanded_url)
122 113
   } else {
123 114
   # insert empty value, if it does not exist
124 115
     twitter_url[i] <- NA
125 116
   }
126
-}
127 117
 
128
-### actual tweet/text
129
-twitter_txt <- c()
130
-for(i in 1:length(twitter_tw)){
118
+  #### actual tweet/text
131 119
   if(length(twitter_tw[[i]]$text) > 0){
132 120
     twitter_txt[i] <- as.character(twitter_tw[[i]]$text)
133 121
   } else {
134 122
   # insert empty value, if it does not exist
135 123
     twitter_txt[i] <- NA
136 124
   }
125
+
137 126
 }
138 127
 
128
+### Removing HTML-Tags from Client-info
129
+twitter_client <- sub(pattern = ".*\">", replace = "", x = twitter_client)
130
+twitter_client <- sub(pattern = "</a>", replace = "", x = twitter_client)
131
+
132
+### Forming variables for dataframe
139 133
 time <- sub(pattern = ".* ", x = twitter_timedate, replace = "")
140 134
 time <- as.numeric(gsub(pattern = ":", x = time, replace = ""))
141 135
 date <- sub(pattern = " .*", x = twitter_timedate, replace = "")

Loading…
Cancel
Save