I don't often use R interactively. I generally make a script, set it in crontab, and have it run automatically. So, I adapted it to run via Rscript.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/Rscript
# Adapted from http://decisionsandr.blogspot.com/2013/11/using-r-to-find-obamas-most-frequent.html
# As seen on http://blog.revolutionanalytics.com/2013/11/what-does-barack-obama-tweet-about-most.html
# I often run this sort of thing via the command line, so I use Cairo
# so I can make images without X11.
# Usage:
#   hashtags.rs <TwitterHandle>
# ----------------------------------------------------------------------
# Libraries
library(twitteR) # registerTwitterOAuth(), userTimeline(), twListToDF()
library(ggplot2) # ggplot(), geom_bar(), coord_flip(), labs()
library(Cairo)   # CairoPNG(): PNG output without needing X11
# ----------------------------------------------------------------------
# Arguments
# The first trailing command-line argument is the Twitter handle to analyse.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) == 0 || !nzchar(args[1])) {
  # Report on stderr and exit with a non-zero status so that cron or a
  # calling shell script can tell the run failed.
  message("No user set. Exiting.")
  quit(save = "no", status = 1)
}
user <- args[1]
# ----------------------------------------------------------------------
# Authentication and Configuration
# Tunables used further down the script.
NumberOfTweets <- 3200 # number of tweets to request from the timeline
NumberOfBars <- 20     # plot only the N most frequent hashtags

# Restore the saved credentials (file created with getTwitterOAuth; it is
# expected to define `cred`) and register them with twitteR.
load(file = "~/twitter-cred.Rd")
registerTwitterOAuth(cred)
# ----------------------------------------------------------------------
# Functions
# Count the hashtags that occur in a vector of tweet texts.
#
# vec: vector of texts (coerced to character); every element is scanned
#      for hashtags. NA elements are ignored.
#
# Returns a data frame with two columns, sorted by descending `freq`:
#   tag  - factor; the lower-cased hashtag including its leading "#"
#   freq - integer; number of occurrences across all of `vec`
# When no hashtag is found the result has zero rows but the same columns.
extract.hashes <- function(vec) {
  # Letters, digits and underscores. Without "_" a tag such as "#foo_bar"
  # would be cut short and counted as "#foo".
  hash.pattern <- "#[[:alnum:]_]+"

  vec <- as.character(vec)
  # grepl() yields FALSE for NA, so this also drops missing texts.
  tagged <- vec[grepl(hash.pattern, vec)]

  # Case-insensitive tally: "#R" and "#r" are the same hashtag.
  found <- regmatches(tagged, gregexpr(hash.pattern, tagged))
  counts <- table(tolower(unlist(found)))

  # Build the frame explicitly so the empty case is well-formed too.
  tags <- names(counts)
  df <- data.frame(
    tag = factor(tags, levels = tags),
    freq = as.integer(counts)
  )
  df[order(df$freq, decreasing = TRUE), ]
}
# ----------------------------------------------------------------------
# Get and Handle Data
# Request up to NumberOfTweets tweets from the user's timeline and convert
# the returned list into a data frame.
tw <- twListToDF(userTimeline(user, n = NumberOfTweets))
vec1 <- tw[["text"]]
# Keep only the NumberOfBars most frequent hashtags.
dat <- head(extract.hashes(vec1), n = NumberOfBars)
# Re-level `tag` by `freq` so the bars are drawn in frequency order.
dat2 <- dat
dat2$tag <- reorder(dat$tag, dat$freq)
# ----------------------------------------------------------------------
# Make Plot
# Write the image to the invoking user's home directory instead of a
# hard-coded /home/jacoby path, so the script works for any account.
file <- file.path(
  path.expand("~"),
  paste0("twitter-hash-", user, ".png")
)
CairoPNG(
  filename = file,
  width = 600,
  height = 400,
  pointsize = 12
)
hashtag.plot <- ggplot(dat2, aes(x = tag, y = freq)) +
  geom_bar(
    fill = "blue",
    stat = "identity" # freq is already a count; do not re-bin it
  ) +
  coord_flip() +
  labs(title = paste0("Hashtag frequencies in the tweets of @", user))
# Print explicitly: auto-printing only happens for top-level expressions,
# so the plot would be silently skipped if this file were source()d.
print(hashtag.plot)
# Close the device so the PNG is flushed to disk.
invisible(dev.off())
No comments:
Post a Comment