diff --git a/.build.yml b/.build.yml
index 37183ee..4fb3a6e 100644
--- a/.build.yml
+++ b/.build.yml
@@ -59,3 +59,26 @@ tasks:
 
       duration=$SECONDS
       echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
+
+  - integration_test: |
+      SECONDS=0
+
+      cd twitter_offline_engine/cmd
+      ./tests.sh
+
+      duration=$SECONDS
+      echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
+
+  - compile: |
+      SECONDS=0
+
+      cd twitter_offline_engine/cmd
+      go build -o ../../twitter ./twitter
+      cd ../..
+      chmod +x twitter
+      sudo mv twitter /usr/local/bin
+
+      which twitter
+
+      duration=$SECONDS
+      echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
diff --git a/cmd/tests.sh b/cmd/tests.sh
new file mode 100755
index 0000000..43863b1
--- /dev/null
+++ b/cmd/tests.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+set -e
+set -x
+
+test -e data && rm -r data
+
+go run ./twitter create_profile data
+
+# Fetch a user
+go run ./twitter fetch_user data Denlesks
+test "$(sqlite3 data/twitter.db "select handle from users")" = "Denlesks"
+test "$(sqlite3 data/twitter.db "select count(*) from users")" = "1"
+go run ./twitter fetch_user data Denlesks
+test "$(sqlite3 data/twitter.db "select count(*) from users")" = "1"
+
+# Fetch a tweet with images
+go run ./twitter fetch_tweet_only data https://twitter.com/Denlesks/status/1261483383483293700
+test "$(sqlite3 data/twitter.db "select count(*) from tweets")" = "1"
+test "$(sqlite3 data/twitter.db "select text from tweets")" = "These are public health officials who are making decisions about your lifestyle because they know more about health, fitness and well-being than you do"
+go run ./twitter fetch_tweet_only data https://twitter.com/Denlesks/status/1261483383483293700
+test "$(sqlite3 data/twitter.db "select count(*) from tweets")" = "1"
diff --git a/cmd/twitter/helpers.go b/cmd/twitter/helpers.go
new file mode 100644
index 0000000..ce91eb6
--- /dev/null
+++ b/cmd/twitter/helpers.go
@@ -0,0 +1,65 @@
+package main
+
+import (
+	"fmt"
+	"os"
+	"strings"
+
+	"offline_twitter/scraper"
+	"offline_twitter/terminal_utils"
+)
+
+/**
+ * Help message to print if command syntax is incorrect
+ */
+const help_message = `Usage: twitter <operation> <profile_dir> [TARGET]
+
+<operation>:
+  - create_profile (no target needed)
+
+  - fetch_user (TARGET is the user handle)
+  - fetch_tweet_only (TARGET is the full URL of the tweet)
+
+<profile_dir>: the path to the directory containing the data directories, database files, and settings files.
+
+TARGET is optional depending on <operation>
+`
+
+/**
+ * Helper function - print an error message and/or the help message, then exit
+ *
+ * args:
+ * - text: error message to print, wrapped in the terminal color codes; skipped if empty
+ * - display_help: if true, print the usage message as well
+ * - exit_code: status code passed to os.Exit
+ */
+func die(text string, display_help bool, exit_code int) {
+	if text != "" {
+		fmt.Print(terminal_utils.COLOR_RED + text + terminal_utils.COLOR_RESET + "\n")
+	}
+	if display_help {
+		fmt.Print(help_message)
+	}
+	os.Exit(exit_code)
+}
+
+/**
+ * Helper function - parse a tweet permalink URL to extract the tweet ID
+ *
+ * args:
+ * - url: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131"
+ *
+ * returns: the id at the end of the tweet: e.g., 1395882872729477131
+ */
+func extract_id_from(url string) (scraper.TweetID, error) {
+	// "https://twitter.com/<handle>/status/<id>" splits on "/" into exactly 6 parts:
+	// "https:", "", "twitter.com", <handle>, "status", <id>
+	parts := strings.Split(url, "/")
+	if len(parts) != 6 {
+		return "", fmt.Errorf("Tweet format isn't right (%d)", len(parts))
+	}
+	if parts[0] != "https:" || parts[1] != "" || parts[2] != "twitter.com" || parts[4] != "status" {
+		return "", fmt.Errorf("Tweet format isn't right")
+	}
+	return scraper.TweetID(parts[5]), nil
+}
diff --git a/cmd/twitter/main.go b/cmd/twitter/main.go
new file mode 100644
index 0000000..58fb74e
--- /dev/null
+++ b/cmd/twitter/main.go
@@ -0,0 +1,120 @@
+package main
+
+import (
+	"fmt"
+	"os"
+
+	"offline_twitter/persistence"
+	"offline_twitter/scraper"
+)
+
+/**
+ * Global variable referencing the open data profile
+ */
+var profile persistence.Profile
+
+/**
+ * Main method
+ *
+ * Invocation: twitter <operation> <profile_dir> [TARGET]
+ */
+func main() {
+	if len(os.Args) < 3 {
+		die("", true, 1)
+	}
+
+	operation := os.Args[1]
+	profile_dir := os.Args[2]
+
+	// create_profile needs no TARGET and runs before any profile is loaded,
+	// so it is dispatched here rather than in the switch below.
+	if operation == "create_profile" {
+		create_profile(profile_dir)
+		return
+	}
+
+	if len(os.Args) < 4 {
+		die("", true, 1)
+	}
+
+	target := os.Args[3]
+
+	var err error
+	profile, err = persistence.LoadProfile(profile_dir)
+	if err != nil {
+		die("Could not load profile: "+err.Error(), true, 2)
+	}
+
+	switch operation {
+	case "fetch_user":
+		fetch_user(scraper.UserHandle(target))
+	case "fetch_tweet_only":
+		fetch_tweet_only(target)
+	default:
+		die("Invalid operation: "+operation, true, 3)
+	}
+}
+
+/**
+ * Create a data directory.
+ *
+ * args:
+ * - target_dir: the location of the new data dir.
+ */
+func create_profile(target_dir string) {
+	_, err := persistence.NewProfile(target_dir)
+	if err != nil {
+		die("Could not create profile: "+err.Error(), false, 2)
+	}
+}
+
+/**
+ * Scrape a user and save it in the database.
+ *
+ * args:
+ * - handle: e.g., "michaelmalice"
+ */
+func fetch_user(handle scraper.UserHandle) {
+	if profile.UserExists(handle) {
+		fmt.Println("User is already in database. Updating user...")
+	}
+	user, err := scraper.GetUser(handle)
+	if err != nil {
+		die(err.Error(), false, -1)
+	}
+	fmt.Println(user)
+
+	err = profile.SaveUser(user)
+	if err != nil {
+		die("Error saving user: "+err.Error(), false, 4)
+	}
+	fmt.Println("Saved the user. Exiting successfully")
+}
+
+/**
+ * Scrape a single tweet and save it in the database.
+ *
+ * args:
+ * - tweet_url: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131"
+ */
+func fetch_tweet_only(tweet_url string) {
+	tweet_id, err := extract_id_from(tweet_url)
+	if err != nil {
+		die(err.Error(), false, -1)
+	}
+
+	if profile.IsTweetInDatabase(tweet_id) {
+		fmt.Println("Tweet is already in database. Updating...")
+	}
+	tweet, err := scraper.GetTweet(tweet_id)
+	if err != nil {
+		die("Error fetching tweet: "+err.Error(), false, -1)
+	}
+	fmt.Println(tweet)
+
+	err = profile.SaveTweet(tweet)
+	if err != nil {
+		die("Error saving tweet: "+err.Error(), false, 4)
+	}
+	fmt.Println("Saved the tweet. Exiting successfully")
+}