Create twitter main package

This commit is contained in:
Alessio 2021-08-02 14:46:06 -07:00
parent f4636a25be
commit 18a63cd2eb
4 changed files with 223 additions and 0 deletions

View File

@ -59,3 +59,26 @@ tasks:
duration=$SECONDS duration=$SECONDS
echo "Task completed in $(($duration / 60))m$(($duration % 60))s." echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
# Task: run the integration test script (./tests.sh in twitter_offline_engine/cmd)
# and print the elapsed time, taken from the shell's SECONDS counter.
- integration_test: |
SECONDS=0
cd twitter_offline_engine/cmd
./tests.sh
duration=$SECONDS
echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
# Task: build the ./twitter package into a `twitter` binary two directories up,
# move it to /usr/local/bin, confirm it resolves via `which`, and print the
# elapsed time, taken from the shell's SECONDS counter.
- compile: |
SECONDS=0
cd twitter_offline_engine/cmd
go build -o ../../twitter ./twitter
cd ../..
chmod +x twitter
sudo mv twitter /usr/local/bin
which twitter
duration=$SECONDS
echo "Task completed in $(($duration / 60))m$(($duration % 60))s."

22
cmd/tests.sh Executable file
View File

@ -0,0 +1,22 @@
#!/bin/bash
#
# Integration test for the `twitter` command line tool.
#
# Creates a fresh profile in ./data, fetches a user and a tweet, and checks the
# resulting rows in data/twitter.db with the sqlite3 CLI.  Every fetch is run
# twice to verify that re-fetching updates the existing row instead of
# inserting a duplicate.
#
# Every command substitution is double-quoted: unquoted, a multi-word result
# (such as the tweet text) is word-split and `test` aborts with "too many
# arguments", and an empty result makes `test` see a missing operand.

set -e  # abort on the first failing command or assertion
set -x  # echo each command, so a failure is easy to locate in the log

# Start from a clean slate: remove any profile left over from a previous run.
test -e data && rm -r data

go run ./twitter create_profile data

# Fetch a user
go run ./twitter fetch_user data Denlesks
test "$(sqlite3 data/twitter.db "select handle from users")" = "Denlesks"
test "$(sqlite3 data/twitter.db "select count(*) from users")" = "1"

# Fetching the same user again must not create a second row
go run ./twitter fetch_user data Denlesks
test "$(sqlite3 data/twitter.db "select count(*) from users")" = "1"

# Fetch a tweet with images
go run ./twitter fetch_tweet_only data https://twitter.com/Denlesks/status/1261483383483293700
test "$(sqlite3 data/twitter.db "select count(*) from tweets")" = "1"
test "$(sqlite3 data/twitter.db "select text from tweets")" = "These are public health officials who are making decisions about your lifestyle because they know more about health, fitness and well-being than you do"

# Fetching the same tweet again must not create a second row
go run ./twitter fetch_tweet_only data https://twitter.com/Denlesks/status/1261483383483293700
test "$(sqlite3 data/twitter.db "select count(*) from tweets")" = "1"

60
cmd/twitter/helpers.go Normal file
View File

@ -0,0 +1,60 @@
package main
import (
"fmt"
"os"
"offline_twitter/scraper"
"offline_twitter/terminal_utils"
"strings"
)
/**
 * Help message to print if command syntax is incorrect.
 *
 * The operation names listed here are what the user will type, so they have to
 * match the strings that main() dispatches on (e.g. "fetch_tweet_only").
 */
const help_message = `Usage: twitter <operation> <profile_dir> [TARGET]
<operation>:
- create_profile (no target needed)
- fetch_user (TARGET is the user handle)
- fetch_tweet_only (TARGET is the full URL of the tweet)
- fetch_tweet_and_replies (TARGET is the full URL of the tweet)
<profile_dir>: the path to the directory containing the data directories, database files, and settings files.
TARGET is optional depending on <operation>
`
/**
 * Helper function - report a fatal problem and terminate the program.
 *
 * Everything is written to stderr rather than stdout, so that diagnostics do
 * not get mixed into output that a caller may be capturing or piping.
 *
 * args:
 * - text: the error message, printed in red; nothing is printed if it is empty
 * - display_help: whether to print the usage message after the error
 * - exit_code: the status passed to os.Exit
 *
 * This function never returns.
 */
func die(text string, display_help bool, exit_code int) {
	if text != "" {
		fmt.Fprint(os.Stderr, terminal_utils.COLOR_RED+text+terminal_utils.COLOR_RESET+"\n")
	}
	if display_help {
		fmt.Fprint(os.Stderr, help_message)
	}
	os.Exit(exit_code)
}
/**
 * Helper function - parse a tweet permalink URL to extract the tweet ID
 *
 * A query string or fragment (e.g. the "?s=20" found on shared links) and a
 * single trailing slash are ignored.  The ID must be a non-empty string of
 * decimal digits; anything else is reported as an error instead of being
 * handed back as a bogus ID.
 *
 * args:
 * - url: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131"
 *
 * returns: the id at the end of the tweet: e.g., 1395882872729477131, or an
 * error if the URL is not of the form "https://twitter.com/<handle>/status/<id>"
 */
func extract_id_from(url string) (scraper.TweetID, error) {
	// Cut off "?..." and "#..." so they don't end up as part of the ID.
	if i := strings.IndexAny(url, "?#"); i >= 0 {
		url = url[:i]
	}
	url = strings.TrimSuffix(url, "/")

	// Expected pieces: "https:", "", "twitter.com", <handle>, "status", <id>
	parts := strings.Split(url, "/")
	if len(parts) != 6 {
		return "", fmt.Errorf("Tweet format isn't right (%d)", len(parts))
	}
	if parts[0] != "https:" || parts[1] != "" || parts[2] != "twitter.com" || parts[4] != "status" {
		return "", fmt.Errorf("Tweet format isn't right")
	}

	id := parts[5]
	if id == "" {
		return "", fmt.Errorf("Tweet format isn't right (missing tweet ID)")
	}
	for _, c := range id {
		if c < '0' || c > '9' {
			return "", fmt.Errorf("Tweet format isn't right (tweet ID %q is not numeric)", id)
		}
	}
	return scraper.TweetID(id), nil
}

118
cmd/twitter/main.go Normal file
View File

@ -0,0 +1,118 @@
package main
import (
"os"
"fmt"
"offline_twitter/scraper"
"offline_twitter/persistence"
)
/**
 * Global variable referencing the open data profile.
 *
 * It is assigned in main() from persistence.LoadProfile before any fetch_*
 * operation is dispatched; the fetch_* functions read and write through it.
 */
var profile persistence.Profile
/**
 * Main method
 *
 * Command line: twitter <operation> <profile_dir> [TARGET]
 *
 * "create_profile" only needs <profile_dir> and is handled before anything
 * else, because there is no existing profile to load yet.  Every other
 * operation requires TARGET and runs against the profile loaded from
 * <profile_dir>.
 *
 * Exit codes used here: 1 = not enough arguments, 2 = profile could not be
 * loaded, 3 = unknown operation.
 */
func main() {
	if len(os.Args) < 3 {
		die("", true, 1)
	}
	operation := os.Args[1]
	profile_dir := os.Args[2]

	if operation == "create_profile" {
		create_profile(profile_dir)
		return
	}

	if len(os.Args) < 4 {
		die("", true, 1)
	}
	target := os.Args[3]

	var err error
	profile, err = persistence.LoadProfile(profile_dir)
	if err != nil {
		die("Could not load profile: "+err.Error(), true, 2)
	}

	// "create_profile" has already returned above, so it needs no case here.
	switch operation {
	case "fetch_user":
		fetch_user(scraper.UserHandle(target))
	case "fetch_tweet_only":
		fetch_tweet_only(target)
	default:
		die("Invalid operation: "+operation, true, 3)
	}
}
/**
 * Create a data directory.
 *
 * On failure the error is reported through die(), like every other operation,
 * instead of crashing with a panic and stack trace.  Exit status 2 is used,
 * which is also the status an unhandled panic terminates with.
 *
 * args:
 * - target_dir: the location of the new data dir.
 */
func create_profile(target_dir string) {
	_, err := persistence.NewProfile(target_dir)
	if err != nil {
		die("Could not create profile: "+err.Error(), false, 2)
	}
}
/**
 * Scrape a user and save it in the database.
 *
 * If the user is already in the profile, a notice is printed and the user is
 * scraped and saved again.  A scraping failure exits with code -1 and a save
 * failure with code 4; both messages say which step failed.
 *
 * args:
 * - handle: e.g., "michaelmalice"
 */
func fetch_user(handle scraper.UserHandle) {
	if profile.UserExists(handle) {
		fmt.Println("User is already in database. Updating user...")
	}

	user, err := scraper.GetUser(handle)
	if err != nil {
		die("Error fetching user: "+err.Error(), false, -1)
	}
	fmt.Println(user)

	if err := profile.SaveUser(user); err != nil {
		die("Error saving user: "+err.Error(), false, 4)
	}
	fmt.Println("Saved the user. Exiting successfully")
}
/**
 * Scrape one tweet, identified by its permalink, and store it in the open profile.
 *
 * Steps: extract the tweet ID from the URL, print a notice if that tweet is
 * already in the database, scrape it, print it, and save it.  Any failure ends
 * the program through die(): code -1 for a bad URL or a scraping error, code 4
 * for a save error.
 *
 * args:
 * - tweet_url: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131"
 */
func fetch_tweet_only(tweet_url string) {
	id, parse_err := extract_id_from(tweet_url)
	if parse_err != nil {
		die(parse_err.Error(), false, -1)
	}

	if already_saved := profile.IsTweetInDatabase(id); already_saved {
		fmt.Println("Tweet is already in database. Updating...")
	}

	scraped, fetch_err := scraper.GetTweet(id)
	if fetch_err != nil {
		die("Error fetching tweet: "+fetch_err.Error(), false, -1)
	}
	fmt.Println(scraped)

	if save_err := profile.SaveTweet(scraped); save_err != nil {
		die("Error saving tweet: "+save_err.Error(), false, 4)
	}
	fmt.Println("Saved the tweet. Exiting successfully")
}