Create twitter
main package
This commit is contained in:
parent
f4636a25be
commit
18a63cd2eb
23
.build.yml
23
.build.yml
@ -59,3 +59,26 @@ tasks:
|
||||
|
||||
duration=$SECONDS
|
||||
echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
|
||||
|
||||
# Integration test task: runs cmd/tests.sh from inside twitter_offline_engine/cmd
# and reports the elapsed wall-clock time using the shell's SECONDS counter.
- integration_test: |
    SECONDS=0

    cd twitter_offline_engine/cmd
    ./tests.sh

    duration=$SECONDS
    echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
|
||||
|
||||
# Compile task: builds the ./twitter command package into a `twitter` binary at the
# repository root, moves it to /usr/local/bin, checks that it resolves on PATH
# (`which twitter`), and reports the elapsed time using the shell's SECONDS counter.
- compile: |
    SECONDS=0

    cd twitter_offline_engine/cmd
    go build -o ../../twitter ./twitter
    cd ../..
    chmod +x twitter
    sudo mv twitter /usr/local/bin

    which twitter

    duration=$SECONDS
    echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
|
||||
|
22
cmd/tests.sh
Executable file
22
cmd/tests.sh
Executable file
@ -0,0 +1,22 @@
|
||||
#!/bin/bash

# Integration test for the `twitter` command-line tool.
#
# Creates a fresh profile in ./data, fetches one user and one tweet (each twice,
# to check that re-fetching does not create duplicate rows), and inspects the
# resulting SQLite database after every step.

set -e
set -x

# The paths below (./twitter, data) are relative to this script's directory,
# so run from there regardless of where the script was invoked.
cd "$(dirname "${BASH_SOURCE[0]}")"

# Start from a clean slate
test -e data && rm -r data

go run ./twitter create_profile data

# Fetch a user
# NOTE: command substitutions are quoted so that an empty or multi-word query
# result is compared as one string instead of being word-split into extra
# arguments to `test`.
go run ./twitter fetch_user data Denlesks
test "$(sqlite3 data/twitter.db "select handle from users")" = "Denlesks"
test "$(sqlite3 data/twitter.db "select count(*) from users")" = "1"
go run ./twitter fetch_user data Denlesks
test "$(sqlite3 data/twitter.db "select count(*) from users")" = "1"

# Fetch a tweet with images
go run ./twitter fetch_tweet_only data https://twitter.com/Denlesks/status/1261483383483293700
test "$(sqlite3 data/twitter.db "select count(*) from tweets")" = "1"
test "$(sqlite3 data/twitter.db "select text from tweets")" = "These are public health officials who are making decisions about your lifestyle because they know more about health, fitness and well-being than you do"
go run ./twitter fetch_tweet_only data https://twitter.com/Denlesks/status/1261483383483293700
test "$(sqlite3 data/twitter.db "select count(*) from tweets")" = "1"
|
60
cmd/twitter/helpers.go
Normal file
60
cmd/twitter/helpers.go
Normal file
@ -0,0 +1,60 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"offline_twitter/scraper"
|
||||
"offline_twitter/terminal_utils"
|
||||
"strings"
|
||||
)
|
||||
|
||||
|
||||
/**
 * Help message to print if command syntax is incorrect.
 *
 * The <operation> list must mirror the operations dispatched by main();
 * anything listed here but not handled there is rejected as "Invalid operation".
 */
const help_message = `Usage: twitter <operation> <profile_dir> [TARGET]

<operation>:
  - create_profile (no target needed)

  - fetch_user (TARGET is the user handle)
  - fetch_tweet_only (TARGET is the full URL of the tweet)

<profile_dir>: the path to the directory containing the data directories, database files, and settings files.

TARGET is optional depending on <operation>
`
|
||||
|
||||
|
||||
/**
|
||||
* Helper function
|
||||
*/
|
||||
func die(text string, display_help bool, exit_code int) {
|
||||
if text != "" {
|
||||
fmt.Print(terminal_utils.COLOR_RED + text + terminal_utils.COLOR_RESET + "\n")
|
||||
}
|
||||
if display_help {
|
||||
fmt.Print(help_message)
|
||||
}
|
||||
os.Exit(exit_code)
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function - parse a tweet permalink URL to extract the tweet ID
|
||||
*
|
||||
* args:
|
||||
* - url: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131"
|
||||
*
|
||||
* returns: the id at the end of the tweet: e.g., 1395882872729477131
|
||||
*/
|
||||
func extract_id_from(url string) (scraper.TweetID, error) {
|
||||
parts := strings.Split(url, "/")
|
||||
if len(parts) != 6 {
|
||||
return "", fmt.Errorf("Tweet format isn't right (%d)", len(parts))
|
||||
}
|
||||
if parts[0] != "https:" || parts[1] != "" || parts[2] != "twitter.com" || parts[4] != "status" {
|
||||
return "", fmt.Errorf("Tweet format isn't right")
|
||||
}
|
||||
return scraper.TweetID(parts[5]), nil
|
||||
}
|
118
cmd/twitter/main.go
Normal file
118
cmd/twitter/main.go
Normal file
@ -0,0 +1,118 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
"fmt"
|
||||
"offline_twitter/scraper"
|
||||
"offline_twitter/persistence"
|
||||
)
|
||||
|
||||
/**
 * Global variable referencing the open data profile.
 *
 * It is assigned in main() from persistence.LoadProfile and then used by the
 * fetch_* functions to query and save data.
 */
var profile persistence.Profile
|
||||
|
||||
|
||||
/**
|
||||
* Main method
|
||||
*/
|
||||
func main() {
|
||||
if len(os.Args) < 3 {
|
||||
die("", true, 1)
|
||||
}
|
||||
|
||||
operation := os.Args[1]
|
||||
profile_dir := os.Args[2]
|
||||
|
||||
if operation == "create_profile" {
|
||||
create_profile(profile_dir)
|
||||
return
|
||||
}
|
||||
|
||||
if len(os.Args) < 4 {
|
||||
die("", true, 1)
|
||||
}
|
||||
|
||||
target := os.Args[3]
|
||||
|
||||
var err error
|
||||
profile, err = persistence.LoadProfile(profile_dir)
|
||||
if err != nil {
|
||||
die("Could not load profile: " + err.Error(), true, 2)
|
||||
}
|
||||
|
||||
switch (operation) {
|
||||
case "create_profile":
|
||||
create_profile(target)
|
||||
case "fetch_user":
|
||||
fetch_user(scraper.UserHandle(target))
|
||||
case "fetch_tweet_only":
|
||||
fetch_tweet_only(target)
|
||||
default:
|
||||
die("Invalid operation: " + operation, true, 3)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a data directory.
|
||||
*
|
||||
* args:
|
||||
* - target_dir: the location of the new data dir.
|
||||
*/
|
||||
func create_profile(target_dir string) {
|
||||
_, err := persistence.NewProfile(target_dir)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Scrape a user and save it in the database.
|
||||
*
|
||||
* args:
|
||||
* - handle: e.g., "michaelmalice"
|
||||
*/
|
||||
func fetch_user(handle scraper.UserHandle) {
|
||||
if profile.UserExists(handle) {
|
||||
fmt.Println("User is already in database. Updating user...")
|
||||
}
|
||||
user, err := scraper.GetUser(handle)
|
||||
if err != nil {
|
||||
die(err.Error(), false, -1)
|
||||
}
|
||||
fmt.Println(user)
|
||||
|
||||
err = profile.SaveUser(user)
|
||||
if err != nil {
|
||||
die("Error saving user: " + err.Error(), false, 4)
|
||||
}
|
||||
fmt.Println("Saved the user. Exiting successfully")
|
||||
}
|
||||
|
||||
/**
|
||||
* Scrape a single tweet and save it in the database.
|
||||
*
|
||||
* args:
|
||||
* - tweet_url: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131"
|
||||
*/
|
||||
func fetch_tweet_only(tweet_url string) {
|
||||
tweet_id, err := extract_id_from(tweet_url)
|
||||
if err != nil {
|
||||
die(err.Error(), false, -1)
|
||||
}
|
||||
|
||||
if profile.IsTweetInDatabase(tweet_id) {
|
||||
fmt.Println("Tweet is already in database. Updating...")
|
||||
}
|
||||
tweet, err := scraper.GetTweet(tweet_id)
|
||||
if err != nil {
|
||||
die("Error fetching tweet: " + err.Error(), false, -1)
|
||||
}
|
||||
fmt.Println(tweet)
|
||||
|
||||
err = profile.SaveTweet(tweet)
|
||||
if err != nil {
|
||||
die("Error saving tweet: " + err.Error(), false, 4)
|
||||
}
|
||||
fmt.Println("Saved the tweet. Exiting successfully")
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user