Create twitter
main package
This commit is contained in:
parent
f4636a25be
commit
18a63cd2eb
23
.build.yml
23
.build.yml
@ -59,3 +59,26 @@ tasks:
|
|||||||
|
|
||||||
duration=$SECONDS
|
duration=$SECONDS
|
||||||
echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
|
echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
|
||||||
|
|
||||||
|
# Integration test task: changes into twitter_offline_engine/cmd, runs ./tests.sh there,
# and prints how long the task took. SECONDS is zeroed first so that the reported
# duration covers this task only.
- integration_test: |
    SECONDS=0

    cd twitter_offline_engine/cmd
    ./tests.sh

    duration=$SECONDS
    echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
|
||||||
|
|
||||||
|
# Compile task: builds the ./twitter package into a `twitter` binary two directories up,
# moves that binary into /usr/local/bin, runs `which twitter` to confirm it resolves on
# PATH, and prints how long the task took.
- compile: |
    SECONDS=0

    cd twitter_offline_engine/cmd
    go build -o ../../twitter ./twitter
    cd ../..
    chmod +x twitter
    sudo mv twitter /usr/local/bin

    which twitter

    duration=$SECONDS
    echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
|
||||||
|
22
cmd/tests.sh
Executable file
22
cmd/tests.sh
Executable file
@ -0,0 +1,22 @@
|
|||||||
|
#!/bin/bash

# Integration test for the `twitter` command: creates a fresh profile in ./data, runs
# each operation twice, and checks the resulting rows in data/twitter.db with sqlite3.
#
# Every `$(sqlite3 ...)` substitution is double-quoted. Unquoted, multi-word output
# (such as the tweet text) is split into several arguments and `test` aborts with
# "too many arguments"; empty output makes the left operand disappear altogether.

# Abort on the first failing command, and echo each command as it runs.
set -e
set -x

# Start from a clean slate if a previous run left a profile behind.
test -e data && rm -r data

go run ./twitter create_profile data

# Fetch a user
go run ./twitter fetch_user data Denlesks
test "$(sqlite3 data/twitter.db "select handle from users")" = "Denlesks"
test "$(sqlite3 data/twitter.db "select count(*) from users")" = "1"
# Fetching the same user again must not add a second row.
go run ./twitter fetch_user data Denlesks
test "$(sqlite3 data/twitter.db "select count(*) from users")" = "1"

# Fetch a tweet with images
go run ./twitter fetch_tweet_only data https://twitter.com/Denlesks/status/1261483383483293700
test "$(sqlite3 data/twitter.db "select count(*) from tweets")" = "1"
test "$(sqlite3 data/twitter.db "select text from tweets")" = "These are public health officials who are making decisions about your lifestyle because they know more about health, fitness and well-being than you do"
# Fetching the same tweet again must not add a second row.
go run ./twitter fetch_tweet_only data https://twitter.com/Denlesks/status/1261483383483293700
test "$(sqlite3 data/twitter.db "select count(*) from tweets")" = "1"
|
60
cmd/twitter/helpers.go
Normal file
60
cmd/twitter/helpers.go
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"offline_twitter/scraper"
|
||||||
|
"offline_twitter/terminal_utils"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Help message to print if command syntax is incorrect.
 *
 * The operation names listed here must match the ones main() dispatches on; the
 * single-tweet operation is "fetch_tweet_only".
 *
 * NOTE(review): "fetch_tweet_and_replies" is advertised here but main() has no case
 * for it in this revision -- confirm whether it is planned or should be removed.
 */
const help_message = `Usage: twitter <operation> <profile_dir> [TARGET]

<operation>:
- create_profile (no target needed)

- fetch_user (TARGET is the user handle)
- fetch_tweet_only (TARGET is the full URL of the tweet)
- fetch_tweet_and_replies (TARGET is the full URL of the tweet)

<profile_dir>: the path to the directory containing the data directories, database files, and settings files.

TARGET is optional depending on <operation>
`
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper function
|
||||||
|
*/
|
||||||
|
func die(text string, display_help bool, exit_code int) {
|
||||||
|
if text != "" {
|
||||||
|
fmt.Print(terminal_utils.COLOR_RED + text + terminal_utils.COLOR_RESET + "\n")
|
||||||
|
}
|
||||||
|
if display_help {
|
||||||
|
fmt.Print(help_message)
|
||||||
|
}
|
||||||
|
os.Exit(exit_code)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper function - parse a tweet permalink URL to extract the tweet ID
|
||||||
|
*
|
||||||
|
* args:
|
||||||
|
* - url: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131"
|
||||||
|
*
|
||||||
|
* returns: the id at the end of the tweet: e.g., 1395882872729477131
|
||||||
|
*/
|
||||||
|
func extract_id_from(url string) (scraper.TweetID, error) {
|
||||||
|
parts := strings.Split(url, "/")
|
||||||
|
if len(parts) != 6 {
|
||||||
|
return "", fmt.Errorf("Tweet format isn't right (%d)", len(parts))
|
||||||
|
}
|
||||||
|
if parts[0] != "https:" || parts[1] != "" || parts[2] != "twitter.com" || parts[4] != "status" {
|
||||||
|
return "", fmt.Errorf("Tweet format isn't right")
|
||||||
|
}
|
||||||
|
return scraper.TweetID(parts[5]), nil
|
||||||
|
}
|
118
cmd/twitter/main.go
Normal file
118
cmd/twitter/main.go
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"fmt"
|
||||||
|
"offline_twitter/scraper"
|
||||||
|
"offline_twitter/persistence"
|
||||||
|
)
|
||||||
|
|
||||||
|
/**
 * Global variable referencing the open data profile.
 *
 * main() assigns it from persistence.LoadProfile before dispatching any operation
 * other than "create_profile"; fetch_user and fetch_tweet_only query and save
 * through it.
 */
var profile persistence.Profile
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Main method
|
||||||
|
*/
|
||||||
|
func main() {
|
||||||
|
if len(os.Args) < 3 {
|
||||||
|
die("", true, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
operation := os.Args[1]
|
||||||
|
profile_dir := os.Args[2]
|
||||||
|
|
||||||
|
if operation == "create_profile" {
|
||||||
|
create_profile(profile_dir)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(os.Args) < 4 {
|
||||||
|
die("", true, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
target := os.Args[3]
|
||||||
|
|
||||||
|
var err error
|
||||||
|
profile, err = persistence.LoadProfile(profile_dir)
|
||||||
|
if err != nil {
|
||||||
|
die("Could not load profile: " + err.Error(), true, 2)
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (operation) {
|
||||||
|
case "create_profile":
|
||||||
|
create_profile(target)
|
||||||
|
case "fetch_user":
|
||||||
|
fetch_user(scraper.UserHandle(target))
|
||||||
|
case "fetch_tweet_only":
|
||||||
|
fetch_tweet_only(target)
|
||||||
|
default:
|
||||||
|
die("Invalid operation: " + operation, true, 3)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a data directory.
|
||||||
|
*
|
||||||
|
* args:
|
||||||
|
* - target_dir: the location of the new data dir.
|
||||||
|
*/
|
||||||
|
func create_profile(target_dir string) {
|
||||||
|
_, err := persistence.NewProfile(target_dir)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Scrape a user and save it in the database.
|
||||||
|
*
|
||||||
|
* args:
|
||||||
|
* - handle: e.g., "michaelmalice"
|
||||||
|
*/
|
||||||
|
func fetch_user(handle scraper.UserHandle) {
|
||||||
|
if profile.UserExists(handle) {
|
||||||
|
fmt.Println("User is already in database. Updating user...")
|
||||||
|
}
|
||||||
|
user, err := scraper.GetUser(handle)
|
||||||
|
if err != nil {
|
||||||
|
die(err.Error(), false, -1)
|
||||||
|
}
|
||||||
|
fmt.Println(user)
|
||||||
|
|
||||||
|
err = profile.SaveUser(user)
|
||||||
|
if err != nil {
|
||||||
|
die("Error saving user: " + err.Error(), false, 4)
|
||||||
|
}
|
||||||
|
fmt.Println("Saved the user. Exiting successfully")
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Scrape a single tweet and save it in the database.
|
||||||
|
*
|
||||||
|
* args:
|
||||||
|
* - tweet_url: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131"
|
||||||
|
*/
|
||||||
|
func fetch_tweet_only(tweet_url string) {
|
||||||
|
tweet_id, err := extract_id_from(tweet_url)
|
||||||
|
if err != nil {
|
||||||
|
die(err.Error(), false, -1)
|
||||||
|
}
|
||||||
|
|
||||||
|
if profile.IsTweetInDatabase(tweet_id) {
|
||||||
|
fmt.Println("Tweet is already in database. Updating...")
|
||||||
|
}
|
||||||
|
tweet, err := scraper.GetTweet(tweet_id)
|
||||||
|
if err != nil {
|
||||||
|
die("Error fetching tweet: " + err.Error(), false, -1)
|
||||||
|
}
|
||||||
|
fmt.Println(tweet)
|
||||||
|
|
||||||
|
err = profile.SaveTweet(tweet)
|
||||||
|
if err != nil {
|
||||||
|
die("Error saving tweet: " + err.Error(), false, 4)
|
||||||
|
}
|
||||||
|
fmt.Println("Saved the tweet. Exiting successfully")
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user