From 24129c4852ce4c2bc96f613e01322d6c524b94a4 Mon Sep 17 00:00:00 2001 From: Alessio Date: Fri, 9 Aug 2024 19:41:39 -0700 Subject: [PATCH] REFACTOR: reduce technical debt, particularly that caused by singleton pattern in `pkg/scraper` - ensure all scraper functions have a `api.XYZ` version and a package-level convenience function - isolate `the_api` to top-level convenience functions, in preparation for removal - move a bunch of scraper functions around to be nearby their related functions - new ErrLoginRequired - remove obsolete APIv1 stuff (Feed, TweetDetail) - rename scraper function GetUserFeedGraphqlFor => GetUserFeed - fix go.mod Go version incorrectly claiming it's compatible with Go 1.16 (should be Go 1.17) --- cmd/twitter/main.go | 20 ++- go.mod | 18 ++- internal/webserver/handler_messages.go | 12 +- internal/webserver/handler_user_feed.go | 2 +- internal/webserver/stopwatch.go | 8 +- pkg/scraper/api_errors.go | 1 + pkg/scraper/api_obsolete_requests.go | 190 ++++++++++++++++++++++++ pkg/scraper/api_request_utils.go | 99 ------------ pkg/scraper/api_types_dms.go | 39 +++-- pkg/scraper/api_types_dms_test.go | 14 +- pkg/scraper/api_types_lists.go | 24 ++- pkg/scraper/api_types_posting.go | 14 +- pkg/scraper/api_types_spaces.go | 13 ++ pkg/scraper/api_types_v2.go | 103 +++++++++++-- pkg/scraper/dm_chat_room.go | 4 +- pkg/scraper/dm_trove.go | 78 ++++------ pkg/scraper/space.go | 8 - pkg/scraper/tweet.go | 100 ++----------- pkg/scraper/user_feed.go | 37 ----- 19 files changed, 437 insertions(+), 347 deletions(-) create mode 100644 pkg/scraper/api_obsolete_requests.go delete mode 100644 pkg/scraper/user_feed.go diff --git a/cmd/twitter/main.go b/cmd/twitter/main.go index a4bfd2c..edbd51c 100644 --- a/cmd/twitter/main.go +++ b/cmd/twitter/main.go @@ -127,7 +127,6 @@ func main() { *session_name = (*session_name)[:len(*session_name)-8] } scraper.InitApi(profile.LoadSession(scraper.UserHandle(*session_name))) - // fmt.Printf("Operating as user: @%s\n", scraper.the_api.UserHandle) } else { session, err := scraper.NewGuestSession() if err != nil { @@ -235,7 +234,7 @@ func main() { // - username: twitter username or email address // - password: twitter account password func login(username string, password string) { - // Skip the scraper.the_api variable, just use a local one since no scraping is happening + // Skip the scraper.InitApi, just use a local one since no scraping is happening api, err := scraper.NewGuestSession() if err != nil { die(fmt.Sprintf("Unable to create session: %s", err.Error()), false, 1) @@ -350,7 +349,7 @@ func fetch_user_feed(handle string, how_many int) { die(fmt.Sprintf("Error getting user: %s\n %s", handle, err.Error()), false, -1) } - trove, err := scraper.GetUserFeedGraphqlFor(user.ID, how_many) + trove, err := scraper.GetUserFeed(user.ID, how_many) if is_scrape_failure(err) { die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2) } @@ -526,7 +525,10 @@ func start_webserver(addr string, should_auto_open bool) { } func fetch_inbox(how_many int) { - trove, _ := scraper.GetInbox(how_many) + trove, _, err := scraper.GetInbox(how_many) + if err != nil { + die(fmt.Sprintf("Failed to fetch inbox:\n %s", err.Error()), false, 1) + } profile.SaveTweetTrove(trove, true) happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)), nil) } @@ -537,7 +539,10 @@ func fetch_dm(id string, how_many int) { panic(err) } max_id := scraper.DMMessageID(^uint(0) >> 1) - trove := scraper.GetConversation(room.ID, max_id, how_many) + trove, err := scraper.GetConversation(room.ID, max_id, how_many) + if err != nil { + die(fmt.Sprintf("Failed to fetch dm:\n %s", err.Error()), false, 1) + } profile.SaveTweetTrove(trove, true) happy_exit( fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)), @@ -551,7 +556,10 @@ func send_dm(room_id string, text string, in_reply_to_id int) { die(fmt.Sprintf("No such chat room: %d", in_reply_to_id), false, 1) } - trove := scraper.SendDMMessage(room.ID, text, scraper.DMMessageID(in_reply_to_id)) + trove, err := scraper.SendDMMessage(room.ID, text, scraper.DMMessageID(in_reply_to_id)) + if err != nil { + die(fmt.Sprintf("Failed to send dm:\n %s", err.Error()), false, 1) + } profile.SaveTweetTrove(trove, true) happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)), nil) } diff --git a/go.mod b/go.mod index 0fced54..f41fb8a 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module gitlab.com/offline-twitter/twitter_offline_engine -go 1.16 +go 1.17 require ( github.com/Masterminds/sprig/v3 v3.2.3 @@ -15,3 +15,19 @@ require ( golang.org/x/net v0.9.0 golang.org/x/term v0.7.0 ) + +require ( + github.com/Masterminds/goutils v1.1.1 // indirect + github.com/Masterminds/semver/v3 v3.2.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/huandu/xstrings v1.3.3 // indirect + github.com/imdario/mergo v0.3.11 // indirect + github.com/mitchellh/copystructure v1.0.0 // indirect + github.com/mitchellh/reflectwalk v1.0.0 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/shopspring/decimal v1.2.0 // indirect + github.com/spf13/cast v1.3.1 // indirect + golang.org/x/crypto v0.3.0 // indirect + golang.org/x/sys v0.7.0 // indirect + gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect +) diff --git a/internal/webserver/handler_messages.go b/internal/webserver/handler_messages.go index 22cc05b..dab8aaf 100644 --- a/internal/webserver/handler_messages.go +++ b/internal/webserver/handler_messages.go @@ -33,7 +33,7 @@ func (app *Application) message_mark_as_read(w http.ResponseWriter, r *http.Requ c.PageSize = 1 chat_contents := app.Profile.GetChatRoomMessagesByCursor(c) last_message_id := chat_contents.MessageIDs[len(chat_contents.MessageIDs)-1] - scraper.MarkDMChatRead(room_id, last_message_id) + panic_if(scraper.MarkDMChatRead(room_id, last_message_id)) room := chat_contents.Rooms[room_id] participant, is_ok := room.Participants[app.ActiveUser.ID] if !is_ok { @@ -66,7 +66,10 @@ func (app *Application) message_send(w http.ResponseWriter, r *http.Request) { in_reply_to_id = 0 } - trove := scraper.SendDMMessage(room_id, message_data.Text, scraper.DMMessageID(in_reply_to_id)) + trove, err := scraper.SendDMMessage(room_id, message_data.Text, scraper.DMMessageID(in_reply_to_id)) + if err != nil { + panic(err) + } app.Profile.SaveTweetTrove(trove, false) go app.Profile.SaveTweetTrove(trove, true) } @@ -115,7 +118,10 @@ func (app *Application) message_detail(w http.ResponseWriter, r *http.Request) { if r.URL.Query().Has("scrape") && !app.IsScrapingDisabled { max_id := scraper.DMMessageID(^uint(0) >> 1) - trove := scraper.GetConversation(room_id, max_id, 50) // TODO: parameterizable + trove, err := scraper.GetConversation(room_id, max_id, 50) // TODO: parameterizable + if err != nil { + panic(err) + } app.Profile.SaveTweetTrove(trove, false) go app.Profile.SaveTweetTrove(trove, true) // Download the content in the background } diff --git a/internal/webserver/handler_user_feed.go b/internal/webserver/handler_user_feed.go index c94f893..df37fb7 100644 --- a/internal/webserver/handler_user_feed.go +++ b/internal/webserver/handler_user_feed.go @@ -46,7 +46,7 @@ func (app *Application) UserFeed(w http.ResponseWriter, r *http.Request) { if len(parts) == 1 { // The URL is just the user handle // Run scraper - trove, err := scraper.GetUserFeedGraphqlFor(user.ID, 50) // TODO: parameterizable + trove, err := scraper.GetUserFeed(user.ID, 50) // TODO: parameterizable if err != nil { app.ErrorLog.Print(err) // TOOD: show error in UI diff --git a/internal/webserver/stopwatch.go b/internal/webserver/stopwatch.go index efd4a3c..bf2653f 100644 --- a/internal/webserver/stopwatch.go +++ b/internal/webserver/stopwatch.go @@ -103,10 +103,14 @@ func (app *Application) background_dm_polling_scrape() { fmt.Println("Scraping user DMs...") var trove scraper.TweetTrove + var err error if inbox_cursor == "" { - trove, inbox_cursor = scraper.GetInbox(0) + trove, inbox_cursor, err = scraper.GetInbox(0) } else { - trove, inbox_cursor = scraper.PollInboxUpdates(inbox_cursor) + trove, inbox_cursor, err = scraper.PollInboxUpdates(inbox_cursor) + } + if err != nil { + panic(err) } fmt.Println("Saving DM results...") app.Profile.SaveTweetTrove(trove, false) diff --git a/pkg/scraper/api_errors.go b/pkg/scraper/api_errors.go index a5ff6ea..361c33d 100644 --- a/pkg/scraper/api_errors.go +++ b/pkg/scraper/api_errors.go @@ -12,6 +12,7 @@ var ( ErrRateLimited = errors.New("rate limited") ErrorDMCA = errors.New("video is DMCAed, unable to download (HTTP 403 Forbidden)") ErrMediaDownload404 = errors.New("media download HTTP 404") + ErrLoginRequired = errors.New("login required; please provide `--session ` flag") // These are not API errors, but network errors generally ErrNoInternet = errors.New("no internet connection") diff --git a/pkg/scraper/api_obsolete_requests.go b/pkg/scraper/api_obsolete_requests.go new file mode 100644 index 0000000..78790cf --- /dev/null +++ b/pkg/scraper/api_obsolete_requests.go @@ -0,0 +1,190 @@ +//go:build obsolete_user_feed + +// Nothing in this file is used. It's outdated; user feed comes from APIv2 instead now. + +package scraper + +import ( + "errors" + "fmt" + "net/url" +) + +const API_CONVERSATION_BASE_PATH = "https://twitter.com/i/api/2/timeline/conversation/" +const API_USER_TIMELINE_BASE_PATH = "https://api.twitter.com/2/timeline/profile/" + +func (api API) GetFeedFor(user_id UserID, cursor string) (TweetResponse, error) { + url, err := url.Parse(fmt.Sprintf("%s%d.json", API_USER_TIMELINE_BASE_PATH, user_id)) + if err != nil { + panic(err) + } + queryParams := url.Query() + add_tweet_query_params(&queryParams) + url.RawQuery = queryParams.Encode() + + var result TweetResponse + err = api.do_http(url.String(), cursor, &result) + + return result, err +} + +/** + * Resend the request to get more tweets if necessary + * + * args: + * - user_id: the user's UserID + * - response: an "out" parameter; the TweetResponse that tweets, RTs and users will be appended to + * - min_tweets: the desired minimum amount of tweets to get + */ +func (api API) GetMoreTweetsFromFeed(user_id UserID, response *TweetResponse, min_tweets int) error { + last_response := response + for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < min_tweets { + fresh_response, err := api.GetFeedFor(user_id, last_response.GetCursor()) + if err != nil { + return err + } + + if fresh_response.GetCursor() == last_response.GetCursor() && len(fresh_response.GlobalObjects.Tweets) == 0 { + // Empty response, cursor same as previous: end of feed has been reached + return END_OF_FEED + } + if fresh_response.IsEndOfFeed() { + // Response has a pinned tweet, but no other content: end of feed has been reached + return END_OF_FEED + } + + last_response = &fresh_response + + // Copy over the tweets and the users + for id, tweet := range last_response.GlobalObjects.Tweets { + response.GlobalObjects.Tweets[id] = tweet + } + for id, user := range last_response.GlobalObjects.Users { + response.GlobalObjects.Users[id] = user + } + fmt.Printf("Have %d tweets, and %d users so far\n", len(response.GlobalObjects.Tweets), len(response.GlobalObjects.Users)) + } + return nil +} + +/** + * Get a list of tweets that appear on the given user's page, along with a list of associated + * users for any retweets. + * + * args: + * - user_id: the ID of the user whomst feed to fetch + * - min_tweets: get at least this many tweets, if there are any + * + * returns: a slice of Tweets, Retweets, and Users + */ +func GetUserFeedFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) { + tweet_response, err := the_api.GetFeedFor(user_id, "") + if err != nil { + err = fmt.Errorf("Error calling API to fetch user feed: UserID %d\n %w", user_id, err) + return + } + + if len(tweet_response.GlobalObjects.Tweets) < min_tweets && tweet_response.GetCursor() != "" { + err = the_api.GetMoreTweetsFromFeed(user_id, &tweet_response, min_tweets) + if err != nil && !errors.Is(err, END_OF_FEED) { + return + } + } + + return tweet_response.ToTweetTrove() +} + +/** + * Return a list of tweets, including the original and the rest of its thread, + * along with a list of associated users. + * + * Mark the main tweet as "is_conversation_downloaded = true", and update its "last_scraped_at" + * value. + * + * args: + * - id: the ID of the tweet to get + * + * returns: the tweet, list of its replies and context, and users associated with those replies + */ +func GetTweetFull(id TweetID, how_many int) (trove TweetTrove, err error) { + tweet_response, err := the_api.GetTweet(id, "") + if err != nil { + err = fmt.Errorf("Error getting tweet: %d\n %w", id, err) + return + } + if len(tweet_response.GlobalObjects.Tweets) < how_many && + tweet_response.GetCursor() != "" { + err = the_api.GetMoreReplies(id, &tweet_response, how_many) + if err != nil { + err = fmt.Errorf("Error getting more tweet replies: %d\n %w", id, err) + return + } + } + + // This has to be called BEFORE ToTweetTrove, because it modifies the TweetResponse (adds tombstone tweets to its tweets list) + tombstoned_users := tweet_response.HandleTombstones() + + trove, err = tweet_response.ToTweetTrove() + if err != nil { + panic(err) + } + trove.TombstoneUsers = tombstoned_users + + // Quoted tombstones need their user_id filled out from the tombstoned_users list + log.Debug("Running tweet trove post-processing\n") + err = trove.PostProcess() + if err != nil { + err = fmt.Errorf("Error getting tweet (id %d):\n %w", id, err) + return + } + + // Find the main tweet and update its "is_conversation_downloaded" and "last_scraped_at" + tweet, ok := trove.Tweets[id] + if !ok { + panic("Trove didn't contain its own tweet!") + } + tweet.LastScrapedAt = Timestamp{time.Now()} + tweet.IsConversationScraped = true + trove.Tweets[id] = tweet + + return +} + +func (api *API) GetTweet(id TweetID, cursor string) (TweetResponse, error) { + url, err := url.Parse(fmt.Sprintf("%s%d.json", API_CONVERSATION_BASE_PATH, id)) + if err != nil { + panic(err) + } + queryParams := url.Query() + if cursor != "" { + queryParams.Add("referrer", "tweet") + } + add_tweet_query_params(&queryParams) + url.RawQuery = queryParams.Encode() + + var result TweetResponse + err = api.do_http(url.String(), cursor, &result) + return result, err +} + +// Resend the request to get more replies if necessary +func (api *API) GetMoreReplies(tweet_id TweetID, response *TweetResponse, max_replies int) error { + last_response := response + for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < max_replies { + fresh_response, err := api.GetTweet(tweet_id, last_response.GetCursor()) + if err != nil { + return err + } + + last_response = &fresh_response + + // Copy over the tweets and the users + for id, tweet := range last_response.GlobalObjects.Tweets { + response.GlobalObjects.Tweets[id] = tweet + } + for id, user := range last_response.GlobalObjects.Users { + response.GlobalObjects.Users[id] = user + } + } + return nil +} diff --git a/pkg/scraper/api_request_utils.go b/pkg/scraper/api_request_utils.go index 33f1b5a..334a212 100644 --- a/pkg/scraper/api_request_utils.go +++ b/pkg/scraper/api_request_utils.go @@ -15,9 +15,6 @@ import ( log "github.com/sirupsen/logrus" ) -const API_CONVERSATION_BASE_PATH = "https://twitter.com/i/api/2/timeline/conversation/" -const API_USER_TIMELINE_BASE_PATH = "https://api.twitter.com/2/timeline/profile/" - type API struct { UserHandle UserHandle UserID UserID @@ -307,102 +304,6 @@ func add_tweet_query_params(query *url.Values) { query.Add("count", "20") } -func (api API) GetFeedFor(user_id UserID, cursor string) (TweetResponse, error) { - // TODO: this function isn't actually used for anything (APIv2 is used instead) - url, err := url.Parse(fmt.Sprintf("%s%d.json", API_USER_TIMELINE_BASE_PATH, user_id)) - if err != nil { - panic(err) - } - queryParams := url.Query() - add_tweet_query_params(&queryParams) - url.RawQuery = queryParams.Encode() - - var result TweetResponse - err = api.do_http(url.String(), cursor, &result) - - return result, err -} - -/** - * Resend the request to get more tweets if necessary - * - * args: - * - user_id: the user's UserID - * - response: an "out" parameter; the TweetResponse that tweets, RTs and users will be appended to - * - min_tweets: the desired minimum amount of tweets to get - */ -func (api API) GetMoreTweetsFromFeed(user_id UserID, response *TweetResponse, min_tweets int) error { - // TODO user-feed-infinite-fetch: what if you reach the end of the user's timeline? Might loop - // forever getting no new tweets - last_response := response - for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < min_tweets { - fresh_response, err := api.GetFeedFor(user_id, last_response.GetCursor()) - if err != nil { - return err - } - - if fresh_response.GetCursor() == last_response.GetCursor() && len(fresh_response.GlobalObjects.Tweets) == 0 { - // Empty response, cursor same as previous: end of feed has been reached - return END_OF_FEED - } - if fresh_response.IsEndOfFeed() { - // Response has a pinned tweet, but no other content: end of feed has been reached - return END_OF_FEED - } - - last_response = &fresh_response - - // Copy over the tweets and the users - for id, tweet := range last_response.GlobalObjects.Tweets { - response.GlobalObjects.Tweets[id] = tweet - } - for id, user := range last_response.GlobalObjects.Users { - response.GlobalObjects.Users[id] = user - } - fmt.Printf("Have %d tweets, and %d users so far\n", len(response.GlobalObjects.Tweets), len(response.GlobalObjects.Users)) - } - return nil -} - -func (api *API) GetTweet(id TweetID, cursor string) (TweetResponse, error) { - url, err := url.Parse(fmt.Sprintf("%s%d.json", API_CONVERSATION_BASE_PATH, id)) - if err != nil { - panic(err) - } - queryParams := url.Query() - if cursor != "" { - queryParams.Add("referrer", "tweet") - } - add_tweet_query_params(&queryParams) - url.RawQuery = queryParams.Encode() - - var result TweetResponse - err = api.do_http(url.String(), cursor, &result) - return result, err -} - -// Resend the request to get more replies if necessary -func (api *API) GetMoreReplies(tweet_id TweetID, response *TweetResponse, max_replies int) error { - last_response := response - for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < max_replies { - fresh_response, err := api.GetTweet(tweet_id, last_response.GetCursor()) - if err != nil { - return err - } - - last_response = &fresh_response - - // Copy over the tweets and the users - for id, tweet := range last_response.GlobalObjects.Tweets { - response.GlobalObjects.Tweets[id] = tweet - } - for id, user := range last_response.GlobalObjects.Users { - response.GlobalObjects.Users[id] = user - } - } - return nil -} - func DownloadMedia(url string) ([]byte, error) { return the_api.DownloadMedia(url) } diff --git a/pkg/scraper/api_types_dms.go b/pkg/scraper/api_types_dms.go index 6835909..2d5677c 100644 --- a/pkg/scraper/api_types_dms.go +++ b/pkg/scraper/api_types_dms.go @@ -186,7 +186,7 @@ type APIDMResponse struct { UserEvents APIInbox `json:"user_events"` } -func (r APIInbox) ToTweetTrove() TweetTrove { +func (r APIInbox) ToTweetTrove(current_user_id UserID) TweetTrove { ret := NewTweetTrove() for _, entry := range r.Entries { @@ -212,7 +212,7 @@ func (r APIInbox) ToTweetTrove() TweetTrove { ret.MergeWith(entry.Message.ToTweetTrove()) } for _, room := range r.Conversations { - result := ParseAPIDMChatRoom(room) + result := ParseAPIDMChatRoom(room, current_user_id) ret.Rooms[result.ID] = result } for _, u := range r.Users { @@ -403,7 +403,11 @@ func (api *API) GetDMConversation(id DMChatRoomID, max_id DMMessageID) (APIInbox return result.ConversationTimeline, err } -func (api *API) PollInboxUpdates(cursor string) (APIInbox, error) { +// Returns a TweetTrove and the cursor for the next update, or an error +func (api *API) PollInboxUpdates(cursor string) (TweetTrove, string, error) { + if !api.IsAuthenticated { + return TweetTrove{}, "", ErrLoginRequired + } url, err := url.Parse("https://twitter.com/i/api/1.1/dm/user_updates.json") if err != nil { panic(err) @@ -449,10 +453,16 @@ func (api *API) PollInboxUpdates(cursor string) (APIInbox, error) { var result APIDMResponse err = api.do_http(url.String(), "", &result) - return result.UserEvents, err + if err != nil { + return TweetTrove{}, "", err + } + return result.UserEvents.ToTweetTrove(api.UserID), result.UserEvents.Cursor, nil } -func (api *API) SendDMMessage(room_id DMChatRoomID, text string, in_reply_to_id DMMessageID) (APIInbox, error) { +func (api *API) SendDMMessage(room_id DMChatRoomID, text string, in_reply_to_id DMMessageID) (TweetTrove, error) { + if !api.IsAuthenticated { + return TweetTrove{}, ErrLoginRequired + } url, err := url.Parse("https://twitter.com/i/api/1.1/dm/new2.json") if err != nil { panic(err) @@ -519,11 +529,18 @@ func (api *API) SendDMMessage(room_id DMChatRoomID, text string, in_reply_to_id var result APIInbox err = api.do_http_POST(url.String(), post_data, &result) - return result, err + + if err != nil { + return TweetTrove{}, err + } + return result.ToTweetTrove(api.UserID), nil } // Send a reacc func (api *API) SendDMReaction(room_id DMChatRoomID, message_id DMMessageID, reacc string) error { + if !api.IsAuthenticated { + return ErrLoginRequired + } url := "https://twitter.com/i/api/graphql/VyDyV9pC2oZEj6g52hgnhA/useDMReactionMutationAddMutation" body := `{"variables":{"conversationId":"` + string(room_id) + `","messageId":"` + fmt.Sprint(message_id) + `","reactionTypes":["Emoji"],"emojiReactions":["` + reacc + `"]},"queryId":"VyDyV9pC2oZEj6g52hgnhA"}` @@ -546,14 +563,14 @@ func (api *API) SendDMReaction(room_id DMChatRoomID, message_id DMMessageID, rea } // Mark a chat as read. -func (api *API) MarkDMChatRead(room_id DMChatRoomID, read_message_id DMMessageID) { +func (api *API) MarkDMChatRead(room_id DMChatRoomID, read_message_id DMMessageID) error { + if !api.IsAuthenticated { + return ErrLoginRequired + } url := fmt.Sprintf("https://twitter.com/i/api/1.1/dm/conversation/%s/mark_read.json", room_id) // `do_http_POST` will set the "content-type" header based on whether the body starts with '{' or not. data := fmt.Sprintf("conversationId=%s&last_read_event_id=%d", room_id, read_message_id) - err := api.do_http_POST(url, data, nil) // Expected: HTTP 204 - if err != nil { - panic(err) - } + return api.do_http_POST(url, data, nil) // Expected: HTTP 204 } diff --git a/pkg/scraper/api_types_dms_test.go b/pkg/scraper/api_types_dms_test.go index 4f1c55c..637af86 100644 --- a/pkg/scraper/api_types_dms_test.go +++ b/pkg/scraper/api_types_dms_test.go @@ -168,9 +168,7 @@ func TestParseAPIDMConversation(t *testing.T) { require.NoError(t, err) // Simulate one of the participants being logged in - InitApi(API{UserID: 1458284524761075714}) - - chat_room := ParseAPIDMChatRoom(api_room) + chat_room := ParseAPIDMChatRoom(api_room, UserID(1458284524761075714)) assert.Equal(DMChatRoomID("1458284524761075714-1488963321701171204"), chat_room.ID) assert.Equal("ONE_TO_ONE", chat_room.Type) assert.Equal(TimestampFromUnixMilli(1686025129086), chat_room.LastMessagedAt) @@ -204,9 +202,7 @@ func TestParseAPIDMGroupChat(t *testing.T) { require.NoError(t, err) // Simulate one of the participants being logged in - InitApi(API{UserID: 1458284524761075714}) - - chat_room := ParseAPIDMChatRoom(api_room) + chat_room := ParseAPIDMChatRoom(api_room, UserID(1458284524761075714)) assert.Equal(DMChatRoomID("1710215025518948715"), chat_room.ID) assert.Equal("GROUP_DM", chat_room.Type) assert.Equal(TimestampFromUnixMilli(1700112789457), chat_room.LastMessagedAt) @@ -232,7 +228,7 @@ func TestParseInbox(t *testing.T) { err = json.Unmarshal(data, &inbox) require.NoError(t, err) - trove := inbox.InboxInitialState.ToTweetTrove() + trove := inbox.InboxInitialState.ToTweetTrove(UserID(0)) for _, id := range []DMMessageID{1663623062195957773, 1663623203644751885, 1665922180176044037, 1665936253483614212} { m, is_ok := trove.Messages[id] @@ -259,7 +255,7 @@ func TestParseDMRoomResponse(t *testing.T) { err = json.Unmarshal(data, &inbox) require.NoError(t, err) - trove := inbox.ConversationTimeline.ToTweetTrove() + trove := inbox.ConversationTimeline.ToTweetTrove(UserID(0)) for _, id := range []DMMessageID{ 1663623062195957773, @@ -293,7 +289,7 @@ func TestParseInboxUpdates(t *testing.T) { err = json.Unmarshal(data, &inbox) require.NoError(t, err) - trove := inbox.UserEvents.ToTweetTrove() + trove := inbox.UserEvents.ToTweetTrove(UserID(0)) assert.Len(trove.Messages, 2) // Should ignore stuff that isn't a message diff --git a/pkg/scraper/api_types_lists.go b/pkg/scraper/api_types_lists.go index 0f1e2af..1cbcf93 100644 --- a/pkg/scraper/api_types_lists.go +++ b/pkg/scraper/api_types_lists.go @@ -4,7 +4,7 @@ import ( "net/url" ) -func (api *API) GetFollowees(user_id UserID, cursor string) (APIV2Response, error) { +func (api *API) GetFolloweesPage(user_id UserID, cursor string) (APIV2Response, error) { url, err := url.Parse(GraphqlURL{ BaseUrl: "https://twitter.com/i/api/graphql/0yD6Eiv23DKXRDU9VxlG2A/Following", Variables: GraphqlVariables{ @@ -51,17 +51,21 @@ type PaginatedFollowees struct { } func (p PaginatedFollowees) NextPage(api *API, cursor string) (APIV2Response, error) { - return api.GetFollowees(p.user_id, cursor) + return api.GetFolloweesPage(p.user_id, cursor) } func (p PaginatedFollowees) ToTweetTrove(r APIV2Response) (TweetTrove, error) { return r.ToTweetTrove() } -func GetFollowees(user_id UserID, how_many int) (TweetTrove, error) { - return the_api.GetPaginatedQuery(PaginatedFollowees{user_id}, how_many) +func (api *API) GetFollowees(user_id UserID, how_many int) (TweetTrove, error) { + return api.GetPaginatedQuery(PaginatedFollowees{user_id}, how_many) } -func (api *API) GetFollowers(user_id UserID, cursor string) (APIV2Response, error) { +func GetFollowees(user_id UserID, how_many int) (TweetTrove, error) { + return the_api.GetFollowees(user_id, how_many) +} + +func (api *API) GetFollowersPage(user_id UserID, cursor string) (APIV2Response, error) { url, err := url.Parse(GraphqlURL{ BaseUrl: "https://twitter.com/i/api/graphql/3_7xfjmh897x8h_n6QBqTA/Followers", Variables: GraphqlVariables{ @@ -108,12 +112,16 @@ type PaginatedFollowers struct { } func (p PaginatedFollowers) NextPage(api *API, cursor string) (APIV2Response, error) { - return api.GetFollowers(p.user_id, cursor) + return api.GetFollowersPage(p.user_id, cursor) } func (p PaginatedFollowers) ToTweetTrove(r APIV2Response) (TweetTrove, error) { return r.ToTweetTrove() } -func GetFollowers(user_id UserID, how_many int) (TweetTrove, error) { - return the_api.GetPaginatedQuery(PaginatedFollowers{user_id}, how_many) +func (api *API) GetFollowers(user_id UserID, how_many int) (TweetTrove, error) { + return api.GetPaginatedQuery(PaginatedFollowers{user_id}, how_many) +} + +func GetFollowers(user_id UserID, how_many int) (TweetTrove, error) { + return the_api.GetFollowers(user_id, how_many) } diff --git a/pkg/scraper/api_types_posting.go b/pkg/scraper/api_types_posting.go index 5e1c110..d733f66 100644 --- a/pkg/scraper/api_types_posting.go +++ b/pkg/scraper/api_types_posting.go @@ -4,14 +4,15 @@ import ( "errors" "fmt" "strings" - - log "github.com/sirupsen/logrus" ) var AlreadyLikedThisTweet error = errors.New("already liked this tweet") var HaventLikedThisTweet error = errors.New("Haven't liked this tweet") func (api API) LikeTweet(id TweetID) (Like, error) { + if !api.IsAuthenticated { + return Like{}, ErrLoginRequired + } type LikeResponse struct { Data struct { FavoriteTweet string `json:"favorite_tweet"` @@ -52,6 +53,9 @@ func (api API) LikeTweet(id TweetID) (Like, error) { } func (api API) UnlikeTweet(id TweetID) error { + if !api.IsAuthenticated { + return ErrLoginRequired + } type UnlikeResponse struct { Data struct { UnfavoriteTweet string `json:"unfavorite_tweet"` @@ -84,14 +88,8 @@ func (api API) UnlikeTweet(id TweetID) error { } func LikeTweet(id TweetID) (Like, error) { - if !the_api.IsAuthenticated { - log.Fatalf("Must be authenticated!") - } return the_api.LikeTweet(id) } func UnlikeTweet(id TweetID) error { - if !the_api.IsAuthenticated { - log.Fatalf("Must be authenticated!") - } return the_api.UnlikeTweet(id) } diff --git a/pkg/scraper/api_types_spaces.go b/pkg/scraper/api_types_spaces.go index 8b6c4fc..10fbcf6 100644 --- a/pkg/scraper/api_types_spaces.go +++ b/pkg/scraper/api_types_spaces.go @@ -1,6 +1,7 @@ package scraper import ( + "fmt" "net/url" ) @@ -131,3 +132,15 @@ func (api API) GetSpace(id SpaceID) (SpaceResponse, error) { err = api.do_http(url.String(), "", &result) return result, err } + +func (api *API) FetchSpaceDetail(id SpaceID) (TweetTrove, error) { + space_response, err := api.GetSpace(id) + if err != nil { + return TweetTrove{}, fmt.Errorf("Error in API call to fetch Space (id %q):\n %w", id, err) + } + return space_response.ToTweetTrove(), nil +} + +func FetchSpaceDetail(id SpaceID) (TweetTrove, error) { + return the_api.FetchSpaceDetail(id) +} diff --git a/pkg/scraper/api_types_v2.go b/pkg/scraper/api_types_v2.go index 83611ef..0cd13f1 100644 --- a/pkg/scraper/api_types_v2.go +++ b/pkg/scraper/api_types_v2.go @@ -7,6 +7,7 @@ import ( "net/url" "strconv" "strings" + "time" log "github.com/sirupsen/logrus" ) @@ -885,6 +886,10 @@ func (r APIV2Response) ToTweetTroveAsBookmarks() (TweetTrove, error) { return ret, err } +// --------------------------------------------------------- +// Paginated queries API +// --------------------------------------------------------- + type PaginatedQuery interface { NextPage(api *API, cursor string) (APIV2Response, error) ToTweetTrove(r APIV2Response) (TweetTrove, error) @@ -952,8 +957,11 @@ func (api *API) GetPaginatedQuery(pq PaginatedQuery, count int) (TweetTrove, err return trove, err // `err` will be either nil, END_OF_FEED, or ErrRateLimited } +// Paginated User Feed +// ------------------- + // Get a User feed using the new GraphQL twitter api -func (api *API) GetGraphqlFeedFor(user_id UserID, cursor string) (APIV2Response, error) { +func (api *API) GetUserFeedPage(user_id UserID, cursor string) (APIV2Response, error) { url, err := url.Parse(GraphqlURL{ BaseUrl: "https://twitter.com/i/api/graphql/Q6aAvPw7azXZbqXzuqTALA/UserTweetsAndReplies", Variables: GraphqlVariables{ @@ -1001,7 +1009,7 @@ type PaginatedUserFeed struct { } func (p PaginatedUserFeed) NextPage(api *API, cursor string) (APIV2Response, error) { - return api.GetGraphqlFeedFor(p.user_id, cursor) + return api.GetUserFeedPage(p.user_id, cursor) } func (p PaginatedUserFeed) ToTweetTrove(r APIV2Response) (TweetTrove, error) { ret, err := r.ToTweetTrove() @@ -1010,6 +1018,17 @@ func (p PaginatedUserFeed) ToTweetTrove(r APIV2Response) (TweetTrove, error) { return ret, err } +func (api *API) GetUserFeed(user_id UserID, min_tweets int) (trove TweetTrove, err error) { + return api.GetPaginatedQuery(PaginatedUserFeed{user_id}, min_tweets) +} + +func GetUserFeed(user_id UserID, min_tweets int) (trove TweetTrove, err error) { + return the_api.GetUserFeed(user_id, min_tweets) +} + +// Paginated Tweet Detail (conversation) +// ------------------------------------- + func (api *API) GetTweetDetail(tweet_id TweetID, cursor string) (APIV2Response, error) { url, err := url.Parse(GraphqlURL{ BaseUrl: "https://twitter.com/i/api/graphql/tPRAv4UnqM9dOgDWggph7Q/TweetDetail", @@ -1072,7 +1091,39 @@ func (p PaginatedTweetReplies) ToTweetTrove(r APIV2Response) (TweetTrove, error) return r.ToTweetTrove() } -func (api *API) GetUserLikes(user_id UserID, cursor string) (APIV2Response, error) { +func (api *API) GetTweetFullAPIV2(id TweetID, how_many int) (TweetTrove, error) { + trove, err := api.GetPaginatedQuery(PaginatedTweetReplies{id}, how_many) + + // Handle deleted tweet + if errors.Is(err, ErrDoesntExist) { + trove := NewTweetTrove() + fake_user := GetUnknownUser() + trove.Users[fake_user.ID] = fake_user + trove.Tweets[id] = Tweet{ID: id, UserID: fake_user.ID, TombstoneType: "deleted", IsConversationScraped: true, IsStub: true} + return trove, nil + } else if err != nil { + return trove, err + } + + // Find the main tweet and update its "is_conversation_downloaded" and "last_scraped_at" + tweet, ok := trove.Tweets[id] + if !ok { + panic("Trove didn't contain its own tweet!") + } + tweet.LastScrapedAt = Timestamp{time.Now()} + tweet.IsConversationScraped = true + trove.Tweets[id] = tweet + + return trove, err +} +func GetTweetFullAPIV2(id TweetID, how_many int) (TweetTrove, error) { + return the_api.GetTweetFullAPIV2(id, how_many) +} + +// Paginated User Likes +// -------------------- + +func (api *API) GetUserLikesPage(user_id UserID, cursor string) (APIV2Response, error) { url, err := url.Parse(GraphqlURL{ BaseUrl: "https://twitter.com/i/api/graphql/2Z6LYO4UTM4BnWjaNCod6g/Likes", Variables: GraphqlVariables{ @@ -1120,7 +1171,7 @@ type PaginatedUserLikes struct { } func (p PaginatedUserLikes) NextPage(api *API, cursor string) (APIV2Response, error) { - return api.GetUserLikes(p.user_id, cursor) + return api.GetUserLikesPage(p.user_id, cursor) } func (p PaginatedUserLikes) ToTweetTrove(r APIV2Response) (TweetTrove, error) { ret, err := r.ToTweetTroveAsLikes() @@ -1137,11 +1188,18 @@ func (p PaginatedUserLikes) ToTweetTrove(r APIV2Response) (TweetTrove, error) { return ret, nil } -func GetUserLikes(user_id UserID, how_many int) (TweetTrove, error) { - return the_api.GetPaginatedQuery(PaginatedUserLikes{user_id}, how_many) +func (api *API) GetUserLikes(user_id UserID, how_many int) (TweetTrove, error) { + return api.GetPaginatedQuery(PaginatedUserLikes{user_id}, how_many) } -func (api *API) GetBookmarks(cursor string) (APIV2Response, error) { +func GetUserLikes(user_id UserID, how_many int) (TweetTrove, error) { + return the_api.GetUserLikes(user_id, how_many) +} + +// Paginated Bookmarks +// ------------------- + +func (api *API) GetBookmarksPage(cursor string) (APIV2Response, error) { url, err := url.Parse(GraphqlURL{ BaseUrl: "https://twitter.com/i/api/graphql/xLjCVTqYWz8CGSprLU349w/Bookmarks", Variables: GraphqlVariables{ @@ -1186,7 +1244,7 @@ type PaginatedBookmarks struct { } func (p PaginatedBookmarks) NextPage(api *API, cursor string) (APIV2Response, error) { - return api.GetBookmarks(cursor) + return api.GetBookmarksPage(cursor) } func (p PaginatedBookmarks) ToTweetTrove(r APIV2Response) (TweetTrove, error) { ret, err := r.ToTweetTroveAsBookmarks() @@ -1203,10 +1261,18 @@ func (p PaginatedBookmarks) ToTweetTrove(r APIV2Response) (TweetTrove, error) { return ret, nil } -func GetBookmarks(how_many int) (TweetTrove, error) { - return the_api.GetPaginatedQuery(PaginatedBookmarks{the_api.UserID}, how_many) +func (api *API) GetBookmarks(how_many int) (TweetTrove, error) { + return api.GetPaginatedQuery(PaginatedBookmarks{api.UserID}, how_many) } +func GetBookmarks(how_many int) (TweetTrove, error) { + return the_api.GetBookmarks(how_many) +} + +// Paginated Home Timeline +// ----------------------- + +// TODO: paginated? func (api *API) GetHomeTimeline(cursor string, is_following_only bool) (TweetTrove, error) { var url string body_struct := struct { @@ -1269,6 +1335,9 @@ func GetHomeTimeline(cursor string, is_following_only bool) (TweetTrove, error) return the_api.GetHomeTimeline(cursor, is_following_only) } +// Get User +// -------- + func (api API) GetUser(handle UserHandle) (APIUser, error) { url, err := url.Parse(GraphqlURL{ BaseUrl: "https://api.twitter.com/graphql/SAMkL5y_N9pmahSw8yy6gw/UserByScreenName", @@ -1311,7 +1380,10 @@ func (api API) GetUser(handle UserHandle) (APIUser, error) { return response.ConvertToAPIUser(), err } -func (api *API) Search(query string, cursor string) (APIV2Response, error) { +// Paginated Search +// ---------------- + +func (api *API) SearchPage(query string, cursor string) (APIV2Response, error) { url, err := url.Parse(GraphqlURL{ BaseUrl: "https://twitter.com/i/api/graphql/NA567V_8AFwu0cZEkAAKcw/SearchTimeline", Variables: GraphqlVariables{ @@ -1360,7 +1432,7 @@ type PaginatedSearch struct { } func (p PaginatedSearch) NextPage(api *API, cursor string) (APIV2Response, error) { - return api.Search(p.query, cursor) + return api.SearchPage(p.query, cursor) } func (p PaginatedSearch) ToTweetTrove(r APIV2Response) (TweetTrove, error) { return r.ToTweetTrove() @@ -1372,6 +1444,9 @@ func (p PaginatedSearch) ToTweetTrove(r APIV2Response) (TweetTrove, error) { // - search for users // - photos // - videos -func Search(query string, min_results int) (trove TweetTrove, err error) { - return the_api.GetPaginatedQuery(PaginatedSearch{query}, min_results) +func (api *API) Search(query string, min_results int) (trove TweetTrove, err error) { + return api.GetPaginatedQuery(PaginatedSearch{query}, min_results) +} +func Search(query string, min_results int) (trove TweetTrove, err error) { + return the_api.Search(query, min_results) } diff --git a/pkg/scraper/dm_chat_room.go b/pkg/scraper/dm_chat_room.go index fc18dd6..a66bfec 100644 --- a/pkg/scraper/dm_chat_room.go +++ b/pkg/scraper/dm_chat_room.go @@ -53,7 +53,7 @@ func (r DMChatRoom) GetParticipantIDs() []UserID { return ret } -func ParseAPIDMChatRoom(api_room APIDMConversation) DMChatRoom { +func ParseAPIDMChatRoom(api_room APIDMConversation, current_user_id UserID) DMChatRoom { ret := DMChatRoom{} ret.ID = DMChatRoomID(api_room.ConversationID) ret.Type = api_room.Type @@ -80,7 +80,7 @@ func ParseAPIDMChatRoom(api_room APIDMConversation) DMChatRoom { participant.LastReadEventID = DMMessageID(api_participant.LastReadEventID) // Process chat settings if this is the logged-in user - if participant.UserID == the_api.UserID { + if participant.UserID == current_user_id { participant.IsNotificationsDisabled = api_room.NotificationsDisabled participant.IsReadOnly = api_room.ReadOnly participant.IsTrusted = api_room.Trusted diff --git a/pkg/scraper/dm_trove.go b/pkg/scraper/dm_trove.go index c789d5f..2777903 100644 --- a/pkg/scraper/dm_trove.go +++ b/pkg/scraper/dm_trove.go @@ -1,9 +1,5 @@ package scraper -import ( - log "github.com/sirupsen/logrus" -) - func (t TweetTrove) GetOldestMessage(id DMChatRoomID) DMMessageID { oldest := DMMessageID(^uint(0) >> 1) // Max integer for _, m := range t.Messages { @@ -17,87 +13,73 @@ func (t TweetTrove) GetOldestMessage(id DMChatRoomID) DMMessageID { // TODO: Why are these all here? => // Returns a TweetTrove and the cursor for the next update -func GetInbox(how_many int) (TweetTrove, string) { - if !the_api.IsAuthenticated { - log.Fatalf("Fetching DMs can only be done when authenticated. Please provide `--session [user]`") +func (api *API) GetInbox(how_many int) (TweetTrove, string, error) { + if !api.IsAuthenticated { + return TweetTrove{}, "", ErrLoginRequired } - dm_response, err := the_api.GetDMInbox() + dm_response, err := api.GetDMInbox() if err != nil { panic(err) } - trove := dm_response.ToTweetTrove() + trove := dm_response.ToTweetTrove(api.UserID) cursor := dm_response.Cursor next_cursor_id := dm_response.InboxTimelines.Trusted.MinEntryID for len(trove.Rooms) < how_many && dm_response.Status != "AT_END" { - dm_response, err = the_api.GetInboxTrusted(next_cursor_id) + dm_response, err = api.GetInboxTrusted(next_cursor_id) if err != nil { panic(err) } - next_trove := dm_response.ToTweetTrove() + next_trove := dm_response.ToTweetTrove(api.UserID) next_cursor_id = dm_response.MinEntryID trove.MergeWith(next_trove) } - return trove, cursor + return trove, cursor, nil +} +func GetInbox(how_many int) (TweetTrove, string, error) { + return the_api.GetInbox(how_many) } -func GetConversation(id DMChatRoomID, max_id DMMessageID, how_many int) TweetTrove { - if !the_api.IsAuthenticated { - log.Fatalf("Fetching DMs can only be done when authenticated. Please provide `--session [user]`") +func (api *API) GetConversation(id DMChatRoomID, max_id DMMessageID, how_many int) (TweetTrove, error) { + if !api.IsAuthenticated { + return TweetTrove{}, ErrLoginRequired } - dm_response, err := the_api.GetDMConversation(id, max_id) + dm_response, err := api.GetDMConversation(id, max_id) if err != nil { panic(err) } - trove := dm_response.ToTweetTrove() + trove := dm_response.ToTweetTrove(api.UserID) oldest := trove.GetOldestMessage(id) for len(trove.Messages) < how_many && dm_response.Status != "AT_END" { - dm_response, err = the_api.GetDMConversation(id, oldest) + dm_response, err = api.GetDMConversation(id, oldest) if err != nil { panic(err) } - next_trove := dm_response.ToTweetTrove() + next_trove := dm_response.ToTweetTrove(api.UserID) oldest = next_trove.GetOldestMessage(id) trove.MergeWith(next_trove) } - return trove + return trove, nil +} +func GetConversation(id DMChatRoomID, max_id DMMessageID, how_many int) (TweetTrove, error) { + return the_api.GetConversation(id, max_id, how_many) } -// Returns a TweetTrove and the cursor for the next update -func PollInboxUpdates(cursor string) (TweetTrove, string) { - if !the_api.IsAuthenticated { - log.Fatalf("Fetching DMs can only be done when authenticated. Please provide `--session [user]`") - } - dm_response, err := the_api.PollInboxUpdates(cursor) - if err != nil { - panic(err) - } - - return dm_response.ToTweetTrove(), dm_response.Cursor +func PollInboxUpdates(cursor string) (TweetTrove, string, error) { + return the_api.PollInboxUpdates(cursor) } -func SendDMMessage(room_id DMChatRoomID, text string, in_reply_to_id DMMessageID) TweetTrove { - if !the_api.IsAuthenticated { - log.Fatalf("Fetching DMs can only be done when authenticated. Please provide `--session [user]`") - } - dm_response, err := the_api.SendDMMessage(room_id, text, in_reply_to_id) - if err != nil { - panic(err) - } - return dm_response.ToTweetTrove() +func SendDMMessage(room_id DMChatRoomID, text string, in_reply_to_id DMMessageID) (TweetTrove, error) { + return the_api.SendDMMessage(room_id, text, in_reply_to_id) } + func SendDMReaction(room_id DMChatRoomID, message_id DMMessageID, reacc string) error { - if !the_api.IsAuthenticated { - log.Fatalf("Fetching DMs can only be done when authenticated. Please provide `--session [user]`") - } return the_api.SendDMReaction(room_id, message_id, reacc) } -func MarkDMChatRead(room_id DMChatRoomID, read_message_id DMMessageID) { - if !the_api.IsAuthenticated { - log.Fatalf("Writing DMs can only be done when authenticated. Please provide `--session [user]`") - } - the_api.MarkDMChatRead(room_id, read_message_id) + +func MarkDMChatRead(room_id DMChatRoomID, read_message_id DMMessageID) error { + return the_api.MarkDMChatRead(room_id, read_message_id) } diff --git a/pkg/scraper/space.go b/pkg/scraper/space.go index b08c3ba..2056310 100644 --- a/pkg/scraper/space.go +++ b/pkg/scraper/space.go @@ -48,11 +48,3 @@ func ParseAPISpace(apiCard APICard) Space { return ret } - -func FetchSpaceDetail(id SpaceID) (TweetTrove, error) { - space_response, err := the_api.GetSpace(id) - if err != nil { - return TweetTrove{}, fmt.Errorf("Error in API call to fetch Space (id %q):\n %w", id, err) - } - return space_response.ToTweetTrove(), nil -} diff --git a/pkg/scraper/tweet.go b/pkg/scraper/tweet.go index 7588a82..33d6fdf 100644 --- a/pkg/scraper/tweet.go +++ b/pkg/scraper/tweet.go @@ -4,7 +4,6 @@ import ( "database/sql/driver" "errors" "fmt" - log "github.com/sirupsen/logrus" "strings" "time" @@ -244,16 +243,14 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) { return } -/** - * Get a single tweet with no replies from the API. - * - * args: - * - id: the ID of the tweet to get - * - * returns: the single Tweet - */ -func GetTweet(id TweetID) (Tweet, error) { - resp, err := the_api.GetTweetDetail(id, "") +// Get a single tweet with no replies from the API. +// +// args: +// - id: the ID of the tweet to get +// +// returns: the single Tweet +func (api *API) GetTweet(id TweetID) (Tweet, error) { + resp, err := api.GetTweetDetail(id, "") if err != nil { return Tweet{}, fmt.Errorf("Error getting tweet detail: %d\n %w", id, err) } @@ -271,83 +268,6 @@ func GetTweet(id TweetID) (Tweet, error) { tweet.IsConversationScraped = true return tweet, nil } - -/** - * Return a list of tweets, including the original and the rest of its thread, - * along with a list of associated users. - * - * Mark the main tweet as "is_conversation_downloaded = true", and update its "last_scraped_at" - * value. - * - * args: - * - id: the ID of the tweet to get - * - * returns: the tweet, list of its replies and context, and users associated with those replies - */ -func GetTweetFull(id TweetID, how_many int) (trove TweetTrove, err error) { - tweet_response, err := the_api.GetTweet(id, "") - if err != nil { - err = fmt.Errorf("Error getting tweet: %d\n %w", id, err) - return - } - if len(tweet_response.GlobalObjects.Tweets) < how_many && - tweet_response.GetCursor() != "" { - err = the_api.GetMoreReplies(id, &tweet_response, how_many) - if err != nil { - err = fmt.Errorf("Error getting more tweet replies: %d\n %w", id, err) - return - } - } - - // This has to be called BEFORE ToTweetTrove, because it modifies the TweetResponse (adds tombstone tweets to its tweets list) - tombstoned_users := tweet_response.HandleTombstones() - - trove, err = tweet_response.ToTweetTrove() - if err != nil { - panic(err) - } - trove.TombstoneUsers = tombstoned_users - - // Quoted tombstones need their user_id filled out from the tombstoned_users list - log.Debug("Running tweet trove post-processing\n") - err = trove.PostProcess() - if err != nil { - err = fmt.Errorf("Error getting tweet (id %d):\n %w", id, err) - return - } - - // Find the main tweet and update its "is_conversation_downloaded" and "last_scraped_at" - tweet, ok := trove.Tweets[id] - if !ok { - panic("Trove didn't contain its own tweet!") - } - tweet.LastScrapedAt = Timestamp{time.Now()} - tweet.IsConversationScraped = true - trove.Tweets[id] = tweet - - return -} - -func GetTweetFullAPIV2(id TweetID, how_many int) (TweetTrove, error) { - trove, err := the_api.GetPaginatedQuery(PaginatedTweetReplies{id}, how_many) - if errors.Is(err, ErrDoesntExist) { - trove := NewTweetTrove() - fake_user := GetUnknownUser() - trove.Users[fake_user.ID] = fake_user - trove.Tweets[id] = Tweet{ID: id, UserID: fake_user.ID, TombstoneType: "deleted", IsConversationScraped: true, IsStub: true} - return trove, nil - } else if err != nil { - return trove, err - } - - // Find the main tweet and update its "is_conversation_downloaded" and "last_scraped_at" - tweet, ok := trove.Tweets[id] - if !ok { - panic("Trove didn't contain its own tweet!") - } - tweet.LastScrapedAt = Timestamp{time.Now()} - tweet.IsConversationScraped = true - trove.Tweets[id] = tweet - - return trove, err +func GetTweet(id TweetID) (Tweet, error) { + return the_api.GetTweet(id) } diff --git a/pkg/scraper/user_feed.go b/pkg/scraper/user_feed.go deleted file mode 100644 index 30bc539..0000000 --- a/pkg/scraper/user_feed.go +++ /dev/null @@ -1,37 +0,0 @@ -package scraper - -import ( - "errors" - "fmt" -) - -/** - * Get a list of tweets that appear on the given user's page, along with a list of associated - * users for any retweets. - * - * args: - * - user_id: the ID of the user whomst feed to fetch - * - min_tweets: get at least this many tweets, if there are any - * - * returns: a slice of Tweets, Retweets, and Users - */ -func GetUserFeedFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) { - tweet_response, err := the_api.GetFeedFor(user_id, "") - if err != nil { - err = fmt.Errorf("Error calling API to fetch user feed: UserID %d\n %w", user_id, err) - return - } - - if len(tweet_response.GlobalObjects.Tweets) < min_tweets && tweet_response.GetCursor() != "" { - err = the_api.GetMoreTweetsFromFeed(user_id, &tweet_response, min_tweets) - if err != nil && !errors.Is(err, END_OF_FEED) { - return - } - } - - return tweet_response.ToTweetTrove() -} - -func GetUserFeedGraphqlFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) { - return the_api.GetPaginatedQuery(PaginatedUserFeed{user_id}, min_tweets) -}