REFACTOR: reduce technical debt, particularly that caused by the singleton pattern in pkg/scraper

- ensure all scraper functions have an `api.XYZ` version and a package-level convenience function (see the sketch below)
	- isolate `the_api` to top-level convenience functions, in preparation for removal
- move a bunch of scraper functions around so they're near their related functions
- add new `ErrLoginRequired` error for operations that require an authenticated session
- remove obsolete APIv1 stuff (Feed, TweetDetail)
- rename scraper function GetUserFeedGraphqlFor => GetUserFeed
- fix go.mod incorrectly claiming compatibility with Go 1.16 (should be Go 1.17)
Alessio 2024-08-09 19:41:39 -07:00
parent 6a464827c7
commit 24129c4852
19 changed files with 437 additions and 347 deletions
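
The shape of the refactor, sketched with GetUserLikes (taken from the diff below): each operation becomes a method on *API, and the old package-level function shrinks to a one-line convenience wrapper that is now the only place the `the_api` singleton is referenced, so the singleton can be removed later without touching callers that hold their own *API.

// Method form: explicit *API receiver, no singleton access.
func (api *API) GetUserLikes(user_id UserID, how_many int) (TweetTrove, error) {
	return api.GetPaginatedQuery(PaginatedUserLikes{user_id}, how_many)
}

// Package-level convenience wrapper: the only remaining reference to `the_api`.
func GetUserLikes(user_id UserID, how_many int) (TweetTrove, error) {
	return the_api.GetUserLikes(user_id, how_many)
}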

View File

@ -127,7 +127,6 @@ func main() {
*session_name = (*session_name)[:len(*session_name)-8]
}
scraper.InitApi(profile.LoadSession(scraper.UserHandle(*session_name)))
// fmt.Printf("Operating as user: @%s\n", scraper.the_api.UserHandle)
} else {
session, err := scraper.NewGuestSession()
if err != nil {
@ -235,7 +234,7 @@ func main() {
// - username: twitter username or email address
// - password: twitter account password
func login(username string, password string) {
// Skip the scraper.the_api variable, just use a local one since no scraping is happening
// Skip the scraper.InitApi, just use a local one since no scraping is happening
api, err := scraper.NewGuestSession()
if err != nil {
die(fmt.Sprintf("Unable to create session: %s", err.Error()), false, 1)
@ -350,7 +349,7 @@ func fetch_user_feed(handle string, how_many int) {
die(fmt.Sprintf("Error getting user: %s\n %s", handle, err.Error()), false, -1)
}
trove, err := scraper.GetUserFeedGraphqlFor(user.ID, how_many)
trove, err := scraper.GetUserFeed(user.ID, how_many)
if is_scrape_failure(err) {
die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2)
}
@ -526,7 +525,10 @@ func start_webserver(addr string, should_auto_open bool) {
}
func fetch_inbox(how_many int) {
trove, _ := scraper.GetInbox(how_many)
trove, _, err := scraper.GetInbox(how_many)
if err != nil {
die(fmt.Sprintf("Failed to fetch inbox:\n %s", err.Error()), false, 1)
}
profile.SaveTweetTrove(trove, true)
happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)), nil)
}
@ -537,7 +539,10 @@ func fetch_dm(id string, how_many int) {
panic(err)
}
max_id := scraper.DMMessageID(^uint(0) >> 1)
trove := scraper.GetConversation(room.ID, max_id, how_many)
trove, err := scraper.GetConversation(room.ID, max_id, how_many)
if err != nil {
die(fmt.Sprintf("Failed to fetch dm:\n %s", err.Error()), false, 1)
}
profile.SaveTweetTrove(trove, true)
happy_exit(
fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)),
@ -551,7 +556,10 @@ func send_dm(room_id string, text string, in_reply_to_id int) {
die(fmt.Sprintf("No such chat room: %d", in_reply_to_id), false, 1)
}
trove := scraper.SendDMMessage(room.ID, text, scraper.DMMessageID(in_reply_to_id))
trove, err := scraper.SendDMMessage(room.ID, text, scraper.DMMessageID(in_reply_to_id))
if err != nil {
die(fmt.Sprintf("Failed to send dm:\n %s", err.Error()), false, 1)
}
profile.SaveTweetTrove(trove, true)
happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)), nil)
}

go.mod
View File

@ -1,6 +1,6 @@
module gitlab.com/offline-twitter/twitter_offline_engine
go 1.16
go 1.17
require (
github.com/Masterminds/sprig/v3 v3.2.3
@ -15,3 +15,19 @@ require (
golang.org/x/net v0.9.0
golang.org/x/term v0.7.0
)
require (
github.com/Masterminds/goutils v1.1.1 // indirect
github.com/Masterminds/semver/v3 v3.2.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/huandu/xstrings v1.3.3 // indirect
github.com/imdario/mergo v0.3.11 // indirect
github.com/mitchellh/copystructure v1.0.0 // indirect
github.com/mitchellh/reflectwalk v1.0.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/shopspring/decimal v1.2.0 // indirect
github.com/spf13/cast v1.3.1 // indirect
golang.org/x/crypto v0.3.0 // indirect
golang.org/x/sys v0.7.0 // indirect
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect
)

View File

@ -33,7 +33,7 @@ func (app *Application) message_mark_as_read(w http.ResponseWriter, r *http.Requ
c.PageSize = 1
chat_contents := app.Profile.GetChatRoomMessagesByCursor(c)
last_message_id := chat_contents.MessageIDs[len(chat_contents.MessageIDs)-1]
scraper.MarkDMChatRead(room_id, last_message_id)
panic_if(scraper.MarkDMChatRead(room_id, last_message_id))
room := chat_contents.Rooms[room_id]
participant, is_ok := room.Participants[app.ActiveUser.ID]
if !is_ok {
@ -66,7 +66,10 @@ func (app *Application) message_send(w http.ResponseWriter, r *http.Request) {
in_reply_to_id = 0
}
trove := scraper.SendDMMessage(room_id, message_data.Text, scraper.DMMessageID(in_reply_to_id))
trove, err := scraper.SendDMMessage(room_id, message_data.Text, scraper.DMMessageID(in_reply_to_id))
if err != nil {
panic(err)
}
app.Profile.SaveTweetTrove(trove, false)
go app.Profile.SaveTweetTrove(trove, true)
}
@ -115,7 +118,10 @@ func (app *Application) message_detail(w http.ResponseWriter, r *http.Request) {
if r.URL.Query().Has("scrape") && !app.IsScrapingDisabled {
max_id := scraper.DMMessageID(^uint(0) >> 1)
trove := scraper.GetConversation(room_id, max_id, 50) // TODO: parameterizable
trove, err := scraper.GetConversation(room_id, max_id, 50) // TODO: parameterizable
if err != nil {
panic(err)
}
app.Profile.SaveTweetTrove(trove, false)
go app.Profile.SaveTweetTrove(trove, true) // Download the content in the background
}

View File

@ -46,7 +46,7 @@ func (app *Application) UserFeed(w http.ResponseWriter, r *http.Request) {
if len(parts) == 1 { // The URL is just the user handle
// Run scraper
trove, err := scraper.GetUserFeedGraphqlFor(user.ID, 50) // TODO: parameterizable
trove, err := scraper.GetUserFeed(user.ID, 50) // TODO: parameterizable
if err != nil {
app.ErrorLog.Print(err)
// TODO: show error in UI

View File

@ -103,10 +103,14 @@ func (app *Application) background_dm_polling_scrape() {
fmt.Println("Scraping user DMs...")
var trove scraper.TweetTrove
var err error
if inbox_cursor == "" {
trove, inbox_cursor = scraper.GetInbox(0)
trove, inbox_cursor, err = scraper.GetInbox(0)
} else {
trove, inbox_cursor = scraper.PollInboxUpdates(inbox_cursor)
trove, inbox_cursor, err = scraper.PollInboxUpdates(inbox_cursor)
}
if err != nil {
panic(err)
}
fmt.Println("Saving DM results...")
app.Profile.SaveTweetTrove(trove, false)

View File

@ -12,6 +12,7 @@ var (
ErrRateLimited = errors.New("rate limited")
ErrorDMCA = errors.New("video is DMCAed, unable to download (HTTP 403 Forbidden)")
ErrMediaDownload404 = errors.New("media download HTTP 404")
ErrLoginRequired = errors.New("login required; please provide `--session <user>` flag")
// These are not API errors, but network errors generally
ErrNoInternet = errors.New("no internet connection")
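
For illustration only (not part of this diff): since ErrLoginRequired is a plain sentinel from errors.New, callers can branch on it with errors.Is instead of the old log.Fatalf behaviour. A minimal sketch, assuming the `die` helper used in the main-package hunks above:

trove, _, err := scraper.GetInbox(50)
if errors.Is(err, scraper.ErrLoginRequired) {
	// Not authenticated: explain the fix instead of crashing with a stack trace.
	die("fetching DMs requires a logged-in session; pass `--session <user>`", false, 1)
} else if err != nil {
	die(fmt.Sprintf("Failed to fetch inbox:\n %s", err.Error()), false, 1)
}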

View File

@ -0,0 +1,190 @@
//go:build obsolete_user_feed
// Nothing in this file is used. It's outdated; user feed comes from APIv2 instead now.
package scraper
import (
"errors"
"fmt"
"net/url"
"time"
log "github.com/sirupsen/logrus"
)
const API_CONVERSATION_BASE_PATH = "https://twitter.com/i/api/2/timeline/conversation/"
const API_USER_TIMELINE_BASE_PATH = "https://api.twitter.com/2/timeline/profile/"
func (api API) GetFeedFor(user_id UserID, cursor string) (TweetResponse, error) {
url, err := url.Parse(fmt.Sprintf("%s%d.json", API_USER_TIMELINE_BASE_PATH, user_id))
if err != nil {
panic(err)
}
queryParams := url.Query()
add_tweet_query_params(&queryParams)
url.RawQuery = queryParams.Encode()
var result TweetResponse
err = api.do_http(url.String(), cursor, &result)
return result, err
}
/**
* Resend the request to get more tweets if necessary
*
* args:
* - user_id: the user's UserID
* - response: an "out" parameter; the TweetResponse that tweets, RTs and users will be appended to
* - min_tweets: the desired minimum amount of tweets to get
*/
func (api API) GetMoreTweetsFromFeed(user_id UserID, response *TweetResponse, min_tweets int) error {
last_response := response
for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < min_tweets {
fresh_response, err := api.GetFeedFor(user_id, last_response.GetCursor())
if err != nil {
return err
}
if fresh_response.GetCursor() == last_response.GetCursor() && len(fresh_response.GlobalObjects.Tweets) == 0 {
// Empty response, cursor same as previous: end of feed has been reached
return END_OF_FEED
}
if fresh_response.IsEndOfFeed() {
// Response has a pinned tweet, but no other content: end of feed has been reached
return END_OF_FEED
}
last_response = &fresh_response
// Copy over the tweets and the users
for id, tweet := range last_response.GlobalObjects.Tweets {
response.GlobalObjects.Tweets[id] = tweet
}
for id, user := range last_response.GlobalObjects.Users {
response.GlobalObjects.Users[id] = user
}
fmt.Printf("Have %d tweets, and %d users so far\n", len(response.GlobalObjects.Tweets), len(response.GlobalObjects.Users))
}
return nil
}
/**
* Get a list of tweets that appear on the given user's page, along with a list of associated
* users for any retweets.
*
* args:
* - user_id: the ID of the user whomst feed to fetch
* - min_tweets: get at least this many tweets, if there are any
*
* returns: a slice of Tweets, Retweets, and Users
*/
func GetUserFeedFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) {
tweet_response, err := the_api.GetFeedFor(user_id, "")
if err != nil {
err = fmt.Errorf("Error calling API to fetch user feed: UserID %d\n %w", user_id, err)
return
}
if len(tweet_response.GlobalObjects.Tweets) < min_tweets && tweet_response.GetCursor() != "" {
err = the_api.GetMoreTweetsFromFeed(user_id, &tweet_response, min_tweets)
if err != nil && !errors.Is(err, END_OF_FEED) {
return
}
}
return tweet_response.ToTweetTrove()
}
/**
* Return a list of tweets, including the original and the rest of its thread,
* along with a list of associated users.
*
* Mark the main tweet as "is_conversation_downloaded = true", and update its "last_scraped_at"
* value.
*
* args:
* - id: the ID of the tweet to get
*
* returns: the tweet, list of its replies and context, and users associated with those replies
*/
func GetTweetFull(id TweetID, how_many int) (trove TweetTrove, err error) {
tweet_response, err := the_api.GetTweet(id, "")
if err != nil {
err = fmt.Errorf("Error getting tweet: %d\n %w", id, err)
return
}
if len(tweet_response.GlobalObjects.Tweets) < how_many &&
tweet_response.GetCursor() != "" {
err = the_api.GetMoreReplies(id, &tweet_response, how_many)
if err != nil {
err = fmt.Errorf("Error getting more tweet replies: %d\n %w", id, err)
return
}
}
// This has to be called BEFORE ToTweetTrove, because it modifies the TweetResponse (adds tombstone tweets to its tweets list)
tombstoned_users := tweet_response.HandleTombstones()
trove, err = tweet_response.ToTweetTrove()
if err != nil {
panic(err)
}
trove.TombstoneUsers = tombstoned_users
// Quoted tombstones need their user_id filled out from the tombstoned_users list
log.Debug("Running tweet trove post-processing\n")
err = trove.PostProcess()
if err != nil {
err = fmt.Errorf("Error getting tweet (id %d):\n %w", id, err)
return
}
// Find the main tweet and update its "is_conversation_downloaded" and "last_scraped_at"
tweet, ok := trove.Tweets[id]
if !ok {
panic("Trove didn't contain its own tweet!")
}
tweet.LastScrapedAt = Timestamp{time.Now()}
tweet.IsConversationScraped = true
trove.Tweets[id] = tweet
return
}
func (api *API) GetTweet(id TweetID, cursor string) (TweetResponse, error) {
url, err := url.Parse(fmt.Sprintf("%s%d.json", API_CONVERSATION_BASE_PATH, id))
if err != nil {
panic(err)
}
queryParams := url.Query()
if cursor != "" {
queryParams.Add("referrer", "tweet")
}
add_tweet_query_params(&queryParams)
url.RawQuery = queryParams.Encode()
var result TweetResponse
err = api.do_http(url.String(), cursor, &result)
return result, err
}
// Resend the request to get more replies if necessary
func (api *API) GetMoreReplies(tweet_id TweetID, response *TweetResponse, max_replies int) error {
last_response := response
for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < max_replies {
fresh_response, err := api.GetTweet(tweet_id, last_response.GetCursor())
if err != nil {
return err
}
last_response = &fresh_response
// Copy over the tweets and the users
for id, tweet := range last_response.GlobalObjects.Tweets {
response.GlobalObjects.Tweets[id] = tweet
}
for id, user := range last_response.GlobalObjects.Users {
response.GlobalObjects.Users[id] = user
}
}
return nil
}

View File

@ -15,9 +15,6 @@ import (
log "github.com/sirupsen/logrus"
)
const API_CONVERSATION_BASE_PATH = "https://twitter.com/i/api/2/timeline/conversation/"
const API_USER_TIMELINE_BASE_PATH = "https://api.twitter.com/2/timeline/profile/"
type API struct {
UserHandle UserHandle
UserID UserID
@ -307,102 +304,6 @@ func add_tweet_query_params(query *url.Values) {
query.Add("count", "20")
}
func (api API) GetFeedFor(user_id UserID, cursor string) (TweetResponse, error) {
// TODO: this function isn't actually used for anything (APIv2 is used instead)
url, err := url.Parse(fmt.Sprintf("%s%d.json", API_USER_TIMELINE_BASE_PATH, user_id))
if err != nil {
panic(err)
}
queryParams := url.Query()
add_tweet_query_params(&queryParams)
url.RawQuery = queryParams.Encode()
var result TweetResponse
err = api.do_http(url.String(), cursor, &result)
return result, err
}
/**
* Resend the request to get more tweets if necessary
*
* args:
* - user_id: the user's UserID
* - response: an "out" parameter; the TweetResponse that tweets, RTs and users will be appended to
* - min_tweets: the desired minimum amount of tweets to get
*/
func (api API) GetMoreTweetsFromFeed(user_id UserID, response *TweetResponse, min_tweets int) error {
// TODO user-feed-infinite-fetch: what if you reach the end of the user's timeline? Might loop
// forever getting no new tweets
last_response := response
for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < min_tweets {
fresh_response, err := api.GetFeedFor(user_id, last_response.GetCursor())
if err != nil {
return err
}
if fresh_response.GetCursor() == last_response.GetCursor() && len(fresh_response.GlobalObjects.Tweets) == 0 {
// Empty response, cursor same as previous: end of feed has been reached
return END_OF_FEED
}
if fresh_response.IsEndOfFeed() {
// Response has a pinned tweet, but no other content: end of feed has been reached
return END_OF_FEED
}
last_response = &fresh_response
// Copy over the tweets and the users
for id, tweet := range last_response.GlobalObjects.Tweets {
response.GlobalObjects.Tweets[id] = tweet
}
for id, user := range last_response.GlobalObjects.Users {
response.GlobalObjects.Users[id] = user
}
fmt.Printf("Have %d tweets, and %d users so far\n", len(response.GlobalObjects.Tweets), len(response.GlobalObjects.Users))
}
return nil
}
func (api *API) GetTweet(id TweetID, cursor string) (TweetResponse, error) {
url, err := url.Parse(fmt.Sprintf("%s%d.json", API_CONVERSATION_BASE_PATH, id))
if err != nil {
panic(err)
}
queryParams := url.Query()
if cursor != "" {
queryParams.Add("referrer", "tweet")
}
add_tweet_query_params(&queryParams)
url.RawQuery = queryParams.Encode()
var result TweetResponse
err = api.do_http(url.String(), cursor, &result)
return result, err
}
// Resend the request to get more replies if necessary
func (api *API) GetMoreReplies(tweet_id TweetID, response *TweetResponse, max_replies int) error {
last_response := response
for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < max_replies {
fresh_response, err := api.GetTweet(tweet_id, last_response.GetCursor())
if err != nil {
return err
}
last_response = &fresh_response
// Copy over the tweets and the users
for id, tweet := range last_response.GlobalObjects.Tweets {
response.GlobalObjects.Tweets[id] = tweet
}
for id, user := range last_response.GlobalObjects.Users {
response.GlobalObjects.Users[id] = user
}
}
return nil
}
func DownloadMedia(url string) ([]byte, error) {
return the_api.DownloadMedia(url)
}

View File

@ -186,7 +186,7 @@ type APIDMResponse struct {
UserEvents APIInbox `json:"user_events"`
}
func (r APIInbox) ToTweetTrove() TweetTrove {
func (r APIInbox) ToTweetTrove(current_user_id UserID) TweetTrove {
ret := NewTweetTrove()
for _, entry := range r.Entries {
@ -212,7 +212,7 @@ func (r APIInbox) ToTweetTrove() TweetTrove {
ret.MergeWith(entry.Message.ToTweetTrove())
}
for _, room := range r.Conversations {
result := ParseAPIDMChatRoom(room)
result := ParseAPIDMChatRoom(room, current_user_id)
ret.Rooms[result.ID] = result
}
for _, u := range r.Users {
@ -403,7 +403,11 @@ func (api *API) GetDMConversation(id DMChatRoomID, max_id DMMessageID) (APIInbox
return result.ConversationTimeline, err
}
func (api *API) PollInboxUpdates(cursor string) (APIInbox, error) {
// Returns a TweetTrove and the cursor for the next update, or an error
func (api *API) PollInboxUpdates(cursor string) (TweetTrove, string, error) {
if !api.IsAuthenticated {
return TweetTrove{}, "", ErrLoginRequired
}
url, err := url.Parse("https://twitter.com/i/api/1.1/dm/user_updates.json")
if err != nil {
panic(err)
@ -449,10 +453,16 @@ func (api *API) PollInboxUpdates(cursor string) (APIInbox, error) {
var result APIDMResponse
err = api.do_http(url.String(), "", &result)
return result.UserEvents, err
if err != nil {
return TweetTrove{}, "", err
}
return result.UserEvents.ToTweetTrove(api.UserID), result.UserEvents.Cursor, nil
}
func (api *API) SendDMMessage(room_id DMChatRoomID, text string, in_reply_to_id DMMessageID) (APIInbox, error) {
func (api *API) SendDMMessage(room_id DMChatRoomID, text string, in_reply_to_id DMMessageID) (TweetTrove, error) {
if !api.IsAuthenticated {
return TweetTrove{}, ErrLoginRequired
}
url, err := url.Parse("https://twitter.com/i/api/1.1/dm/new2.json")
if err != nil {
panic(err)
@ -519,11 +529,18 @@ func (api *API) SendDMMessage(room_id DMChatRoomID, text string, in_reply_to_id
var result APIInbox
err = api.do_http_POST(url.String(), post_data, &result)
return result, err
if err != nil {
return TweetTrove{}, err
}
return result.ToTweetTrove(api.UserID), nil
}
// Send a reacc
func (api *API) SendDMReaction(room_id DMChatRoomID, message_id DMMessageID, reacc string) error {
if !api.IsAuthenticated {
return ErrLoginRequired
}
url := "https://twitter.com/i/api/graphql/VyDyV9pC2oZEj6g52hgnhA/useDMReactionMutationAddMutation"
body := `{"variables":{"conversationId":"` + string(room_id) + `","messageId":"` + fmt.Sprint(message_id) +
`","reactionTypes":["Emoji"],"emojiReactions":["` + reacc + `"]},"queryId":"VyDyV9pC2oZEj6g52hgnhA"}`
@ -546,14 +563,14 @@ func (api *API) SendDMReaction(room_id DMChatRoomID, message_id DMMessageID, rea
}
// Mark a chat as read.
func (api *API) MarkDMChatRead(room_id DMChatRoomID, read_message_id DMMessageID) {
func (api *API) MarkDMChatRead(room_id DMChatRoomID, read_message_id DMMessageID) error {
if !api.IsAuthenticated {
return ErrLoginRequired
}
url := fmt.Sprintf("https://twitter.com/i/api/1.1/dm/conversation/%s/mark_read.json", room_id)
// `do_http_POST` will set the "content-type" header based on whether the body starts with '{' or not.
data := fmt.Sprintf("conversationId=%s&last_read_event_id=%d", room_id, read_message_id)
err := api.do_http_POST(url, data, nil) // Expected: HTTP 204
if err != nil {
panic(err)
}
return api.do_http_POST(url, data, nil) // Expected: HTTP 204
}

View File

@ -168,9 +168,7 @@ func TestParseAPIDMConversation(t *testing.T) {
require.NoError(t, err)
// Simulate one of the participants being logged in
InitApi(API{UserID: 1458284524761075714})
chat_room := ParseAPIDMChatRoom(api_room)
chat_room := ParseAPIDMChatRoom(api_room, UserID(1458284524761075714))
assert.Equal(DMChatRoomID("1458284524761075714-1488963321701171204"), chat_room.ID)
assert.Equal("ONE_TO_ONE", chat_room.Type)
assert.Equal(TimestampFromUnixMilli(1686025129086), chat_room.LastMessagedAt)
@ -204,9 +202,7 @@ func TestParseAPIDMGroupChat(t *testing.T) {
require.NoError(t, err)
// Simulate one of the participants being logged in
InitApi(API{UserID: 1458284524761075714})
chat_room := ParseAPIDMChatRoom(api_room)
chat_room := ParseAPIDMChatRoom(api_room, UserID(1458284524761075714))
assert.Equal(DMChatRoomID("1710215025518948715"), chat_room.ID)
assert.Equal("GROUP_DM", chat_room.Type)
assert.Equal(TimestampFromUnixMilli(1700112789457), chat_room.LastMessagedAt)
@ -232,7 +228,7 @@ func TestParseInbox(t *testing.T) {
err = json.Unmarshal(data, &inbox)
require.NoError(t, err)
trove := inbox.InboxInitialState.ToTweetTrove()
trove := inbox.InboxInitialState.ToTweetTrove(UserID(0))
for _, id := range []DMMessageID{1663623062195957773, 1663623203644751885, 1665922180176044037, 1665936253483614212} {
m, is_ok := trove.Messages[id]
@ -259,7 +255,7 @@ func TestParseDMRoomResponse(t *testing.T) {
err = json.Unmarshal(data, &inbox)
require.NoError(t, err)
trove := inbox.ConversationTimeline.ToTweetTrove()
trove := inbox.ConversationTimeline.ToTweetTrove(UserID(0))
for _, id := range []DMMessageID{
1663623062195957773,
@ -293,7 +289,7 @@ func TestParseInboxUpdates(t *testing.T) {
err = json.Unmarshal(data, &inbox)
require.NoError(t, err)
trove := inbox.UserEvents.ToTweetTrove()
trove := inbox.UserEvents.ToTweetTrove(UserID(0))
assert.Len(trove.Messages, 2) // Should ignore stuff that isn't a message

View File

@ -4,7 +4,7 @@ import (
"net/url"
)
func (api *API) GetFollowees(user_id UserID, cursor string) (APIV2Response, error) {
func (api *API) GetFolloweesPage(user_id UserID, cursor string) (APIV2Response, error) {
url, err := url.Parse(GraphqlURL{
BaseUrl: "https://twitter.com/i/api/graphql/0yD6Eiv23DKXRDU9VxlG2A/Following",
Variables: GraphqlVariables{
@ -51,17 +51,21 @@ type PaginatedFollowees struct {
}
func (p PaginatedFollowees) NextPage(api *API, cursor string) (APIV2Response, error) {
return api.GetFollowees(p.user_id, cursor)
return api.GetFolloweesPage(p.user_id, cursor)
}
func (p PaginatedFollowees) ToTweetTrove(r APIV2Response) (TweetTrove, error) {
return r.ToTweetTrove()
}
func GetFollowees(user_id UserID, how_many int) (TweetTrove, error) {
return the_api.GetPaginatedQuery(PaginatedFollowees{user_id}, how_many)
func (api *API) GetFollowees(user_id UserID, how_many int) (TweetTrove, error) {
return api.GetPaginatedQuery(PaginatedFollowees{user_id}, how_many)
}
func (api *API) GetFollowers(user_id UserID, cursor string) (APIV2Response, error) {
func GetFollowees(user_id UserID, how_many int) (TweetTrove, error) {
return the_api.GetFollowees(user_id, how_many)
}
func (api *API) GetFollowersPage(user_id UserID, cursor string) (APIV2Response, error) {
url, err := url.Parse(GraphqlURL{
BaseUrl: "https://twitter.com/i/api/graphql/3_7xfjmh897x8h_n6QBqTA/Followers",
Variables: GraphqlVariables{
@ -108,12 +112,16 @@ type PaginatedFollowers struct {
}
func (p PaginatedFollowers) NextPage(api *API, cursor string) (APIV2Response, error) {
return api.GetFollowers(p.user_id, cursor)
return api.GetFollowersPage(p.user_id, cursor)
}
func (p PaginatedFollowers) ToTweetTrove(r APIV2Response) (TweetTrove, error) {
return r.ToTweetTrove()
}
func GetFollowers(user_id UserID, how_many int) (TweetTrove, error) {
return the_api.GetPaginatedQuery(PaginatedFollowers{user_id}, how_many)
func (api *API) GetFollowers(user_id UserID, how_many int) (TweetTrove, error) {
return api.GetPaginatedQuery(PaginatedFollowers{user_id}, how_many)
}
func GetFollowers(user_id UserID, how_many int) (TweetTrove, error) {
return the_api.GetFollowers(user_id, how_many)
}

View File

@ -4,14 +4,15 @@ import (
"errors"
"fmt"
"strings"
log "github.com/sirupsen/logrus"
)
var AlreadyLikedThisTweet error = errors.New("already liked this tweet")
var HaventLikedThisTweet error = errors.New("Haven't liked this tweet")
func (api API) LikeTweet(id TweetID) (Like, error) {
if !api.IsAuthenticated {
return Like{}, ErrLoginRequired
}
type LikeResponse struct {
Data struct {
FavoriteTweet string `json:"favorite_tweet"`
@ -52,6 +53,9 @@ func (api API) LikeTweet(id TweetID) (Like, error) {
}
func (api API) UnlikeTweet(id TweetID) error {
if !api.IsAuthenticated {
return ErrLoginRequired
}
type UnlikeResponse struct {
Data struct {
UnfavoriteTweet string `json:"unfavorite_tweet"`
@ -84,14 +88,8 @@ func (api API) UnlikeTweet(id TweetID) error {
}
func LikeTweet(id TweetID) (Like, error) {
if !the_api.IsAuthenticated {
log.Fatalf("Must be authenticated!")
}
return the_api.LikeTweet(id)
}
func UnlikeTweet(id TweetID) error {
if !the_api.IsAuthenticated {
log.Fatalf("Must be authenticated!")
}
return the_api.UnlikeTweet(id)
}

View File

@ -1,6 +1,7 @@
package scraper
import (
"fmt"
"net/url"
)
@ -131,3 +132,15 @@ func (api API) GetSpace(id SpaceID) (SpaceResponse, error) {
err = api.do_http(url.String(), "", &result)
return result, err
}
func (api *API) FetchSpaceDetail(id SpaceID) (TweetTrove, error) {
space_response, err := api.GetSpace(id)
if err != nil {
return TweetTrove{}, fmt.Errorf("Error in API call to fetch Space (id %q):\n %w", id, err)
}
return space_response.ToTweetTrove(), nil
}
func FetchSpaceDetail(id SpaceID) (TweetTrove, error) {
return the_api.FetchSpaceDetail(id)
}

View File

@ -7,6 +7,7 @@ import (
"net/url"
"strconv"
"strings"
"time"
log "github.com/sirupsen/logrus"
)
@ -885,6 +886,10 @@ func (r APIV2Response) ToTweetTroveAsBookmarks() (TweetTrove, error) {
return ret, err
}
// ---------------------------------------------------------
// Paginated queries API
// ---------------------------------------------------------
type PaginatedQuery interface {
NextPage(api *API, cursor string) (APIV2Response, error)
ToTweetTrove(r APIV2Response) (TweetTrove, error)
@ -952,8 +957,11 @@ func (api *API) GetPaginatedQuery(pq PaginatedQuery, count int) (TweetTrove, err
return trove, err // `err` will be either nil, END_OF_FEED, or ErrRateLimited
}
// Paginated User Feed
// -------------------
// Get a User feed using the new GraphQL twitter api
func (api *API) GetGraphqlFeedFor(user_id UserID, cursor string) (APIV2Response, error) {
func (api *API) GetUserFeedPage(user_id UserID, cursor string) (APIV2Response, error) {
url, err := url.Parse(GraphqlURL{
BaseUrl: "https://twitter.com/i/api/graphql/Q6aAvPw7azXZbqXzuqTALA/UserTweetsAndReplies",
Variables: GraphqlVariables{
@ -1001,7 +1009,7 @@ type PaginatedUserFeed struct {
}
func (p PaginatedUserFeed) NextPage(api *API, cursor string) (APIV2Response, error) {
return api.GetGraphqlFeedFor(p.user_id, cursor)
return api.GetUserFeedPage(p.user_id, cursor)
}
func (p PaginatedUserFeed) ToTweetTrove(r APIV2Response) (TweetTrove, error) {
ret, err := r.ToTweetTrove()
@ -1010,6 +1018,17 @@ func (p PaginatedUserFeed) ToTweetTrove(r APIV2Response) (TweetTrove, error) {
return ret, err
}
func (api *API) GetUserFeed(user_id UserID, min_tweets int) (trove TweetTrove, err error) {
return api.GetPaginatedQuery(PaginatedUserFeed{user_id}, min_tweets)
}
func GetUserFeed(user_id UserID, min_tweets int) (trove TweetTrove, err error) {
return the_api.GetUserFeed(user_id, min_tweets)
}
// Paginated Tweet Detail (conversation)
// -------------------------------------
func (api *API) GetTweetDetail(tweet_id TweetID, cursor string) (APIV2Response, error) {
url, err := url.Parse(GraphqlURL{
BaseUrl: "https://twitter.com/i/api/graphql/tPRAv4UnqM9dOgDWggph7Q/TweetDetail",
@ -1072,7 +1091,39 @@ func (p PaginatedTweetReplies) ToTweetTrove(r APIV2Response) (TweetTrove, error)
return r.ToTweetTrove()
}
func (api *API) GetUserLikes(user_id UserID, cursor string) (APIV2Response, error) {
func (api *API) GetTweetFullAPIV2(id TweetID, how_many int) (TweetTrove, error) {
trove, err := api.GetPaginatedQuery(PaginatedTweetReplies{id}, how_many)
// Handle deleted tweet
if errors.Is(err, ErrDoesntExist) {
trove := NewTweetTrove()
fake_user := GetUnknownUser()
trove.Users[fake_user.ID] = fake_user
trove.Tweets[id] = Tweet{ID: id, UserID: fake_user.ID, TombstoneType: "deleted", IsConversationScraped: true, IsStub: true}
return trove, nil
} else if err != nil {
return trove, err
}
// Find the main tweet and update its "is_conversation_downloaded" and "last_scraped_at"
tweet, ok := trove.Tweets[id]
if !ok {
panic("Trove didn't contain its own tweet!")
}
tweet.LastScrapedAt = Timestamp{time.Now()}
tweet.IsConversationScraped = true
trove.Tweets[id] = tweet
return trove, err
}
func GetTweetFullAPIV2(id TweetID, how_many int) (TweetTrove, error) {
return the_api.GetTweetFullAPIV2(id, how_many)
}
// Paginated User Likes
// --------------------
func (api *API) GetUserLikesPage(user_id UserID, cursor string) (APIV2Response, error) {
url, err := url.Parse(GraphqlURL{
BaseUrl: "https://twitter.com/i/api/graphql/2Z6LYO4UTM4BnWjaNCod6g/Likes",
Variables: GraphqlVariables{
@ -1120,7 +1171,7 @@ type PaginatedUserLikes struct {
}
func (p PaginatedUserLikes) NextPage(api *API, cursor string) (APIV2Response, error) {
return api.GetUserLikes(p.user_id, cursor)
return api.GetUserLikesPage(p.user_id, cursor)
}
func (p PaginatedUserLikes) ToTweetTrove(r APIV2Response) (TweetTrove, error) {
ret, err := r.ToTweetTroveAsLikes()
@ -1137,11 +1188,18 @@ func (p PaginatedUserLikes) ToTweetTrove(r APIV2Response) (TweetTrove, error) {
return ret, nil
}
func GetUserLikes(user_id UserID, how_many int) (TweetTrove, error) {
return the_api.GetPaginatedQuery(PaginatedUserLikes{user_id}, how_many)
func (api *API) GetUserLikes(user_id UserID, how_many int) (TweetTrove, error) {
return api.GetPaginatedQuery(PaginatedUserLikes{user_id}, how_many)
}
func (api *API) GetBookmarks(cursor string) (APIV2Response, error) {
func GetUserLikes(user_id UserID, how_many int) (TweetTrove, error) {
return the_api.GetUserLikes(user_id, how_many)
}
// Paginated Bookmarks
// -------------------
func (api *API) GetBookmarksPage(cursor string) (APIV2Response, error) {
url, err := url.Parse(GraphqlURL{
BaseUrl: "https://twitter.com/i/api/graphql/xLjCVTqYWz8CGSprLU349w/Bookmarks",
Variables: GraphqlVariables{
@ -1186,7 +1244,7 @@ type PaginatedBookmarks struct {
}
func (p PaginatedBookmarks) NextPage(api *API, cursor string) (APIV2Response, error) {
return api.GetBookmarks(cursor)
return api.GetBookmarksPage(cursor)
}
func (p PaginatedBookmarks) ToTweetTrove(r APIV2Response) (TweetTrove, error) {
ret, err := r.ToTweetTroveAsBookmarks()
@ -1203,10 +1261,18 @@ func (p PaginatedBookmarks) ToTweetTrove(r APIV2Response) (TweetTrove, error) {
return ret, nil
}
func GetBookmarks(how_many int) (TweetTrove, error) {
return the_api.GetPaginatedQuery(PaginatedBookmarks{the_api.UserID}, how_many)
func (api *API) GetBookmarks(how_many int) (TweetTrove, error) {
return api.GetPaginatedQuery(PaginatedBookmarks{api.UserID}, how_many)
}
func GetBookmarks(how_many int) (TweetTrove, error) {
return the_api.GetBookmarks(how_many)
}
// Paginated Home Timeline
// -----------------------
// TODO: paginated?
func (api *API) GetHomeTimeline(cursor string, is_following_only bool) (TweetTrove, error) {
var url string
body_struct := struct {
@ -1269,6 +1335,9 @@ func GetHomeTimeline(cursor string, is_following_only bool) (TweetTrove, error)
return the_api.GetHomeTimeline(cursor, is_following_only)
}
// Get User
// --------
func (api API) GetUser(handle UserHandle) (APIUser, error) {
url, err := url.Parse(GraphqlURL{
BaseUrl: "https://api.twitter.com/graphql/SAMkL5y_N9pmahSw8yy6gw/UserByScreenName",
@ -1311,7 +1380,10 @@ func (api API) GetUser(handle UserHandle) (APIUser, error) {
return response.ConvertToAPIUser(), err
}
func (api *API) Search(query string, cursor string) (APIV2Response, error) {
// Paginated Search
// ----------------
func (api *API) SearchPage(query string, cursor string) (APIV2Response, error) {
url, err := url.Parse(GraphqlURL{
BaseUrl: "https://twitter.com/i/api/graphql/NA567V_8AFwu0cZEkAAKcw/SearchTimeline",
Variables: GraphqlVariables{
@ -1360,7 +1432,7 @@ type PaginatedSearch struct {
}
func (p PaginatedSearch) NextPage(api *API, cursor string) (APIV2Response, error) {
return api.Search(p.query, cursor)
return api.SearchPage(p.query, cursor)
}
func (p PaginatedSearch) ToTweetTrove(r APIV2Response) (TweetTrove, error) {
return r.ToTweetTrove()
@ -1372,6 +1444,9 @@ func (p PaginatedSearch) ToTweetTrove(r APIV2Response) (TweetTrove, error) {
// - search for users
// - photos
// - videos
func Search(query string, min_results int) (trove TweetTrove, err error) {
return the_api.GetPaginatedQuery(PaginatedSearch{query}, min_results)
func (api *API) Search(query string, min_results int) (trove TweetTrove, err error) {
return api.GetPaginatedQuery(PaginatedSearch{query}, min_results)
}
func Search(query string, min_results int) (trove TweetTrove, err error) {
return the_api.Search(query, min_results)
}
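
As a rough sketch of the convention the renames above settle on (the names here are hypothetical, not part of the diff): a new paginated endpoint only needs a single-page fetcher plus a small PaginatedQuery adapter, and GetPaginatedQuery drives the cursor loop. GetUserFeedPage stands in for whichever *Page fetcher the endpoint would use.

// Hypothetical adapter; PaginatedQuery, GetPaginatedQuery and GetUserFeedPage are as in the diff above.
type PaginatedExampleFeed struct {
	user_id UserID
}

func (p PaginatedExampleFeed) NextPage(api *API, cursor string) (APIV2Response, error) {
	return api.GetUserFeedPage(p.user_id, cursor)
}

func (p PaginatedExampleFeed) ToTweetTrove(r APIV2Response) (TweetTrove, error) {
	return r.ToTweetTrove()
}

// Method form, matching the rest of the refactor; a package-level wrapper would delegate via the_api as elsewhere.
func (api *API) GetExampleFeed(user_id UserID, how_many int) (TweetTrove, error) {
	return api.GetPaginatedQuery(PaginatedExampleFeed{user_id}, how_many)
}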

View File

@ -53,7 +53,7 @@ func (r DMChatRoom) GetParticipantIDs() []UserID {
return ret
}
func ParseAPIDMChatRoom(api_room APIDMConversation) DMChatRoom {
func ParseAPIDMChatRoom(api_room APIDMConversation, current_user_id UserID) DMChatRoom {
ret := DMChatRoom{}
ret.ID = DMChatRoomID(api_room.ConversationID)
ret.Type = api_room.Type
@ -80,7 +80,7 @@ func ParseAPIDMChatRoom(api_room APIDMConversation) DMChatRoom {
participant.LastReadEventID = DMMessageID(api_participant.LastReadEventID)
// Process chat settings if this is the logged-in user
if participant.UserID == the_api.UserID {
if participant.UserID == current_user_id {
participant.IsNotificationsDisabled = api_room.NotificationsDisabled
participant.IsReadOnly = api_room.ReadOnly
participant.IsTrusted = api_room.Trusted

View File

@ -1,9 +1,5 @@
package scraper
import (
log "github.com/sirupsen/logrus"
)
func (t TweetTrove) GetOldestMessage(id DMChatRoomID) DMMessageID {
oldest := DMMessageID(^uint(0) >> 1) // Max integer
for _, m := range t.Messages {
@ -17,87 +13,73 @@ func (t TweetTrove) GetOldestMessage(id DMChatRoomID) DMMessageID {
// TODO: Why are these all here? =>
// Returns a TweetTrove and the cursor for the next update
func GetInbox(how_many int) (TweetTrove, string) {
if !the_api.IsAuthenticated {
log.Fatalf("Fetching DMs can only be done when authenticated. Please provide `--session [user]`")
func (api *API) GetInbox(how_many int) (TweetTrove, string, error) {
if !api.IsAuthenticated {
return TweetTrove{}, "", ErrLoginRequired
}
dm_response, err := the_api.GetDMInbox()
dm_response, err := api.GetDMInbox()
if err != nil {
panic(err)
}
trove := dm_response.ToTweetTrove()
trove := dm_response.ToTweetTrove(api.UserID)
cursor := dm_response.Cursor
next_cursor_id := dm_response.InboxTimelines.Trusted.MinEntryID
for len(trove.Rooms) < how_many && dm_response.Status != "AT_END" {
dm_response, err = the_api.GetInboxTrusted(next_cursor_id)
dm_response, err = api.GetInboxTrusted(next_cursor_id)
if err != nil {
panic(err)
}
next_trove := dm_response.ToTweetTrove()
next_trove := dm_response.ToTweetTrove(api.UserID)
next_cursor_id = dm_response.MinEntryID
trove.MergeWith(next_trove)
}
return trove, cursor
return trove, cursor, nil
}
func GetInbox(how_many int) (TweetTrove, string, error) {
return the_api.GetInbox(how_many)
}
func GetConversation(id DMChatRoomID, max_id DMMessageID, how_many int) TweetTrove {
if !the_api.IsAuthenticated {
log.Fatalf("Fetching DMs can only be done when authenticated. Please provide `--session [user]`")
func (api *API) GetConversation(id DMChatRoomID, max_id DMMessageID, how_many int) (TweetTrove, error) {
if !api.IsAuthenticated {
return TweetTrove{}, ErrLoginRequired
}
dm_response, err := the_api.GetDMConversation(id, max_id)
dm_response, err := api.GetDMConversation(id, max_id)
if err != nil {
panic(err)
}
trove := dm_response.ToTweetTrove()
trove := dm_response.ToTweetTrove(api.UserID)
oldest := trove.GetOldestMessage(id)
for len(trove.Messages) < how_many && dm_response.Status != "AT_END" {
dm_response, err = the_api.GetDMConversation(id, oldest)
dm_response, err = api.GetDMConversation(id, oldest)
if err != nil {
panic(err)
}
next_trove := dm_response.ToTweetTrove()
next_trove := dm_response.ToTweetTrove(api.UserID)
oldest = next_trove.GetOldestMessage(id)
trove.MergeWith(next_trove)
}
return trove
return trove, nil
}
func GetConversation(id DMChatRoomID, max_id DMMessageID, how_many int) (TweetTrove, error) {
return the_api.GetConversation(id, max_id, how_many)
}
// Returns a TweetTrove and the cursor for the next update
func PollInboxUpdates(cursor string) (TweetTrove, string) {
if !the_api.IsAuthenticated {
log.Fatalf("Fetching DMs can only be done when authenticated. Please provide `--session [user]`")
}
dm_response, err := the_api.PollInboxUpdates(cursor)
if err != nil {
panic(err)
}
return dm_response.ToTweetTrove(), dm_response.Cursor
func PollInboxUpdates(cursor string) (TweetTrove, string, error) {
return the_api.PollInboxUpdates(cursor)
}
func SendDMMessage(room_id DMChatRoomID, text string, in_reply_to_id DMMessageID) TweetTrove {
if !the_api.IsAuthenticated {
log.Fatalf("Fetching DMs can only be done when authenticated. Please provide `--session [user]`")
}
dm_response, err := the_api.SendDMMessage(room_id, text, in_reply_to_id)
if err != nil {
panic(err)
}
return dm_response.ToTweetTrove()
func SendDMMessage(room_id DMChatRoomID, text string, in_reply_to_id DMMessageID) (TweetTrove, error) {
return the_api.SendDMMessage(room_id, text, in_reply_to_id)
}
func SendDMReaction(room_id DMChatRoomID, message_id DMMessageID, reacc string) error {
if !the_api.IsAuthenticated {
log.Fatalf("Fetching DMs can only be done when authenticated. Please provide `--session [user]`")
}
return the_api.SendDMReaction(room_id, message_id, reacc)
}
func MarkDMChatRead(room_id DMChatRoomID, read_message_id DMMessageID) {
if !the_api.IsAuthenticated {
log.Fatalf("Writing DMs can only be done when authenticated. Please provide `--session [user]`")
}
the_api.MarkDMChatRead(room_id, read_message_id)
func MarkDMChatRead(room_id DMChatRoomID, read_message_id DMMessageID) error {
return the_api.MarkDMChatRead(room_id, read_message_id)
}

View File

@ -48,11 +48,3 @@ func ParseAPISpace(apiCard APICard) Space {
return ret
}
func FetchSpaceDetail(id SpaceID) (TweetTrove, error) {
space_response, err := the_api.GetSpace(id)
if err != nil {
return TweetTrove{}, fmt.Errorf("Error in API call to fetch Space (id %q):\n %w", id, err)
}
return space_response.ToTweetTrove(), nil
}

View File

@ -4,7 +4,6 @@ import (
"database/sql/driver"
"errors"
"fmt"
log "github.com/sirupsen/logrus"
"strings"
"time"
@ -244,16 +243,14 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
return
}
/**
* Get a single tweet with no replies from the API.
*
* args:
* - id: the ID of the tweet to get
*
* returns: the single Tweet
*/
func GetTweet(id TweetID) (Tweet, error) {
resp, err := the_api.GetTweetDetail(id, "")
// Get a single tweet with no replies from the API.
//
// args:
// - id: the ID of the tweet to get
//
// returns: the single Tweet
func (api *API) GetTweet(id TweetID) (Tweet, error) {
resp, err := api.GetTweetDetail(id, "")
if err != nil {
return Tweet{}, fmt.Errorf("Error getting tweet detail: %d\n %w", id, err)
}
@ -271,83 +268,6 @@ func GetTweet(id TweetID) (Tweet, error) {
tweet.IsConversationScraped = true
return tweet, nil
}
/**
* Return a list of tweets, including the original and the rest of its thread,
* along with a list of associated users.
*
* Mark the main tweet as "is_conversation_downloaded = true", and update its "last_scraped_at"
* value.
*
* args:
* - id: the ID of the tweet to get
*
* returns: the tweet, list of its replies and context, and users associated with those replies
*/
func GetTweetFull(id TweetID, how_many int) (trove TweetTrove, err error) {
tweet_response, err := the_api.GetTweet(id, "")
if err != nil {
err = fmt.Errorf("Error getting tweet: %d\n %w", id, err)
return
}
if len(tweet_response.GlobalObjects.Tweets) < how_many &&
tweet_response.GetCursor() != "" {
err = the_api.GetMoreReplies(id, &tweet_response, how_many)
if err != nil {
err = fmt.Errorf("Error getting more tweet replies: %d\n %w", id, err)
return
}
}
// This has to be called BEFORE ToTweetTrove, because it modifies the TweetResponse (adds tombstone tweets to its tweets list)
tombstoned_users := tweet_response.HandleTombstones()
trove, err = tweet_response.ToTweetTrove()
if err != nil {
panic(err)
}
trove.TombstoneUsers = tombstoned_users
// Quoted tombstones need their user_id filled out from the tombstoned_users list
log.Debug("Running tweet trove post-processing\n")
err = trove.PostProcess()
if err != nil {
err = fmt.Errorf("Error getting tweet (id %d):\n %w", id, err)
return
}
// Find the main tweet and update its "is_conversation_downloaded" and "last_scraped_at"
tweet, ok := trove.Tweets[id]
if !ok {
panic("Trove didn't contain its own tweet!")
}
tweet.LastScrapedAt = Timestamp{time.Now()}
tweet.IsConversationScraped = true
trove.Tweets[id] = tweet
return
}
func GetTweetFullAPIV2(id TweetID, how_many int) (TweetTrove, error) {
trove, err := the_api.GetPaginatedQuery(PaginatedTweetReplies{id}, how_many)
if errors.Is(err, ErrDoesntExist) {
trove := NewTweetTrove()
fake_user := GetUnknownUser()
trove.Users[fake_user.ID] = fake_user
trove.Tweets[id] = Tweet{ID: id, UserID: fake_user.ID, TombstoneType: "deleted", IsConversationScraped: true, IsStub: true}
return trove, nil
} else if err != nil {
return trove, err
}
// Find the main tweet and update its "is_conversation_downloaded" and "last_scraped_at"
tweet, ok := trove.Tweets[id]
if !ok {
panic("Trove didn't contain its own tweet!")
}
tweet.LastScrapedAt = Timestamp{time.Now()}
tweet.IsConversationScraped = true
trove.Tweets[id] = tweet
return trove, err
func GetTweet(id TweetID) (Tweet, error) {
return the_api.GetTweet(id)
}

View File

@ -1,37 +0,0 @@
package scraper
import (
"errors"
"fmt"
)
/**
* Get a list of tweets that appear on the given user's page, along with a list of associated
* users for any retweets.
*
* args:
* - user_id: the ID of the user whomst feed to fetch
* - min_tweets: get at least this many tweets, if there are any
*
* returns: a slice of Tweets, Retweets, and Users
*/
func GetUserFeedFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) {
tweet_response, err := the_api.GetFeedFor(user_id, "")
if err != nil {
err = fmt.Errorf("Error calling API to fetch user feed: UserID %d\n %w", user_id, err)
return
}
if len(tweet_response.GlobalObjects.Tweets) < min_tweets && tweet_response.GetCursor() != "" {
err = the_api.GetMoreTweetsFromFeed(user_id, &tweet_response, min_tweets)
if err != nil && !errors.Is(err, END_OF_FEED) {
return
}
}
return tweet_response.ToTweetTrove()
}
func GetUserFeedGraphqlFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) {
return the_api.GetPaginatedQuery(PaginatedUserFeed{user_id}, min_tweets)
}