From 62f14856d63c49dce618e50ddc43126528f0414d Mon Sep 17 00:00:00 2001
From: Alessio
Date: Sat, 14 Oct 2023 19:34:45 -0300
Subject: [PATCH] Add parsing of edited tweets

---
 pkg/scraper/api_types_v2.go                   | 48 ++++++++++++++++++-
 pkg/scraper/api_types_v2_test.go              | 21 ++++++++
 .../api_v2/tweet_with_newer_version.json      |  1 +
 3 files changed, 68 insertions(+), 2 deletions(-)
 create mode 100644 pkg/scraper/test_responses/api_v2/tweet_with_newer_version.json

diff --git a/pkg/scraper/api_types_v2.go b/pkg/scraper/api_types_v2.go
index 0c1ad00..a060986 100644
--- a/pkg/scraper/api_types_v2.go
+++ b/pkg/scraper/api_types_v2.go
@@ -5,6 +5,7 @@ import (
 	"errors"
 	"fmt"
 	"net/url"
+	"strconv"
 	"strings"
 
 	log "github.com/sirupsen/logrus"
@@ -143,6 +144,31 @@ func (u APIV2UserResult) ToUser() User {
 	return user
 }
 
+// Int64Slice is a list of int64s that is encoded in JSON as an array of decimal strings,
+// e.g. `["1653413433461579783", "1653413735866814470"]`.
+type Int64Slice []int64
+
+// UnmarshalJSON implements json.Unmarshaler.  It replaces the contents of `s` rather than
+// appending to them, and leaves `s` untouched if any element fails to parse.
+func (s *Int64Slice) UnmarshalJSON(data []byte) error {
+	var strs []string
+	if err := json.Unmarshal(data, &strs); err != nil {
+		return err
+	}
+	if strs == nil {
+		// JSON `null`: by convention this is a no-op
+		return nil
+	}
+
+	nums := make(Int64Slice, 0, len(strs))
+	for _, str := range strs {
+		num, err := strconv.ParseInt(str, 10, 64)
+		if err != nil {
+			return fmt.Errorf("parsing %q as int64: %w", str, err)
+		}
+		nums = append(nums, num)
+	}
+	*s = nums
+	return nil
+}
+
 type Tombstone struct {
 	Text struct {
 		Text string `json:"text"`
@@ -164,6 +193,9 @@ type _Result struct {
 			} `json:"result"`
 		} `json:"note_tweet_results"`
 	} `json:"note_tweet"`
+	EditControl struct {
+		EditTweetIDs Int64Slice `json:"edit_tweet_ids"`
+	} `json:"edit_control"`
 }
 
 type APIV2Result struct {
@@ -183,7 +215,8 @@ func (api_result APIV2Result) ToTweetTrove() (TweetTrove, error) {
 		return ret, ErrorIsTombstone
 	}
 
-	if api_result.Result.Legacy.ID == 0 && api_result.Result.Tweet.Legacy.ID != 0 {
+	if api_result.Result.Legacy.ID == 0 && api_result.Result.Tweet.Legacy.ID != 0 ||
+		api_result.Result.ID == 0 && api_result.Result.Tweet.ID != 0 {
 		// If the tweet has "__typename" of "TweetWithVisibilityResults", it uses a new structure with
 		// a "tweet" field with the regular data, alongside a "tweetInterstitial" field which is ignored
 		// for now.
@@ -201,7 +234,18 @@ func (api_result APIV2Result) ToTweetTrove() (TweetTrove, error) {
 	// Process the tweet itself
 	main_tweet_trove, err := api_result.Result.Legacy.ToTweetTrove()
 	if errors.Is(err, ERR_NO_TWEET) {
-		return TweetTrove{}, err
+		// If the tweet is edited, the entry is just a list of the more recent versions
+		edit_tweet_ids := api_result.Result.EditControl.EditTweetIDs
+		if api_result.Result.ID != 0 && len(edit_tweet_ids) > 1 && edit_tweet_ids[len(edit_tweet_ids)-1] != api_result.Result.ID {
+			// There's a more recent version of the tweet available
+			main_tweet_trove.Tweets[TweetID(api_result.Result.ID)] = Tweet{
+				TombstoneType: "newer-version-available",
+				ID:            TweetID(api_result.Result.ID),
+			}
+		} else {
+			// Not edited; something else is wrong
+			return TweetTrove{}, err
+		}
 	} else if err != nil {
 		panic(err)
 	}
diff --git a/pkg/scraper/api_types_v2_test.go b/pkg/scraper/api_types_v2_test.go
index aa5d1e8..8716ed2 100644
--- a/pkg/scraper/api_types_v2_test.go
+++ b/pkg/scraper/api_types_v2_test.go
@@ -608,6 +608,27 @@ func TestAPIV2UserFeedTombstoneEntry(t *testing.T) {
 	// assert.True(user.IsIdFake)
 }
 
+func TestTweetWithNewerVersion(t *testing.T) {
+	assert := assert.New(t)
+	data, err := os.ReadFile("test_responses/api_v2/tweet_with_newer_version.json")
+	require.NoError(t, err)
+
+	var entry APIV2Result
+	err = json.Unmarshal(data, &entry)
+	require.NoError(t, err)
+
+	trove, err := entry.ToTweetTrove()
+	assert.NoError(err)
+
+	assert.Len(trove.Tweets, 1)
+	tweet, is_ok := trove.Tweets[1653413433461579783]
+	assert.True(is_ok)
+	assert.Equal(tweet.TombstoneType, "newer-version-available")
+
+	assert.Len(trove.Users, 0)
+	assert.Len(trove.Retweets, 0)
+}
+
 func TestAPIV2ConversationThreadWithTombstones(t *testing.T) {
 	assert := assert.New(t)
 	data, err := os.ReadFile("test_responses/api_v2/conversation_thread_with_tombstones.json")
diff --git a/pkg/scraper/test_responses/api_v2/tweet_with_newer_version.json b/pkg/scraper/test_responses/api_v2/tweet_with_newer_version.json
new file mode 100644
index 0000000..f6c0454
--- /dev/null
+++ b/pkg/scraper/test_responses/api_v2/tweet_with_newer_version.json
@@ -0,0 +1 @@
+{"result":{"__typename":"TweetWithVisibilityResults","tweet":{"rest_id":"1653413433461579783","unmention_info":{},"edit_control":{"edit_tweet_ids":["1653413433461579783","1653413735866814470"],"editable_until_msecs":"1683041256000","is_edit_eligible":true,"edits_remaining":"4"}},"limitedActionResults":{"limited_actions":[{"action":"QuoteTweet"},{"action":"CopyLink"},{"action":"Like"},{"action":"VoteOnPoll"},{"action":"AddToMoment"},{"action":"PinToProfile"},{"action":"ShareTweetVia"},{"action":"React"},{"action":"Embed"},{"action":"Retweet"},{"action":"SendViaDm"},{"action":"AddToBookmarks"},{"action":"HideCommunityTweet"},{"action":"Reply"},{"action":"ViewTweetActivity"}]}}}