From cd021100219e60b2608bbf0f0d8543deaf006b01 Mon Sep 17 00:00:00 2001 From: Alessio Date: Sun, 2 Feb 2025 17:28:18 -0800 Subject: [PATCH] BUGFIX: fix a scraping error on encountering a 'composer' entry in a tweet thread --- pkg/scraper/api_types_v2.go | 4 ++++ pkg/scraper/api_types_v2_test.go | 12 +++++++----- .../api_v2/composer_entry_full_thread.json | 1 + .../api_v2/composer_entry_item_non_module.json | 1 - 4 files changed, 12 insertions(+), 6 deletions(-) create mode 100644 pkg/scraper/test_responses/api_v2/composer_entry_full_thread.json delete mode 100644 pkg/scraper/test_responses/api_v2/composer_entry_item_non_module.json diff --git a/pkg/scraper/api_types_v2.go b/pkg/scraper/api_types_v2.go index 7509886..c24a075 100644 --- a/pkg/scraper/api_types_v2.go +++ b/pkg/scraper/api_types_v2.go @@ -710,6 +710,10 @@ func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) { } // Infer "in_reply_to_id" for tombstoned tweets from the order of entries, if applicable if entry.Content.EntryType == "TimelineTimelineItem" { + if strings.HasPrefix(entry.EntryID, "tweetcomposer") { + // Skip composer + continue + } entry_type, main_tweet_id := entry.ParseID() if entry_type == "cursor-showmorethreadsprompt" || entry_type == "cursor-bottom" || diff --git a/pkg/scraper/api_types_v2_test.go b/pkg/scraper/api_types_v2_test.go index 309cb21..88bcb8a 100644 --- a/pkg/scraper/api_types_v2_test.go +++ b/pkg/scraper/api_types_v2_test.go @@ -1001,12 +1001,14 @@ func TestNoFailOnComposerEntryInTimelineModule(t *testing.T) { func TestNoFailOnComposerEntryInRegularThread(t *testing.T) { assert := assert.New(t) require := require.New(t) - data, err := os.ReadFile("test_responses/api_v2/composer_entry_item_non_module.json") + data, err := os.ReadFile("test_responses/api_v2/composer_entry_full_thread.json") require.NoError(err) - var entry_result APIV2Entry - err = json.Unmarshal(data, &entry_result) + var api_response APIV2Response + err = json.Unmarshal(data, &api_response) require.NoError(err) - trove := entry_result.ToTweetTrove() - assert.Len(trove.Tweets, 0) + trove, err := api_response.ToTweetTrove() + require.NoError(err) + + assert.Len(trove.Tweets, 3) } diff --git a/pkg/scraper/test_responses/api_v2/composer_entry_full_thread.json b/pkg/scraper/test_responses/api_v2/composer_entry_full_thread.json new file mode 100644 index 0000000..930c39b --- /dev/null +++ b/pkg/scraper/test_responses/api_v2/composer_entry_full_thread.json @@ -0,0 +1 @@ +{"data":{"threaded_conversation_with_injections_v2":{"instructions":[{"type":"TimelineAddEntries","entries":[{"entryId":"tweet-1581333491010404352","sortIndex":"7642038545844371455","content":{"entryType":"TimelineTimelineItem","__typename":"TimelineTimelineItem","itemContent":{"itemType":"TimelineTweet","__typename":"TimelineTweet","tweet_results":{"result":{"__typename":"Tweet","rest_id":"1581333491010404352","has_birdwatch_notes":false,"core":{"user_results":{"result":{"__typename":"User","id":"VXNlcjoxNDU4Mjg0NTI0NzYxMDc1NzE0","rest_id":"1458284524761075714","affiliates_highlighted_label":{},"has_graduated_access":true,"is_blue_verified":true,"profile_image_shape":"Circle","legacy":{"can_dm":true,"can_media_tag":false,"created_at":"Wed Nov 10 04:05:16 +0000 2021","default_profile":true,"default_profile_image":false,"description":"rightwing bodybuilder · composability guru · display: flex","entities":{"description":{"urls":[]},"url":{"urls":[{"display_url":"offline-twitter.com","expanded_url":"https://offline-twitter.com/","url":"https://t.co/7nDTwkyzRJ","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":34274,"followers_count":928,"friends_count":199,"has_custom_timelines":false,"is_translator":false,"listed_count":14,"location":"on my computer","media_count":718,"name":"wispem-wantex","needs_phone_verification":false,"normal_followers_count":928,"pinned_tweet_ids_str":["1834672954946601035"],"possibly_sensitive":false,"profile_image_url_https":"https://pbs.twimg.com/profile_images/1724933823144620032/sYTzWQy2_normal.jpg","profile_interstitial_type":"","screen_name":"wispem_wantex","statuses_count":12159,"translator_type":"none","url":"https://t.co/7nDTwkyzRJ","verified":false,"want_retweets":false,"withheld_in_countries":[]}}}},"unmention_data":{},"edit_control":{"edit_tweet_ids":["1581333491010404352"],"editable_until_msecs":"1665856058000","is_edit_eligible":false,"edits_remaining":"5"},"is_translatable":false,"views":{"state":"Enabled"},"source":"Twitter Web App","legacy":{"bookmark_count":0,"bookmarked":false,"created_at":"Sat Oct 15 17:17:38 +0000 2022","conversation_id_str":"1581333491010404352","display_text_range":[0,249],"entities":{"user_mentions":[],"urls":[],"hashtags":[],"symbols":[]},"favorite_count":2,"favorited":false,"full_text":"I'm trying out a self-hosted build server called Woodpecker, and I like the results so far.\n\nAs appears to be the standard approach now, build steps run in separate containers with a shared volume containing the checked-out code / working directory.","is_quote_status":false,"lang":"en","quote_count":0,"reply_count":1,"retweet_count":0,"retweeted":false,"user_id_str":"1458284524761075714","id_str":"1581333491010404352"},"quick_promote_eligibility":{"eligibility":"IneligibleNotProfessional"}}},"tweetDisplayType":"SelfThread","hasModeratedReplies":false}}},{"entryId":"tweet-1581333493375971328","sortIndex":"7642038543478804479","content":{"entryType":"TimelineTimelineItem","__typename":"TimelineTimelineItem","itemContent":{"itemType":"TimelineTweet","__typename":"TimelineTweet","tweet_results":{"result":{"__typename":"Tweet","rest_id":"1581333493375971328","has_birdwatch_notes":false,"core":{"user_results":{"result":{"__typename":"User","id":"VXNlcjoxNDU4Mjg0NTI0NzYxMDc1NzE0","rest_id":"1458284524761075714","affiliates_highlighted_label":{},"has_graduated_access":true,"is_blue_verified":true,"profile_image_shape":"Circle","legacy":{"can_dm":true,"can_media_tag":false,"created_at":"Wed Nov 10 04:05:16 +0000 2021","default_profile":true,"default_profile_image":false,"description":"rightwing bodybuilder · composability guru · display: flex","entities":{"description":{"urls":[]},"url":{"urls":[{"display_url":"offline-twitter.com","expanded_url":"https://offline-twitter.com/","url":"https://t.co/7nDTwkyzRJ","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":34274,"followers_count":928,"friends_count":199,"has_custom_timelines":false,"is_translator":false,"listed_count":14,"location":"on my computer","media_count":718,"name":"wispem-wantex","needs_phone_verification":false,"normal_followers_count":928,"pinned_tweet_ids_str":["1834672954946601035"],"possibly_sensitive":false,"profile_image_url_https":"https://pbs.twimg.com/profile_images/1724933823144620032/sYTzWQy2_normal.jpg","profile_interstitial_type":"","screen_name":"wispem_wantex","statuses_count":12159,"translator_type":"none","url":"https://t.co/7nDTwkyzRJ","verified":false,"want_retweets":false,"withheld_in_countries":[]}}}},"unmention_data":{},"edit_control":{"edit_tweet_ids":["1581333493375971328"],"editable_until_msecs":"1665856059000","is_edit_eligible":false,"edits_remaining":"5"},"is_translatable":false,"views":{"state":"Enabled"},"source":"Twitter Web App","legacy":{"bookmark_count":0,"bookmarked":false,"created_at":"Sat Oct 15 17:17:39 +0000 2022","conversation_id_str":"1581333491010404352","display_text_range":[0,238],"entities":{"user_mentions":[],"urls":[],"hashtags":[],"symbols":[]},"favorite_count":1,"favorited":false,"full_text":"However, since there's only one runner / worker (because I'm self hosting it), the \"waiting for instance to be available\" step takes like half a second or less, because the image is already there. This leads to extremely fast build times","in_reply_to_screen_name":"wispem_wantex","in_reply_to_status_id_str":"1581333491010404352","in_reply_to_user_id_str":"1458284524761075714","is_quote_status":false,"lang":"en","quote_count":0,"reply_count":1,"retweet_count":0,"retweeted":false,"user_id_str":"1458284524761075714","id_str":"1581333493375971328"},"quick_promote_eligibility":{"eligibility":"IneligibleNotProfessional"}}},"tweetDisplayType":"SelfThread"}}},{"entryId":"tweet-1581333495393107968","sortIndex":"7642038541461667839","content":{"entryType":"TimelineTimelineItem","__typename":"TimelineTimelineItem","itemContent":{"itemType":"TimelineTweet","__typename":"TimelineTweet","tweet_results":{"result":{"__typename":"Tweet","rest_id":"1581333495393107968","has_birdwatch_notes":false,"core":{"user_results":{"result":{"__typename":"User","id":"VXNlcjoxNDU4Mjg0NTI0NzYxMDc1NzE0","rest_id":"1458284524761075714","affiliates_highlighted_label":{},"has_graduated_access":true,"is_blue_verified":true,"profile_image_shape":"Circle","legacy":{"can_dm":true,"can_media_tag":false,"created_at":"Wed Nov 10 04:05:16 +0000 2021","default_profile":true,"default_profile_image":false,"description":"rightwing bodybuilder · composability guru · display: flex","entities":{"description":{"urls":[]},"url":{"urls":[{"display_url":"offline-twitter.com","expanded_url":"https://offline-twitter.com/","url":"https://t.co/7nDTwkyzRJ","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":34274,"followers_count":928,"friends_count":199,"has_custom_timelines":false,"is_translator":false,"listed_count":14,"location":"on my computer","media_count":718,"name":"wispem-wantex","needs_phone_verification":false,"normal_followers_count":928,"pinned_tweet_ids_str":["1834672954946601035"],"possibly_sensitive":false,"profile_image_url_https":"https://pbs.twimg.com/profile_images/1724933823144620032/sYTzWQy2_normal.jpg","profile_interstitial_type":"","screen_name":"wispem_wantex","statuses_count":12159,"translator_type":"none","url":"https://t.co/7nDTwkyzRJ","verified":false,"want_retweets":false,"withheld_in_countries":[]}}}},"unmention_data":{},"edit_control":{"edit_tweet_ids":["1581333495393107968"],"editable_until_msecs":"1665856059000","is_edit_eligible":false,"edits_remaining":"5"},"is_translatable":false,"views":{"state":"Enabled"},"source":"Twitter Web App","legacy":{"bookmark_count":0,"bookmarked":false,"created_at":"Sat Oct 15 17:17:39 +0000 2022","conversation_id_str":"1581333491010404352","display_text_range":[0,270],"entities":{"user_mentions":[],"urls":[],"hashtags":[],"symbols":[]},"favorite_count":1,"favorited":false,"full_text":"I think this is pretty cool.\n\nThe only real drawback compared to using a free service is there isn't integration-by-default; you have to manually enable builds for each repo you want builds on. Other than this, it's great. Clean UI, very fast, not (particularly) flaky","in_reply_to_screen_name":"wispem_wantex","in_reply_to_status_id_str":"1581333493375971328","in_reply_to_user_id_str":"1458284524761075714","is_quote_status":false,"lang":"en","quote_count":0,"reply_count":0,"retweet_count":0,"retweeted":false,"user_id_str":"1458284524761075714","id_str":"1581333495393107968"},"quick_promote_eligibility":{"eligibility":"IneligibleNotProfessional"}}},"tweetDisplayType":"SelfThread"}}},{"entryId":"tweetcomposer--1","sortIndex":"7642038541461667829","content":{"entryType":"TimelineTimelineItem","__typename":"TimelineTimelineItem","itemContent":{"itemType":"TimelineTweetComposer","__typename":"TimelineTweetComposer","composerDisplayType":"SelfThread","composerButtonText":"Add another post","composerButtonUrl":{"url":"twitter://post?in_reply_to_status_id=1581333495393107968","urlType":"DeepLink"}}}}]},{"type":"TimelineTerminateTimeline","direction":"Top"}],"metadata":{"reader_mode_config":{"is_reader_mode_available":true}}}}} diff --git a/pkg/scraper/test_responses/api_v2/composer_entry_item_non_module.json b/pkg/scraper/test_responses/api_v2/composer_entry_item_non_module.json deleted file mode 100644 index 3954832..0000000 --- a/pkg/scraper/test_responses/api_v2/composer_entry_item_non_module.json +++ /dev/null @@ -1 +0,0 @@ -{"entryId":"tweetcomposer--1","sortIndex":"7407168758699622302","content":{"entryType":"TimelineTimelineItem","__typename":"TimelineTimelineItem","itemContent":{"itemType":"TimelineTweetComposer","__typename":"TimelineTweetComposer","composerDisplayType":"SelfThread","composerButtonText":"Add another post","composerButtonUrl":{"url":"twitter://post?in_reply_to_status_id=1816203278155153495","urlType":"DeepLink"}}}}