2021-05-22 18:20:18 -04:00
|
|
|
|
package scraper_test
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"encoding/json"
|
2022-03-13 16:33:17 -07:00
|
|
|
|
"os"
|
2021-05-22 18:20:18 -04:00
|
|
|
|
"testing"
|
|
|
|
|
|
2022-03-13 17:09:43 -07:00
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
|
|
|
"github.com/stretchr/testify/require"
|
2022-01-31 19:14:14 -08:00
|
|
|
|
|
|
|
|
|
. "offline_twitter/scraper"
|
2021-05-22 18:20:18 -04:00
|
|
|
|
)
|
|
|
|
|
|
2022-03-13 17:09:43 -07:00
|
|
|
|
func load_tweet_from_file(filename string) Tweet {
|
2022-03-13 16:33:17 -07:00
|
|
|
|
data, err := os.ReadFile(filename)
|
2021-05-22 18:20:18 -04:00
|
|
|
|
if err != nil {
|
|
|
|
|
panic(err)
|
|
|
|
|
}
|
2022-01-31 19:14:14 -08:00
|
|
|
|
var apitweet APITweet
|
2021-09-27 18:08:14 -07:00
|
|
|
|
err = json.Unmarshal(data, &apitweet)
|
2021-05-22 18:20:18 -04:00
|
|
|
|
if err != nil {
|
2021-09-27 18:43:24 -07:00
|
|
|
|
panic(err)
|
2021-05-22 18:20:18 -04:00
|
|
|
|
}
|
2022-01-31 19:14:14 -08:00
|
|
|
|
tweet, err := ParseSingleTweet(apitweet)
|
2021-05-22 18:20:18 -04:00
|
|
|
|
if err != nil {
|
2021-09-27 18:43:24 -07:00
|
|
|
|
panic(err)
|
2021-05-22 18:20:18 -04:00
|
|
|
|
}
|
2021-09-27 18:43:24 -07:00
|
|
|
|
return tweet
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestParseSingleTweet(t *testing.T) {
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert := assert.New(t)
|
2021-09-27 18:43:24 -07:00
|
|
|
|
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_unicode_chars.json")
|
2021-05-22 18:20:18 -04:00
|
|
|
|
|
2022-03-13 17:09:43 -07:00
|
|
|
|
assert.Equal("The fact that @michaelmalice new book ‘The Anarchist Handbook’ is just absolutely destroying on the charts is the "+
|
2022-03-06 15:06:06 -08:00
|
|
|
|
"largest white pill I’ve swallowed in years.", tweet.Text)
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.Len(tweet.Mentions, 1)
|
|
|
|
|
assert.Contains(tweet.Mentions, UserHandle("michaelmalice"))
|
|
|
|
|
assert.Empty(tweet.Urls)
|
|
|
|
|
assert.Equal(int64(1621639105), tweet.PostedAt.Unix())
|
|
|
|
|
assert.Zero(tweet.QuotedTweetID)
|
|
|
|
|
assert.Empty(tweet.Polls)
|
2021-05-22 18:20:18 -04:00
|
|
|
|
}
|
|
|
|
|
|
2021-09-27 18:08:14 -07:00
|
|
|
|
func TestParseTweetWithImage(t *testing.T) {
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert := assert.New(t)
|
2021-09-27 18:43:24 -07:00
|
|
|
|
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_image.json")
|
2021-05-22 18:20:18 -04:00
|
|
|
|
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.Equal("this saddens me every time", tweet.Text)
|
|
|
|
|
assert.Len(tweet.Images, 1)
|
2021-09-27 18:08:14 -07:00
|
|
|
|
}
|
2021-05-22 18:20:18 -04:00
|
|
|
|
|
2022-03-02 14:34:42 -08:00
|
|
|
|
/**
|
|
|
|
|
* Ensure the fake url (link to the quoted tweet) is not parsed as a URL; it should just be ignored
|
|
|
|
|
*/
|
2021-09-27 18:08:14 -07:00
|
|
|
|
func TestParseTweetWithQuotedTweetAsLink(t *testing.T) {
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert := assert.New(t)
|
2021-09-27 18:43:24 -07:00
|
|
|
|
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_quoted_tweet_as_link2.json")
|
2021-05-22 18:20:18 -04:00
|
|
|
|
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.Equal("sometimes they're too dimwitted to even get the wrong title right", tweet.Text)
|
|
|
|
|
assert.Equal(TweetID(1395882872729477131), tweet.InReplyToID)
|
|
|
|
|
assert.Equal(TweetID(1396194494710788100), tweet.QuotedTweetID)
|
|
|
|
|
assert.Empty(tweet.ReplyMentions)
|
|
|
|
|
assert.Empty(tweet.Polls)
|
2022-03-02 14:34:42 -08:00
|
|
|
|
assert.Empty(tweet.Urls)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Quote-tweets with links should work properly
|
|
|
|
|
*/
|
|
|
|
|
func TestParseTweetWithQuotedTweetAndLink(t *testing.T) {
|
|
|
|
|
assert := assert.New(t)
|
|
|
|
|
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_quoted_tweet_and_url.json")
|
|
|
|
|
|
2022-03-13 17:09:43 -07:00
|
|
|
|
assert.Equal("This is video he’s talking about. Please watch. Is there a single US politician capable of doing this with the "+
|
2022-03-06 15:06:06 -08:00
|
|
|
|
"weasels and rats running American industry today?", tweet.Text)
|
2022-03-02 14:34:42 -08:00
|
|
|
|
assert.Equal(TweetID(1497997890999898115), tweet.QuotedTweetID)
|
|
|
|
|
|
|
|
|
|
assert.Len(tweet.Urls, 1)
|
|
|
|
|
url := tweet.Urls[0]
|
|
|
|
|
assert.Equal(url.Text, "https://youtu.be/VjrlTMvirVo")
|
2021-05-22 18:20:18 -04:00
|
|
|
|
}
|
2021-06-15 15:18:09 -07:00
|
|
|
|
|
2021-07-25 14:51:17 -07:00
|
|
|
|
func TestParseTweetWithVideo(t *testing.T) {
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert := assert.New(t)
|
2021-09-27 18:43:24 -07:00
|
|
|
|
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_video.json")
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.Empty(tweet.Images)
|
|
|
|
|
assert.Len(tweet.Videos, 1)
|
2021-09-27 18:43:24 -07:00
|
|
|
|
|
2022-01-31 19:14:14 -08:00
|
|
|
|
v := tweet.Videos[0]
|
|
|
|
|
assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", v.RemoteURL)
|
|
|
|
|
assert.False(v.IsGif)
|
2021-07-25 14:51:17 -07:00
|
|
|
|
}
|
|
|
|
|
|
2022-10-15 12:40:40 -04:00
|
|
|
|
func TestParseTweetWith2Videos(t *testing.T) {
|
|
|
|
|
assert := assert.New(t)
|
|
|
|
|
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_2_videos.json")
|
|
|
|
|
assert.Empty(tweet.Images)
|
|
|
|
|
assert.Len(tweet.Videos, 2)
|
|
|
|
|
|
|
|
|
|
v1 := tweet.Videos[0]
|
|
|
|
|
assert.Equal("https://video.twimg.com/ext_tw_video/1579701730148847617/pu/vid/576x576/ghA0fyf58v-2naWR.mp4?tag=12", v1.RemoteURL)
|
|
|
|
|
assert.False(v1.IsGif)
|
|
|
|
|
v2 := tweet.Videos[1]
|
|
|
|
|
assert.Equal("https://video.twimg.com/ext_tw_video/1579701730157252608/pu/vid/480x480/VQ69Ut84XT2BgIzX.mp4?tag=12", v2.RemoteURL)
|
|
|
|
|
assert.False(v2.IsGif)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestParseTweetWithImageAndVideo(t *testing.T) {
|
|
|
|
|
assert := assert.New(t)
|
|
|
|
|
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_image_and_video.json")
|
|
|
|
|
assert.Len(tweet.Images, 1)
|
|
|
|
|
assert.Len(tweet.Videos, 1)
|
|
|
|
|
|
|
|
|
|
img := tweet.Images[0]
|
|
|
|
|
assert.Equal(img.ID, ImageID(1579292192580911104))
|
|
|
|
|
assert.Equal(img.RemoteURL, "https://pbs.twimg.com/media/FerF4bdVQAAKeYJ.jpg")
|
|
|
|
|
|
|
|
|
|
vid := tweet.Videos[0]
|
|
|
|
|
assert.Equal(vid.ID, VideoID(1579292197752430592))
|
|
|
|
|
assert.Equal(vid.ThumbnailRemoteUrl, "https://pbs.twimg.com/ext_tw_video_thumb/1579292197752430592/pu/img/soG4wMWOy3AVpllM.jpg")
|
|
|
|
|
assert.Equal(vid.RemoteURL, "https://video.twimg.com/ext_tw_video/1579292197752430592/pu/vid/640x750/UE-PSqG2EE5N2dN8.mp4?tag=12")
|
|
|
|
|
}
|
|
|
|
|
|
2021-10-04 21:06:53 -07:00
|
|
|
|
func TestParseTweetWithGif(t *testing.T) {
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert := assert.New(t)
|
2021-10-04 21:06:53 -07:00
|
|
|
|
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json")
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.Len(tweet.Videos, 1)
|
2021-10-04 21:06:53 -07:00
|
|
|
|
|
2022-01-31 19:14:14 -08:00
|
|
|
|
v := tweet.Videos[0]
|
|
|
|
|
assert.Equal("https://video.twimg.com/tweet_video/E189-VhVoAYcrDv.mp4", v.RemoteURL)
|
|
|
|
|
assert.True(v.IsGif)
|
2021-10-04 21:06:53 -07:00
|
|
|
|
}
|
|
|
|
|
|
2021-09-17 18:04:12 -07:00
|
|
|
|
func TestParseTweetWithUrl(t *testing.T) {
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert := assert.New(t)
|
2021-09-27 18:43:24 -07:00
|
|
|
|
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_url_card.json")
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.Empty(tweet.Polls)
|
|
|
|
|
assert.Len(tweet.Urls, 1)
|
2021-09-17 18:04:12 -07:00
|
|
|
|
|
2022-01-31 19:14:14 -08:00
|
|
|
|
u := tweet.Urls[0]
|
|
|
|
|
assert.Equal("https://reason.com/2021/08/30/la-teachers-union-cecily-myart-cruz-learning-loss/", u.Text)
|
2022-02-01 15:48:43 -08:00
|
|
|
|
assert.Equal("https://t.co/Y1lWjNEiPK", u.ShortText)
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.True(u.HasCard)
|
|
|
|
|
assert.Equal("reason.com", u.Domain)
|
2021-09-17 18:04:12 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestParseTweetWithUrlButNoCard(t *testing.T) {
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert := assert.New(t)
|
2021-09-27 18:43:24 -07:00
|
|
|
|
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_url_but_no_card.json")
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.Len(tweet.Urls, 1)
|
2021-09-17 18:04:12 -07:00
|
|
|
|
|
2022-01-31 19:14:14 -08:00
|
|
|
|
u := tweet.Urls[0]
|
|
|
|
|
assert.Equal("https://www.politico.com/newsletters/west-wing-playbook/2021/09/16/the-jennifer-rubin-wh-symbiosis-494364", u.Text)
|
2022-02-01 15:48:43 -08:00
|
|
|
|
assert.Equal("https://t.co/ZigZyLctwt", u.ShortText)
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.False(u.HasCard)
|
2021-09-17 18:04:12 -07:00
|
|
|
|
}
|
|
|
|
|
|
2021-09-17 19:45:31 -07:00
|
|
|
|
func TestParseTweetWithMultipleUrls(t *testing.T) {
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert := assert.New(t)
|
2021-09-27 18:43:24 -07:00
|
|
|
|
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_multiple_urls.json")
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.Empty(tweet.Polls)
|
|
|
|
|
assert.Len(tweet.Urls, 3)
|
2021-09-17 19:45:31 -07:00
|
|
|
|
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.False(tweet.Urls[0].HasCard)
|
|
|
|
|
assert.False(tweet.Urls[1].HasCard)
|
2022-03-13 17:09:43 -07:00
|
|
|
|
assert.True(tweet.Urls[2].HasCard)
|
2021-12-12 16:42:32 -08:00
|
|
|
|
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.Equal("Biden’s victory came from the suburbs", tweet.Urls[2].Title)
|
2021-09-17 19:45:31 -07:00
|
|
|
|
}
|
|
|
|
|
|
2021-09-27 18:12:28 -07:00
|
|
|
|
func TestTweetWithLotsOfReplyMentions(t *testing.T) {
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert := assert.New(t)
|
2021-09-27 18:43:24 -07:00
|
|
|
|
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_at_mentions_in_front.json")
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.Len(tweet.ReplyMentions, 4)
|
2021-09-27 18:12:28 -07:00
|
|
|
|
|
2022-01-31 19:14:14 -08:00
|
|
|
|
for i, v := range []UserHandle{"rob_mose", "primalpoly", "jmasseypoet", "SpaceX"} {
|
|
|
|
|
assert.Equal(v, tweet.ReplyMentions[i])
|
2021-09-27 18:12:28 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-12 16:42:32 -08:00
|
|
|
|
func TestTweetWithPoll(t *testing.T) {
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert := assert.New(t)
|
2021-12-12 16:42:32 -08:00
|
|
|
|
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_poll_4_choices.json")
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.Len(tweet.Polls, 1)
|
2021-12-12 16:42:32 -08:00
|
|
|
|
|
|
|
|
|
p := tweet.Polls[0]
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.Equal(tweet.ID, p.TweetID)
|
|
|
|
|
assert.Equal(4, p.NumChoices)
|
|
|
|
|
assert.Equal("Tribal armband", p.Choice1)
|
|
|
|
|
assert.Equal("Marijuana leaf", p.Choice2)
|
|
|
|
|
assert.Equal("Butterfly", p.Choice3)
|
|
|
|
|
assert.Equal("Maple leaf", p.Choice4)
|
|
|
|
|
assert.Equal(1593, p.Choice1_Votes)
|
|
|
|
|
assert.Equal(624, p.Choice2_Votes)
|
|
|
|
|
assert.Equal(778, p.Choice3_Votes)
|
|
|
|
|
assert.Equal(1138, p.Choice4_Votes)
|
2022-03-13 17:09:43 -07:00
|
|
|
|
assert.Equal(1440*60, p.VotingDuration)
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.Equal(int64(1638331934), p.VotingEndsAt.Unix())
|
|
|
|
|
assert.Equal(int64(1638331935), p.LastUpdatedAt.Unix())
|
2021-12-12 16:42:32 -08:00
|
|
|
|
}
|
|
|
|
|
|
2022-05-14 15:02:15 -07:00
|
|
|
|
func TestTweetWithSpace(t *testing.T) {
|
|
|
|
|
assert := assert.New(t)
|
|
|
|
|
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_space_card.json")
|
|
|
|
|
assert.Len(tweet.Urls, 0)
|
|
|
|
|
assert.Len(tweet.Spaces, 1)
|
|
|
|
|
|
|
|
|
|
s := tweet.Spaces[0]
|
|
|
|
|
assert.Equal(SpaceID("1YpKkZVyQjoxj"), s.ID)
|
|
|
|
|
}
|
|
|
|
|
|
2021-06-15 15:18:09 -07:00
|
|
|
|
func TestParseTweetResponse(t *testing.T) {
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert := assert.New(t)
|
2022-03-13 16:33:17 -07:00
|
|
|
|
data, err := os.ReadFile("test_responses/michael_malice_feed.json")
|
2021-06-15 15:18:09 -07:00
|
|
|
|
if err != nil {
|
|
|
|
|
panic(err)
|
|
|
|
|
}
|
2022-01-31 19:14:14 -08:00
|
|
|
|
var tweet_resp TweetResponse
|
2021-06-15 15:18:09 -07:00
|
|
|
|
err = json.Unmarshal(data, &tweet_resp)
|
2022-01-31 19:14:14 -08:00
|
|
|
|
require.NoError(t, err)
|
2021-06-15 15:18:09 -07:00
|
|
|
|
|
2022-02-12 20:39:30 -08:00
|
|
|
|
trove, err := ParseTweetResponse(tweet_resp)
|
2022-01-31 19:14:14 -08:00
|
|
|
|
require.NoError(t, err)
|
2022-02-12 20:39:30 -08:00
|
|
|
|
tweets, retweets, users := trove.Transform()
|
2021-06-15 15:18:09 -07:00
|
|
|
|
|
2022-03-13 17:09:43 -07:00
|
|
|
|
assert.Len(tweets, 29-3)
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.Len(retweets, 3)
|
|
|
|
|
assert.Len(users, 9)
|
2021-11-06 13:37:46 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestParseTweetResponseWithTombstones(t *testing.T) {
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert := assert.New(t)
|
2022-03-13 16:33:17 -07:00
|
|
|
|
data, err := os.ReadFile("test_responses/tombstones/tombstone_deleted.json")
|
2021-11-06 13:37:46 -07:00
|
|
|
|
if err != nil {
|
|
|
|
|
panic(err)
|
|
|
|
|
}
|
2022-01-31 19:14:14 -08:00
|
|
|
|
var tweet_resp TweetResponse
|
2021-11-06 13:37:46 -07:00
|
|
|
|
err = json.Unmarshal(data, &tweet_resp)
|
2022-01-31 19:14:14 -08:00
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
2021-11-06 13:37:46 -07:00
|
|
|
|
extra_users := tweet_resp.HandleTombstones()
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.Len(extra_users, 1)
|
2021-11-06 13:37:46 -07:00
|
|
|
|
|
2022-02-12 20:39:30 -08:00
|
|
|
|
trove, err := ParseTweetResponse(tweet_resp)
|
2022-01-31 19:14:14 -08:00
|
|
|
|
require.NoError(t, err)
|
2022-02-12 20:39:30 -08:00
|
|
|
|
tweets, retweets, users := trove.Transform()
|
2021-11-06 13:37:46 -07:00
|
|
|
|
|
2022-01-31 19:14:14 -08:00
|
|
|
|
assert.Len(tweets, 2)
|
|
|
|
|
assert.Len(retweets, 0)
|
|
|
|
|
assert.Len(users, 1)
|
2021-06-15 15:18:09 -07:00
|
|
|
|
}
|