2021-05-22 18:20:18 -04:00
package scraper_test
import (
"encoding/json"
"io/ioutil"
"testing"
2022-01-31 19:14:14 -08:00
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
2021-05-22 18:20:18 -04:00
)
2022-01-31 19:14:14 -08:00
func load_tweet_from_file ( filename string ) Tweet {
2021-09-27 18:43:24 -07:00
data , err := ioutil . ReadFile ( filename )
2021-05-22 18:20:18 -04:00
if err != nil {
panic ( err )
}
2022-01-31 19:14:14 -08:00
var apitweet APITweet
2021-09-27 18:08:14 -07:00
err = json . Unmarshal ( data , & apitweet )
2021-05-22 18:20:18 -04:00
if err != nil {
2021-09-27 18:43:24 -07:00
panic ( err )
2021-05-22 18:20:18 -04:00
}
2022-01-31 19:14:14 -08:00
tweet , err := ParseSingleTweet ( apitweet )
2021-05-22 18:20:18 -04:00
if err != nil {
2021-09-27 18:43:24 -07:00
panic ( err )
2021-05-22 18:20:18 -04:00
}
2021-09-27 18:43:24 -07:00
return tweet
}
func TestParseSingleTweet ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2021-09-27 18:43:24 -07:00
tweet := load_tweet_from_file ( "test_responses/single_tweets/tweet_with_unicode_chars.json" )
2021-05-22 18:20:18 -04:00
2022-01-31 19:14:14 -08:00
assert . Equal ( "The fact that @michaelmalice new book ‘ The Anarchist Handbook’ is just absolutely destroying on the charts is the largest white pill I’ ve swallowed in years." , tweet . Text )
assert . Len ( tweet . Mentions , 1 )
assert . Contains ( tweet . Mentions , UserHandle ( "michaelmalice" ) )
assert . Empty ( tweet . Urls )
assert . Equal ( int64 ( 1621639105 ) , tweet . PostedAt . Unix ( ) )
assert . Zero ( tweet . QuotedTweetID )
assert . Empty ( tweet . Polls )
2021-05-22 18:20:18 -04:00
}
2021-09-27 18:08:14 -07:00
func TestParseTweetWithImage ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2021-09-27 18:43:24 -07:00
tweet := load_tweet_from_file ( "test_responses/single_tweets/tweet_with_image.json" )
2021-05-22 18:20:18 -04:00
2022-01-31 19:14:14 -08:00
assert . Equal ( "this saddens me every time" , tweet . Text )
assert . Len ( tweet . Images , 1 )
2021-09-27 18:08:14 -07:00
}
2021-05-22 18:20:18 -04:00
2021-09-27 18:08:14 -07:00
func TestParseTweetWithQuotedTweetAsLink ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2021-09-27 18:43:24 -07:00
tweet := load_tweet_from_file ( "test_responses/single_tweets/tweet_with_quoted_tweet_as_link2.json" )
2021-05-22 18:20:18 -04:00
2022-01-31 19:14:14 -08:00
assert . Equal ( "sometimes they're too dimwitted to even get the wrong title right" , tweet . Text )
assert . Equal ( TweetID ( 1395882872729477131 ) , tweet . InReplyToID )
assert . Equal ( TweetID ( 1396194494710788100 ) , tweet . QuotedTweetID )
assert . Empty ( tweet . ReplyMentions )
assert . Empty ( tweet . Polls )
2021-05-22 18:20:18 -04:00
}
2021-06-15 15:18:09 -07:00
2021-07-25 14:51:17 -07:00
func TestParseTweetWithVideo ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2021-09-27 18:43:24 -07:00
tweet := load_tweet_from_file ( "test_responses/single_tweets/tweet_with_video.json" )
2022-01-31 19:14:14 -08:00
assert . Empty ( tweet . Images )
assert . Len ( tweet . Videos , 1 )
2021-09-27 18:43:24 -07:00
2022-01-31 19:14:14 -08:00
v := tweet . Videos [ 0 ]
assert . Equal ( "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12" , v . RemoteURL )
assert . False ( v . IsGif )
2021-07-25 14:51:17 -07:00
}
2021-10-04 21:06:53 -07:00
func TestParseTweetWithGif ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2021-10-04 21:06:53 -07:00
tweet := load_tweet_from_file ( "test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json" )
2022-01-31 19:14:14 -08:00
assert . Len ( tweet . Videos , 1 )
2021-10-04 21:06:53 -07:00
2022-01-31 19:14:14 -08:00
v := tweet . Videos [ 0 ]
assert . Equal ( "https://video.twimg.com/tweet_video/E189-VhVoAYcrDv.mp4" , v . RemoteURL )
assert . True ( v . IsGif )
2021-10-04 21:06:53 -07:00
}
2021-09-17 18:04:12 -07:00
func TestParseTweetWithUrl ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2021-09-27 18:43:24 -07:00
tweet := load_tweet_from_file ( "test_responses/single_tweets/tweet_with_url_card.json" )
2022-01-31 19:14:14 -08:00
assert . Empty ( tweet . Polls )
assert . Len ( tweet . Urls , 1 )
2021-09-17 18:04:12 -07:00
2022-01-31 19:14:14 -08:00
u := tweet . Urls [ 0 ]
assert . Equal ( "https://reason.com/2021/08/30/la-teachers-union-cecily-myart-cruz-learning-loss/" , u . Text )
assert . True ( u . HasCard )
assert . Equal ( "reason.com" , u . Domain )
2021-09-17 18:04:12 -07:00
}
func TestParseTweetWithUrlButNoCard ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2021-09-27 18:43:24 -07:00
tweet := load_tweet_from_file ( "test_responses/single_tweets/tweet_with_url_but_no_card.json" )
2022-01-31 19:14:14 -08:00
assert . Len ( tweet . Urls , 1 )
2021-09-17 18:04:12 -07:00
2022-01-31 19:14:14 -08:00
u := tweet . Urls [ 0 ]
assert . Equal ( "https://www.politico.com/newsletters/west-wing-playbook/2021/09/16/the-jennifer-rubin-wh-symbiosis-494364" , u . Text )
assert . False ( u . HasCard )
2021-09-17 18:04:12 -07:00
}
2021-09-17 19:45:31 -07:00
func TestParseTweetWithMultipleUrls ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2021-09-27 18:43:24 -07:00
tweet := load_tweet_from_file ( "test_responses/single_tweets/tweet_with_multiple_urls.json" )
2022-01-31 19:14:14 -08:00
assert . Empty ( tweet . Polls )
assert . Len ( tweet . Urls , 3 )
2021-09-17 19:45:31 -07:00
2022-01-31 19:14:14 -08:00
assert . False ( tweet . Urls [ 0 ] . HasCard )
assert . False ( tweet . Urls [ 1 ] . HasCard )
assert . True ( tweet . Urls [ 2 ] . HasCard )
2021-12-12 16:42:32 -08:00
2022-01-31 19:14:14 -08:00
assert . Equal ( "Biden’ s victory came from the suburbs" , tweet . Urls [ 2 ] . Title )
2021-09-17 19:45:31 -07:00
}
2021-09-27 18:12:28 -07:00
func TestTweetWithLotsOfReplyMentions ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2021-09-27 18:43:24 -07:00
tweet := load_tweet_from_file ( "test_responses/single_tweets/tweet_with_at_mentions_in_front.json" )
2022-01-31 19:14:14 -08:00
assert . Len ( tweet . ReplyMentions , 4 )
2021-09-27 18:12:28 -07:00
2022-01-31 19:14:14 -08:00
for i , v := range [ ] UserHandle { "rob_mose" , "primalpoly" , "jmasseypoet" , "SpaceX" } {
assert . Equal ( v , tweet . ReplyMentions [ i ] )
2021-09-27 18:12:28 -07:00
}
}
2021-12-12 16:42:32 -08:00
func TestTweetWithPoll ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2021-12-12 16:42:32 -08:00
tweet := load_tweet_from_file ( "test_responses/single_tweets/tweet_with_poll_4_choices.json" )
2022-01-31 19:14:14 -08:00
assert . Len ( tweet . Polls , 1 )
2021-12-12 16:42:32 -08:00
p := tweet . Polls [ 0 ]
2022-01-31 19:14:14 -08:00
assert . Equal ( tweet . ID , p . TweetID )
assert . Equal ( 4 , p . NumChoices )
assert . Equal ( "Tribal armband" , p . Choice1 )
assert . Equal ( "Marijuana leaf" , p . Choice2 )
assert . Equal ( "Butterfly" , p . Choice3 )
assert . Equal ( "Maple leaf" , p . Choice4 )
assert . Equal ( 1593 , p . Choice1_Votes )
assert . Equal ( 624 , p . Choice2_Votes )
assert . Equal ( 778 , p . Choice3_Votes )
assert . Equal ( 1138 , p . Choice4_Votes )
assert . Equal ( 1440 * 60 , p . VotingDuration )
assert . Equal ( int64 ( 1638331934 ) , p . VotingEndsAt . Unix ( ) )
assert . Equal ( int64 ( 1638331935 ) , p . LastUpdatedAt . Unix ( ) )
2021-12-12 16:42:32 -08:00
}
2021-09-17 19:45:31 -07:00
2021-06-15 15:18:09 -07:00
func TestParseTweetResponse ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2021-06-15 15:18:09 -07:00
data , err := ioutil . ReadFile ( "test_responses/michael_malice_feed.json" )
if err != nil {
panic ( err )
}
2022-01-31 19:14:14 -08:00
var tweet_resp TweetResponse
2021-06-15 15:18:09 -07:00
err = json . Unmarshal ( data , & tweet_resp )
2022-01-31 19:14:14 -08:00
require . NoError ( t , err )
2021-06-15 15:18:09 -07:00
2022-01-31 19:14:14 -08:00
tweets , retweets , users , err := ParseTweetResponse ( tweet_resp )
require . NoError ( t , err )
2021-06-15 15:18:09 -07:00
2022-01-31 19:14:14 -08:00
assert . Len ( tweets , 29 - 3 )
assert . Len ( retweets , 3 )
assert . Len ( users , 9 )
2021-11-06 13:37:46 -07:00
}
func TestParseTweetResponseWithTombstones ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2021-11-06 13:37:46 -07:00
data , err := ioutil . ReadFile ( "test_responses/tombstones/tombstone_deleted.json" )
if err != nil {
panic ( err )
}
2022-01-31 19:14:14 -08:00
var tweet_resp TweetResponse
2021-11-06 13:37:46 -07:00
err = json . Unmarshal ( data , & tweet_resp )
2022-01-31 19:14:14 -08:00
require . NoError ( t , err )
2021-11-06 13:37:46 -07:00
extra_users := tweet_resp . HandleTombstones ( )
2022-01-31 19:14:14 -08:00
assert . Len ( extra_users , 1 )
2021-11-06 13:37:46 -07:00
2022-01-31 19:14:14 -08:00
tweets , retweets , users , err := ParseTweetResponse ( tweet_resp )
require . NoError ( t , err )
2021-11-06 13:37:46 -07:00
2022-01-31 19:14:14 -08:00
assert . Len ( tweets , 2 )
assert . Len ( retweets , 0 )
assert . Len ( users , 1 )
2021-06-15 15:18:09 -07:00
}