2021-06-13 14:34:20 -07:00
package scraper_test
import (
2022-01-08 18:36:23 -05:00
"testing"
2021-06-13 14:34:20 -07:00
"encoding/json"
"io/ioutil"
2022-01-08 18:36:23 -05:00
"net/http"
"github.com/jarcoal/httpmock"
2022-01-31 19:14:14 -08:00
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
2021-06-13 14:34:20 -07:00
2022-01-31 19:14:14 -08:00
. "offline_twitter/scraper"
2021-06-13 14:34:20 -07:00
)
func TestParseSingleUser ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2021-06-13 14:34:20 -07:00
data , err := ioutil . ReadFile ( "test_responses/michael_malice_user_profile.json" )
if err != nil {
panic ( err )
}
2022-01-31 19:14:14 -08:00
var user_resp UserResponse
2021-06-13 14:34:20 -07:00
err = json . Unmarshal ( data , & user_resp )
2022-01-31 19:14:14 -08:00
require . NoError ( t , err )
2021-06-13 14:34:20 -07:00
2022-01-31 19:14:14 -08:00
apiUser := user_resp . ConvertToAPIUser ( )
2021-06-13 14:34:20 -07:00
2022-01-31 19:14:14 -08:00
user , err := ParseSingleUser ( apiUser )
require . NoError ( t , err )
assert . Equal ( UserID ( 44067298 ) , user . ID )
assert . Equal ( "Michael Malice" , user . DisplayName )
assert . Equal ( UserHandle ( "michaelmalice" ) , user . Handle )
assert . Equal ( "Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by Harvey Pekar\nUnderwear Model\nHe/Him ⚑" , user . Bio )
assert . Equal ( 941 , user . FollowingCount )
assert . Equal ( 208589 , user . FollowersCount )
assert . Equal ( "Brooklyn" , user . Location )
assert . Equal ( "https://amzn.to/3oInafv" , user . Website )
assert . Equal ( int64 ( 1243920952 ) , user . JoinDate . Unix ( ) )
assert . False ( user . IsPrivate )
assert . True ( user . IsVerified )
assert . False ( user . IsBanned )
assert . Equal ( "https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9.jpg" , user . ProfileImageUrl )
assert . Equal ( "https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9_normal.jpg" , user . GetTinyProfileImageUrl ( ) )
assert . Equal ( "https://pbs.twimg.com/profile_banners/44067298/1615134676" , user . BannerImageUrl )
assert . Equal ( "michaelmalice_profile_Lbwdb_C9.jpg" , user . ProfileImageLocalPath )
assert . Equal ( "michaelmalice_banner_1615134676.jpg" , user . BannerImageLocalPath )
assert . Equal ( TweetID ( 1403835414373339136 ) , user . PinnedTweetID )
2021-06-13 14:34:20 -07:00
}
2022-01-06 13:43:22 -05:00
2022-01-06 14:39:31 -05:00
/ * *
* Should correctly parse a banned user
* /
func TestParseBannedUser ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2022-01-06 14:39:31 -05:00
data , err := ioutil . ReadFile ( "test_responses/suspended_user.json" )
if err != nil {
panic ( err )
}
2022-01-31 19:14:14 -08:00
var user_resp UserResponse
2022-01-06 14:39:31 -05:00
err = json . Unmarshal ( data , & user_resp )
2022-01-31 19:14:14 -08:00
require . NoError ( t , err )
2022-01-06 14:39:31 -05:00
2022-01-31 19:14:14 -08:00
apiUser := user_resp . ConvertToAPIUser ( )
2022-01-06 14:39:31 -05:00
2022-01-31 19:14:14 -08:00
user , err := ParseSingleUser ( apiUser )
require . NoError ( t , err )
assert . Equal ( UserID ( 193918550 ) , user . ID )
assert . True ( user . IsBanned )
2022-01-06 15:21:27 -05:00
// Test generation of profile images for banned user
2022-01-31 19:14:14 -08:00
assert . Equal ( "https://abs.twimg.com/sticky/default_profile_images/default_profile.png" , user . GetTinyProfileImageUrl ( ) )
assert . Equal ( "default_profile.png" , user . GetTinyProfileImageLocalPath ( ) )
2022-01-06 14:39:31 -05:00
}
2022-01-06 13:43:22 -05:00
/ * *
* Should extract a user handle from a tweet URL , or fail if URL is invalid
* /
func TestParseHandleFromTweetUrl ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2022-01-06 13:43:22 -05:00
// Test valid tweet url
url := "https://twitter.com/kanesays23/status/1429583672827465730"
2022-01-31 19:14:14 -08:00
result , err := ParseHandleFromTweetUrl ( url )
assert . NoError ( err )
assert . Equal ( UserHandle ( "kanesays23" ) , result )
2022-01-06 13:43:22 -05:00
2022-01-10 11:41:03 -05:00
// Test url with GET params
2022-01-31 19:14:14 -08:00
result , err = ParseHandleFromTweetUrl ( "https://twitter.com/NerdNoticing/status/1263192389050654720?s=20" )
assert . NoError ( err )
assert . Equal ( UserHandle ( "NerdNoticing" ) , result )
2022-01-06 13:43:22 -05:00
// Test invalid url
2022-01-31 19:14:14 -08:00
_ , err = ParseHandleFromTweetUrl ( "https://twitter.com/NerdNoticing/status/1263192389050654720s=20" )
assert . Error ( err )
2022-01-06 13:43:22 -05:00
// Test empty string
2022-01-31 19:14:14 -08:00
_ , err = ParseHandleFromTweetUrl ( "" )
assert . Error ( err )
2022-01-06 13:43:22 -05:00
}
2022-01-08 18:36:23 -05:00
/ * *
* Should extract a user handle from a shortened tweet URL
* /
func TestParseHandleFromShortenedTweetUrl ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2022-01-08 18:36:23 -05:00
short_url := "https://t.co/rZVrNGJyDe"
expanded_url := "https://twitter.com/MarkSnyderJr1/status/1460857606147350529"
httpmock . Activate ( )
defer httpmock . DeactivateAndReset ( )
httpmock . RegisterResponder ( "GET" , short_url , func ( req * http . Request ) ( * http . Response , error ) {
header := http . Header { }
header . Set ( "Location" , expanded_url )
return & http . Response { StatusCode : 301 , Header : header } , nil
} )
2022-02-01 15:48:43 -08:00
// Check the httpmock interceptor is working correctly
2022-01-31 19:14:14 -08:00
require . Equal ( t , expanded_url , ExpandShortUrl ( short_url ) , "httpmock didn't intercept the request" )
2022-01-08 18:36:23 -05:00
2022-01-31 19:14:14 -08:00
result , err := ParseHandleFromTweetUrl ( short_url )
require . NoError ( t , err )
assert . Equal ( UserHandle ( "MarkSnyderJr1" ) , result )
2022-01-08 18:36:23 -05:00
}