2021-09-17 13:41:43 -07:00
package scraper_test
import (
"testing"
"io/ioutil"
"encoding/json"
2022-01-31 19:14:14 -08:00
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
2021-09-17 13:41:43 -07:00
)
func TestParseAPIUrlCard ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2021-09-27 15:30:05 -07:00
data , err := ioutil . ReadFile ( "test_responses/tweet_content/url_card.json" )
2021-09-17 13:41:43 -07:00
if err != nil {
panic ( err )
}
2022-01-31 19:14:14 -08:00
var apiCard APICard
2021-09-17 13:41:43 -07:00
err = json . Unmarshal ( data , & apiCard )
2022-01-31 19:14:14 -08:00
require . NoError ( t , err )
2021-09-17 13:41:43 -07:00
2022-01-31 19:14:14 -08:00
url := ParseAPIUrlCard ( apiCard )
assert . Equal ( "reason.com" , url . Domain )
assert . Equal ( "L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'" , url . Title )
assert . Equal ( "\"It’ s OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned resilience.\"" , url . Description )
assert . Equal ( 600 , url . ThumbnailWidth )
assert . Equal ( 315 , url . ThumbnailHeight )
assert . Equal ( "https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600" , url . ThumbnailRemoteUrl )
assert . Equal ( "odDi9EqO_600x600.jpg" , url . ThumbnailLocalPath )
assert . Equal ( UserID ( 155581583 ) , url . CreatorID )
assert . Equal ( UserID ( 16467567 ) , url . SiteID )
assert . True ( url . HasThumbnail )
assert . False ( url . IsContentDownloaded )
2021-09-17 13:41:43 -07:00
}
2021-09-17 17:35:55 -07:00
func TestParseAPIUrlCardWithPlayer ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2021-09-27 15:30:05 -07:00
data , err := ioutil . ReadFile ( "test_responses/tweet_content/url_card_with_player.json" )
2021-09-17 17:35:55 -07:00
if err != nil {
panic ( err )
}
2022-01-31 19:14:14 -08:00
var apiCard APICard
2021-09-17 17:35:55 -07:00
err = json . Unmarshal ( data , & apiCard )
2022-01-31 19:14:14 -08:00
require . NoError ( t , err )
2021-09-17 17:35:55 -07:00
2022-01-31 19:14:14 -08:00
url := ParseAPIUrlCard ( apiCard )
assert . Equal ( "www.youtube.com" , url . Domain )
assert . Equal ( "The Politically Incorrect Guide to the Constitution (Starring Tom..." , url . Title )
assert . Equal ( "Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8Watch this episode on Rumble: https://rumble..." , url . Description )
assert . Equal ( "https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1" , url . ThumbnailRemoteUrl )
assert . Equal ( "_1t0btyt_800x320_1.jpg" , url . ThumbnailLocalPath )
assert . Equal ( UserID ( 10228272 ) , url . SiteID )
assert . True ( url . HasThumbnail )
assert . False ( url . IsContentDownloaded )
2021-09-17 17:35:55 -07:00
}
2021-09-17 20:50:28 -07:00
2021-11-22 14:52:18 -08:00
func TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2021-11-22 14:52:18 -08:00
data , err := ioutil . ReadFile ( "test_responses/tweet_content/url_card_with_player_placeholder_image.json" )
if err != nil {
panic ( err )
}
2022-01-31 19:14:14 -08:00
var apiCard APICard
2021-11-22 14:52:18 -08:00
err = json . Unmarshal ( data , & apiCard )
2022-01-31 19:14:14 -08:00
require . NoError ( t , err )
2021-11-22 14:52:18 -08:00
2022-01-31 19:14:14 -08:00
url := ParseAPIUrlCard ( apiCard )
assert . Equal ( "www.youtube.com" , url . Domain )
assert . Equal ( "Did Michael Malice Turn Me into an Anarchist? | Ep 181" , url . Title )
assert . Equal ( "SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily______________________________________________________..." , url . Description )
assert . Equal ( "https://pbs.twimg.com/cards/player-placeholder.png" , url . ThumbnailRemoteUrl )
assert . Equal ( "player-placeholder.png" , url . ThumbnailLocalPath )
assert . Equal ( UserID ( 10228272 ) , url . SiteID )
assert . True ( url . HasThumbnail )
assert . False ( url . IsContentDownloaded )
2021-11-22 14:52:18 -08:00
}
2021-09-17 20:50:28 -07:00
func TestParseAPIUrlCardWithoutThumbnail ( t * testing . T ) {
2022-01-31 19:14:14 -08:00
assert := assert . New ( t )
2021-09-27 15:30:05 -07:00
data , err := ioutil . ReadFile ( "test_responses/tweet_content/url_card_without_thumbnail.json" )
2021-09-17 20:50:28 -07:00
if err != nil {
panic ( err )
}
2022-01-31 19:14:14 -08:00
var apiCard APICard
2021-09-17 20:50:28 -07:00
err = json . Unmarshal ( data , & apiCard )
2022-01-31 19:14:14 -08:00
require . NoError ( t , err )
2021-09-17 20:50:28 -07:00
2022-01-31 19:14:14 -08:00
url := ParseAPIUrlCard ( apiCard )
assert . Equal ( "en.m.wikipedia.org" , url . Domain )
assert . Equal ( "Entryism - Wikipedia" , url . Title )
assert . Equal ( "" , url . Description )
assert . True ( url . HasCard )
assert . False ( url . HasThumbnail )
2021-09-17 20:50:28 -07:00
}
2022-03-02 14:34:42 -08:00
/ * *
* Should check if a url is a tweet url , and if so , parse it
* /
func TestParseTweetUrl ( t * testing . T ) {
assert := assert . New ( t )
// Test valid tweet url
url := "https://twitter.com/kanesays23/status/1429583672827465730"
handle , id , is_ok := TryParseTweetUrl ( url )
assert . True ( is_ok )
assert . Equal ( UserHandle ( "kanesays23" ) , handle )
assert . Equal ( TweetID ( 1429583672827465730 ) , id )
// Test url with GET params
handle , id , is_ok = TryParseTweetUrl ( "https://twitter.com/NerdNoticing/status/1263192389050654720?s=20" )
assert . True ( is_ok )
assert . Equal ( UserHandle ( "NerdNoticing" ) , handle )
assert . Equal ( TweetID ( 1263192389050654720 ) , id )
// Test invalid url
_ , _ , is_ok = TryParseTweetUrl ( "https://twitter.com/NerdNoticing/status/1263192389050654720s=20" )
assert . False ( is_ok )
// Test empty string
_ , _ , is_ok = TryParseTweetUrl ( "" )
assert . False ( is_ok )
}
/ * *
* Should extract a user handle from a tweet URL , or fail if URL is invalid
* /
func TestParseHandleFromTweetUrl ( t * testing . T ) {
assert := assert . New ( t )
// Test valid tweet url
url := "https://twitter.com/kanesays23/status/1429583672827465730"
result , err := ParseHandleFromTweetUrl ( url )
assert . NoError ( err )
assert . Equal ( UserHandle ( "kanesays23" ) , result )
// Test url with GET params
result , err = ParseHandleFromTweetUrl ( "https://twitter.com/NerdNoticing/status/1263192389050654720?s=20" )
assert . NoError ( err )
assert . Equal ( UserHandle ( "NerdNoticing" ) , result )
// Test invalid url
_ , err = ParseHandleFromTweetUrl ( "https://twitter.com/NerdNoticing/status/1263192389050654720s=20" )
assert . Error ( err )
// Test empty string
_ , err = ParseHandleFromTweetUrl ( "" )
assert . Error ( err )
}