Add helper method to parse tweet URLs

This commit is contained in:
Alessio 2022-01-06 13:43:22 -05:00
parent bf2dbede94
commit b1c7db6540
2 changed files with 42 additions and 0 deletions

View File

@ -78,6 +78,20 @@ Joined %s
return ret
}
// tweetUrlPattern matches a tweet URL and captures the user handle in group 1.
// It is compiled once at package scope instead of on every call.  The pattern
// is anchored at the start and the dot in the host is escaped, so strings that
// merely contain a tweet URL, or that use a look-alike host, are rejected.  It
// is deliberately not anchored at the end, so trailing query strings or path
// segments after the tweet ID are still accepted.
var tweetUrlPattern = regexp.MustCompile(`^https://twitter\.com/(\w+)/status/\d+`)

/**
 * Given a tweet URL, return the corresponding user handle.
 *
 * The URL must start with `https://twitter.com/<handle>/status/<digits>`;
 * anything after the numeric tweet ID is ignored.
 *
 * If tweet url is not valid, return an empty handle and an error.
 */
func ParseHandleFromTweetUrl(tweet_url string) (UserHandle, error) {
	matches := tweetUrlPattern.FindStringSubmatch(tweet_url)
	if len(matches) != 2 { // matches[0] is the full match, matches[1] is the handle
		return "", fmt.Errorf("Invalid tweet url: %s", tweet_url)
	}
	return UserHandle(matches[1]), nil
}
// Turn an APIUser, as returned from the scraper, into a properly structured User object
func ParseSingleUser(apiUser APIUser) (ret User, err error) {
ret.ID = UserID(apiUser.ID)

View File

@ -85,3 +85,31 @@ func TestParseSingleUser(t *testing.T) {
t.Errorf("Expected %q, got %q", expected_id, user.PinnedTweet)
}
}
/**
 * Should extract a user handle from a tweet URL, or fail if URL is invalid.
 *
 * Covers three cases: a well-formed tweet URL, a string that is not a URL at
 * all, and the empty string.
 */
func TestParseHandleFromTweetUrl(t *testing.T) {
	// Test valid tweet url
	url := "https://twitter.com/kanesays23/status/1429583672827465730"
	expected_user := scraper.UserHandle("kanesays23")
	result, err := scraper.ParseHandleFromTweetUrl(url)
	if err != nil {
		t.Errorf("Unexpected error while parsing handle: %s", err)
	}
	if result != expected_user {
		t.Errorf("Expected handle %q, got %q", expected_user, result)
	}

	// Test invalid url
	_, err = scraper.ParseHandleFromTweetUrl("awjgwekf")
	if err == nil {
		t.Errorf("Should have produced an error for invalid URL")
	}

	// Test empty string (previously this case re-used the invalid-url input,
	// so the empty string was never actually exercised)
	_, err = scraper.ParseHandleFromTweetUrl("")
	if err == nil {
		t.Errorf("Should have produced an error for empty string")
	}
}