Use new helper methods in tweet re-parser

This commit is contained in:
Alessio 2021-05-23 21:03:55 -04:00
parent 0738f77b55
commit 5449b16038

View File

@ -3,7 +3,6 @@ package scraper
import (
"time"
"fmt"
"strings"
)
@ -35,32 +34,14 @@ Urls: %v Images: %v Mentions: %v Hashtags: %v`,
t.ID, t.User, t.Text, t.PostedAt, t.NumLikes, t.NumRetweets, t.NumQuoteTweets, t.NumReplies, t.Urls, t.Images, t.Mentions, t.Hashtags)
}
// ParseSingleTweet turns an APITweet, as returned from the scraper, into a properly structured Tweet object.
func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
apiTweet.NormalizeContent()
ret.ID = TweetID(apiTweet.ID)
ret.User = UserID(apiTweet.UserIDStr)
ret.Text = apiTweet.FullText
// Remove embedded links at the end of the text
if len(apiTweet.Entities.URLs) == 1 {
url := apiTweet.Entities.URLs[0].URL
if strings.Index(ret.Text, url) == len(ret.Text) - len(url) {
ret.Text = ret.Text[0:len(ret.Text) - len(url) - 1] // Also strip the newline
}
}
if len(apiTweet.Entities.Media) == 1 {
url := apiTweet.Entities.Media[0].URL
if strings.Index(ret.Text, url) == len(ret.Text) - len(url) {
ret.Text = ret.Text[0:len(ret.Text) - len(url) - 1] // Also strip the trailing space
}
}
// Remove leading `@username` for replies
if apiTweet.InReplyToScreenName != "" {
if strings.Index(ret.Text, "@" + apiTweet.InReplyToScreenName) == 0 {
ret.Text = ret.Text[len(apiTweet.InReplyToScreenName) + 2:] // `@`, username, space
}
}
ret.PostedAt, err = time.Parse(time.RubyDate, apiTweet.CreatedAt)
if err != nil {
return