Add parsing of DM images, videos and URLs

This commit is contained in:
Alessio 2024-03-08 17:22:52 -08:00
parent 3718b152b6
commit 0d12166966
9 changed files with 167 additions and 37 deletions

View File

@ -51,6 +51,7 @@ type APIExtendedMedia struct {
R interface{} `json:"r"` R interface{} `json:"r"`
} `json:"mediaStats"` } `json:"mediaStats"`
} `json:"ext"` } `json:"ext"`
URL string `json:"url"` // For DM videos
} }
type APICard struct { type APICard struct {

View File

@ -28,10 +28,12 @@ type APIDMMessage struct {
ReplyData struct { ReplyData struct {
ID int `json:"id,string"` ID int `json:"id,string"`
} `json:"reply_data"` } `json:"reply_data"`
URLs []struct { Entities struct {
Url string `json:"url"` URLs []struct {
Indices []int `json:"indices"` ExpandedURL string `json:"expanded_url"`
} `json:"urls"` ShortenedUrl string `json:"url"`
} `json:"urls"`
} `json:"entities"`
Attachment struct { Attachment struct {
Tweet struct { Tweet struct {
Url string `json:"url"` Url string `json:"url"`
@ -40,6 +42,9 @@ type APIDMMessage struct {
User APIUser `json:"user"` User APIUser `json:"user"`
} `json:"status"` } `json:"status"`
} `json:"tweet"` } `json:"tweet"`
Photo APIMedia `json:"photo"`
Video APIExtendedMedia `json:"video"`
Card APICard `json:"card"`
} `json:"attachment"` } `json:"attachment"`
} `json:"message_data"` } `json:"message_data"`
MessageReactions []APIDMReaction `json:"message_reactions"` MessageReactions []APIDMReaction `json:"message_reactions"`
@ -47,9 +52,25 @@ type APIDMMessage struct {
// Remove embedded tweet short-URLs // Remove embedded tweet short-URLs
func (m *APIDMMessage) NormalizeContent() { func (m *APIDMMessage) NormalizeContent() {
// All URLs
for _, url := range m.MessageData.Entities.URLs {
index := strings.Index(m.MessageData.Text, url.ShortenedUrl)
if index == (len(m.MessageData.Text) - len(url.ShortenedUrl)) {
m.MessageData.Text = strings.TrimSpace(m.MessageData.Text[0:index])
}
}
// Specific items
if m.MessageData.Attachment.Tweet.Status.ID != 0 { if m.MessageData.Attachment.Tweet.Status.ID != 0 {
m.MessageData.Text = strings.Replace(m.MessageData.Text, m.MessageData.Attachment.Tweet.Url, "", 1) m.MessageData.Text = strings.Replace(m.MessageData.Text, m.MessageData.Attachment.Tweet.Url, "", 1)
} }
if m.MessageData.Attachment.Photo.ID != 0 {
m.MessageData.Text = strings.Replace(m.MessageData.Text, m.MessageData.Attachment.Photo.URL, "", 1)
}
if m.MessageData.Attachment.Video.ID != 0 {
m.MessageData.Text = strings.Replace(m.MessageData.Text, m.MessageData.Attachment.Video.URL, "", 1)
}
m.MessageData.Text = strings.TrimSpace(m.MessageData.Text) m.MessageData.Text = strings.TrimSpace(m.MessageData.Text)
} }

View File

@ -81,6 +81,80 @@ func TestParseAPIDMMessageWithEmbeddedTweet(t *testing.T) {
assert.True(is_ok) assert.True(is_ok)
} }
// TestParseAPIDMMessageWithEmbeddedImage loads a recorded DM that carries a photo attachment,
// converts it to a trove, and verifies the resulting message text and its single Image.
func TestParseAPIDMMessageWithEmbeddedImage(t *testing.T) {
	assert := assert.New(t)

	// Load and decode the fixture
	raw, err := os.ReadFile("test_responses/dms/dm_message_with_image.json")
	if err != nil {
		panic(err)
	}
	var apiMsg APIDMMessage
	require.NoError(t, json.Unmarshal(raw, &apiMsg))

	// Exactly one message should come out, keyed by its ID
	trove := apiMsg.ToDMTrove()
	assert.Len(trove.Messages, 1)
	msg, found := trove.Messages[DMMessageID(1766224476729995648)]
	assert.True(found)

	// The short-URL must be gone from the text
	assert.Equal("A gastropub staffed by white college girls and the chefs are all Latino", msg.Text)

	// The image must be attached to this message and point at the remote file
	assert.Len(msg.Images, 1)
	img := msg.Images[0]
	assert.Equal(msg.ID, img.DMMessageID)
	assert.Equal("https://ton.twitter.com/1.1/ton/data/dm/1766224476729995648/1766224374648958976/L4Ah1GSh.jpg", img.RemoteURL)
}
// TestParseAPIDMMessageWithEmbeddedVideo loads a recorded DM that carries a video attachment,
// converts it to a trove, and verifies the resulting (empty) message text and its single Video.
func TestParseAPIDMMessageWithEmbeddedVideo(t *testing.T) {
	assert := assert.New(t)

	// Load and decode the fixture
	raw, err := os.ReadFile("test_responses/dms/dm_message_with_video.json")
	if err != nil {
		panic(err)
	}
	var apiMsg APIDMMessage
	require.NoError(t, json.Unmarshal(raw, &apiMsg))

	// Exactly one message should come out, keyed by its ID
	trove := apiMsg.ToDMTrove()
	assert.Len(trove.Messages, 1)
	msg, found := trove.Messages[DMMessageID(1766248283901776125)]
	assert.True(found)

	// With the short-URL stripped, no text remains
	assert.Equal("", msg.Text)

	// The video must be attached to this message and point at the expected variant
	assert.Len(msg.Videos, 1)
	vid := msg.Videos[0]
	assert.Equal(msg.ID, vid.DMMessageID)
	assert.Equal(
		"https://video.twimg.com/dm_video/1766248268416385024/vid/avc1/500x280/edFuZXtEVvem158AjvmJ3SZ_1DdG9cbSoW4fm6cDF1k.mp4?tag=1",
		vid.RemoteURL)
}
// TestParseAPIDMMessageWithUrlCard loads a recorded DM whose link has a preview card,
// converts it to a trove, and verifies the message text plus the parsed Url fields.
func TestParseAPIDMMessageWithUrlCard(t *testing.T) {
	assert := assert.New(t)

	// Load and decode the fixture
	raw, err := os.ReadFile("test_responses/dms/dm_message_with_url_card.json")
	if err != nil {
		panic(err)
	}
	var apiMsg APIDMMessage
	require.NoError(t, json.Unmarshal(raw, &apiMsg))

	// Exactly one message should come out, keyed by its ID
	trove := apiMsg.ToDMTrove()
	assert.Len(trove.Messages, 1)
	msg, found := trove.Messages[DMMessageID(1766255994668191902)]
	assert.True(found)

	assert.Len(msg.Urls, 1)
	assert.Equal("You wrote this?", msg.Text)

	// The link carries the expanded address and the card's metadata
	link := msg.Urls[0]
	assert.Equal(msg.ID, link.DMMessageID)
	assert.Equal("https://offline-twitter.com/introduction/data-ownership-and-composability/", link.Text)
	assert.Equal("offline-twitter.com", link.Domain)
	assert.Equal("Data ownership and composability", link.Title)
}
func TestParseAPIDMConversation(t *testing.T) { func TestParseAPIDMConversation(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := os.ReadFile("test_responses/dms/dm_chat_room.json") data, err := os.ReadFile("test_responses/dms/dm_chat_room.json")

View File

@ -29,6 +29,10 @@ type DMMessage struct {
InReplyToID DMMessageID `db:"in_reply_to_id"` InReplyToID DMMessageID `db:"in_reply_to_id"`
EmbeddedTweetID TweetID `db:"embedded_tweet_id"` EmbeddedTweetID TweetID `db:"embedded_tweet_id"`
Reactions map[UserID]DMReaction Reactions map[UserID]DMReaction
Images []Image
Videos []Video
Urls []Url
} }
func ParseAPIDMMessage(message APIDMMessage) DMMessage { func ParseAPIDMMessage(message APIDMMessage) DMMessage {
@ -47,5 +51,35 @@ func ParseAPIDMMessage(message APIDMMessage) DMMessage {
reacc.DMMessageID = ret.ID reacc.DMMessageID = ret.ID
ret.Reactions[reacc.SenderID] = reacc ret.Reactions[reacc.SenderID] = reacc
} }
if message.MessageData.Attachment.Photo.ID != 0 {
new_image := ParseAPIMedia(message.MessageData.Attachment.Photo)
new_image.DMMessageID = ret.ID
ret.Images = []Image{new_image}
}
if message.MessageData.Attachment.Video.ID != 0 {
entity := message.MessageData.Attachment.Video
if entity.Type == "video" || entity.Type == "animated_gif" {
new_video := ParseAPIVideo(entity)
new_video.DMMessageID = ret.ID
ret.Videos = append(ret.Videos, new_video)
}
}
// Process URLs and link previews
for _, url := range message.MessageData.Entities.URLs {
var new_url Url
if message.MessageData.Attachment.Card.ShortenedUrl == url.ShortenedUrl {
if message.MessageData.Attachment.Card.Name == "3691233323:audiospace" {
// This "url" is just a link to a Space. Don't process it as a Url
continue
}
new_url = ParseAPIUrlCard(message.MessageData.Attachment.Card)
}
new_url.Text = url.ExpandedURL
new_url.ShortText = url.ShortenedUrl
new_url.DMMessageID = ret.ID
ret.Urls = append(ret.Urls, new_url)
}
return ret return ret
} }

View File

@ -7,13 +7,14 @@ import (
type ImageID int64 type ImageID int64
type Image struct { type Image struct {
ID ImageID `db:"id"` ID ImageID `db:"id"`
TweetID TweetID `db:"tweet_id"` TweetID TweetID `db:"tweet_id"`
Width int `db:"width"` DMMessageID DMMessageID `db:"chat_message_id"`
Height int `db:"height"` Width int `db:"width"`
RemoteURL string `db:"remote_url"` Height int `db:"height"`
LocalFilename string `db:"local_filename"` RemoteURL string `db:"remote_url"`
IsDownloaded bool `db:"is_downloaded"` LocalFilename string `db:"local_filename"`
IsDownloaded bool `db:"is_downloaded"`
} }
func ParseAPIMedia(apiMedia APIMedia) Image { func ParseAPIMedia(apiMedia APIMedia) Image {

View File

@ -203,7 +203,8 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
continue continue
} }
new_video := ParseAPIVideo(entity, ret.ID) // This assigns TweetID new_video := ParseAPIVideo(entity)
new_video.TweetID = ret.ID
ret.Videos = append(ret.Videos, new_video) ret.Videos = append(ret.Videos, new_video)
// Remove the thumbnail from the Images list // Remove the thumbnail from the Images list

View File

@ -8,18 +8,19 @@ import (
) )
type Url struct { type Url struct {
TweetID TweetID `db:"tweet_id"` TweetID TweetID `db:"tweet_id"`
Domain string `db:"domain"` DMMessageID DMMessageID `db:"chat_message_id"`
Text string `db:"text"` Domain string `db:"domain"`
ShortText string `db:"short_text"` Text string `db:"text"`
Title string `db:"title"` ShortText string `db:"short_text"`
Description string `db:"description"` Title string `db:"title"`
ThumbnailWidth int `db:"thumbnail_width"` Description string `db:"description"`
ThumbnailHeight int `db:"thumbnail_height"` ThumbnailWidth int `db:"thumbnail_width"`
ThumbnailRemoteUrl string `db:"thumbnail_remote_url"` ThumbnailHeight int `db:"thumbnail_height"`
ThumbnailLocalPath string `db:"thumbnail_local_path"` ThumbnailRemoteUrl string `db:"thumbnail_remote_url"`
CreatorID UserID `db:"creator_id"` ThumbnailLocalPath string `db:"thumbnail_local_path"`
SiteID UserID `db:"site_id"` CreatorID UserID `db:"creator_id"`
SiteID UserID `db:"site_id"`
HasCard bool `db:"has_card"` HasCard bool `db:"has_card"`
HasThumbnail bool `db:"has_thumbnail"` HasThumbnail bool `db:"has_thumbnail"`

View File

@ -12,12 +12,13 @@ type VideoID int64
// from someone else). // from someone else).
type Video struct { type Video struct {
ID VideoID `db:"id"` ID VideoID `db:"id"`
TweetID TweetID `db:"tweet_id"` TweetID TweetID `db:"tweet_id"`
Width int `db:"width"` DMMessageID DMMessageID `db:"chat_message_id"`
Height int `db:"height"` Width int `db:"width"`
RemoteURL string `db:"remote_url"` Height int `db:"height"`
LocalFilename string `db:"local_filename"` RemoteURL string `db:"remote_url"`
LocalFilename string `db:"local_filename"`
ThumbnailRemoteUrl string `db:"thumbnail_remote_url"` ThumbnailRemoteUrl string `db:"thumbnail_remote_url"`
ThumbnailLocalPath string `db:"thumbnail_local_filename"` ThumbnailLocalPath string `db:"thumbnail_local_filename"`
@ -38,7 +39,7 @@ func get_filename(remote_url string) string {
return path.Base(u.Path) return path.Base(u.Path)
} }
func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video { func ParseAPIVideo(apiVideo APIExtendedMedia) Video {
variants := apiVideo.VideoInfo.Variants variants := apiVideo.VideoInfo.Variants
sort.Sort(variants) sort.Sort(variants)
video_remote_url := variants[0].URL video_remote_url := variants[0].URL
@ -66,7 +67,6 @@ func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
return Video{ return Video{
ID: VideoID(apiVideo.ID), ID: VideoID(apiVideo.ID),
TweetID: tweet_id,
Width: apiVideo.OriginalInfo.Width, Width: apiVideo.OriginalInfo.Width,
Height: apiVideo.OriginalInfo.Height, Height: apiVideo.OriginalInfo.Height,
RemoteURL: video_remote_url, RemoteURL: video_remote_url,

View File

@ -21,10 +21,8 @@ func TestParseAPIVideo(t *testing.T) {
err = json.Unmarshal(data, &apivideo) err = json.Unmarshal(data, &apivideo)
require.NoError(err) require.NoError(err)
tweet_id := TweetID(28) video := ParseAPIVideo(apivideo)
video := ParseAPIVideo(apivideo, tweet_id)
assert.Equal(VideoID(1418951950020845568), video.ID) assert.Equal(VideoID(1418951950020845568), video.ID)
assert.Equal(tweet_id, video.TweetID)
assert.Equal(1280, video.Height) assert.Equal(1280, video.Height)
assert.Equal(720, video.Width) assert.Equal(720, video.Width)
assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL) assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL)
@ -46,7 +44,6 @@ func TestParseGeoblockedVideo(t *testing.T) {
err = json.Unmarshal(data, &apivideo) err = json.Unmarshal(data, &apivideo)
require.NoError(err) require.NoError(err)
tweet_id := TweetID(28) video := ParseAPIVideo(apivideo)
video := ParseAPIVideo(apivideo, tweet_id)
assert.True(video.IsGeoblocked) assert.True(video.IsGeoblocked)
} }