Add parsing of DM images, videos and URLs
This commit is contained in:
parent
3718b152b6
commit
0d12166966
@ -51,6 +51,7 @@ type APIExtendedMedia struct {
|
||||
R interface{} `json:"r"`
|
||||
} `json:"mediaStats"`
|
||||
} `json:"ext"`
|
||||
URL string `json:"url"` // For DM videos
|
||||
}
|
||||
|
||||
type APICard struct {
|
||||
|
@ -28,10 +28,12 @@ type APIDMMessage struct {
|
||||
ReplyData struct {
|
||||
ID int `json:"id,string"`
|
||||
} `json:"reply_data"`
|
||||
URLs []struct {
|
||||
Url string `json:"url"`
|
||||
Indices []int `json:"indices"`
|
||||
} `json:"urls"`
|
||||
Entities struct {
|
||||
URLs []struct {
|
||||
ExpandedURL string `json:"expanded_url"`
|
||||
ShortenedUrl string `json:"url"`
|
||||
} `json:"urls"`
|
||||
} `json:"entities"`
|
||||
Attachment struct {
|
||||
Tweet struct {
|
||||
Url string `json:"url"`
|
||||
@ -40,6 +42,9 @@ type APIDMMessage struct {
|
||||
User APIUser `json:"user"`
|
||||
} `json:"status"`
|
||||
} `json:"tweet"`
|
||||
Photo APIMedia `json:"photo"`
|
||||
Video APIExtendedMedia `json:"video"`
|
||||
Card APICard `json:"card"`
|
||||
} `json:"attachment"`
|
||||
} `json:"message_data"`
|
||||
MessageReactions []APIDMReaction `json:"message_reactions"`
|
||||
@ -47,9 +52,25 @@ type APIDMMessage struct {
|
||||
|
||||
// Remove embedded tweet short-URLs
|
||||
func (m *APIDMMessage) NormalizeContent() {
|
||||
// All URLs
|
||||
for _, url := range m.MessageData.Entities.URLs {
|
||||
index := strings.Index(m.MessageData.Text, url.ShortenedUrl)
|
||||
if index == (len(m.MessageData.Text) - len(url.ShortenedUrl)) {
|
||||
m.MessageData.Text = strings.TrimSpace(m.MessageData.Text[0:index])
|
||||
}
|
||||
}
|
||||
|
||||
// Specific items
|
||||
if m.MessageData.Attachment.Tweet.Status.ID != 0 {
|
||||
m.MessageData.Text = strings.Replace(m.MessageData.Text, m.MessageData.Attachment.Tweet.Url, "", 1)
|
||||
}
|
||||
if m.MessageData.Attachment.Photo.ID != 0 {
|
||||
m.MessageData.Text = strings.Replace(m.MessageData.Text, m.MessageData.Attachment.Photo.URL, "", 1)
|
||||
}
|
||||
if m.MessageData.Attachment.Video.ID != 0 {
|
||||
m.MessageData.Text = strings.Replace(m.MessageData.Text, m.MessageData.Attachment.Video.URL, "", 1)
|
||||
}
|
||||
|
||||
m.MessageData.Text = strings.TrimSpace(m.MessageData.Text)
|
||||
}
|
||||
|
||||
|
@ -81,6 +81,80 @@ func TestParseAPIDMMessageWithEmbeddedTweet(t *testing.T) {
|
||||
assert.True(is_ok)
|
||||
}
|
||||
|
||||
func TestParseAPIDMMessageWithEmbeddedImage(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/dms/dm_message_with_image.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var api_message APIDMMessage
|
||||
err = json.Unmarshal(data, &api_message)
|
||||
require.NoError(t, err)
|
||||
|
||||
trove := api_message.ToDMTrove()
|
||||
|
||||
assert.Len(trove.Messages, 1)
|
||||
m, is_ok := trove.Messages[DMMessageID(1766224476729995648)]
|
||||
assert.True(is_ok)
|
||||
|
||||
// Check that the short-URL is stripped
|
||||
assert.Equal("A gastropub staffed by white college girls and the chefs are all Latino", m.Text)
|
||||
|
||||
assert.Len(m.Images, 1)
|
||||
assert.Equal(m.ID, m.Images[0].DMMessageID)
|
||||
assert.Equal("https://ton.twitter.com/1.1/ton/data/dm/1766224476729995648/1766224374648958976/L4Ah1GSh.jpg", m.Images[0].RemoteURL)
|
||||
}
|
||||
|
||||
func TestParseAPIDMMessageWithEmbeddedVideo(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/dms/dm_message_with_video.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var api_message APIDMMessage
|
||||
err = json.Unmarshal(data, &api_message)
|
||||
require.NoError(t, err)
|
||||
|
||||
trove := api_message.ToDMTrove()
|
||||
|
||||
assert.Len(trove.Messages, 1)
|
||||
m, is_ok := trove.Messages[DMMessageID(1766248283901776125)]
|
||||
assert.True(is_ok)
|
||||
|
||||
// Check the short-URL is stripped
|
||||
assert.Equal("", m.Text)
|
||||
|
||||
assert.Len(m.Videos, 1)
|
||||
assert.Equal(m.ID, m.Videos[0].DMMessageID)
|
||||
assert.Equal(
|
||||
"https://video.twimg.com/dm_video/1766248268416385024/vid/avc1/500x280/edFuZXtEVvem158AjvmJ3SZ_1DdG9cbSoW4fm6cDF1k.mp4?tag=1",
|
||||
m.Videos[0].RemoteURL)
|
||||
}
|
||||
|
||||
func TestParseAPIDMMessageWithUrlCard(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/dms/dm_message_with_url_card.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var api_message APIDMMessage
|
||||
err = json.Unmarshal(data, &api_message)
|
||||
require.NoError(t, err)
|
||||
|
||||
trove := api_message.ToDMTrove()
|
||||
|
||||
assert.Len(trove.Messages, 1)
|
||||
m, is_ok := trove.Messages[DMMessageID(1766255994668191902)]
|
||||
assert.True(is_ok)
|
||||
assert.Len(m.Urls, 1)
|
||||
assert.Equal("You wrote this?", m.Text)
|
||||
url := m.Urls[0]
|
||||
assert.Equal(m.ID, url.DMMessageID)
|
||||
assert.Equal("https://offline-twitter.com/introduction/data-ownership-and-composability/", url.Text)
|
||||
assert.Equal("offline-twitter.com", url.Domain)
|
||||
assert.Equal("Data ownership and composability", url.Title)
|
||||
}
|
||||
|
||||
func TestParseAPIDMConversation(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/dms/dm_chat_room.json")
|
||||
|
@ -29,6 +29,10 @@ type DMMessage struct {
|
||||
InReplyToID DMMessageID `db:"in_reply_to_id"`
|
||||
EmbeddedTweetID TweetID `db:"embedded_tweet_id"`
|
||||
Reactions map[UserID]DMReaction
|
||||
|
||||
Images []Image
|
||||
Videos []Video
|
||||
Urls []Url
|
||||
}
|
||||
|
||||
func ParseAPIDMMessage(message APIDMMessage) DMMessage {
|
||||
@ -47,5 +51,35 @@ func ParseAPIDMMessage(message APIDMMessage) DMMessage {
|
||||
reacc.DMMessageID = ret.ID
|
||||
ret.Reactions[reacc.SenderID] = reacc
|
||||
}
|
||||
if message.MessageData.Attachment.Photo.ID != 0 {
|
||||
new_image := ParseAPIMedia(message.MessageData.Attachment.Photo)
|
||||
new_image.DMMessageID = ret.ID
|
||||
ret.Images = []Image{new_image}
|
||||
}
|
||||
if message.MessageData.Attachment.Video.ID != 0 {
|
||||
entity := message.MessageData.Attachment.Video
|
||||
if entity.Type == "video" || entity.Type == "animated_gif" {
|
||||
new_video := ParseAPIVideo(entity)
|
||||
new_video.DMMessageID = ret.ID
|
||||
ret.Videos = append(ret.Videos, new_video)
|
||||
}
|
||||
}
|
||||
|
||||
// Process URLs and link previews
|
||||
for _, url := range message.MessageData.Entities.URLs {
|
||||
var new_url Url
|
||||
if message.MessageData.Attachment.Card.ShortenedUrl == url.ShortenedUrl {
|
||||
if message.MessageData.Attachment.Card.Name == "3691233323:audiospace" {
|
||||
// This "url" is just a link to a Space. Don't process it as a Url
|
||||
continue
|
||||
}
|
||||
new_url = ParseAPIUrlCard(message.MessageData.Attachment.Card)
|
||||
}
|
||||
new_url.Text = url.ExpandedURL
|
||||
new_url.ShortText = url.ShortenedUrl
|
||||
new_url.DMMessageID = ret.ID
|
||||
ret.Urls = append(ret.Urls, new_url)
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
@ -7,13 +7,14 @@ import (
|
||||
type ImageID int64
|
||||
|
||||
type Image struct {
|
||||
ID ImageID `db:"id"`
|
||||
TweetID TweetID `db:"tweet_id"`
|
||||
Width int `db:"width"`
|
||||
Height int `db:"height"`
|
||||
RemoteURL string `db:"remote_url"`
|
||||
LocalFilename string `db:"local_filename"`
|
||||
IsDownloaded bool `db:"is_downloaded"`
|
||||
ID ImageID `db:"id"`
|
||||
TweetID TweetID `db:"tweet_id"`
|
||||
DMMessageID DMMessageID `db:"chat_message_id"`
|
||||
Width int `db:"width"`
|
||||
Height int `db:"height"`
|
||||
RemoteURL string `db:"remote_url"`
|
||||
LocalFilename string `db:"local_filename"`
|
||||
IsDownloaded bool `db:"is_downloaded"`
|
||||
}
|
||||
|
||||
func ParseAPIMedia(apiMedia APIMedia) Image {
|
||||
|
@ -203,7 +203,8 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
||||
continue
|
||||
}
|
||||
|
||||
new_video := ParseAPIVideo(entity, ret.ID) // This assigns TweetID
|
||||
new_video := ParseAPIVideo(entity)
|
||||
new_video.TweetID = ret.ID
|
||||
ret.Videos = append(ret.Videos, new_video)
|
||||
|
||||
// Remove the thumbnail from the Images list
|
||||
|
@ -8,18 +8,19 @@ import (
|
||||
)
|
||||
|
||||
type Url struct {
|
||||
TweetID TweetID `db:"tweet_id"`
|
||||
Domain string `db:"domain"`
|
||||
Text string `db:"text"`
|
||||
ShortText string `db:"short_text"`
|
||||
Title string `db:"title"`
|
||||
Description string `db:"description"`
|
||||
ThumbnailWidth int `db:"thumbnail_width"`
|
||||
ThumbnailHeight int `db:"thumbnail_height"`
|
||||
ThumbnailRemoteUrl string `db:"thumbnail_remote_url"`
|
||||
ThumbnailLocalPath string `db:"thumbnail_local_path"`
|
||||
CreatorID UserID `db:"creator_id"`
|
||||
SiteID UserID `db:"site_id"`
|
||||
TweetID TweetID `db:"tweet_id"`
|
||||
DMMessageID DMMessageID `db:"chat_message_id"`
|
||||
Domain string `db:"domain"`
|
||||
Text string `db:"text"`
|
||||
ShortText string `db:"short_text"`
|
||||
Title string `db:"title"`
|
||||
Description string `db:"description"`
|
||||
ThumbnailWidth int `db:"thumbnail_width"`
|
||||
ThumbnailHeight int `db:"thumbnail_height"`
|
||||
ThumbnailRemoteUrl string `db:"thumbnail_remote_url"`
|
||||
ThumbnailLocalPath string `db:"thumbnail_local_path"`
|
||||
CreatorID UserID `db:"creator_id"`
|
||||
SiteID UserID `db:"site_id"`
|
||||
|
||||
HasCard bool `db:"has_card"`
|
||||
HasThumbnail bool `db:"has_thumbnail"`
|
||||
|
@ -12,12 +12,13 @@ type VideoID int64
|
||||
// from someone else).
|
||||
|
||||
type Video struct {
|
||||
ID VideoID `db:"id"`
|
||||
TweetID TweetID `db:"tweet_id"`
|
||||
Width int `db:"width"`
|
||||
Height int `db:"height"`
|
||||
RemoteURL string `db:"remote_url"`
|
||||
LocalFilename string `db:"local_filename"`
|
||||
ID VideoID `db:"id"`
|
||||
TweetID TweetID `db:"tweet_id"`
|
||||
DMMessageID DMMessageID `db:"chat_message_id"`
|
||||
Width int `db:"width"`
|
||||
Height int `db:"height"`
|
||||
RemoteURL string `db:"remote_url"`
|
||||
LocalFilename string `db:"local_filename"`
|
||||
|
||||
ThumbnailRemoteUrl string `db:"thumbnail_remote_url"`
|
||||
ThumbnailLocalPath string `db:"thumbnail_local_filename"`
|
||||
@ -38,7 +39,7 @@ func get_filename(remote_url string) string {
|
||||
return path.Base(u.Path)
|
||||
}
|
||||
|
||||
func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
|
||||
func ParseAPIVideo(apiVideo APIExtendedMedia) Video {
|
||||
variants := apiVideo.VideoInfo.Variants
|
||||
sort.Sort(variants)
|
||||
video_remote_url := variants[0].URL
|
||||
@ -66,7 +67,6 @@ func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
|
||||
|
||||
return Video{
|
||||
ID: VideoID(apiVideo.ID),
|
||||
TweetID: tweet_id,
|
||||
Width: apiVideo.OriginalInfo.Width,
|
||||
Height: apiVideo.OriginalInfo.Height,
|
||||
RemoteURL: video_remote_url,
|
||||
|
@ -21,10 +21,8 @@ func TestParseAPIVideo(t *testing.T) {
|
||||
err = json.Unmarshal(data, &apivideo)
|
||||
require.NoError(err)
|
||||
|
||||
tweet_id := TweetID(28)
|
||||
video := ParseAPIVideo(apivideo, tweet_id)
|
||||
video := ParseAPIVideo(apivideo)
|
||||
assert.Equal(VideoID(1418951950020845568), video.ID)
|
||||
assert.Equal(tweet_id, video.TweetID)
|
||||
assert.Equal(1280, video.Height)
|
||||
assert.Equal(720, video.Width)
|
||||
assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL)
|
||||
@ -46,7 +44,6 @@ func TestParseGeoblockedVideo(t *testing.T) {
|
||||
err = json.Unmarshal(data, &apivideo)
|
||||
require.NoError(err)
|
||||
|
||||
tweet_id := TweetID(28)
|
||||
video := ParseAPIVideo(apivideo, tweet_id)
|
||||
video := ParseAPIVideo(apivideo)
|
||||
assert.True(video.IsGeoblocked)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user