Add parsing of DM images, videos and URLs
This commit is contained in:
parent
3718b152b6
commit
0d12166966
@ -51,6 +51,7 @@ type APIExtendedMedia struct {
|
|||||||
R interface{} `json:"r"`
|
R interface{} `json:"r"`
|
||||||
} `json:"mediaStats"`
|
} `json:"mediaStats"`
|
||||||
} `json:"ext"`
|
} `json:"ext"`
|
||||||
|
URL string `json:"url"` // For DM videos
|
||||||
}
|
}
|
||||||
|
|
||||||
type APICard struct {
|
type APICard struct {
|
||||||
|
@ -28,10 +28,12 @@ type APIDMMessage struct {
|
|||||||
ReplyData struct {
|
ReplyData struct {
|
||||||
ID int `json:"id,string"`
|
ID int `json:"id,string"`
|
||||||
} `json:"reply_data"`
|
} `json:"reply_data"`
|
||||||
|
Entities struct {
|
||||||
URLs []struct {
|
URLs []struct {
|
||||||
Url string `json:"url"`
|
ExpandedURL string `json:"expanded_url"`
|
||||||
Indices []int `json:"indices"`
|
ShortenedUrl string `json:"url"`
|
||||||
} `json:"urls"`
|
} `json:"urls"`
|
||||||
|
} `json:"entities"`
|
||||||
Attachment struct {
|
Attachment struct {
|
||||||
Tweet struct {
|
Tweet struct {
|
||||||
Url string `json:"url"`
|
Url string `json:"url"`
|
||||||
@ -40,6 +42,9 @@ type APIDMMessage struct {
|
|||||||
User APIUser `json:"user"`
|
User APIUser `json:"user"`
|
||||||
} `json:"status"`
|
} `json:"status"`
|
||||||
} `json:"tweet"`
|
} `json:"tweet"`
|
||||||
|
Photo APIMedia `json:"photo"`
|
||||||
|
Video APIExtendedMedia `json:"video"`
|
||||||
|
Card APICard `json:"card"`
|
||||||
} `json:"attachment"`
|
} `json:"attachment"`
|
||||||
} `json:"message_data"`
|
} `json:"message_data"`
|
||||||
MessageReactions []APIDMReaction `json:"message_reactions"`
|
MessageReactions []APIDMReaction `json:"message_reactions"`
|
||||||
@ -47,9 +52,25 @@ type APIDMMessage struct {
|
|||||||
|
|
||||||
// Remove embedded tweet short-URLs
|
// Remove embedded tweet short-URLs
|
||||||
func (m *APIDMMessage) NormalizeContent() {
|
func (m *APIDMMessage) NormalizeContent() {
|
||||||
|
// All URLs
|
||||||
|
for _, url := range m.MessageData.Entities.URLs {
|
||||||
|
index := strings.Index(m.MessageData.Text, url.ShortenedUrl)
|
||||||
|
if index == (len(m.MessageData.Text) - len(url.ShortenedUrl)) {
|
||||||
|
m.MessageData.Text = strings.TrimSpace(m.MessageData.Text[0:index])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Specific items
|
||||||
if m.MessageData.Attachment.Tweet.Status.ID != 0 {
|
if m.MessageData.Attachment.Tweet.Status.ID != 0 {
|
||||||
m.MessageData.Text = strings.Replace(m.MessageData.Text, m.MessageData.Attachment.Tweet.Url, "", 1)
|
m.MessageData.Text = strings.Replace(m.MessageData.Text, m.MessageData.Attachment.Tweet.Url, "", 1)
|
||||||
}
|
}
|
||||||
|
if m.MessageData.Attachment.Photo.ID != 0 {
|
||||||
|
m.MessageData.Text = strings.Replace(m.MessageData.Text, m.MessageData.Attachment.Photo.URL, "", 1)
|
||||||
|
}
|
||||||
|
if m.MessageData.Attachment.Video.ID != 0 {
|
||||||
|
m.MessageData.Text = strings.Replace(m.MessageData.Text, m.MessageData.Attachment.Video.URL, "", 1)
|
||||||
|
}
|
||||||
|
|
||||||
m.MessageData.Text = strings.TrimSpace(m.MessageData.Text)
|
m.MessageData.Text = strings.TrimSpace(m.MessageData.Text)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -81,6 +81,80 @@ func TestParseAPIDMMessageWithEmbeddedTweet(t *testing.T) {
|
|||||||
assert.True(is_ok)
|
assert.True(is_ok)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseAPIDMMessageWithEmbeddedImage(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
data, err := os.ReadFile("test_responses/dms/dm_message_with_image.json")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
var api_message APIDMMessage
|
||||||
|
err = json.Unmarshal(data, &api_message)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
trove := api_message.ToDMTrove()
|
||||||
|
|
||||||
|
assert.Len(trove.Messages, 1)
|
||||||
|
m, is_ok := trove.Messages[DMMessageID(1766224476729995648)]
|
||||||
|
assert.True(is_ok)
|
||||||
|
|
||||||
|
// Check that the short-URL is stripped
|
||||||
|
assert.Equal("A gastropub staffed by white college girls and the chefs are all Latino", m.Text)
|
||||||
|
|
||||||
|
assert.Len(m.Images, 1)
|
||||||
|
assert.Equal(m.ID, m.Images[0].DMMessageID)
|
||||||
|
assert.Equal("https://ton.twitter.com/1.1/ton/data/dm/1766224476729995648/1766224374648958976/L4Ah1GSh.jpg", m.Images[0].RemoteURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseAPIDMMessageWithEmbeddedVideo(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
data, err := os.ReadFile("test_responses/dms/dm_message_with_video.json")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
var api_message APIDMMessage
|
||||||
|
err = json.Unmarshal(data, &api_message)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
trove := api_message.ToDMTrove()
|
||||||
|
|
||||||
|
assert.Len(trove.Messages, 1)
|
||||||
|
m, is_ok := trove.Messages[DMMessageID(1766248283901776125)]
|
||||||
|
assert.True(is_ok)
|
||||||
|
|
||||||
|
// Check the short-URL is stripped
|
||||||
|
assert.Equal("", m.Text)
|
||||||
|
|
||||||
|
assert.Len(m.Videos, 1)
|
||||||
|
assert.Equal(m.ID, m.Videos[0].DMMessageID)
|
||||||
|
assert.Equal(
|
||||||
|
"https://video.twimg.com/dm_video/1766248268416385024/vid/avc1/500x280/edFuZXtEVvem158AjvmJ3SZ_1DdG9cbSoW4fm6cDF1k.mp4?tag=1",
|
||||||
|
m.Videos[0].RemoteURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseAPIDMMessageWithUrlCard(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
data, err := os.ReadFile("test_responses/dms/dm_message_with_url_card.json")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
var api_message APIDMMessage
|
||||||
|
err = json.Unmarshal(data, &api_message)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
trove := api_message.ToDMTrove()
|
||||||
|
|
||||||
|
assert.Len(trove.Messages, 1)
|
||||||
|
m, is_ok := trove.Messages[DMMessageID(1766255994668191902)]
|
||||||
|
assert.True(is_ok)
|
||||||
|
assert.Len(m.Urls, 1)
|
||||||
|
assert.Equal("You wrote this?", m.Text)
|
||||||
|
url := m.Urls[0]
|
||||||
|
assert.Equal(m.ID, url.DMMessageID)
|
||||||
|
assert.Equal("https://offline-twitter.com/introduction/data-ownership-and-composability/", url.Text)
|
||||||
|
assert.Equal("offline-twitter.com", url.Domain)
|
||||||
|
assert.Equal("Data ownership and composability", url.Title)
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseAPIDMConversation(t *testing.T) {
|
func TestParseAPIDMConversation(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
data, err := os.ReadFile("test_responses/dms/dm_chat_room.json")
|
data, err := os.ReadFile("test_responses/dms/dm_chat_room.json")
|
||||||
|
@ -29,6 +29,10 @@ type DMMessage struct {
|
|||||||
InReplyToID DMMessageID `db:"in_reply_to_id"`
|
InReplyToID DMMessageID `db:"in_reply_to_id"`
|
||||||
EmbeddedTweetID TweetID `db:"embedded_tweet_id"`
|
EmbeddedTweetID TweetID `db:"embedded_tweet_id"`
|
||||||
Reactions map[UserID]DMReaction
|
Reactions map[UserID]DMReaction
|
||||||
|
|
||||||
|
Images []Image
|
||||||
|
Videos []Video
|
||||||
|
Urls []Url
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseAPIDMMessage(message APIDMMessage) DMMessage {
|
func ParseAPIDMMessage(message APIDMMessage) DMMessage {
|
||||||
@ -47,5 +51,35 @@ func ParseAPIDMMessage(message APIDMMessage) DMMessage {
|
|||||||
reacc.DMMessageID = ret.ID
|
reacc.DMMessageID = ret.ID
|
||||||
ret.Reactions[reacc.SenderID] = reacc
|
ret.Reactions[reacc.SenderID] = reacc
|
||||||
}
|
}
|
||||||
|
if message.MessageData.Attachment.Photo.ID != 0 {
|
||||||
|
new_image := ParseAPIMedia(message.MessageData.Attachment.Photo)
|
||||||
|
new_image.DMMessageID = ret.ID
|
||||||
|
ret.Images = []Image{new_image}
|
||||||
|
}
|
||||||
|
if message.MessageData.Attachment.Video.ID != 0 {
|
||||||
|
entity := message.MessageData.Attachment.Video
|
||||||
|
if entity.Type == "video" || entity.Type == "animated_gif" {
|
||||||
|
new_video := ParseAPIVideo(entity)
|
||||||
|
new_video.DMMessageID = ret.ID
|
||||||
|
ret.Videos = append(ret.Videos, new_video)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process URLs and link previews
|
||||||
|
for _, url := range message.MessageData.Entities.URLs {
|
||||||
|
var new_url Url
|
||||||
|
if message.MessageData.Attachment.Card.ShortenedUrl == url.ShortenedUrl {
|
||||||
|
if message.MessageData.Attachment.Card.Name == "3691233323:audiospace" {
|
||||||
|
// This "url" is just a link to a Space. Don't process it as a Url
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
new_url = ParseAPIUrlCard(message.MessageData.Attachment.Card)
|
||||||
|
}
|
||||||
|
new_url.Text = url.ExpandedURL
|
||||||
|
new_url.ShortText = url.ShortenedUrl
|
||||||
|
new_url.DMMessageID = ret.ID
|
||||||
|
ret.Urls = append(ret.Urls, new_url)
|
||||||
|
}
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
@ -9,6 +9,7 @@ type ImageID int64
|
|||||||
type Image struct {
|
type Image struct {
|
||||||
ID ImageID `db:"id"`
|
ID ImageID `db:"id"`
|
||||||
TweetID TweetID `db:"tweet_id"`
|
TweetID TweetID `db:"tweet_id"`
|
||||||
|
DMMessageID DMMessageID `db:"chat_message_id"`
|
||||||
Width int `db:"width"`
|
Width int `db:"width"`
|
||||||
Height int `db:"height"`
|
Height int `db:"height"`
|
||||||
RemoteURL string `db:"remote_url"`
|
RemoteURL string `db:"remote_url"`
|
||||||
|
@ -203,7 +203,8 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
new_video := ParseAPIVideo(entity, ret.ID) // This assigns TweetID
|
new_video := ParseAPIVideo(entity)
|
||||||
|
new_video.TweetID = ret.ID
|
||||||
ret.Videos = append(ret.Videos, new_video)
|
ret.Videos = append(ret.Videos, new_video)
|
||||||
|
|
||||||
// Remove the thumbnail from the Images list
|
// Remove the thumbnail from the Images list
|
||||||
|
@ -9,6 +9,7 @@ import (
|
|||||||
|
|
||||||
type Url struct {
|
type Url struct {
|
||||||
TweetID TweetID `db:"tweet_id"`
|
TweetID TweetID `db:"tweet_id"`
|
||||||
|
DMMessageID DMMessageID `db:"chat_message_id"`
|
||||||
Domain string `db:"domain"`
|
Domain string `db:"domain"`
|
||||||
Text string `db:"text"`
|
Text string `db:"text"`
|
||||||
ShortText string `db:"short_text"`
|
ShortText string `db:"short_text"`
|
||||||
|
@ -14,6 +14,7 @@ type VideoID int64
|
|||||||
type Video struct {
|
type Video struct {
|
||||||
ID VideoID `db:"id"`
|
ID VideoID `db:"id"`
|
||||||
TweetID TweetID `db:"tweet_id"`
|
TweetID TweetID `db:"tweet_id"`
|
||||||
|
DMMessageID DMMessageID `db:"chat_message_id"`
|
||||||
Width int `db:"width"`
|
Width int `db:"width"`
|
||||||
Height int `db:"height"`
|
Height int `db:"height"`
|
||||||
RemoteURL string `db:"remote_url"`
|
RemoteURL string `db:"remote_url"`
|
||||||
@ -38,7 +39,7 @@ func get_filename(remote_url string) string {
|
|||||||
return path.Base(u.Path)
|
return path.Base(u.Path)
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
|
func ParseAPIVideo(apiVideo APIExtendedMedia) Video {
|
||||||
variants := apiVideo.VideoInfo.Variants
|
variants := apiVideo.VideoInfo.Variants
|
||||||
sort.Sort(variants)
|
sort.Sort(variants)
|
||||||
video_remote_url := variants[0].URL
|
video_remote_url := variants[0].URL
|
||||||
@ -66,7 +67,6 @@ func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
|
|||||||
|
|
||||||
return Video{
|
return Video{
|
||||||
ID: VideoID(apiVideo.ID),
|
ID: VideoID(apiVideo.ID),
|
||||||
TweetID: tweet_id,
|
|
||||||
Width: apiVideo.OriginalInfo.Width,
|
Width: apiVideo.OriginalInfo.Width,
|
||||||
Height: apiVideo.OriginalInfo.Height,
|
Height: apiVideo.OriginalInfo.Height,
|
||||||
RemoteURL: video_remote_url,
|
RemoteURL: video_remote_url,
|
||||||
|
@ -21,10 +21,8 @@ func TestParseAPIVideo(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &apivideo)
|
err = json.Unmarshal(data, &apivideo)
|
||||||
require.NoError(err)
|
require.NoError(err)
|
||||||
|
|
||||||
tweet_id := TweetID(28)
|
video := ParseAPIVideo(apivideo)
|
||||||
video := ParseAPIVideo(apivideo, tweet_id)
|
|
||||||
assert.Equal(VideoID(1418951950020845568), video.ID)
|
assert.Equal(VideoID(1418951950020845568), video.ID)
|
||||||
assert.Equal(tweet_id, video.TweetID)
|
|
||||||
assert.Equal(1280, video.Height)
|
assert.Equal(1280, video.Height)
|
||||||
assert.Equal(720, video.Width)
|
assert.Equal(720, video.Width)
|
||||||
assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL)
|
assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL)
|
||||||
@ -46,7 +44,6 @@ func TestParseGeoblockedVideo(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &apivideo)
|
err = json.Unmarshal(data, &apivideo)
|
||||||
require.NoError(err)
|
require.NoError(err)
|
||||||
|
|
||||||
tweet_id := TweetID(28)
|
video := ParseAPIVideo(apivideo)
|
||||||
video := ParseAPIVideo(apivideo, tweet_id)
|
|
||||||
assert.True(video.IsGeoblocked)
|
assert.True(video.IsGeoblocked)
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user