Scraping now updates last_scraped_at and sets is_conversation_scraped
This commit is contained in:
parent
c1bcd54a11
commit
8e13e30ac5
@ -94,7 +94,9 @@ test $(find videos | wc -l) = "$((initial_videos_count + 1))"
|
|||||||
# Download a full thread
|
# Download a full thread
|
||||||
tw fetch_tweet https://twitter.com/RememberAfghan1/status/1429585423702052867
|
tw fetch_tweet https://twitter.com/RememberAfghan1/status/1429585423702052867
|
||||||
test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429585423702052867") = "RememberAfghan1"
|
test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429585423702052867") = "RememberAfghan1"
|
||||||
|
test $(sqlite3 twitter.db "select is_conversation_scraped, abs(last_scraped_at - strftime('%s','now')) < 30 from tweets where id = 1429585423702052867") = "1|1"
|
||||||
test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429584239570391042") = "michaelmalice"
|
test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429584239570391042") = "michaelmalice"
|
||||||
|
test $(sqlite3 twitter.db "select is_conversation_scraped from tweets where id = 1429584239570391042") = "0"
|
||||||
# test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429583672827465730") = "kanesays23" TODO: this guy got banned
|
# test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429583672827465730") = "kanesays23" TODO: this guy got banned
|
||||||
test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429616911315345414") = "NovaValentis"
|
test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429616911315345414") = "NovaValentis"
|
||||||
test $(sqlite3 twitter.db "select reply_mentions from tweets where id = 1429585423702052867") = "michaelmalice"
|
test $(sqlite3 twitter.db "select reply_mentions from tweets where id = 1429585423702052867") = "michaelmalice"
|
||||||
|
@ -163,9 +163,11 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
|||||||
ret.Polls = []Poll{poll}
|
ret.Polls = []Poll{poll}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process tombstones
|
// Process tombstones and other metadata
|
||||||
ret.TombstoneType = apiTweet.TombstoneText
|
ret.TombstoneType = apiTweet.TombstoneText
|
||||||
ret.IsStub = !(ret.TombstoneType == "")
|
ret.IsStub = !(ret.TombstoneType == "")
|
||||||
|
ret.LastScrapedAt = time.Unix(0, 0) // Caller will change this for the tweet that was actually scraped
|
||||||
|
ret.IsConversationScraped = false // Safe due to the "No Worsening" principle
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -200,6 +202,9 @@ func GetTweet(id TweetID) (Tweet, error) {
|
|||||||
* Return a list of tweets, including the original and the rest of its thread,
|
* Return a list of tweets, including the original and the rest of its thread,
|
||||||
* along with a list of associated users.
|
* along with a list of associated users.
|
||||||
*
|
*
|
||||||
|
* Mark the main tweet as "is_conversation_scraped = true", and update its "last_scraped_at"
|
||||||
|
* value.
|
||||||
|
*
|
||||||
* args:
|
* args:
|
||||||
* - id: the ID of the tweet to get
|
* - id: the ID of the tweet to get
|
||||||
*
|
*
|
||||||
@ -230,6 +235,18 @@ func GetTweetFull(id TweetID) (tweets []Tweet, retweets []Retweet, users []User,
|
|||||||
users = append(users, fetched_user)
|
users = append(users, fetched_user)
|
||||||
}
|
}
|
||||||
tweets, retweets, _users, err := ParseTweetResponse(tweet_response)
|
tweets, retweets, _users, err := ParseTweetResponse(tweet_response)
|
||||||
|
|
||||||
|
// Find the main tweet and update its "is_conversation_scraped" and "last_scraped_at"
|
||||||
|
scrape_time := time.Now()
|
||||||
|
for i, t := range(tweets) {
|
||||||
|
fmt.Printf("Checking tweet %d (%v)\n", t.ID, t.LastScrapedAt)
|
||||||
|
if t.ID == id {
|
||||||
|
// Index the slice because `tweets[i]` is a reference, whereas `t` is a copy
|
||||||
|
tweets[i].LastScrapedAt = scrape_time
|
||||||
|
tweets[i].IsConversationScraped = true
|
||||||
|
fmt.Printf("Updating tweet %d: %v\n", tweets[i].ID, tweets[i].LastScrapedAt.Unix())
|
||||||
|
}
|
||||||
|
}
|
||||||
users = append(users, _users...)
|
users = append(users, _users...)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user