[bugfix] Read Bookwyrm Articles more thoroughly (#1410)
This commit is contained in:
parent
382512a5a6
commit
271da016b9
|
@ -49,21 +49,22 @@ func ExtractPreferredUsername(i WithPreferredUsername) (string, error) {
|
||||||
return u.GetXMLSchemaString(), nil
|
return u.GetXMLSchemaString(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ExtractName returns a string representation of an interface's name property.
|
// ExtractName returns a string representation of an interface's name property,
|
||||||
func ExtractName(i WithName) (string, error) {
|
// or an empty string if this is not found.
|
||||||
|
func ExtractName(i WithName) string {
|
||||||
nameProp := i.GetActivityStreamsName()
|
nameProp := i.GetActivityStreamsName()
|
||||||
if nameProp == nil {
|
if nameProp == nil {
|
||||||
return "", errors.New("activityStreamsName not found")
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
// take the first name string we can find
|
// take the first name string we can find
|
||||||
for iter := nameProp.Begin(); iter != nameProp.End(); iter = iter.Next() {
|
for iter := nameProp.Begin(); iter != nameProp.End(); iter = iter.Next() {
|
||||||
if iter.IsXMLSchemaString() && iter.GetXMLSchemaString() != "" {
|
if iter.IsXMLSchemaString() && iter.GetXMLSchemaString() != "" {
|
||||||
return iter.GetXMLSchemaString(), nil
|
return iter.GetXMLSchemaString()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return "", errors.New("activityStreamsName not found")
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
// ExtractInReplyToURI extracts the inReplyToURI property (if present) from an interface.
|
// ExtractInReplyToURI extracts the inReplyToURI property (if present) from an interface.
|
||||||
|
@ -243,23 +244,24 @@ func ExtractImageURL(i WithImage) (*url.URL, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// ExtractSummary extracts the summary/content warning of an interface.
|
// ExtractSummary extracts the summary/content warning of an interface.
|
||||||
func ExtractSummary(i WithSummary) (string, error) {
|
// Will return an empty string if no summary was present.
|
||||||
|
func ExtractSummary(i WithSummary) string {
|
||||||
summaryProp := i.GetActivityStreamsSummary()
|
summaryProp := i.GetActivityStreamsSummary()
|
||||||
if summaryProp == nil || summaryProp.Len() == 0 {
|
if summaryProp == nil || summaryProp.Len() == 0 {
|
||||||
// no summary to speak of
|
// no summary to speak of
|
||||||
return "", nil
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
for iter := summaryProp.Begin(); iter != summaryProp.End(); iter = iter.Next() {
|
for iter := summaryProp.Begin(); iter != summaryProp.End(); iter = iter.Next() {
|
||||||
switch {
|
switch {
|
||||||
case iter.IsIRI():
|
case iter.IsIRI():
|
||||||
return iter.GetIRI().String(), nil
|
return iter.GetIRI().String()
|
||||||
case iter.IsXMLSchemaString():
|
case iter.IsXMLSchemaString():
|
||||||
return iter.GetXMLSchemaString(), nil
|
return iter.GetXMLSchemaString()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return "", nil
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
// ExtractDiscoverable extracts the Discoverable boolean of an interface.
|
// ExtractDiscoverable extracts the Discoverable boolean of an interface.
|
||||||
|
@ -364,36 +366,9 @@ func ExtractContent(i WithContent) string {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
// ExtractAttachments returns a slice of attachments on the interface.
|
|
||||||
func ExtractAttachments(i WithAttachment) ([]*gtsmodel.MediaAttachment, error) {
|
|
||||||
attachments := []*gtsmodel.MediaAttachment{}
|
|
||||||
attachmentProp := i.GetActivityStreamsAttachment()
|
|
||||||
if attachmentProp == nil {
|
|
||||||
return attachments, nil
|
|
||||||
}
|
|
||||||
for iter := attachmentProp.Begin(); iter != attachmentProp.End(); iter = iter.Next() {
|
|
||||||
t := iter.GetType()
|
|
||||||
if t == nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
attachmentable, ok := t.(Attachmentable)
|
|
||||||
if !ok {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
attachment, err := ExtractAttachment(attachmentable)
|
|
||||||
if err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
attachments = append(attachments, attachment)
|
|
||||||
}
|
|
||||||
return attachments, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// ExtractAttachment returns a gts model of an attachment from an attachmentable interface.
|
// ExtractAttachment returns a gts model of an attachment from an attachmentable interface.
|
||||||
func ExtractAttachment(i Attachmentable) (*gtsmodel.MediaAttachment, error) {
|
func ExtractAttachment(i Attachmentable) (*gtsmodel.MediaAttachment, error) {
|
||||||
attachment := >smodel.MediaAttachment{
|
attachment := >smodel.MediaAttachment{}
|
||||||
File: gtsmodel.File{},
|
|
||||||
}
|
|
||||||
|
|
||||||
attachmentURL, err := ExtractURL(i)
|
attachmentURL, err := ExtractURL(i)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -402,17 +377,12 @@ func ExtractAttachment(i Attachmentable) (*gtsmodel.MediaAttachment, error) {
|
||||||
attachment.RemoteURL = attachmentURL.String()
|
attachment.RemoteURL = attachmentURL.String()
|
||||||
|
|
||||||
mediaType := i.GetActivityStreamsMediaType()
|
mediaType := i.GetActivityStreamsMediaType()
|
||||||
if mediaType == nil || mediaType.Get() == "" {
|
if mediaType != nil {
|
||||||
return nil, errors.New("no media type")
|
attachment.File.ContentType = mediaType.Get()
|
||||||
}
|
}
|
||||||
attachment.File.ContentType = mediaType.Get()
|
|
||||||
attachment.Type = gtsmodel.FileTypeImage
|
attachment.Type = gtsmodel.FileTypeImage
|
||||||
|
|
||||||
name, err := ExtractName(i)
|
attachment.Description = ExtractName(i)
|
||||||
if err == nil {
|
|
||||||
attachment.Description = name
|
|
||||||
}
|
|
||||||
|
|
||||||
attachment.Blurhash = ExtractBlurhash(i)
|
attachment.Blurhash = ExtractBlurhash(i)
|
||||||
|
|
||||||
attachment.Processing = gtsmodel.ProcessingStatusReceived
|
attachment.Processing = gtsmodel.ProcessingStatusReceived
|
||||||
|
@ -470,10 +440,11 @@ func ExtractHashtag(i Hashtaggable) (*gtsmodel.Tag, error) {
|
||||||
}
|
}
|
||||||
tag.URL = hrefProp.GetIRI().String()
|
tag.URL = hrefProp.GetIRI().String()
|
||||||
|
|
||||||
name, err := ExtractName(i)
|
name := ExtractName(i)
|
||||||
if err != nil {
|
if name == "" {
|
||||||
return nil, err
|
return nil, errors.New("name prop empty")
|
||||||
}
|
}
|
||||||
|
|
||||||
tag.Name = strings.TrimPrefix(name, "#")
|
tag.Name = strings.TrimPrefix(name, "#")
|
||||||
|
|
||||||
return tag, nil
|
return tag, nil
|
||||||
|
@ -523,9 +494,9 @@ func ExtractEmoji(i Emojiable) (*gtsmodel.Emoji, error) {
|
||||||
emoji.URI = uri.String()
|
emoji.URI = uri.String()
|
||||||
emoji.Domain = uri.Host
|
emoji.Domain = uri.Host
|
||||||
|
|
||||||
name, err := ExtractName(i)
|
name := ExtractName(i)
|
||||||
if err != nil {
|
if name == "" {
|
||||||
return nil, err
|
return nil, errors.New("name prop empty")
|
||||||
}
|
}
|
||||||
emoji.Shortcode = strings.Trim(name, ":")
|
emoji.Shortcode = strings.Trim(name, ":")
|
||||||
|
|
||||||
|
@ -586,14 +557,13 @@ func ExtractMentions(i WithTag) ([]*gtsmodel.Mention, error) {
|
||||||
func ExtractMention(i Mentionable) (*gtsmodel.Mention, error) {
|
func ExtractMention(i Mentionable) (*gtsmodel.Mention, error) {
|
||||||
mention := >smodel.Mention{}
|
mention := >smodel.Mention{}
|
||||||
|
|
||||||
mentionString, err := ExtractName(i)
|
mentionString := ExtractName(i)
|
||||||
if err != nil {
|
if mentionString == "" {
|
||||||
return nil, err
|
return nil, errors.New("name prop empty")
|
||||||
}
|
}
|
||||||
|
|
||||||
// just make sure the mention string is valid so we can handle it properly later on...
|
// just make sure the mention string is valid so we can handle it properly later on...
|
||||||
_, _, err = util.ExtractNamestringParts(mentionString)
|
if _, _, err := util.ExtractNamestringParts(mentionString); err != nil {
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
mention.NameString = mentionString
|
mention.NameString = mentionString
|
||||||
|
|
|
@ -30,53 +30,6 @@ type ExtractAttachmentsTestSuite struct {
|
||||||
ExtractTestSuite
|
ExtractTestSuite
|
||||||
}
|
}
|
||||||
|
|
||||||
func (suite *ExtractAttachmentsTestSuite) TestExtractAttachments() {
|
|
||||||
note := streams.NewActivityStreamsNote()
|
|
||||||
note.SetActivityStreamsAttachment(suite.attachment1)
|
|
||||||
|
|
||||||
attachments, err := ap.ExtractAttachments(note)
|
|
||||||
suite.NoError(err)
|
|
||||||
suite.Len(attachments, 1)
|
|
||||||
|
|
||||||
attachment1 := attachments[0]
|
|
||||||
suite.Equal("image/jpeg", attachment1.File.ContentType)
|
|
||||||
suite.Equal("https://s3-us-west-2.amazonaws.com/plushcity/media_attachments/files/106/867/380/219/163/828/original/88e8758c5f011439.jpg", attachment1.RemoteURL)
|
|
||||||
suite.Equal("It's a cute plushie.", attachment1.Description)
|
|
||||||
suite.Equal("UxQ0EkRP_4tRxtRjWBt7%hozM_ayV@oLf6WB", attachment1.Blurhash)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (suite *ExtractAttachmentsTestSuite) TestExtractNoAttachments() {
|
|
||||||
note := streams.NewActivityStreamsNote()
|
|
||||||
|
|
||||||
attachments, err := ap.ExtractAttachments(note)
|
|
||||||
suite.NoError(err)
|
|
||||||
suite.Empty(attachments)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (suite *ExtractAttachmentsTestSuite) TestExtractAttachmentsMissingContentType() {
|
|
||||||
d1 := suite.document1
|
|
||||||
d1.SetActivityStreamsMediaType(streams.NewActivityStreamsMediaTypeProperty())
|
|
||||||
|
|
||||||
a1 := streams.NewActivityStreamsAttachmentProperty()
|
|
||||||
a1.AppendActivityStreamsDocument(d1)
|
|
||||||
|
|
||||||
note := streams.NewActivityStreamsNote()
|
|
||||||
note.SetActivityStreamsAttachment(a1)
|
|
||||||
|
|
||||||
attachments, err := ap.ExtractAttachments(note)
|
|
||||||
suite.NoError(err)
|
|
||||||
suite.Empty(attachments)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (suite *ExtractAttachmentsTestSuite) TestExtractAttachmentMissingContentType() {
|
|
||||||
d1 := suite.document1
|
|
||||||
d1.SetActivityStreamsMediaType(streams.NewActivityStreamsMediaTypeProperty())
|
|
||||||
|
|
||||||
attachment, err := ap.ExtractAttachment(d1)
|
|
||||||
suite.EqualError(err, "no media type")
|
|
||||||
suite.Nil(attachment)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (suite *ExtractAttachmentsTestSuite) TestExtractAttachmentMissingURL() {
|
func (suite *ExtractAttachmentsTestSuite) TestExtractAttachmentMissingURL() {
|
||||||
d1 := suite.document1
|
d1 := suite.document1
|
||||||
d1.SetActivityStreamsUrl(streams.NewActivityStreamsUrlProperty())
|
d1.SetActivityStreamsUrl(streams.NewActivityStreamsUrlProperty())
|
||||||
|
|
|
@ -51,6 +51,7 @@ type Statusable interface {
|
||||||
WithTypeName
|
WithTypeName
|
||||||
|
|
||||||
WithSummary
|
WithSummary
|
||||||
|
WithName
|
||||||
WithInReplyTo
|
WithInReplyTo
|
||||||
WithPublished
|
WithPublished
|
||||||
WithURL
|
WithURL
|
||||||
|
|
|
@ -86,9 +86,10 @@ func (c *converter) ASRepresentationToAccount(ctx context.Context, accountable a
|
||||||
|
|
||||||
// display name aka name
|
// display name aka name
|
||||||
// we default to the username, but take the more nuanced name property if it exists
|
// we default to the username, but take the more nuanced name property if it exists
|
||||||
acct.DisplayName = username
|
if displayName := ap.ExtractName(accountable); displayName != "" {
|
||||||
if displayName, err := ap.ExtractName(accountable); err == nil {
|
|
||||||
acct.DisplayName = displayName
|
acct.DisplayName = displayName
|
||||||
|
} else {
|
||||||
|
acct.DisplayName = username
|
||||||
}
|
}
|
||||||
|
|
||||||
// account emojis (used in bio, display name, fields)
|
// account emojis (used in bio, display name, fields)
|
||||||
|
@ -101,10 +102,7 @@ func (c *converter) ASRepresentationToAccount(ctx context.Context, accountable a
|
||||||
// TODO: fields aka attachment array
|
// TODO: fields aka attachment array
|
||||||
|
|
||||||
// note aka summary
|
// note aka summary
|
||||||
note, err := ap.ExtractSummary(accountable)
|
acct.Note = ap.ExtractSummary(accountable)
|
||||||
if err == nil && note != "" {
|
|
||||||
acct.Note = note
|
|
||||||
}
|
|
||||||
|
|
||||||
// check for bot and actor type
|
// check for bot and actor type
|
||||||
switch accountable.GetTypeName() {
|
switch accountable.GetTypeName() {
|
||||||
|
@ -219,6 +217,38 @@ func (c *converter) ASRepresentationToAccount(ctx context.Context, accountable a
|
||||||
return acct, nil
|
return acct, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *converter) extractAttachments(i ap.WithAttachment) []*gtsmodel.MediaAttachment {
|
||||||
|
attachmentProp := i.GetActivityStreamsAttachment()
|
||||||
|
if attachmentProp == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
attachments := make([]*gtsmodel.MediaAttachment, 0, attachmentProp.Len())
|
||||||
|
|
||||||
|
for iter := attachmentProp.Begin(); iter != attachmentProp.End(); iter = iter.Next() {
|
||||||
|
t := iter.GetType()
|
||||||
|
if t == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
attachmentable, ok := t.(ap.Attachmentable)
|
||||||
|
if !ok {
|
||||||
|
log.Error("ap attachment was not attachmentable")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
attachment, err := ap.ExtractAttachment(attachmentable)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error extracting attachment: %s", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
attachments = append(attachments, attachment)
|
||||||
|
}
|
||||||
|
|
||||||
|
return attachments
|
||||||
|
}
|
||||||
|
|
||||||
func (c *converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusable) (*gtsmodel.Status, error) {
|
func (c *converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusable) (*gtsmodel.Status, error) {
|
||||||
status := >smodel.Status{}
|
status := >smodel.Status{}
|
||||||
|
|
||||||
|
@ -243,11 +273,7 @@ func (c *converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusab
|
||||||
status.Content = ap.ExtractContent(statusable)
|
status.Content = ap.ExtractContent(statusable)
|
||||||
|
|
||||||
// attachments to dereference and fetch later on (we don't do that here)
|
// attachments to dereference and fetch later on (we don't do that here)
|
||||||
if attachments, err := ap.ExtractAttachments(statusable); err != nil {
|
status.Attachments = c.extractAttachments(statusable)
|
||||||
l.Infof("ASStatusToStatus: error extracting status attachments: %s", err)
|
|
||||||
} else {
|
|
||||||
status.Attachments = attachments
|
|
||||||
}
|
|
||||||
|
|
||||||
// hashtags to dereference later on
|
// hashtags to dereference later on
|
||||||
if hashtags, err := ap.ExtractHashtags(statusable); err != nil {
|
if hashtags, err := ap.ExtractHashtags(statusable); err != nil {
|
||||||
|
@ -271,10 +297,11 @@ func (c *converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusab
|
||||||
}
|
}
|
||||||
|
|
||||||
// cw string for this status
|
// cw string for this status
|
||||||
if cw, err := ap.ExtractSummary(statusable); err != nil {
|
// prefer Summary, fall back to Name
|
||||||
l.Infof("ASStatusToStatus: error extracting status summary: %s", err)
|
if summary := ap.ExtractSummary(statusable); summary != "" {
|
||||||
|
status.ContentWarning = summary
|
||||||
} else {
|
} else {
|
||||||
status.ContentWarning = cw
|
status.ContentWarning = ap.ExtractName(statusable)
|
||||||
}
|
}
|
||||||
|
|
||||||
// when was this status created?
|
// when was this status created?
|
||||||
|
|
|
@ -215,6 +215,60 @@ func (suite *ASToInternalTestSuite) TestParseOwncastService() {
|
||||||
fmt.Printf("\n\n\n%s\n\n\n", string(b))
|
fmt.Printf("\n\n\n%s\n\n\n", string(b))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (suite *ASToInternalTestSuite) TestParseBookwyrmStatus() {
|
||||||
|
authorAccount := suite.testAccounts["remote_account_1"]
|
||||||
|
|
||||||
|
raw := `{
|
||||||
|
"id": "` + authorAccount.URI + `/review/445260",
|
||||||
|
"type": "Article",
|
||||||
|
"published": "2022-11-09T16:34:28.488375+00:00",
|
||||||
|
"attributedTo": "` + authorAccount.URI + `",
|
||||||
|
"content": "<p>The original novel is a great read. Not just for the way it codified modern vampire lore. But for the way it's built entirely out of diary entries, letters, news fragments, telegrams and so on. For the way it shows modern science coming to grips with ancient superstition and figuring out how to deal with it. For showing an early example of a woman participating in her own rescue. And for some of the parts that <em>didn't</em> make it into general pop culture. (Count Dracula spends an awful lot of time in a shipping box.)</p>\n<p>In some senses it's the written-word equivalent of the \"found footage\" horror genre. Except the \"sources\" are wildly varying. John and Mina write their journals and letters to each other in shorthand. Business letters are of course written formally. Dr. Seward keeps an audio diary on a phonograph. Van Helsing's speech is rendered with every quirk of his Dutch accent and speech patterns. And then halfway through the book, when all the major characters finally come together...they collate all the documents and Mina transcribes them on a typewriter, and they pass around the first half of the book so they can all read up on what the rest of them have been doing! (Literally getting them all on the same page.)</p>\n<p>That's not to say it's flawless. It's unclear why some victims rise again as vampires while others don't. While the science/superstition contrast works well for the most part, eastern Europeans don't exactly come off very well. Especially when they'd talk about the \"gypsies\" carrying Dracula around Transylvania. I mean, it could have been a lot worse, but it's still jarring.</p>\n<p>Overall, though, it's an engaging read, whether approached as a book or, as Dracula Daily did, one day's letters at a time from May 3 through November 7. </p>\n<p>Dracula Daily: <a href=\"https://draculadaily.example.org/archive\">draculadaily.example.org/archive</a></p>\n<p>This review on my website: <a href=\"https://example.org/reviews/books/dracula/\">example.org/reviews/books/dracula/</a></p>",
|
||||||
|
"to": [
|
||||||
|
"https://www.w3.org/ns/activitystreams#Public"
|
||||||
|
],
|
||||||
|
"cc": [
|
||||||
|
"` + authorAccount.FollowersURI + `"
|
||||||
|
],
|
||||||
|
"replies": {
|
||||||
|
"id": "` + authorAccount.URI + `/review/445260/replies",
|
||||||
|
"type": "OrderedCollection",
|
||||||
|
"totalItems": 0,
|
||||||
|
"first": "` + authorAccount.URI + `/review/445260/replies?page=1",
|
||||||
|
"last": "` + authorAccount.URI + `/review/445260/replies?page=1",
|
||||||
|
"@context": "https://www.w3.org/ns/activitystreams"
|
||||||
|
},
|
||||||
|
"tag": [],
|
||||||
|
"attachment": [
|
||||||
|
{
|
||||||
|
"type": "Document",
|
||||||
|
"url": "` + authorAccount.URI + `/review/445260/images/previews/covers/451118-5f7bd96e-ca03-4865-ab14-baa7addaca8e.jpg",
|
||||||
|
"name": "Dracula (Paperback, 1992, Signet)",
|
||||||
|
"@context": "https://www.w3.org/ns/activitystreams"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"sensitive": false,
|
||||||
|
"inReplyToBook": "https://bookwyrm.social/book/451118",
|
||||||
|
"name": "Review of \"Dracula\" (5 stars): A great read, not just for codifying vampire lore, but the way it's built from letters and diaries.",
|
||||||
|
"rating": 5,
|
||||||
|
"@context": "https://www.w3.org/ns/activitystreams"
|
||||||
|
}`
|
||||||
|
|
||||||
|
t := suite.jsonToType(raw)
|
||||||
|
asArticle, ok := t.(ap.Statusable)
|
||||||
|
if !ok {
|
||||||
|
suite.FailNow("type not coercible")
|
||||||
|
}
|
||||||
|
|
||||||
|
status, err := suite.typeconverter.ASStatusToStatus(context.Background(), asArticle)
|
||||||
|
if err != nil {
|
||||||
|
suite.FailNow(err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
suite.Equal("Review of \"Dracula\" (5 stars): A great read, not just for codifying vampire lore, but the way it's built from letters and diaries.", status.ContentWarning)
|
||||||
|
suite.Len(status.Attachments, 1)
|
||||||
|
}
|
||||||
|
|
||||||
func (suite *ASToInternalTestSuite) TestParseFlag1() {
|
func (suite *ASToInternalTestSuite) TestParseFlag1() {
|
||||||
reportedAccount := suite.testAccounts["local_account_1"]
|
reportedAccount := suite.testAccounts["local_account_1"]
|
||||||
reportingAccount := suite.testAccounts["remote_account_1"]
|
reportingAccount := suite.testAccounts["remote_account_1"]
|
||||||
|
|
Loading…
Reference in New Issue