mirror of
1
Fork 0

[feature] Federate status language in and out (#2366)

* [feature] Federate status language in + out

* go fmt

* tests, little fix

* improve comments

* unnest a bit

* avoid unnecessary nil check

* use more descriptive variable for contentMap

* prefer instance languages when selecting from contentMap

* update docs to reflect lang selection

* rename rdfLangString -> rdfLangs

* update comments to mention Pollable

* iter through slice instead of map
This commit is contained in:
tobi 2023-11-21 15:13:30 +01:00 committed by GitHub
parent 1f962372af
commit cfefbc08d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 758 additions and 168 deletions

View File

@ -482,3 +482,64 @@ For the convenience of remote servers, GoToSocial will always provide both the `
GoToSocial tries to parse incoming Mentions in the same way it sends them out: as a `Mention` type entry in the `tag` property. However, when parsing incoming Mentions it's a bit more relaxed with regards to which properties must be set. GoToSocial tries to parse incoming Mentions in the same way it sends them out: as a `Mention` type entry in the `tag` property. However, when parsing incoming Mentions it's a bit more relaxed with regards to which properties must be set.
GoToSocial will prefer the `href` property, which can be either the ActivityPub ID/URI or the web URL of the target; if `href` is not present, it will fall back to using the `name` property. If neither property is present, the mention will be considered invalid and discarded. GoToSocial will prefer the `href` property, which can be either the ActivityPub ID/URI or the web URL of the target; if `href` is not present, it will fall back to using the `name` property. If neither property is present, the mention will be considered invalid and discarded.
## Content, ContentMap, and Language
In line with other ActivityPub implementations, GoToSocial uses the `content` and `contentMap` fields on `Object`s to infer the content and language of incoming posts, and to set the content and language on outgoing posts.
### Outgoing
If an outgoing `Object` (usually a `Note`) has content, it will be set as stringified HTML on the `content` field.
If the `content` is in a specific user-selected language, then the `Object` will also have the `contentMap` property set to a single-entry key/value map, where the key is a BCP47 language tag, and the value is the same content from the `content` field.
For example, a post written in English (`en`) will look something like this:
```json
{
"@context": "https://www.w3.org/ns/activitystreams",
"type": "Note",
"attributedTo": "http://example.org/users/i_p_freely",
"to": "https://www.w3.org/ns/activitystreams#Public",
"cc": "http://example.org/users/i_p_freely/followers",
"id": "http://example.org/users/i_p_freely/statuses/01FF25D5Q0DH7CHD57CTRS6WK0",
"url": "http://example.org/@i_p_freely/statuses/01FF25D5Q0DH7CHD57CTRS6WK0",
"published": "2021-11-20T13:32:16Z",
"content": "<p>This is an example note.</p>",
"contentMap": {
"en": "<p>This is an example note.</p>"
},
"attachment": [],
"replies": {...},
"sensitive": false,
"summary": "",
"tag": {...}
}
```
GoToSocial will always set the `content` field if the post has content, but it may omit the `contentMap` field if an old version of GoToSocial is in use, or if the language used by a user is not set or is not a recognized BCP47 language tag.
### Incoming
GoToSocial uses both the `content` and the `contentMap` properties on incoming `Object`s to determine the content and infer the intended "primary" language for that content. It uses the following algorithm:
#### Only `content` is set
Take that content only and mark language as unknown.
#### Both `content` and `contentMap` are set
Look for a language tag as key in the `contentMap`, with a value that matches the stringified HTML set in `content`.
If a match is found, use this as the post's language.
If a match is not found, keep content from `content` and mark language as unknown.
#### Only `contentMap` is set
If `contentMap` has only one entry, take the language tag and content value as the "primary" language and content.
If `contentMap` has multiple entries, we have no way of determining the intended preferred content and language of the post, since map order is not deterministic. In this case, try to pick a language and content entry that matches one of the GoToSocial instance's [configured languages](../configuration/instance.md). If no language can be matched this way, pick a language and content entry from the `contentMap` at random as the "primary" language and content.
!!! Note
In all of the above cases, if the inferred language cannot be parsed as a valid BCP47 language tag, language will fall back to unknown.

View File

@ -93,6 +93,12 @@ func noteWithMentions1() vocab.ActivityStreamsNote {
content := streams.NewActivityStreamsContentProperty() content := streams.NewActivityStreamsContentProperty()
content.AppendXMLSchemaString("hey @f0x and @dumpsterqueer") content.AppendXMLSchemaString("hey @f0x and @dumpsterqueer")
rdfLangString := make(map[string]string)
rdfLangString["en"] = "hey @f0x and @dumpsterqueer"
rdfLangString["fr"] = "bonjour @f0x et @dumpsterqueer"
content.AppendRDFLangString(rdfLangString)
note.SetActivityStreamsContent(content) note.SetActivityStreamsContent(content)
return note return note

View File

@ -631,27 +631,34 @@ func ExtractPublicKey(i WithPublicKey) (
return nil, nil, nil, gtserror.New("couldn't find public key") return nil, nil, nil, gtserror.New("couldn't find public key")
} }
// ExtractContent returns a string representation of the // ExtractContent returns an intermediary representation of
// given interface's Content property, or an empty string // the given interface's Content and/or ContentMap property.
// if no Content is found. func ExtractContent(i WithContent) gtsmodel.Content {
func ExtractContent(i WithContent) string { content := gtsmodel.Content{}
contentProperty := i.GetActivityStreamsContent()
if contentProperty == nil { contentProp := i.GetActivityStreamsContent()
return "" if contentProp == nil {
// No content at all.
return content
} }
for iter := contentProperty.Begin(); iter != contentProperty.End(); iter = iter.Next() { for iter := contentProp.Begin(); iter != contentProp.End(); iter = iter.Next() {
switch { switch {
// Content may be parsed as IRI, depending on case iter.IsRDFLangString() &&
// how it's formatted, so account for this. len(content.ContentMap) == 0:
case iter.IsXMLSchemaString(): content.ContentMap = iter.GetRDFLangString()
return iter.GetXMLSchemaString()
case iter.IsIRI(): case iter.IsXMLSchemaString() &&
return iter.GetIRI().String() content.Content == "":
content.Content = iter.GetXMLSchemaString()
case iter.IsIRI() &&
content.Content == "":
content.Content = iter.GetIRI().String()
} }
} }
return "" return content
} }
// ExtractAttachments attempts to extract barebones MediaAttachment objects from given AS interface type. // ExtractAttachments attempts to extract barebones MediaAttachment objects from given AS interface type.

View File

@ -30,10 +30,11 @@ type ExtractContentTestSuite struct {
func (suite *ExtractContentTestSuite) TestExtractContent1() { func (suite *ExtractContentTestSuite) TestExtractContent1() {
note := suite.noteWithMentions1 note := suite.noteWithMentions1
content := ap.ExtractContent(note) content := ap.ExtractContent(note)
suite.Equal("hey @f0x and @dumpsterqueer", content) suite.Equal("hey @f0x and @dumpsterqueer", content.Content)
suite.Equal("bonjour @f0x et @dumpsterqueer", content.ContentMap["fr"])
suite.Equal("hey @f0x and @dumpsterqueer", content.ContentMap["en"])
} }
func TestExtractContentTestSuite(t *testing.T) { func TestExtractContentTestSuite(t *testing.T) {

View File

@ -20,11 +20,12 @@ package ap
import ( import (
"github.com/superseriousbusiness/activity/pub" "github.com/superseriousbusiness/activity/pub"
"github.com/superseriousbusiness/activity/streams" "github.com/superseriousbusiness/activity/streams"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/text" "github.com/superseriousbusiness/gotosocial/internal/text"
) )
/* /*
NORMALIZE INCOMING INCOMING NORMALIZATION
The below functions should be called to normalize the content The below functions should be called to normalize the content
of messages *COMING INTO* GoToSocial via the federation API, of messages *COMING INTO* GoToSocial via the federation API,
either as the result of delivery from a remote instance to this either as the result of delivery from a remote instance to this
@ -84,39 +85,84 @@ func NormalizeIncomingActivity(activity pub.Activity, rawJSON map[string]interfa
} }
} }
// NormalizeIncomingContent replaces the Content of the given item // normalizeContent normalizes the given content
// with the sanitized version of the raw 'content' value from the // string by sanitizing its HTML and minimizing it.
// raw json object map.
// //
// noop if there was no content in the json object map or the // Noop for non-string content.
// content was not a plain string. func normalizeContent(rawContent interface{}) string {
func NormalizeIncomingContent(item WithContent, rawJSON map[string]interface{}) { if rawContent == nil {
rawContent, ok := rawJSON["content"] // Nothing to fix.
if !ok { return ""
// No content in rawJSON.
// TODO: In future we might also
// look for "contentMap" property.
return
} }
content, ok := rawContent.(string) content, ok := rawContent.(string)
if !ok { if !ok {
// Not interested in content arrays. // Not interested in
return // content slices etc.
return ""
} }
// Content should be HTML encoded by default: if content == "" {
// Nothing to fix.
return ""
}
// Content entries should be HTML encoded by default:
// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-content // https://www.w3.org/TR/activitystreams-vocabulary/#dfn-content
// //
// TODO: sanitize differently based on mediaType. // TODO: sanitize differently based on mediaType.
// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-mediatype // https://www.w3.org/TR/activitystreams-vocabulary/#dfn-mediatype
content = text.SanitizeToHTML(content) content = text.SanitizeToHTML(content)
content = text.MinifyHTML(content) content = text.MinifyHTML(content)
return content
}
// Set normalized content property from the raw string; // NormalizeIncomingContent replaces the Content property of the given
// this replaces any existing content property on the item. // item with the normalized versions of the raw 'content' and 'contentMap'
// values from the raw json object map.
//
// noop if there was no 'content' or 'contentMap' in the json object map.
func NormalizeIncomingContent(item WithContent, rawJSON map[string]interface{}) {
var (
rawContent = rawJSON["content"]
rawContentMap = rawJSON["contentMap"]
)
if rawContent == nil &&
rawContentMap == nil {
// Nothing to normalize,
// leave no content on item.
return
}
// Create wrapper for normalized content.
contentProp := streams.NewActivityStreamsContentProperty() contentProp := streams.NewActivityStreamsContentProperty()
// Fix 'content' if applicable.
content := normalizeContent(rawContent)
if content != "" {
contentProp.AppendXMLSchemaString(content) contentProp.AppendXMLSchemaString(content)
}
// Fix 'contentMap' if applicable.
contentMap, ok := rawContentMap.(map[string]interface{})
if ok {
rdfLangs := make(map[string]string, len(contentMap))
for lang, rawContent := range contentMap {
content := normalizeContent(rawContent)
if content != "" {
rdfLangs[lang] = content
}
}
if len(rdfLangs) != 0 {
contentProp.AppendRDFLangString(rdfLangs)
}
}
// Replace any existing content property
// on the item with normalized version.
item.SetActivityStreamsContent(contentProp) item.SetActivityStreamsContent(contentProp)
} }
@ -299,3 +345,204 @@ func NormalizeIncomingPollOptions(item WithOneOf, rawJSON map[string]interface{}
NormalizeIncomingName(choiceable, rawChoice) NormalizeIncomingName(choiceable, rawChoice)
} }
} }
/*
OUTGOING NORMALIZATION
The below functions should be called to normalize the content
of messages *GOING OUT OF* GoToSocial via the federation API,
either as the result of delivery to a remote instance from this
instance, or as a result of a remote instance doing an http call
to us to dereference something.
*/
// NormalizeOutgoingAttachmentProp makes sure that the 'attachment' entry of
// rawJSON, when present, is always a slice. Other AP implementations cope
// better with a one-element array than with a bare object.
//
// So this:
//
//	"attachment": {
//	  ...
//	}
//
// is rewritten in place as:
//
//	"attachment": [
//	  {
//	    ...
//	  }
//	]
//
// The item argument is not consulted; only rawJSON is read and modified.
//
// Nothing happens when rawJSON has no 'attachment' key, or when the value
// stored under that key is already a []interface{}.
func NormalizeOutgoingAttachmentProp(item WithAttachment, rawJSON map[string]interface{}) {
	raw, present := rawJSON["attachment"]
	if !present {
		// Key is absent, so there
		// is nothing to normalize.
		return
	}

	switch raw.(type) {
	case []interface{}:
		// Value is a slice already;
		// leave it exactly as it is.
	default:
		// Any other value gets wrapped
		// into a single-entry slice.
		rawJSON["attachment"] = []interface{}{raw}
	}
}
// NormalizeOutgoingContentProp splits go-fed's combined serialization of the
// content property into separate 'content' and 'contentMap' entries on rawJSON,
// which is the layout other AP implementations understand.
//
// A serialized "content" value shaped like this:
//
//	"content": [
//	  "hello world!",
//	  {
//	    "en": "hello world!"
//	  }
//	]
//
// ends up on rawJSON as:
//
//	"content": "hello world!",
//	"contentMap": {
//	  "en": "hello world!"
//	}
//
// Only the first non-empty plain string entry and the first non-nil language
// map entry of the item's content property are used. If no string is found
// the 'content' key is deleted from rawJSON; likewise, if no language map is
// found the 'contentMap' key is deleted.
//
// rawJSON is left untouched when the item has no content property, or when
// that property has zero entries.
func NormalizeOutgoingContentProp(item WithContent, rawJSON map[string]interface{}) {
	prop := item.GetActivityStreamsContent()
	if prop == nil {
		// No content property
		// at all, nothing to do.
		return
	}

	if prop.Len() == 0 {
		// Property has no
		// entries, nothing to do.
		return
	}

	var (
		text  string
		langs map[string]string
	)

	for iter := prop.Begin(); iter != prop.End(); iter = iter.Next() {
		// Keep only the first language
		// map encountered on the property.
		if iter.IsRDFLangString() && langs == nil {
			langs = iter.GetRDFLangString()
			continue
		}

		// Keep only the first plain
		// string encountered as well.
		if text == "" && iter.IsXMLSchemaString() {
			text = iter.GetXMLSchemaString()
		}
	}

	// Overwrite whatever shape the serializer
	// produced for 'content', or drop the key.
	if text == "" {
		delete(rawJSON, "content")
	} else {
		rawJSON["content"] = text
	}

	// Same again for 'contentMap'.
	if langs == nil {
		delete(rawJSON, "contentMap")
	} else {
		rawJSON["contentMap"] = langs
	}
}
// NormalizeOutgoingObjectProp re-serializes every entry of the item's Object
// property and stores the result under the 'object' key of rawJSON, replacing
// whatever was there before.
//
// Each entry is handled according to what it is:
//
//   - an IRI is written as its plain string form;
//   - an Accountable is passed through serializeAccountable;
//   - a Statusable (which covers Pollable too) is passed through serializeStatusable;
//   - anything else is serialized with its own Serialize method.
//
// Accountables and Statusables are serialized without '@context', because the
// wrapping type is expected to carry that already.
//
// A property with exactly one entry is unnested, so that:
//
//	"object": [
//	  {
//	    ...
//	  }
//	]
//
// is written out as:
//
//	"object": {
//	  ...
//	}
//
// Returns nil without touching rawJSON if the item has no Object property or
// the property is empty. Returns an error if an entry that is not an IRI has
// no resolvable type, or if serializing any entry fails; rawJSON is not
// modified in that case.
func NormalizeOutgoingObjectProp(item WithObject, rawJSON map[string]interface{}) error {
	prop := item.GetActivityStreamsObject()
	if prop == nil {
		// No Object property,
		// nothing to normalize.
		return nil
	}

	count := prop.Len()
	if count == 0 {
		// Object property is
		// empty, nothing to do.
		return nil
	}

	// Rebuild the list of objects from scratch,
	// custom-serializing each entry where needed.
	serialized := make([]interface{}, 0, count)

	for iter := prop.Begin(); iter != prop.End(); iter = iter.Next() {
		if iter.IsIRI() {
			// IRIs are just written as strings.
			serialized = append(serialized, iter.GetIRI().String())
			continue
		}

		t := iter.GetType()
		if t == nil {
			// Not an IRI, but not a type either.
			return gtserror.Newf("could not resolve object iter %T to vocab.Type", iter)
		}

		var (
			ser map[string]interface{}
			err error
		)

		// The wrapping type carries `@context`, so for
		// accountables and statusables it's left off
		// of the nested object (includeContext = false).
		typeName := t.GetTypeName()
		if IsAccountable(typeName) {
			ser, err = serializeAccountable(t, false)
		} else if IsStatusable(typeName) {
			// Pollables count as statusable too.
			ser, err = serializeStatusable(t, false)
		} else {
			// Fall back to the type's own serializer.
			ser, err = t.Serialize()
		}

		if err != nil {
			return err
		}

		serialized = append(serialized, ser)
	}

	if count == 1 {
		// Exactly one object: store
		// it directly, not in an array.
		rawJSON["object"] = serialized[0]
		return nil
	}

	// More than one object: store as array.
	rawJSON["object"] = serialized
	return nil
}

View File

@ -46,6 +46,9 @@ func (suite *NormalizeTestSuite) getStatusable() (vocab.ActivityStreamsNote, map
"https://example.org/users/someone/followers" "https://example.org/users/someone/followers"
], ],
"content": "UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class=\"hashtag\" data-tag=\"twittermigration\" href=\"https://example.org/tag/twittermigration\" rel=\"tag ugc\">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night&#39;s spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.", "content": "UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class=\"hashtag\" data-tag=\"twittermigration\" href=\"https://example.org/tag/twittermigration\" rel=\"tag ugc\">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night&#39;s spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.",
"contentMap": {
"en": "UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class=\"hashtag\" data-tag=\"twittermigration\" href=\"https://example.org/tag/twittermigration\" rel=\"tag ugc\">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night&#39;s spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues."
},
"context": "https://example.org/contexts/01GX0MSHPER1E0FT022Q209EJZ", "context": "https://example.org/contexts/01GX0MSHPER1E0FT022Q209EJZ",
"conversation": "https://example.org/contexts/01GX0MSHPER1E0FT022Q209EJZ", "conversation": "https://example.org/contexts/01GX0MSHPER1E0FT022Q209EJZ",
"id": "https://example.org/objects/01GX0MT2PA58JNSMK11MCS65YD", "id": "https://example.org/objects/01GX0MT2PA58JNSMK11MCS65YD",
@ -182,7 +185,15 @@ func (suite *NormalizeTestSuite) getAccountable() (vocab.ActivityStreamsPerson,
func (suite *NormalizeTestSuite) TestNormalizeActivityObject() { func (suite *NormalizeTestSuite) TestNormalizeActivityObject() {
note, rawNote := suite.getStatusable() note, rawNote := suite.getStatusable()
suite.Equal(`update: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" data-tag="twittermigration" href="https://example.org/tag/twittermigration" rel="tag ugc">#TwitterMigration%3C/a%3E.%3Cbr%3E%3Cbr%3EIn%20fact,%20100,000%20new%20accounts%20have%20been%20created%20since%20last%20night.%3Cbr%3E%3Cbr%3ESince%20last%20night&%2339;s%20spike%208,000-12,000%20new%20accounts%20are%20being%20created%20every%20hour.%3Cbr%3E%3Cbr%3EYesterday,%20I%20estimated%20that%20Mastodon%20would%20have%208%20million%20users%20by%20the%20end%20of%20the%20week.%20That%20might%20happen%20a%20lot%20sooner%20if%20this%20trend%20continues.`, ap.ExtractContent(note)) content := ap.ExtractContent(note)
suite.Equal(
`update: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" data-tag="twittermigration" href="https://example.org/tag/twittermigration" rel="tag ugc">#TwitterMigration%3C/a%3E.%3Cbr%3E%3Cbr%3EIn%20fact,%20100,000%20new%20accounts%20have%20been%20created%20since%20last%20night.%3Cbr%3E%3Cbr%3ESince%20last%20night&%2339;s%20spike%208,000-12,000%20new%20accounts%20are%20being%20created%20every%20hour.%3Cbr%3E%3Cbr%3EYesterday,%20I%20estimated%20that%20Mastodon%20would%20have%208%20million%20users%20by%20the%20end%20of%20the%20week.%20That%20might%20happen%20a%20lot%20sooner%20if%20this%20trend%20continues.`,
content.Content,
)
// Malformed contentMap entry
// will not be extractable yet.
suite.Empty(content.ContentMap["en"])
create := testrig.WrapAPNoteInCreate( create := testrig.WrapAPNoteInCreate(
testrig.URLMustParse("https://example.org/create_something"), testrig.URLMustParse("https://example.org/create_something"),
@ -192,7 +203,18 @@ func (suite *NormalizeTestSuite) TestNormalizeActivityObject() {
) )
ap.NormalizeIncomingActivity(create, map[string]interface{}{"object": rawNote}) ap.NormalizeIncomingActivity(create, map[string]interface{}{"object": rawNote})
suite.Equal(`UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" href="https://example.org/tag/twittermigration" rel="tag ugc nofollow noreferrer noopener" target="_blank">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.`, ap.ExtractContent(note)) content = ap.ExtractContent(note)
suite.Equal(
`UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" href="https://example.org/tag/twittermigration" rel="tag ugc nofollow noreferrer noopener" target="_blank">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.`,
content.Content,
)
// Content map entry should now be extractable.
suite.Equal(
`UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" href="https://example.org/tag/twittermigration" rel="tag ugc nofollow noreferrer noopener" target="_blank">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.`,
content.ContentMap["en"],
)
} }
func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment() { func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment() {
@ -202,12 +224,14 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment
// the attachment(s) should be all jacked up. // the attachment(s) should be all jacked up.
suite.Equal(`{ suite.Equal(`{
"@context": "https://www.w3.org/ns/activitystreams", "@context": "https://www.w3.org/ns/activitystreams",
"attachment": { "attachment": [
{
"mediaType": "image/jpeg", "mediaType": "image/jpeg",
"name": "description: here's \u003c\u003ca\u003e\u003e picture of a #cat,%20it%27s%20cute!%20here%27s%20some%20special%20characters:%20%22%22%20%5C%20weeee%27%27%27%27", "name": "description: here's \u003c\u003ca\u003e\u003e picture of a #cat,%20it%27s%20cute!%20here%27s%20some%20special%20characters:%20%22%22%20%5C%20weeee%27%27%27%27",
"type": "Document", "type": "Document",
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg" "url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
}, }
],
"attributedTo": "https://example.org/users/hourlycatbot", "attributedTo": "https://example.org/users/hourlycatbot",
"id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ", "id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ",
"to": "https://www.w3.org/ns/activitystreams#Public", "to": "https://www.w3.org/ns/activitystreams#Public",
@ -222,12 +246,14 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment
// attachment should no longer be all jacked up. // attachment should no longer be all jacked up.
suite.Equal(`{ suite.Equal(`{
"@context": "https://www.w3.org/ns/activitystreams", "@context": "https://www.w3.org/ns/activitystreams",
"attachment": { "attachment": [
{
"mediaType": "image/jpeg", "mediaType": "image/jpeg",
"name": "DESCRIPTION: here's \u003c\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''", "name": "DESCRIPTION: here's \u003c\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''",
"type": "Document", "type": "Document",
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg" "url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
}, }
],
"attributedTo": "https://example.org/users/hourlycatbot", "attributedTo": "https://example.org/users/hourlycatbot",
"id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ", "id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ",
"to": "https://www.w3.org/ns/activitystreams#Public", "to": "https://www.w3.org/ns/activitystreams#Public",
@ -243,12 +269,14 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment
// the attachment(s) should be all jacked up. // the attachment(s) should be all jacked up.
suite.Equal(`{ suite.Equal(`{
"@context": "https://www.w3.org/ns/activitystreams", "@context": "https://www.w3.org/ns/activitystreams",
"attachment": { "attachment": [
{
"mediaType": "image/jpeg", "mediaType": "image/jpeg",
"name": "description: here's \u003c\u003ca\u003e\u003e picture of a #cat,%20it%27s%20cute!%20here%27s%20some%20special%20characters:%20%22%22%20%5C%20weeee%27%27%27%27", "name": "description: here's \u003c\u003ca\u003e\u003e picture of a #cat,%20it%27s%20cute!%20here%27s%20some%20special%20characters:%20%22%22%20%5C%20weeee%27%27%27%27",
"type": "Document", "type": "Document",
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg" "url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
}, }
],
"attributedTo": "https://example.org/users/hourlycatbot", "attributedTo": "https://example.org/users/hourlycatbot",
"id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ", "id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ",
"to": "https://www.w3.org/ns/activitystreams#Public", "to": "https://www.w3.org/ns/activitystreams#Public",
@ -263,12 +291,14 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment
// attachment should no longer be all jacked up. // attachment should no longer be all jacked up.
suite.Equal(`{ suite.Equal(`{
"@context": "https://www.w3.org/ns/activitystreams", "@context": "https://www.w3.org/ns/activitystreams",
"attachment": { "attachment": [
{
"mediaType": "image/jpeg", "mediaType": "image/jpeg",
"name": "DESCRIPTION: here's \u003c\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''", "name": "DESCRIPTION: here's \u003c\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''",
"type": "Document", "type": "Document",
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg" "url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
}, }
],
"attributedTo": "https://example.org/users/hourlycatbot", "attributedTo": "https://example.org/users/hourlycatbot",
"id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ", "id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ",
"to": "https://www.w3.org/ns/activitystreams#Public", "to": "https://www.w3.org/ns/activitystreams#Public",

View File

@ -18,10 +18,9 @@
package ap package ap
import ( import (
"fmt"
"github.com/superseriousbusiness/activity/streams" "github.com/superseriousbusiness/activity/streams"
"github.com/superseriousbusiness/activity/streams/vocab" "github.com/superseriousbusiness/activity/streams/vocab"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
) )
// Serialize is a custom serializer for ActivityStreams types. // Serialize is a custom serializer for ActivityStreams types.
@ -37,15 +36,18 @@ import (
// //
// - OrderedCollection: 'orderedItems' property will always be made into an array. // - OrderedCollection: 'orderedItems' property will always be made into an array.
// - Any Accountable type: 'attachment' property will always be made into an array. // - Any Accountable type: 'attachment' property will always be made into an array.
// - Update: any Accountable 'object's set on an update will be custom serialized as above. // - Any Statusable type: 'attachment' property will always be made into an array; 'content' and 'contentMap' will be normalized.
// - Any Activityable type: any 'object's set on an activity will be custom serialized as above.
func Serialize(t vocab.Type) (m map[string]interface{}, e error) { func Serialize(t vocab.Type) (m map[string]interface{}, e error) {
switch t.GetTypeName() { switch tn := t.GetTypeName(); {
case ObjectOrderedCollection: case tn == ObjectOrderedCollection:
return serializeOrderedCollection(t) return serializeOrderedCollection(t)
case ActorApplication, ActorGroup, ActorOrganization, ActorPerson, ActorService: case IsAccountable(tn):
return serializeAccountable(t, true) return serializeAccountable(t, true)
case ActivityUpdate: case IsStatusable(tn):
return serializeWithObject(t) return serializeStatusable(t, true)
case IsActivityable(tn):
return serializeActivityable(t, true)
default: default:
// No custom serializer necessary. // No custom serializer necessary.
return streams.Serialize(t) return streams.Serialize(t)
@ -61,8 +63,8 @@ func Serialize(t vocab.Type) (m map[string]interface{}, e error) {
// See: // See:
// - https://github.com/go-fed/activity/issues/139 // - https://github.com/go-fed/activity/issues/139
// - https://github.com/mastodon/mastodon/issues/24225 // - https://github.com/mastodon/mastodon/issues/24225
func serializeOrderedCollection(orderedCollection vocab.Type) (map[string]interface{}, error) { func serializeOrderedCollection(t vocab.Type) (map[string]interface{}, error) {
data, err := streams.Serialize(orderedCollection) data, err := streams.Serialize(t)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -99,7 +101,12 @@ func serializeOrderedCollection(orderedCollection vocab.Type) (map[string]interf
// If the accountable is being serialized as part of another object (eg., as the // If the accountable is being serialized as part of another object (eg., as the
// object of an activity), then includeContext should be set to false, as the // object of an activity), then includeContext should be set to false, as the
// @context entry should be included on the top-level/wrapping activity/object. // @context entry should be included on the top-level/wrapping activity/object.
func serializeAccountable(accountable vocab.Type, includeContext bool) (map[string]interface{}, error) { func serializeAccountable(t vocab.Type, includeContext bool) (map[string]interface{}, error) {
accountable, ok := t.(Accountable)
if !ok {
return nil, gtserror.Newf("vocab.Type %T not accountable", t)
}
var ( var (
data map[string]interface{} data map[string]interface{}
err error err error
@ -115,91 +122,61 @@ func serializeAccountable(accountable vocab.Type, includeContext bool) (map[stri
return nil, err return nil, err
} }
attachment, ok := data["attachment"] NormalizeOutgoingAttachmentProp(accountable, data)
return data, nil
}
func serializeStatusable(t vocab.Type, includeContext bool) (map[string]interface{}, error) {
statusable, ok := t.(Statusable)
if !ok { if !ok {
// No 'attachment', nothing to change. return nil, gtserror.Newf("vocab.Type %T not statusable", t)
return data, nil
}
if _, ok := attachment.([]interface{}); ok {
// Already slice.
return data, nil
}
// Coerce single-object to slice.
data["attachment"] = []interface{}{attachment}
return data, nil
}
func serializeWithObject(t vocab.Type) (map[string]interface{}, error) {
withObject, ok := t.(WithObject)
if !ok {
return nil, fmt.Errorf("serializeWithObject: could not resolve %T to WithObject", t)
}
data, err := streams.Serialize(t)
if err != nil {
return nil, err
}
object := withObject.GetActivityStreamsObject()
if object == nil {
// Nothing to do, bail early.
return data, nil
}
objectLen := object.Len()
if objectLen == 0 {
// Nothing to do, bail early.
return data, nil
}
// The thing we already serialized has objects
// on it, so we should see if we need to custom
// serialize any of those objects, and replace
// them on the data map as necessary.
objects := make([]interface{}, 0, objectLen)
for iter := object.Begin(); iter != object.End(); iter = iter.Next() {
if iter.IsIRI() {
// Plain IRIs don't need custom serialization.
objects = append(objects, iter.GetIRI().String())
continue
} }
var ( var (
objectType = iter.GetType() data map[string]interface{}
objectSer map[string]interface{} err error
) )
if objectType == nil { if includeContext {
// This is awkward. data, err = streams.Serialize(statusable)
return nil, fmt.Errorf("serializeWithObject: could not resolve object iter %T to vocab.Type", iter) } else {
} data, err = statusable.Serialize()
switch objectType.GetTypeName() {
case ActorApplication, ActorGroup, ActorOrganization, ActorPerson, ActorService:
// @context will be included in wrapping type already,
// we don't need to include it in the object itself.
objectSer, err = serializeAccountable(objectType, false)
default:
// No custom serializer for this type; serialize as normal.
objectSer, err = objectType.Serialize()
} }
if err != nil { if err != nil {
return nil, err return nil, err
} }
objects = append(objects, objectSer) NormalizeOutgoingAttachmentProp(statusable, data)
NormalizeOutgoingContentProp(statusable, data)
return data, nil
} }
if objectLen == 1 { func serializeActivityable(t vocab.Type, includeContext bool) (map[string]interface{}, error) {
// Unnest single object. activityable, ok := t.(Activityable)
data["object"] = objects[0] if !ok {
return nil, gtserror.Newf("vocab.Type %T not activityable", t)
}
var (
data map[string]interface{}
err error
)
if includeContext {
data, err = streams.Serialize(activityable)
} else { } else {
// Array of objects. data, err = activityable.Serialize()
data["object"] = objects }
if err != nil {
return nil, err
}
if err := NormalizeOutgoingObjectProp(activityable, data); err != nil {
return nil, err
} }
return data, nil return data, nil

View File

@ -237,3 +237,14 @@ const (
// VisibilityDefault is used when no other setting can be found. // VisibilityDefault is used when no other setting can be found.
VisibilityDefault Visibility = VisibilityUnlocked VisibilityDefault Visibility = VisibilityUnlocked
) )
// Content models the simple string content
// of a status along with its ContentMap,
// which contains content entries keyed by
// BCP47 language tag.
//
// Content and/or ContentMap may be zero/nil.
type Content struct {
// Content is the simple string content.
// May be empty.
Content string
// ContentMap holds content entries keyed
// by BCP47 language tag. May be nil.
ContentMap map[string]string
}

View File

@ -244,9 +244,15 @@ func (c *Converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusab
} }
// status.Content // status.Content
// status.Language
// //
// The (html-formatted) content of this status. // Many implementations set both content
status.Content = ap.ExtractContent(statusable) // and contentMap; we can use these to
// infer the language of the status.
status.Content, status.Language = ContentToContentLanguage(
ctx,
ap.ExtractContent(statusable),
)
// status.Attachments // status.Attachments
// //
@ -396,9 +402,6 @@ func (c *Converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusab
return &s return &s
}() }()
// language
// TODO: we might be able to extract this from the contentMap field
// ActivityStreamsType // ActivityStreamsType
status.ActivityStreamsType = statusable.GetTypeName() status.ActivityStreamsType = statusable.GetTypeName()
@ -707,7 +710,7 @@ func (c *Converter) ASFlagToReport(ctx context.Context, flaggable ap.Flaggable)
// For Mastodon, this will just be a string, or nothing. // For Mastodon, this will just be a string, or nothing.
// In Misskey's case, it may also contain the URLs of // In Misskey's case, it may also contain the URLs of
// one or more reported statuses, so extract these too. // one or more reported statuses, so extract these too.
content := ap.ExtractContent(flaggable) content := ap.ExtractContent(flaggable).Content
statusURIs := []*url.URL{} statusURIs := []*url.URL{}
inlineURLs := misskeyReportInlineURLs(content) inlineURLs := misskeyReportInlineURLs(content)
statusURIs = append(statusURIs, inlineURLs...) statusURIs = append(statusURIs, inlineURLs...)

View File

@ -45,6 +45,10 @@ func (suite *ASToInternalTestSuite) jsonToType(in string) vocab.Type {
suite.FailNow(err.Error()) suite.FailNow(err.Error())
} }
if statusable, ok := t.(ap.Statusable); ok {
ap.NormalizeIncomingContent(statusable, m)
}
return t return t
} }
@ -103,7 +107,8 @@ func (suite *ASToInternalTestSuite) TestParsePublicStatus() {
suite.NoError(err) suite.NoError(err)
suite.Equal("reading: Punishment and Reward in the Corporate University", status.ContentWarning) suite.Equal("reading: Punishment and Reward in the Corporate University", status.ContentWarning)
suite.Equal(`<p>&gt; So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content) suite.Equal(`<p>> So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)
suite.Equal("en", status.Language)
} }
func (suite *ASToInternalTestSuite) TestParsePublicStatusNoURL() { func (suite *ASToInternalTestSuite) TestParsePublicStatusNoURL() {
@ -117,7 +122,7 @@ func (suite *ASToInternalTestSuite) TestParsePublicStatusNoURL() {
suite.NoError(err) suite.NoError(err)
suite.Equal("reading: Punishment and Reward in the Corporate University", status.ContentWarning) suite.Equal("reading: Punishment and Reward in the Corporate University", status.ContentWarning)
suite.Equal(`<p>&gt; So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content) suite.Equal(`<p>> So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)
// on statuses with no URL in them (like ones we get from pleroma sometimes) we should use the AP URI of the status as URL // on statuses with no URL in them (like ones we get from pleroma sometimes) we should use the AP URI of the status as URL
suite.Equal("http://fossbros-anonymous.io/users/foss_satan/statuses/108138763199405167", status.URL) suite.Equal("http://fossbros-anonymous.io/users/foss_satan/statuses/108138763199405167", status.URL)

View File

@ -607,9 +607,17 @@ func (c *Converter) StatusToAS(ctx context.Context, s *gtsmodel.Status) (ap.Stat
// conversation // conversation
// TODO // TODO
// content -- the actual post itself // content -- the actual post
// itself, plus the language
contentProp := streams.NewActivityStreamsContentProperty() contentProp := streams.NewActivityStreamsContentProperty()
contentProp.AppendXMLSchemaString(s.Content) contentProp.AppendXMLSchemaString(s.Content)
if s.Language != "" {
contentProp.AppendRDFLangString(map[string]string{
s.Language: s.Content,
})
}
status.SetActivityStreamsContent(contentProp) status.SetActivityStreamsContent(contentProp)
// attachments // attachments

View File

@ -340,6 +340,9 @@ func (suite *InternalToASTestSuite) TestStatusToAS() {
"attributedTo": "http://localhost:8080/users/the_mighty_zork", "attributedTo": "http://localhost:8080/users/the_mighty_zork",
"cc": "http://localhost:8080/users/the_mighty_zork/followers", "cc": "http://localhost:8080/users/the_mighty_zork/followers",
"content": "hello everyone!", "content": "hello everyone!",
"contentMap": {
"en": "hello everyone!"
},
"id": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY", "id": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY",
"published": "2021-10-20T12:40:37+02:00", "published": "2021-10-20T12:40:37+02:00",
"replies": { "replies": {
@ -379,16 +382,21 @@ func (suite *InternalToASTestSuite) TestStatusWithTagsToASWithIDs() {
// http://joinmastodon.org/ns, https://www.w3.org/ns/activitystreams -- // http://joinmastodon.org/ns, https://www.w3.org/ns/activitystreams --
// will appear, so trim them out of the string for consistency // will appear, so trim them out of the string for consistency
trimmed := strings.SplitAfter(string(bytes), `"attachment":`)[1] trimmed := strings.SplitAfter(string(bytes), `"attachment":`)[1]
suite.Equal(` { suite.Equal(` [
{
"blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj", "blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj",
"mediaType": "image/jpeg", "mediaType": "image/jpeg",
"name": "Black and white image of some 50's style text saying: Welcome On Board", "name": "Black and white image of some 50's style text saying: Welcome On Board",
"type": "Document", "type": "Document",
"url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg" "url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg"
}, }
],
"attributedTo": "http://localhost:8080/users/admin", "attributedTo": "http://localhost:8080/users/admin",
"cc": "http://localhost:8080/users/admin/followers", "cc": "http://localhost:8080/users/admin/followers",
"content": "hello world! #welcome ! first post on the instance :rainbow: !", "content": "hello world! #welcome ! first post on the instance :rainbow: !",
"contentMap": {
"en": "hello world! #welcome ! first post on the instance :rainbow: !"
},
"id": "http://localhost:8080/users/admin/statuses/01F8MH75CBF9JFX4ZAD54N0W0R", "id": "http://localhost:8080/users/admin/statuses/01F8MH75CBF9JFX4ZAD54N0W0R",
"published": "2021-10-20T11:36:45Z", "published": "2021-10-20T11:36:45Z",
"replies": { "replies": {
@ -446,16 +454,21 @@ func (suite *InternalToASTestSuite) TestStatusWithTagsToASFromDB() {
// http://joinmastodon.org/ns, https://www.w3.org/ns/activitystreams -- // http://joinmastodon.org/ns, https://www.w3.org/ns/activitystreams --
// will appear, so trim them out of the string for consistency // will appear, so trim them out of the string for consistency
trimmed := strings.SplitAfter(string(bytes), `"attachment":`)[1] trimmed := strings.SplitAfter(string(bytes), `"attachment":`)[1]
suite.Equal(` { suite.Equal(` [
{
"blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj", "blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj",
"mediaType": "image/jpeg", "mediaType": "image/jpeg",
"name": "Black and white image of some 50's style text saying: Welcome On Board", "name": "Black and white image of some 50's style text saying: Welcome On Board",
"type": "Document", "type": "Document",
"url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg" "url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg"
}, }
],
"attributedTo": "http://localhost:8080/users/admin", "attributedTo": "http://localhost:8080/users/admin",
"cc": "http://localhost:8080/users/admin/followers", "cc": "http://localhost:8080/users/admin/followers",
"content": "hello world! #welcome ! first post on the instance :rainbow: !", "content": "hello world! #welcome ! first post on the instance :rainbow: !",
"contentMap": {
"en": "hello world! #welcome ! first post on the instance :rainbow: !"
},
"id": "http://localhost:8080/users/admin/statuses/01F8MH75CBF9JFX4ZAD54N0W0R", "id": "http://localhost:8080/users/admin/statuses/01F8MH75CBF9JFX4ZAD54N0W0R",
"published": "2021-10-20T11:36:45Z", "published": "2021-10-20T11:36:45Z",
"replies": { "replies": {
@ -519,6 +532,9 @@ func (suite *InternalToASTestSuite) TestStatusToASWithMentions() {
"http://localhost:8080/users/the_mighty_zork" "http://localhost:8080/users/the_mighty_zork"
], ],
"content": "hi @the_mighty_zork welcome to the instance!", "content": "hi @the_mighty_zork welcome to the instance!",
"contentMap": {
"en": "hi @the_mighty_zork welcome to the instance!"
},
"id": "http://localhost:8080/users/admin/statuses/01FF25D5Q0DH7CHD57CTRS6WK0", "id": "http://localhost:8080/users/admin/statuses/01FF25D5Q0DH7CHD57CTRS6WK0",
"inReplyTo": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY", "inReplyTo": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY",
"published": "2021-11-20T13:32:16Z", "published": "2021-11-20T13:32:16Z",

View File

@ -31,6 +31,8 @@ import (
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model" apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/config" "github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/language"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/regexes" "github.com/superseriousbusiness/gotosocial/internal/regexes"
"github.com/superseriousbusiness/gotosocial/internal/text" "github.com/superseriousbusiness/gotosocial/internal/text"
) )
@ -184,3 +186,102 @@ func placeholdUnknownAttachments(arr []apimodel.Attachment) (string, []apimodel.
return text.SanitizeToHTML(aside.String()), arr return text.SanitizeToHTML(aside.String()), arr
} }
// ContentToContentLanguage tries to
// extract a content string and language
// tag string from the given intermediary
// content.
//
// Selection rules, in order:
//
//   - ContentMap is nil: Content is returned
//     as-is (even if empty), with no language.
//   - Content is set: Content is returned, and
//     the language is the tag of the ContentMap
//     entry whose value equals Content, if any.
//     If several entries match, the lexically
//     smallest tag is used so that the result
//     does not depend on map iteration order.
//   - Content is empty and ContentMap has one
//     entry: that entry is used.
//   - Content is empty and ContentMap has any
//     other number of entries: the first of the
//     instance's configured languages that has a
//     non-empty entry is used; failing that, the
//     entry with the lexically smallest tag.
//
// A selected tag is normalized via language.Parse
// where possible.
//
// Either/both of the returned strings may
// be empty, depending on how things go.
func ContentToContentLanguage(
	ctx context.Context,
	content gtsmodel.Content,
) (
	string, // content
	string, // language
) {
	var (
		contentStr string
		langTagStr string
	)

	switch contentMap := content.ContentMap; {
	// Simplest case: no `contentMap`.
	// Return `content`, even if empty.
	case contentMap == nil:
		return content.Content, ""

	// `content` and `contentMap` set.
	// Try to infer "primary" language.
	case content.Content != "":
		// Assume `content` is intended
		// primary content, and look for
		// corresponding language tag.
		contentStr = content.Content

		// Map order is not guaranteed, so
		// when more than one entry carries
		// the same content, settle on the
		// smallest tag to stay deterministic.
		matched := false
		for t, c := range contentMap {
			if c != contentStr {
				continue
			}
			if !matched || t < langTagStr {
				langTagStr = t
				matched = true
			}
		}

	// `content` not set; `contentMap`
	// is set with only one value.
	// This must be the "primary" lang.
	case len(contentMap) == 1:
		// Use an empty loop to
		// get the values we want.
		// nolint:revive
		for langTagStr, contentStr = range contentMap {
		}

	// Only `contentMap` is set, with zero
	// or more than one value. Map order is
	// not guaranteed so we can't know the
	// "primary" language.
	//
	// Try to select content using our
	// instance's configured languages.
	//
	// In case of no hits, fall back to
	// the entry with the smallest tag.
	default:
		instanceLangs := config.GetInstanceLanguages()
		for _, tag := range instanceLangs.TagStrs() {
			// Only commit the tag on a hit, so a
			// miss can't leave a stale instance
			// language behind as the result.
			if c := contentMap[tag]; c != "" {
				langTagStr, contentStr = tag, c
				break
			}
		}

		// If nothing found, take the entry
		// with the lexically smallest tag.
		// With an empty map this is a no-op
		// and both results stay empty.
		if contentStr == "" {
			first := true
			for t, c := range contentMap {
				if first || t < langTagStr {
					langTagStr, contentStr = t, c
					first = false
				}
			}
		}
	}

	if langTagStr != "" {
		// Found a lang tag for this content,
		// make sure it's valid / parseable.
		lang, err := language.Parse(langTagStr)
		if err != nil {
			// NOTE(review): the unparseable tag is
			// still returned to the caller after
			// this warning; confirm that is intended.
			log.Warnf(
				ctx,
				"could not parse %s as BCP47 language tag in status contentMap: %v",
				langTagStr, err,
			)
		} else {
			// Inferred the language!
			// Use normalized version.
			langTagStr = lang.TagStr
		}
	}

	return contentStr, langTagStr
}

View File

@ -18,7 +18,12 @@
package typeutils package typeutils
import ( import (
"context"
"testing" "testing"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/language"
) )
func TestMisskeyReportContentURLs1(t *testing.T) { func TestMisskeyReportContentURLs1(t *testing.T) {
@ -44,3 +49,112 @@ misskey-formatted`
t.Fatalf("wanted 0 urls, got %d", l) t.Fatalf("wanted 0 urls, got %d", l)
} }
} }
// TestContentToContentLanguage is a table test for
// ContentToContentLanguage. For each scenario it sets
// the instance languages in config, calls the function
// with the scenario's content, and compares the returned
// content and language tag strings with the expected ones.
func TestContentToContentLanguage(t *testing.T) {
	type scenario struct {
		content           gtsmodel.Content
		instanceLanguages language.Languages
		expectedContent   string
		expectedLang      string
	}

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Shared by the scenarios that exercise
	// instance language preference; the map
	// is only read by the function under test.
	fourLangs := map[string]string{
		"en": "hello world",
		"ru": "Привет, мир!",
		"nl": "hallo wereld!",
		"ca": "Hola món!",
	}

	scenarios := []scenario{
		// 0: content only, no contentMap.
		{
			content: gtsmodel.Content{
				Content:    "hello world",
				ContentMap: nil,
			},
			expectedContent: "hello world",
			expectedLang:    "",
		},
		// 1: single-entry contentMap only.
		{
			content: gtsmodel.Content{
				Content: "",
				ContentMap: map[string]string{
					"en": "hello world",
				},
			},
			expectedContent: "hello world",
			expectedLang:    "en",
		},
		// 2: content matches one contentMap entry.
		{
			content: gtsmodel.Content{
				Content: "bonjour le monde",
				ContentMap: map[string]string{
					"en": "hello world",
					"fr": "bonjour le monde",
				},
			},
			expectedContent: "bonjour le monde",
			expectedLang:    "fr",
		},
		// 3: content matches no contentMap entry.
		{
			content: gtsmodel.Content{
				Content: "bonjour le monde",
				ContentMap: map[string]string{
					"en": "hello world",
				},
			},
			expectedContent: "bonjour le monde",
			expectedLang:    "",
		},
		// 4: multi-entry contentMap, instance prefers en.
		{
			content: gtsmodel.Content{
				Content:    "",
				ContentMap: fourLangs,
			},
			instanceLanguages: language.Languages{
				{TagStr: "en"},
				{TagStr: "ca"},
			},
			expectedContent: "hello world",
			expectedLang:    "en",
		},
		// 5: multi-entry contentMap, instance prefers ca.
		{
			content: gtsmodel.Content{
				Content:    "",
				ContentMap: fourLangs,
			},
			instanceLanguages: language.Languages{
				{TagStr: "ca"},
				{TagStr: "en"},
			},
			expectedContent: "Hola món!",
			expectedLang:    "ca",
		},
	}

	for idx := range scenarios {
		sc := scenarios[idx]

		// Configure instance languages
		// for this particular scenario.
		instanceLangs, err := language.InitLangs(sc.instanceLanguages.TagStrs())
		if err != nil {
			t.Fatal(err)
		}
		config.SetInstanceLanguages(instanceLangs)

		gotContent, gotLang := ContentToContentLanguage(ctx, sc.content)

		if gotContent != sc.expectedContent {
			t.Errorf(
				"test %d expected content '%s' got '%s'",
				idx, sc.expectedContent, gotContent,
			)
		}

		if gotLang != sc.expectedLang {
			t.Errorf(
				"test %d expected language '%s' got '%s'",
				idx, sc.expectedLang, gotLang,
			)
		}
	}
}

View File

@ -85,6 +85,9 @@ func (suite *WrapTestSuite) TestWrapNoteInCreate() {
"attributedTo": "http://localhost:8080/users/the_mighty_zork", "attributedTo": "http://localhost:8080/users/the_mighty_zork",
"cc": "http://localhost:8080/users/the_mighty_zork/followers", "cc": "http://localhost:8080/users/the_mighty_zork/followers",
"content": "hello everyone!", "content": "hello everyone!",
"contentMap": {
"en": "hello everyone!"
},
"id": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY", "id": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY",
"published": "2021-10-20T12:40:37+02:00", "published": "2021-10-20T12:40:37+02:00",
"replies": { "replies": {