[feature] Federate status language in and out (#2366)
* [feature] Federate status language in + out * go fmt * tests, little fix * improve comments * unnest a bit * avoid unnecessary nil check * use more descriptive variable for contentMap * prefer instance languages when selecting from contentMap * update docs to reflect lang selection * rename rdfLangString -> rdfLangs * update comments to mention Pollable * iter through slice instead of map
This commit is contained in:
parent
1f962372af
commit
cfefbc08d8
|
@ -482,3 +482,64 @@ For the convenience of remote servers, GoToSocial will always provide both the `
|
|||
GoToSocial tries to parse incoming Mentions in the same way it sends them out: as a `Mention` type entry in the `tag` property. However, when parsing incoming Mentions it's a bit more relaxed with regard to which properties must be set.
|
||||
|
||||
GoToSocial will prefer the `href` property, which can be either the ActivityPub ID/URI or the web URL of the target; if `href` is not present, it will fall back to using the `name` property. If neither property is present, the mention will be considered invalid and discarded.
|
||||
|
||||
## Content, ContentMap, and Language
|
||||
|
||||
In line with other ActivityPub implementations, GoToSocial uses `content` and `contentMap` fields on `Objects` to infer content and language of incoming posts, and to set content and language on outgoing posts.
|
||||
|
||||
### Outgoing
|
||||
|
||||
If an outgoing `Object` (usually a `Note`) has content, it will be set as stringified HTML on the `content` field.
|
||||
|
||||
If the `content` is in a specific user-selected language, then the `Object` will also have the `contentMap` property set to a single-entry key/value map, where the key is a BCP47 language tag, and the value is the same content from the `content` field.
|
||||
|
||||
For example, a post written in English (`en`) will look something like this:
|
||||
|
||||
```json
|
||||
{
|
||||
"@context": "https://www.w3.org/ns/activitystreams",
|
||||
"type": "Note",
|
||||
"attributedTo": "http://example.org/users/i_p_freely",
|
||||
"to": "https://www.w3.org/ns/activitystreams#Public",
|
||||
"cc": "http://example.org/users/i_p_freely/followers",
|
||||
"id": "http://example.org/users/i_p_freely/statuses/01FF25D5Q0DH7CHD57CTRS6WK0",
|
||||
"url": "http://example.org/@i_p_freely/statuses/01FF25D5Q0DH7CHD57CTRS6WK0",
|
||||
"published": "2021-11-20T13:32:16Z",
|
||||
"content": "<p>This is an example note.</p>",
|
||||
"contentMap": {
|
||||
"en": "<p>This is an example note.</p>"
|
||||
},
|
||||
"attachment": [],
|
||||
"replies": {...},
|
||||
"sensitive": false,
|
||||
"summary": "",
|
||||
"tag": {...}
|
||||
}
|
||||
```
|
||||
|
||||
GoToSocial will always set the `content` field if the post has content, but it may not set the `contentMap` field if an old version of GoToSocial is in use, or if the language used by the user is not set or is not a recognized BCP47 language tag.
|
||||
|
||||
### Incoming
|
||||
|
||||
GoToSocial uses both the `content` and the `contentMap` properties on incoming `Object`s to determine the content and infer the intended "primary" language for that content. It uses the following algorithm:
|
||||
|
||||
#### Only `content` is set
|
||||
|
||||
Take that content only and mark language as unknown.
|
||||
|
||||
#### Both `content` and `contentMap` are set
|
||||
|
||||
Look for a language tag as key in the `contentMap`, with a value that matches the stringified HTML set in `content`.
|
||||
|
||||
If a match is found, use this as the post's language.
|
||||
|
||||
If a match is not found, keep content from `content` and mark language as unknown.
|
||||
|
||||
#### Only `contentMap` is set
|
||||
|
||||
If `contentMap` has only one entry, take the language tag and content value as the "primary" language and content.
|
||||
|
||||
If `contentMap` has multiple entries, we have no way of determining the intended preferred content and language of the post, since map order is not deterministic. In this case, try to pick a language and content entry that matches one of the GoToSocial instance's [configured languages](../configuration/instance.md). If no language can be matched this way, pick a language and content entry from the `contentMap` at random as the "primary" language and content.
|
||||
|
||||
!!! Note
|
||||
In all of the above cases, if the inferred language cannot be parsed as a valid BCP47 language tag, language will fall back to unknown.
|
||||
|
|
|
@ -93,6 +93,12 @@ func noteWithMentions1() vocab.ActivityStreamsNote {
|
|||
|
||||
content := streams.NewActivityStreamsContentProperty()
|
||||
content.AppendXMLSchemaString("hey @f0x and @dumpsterqueer")
|
||||
|
||||
rdfLangString := make(map[string]string)
|
||||
rdfLangString["en"] = "hey @f0x and @dumpsterqueer"
|
||||
rdfLangString["fr"] = "bonjour @f0x et @dumpsterqueer"
|
||||
content.AppendRDFLangString(rdfLangString)
|
||||
|
||||
note.SetActivityStreamsContent(content)
|
||||
|
||||
return note
|
||||
|
|
|
@ -631,27 +631,34 @@ func ExtractPublicKey(i WithPublicKey) (
|
|||
return nil, nil, nil, gtserror.New("couldn't find public key")
|
||||
}
|
||||
|
||||
// ExtractContent returns a string representation of the
|
||||
// given interface's Content property, or an empty string
|
||||
// if no Content is found.
|
||||
func ExtractContent(i WithContent) string {
|
||||
contentProperty := i.GetActivityStreamsContent()
|
||||
if contentProperty == nil {
|
||||
return ""
|
||||
// ExtractContent returns an intermediary representation of
|
||||
// the given interface's Content and/or ContentMap property.
|
||||
func ExtractContent(i WithContent) gtsmodel.Content {
|
||||
content := gtsmodel.Content{}
|
||||
|
||||
contentProp := i.GetActivityStreamsContent()
|
||||
if contentProp == nil {
|
||||
// No content at all.
|
||||
return content
|
||||
}
|
||||
|
||||
for iter := contentProperty.Begin(); iter != contentProperty.End(); iter = iter.Next() {
|
||||
for iter := contentProp.Begin(); iter != contentProp.End(); iter = iter.Next() {
|
||||
switch {
|
||||
// Content may be parsed as IRI, depending on
|
||||
// how it's formatted, so account for this.
|
||||
case iter.IsXMLSchemaString():
|
||||
return iter.GetXMLSchemaString()
|
||||
case iter.IsIRI():
|
||||
return iter.GetIRI().String()
|
||||
case iter.IsRDFLangString() &&
|
||||
len(content.ContentMap) == 0:
|
||||
content.ContentMap = iter.GetRDFLangString()
|
||||
|
||||
case iter.IsXMLSchemaString() &&
|
||||
content.Content == "":
|
||||
content.Content = iter.GetXMLSchemaString()
|
||||
|
||||
case iter.IsIRI() &&
|
||||
content.Content == "":
|
||||
content.Content = iter.GetIRI().String()
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
return content
|
||||
}
|
||||
|
||||
// ExtractAttachments attempts to extract barebones MediaAttachment objects from given AS interface type.
|
||||
|
|
|
@ -30,10 +30,11 @@ type ExtractContentTestSuite struct {
|
|||
|
||||
func (suite *ExtractContentTestSuite) TestExtractContent1() {
|
||||
note := suite.noteWithMentions1
|
||||
|
||||
content := ap.ExtractContent(note)
|
||||
|
||||
suite.Equal("hey @f0x and @dumpsterqueer", content)
|
||||
suite.Equal("hey @f0x and @dumpsterqueer", content.Content)
|
||||
suite.Equal("bonjour @f0x et @dumpsterqueer", content.ContentMap["fr"])
|
||||
suite.Equal("hey @f0x and @dumpsterqueer", content.ContentMap["en"])
|
||||
}
|
||||
|
||||
func TestExtractContentTestSuite(t *testing.T) {
|
||||
|
|
|
@ -20,11 +20,12 @@ package ap
|
|||
import (
|
||||
"github.com/superseriousbusiness/activity/pub"
|
||||
"github.com/superseriousbusiness/activity/streams"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/text"
|
||||
)
|
||||
|
||||
/*
|
||||
NORMALIZE INCOMING
|
||||
INCOMING NORMALIZATION
|
||||
The below functions should be called to normalize the content
|
||||
of messages *COMING INTO* GoToSocial via the federation API,
|
||||
either as the result of delivery from a remote instance to this
|
||||
|
@ -84,39 +85,84 @@ func NormalizeIncomingActivity(activity pub.Activity, rawJSON map[string]interfa
|
|||
}
|
||||
}
|
||||
|
||||
// NormalizeIncomingContent replaces the Content of the given item
|
||||
// with the sanitized version of the raw 'content' value from the
|
||||
// raw json object map.
|
||||
// normalizeContent normalizes the given content
|
||||
// string by sanitizing its HTML and minimizing it.
|
||||
//
|
||||
// noop if there was no content in the json object map or the
|
||||
// content was not a plain string.
|
||||
func NormalizeIncomingContent(item WithContent, rawJSON map[string]interface{}) {
|
||||
rawContent, ok := rawJSON["content"]
|
||||
if !ok {
|
||||
// No content in rawJSON.
|
||||
// TODO: In future we might also
|
||||
// look for "contentMap" property.
|
||||
return
|
||||
// Noop for non-string content.
|
||||
func normalizeContent(rawContent interface{}) string {
|
||||
if rawContent == nil {
|
||||
// Nothing to fix.
|
||||
return ""
|
||||
}
|
||||
|
||||
content, ok := rawContent.(string)
|
||||
if !ok {
|
||||
// Not interested in content arrays.
|
||||
return
|
||||
// Not interested in
|
||||
// content slices etc.
|
||||
return ""
|
||||
}
|
||||
|
||||
// Content should be HTML encoded by default:
|
||||
if content == "" {
|
||||
// Nothing to fix.
|
||||
return ""
|
||||
}
|
||||
|
||||
// Content entries should be HTML encoded by default:
|
||||
// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-content
|
||||
//
|
||||
// TODO: sanitize differently based on mediaType.
|
||||
// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-mediatype
|
||||
content = text.SanitizeToHTML(content)
|
||||
content = text.MinifyHTML(content)
|
||||
return content
|
||||
}
|
||||
|
||||
// Set normalized content property from the raw string;
|
||||
// this replaces any existing content property on the item.
|
||||
// NormalizeIncomingContent replaces the Content property of the given
|
||||
// item with the normalized versions of the raw 'content' and 'contentMap'
|
||||
// values from the raw json object map.
|
||||
//
|
||||
// noop if there was no 'content' or 'contentMap' in the json object map.
|
||||
func NormalizeIncomingContent(item WithContent, rawJSON map[string]interface{}) {
|
||||
var (
|
||||
rawContent = rawJSON["content"]
|
||||
rawContentMap = rawJSON["contentMap"]
|
||||
)
|
||||
|
||||
if rawContent == nil &&
|
||||
rawContentMap == nil {
|
||||
// Nothing to normalize,
|
||||
// leave no content on item.
|
||||
return
|
||||
}
|
||||
|
||||
// Create wrapper for normalized content.
|
||||
contentProp := streams.NewActivityStreamsContentProperty()
|
||||
|
||||
// Fix 'content' if applicable.
|
||||
content := normalizeContent(rawContent)
|
||||
if content != "" {
|
||||
contentProp.AppendXMLSchemaString(content)
|
||||
}
|
||||
|
||||
// Fix 'contentMap' if applicable.
|
||||
contentMap, ok := rawContentMap.(map[string]interface{})
|
||||
if ok {
|
||||
rdfLangs := make(map[string]string, len(contentMap))
|
||||
|
||||
for lang, rawContent := range contentMap {
|
||||
content := normalizeContent(rawContent)
|
||||
if content != "" {
|
||||
rdfLangs[lang] = content
|
||||
}
|
||||
}
|
||||
|
||||
if len(rdfLangs) != 0 {
|
||||
contentProp.AppendRDFLangString(rdfLangs)
|
||||
}
|
||||
}
|
||||
|
||||
// Replace any existing content property
|
||||
// on the item with normalized version.
|
||||
item.SetActivityStreamsContent(contentProp)
|
||||
}
|
||||
|
||||
|
@ -299,3 +345,204 @@ func NormalizeIncomingPollOptions(item WithOneOf, rawJSON map[string]interface{}
|
|||
NormalizeIncomingName(choiceable, rawChoice)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
OUTGOING NORMALIZATION
|
||||
The below functions should be called to normalize the content
|
||||
of messages *GOING OUT OF* GoToSocial via the federation API,
|
||||
either as the result of delivery to a remote instance from this
|
||||
instance, or as a result of a remote instance doing an http call
|
||||
to us to dereference something.
|
||||
*/
|
||||
|
||||
// NormalizeOutgoingAttachmentProp replaces single-entry Attachment objects with
|
||||
// single-entry arrays, for better compatibility with other AP implementations.
|
||||
//
|
||||
// Ie:
|
||||
//
|
||||
// "attachment": {
|
||||
// ...
|
||||
// }
|
||||
//
|
||||
// becomes:
|
||||
//
|
||||
// "attachment": [
|
||||
// {
|
||||
// ...
|
||||
// }
|
||||
// ]
|
||||
//
|
||||
// Noop for items with no attachments, or with attachments that are already a slice.
|
||||
func NormalizeOutgoingAttachmentProp(item WithAttachment, rawJSON map[string]interface{}) {
|
||||
attachment, ok := rawJSON["attachment"]
|
||||
if !ok {
|
||||
// No 'attachment',
|
||||
// nothing to change.
|
||||
return
|
||||
}
|
||||
|
||||
if _, ok := attachment.([]interface{}); ok {
|
||||
// Already slice,
|
||||
// nothing to change.
|
||||
return
|
||||
}
|
||||
|
||||
// Coerce single-object to slice.
|
||||
rawJSON["attachment"] = []interface{}{attachment}
|
||||
}
|
||||
|
||||
// NormalizeOutgoingContentProp normalizes go-fed's funky formatting of content and
|
||||
// contentMap properties to a format better understood by other AP implementations.
|
||||
//
|
||||
// Ie., incoming "content" property like this:
|
||||
//
|
||||
// "content": [
|
||||
// "hello world!",
|
||||
// {
|
||||
// "en": "hello world!"
|
||||
// }
|
||||
// ]
|
||||
//
|
||||
// Is unpacked to:
|
||||
//
|
||||
// "content": "hello world!",
|
||||
// "contentMap": {
|
||||
// "en": "hello world!"
|
||||
// }
|
||||
//
|
||||
// Noop if neither content nor contentMap are set.
|
||||
func NormalizeOutgoingContentProp(item WithContent, rawJSON map[string]interface{}) {
|
||||
contentProp := item.GetActivityStreamsContent()
|
||||
if contentProp == nil {
|
||||
// Nothing to do,
|
||||
// bail early.
|
||||
return
|
||||
}
|
||||
|
||||
contentPropLen := contentProp.Len()
|
||||
if contentPropLen == 0 {
|
||||
// Nothing to do,
|
||||
// bail early.
|
||||
return
|
||||
}
|
||||
|
||||
var (
|
||||
content string
|
||||
contentMap map[string]string
|
||||
)
|
||||
|
||||
for iter := contentProp.Begin(); iter != contentProp.End(); iter = iter.Next() {
|
||||
switch {
|
||||
case iter.IsRDFLangString() &&
|
||||
contentMap == nil:
|
||||
contentMap = iter.GetRDFLangString()
|
||||
|
||||
case content == "" &&
|
||||
iter.IsXMLSchemaString():
|
||||
content = iter.GetXMLSchemaString()
|
||||
}
|
||||
}
|
||||
|
||||
if content != "" {
|
||||
rawJSON["content"] = content
|
||||
} else {
|
||||
delete(rawJSON, "content")
|
||||
}
|
||||
|
||||
if contentMap != nil {
|
||||
rawJSON["contentMap"] = contentMap
|
||||
} else {
|
||||
delete(rawJSON, "contentMap")
|
||||
}
|
||||
}
|
||||
|
||||
// NormalizeOutgoingObjectProp normalizes each Object entry in the rawJSON of the given
|
||||
// item by calling custom serialization / normalization functions on them in turn.
|
||||
//
|
||||
// This function also unnests single-entry arrays, so that:
|
||||
//
|
||||
// "object": [
|
||||
// {
|
||||
// ...
|
||||
// }
|
||||
// ]
|
||||
//
|
||||
// Becomes:
|
||||
//
|
||||
// "object": {
|
||||
// ...
|
||||
// }
|
||||
//
|
||||
// Noop for each Object entry that isn't an Accountable or Statusable.
|
||||
func NormalizeOutgoingObjectProp(item WithObject, rawJSON map[string]interface{}) error {
|
||||
objectProp := item.GetActivityStreamsObject()
|
||||
if objectProp == nil {
|
||||
// Nothing to do,
|
||||
// bail early.
|
||||
return nil
|
||||
}
|
||||
|
||||
objectPropLen := objectProp.Len()
|
||||
if objectPropLen == 0 {
|
||||
// Nothing to do,
|
||||
// bail early.
|
||||
return nil
|
||||
}
|
||||
|
||||
// The thing we already serialized has objects
|
||||
// on it, so we should see if we need to custom
|
||||
// serialize any of those objects, and replace
|
||||
// them on the data map as necessary.
|
||||
objects := make([]interface{}, 0, objectPropLen)
|
||||
for iter := objectProp.Begin(); iter != objectProp.End(); iter = iter.Next() {
|
||||
if iter.IsIRI() {
|
||||
// Plain IRIs don't need custom serialization.
|
||||
objects = append(objects, iter.GetIRI().String())
|
||||
continue
|
||||
}
|
||||
|
||||
var (
|
||||
objectType = iter.GetType()
|
||||
objectSer map[string]interface{}
|
||||
)
|
||||
|
||||
if objectType == nil {
|
||||
// This is awkward.
|
||||
return gtserror.Newf("could not resolve object iter %T to vocab.Type", iter)
|
||||
}
|
||||
|
||||
var err error
|
||||
|
||||
// In the below accountable and statusable serialization,
|
||||
// `@context` will be included in the wrapping type already,
|
||||
// so we shouldn't also include it in the object itself.
|
||||
switch tn := objectType.GetTypeName(); {
|
||||
case IsAccountable(tn):
|
||||
objectSer, err = serializeAccountable(objectType, false)
|
||||
|
||||
case IsStatusable(tn):
|
||||
// IsStatusable includes Pollable as well.
|
||||
objectSer, err = serializeStatusable(objectType, false)
|
||||
|
||||
default:
|
||||
// No custom serializer for this type; serialize as normal.
|
||||
objectSer, err = objectType.Serialize()
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
objects = append(objects, objectSer)
|
||||
}
|
||||
|
||||
if objectPropLen == 1 {
|
||||
// Unnest single object.
|
||||
rawJSON["object"] = objects[0]
|
||||
} else {
|
||||
// Array of objects.
|
||||
rawJSON["object"] = objects
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -46,6 +46,9 @@ func (suite *NormalizeTestSuite) getStatusable() (vocab.ActivityStreamsNote, map
|
|||
"https://example.org/users/someone/followers"
|
||||
],
|
||||
"content": "UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class=\"hashtag\" data-tag=\"twittermigration\" href=\"https://example.org/tag/twittermigration\" rel=\"tag ugc\">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.",
|
||||
"contentMap": {
|
||||
"en": "UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class=\"hashtag\" data-tag=\"twittermigration\" href=\"https://example.org/tag/twittermigration\" rel=\"tag ugc\">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues."
|
||||
},
|
||||
"context": "https://example.org/contexts/01GX0MSHPER1E0FT022Q209EJZ",
|
||||
"conversation": "https://example.org/contexts/01GX0MSHPER1E0FT022Q209EJZ",
|
||||
"id": "https://example.org/objects/01GX0MT2PA58JNSMK11MCS65YD",
|
||||
|
@ -182,7 +185,15 @@ func (suite *NormalizeTestSuite) getAccountable() (vocab.ActivityStreamsPerson,
|
|||
|
||||
func (suite *NormalizeTestSuite) TestNormalizeActivityObject() {
|
||||
note, rawNote := suite.getStatusable()
|
||||
suite.Equal(`update: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" data-tag="twittermigration" href="https://example.org/tag/twittermigration" rel="tag ugc">#TwitterMigration%3C/a%3E.%3Cbr%3E%3Cbr%3EIn%20fact,%20100,000%20new%20accounts%20have%20been%20created%20since%20last%20night.%3Cbr%3E%3Cbr%3ESince%20last%20night&%2339;s%20spike%208,000-12,000%20new%20accounts%20are%20being%20created%20every%20hour.%3Cbr%3E%3Cbr%3EYesterday,%20I%20estimated%20that%20Mastodon%20would%20have%208%20million%20users%20by%20the%20end%20of%20the%20week.%20That%20might%20happen%20a%20lot%20sooner%20if%20this%20trend%20continues.`, ap.ExtractContent(note))
|
||||
content := ap.ExtractContent(note)
|
||||
suite.Equal(
|
||||
`update: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" data-tag="twittermigration" href="https://example.org/tag/twittermigration" rel="tag ugc">#TwitterMigration%3C/a%3E.%3Cbr%3E%3Cbr%3EIn%20fact,%20100,000%20new%20accounts%20have%20been%20created%20since%20last%20night.%3Cbr%3E%3Cbr%3ESince%20last%20night&%2339;s%20spike%208,000-12,000%20new%20accounts%20are%20being%20created%20every%20hour.%3Cbr%3E%3Cbr%3EYesterday,%20I%20estimated%20that%20Mastodon%20would%20have%208%20million%20users%20by%20the%20end%20of%20the%20week.%20That%20might%20happen%20a%20lot%20sooner%20if%20this%20trend%20continues.`,
|
||||
content.Content,
|
||||
)
|
||||
|
||||
// Malformed contentMap entry
|
||||
// will not be extractable yet.
|
||||
suite.Empty(content.ContentMap["en"])
|
||||
|
||||
create := testrig.WrapAPNoteInCreate(
|
||||
testrig.URLMustParse("https://example.org/create_something"),
|
||||
|
@ -192,7 +203,18 @@ func (suite *NormalizeTestSuite) TestNormalizeActivityObject() {
|
|||
)
|
||||
|
||||
ap.NormalizeIncomingActivity(create, map[string]interface{}{"object": rawNote})
|
||||
suite.Equal(`UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" href="https://example.org/tag/twittermigration" rel="tag ugc nofollow noreferrer noopener" target="_blank">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.`, ap.ExtractContent(note))
|
||||
content = ap.ExtractContent(note)
|
||||
|
||||
suite.Equal(
|
||||
`UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" href="https://example.org/tag/twittermigration" rel="tag ugc nofollow noreferrer noopener" target="_blank">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.`,
|
||||
content.Content,
|
||||
)
|
||||
|
||||
// Content map entry should now be extractable.
|
||||
suite.Equal(
|
||||
`UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" href="https://example.org/tag/twittermigration" rel="tag ugc nofollow noreferrer noopener" target="_blank">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.`,
|
||||
content.ContentMap["en"],
|
||||
)
|
||||
}
|
||||
|
||||
func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment() {
|
||||
|
@ -202,12 +224,14 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment
|
|||
// the attachment(s) should be all jacked up.
|
||||
suite.Equal(`{
|
||||
"@context": "https://www.w3.org/ns/activitystreams",
|
||||
"attachment": {
|
||||
"attachment": [
|
||||
{
|
||||
"mediaType": "image/jpeg",
|
||||
"name": "description: here's \u003c\u003ca\u003e\u003e picture of a #cat,%20it%27s%20cute!%20here%27s%20some%20special%20characters:%20%22%22%20%5C%20weeee%27%27%27%27",
|
||||
"type": "Document",
|
||||
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
|
||||
},
|
||||
}
|
||||
],
|
||||
"attributedTo": "https://example.org/users/hourlycatbot",
|
||||
"id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ",
|
||||
"to": "https://www.w3.org/ns/activitystreams#Public",
|
||||
|
@ -222,12 +246,14 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment
|
|||
// attachment should no longer be all jacked up.
|
||||
suite.Equal(`{
|
||||
"@context": "https://www.w3.org/ns/activitystreams",
|
||||
"attachment": {
|
||||
"attachment": [
|
||||
{
|
||||
"mediaType": "image/jpeg",
|
||||
"name": "DESCRIPTION: here's \u003c\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''",
|
||||
"type": "Document",
|
||||
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
|
||||
},
|
||||
}
|
||||
],
|
||||
"attributedTo": "https://example.org/users/hourlycatbot",
|
||||
"id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ",
|
||||
"to": "https://www.w3.org/ns/activitystreams#Public",
|
||||
|
@ -243,12 +269,14 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment
|
|||
// the attachment(s) should be all jacked up.
|
||||
suite.Equal(`{
|
||||
"@context": "https://www.w3.org/ns/activitystreams",
|
||||
"attachment": {
|
||||
"attachment": [
|
||||
{
|
||||
"mediaType": "image/jpeg",
|
||||
"name": "description: here's \u003c\u003ca\u003e\u003e picture of a #cat,%20it%27s%20cute!%20here%27s%20some%20special%20characters:%20%22%22%20%5C%20weeee%27%27%27%27",
|
||||
"type": "Document",
|
||||
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
|
||||
},
|
||||
}
|
||||
],
|
||||
"attributedTo": "https://example.org/users/hourlycatbot",
|
||||
"id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ",
|
||||
"to": "https://www.w3.org/ns/activitystreams#Public",
|
||||
|
@ -263,12 +291,14 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment
|
|||
// attachment should no longer be all jacked up.
|
||||
suite.Equal(`{
|
||||
"@context": "https://www.w3.org/ns/activitystreams",
|
||||
"attachment": {
|
||||
"attachment": [
|
||||
{
|
||||
"mediaType": "image/jpeg",
|
||||
"name": "DESCRIPTION: here's \u003c\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''",
|
||||
"type": "Document",
|
||||
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
|
||||
},
|
||||
}
|
||||
],
|
||||
"attributedTo": "https://example.org/users/hourlycatbot",
|
||||
"id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ",
|
||||
"to": "https://www.w3.org/ns/activitystreams#Public",
|
||||
|
|
|
@ -18,10 +18,9 @@
|
|||
package ap
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/superseriousbusiness/activity/streams"
|
||||
"github.com/superseriousbusiness/activity/streams/vocab"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
|
||||
)
|
||||
|
||||
// Serialize is a custom serializer for ActivityStreams types.
|
||||
|
@ -37,15 +36,18 @@ import (
|
|||
//
|
||||
// - OrderedCollection: 'orderedItems' property will always be made into an array.
|
||||
// - Any Accountable type: 'attachment' property will always be made into an array.
|
||||
// - Update: any Accountable 'object's set on an update will be custom serialized as above.
|
||||
// - Any Statusable type: 'attachment' property will always be made into an array; 'content' and 'contentMap' will be normalized.
|
||||
// - Any Activityable type: any 'object's set on an activity will be custom serialized as above.
|
||||
func Serialize(t vocab.Type) (m map[string]interface{}, e error) {
|
||||
switch t.GetTypeName() {
|
||||
case ObjectOrderedCollection:
|
||||
switch tn := t.GetTypeName(); {
|
||||
case tn == ObjectOrderedCollection:
|
||||
return serializeOrderedCollection(t)
|
||||
case ActorApplication, ActorGroup, ActorOrganization, ActorPerson, ActorService:
|
||||
case IsAccountable(tn):
|
||||
return serializeAccountable(t, true)
|
||||
case ActivityUpdate:
|
||||
return serializeWithObject(t)
|
||||
case IsStatusable(tn):
|
||||
return serializeStatusable(t, true)
|
||||
case IsActivityable(tn):
|
||||
return serializeActivityable(t, true)
|
||||
default:
|
||||
// No custom serializer necessary.
|
||||
return streams.Serialize(t)
|
||||
|
@ -61,8 +63,8 @@ func Serialize(t vocab.Type) (m map[string]interface{}, e error) {
|
|||
// See:
|
||||
// - https://github.com/go-fed/activity/issues/139
|
||||
// - https://github.com/mastodon/mastodon/issues/24225
|
||||
func serializeOrderedCollection(orderedCollection vocab.Type) (map[string]interface{}, error) {
|
||||
data, err := streams.Serialize(orderedCollection)
|
||||
func serializeOrderedCollection(t vocab.Type) (map[string]interface{}, error) {
|
||||
data, err := streams.Serialize(t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -99,7 +101,12 @@ func serializeOrderedCollection(orderedCollection vocab.Type) (map[string]interf
|
|||
// If the accountable is being serialized as part of another object (eg., as the
|
||||
// object of an activity), then includeContext should be set to false, as the
|
||||
// @context entry should be included on the top-level/wrapping activity/object.
|
||||
func serializeAccountable(accountable vocab.Type, includeContext bool) (map[string]interface{}, error) {
|
||||
func serializeAccountable(t vocab.Type, includeContext bool) (map[string]interface{}, error) {
|
||||
accountable, ok := t.(Accountable)
|
||||
if !ok {
|
||||
return nil, gtserror.Newf("vocab.Type %T not accountable", t)
|
||||
}
|
||||
|
||||
var (
|
||||
data map[string]interface{}
|
||||
err error
|
||||
|
@ -115,91 +122,61 @@ func serializeAccountable(accountable vocab.Type, includeContext bool) (map[stri
|
|||
return nil, err
|
||||
}
|
||||
|
||||
attachment, ok := data["attachment"]
|
||||
if !ok {
|
||||
// No 'attachment', nothing to change.
|
||||
return data, nil
|
||||
}
|
||||
|
||||
if _, ok := attachment.([]interface{}); ok {
|
||||
// Already slice.
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// Coerce single-object to slice.
|
||||
data["attachment"] = []interface{}{attachment}
|
||||
NormalizeOutgoingAttachmentProp(accountable, data)
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func serializeWithObject(t vocab.Type) (map[string]interface{}, error) {
|
||||
withObject, ok := t.(WithObject)
|
||||
func serializeStatusable(t vocab.Type, includeContext bool) (map[string]interface{}, error) {
|
||||
statusable, ok := t.(Statusable)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("serializeWithObject: could not resolve %T to WithObject", t)
|
||||
}
|
||||
|
||||
data, err := streams.Serialize(t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
object := withObject.GetActivityStreamsObject()
|
||||
if object == nil {
|
||||
// Nothing to do, bail early.
|
||||
return data, nil
|
||||
}
|
||||
|
||||
objectLen := object.Len()
|
||||
if objectLen == 0 {
|
||||
// Nothing to do, bail early.
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// The thing we already serialized has objects
|
||||
// on it, so we should see if we need to custom
|
||||
// serialize any of those objects, and replace
|
||||
// them on the data map as necessary.
|
||||
objects := make([]interface{}, 0, objectLen)
|
||||
for iter := object.Begin(); iter != object.End(); iter = iter.Next() {
|
||||
if iter.IsIRI() {
|
||||
// Plain IRIs don't need custom serialization.
|
||||
objects = append(objects, iter.GetIRI().String())
|
||||
continue
|
||||
return nil, gtserror.Newf("vocab.Type %T not statusable", t)
|
||||
}
|
||||
|
||||
var (
|
||||
objectType = iter.GetType()
|
||||
objectSer map[string]interface{}
|
||||
data map[string]interface{}
|
||||
err error
|
||||
)
|
||||
|
||||
if objectType == nil {
|
||||
// This is awkward.
|
||||
return nil, fmt.Errorf("serializeWithObject: could not resolve object iter %T to vocab.Type", iter)
|
||||
}
|
||||
|
||||
switch objectType.GetTypeName() {
|
||||
case ActorApplication, ActorGroup, ActorOrganization, ActorPerson, ActorService:
|
||||
// @context will be included in wrapping type already,
|
||||
// we don't need to include it in the object itself.
|
||||
objectSer, err = serializeAccountable(objectType, false)
|
||||
default:
|
||||
// No custom serializer for this type; serialize as normal.
|
||||
objectSer, err = objectType.Serialize()
|
||||
if includeContext {
|
||||
data, err = streams.Serialize(statusable)
|
||||
} else {
|
||||
data, err = statusable.Serialize()
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
objects = append(objects, objectSer)
|
||||
NormalizeOutgoingAttachmentProp(statusable, data)
|
||||
NormalizeOutgoingContentProp(statusable, data)
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func serializeActivityable(t vocab.Type, includeContext bool) (map[string]interface{}, error) {
|
||||
activityable, ok := t.(Activityable)
|
||||
if !ok {
|
||||
return nil, gtserror.Newf("vocab.Type %T not activityable", t)
|
||||
}
|
||||
|
||||
if objectLen == 1 {
|
||||
// Unnest single object.
|
||||
data["object"] = objects[0]
|
||||
var (
|
||||
data map[string]interface{}
|
||||
err error
|
||||
)
|
||||
|
||||
if includeContext {
|
||||
data, err = streams.Serialize(activityable)
|
||||
} else {
|
||||
// Array of objects.
|
||||
data["object"] = objects
|
||||
data, err = activityable.Serialize()
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := NormalizeOutgoingObjectProp(activityable, data); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return data, nil
|
||||
|
|
|
@ -237,3 +237,14 @@ const (
|
|||
// VisibilityDefault is used when no other setting can be found.
|
||||
VisibilityDefault Visibility = VisibilityUnlocked
|
||||
)
|
||||
|
||||
// Content models the simple string content
|
||||
// of a status along with its ContentMap,
|
||||
// which contains content entries keyed by
|
||||
// BCP47 language tag.
|
||||
//
|
||||
// Content and/or ContentMap may be zero/nil.
|
||||
type Content struct {
|
||||
// Content is the plain content string.
// It may be the empty string.
Content string
|
||||
// ContentMap holds content entries keyed
// by BCP47 language tag. It may be nil.
ContentMap map[string]string
|
||||
}
|
||||
|
|
|
@ -244,9 +244,15 @@ func (c *Converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusab
|
|||
}
|
||||
|
||||
// status.Content
|
||||
// status.Language
|
||||
//
|
||||
// The (html-formatted) content of this status.
|
||||
status.Content = ap.ExtractContent(statusable)
|
||||
// Many implementations set both content
|
||||
// and contentMap; we can use these to
|
||||
// infer the language of the status.
|
||||
status.Content, status.Language = ContentToContentLanguage(
|
||||
ctx,
|
||||
ap.ExtractContent(statusable),
|
||||
)
|
||||
|
||||
// status.Attachments
|
||||
//
|
||||
|
@ -396,9 +402,6 @@ func (c *Converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusab
|
|||
return &s
|
||||
}()
|
||||
|
||||
// language
|
||||
// TODO: we might be able to extract this from the contentMap field
|
||||
|
||||
// ActivityStreamsType
|
||||
status.ActivityStreamsType = statusable.GetTypeName()
|
||||
|
||||
|
@ -707,7 +710,7 @@ func (c *Converter) ASFlagToReport(ctx context.Context, flaggable ap.Flaggable)
|
|||
// For Mastodon, this will just be a string, or nothing.
|
||||
// In Misskey's case, it may also contain the URLs of
|
||||
// one or more reported statuses, so extract these too.
|
||||
content := ap.ExtractContent(flaggable)
|
||||
content := ap.ExtractContent(flaggable).Content
|
||||
statusURIs := []*url.URL{}
|
||||
inlineURLs := misskeyReportInlineURLs(content)
|
||||
statusURIs = append(statusURIs, inlineURLs...)
|
||||
|
|
|
@ -45,6 +45,10 @@ func (suite *ASToInternalTestSuite) jsonToType(in string) vocab.Type {
|
|||
suite.FailNow(err.Error())
|
||||
}
|
||||
|
||||
if statusable, ok := t.(ap.Statusable); ok {
|
||||
ap.NormalizeIncomingContent(statusable, m)
|
||||
}
|
||||
|
||||
return t
|
||||
}
|
||||
|
||||
|
@ -103,7 +107,8 @@ func (suite *ASToInternalTestSuite) TestParsePublicStatus() {
|
|||
suite.NoError(err)
|
||||
|
||||
suite.Equal("reading: Punishment and Reward in the Corporate University", status.ContentWarning)
|
||||
suite.Equal(`<p>> So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)
|
||||
suite.Equal(`<p>> So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)
|
||||
suite.Equal("en", status.Language)
|
||||
}
|
||||
|
||||
func (suite *ASToInternalTestSuite) TestParsePublicStatusNoURL() {
|
||||
|
@ -117,7 +122,7 @@ func (suite *ASToInternalTestSuite) TestParsePublicStatusNoURL() {
|
|||
suite.NoError(err)
|
||||
|
||||
suite.Equal("reading: Punishment and Reward in the Corporate University", status.ContentWarning)
|
||||
suite.Equal(`<p>> So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)
|
||||
suite.Equal(`<p>> So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)
|
||||
|
||||
// on statuses with no URL in them (like ones we get from pleroma sometimes) we should use the AP URI of the status as URL
|
||||
suite.Equal("http://fossbros-anonymous.io/users/foss_satan/statuses/108138763199405167", status.URL)
|
||||
|
|
|
@ -607,9 +607,17 @@ func (c *Converter) StatusToAS(ctx context.Context, s *gtsmodel.Status) (ap.Stat
|
|||
// conversation
|
||||
// TODO
|
||||
|
||||
// content -- the actual post itself
|
||||
// content -- the actual post
|
||||
// itself, plus the language
|
||||
contentProp := streams.NewActivityStreamsContentProperty()
|
||||
contentProp.AppendXMLSchemaString(s.Content)
|
||||
|
||||
if s.Language != "" {
|
||||
contentProp.AppendRDFLangString(map[string]string{
|
||||
s.Language: s.Content,
|
||||
})
|
||||
}
|
||||
|
||||
status.SetActivityStreamsContent(contentProp)
|
||||
|
||||
// attachments
|
||||
|
|
|
@ -340,6 +340,9 @@ func (suite *InternalToASTestSuite) TestStatusToAS() {
|
|||
"attributedTo": "http://localhost:8080/users/the_mighty_zork",
|
||||
"cc": "http://localhost:8080/users/the_mighty_zork/followers",
|
||||
"content": "hello everyone!",
|
||||
"contentMap": {
|
||||
"en": "hello everyone!"
|
||||
},
|
||||
"id": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY",
|
||||
"published": "2021-10-20T12:40:37+02:00",
|
||||
"replies": {
|
||||
|
@ -379,16 +382,21 @@ func (suite *InternalToASTestSuite) TestStatusWithTagsToASWithIDs() {
|
|||
// http://joinmastodon.org/ns, https://www.w3.org/ns/activitystreams --
|
||||
// will appear, so trim them out of the string for consistency
|
||||
trimmed := strings.SplitAfter(string(bytes), `"attachment":`)[1]
|
||||
suite.Equal(` {
|
||||
suite.Equal(` [
|
||||
{
|
||||
"blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj",
|
||||
"mediaType": "image/jpeg",
|
||||
"name": "Black and white image of some 50's style text saying: Welcome On Board",
|
||||
"type": "Document",
|
||||
"url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg"
|
||||
},
|
||||
}
|
||||
],
|
||||
"attributedTo": "http://localhost:8080/users/admin",
|
||||
"cc": "http://localhost:8080/users/admin/followers",
|
||||
"content": "hello world! #welcome ! first post on the instance :rainbow: !",
|
||||
"contentMap": {
|
||||
"en": "hello world! #welcome ! first post on the instance :rainbow: !"
|
||||
},
|
||||
"id": "http://localhost:8080/users/admin/statuses/01F8MH75CBF9JFX4ZAD54N0W0R",
|
||||
"published": "2021-10-20T11:36:45Z",
|
||||
"replies": {
|
||||
|
@ -446,16 +454,21 @@ func (suite *InternalToASTestSuite) TestStatusWithTagsToASFromDB() {
|
|||
// http://joinmastodon.org/ns, https://www.w3.org/ns/activitystreams --
|
||||
// will appear, so trim them out of the string for consistency
|
||||
trimmed := strings.SplitAfter(string(bytes), `"attachment":`)[1]
|
||||
suite.Equal(` {
|
||||
suite.Equal(` [
|
||||
{
|
||||
"blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj",
|
||||
"mediaType": "image/jpeg",
|
||||
"name": "Black and white image of some 50's style text saying: Welcome On Board",
|
||||
"type": "Document",
|
||||
"url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg"
|
||||
},
|
||||
}
|
||||
],
|
||||
"attributedTo": "http://localhost:8080/users/admin",
|
||||
"cc": "http://localhost:8080/users/admin/followers",
|
||||
"content": "hello world! #welcome ! first post on the instance :rainbow: !",
|
||||
"contentMap": {
|
||||
"en": "hello world! #welcome ! first post on the instance :rainbow: !"
|
||||
},
|
||||
"id": "http://localhost:8080/users/admin/statuses/01F8MH75CBF9JFX4ZAD54N0W0R",
|
||||
"published": "2021-10-20T11:36:45Z",
|
||||
"replies": {
|
||||
|
@ -519,6 +532,9 @@ func (suite *InternalToASTestSuite) TestStatusToASWithMentions() {
|
|||
"http://localhost:8080/users/the_mighty_zork"
|
||||
],
|
||||
"content": "hi @the_mighty_zork welcome to the instance!",
|
||||
"contentMap": {
|
||||
"en": "hi @the_mighty_zork welcome to the instance!"
|
||||
},
|
||||
"id": "http://localhost:8080/users/admin/statuses/01FF25D5Q0DH7CHD57CTRS6WK0",
|
||||
"inReplyTo": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY",
|
||||
"published": "2021-11-20T13:32:16Z",
|
||||
|
|
|
@ -31,6 +31,8 @@ import (
|
|||
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/config"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/language"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/log"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/regexes"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/text"
|
||||
)
|
||||
|
@ -184,3 +186,102 @@ func placeholdUnknownAttachments(arr []apimodel.Attachment) (string, []apimodel.
|
|||
|
||||
return text.SanitizeToHTML(aside.String()), arr
|
||||
}
|
||||
|
||||
// ContentToContentLanguage tries to
|
||||
// extract a content string and language
|
||||
// tag string from the given intermediary
|
||||
// content.
|
||||
//
|
||||
// Either/both of the returned strings may
|
||||
// be empty, depending on how things go.
|
||||
func ContentToContentLanguage(
|
||||
ctx context.Context,
|
||||
content gtsmodel.Content,
|
||||
) (
|
||||
string, // content
|
||||
string, // language
|
||||
) {
|
||||
var (
|
||||
contentStr string
|
||||
langTagStr string
|
||||
)
|
||||
|
||||
switch contentMap := content.ContentMap; {
|
||||
// Simplest case: no `contentMap`.
|
||||
// Return `content`, even if empty.
|
||||
case contentMap == nil:
|
||||
return content.Content, ""
|
||||
|
||||
// `content` and `contentMap` set.
|
||||
// Try to infer "primary" language.
|
||||
case content.Content != "":
|
||||
// Assume `content` is intended
|
||||
// primary content, and look for
|
||||
// corresponding language tag.
|
||||
contentStr = content.Content
|
||||
|
||||
for t, c := range contentMap {
|
||||
if contentStr == c {
|
||||
langTagStr = t
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// `content` not set; `contentMap`
|
||||
// is set with only one value.
|
||||
// This must be the "primary" lang.
|
||||
case len(contentMap) == 1:
|
||||
// Use an empty loop to
|
||||
// get the values we want.
|
||||
// nolint:revive
|
||||
for langTagStr, contentStr = range contentMap {
|
||||
}
|
||||
|
||||
// Only `contentMap` is set, with more
|
||||
// than one value. Map order is not
|
||||
// guaranteed so we can't know the
|
||||
// "primary" language.
|
||||
//
|
||||
// Try to select content using our
|
||||
// instance's configured languages.
|
||||
//
|
||||
// In case of no hits, just take the
|
||||
// first tag and content in the map.
|
||||
default:
|
||||
instanceLangs := config.GetInstanceLanguages()
|
||||
for _, langTagStr = range instanceLangs.TagStrs() {
|
||||
if contentStr = contentMap[langTagStr]; contentStr != "" {
|
||||
// Hit!
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// If nothing found, just take
|
||||
// the first entry we can get by
|
||||
// breaking after the first iter.
|
||||
if contentStr == "" {
|
||||
for langTagStr, contentStr = range contentMap {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if langTagStr != "" {
|
||||
// Found a lang tag for this content,
|
||||
// make sure it's valid / parseable.
|
||||
lang, err := language.Parse(langTagStr)
|
||||
if err != nil {
|
||||
log.Warnf(
|
||||
ctx,
|
||||
"could not parse %s as BCP47 language tag in status contentMap: %v",
|
||||
langTagStr, err,
|
||||
)
|
||||
} else {
|
||||
// Inferred the language!
|
||||
// Use normalized version.
|
||||
langTagStr = lang.TagStr
|
||||
}
|
||||
}
|
||||
|
||||
return contentStr, langTagStr
|
||||
}
|
||||
|
|
|
@ -18,7 +18,12 @@
|
|||
package typeutils
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/superseriousbusiness/gotosocial/internal/config"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/language"
|
||||
)
|
||||
|
||||
func TestMisskeyReportContentURLs1(t *testing.T) {
|
||||
|
@ -44,3 +49,112 @@ misskey-formatted`
|
|||
t.Fatalf("wanted 0 urls, got %d", l)
|
||||
}
|
||||
}
|
||||
|
||||
func TestContentToContentLanguage(t *testing.T) {
|
||||
type testcase struct {
|
||||
content gtsmodel.Content
|
||||
instanceLanguages language.Languages
|
||||
expectedContent string
|
||||
expectedLang string
|
||||
}
|
||||
|
||||
ctx, cncl := context.WithCancel(context.Background())
|
||||
defer cncl()
|
||||
|
||||
for i, testcase := range []testcase{
|
||||
{
|
||||
content: gtsmodel.Content{
|
||||
Content: "hello world",
|
||||
ContentMap: nil,
|
||||
},
|
||||
expectedContent: "hello world",
|
||||
expectedLang: "",
|
||||
},
|
||||
{
|
||||
content: gtsmodel.Content{
|
||||
Content: "",
|
||||
ContentMap: map[string]string{
|
||||
"en": "hello world",
|
||||
},
|
||||
},
|
||||
expectedContent: "hello world",
|
||||
expectedLang: "en",
|
||||
},
|
||||
{
|
||||
content: gtsmodel.Content{
|
||||
Content: "bonjour le monde",
|
||||
ContentMap: map[string]string{
|
||||
"en": "hello world",
|
||||
"fr": "bonjour le monde",
|
||||
},
|
||||
},
|
||||
expectedContent: "bonjour le monde",
|
||||
expectedLang: "fr",
|
||||
},
|
||||
{
|
||||
content: gtsmodel.Content{
|
||||
Content: "bonjour le monde",
|
||||
ContentMap: map[string]string{
|
||||
"en": "hello world",
|
||||
},
|
||||
},
|
||||
expectedContent: "bonjour le monde",
|
||||
expectedLang: "",
|
||||
},
|
||||
{
|
||||
content: gtsmodel.Content{
|
||||
Content: "",
|
||||
ContentMap: map[string]string{
|
||||
"en": "hello world",
|
||||
"ru": "Привет, мир!",
|
||||
"nl": "hallo wereld!",
|
||||
"ca": "Hola món!",
|
||||
},
|
||||
},
|
||||
instanceLanguages: language.Languages{
|
||||
{TagStr: "en"},
|
||||
{TagStr: "ca"},
|
||||
},
|
||||
expectedContent: "hello world",
|
||||
expectedLang: "en",
|
||||
},
|
||||
{
|
||||
content: gtsmodel.Content{
|
||||
Content: "",
|
||||
ContentMap: map[string]string{
|
||||
"en": "hello world",
|
||||
"ru": "Привет, мир!",
|
||||
"nl": "hallo wereld!",
|
||||
"ca": "Hola món!",
|
||||
},
|
||||
},
|
||||
instanceLanguages: language.Languages{
|
||||
{TagStr: "ca"},
|
||||
{TagStr: "en"},
|
||||
},
|
||||
expectedContent: "Hola món!",
|
||||
expectedLang: "ca",
|
||||
},
|
||||
} {
|
||||
langs, err := language.InitLangs(testcase.instanceLanguages.TagStrs())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
config.SetInstanceLanguages(langs)
|
||||
|
||||
content, language := ContentToContentLanguage(ctx, testcase.content)
|
||||
if content != testcase.expectedContent {
|
||||
t.Errorf(
|
||||
"test %d expected content '%s' got '%s'",
|
||||
i, testcase.expectedContent, content,
|
||||
)
|
||||
}
|
||||
|
||||
if language != testcase.expectedLang {
|
||||
t.Errorf(
|
||||
"test %d expected language '%s' got '%s'",
|
||||
i, testcase.expectedLang, language,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -85,6 +85,9 @@ func (suite *WrapTestSuite) TestWrapNoteInCreate() {
|
|||
"attributedTo": "http://localhost:8080/users/the_mighty_zork",
|
||||
"cc": "http://localhost:8080/users/the_mighty_zork/followers",
|
||||
"content": "hello everyone!",
|
||||
"contentMap": {
|
||||
"en": "hello everyone!"
|
||||
},
|
||||
"id": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY",
|
||||
"published": "2021-10-20T12:40:37+02:00",
|
||||
"replies": {
|
||||
|
|
Loading…
Reference in New Issue