From 1710158b39910489fc50f951940e48bc386c97af Mon Sep 17 00:00:00 2001 From: tsmethurst Date: Sun, 4 Apr 2021 19:20:31 +0200 Subject: [PATCH] tags, emoji --- internal/apimodule/status/statuscreate.go | 23 ++++++++++-- internal/db/db.go | 23 +++++++++++- internal/db/pg.go | 36 +++++++++++++++++- internal/util/status.go | 44 +++++++++++++++++++++- internal/util/status_test.go | 45 +++++++++++++++++++++++ 5 files changed, 163 insertions(+), 8 deletions(-) diff --git a/internal/apimodule/status/statuscreate.go b/internal/apimodule/status/statuscreate.go index f77174b77..68188c422 100644 --- a/internal/apimodule/status/statuscreate.go +++ b/internal/apimodule/status/statuscreate.go @@ -115,24 +115,41 @@ func (m *statusModule) statusCreatePOSTHandler(c *gin.Context) { ActivityStreamsType: model.ActivityStreamsNote, } - menchies, err := m.db.AccountStringsToMentions(util.DeriveMentions(form.Status), authed.Account.ID, thisStatusID) + menchies, err := m.db.MentionStringsToMentions(util.DeriveMentions(form.Status), authed.Account.ID, thisStatusID) if err != nil { l.Debugf("error generating mentions from status: %s", err) c.JSON(http.StatusInternalServerError, gin.H{"error": "error generating mentions from status"}) return } + tags, err := m.db.TagStringsToTags(util.DeriveHashtags(form.Status), authed.Account.ID, thisStatusID) + if err != nil { + l.Debugf("error generating hashtags from status: %s", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "error generating hashtags from status"}) + return + } + + emojis, err := m.db.EmojiStringsToEmojis(util.DeriveEmojis(form.Status), authed.Account.ID, thisStatusID) + if err != nil { + l.Debugf("error generating emojis from status: %s", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "error generating emojis from status"}) + return + } + newStatus.Mentions = menchies + newStatus.Tags = tags + newStatus.Emojis = emojis // take care of side effects -- federation, mentions, updating metadata, etc, etc - - 
m.distributor.FromClientAPI() <- distributor.FromClientAPI{ APObjectType: model.ActivityStreamsNote, APActivityType: model.ActivityStreamsCreate, Activity: newStatus, } + // return populated status to submitter + + } func validateCreateStatus(form *advancedStatusCreateForm, config *config.StatusesConfig, accountID string, db db.DB) error { diff --git a/internal/db/db.go b/internal/db/db.go index 9ab41399d..31641f767 100644 --- a/internal/db/db.go +++ b/internal/db/db.go @@ -188,10 +188,29 @@ type DB interface { // In other words, this is the public record that the server has of an account. AccountToMastoPublic(account *model.Account) (*mastotypes.Account, error) - // AccountStringsToMentions takes a slice of deduplicated account names in the form "@test@whatever.example.org", which have been + // MentionStringsToMentions takes a slice of deduplicated, lowercase account names in the form "@test@whatever.example.org", which have been // mentioned in a status. It takes the id of the account that wrote the status, and the id of the status itself, and then // checks in the database for the mentioned accounts, and returns a slice of mentions generated based on the given parameters. - AccountStringsToMentions(targetAccounts []string, originAccountID string, statusID string) ([]*model.Mention, error) + // + // Note: this func doesn't/shouldn't do any manipulation of the accounts in the DB, it's just for checking if they exist + // and conveniently returning them. + MentionStringsToMentions(targetAccounts []string, originAccountID string, statusID string) ([]*model.Mention, error) + + // TagStringsToTags takes a slice of deduplicated, lowercase tags in the form "somehashtag", which have been + // used in a status. It takes the id of the account that wrote the status, and the id of the status itself, and then + // returns a slice of *model.Tag corresponding to the given tags. 
+ // + // Note: this func doesn't/shouldn't do any manipulation of the tags in the DB, it's just for checking if they exist + // and conveniently returning them. + TagStringsToTags(tags []string, originAccountID string, statusID string) ([]*model.Tag, error) + + // EmojiStringsToEmojis takes a slice of deduplicated, lowercase emoji shortcodes in the form "emojiname" (without the surrounding colons), which have been + // used in a status. It takes the id of the account that wrote the status, and the id of the status itself, and then + // returns a slice of *model.Emoji corresponding to the given emojis. + // + // Note: this func doesn't/shouldn't do any manipulation of the emoji in the DB, it's just for checking if they exist + // and conveniently returning them. + EmojiStringsToEmojis(emojis []string, originAccountID string, statusID string) ([]*model.Emoji, error) } // New returns a new database service that satisfies the DB interface and, by extension, diff --git a/internal/db/pg.go b/internal/db/pg.go index b8ba8a7d9..0ca2dc752 100644 --- a/internal/db/pg.go +++ b/internal/db/pg.go @@ -665,7 +665,7 @@ func (ps *postgresService) AccountToMastoPublic(a *model.Account) (*mastotypes.A }, nil } -func (ps *postgresService) AccountStringsToMentions(targetAccounts []string, originAccountID string, statusID string) ([]*model.Mention, error) { +func (ps *postgresService) MentionStringsToMentions(targetAccounts []string, originAccountID string, statusID string) ([]*model.Mention, error) { menchies := []*model.Mention{} for _, a := range targetAccounts { // A mentioned account looks like "@test@example.org" -- we can guarantee this from the regex that targetAccounts should have been derived from. @@ -710,7 +710,7 @@ func (ps *postgresService) AccountStringsToMentions(targetAccounts []string, ori return nil, fmt.Errorf("error getting account with username %s and domain %s: %s", username, domain, err) } - // id, createdat and updatedat will be populated by the db, so we have everything we need! 
+ // id, createdAt and updatedAt will be populated by the db, so we have everything we need! menchies = append(menchies, &model.Mention{ StatusID: statusID, OriginAccountID: originAccountID, @@ -719,3 +719,35 @@ func (ps *postgresService) AccountStringsToMentions(targetAccounts []string, ori } return menchies, nil } + +// for now this function doesn't really use the database, but it's here because: +// A) it might later and +// B) it's v. similar to MentionStringsToMentions +func (ps *postgresService) TagStringsToTags(tags []string, originAccountID string, statusID string) ([]*model.Tag, error) { + newTags := []*model.Tag{} + for _, t := range tags { + newTags = append(newTags, &model.Tag{ + Name: t, + }) + } + return newTags, nil +} + +func (ps *postgresService) EmojiStringsToEmojis(emojis []string, originAccountID string, statusID string) ([]*model.Emoji, error) { + newEmojis := []*model.Emoji{} + for _, e := range emojis { + emoji := &model.Emoji{} + err := ps.conn.Model(emoji).Where("shortcode = ?", e).Where("visible_in_picker = true").Where("disabled = false").Select() + if err != nil { + if err == pg.ErrNoRows { + // no emoji found with this shortcode, so just don't include it in the results and carry on about our business + ps.log.Debugf("no emoji found with shortcode %s, skipping it", e) + continue + } + // a serious error has happened so bail + return nil, fmt.Errorf("error getting emoji with shortcode %s: %s",e, err) + } + newEmojis = append(newEmojis, emoji) + } + return newEmojis, nil +} diff --git a/internal/util/status.go b/internal/util/status.go index 36679378c..996538638 100644 --- a/internal/util/status.go +++ b/internal/util/status.go @@ -21,13 +21,21 @@ package util import ( "fmt" "regexp" + "strings" ) // To play around with these regexes, see: https://regex101.com/r/2km2EK/1 var ( + // mention regex can be played around with here: https://regex101.com/r/2km2EK/1 hostnameRegexString = 
`(?:(?:[a-zA-Z]{1})|(?:[a-zA-Z]{1}[a-zA-Z]{1})|(?:[a-zA-Z]{1}[0-9]{1})|(?:[0-9]{1}[a-zA-Z]{1})|(?:[a-zA-Z0-9][a-zA-Z0-9-_]{1,61}[a-zA-Z0-9]))\.(?:[a-zA-Z]{2,6}|[a-zA-Z0-9-]{2,30}\.[a-zA-Z]{2,5}))` mentionRegexString = fmt.Sprintf(`(?: |^|\W)(@[a-zA-Z0-9_]+@%s(?: |\n)`, hostnameRegexString) mentionRegex = regexp.MustCompile(mentionRegexString) + // hashtag regex can be played with here: https://regex101.com/r/Vhy8pg/1 + hashtagRegexString = `(?: |^|\W)?#([a-zA-Z0-9]{1,30})(?:\b|\r)` + hashtagRegex = regexp.MustCompile(hashtagRegexString) + // emoji regex can be played with here: https://regex101.com/r/478XGM/1 + emojiRegexString = `(?: |^|\W)?:([a-zA-Z0-9_]{2,30}):(?:\b|\r)?` + emojiRegex = regexp.MustCompile(emojiRegexString) ) // DeriveMentions takes a plaintext (ie., not html-formatted) status, @@ -36,12 +44,37 @@ var ( // // It will look for fully-qualified account names in the form "@user@example.org". // Mentions that are just in the form "@username" will not be detected. +// The case of the returned mentions will be lowered, for consistency. func DeriveMentions(status string) []string { mentionedAccounts := []string{} for _, m := range mentionRegex.FindAllStringSubmatch(status, -1) { mentionedAccounts = append(mentionedAccounts, m[1]) } - return Unique(mentionedAccounts) + return Lower(Unique(mentionedAccounts)) +} + +// DeriveHashtags takes a plaintext (ie., not html-formatted) status, +// and applies a regex to it to return a deduplicated list of hashtags +// used in that status, without the leading #. The case of the returned +// tags will be lowered, for consistency. +func DeriveHashtags(status string) []string { + tags := []string{} + for _, m := range hashtagRegex.FindAllStringSubmatch(status, -1) { + tags = append(tags, m[1]) + } + return Lower(Unique(tags)) +} + +// DeriveEmojis takes a plaintext (ie., not html-formatted) status, +// and applies a regex to it to return a deduplicated list of emojis +// used in that status, without the surround ::. 
The case of the returned +// emojis will be lowered, for consistency. +func DeriveEmojis(status string) []string { + emojis := []string{} + for _, m := range emojiRegex.FindAllStringSubmatch(status, -1) { + emojis = append(emojis, m[1]) + } + return Lower(Unique(emojis)) } // Unique returns a deduplicated version of a given string slice. @@ -57,6 +90,15 @@ func Unique(s []string) []string { return list } +// Lower lowercases all strings in a given string slice +func Lower(s []string) []string { + new := []string{} + for _, i := range s { + new = append(new, strings.ToLower(i)) + } + return new +} + // HTMLFormat takes a plaintext formatted status string, and converts it into // a nice HTML-formatted string. // diff --git a/internal/util/status_test.go b/internal/util/status_test.go index 3e670fa72..e2079659b 100644 --- a/internal/util/status_test.go +++ b/internal/util/status_test.go @@ -54,6 +54,51 @@ func (suite *StatusTestSuite) TestDeriveMentionsEmpty() { assert.Len(suite.T(), menchies, 0) } +func (suite *StatusTestSuite) TestDeriveHashtagsOK() { + statusText := `#testing123 #also testing + +# testing this one shouldn't work + + #thisshouldwork + +#ThisShouldAlsoWork #not_this_though + +#111111 thisalsoshouldn'twork#### ##` + + tags := DeriveHashtags(statusText) + assert.Len(suite.T(), tags, 5) + assert.Equal(suite.T(), "testing123", tags[0]) + assert.Equal(suite.T(), "also", tags[1]) + assert.Equal(suite.T(), "thisshouldwork", tags[2]) + assert.Equal(suite.T(), "thisshouldalsowork", tags[3]) + assert.Equal(suite.T(), "111111", tags[4]) +} + +func (suite *StatusTestSuite) TestDeriveEmojiOK() { + statusText := `:test: :another: + +Here's some normal text with an :emoji: at the end + +:spaces shouldnt work: + +:emoji1::emoji2: + +:anotheremoji:emoji2: +:anotheremoji::anotheremoji::anotheremoji::anotheremoji: +:underscores_ok_too: +` + + tags := DeriveEmojis(statusText) + assert.Len(suite.T(), tags, 7) + assert.Equal(suite.T(), "test", tags[0]) + 
assert.Equal(suite.T(), "another", tags[1]) + assert.Equal(suite.T(), "emoji", tags[2]) + assert.Equal(suite.T(), "emoji1", tags[3]) + assert.Equal(suite.T(), "emoji2", tags[4]) + assert.Equal(suite.T(), "anotheremoji", tags[5]) + assert.Equal(suite.T(), "underscores_ok_too", tags[6]) +} + func TestStatusTestSuite(t *testing.T) { suite.Run(t, new(StatusTestSuite)) }