From 664713ddd4f7236fde0759cf7a0e04a434417876 Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Sun, 3 Jul 2022 11:03:03 +0200 Subject: [PATCH] [bugfix] Make hashtag regex work with non-ascii characters (#682) --- internal/regexes/regexes.go | 4 ++-- internal/util/statustools_test.go | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/internal/regexes/regexes.go b/internal/regexes/regexes.go index 58635b6b4..dd3d9ce40 100644 --- a/internal/regexes/regexes.go +++ b/internal/regexes/regexes.go @@ -71,8 +71,8 @@ var ( // MentionFinder extracts mentions from a piece of text. MentionFinder = regexp.MustCompile(mentionFinder) - // hashtag regex can be played with here: https://regex101.com/r/bPxeca/1 - hashtagFinder = fmt.Sprintf(`(?:^|\s)(?:#*)(#[a-zA-Z0-9]{1,%d})(?:#|\b)`, maximumHashtagLength) + // hashtag regex can be played with here: https://regex101.com/r/bpyGlj/1 + hashtagFinder = fmt.Sprintf(`(?:^|\s)(?:#*)(#[\p{L}\p{N}]{1,%d})(?:#|\b)`, maximumHashtagLength) // HashtagFinder finds possible hashtags in a string. // It returns just the string part of the hashtag, not the # symbol. HashtagFinder = regexp.MustCompile(hashtagFinder) diff --git a/internal/util/statustools_test.go b/internal/util/statustools_test.go index bea89158a..d9f344e4b 100644 --- a/internal/util/statustools_test.go +++ b/internal/util/statustools_test.go @@ -83,15 +83,20 @@ func (suite *StatusTestSuite) TestDeriveHashtagsOK() { #ThisShouldAlsoWork #not_this_though -#111111 thisalsoshouldn'twork#### ##` +#111111 thisalsoshouldn'twork#### ## + +#alimentación, #saúde +` tags := util.DeriveHashtagsFromText(statusText) - assert.Len(suite.T(), tags, 5) + assert.Len(suite.T(), tags, 7) assert.Equal(suite.T(), "testing123", tags[0]) assert.Equal(suite.T(), "also", tags[1]) assert.Equal(suite.T(), "thisshouldwork", tags[2]) assert.Equal(suite.T(), "ThisShouldAlsoWork", tags[3]) assert.Equal(suite.T(), "111111", tags[4]) + assert.Equal(suite.T(), "alimentación", tags[5]) + assert.Equal(suite.T(), "saúde", tags[6]) } func (suite *StatusTestSuite) TestDeriveEmojiOK() {