// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

package regexes
import (
2022-05-07 17:55:27 +02:00
"bytes"
2021-09-01 18:29:25 +02:00
"fmt"
"regexp"
2022-05-07 17:55:27 +02:00
"sync"
"mvdan.cc/xurls/v2"
2021-09-01 18:29:25 +02:00
)
// Path segment names that are interpolated into the path-matching
// expressions declared in this package.
const (
	users     = "users"     // prefix of every per-user path, eg /users/example_username
	actors    = "actors"    // prefix of actor paths, eg /actors/example_username
	statuses  = "statuses"  // segment preceding a status ULID
	inbox     = "inbox"     // trailing segment of a user's inbox path
	outbox    = "outbox"    // trailing segment of a user's outbox path
	followers = "followers" // trailing segment of a user's followers path
	following = "following" // trailing segment of a user's following path
	liked     = "liked"     // segment used by both the liked collection and single like paths
	publicKey = "main-key"  // trailing segment of a user's public key path
	follow    = "follow"    // segment preceding a follow ULID
	blocks    = "blocks"    // segment preceding a block ULID
	reports   = "reports"   // prefix of report paths, eg /reports/01GP3AWY4CRDVRNZKW0TEAMB5R
)
// maximumUsernameLength is the upper repetition bound interpolated into
// the strict username expression (the lower bound there is 2).
const maximumUsernameLength = 64

// maximumEmojiShortcodeLength is the upper repetition bound interpolated
// into the emoji shortcode expression (the lower bound there is 2).
const maximumEmojiShortcodeLength = 30
var (
2022-05-07 17:55:27 +02:00
schemes = ` (http|https):// `
// LinkScheme captures http/https schemes in URLs.
LinkScheme = func ( ) * regexp . Regexp {
rgx , err := xurls . StrictMatchingScheme ( schemes )
if err != nil {
panic ( err )
}
return rgx
} ( )
2022-06-11 11:01:34 +02:00
mentionName = ` ^@([\w\-\.]+)(?:@([\w\-\.:]+))?$ `
2021-09-01 18:29:25 +02:00
// MentionName captures the username and domain part from a mention string
// such as @whatever_user@example.org, returning whatever_user and example.org (without the @ symbols)
MentionName = regexp . MustCompile ( mentionName )
2022-11-15 16:05:34 +01:00
// mention regex can be played around with here: https://regex101.com/r/P0vpYG/1
2021-09-30 18:11:57 +02:00
mentionFinder = ` (?:^|\s)(@\w+(?:@[a-zA-Z0-9_\-\.]+)?) `
2021-09-01 18:29:25 +02:00
// MentionFinder extracts mentions from a piece of text.
MentionFinder = regexp . MustCompile ( mentionFinder )
emojiShortcode = fmt . Sprintf ( ` \w { 2,%d} ` , maximumEmojiShortcodeLength )
// EmojiShortcode validates an emoji name.
EmojiShortcode = regexp . MustCompile ( fmt . Sprintf ( "^%s$" , emojiShortcode ) )
// emoji regex can be played with here: https://regex101.com/r/478XGM/1
2022-05-07 17:55:27 +02:00
emojiFinderString = fmt . Sprintf ( ` (?:\b)?:(%s):(?:\b)? ` , emojiShortcode )
2021-09-01 18:29:25 +02:00
// EmojiFinder extracts emoji strings from a piece of text.
EmojiFinder = regexp . MustCompile ( emojiFinderString )
2023-01-27 14:48:11 +01:00
// usernameString defines an acceptable username for a new account on this instance
2021-09-01 18:29:25 +02:00
usernameString = fmt . Sprintf ( ` [a-z0-9_] { 2,%d} ` , maximumUsernameLength )
// Username can be used to validate usernames of new signups
Username = regexp . MustCompile ( fmt . Sprintf ( ` ^%s$ ` , usernameString ) )
2023-01-27 14:48:11 +01:00
// usernameStringRelaxed is like usernameString, but also allows the '.' character,
// so it can also be used to match the instance account, which will have a username
// like 'example.org', and it has no upper length limit, so will work for long domains.
usernameStringRelaxed = ` [a-z0-9_\.] { 2,} `
userPathString = fmt . Sprintf ( ` ^/?%s/(%s)$ ` , users , usernameStringRelaxed )
2021-09-01 18:29:25 +02:00
// UserPath parses a path that validates and captures the username part from eg /users/example_username
UserPath = regexp . MustCompile ( userPathString )
2023-01-27 14:48:11 +01:00
publicKeyPath = fmt . Sprintf ( ` ^/?%s/(%s)/%s ` , users , usernameStringRelaxed , publicKey )
2021-09-01 18:29:25 +02:00
// PublicKeyPath parses a path that validates and captures the username part from eg /users/example_username/main-key
PublicKeyPath = regexp . MustCompile ( publicKeyPath )
2023-01-27 14:48:11 +01:00
inboxPath = fmt . Sprintf ( ` ^/?%s/(%s)/%s$ ` , users , usernameStringRelaxed , inbox )
2021-09-01 18:29:25 +02:00
// InboxPath parses a path that validates and captures the username part from eg /users/example_username/inbox
InboxPath = regexp . MustCompile ( inboxPath )
2023-01-27 14:48:11 +01:00
outboxPath = fmt . Sprintf ( ` ^/?%s/(%s)/%s$ ` , users , usernameStringRelaxed , outbox )
2021-09-01 18:29:25 +02:00
// OutboxPath parses a path that validates and captures the username part from eg /users/example_username/outbox
OutboxPath = regexp . MustCompile ( outboxPath )
2023-01-27 14:48:11 +01:00
actorPath = fmt . Sprintf ( ` ^/?%s/(%s)$ ` , actors , usernameStringRelaxed )
2021-09-01 18:29:25 +02:00
// ActorPath parses a path that validates and captures the username part from eg /actors/example_username
ActorPath = regexp . MustCompile ( actorPath )
2023-01-27 14:48:11 +01:00
followersPath = fmt . Sprintf ( ` ^/?%s/(%s)/%s$ ` , users , usernameStringRelaxed , followers )
2021-09-01 18:29:25 +02:00
// FollowersPath parses a path that validates and captures the username part from eg /users/example_username/followers
FollowersPath = regexp . MustCompile ( followersPath )
2023-01-27 14:48:11 +01:00
followingPath = fmt . Sprintf ( ` ^/?%s/(%s)/%s$ ` , users , usernameStringRelaxed , following )
2021-09-01 18:29:25 +02:00
// FollowingPath parses a path that validates and captures the username part from eg /users/example_username/following
FollowingPath = regexp . MustCompile ( followingPath )
2023-01-27 14:48:11 +01:00
followPath = fmt . Sprintf ( ` ^/?%s/(%s)/%s/(%s)$ ` , users , usernameStringRelaxed , follow , ulid )
2021-09-01 18:29:25 +02:00
// FollowPath parses a path that validates and captures the username part and the ulid part
// from eg /users/example_username/follow/01F7XT5JZW1WMVSW1KADS8PVDH
FollowPath = regexp . MustCompile ( followPath )
ulid = ` [0123456789ABCDEFGHJKMNPQRSTVWXYZ] { 26} `
// ULID parses and validate a ULID.
ULID = regexp . MustCompile ( fmt . Sprintf ( ` ^%s$ ` , ulid ) )
2023-01-27 14:48:11 +01:00
likedPath = fmt . Sprintf ( ` ^/?%s/(%s)/%s$ ` , users , usernameStringRelaxed , liked )
2021-09-01 18:29:25 +02:00
// LikedPath parses a path that validates and captures the username part from eg /users/example_username/liked
LikedPath = regexp . MustCompile ( likedPath )
2023-01-27 14:48:11 +01:00
likePath = fmt . Sprintf ( ` ^/?%s/(%s)/%s/(%s)$ ` , users , usernameStringRelaxed , liked , ulid )
2021-09-01 18:29:25 +02:00
// LikePath parses a path that validates and captures the username part and the ulid part
// from eg /users/example_username/like/01F7XT5JZW1WMVSW1KADS8PVDH
LikePath = regexp . MustCompile ( likePath )
2023-01-27 14:48:11 +01:00
statusesPath = fmt . Sprintf ( ` ^/?%s/(%s)/%s/(%s)$ ` , users , usernameStringRelaxed , statuses , ulid )
2021-09-01 18:29:25 +02:00
// StatusesPath parses a path that validates and captures the username part and the ulid part
// from eg /users/example_username/statuses/01F7XT5JZW1WMVSW1KADS8PVDH
// The regex can be played with here: https://regex101.com/r/G9zuxQ/1
StatusesPath = regexp . MustCompile ( statusesPath )
2023-01-27 14:48:11 +01:00
blockPath = fmt . Sprintf ( ` ^/?%s/(%s)/%s/(%s)$ ` , users , usernameStringRelaxed , blocks , ulid )
2021-09-01 18:29:25 +02:00
// BlockPath parses a path that validates and captures the username part and the ulid part
// from eg /users/example_username/blocks/01F7XT5JZW1WMVSW1KADS8PVDH
BlockPath = regexp . MustCompile ( blockPath )
2022-11-25 18:23:42 +01:00
2023-01-10 15:19:05 +01:00
reportPath = fmt . Sprintf ( ` ^/?%s/(%s)$ ` , reports , ulid )
// ReportPath parses a path that validates and captures the ulid part
// from eg /reports/01GP3AWY4CRDVRNZKW0TEAMB5R
ReportPath = regexp . MustCompile ( reportPath )
2022-11-25 18:23:42 +01:00
filePath = fmt . Sprintf ( ` ^(%s)/([a-z]+)/([a-z]+)/(%s)\.([a-z]+)$ ` , ulid , ulid )
// FilePath parses a file storage path of the form [ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[FILE_NAME]
// eg 01F8MH1H7YV1Z7D2C8K2730QBF/attachment/small/01F8MH8RMYQ6MSNY3JM2XT1CQ5.jpeg
// It captures the account id, media type, media size, file name, and file extension, eg
// `01F8MH1H7YV1Z7D2C8K2730QBF`, `attachment`, `small`, `01F8MH8RMYQ6MSNY3JM2XT1CQ5`, `jpeg`.
FilePath = regexp . MustCompile ( filePath )
2023-01-25 11:12:27 +01:00
// MisskeyReportNotes captures a list of Note URIs from report content created by Misskey.
// https://regex101.com/r/EnTOBV/1
MisskeyReportNotes = regexp . MustCompile ( ` (?m)(?:^Note: ((?:http|https):\/\/.*)$) ` )
2021-09-01 18:29:25 +02:00
)
2022-05-07 17:55:27 +02:00
// bufpool recycles byte buffers for the regex helper functions in this
// package. A freshly created buffer is empty and starts out with 512 bytes
// of backing capacity.
var bufpool = sync.Pool{
	New: func() any {
		backing := make([]byte, 0, 512)
		return bytes.NewBuffer(backing)
	},
}
// ReplaceAllStringFunc wraps rgx.ReplaceAllStringFunc, additionally handing
// repl a scratch byte buffer taken from bufpool for building its result.
//
// The same buffer is passed to every invocation of repl and is reset before
// each one. It is returned to the pool when this function returns, so repl
// must not hold on to it.
func ReplaceAllStringFunc(rgx *regexp.Regexp, src string, repl func(match string, buf *bytes.Buffer) string) string {
	scratch := bufpool.Get().(*bytes.Buffer) //nolint
	defer bufpool.Put(scratch)

	// Adapt repl to the single-argument callback the regexp package expects.
	withScratch := func(found string) string {
		scratch.Reset() // start every match with an empty buffer
		return repl(found, scratch)
	}

	return rgx.ReplaceAllStringFunc(src, withScratch)
}