This commit is contained in:
Dym Sohin 2023-06-23 20:07:30 +02:00
parent d413c9ad42
commit b106069d2e
5 changed files with 368 additions and 3 deletions

View File

@ -1,3 +0,0 @@
# y
algorithmically-enhanced youtube-dl wrapper for audio-downloads

20
install.sh Executable file
View File

@ -0,0 +1,20 @@
#!/bin/bash
# y install script
## use
# bash ./install.sh
#
# Shallow-clones the `y` repository into $PREFIX/src/y and links its
# y.sh as $PREFIX/bin/y. Adjust PREFIX below as needed and make sure
# $PREFIX/bin is on $PATH.

# abort on the first failing step, so a failed clone is never followed
# by chmod/ln acting on a missing or half-written checkout
set -e

PREFIX=~/.local

# all expansions are quoted so a PREFIX containing spaces keeps working
mkdir -p "$PREFIX/src" "$PREFIX/bin"

git clone --depth 1 \
  'https://source.garden/scripts/y.git' \
  "$PREFIX/src/y/"

chmod +x "$PREFIX/src/y/y.sh"

# -f replaces a link left behind by an earlier install instead of failing
ln -sf "$PREFIX/src/y/y.sh" \
  "$PREFIX/bin/y"

7
meta.kdl Normal file
View File

@ -0,0 +1,7 @@
title "y"
description "algorithmically-enhanced youtube-dl wrapper for audio-downloads"
type "code"
tags "bash" "script" "file-download" "automation"
license "AGPL"
homepage "https://dym.sh/y/"
source "https://source.garden/scripts/y/"

51
readme.md Normal file
View File

@ -0,0 +1,51 @@
# y
> algorithmically-enhanced [youtube-dl](https://yt-dl.org) wrapper for audio-downloads
## install
1. look at [`install.sh`](./install.sh) file for instructions
1. adjust `PREFIX` as you wish
1. make sure the final directory is on `$PATH`
## use
`y https://orax.bandcamp.com/track/ectoplasmic`
(adjust [config.sh](./config.sh) file to set your preferred download paths)
## requirements
- [`youtube_dl`](https://github.com/ytdl-org/youtube-dl) the main thing
- [`ffmpeg`](https://ffmpeg.org/download.html) to convert files of different formats
- [`sd`](https://github.com/chmln/sd) a better `sed`
- [`lynx`](https://github.com/lynx/lynx) to easily scrape web-pages
## site-specific improvements
### bandcamp
1. prefers mp3-320 (checks every track)
2. downloads entire discography if given root-domain or `/music` url
### twitter
file-name is now twitter-username and tweet-id
(instead of full text of a tweet, which can result in strings over 255 characters long, aborting the entire process)
### reddit
file-name is now subreddit, post-id, and url-slug
(instead of full text of a title, which can result in strings over 255 characters long, aborting the entire process)
### youtube
lets you set several predetermined filesystem-paths to keep file-categories separate:
- `y m <youtube_URL>` music (automatically chosen for `music.youtube.com`)
- `y a <youtube_URL>` audiobooks
### soundcloud
1. occasional wav-files are converted to mp3 320kb/s
1. artist is set based on url (instead of having none by default)
### instagram
- file-name is now username, video id, and number of the video if there are multiple
(instead of "Video by username", which refuses to download multiple videos from different id's: "Video by username.mp4 has already been downloaded")
- TODO: expose playlist's `%(uploader_id)s`, `nr_current`, `nr_total` to the naming api

290
y.sh Executable file
View File

@ -0,0 +1,290 @@
#!/bin/bash
## y
# > algorithmically-enhanced youtube-dl/yt-dlp wrapper for audio-downloads
## requires
# - `python3 -m pip install --upgrade yt-dlp`
# the main thing, you should already have it
# - `sudo apt install -y ffmpeg` to convert files of different formats
# - `cargo install sd` a better `sed`
# - `sudo apt install -y lynx` to easily scrape web-pages
# extra yt-dlp options picked by the cli parser (e.g. '-f 22'),
# space-separated; empty means "no extra options"
DN_TYPE=''

#######################################
# Run yt-dlp with the script-wide defaults plus the caller's arguments.
# Globals:   DN_TYPE (read) - optional extra options, space-separated
# Arguments: "$@" - forwarded to yt-dlp unchanged
# Outputs:   whatever yt-dlp prints; an install hint on stderr when
#            yt-dlp is missing
# Returns:   yt-dlp's exit status; exits the script with 1 when yt-dlp
#            is not installed
#######################################
YDL()
{
  local -a dn_args=()

  # `command -v` is a builtin and is silenced here; `which` used to print
  # the resolved path into the script's output on every single call
  if ! command -v yt-dlp > /dev/null; then
    echo 'no youtube-dl/yt-dlp found, you can install it with:
sudo apt install -y \
python3 python3-pip
python3 -m pip install --upgrade \
yt-dlp
' >&2
    exit 1
  fi

  # split DN_TYPE on spaces explicitly: callers may have changed IFS
  # (this script sets it to newline), in which case a bare $DN_TYPE would
  # reach yt-dlp as the single word '-f 22'
  if [ -n "$DN_TYPE" ]; then
    IFS=' ' read -r -a dn_args <<< "$DN_TYPE"
  fi

  # "$@" is quoted so URLs and -o templates containing `?`, `*` or
  # `[...]` are neither re-split nor glob-expanded.
  # This is the last command, so its status is the function's status.
  yt-dlp "${dn_args[@]}" --cookies-from-browser firefox "$@"
    # --verbose
    # --restrict-filenames
}
# load config
# config.sh is looked up next to the real script (symlinks resolved);
# the nested quoting keeps install paths containing spaces intact
cd "$(dirname "$(realpath "$0")")" || exit 1
# the explicit ./ stops `.` from picking up some other config.sh via $PATH
. ./config.sh

# use new-line as separator of array elements
IFS=$'\n'

# parse cli options
# usage: y [OPT] URL -- a single argument is taken as the URL
OPT="$1"
URL="$2"
if [ -z "$URL" ]; then
  OPT=''
  URL="$1"
fi

# host part of the URL: scheme and a leading `www.` removed, path cut off
SITE="$(echo "$URL" \
  | sd 'https?://(www\.)?' '' \
  | sd '/.*' ''
)"

# `band`: treat the URL as a bandcamp page even though its host is not
# *.bandcamp.com, by forcing a host name the bandcamp pattern matches.
# (the test used to read `[ OPT == 'band' ]`, comparing the literal
# string "OPT", so it could never be true)
if [ "$OPT" == 'band' ]; then
  SITE='_CNAME.bandcamp.com'
fi

echo "URL : '$URL'"
echo "SITE : '$SITE'"

# `F`: only list the formats available for the URL, then stop
if [ "$OPT" == 'F' ]; then
  YDL "$URL" -F
  # report the listing's own status rather than an unconditional failure
  exit $?
fi

# `22`: pass `-f 22` to every yt-dlp call
if [ "$OPT" == '22' ]; then
  DN_TYPE='-f 22'
fi
#######################################
# Convert audio files to mp3 (libmp3lame, -qscale:a 2) and remove the
# originals. The result is written next to the source, with the
# extension replaced by ' [conv].mp3'.
# Arguments: "$@" - paths of the files to convert
# Outputs:   progress lines on stdout, problems on stderr
# Returns:   0 when every file was converted, 1 otherwise
#######################################
to_mp3()
{
  local src dst
  local rc=0
  # last `.ext` of the name, where ext consists of word characters only
  local -r ext_re='^(.*)\.[[:alnum:]_]+$'

  echo "to mp3: [ ${@} ]"
  for src in "$@"; do
    if [[ ! "$src" =~ $ext_re ]]; then
      # without an extension the output name would equal the input name,
      # and the source would then be deleted
      echo "to_mp3: skipping '$src' (no file extension)" >&2
      rc=1
      continue
    fi
    dst="${BASH_REMATCH[1]} [conv].mp3"

    echo "'$src'"
    echo ">> '$dst'"
    if ffmpeg -i "$src" \
      -codec:a libmp3lame \
      -qscale:a 2 \
      -loglevel quiet \
      -y "$dst"; then
      # only drop the original once the conversion is known to have worked
      rm -- "$src"
    else
      echo "to_mp3: ffmpeg failed, keeping '$src'" >&2
      rc=1
    fi
  done
  return "$rc"
}
#######################################
# Download one bandcamp track, preferring the mp3-320 format and
# retrying with plain mp3 when the first attempt exits with status 1.
# Globals:   MUSIC_PATH, ARTIST (read) - used to build the target path
# Arguments: $1 - track URL
# Returns:   status of the last YDL attempt (0 on success; the old
#            `[ $? -eq 1 ] && ...` form returned 1 whenever the first
#            attempt succeeded)
#######################################
get_bandcamp_track()
{
  local track_url="$1"
  local out_tpl="$MUSIC_PATH/$ARTIST/%(album)s/%(track_number)02d %(title).100B [%(id)s].%(ext)s"
  local rc=0

  echo "> > track: '$track_url'"

  YDL "$track_url" -f mp3-320 \
    -o "$out_tpl" || rc=$?

  # retry only on status 1; any other failure status is passed through
  if [ "$rc" -eq 1 ]; then
    rc=0
    YDL "$track_url" -f mp3 \
      -o "$out_tpl" || rc=$?
  fi

  return "$rc"
}
#######################################
# Download every track linked from a bandcamp album page.
# Globals:   SITE (read) - host whose /track links are collected
# Arguments: $1 - album URL
# Outputs:   progress lines on stdout
#######################################
get_bandcamp_album()
{
  local album_url="$1"
  # kept local: the discography loop that calls this function uses
  # variables named SUBLINKS and LINK for its own iteration
  local -a track_links=()
  local track_link

  echo "> > album: '$album_url'"

  # mapfile reads one link per line without word-splitting or
  # glob-expanding the scraped text
  mapfile -t track_links < <(
    lynx -dump -listonly -nonumbers "$album_url" \
      | grep -Eiw "^(https://$SITE/track)" \
      | sd '(\?|#).+$' '' \
      | sort -u
  )

  for track_link in "${track_links[@]}"; do
    get_bandcamp_track "$track_link"
  done
}
# dispatch on the host name extracted from the URL; each arm chooses its
# own target directory and output-name template
case "$SITE" in

  'music.youtube.com')
    echo '> youtube-music'
    YDL "$URL" -f 251 \
      -o "$MUSIC_PATH/%(artist)s - %(title).100B [%(id)s].%(ext)s"
    ;;

  'youtube.com' | 'm.youtube.com' | 'youtu.be')
    echo '> youtube'
    # OPT selects the target: m = music, a = audio, anything else = video
    case "$OPT" in
      'm')
        echo '> > music'
        YDL "$URL" -f 251 \
          -o "$MUSIC_PATH/%(title).100B [%(id)s].%(ext)s"
        ;;
      'a')
        echo '> > audio'
        YDL "$URL" -f 251 \
          -o "$AUDIO_PATH/%(title).100B [%(id)s].%(ext)s"
        ;;
      *)
        echo '> > _video_'
        YDL "$URL" \
          -o "$DEFAULT_PATH/yt-%(title).100B [%(id)s].%(ext)s"
        ;;
    esac
    ;;

  'soundcloud.com' | *.soundcloud.com)
    echo '> soundcloud'
    # the first path segment of the URL is used as the artist name
    ARTIST="$(echo "$URL" \
      | sd 'https?://([\w-]+\.)?soundcloud.com/' '' \
      | sd '/.*' ''
    )"
    echo "ARTIST : '$ARTIST'"
    YDL "$URL" --add-metadata \
      --postprocessor-args "-metadata artist='$ARTIST'" \
      -o "$MUSIC_PATH/$ARTIST/%(title).200B.%(ext)s"

    # convert every wav/flac in the artist directory. A glob that matches
    # nothing stays a literal pattern, which the -e test filters out.
    # The whole array is passed on: a bare "$WAV_FILES" is only element 0.
    WAV_FILES=( "$MUSIC_PATH/$ARTIST"/*.wav )
    if [[ -e "${WAV_FILES[0]}" ]]; then
      to_mp3 "${WAV_FILES[@]}"
    fi
    FLAC_FILES=( "$MUSIC_PATH/$ARTIST"/*.flac )
    if [[ -e "${FLAC_FILES[0]}" ]]; then
      to_mp3 "${FLAC_FILES[@]}"
    fi
    ;;

  'bandcamp.com' | *.bandcamp.com)
    echo '> bandcamp'
    ARTIST=''
    # (this test used to read `[ OPT == 'band' ]`, comparing the literal
    # string "OPT", so the first branch could never run)
    if [ "$OPT" == 'band' ]; then
      # `band` option: take the artist (first host label) and the real
      # host from the URL itself rather than from the current SITE value
      ARTIST="$(echo "$URL" \
        | sd '^https?://(www\.)?' '' \
        | sd '/.*$' '' \
        | sd '\..+$' ''
      )"
      SITE="$(echo "$URL" \
        | sd '^https?://(www\.)?' '' \
        | sd '/.*$' ''
      )"
    else
      # artist = sub-domain in front of .bandcamp.com
      ARTIST="$(echo "$SITE" \
        | sd '\.bandcamp\.com.*' ''
      )"
      if [ -z "$ARTIST" ]; then
        ARTIST="$SITE"
      fi
    fi
    echo "ARTIST : '$ARTIST'"

    if [[ "$URL" == */track/* ]]; then
      get_bandcamp_track "$URL"
    elif [[ "$URL" == */album/* ]]; then
      get_bandcamp_album "$URL"
    else
      echo '> > discography'
      # one link per line, read without word-splitting or globbing;
      # sort -u also removes duplicates that are not adjacent
      mapfile -t SUBLINKS < <(
        lynx -dump -listonly -nonumbers "$URL" \
          | grep -Eiw "^(https://$SITE/(album|track))" \
          | sd '\?action=download' '' \
          | sort -u
      )
      for LINK in "${SUBLINKS[@]}"; do
        if [[ "$LINK" == */track/* ]]; then
          get_bandcamp_track "$LINK"
        elif [[ "$LINK" == */album/* ]]; then
          get_bandcamp_album "$LINK"
        fi
      done
    fi

    # files that ended up as `NA/NA <title>` are moved one level up by
    # dropping both placeholders; the NA directory is removed afterwards
    RENAME_LIST=( "$MUSIC_PATH/$ARTIST/NA"/* )
    if [[ -e "${RENAME_LIST[0]}" ]]; then
      for RENAME_FROM in "${RENAME_LIST[@]}"; do
        RENAME_TO="$(echo "$RENAME_FROM" \
          | sd '/NA/NA ' '/'
        )"
        if [ "$RENAME_FROM" != "$RENAME_TO" ]; then
          echo "'$RENAME_FROM'"
          echo ">> '$RENAME_TO'"
          mv "$RENAME_FROM" "$RENAME_TO"
        fi
      done
      rmdir "$MUSIC_PATH/$ARTIST/NA/"
    fi
    ;;

  'twitter.com' | *.twitter.com)
    echo '> twitter'
    # file name is built from the URL: user--tweet-id
    CLEAN_URL="$(echo "$URL" \
      | sd '^https?://([\w-]+\.)?twitter.com/' '' \
      | sd '/?\?.*$' '' \
      | sd -- '/status/' '--' \
      | sd '/' '-'
    )"
    echo "CLEAN_URL : '$CLEAN_URL'"
    YDL "$URL" -o "$DEFAULT_PATH/tw--$CLEAN_URL.%(ext)s"
    ;;

  'reddit.com' | *.reddit.com)
    echo '> reddit'
    # file name is built from the URL: subreddit--post-id-slug
    CLEAN_URL="$(echo "$URL" \
      | sd '^https?://([\w-]+\.)?reddit.com/(r/)?' '' \
      | sd '/?\?.*$' '' \
      | sd -- '/?comments/' '--' \
      | sd '/' '-'
    )"
    echo "CLEAN_URL : '$CLEAN_URL'"
    YDL "$URL" -o "$DEFAULT_PATH/r-$CLEAN_URL.%(ext)s"
    ;;

  'instagram.com' | *.instagram.com)
    echo '> instagram'
    # post id taken from the /p/<id> part of the URL
    CLEAN_URL="$(echo "$URL" \
      | sd '^https?://([\w-]+\.)?instagram.com/p/' '' \
      | sd '/?\?.*$' '' \
      | sd '/+$' '' \
      | sd '/' '-'
    )"
    echo "CLEAN_URL : '$CLEAN_URL'"
    # the UTC timestamp is appended to the file name
    DATE="$(date -u "+%Y%m%d%H%M%S")"
    YDL "$URL" -o "$DEFAULT_PATH/ig-%(uploader_id).50B-$CLEAN_URL-(%(title).50B)_$DATE.%(ext)s"
    ;;

  *)
    echo '> _default_'
    # URL without scheme, with the first run of path/whitespace/special
    # characters replaced by a dash
    CLEAN_URL="$(echo "$URL" \
      | sd '^https?://(www\.)?' '' \
      | sd '/+$' '' \
      | sd '[\\\/\s\?\^\*\|:><]+' '-'
    )"
    # keep only the part before any remaining slash
    CLEAN_URL=${CLEAN_URL%%/*}
    echo "CLEAN_URL : '$CLEAN_URL'"
    DATE="$(date -u "+%Y%m%d%H%M%S")"
    YDL "$URL" -o "$DEFAULT_PATH/$CLEAN_URL-[%(id)s]_$DATE.%(ext)s"
    ;;

esac