diff --git a/README.md b/README.md
deleted file mode 100644
index 7119546..0000000
--- a/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# y
-
-algorithmically-enhanced youtube-dl wrapper for audio-downloads
\ No newline at end of file
diff --git a/install.sh b/install.sh
new file mode 100755
index 0000000..fe112dc
--- /dev/null
+++ b/install.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# y install script
+
+## use
+# bash ./install.sh
+
+PREFIX=~/.local
+mkdir -p "$PREFIX/src" "$PREFIX/bin" || exit 1
+
+
+# stop on failure: without the clone there is nothing to link
+git clone --depth 1 \
+  'https://source.garden/scripts/y.git' \
+  "$PREFIX/src/y/" \
+  || exit 1
+
+chmod +x "$PREFIX/src/y/y.sh"
+
+# -f: replace a link left behind by an earlier install
+ln -sf "$PREFIX/src/y/y.sh" \
+  "$PREFIX/bin/y"
diff --git a/meta.kdl b/meta.kdl
new file mode 100644
index 0000000..af3ebd0
--- /dev/null
+++ b/meta.kdl
@@ -0,0 +1,7 @@
+title "y"
+description "algorithmically-enhanced youtube-dl wrapper for audio-downloads"
+type "code"
+tags "bash" "script" "file-download" "automation"
+license "AGPL"
+homepage "https://dym.sh/y/"
+source "https://source.garden/scripts/y/"
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..73f43a0
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,51 @@
+# y
+
+> algorithmically-enhanced [youtube-dl](https://yt-dl.org) wrapper for audio-downloads
+
+
+## install
+1. look at the [`install.sh`](./install.sh) file for instructions
+1. adjust `PREFIX` as you wish
+1. make sure the final directory is on `$PATH`
+
+## use
+`y https://orax.bandcamp.com/track/ectoplasmic`
+
+(adjust the [config.sh](./config.sh) file to set your preferred download paths)
+
+
+## requirements
+- [`yt-dlp`](https://github.com/yt-dlp/yt-dlp) – the main thing
+- [`ffmpeg`](https://ffmpeg.org/download.html) – to convert files of different formats
+- [`sd`](https://github.com/chmln/sd) – a better `sed`
+- [`lynx`](https://github.com/lynx/lynx) – to easily scrape web-pages
+
+
+## site-specific improvements
+
+### bandcamp
+1. prefers mp3-320 (checks every track)
+2. downloads entire discography if given root-domain or `/music` url
+
+### twitter
+file-name is now twitter-username and tweet-id
+(instead of full text of a tweet, which can result in strings over 255 characters long, aborting the entire process)
+
+### reddit
+file-name is now subreddit, post-id, and url-slug
+(instead of full text of a title, which can result in strings over 255 characters long, aborting the entire process)
+
+### youtube
+allows setting several predetermined filesystem-paths to keep file-categories separate:
+
+- `y m <url>` – music (automatically chosen for `music.youtube.com`)
+- `y a <url>` – audiobooks
+
+### soundcloud
+1. occasional wav- and flac-files are converted to mp3 (VBR, `-qscale:a 2`)
+1. artist is set based on url (instead of having none by default)
+
+### instagram
+- file-name is now username, video id, and number of the video if there are multiple
+(instead of "Video by username", which refuses to download multiple videos from different id's: "Video by username.mp4 has already been downloaded")
+- TODO: expose playlist's `%(uploader_id)s`, `nr_current`, `nr_total` to the naming api
diff --git a/y.sh b/y.sh
new file mode 100755
index 0000000..39c15c7
--- /dev/null
+++ b/y.sh
@@ -0,0 +1,353 @@
+#!/bin/bash
+
+## y
+# > algorithmically-enhanced youtube-dl/yt-dlp wrapper for audio-downloads
+
+## use
+# y [OPT] URL
+#   F    – list the formats yt-dlp offers for URL, then exit
+#   22   – pass `-f 22` to yt-dlp
+#   m    – (youtube) store under $MUSIC_PATH
+#   a    – (youtube) store under $AUDIO_PATH
+#   band – treat URL as a bandcamp page on a custom domain
+# $MUSIC_PATH, $AUDIO_PATH and $DEFAULT_PATH are expected from `config.sh`
+
+## requires
+# - `python3 -m pip install --upgrade yt-dlp`
+#   – the main thing, you should already have it
+# - `sudo apt install -y ffmpeg` – to convert files of different formats
+# - `cargo install sd` – a better `sed`
+# - `sudo apt install -y lynx` – to easily scrape web-pages
+
+# extra yt-dlp options as one space-separated string, e.g. '-f 22'
+DN_TYPE=''
+
+# run yt-dlp with $DN_TYPE, firefox cookies, and the given arguments
+# $@      : arguments handed to yt-dlp untouched
+# returns : exit status of yt-dlp
+# exits   : with status 1 when yt-dlp is not installed
+YDL()
+{
+  local -a TYPE_ARGS=()
+
+  if command -v yt-dlp > /dev/null; then
+    # IFS is new-line while the script runs, so split on spaces here,
+    # otherwise '-f 22' reaches yt-dlp as one single argument
+    if [ -n "$DN_TYPE" ]; then
+      IFS=' ' read -r -a TYPE_ARGS <<< "$DN_TYPE"
+    fi
+    yt-dlp "${TYPE_ARGS[@]}" --cookies-from-browser firefox "$@"
+    # --verbose
+    # --restrict-filenames
+    return
+  fi
+
+  echo 'no youtube-dl/yt-dlp found, you can install it with:
+
+  sudo apt install -y \
+    python3 python3-pip
+
+  python3 -m pip install --upgrade \
+    yt-dlp
+  ' >&2
+  exit 1
+}
+
+# load config
+cd "$(dirname "$(realpath "$0")")" || exit 1
+if [ ! -r ./config.sh ]; then
+  echo "no config.sh found next to '$0'" >&2
+  exit 1
+fi
+. ./config.sh
+
+# use new-line as separator of array elements
+IFS=$'\n'
+
+# parse cli options
+OPT="$1"
+URL="$2"
+if [ -z "$URL" ]; then
+  OPT=''
+  URL="$1"
+fi
+if [ -z "$URL" ]; then
+  echo 'usage: y [F|22|m|a|band] URL' >&2
+  exit 1
+fi
+
+
+SITE=$( printf '%s\n' "$URL" \
+  | sd 'https?://(www\.)?' '' \
+  | sd '/.*' ''
+)
+# custom domains are routed to the bandcamp branch of the `case` below
+if [ "$OPT" == 'band' ]; then
+  SITE='_CNAME.bandcamp.com'
+fi
+
+echo "URL : '$URL'"
+echo "SITE : '$SITE'"
+
+if [ "$OPT" == 'F' ]; then
+  YDL "$URL" -F
+  exit $?
+fi
+
+if [ "$OPT" == '22' ]; then
+  DN_TYPE='-f 22'
+fi
+
+# convert the given audio files to mp3, removing each converted source
+# $@ : paths of the files to convert
+to_mp3()
+{
+  local UNCOMPRESSED MP3
+
+  echo "to mp3: [ ${@} ]"
+
+  for UNCOMPRESSED in "${@}" ; do
+    MP3=$( printf '%s\n' "$UNCOMPRESSED" \
+      | sd '\.\w+$' ' [conv].mp3'
+    )
+    echo "'$UNCOMPRESSED'"
+    echo ">> '$MP3'"
+
+    # the source is the only copy, so keep it when ffmpeg fails
+    if ffmpeg -i "$UNCOMPRESSED" \
+      -codec:a libmp3lame \
+      -qscale:a 2 \
+      -loglevel quiet \
+      -y "$MP3"
+    then
+      rm -- "$UNCOMPRESSED"
+    else
+      echo "conversion failed, kept '$UNCOMPRESSED'" >&2
+    fi
+  done
+}
+
+# run to_mp3 on every file with the given extension in a directory
+# $1 : directory
+# $2 : file extension, without the dot
+convert_dir()
+{
+  local -a FOUND
+
+  # nullglob: no match gives an empty list instead of the raw pattern
+  shopt -s nullglob
+  FOUND=( "$1"/*."$2" )
+  shopt -u nullglob
+
+  if [ "${#FOUND[@]}" -gt 0 ]; then
+    to_mp3 "${FOUND[@]}"
+  fi
+}
+
+# download one bandcamp track, mp3-320 first, plain mp3 as fallback
+# $1    : url of the track
+# reads : $MUSIC_PATH, $ARTIST
+get_bandcamp_track()
+{
+  local TEMPLATE="$MUSIC_PATH/$ARTIST/%(album)s/%(track_number)02d %(title).100B [%(id)s].%(ext)s"
+
+  echo "> > track: '$1'"
+  YDL "$1" -f mp3-320 -o "$TEMPLATE" \
+    || YDL "$1" -f mp3 -o "$TEMPLATE"
+}
+
+# download every track linked from a bandcamp album page
+# $1    : url of the album
+# reads : $SITE, to keep only links on the same host
+get_bandcamp_album()
+{
+  local LINK
+  local -a SUBLINKS
+
+  echo "> > album: '$1'"
+  SUBLINKS=( $( lynx -dump -listonly -nonumbers "$1" \
+    | grep -Eiw "^(https://$SITE/track)" \
+    | sd '(\?|#).+$' '' \
+    | sort -u
+  ) )
+  for LINK in "${SUBLINKS[@]}" ; do
+    get_bandcamp_track "$LINK"
+  done
+}
+
+
+
+case "$SITE" in
+
+'music.youtube.com')
+  echo '> youtube-music'
+  YDL "$URL" -f 251 \
+    -o "$MUSIC_PATH/%(artist)s - %(title).100B [%(id)s].%(ext)s"
+  ;;
+
+'youtube.com'| \
+'m.youtube.com'| \
+'youtu.be')
+  echo '> youtube'
+  case "$OPT" in
+  'm')
+    echo '> > music'
+    YDL "$URL" -f 251 \
+      -o "$MUSIC_PATH/%(title).100B [%(id)s].%(ext)s"
+    ;;
+  'a')
+    echo '> > audio'
+    YDL "$URL" -f 251 \
+      -o "$AUDIO_PATH/%(title).100B [%(id)s].%(ext)s"
+    ;;
+  *)
+    echo '> > _video_'
+    YDL "$URL" \
+      -o "$DEFAULT_PATH/yt-%(title).100B [%(id)s].%(ext)s"
+    ;;
+  esac
+  ;;
+
+
+'soundcloud.com'| \
+*.soundcloud.com )
+  echo '> soundcloud'
+  ARTIST=$( printf '%s\n' "$URL" \
+    | sd 'https?://([\w-]+\.)?soundcloud.com/' '' \
+    | sd '/.*' ''
+  )
+  echo "ARTIST : '$ARTIST'"
+  YDL "$URL" --add-metadata \
+    --postprocessor-args "-metadata artist='$ARTIST'" \
+    -o "$MUSIC_PATH/$ARTIST/%(title).200B.%(ext)s"
+
+  # every matching file in the artist directory is converted
+  convert_dir "$MUSIC_PATH/$ARTIST" 'wav'
+  convert_dir "$MUSIC_PATH/$ARTIST" 'flac'
+  ;;
+
+'bandcamp.com'| \
+*.bandcamp.com )
+  echo '> bandcamp'
+  ARTIST=''
+  if [ "$OPT" == 'band' ]; then
+    # custom domain: artist is the first label of the host name
+    ARTIST=$( printf '%s\n' "$URL" \
+      | sd '^https?://(www\.)?' '' \
+      | sd '/.*$' '' \
+      | sd '\..+$' ''
+    )
+    SITE=$( printf '%s\n' "$URL" \
+      | sd '^https?://(www\.)?' '' \
+      | sd '/.*$' ''
+    )
+  else
+    ARTIST=$( printf '%s\n' "$SITE" \
+      | sd '\.bandcamp\.com.*' ''
+    )
+    if [ -z "$ARTIST" ]; then
+      ARTIST="$SITE"
+    fi
+  fi
+
+  echo "ARTIST : '$ARTIST'"
+
+  if [[ "$URL" =~ '/track/' ]]; then
+    get_bandcamp_track "$URL"
+  elif [[ "$URL" =~ '/album/' ]]; then
+    get_bandcamp_album "$URL"
+  else
+    echo '> > discography'
+    # awk drops repeated links wherever they are, keeping page order
+    SUBLINKS=( $( lynx -dump -listonly -nonumbers "$URL" \
+      | grep -Eiw "^(https://$SITE/(album|track))" \
+      | sd '\?action=download' '' \
+      | awk '!SEEN[$0]++'
+    ) )
+    for LINK in "${SUBLINKS[@]}" ; do
+      if [[ "$LINK" =~ '/track/' ]]; then
+        get_bandcamp_track "$LINK"
+      elif [[ "$LINK" =~ '/album/' ]]; then
+        get_bandcamp_album "$LINK"
+      fi
+    done
+  fi
+
+  # files saved as 'NA/NA title' (presumably yt-dlp's placeholder for a
+  # missing album / track number) are moved one level up, prefix removed
+  shopt -s nullglob
+  RENAME_LIST=( "$MUSIC_PATH/$ARTIST/NA"/* )
+  shopt -u nullglob
+  if [ "${#RENAME_LIST[@]}" -gt 0 ]; then
+    for RENAME_FROM in "${RENAME_LIST[@]}"; do
+      RENAME_TO=$( printf '%s\n' "$RENAME_FROM" \
+        | sd '/NA/NA ' '/'
+      )
+      if [ "$RENAME_FROM" != "$RENAME_TO" ]; then
+        echo "'$RENAME_FROM'"
+        echo ">> '$RENAME_TO'"
+        mv "$RENAME_FROM" "$RENAME_TO"
+      fi
+    done
+    rmdir "$MUSIC_PATH/$ARTIST/NA/"
+  fi
+
+  ;;
+
+'twitter.com'| \
+*.twitter.com )
+  echo '> twitter'
+  CLEAN_URL=$( printf '%s\n' "$URL" \
+    | sd '^https?://([\w-]+\.)?twitter.com/' '' \
+    | sd '/?\?.*$' '' \
+    | sd -- '/status/' '--' \
+    | sd '/' '-'
+  )
+  echo "CLEAN_URL : '$CLEAN_URL'"
+  YDL "$URL" -o "$DEFAULT_PATH/tw--$CLEAN_URL.%(ext)s"
+  ;;
+
+
+'reddit.com'| \
+*.reddit.com )
+  echo '> reddit'
+  CLEAN_URL=$( printf '%s\n' "$URL" \
+    | sd '^https?://([\w-]+\.)?reddit.com/(r/)?' '' \
+    | sd '/?\?.*$' '' \
+    | sd -- '/?comments/' '--' \
+    | sd '/' '-'
+  )
+  echo "CLEAN_URL : '$CLEAN_URL'"
+  YDL "$URL" -o "$DEFAULT_PATH/r-$CLEAN_URL.%(ext)s"
+  ;;
+
+
+'instagram.com'| \
+*.instagram.com )
+  echo '> instagram'
+  CLEAN_URL=$( printf '%s\n' "$URL" \
+    | sd '^https?://([\w-]+\.)?instagram.com/p/' '' \
+    | sd '/?\?.*$' '' \
+    | sd '/+$' '' \
+    | sd '/' '-'
+  )
+  echo "CLEAN_URL : '$CLEAN_URL'"
+  DATE=$( date -u "+%Y%m%d%H%M%S" )
+  YDL "$URL" -o "$DEFAULT_PATH/ig-%(uploader_id).50B-$CLEAN_URL-(%(title).50B)_$DATE.%(ext)s"
+  ;;
+
+
+*)
+  echo '> _default_'
+  CLEAN_URL=$( printf '%s\n' "$URL" \
+    | sd '^https?://(www\.)?' '' \
+    | sd '/+$' '' \
+    | sd '[\\/\s\?\^\*\|:><]+' '-'
+  )
+  CLEAN_URL=${CLEAN_URL%%/*}
+  echo "CLEAN_URL : '$CLEAN_URL'"
+  DATE=$( date -u "+%Y%m%d%H%M%S" )
+  YDL "$URL" -o "$DEFAULT_PATH/$CLEAN_URL-[%(id)s]_$DATE.%(ext)s"
+  ;;
+
+esac