re-init

2023-06-23 20:07:30 +02:00 · 2023-06-23 20:07:30 +02:00 · b106069d2e
parent d413c9ad42
commit b106069d2e
5 changed files with 368 additions and 3 deletions
--- a/README.md
+++ b/README.md
@ -1,3 +0,0 @@
-# y
-
-algorithmically-enhanced youtube-dl wrapper for audio-downloads
--- a/install.sh
+++ b/install.sh
@ -0,0 +1,20 @@
+#!/bin/bash
+
+# y install script
+
+## use
+# bash ./install.sh
+
+# stop at the first failing step, so that a failed clone
+# is not followed by `chmod`/`ln` on files which do not exist
+set -e
+
+# installation root:
+# sources go to `$PREFIX/src/y/`, the launcher to `$PREFIX/bin/y`
+PREFIX=~/.local
+mkdir -p "$PREFIX/src"
+mkdir -p "$PREFIX/bin"
+
+
+git clone --depth 1 \
+  'https://source.garden/scripts/y.git' \
+  "$PREFIX/src/y/"
+
+chmod +x "$PREFIX/src/y/y.sh"
+
+# make the script callable as `y`
+ln -s "$PREFIX/src/y/y.sh" \
+      "$PREFIX/bin/y"
--- a/meta.kdl
+++ b/meta.kdl
@ -0,0 +1,7 @@
+title        "y"
+description  "algorithmically-enhanced youtube-dl wrapper for audio-downloads"
+type         "code"
+tags         "bash"  "script"  "file-download"  "automation"
+license      "AGPL"
+homepage     "https://dym.sh/y/"
+source       "https://source.garden/scripts/y/"
--- a/readme.md
+++ b/readme.md
@ -0,0 +1,51 @@
+# y
+
+> algorithmically-enhanced [youtube-dl](https://yt-dl.org) wrapper for audio-downloads
+
+
+## install
+1. look at [`install.sh`](./install.sh) file for instructions
+1. adjust `PREFIX` at wish
+1. make sure the final directory is on `$PATH`
+
+## use
+`y https://orax.bandcamp.com/track/ectoplasmic`
+
+(adjust [config.sh](./config.sh) file to set your preferred download paths)
+
+
+## requirements
+- [`youtube_dl`](https://github.com/ytdl-org/youtube-dl) – the main thing
+- [`ffmpeg`](https://ffmpeg.org/download.html) – to convert files of different formats
+- [`sd`](https://github.com/chmln/sd) – a better `sed`
+- [`lynx`](https://github.com/lynx/lynx) – to easily scrape web-pages
+
+
+## site-specific improvements
+
+### bandcamp
+1. prefers mp3-320 (checks every track)
+2. downloads entire discography if given root-domain or `/music` url
+
+### twitter
+file-name is now twitter-username and tweet-id
+(instead of full text of a tweet, which can result in strings over 255 characters long, aborting the entire process)
+
+### reddit
+file-name is now subreddit, post-id, and url-slug
+(instead of full text of a title, which can result in strings over 255 characters long, aborting the entire process)
+
+### youtube
+allows to set several predetermined filesystem-paths to keep file-categories separately:
+
+- `y m <youtube_URL>` – music (automatically chosen for `music.youtube.com`)
+- `y a <youtube_URL>` – audiobooks
+
+### soundcloud
+1. occasional wav-files are converted to mp3 320kb/s
+1. artist is set based on url (instead of having none by default)
+
+### instagram
+- file-name is now username, video id, and number of the video if there are multiple
+(instead of "Video by username", which refuses to download multiple videos from different id's: "Video by username.mp4 has already been downloaded")
+- TODO: expose playlist's `%(uploader_id)s`, `nr_current`, `nr_total` to the naming api
--- a/y.sh
+++ b/y.sh
@ -0,0 +1,290 @@
+#!/bin/bash
+
+## y
+# > algorithmically-enhanced youtube-dl/yt-dlp wrapper for audio-downloads
+
+## requires
+# - `python3 -m pip install --upgrade yt-dlp`
+#   – the main thing, you should already have it
+# - `sudo apt install -y ffmpeg` – to convert files of different formats
+# - `cargo install sd` – a better `sed`
+# - `sudo apt install -y lynx` – to easily scrape web-pages
+
+DN_TYPE=''
+
+YDL()
+{
+
+  which yt-dlp
+  if [ $? -eq 0 ]; then
+    yt-dlp  $DN_TYPE  --cookies-from-browser firefox  $@
+    # --verbose
+    # --restrict-filenames
+    return
+  fi
+
+  echo 'no youtube-dl/yt-dlp found, you can install it with:
+
+        sudo apt install -y \
+          python3 python3-pip
+
+        python3 -m pip install --upgrade \
+          yt-dlp
+       '
+  exit 1
+}
+
+# load config
+cd `realpath "$0" | xargs dirname`
+. config.sh
+
+# use new-line as separator of array elements
+IFS=$'\n'
+
+# parse cli options
+OPT="$1"
+URL="$2"
+if [ -z "$URL" ]; then
+  OPT=''
+  URL="$1"
+fi
+
+
+SITE=` echo "$URL" \
+     | sd 'https?://(www\.)?' '' \
+     | sd '/.*' '' \
+     `
+if [ OPT == 'band' ]; then
+  SITE='_CNAME.bandcamp.com'
+fi
+
+echo "URL : '$URL'"
+echo "SITE : '$SITE'"
+
+if [ "$OPT" == 'F' ]; then
+  YDL "$URL" -F
+  exit 1
+fi
+
+if [ "$OPT" == '22' ]; then
+  DN_TYPE='-f 22'
+fi
+
+to_mp3()
+{
+  echo "to mp3: [ ${@} ]"
+
+  for UNCOMPRESSED in "${@}" ; do
+    MP3=` echo "$UNCOMPRESSED" \
+        | sd '\.\w+$' ' [conv].mp3' \
+        `
+    echo "'$UNCOMPRESSED'"
+    echo ">> '$MP3'"
+
+    ffmpeg -i "$UNCOMPRESSED" \
+           -codec:a libmp3lame \
+           -qscale:a 2 \
+           -loglevel quiet \
+           -y "$MP3"
+
+    rm "$UNCOMPRESSED"
+  done
+}
+
+get_bandcamp_track()
+{
+  echo "> > track: '$1'"
+  YDL "$1" -f mp3-320 \
+    -o "$MUSIC_PATH/$ARTIST/%(album)s/%(track_number)02d %(title).100B [%(id)s].%(ext)s"
+  [ $? -eq 1 ] \
+    && YDL "$1" -f mp3 \
+       -o "$MUSIC_PATH/$ARTIST/%(album)s/%(track_number)02d %(title).100B [%(id)s].%(ext)s"
+}
+
+get_bandcamp_album()
+{
+  echo "> > album: '$1'"
+  SUBLINKS=(` lynx -dump -listonly -nonumbers "$1" \
+            | grep -Eiw "^(https://$SITE/track)" \
+            | sd '(\?|#).+$' '' \
+            | sort -u \
+            `)
+  for LINK in "${SUBLINKS[@]}" ; do
+    get_bandcamp_track "$LINK"
+  done
+}
+
+
+
+# pick the download recipe by the host-name of the url
+case "$SITE" in
+
+# youtube-music: format 251, saved to `$MUSIC_PATH` as `<artist> - <title> [<id>]`
+'music.youtube.com')
+  echo '> youtube-music'
+  YDL "$URL" -f 251 \
+    -o "$MUSIC_PATH/%(artist)s - %(title).100B [%(id)s].%(ext)s"
+  ;;
+
+# youtube: the target directory depends on the cli option
+'youtube.com'| \
+'m.youtube.com'| \
+'youtu.be')
+  echo '> youtube'
+  case "$OPT" in
+  # `y m <URL>` – format 251 into `$MUSIC_PATH`
+  'm')
+    echo '> > music'
+    YDL "$URL" -f 251 \
+      -o "$MUSIC_PATH/%(title).100B [%(id)s].%(ext)s"
+    ;;
+  # `y a <URL>` – format 251 into `$AUDIO_PATH`
+  'a')
+    echo '> > audio'
+    YDL "$URL" -f 251 \
+      -o "$AUDIO_PATH/%(title).100B [%(id)s].%(ext)s"
+    ;;
+  # anything else – no format given, saved to `$DEFAULT_PATH` with a `yt-` prefix
+  *)
+    echo '> > _video_'
+    YDL "$URL" \
+      -o "$DEFAULT_PATH/yt-%(title).100B [%(id)s].%(ext)s"
+    ;;
+  esac
+  ;;
+
+
+# soundcloud: artist is taken from the url, lossless files are converted to mp3
+'soundcloud.com'| \
+*.soundcloud.com )
+  echo '> soundcloud'
+  # the first path-segment of the url is used as the artist
+  ARTIST=` echo "$URL" \
+         | sd 'https?://([\w-]+\.)?soundcloud.com/' '' \
+         | sd '/.*' '' \
+         `
+  echo "ARTIST : '$ARTIST'"
+  YDL "$URL" --add-metadata \
+    --postprocessor-args "-metadata artist='$ARTIST'" \
+    -o "$MUSIC_PATH/$ARTIST/%(title).200B.%(ext)s"
+
+  # globs instead of parsing `ls`: file-names are taken as they are,
+  # and the whole list (not just its first element) is handed to `to_mp3`
+  WAV_FILES=()
+  for FILE in "$MUSIC_PATH/$ARTIST"/*.wav ; do
+    # an unmatched glob stays a literal pattern, skip it
+    [ -e "$FILE" ] || continue
+    WAV_FILES+=("$FILE")
+  done
+  [ ${#WAV_FILES[@]} -gt 0 ] \
+    && to_mp3 "${WAV_FILES[@]}"
+
+  FLAC_FILES=()
+  for FILE in "$MUSIC_PATH/$ARTIST"/*.flac ; do
+    [ -e "$FILE" ] || continue
+    FLAC_FILES+=("$FILE")
+  done
+  [ ${#FLAC_FILES[@]} -gt 0 ] \
+    && to_mp3 "${FLAC_FILES[@]}"
+  ;;
+
+# bandcamp: single track, album, or every album/track linked from the page
+'bandcamp.com'| \
+*.bandcamp.com )
+  echo '> bandcamp'
+  ARTIST=''
+  # (the test has to read the variable "$OPT", not the literal word OPT)
+  if [ "$OPT" == 'band' ]; then
+    # `band` option: artist is the first label of the host-name,
+    # and the links are searched under the real host-name of the url
+    ARTIST=` echo "$URL" \
+           | sd '^https?://(www\.)?' '' \
+           | sd '/.*$' '' \
+           | sd '\..+$' '' \
+           `
+     SITE=` echo "$URL" \
+          | sd '^https?://(www\.)?' '' \
+          | sd '/.*$' '' \
+          `
+  else
+    # artist is the sub-domain of `bandcamp.com`
+    ARTIST=` echo "$SITE" \
+           | sd '\.bandcamp\.com.*' '' \
+           `
+    if [ -z "$ARTIST" ]; then
+      ARTIST="$SITE"
+    fi
+  fi
+
+  echo "ARTIST : '$ARTIST'"
+
+  if [[ "$URL" =~ '/track/' ]]; then
+    get_bandcamp_track "$URL"
+  elif [[ "$URL" =~ '/album/' ]]; then
+    get_bandcamp_album "$URL"
+  else
+    echo '> > discography'
+    SUBLINKS=(` lynx -dump -listonly -nonumbers "$URL" \
+              | grep -Eiw "^(https://$SITE/(album|track))" \
+              | sd '\?action=download' '' \
+              | uniq \
+              `)
+    for LINK in "${SUBLINKS[@]}" ; do
+      if [[ "$LINK" =~ '/track/' ]]; then
+        get_bandcamp_track "$LINK"
+      elif [[ "$LINK" =~ '/album/' ]]; then
+        get_bandcamp_album "$LINK"
+      fi
+    done
+  fi
+
+  # files which ended up as `<artist>/NA/NA <title>…`
+  # are moved up to `<artist>/<title>…`, then the `NA` directory is removed;
+  # a glob is used instead of parsing the output of `ls`
+  if [ -d "$MUSIC_PATH/$ARTIST/NA" ]; then
+    for RENAME_FROM in "$MUSIC_PATH/$ARTIST/NA"/* ; do
+      # an unmatched glob stays a literal pattern, skip it
+      [ -e "$RENAME_FROM" ] || continue
+      RENAME_TO=` echo "$RENAME_FROM" \
+                | sd '/NA/NA ' '/' \
+                `
+      if [ "$RENAME_FROM" != "$RENAME_TO" ]; then
+        echo "'$RENAME_FROM'"
+        echo ">> '$RENAME_TO'"
+        mv "$RENAME_FROM" "$RENAME_TO"
+      fi
+    done
+    rmdir "$MUSIC_PATH/$ARTIST/NA/"
+  fi
+
+  ;;
+
+# twitter: file-name is built from the url (`tw--<user>--<tweet-id>`)
+'twitter.com'| \
+*.twitter.com )
+  echo '> twitter'
+  # drop host-name and query-string, `/status/` becomes `--`, other `/` become `-`
+  CLEAN_URL=` echo "$URL" \
+            | sd '^https?://([\w-]+\.)?twitter.com/' '' \
+            | sd '/?\?.*$' '' \
+            | sd -- '/status/' '--' \
+            | sd  '/' '-' \
+            `
+  echo "CLEAN_URL : '$CLEAN_URL'"
+  YDL "$URL" -o "$DEFAULT_PATH/tw--$CLEAN_URL.%(ext)s"
+  ;;
+
+
+# reddit: file-name is built from the url (subreddit, post-id, url-slug)
+'reddit.com'| \
+*.reddit.com )
+  echo '> reddit'
+  # drop host-name, leading `r/`, and query-string,
+  # `comments/` becomes `--`, other `/` become `-`
+  CLEAN_URL=` echo "$URL" \
+            | sd '^https?://([\w-]+\.)?reddit.com/(r/)?' '' \
+            | sd '/?\?.*$' '' \
+            | sd -- '/?comments/' '--' \
+            | sd '/' '-' \
+            `
+  echo "CLEAN_URL : '$CLEAN_URL'"
+  YDL "$URL" -o "$DEFAULT_PATH/r-$CLEAN_URL.%(ext)s"
+  ;;
+
+
+# instagram: file-name has uploader-id, the url part after `/p/`, title, and a timestamp
+'instagram.com'| \
+*.instagram.com )
+  echo '> instagram'
+  # drop host-name with `/p/`, query-string, and trailing slashes, other `/` become `-`
+  CLEAN_URL=` echo "$URL" \
+            | sd '^https?://([\w-]+\.)?instagram.com/p/' '' \
+            | sd '/?\?.*$' '' \
+            | sd '/+$' '' \
+            | sd  '/' '-' \
+            `
+  echo "CLEAN_URL : '$CLEAN_URL'"
+  # utc timestamp, part of the file-name
+  DATE=` date -u "+%Y%m%d%H%M%S" `
+  YDL "$URL" -o "$DEFAULT_PATH/ig-%(uploader_id).50B-$CLEAN_URL-(%(title).50B)_$DATE.%(ext)s"
+  ;;
+
+
+# any other site: file-name is the cleaned-up url, the id, and a timestamp
+*)
+  echo '> _default_'
+  # drop protocol, `www.`, and trailing slashes,
+  # then replace runs of characters from the listed set with `-`
+  CLEAN_URL=` echo "$URL" \
+            | sd '^https?://(www\.)?' '' \
+            | sd '/+$' '' \
+            | sd '[\\\/\s\?\^\*\|:><]+' '-' \
+            `
+  # NOTE(review): the replacement above seems to cover `/` already,
+  # which would leave nothing for this cut to remove – confirm
+  CLEAN_URL=${CLEAN_URL%%/*}
+  echo "CLEAN_URL : '$CLEAN_URL'"
+  # utc timestamp, part of the file-name
+  DATE=` date -u "+%Y%m%d%H%M%S" `
+  YDL "$URL" -o "$DEFAULT_PATH/$CLEAN_URL-[%(id)s]_$DATE.%(ext)s"
+  ;;
+
+esac