#!/bin/sh
#
# tube-video-grabber.sh - download videos with yt-dlp under sanitized names.
#
# Provenance: commit dfc0f6193981af526ec59f6c5deceb3c4ac0cc22
#   Author:  Jason Igari-Szabo
#   Date:    Sat, 25 Apr 2026 09:39:46 +0200
#   Subject: add linux script to dl tube videos
#
# Input is either a single URL (-l) or a link file (-f). A link file ending
# in .csv holds `url,"title"` records (optional header, `#` comments); any
# other file holds one URL per line. The file name of each download is built
# as PREFIX + sanitized title + SUFFIX inside the output directory.
#
# Title precedence per entry: CSV title, then -T, then the title reported by
# yt-dlp, then a fallback derived from the URL.
#
# Exit status: 0 when every download succeeded, 1 otherwise.

OUTPUT_DIR="."
CASE="lower"
REPL_CHAR="_"
PREFIX=""
SUFFIX=".mp4"
MANUAL_TITLE=""
VERBOSE=0
LINK=""
LINKFILE=""

# Print the help text and exit.
# Arguments: $1 - exit status (default 1). Status 0 prints to stdout,
#                 anything else prints to stderr.
usage() {
  usage_status="${1:-1}"
  if [ "$usage_status" -ne 0 ]; then
    # We exit right below, so a permanent redirect is harmless.
    exec >&2
  fi
  cat << EOF
Usage: ${0##*/} [OPTIONS]
Options:
  -l URL     Single video URL
  -f FILE    File containing URLs (one per line, # for comments)
             If .csv: format is "url,title" where title must be quoted
             If no ext/.txt: one URL per line
  -o DIR     Output directory (default: current directory)
  -c CASE    Case handling: none, lower, upper, title (default: lower)
  -r CHAR    Replacement character for invalid chars (default: _)
  -p PREFIX  Prefix prepended to every file name (default: none)
  -t SUFFIX  Suffix appended to every file name (default: .mp4)
  -T TITLE   Directly provide title (overrides fetching from URL)
  -v         Verbose output (warn on unquoted URLs)
  -h         Show this help
EOF
  exit "$usage_status"
}

# Strip leading and trailing whitespace (including a trailing CR) from $1.
trim() {
  printf '%s\n' "$1" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//'
}

# Succeed when $1 starts with "http" (case-insensitive).
is_http_url() {
  case "$1" in
    [Hh][Tt][Tt][Pp]*) return 0 ;;
    *) return 1 ;;
  esac
}

# Ask yt-dlp for the title of the video at $1 and print it.
# Prints nothing when yt-dlp fails; its diagnostics are discarded on purpose
# because the caller falls back to a URL-derived title.
parse_title() {
  # stdin is detached so yt-dlp cannot eat the caller's entry list.
  yt-dlp --skip-download --print title -- "$1" 2>/dev/null </dev/null
}

# Build a placeholder title "tube_<id>" from the URL in $1.
# <id> is the path segment after /watch/ when present, otherwise everything
# after the last slash.
get_fallback_title() {
  gf_url="$1"
  gf_id=""
  case "$gf_url" in
    */watch/*)
      gf_id=${gf_url#*/watch/}
      gf_id=${gf_id%%[/?]*}
      ;;
  esac
  if [ -z "$gf_id" ]; then
    gf_id=${gf_url##*/}
  fi
  printf 'tube_%s\n' "$gf_id"
}

# Turn the title in $1 into a safe file-name stem and print it.
# Globals: CASE (read), REPL_CHAR (read).
# Every byte outside [a-zA-Z0-9._-] becomes REPL_CHAR and runs of REPL_CHAR
# are collapsed to a single occurrence.
format_filename() {
  ff_result="$1"

  # printf instead of echo: dash's echo would interpret backslashes in titles.
  case "$CASE" in
    lower) ff_result=$(printf '%s\n' "$ff_result" | tr '[:upper:]' '[:lower:]') ;;
    upper) ff_result=$(printf '%s\n' "$ff_result" | tr '[:lower:]' '[:upper:]') ;;
    title) ff_result=$(printf '%s\n' "$ff_result" | awk '{for(i=1;i<=NF;i++) $i=toupper(substr($i,1,1)) tolower(substr($i,2));}1') ;;
  esac

  # REPL_CHAR is user input: escape it separately for the replacement side
  # (\ / &) and for the BRE side (\ / . * ^ $ [ ]) before handing it to sed.
  ff_rhs=$(printf '%s\n' "$REPL_CHAR" | sed 's|[\\/&]|\\&|g')
  ff_lhs=$(printf '%s\n' "$REPL_CHAR" | sed 's|[][\\/.*^$]|\\&|g')

  # LC_ALL=C keeps the ranges strictly ASCII and avoids failures on bytes
  # that are not valid in the current locale's encoding.
  ff_result=$(printf '%s\n' "$ff_result" | LC_ALL=C sed "s/[^a-zA-Z0-9._-]/${ff_rhs}/g")
  if [ -n "$REPL_CHAR" ]; then
    # Grouping makes the collapse correct for multi-character replacements.
    ff_result=$(printf '%s\n' "$ff_result" | LC_ALL=C sed "s/\(${ff_lhs}\)\{2,\}/${ff_rhs}/g")
  fi

  printf '%s\n' "$ff_result"
}

# Succeed when the file name in $1 ends in .csv (any letter case).
is_csv() {
  case "$1" in
    *.[cC][sS][vV]) return 0 ;;
    *) return 1 ;;
  esac
}

# Split one trimmed CSV record ($1) at its first field separator.
# Sets: CSV_URL  - first field, surrounding quotes removed
#       CSV_REST - everything after the separating comma ("" when absent)
# Shared by validate_csv and load_links so both agree on what the URL is.
split_csv_line() {
  case "$1" in
    \"*)
      CSV_URL=$(printf '%s\n' "$1" | sed 's/^"\([^"]*\)".*/\1/')
      CSV_REST=$(printf '%s\n' "$1" | sed 's/^"[^"]*"[[:space:]]*,//')
      # sed leaves its input untouched when the pattern does not match
      # (unterminated quote, or no comma after the closing quote).
      if [ "$CSV_URL" = "$1" ]; then
        CSV_URL=${1#\"}
      fi
      if [ "$CSV_REST" = "$1" ]; then
        CSV_REST=""
      fi
      ;;
    *)
      CSV_URL=${1%%,*}
      case "$1" in
        *,*) CSV_REST=${1#*,} ;;
        *) CSV_REST="" ;;
      esac
      ;;
  esac
  CSV_URL=$(trim "$CSV_URL")
  CSV_REST=$(trim "$CSV_REST")
}

# Check every record of the CSV file $1 and report problems on stderr.
# Globals: VERBOSE (read).
# Blank lines and `#` comments are ignored. The first remaining record is
# treated as a header (and skipped) when its first field is not an http URL.
# Returns: 0 when the file is clean, 1 when at least one record is invalid.
validate_csv() {
  vc_file="$1"
  vc_lineno=0
  vc_errors=0
  vc_seen_record=0

  while IFS= read -r vc_line || [ -n "$vc_line" ]; do
    vc_lineno=$((vc_lineno + 1))
    vc_line=$(trim "$vc_line")
    case "$vc_line" in
      '' | \#*) continue ;;
    esac

    split_csv_line "$vc_line"

    if [ "$vc_seen_record" -eq 0 ]; then
      vc_seen_record=1
      if ! is_http_url "$CSV_URL"; then
        continue
      fi
    fi

    case "$vc_line" in
      *,*) ;;
      *)
        echo "Line $vc_lineno: Missing comma separator (expected url,title)" >&2
        vc_errors=$((vc_errors + 1))
        continue
        ;;
    esac

    if [ -z "$CSV_URL" ]; then
      echo "Line $vc_lineno: Empty URL field" >&2
      vc_errors=$((vc_errors + 1))
      continue
    fi

    if [ "$VERBOSE" -eq 1 ]; then
      case "$vc_line" in
        \"*) ;;
        *) echo "Line $vc_lineno: Warning - URL not quoted" >&2 ;;
      esac
    fi

    if [ -z "$CSV_REST" ]; then
      echo "Line $vc_lineno: Missing title (must be quoted)" >&2
      vc_errors=$((vc_errors + 1))
      continue
    fi

    case "$CSV_REST" in
      \"*) ;;
      *)
        echo "Line $vc_lineno: Title must be surrounded by quotes" >&2
        vc_errors=$((vc_errors + 1))
        continue
        ;;
    esac
  done < "$vc_file"

  if [ "$vc_errors" -gt 0 ]; then
    echo "Error: CSV validation failed with $vc_errors error(s). Please fix and retry." >&2
    return 1
  fi
  return 0
}

# Print one "url|title" line per entry of the link file $1.
# The title part is empty for plain link files. CSV files are validated
# first; on validation failure this exits with status 1 (callers using
# command substitution must check the status themselves).
load_links() {
  ll_file="$1"

  if is_csv "$ll_file"; then
    if ! validate_csv "$ll_file"; then
      exit 1
    fi
    ll_seen_record=0
    while IFS= read -r ll_line || [ -n "$ll_line" ]; do
      ll_line=$(trim "$ll_line")
      case "$ll_line" in
        '' | \#*) continue ;;
      esac

      split_csv_line "$ll_line"

      # Same header rule as validate_csv.
      if [ "$ll_seen_record" -eq 0 ]; then
        ll_seen_record=1
        if ! is_http_url "$CSV_URL"; then
          continue
        fi
      fi

      ll_title=$(printf '%s\n' "$CSV_REST" | sed 's/^"\(.*\)"$/\1/;s/[[:space:]]*$//')
      printf '%s|%s\n' "$CSV_URL" "$ll_title"
    done < "$ll_file"
  else
    while IFS= read -r ll_line || [ -n "$ll_line" ]; do
      ll_line=$(trim "$ll_line")
      case "$ll_line" in
        '' | \#*) continue ;;
      esac
      printf '%s|\n' "$ll_line"
    done < "$ll_file"
  fi
  return 0
}

while getopts "l:f:o:c:r:p:t:T:vh" opt; do
  case "$opt" in
    l) LINK="$OPTARG" ;;
    f) LINKFILE="$OPTARG" ;;
    o) OUTPUT_DIR="$OPTARG" ;;
    c) CASE="$OPTARG" ;;
    r) REPL_CHAR="$OPTARG" ;;
    p) PREFIX="$OPTARG" ;;
    t) SUFFIX="$OPTARG" ;;
    T) MANUAL_TITLE="$OPTARG" ;;
    v) VERBOSE=1 ;;
    h) usage 0 ;;
    *) usage ;;
  esac
done
shift $((OPTIND - 1))

case "$CASE" in
  none | lower | upper | title) ;;
  *) echo "Warning: Unknown case mode '$CASE', leaving case unchanged" >&2 ;;
esac

case "$REPL_CHAR" in
  */*)
    # A slash would turn the sanitized title into a path.
    echo "Error: Replacement character must not contain '/'" >&2
    exit 1
    ;;
esac

if [ -n "$LINKFILE" ]; then
  if [ ! -f "$LINKFILE" ]; then
    echo "Error: Link file not found: $LINKFILE" >&2
    exit 1
  fi
  # load_links runs in a subshell here, so its `exit 1` must be propagated
  # explicitly or a rejected CSV would look like an empty, successful run.
  ENTRIES=$(load_links "$LINKFILE") || exit 1
elif [ -n "$LINK" ]; then
  ENTRIES="${LINK}|"
else
  usage
fi

if ! command -v yt-dlp > /dev/null 2>&1; then
  echo "Error: yt-dlp not found in PATH" >&2
  exit 1
fi

# Resolve the output directory once, creating it when needed.
if ! mkdir -p -- "$OUTPUT_DIR"; then
  echo "Error: Cannot create output directory: $OUTPUT_DIR" >&2
  exit 1
fi
# CDPATH is cleared so `cd` neither jumps elsewhere nor prints to stdout.
if ! OUTPUT_DIR=$(CDPATH= cd -- "$OUTPUT_DIR" && pwd -P); then
  echo "Error: Cannot access output directory" >&2
  exit 1
fi

failures=0

# The list is fed through a here-document (not a pipe) so the loop runs in
# this shell and the failure counter survives it.
while read -r entry; do
  [ -n "$entry" ] || continue

  # Split at the FIRST '|' only, so titles may themselves contain '|'.
  url=${entry%%|*}
  case "$entry" in
    *\|*) manual_title=${entry#*|} ;;
    *) manual_title="" ;;
  esac

  [ -n "$url" ] || continue

  printf 'Processing: %s\n' "$url"

  if [ -n "$manual_title" ]; then
    title="$manual_title"
  elif [ -n "$MANUAL_TITLE" ]; then
    title="$MANUAL_TITLE"
  else
    title=$(parse_title "$url")
    if [ -z "$title" ]; then
      echo "Warning: Could not parse title, using fallback" >&2
      title=$(get_fallback_title "$url")
    fi
  fi

  formatted=$(format_filename "$title")
  filename="${PREFIX}${formatted}${SUFFIX}"

  # ${OUTPUT_DIR%/} avoids a double slash when the directory is "/".
  OUTPUT_PATH="${OUTPUT_DIR%/}/$filename"

  printf 'Output: %s\n' "$OUTPUT_PATH"

  # -o takes an output template: a literal '%' has to be written as '%%'.
  template=$(printf '%s\n' "$OUTPUT_PATH" | sed 's/%/%%/g')

  # stdin is detached so yt-dlp (or ffmpeg below it) cannot consume the
  # remaining entries of the here-document.
  if yt-dlp -f "bestvideo+bestaudio/best" -o "$template" -- "$url" < /dev/null; then
    printf 'Download complete: %s\n' "$filename"
  else
    printf 'Error: Download failed: %s\n' "$url" >&2
    failures=$((failures + 1))
  fi
done << EOF
$ENTRIES
EOF

if [ "$failures" -gt 0 ]; then
  echo "Error: $failures download(s) failed." >&2
  exit 1
fi