Files
scripts-and-configs/school/tube-video-grabber.sh

243 lines
6.6 KiB
Bash
Executable File

#!/bin/sh
# ---------------------------------------------------------------------------
# Defaults; each one can be overridden by a command-line option.
# ---------------------------------------------------------------------------
OUTPUT_DIR="."     # -o: directory downloads are written to
CASE="lower"       # -c: case handling applied to titles
REPL_CHAR="_"      # -r: substitute for characters not allowed in file names
PREFIX=""          # -p: text placed in front of every file name
SUFFIX=".mp4"      # -t: text placed after every file name
MANUAL_TITLE=""    # -T: title used instead of fetching one from the URL
VERBOSE=0          # -v: 1 enables extra CSV warnings
# Input selectors start out empty so that a LINK or LINKFILE variable
# inherited from the caller's environment cannot silently pick the input.
LINK=""            # -l: single video URL
LINKFILE=""        # -f: file listing URLs
# Print the command-line help and terminate the script.
# Outputs: help text on stdout
# Returns: never returns; always exits with status 1
usage() {
  # The here-document body and its terminator stay at column 0 because
  # "<<" (unlike "<<-") keeps leading whitespace and needs a bare EOF line.
  cat << EOF
Usage: $(basename "$0") [OPTIONS]
Options:
-l URL Single video URL
-f FILE File containing URLs (one per line, # for comments)
If .csv: format is "url,title" where title must be quoted
If no ext/.txt: one URL per line
-o DIR Output directory (default: current directory)
-c CASE Case handling: none, lower, upper, title (default: lower)
-r CHAR Replacement character for invalid chars (default: _)
-p PREFIX Text prepended to each output filename (default: none)
-t SUFFIX Text appended to each output filename (default: .mp4)
-T TITLE Directly provide title (overrides fetching from URL)
-v Verbose output (warn on unquoted URLs)
-h Show this help
EOF
  exit 1
}
# Ask yt-dlp for the title of a video without downloading it.
# Arguments: $1 - video URL (or ID)
# Outputs:   whatever yt-dlp prints for "--print title"; nothing on failure
#            because yt-dlp's diagnostics are discarded
# Returns:   yt-dlp's exit status
parse_title() {
  url="$1"
  # "--" ends option parsing so an argument that starts with "-" is treated
  # as the video to look up rather than as a yt-dlp option.
  yt-dlp --skip-download --print title -- "$url" 2>/dev/null
}
# Derive a stand-in title from the URL itself.
# Arguments: $1 - video URL
# Outputs:   "tube_<id>" on stdout, where <id> is the path segment after
#            "/watch/" or, failing that, everything after the last "/"
get_fallback_title() {
  url="$1"

  # First choice: the segment that directly follows "/watch/".
  video_id=$(echo "$url" | grep -oE '/watch/[^/?]+' | sed 's|^/watch/||')

  # Second choice: the tail of the URL behind its final slash.
  case "$video_id" in
    "") video_id=$(echo "$url" | sed 's|.*/||') ;;
  esac

  echo "tube_${video_id}"
}
# Turn a title into a file-name stem.
# Globals:   CASE (read)      - lower, upper or title; anything else leaves
#                               the case untouched
#            REPL_CHAR (read) - text substituted for disallowed characters
# Arguments: $1 - raw title
# Outputs:   the converted name on stdout; only letters, digits, ".", "_",
#            "-" and REPL_CHAR remain, and runs of REPL_CHAR are squeezed
#            to a single occurrence
format_filename() {
  title="$1"
  result="$title"

  # printf rather than echo: a title such as "-n" must not be read as an
  # echo option.
  case "$CASE" in
    lower) result=$(printf '%s\n' "$result" | tr '[:upper:]' '[:lower:]') ;;
    upper) result=$(printf '%s\n' "$result" | tr '[:lower:]' '[:upper:]') ;;
    title) result=$(printf '%s\n' "$result" | awk '{for(i=1;i<=NF;i++) $i=toupper(substr($i,1,1)) tolower(substr($i,2));}1') ;;
  esac

  # REPL_CHAR is user input that ends up inside sed expressions. Escape it
  # once for the replacement side (\ / &) and once for the pattern side
  # (BRE metacharacters) so values like ".", "*", "&" or "/" act literally.
  repl_sub=$(printf '%s\n' "$REPL_CHAR" | sed 's|[\\/&]|\\&|g')
  repl_pat=$(printf '%s\n' "$REPL_CHAR" | sed 's|[][\\/.*^$]|\\&|g')

  result=$(printf '%s\n' "$result" | sed "s/[^a-zA-Z0-9._-]/${repl_sub}/g")

  # Squeeze repeated replacements; with an empty REPL_CHAR there is nothing
  # to squeeze and the pattern would be invalid.
  if [ -n "$REPL_CHAR" ]; then
    result=$(printf '%s\n' "$result" | sed "s/\(${repl_pat}\)\{2,\}/${repl_sub}/g")
  fi

  printf '%s\n' "$result"
}
# Tell whether a path names a CSV file, judged by its extension only.
# Arguments: $1 - file path
# Returns:   0 when the path ends in ".csv" or ".CSV", 1 otherwise
is_csv() {
  file="$1"
  case "$file" in
    *.csv | *.CSV)
      return 0
      ;;
  esac
  return 1
}
# Check that every data row of a CSV link file has the form url,"title".
# Blank lines and lines starting with "#" are ignored. The first remaining
# row is treated as a header and skipped when it does not start with "http"
# (optionally behind an opening quote).
# Globals:   VERBOSE (read) - 1 adds a warning for each unquoted URL
# Arguments: $1 - path of the CSV file
# Outputs:   one message per problem on stderr, plus a summary on failure
# Returns:   0 when no row is rejected, 1 otherwise
validate_csv() {
  file="$1"
  lineno=0
  errors=0
  skipped_header=0

  while IFS= read -r line || [ -n "$line" ]; do
    lineno=$((lineno + 1))

    line=$(printf '%s\n' "$line" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
    [ -z "$line" ] && continue
    case "$line" in
      \#*) continue ;;
    esac

    # Header detection looks at the first data row, wherever it sits in the
    # file, and accepts a quoted URL as data rather than as a header.
    if [ "$skipped_header" -eq 0 ]; then
      skipped_header=1
      case "$line" in
        [Hh][Tt][Tt][Pp]* | \"[Hh][Tt][Tt][Pp]*) ;;
        *) continue ;;
      esac
    fi

    case "$line" in
      *,*) ;;
      *)
        echo "Line $lineno: Missing comma separator (expected url,title)" >&2
        errors=$((errors + 1))
        continue
        ;;
    esac

    case "$line" in
      \"*)
        url_quoted=1
        # "-n ... p" prints only when the quoted-URL shape matched; the
        # leading "x" separates "no match" from "matched, empty remainder".
        rest=$(printf '%s\n' "$line" | sed -n 's/^"[^"]*",\(.*\)$/x\1/p')
        if [ -z "$rest" ]; then
          echo "Line $lineno: Quoted URL must end with a closing quote followed by a comma" >&2
          errors=$((errors + 1))
          continue
        fi
        rest=${rest#x}
        url=$(printf '%s\n' "$line" | sed 's/^"\([^"]*\)".*/\1/')
        ;;
      *)
        url_quoted=0
        # Unquoted URL: everything before the first comma.
        url=${line%%,*}
        rest=${line#*,}
        ;;
    esac

    if [ -z "$url" ]; then
      echo "Line $lineno: Empty URL field" >&2
      errors=$((errors + 1))
      continue
    fi

    if [ "$url_quoted" -eq 0 ] && [ "$VERBOSE" -eq 1 ]; then
      echo "Line $lineno: Warning - URL not quoted" >&2
    fi

    if [ -z "$rest" ]; then
      echo "Line $lineno: Missing title (must be quoted)" >&2
      errors=$((errors + 1))
      continue
    fi

    # The title needs both an opening and a closing quote.
    case "$rest" in
      \"*\") ;;
      *)
        echo "Line $lineno: Title must be surrounded by quotes" >&2
        errors=$((errors + 1))
        continue
        ;;
    esac
  done < "$file"

  if [ "$errors" -gt 0 ]; then
    echo "Error: CSV validation failed with $errors error(s). Please fix and retry." >&2
    return 1
  fi
  return 0
}
# Read a link file and emit one "url|title" record per entry.
# CSV files (as decided by is_csv) are validated first and may carry a
# header row and quoted URLs; any other file holds one URL per line and
# produces records with an empty title. Blank lines and "#" comments are
# skipped in both formats.
# Arguments: $1 - path of the link file
# Outputs:   "url|title" lines on stdout
# Returns:   exits with status 1 when CSV validation fails
load_links() {
  file="$1"

  if is_csv "$file"; then
    if ! validate_csv "$file"; then
      exit 1
    fi

    skipped_header=0
    while IFS= read -r line || [ -n "$line" ]; do
      line=$(printf '%s\n' "$line" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
      [ -z "$line" ] && continue
      case "$line" in \#*) continue ;; esac

      # Only the first data row can be a header; a row that begins with
      # http, with or without an opening quote, is data.
      if [ "$skipped_header" -eq 0 ]; then
        skipped_header=1
        case "$line" in
          [Hh][Tt][Tt][Pp]* | \"[Hh][Tt][Tt][Pp]*) ;;
          *) continue ;;
        esac
      fi

      case "$line" in
        \"*)
          # Quoted URL: take the text between the quotes so commas inside
          # the URL survive, then drop the field and its separator.
          url=$(printf '%s\n' "$line" | sed 's/^"\([^"]*\)".*/\1/')
          title=$(printf '%s\n' "$line" | sed 's/^"[^"]*",//')
          ;;
        *)
          url=${line%%,*}
          title=${line#*,}
          ;;
      esac

      # Remove the quotes around the title, then any trailing blanks.
      title=$(printf '%s\n' "$title" | sed 's/^"\(.*\)"$/\1/;s/[[:space:]]*$//')
      printf '%s|%s\n' "$url" "$title"
    done < "$file"
  else
    while IFS= read -r line || [ -n "$line" ]; do
      line=$(printf '%s\n' "$line" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
      [ -z "$line" ] && continue
      case "$line" in \#*) continue ;; esac
      printf '%s|\n' "$line"
    done < "$file"
  fi
}
# Parse command-line options into the configuration variables.
# Options that take a value store it verbatim, except -c, which is checked
# against the values the filename formatter understands so that a typo is
# reported instead of silently leaving the case unchanged.
while getopts "l:f:o:c:r:p:t:T:vh" opt; do
  case "$opt" in
    l) LINK="$OPTARG" ;;
    f) LINKFILE="$OPTARG" ;;
    o) OUTPUT_DIR="$OPTARG" ;;
    c)
      case "$OPTARG" in
        none | lower | upper | title) CASE="$OPTARG" ;;
        *)
          echo "Error: Invalid case '$OPTARG' (expected: none, lower, upper, title)" >&2
          exit 1
          ;;
      esac
      ;;
    r) REPL_CHAR="$OPTARG" ;;
    p) PREFIX="$OPTARG" ;;
    t) SUFFIX="$OPTARG" ;;
    T) MANUAL_TITLE="$OPTARG" ;;
    v) VERBOSE=1 ;;
    h) usage ;;
    *) usage ;;
  esac
done
# Drop the parsed options so that "$@" holds only the remaining operands.
shift $((OPTIND - 1))
# Build ENTRIES, the newline-separated list of "url|title" records to
# process. A link file (-f) takes precedence over a single link (-l); with
# neither given the help text is shown.
if [ -n "$LINKFILE" ]; then
  if [ ! -f "$LINKFILE" ]; then
    echo "Error: Link file not found: $LINKFILE" >&2
    exit 1
  fi
  # load_links runs inside a command substitution, so an "exit 1" there
  # only ends that subshell. Check the status here so that a rejected link
  # file stops the script instead of continuing with an empty list.
  ENTRIES=$(load_links "$LINKFILE") || exit 1
elif [ -n "$LINK" ]; then
  ENTRIES="${LINK}|"
else
  usage
fi
# Download every record in ENTRIES.
# Title precedence per record: title from the link file, then -T, then the
# title reported by yt-dlp, then a fallback derived from the URL.
# The pipeline's status is non-zero when at least one download failed.
printf '%s\n' "$ENTRIES" | grep -v '^$' | {
  failures=0

  while read -r entry; do
    # Split on the first "|" only, so a title may itself contain "|".
    url=${entry%%"|"*}
    case "$entry" in
      *"|"*) manual_title=${entry#*"|"} ;;
      *) manual_title="" ;;
    esac
    [ -z "$url" ] && continue

    echo "Processing: $url"

    if [ -n "$manual_title" ]; then
      title="$manual_title"
    elif [ -n "$MANUAL_TITLE" ]; then
      title="$MANUAL_TITLE"
    else
      # stdin is detached so the lookup cannot consume the records still
      # queued on the loop's input.
      title=$(parse_title "$url" < /dev/null)
      if [ -z "$title" ]; then
        echo "Warning: Could not parse title, using fallback" >&2
        title=$(get_fallback_title "$url")
      fi
    fi

    formatted=$(format_filename "$title")
    filename="${PREFIX}${formatted}${SUFFIX}"
    OUTPUT_PATH="$OUTPUT_DIR/$filename"

    # realpath may be unavailable or may refuse a path whose directory does
    # not exist yet; keep the unresolved path rather than an empty one.
    if resolved_path=$(realpath "$OUTPUT_PATH" 2>/dev/null) && [ -n "$resolved_path" ]; then
      OUTPUT_PATH="$resolved_path"
    fi
    echo "Output: $OUTPUT_PATH"

    # "--" keeps a URL or ID that starts with "-" from being read as an
    # option; stdin is detached for the same reason as above.
    if yt-dlp -f "bestvideo+bestaudio/best" -o "$OUTPUT_PATH" -- "$url" < /dev/null; then
      echo "Download complete: $filename"
    else
      echo "Error: Download failed: $url" >&2
      failures=$((failures + 1))
    fi
  done

  # Make the status of the whole pipeline reflect failed downloads.
  [ "$failures" -eq 0 ]
}