rls-discogs/parse-rls.fish
2022-03-27 16:00:26 +02:00

192 lines
8.5 KiB
Fish
Executable File

#!/usr/bin/env -S fish
#set fish_trace on
# Print the help text to stderr and terminate the script with exit status 1.
# Used both when argument parsing fails and when -h/--help is given.
function usage
    # fish has no $0 script-name variable; ask `status` for the script path,
    # the same value that is passed to `argparse -n` for its error messages.
    set -l prog (status current-filename)
    begin
        echo "Split scene release name into artist, title, barcode, catno, format, edition, year"
        echo
        echo "Usage: $prog [-j|-t] [-v] [-s] [-h]"
        echo
        echo " -t / --tsv Output tab separated (default)"
        echo " -j / --json Output as JSON"
        echo " -v / --verbose Output unmatched to stderr"
        echo " -s / --stats Output stats to stderr"
        echo " -h / --help Show this help"
        exit 1
    end >&2
end
# Parse the command line. -j and -t are mutually exclusive. argparse must run
# at the top level (not inside an `if` condition) so that the _flag_* variables
# it creates stay visible to the rest of the script.
argparse --name (status current-filename) --exclusive 'j,t' \
    'h/help' 'v/verbose' 'j/json' 't/tsv' 's/stats' -- $argv
or usage

# Explicit help request: print usage and exit.
set -q _flag_help
and usage
# supported patterns
# artist-title-(catno)-edition-format-year
# artist-title-(catno)-lang-format-year
# artist-title-(catno)-format-year
# artist-title-(catno)-year
# artist-title(catno)-format-year
# artist-title(catno)-year
# artist-title-(barcode)-lang-format-year
# artist-title-(barcode)-edition-format-year
# artist-title-(barcode)-format-year
# artist-title-format-lang-year
# artist-title-edition-format-lang-year
# artist-title-edition-format-year
# artist-title-edition-lang-year
# artist-title-edition-year
# artist-title-format-edition-year
# artist-title-format-year
# artist-title-lang-format-year
# artist-title-lang-year
# artist-title-year
# Per-pattern match counters. The suffix spells the field order of the pattern,
# using the same letters as the named regex groups below:
# A=artist T=title C=catno B=barcode E=edition L=lang F=format Y=year.

# Patterns containing a catalogue number
set CNT_ATCEFY 0
set CNT_ATCLFY 0
set CNT_ATCFY 0
set CNT_ATCY 0

# Patterns containing a barcode
set CNT_ATBLFY 0
set CNT_ATBEFY 0
set CNT_ATBFY 0

# Patterns with neither catalogue number nor barcode
set CNT_ATFLY 0
set CNT_ATEFLY 0
set CNT_ATEFY 0
set CNT_ATELY 0
set CNT_ATFEY 0
set CNT_ATEY 0
set CNT_ATLFY 0
set CNT_ATFY 0
set CNT_ATLY 0
set CNT_ATY 0

# Overall counters. ONEMATCH and MMATCH are initialised here but are not
# referenced anywhere else in the visible part of the script.
set TOTAL 0
set IGNORED 0
set UNMATCHED 0
set ONEMATCH 0
set MMATCH 0

# Regex fragments. They are concatenated into full patterns and matched
# case-insensitively by the main loop.

# Edition keyword, optionally wrapped in parentheses; captured as E.
set EDITION '\(?(?<E>REMASTERED|BOOTLEG|DEMO|PROMO|REISSUE|EXPANDED_EDITION|DELUXE_EDITION|REMASTERED_DELUXE_EDITION|LIMITED_EDITION|BONUS_EDITION|SPECIAL_EDITION|REMASTERED_EXPANDED_BOXSET|REMASTERED_BOXSET|Remastered_Edition|Limited_Edition_Vinyl|PROMO_VINYL|Platinum_Edition)\)?'

# Media format tag, optionally wrapped in parentheses; captured as F.
set FORMAT '\(?(?<F>[0-9]{1,2}CD|[0-9]{1,2}LP|LP|[0-9]{1,2}VINYL|CD|CDA|CDS|CDM|CDEP|CDR|CDREP|EP|VINYL|VLS|TAPE|WEB|SINGLE-WEB|EP-WEB|BOXSET|MAG)\)?'

# Two-letter code alternations used to build LANG.
set ISO3166 'AW|AF|AO|AI|AX|AL|AD|AE|AR|AM|AS|AQ|TF|AG|AU|AT|AZ|BI|BE|BJ|BQ|BF|BD|BG|BH|BS|BA|BL|BY|BZ|BM|BO|BR|BB|BN|BT|BV|BW|CF|CA|CC|CH|CL|CN|CI|CM|CD|CG|CK|CO|KM|CV|CR|CU|CW|CX|KY|CY|CZ|DE|DJ|DM|DK|DO|DZ|EC|EG|ER|EH|ES|EE|ET|FI|FJ|FK|FR|FO|FM|GA|GB|GE|GG|GH|GI|GN|GP|GM|GW|GQ|GR|GD|GL|GT|GF|GU|GY|HK|HM|HN|HR|HT|HU|ID|IM|IN|IO|IE|IR|IQ|IS|IL|IT|JM|JE|JO|JP|KZ|KE|KG|KH|KI|KN|KR|KW|LA|LB|LR|LY|LC|LI|LK|LS|LT|LU|LV|MO|MF|MA|MC|MD|MG|MV|MX|MH|MK|ML|MT|MM|ME|MN|MP|MZ|MR|MS|MQ|MU|MW|MY|YT|NA|NC|NE|NF|NG|NI|NU|NL|NO|NP|NR|NZ|OM|PK|PA|PN|PE|PH|PW|PG|PL|PR|KP|PT|PY|PS|PF|QA|RE|RO|RU|RW|SA|SD|SN|SG|GS|SH|SJ|SB|SL|SV|SM|SO|PM|RS|SS|ST|SR|SK|SI|SE|SZ|SX|SC|SY|TC|TD|TG|TH|TJ|TK|TM|TL|TO|TT|TN|TR|TV|TW|TZ|UG|UA|UM|UY|US|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|YE|ZA|ZM|ZW'
set ISO639 'AA|AB|AE|AF|AK|AM|AN|AR|AS|AV|AY|AZ|BA|BE|BG|BH|BI|BM|BN|BO|BR|BS|CA|CE|CH|CO|CR|CS|CU|CV|CY|DA|DE|DV|DZ|EE|EL|EN|EO|ES|ET|EU|FA|FF|FI|FJ|FO|FR|FY|GA|GD|GL|GN|GU|GV|HA|HE|HI|HO|HR|HT|HU|HY|HZ|IA|ID|IE|IG|II|IK|IO|IS|IT|IU|JA|JV|KA|KG|KI|KJ|KK|KL|KM|KN|KO|KR|KS|KU|KV|KW|KY|LA|LB|LG|LI|LN|LO|LT|LU|LV|MG|MH|MI|MK|ML|MN|MR|MS|MT|MY|NA|NB|ND|NE|NG|NL|NN|NO|NR|NV|NY|OC|OJ|OM|OR|OS|PA|PI|PL|PS|PT|QU|RM|RN|RO|RU|RW|SA|SC|SD|SE|SG|SI|SK|SL|SM|SN|SO|SQ|SR|SS|ST|SU|SV|SW|TA|TE|TG|TH|TI|TK|TL|TN|TO|TR|TS|TT|TW|TY|UG|UK|UR|UZ|VE|VI|VO|WA|WO|XH|YI|YO|ZA|ZH|ZU'

# Any code from the two lists above, or the literal SP; captured as L.
set LANG "(?<L>$ISO3166|$ISO639|SP)"

# Final segment of the release name (letters, digits, underscores); not captured.
set TEAM '[a-zA-Z0-9_]+'

# Four-digit year starting with 19 or 20, optionally in parentheses; captured as Y.
set YEAR '\(?(?<Y>(19|20)[0-9]{2})\)?'

# Accepted separators between artist and title.
set SEP '(-|_-_|--)'

# Catalogue number: letter/digit combinations inside mandatory parentheses; captured as C.
set CATNO '\((?<C>[a-zA-Z]+[0-9]+|[a-zA-Z]+[_-][0-9]+|[a-zA-Z]+[0-9]+[a-zA-Z]+|[a-zA-Z]+[_-][0-9]+[a-zA-Z]+|[a-zA-Z]+[_-][0-9]+[_-][0-9]+|[a-zA-Z]+[0-9]+[_-][a-zA-Z]+)\)'

# Barcode: 5 to 15 digits, underscores or hyphens, optionally in parentheses; captured as B.
set BARCODE '\(?(?<B>[0-9_-]{5,15})\)?'

# Artist (captured as A) and title (captured as T); neither may contain a hyphen.
set ARTIST '(?<A>[a-zA-Z0-9._]+)'
set TITLE '(?<T>[a-zA-Z0-9_.()]+)'
# Main loop: read one release name per line from stdin, try the supported
# patterns in order, and print the captured fields for the first one that
# matches (TSV by default, JSON with -j/--json).
while read -l REL
    set TOTAL (math $TOTAL+1)

    # Skip names ending in <source tag>-NN-NN-NNNN-<team>
    # (presumably dated broadcast/live recordings; they are counted as ignored).
    if string match -iq -r -- "(LINE|DVBS|DVBC|FM|DAB|SBD|CABLE|SAT)-[0-9]{2}-[0-9]{2}-[0-9]{4}-$TEAM\$" "$REL"
        set IGNORED (math $IGNORED+1)
        continue
    end

    # Remove tags that are unusable in a search. ORG keeps the untouched name
    # for output and for the verbose "unmatched" report.
    set ORG $REL
    for N in 'FLAC' 'WEBFLAC' 'DIRFIX' 'PROOFFIX' 'NFOFIX' 'REPACK' 'PROPER' 'RETAIL' 'READ_NFO' 'OST' 'RERIP'
        set REL (string replace -ia -- "-$N-" '-' $REL)
        set REL (string replace -ia -- "_$N-" '-' $REL)
    end

    # Reset the variables that `string match -r` fills from the named regex
    # groups, so values from the previous line cannot leak into this one.
    set -e A; set -e T; set -e B; set -e C; set -e L; set -e F; set -e E; set -e Y

    # Try the patterns in order; the first match wins and bumps its counter.
    # -- patterns with a catalogue number --
    if string match -iq -r -- "^$ARTIST$SEP$TITLE-$CATNO-$EDITION-$FORMAT-$YEAR-$TEAM\$" "$REL"
        set CNT_ATCEFY (math $CNT_ATCEFY+1)
    else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$CATNO-$LANG-$FORMAT-$YEAR-$TEAM\$" "$REL"
        set CNT_ATCLFY (math $CNT_ATCLFY+1)
    else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$CATNO-$FORMAT-$YEAR-$TEAM\$" "$REL"
        set CNT_ATCFY (math $CNT_ATCFY+1)
    else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$CATNO-$YEAR-$TEAM\$" "$REL"
        set CNT_ATCY (math $CNT_ATCY+1)
    else if string match -iq -r -- "^$ARTIST$SEP$TITLE$CATNO-$FORMAT-$YEAR-$TEAM\$" "$REL"
        # catalogue number glued directly to the title
        set CNT_ATCFY (math $CNT_ATCFY+1)
    else if string match -iq -r -- "^$ARTIST$SEP$TITLE$CATNO-$YEAR-$TEAM\$" "$REL"
        set CNT_ATCY (math $CNT_ATCY+1)
    # -- patterns with a barcode --
    else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$BARCODE-$LANG-$FORMAT-$YEAR-$TEAM\$" "$REL"
        set CNT_ATBLFY (math $CNT_ATBLFY+1)
    else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$BARCODE-$EDITION-$FORMAT-$YEAR-$TEAM\$" "$REL"
        set CNT_ATBEFY (math $CNT_ATBEFY+1)
    else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$BARCODE-$FORMAT-$YEAR-$TEAM\$" "$REL"
        set CNT_ATBFY (math $CNT_ATBFY+1)
    # -- patterns with neither catalogue number nor barcode --
    else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$FORMAT-$LANG-$YEAR-$TEAM\$" "$REL"
        set CNT_ATFLY (math $CNT_ATFLY+1)
    else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$EDITION-$FORMAT-$LANG-$YEAR-$TEAM\$" "$REL"
        set CNT_ATEFLY (math $CNT_ATEFLY+1)
    else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$EDITION-$FORMAT-$YEAR-$TEAM\$" "$REL"
        set CNT_ATEFY (math $CNT_ATEFY+1)
    else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$EDITION-$LANG-$YEAR-$TEAM\$" "$REL"
        set CNT_ATELY (math $CNT_ATELY+1)
    else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$LANG-$FORMAT-$YEAR-$TEAM\$" "$REL"
        set CNT_ATLFY (math $CNT_ATLFY+1)
    else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$FORMAT-$EDITION-$YEAR-$TEAM\$" "$REL"
        set CNT_ATFEY (math $CNT_ATFEY+1)
    else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$FORMAT-$YEAR-$TEAM\$" "$REL"
        set CNT_ATFY (math $CNT_ATFY+1)
    else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$EDITION-$YEAR-$TEAM\$" "$REL"
        set CNT_ATEY (math $CNT_ATEY+1)
    else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$LANG-$YEAR-$TEAM\$" "$REL"
        set CNT_ATLY (math $CNT_ATLY+1)
    else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$YEAR-$TEAM\$" "$REL"
        set CNT_ATY (math $CNT_ATY+1)
    else
        # Nothing matched: count it, optionally report the original name, move on.
        set UNMATCHED (math $UNMATCHED+1)
        if set -q _flag_verbose
            echo "$ORG" >&2
        end
        continue
    end

    # Normalise captured text: underscores become spaces, outer whitespace is
    # trimmed. `--` ends option parsing, as in the other `string` calls of this
    # script; it matters for B, whose character class permits a leading '-'.
    if set -q A; and test -n "$A"
        set A (string replace -a -- '_' ' ' "$A")
        set A (string trim -- "$A")
    end
    if set -q T; and test -n "$T"
        set T (string replace -a -- '_' ' ' "$T")
        set T (string trim -- "$T")
    end
    if set -q B; and test -n "$B"
        set B (string replace -a -- '_' ' ' "$B")
        set B (string trim -- "$B")
    end
    if set -q E; and test -n "$E"
        set E (string replace -a -- '_' ' ' "$E")
        set E (string trim -- "$E")
    end

    if set -q _flag_json
        # Build the object from --arg values only; -n means jq needs no input
        # document, and </dev/null keeps it away from the loop's stdin.
        # Empty optional fields are emitted as null.
        jq -n -cSM --arg rls "$ORG" --arg a "$A" --arg t "$T" \
            --arg c "$C" --arg b "$B" --arg f "$F" --arg e "$E" --arg y "$Y" '{
            "rls": $rls,
            "artist": $a,
            "title": $t,
            "catno": (if ($c | length == 0) then null else $c end),
            "barcode": (if ($b | length == 0) then null else $b end),
            "format": (if ($f | length == 0) then null else $f end),
            "edition": (if ($e | length == 0) then null else $e end),
            "year": (if ($y | length == 0) then null else $y end)
            }' </dev/null
    else
        # TSV columns: release, artist, title, catno, barcode, format, edition, year
        printf "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" "$ORG" "$A" "$T" "$C" "$B" "$F" "$E" "$Y"
    end
end
# With -s/--stats, write the per-pattern match counts and the overall totals
# to stderr. Every counter incremented by the main loop is listed, including
# ATCY and ATFEY, so the rows plus Unmatched and Ignored add up to Total.
if set -q _flag_stats
    begin
        echo "ATCEFY $CNT_ATCEFY"
        echo "ATCLFY $CNT_ATCLFY"
        echo "ATCFY $CNT_ATCFY"
        echo "ATCY $CNT_ATCY"
        echo "ATBLFY $CNT_ATBLFY"
        echo "ATBEFY $CNT_ATBEFY"
        echo "ATBFY $CNT_ATBFY"
        echo "ATFLY $CNT_ATFLY"
        echo "ATEFLY $CNT_ATEFLY"
        echo "ATEFY $CNT_ATEFY"
        echo "ATELY $CNT_ATELY"
        echo "ATFEY $CNT_ATFEY"
        echo "ATLFY $CNT_ATLFY"
        echo "ATFY $CNT_ATFY"
        echo "ATEY $CNT_ATEY"
        echo "ATLY $CNT_ATLY"
        echo "ATY $CNT_ATY"
        echo "Unmatched $UNMATCHED"
        echo "Ignored $IGNORED"
        echo "Total $TOTAL"
    end >&2
end