2022-03-26 20:50:13 -04:00
|
|
|
#!/usr/bin/env -S fish
|
|
|
|
#set fish_trace on
|
|
|
|
|
|
|
|
# usage: print the help text to stderr and terminate the script with status 1.
# Called both for argument errors and for -h/--help.
function usage --description 'Print usage help to stderr and exit 1'
    # fish has no $0 (it silently expands to nothing), so ask `status`
    # for the path of the running script instead.
    set -l script (status current-filename)

    begin
        echo "Split scene release name into artist, title, barcode, catno, format, edition, year"
        echo
        echo "Usage: $script [-j|-t] [-v] [-s] [-h]"
        echo
        echo " -t / --tsv Output tab separated (default)"
        echo " -j / --json Output as JSON"
        echo " -v / --verbose Output unmatched to stderr"
        echo " -s / --stats Output stats to stderr"
        echo " -h / --help Show this help"
        exit 1
    end >&2
end
|
|
|
|
|
|
|
|
# Parse the command line. -j and -t are mutually exclusive; on success
# argparse leaves a _flag_<name> variable behind for every option seen.
# A parse failure falls through to usage, which prints the help and exits.
argparse --name=(status current-filename) --exclusive='j,t' \
    'h/help' 'v/verbose' 'j/json' 't/tsv' 's/stats' -- $argv
or usage

# An explicit help request takes the same route.
set -q _flag_help
and usage
|
|
|
|
|
|
|
|
|
|
|
|
# supported patterns
# artist-title-(catno)-edition-format-year
# artist-title-(catno)-lang-format-year
# artist-title-(catno)-format-year
# artist-title-(catno)-year
# artist-title(catno)-format-year
# artist-title(catno)-year
# artist-title-(barcode)-lang-format-year
# artist-title-(barcode)-edition-format-year
# artist-title-(barcode)-format-year
# artist-title-format-lang-year
# artist-title-edition-format-lang-year
# artist-title-edition-format-year
# artist-title-edition-lang-year
# artist-title-edition-year
# artist-title-format-edition-year
# artist-title-format-year
# artist-title-lang-format-year
# artist-title-lang-year
# artist-title-year
|
|
|
|
|
|
|
|
# Per-pattern hit counters. The suffix spells the fields of the pattern:
# A=artist T=title C=catno B=barcode E=edition L=lang F=format Y=year.

# patterns carrying a catalogue number
set CNT_ATCEFY 0
set CNT_ATCLFY 0
set CNT_ATCFY 0
set CNT_ATCY 0

# patterns carrying a barcode
set CNT_ATBLFY 0
set CNT_ATBEFY 0
set CNT_ATBFY 0

# patterns with neither catalogue number nor barcode
set CNT_ATFLY 0
set CNT_ATEFLY 0
set CNT_ATEFY 0
set CNT_ATELY 0
set CNT_ATFEY 0
set CNT_ATEY 0
set CNT_ATLFY 0
set CNT_ATFY 0
set CNT_ATLY 0
set CNT_ATY 0

# overall bookkeeping
set TOTAL 0
set IGNORED 0
set UNMATCHED 0
set ONEMATCH 0
set MMATCH 0
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Regex building blocks.
#
# Each fragment wraps its payload in a named capture group. `string match -r`
# turns every named group into a shell variable of the same name, which is how
# the loop below obtains A (artist), T (title), C (catno), B (barcode),
# L (lang), F (format), E (edition) and Y (year).
# ---------------------------------------------------------------------------

# Edition tag, optionally wrapped in parentheses.
set EDITION '\(?(?<E>REMASTERED|BOOTLEG|DEMO|PROMO|REISSUE|EXPANDED_EDITION|DELUXE_EDITION|REMASTERED_DELUXE_EDITION|LIMITED_EDITION|BONUS_EDITION|SPECIAL_EDITION|REMASTERED_EXPANDED_BOXSET|REMASTERED_BOXSET|Remastered_Edition|Limited_Edition_Vinyl|PROMO_VINYL|Platinum_Edition)\)?'

# Media format tag, optionally wrapped in parentheses.
set FORMAT '\(?(?<F>[0-9]{1,2}CD|[0-9]{1,2}LP|LP|[0-9]{1,2}VINYL|CD|CDA|CDS|CDM|CDEP|CDR|CDREP|EP|VINYL|VLS|TAPE|WEB|SINGLE-WEB|EP-WEB|BOXSET|MAG)\)?'

# Two-letter codes accepted in the "lang" slot of a release name.
set ISO3166 'AW|AF|AO|AI|AX|AL|AD|AE|AR|AM|AS|AQ|TF|AG|AU|AT|AZ|BI|BE|BJ|BQ|BF|BD|BG|BH|BS|BA|BL|BY|BZ|BM|BO|BR|BB|BN|BT|BV|BW|CF|CA|CC|CH|CL|CN|CI|CM|CD|CG|CK|CO|KM|CV|CR|CU|CW|CX|KY|CY|CZ|DE|DJ|DM|DK|DO|DZ|EC|EG|ER|EH|ES|EE|ET|FI|FJ|FK|FR|FO|FM|GA|GB|GE|GG|GH|GI|GN|GP|GM|GW|GQ|GR|GD|GL|GT|GF|GU|GY|HK|HM|HN|HR|HT|HU|ID|IM|IN|IO|IE|IR|IQ|IS|IL|IT|JM|JE|JO|JP|KZ|KE|KG|KH|KI|KN|KR|KW|LA|LB|LR|LY|LC|LI|LK|LS|LT|LU|LV|MO|MF|MA|MC|MD|MG|MV|MX|MH|MK|ML|MT|MM|ME|MN|MP|MZ|MR|MS|MQ|MU|MW|MY|YT|NA|NC|NE|NF|NG|NI|NU|NL|NO|NP|NR|NZ|OM|PK|PA|PN|PE|PH|PW|PG|PL|PR|KP|PT|PY|PS|PF|QA|RE|RO|RU|RW|SA|SD|SN|SG|GS|SH|SJ|SB|SL|SV|SM|SO|PM|RS|SS|ST|SR|SK|SI|SE|SZ|SX|SC|SY|TC|TD|TG|TH|TJ|TK|TM|TL|TO|TT|TN|TR|TV|TW|TZ|UG|UA|UM|UY|US|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|YE|ZA|ZM|ZW'

set ISO639 'AA|AB|AE|AF|AK|AM|AN|AR|AS|AV|AY|AZ|BA|BE|BG|BH|BI|BM|BN|BO|BR|BS|CA|CE|CH|CO|CR|CS|CU|CV|CY|DA|DE|DV|DZ|EE|EL|EN|EO|ES|ET|EU|FA|FF|FI|FJ|FO|FR|FY|GA|GD|GL|GN|GU|GV|HA|HE|HI|HO|HR|HT|HU|HY|HZ|IA|ID|IE|IG|II|IK|IO|IS|IT|IU|JA|JV|KA|KG|KI|KJ|KK|KL|KM|KN|KO|KR|KS|KU|KV|KW|KY|LA|LB|LG|LI|LN|LO|LT|LU|LV|MG|MH|MI|MK|ML|MN|MR|MS|MT|MY|NA|NB|ND|NE|NG|NL|NN|NO|NR|NV|NY|OC|OJ|OM|OR|OS|PA|PI|PL|PS|PT|QU|RM|RN|RO|RU|RW|SA|SC|SD|SE|SG|SI|SK|SL|SM|SN|SO|SQ|SR|SS|ST|SU|SV|SW|TA|TE|TG|TH|TI|TK|TL|TN|TO|TR|TS|TT|TW|TY|UG|UK|UR|UZ|VE|VI|VO|WA|WO|XH|YI|YO|ZA|ZH|ZU'

# Deliberately NOT named LANG: that is the exported locale variable, and
# assigning a regex to it would corrupt the locale of fish itself and of
# every child process (jq).
set LANGCODE "(?<L>$ISO3166|$ISO639|SP)"

set TEAM '[a-zA-Z0-9_]+'
set YEAR '\(?(?<Y>(19|20)[0-9]{2})\)?'
set SEP '(-|_-_|--)'
# Catalogue number: always parenthesised, letters and digits in a few layouts.
set CATNO '\((?<C>[a-zA-Z]+[0-9]+|[a-zA-Z]+[_-][0-9]+|[a-zA-Z]+[0-9]+[a-zA-Z]+|[a-zA-Z]+[_-][0-9]+[a-zA-Z]+|[a-zA-Z]+[_-][0-9]+[_-][0-9]+|[a-zA-Z]+[0-9]+[_-][a-zA-Z]+)\)'
set BARCODE '\(?(?<B>[0-9_-]{5,15})\)?'
set ARTIST '(?<A>[a-zA-Z0-9._]+)'
set TITLE '(?<T>[a-zA-Z0-9_.()]+)'

# Every supported pattern starts with artist/separator/title and ends with
# -year-team, so build those two pieces once instead of on every attempt.
set HEAD "^$ARTIST$SEP$TITLE"
set TAIL "-$YEAR-$TEAM\$"

# Live recordings / broadcasts (source tag followed by a dd-dd-dddd date) are skipped.
set LIVE_RE "(LINE|DVBS|DVBC|FM|DAB|SBD|CABLE|SAT)-[0-9]{2}-[0-9]{2}-[0-9]{4}-$TEAM\$"

# Tags that carry no information usable in a search; stripped before matching.
set NOISE 'FLAC' 'WEBFLAC' 'DIRFIX' 'PROOFFIX' 'NFOFIX' 'REPACK' 'PROPER' 'RETAIL' 'READ_NFO' 'OST' 'RERIP'

# jq program for --json: empty fields become null, everything else is passed
# through. No input document is needed, hence `jq -n` below.
set JQ_FILTER '
def opt: if length == 0 then null else . end;
{
  "rls": $rls,
  "artist": $a,
  "title": $t,
  "catno": ($c | opt),
  "barcode": ($b | opt),
  "format": ($f | opt),
  "edition": ($e | opt),
  "year": ($y | opt)
}'

# ---------------------------------------------------------------------------
# Main loop: one release name per line on stdin, one record per matched
# release on stdout (TSV by default, JSON with -j).
# ---------------------------------------------------------------------------
while read -l REL
    set TOTAL (math $TOTAL + 1)

    if string match -iq -r -- "$LIVE_RE" "$REL"
        set IGNORED (math $IGNORED + 1)
        continue
    end

    # remove stuff unusable in search; ORG keeps the untouched name for output
    set ORG $REL
    for N in $NOISE
        # Quote $REL so `string` always gets an argument and never falls
        # back to reading the loop's stdin.
        set REL (string replace -ia -- "-$N-" '-' "$REL")
        set REL (string replace -ia -- "_$N-" '-' "$REL")
    end

    # reset named regex groups so nothing leaks over from the previous line
    set -e A
    set -e T
    set -e B
    set -e C
    set -e L
    set -e F
    set -e E
    set -e Y

    # Most specific patterns first; the first one that matches wins and its
    # named groups stay set for the output step.
    if string match -iq -r -- "$HEAD-$CATNO-$EDITION-$FORMAT$TAIL" "$REL"
        set CNT_ATCEFY (math $CNT_ATCEFY + 1)
    else if string match -iq -r -- "$HEAD-$CATNO-$LANGCODE-$FORMAT$TAIL" "$REL"
        set CNT_ATCLFY (math $CNT_ATCLFY + 1)
    else if string match -iq -r -- "$HEAD-$CATNO-$FORMAT$TAIL" "$REL"
        set CNT_ATCFY (math $CNT_ATCFY + 1)
    else if string match -iq -r -- "$HEAD-$CATNO$TAIL" "$REL"
        set CNT_ATCY (math $CNT_ATCY + 1)
    else if string match -iq -r -- "$HEAD$CATNO-$FORMAT$TAIL" "$REL"
        # catno glued to the title, no dash in between
        set CNT_ATCFY (math $CNT_ATCFY + 1)
    else if string match -iq -r -- "$HEAD$CATNO$TAIL" "$REL"
        set CNT_ATCY (math $CNT_ATCY + 1)
    else if string match -iq -r -- "$HEAD-$BARCODE-$LANGCODE-$FORMAT$TAIL" "$REL"
        set CNT_ATBLFY (math $CNT_ATBLFY + 1)
    else if string match -iq -r -- "$HEAD-$BARCODE-$EDITION-$FORMAT$TAIL" "$REL"
        set CNT_ATBEFY (math $CNT_ATBEFY + 1)
    else if string match -iq -r -- "$HEAD-$BARCODE-$FORMAT$TAIL" "$REL"
        set CNT_ATBFY (math $CNT_ATBFY + 1)
    else if string match -iq -r -- "$HEAD-$FORMAT-$LANGCODE$TAIL" "$REL"
        set CNT_ATFLY (math $CNT_ATFLY + 1)
    else if string match -iq -r -- "$HEAD-$EDITION-$FORMAT-$LANGCODE$TAIL" "$REL"
        set CNT_ATEFLY (math $CNT_ATEFLY + 1)
    else if string match -iq -r -- "$HEAD-$EDITION-$FORMAT$TAIL" "$REL"
        set CNT_ATEFY (math $CNT_ATEFY + 1)
    else if string match -iq -r -- "$HEAD-$EDITION-$LANGCODE$TAIL" "$REL"
        set CNT_ATELY (math $CNT_ATELY + 1)
    else if string match -iq -r -- "$HEAD-$LANGCODE-$FORMAT$TAIL" "$REL"
        set CNT_ATLFY (math $CNT_ATLFY + 1)
    else if string match -iq -r -- "$HEAD-$FORMAT-$EDITION$TAIL" "$REL"
        set CNT_ATFEY (math $CNT_ATFEY + 1)
    else if string match -iq -r -- "$HEAD-$FORMAT$TAIL" "$REL"
        set CNT_ATFY (math $CNT_ATFY + 1)
    else if string match -iq -r -- "$HEAD-$EDITION$TAIL" "$REL"
        set CNT_ATEY (math $CNT_ATEY + 1)
    else if string match -iq -r -- "$HEAD-$LANGCODE$TAIL" "$REL"
        set CNT_ATLY (math $CNT_ATLY + 1)
    else if string match -iq -r -- "$HEAD$TAIL" "$REL"
        set CNT_ATY (math $CNT_ATY + 1)
    else
        set UNMATCHED (math $UNMATCHED + 1)
        if set -q _flag_verbose
            echo "$ORG" >&2
        end
        continue
    end

    # Underscores stand in for spaces in scene names; undo that for the
    # free-text fields. `--` matters: a barcode capture may begin with '-'.
    if test -n "$A"
        set A (string replace -a -- '_' ' ' "$A" | string trim)
    end
    if test -n "$T"
        set T (string replace -a -- '_' ' ' "$T" | string trim)
    end
    if test -n "$B"
        set B (string replace -a -- '_' ' ' "$B" | string trim)
    end
    if test -n "$E"
        set E (string replace -a -- '_' ' ' "$E" | string trim)
    end

    if set -q _flag_json
        jq -n -cSM --arg rls "$ORG" --arg a "$A" --arg t "$T" \
            --arg c "$C" --arg b "$B" --arg f "$F" --arg e "$E" --arg y "$Y" \
            "$JQ_FILTER"
    else
        printf "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" "$ORG" "$A" "$T" "$C" "$B" "$F" "$E" "$Y"
    end
end
|
|
|
|
|
|
|
|
# With -s/--stats, report how many releases each pattern matched, followed by
# the unmatched / ignored / total figures. Everything goes to stderr so it
# never mixes with the TSV/JSON records on stdout.
# Every counter the matching loop increments is listed, including ATCY and
# ATFEY, so the per-pattern lines plus Unmatched and Ignored add up to Total.
if set -q _flag_stats
    begin
        echo "ATCEFY $CNT_ATCEFY"
        echo "ATCLFY $CNT_ATCLFY"
        echo "ATCFY $CNT_ATCFY"
        echo "ATCY $CNT_ATCY"
        echo "ATBLFY $CNT_ATBLFY"
        echo "ATBEFY $CNT_ATBEFY"
        echo "ATBFY $CNT_ATBFY"
        echo "ATFLY $CNT_ATFLY"
        echo "ATEFLY $CNT_ATEFLY"
        echo "ATEFY $CNT_ATEFY"
        echo "ATELY $CNT_ATELY"
        echo "ATLFY $CNT_ATLFY"
        echo "ATFEY $CNT_ATFEY"
        echo "ATFY $CNT_ATFY"
        echo "ATEY $CNT_ATEY"
        echo "ATLY $CNT_ATLY"
        echo "ATY $CNT_ATY"
        echo "Unmatched $UNMATCHED"
        echo "Ignored $IGNORED"
        echo "Total $TOTAL"
    end >&2
end
|