scene release to discogs release
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

192 lines
8.5KB

  1. #!/usr/bin/env -S fish
  2. #set fish_trace on
  # Print the help text to stderr and terminate the script with status 1.
  # Used both for argument errors and for an explicit -h/--help.
  function usage
      # fish does not provide $0 (it would expand as an ordinary, unset
      # variable and print nothing), so ask `status` for the script path.
      set -l script (status current-filename)
      begin
          echo "Split scene release name into artist, title, barcode, catno, format, edition, year"
          echo
          echo "Usage: $script [-j|-t] [-v] [-s] [-h]"
          echo
          echo " -t / --tsv Output tab separated (default)"
          echo " -j / --json Output as JSON"
          echo " -v / --verbose Output unmatched to stderr"
          echo " -s / --stats Output stats to stderr"
          echo " -h / --help Show this help"
          exit 1
      end >&2
  end
  # Parse the command line. --json and --tsv are mutually exclusive; every
  # accepted option ends up in a _flag_<name> variable.
  if not argparse --name (status current-filename) --exclusive 'j,t' \
          'h/help' 'v/verbose' 'j/json' 't/tsv' 's/stats' -- $argv
      # argparse has already reported the problem; show the help and quit.
      usage
  end
  # An explicit help request takes the same exit path.
  if set -q _flag_help
      usage
  end
  # Supported patterns (every release name additionally ends in "-team";
  # "lang" is a two-letter language or country code):
  # artist-title-(catno)-edition-format-year
  # artist-title-(catno)-lang-format-year
  # artist-title-(catno)-format-year
  # artist-title-(catno)-year
  # artist-title(catno)-format-year
  # artist-title(catno)-year
  # artist-title-(barcode)-lang-format-year
  # artist-title-(barcode)-edition-format-year
  # artist-title-(barcode)-format-year
  # artist-title-format-lang-year
  # artist-title-edition-format-lang-year
  # artist-title-edition-format-year
  # artist-title-edition-lang-year
  # artist-title-edition-year
  # artist-title-format-edition-year
  # artist-title-format-year
  # artist-title-lang-format-year
  # artist-title-lang-year
  # artist-title-year
  # Hit counters, one per pattern family. The suffix spells the fields of the
  # pattern: A artist, T title, C catno, B barcode, E edition, L lang,
  # F format, Y year.

  # Patterns carrying a catalogue number.
  set CNT_ATCEFY 0
  set CNT_ATCLFY 0
  set CNT_ATCFY 0
  set CNT_ATCY 0

  # Patterns carrying a barcode.
  set CNT_ATBLFY 0
  set CNT_ATBEFY 0
  set CNT_ATBFY 0

  # Patterns with neither catalogue number nor barcode.
  set CNT_ATFLY 0
  set CNT_ATEFLY 0
  set CNT_ATEFY 0
  set CNT_ATELY 0
  set CNT_ATFEY 0
  set CNT_ATEY 0
  set CNT_ATLFY 0
  set CNT_ATFY 0
  set CNT_ATLY 0
  set CNT_ATY 0

  # Whole-run tallies of input lines.
  set TOTAL 0
  set IGNORED 0
  set UNMATCHED 0
  set ONEMATCH 0
  set MMATCH 0
  # Regex building blocks for the release-name matcher. `string match -r`
  # imports every named group (?<X>...) as variable $X:
  #   A artist, T title, C catno, B barcode, L lang, F format, E edition, Y year.
  set EDITION '\(?(?<E>REMASTERED|BOOTLEG|DEMO|PROMO|REISSUE|EXPANDED_EDITION|DELUXE_EDITION|REMASTERED_DELUXE_EDITION|LIMITED_EDITION|BONUS_EDITION|SPECIAL_EDITION|REMASTERED_EXPANDED_BOXSET|REMASTERED_BOXSET|Remastered_Edition|Limited_Edition_Vinyl|PROMO_VINYL|Platinum_Edition)\)?'
  set FORMAT '\(?(?<F>[0-9]{1,2}CD|[0-9]{1,2}LP|LP|[0-9]{1,2}VINYL|CD|CDA|CDS|CDM|CDEP|CDR|CDREP|EP|VINYL|VLS|TAPE|WEB|SINGLE-WEB|EP-WEB|BOXSET|MAG)\)?'
  set ISO3166 'AW|AF|AO|AI|AX|AL|AD|AE|AR|AM|AS|AQ|TF|AG|AU|AT|AZ|BI|BE|BJ|BQ|BF|BD|BG|BH|BS|BA|BL|BY|BZ|BM|BO|BR|BB|BN|BT|BV|BW|CF|CA|CC|CH|CL|CN|CI|CM|CD|CG|CK|CO|KM|CV|CR|CU|CW|CX|KY|CY|CZ|DE|DJ|DM|DK|DO|DZ|EC|EG|ER|EH|ES|EE|ET|FI|FJ|FK|FR|FO|FM|GA|GB|GE|GG|GH|GI|GN|GP|GM|GW|GQ|GR|GD|GL|GT|GF|GU|GY|HK|HM|HN|HR|HT|HU|ID|IM|IN|IO|IE|IR|IQ|IS|IL|IT|JM|JE|JO|JP|KZ|KE|KG|KH|KI|KN|KR|KW|LA|LB|LR|LY|LC|LI|LK|LS|LT|LU|LV|MO|MF|MA|MC|MD|MG|MV|MX|MH|MK|ML|MT|MM|ME|MN|MP|MZ|MR|MS|MQ|MU|MW|MY|YT|NA|NC|NE|NF|NG|NI|NU|NL|NO|NP|NR|NZ|OM|PK|PA|PN|PE|PH|PW|PG|PL|PR|KP|PT|PY|PS|PF|QA|RE|RO|RU|RW|SA|SD|SN|SG|GS|SH|SJ|SB|SL|SV|SM|SO|PM|RS|SS|ST|SR|SK|SI|SE|SZ|SX|SC|SY|TC|TD|TG|TH|TJ|TK|TM|TL|TO|TT|TN|TR|TV|TW|TZ|UG|UA|UM|UY|US|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|YE|ZA|ZM|ZW'
  set ISO639 'AA|AB|AE|AF|AK|AM|AN|AR|AS|AV|AY|AZ|BA|BE|BG|BH|BI|BM|BN|BO|BR|BS|CA|CE|CH|CO|CR|CS|CU|CV|CY|DA|DE|DV|DZ|EE|EL|EN|EO|ES|ET|EU|FA|FF|FI|FJ|FO|FR|FY|GA|GD|GL|GN|GU|GV|HA|HE|HI|HO|HR|HT|HU|HY|HZ|IA|ID|IE|IG|II|IK|IO|IS|IT|IU|JA|JV|KA|KG|KI|KJ|KK|KL|KM|KN|KO|KR|KS|KU|KV|KW|KY|LA|LB|LG|LI|LN|LO|LT|LU|LV|MG|MH|MI|MK|ML|MN|MR|MS|MT|MY|NA|NB|ND|NE|NG|NL|NN|NO|NR|NV|NY|OC|OJ|OM|OR|OS|PA|PI|PL|PS|PT|QU|RM|RN|RO|RU|RW|SA|SC|SD|SE|SG|SI|SK|SL|SM|SN|SO|SQ|SR|SS|ST|SU|SV|SW|TA|TE|TG|TH|TI|TK|TL|TN|TO|TR|TS|TT|TW|TY|UG|UK|UR|UZ|VE|VI|VO|WA|WO|XH|YI|YO|ZA|ZH|ZU'
  # Deliberately NOT named LANG: a plain `set LANG ...` would overwrite the
  # locale environment variable of that name, which this shell and every
  # child process (jq) would then inherit.
  set LANGCODE "(?<L>$ISO3166|$ISO639|SP)"
  set TEAM '[a-zA-Z0-9_]+'
  set YEAR '\(?(?<Y>(19|20)[0-9]{2})\)?'
  set SEP '(-|_-_|--)'
  set CATNO '\((?<C>[a-zA-Z]+[0-9]+|[a-zA-Z]+[_-][0-9]+|[a-zA-Z]+[0-9]+[a-zA-Z]+|[a-zA-Z]+[_-][0-9]+[a-zA-Z]+|[a-zA-Z]+[_-][0-9]+[_-][0-9]+|[a-zA-Z]+[0-9]+[_-][a-zA-Z]+)\)'
  set BARCODE '\(?(?<B>[0-9_-]{5,15})\)?'
  set ARTIST '(?<A>[a-zA-Z0-9._]+)'
  set TITLE '(?<T>[a-zA-Z0-9_.()]+)'

  # Main loop: one release name per line on stdin, one record per matched
  # line on stdout (TSV by default, JSON with -j/--json).
  while read -l REL
      set TOTAL (math $TOTAL+1)

      # Names ending in <source tag>-NN-NN-NNNN-<team> are skipped entirely
      # (presumably dated broadcast/line recordings - TODO confirm).
      if string match -iq -r -- "(LINE|DVBS|DVBC|FM|DAB|SBD|CABLE|SAT)-[0-9]{2}-[0-9]{2}-[0-9]{4}-$TEAM\$" "$REL"
          set IGNORED (math $IGNORED+1)
          continue
      end

      # Strip tags that are unusable in a search; ORG keeps the untouched
      # name for the output record.
      set ORG $REL
      for N in 'FLAC' 'WEBFLAC' 'DIRFIX' 'PROOFFIX' 'NFOFIX' 'REPACK' 'PROPER' 'RETAIL' 'READ_NFO' 'OST' 'RERIP'
          # Quoted so the name is always passed as exactly one argument and
          # `string` can never fall back to reading the loop's stdin.
          set REL (string replace -ia -- "-$N-" '-' "$REL")
          set REL (string replace -ia -- "_$N-" '-' "$REL")
      end

      # Reset the named regex groups so fields from the previous line
      # cannot leak into this record.
      set -e A; set -e T; set -e B; set -e C; set -e L; set -e F; set -e E; set -e Y

      # First matching pattern wins, so the order of this chain matters:
      # the more specific shapes are tried before the more general ones.
      if string match -iq -r -- "^$ARTIST$SEP$TITLE-$CATNO-$EDITION-$FORMAT-$YEAR-$TEAM\$" "$REL"
          set CNT_ATCEFY (math $CNT_ATCEFY+1)
      else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$CATNO-$LANGCODE-$FORMAT-$YEAR-$TEAM\$" "$REL"
          set CNT_ATCLFY (math $CNT_ATCLFY+1)
      else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$CATNO-$FORMAT-$YEAR-$TEAM\$" "$REL"
          set CNT_ATCFY (math $CNT_ATCFY+1)
      else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$CATNO-$YEAR-$TEAM\$" "$REL"
          set CNT_ATCY (math $CNT_ATCY+1)
      else if string match -iq -r -- "^$ARTIST$SEP$TITLE$CATNO-$FORMAT-$YEAR-$TEAM\$" "$REL"
          # catno glued directly onto the title, no dash in between
          set CNT_ATCFY (math $CNT_ATCFY+1)
      else if string match -iq -r -- "^$ARTIST$SEP$TITLE$CATNO-$YEAR-$TEAM\$" "$REL"
          set CNT_ATCY (math $CNT_ATCY+1)
      else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$BARCODE-$LANGCODE-$FORMAT-$YEAR-$TEAM\$" "$REL"
          set CNT_ATBLFY (math $CNT_ATBLFY+1)
      else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$BARCODE-$EDITION-$FORMAT-$YEAR-$TEAM\$" "$REL"
          set CNT_ATBEFY (math $CNT_ATBEFY+1)
      else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$BARCODE-$FORMAT-$YEAR-$TEAM\$" "$REL"
          set CNT_ATBFY (math $CNT_ATBFY+1)
      else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$FORMAT-$LANGCODE-$YEAR-$TEAM\$" "$REL"
          set CNT_ATFLY (math $CNT_ATFLY+1)
      else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$EDITION-$FORMAT-$LANGCODE-$YEAR-$TEAM\$" "$REL"
          set CNT_ATEFLY (math $CNT_ATEFLY+1)
      else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$EDITION-$FORMAT-$YEAR-$TEAM\$" "$REL"
          set CNT_ATEFY (math $CNT_ATEFY+1)
      else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$EDITION-$LANGCODE-$YEAR-$TEAM\$" "$REL"
          set CNT_ATELY (math $CNT_ATELY+1)
      else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$LANGCODE-$FORMAT-$YEAR-$TEAM\$" "$REL"
          set CNT_ATLFY (math $CNT_ATLFY+1)
      else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$FORMAT-$EDITION-$YEAR-$TEAM\$" "$REL"
          set CNT_ATFEY (math $CNT_ATFEY+1)
      else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$FORMAT-$YEAR-$TEAM\$" "$REL"
          set CNT_ATFY (math $CNT_ATFY+1)
      else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$EDITION-$YEAR-$TEAM\$" "$REL"
          set CNT_ATEY (math $CNT_ATEY+1)
      else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$LANGCODE-$YEAR-$TEAM\$" "$REL"
          set CNT_ATLY (math $CNT_ATLY+1)
      else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$YEAR-$TEAM\$" "$REL"
          set CNT_ATY (math $CNT_ATY+1)
      else
          # Nothing matched: count it, optionally report the original name,
          # and emit no record.
          set UNMATCHED (math $UNMATCHED+1)
          if set -q _flag_verbose
              echo "$ORG" >&2
          end
          continue
      end

      # Turn the scene-style underscores of the free-text fields into
      # spaces and trim the result. `--` guards values that begin with a
      # dash (the barcode character class allows one).
      if set -q A; and test -n "$A"
          set A (string replace -a -- '_' ' ' "$A" | string trim)
      end
      if set -q T; and test -n "$T"
          set T (string replace -a -- '_' ' ' "$T" | string trim)
      end
      if set -q B; and test -n "$B"
          set B (string replace -a -- '_' ' ' "$B" | string trim)
      end
      if set -q E; and test -n "$E"
          set E (string replace -a -- '_' ' ' "$E" | string trim)
      end

      if set -q _flag_json
          # -n: build the object from the --arg values alone, no input
          # document needed. Empty optional fields become JSON null.
          jq -ncSM --arg rls "$ORG" --arg a "$A" --arg t "$T" \
              --arg c "$C" --arg b "$B" --arg f "$F" --arg e "$E" --arg y "$Y" '{
              "rls": $rls,
              "artist": $a,
              "title": $t,
              "catno": (if ($c | length == 0) then null else $c end),
              "barcode": (if ($b | length == 0) then null else $b end),
              "format": (if ($f | length == 0) then null else $f end),
              "edition": (if ($e | length == 0) then null else $e end),
              "year": (if ($y | length == 0) then null else $y end)
          }'
      else
          # TSV columns: release, artist, title, catno, barcode, format,
          # edition, year.
          printf "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" "$ORG" "$A" "$T" "$C" "$B" "$F" "$E" "$Y"
      end
  end
  # Optional run statistics (-s/--stats). Written to stderr so they never
  # mix with the TSV/JSON records on stdout. Every counter the matcher
  # increments is listed, including ATCY and ATFEY, so the per-pattern
  # figures add up to Total - Ignored - Unmatched.
  if set -q _flag_stats
      begin
          echo "ATCEFY $CNT_ATCEFY"
          echo "ATCLFY $CNT_ATCLFY"
          echo "ATCFY $CNT_ATCFY"
          echo "ATCY $CNT_ATCY"
          echo "ATBLFY $CNT_ATBLFY"
          echo "ATBEFY $CNT_ATBEFY"
          echo "ATBFY $CNT_ATBFY"
          echo "ATFLY $CNT_ATFLY"
          echo "ATEFLY $CNT_ATEFLY"
          echo "ATEFY $CNT_ATEFY"
          echo "ATELY $CNT_ATELY"
          echo "ATFEY $CNT_ATFEY"
          echo "ATLFY $CNT_ATLFY"
          echo "ATFY $CNT_ATFY"
          echo "ATEY $CNT_ATEY"
          echo "ATLY $CNT_ATLY"
          echo "ATY $CNT_ATY"
          echo "Unmatched $UNMATCHED"
          echo "Ignored $IGNORED"
          echo "Total $TOTAL"
      end >&2
  end