scene release to discogs release
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

192 lignes
8.5KB

  1. #!/usr/bin/env -S fish
  2. #set fish_trace on
  3. function usage
  4. begin
  5. echo "Split scene release name into artist, title, barcode, catno, format, edition, year"
  6. echo
  7. echo "Usage: $0 [-j][-t][-v]"
  8. echo
  9. echo " -t / --tsv Output tab separated (default)"
  10. echo " -j / --json Output as JSON"
  11. echo " -v / --verbose Output unmatched to stderr"
  12. echo " -s / --stats Output stats to stderr"
  13. exit 1
  14. end >&2
  15. end
  16. argparse -n (status current-filename) -x 'j,t' 'h/help' 'v/verbose' 'j/json' 't/tsv' 's/stats' -- $argv
  17. if test $status -ne 0
  18. usage
  19. end
  20. if set -q _flag_help
  21. usage
  22. end
  23. # supported patterns
  24. # artist-title-(catno)-edition-format-year
  25. # artist-title-(catno)-lang-format-year
  26. # artist-title-(catno)-format-year
  27. # artist-title(catno)-format-year
  28. # artist-title(catno)-year
  29. # artist-title-(barcode)-lang-format-year
  30. # artist-title-(barcode)-edition-format-year
  31. # artist-title-(barcode)-format-year
  32. # artist-title-format-lang-year
  33. # artist-title-edition-format-lang-year
  34. # artist-title-edition-format-year
  35. # artist-title-edition-lang-year
  36. # artist-title-edition-year
  37. # artist-title-format-edition-year
  38. # artist-title-format-year
  39. # artist-title-lang-format-year
  40. # artist-title-lang-year
  41. # artist-title-year
  42. set CNT_ATCEFY 0; set CNT_ATCLFY 0; set CNT_ATCFY 0; set CNT_ATCY 0
  43. set CNT_ATBLFY 0; set CNT_ATBEFY 0; set CNT_ATBFY 0
  44. set CNT_ATFLY 0; set CNT_ATEFLY 0; set CNT_ATEFY 0; set CNT_ATELY 0; set CNT_ATFEY 0
  45. set CNT_ATEY 0; set CNT_ATLFY 0; set CNT_ATFY 0; set CNT_ATLY 0; set CNT_ATY 0
  46. set TOTAL 0; set IGNORED 0; set UNMATCHED 0; set ONEMATCH 0; set MMATCH 0
  47. set EDITION '\(?(?<E>REMASTERED|BOOTLEG|DEMO|PROMO|REISSUE|EXPANDED_EDITION|DELUXE_EDITION|REMASTERED_DELUXE_EDITION|LIMITED_EDITION|BONUS_EDITION|SPECIAL_EDITION|REMASTERED_EXPANDED_BOXSET|REMASTERED_BOXSET|Remastered_Edition|Limited_Edition_Vinyl|PROMO_VINYL|Platinum_Edition)\)?'
  48. set FORMAT '\(?(?<F>[0-9]{1,2}CD|[0-9]{1,2}LP|LP|[0-9]{1,2}VINYL|CD|CDA|CDS|CDM|CDEP|CDR|CDREP|EP|VINYL|VLS|TAPE|WEB|SINGLE-WEB|EP-WEB|BOXSET|MAG)\)?'
  49. set ISO3166 'AW|AF|AO|AI|AX|AL|AD|AE|AR|AM|AS|AQ|TF|AG|AU|AT|AZ|BI|BE|BJ|BQ|BF|BD|BG|BH|BS|BA|BL|BY|BZ|BM|BO|BR|BB|BN|BT|BV|BW|CF|CA|CC|CH|CL|CN|CI|CM|CD|CG|CK|CO|KM|CV|CR|CU|CW|CX|KY|CY|CZ|DE|DJ|DM|DK|DO|DZ|EC|EG|ER|EH|ES|EE|ET|FI|FJ|FK|FR|FO|FM|GA|GB|GE|GG|GH|GI|GN|GP|GM|GW|GQ|GR|GD|GL|GT|GF|GU|GY|HK|HM|HN|HR|HT|HU|ID|IM|IN|IO|IE|IR|IQ|IS|IL|IT|JM|JE|JO|JP|KZ|KE|KG|KH|KI|KN|KR|KW|LA|LB|LR|LY|LC|LI|LK|LS|LT|LU|LV|MO|MF|MA|MC|MD|MG|MV|MX|MH|MK|ML|MT|MM|ME|MN|MP|MZ|MR|MS|MQ|MU|MW|MY|YT|NA|NC|NE|NF|NG|NI|NU|NL|NO|NP|NR|NZ|OM|PK|PA|PN|PE|PH|PW|PG|PL|PR|KP|PT|PY|PS|PF|QA|RE|RO|RU|RW|SA|SD|SN|SG|GS|SH|SJ|SB|SL|SV|SM|SO|PM|RS|SS|ST|SR|SK|SI|SE|SZ|SX|SC|SY|TC|TD|TG|TH|TJ|TK|TM|TL|TO|TT|TN|TR|TV|TW|TZ|UG|UA|UM|UY|US|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|YE|ZA|ZM|ZW'
  50. set ISO639 'AA|AB|AE|AF|AK|AM|AN|AR|AS|AV|AY|AZ|BA|BE|BG|BH|BI|BM|BN|BO|BR|BS|CA|CE|CH|CO|CR|CS|CU|CV|CY|DA|DE|DV|DZ|EE|EL|EN|EO|ES|ET|EU|FA|FF|FI|FJ|FO|FR|FY|GA|GD|GL|GN|GU|GV|HA|HE|HI|HO|HR|HT|HU|HY|HZ|IA|ID|IE|IG|II|IK|IO|IS|IT|IU|JA|JV|KA|KG|KI|KJ|KK|KL|KM|KN|KO|KR|KS|KU|KV|KW|KY|LA|LB|LG|LI|LN|LO|LT|LU|LV|MG|MH|MI|MK|ML|MN|MR|MS|MT|MY|NA|NB|ND|NE|NG|NL|NN|NO|NR|NV|NY|OC|OJ|OM|OR|OS|PA|PI|PL|PS|PT|QU|RM|RN|RO|RU|RW|SA|SC|SD|SE|SG|SI|SK|SL|SM|SN|SO|SQ|SR|SS|ST|SU|SV|SW|TA|TE|TG|TH|TI|TK|TL|TN|TO|TR|TS|TT|TW|TY|UG|UK|UR|UZ|VE|VI|VO|WA|WO|XH|YI|YO|ZA|ZH|ZU'
  51. set LANG "(?<L>$ISO3166|$ISO639|SP)"
  52. set TEAM '[a-zA-Z0-9_]+'
  53. set YEAR '\(?(?<Y>(19|20)[0-9]{2})\)?'
  54. set SEP '(-|_-_|--)'
  55. set CATNO '\((?<C>[a-zA-Z]+[0-9]+|[a-zA-Z]+[_-][0-9]+|[a-zA-Z]+[0-9]+[a-zA-Z]+|[a-zA-Z]+[_-][0-9]+[a-zA-Z]+|[a-zA-Z]+[_-][0-9]+[_-][0-9]+|[a-zA-Z]+[0-9]+[_-][a-zA-Z]+)\)'
  56. set BARCODE '\(?(?<B>[0-9_-]{5,15})\)?'
  57. set ARTIST '(?<A>[a-zA-Z0-9._]+)'
  58. set TITLE '(?<T>[a-zA-Z0-9_.()]+)'
  59. while read -l REL
  60. set TOTAL (math $TOTAL+1)
  61. if string match -iq -r -- "(LINE|DVBS|DVBC|FM|DAB|SBD|CABLE|SAT)-[0-9]{2}-[0-9]{2}-[0-9]{4}-$TEAM\$" "$REL"
  62. set IGNORED (math $IGNORED+1)
  63. continue
  64. end
  65. # remove stuff unusuable in search
  66. set ORG $REL
  67. for N in 'FLAC' 'WEBFLAC' 'DIRFIX' 'PROOFFIX' 'NFOFIX' 'REPACK' 'PROPER' 'RETAIL' 'READ_NFO' 'OST' 'RERIP'
  68. set REL (string replace -ia -- "-$N-" '-' $REL)
  69. set REL (string replace -ia -- "_$N-" '-' $REL)
  70. end
  71. # reset named regex groups
  72. set -e A; set -e T; set -e B; set -e C; set -e L; set -e F; set -e E; set -e Y
  73. if string match -iq -r -- "^$ARTIST$SEP$TITLE-$CATNO-$EDITION-$FORMAT-$YEAR-$TEAM\$" "$REL"
  74. set CNT_ATCEFY (math $CNT_ATCEFY+1)
  75. else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$CATNO-$LANG-$FORMAT-$YEAR-$TEAM\$" "$REL"
  76. set CNT_ATCLFY (math $CNT_ATCLFY+1)
  77. else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$CATNO-$FORMAT-$YEAR-$TEAM\$" "$REL"
  78. set CNT_ATCFY (math $CNT_ATCFY+1)
  79. else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$CATNO-$YEAR-$TEAM\$" "$REL"
  80. set CNT_ATCY (math $CNT_ATCY+1)
  81. else if string match -iq -r -- "^$ARTIST$SEP$TITLE$CATNO-$FORMAT-$YEAR-$TEAM\$" "$REL"
  82. set CNT_ATCFY (math $CNT_ATCFY+1)
  83. else if string match -iq -r -- "^$ARTIST$SEP$TITLE$CATNO-$YEAR-$TEAM\$" "$REL"
  84. set CNT_ATCY (math $CNT_ATCY+1)
  85. else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$BARCODE-$LANG-$FORMAT-$YEAR-$TEAM\$" "$REL"
  86. set CNT_ATBLFY (math $CNT_ATBLFY+1)
  87. else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$BARCODE-$EDITION-$FORMAT-$YEAR-$TEAM\$" "$REL"
  88. set CNT_ATBEFY (math $CNT_ATBEFY+1)
  89. else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$BARCODE-$FORMAT-$YEAR-$TEAM\$" "$REL"
  90. set CNT_ATBFY (math $CNT_ATBFY+1)
  91. else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$FORMAT-$LANG-$YEAR-$TEAM\$" "$REL"
  92. set CNT_ATFLY (math $CNT_ATFLY+1)
  93. else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$EDITION-$FORMAT-$LANG-$YEAR-$TEAM\$" "$REL"
  94. set CNT_ATEFLY (math $CNT_ATEFLY+1)
  95. else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$EDITION-$FORMAT-$YEAR-$TEAM\$" "$REL"
  96. set CNT_ATEFY (math $CNT_ATEFY+1)
  97. else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$EDITION-$LANG-$YEAR-$TEAM\$" "$REL"
  98. set CNT_ATELY (math $CNT_ATELY+1)
  99. else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$LANG-$FORMAT-$YEAR-$TEAM\$" "$REL"
  100. set CNT_ATLFY (math $CNT_ATLFY+1)
  101. else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$FORMAT-$EDITION-$YEAR-$TEAM\$" "$REL"
  102. set CNT_ATFEY (math $CNT_ATFEY+1)
  103. else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$FORMAT-$YEAR-$TEAM\$" "$REL"
  104. set CNT_ATFY (math $CNT_ATFY+1)
  105. else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$EDITION-$YEAR-$TEAM\$" "$REL"
  106. set CNT_ATEY (math $CNT_ATEY+1)
  107. else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$LANG-$YEAR-$TEAM\$" "$REL"
  108. set CNT_ATLY (math $CNT_ATLY+1)
  109. else if string match -iq -r -- "^$ARTIST$SEP$TITLE-$YEAR-$TEAM\$" "$REL"
  110. set CNT_ATY (math $CNT_ATY+1)
  111. else
  112. set UNMATCHED (math $UNMATCHED+1)
  113. if set -q _flag_verbose
  114. echo "$ORG" >&2
  115. end
  116. continue
  117. end
  118. if set -q A; and test -n "$A"
  119. set A (string replace -a '_' ' ' "$A")
  120. set A (string trim "$A")
  121. end
  122. if set -q T; and test -n "$T"
  123. set T (string replace -a '_' ' ' "$T")
  124. set T (string trim "$T")
  125. end
  126. if set -q B; and test -n "$B"
  127. set B (string replace -a '_' ' ' "$B")
  128. set B (string trim "$B")
  129. end
  130. if set -q E; and test -n "$E"
  131. set E (string replace -a '_' ' ' "$E")
  132. set E (string trim "$E")
  133. end
  134. if set -q _flag_json
  135. echo '{}' | jq -cSM --arg rls "$ORG" --arg a "$A" --arg t "$T" \
  136. --arg c "$C" --arg b "$B" --arg f "$F" --arg e "$E" --arg y "$Y" '{
  137. "rls": $rls,
  138. "artist": $a,
  139. "title": $t,
  140. "catno": (if ($c | length == 0) then null else $c end),
  141. "barcode": (if ($b | length == 0) then null else $b end),
  142. "format": (if ($f | length == 0) then null else $f end),
  143. "edition": (if ($e | length == 0) then null else $e end),
  144. "year": (if ($y | length == 0) then null else $y end)
  145. }'
  146. else
  147. printf "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" "$ORG" "$A" "$T" "$C" "$B" "$F" "$E" "$Y"
  148. end
  149. end
  150. # to stderr
  151. if set -q _flag_stats
  152. begin
  153. echo "ATCEFY $CNT_ATCEFY"
  154. echo "ATCLFY $CNT_ATCLFY"
  155. echo "ATCFY $CNT_ATCFY"
  156. echo "ATBLFY $CNT_ATBLFY"
  157. echo "ATBEFY $CNT_ATBEFY"
  158. echo "ATBFY $CNT_ATBFY"
  159. echo "ATFLY $CNT_ATFLY"
  160. echo "ATEFLY $CNT_ATEFLY"
  161. echo "ATEFY $CNT_ATEFY"
  162. echo "ATELY $CNT_ATELY"
  163. echo "ATLFY $CNT_ATLFY"
  164. echo "ATFY $CNT_ATFY"
  165. echo "ATEY $CNT_ATEY"
  166. echo "ATLY $CNT_ATLY"
  167. echo "ATY $CNT_ATY"
  168. echo "Unmatched $UNMATCHED"
  169. echo "Ignored $IGNORED"
  170. echo "Total $TOTAL"
  171. end >&2
  172. end