A bot written in Python3 that mirrors YouTube channels to PeerTube channels as videos are released in a YouTube channel.
You can select up to 25 topics. Each topic must start with a letter or number and may contain up to 35 characters, using letters, numbers, and dashes ('-').

327 lines
14 KB

  1. #!/usr/bin/python3
  2. import pafy
  3. import feedparser as fp
  4. from urllib.request import urlretrieve
  5. import requests
  6. import json
  7. from time import sleep
  8. from os import mkdir, path
  9. from shutil import rmtree
  10. import mimetypes
  11. from requests_toolbelt.multipart.encoder import MultipartEncoder
  12. import utils
  13. def get_video_data(channel_id):
  14. yt_rss_url = "https://www.youtube.com/feeds/videos.xml?channel_id=" + channel_id
  15. feed = fp.parse(yt_rss_url)
  16. channel_lang = feed["feed"]["title_detail"]["language"]
  17. print(feed["feed"])
  18. entries = feed["entries"]
  19. channels_timestamps = "channels_timestamps.csv"
  20. # clear any existing queue before start
  21. queue = []
  22. # read contents of channels_timestamps.csv, create list object of contents
  23. ct = open(channels_timestamps, "r")
  24. ctr = ct.read().split("\n")
  25. ct.close()
  26. ctr_line = []
  27. channel_found = False
  28. # check if channel ID is found in channels_timestamps.csv
  29. for line in ctr:
  30. line_list = line.split(',')
  31. if channel_id == line_list[0]:
  32. channel_found = True
  33. ctr_line = line
  34. break
  35. if not channel_found:
  36. print("new channel added to config: " + channel_id)
  37. print(channel_id)
  38. # iterate through video entries for channel, parse data into objects for use
  39. for pos, i in enumerate(reversed(entries)):
  40. published = i["published"]
  41. updated = i["updated"]
  42. if not channel_found:
  43. # add the video to the queue
  44. queue.append(i)
  45. ctr_line = str(channel_id + "," + published + "," + updated + '\n')
  46. # add the new line to ctr for adding to channels_timestamps later
  47. ctr.append(ctr_line)
  48. channel_found = True
  49. # if the channel exists in channels_timestamps, update "published" time in the channel line
  50. else:
  51. published_int = utils.convert_timestamp(published)
  52. ctr_line_list = ctr_line.split(",")
  53. line_published_int = utils.convert_timestamp(ctr_line_list[1])
  54. if published_int > line_published_int:
  55. # update the timestamp in the line for the channel in channels_timestamps,
  56. ctr.remove(ctr_line)
  57. ctr_line = str(channel_id + "," + published + "," + updated + '\n')
  58. ctr.append(ctr_line)
  59. # and add current videos to queue.
  60. queue.append(i)
  61. print(published)
  62. # write the new channels and timestamps line to channels_timestamps.csv
  63. ct = open(channels_timestamps, "w")
  64. for line in ctr:
  65. if line != '':
  66. ct.write(line + "\n")
  67. ct.close()
  68. return queue, channel_lang
  69. def download_yt_video(queue_item, dl_dir, channel_conf):
  70. url = queue_item["link"]
  71. dl_dir = dl_dir + channel_conf["name"]
  72. try:
  73. video = pafy.new(url)
  74. streams = video.streams
  75. #for s in streams:
  76. #print(s.resolution, s.extension, s.get_filesize, s.url)
  77. best = video.getbest(preftype=channel_conf["preferred_extension"])
  78. filepath = dl_dir + "/"+ queue_item["yt_videoid"] + "." + channel_conf["preferred_extension"]
  79. #TODO: implement resolution logic from config, currently downloading best resolution
  80. best.download(filepath=filepath, quiet=False)
  81. except:
  82. pass
  83. # TODO: check YT alternate URL for video availability
  84. # TODO: print and log exceptions
  85. def save_metadata(queue_item, dl_dir, channel_conf):
  86. dl_dir = dl_dir + channel_conf["name"]
  87. link = queue_item["link"]
  88. title = queue_item["title"]
  89. description = queue_item["summary"]
  90. author = queue_item["author"]
  91. published = queue_item["published"]
  92. metadata_file = dl_dir + "/" + queue_item["yt_videoid"] + ".txt"
  93. metadata = open(metadata_file, "w+")
  94. # save relevant metadata as semicolon separated easy to read values to text file
  95. metadata.write('title: "' + title + '";\n\nlink: "' + link + '";\n\nauthor: "' + author + '";\n\npublished: "' +
  96. published + '";\n\ndescription: "' + description + '"\n\n;')
  97. # save raw metadata JSON string
  98. metadata.write(str(queue_item))
  99. metadata.close()
  100. def save_thumbnail(queue_item, dl_dir, channel_conf):
  101. dl_dir = dl_dir + channel_conf["name"]
  102. thumb = str(queue_item["media_thumbnail"][0]["url"])
  103. extension = thumb.split(".")[-1]
  104. thumb_file = dl_dir + "/" + queue_item["yt_videoid"] + "." + extension
  105. # download the thumbnail
  106. urlretrieve(thumb, thumb_file)
  107. return extension
  108. def get_pt_auth(channel_conf):
  109. # get variables from channel_conf
  110. pt_api = channel_conf["peertube_instance"] + "/api/v1"
  111. pt_uname = channel_conf["peertube_username"]
  112. pt_passwd = channel_conf["peertube_password"]
  113. # get client ID and secret from peertube instance
  114. id_secret = json.loads(str(requests.get(pt_api + "/oauth-clients/local").content).split("'")[1])
  115. client_id = id_secret["client_id"]
  116. client_secret = id_secret["client_secret"]
  117. # construct JSON for post request to get access token
  118. auth_json = {'client_id': client_id,
  119. 'client_secret': client_secret,
  120. 'grant_type': 'password',
  121. 'response_type': 'code',
  122. 'username': pt_uname,
  123. 'password': pt_passwd
  124. }
  125. # get access token
  126. auth_result = json.loads(str(requests.post(pt_api + "/users/token", data=auth_json).content).split("'")[1])
  127. access_token = auth_result["access_token"]
  128. return access_token
  129. def get_pt_channel_id(channel_conf):
  130. pt_api = channel_conf["peertube_instance"] + "/api/v1"
  131. post_url = pt_api + "/video-channels/" + channel_conf["peertube_channel"] + "/"
  132. returned_json = json.loads(requests.get(post_url).content)
  133. channel_id = returned_json["id"]
  134. return channel_id
  135. def get_file(file_path):
  136. mimetypes.init()
  137. return (path.basename(file_path), open(path.abspath(file_path), 'rb'),
  138. mimetypes.types_map[path.splitext(file_path)[1]])
  139. def handle_peertube_result(request_result):
  140. if request_result.status_code < 300:
  141. return True
  142. else:
  143. print(request_result)
  144. return False
  145. def upload_to_pt(dl_dir, channel_conf, queue_item, access_token, thumb_extension):
  146. # Adapted from Prismedia https://git.lecygnenoir.info/LecygneNoir/prismedia
  147. pt_api = channel_conf["peertube_instance"] + "/api/v1"
  148. video_file = dl_dir + channel_conf["name"] + "/" + queue_item["yt_videoid"] + "." + \
  149. channel_conf["preferred_extension"]
  150. thumb_file = dl_dir + channel_conf["name"] + "/" + queue_item["yt_videoid"] + "." + thumb_extension
  151. description = channel_conf["description_prefix"] + "\n\n" + queue_item["summary"] + "\n\n" + channel_conf["description_suffix"]
  152. channel_id = str(get_pt_channel_id(channel_conf))
  153. # We need to transform fields into tuple to deal with tags as
  154. # MultipartEncoder does not support list refer
  155. # https://github.com/requests/toolbelt/issues/190 and
  156. # https://github.com/requests/toolbelt/issues/205
  157. try:
  158. fields = [
  159. ("name", queue_item["title"]),
  160. ("licence", "1"),
  161. ("description", description),
  162. ("nsfw", channel_conf["nsfw"]),
  163. ("channelId", channel_id),
  164. ("originallyPublishedAt", queue_item["published"]),
  165. ("category", channel_conf["pt_channel_category"]),
  166. ("language", channel_conf["default_lang"]),
  167. ("privacy", str(channel_conf["pt_privacy"])),
  168. ("commentsEnabled", channel_conf["comments_enabled"]),
  169. ("videofile", get_file(video_file)),
  170. ("thumbnailfile", get_file(thumb_file)),
  171. ("previewfile", get_file(thumb_file)),
  172. ("waitTranscoding", 'false')
  173. ]
  174. except:
  175. return
  176. if channel_conf["pt_tags"] != "":
  177. fields.append(("tags", "[" + channel_conf["pt_tags"] + "]"))
  178. else:
  179. print("you have no tags in your configuration file for this channel")
  180. multipart_data = MultipartEncoder(fields)
  181. headers = {
  182. 'Content-Type': multipart_data.content_type,
  183. 'Authorization': "Bearer " + access_token
  184. }
  185. return handle_peertube_result(requests.post(pt_api + "/videos/upload", data=multipart_data, headers=headers))
  186. def pt_http_import(dl_dir, channel_conf, queue_item, access_token, thumb_extension, yt_lang):
  187. # Adapted from Prismedia https://git.lecygnenoir.info/LecygneNoir/prismedia
  188. pt_api = channel_conf["peertube_instance"] + "/api/v1"
  189. yt_video_url = queue_item["link"]
  190. # TODO: use the alternate link if video not found error occurs
  191. alternate_link = queue_item["links"][0]["href"]
  192. thumb_file = dl_dir + channel_conf["name"] + "/" + queue_item["yt_videoid"] + "." + thumb_extension
  193. description = channel_conf["description_prefix"] + "\n\n" + queue_item["summary"] + "\n\n" + channel_conf["description_suffix"]
  194. channel_id = str(get_pt_channel_id(channel_conf))
  195. language = utils.set_pt_lang(yt_lang, channel_conf["default_lang"])
  196. category = utils.set_pt_category(channel_conf["pt_channel_category"])
  197. # We need to transform fields into tuple to deal with tags as
  198. # MultipartEncoder does not support list refer
  199. # https://github.com/requests/toolbelt/issues/190 and
  200. # https://github.com/requests/toolbelt/issues/205
  201. fields = [
  202. ("name", queue_item["title"]),
  203. ("licence", "1"),
  204. ("description", description),
  205. ("nsfw", channel_conf["nsfw"]),
  206. ("channelId", channel_id),
  207. ("originallyPublishedAt", queue_item["published"]),
  208. ("category", category),
  209. ("language", language),
  210. ("privacy", str(channel_conf["pt_privacy"])),
  211. ("commentsEnabled", channel_conf["comments_enabled"]),
  212. ("targetUrl", yt_video_url),
  213. ("thumbnailfile", get_file(thumb_file)),
  214. ("previewfile", get_file(thumb_file)),
  215. ("waitTranscoding", 'false')
  216. ]
  217. if channel_conf["pt_tags"] != "":
  218. fields.append(("tags[]", channel_conf["pt_tags"]))
  219. else:
  220. print("you have no tags in your configuration file for this channel")
  221. multipart_data = MultipartEncoder(fields)
  222. headers = {
  223. 'Content-Type': multipart_data.content_type,
  224. 'Authorization': "Bearer " + access_token
  225. }
  226. return handle_peertube_result(requests.post(pt_api + "/videos/imports", data=multipart_data, headers=headers))
  227. def log_upload_error(yt_url,channel_conf):
  228. error_file = open("video_errors.csv", "a")
  229. error_file.write(channel_conf['name']+","+yt_url+"\n")
  230. error_file.close()
  231. print("error !")
  232. def run_steps(conf):
  233. # TODO: logging
  234. channel = conf["channel"]
  235. # run loop for every channel in the configuration file
  236. global_conf = conf["global"]
  237. if conf["global"]["delete_videos"] == "true":
  238. delete_videos = True
  239. else:
  240. delete_videos = False
  241. # The following enables the deletion of thumbnails, videos are not downloaded at all
  242. if conf["global"]["use_pt_http_import"] == "true":
  243. delete_videos = True
  244. use_pt_http_import = True
  245. else:
  246. use_pt_http_import = False
  247. dl_dir = global_conf["video_download_dir"]
  248. if not path.exists(dl_dir):
  249. mkdir(dl_dir)
  250. channel_counter = 0
  251. for c in channel:
  252. print("\n")
  253. channel_id = channel[c]["channel_id"]
  254. channel_conf = channel[str(channel_counter)]
  255. video_data = get_video_data(channel_id)
  256. queue = video_data[0]
  257. yt_lang = video_data[1]
  258. if len(queue) > 0:
  259. if not path.exists(dl_dir + "/" + channel_conf["name"]):
  260. mkdir(dl_dir + "/" + channel_conf["name"])
  261. # download videos, metadata and thumbnails from youtube
  262. for queue_item in queue:
  263. if not use_pt_http_import:
  264. print("downloading " + queue_item["yt_videoid"] + " from YouTube...")
  265. download_yt_video(queue_item, dl_dir, channel_conf)
  266. print("done.")
  267. # TODO: download closest to config specified resolution instead of best resolution
  268. thumb_extension = save_thumbnail(queue_item, dl_dir, channel_conf)
  269. # only save metadata to text file if archiving videos
  270. if not delete_videos:
  271. print("saving video metadata...")
  272. save_metadata(queue_item, dl_dir, channel_conf)
  273. print("done.")
  274. access_token = get_pt_auth(channel_conf)
  275. # upload videos, metadata and thumbnails to peertube
  276. for queue_item in queue:
  277. if not use_pt_http_import:
  278. print("uploading " + queue_item["yt_videoid"] + " to Peertube...")
  279. pt_result = upload_to_pt(dl_dir, channel_conf, queue_item, access_token, thumb_extension)
  280. else:
  281. print("mirroring " + queue_item["link"] + " to Peertube using HTTP import...")
  282. pt_result = pt_http_import(dl_dir, channel_conf, queue_item, access_token, thumb_extension, yt_lang)
  283. if pt_result:
  284. print("done !")
  285. else:
  286. log_upload_error(queue_item["link"],channel_conf)
  287. if delete_videos:
  288. print("deleting videos and/or thumbnails...")
  289. rmtree(dl_dir + "/" + channel_conf["name"], ignore_errors=True)
  290. print("done")
  291. channel_counter += 1
  292. def run(run_once=True):
  293. #TODO: turn this into a daemon
  294. conf = utils.read_conf("config.toml")
  295. if run_once:
  296. run_steps(conf)
  297. else:
  298. while True:
  299. poll_frequency = int(conf["global"]["poll_frequency"]) * 60
  300. run_steps(conf)
  301. sleep(poll_frequency)
  302. if __name__ == "__main__":
  303. run(run_once=False)