A bot written in Python 3 that mirrors YouTube channels to PeerTube channels as new videos are released.
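The bot is configured through a config.toml file, which run() loads via utils.read_conf("config.toml"). The TOML layout itself is not shown on this page, so the following is only a rough sketch, written as the equivalent Python dict, of the keys the script below actually reads; every example value (and the "0"-style channel keys) is an illustrative assumption, not part of the project.

config_example = {
    "global": {
        "video_download_dir": "videos/",   # working directory for downloads
        "poll_frequency": "15",            # minutes between RSS polls when looping
        "delete_videos": "true",           # delete local files after mirroring
        "use_pt_http_import": "true"       # let PeerTube fetch the video itself
    },
    "channel": {
        "0": {                             # channels are keyed "0", "1", ...
            "name": "example_channel",
            "channel_id": "UCxxxxxxxxxxxxxxxxxxxxxx",
            "preferred_extension": "mp4",
            "peertube_instance": "https://peertube.example.com",
            "peertube_username": "mirrorbot",
            "peertube_password": "secret",
            "peertube_channel": "example_channel",
            "pt_channel_category": "science",   # passed through utils.set_pt_category
            "pt_privacy": "1",
            "pt_tags": "",
            "default_lang": "en",
            "nsfw": "false",
            "comments_enabled": "true",
            "description_prefix": "Mirrored from YouTube.",
            "description_suffix": "Original channel: https://www.youtube.com/channel/..."
        }
    }
}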


#!/usr/bin/python3
import pafy
import feedparser as fp
from urllib.request import urlretrieve
import requests
import json
from time import sleep
from os import mkdir, path
from shutil import rmtree
import mimetypes
from requests_toolbelt.multipart.encoder import MultipartEncoder
import utils

def get_video_data(channel_id):
    yt_rss_url = "https://www.youtube.com/feeds/videos.xml?channel_id=" + channel_id
    feed = fp.parse(yt_rss_url)
    channel_lang = feed["feed"]["title_detail"]["language"]
    print(feed["feed"])
    entries = feed["entries"]
    channels_timestamps = "channels_timestamps.csv"
    # clear any existing queue before start
    queue = []
    # read contents of channels_timestamps.csv, create list object of contents
    ct = open(channels_timestamps, "r")
    ctr = ct.read().split("\n")
    ct.close()
    ctr_line = []
    channel_found = False
    # check if channel ID is found in channels_timestamps.csv
    for line in ctr:
        line_list = line.split(',')
        if channel_id == line_list[0]:
            channel_found = True
            ctr_line = line
            break
    if not channel_found:
        print("new channel added to config: " + channel_id)
    print(channel_id)
    # iterate through video entries for channel, parse data into objects for use
    for pos, i in enumerate(reversed(entries)):
        published = i["published"]
        updated = i["updated"]
        if not channel_found:
            # add the video to the queue
            queue.append(i)
            ctr_line = str(channel_id + "," + published + "," + updated + '\n')
            # add the new line to ctr for adding to channels_timestamps later
            ctr.append(ctr_line)
            channel_found = True
        # if the channel exists in channels_timestamps, update "published" time in the channel line
        else:
            published_int = utils.convert_timestamp(published)
            ctr_line_list = ctr_line.split(",")
            line_published_int = utils.convert_timestamp(ctr_line_list[1])
            if published_int > line_published_int:
                # update the timestamp in the line for the channel in channels_timestamps,
                ctr.remove(ctr_line)
                ctr_line = str(channel_id + "," + published + "," + updated + '\n')
                ctr.append(ctr_line)
                # and add current videos to queue.
                queue.append(i)
                print(published)
    # write the new channels and timestamps line to channels_timestamps.csv
    ct = open(channels_timestamps, "w")
    for line in ctr:
        if line != '':
            ct.write(line + "\n")
    ct.close()
    return queue, channel_lang

def download_yt_video(queue_item, dl_dir, channel_conf):
    url = queue_item["link"]
    dl_dir = dl_dir + channel_conf["name"]
    try:
        video = pafy.new(url)
        streams = video.streams
        #for s in streams:
            #print(s.resolution, s.extension, s.get_filesize, s.url)
        best = video.getbest(preftype=channel_conf["preferred_extension"])
        filepath = dl_dir + "/" + queue_item["yt_videoid"] + "." + channel_conf["preferred_extension"]
        #TODO: implement resolution logic from config, currently downloading best resolution
        best.download(filepath=filepath, quiet=False)
    except Exception as e:
        # don't let one failed download stop the rest of the queue
        print("failed to download " + url + ": " + str(e))
        # TODO: check YT alternate URL for video availability
        # TODO: log exceptions

def save_metadata(queue_item, dl_dir, channel_conf):
    dl_dir = dl_dir + channel_conf["name"]
    link = queue_item["link"]
    title = queue_item["title"]
    description = queue_item["summary"]
    author = queue_item["author"]
    published = queue_item["published"]
    metadata_file = dl_dir + "/" + queue_item["yt_videoid"] + ".txt"
    metadata = open(metadata_file, "w+")
    # save relevant metadata as semicolon separated easy to read values to text file
    metadata.write('title: "' + title + '";\n\nlink: "' + link + '";\n\nauthor: "' + author + '";\n\npublished: "' +
                   published + '";\n\ndescription: "' + description + '"\n\n;')
    # save raw metadata JSON string
    metadata.write(str(queue_item))
    metadata.close()

def save_thumbnail(queue_item, dl_dir, channel_conf):
    dl_dir = dl_dir + channel_conf["name"]
    thumb = str(queue_item["media_thumbnail"][0]["url"])
    extension = thumb.split(".")[-1]
    thumb_file = dl_dir + "/" + queue_item["yt_videoid"] + "." + extension
    # download the thumbnail
    urlretrieve(thumb, thumb_file)
    return extension

def get_pt_auth(channel_conf):
    # get variables from channel_conf
    pt_api = channel_conf["peertube_instance"] + "/api/v1"
    pt_uname = channel_conf["peertube_username"]
    pt_passwd = channel_conf["peertube_password"]
    # get client ID and secret from peertube instance
    id_secret = requests.get(pt_api + "/oauth-clients/local").json()
    client_id = id_secret["client_id"]
    client_secret = id_secret["client_secret"]
    # construct JSON for post request to get access token
    auth_json = {'client_id': client_id,
                 'client_secret': client_secret,
                 'grant_type': 'password',
                 'response_type': 'code',
                 'username': pt_uname,
                 'password': pt_passwd
                 }
    # get access token
    auth_result = requests.post(pt_api + "/users/token", data=auth_json).json()
    access_token = auth_result["access_token"]
    return access_token

def get_pt_channel_id(channel_conf):
    pt_api = channel_conf["peertube_instance"] + "/api/v1"
    post_url = pt_api + "/video-channels/" + channel_conf["peertube_channel"] + "/"
    returned_json = json.loads(requests.get(post_url).content)
    channel_id = returned_json["id"]
    return channel_id


def get_file(file_path):
    mimetypes.init()
    return (path.basename(file_path), open(path.abspath(file_path), 'rb'),
            mimetypes.types_map[path.splitext(file_path)[1]])

def upload_to_pt(dl_dir, channel_conf, queue_item, access_token, thumb_extension):
    # Adapted from Prismedia https://git.lecygnenoir.info/LecygneNoir/prismedia
    pt_api = channel_conf["peertube_instance"] + "/api/v1"
    video_file = dl_dir + channel_conf["name"] + "/" + queue_item["yt_videoid"] + "." + \
                 channel_conf["preferred_extension"]
    thumb_file = dl_dir + channel_conf["name"] + "/" + queue_item["yt_videoid"] + "." + thumb_extension
    description = channel_conf["description_prefix"] + "\n\n" + queue_item["summary"] + "\n\n" + channel_conf["description_suffix"]
    channel_id = str(get_pt_channel_id(channel_conf))
    # We need to transform fields into tuple to deal with tags as
    # MultipartEncoder does not support lists; refer to
    # https://github.com/requests/toolbelt/issues/190 and
    # https://github.com/requests/toolbelt/issues/205
    fields = [
        ("name", queue_item["title"]),
        ("licence", "1"),
        ("description", description),
        ("nsfw", channel_conf["nsfw"]),
        ("channelId", channel_id),
        ("originallyPublishedAt", queue_item["published"]),
        ("category", channel_conf["pt_channel_category"]),
        ("language", channel_conf["default_lang"]),
        ("privacy", str(channel_conf["pt_privacy"])),
        ("commentsEnabled", channel_conf["comments_enabled"]),
        ("videofile", get_file(video_file)),
        ("thumbnailfile", get_file(thumb_file)),
        ("previewfile", get_file(thumb_file)),
        ("waitTranscoding", 'false')
    ]
    if channel_conf["pt_tags"] != "":
        fields.append(("tags", "[" + channel_conf["pt_tags"] + "]"))
    else:
        print("you have no tags in your configuration file for this channel")
    multipart_data = MultipartEncoder(fields)
    headers = {
        'Content-Type': multipart_data.content_type,
        'Authorization': "Bearer " + access_token
    }
    print(requests.post(pt_api + "/videos/upload", data=multipart_data, headers=headers).content)

def pt_http_import(dl_dir, channel_conf, queue_item, access_token, thumb_extension, yt_lang):
    # Adapted from Prismedia https://git.lecygnenoir.info/LecygneNoir/prismedia
    pt_api = channel_conf["peertube_instance"] + "/api/v1"
    yt_video_url = queue_item["link"]
    # TODO: use the alternate link if video not found error occurs
    alternate_link = queue_item["links"][0]["href"]
    thumb_file = dl_dir + channel_conf["name"] + "/" + queue_item["yt_videoid"] + "." + thumb_extension
    description = channel_conf["description_prefix"] + "\n\n" + queue_item["summary"] + "\n\n" + channel_conf["description_suffix"]
    channel_id = str(get_pt_channel_id(channel_conf))
    language = utils.set_pt_lang(yt_lang, channel_conf["default_lang"])
    category = utils.set_pt_category(channel_conf["pt_channel_category"])
    # We need to transform fields into tuple to deal with tags as
    # MultipartEncoder does not support lists; refer to
    # https://github.com/requests/toolbelt/issues/190 and
    # https://github.com/requests/toolbelt/issues/205
    fields = [
        ("name", queue_item["title"]),
        ("licence", "1"),
        ("description", description),
        ("nsfw", channel_conf["nsfw"]),
        ("channelId", channel_id),
        ("originallyPublishedAt", queue_item["published"]),
        ("category", category),
        ("language", language),
        ("privacy", str(channel_conf["pt_privacy"])),
        ("commentsEnabled", channel_conf["comments_enabled"]),
        ("targetUrl", yt_video_url),
        ("thumbnailfile", get_file(thumb_file)),
        ("previewfile", get_file(thumb_file)),
        ("waitTranscoding", 'false')
    ]
    if channel_conf["pt_tags"] != "":
        fields.append(("tags[]", channel_conf["pt_tags"]))
    else:
        print("you have no tags in your configuration file for this channel")
    multipart_data = MultipartEncoder(fields)
    headers = {
        'Content-Type': multipart_data.content_type,
        'Authorization': "Bearer " + access_token
    }
    print(requests.post(pt_api + "/videos/imports", data=multipart_data, headers=headers).content)

def run_steps(conf):
    # TODO: logging
    channel = conf["channel"]
    # run loop for every channel in the configuration file
    global_conf = conf["global"]
    if conf["global"]["delete_videos"] == "true":
        delete_videos = True
    else:
        delete_videos = False
    # The following enables the deletion of thumbnails, videos are not downloaded at all
    if conf["global"]["use_pt_http_import"] == "true":
        delete_videos = True
        use_pt_http_import = True
    else:
        use_pt_http_import = False
    dl_dir = global_conf["video_download_dir"]
    if not path.exists(dl_dir):
        mkdir(dl_dir)
    channel_counter = 0
    for c in channel:
        print("\n")
        channel_id = channel[c]["channel_id"]
        channel_conf = channel[str(channel_counter)]
        video_data = get_video_data(channel_id)
        queue = video_data[0]
        yt_lang = video_data[1]
        if len(queue) > 0:
            if not path.exists(dl_dir + "/" + channel_conf["name"]):
                mkdir(dl_dir + "/" + channel_conf["name"])
            # download videos, metadata and thumbnails from youtube
            for queue_item in queue:
                if not use_pt_http_import:
                    print("downloading " + queue_item["yt_videoid"] + " from YouTube...")
                    download_yt_video(queue_item, dl_dir, channel_conf)
                    print("done.")
                    # TODO: download closest to config specified resolution instead of best resolution
                thumb_extension = save_thumbnail(queue_item, dl_dir, channel_conf)
                # only save metadata to text file if archiving videos
                if not delete_videos:
                    print("saving video metadata...")
                    save_metadata(queue_item, dl_dir, channel_conf)
                    print("done.")
            access_token = get_pt_auth(channel_conf)
            # upload videos, metadata and thumbnails to peertube
            for queue_item in queue:
                if not use_pt_http_import:
                    print("uploading " + queue_item["yt_videoid"] + " to Peertube...")
                    upload_to_pt(dl_dir, channel_conf, queue_item, access_token, thumb_extension)
                    print("done.")
                else:
                    print("mirroring " + queue_item["link"] + " to Peertube using HTTP import...")
                    pt_http_import(dl_dir, channel_conf, queue_item, access_token, thumb_extension, yt_lang)
                    print("done.")
            if delete_videos:
                print("deleting videos and/or thumbnails...")
                rmtree(dl_dir + "/" + channel_conf["name"], ignore_errors=True)
                print("done")
        channel_counter += 1

def run(run_once=True):
    #TODO: turn this into a daemon
    conf = utils.read_conf("config.toml")
    if run_once:
        run_steps(conf)
    else:
        while True:
            poll_frequency = int(conf["global"]["poll_frequency"]) * 60
            run_steps(conf)
            sleep(poll_frequency)


if __name__ == "__main__":
    run(run_once=False)