A bot written in Python3 that mirrors YouTube channels to PeerTube channels as videos are released in a YouTube channel.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

349 lines
14KB

  1. #!/usr/bin/python3
  2. import sys
  3. import getopt
  4. import pafy
  5. import feedparser as fp
  6. from urllib.request import urlretrieve
  7. import requests
  8. import json
  9. from time import sleep
  10. from os import mkdir, path
  11. from shutil import rmtree
  12. import mimetypes
  13. from requests_toolbelt.multipart.encoder import MultipartEncoder
  14. import utils
  15. def get_video_data(channel_id):
  16. yt_rss_url = "https://www.youtube.com/feeds/videos.xml?channel_id=" + channel_id
  17. feed = fp.parse(yt_rss_url)
  18. channel_lang = feed["feed"]["title_detail"]["language"]
  19. print(feed["feed"])
  20. entries = feed["entries"]
  21. channels_timestamps = "channels_timestamps.csv"
  22. # clear any existing queue before start
  23. queue = []
  24. # read contents of channels_timestamps.csv, create list object of contents
  25. ct = open(channels_timestamps, "r")
  26. ctr = ct.read().split("\n")
  27. ct.close()
  28. ctr_line = []
  29. channel_found = False
  30. # check if channel ID is found in channels_timestamps.csv
  31. for line in ctr:
  32. line_list = line.split(',')
  33. if channel_id == line_list[0]:
  34. channel_found = True
  35. ctr_line = line
  36. break
  37. if not channel_found:
  38. print("new channel added to config: " + channel_id)
  39. print(channel_id)
  40. # iterate through video entries for channel, parse data into objects for use
  41. for pos, i in enumerate(reversed(entries)):
  42. published = i["published"]
  43. updated = i["updated"]
  44. if not channel_found:
  45. # add the video to the queue
  46. queue.append(i)
  47. ctr_line = str(channel_id + "," + published + "," + updated + '\n')
  48. # add the new line to ctr for adding to channels_timestamps later
  49. ctr.append(ctr_line)
  50. channel_found = True
  51. # if the channel exists in channels_timestamps, update "published" time in the channel line
  52. else:
  53. published_int = utils.convert_timestamp(published)
  54. ctr_line_list = ctr_line.split(",")
  55. line_published_int = utils.convert_timestamp(ctr_line_list[1])
  56. if published_int > line_published_int:
  57. # update the timestamp in the line for the channel in channels_timestamps,
  58. ctr.remove(ctr_line)
  59. ctr_line = str(channel_id + "," + published + "," + updated + '\n')
  60. ctr.append(ctr_line)
  61. # and add current videos to queue.
  62. queue.append(i)
  63. print(published)
  64. # write the new channels and timestamps line to channels_timestamps.csv
  65. ct = open(channels_timestamps, "w")
  66. for line in ctr:
  67. if line != '':
  68. ct.write(line + "\n")
  69. ct.close()
  70. return queue, channel_lang
  71. def download_yt_video(queue_item, dl_dir, channel_conf):
  72. url = queue_item["link"]
  73. dl_dir = dl_dir + channel_conf["name"]
  74. try:
  75. video = pafy.new(url)
  76. streams = video.streams
  77. #for s in streams:
  78. #print(s.resolution, s.extension, s.get_filesize, s.url)
  79. best = video.getbest(preftype=channel_conf["preferred_extension"])
  80. filepath = dl_dir + "/"+ queue_item["yt_videoid"] + "." + channel_conf["preferred_extension"]
  81. #TODO: implement resolution logic from config, currently downloading best resolution
  82. best.download(filepath=filepath, quiet=False)
  83. except:
  84. pass
  85. # TODO: check YT alternate URL for video availability
  86. # TODO: print and log exceptions
  87. def save_metadata(queue_item, dl_dir, channel_conf):
  88. dl_dir = dl_dir + channel_conf["name"]
  89. link = queue_item["link"]
  90. title = queue_item["title"]
  91. description = queue_item["summary"]
  92. author = queue_item["author"]
  93. published = queue_item["published"]
  94. metadata_file = dl_dir + "/" + queue_item["yt_videoid"] + ".txt"
  95. metadata = open(metadata_file, "w+")
  96. # save relevant metadata as semicolon separated easy to read values to text file
  97. metadata.write('title: "' + title + '";\n\nlink: "' + link + '";\n\nauthor: "' + author + '";\n\npublished: "' +
  98. published + '";\n\ndescription: "' + description + '"\n\n;')
  99. # save raw metadata JSON string
  100. metadata.write(str(queue_item))
  101. metadata.close()
  102. def save_thumbnail(queue_item, dl_dir, channel_conf):
  103. dl_dir = dl_dir + channel_conf["name"]
  104. thumb = str(queue_item["media_thumbnail"][0]["url"])
  105. extension = thumb.split(".")[-1]
  106. thumb_file = dl_dir + "/" + queue_item["yt_videoid"] + "." + extension
  107. # download the thumbnail
  108. urlretrieve(thumb, thumb_file)
  109. return extension
  110. def get_pt_auth(channel_conf):
  111. # get variables from channel_conf
  112. pt_api = channel_conf["peertube_instance"] + "/api/v1"
  113. pt_uname = channel_conf["peertube_username"]
  114. pt_passwd = channel_conf["peertube_password"]
  115. # get client ID and secret from peertube instance
  116. id_secret = json.loads(str(requests.get(pt_api + "/oauth-clients/local").content).split("'")[1])
  117. client_id = id_secret["client_id"]
  118. client_secret = id_secret["client_secret"]
  119. # construct JSON for post request to get access token
  120. auth_json = {'client_id': client_id,
  121. 'client_secret': client_secret,
  122. 'grant_type': 'password',
  123. 'response_type': 'code',
  124. 'username': pt_uname,
  125. 'password': pt_passwd
  126. }
  127. # get access token
  128. auth_result = json.loads(str(requests.post(pt_api + "/users/token", data=auth_json).content).split("'")[1])
  129. access_token = auth_result["access_token"]
  130. return access_token
  131. def get_pt_channel_id(channel_conf):
  132. pt_api = channel_conf["peertube_instance"] + "/api/v1"
  133. post_url = pt_api + "/video-channels/" + channel_conf["peertube_channel"] + "/"
  134. returned_json = json.loads(requests.get(post_url).content)
  135. channel_id = returned_json["id"]
  136. return channel_id
  137. def get_file(file_path):
  138. mimetypes.init()
  139. return (path.basename(file_path), open(path.abspath(file_path), 'rb'),
  140. mimetypes.types_map[path.splitext(file_path)[1]])
  141. def handle_peertube_result(request_result):
  142. if request_result.status_code < 300:
  143. return True
  144. else:
  145. print(request_result)
  146. return False
  147. def upload_to_pt(dl_dir, channel_conf, queue_item, access_token, thumb_extension):
  148. # Adapted from Prismedia https://git.lecygnenoir.info/LecygneNoir/prismedia
  149. pt_api = channel_conf["peertube_instance"] + "/api/v1"
  150. video_file = dl_dir + channel_conf["name"] + "/" + queue_item["yt_videoid"] + "." + \
  151. channel_conf["preferred_extension"]
  152. thumb_file = dl_dir + channel_conf["name"] + "/" + queue_item["yt_videoid"] + "." + thumb_extension
  153. description = channel_conf["description_prefix"] + "\n\n" + queue_item["summary"] + "\n\n" + channel_conf["description_suffix"]
  154. channel_id = str(get_pt_channel_id(channel_conf))
  155. category = utils.set_pt_category(channel_conf["pt_channel_category"])
  156. # We need to transform fields into tuple to deal with tags as
  157. # MultipartEncoder does not support list refer
  158. # https://github.com/requests/toolbelt/issues/190 and
  159. # https://github.com/requests/toolbelt/issues/205
  160. try:
  161. fields = [
  162. ("name", queue_item["title"]),
  163. ("licence", "1"),
  164. ("description", description),
  165. ("nsfw", channel_conf["nsfw"]),
  166. ("channelId", channel_id),
  167. ("originallyPublishedAt", queue_item["published"]),
  168. ("category", category),
  169. ("language", channel_conf["default_lang"]),
  170. ("privacy", str(channel_conf["pt_privacy"])),
  171. ("commentsEnabled", channel_conf["comments_enabled"]),
  172. ("videofile", get_file(video_file)),
  173. ("thumbnailfile", get_file(thumb_file)),
  174. ("previewfile", get_file(thumb_file)),
  175. ("waitTranscoding", 'false')
  176. ]
  177. except:
  178. return
  179. if channel_conf["pt_tags"] != "":
  180. fields.append(("tags", "[" + channel_conf["pt_tags"] + "]"))
  181. else:
  182. print("you have no tags in your configuration file for this channel")
  183. multipart_data = MultipartEncoder(fields)
  184. headers = {
  185. 'Content-Type': multipart_data.content_type,
  186. 'Authorization': "Bearer " + access_token
  187. }
  188. return handle_peertube_result(requests.post(pt_api + "/videos/upload", data=multipart_data, headers=headers))
  189. def pt_http_import(dl_dir, channel_conf, queue_item, access_token, thumb_extension, yt_lang):
  190. # Adapted from Prismedia https://git.lecygnenoir.info/LecygneNoir/prismedia
  191. pt_api = channel_conf["peertube_instance"] + "/api/v1"
  192. yt_video_url = queue_item["link"]
  193. # TODO: use the alternate link if video not found error occurs
  194. alternate_link = queue_item["links"][0]["href"]
  195. thumb_file = dl_dir + channel_conf["name"] + "/" + queue_item["yt_videoid"] + "." + thumb_extension
  196. description = channel_conf["description_prefix"] + "\n\n" + queue_item["summary"] + "\n\n" + channel_conf["description_suffix"]
  197. channel_id = str(get_pt_channel_id(channel_conf))
  198. language = utils.set_pt_lang(yt_lang, channel_conf["default_lang"])
  199. category = utils.set_pt_category(channel_conf["pt_channel_category"])
  200. # We need to transform fields into tuple to deal with tags as
  201. # MultipartEncoder does not support list refer
  202. # https://github.com/requests/toolbelt/issues/190 and
  203. # https://github.com/requests/toolbelt/issues/205
  204. fields = [
  205. ("name", queue_item["title"]),
  206. ("licence", "1"),
  207. ("description", description),
  208. ("nsfw", channel_conf["nsfw"]),
  209. ("channelId", channel_id),
  210. ("originallyPublishedAt", queue_item["published"]),
  211. ("category", category),
  212. ("language", language),
  213. ("privacy", str(channel_conf["pt_privacy"])),
  214. ("commentsEnabled", channel_conf["comments_enabled"]),
  215. ("targetUrl", yt_video_url),
  216. ("thumbnailfile", get_file(thumb_file)),
  217. ("previewfile", get_file(thumb_file)),
  218. ("waitTranscoding", 'false')
  219. ]
  220. if channel_conf["pt_tags"] != "":
  221. fields.append(("tags[]", channel_conf["pt_tags"]))
  222. else:
  223. print("you have no tags in your configuration file for this channel")
  224. multipart_data = MultipartEncoder(fields)
  225. headers = {
  226. 'Content-Type': multipart_data.content_type,
  227. 'Authorization': "Bearer " + access_token
  228. }
  229. return handle_peertube_result(requests.post(pt_api + "/videos/imports", data=multipart_data, headers=headers))
  230. def log_upload_error(yt_url,channel_conf):
  231. error_file = open("video_errors.csv", "a")
  232. error_file.write(channel_conf['name']+","+yt_url+"\n")
  233. error_file.close()
  234. print("error !")
  235. def run_steps(conf):
  236. # TODO: logging
  237. channel = conf["channel"]
  238. # run loop for every channel in the configuration file
  239. global_conf = conf["global"]
  240. if conf["global"]["delete_videos"] == "true":
  241. delete_videos = True
  242. else:
  243. delete_videos = False
  244. # The following enables the deletion of thumbnails, videos are not downloaded at all
  245. if conf["global"]["use_pt_http_import"] == "true":
  246. delete_videos = True
  247. use_pt_http_import = True
  248. else:
  249. use_pt_http_import = False
  250. dl_dir = global_conf["video_download_dir"]
  251. if not path.exists(dl_dir):
  252. mkdir(dl_dir)
  253. channel_counter = 0
  254. for c in channel:
  255. print("\n")
  256. channel_id = channel[c]["channel_id"]
  257. channel_conf = channel[str(channel_counter)]
  258. video_data = get_video_data(channel_id)
  259. queue = video_data[0]
  260. yt_lang = video_data[1]
  261. if len(queue) > 0:
  262. if not path.exists(dl_dir + "/" + channel_conf["name"]):
  263. mkdir(dl_dir + "/" + channel_conf["name"])
  264. # download videos, metadata and thumbnails from youtube
  265. for queue_item in queue:
  266. if not use_pt_http_import:
  267. print("downloading " + queue_item["yt_videoid"] + " from YouTube...")
  268. download_yt_video(queue_item, dl_dir, channel_conf)
  269. print("done.")
  270. # TODO: download closest to config specified resolution instead of best resolution
  271. thumb_extension = save_thumbnail(queue_item, dl_dir, channel_conf)
  272. # only save metadata to text file if archiving videos
  273. if not delete_videos:
  274. print("saving video metadata...")
  275. save_metadata(queue_item, dl_dir, channel_conf)
  276. print("done.")
  277. access_token = get_pt_auth(channel_conf)
  278. # upload videos, metadata and thumbnails to peertube
  279. for queue_item in queue:
  280. if not use_pt_http_import:
  281. print("uploading " + queue_item["yt_videoid"] + " to Peertube...")
  282. pt_result = upload_to_pt(dl_dir, channel_conf, queue_item, access_token, thumb_extension)
  283. else:
  284. print("mirroring " + queue_item["link"] + " to Peertube using HTTP import...")
  285. pt_result = pt_http_import(dl_dir, channel_conf, queue_item, access_token, thumb_extension, yt_lang)
  286. if pt_result:
  287. print("done !")
  288. else:
  289. log_upload_error(queue_item["link"],channel_conf)
  290. if delete_videos:
  291. print("deleting videos and/or thumbnails...")
  292. rmtree(dl_dir + "/" + channel_conf["name"], ignore_errors=True)
  293. print("done")
  294. channel_counter += 1
  295. def run(run_once=True):
  296. #TODO: turn this into a daemon
  297. conf = utils.read_conf("config.toml")
  298. if run_once:
  299. run_steps(conf)
  300. else:
  301. while True:
  302. poll_frequency = int(conf["global"]["poll_frequency"]) * 60
  303. run_steps(conf)
  304. sleep(poll_frequency)
  305. def main(argv):
  306. run_once=False
  307. try:
  308. opts, args = getopt.getopt(argv,"ho",["help","once"])
  309. except:
  310. print("youtube2peertube.py [-o|--once]")
  311. sys(exit(2))
  312. for opt, arg in opts:
  313. if opt == '-h':
  314. print("youtube2peertube.py [-o|--once]")
  315. sys.exit()
  316. elif opt in ("-o", "--once"):
  317. run_once = True
  318. run(run_once)
# Run the mirror only when this file is executed as a script; the program
# name in sys.argv[0] is dropped before the arguments are handed to main().
if __name__ == "__main__":
    main(sys.argv[1:])