Forum » Programiranje » Python Youtube MP3 downloader + some bug(s)
Python Youtube MP3 downloader + some bug(s)
HotBurek ::
Pozdravljeni.
Pred dnevi sem začel s pisanjem Python skripte, s katero bi Youtube video shranil v mp3. Skripto mi je uspelo napisati do te mere, da za določene video posnetke download deluje.
Zadnja dva dni pa sem prišel do blokade, ker za določene video posnetke ne uspem najti prave kombinacije parametrov, da bi download delal.
Spodaj prilagam kodo, če se bo kdo lotil debugiranja, in našel postopek, kako pravilno narediti convert iz "videoplayback" linka (STEP 1) v delujoč download link (STEP 2). V kodi so trije Youtube IDji; prvi dela bp, drugi nima audio mime tipa med linki (zato ne dela, kar je ok), tretji pa sploh ne dela. In ta me je zmatral. :)
Za download je potrebno nastavit obstoječ folder za downloadpath.
Pred dnevi sem začel s pisanjem Python skripte, s katero bi Youtube video shranil v mp3. Skripto mi je uspelo napisati do te mere, da za določene video posnetke download deluje.
Zadnja dva dni pa sem prišel do blokade, ker za določene video posnetke ne uspem najti prave kombinacije parametrov, da bi download delal.
Spodaj prilagam kodo, če se bo kdo lotil debugiranja, in našel postopek, kako pravilno narediti convert iz "videoplayback" linka (STEP 1) v delujoč download link (STEP 2). V kodi so trije Youtube IDji; prvi dela bp, drugi nima audio mime tipa med linki (zato ne dela, kar je ok), tretji pa sploh ne dela. In ta me je zmatral. :)
Za download je potrebno nastavit obstoječ folder za downloadpath.
#!/usr/bin/python3.4
"""Save the audio stream of one YouTube video as <ytid>.mp3.

The script runs three steps at module level:

STEP 1  fetch the watch page and pick the audio "videoplayback" URL with
        the largest ``clen`` (content length) value,
STEP 2  rebuild the query string of that URL (add missing parameters,
        keep each known parameter once, sorted),
STEP 3  download the stream in consecutive byte ranges, appending every
        chunk to ``downloadpath/<ytid>.mp3``.

``downloadpath`` must point to an existing folder.
"""

# import
import os
import sys
import time
import html
import shutil
import datetime
import urllib.parse
import urllib.request  # downloadwb() uses urllib.request, so import it explicitly
import requests.packages.urllib3

# variables
timeout = 4
useragent = "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0"
baseurl = "https://www.youtube.com/watch?v="

# debug on/off
debug = True

title = ""
mp3url = ""
clen = 0

# download path
downloadpath = "/home/user/Documents/Youtube"

# define color's strings
clr_darkgray = "\033[90m"
clr_red = "\033[91m"
clr_green = "\033[92m"
clr_yellow = "\033[93m"
clr_blue = "\033[94m"
clr_magenta = "\033[95m"
clr_azure = "\033[96m"
clr_lightgray = "\033[97m"
clr_eoc = "\033[0m"

# YOUTUBE IDs ---------------------------------------------------------------------------------------------
# this works
ytid = "aecYDGLLzTk"
# this has no audio
# ytid = "KB4UgLloyXo"
# this doesn't work :(
# ytid = "FzJN6TDkQCg"

# Escapes still present in a candidate URL after the page text was unquoted
# once; applied in this order.
_URLFIXES = (
    ("\\u0026", "&"),
    ("%252F", "/"),
    ("%2F", "/"),
    ("%2C", ","),
    ("%5B", "["),
    ("%5D", "]"),
)

# Parameters appended to the download query when the URL lacks them.
_DEFAULTPARAMETERS = (
    ("alr", "yes"),
    ("c", "WEB"),
    ("gir", "yes"),
    ("ratebypass", "yes"),
    ("rbuf", "0"),
    ("source", "youtube"),
    ("ipbypass", "yes"),
)

# Only these query parameters are carried over into the download URL.
_KNOWNPARAMETERS = frozenset((
    "aitags", "alr", "c", "clen", "cpn", "cver", "dur", "ei", "expire",
    "gcr", "gir", "id", "initcwndbps", "ip", "ipbits", "ipbypass", "itag",
    "keepalive", "key", "lmt", "mime", "mip", "mm", "mn", "ms", "mt", "mv",
    "pcm2", "pcm2cms", "pl", "ratebypass", "rbuf", "req_id", "requiressl",
    "rm", "s", "signature", "source", "sparams",
))

# download chunk size in bytes
_RANGESTEP = 66654


# DOWNLOADER BITS -----------------------------------------------------------------------------------------
def downloadwb(musicurl, ytid):
    """Download one chunk and append it to ``downloadpath/<ytid>.mp3``.

    musicurl -- complete download URL, including the range parameters
    ytid     -- YouTube video id, used as the file name

    Returns True when the chunk was fetched and written, False when the
    download folder is missing or the request/write failed (the reason is
    printed).
    """
    # check if folder exists before spending a request on it
    if not os.path.exists(downloadpath):
        print(clr_red + "Folder " + clr_eoc + clr_yellow + str(downloadpath) + clr_eoc + clr_red + " doesn't exist" + clr_eoc)
        return False

    request = urllib.request.Request(str(musicurl))
    request.add_header("User-Agent", str(useragent))
    request.add_header("Accept-Language", "en-US")
    # urllib does not undo Content-Encoding, so ask for the raw bytes instead
    # of advertising gzip/deflate/br that would be written to disk compressed.
    request.add_header("Accept-Encoding", "identity")
    request.add_header("Referer", "https://www.youtube.com/")
    request.add_header("Origin", "https://www.youtube.com")
    request.add_header("DNT", "1")

    try:
        with urllib.request.urlopen(request, timeout=timeout) as requestopen:
            # Always keep the payload as bytes: the file is opened in binary
            # mode, so a charset-decoded str could not be written to it.
            response = requestopen.read()
        # writing file to disk
        with open(downloadpath + "/" + str(ytid) + ".mp3", "ab") as mp3file:
            mp3file.write(response)
    except Exception as exc:
        print(clr_red + "Error: " + clr_eoc + clr_lightgray + str(exc) + clr_eoc)
        return False
    return True


def _extracttitle(page):
    """Return the text of the first quoted string after "document.title".

    A trailing " - ..." suffix is cut off and HTML entities are resolved.
    Returns None when the marker or the surrounding quotes are not found.
    """
    marker = "document.title"
    start = page.find(marker)
    if start <= 0:
        return None
    rest = page[start + len(marker):]
    quote = rest.find("\"")
    if quote == -1:
        return None
    rest = rest[quote + 1:]
    quote = rest.find("\"")
    if quote == -1:
        return None
    rest = rest[:quote]
    dash = rest.rfind("-")
    if dash > 0:
        rest = rest[:dash]
    rest = html.unescape(rest.strip())
    return rest.replace("\\u0026", "&")


def _findaudiourl(page):
    """Return ``(url, clen)`` of the audio videoplayback URL with the largest clen.

    Returns ``("", 0)`` when no URL containing both "videoplayback" and
    "mime=audio/" with a numeric ``clen`` parameter is found.
    """
    # strip to videoplayback url list area (each marker narrows only if present)
    area = page
    for marker in ("<div id=\"player\"", "ytplayer"):
        start = area.find(marker)
        if start > 0:
            area = area[start:]
    end = area.find("</div>")
    if end > 0:
        area = area[:end]
    area = urllib.parse.unquote(area)

    besturl = ""
    bestclen = 0
    start = area.find("https:")
    while start > -1:
        candidate = area[start:]
        # continue the scan right behind this "https:" occurrence
        area = area[start + len("https:"):]
        start = area.find("https:")

        # a candidate ends at the first comma; commas inside the URL are
        # still escaped at this point and are restored just below
        comma = candidate.find(",")
        if comma > 0:
            candidate = candidate[:comma]
        candidate = html.unescape(candidate)
        for escaped, plain in _URLFIXES:
            candidate = candidate.replace(escaped, plain)

        if candidate.find("videoplayback") <= 0:
            continue
        if candidate.find("mime=audio/") <= 0:
            continue
        candidate = candidate.rstrip("\"")

        query = candidate.partition("?")[2]
        for parameter in query.split("&"):
            key, separator, value = parameter.strip().partition("=")
            if not separator or key != "clen":
                continue
            try:
                length = int(value.strip().rstrip("\""))
            except ValueError:
                # a malformed clen must not abort the whole scan
                continue
            # keep url and clen together so they always describe the same stream
            if length > bestclen:
                bestclen = length
                besturl = candidate
    return besturl, bestclen


def _builddownloadquery(query):
    """Return the query string used for downloading.

    Missing default parameters are added, unknown parameters are dropped,
    every known parameter is kept once (first occurrence in sorted order)
    and the result is joined with "&" without a leading separator.
    """
    parameters = [part.strip().rstrip("\"") for part in query.split("&")]
    present = set(part.partition("=")[0] for part in parameters if "=" in part)
    # if parameters are missing add them
    for key, value in _DEFAULTPARAMETERS:
        if key not in present:
            parameters.append(key + "=" + value)
    parameters.sort()

    kept = []
    added = set()
    for parameter in parameters:
        key, separator, _ = parameter.partition("=")
        if not separator or key not in _KNOWNPARAMETERS or key in added:
            continue
        added.add(key)
        kept.append(parameter)
    # NOTE(review): an "s=" parameter is passed through unchanged; presumably
    # it is an enciphered signature that the server will not accept as-is,
    # which may be why some videos fail -- verify.
    return "&".join(kept)


def _downloadranges(mp3urlbase, mp3urlpar, clen, ytid):
    """Download bytes 0..clen-1 in chunks of ``_RANGESTEP`` bytes via downloadwb().

    Prints the progress after every chunk. Returns True only when every
    chunk, including the last one, was downloaded; False on the first
    failure or when ``clen`` is not positive.
    """
    lastbyte = int(clen) - 1
    if lastbyte < 0:
        return False
    rn = 1
    rangefrom = 0
    while rangefrom <= lastbyte:
        # range bounds are treated as inclusive, so the last chunk ends at clen - 1
        rangeto = min(rangefrom + _RANGESTEP - 1, lastbyte)
        # add range to url
        mp3urlrange = "&range=" + str(rangefrom) + "-" + str(rangeto) + "&rn=" + str(rn)
        # some sleep time between requests
        time.sleep(0.2)
        # DOWNLOAD
        if not downloadwb(mp3urlbase + mp3urlpar + mp3urlrange, ytid):
            return False
        # print percentage and MB
        done = rangeto + 1
        percentage = round(done / int(clen) * 100)
        size = round(done / 1024 / 1024)
        print(clr_yellow + str(percentage) + " %" + clr_eoc + " " + clr_magenta + str(size) + " MB" + clr_eoc)
        rangefrom = rangeto + 1
        rn = rn + 1
    return True


# MAIN STEPS ----------------------------------------------------------------------------------------------
# STEP 1---------------------------------------------------------------------------------------------------
# make request and read videoplayback urls
try:
    headers = {"User-Agent": useragent}
    # avoid InsecureRequestWarning
    # NOTE(review): verify=False below turns off TLS certificate checking and
    # this call hides the warning about it; kept as in the original script.
    requests.packages.urllib3.disable_warnings()
    url = str(baseurl) + str(ytid)
    request = requests.get(url, headers=headers, allow_redirects=True, verify=False, timeout=timeout)

    if request.status_code == 200:
        response = request.text or ""

        # find title ----------------------------------------------------------------
        foundtitle = _extracttitle(response)
        if foundtitle is not None:
            title = foundtitle
            print(clr_green + str(title) + clr_eoc)

        # find download url------------------------------------------------------------
        # check all urls with videoplayback and find the longest
        mp3url, clen = _findaudiourl(response)

        # check if mp3url for audio is found
        if len(mp3url) < 1:
            print(clr_red + "Error: " + clr_eoc + clr_yellow + "Couldn't found mime audio type for given url" + clr_eoc)
        else:
            # STEP 2-------------------------------------------------------------------------------------------
            # build download url
            # split base url (including the "?") and parameters
            mp3urlbase = ""
            mp3urlparameters = ""
            questionmark = mp3url.find("?")
            if questionmark > 0:
                mp3urlbase = mp3url[:questionmark + 1]
                mp3urlparameters = mp3url[questionmark + 1:]

            # DEBUG -------------------
            if debug:
                videoplayback = mp3urlbase[:-1]
                videoplaybackpar = "\n".join(sorted(mp3urlparameters.split("&"))).strip()
                print(clr_yellow + "---DEBUG---" + clr_eoc)
                print(clr_blue + "videoplayback=" + clr_eoc + clr_lightgray + videoplayback + clr_eoc)
                print(clr_blue + "videoplaybackpar=" + clr_eoc + clr_lightgray + videoplaybackpar + clr_eoc)
                print(clr_yellow + "---DEBUG---" + clr_eoc)
            # DEBUG -------------------

            mp3urlpar = _builddownloadquery(mp3urlparameters)

            # DEBUG -------------------
            if debug:
                print(clr_yellow + "---DEBUG---" + clr_eoc)
                print(clr_blue + "mp3urlbase=" + clr_eoc + clr_lightgray + mp3urlbase + clr_eoc)
                print(clr_blue + "mp3urlpar=" + clr_eoc + clr_lightgray + mp3urlpar.replace("&", "\n") + clr_eoc)
                print(clr_yellow + "---DEBUG---" + clr_eoc)
            # DEBUG -------------------

            # STEP 3-------------------------------------------------------------------------------------------
            # download url
            print(clr_green + "Downloading " + clr_eoc + clr_azure + str(ytid) + ".mp3" + clr_eoc)
            print(clr_green + "Saving to " + clr_eoc + clr_azure + str(downloadpath) + clr_eoc)

            # delete mp3 file if exists, because chunks are appended to it
            if os.path.exists(downloadpath + "/" + str(ytid) + ".mp3"):
                os.remove(downloadpath + "/" + str(ytid) + ".mp3")

            rstatusvalue = _downloadranges(mp3urlbase, mp3urlpar, clen, ytid)

            if rstatusvalue:
                print(clr_green + "Downloading done" + clr_eoc)
                print(clr_green + "File saved to " + clr_eoc + clr_azure + str(downloadpath) + clr_eoc)
            else:
                print(clr_red + "Downloading failed" + clr_eoc)
    else:
        print(clr_red + "Response code: " + clr_eoc + clr_yellow + str(request.status_code) + clr_eoc)
except Exception as exc:
    print(clr_red + "Error: " + clr_eoc + clr_yellow + str(exc) + clr_eoc)
root@debian:/# iptraf-ng
fatal: This program requires a screen size of at least 80 columns by 24 lines
Please resize your window
fatal: This program requires a screen size of at least 80 columns by 24 lines
Please resize your window
Vredno ogleda ...
Tema | Ogledi | Zadnje sporočilo | |
---|---|---|---|
Tema | Ogledi | Zadnje sporočilo | |
» | Nasveti glede API-jaOddelek: Programiranje | 1176 (716) | Arey |
» | program za pomoč pri tiskanju 200+pdf dokumentovOddelek: Pomoč in nasveti | 2283 (1335) | HotBurek |
» | Postavitev mySQLOddelek: Programiranje | 2227 (1805) | M01O |
» | Fake traffic generatorOddelek: Omrežja in internet | 2248 (1444) | HotBurek |
» | python -slovarOddelek: Programiranje | 3123 (2102) | Valex86 |