» »

Python Youtube MP3 downloader + some bug(s)

Python Youtube MP3 downloader + some bug(s)

HotBurek ::

Pozdravljeni.

Pred dnevi sem začel s pisanjem Python skripte, s katero bi Youtube video shranil v mp3. Skripto mi je uspelo napisati do te mere, da za določene video posnetke download deluje.

Zadnja dva dni pa sem prišel do blokade, ker za določene video posnetke ne uspem najti prave kombinacije parametrov, da bi download delal.

Spodaj prilagam kodo, če se bo kdo lotil debugiranja in našel postopek, kako pravilno narediti convert iz "videoplayback" linka (STEP 1) v delujoč download link (STEP 2). V kodi so trije Youtube IDji; prvi dela brez problema, drugi nima audio mime tipa med linki (zato ne dela, kar je ok), tretji pa sploh ne dela. In ta me je zmatral. :)

Za download je potrebno nastaviti obstoječ folder za downloadpath.

#!/usr/bin/python3.4

# import
import os;
import sys;
import time;
import html;
import shutil;
import datetime;
import urllib.parse;
import requests.packages.urllib3;

# request settings
timeout = 4  # passed as timeout= to every HTTP request
useragent = "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0"
baseurl = "https://www.youtube.com/watch?v="

# debug on/off (prints the parsed url parts)
debug = True

# results filled in by the main steps
title = ""
mp3url = ""
clen = 0

# download path (must be an existing folder)
downloadpath = "/home/user/Documents/Youtube"

# ANSI colour escape strings used for console output
clr_darkgray = "\033[90m"
clr_red = "\033[91m"
clr_green = "\033[92m"
clr_yellow = "\033[93m"
clr_blue = "\033[94m"
clr_magenta = "\033[95m"
clr_azure = "\033[96m"
clr_lightgray = "\033[97m"
clr_eoc = "\033[0m"  # end of colour (reset)

# YOUTUBE IDs ---------------------------------------------------------------------------------------------
# Exactly one of the ids below should be active.

# this works
ytid = "aecYDGLLzTk"

# this has no audio
# ytid = "KB4UgLloyXo"

# this doesn't work :(
# ytid = "FzJN6TDkQCg"


# DOWNLOADER BITS -----------------------------------------------------------------------------------------

def downloadwb(musicurl, ytid):
    """Fetch one chunk from *musicurl* and append it to ``<downloadpath>/<ytid>.mp3``.

    Args:
        musicurl: Complete media url, including any ``range``/``rn`` parameters.
        ytid: YouTube video id; used as the name of the target file.

    Returns:
        True when the chunk was fetched and written to disk. False when the
        download folder does not exist or when any error occurred (the error
        is printed, never raised).
    """
    # Imported here because the file-level imports only pull in urllib.parse.
    import urllib.request

    # check if folder exists before spending a request on the chunk
    if not os.path.exists(downloadpath):
        print(clr_red + "Folder " + clr_eoc + clr_yellow + str(downloadpath) + clr_eoc + clr_red + " doesn't exist" + clr_eoc)
        return False

    try:
        request = urllib.request.Request(str(musicurl))
        request.add_header("User-Agent", str(useragent))
        request.add_header("Accept-Language", "en-US")
        # urllib does not decode compressed bodies, so ask for the raw bytes;
        # advertising gzip/br could leave compressed data in the audio file.
        request.add_header("Accept-Encoding", "identity")
        request.add_header("Referer", "https://www.youtube.com/")
        request.add_header("Origin", "https://www.youtube.com")
        request.add_header("DNT", "1")

        # The payload is binary audio: never decode it, even if the response
        # announces a charset, because the file is opened in binary mode.
        with urllib.request.urlopen(request, timeout=timeout) as requestopen:
            payload = requestopen.read()

        # writing file to disk (append, the caller downloads chunk by chunk)
        with open(downloadpath + "/" + str(ytid) + ".mp3", "ab") as target:
            target.write(payload)

    except Exception as exc:
        print(clr_red + "Error: " + clr_eoc + clr_lightgray + str(exc) + clr_eoc)
        return False

    return True


# MAIN STEPS ----------------------------------------------------------------------------------------------

# Query parameters that are forwarded to the media server. Every other
# parameter found in the scraped url is dropped.
_ALLOWED_PARAMS = frozenset([
    "aitags", "alr", "c", "clen", "cpn", "cver", "dur", "ei", "expire",
    "gcr", "gir", "id", "initcwndbps", "ip", "ipbits", "ipbypass", "itag",
    "keepalive", "key", "lmt", "mime", "mip", "mm", "mn", "ms", "mt", "mv",
    "pcm2", "pcm2cms", "pl", "ratebypass", "rbuf", "req_id", "requiressl",
    "rm", "s", "signature", "source", "sparams",
])

# Parameters appended with these values when the scraped url lacks them.
_DEFAULT_PARAMS = (
    ("alr", "yes"),
    ("c", "WEB"),
    ("gir", "yes"),
    ("ratebypass", "yes"),
    ("rbuf", "0"),
    ("source", "youtube"),
    ("ipbypass", "yes"),
)

# Escapes that are still present in a scraped url after unquoting the page.
_URL_FIXUPS = (
    ("\\u0026", "&"),
    ("%252F", "/"),
    ("%2F", "/"),
    ("%2C", ","),
    ("%5B", "["),
    ("%5D", "]"),
)


def _extract_title(page):
    """Return the text quoted after ``document.title`` in *page*, or None if absent."""
    marker = "document.title"
    pos = page.find(marker)
    if pos < 0:
        return None
    pieces = page[pos + len(marker):].split("\"", 2)
    if len(pieces) < 3:
        # no complete "..." pair follows the marker
        return None
    found = pieces[1]
    dash = found.rfind("-")
    if dash > 0:
        # cut everything from the last dash on (the page-name suffix)
        found = found[:dash].strip()
    return html.unescape(found).replace("\\u0026", "&")


def _player_area(page):
    """Return the unquoted part of *page* that holds the videoplayback url list."""
    area = page
    for marker in ("<div id=\"player\"", "ytplayer"):
        pos = area.find(marker)
        if pos > 0:
            area = area[pos:]
    end = area.find("</div>")
    if end > 0:
        area = area[:end]
    return urllib.parse.unquote(area)


def _find_audio_url(area):
    """Return ``(url, clen)`` of the audio videoplayback url with the largest clen.

    Returns ``("", 0)`` when *area* holds no url with ``mime=audio/`` and a
    numeric ``clen`` parameter.
    """
    best_url = ""
    best_clen = 0
    scheme = "https:"
    start = area.find(scheme)
    while start > -1:
        # a candidate runs from "https:" up to the next comma
        end = area.find(",", start)
        candidate = area[start:] if end < 0 else area[start:end]
        start = area.find(scheme, start + len(scheme))

        candidate = html.unescape(candidate)
        for escaped, plain in _URL_FIXUPS:
            candidate = candidate.replace(escaped, plain)

        if candidate.find("videoplayback") < 1 or candidate.find("mime=audio/") < 1:
            continue

        for param in candidate.partition("?")[2].split("&"):
            if "clen=" not in param:
                continue
            digits = param.replace("clen=", "").strip().rstrip("\"").lstrip("?")
            try:
                size = int(digits)
            except ValueError:
                # a malformed clen must not abort the whole scan
                continue
            if size > best_clen:
                best_clen = size
                best_url = candidate[:-1] if candidate.endswith("\"") else candidate
    return best_url, best_clen


def _build_query(query):
    """Return *query* with defaults added, restricted to allowed names, each name once.

    Parameters are emitted in sorted order; of several parameters with the
    same name the first one in that order is kept.
    """
    present = set()
    for piece in query.split("&"):
        name, sep, _value = piece.strip().partition("=")
        if sep:
            present.add(name)

    # if parameters are missing add them (compared by exact name)
    for name, value in _DEFAULT_PARAMS:
        if name not in present:
            query = query + "&" + name + "=" + value

    kept = []
    seen = set()
    for raw in sorted(query.split("&")):
        param = raw.strip().rstrip("\"")
        name, sep, _value = param.partition("=")
        if not sep or name not in _ALLOWED_PARAMS or name in seen:
            continue
        seen.add(name)
        kept.append(param)
    return "&".join(kept)


# STEP 1---------------------------------------------------------------------------------------------------
# make request and read videoplayback urls
try:
    response = ""

    headers = {"User-Agent": useragent}

    # avoid InsecureRequestWarning
    # NOTE(review): verify=False below disables TLS certificate checks; it is
    # kept as in the original script, consider enabling verification.
    requests.packages.urllib3.disable_warnings()

    url = str(baseurl) + str(ytid)

    request = requests.get(url, headers=headers, allow_redirects=True, verify=False, timeout=timeout)

    if request.status_code != 200:
        print(clr_red + "Response code: " + clr_eoc + clr_yellow + str(request.status_code) + clr_eoc)

    elif request.text:
        response = request.text

        # find title ----------------------------------------------------------------
        foundtitle = _extract_title(response)
        if foundtitle is not None:
            title = foundtitle
            print(clr_green + str(title) + clr_eoc)

        # find download url------------------------------------------------------------
        # check all urls with videoplayback and pick the one with the longest clen
        mp3url, clen = _find_audio_url(_player_area(response))

        # check if mp3url for audio is found
        if len(mp3url) < 1:
            print(clr_red + "Error: " + clr_eoc + clr_yellow + "Couldn't found mime audio type for given url" + clr_eoc)
        else:
            # STEP 2---------------------------------------------------------------------------------------------------
            # build download url

            # split base url (kept with its trailing "?") and parameters
            urlhead, urlsep, urlquery = mp3url.partition("?")

            # DEBUG -------------------
            if debug:
                videoplayback = ""
                videoplaybackpar = ""
                if urlsep:
                    videoplayback = urlhead
                    videoplaybackpar = "\n".join(sorted(urlquery.split("&"))).strip()
                print(clr_yellow + "---DEBUG---" + clr_eoc)
                print(clr_blue + "videoplayback=" + clr_eoc + clr_lightgray + videoplayback + clr_eoc)
                print(clr_blue + "videoplaybackpar=" + clr_eoc + clr_lightgray + videoplaybackpar + clr_eoc)
                print(clr_yellow + "---DEBUG---" + clr_eoc)
            # DEBUG -------------------

            mp3urlbase = ""
            mp3urlpar = ""
            if urlsep:
                mp3urlbase = urlhead + urlsep
                mp3urlpar = _build_query(urlquery)

            # DEBUG -------------------
            if debug:
                print(clr_yellow + "---DEBUG---" + clr_eoc)
                print(clr_blue + "mp3urlbase=" + clr_eoc + clr_lightgray + mp3urlbase + clr_eoc)
                print(clr_blue + "mp3urlpar=" + clr_eoc + clr_lightgray + mp3urlpar.replace("&", "\n") + clr_eoc)
                print(clr_yellow + "---DEBUG---" + clr_eoc)
            # DEBUG -------------------

            # STEP 3---------------------------------------------------------------------------------------------------
            # download url

            rn = 1
            rangefrom = 0
            rangestep = 66654  # download chunk size
            rstatusvalue = True
            mp3file = downloadpath + "/" + str(ytid) + ".mp3"

            print(clr_green + "Downloading " + clr_eoc + clr_azure + str(ytid) + ".mp3" + clr_eoc)
            print(clr_green + "Saving to " + clr_eoc + clr_azure + str(downloadpath) + clr_eoc)

            # delete mp3 file if exists (chunks are appended to it)
            if os.path.exists(mp3file):
                os.remove(mp3file)

            # Byte ranges are zero based and inclusive, so the last valid
            # byte is clen - 1. Files smaller than one chunk are fetched too.
            while rangefrom < clen:
                rangeto = min(rangefrom + rangestep, clen - 1)

                # add range to url
                mp3urlrange = "&range=" + str(rangefrom) + "-" + str(rangeto) + "&rn=" + str(rn)

                # some sleep time between requests
                time.sleep(0.2)

                # DOWNLOAD
                rstatusvalue = downloadwb(mp3urlbase + mp3urlpar + mp3urlrange, ytid)

                if not rstatusvalue:
                    break

                # print percentage and MB of what has been received so far
                received = rangeto + 1
                percentage = round(received / clen * 100)
                size = round(received / 1024 / 1024)
                print(clr_yellow + str(percentage) + " %" + clr_eoc + " " + clr_magenta + str(size) + " MB" + clr_eoc)

                rangefrom = received
                rn = rn + 1

            if rstatusvalue:
                print(clr_green + "Downloading done" + clr_eoc)
                print(clr_green + "File saved to " + clr_eoc + clr_azure + str(downloadpath) + clr_eoc)
            else:
                print(clr_red + "Downloading failed" + clr_eoc)

except Exception as exc:
    print(clr_red + "Error: " + clr_eoc + clr_yellow + str(exc) + clr_eoc)
root@debian:/# iptraf-ng
fatal: This program requires a screen size of at least 80 columns by 24 lines
Please resize your window


Vredno ogleda ...

TemaSporočilaOglediZadnje sporočilo
TemaSporočilaOglediZadnje sporočilo
»

Nasveti glede API-ja

Oddelek: Programiranje
111176 (716) Arey
»

program za pomoč pri tiskanju 200+pdf dokumentov

Oddelek: Pomoč in nasveti
252283 (1335) HotBurek
»

Postavitev mySQL

Oddelek: Programiranje
92227 (1805) M01O
»

Fake traffic generator

Oddelek: Omrežja in internet
192248 (1444) HotBurek
»

python -slovar

Oddelek: Programiranje
223123 (2102) Valex86

Več podobnih tem