eZ Community » Blogs » Thiago Campos Viana » Tip: Converting youtube playlists to mp3

By

Thiago Campos Viana

Tip: Converting youtube playlists to mp3

Monday 20 February 2012 2:45:31 pm

  • Currently 5 out of 5 Stars.
  • 1
  • 2
  • 3
  • 4
  • 5

I was fed up with using the flv2mp3 website to convert YouTube playlists to MP3. I'm trying to learn Norwegian and would like to listen to some lessons offline on my phone, so I wrote my own script to do that, which I will show here.

Warning 1: I will expand this post later and explain how to create an eZ Publish extension. This is just a sketch.

Warning 2: This script is very processor- and bandwidth-intensive, so be careful.

First you need to install ffmpeg and libavcodec-extra-53:

sudo apt-get install ffmpeg libavcodec-extra-53

I'm using a modified version of the PHP script found on 1chris.com. Here's the complete code:

<?php
 
// Remove PHP's execution time limit (0 means "no limit"): the script below
// downloads and converts every video of a playlist in a single run.
set_time_limit(0);
 
// modified version from http://1chris.com/wp-content/uploads/2011/08/youtubegrabber.class.php_.txt
/**
 * Downloads the FLV stream of a single YouTube video into the current directory.
 *
 * Constructing an instance performs the whole job: it extracts the video ID
 * from the watch URL, fetches the watch page, looks for a stream URL whose
 * Content-Type contains "video/x-flv" and saves that stream to
 * "<video id>.flv". After construction:
 *   - $filename           holds the bare video ID,
 *   - $final_flv_filename holds the name of the file that was written,
 *   - $flv_url            holds the stream URL, or false when none was found.
 */
class youtubegrabber {

    public $youtube_video_url;
    public $youtube_video_id;
    public $test;
    public $final_flv_filename;
    public $filename;
    public $cookies_path;
    public $curl_headers;
    public $flv_url;

    /**
     * @param string $youtube_video_url  Watch URL containing "watch?v=<id>".
     * @param mixed  $final_flv_filename Ignored; kept for backward compatibility.
     *                                   The output file is always "<video id>.flv".
     * @param int    $test               Stored in $this->test; not otherwise used by this class.
     */
    function __construct($youtube_video_url, $final_flv_filename, $test = 0) {
        $this->youtube_video_url = $youtube_video_url;
        $this->test = $test;

        $this->youtube_video_id = $this->get_youtube_video_id();
        $this->final_flv_filename = $this->youtube_video_id . '.flv';

        // Start from an empty cookie jar so state from a previous video is not reused.
        $this->cookies_path = "cookies.txt";
        $this->clear_cookies();

        // Request headers sent with every call; the User-Agent identifies as Firefox 3.6.
        $this->curl_headers = array(
            "Accept-Language: en-us",
            "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15",
            "Connection: Keep-Alive",
            "Cache-Control: no-cache"
        );

        $this->flv_url = $this->get_flv_url();

        $this->get_curl_binary();
        $this->clear_cookies();
    }

    /**
     * Extracts the video ID from $this->youtube_video_url and stores it in $this->filename.
     *
     * Anything after the first "&" (e.g. "&feature=...") is dropped, so the
     * returned ID and $this->filename always agree.
     *
     * @return string The video ID, or '' when the URL has no "watch?v=" part.
     */
    function get_youtube_video_id() {
        $url_parts = explode("watch?v=", $this->youtube_video_url);

        if (!isset($url_parts[1])) {
            $this->filename = '';
            return '';
        }

        $id_parts = explode("&", $url_parts[1]);
        $this->filename = $id_parts[0];

        return $id_parts[0];
    }

    /**
     * Replaces the cookie jar file with an empty one.
     *
     * Terminates the script with "can't open file" when the file cannot be created.
     */
    function clear_cookies() {

        if (file_exists($this->cookies_path)) {
            unlink($this->cookies_path);
        }

        $handle = fopen($this->cookies_path, 'w');
        if ($handle === false) {
            die("can't open file");
        }
        fclose($handle);
    }

    /**
     * Finds a downloadable FLV stream URL for the video.
     *
     * Fetches the watch page, pulls the URL-encoded player configuration out of
     * it, collects every "url=..." candidate and returns the first one whose
     * response headers report a Content-Type containing "video/x-flv".
     *
     * @return string|false The stream URL, or false when none was found.
     */
    function get_flv_url() {

        $html = $this->curl_get_url($this->youtube_video_url);
        if (!is_string($html) || $html === '') {
            return false;
        }

        // Only the first occurrence of the player configuration is needed.
        if (!preg_match("/var.*?swf.*?=.*?\"(.*?)watch-player.*?innerHTML.*?=.*?swf/is", $html, $page_match)) {
            return false;
        }

        $decoded = urldecode($page_match[1]);
        if (!preg_match_all("/url=(.*?)\,/is", $decoded, $url_matches)) {
            return false;
        }

        foreach ($url_matches[1] as $raw_candidate) {
            // Keep only the part before the first "&", then undo the second level of URL encoding.
            $pieces = explode("&", $raw_candidate);
            $this_url = urldecode($pieces[0]);

            $headers = $this->curl_get_headers($this_url);
            if (!is_string($headers)) {
                continue;
            }

            foreach (explode("\n", trim($headers)) as $line) {
                $colon = strpos($line, ':');
                if ($colon === false) {
                    continue;
                }

                // Header names are case-insensitive.
                if (strcasecmp(trim(substr($line, 0, $colon)), 'Content-Type') !== 0) {
                    continue;
                }

                $content_type = strtolower(trim(substr($line, $colon + 1)));
                if ($this->contains("video/x-flv", $content_type)) {
                    return $this_url;
                }
            }
        }

        return false;
    }

    /**
     * Fetches a URL and returns the response body.
     *
     * @param string $url
     * @return string|false Body on success, false when the transfer failed.
     */
    function curl_get_url($url) {
        $ch = $this->new_curl_handle($url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

        $output = curl_exec($ch);
        curl_close($ch);

        return $output;
    }

    /**
     * Requests a URL without its body and returns the raw response headers.
     *
     * @param string $url
     * @return string|false Header text on success, false when the transfer failed.
     */
    function curl_get_headers($url) {
        $ch = $this->new_curl_handle($url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_HEADER, 1);
        curl_setopt($ch, CURLOPT_NOBODY, 1);

        $results = curl_exec($ch);
        curl_close($ch);

        return $results;
    }

    /**
     * Downloads $this->flv_url into $this->final_flv_filename.
     *
     * Nothing is written when no stream URL was found.
     *
     * @return bool True when the transfer succeeded, false otherwise.
     */
    function get_curl_binary() {
        if (!is_string($this->flv_url) || $this->flv_url === '') {
            return false;
        }

        $fp = fopen($this->final_flv_filename, "w");
        if ($fp === false) {
            return false;
        }

        $ch = $this->new_curl_handle($this->flv_url);
        // Stream the body straight into the file instead of buffering it in memory.
        curl_setopt($ch, CURLOPT_FILE, $fp);

        $ok = curl_exec($ch);
        curl_close($ch);
        fclose($fp);

        return $ok !== false;
    }

    /**
     * Tells whether $substring occurs anywhere in $string.
     *
     * @param string $substring Needle.
     * @param string $string    Haystack.
     * @return bool
     */
    function contains($substring, $string) {
        return strpos($string, $substring) !== false;
    }

    /**
     * Creates a cURL handle with the options shared by every request of this class:
     * the custom headers, the cookie jar, redirect following and a 5 second connect timeout.
     *
     * @param string $url
     * @return resource cURL handle (an object on newer PHP versions).
     */
    private function new_curl_handle($url) {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $this->curl_headers);
        curl_setopt($ch, CURLOPT_COOKIEJAR, $this->cookies_path);
        curl_setopt($ch, CURLOPT_COOKIEFILE, $this->cookies_path);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);

        return $ch;
    }

}
 
 
// Playlist to convert. Only the ID that follows "list=PL" is used in the feed URLs.
$playlist_url = "http://www.youtube.com/playlist?list=PLE6D4A81E59068EB7&feature=plcp";

$playlist_parts = explode('list=PL', $playlist_url);
if (!isset($playlist_parts[1])) {
    die("playlist URL does not contain a list=PL... parameter");
}
$playlist_parts = explode('&', $playlist_parts[1]);
$playlist_id = $playlist_parts[0];

$offset = 0;   // entries whose 1-based position is below this value are skipped
$limit = 25;   // entries requested per feed page

$total = null; // total number of playlist entries, read from the first page
$y = 0;        // 1-based position of the current entry; also used to name the MP3

// Walk the playlist feed page by page. The loop advances by start index, so it
// always terminates, even when a page returns fewer entries than requested.
for ($start = 1; $total === null || $start <= $total; $start += $limit) {

    // Plain "&" separators: "&amp;" is only correct inside HTML, not in a PHP string.
    $url = "https://gdata.youtube.com/feeds/api/playlists/" . $playlist_id
        . "?start-index=" . $start . "&max-results=" . $limit . "&v=2";

    $doc = new DOMDocument;
    if (!$doc->load($url)) {
        // The feed could not be fetched or parsed; nothing more can be processed.
        break;
    }

    if ($total === null) {
        $total_node = $doc->getElementsByTagNameNS('http://a9.com/-/spec/opensearchrss/1.0/', 'totalResults')->item(0);
        $total = $total_node ? (int) $total_node->nodeValue : 0;
    }

    $entries = $doc->getElementsByTagName("entry");
    if ($entries->length == 0) {
        break;
    }

    for ($x = 0; $x < $entries->length; $x++) {
        $y++;

        if ($y < $offset)
            continue;

        // The video ID is taken from the media:player URL of the entry.
        $player = $entries->item($x)->getElementsByTagNameNS('http://search.yahoo.com/mrss/', 'player')->item(0);
        if (!$player) {
            continue;
        }

        $id_aux = explode('watch?v=', $player->getAttribute('url'));
        if (!isset($id_aux[1])) {
            continue;
        }
        $id_aux = explode('&', $id_aux[1]);
        $id = $id_aux[0];

        // Constructing the grabber downloads the video as an FLV file.
        $youtubegrabber = new youtubegrabber("http://www.youtube.com/watch?v=" . $id, 0);

        if (!$youtubegrabber->flv_url) {
            // No FLV stream was found, so there is nothing to convert.
            continue;
        }

        $out = str_pad($y, 2, "0", STR_PAD_LEFT) . '.mp3';

        // Both file names are escaped because the video ID comes from remote data.
        $sCommand = "ffmpeg -i " . escapeshellarg($youtubegrabber->final_flv_filename) . " " . escapeshellarg($out);
        exec($sCommand);
    }
}
?>