Monday 20 February 2012 2:45:31 pm
I got so fed up with using the flv2mp3 website to convert a YouTube playlist to MP3 that I wrote my own script to do it. I'm trying to learn Norwegian and would like to listen to some lessons offline on my phone. I will show the script here.
Warning 1: I will expand this post later and explain how to create an eZ Publish extension. This is just a sketch.
Warning 2: This script is really processor- and bandwidth-intensive, so be careful.
First you need to install ffmpeg and libavcodec-extra-53:
sudo apt-get install ffmpeg libavcodec-extra-53
I'm using a modified version of the PHP script found on 1chris.com; here's the complete code:
<?php
// Downloading and transcoding a whole playlist can take a long time,
// so lift PHP's execution time limit.
set_time_limit(0);

// modified version from http://1chris.com/wp-content/uploads/2011/08/youtubegrabber.class.php_.txt

/**
 * Downloads the FLV stream of a single YouTube video into the current
 * working directory.
 *
 * All the work happens in the constructor: it extracts the video id from the
 * watch URL, scrapes the watch page for candidate stream URLs, picks the first
 * one served as "video/x-flv" and saves it as "<video id>.flv".
 *
 * After construction:
 *  - $filename is the bare video id (no extension),
 *  - $final_flv_filename is the name of the file that was written,
 *  - $flv_url is the stream URL that was used, or false if none was found,
 *  - $downloaded tells whether the download succeeded.
 */
class youtubegrabber
{
    public $youtube_video_url;
    public $youtube_video_id;
    public $test;
    public $final_flv_filename;
    public $filename;
    public $cookies_path;
    public $curl_headers;
    public $flv_url;
    public $downloaded = false;

    /**
     * @param string $youtube_video_url  Watch URL containing "watch?v=<id>".
     * @param mixed  $final_flv_filename Ignored: the output file is always
     *                                   named "<video id>.flv". Kept so that
     *                                   existing callers keep working.
     * @param mixed  $test               Stored in $this->test, otherwise unused.
     */
    public function __construct($youtube_video_url, $final_flv_filename, $test = 0)
    {
        $this->youtube_video_url = $youtube_video_url;
        $this->test = $test;
        $this->youtube_video_id = $this->get_youtube_video_id();
        $this->final_flv_filename = $this->youtube_video_id . '.flv';
        $this->cookies_path = "cookies.txt";
        // Headers that make the requests look like Firefox 3.6 on Windows 7.
        $this->curl_headers = array(
            "Accept-Language: en-us",
            "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15",
            "Connection: Keep-Alive",
            "Cache-Control: no-cache"
        );

        // Start from an empty cookie jar and leave an empty one behind.
        $this->clear_cookies();
        $this->flv_url = ($this->youtube_video_id === '') ? false : $this->get_flv_url();
        $this->downloaded = $this->get_curl_binary();
        $this->clear_cookies();
    }

    /**
     * Extracts the video id from $this->youtube_video_url and stores it in
     * $this->filename.
     *
     * @return string The id without any trailing "&..." parameters, or an
     *                empty string when the URL has no "watch?v=" part.
     */
    public function get_youtube_video_id()
    {
        $url_parts = explode("watch?v=", (string) $this->youtube_video_url);
        if (count($url_parts) < 2) {
            $this->filename = '';
            return '';
        }

        $id_parts = explode("&", $url_parts[1]);
        $this->filename = $id_parts[0];

        // Return the stripped id so that the FLV file name derived from it
        // always matches $this->filename . '.flv'.
        return $id_parts[0];
    }

    /**
     * Replaces the cookie jar with an empty file; aborts the script when the
     * file cannot be created.
     */
    public function clear_cookies()
    {
        if (file_exists($this->cookies_path)) {
            unlink($this->cookies_path);
        }

        $handle = fopen($this->cookies_path, 'w') or die("can't open file");
        fclose($handle);
    }

    /**
     * Scrapes the watch page for stream URLs and returns the first one whose
     * response headers announce an FLV video.
     *
     * @return string|false The stream URL, or false when the page could not
     *                      be fetched or parsed, or no candidate is an FLV.
     */
    public function get_flv_url()
    {
        $html = $this->curl_get_url($this->youtube_video_url);
        if (!is_string($html) || $html === '') {
            return false;
        }

        // The stream map sits inside the JavaScript that builds the player.
        if (!preg_match("/var.*?swf.*?=.*?\"(.*?)watch-player.*?innerHTML.*?=.*?swf/is", $html, $page_match)) {
            return false;
        }
        $decoded = urldecode($page_match[1]);

        preg_match_all("/url=(.*?)\,/is", $decoded, $url_matches);
        foreach ($url_matches[1] as $candidate) {
            // Keep only the URL itself, dropping the parameters appended after it.
            $pieces = explode("&", $candidate);
            $candidate_url = urldecode($pieces[0]);

            $headers = $this->curl_get_headers($candidate_url);
            if (!is_string($headers)) {
                continue;
            }

            foreach (explode("\n", trim($headers)) as $line) {
                $header = explode(":", $line, 2);
                // Header names are case-insensitive.
                if (count($header) < 2 || strcasecmp(trim($header[0]), 'Content-Type') !== 0) {
                    continue;
                }
                if ($this->contains("video/x-flv", strtolower(trim($header[1])))) {
                    return $candidate_url;
                }
            }
        }

        return false;
    }

    /**
     * Fetches a URL with the browser-like headers and the shared cookie jar.
     *
     * @param string $url
     * @return string|false The response body, or false on failure.
     */
    public function curl_get_url($url)
    {
        $ch = $this->new_curl_handle($url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
        $output = curl_exec($ch);
        curl_close($ch);

        return $output;
    }

    /**
     * Issues a body-less request and returns the raw response headers
     * (of every hop when redirects are followed).
     *
     * @param string $url
     * @return string|false The header text, or false on failure.
     */
    public function curl_get_headers($url)
    {
        $ch = $this->new_curl_handle($url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_HEADER, 1);
        curl_setopt($ch, CURLOPT_NOBODY, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
        $results = curl_exec($ch);
        curl_close($ch);

        return $results;
    }

    /**
     * Streams $this->flv_url into $this->final_flv_filename.
     *
     * @return bool True when the download completed; false when there is no
     *              stream URL, the file cannot be opened or the transfer fails.
     */
    public function get_curl_binary()
    {
        // Without a stream URL there is nothing to fetch; do not leave an
        // empty .flv file behind.
        if (!is_string($this->flv_url) || $this->flv_url === '') {
            return false;
        }

        $fp = fopen($this->final_flv_filename, "w");
        if ($fp === false) {
            return false;
        }

        $ch = $this->new_curl_handle($this->flv_url);
        curl_setopt($ch, CURLOPT_FILE, $fp);
        $ok = curl_exec($ch);
        curl_close($ch);
        fclose($fp);

        return $ok !== false;
    }

    /**
     * Tells whether $substring occurs anywhere in $string.
     *
     * @param string $substring Needle.
     * @param string $string    Haystack.
     * @return bool
     */
    public function contains($substring, $string)
    {
        return strpos($string, $substring) !== false;
    }

    /**
     * Creates a cURL handle with the options every request shares: the target
     * URL, the browser-like headers, the cookie jar and redirect following.
     *
     * @param string $url
     * @return mixed The cURL handle.
     */
    private function new_curl_handle($url)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $this->curl_headers);
        curl_setopt($ch, CURLOPT_COOKIEJAR, $this->cookies_path);
        curl_setopt($ch, CURLOPT_COOKIEFILE, $this->cookies_path);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

        return $ch;
    }
}

// ---------------------------------------------------------------------------
// Playlist driver: walks the playlist feed page by page, downloads every
// video as FLV and converts it to a numbered MP3 (01.mp3, 02.mp3, ...).
// ---------------------------------------------------------------------------

$playlist_url = "http://www.youtube.com/playlist?list=PLE6D4A81E59068EB7&feature=plcp";

// The feed API wants the playlist id without its "PL" prefix.
$playlist_id = '';
$playlist_parts = explode('list=PL', $playlist_url);
if (count($playlist_parts) > 1) {
    $playlist_parts = explode('&', $playlist_parts[1]);
    $playlist_id = $playlist_parts[0];
}

$offset = 0;   // entries numbered below this are skipped (resume support)
$limit = 25;   // entries requested per feed page
$total = null; // number of entries in the playlist, read from the first page
$y = 0;        // 1-based number of the entry currently being processed

if ($playlist_id === '') {
    error_log("No playlist id found in $playlist_url");
} else {
    do {
        $next = $y + 1;
        $url = "https://gdata.youtube.com/feeds/api/playlists/" . rawurlencode($playlist_id)
            . "?start-index=$next&max-results=$limit&v=2";

        $doc = new DOMDocument;
        if (!$doc->load($url)) {
            error_log("Could not load the playlist feed page starting at entry $next");
            break;
        }

        if ($total === null) {
            $total_nodes = $doc->getElementsByTagNameNS('http://a9.com/-/spec/opensearchrss/1.0/', 'totalResults');
            $total = $total_nodes->length > 0 ? (int) $total_nodes->item(0)->nodeValue : 0;
        }

        $entries = $doc->getElementsByTagName("entry");
        if ($entries->length == 0) {
            // An empty page means the feed is exhausted; looping on would never end.
            break;
        }

        foreach ($entries as $entry) {
            $y++;
            if ($y < $offset) {
                continue;
            }

            // Prefer the id embedded in the player URL; fall back to <yt:videoid>.
            $id = '';
            $players = $entry->getElementsByTagNameNS('http://search.yahoo.com/mrss/', 'player');
            if ($players->length > 0) {
                $id_aux = explode('watch?v=', $players->item(0)->getAttribute('url'));
                if (count($id_aux) > 1) {
                    $id_aux = explode('&', $id_aux[1]);
                    $id = $id_aux[0];
                }
            }
            if ($id === '') {
                $video_ids = $entry->getElementsByTagNameNS('http://gdata.youtube.com/schemas/2007', 'videoid');
                if ($video_ids->length > 0) {
                    $id = trim($video_ids->item(0)->nodeValue);
                }
            }
            if ($id === '') {
                error_log("Entry $y has no video id, skipping");
                continue;
            }

            $url = "http://www.youtube.com/watch?v=" . $id;
            $youtubegrabber = new youtubegrabber($url, 0);
            if (!$youtubegrabber->downloaded) {
                error_log("Could not download $url, skipping");
                continue;
            }

            $out = str_pad((string) $y, 2, "0", STR_PAD_LEFT) . '.mp3';
            // The video id comes from a remote feed: quote everything handed to the shell.
            $sCommand = 'ffmpeg -i ' . escapeshellarg($youtubegrabber->final_flv_filename)
                . ' ' . escapeshellarg($out);
            exec($sCommand);
        }
    } while ($y < $total);
}
?>