My favorites | Sign in
Project Logo
                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
<?PHP
// Example usage: scrape the details for a single movie title and dump the
// resulting associative array. Note that this performs live HTTP requests.
$scraper = new MediaInfo();
print_r($scraper->getMovieInfo('American Beauty'));

/**
 * Looks up metadata for a movie (or TV episode) from a free-form title or a
 * media file name.
 *
 * Movie data is scraped with regular expressions from the HTML page that a
 * Google "I'm Feeling Lucky" search for "imdb <title>" leads to. Episode
 * lookup is currently a stub that only reports the kind.
 */
class MediaInfo
{
    /**
     * Result of the most recent autodetect() call (null until one has run).
     *
     * @var array|null
     */
    public $info;

    /**
     * @param string|null $str Optional title or file name; when given,
     *                         autodetect() is run on it immediately.
     */
    public function __construct($str = null)
    {
        if (!is_null($str)) {
            $this->autodetect($str);
        }
    }

    /**
     * Decides whether $str names a TV episode or a movie, runs the matching
     * lookup, stores the result in $this->info and returns it.
     *
     * @param string $str Title or file name.
     * @return array Lookup result; always contains a 'kind' key.
     */
    public function autodetect($str)
    {
        // Attempt to cleanup $str in case it's a filename ;-)
        $str = pathinfo($str, PATHINFO_FILENAME);
        $str = $this->normalize($str);

        // An "s01e02"-style marker (optionally with one separator character
        // between season and episode) means it is a TV episode.
        if (preg_match('/s[0-9][0-9]?.?e[0-9][0-9]?/i', $str) === 1) {
            $this->info = $this->getEpisodeInfo($str);
        } else {
            $this->info = $this->getMovieInfo($str);
        }

        return $this->info;
    }

    /**
     * Placeholder episode lookup: no remote query is made yet.
     *
     * @param string $str Normalized episode name (currently unused).
     * @return array array('kind' => 'tv')
     */
    public function getEpisodeInfo($str)
    {
        $arr = array();
        $arr['kind'] = 'tv';
        return $arr;
    }

    /**
     * Fetches the page an "I'm Feeling Lucky" search for "imdb <title>" leads
     * to and extracts movie details from its HTML.
     *
     * Scalar fields that cannot be found are false (or '' for the fields that
     * are trimmed); 'genres' and 'cast' are then empty arrays.
     *
     * @param string $str Movie title.
     * @return array Keys: kind, id, title, rating, director, release_date,
     *               plot, genres (array), poster, cast (actor => character).
     */
    public function getMovieInfo($str)
    {
        // Drop the article "the" from the query. The word boundary keeps
        // words that merely end in "the" (e.g. "Breathe In") intact.
        $str = preg_replace('/\bthe /i', '', $str);
        $url = "http://www.google.com/search?hl=en&q=imdb+" . urlencode($str) . "&btnI=I%27m+Feeling+Lucky";
        $html = $this->geturl($url);

        // geturl() does not follow redirects, so follow the link inside a
        // "302 Moved" body by hand - but only if a link was actually found.
        if (is_string($html) && stripos($html, "302 Moved") !== false) {
            $location = $this->match('/HREF="(.*?)"/ms', $html, 1);
            $html = ($location !== false) ? $this->geturl($location) : false;
        }

        // A failed request yields false; scrape an empty document instead.
        if (!is_string($html)) {
            $html = '';
        }

        $arr = array();
        $arr['kind'] = 'movie';
        $arr['id'] = $this->match('/poster.*?(tt[0-9]+)/ms', $html, 1);

        // Page title with the trailing "(year)" removed.
        $title = (string) $this->match('/<title>(.*?)<\/title>/ms', $html, 1);
        $arr['title'] = trim(preg_replace('/\([0-9]+\)/', '', $title));

        $arr['rating'] = $this->match('/([0-9]\.[0-9])\/10/ms', $html, 1);
        $arr['director'] = trim(strip_tags((string) $this->match('/Director:(.*?)<\/a>/ms', $html, 1)));
        $arr['release_date'] = $this->match('/([0-9][0-9]? (January|February|March|April|May|June|July|August|September|October|November|December) (19|20)[0-9][0-9])/ms', $html, 1);
        $arr['plot'] = trim(strip_tags((string) $this->match('/Plot:(.*?)<a/ms', $html, 1)));

        // match_all() reports a regex failure as false; treat it as "none".
        $genres = $this->match_all('/Sections\/Genres\/(.*?)[\/">]/ms', $html, 1);
        $arr['genres'] = is_array($genres) ? array_unique($genres) : array();

        $arr['poster'] = $this->match('/<a.*?name=.poster.*?src=.(.*?)(\'|")/ms', $html, 1);

        $arr['cast'] = array();
        $rows = $this->match_all('/class="nm">(.*?\.\.\..*?)<\/tr>/ms', $html, 1);
        if (is_array($rows)) {
            foreach ($rows as $row) {
                // Each row reads "Actor ... Character". Pad the pieces so a
                // row whose separator vanished with the tags cannot trigger
                // an undefined-offset error.
                $parts = array_pad(explode('...', strip_tags($row)), 2, '');
                $arr['cast'][trim($parts[0])] = trim($parts[1]);
            }
        }

        return $arr;
    }

    // ****************************************************************

    /**
     * Turns file-name separators into spaces: underscores and dots become
     * spaces, and runs of spaces collapse into one.
     *
     * @param string $str
     * @return string
     */
    public function normalize($str)
    {
        $str = str_replace(array('_', '.'), ' ', $str);
        return preg_replace('/ +/', ' ', $str);
    }

    /**
     * Downloads $url with cURL and returns the response body.
     *
     * @param string      $url
     * @param string|null $username Sent as HTTP Basic credentials, but only
     * @param string|null $password when both username and password are given.
     * @return string|false Response body, or false when the request failed.
     */
    public function geturl($url, $username = null, $password = null)
    {
        $ch = curl_init();
        if ($ch === false) {
            return false;
        }

        if (!is_null($username) && !is_null($password)) {
            curl_setopt($ch, CURLOPT_HTTPHEADER, array('Authorization: Basic ' . base64_encode("$username:$password")));
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
        $html = curl_exec($ch);
        curl_close($ch);

        return $html;
    }

    /**
     * Runs preg_match_all() and returns every capture of group $i.
     *
     * @param string $regex
     * @param string $str
     * @param int    $i Capture group index (0 = whole match).
     * @return array|false Captures of group $i (an empty array when the
     *                     pattern has no such group), or false when
     *                     preg_match_all() itself failed.
     */
    public function match_all($regex, $str, $i = 0)
    {
        if (preg_match_all($regex, $str, $matches) === false) {
            return false;
        }

        return isset($matches[$i]) ? $matches[$i] : array();
    }

    /**
     * Runs preg_match() and returns capture group $i of the first match.
     *
     * @param string $regex
     * @param string $str
     * @param int    $i Capture group index (0 = whole match).
     * @return string|false The captured text, or false when the pattern did
     *                      not match or group $i is absent from the result
     *                      (PCRE omits trailing unmatched optional groups).
     */
    public function match($regex, $str, $i = 0)
    {
        if (preg_match($regex, $str, $match) === 1 && isset($match[$i])) {
            return $match[$i];
        }

        return false;
    }
}
Show details Hide details

Change log

r27 by tylerhall on May 24, 2009   Diff
Fixed broken IMDB poster regex
Go to: 
Project members, sign in to write a code review

Older revisions

r14 by tylerhall on Jan 01, 2009   Diff
Adding IMDB poster. Thanks to Enrico
Berti for the regex.
r6 by tylerhall on May 20, 2008   Diff
Added Media Info class. Right now it's
just an IMDB scraper. Plan is to also
integrate in the Schmooze.TV episode
lookup service.
All revisions of this file

File info

Size: 3991 bytes, 109 lines
Hosted by Google Code