My favorites | Sign in
Project Home Downloads Wiki Issues Source
READ-ONLY: This project has been archived. For more information see this post.
Project Information
Members

Introduction

Snaker is an open-source web crawler written in Java. It is a powerful, pluggable download/crawler platform that is easy to set up and use. Users can write customized crawler scripts in JavaScript. It also has a friendly web console that helps users manage and monitor the download process.

Features

  1. High-performance downloading
  2. Supports sessions/cookies
  3. Supports customizable crawler scripts
  4. Supports HTTPS
  5. Supports HTTP proxies
  6. Supports OCR
  7. Supports multiple charsets

Screen Shot

Crawler script example

Please refer to HowToWriteCrawlerEngineScript for details.

The simplest example, which just downloads a single URL:

// ==UserScript==
// @name		SingleFile
// @title		Single File
// @description	a simple downloader for single url
// @parameter   *url textarea URL 
// ==/UserScript==

// Read the "url" value exposed on the host object $ and hand it to $.save.
var targetUrl = $.url;
$.save(targetUrl);

A more complicated example, which downloads MP3 files from englishpod.com:

// ==UserScript==
// @name		EnglishPod
// @title		English Pod
// @description	download lessons from englishpod.com
// @parameter   *email text Email
// @parameter   *password password Password
// ==/UserScript==

// Matches absolute lesson-page URLs. The dots in the host name are escaped so
// they only match a literal "."; the g flag makes String.match return every
// occurrence on a page instead of just the first.
var lessonRegex = /http:\/\/englishpod\.com\/lessons\/[^"\/]*/g;
// Matches a lesson MP3 URL on S3. The character classes stop at quotes,
// whitespace and angle brackets so the match cannot run past the end of the
// URL into a later "pr.mp3" on the same line (which a greedy ".*" would do).
var mp3Regex = /http:\/\/s3\.amazonaws\.com\/englishpod\.com\/[^"'\s<>]*englishpod[^"'\s<>]*pr\.mp3/;
// Object used as a set: each key is a lesson URL, each value is true.
var allLessons = {};

/**
 * Fetches one page of the lesson index and records every lesson URL on it.
 *
 * Each URL matched by lessonRegex is printed and stored as a key of the
 * shared allLessons object, so duplicates across pages collapse into one entry.
 *
 * @param page page number appended to the "?page=" query parameter
 */
function getLessons(page){
	var url = "http://englishpod.com/lessons?page=" + page;
	var response = $.get(url);

	// Treat every 2xx status as success; a bare "statusCode / 100 == 2" is
	// only true for exactly 200.
	if (Math.floor(response.statusCode / 100) !== 2) {
		$.print("download failed,err:" + response.statusCode);
		return;
	}

	$.print("download Lesson successfully,url:" + url);
	// String.match returns null (not an empty array) when nothing matches.
	var lessons = response.body.match(lessonRegex) || [];
	for (var i = 0; i < lessons.length; i++) {
		$.print(lessons[i]);
		allLessons[lessons[i]] = true;
	}
}

/**
 * Fetches a lesson page and saves the first MP3 link found on it.
 *
 * The page body is searched with mp3Regex; when it matches, the matched URL
 * is passed to $.save. A page without a matching link is skipped silently.
 *
 * @param lesson URL of the lesson page to fetch
 */
function downloadLesson(lesson){
	var response = $.get(lesson);

	// Treat every 2xx status as success; a bare "statusCode / 100 == 2" is
	// only true for exactly 200.
	if (Math.floor(response.statusCode / 100) !== 2) {
		$.print("download lesson failed,err:" + response.statusCode);
		return;
	}

	$.print("download Lesson successfully");
	// match() yields null when the page carries no MP3 link.
	var found = response.body.match(mp3Regex);
	if (found != null && found.length > 0) {
		$.save(found[0]);
	}
}

/**
 * Signs in to englishpod.com with the script parameters $.email and $.password.
 *
 * Redirect following is switched off so the 302 answer to the sign-in POST can
 * be inspected directly. The login counts as successful when that answer
 * redirects to anything other than the sign-in URL itself; the redirect target
 * is then requested once (presumably to finish establishing the session).
 *
 * NOTE(review): redirect following is left disabled after this function
 * returns — confirm that later requests do not rely on it.
 *
 * @returns true when the login succeeded, false otherwise
 */
function login(){
	// Log the account name only; the password must never reach the crawler log.
	$.print("email="+$.email);
	$.followRedirects(false);

	//send the login query
	var url = "https://englishpod.com/accounts/signin";
	var param = {};
	param.email = $.email;
	param.password = $.password;
	var response = $.post(url,param);
	$.print(response.body);

	var result = false;
	// Loose comparisons are kept on purpose: the values come from the host
	// object and may not be primitive JavaScript numbers/strings.
	if (response.statusCode == 302) {
		var headers = response.headers || {};
		// Header names are case-insensitive, so accept either spelling; the
		// header may also be missing altogether.
		var location = headers["Location"] || headers["location"];
		if (location != null && location.length > 0 && location != url) {
			$.get(location);
			$.print("Login successfully!");
			result = true;
		}
	}
	if (!result) {
		$.print("Login failed!");
	}
	return result;
}

// Script entry point: crawl only after a successful login.
if (login()) {
	// Collect lesson URLs from index pages 1..47. The upper bound is
	// hard-coded; presumably it was the page count when the script was
	// written — verify before reuse.
	for (var page = 1; page <= 47; ++page) {
		getLessons(page);
	}
	// The keys of allLessons are the lesson URLs gathered above. Loop
	// variables are declared with var so they are not implicit globals.
	for (var lesson in allLessons) {
		if (Object.prototype.hasOwnProperty.call(allLessons, lesson)) {
			downloadLesson(lesson);
		}
	}
}
Powered by Google Project Hosting