var http =  require('http'),
    fs = require('fs');
var toMarkdown = require('to-markdown').toMarkdown;
var htmlToText = require('html-to-text');
/* Usage sample for html-to-text, kept for reference only (never executed).
   The dangling comment terminator that followed this snippet shows it was
   meant to be commented out; without an opening marker it was a syntax error.
var text = htmlToText.fromString('<h1>Hello World</h1>', {
    wordwrap: 130
});*/
// Crawl configuration. These are file-level globals read by the functions in
// this file.
var websiteName = "zerojudge",                        // used to recognise "<name>/post..." article links
    websitePage = 141,                                // highest "/P<n>" listing index requested
    websiteDomain = 'http://mypaper.pchome.com.tw/',  // prefix prepended to every article link
    url = websiteDomain + websiteName,                // address of the first listing page
    loadprocess = 0;                                  // incremented once per saved article; used in output file names

// Kick off the crawl (clawingWebSite is a hoisted function declaration).
clawingWebSite(url, websitePage);
/**
 * Request every listing page of the blog.
 *
 * Page 0 is the bare `url`; pages 1..websitePage are `url + "/P" + n`.
 * Each address is handed to clawingWebPage in ascending order.
 *
 * @param {string} url          Address of the first listing page.
 * @param {number} websitePage  Highest page index to request (inclusive).
 */
function clawingWebSite(url, websitePage) {
    var pageNo = 0;
    while (pageNo <= websitePage) {
        clawingWebPage(pageNo === 0 ? url : url + "/P" + pageNo);
        pageNo += 1;
    }
}
/**
 * Process one listing page. Currently a thin pass-through that hands the
 * address to readWebPage.
 *
 * @param {string} url  Address of the listing page.
 */
function clawingWebPage(url) {
    readWebPage(url);
}
/**
 * Scan a listing page's HTML for article links and request each one.
 *
 * Scanning starts at the first `class="blog"` marker. From the current
 * position the next "<websiteName>/post" occurrence is read up to the following
 * double quote and passed to readWebArticleLink (absolute URL first, relative
 * link second). The position then jumps to the next `title brk_h` marker, and
 * the scan stops when either the marker or a link can no longer be found.
 *
 * @param {string} source  HTML of a listing page.
 */
function branchWebPage(source) {
    var linkPrefix = websiteName + '/post';
    var cursor = source.indexOf('class="blog"');

    while (cursor >= 0) {
        var linkStart = source.indexOf(linkPrefix, cursor);
        if (linkStart <= 0) {
            return;
        }

        var linkStop = source.indexOf('"', linkStart);
        var articleLink = source.substr(linkStart, linkStop - linkStart);
        readWebArticleLink(websiteDomain + articleLink, articleLink);

        // Continue from the next post heading after the link just handled.
        cursor = source.indexOf('title brk_h', linkStart + 1);
    }
}
/**
 * Read the article title from the first `content="..."` attribute found at or
 * after the `name="keywords"` marker (the search starts at the top of the page
 * when that marker is absent).
 *
 * @param {string} source  HTML of an article page.
 * @returns {string} The attribute value, or "" when it cannot be located.
 */
function extractArticleTitle(source) {
    var attrMarker = 'content="';
    var attrIndex = source.indexOf(attrMarker, source.indexOf('name="keywords"'));
    if (attrIndex < 0) {
        return "";
    }
    var valueStart = attrIndex + attrMarker.length;
    // Search from the first value character so an empty content="" ends at once.
    var valueEnd = source.indexOf('"', valueStart);
    if (valueEnd < 0) {
        return "";
    }
    return source.slice(valueStart, valueEnd);
}

/**
 * Collect the tag names linked inside the `article_tag` section.
 *
 * A tag is the text between a `search_fields=tag">` marker and the next `</a>`.
 * Collection stops at the `article_author` block, when no further marker
 * exists, or when a tag has no closing `</a>`.
 *
 * @param {string} source  HTML of an article page.
 * @returns {string[]} Tag names in document order (possibly empty).
 */
function extractArticleTags(source) {
    var sectionMarker = '<div id="article_tag">';
    var linkMarker = 'search_fields=tag">';
    var tags = [];

    var sectionStart = source.indexOf(sectionMarker);
    if (sectionStart < 0) {
        return tags;
    }
    var sectionEnd = source.indexOf('<div id="article_author" align="right">');
    var cursor = sectionStart + sectionMarker.length;

    while (true) {
        var linkIndex = source.indexOf(linkMarker, cursor);
        // Test for "not found" BEFORE adding the marker length; adding first
        // turns -1 into a positive offset and the loop would never terminate.
        if (linkIndex < 0) {
            break;
        }
        var tagStart = linkIndex + linkMarker.length;
        if (tagStart > sectionEnd) {
            break;
        }
        var tagEnd = source.indexOf('</a>', tagStart);
        if (tagEnd < 0) {
            break;
        }
        tags.push(source.slice(tagStart, tagEnd));
        cursor = tagEnd;
    }
    return tags;
}

/**
 * Write `data` to `path` as UTF-8, replacing any existing file.
 * fs.writeFile opens, writes and closes in one call, so no descriptor is left
 * open when the write fails.
 *
 * @param {string} path  Destination file.
 * @param {string} data  Text to store.
 */
function saveArticleFile(path, data) {
    fs.writeFile(path, data, 'utf8', function(e) {
        if(e) {
            console.log('错误信息:' + e);
        }
    });
}

/**
 * Convert one downloaded article page into output files.
 *
 * The article body is the HTML from `<div class="innertext brk_h"` up to (but
 * excluding the character right before) `<div id="ArticleMapTitle"`, with a
 * closing `</div>` appended. When that region cannot be located the function
 * returns after logging the title and writes nothing. Otherwise it increments
 * the global `loadprocess` counter and writes three files numbered with it:
 *   - sourcePage<n>.html : the raw page
 *   - textPC<n>.md       : front matter (title, fixed date, tags) followed by
 *                          the html-to-text rendering indented by four spaces
 *   - oldPC<n>.md        : the toMarkdown rendering of the body
 *
 * @param {string} source    HTML of the article page.
 * @param {string} fileName  Currently unused; output names come from `loadprocess`.
 */
function parsingArticlePage(source, fileName) {
    var articleTitle = extractArticleTitle(source);
    console.log('Title = ' + articleTitle);

    var bodyStart = source.indexOf('<div class="innertext brk_h"');
    var bodyEnd = source.indexOf('<div id="ArticleMapTitle"');
    // Bail out when either marker is missing or the region between them is empty.
    if(bodyStart < 0 || bodyEnd - bodyStart - 1 <= 0)
        return;

    var bodyHtml = source.slice(bodyStart, bodyEnd - 1) + "</div>";

    // Square brackets in the title are swapped for full-width brackets.
    articleTitle = articleTitle.replace(/\[/g, "【").replace(/\]/g, "】");

    var text = htmlToText.fromString(bodyHtml, {
        wordwrap: 130
    });
    var markdownBody = toMarkdown(bodyHtml);

    var mdFormat = "";
    mdFormat += "title: " + articleTitle + "\n";
    mdFormat += "date: 2014-04-10 20:10:28" + "\n";
    mdFormat += "tags: " + "\n";

    var tags = extractArticleTags(source);
    for(var i = 0; i < tags.length; i++) {
        console.log(tags[i]);
        mdFormat += "- " + tags[i] + "\n";
    }
    mdFormat += "---" + "\n\n";

    // Indent every line of the plain text by four spaces.
    mdFormat += "    " + text.replace(/\n/g, "\n    ");

    loadprocess++;
    saveArticleFile('sourcePage' + loadprocess + '.html', source);
    saveArticleFile('textPC' + loadprocess + '.md', mdFormat);
    saveArticleFile('oldPC' + loadprocess + '.md', markdownBody);
}
/**
 * Download one article page and hand its HTML to parsingArticlePage.
 *
 * @param {string} url         Absolute address of the article.
 * @param {string} [fileName]  Optional second argument (branchWebPage passes the
 *                             article's relative link); forwarded unchanged to
 *                             parsingArticlePage instead of being dropped.
 */
function readWebArticleLink(url, fileName) {
    http.get(url, function(res) {
        var source = "";

        // Decode at the stream level so a multi-byte character that straddles
        // two chunks is not corrupted by converting each chunk separately.
        res.setEncoding('utf8');

        res.on('data', function(data) {
            source += data;
        });

        res.on('end', function() {
            parsingArticlePage(source, fileName);
        });
    }).on('error', function(err) {
        // Include the address and reason so a failure among many parallel
        // requests can be identified.
        console.log("获取数据出现错误", url, err && err.message);
    });
}
/**
 * Download one listing page and hand its HTML to branchWebPage.
 *
 * The orphaned tail of a commented-out file dump that used to sit in the 'end'
 * handler (it referenced undefined `e`/`fd` and left unbalanced braces plus a
 * stray comment terminator) has been removed.
 *
 * @param {string} url  Address of the listing page.
 */
function readWebPage(url) {
    http.get(url, function(res) {
        var source = "";

        // Decode at the stream level so a multi-byte character that straddles
        // two chunks is not corrupted by converting each chunk separately.
        res.setEncoding('utf8');

        res.on('data', function(data) {
            source += data;
        });

        res.on('end', function() {
            branchWebPage(source);
        });
    }).on('error', function(err) {
        // Include the address and reason so a failure among many parallel
        // requests can be identified.
        console.log("获取数据出现错误", url, err && err.message);
    });
}