add compiled javascript

This commit is contained in:
Sebastian Hugentobler 2017-01-05 15:57:32 +01:00
parent 0f0fea32ac
commit c6327dda05
2 changed files with 452 additions and 1 deletions

View File

@ -6,7 +6,7 @@ module.exports = function(grunt) {
saver: {
cmd: 'casperjs',
args: [
'src/proboard_saver.coffee',
'src/proboard_saver.js',
'--board-nr=YOUR-BOARD-NR',
'--board-name=YOUR-BOARD-NAME',
'--user=YOUR-USERNAME',

451
src/proboard_saver.js Normal file
View File

@ -0,0 +1,451 @@
// Generated by CoffeeScript 1.12.2
(function() {
var casper, findBoards, findPages, findPosts, findThreads, findUserLinks, fs, getUser, loadImages, missingArgumentError, password, proboard, proboardName, proboardNr, proboardUrl, proboardUserUrl, readBoard, replaceHtml, user, utils;
// Create the CasperJS instance used by every step below. Web security is
// switched off in the page settings (presumably so cross-origin resources can
// be fetched - confirm before changing).
casper = require('casper').create({
  pageSettings: {
    webSecurityEnabled: false
  },
  logLevel: 'info',
  verbose: false
});
utils = require('utils');
fs = require('fs');
// Errors raised by the script and errors raised inside the page are echoed
// in exactly the same way, so both events share one registration loop.
['error', 'page.error'].forEach(function(eventName) {
  casper.on(eventName, function(msg, trace) {
    return this.echo("Error: " + msg, "ERROR");
  });
});
// Echo messages logged by the remote page, except the
// "Unsafe JavaScript attempt" notices, which are dropped.
casper.on('remote.message', function(msg, trace) {
  if (/Unsafe JavaScript attempt/.test(msg)) {
    return;
  }
  return this.echo("remote log: " + msg, "INFO");
});
/**
 * Downloads every image referenced by a plain `[img]url[/img]` tag and
 * rewrites the tag so it points at the downloaded copy.
 *
 * Each image is saved through `casper.download` to
 * `data/images/<last path segment of the url>` and its tag becomes
 * `[img]{{baseurl}}/images/<last path segment>[/img]`. Tags that carry an
 * alt text (`[img=alt]...`) are not matched by the pattern and stay untouched.
 *
 * @param {string} searchString BBCode-like text to scan.
 * @returns {string} The text with all plain image tags rewritten.
 */
loadImages = function(searchString) {
  // One global replace rewrites each tag exactly where it was found. Building
  // a RegExp from the raw url instead would break on urls that contain regex
  // metacharacters such as "?", "+" or "(".
  return searchString.replace(/\[img\](.*?)\[\/img\]/g, function(tag, url) {
    var imageName, urlParts;
    urlParts = url.split('/');
    imageName = urlParts[urlParts.length - 1];
    console.log("\t\tdownloading image '" + imageName + "'...");
    casper.download(url, "data/images/" + imageName);
    // Returned from a callback, so "$" sequences in the name stay literal.
    return "[img]{{baseurl}}/images/" + imageName + "[/img]";
  });
};
/**
 * Converts the markup inside `element` into BBCode-like plain text.
 *
 * Images, YouTube iframes, <i>/<b>/<u>, coloured <font> tags, quotes and
 * links are rewritten to `[img]`, `[video]`, `[i]`/`[b]`/`[u]`, `[colour]`,
 * `[quote]` and `[url]` tags. ProBoards smiley images collapse to their alt
 * text and everything from the "[b]Attachments:[/b]" marker on is cut off.
 * The element is modified in place while converting.
 *
 * NOTE: callers in this file hand this function to `casper.evaluate` as an
 * argument, so it must stay self-contained: every helper it needs is nested
 * inside it and it must not reference variables of the surrounding script.
 *
 * @param {Element} element DOM node whose content should be converted.
 * @returns {string} The converted text (taken from `element.innerText`).
 */
replaceHtml = function(element) {
  var attachmentIndex, collect, dummy, finalText, literal, quote, quoteAvatar, quoteClear, quoteHeader, registeredUserNode, replaceEach, user, wrapper;

  // Wraps a replacement text in a callback so String.replace inserts it
  // verbatim; a plain string would have "$&", "$1", ... expanded.
  literal = function(text) {
    return function() {
      return text;
    };
  };
  // Snapshots data from all current matches of `selector` before the markup
  // is rewritten (assigning innerHTML discards the matched nodes).
  collect = function(selector, read) {
    return Array.prototype.map.call(element.querySelectorAll(selector), read);
  };
  // Replaces the first remaining match of `pattern` once per collected item.
  // Items are in document order, so the n-th item lines up with the n-th tag.
  replaceEach = function(items, pattern, render) {
    var index;
    for (index = 0; index < items.length; index++) {
      element.innerHTML = element.innerHTML.replace(pattern, literal(render(items[index])));
    }
  };

  replaceEach(collect('img', function(img) {
    return {
      src: img.src,
      alt: img.hasAttribute('alt') ? img.alt : ''
    };
  }), /<img[^>]*>/, function(image) {
    return "[img" + (image.alt ? '=' + image.alt : '') + "]" + image.src + "[/img]";
  });

  // The video id is the path segment after "/embed/", without the query.
  replaceEach(collect("iframe[title='YouTube video player']", function(video) {
    return video.src.split('/')[4].split('?')[0];
  }), /<iframe title="YouTube video player"[^>]*>[\s\S]*?<\/iframe>/, function(video) {
    return "[video]https://www.youtube.com/watch?v=" + video + "[/video]";
  });

  // [\s\S] instead of "." so content spanning several lines still matches;
  // otherwise a later tag would receive this tag's content.
  replaceEach(collect('i', function(cursive) {
    return cursive.innerHTML;
  }), /<i>[\s\S]*?<\/i>/, function(cursive) {
    return "[i]" + cursive + "[/i]";
  });
  replaceEach(collect('b', function(bold) {
    return bold.innerHTML;
  }), /<b>[\s\S]*?<\/b>/, function(bold) {
    return "[b]" + bold + "[/b]";
  });
  replaceEach(collect('u', function(underlined) {
    return underlined.innerHTML;
  }), /<u>[\s\S]*?<\/u>/, function(underlined) {
    return "[u]" + underlined + "[/u]";
  });

  // Matches any <font> carrying a color attribute, mirroring the
  // 'font[color]' selector, up to the first closing tag.
  replaceEach(collect('font[color]', function(colour) {
    return {
      name: colour.attributes['color'].value.toLowerCase(),
      innerHTML: colour.innerHTML
    };
  }), /<font\b[^>]*\scolor="[^"]*"[^>]*>[\s\S]*?<\/font>/, function(colour) {
    return "[colour=" + colour.name + "]" + colour.innerHTML + "[/colour]";
  });

  // Quotes: swap each quote container for a placeholder span holding the
  // [quote] text. The quoted markup is re-parsed inside the span, so nested
  // quotes are found by the next querySelector call.
  quote = element.querySelector('div.quote_body');
  while (quote) {
    quoteHeader = quote.querySelector('div.quote_header');
    registeredUserNode = quoteHeader ? quoteHeader.querySelector('span[itemprop="name"]') : null;
    user = null;
    if (registeredUserNode) {
      user = registeredUserNode.textContent;
    } else if (quote.parentNode.attributes['author']) {
      user = quote.parentNode.attributes['author'].value;
      if (user.substr(0, 1) === '@') {
        user = user.substr(1);
      }
    }
    if (quoteHeader) {
      quoteHeader.parentNode.removeChild(quoteHeader);
    }
    quoteAvatar = quote.querySelector('div.quote_avatar_container');
    if (quoteAvatar) {
      quoteAvatar.parentNode.removeChild(quoteAvatar);
    }
    quoteClear = quote.querySelector('div.quote_clear');
    if (quoteClear) {
      quoteClear.parentNode.removeChild(quoteClear);
    }
    wrapper = document.createElement('span');
    wrapper.setAttribute('class', 'dummytag');
    wrapper.innerHTML = "[quote" + (user ? '=' + user : '') + "]" + quote.innerHTML + "[/quote]";
    quote.parentNode.parentNode.replaceChild(wrapper, quote.parentNode);
    quote = element.querySelector('div.quote_body');
  }

  // Unwrap the placeholder spans in the DOM. A regex over the markup cannot
  // do this safely: a greedy match swallows the text between sibling quotes,
  // a lazy one breaks nested quotes.
  dummy = element.querySelector('span.dummytag');
  while (dummy) {
    while (dummy.firstChild) {
      dummy.parentNode.insertBefore(dummy.firstChild, dummy);
    }
    dummy.parentNode.removeChild(dummy);
    dummy = element.querySelector('span.dummytag');
  }

  // "\b" keeps the pattern from matching <abbr> or <address>.
  replaceEach(collect('a[href]', function(link) {
    return {
      target: link.attributes['href'].value,
      name: link.innerText
    };
  }), /<a\b[^>]*>[\s\S]*?<\/a>/, function(link) {
    return "[url=" + link.target + "]" + link.name + "[/url]";
  });

  element.innerHTML = element.innerHTML.replace(/<font [^>]*>/g, '');
  element.innerHTML = element.innerHTML.replace(/<\/font>/g, '');
  element.innerHTML = element.innerHTML.replace(/<div class="quote_clear"><\/div>/g, '');

  finalText = element.innerText;
  finalText = finalText.replace(/<br>/g, '\n');
  // Smileys are stored as images; keep only their alt text.
  finalText = finalText.replace(/\[img=([^\]]*)\]http:\/\/images\.proboards\.com\/v5\/images\/smiley\/.*?\[\/img\]/g, '$1');
  finalText = finalText.replace(/\[img=([^\]]*)\]http:\/\/images\.proboards\.com\/v5\/smiley\/.*?\[\/img\]/g, '$1');
  // The attachment list is not part of the message text.
  attachmentIndex = finalText.indexOf('\n\n[b]Attachments:[/b]\n\n');
  if (attachmentIndex > -1) {
    finalText = finalText.substring(0, attachmentIndex);
  }
  return finalText;
};
/**
 * Collects the boards listed on the current page.
 *
 * Board links and board descriptions are queried as two separate lists and
 * paired by position; a board without a matching description entry gets
 * `undefined` as description.
 *
 * @returns {Array<{title: string, description: (string|undefined), link: string}>}
 */
findBoards = function() {
  var anchors, descriptionNodes;
  anchors = document.querySelectorAll('tr.board.item td:nth-child(2) > span > a');
  descriptionNodes = document.querySelectorAll('tr.board.item td:nth-child(2) > p.description');
  return Array.prototype.map.call(anchors, function(anchor, position) {
    var descriptionNode;
    descriptionNode = descriptionNodes[position];
    return {
      title: anchor.textContent,
      description: descriptionNode ? descriptionNode.textContent : void 0,
      link: anchor.href
    };
  });
};
/**
 * Builds the list of page urls for the paginated view currently shown.
 *
 * The last pagination link is expected to look like `<base>?page=<N>`; the
 * result is then `<base>1` ... `<base>N`. When the page has no pagination
 * link, or the last link carries no usable page number, the url of the
 * current document is returned as the only page so callers can still visit it.
 *
 * @returns {Array<string>} Page urls in ascending page order.
 */
findPages = function() {
  var lastPage, maxPage, pageInfo, pageNr, pages, shownPages;
  shownPages = document.querySelectorAll('ul.ui-pagination > li.ui-pagination-page.ui-pagination-slot > a[href]');
  lastPage = shownPages[shownPages.length - 1];
  pageInfo = lastPage ? /(.*\?page=)(\d+)/.exec(lastPage.href) : null;
  // Parse to a number so the loop bound is never compared as a string.
  maxPage = pageInfo ? parseInt(pageInfo[2], 10) : 0;
  if (maxPage < 1) {
    return [document.location.href];
  }
  pages = [];
  for (pageNr = 1; pageNr <= maxPage; pageNr++) {
    pages.push("" + pageInfo[1] + pageNr);
  }
  return pages;
};
/**
 * Collects the threads listed on the current board page.
 *
 * The thread id is the number following `/thread/` in the link; a link that
 * does not contain `/thread/<digits>/` makes this function throw.
 *
 * @returns {Array<{id: string, title: string, link: string}>}
 */
findThreads = function() {
  var threadAnchors;
  threadAnchors = document.querySelectorAll('tr.item.thread > td:nth-child(3) a.thread-link');
  return Array.prototype.map.call(threadAnchors, function(anchor) {
    return {
      id: /.*\/thread\/(\d*)\/.*/.exec(anchor.href)[1],
      title: anchor.textContent,
      link: anchor.href
    };
  });
};
/**
 * Collects all posts shown on the current thread page.
 *
 * @param {function(Element): string} replaceHtml Converter applied to each
 *   post's message node to obtain its text.
 * @returns {Array<Object>} One entry per post with `id`, `message`,
 *   `attachments` (`name`/`url` pairs), `timestamp` (the node's
 *   `data-timestamp` divided by 1000) and `user` (`link`/`name`).
 */
findPosts = function(replaceHtml) {
  var rows;
  rows = document.querySelectorAll('tr.item.post');
  return Array.prototype.map.call(rows, function(row) {
    var attachmentLinks, attachments, author, authorNode, body, bodyNode, hrefSegments, postId, seconds, timeNode;
    bodyNode = row.querySelector('td.content div.message');
    // Queried before the message is converted: the replaceHtml used by this
    // file rewrites the node's markup while converting it.
    attachmentLinks = bodyNode.querySelectorAll('div.post_attachments blockquote a');
    timeNode = row.querySelector('td.content span.date > abbr.time');
    authorNode = row.querySelector('td.left-panel a.user-link,td.left-panel > div.mini-profile.guest-mini-profile');
    postId = /post-(\d*)/.exec(row.id)[1];
    body = replaceHtml(bodyNode);
    attachments = Array.prototype.map.call(attachmentLinks, function(anchor) {
      return {
        // An attachment link that wraps an element takes that element's alt as name.
        name: anchor.childElementCount > 0 ? anchor.children[0].alt : anchor.text,
        url: anchor.href
      };
    });
    seconds = parseInt(timeNode.attributes['data-timestamp'].value, 10) / 1000;
    if (authorNode.href) {
      // The last segment of the profile link identifies the user.
      hrefSegments = authorNode.href.split('/');
      author = {
        link: hrefSegments[hrefSegments.length - 1],
        name: authorNode.textContent
      };
    } else {
      // No link on the node: take the name from its first text node.
      author = {
        link: '',
        name: authorNode.firstChild.data.replace('\n\t', '')
      };
    }
    return {
      id: postId,
      message: body,
      attachments: attachments,
      timestamp: seconds,
      user: author
    };
  });
};
/**
 * Collects the profile links of all members listed on the current page.
 *
 * @returns {Array<string>} The `href` of every member link, in page order.
 */
findUserLinks = function() {
  var index, links, memberAnchors;
  memberAnchors = document.querySelectorAll('div.container.members a.user-link');
  links = [];
  for (index = 0; index < memberAnchors.length; index++) {
    links.push(memberAnchors[index].href);
  }
  return links;
};
/**
 * Reads the profile shown on the current user page.
 *
 * The last content box of the centre column is only treated as a signature
 * when its converted text contains the "Signature" heading line; that
 * heading is stripped from the stored value.
 *
 * @param {function(Element): string} replaceHtml Converter applied to the
 *   signature node.
 * @returns {{name: string, signature: string, status: string, registered: number}}
 *   `registered` is the node's `data-timestamp` divided by 1000.
 */
getUser = function(replaceHtml) {
  var name, registered, signature, signatureNode, status, statusNodes;
  name = document.querySelectorAll('span.big_username')[0].textContent;
  signatureNode = document.querySelector('td#center-column > div.content-box:last-child');
  signature = '';
  if (signatureNode) {
    signature = replaceHtml(signatureNode);
    signature = /Signature\n/.test(signature) ? signature.replace('Signature\n', '') : '';
  }
  statusNodes = document.querySelectorAll('form.form_user_status div.content-box tr span.personal-text');
  status = statusNodes.length > 0 ? statusNodes[0].textContent : '';
  registered = parseInt(document.querySelectorAll('td#center-column > div.content-box abbr.time')[0].attributes['data-timestamp'].value, 10) / 1000;
  return {
    name: name,
    signature: signature,
    status: status,
    registered: registered
  };
};
/**
 * Reports a missing command line option and terminates the run.
 *
 * Exits with status 1 so that callers of the script (for example the grunt
 * task that launches it) can tell the run failed.
 *
 * @param {string} argument Name of the missing option, without leading dashes.
 * @returns {*} Whatever `casper.exit` returns.
 */
missingArgumentError = function(argument) {
  console.log("missing the " + argument + " argument");
  return casper.exit(1);
};
// Reads a mandatory command line option. A missing (or falsy) option is
// reported through missingArgumentError and yields undefined.
var readRequiredOption = function(name) {
  var value;
  value = casper.cli.options[name];
  if (value) {
    return value;
  }
  missingArgumentError(name);
  return void 0;
};
proboardNr = readRequiredOption('board-nr');
proboardName = readRequiredOption('board-name');
user = readRequiredOption('user');
password = readRequiredOption('password');
// Forum root and member list urls derived from the board name.
proboardUrl = "http://" + proboardName + ".proboards.com/";
proboardUserUrl = proboardUrl + "members";
casper.userAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:30.0) Gecko/20100101 Firefox/30.0');
// Open the forum first, then post the credentials to the login endpoint.
casper.start(proboardUrl, function() {});
casper.thenOpen('https://login.proboards.com/forum_submit/login', {
  method: 'post',
  data: {
    forum: proboardNr,
    email: user,
    password: password,
    "continue": 'Continue'
  }
}, function() {});
/**
 * Schedules the CasperJS steps that read one board: its sub-boards
 * (recursively), its threads, and every post of every thread. The results
 * are stored on the passed object as `board.boards` and `board.threads`
 * (each thread gets `posts` and `poll`).
 *
 * The order in which steps are registered below determines the order of
 * navigation, so statements must not be reordered.
 *
 * @param {{link: string, title: string}} board Board entry as produced by
 *   findBoards; it is extended in place.
 * @returns {*} The result of `casper.thenOpen`.
 */
readBoard = function(board) {
return casper.thenOpen(board.link, function() {
// Sub-boards are read first, through recursion.
board.boards = this.evaluate(findBoards);
this.each(board.boards, function(casper, subboard) {
return readBoard(subboard);
});
// Open the board itself again before reading its threads (presumably
// because the sub-board steps have navigated away - confirm).
return this.thenOpen(board.link, function() {
var boardPages;
this.echo("getting threads for board '" + board.title + "'...");
board.threads = [];
boardPages = this.evaluate(findPages);
// Visit every page of the board and accumulate the threads listed on it.
this.each(boardPages, function(casper, boardPage) {
return this.thenOpen(boardPage, function() {
return board.threads = board.threads.concat(this.evaluate(findThreads));
});
});
// Registered after the page steps, so board.threads is filled by the
// time this step iterates over it.
return this.then(function() {
return this.each(board.threads, function(casper, thread) {
thread.posts = [];
return this.thenOpen(thread.link, function() {
var linkParts, pollName, threadPages;
this.echo("\tgetting posts for thread '" + thread.title + "'...");
pollName = null;
// A poll is saved as a screenshot named after the last segment of the
// thread link.
if (this.exists('div.poll.show.ui-poll')) {
console.log('\t\tsaving poll...');
linkParts = thread.link.split('/');
pollName = linkParts[linkParts.length - 1] + ".png";
this.captureSelector("data/images/polls/" + pollName, 'div.poll.show.ui-poll');
}
thread.poll = pollName;
threadPages = this.evaluate(findPages);
this.each(threadPages, function(casper, threadPage) {
return this.thenOpen(threadPage, function() {
var posts;
// replaceHtml is passed along as an argument so findPosts can call it.
posts = this.evaluate(findPosts, replaceHtml);
this.each(posts, function(casper, post) {
var attachment, j, len, ref, results;
// Download the images referenced in the message and point the tags at
// the local copies.
post.message = loadImages(post.message);
ref = post.attachments;
results = [];
for (j = 0, len = ref.length; j < len; j++) {
attachment = ref[j];
// Download each attachment, then rewrite its url to the local copy.
casper.download(attachment.url, "data/attachments/" + attachment.name);
results.push(attachment.url = "{{baseurl}}/attachments/" + attachment.name);
}
return results;
});
return thread.posts = thread.posts.concat(posts);
});
});
// Runs after all pages of the thread were read: the poll screenshot is
// prepended to the first post's message.
return this.then(function() {
if (thread.poll && thread.posts[0]) {
thread.posts[0].message = "[img]{{baseurl}}/images/polls/" + thread.poll + "[/img]\n\n" + thread.posts[0].message;
}
// A poll without any post is only logged.
if (thread.poll && !thread.posts[0]) {
return console.log("how the fuck did you manage that?");
}
});
});
});
});
});
});
};
// Result object that is serialised to JSON at the end of the run.
proboard = {};
// Step 1: read all boards, starting from the forum root.
casper.thenOpen(proboardUrl, function() {
  proboard.boards = this.evaluate(findBoards);
  return this.each(proboard.boards, function(casper, board) {
    return readBoard(board);
  });
});
// Step 2: walk every page of the member list and read each profile.
casper.thenOpen(proboardUserUrl, function() {
  var userPages;
  proboard.users = [];
  userPages = this.evaluate(findPages);
  return this.each(userPages, function(casper, userPage) {
    return this.thenOpen(userPage, function() {
      var userlinks;
      userlinks = this.evaluate(findUserLinks);
      return this.each(userlinks, function(casper, userlink) {
        return this.thenOpen(userlink, function() {
          // Kept in a local variable so the outer `user` (the login name)
          // is not overwritten with profile objects.
          var profile;
          this.echo("getting userinfo for '" + userlink + "'...");
          profile = this.evaluate(getUser, replaceHtml);
          if (!profile) {
            // No profile data came back (presumably the page lacks the
            // expected markup); skip this user instead of failing the step
            // on a property access.
            this.echo("Error: could not read userinfo for '" + userlink + "'", "ERROR");
            return;
          }
          profile.signature = loadImages(profile.signature);
          return proboard.users = proboard.users.concat(profile);
        });
      });
    });
  });
});
// Step 3: write the collected data to data/<board name>.json.
casper.then(function() {
  var json;
  json = JSON.stringify(proboard, null, '\t');
  return fs.write("data/" + proboardName + ".json", json, 'w');
});
casper.run();
}).call(this);