From fc83ea25a966596ca6c81977e7625a6f972a5b0a Mon Sep 17 00:00:00 2001
From: Sebastian Hugentobler <sebastian.hugentobler@idparc.ch>
Date: Thu, 12 Mar 2015 15:56:56 +0100
Subject: [PATCH] initial commit

---
 .gitignore                |   3 +
 Gruntfile.js              |  23 ++
 README.md                 |  13 ++
 package.json              |  15 ++
 src/proboard_saver.coffee | 377 +++++++++++++++++++++++++++++++++
 src/proboard_saver.js     | 430 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 861 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Gruntfile.js
 create mode 100644 README.md
 create mode 100644 package.json
 create mode 100644 src/proboard_saver.coffee
 create mode 100644 src/proboard_saver.js

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fde91ac
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+build/
+node_modules/
+.DS_Store
diff --git a/Gruntfile.js b/Gruntfile.js
new file mode 100644
index 0000000..56a6838
--- /dev/null
+++ b/Gruntfile.js
@@ -0,0 +1,23 @@
+module.exports = function(grunt) { // Grunt entry point: registers tasks that launch the scraper
+  grunt.initConfig({
+    pkg: grunt.file.readJSON( 'package.json' ),
+
+    run: { // configuration for the grunt-run plugin loaded below
+      saver: {
+        cmd: 'casperjs',
+        args: [
+          'src/proboard_saver.coffee',
+          '--board-nr=YOUR-BOARD-NR', // the YOUR-* values look like placeholders to replace before running
+          '--board-name=YOUR-BOARD-NAME',
+          '--user=YOUR-USERNAME',
+          '--password=YOUR-PASSWORD'
+        ]
+      }
+    }
+  });
+
+  grunt.loadNpmTasks('grunt-run');
+
+  grunt.registerTask('default', ['run:saver']); // 'default' and 'save' both run the same scraper target
+  grunt.registerTask('save', ['run:saver']);
+};
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..801319e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,13 @@
+This tool tries to scrape all the accessible data from a [Proboards Forum](https://proboards.com/).
+
+I wrote this because I wanted to export at least the text data for an old
+board hosted there which some friends and I were using years ago. As it turned
+out you simply can't do that. So here we are.
+
+The program probably does some horrible things and I can't say if it will work
+for every theme. But hey, it only has to work one time to get at the data.
+
+It is able to detect attachments and images and tries to download them too.
+
+A working [casperjs](http://casperjs.org/) installation is needed for the stuff to work.
+
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..911b708
--- /dev/null
+++ b/package.json
@@ -0,0 +1,15 @@
+{
+  "name": "proboard_saver",
+  "version": "0.0.1",
+  "description": "",
+  "main": "src/proboard_saver.coffee",
+  "dependencies": {
+    "grunt": "~0.4.2",
+    "grunt-run": "~0.2.1"
+  },
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "author": "Sebastian Hugentobler",
+  "license": "CC0"
+}
diff --git a/src/proboard_saver.coffee b/src/proboard_saver.coffee
new file mode 100644
index 0000000..937936a
--- /dev/null
+++ b/src/proboard_saver.coffee
@@ -0,0 +1,377 @@
+# 2015 by Sebastian Hugentobler <shugentobler@vanwa.ch>
+# To the extent possible under law, the author(s) have dedicated all copyright
+# and related and neighboring rights to this software to the public domain
+# worldwide. This software is distributed without any warranty.
+# See http://creativecommons.org/publicdomain/zero/1.0/ for a description of CC0.
+
+casper = require('casper').create(
+  verbose: false
+  logLevel: 'info'
+  pageSettings: {
+    webSecurityEnabled: false
+  }
+)
+
+utils = require('utils')
+fs = require('fs')
+
+casper.on 'error', (msg, trace) ->
+  @echo "Error: #{msg}", "ERROR"
+
+casper.on 'page.error', (msg, trace) ->
+  @echo "Error: #{msg}", "ERROR"
+
+casper.on 'remote.message', (msg, trace) ->
+  if not /Unsafe JavaScript attempt/.test msg
+    @echo "remote log: #{msg}", "INFO"
+
+# Downloads every [img]url[/img] target found in searchString into data/images/
+# and returns the text with each tag rewritten to "{{baseurl}}/images/<file name>".
+loadImages = (searchString) ->
+  return searchString unless searchString # nothing to scan (null/undefined/empty text)
+  images = searchString.match /\[img\](.*?)\[\/img\]/g
+  if images
+    for image in images
+      detailImage = image.match(/\[img\](.*?)\[\/img\]/)[1]
+      imageParts = detailImage.split '/'
+      imageName = imageParts[imageParts.length - 1]
+      console.log "\t\tdownloading image '#{imageName}'..."
+      casper.download detailImage, "data/images/#{imageName}"
+      # Literal replace: the URL may hold regex metacharacters ('?', '+', '(')
+      # and the file name may hold '$', so neither RegExp nor a pattern string is safe.
+      searchString = searchString.split(image).join "[img]{{baseurl}}/images/#{imageName}[/img]"
+  return searchString
+
+replaceHtml = (element) ->
+  images = Array::map.call element.querySelectorAll('img'), (img) ->
+    src: img.src, alt: if img.hasAttribute('alt') then img.alt else ''
+
+  for image in images
+    element.innerHTML = element.innerHTML.replace /<img[^>]*>/, "[img#{if image.alt then '=' + image.alt else ''}]#{image.src}[/img]"
+
+  videos = Array::map.call element.querySelectorAll("iframe[title='YouTube video player']"), (video) -> video.src.split('/')[4].split('?')[0]
+  for video in videos
+    element.innerHTML = element.innerHTML.replace /<iframe title="YouTube video player"[^>]*>.*?<\/iframe>/, "[video]https://www.youtube.com/watch?v=#{video}[/video]"
+
+  cursiveElements = Array::map.call element.querySelectorAll('i'), (cursive) -> cursive.innerHTML
+  for cursive in cursiveElements
+    element.innerHTML = element.innerHTML.replace /<i>.*?<\/i>/, "[i]#{cursive}[/i]"
+
+  boldElements = Array::map.call element.querySelectorAll('b'), (bold) -> bold.innerHTML
+  for bold in boldElements
+    element.innerHTML = element.innerHTML.replace /<b>.*?<\/b>/, "[b]#{bold}[/b]"
+
+  underlinedElements = Array::map.call element.querySelectorAll('u'), (underlined) -> underlined.innerHTML
+  for underlined in underlinedElements
+    element.innerHTML = element.innerHTML.replace /<u>.*?<\/u>/, "[u]#{underlined}[/u]"
+
+  colourElements = Array::map.call element.querySelectorAll('font[color]'), (colour) -> name: colour.attributes['color'].value.toLowerCase(), innerHTML: colour.innerHTML
+  for colour in colourElements
+    element.innerHTML = element.innerHTML.replace /<font color=".*">[^<\/font>]*<\/font>/, "[colour=#{colour.name}]#{colour.innerHTML}[/colour]"
+
+  quote = element.querySelector 'div.quote_body'
+  while quote
+    quoteHeaderNode = quote.querySelector 'div.quote_header'
+
+    registeredUserNode = if quoteHeaderNode then quote.querySelector('div.quote_header').querySelector('span[itemprop="name"]') else null
+
+    user = null
+
+    if registeredUserNode
+      user = registeredUserNode.textContent
+    else if quote.parentNode.attributes['author']
+      user = quote.parentNode.attributes['author'].value
+      if user.substr(0, 1) == '@'
+        user = user.substr 1
+
+    quoteHeader = quote.querySelector 'div.quote_header'
+    if quoteHeader then quoteHeader.parentNode.removeChild quoteHeader
+
+    quoteAvatar = quote.querySelector 'div.quote_avatar_container'
+    if quoteAvatar then quoteAvatar.parentNode.removeChild quoteAvatar
+
+    quoteClear = quote.querySelector 'div.quote_clear'
+    if quoteClear then quoteClear.parentNode.removeChild quoteClear
+
+    message = quote.innerHTML
+
+    dummySpan = document.createElement 'span'
+    dummySpan.setAttribute 'class', 'dummytag'
+    dummySpan.innerHTML = "[quote#{if user then '=' + user else ''}]#{message}[/quote]"
+
+    quote.parentNode.parentNode.replaceChild dummySpan, quote.parentNode
+
+    quote = element.querySelector 'div.quote_body'
+
+  dummyElements = Array::map.call element.querySelectorAll('span.dummytag'), (dummy) -> dummy.innerHTML
+  for dummyContent in dummyElements
+    element.innerHTML = element.innerHTML.replace /<span class="dummytag">.*<\/span>/, dummyContent
+
+  linkElements = Array::map.call element.querySelectorAll('a[href]'), (link) -> target: link.attributes['href'].value, name: link.innerText
+  for link in linkElements
+    element.innerHTML = element.innerHTML.replace /<a[^>]*>.*?<\/a>/, "[url=#{link.target}]#{link.name}[/url]"
+
+  element.innerHTML = element.innerHTML.replace /<font [^>]*>/g, ''
+  element.innerHTML = element.innerHTML.replace /<\/font>/g, ''
+  element.innerHTML = element.innerHTML.replace /<div class="quote_clear"><\/div>/g, ''
+
+  finalText = element.innerText
+  finalText = finalText.replace /<br>/g, '\n'
+  finalText = finalText.replace /\[img=([^\]]*)\]http:\/\/images\.proboards\.com\/v5\/images\/smiley\/.*?\[\/img\]/g, '$1'
+  finalText = finalText.replace /\[img=([^\]]*)\]http:\/\/images\.proboards\.com\/v5\/smiley\/.*?\[\/img\]/g, '$1'
+
+  attachmentIndex = finalText.indexOf('\n\n[b]Attachments:[/b]\n\n')
+  if attachmentIndex > -1
+    finalText = finalText.substring 0, attachmentIndex
+
+  return finalText
+
+findBoards = ->
+  boards = document.querySelectorAll('tr.board.item td:nth-child(2) > span > a')
+
+  boardTitles = Array::map.call boards, (e) -> e.textContent
+  boardLinks = Array::map.call boards, (e) -> e.href
+
+  boardDescriptions = document.querySelectorAll('tr.board.item td:nth-child(2) > p.description')
+  boardDescriptionList = Array::map.call boardDescriptions, (e) -> e.textContent
+
+  boardInfo = []
+
+  i = 0
+  while i < boardTitles.length
+    boardInfo.push
+                   title: boardTitles[i]
+                   description: boardDescriptionList[i]
+                   link: boardLinks[i]
+
+    i++
+
+  return boardInfo
+
+# Runs in the page: lists the URL of every page of the open listing, from the last "?page=N" link.
+findPages = ->
+  shownPages = document.querySelectorAll('ul.ui-pagination > li.ui-pagination-page.ui-pagination-slot > a[href]')
+  lastPage = shownPages[shownPages.length - 1]
+  pageInfo = /(.*\?page=)(\d+)/.exec lastPage
+  return [document.location.href] unless pageInfo # no usable pagination link: the current page is the only one
+  pageBase = pageInfo[1]
+  maxPage = parseInt pageInfo[2], 10
+  pages = ("#{pageBase}#{pageNr}" for pageNr in [1..maxPage])
+
+findThreads = ->
+  threads = document.querySelectorAll('tr.item.thread > td:nth-child(3) a.thread-link')
+
+  threadTitles = Array::map.call threads, (e) -> e.textContent
+  threadLinks = Array::map.call threads, (e) -> e.href
+  threadIds = Array::map.call threads, (e) ->
+    /.*\/thread\/(\d*)\/.*/.exec(e.href)[1]
+
+  threadInfo = []
+
+  i = 0
+  while i < threadTitles.length
+    threadInfo.push
+                   id: threadIds[i]
+                   title: threadTitles[i]
+                   link: threadLinks[i]
+
+    i++
+
+  return threadInfo
+
+findPosts = (replaceHtml) ->
+  postInfo = Array::map.call document.querySelectorAll('tr.item.post'), (e) ->
+    messageNode = e.querySelector('td.content div.message')
+    attachmentNodes = messageNode.querySelectorAll('div.post_attachments blockquote a')
+    dateNode = e.querySelector('td.content span.date > abbr.time')
+    userNode = e.querySelector('td.left-panel a.user-link,td.left-panel > div.mini-profile.guest-mini-profile')
+
+    id = /post-(\d*)/.exec(e.id)[1]
+    message = replaceHtml(messageNode)
+
+    attachments = []
+    for attachmentNode in attachmentNodes
+      attachmentName = attachmentNode.text
+      if attachmentNode.childElementCount > 0
+        attachmentName = attachmentNode.children[0].alt
+
+      attachments.push
+        name: attachmentName
+        url: attachmentNode.href
+
+    timestamp = parseInt(dateNode.attributes['data-timestamp'].value, 10) / 1000
+
+    user = { }
+    if userNode.href
+      linkSplit = userNode.href.split '/'
+      user = { link: linkSplit[linkSplit.length - 1], name: userNode.textContent }
+    else
+      user = { link: '', name: userNode.firstChild.data.replace '\n\t', '' }
+
+    return {
+      id: id,
+      message: message,
+      attachments: attachments,
+      timestamp: timestamp,
+      user: user
+    }
+
+  return postInfo
+
+findUserLinks = ->
+  Array::map.call document.querySelectorAll('div.container.members a.user-link'), (e) -> e.href
+
+getUser = (replaceHtml) ->
+  user = {}
+
+  user.name = document.querySelectorAll('span.big_username')[0].textContent
+
+  signatureNode = document.querySelector('td#center-column > div.content-box:last-child')
+
+  user.signature = ''
+
+  if signatureNode
+    user.signature = replaceHtml signatureNode
+
+    if not /Signature\n/.test user.signature
+      user.signature = ''
+
+    user.signature = user.signature.replace 'Signature\n', ''
+
+  statusNode = document.querySelectorAll('form.form_user_status div.content-box tr span.personal-text')
+  user.status = if statusNode.length > 0 then statusNode[0].textContent else ''
+
+  user.registered = parseInt(document.querySelectorAll('td#center-column > div.content-box abbr.time')[0].attributes['data-timestamp'].value, 10) / 1000
+
+  return user
+
+missingArgumentError = (argument) ->
+  console.log "missing the #{ argument } argument" # tell the user which required CLI option is absent
+  casper.exit 1 # non-zero status so callers (e.g. the grunt 'run' task) can detect the failure
+
+if casper.cli.options['board-nr']
+  proboardNr = casper.cli.options['board-nr']
+else
+  missingArgumentError 'board-nr'
+
+if casper.cli.options['board-name']
+  proboardName = casper.cli.options['board-name']
+else
+  missingArgumentError 'board-name'
+
+if casper.cli.options['user']
+  user = casper.cli.options['user']
+else
+  missingArgumentError 'user'
+
+if casper.cli.options['password']
+  password = casper.cli.options['password']
+else
+  missingArgumentError 'password'
+
+proboardUrl = "http://#{ proboardName }.proboards.com/"
+proboardUserUrl = "#{ proboardUrl }members"
+
+casper.userAgent 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:30.0) Gecko/20100101 Firefox/30.0'
+
+
+casper.start proboardUrl, ->
+
+casper.thenOpen 'https://login.proboards.com/forum_submit/login',
+  method: 'post'
+  data:
+    forum: proboardNr
+    email: user
+    password: password
+    continue: 'Continue'
+, ->
+
+readBoard = (board) ->
+  casper.thenOpen board.link, ->
+    board.boards = @evaluate findBoards
+
+    @each board.boards, (casper, subboard) ->
+      readBoard subboard
+
+    @thenOpen board.link, ->
+      @echo "getting threads for board '#{ board.title }'..."
+
+      board.threads = []
+
+      boardPages = @evaluate findPages
+
+      @each boardPages, (casper, boardPage) ->
+        @thenOpen boardPage, ->
+          board.threads = board.threads.concat @evaluate findThreads
+
+      @then ->
+        @each board.threads, (casper, thread) ->
+          thread.posts = []
+
+          @thenOpen thread.link, ->
+            @echo "\tgetting posts for thread '#{ thread.title }'..."
+
+            pollName = null
+            if @exists 'div.poll.show.ui-poll'
+              console.log '\t\tsaving poll...'
+
+              linkParts = thread.link.split '/'
+              pollName = "#{linkParts[linkParts.length - 1]}.png"
+
+              @captureSelector "data/images/polls/#{pollName}", 'div.poll.show.ui-poll'
+
+            thread.poll = pollName
+
+            threadPages = @evaluate findPages
+
+            @each threadPages, (casper, threadPage) ->
+              @thenOpen threadPage, ->
+                posts = @evaluate findPosts, replaceHtml
+
+                @each posts, (casper, post) ->
+                  post.message = loadImages post.message
+
+                  for attachment in post.attachments
+                    casper.download attachment.url, "data/attachments/#{attachment.name}"
+                    attachment.url = "{{baseurl}}/attachments/#{attachment.name}"
+
+                thread.posts = thread.posts.concat posts
+
+            @then ->
+              if thread.poll and thread.posts[0]
+                thread.posts[0].message = "[img]{{baseurl}}/images/polls/#{thread.poll}[/img]\n\n#{thread.posts[0].message}"
+
+              if thread.poll and not thread.posts[0]
+                console.log "how the fuck did you manage that?"
+
+proboard = {}
+
+casper.thenOpen proboardUrl, ->
+  proboard.boards = @evaluate findBoards
+
+  @each proboard.boards, (casper, board) ->
+    readBoard board
+
+casper.thenOpen proboardUserUrl, ->
+  proboard.users = []
+
+  userPages = @evaluate findPages
+
+  @each userPages, (casper, userPage) ->
+    @thenOpen userPage, ->
+      userlinks = @evaluate findUserLinks
+
+      @each userlinks, (casper, userlink) ->
+        @thenOpen userlink, ->
+          @echo "getting userinfo for '#{ userlink }'..."
+
+          user = @evaluate getUser, replaceHtml
+          user.signature = loadImages user.signature
+
+          proboard.users = proboard.users.concat user
+
+casper.then ->
+  json = JSON.stringify(proboard, null, '\t')
+  fs.write "data/#{ proboardName }.json", json, 'w'
+
+casper.run()
diff --git a/src/proboard_saver.js b/src/proboard_saver.js
new file mode 100644
index 0000000..652c748
--- /dev/null
+++ b/src/proboard_saver.js
@@ -0,0 +1,430 @@
+// Generated by CoffeeScript 1.8.0
+(function() {
+  var casper, findBoards, findPages, findPosts, findThreads, findUserLinks, fs, getUser, loadImages, missingArgumentError, password, proboard, proboardName, proboardNr, proboardUrl, proboardUserUrl, readBoard, replaceHtml, user, utils;
+
+  casper = require('casper').create({
+    verbose: false,
+    logLevel: 'info',
+    pageSettings: {
+      webSecurityEnabled: false
+    }
+  });
+
+  utils = require('utils');
+
+  fs = require('fs');
+
+  casper.on('error', function(msg, trace) {
+    return this.echo("Error: " + msg, "ERROR");
+  });
+
+  casper.on('page.error', function(msg, trace) {
+    return this.echo("Error: " + msg, "ERROR");
+  });
+
+  casper.on('remote.message', function(msg, trace) {
+    if (!/Unsafe JavaScript attempt/.test(msg)) {
+      return this.echo("remote log: " + msg, "INFO");
+    }
+  });
+
+  loadImages = function(searchString) {
+    var detailImage, image, imageName, imageParts, images, re, _i, _len;
+    images = searchString.match(/\[img\](.*?)\[\/img\]/g);
+    if (images) {
+      for (_i = 0, _len = images.length; _i < _len; _i++) {
+        image = images[_i];
+        detailImage = image.match(/\[img\](.*?)\[\/img\]/)[1];
+        imageParts = detailImage.split('/');
+        imageName = imageParts[imageParts.length - 1];
+        console.log("\t\tdownloading image '" + imageName + "'...");
+        casper.download(detailImage, "data/images/" + imageName);
+        re = new RegExp("\\[img\\]" + detailImage + "\\[/img\\]", "i");
+        searchString = searchString.replace(re, "[img]{{baseurl}}/images/" + imageName + "[/img]");
+      }
+    }
+    return searchString;
+  };
+
+  replaceHtml = function(element) {
+    var bold, boldElements, colour, colourElements, cursive, cursiveElements, dummyContent, dummyElements, dummySpan, finalText, image, images, link, linkElements, message, quote, quoteAvatar, quoteClear, quoteHeader, quoteHeaderNode, registeredUserNode, underlined, underlinedElements, user, video, videos, _i, _j, _k, _l, _len, _len1, _len2, _len3, _len4, _len5, _len6, _len7, _m, _n, _o, _p;
+    images = Array.prototype.map.call(element.querySelectorAll('img'), function(img) {
+      return {
+        src: img.src,
+        alt: img.hasAttribute('alt') ? img.alt : ''
+      };
+    });
+    for (_i = 0, _len = images.length; _i < _len; _i++) {
+      image = images[_i];
+      element.innerHTML = element.innerHTML.replace(/<img[^>]*>/, "[img" + (image.alt ? '=' + image.alt : '') + "]" + image.src + "[/img]");
+    }
+    videos = Array.prototype.map.call(element.querySelectorAll("iframe[title='YouTube video player']"), function(video) {
+      return video.src.split('/')[4].split('?')[0];
+    });
+    for (_j = 0, _len1 = videos.length; _j < _len1; _j++) {
+      video = videos[_j];
+      element.innerHTML = element.innerHTML.replace(/<iframe title="YouTube video player"[^>]*>.*?<\/iframe>/, "[video]https://www.youtube.com/watch?v=" + video + "[/video]");
+    }
+    cursiveElements = Array.prototype.map.call(element.querySelectorAll('i'), function(cursive) {
+      return cursive.innerHTML;
+    });
+    for (_k = 0, _len2 = cursiveElements.length; _k < _len2; _k++) {
+      cursive = cursiveElements[_k];
+      element.innerHTML = element.innerHTML.replace(/<i>.*?<\/i>/, "[i]" + cursive + "[/i]");
+    }
+    boldElements = Array.prototype.map.call(element.querySelectorAll('b'), function(bold) {
+      return bold.innerHTML;
+    });
+    for (_l = 0, _len3 = boldElements.length; _l < _len3; _l++) {
+      bold = boldElements[_l];
+      element.innerHTML = element.innerHTML.replace(/<b>.*?<\/b>/, "[b]" + bold + "[/b]");
+    }
+    underlinedElements = Array.prototype.map.call(element.querySelectorAll('u'), function(underlined) {
+      return underlined.innerHTML;
+    });
+    for (_m = 0, _len4 = underlinedElements.length; _m < _len4; _m++) {
+      underlined = underlinedElements[_m];
+      element.innerHTML = element.innerHTML.replace(/<u>.*?<\/u>/, "[u]" + underlined + "[/u]");
+    }
+    colourElements = Array.prototype.map.call(element.querySelectorAll('font[color]'), function(colour) {
+      return {
+        name: colour.attributes['color'].value.toLowerCase(),
+        innerHTML: colour.innerHTML
+      };
+    });
+    for (_n = 0, _len5 = colourElements.length; _n < _len5; _n++) {
+      colour = colourElements[_n];
+      element.innerHTML = element.innerHTML.replace(/<font color=".*">[^<\/font>]*<\/font>/, "[colour=" + colour.name + "]" + colour.innerHTML + "[/colour]");
+    }
+    quote = element.querySelector('div.quote_body');
+    while (quote) {
+      quoteHeaderNode = quote.querySelector('div.quote_header');
+      registeredUserNode = quoteHeaderNode ? quote.querySelector('div.quote_header').querySelector('span[itemprop="name"]') : null;
+      user = null;
+      if (registeredUserNode) {
+        user = registeredUserNode.textContent;
+      } else if (quote.parentNode.attributes['author']) {
+        user = quote.parentNode.attributes['author'].value;
+        if (user.substr(0, 1) === '@') {
+          user = user.substr(1);
+        }
+      }
+      quoteHeader = quote.querySelector('div.quote_header');
+      if (quoteHeader) {
+        quoteHeader.parentNode.removeChild(quoteHeader);
+      }
+      quoteAvatar = quote.querySelector('div.quote_avatar_container');
+      if (quoteAvatar) {
+        quoteAvatar.parentNode.removeChild(quoteAvatar);
+      }
+      quoteClear = quote.querySelector('div.quote_clear');
+      if (quoteClear) {
+        quoteClear.parentNode.removeChild(quoteClear);
+      }
+      message = quote.innerHTML;
+      dummySpan = document.createElement('span');
+      dummySpan.setAttribute('class', 'dummytag');
+      dummySpan.innerHTML = "[quote" + (user ? '=' + user : '') + "]" + message + "[/quote]";
+      quote.parentNode.parentNode.replaceChild(dummySpan, quote.parentNode);
+      quote = element.querySelector('div.quote_body');
+    }
+    dummyElements = Array.prototype.map.call(element.querySelectorAll('span.dummytag'), function(dummy) {
+      return dummy.innerHTML;
+    });
+    for (_o = 0, _len6 = dummyElements.length; _o < _len6; _o++) {
+      dummyContent = dummyElements[_o];
+      element.innerHTML = element.innerHTML.replace(/<span class="dummytag">.*<\/span>/, dummyContent);
+    }
+    linkElements = Array.prototype.map.call(element.querySelectorAll('a[href]'), function(link) {
+      return {
+        target: link.attributes['href'].value,
+        name: link.innerText
+      };
+    });
+    for (_p = 0, _len7 = linkElements.length; _p < _len7; _p++) {
+      link = linkElements[_p];
+      element.innerHTML = element.innerHTML.replace(/<a[^>]*>.*?<\/a>/, "[url=" + link.target + "]" + link.name + "[/url]");
+    }
+    element.innerHTML = element.innerHTML.replace(/<font [^>]*>/g, '');
+    element.innerHTML = element.innerHTML.replace(/<\/font>/g, '');
+    element.innerHTML = element.innerHTML.replace(/<div class="quote_clear"><\/div>/g, '');
+    finalText = element.innerText;
+    finalText = finalText.replace(/<br>/g, '\n');
+    finalText = finalText.replace(/\[img=([^\]]*)\]http:\/\/images\.proboards\.com\/v5\/images\/smiley\/.*?\[\/img\]/g, '$1');
+    finalText = finalText.replace(/\[img=([^\]]*)\]http:\/\/images\.proboards\.com\/v5\/smiley\/.*?\[\/img\]/g, '$1');
+    return finalText;
+  };
+
+  findBoards = function() {
+    var boardDescriptionList, boardDescriptions, boardInfo, boardLinks, boardTitles, boards, i;
+    boards = document.querySelectorAll('tr.board.item td:nth-child(2) > span > a');
+    boardTitles = Array.prototype.map.call(boards, function(e) {
+      return e.textContent;
+    });
+    boardLinks = Array.prototype.map.call(boards, function(e) {
+      return e.href;
+    });
+    boardDescriptions = document.querySelectorAll('tr.board.item td:nth-child(2) > p.description');
+    boardDescriptionList = Array.prototype.map.call(boardDescriptions, function(e) {
+      return e.textContent;
+    });
+    boardInfo = [];
+    i = 0;
+    while (i < boardTitles.length) {
+      boardInfo.push({
+        title: boardTitles[i],
+        description: boardDescriptionList[i],
+        link: boardLinks[i]
+      });
+      i++;
+    }
+    return boardInfo;
+  };
+
+  findPages = function() {
+    var lastPage, maxPage, pageBase, pageInfo, pageNr, pages, shownPages;
+    shownPages = document.querySelectorAll('ul.ui-pagination > li.ui-pagination-page.ui-pagination-slot > a[href]');
+    lastPage = shownPages[shownPages.length - 1];
+    pageInfo = /(.*\?page=)(\d*)/.exec(lastPage);
+    pageBase = pageInfo[1];
+    maxPage = pageInfo[2];
+    return pages = (function() {
+      var _i, _results;
+      _results = [];
+      for (pageNr = _i = 1; 1 <= maxPage ? _i <= maxPage : _i >= maxPage; pageNr = 1 <= maxPage ? ++_i : --_i) {
+        _results.push("" + pageBase + pageNr);
+      }
+      return _results;
+    })();
+  };
+
+  findThreads = function() {
+    var i, threadIds, threadInfo, threadLinks, threadTitles, threads;
+    threads = document.querySelectorAll('tr.item.thread > td:nth-child(3) a.thread-link');
+    threadTitles = Array.prototype.map.call(threads, function(e) {
+      return e.textContent;
+    });
+    threadLinks = Array.prototype.map.call(threads, function(e) {
+      return e.href;
+    });
+    threadIds = Array.prototype.map.call(threads, function(e) {
+      return /.*\/thread\/(\d*)\/.*/.exec(e.href)[1];
+    });
+    threadInfo = [];
+    i = 0;
+    while (i < threadTitles.length) {
+      threadInfo.push({
+        id: threadIds[i],
+        title: threadTitles[i],
+        link: threadLinks[i]
+      });
+      i++;
+    }
+    return threadInfo;
+  };
+
+  findPosts = function(replaceHtml) {
+    var postInfo;
+    postInfo = Array.prototype.map.call(document.querySelectorAll('tr.item.post'), function(e) {
+      var attachmentName, attachmentNode, attachmentNodes, attachments, dateNode, id, linkSplit, message, messageNode, timestamp, user, userNode, _i, _len;
+      messageNode = e.querySelector('td.content div.message');
+      attachmentNodes = messageNode.querySelectorAll('div.post_attachments blockquote a');
+      dateNode = e.querySelector('td.content span.date > abbr.time');
+      userNode = e.querySelector('td.left-panel a.user-link,td.left-panel > div.mini-profile.guest-mini-profile');
+      id = /post-(\d*)/.exec(e.id)[1];
+      message = replaceHtml(messageNode);
+      attachments = [];
+      for (_i = 0, _len = attachmentNodes.length; _i < _len; _i++) {
+        attachmentNode = attachmentNodes[_i];
+        attachmentName = attachmentNode.text;
+        if (attachmentNode.childElementCount > 0) {
+          attachmentName = attachmentNode.children[0].alt;
+        }
+        attachments.push({
+          name: attachmentName,
+          url: attachmentNode.href
+        });
+      }
+      timestamp = parseInt(dateNode.attributes['data-timestamp'].value, 10) / 1000;
+      user = {};
+      if (userNode.href) {
+        linkSplit = userNode.href.split('/');
+        user = {
+          link: linkSplit[linkSplit.length - 1],
+          name: userNode.textContent
+        };
+      } else {
+        user = {
+          link: '',
+          name: userNode.firstChild.data.replace('\n\t', '')
+        };
+      }
+      return {
+        id: id,
+        message: message,
+        attachments: attachments,
+        timestamp: timestamp,
+        user: user
+      };
+    });
+    return postInfo;
+  };
+
+  findUserLinks = function() {
+    return Array.prototype.map.call(document.querySelectorAll('div.container.members a.user-link'), function(e) {
+      return e.href;
+    });
+  };
+
+  getUser = function(replaceHtml) {
+    var signatureNode, statusNode, user;
+    user = {};
+    user.name = document.querySelectorAll('span.big_username')[0].textContent;
+    signatureNode = document.querySelector('td#center-column > div.content-box:last-child');
+    user.signature = '';
+    if (signatureNode) {
+      user.signature = replaceHtml(signatureNode);
+      if (!/Signature\n/.test(user.signature)) {
+        user.signature = '';
+      }
+      user.signature = user.signature.replace('Signature\n', '');
+    }
+    statusNode = document.querySelectorAll('form.form_user_status div.content-box tr span.personal-text');
+    user.status = statusNode.length > 0 ? statusNode[0].textContent : '';
+    user.registered = parseInt(document.querySelectorAll('td#center-column > div.content-box abbr.time')[0].attributes['data-timestamp'].value, 10) / 1000;
+    return user;
+  };
+
+  missingArgumentError = function(argument) {
+    console.log("missing the " + argument + " argument");
+    return casper.exit();
+  };
+
+  if (casper.cli.options['board-nr']) {
+    proboardNr = casper.cli.options['board-nr'];
+  } else {
+    missingArgumentError('board-nr');
+  }
+
+  if (casper.cli.options['board-name']) {
+    proboardName = casper.cli.options['board-name'];
+  } else {
+    missingArgumentError('board-name');
+  }
+
+  if (casper.cli.options['user']) {
+    user = casper.cli.options['user'];
+  } else {
+    missingArgumentError('user');
+  }
+
+  if (casper.cli.options['password']) {
+    password = casper.cli.options['password'];
+  } else {
+    missingArgumentError('password');
+  }
+
+  proboardUrl = "http://" + proboardName + ".proboards.com/";
+
+  proboardUserUrl = "" + proboardUrl + "members";
+
+  casper.userAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:30.0) Gecko/20100101 Firefox/30.0');
+
+  casper.start(proboardUrl, function() {});
+
+  casper.thenOpen('https://login.proboards.com/forum_submit/login', {
+    method: 'post',
+    data: {
+      forum: proboardNr,
+      email: user,
+      password: password,
+      "continue": 'Continue'
+    }
+  }, function() {});
+
+  readBoard = function(board) {
+    return casper.thenOpen(board.link, function() {
+      board.boards = this.evaluate(findBoards);
+      this.each(board.boards, function(casper, subboard) {
+        return readBoard(subboard);
+      });
+      return this.thenOpen(board.link, function() {
+        var boardPages;
+        this.echo("getting threads for board '" + board.title + "'...");
+        board.threads = [];
+        boardPages = this.evaluate(findPages);
+        this.each(boardPages, function(casper, boardPage) {
+          return this.thenOpen(boardPage, function() {
+            return board.threads = board.threads.concat(this.evaluate(findThreads));
+          });
+        });
+        return this.then(function() {
+          this.then(function() {
+            return board.threads = board.threads[0];
+          });
+          return this.each(board.threads, function(casper, thread) {
+            thread.posts = [];
+            return this.thenOpen(thread.link, function() {
+              var linkParts, pollName, threadPages;
+              this.echo("\tgetting posts for thread '" + thread.title + "'...");
+              pollName = null;
+              if (this.exists('div.poll.show.ui-poll')) {
+                console.log('\t\tsaving poll...');
+                linkParts = thread.link.split('/');
+                pollName = "" + linkParts[linkParts.length - 1] + ".png";
+                this.captureSelector("data/images/polls/" + pollName, 'div.poll.show.ui-poll');
+              }
+              thread.poll = pollName;
+              threadPages = this.evaluate(findPages);
+              this.each(threadPages, function(casper, threadPage) {
+                return this.thenOpen(threadPage, function() {
+                  var posts;
+                  posts = this.evaluate(findPosts, replaceHtml);
+                  this.each(posts, function(casper, post) {
+                    var attachment, _i, _len, _ref, _results;
+                    post.message = loadImages(post.message.message);
+                    utils.dump(post);
+                    _ref = post.attachments;
+                    _results = [];
+                    for (_i = 0, _len = _ref.length; _i < _len; _i++) {
+                      attachment = _ref[_i];
+                      casper.download(attachment.url, "data/attachments/" + attachment.name);
+                      _results.push(attachment.url = "{{baseurl}}/attachments/" + attachment.name);
+                    }
+                    return _results;
+                  });
+                  utils.dump(posts);
+                  return thread.posts = thread.posts.concat(posts);
+                });
+              });
+              return this.then(function() {
+                if (thread.poll && thread.posts[0]) {
+                  thread.posts[0].message = "[img]{{baseurl}}/images/polls/" + thread.poll + "[/img]\n\n" + thread.posts[0].message;
+                }
+                if (thread.poll && !thread.posts[0]) {
+                  return console.log("how the fuck did you manage that?");
+                }
+              });
+            });
+          });
+        });
+      });
+    });
+  };
+
+  proboard = {};
+
+  casper.thenOpen(proboardUrl, function() {
+    proboard.boards = this.evaluate(findBoards);
+    return readBoard(proboard.boards[0]);
+  });
+
+  casper.then(function() {
+    var json;
+    json = JSON.stringify(proboard, null, '\t');
+    return fs.write("data/" + proboardName + ".json", json, 'w');
+  });
+
+  casper.run();
+
+}).call(this);