From fc83ea25a966596ca6c81977e7625a6f972a5b0a Mon Sep 17 00:00:00 2001 From: Sebastian Hugentobler Date: Thu, 12 Mar 2015 15:56:56 +0100 Subject: [PATCH] initial commit --- .gitignore | 3 + Gruntfile.js | 23 ++ README.md | 13 ++ package.json | 15 ++ src/proboard_saver.coffee | 377 +++++++++++++++++++++++++++++++++ src/proboard_saver.js | 430 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 861 insertions(+) create mode 100644 .gitignore create mode 100644 Gruntfile.js create mode 100644 README.md create mode 100644 package.json create mode 100644 src/proboard_saver.coffee create mode 100644 src/proboard_saver.js diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fde91ac --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +build/ +node_modules/ +.DS_Store diff --git a/Gruntfile.js b/Gruntfile.js new file mode 100644 index 0000000..56a6838 --- /dev/null +++ b/Gruntfile.js @@ -0,0 +1,23 @@ +module.exports = function(grunt) { + grunt.initConfig({ + pkg: grunt.file.readJSON( 'package.json' ), + + run: { + saver: { + cmd: 'casperjs', + args: [ + 'src/proboard_saver.coffee', + '--board-nr=YOUR-BOARD-NR', + '--board-name=YOUR-BOARD-NAME', + '--user=YOUR-USERNAME', + '--password=YOUR-PASSWORD' + ] + } + } + }); + + grunt.loadNpmTasks('grunt-run'); + + grunt.registerTask('default', ['run:saver']); + grunt.registerTask('save', ['run:saver']); +}; diff --git a/README.md b/README.md new file mode 100644 index 0000000..801319e --- /dev/null +++ b/README.md @@ -0,0 +1,13 @@ +This tool tries to scrape all the accessible data from a [Proboards Forum](https://proboards.com/). + +I wrote this because I wanted to export at least the text data for an old +board hosted there which some friends and I were using years ago. As it turned +out, you simply can't do that. So here we are. + +The program probably does some horrible things and I can't say if it will work +for every theme. But hey, it only has to work one time to get at the data. 
+ +It is able to detect attachments and images and tries to download them too. + +A working [casperjs](http://casperjs.org/) installation is needed for the stuff to work. + diff --git a/package.json b/package.json new file mode 100644 index 0000000..911b708 --- /dev/null +++ b/package.json @@ -0,0 +1,15 @@ +{ + "name": "proboard_saver", + "version": "0.0.1", + "description": "", + "main": "src/proboard_saver.coffee", + "dependencies": { + "grunt": "~0.4.2", + "grunt-run": "~0.2.1" + }, + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "author": "Sebastian Hugentobler", + "license": "CC0" +} diff --git a/src/proboard_saver.coffee b/src/proboard_saver.coffee new file mode 100644 index 0000000..937936a --- /dev/null +++ b/src/proboard_saver.coffee @@ -0,0 +1,377 @@ +# 2015 by Sebastian Hugentobler +# To the extent possible under law, the author(s) have dedicated all copyright +# and related and neighboring rights to this software to the public domain +# worldwide. This software is distributed without any warranty. +# See http://creativecommons.org/publicdomain/zero/1.0/ for a description of CC0. + +casper = require('casper').create( + verbose: false + logLevel: 'info' + pageSettings: { + webSecurityEnabled: false + } +) + +utils = require('utils') +fs = require('fs') + +casper.on 'error', (msg, trace) -> + @echo "Error: #{msg}", "ERROR" + +casper.on 'page.error', (msg, trace) -> + @echo "Error: #{msg}", "ERROR" + +casper.on 'remote.message', (msg, trace) -> + if not /Unsafe JavaScript attempt/.test msg + @echo "remote log: #{msg}", "INFO" + +loadImages = (searchString) -> + images = searchString.match /\[img\](.*?)\[\/img\]/g + if images + for image in images + detailImage = image.match(/\[img\](.*?)\[\/img\]/)[1] + + imageParts = detailImage.split '/' + imageName = imageParts[imageParts.length - 1] + console.log "\t\tdownloading image '#{imageName}'..." 
+ + casper.download detailImage, "data/images/#{imageName}" + + re = new RegExp("\\[img\\]#{detailImage}\\[/img\\]", "i") + searchString = searchString.replace re, "[img]{{baseurl}}/images/#{imageName}[/img]" + + return searchString + +replaceHtml = (element) -> + images = Array::map.call element.querySelectorAll('img'), (img) -> + src: img.src, alt: if img.hasAttribute('alt') then img.alt else '' + + for image in images + element.innerHTML = element.innerHTML.replace /]*>/, "[img#{if image.alt then '=' + image.alt else ''}]#{image.src}[/img]" + + videos = Array::map.call element.querySelectorAll("iframe[title='YouTube video player']"), (video) -> video.src.split('/')[4].split('?')[0] + for video in videos + element.innerHTML = element.innerHTML.replace /