The scripts below export MindTouch pages as HTML and export attached files as raw files. They currently target only the main hierarchy. This is not a supported method, but the scripts can be extended to streamline your own export.
After completing this tutorial you will be able to export your main hierarchy as HTML and files.
The scripts below have red highlighted areas that need to be configured for your MindTouch site. Add the appropriate target URL, user name, and password, along with the API key if it is a private site, and then save the scripts as PHP files (files.php and pages.php) on a server. Before running them, create directories called files and pages in the same directory where you execute the scripts. Then run the following:
php files.php
php pages.php
<?php
/*
 * files.php - downloads every file attachment listed by the MindTouch files
 * API into the local "files" directory.
 *
 * Steps:
 *   1. Fetch the file listing (XML) with basic authentication and save it
 *      as files.xml.
 *   2. Read the href of every <contents> element in the listing.
 *   3. Download each href (with the API key appended) into files/<name>.
 */

// Replace these values to match your own site.
$username = "username";
$password = "pass";
$url = "http://yoursite.mindtouch.com/@api/deki/files";
$apikey = "insertapikey";

$myFile = "files.xml";
$targetDir = "files";

// Fetch the listing first so a failed request never leaves a bogus files.xml.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_USERPWD, "$username:$password");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$stringData = curl_exec($ch);
if ($stringData === false) {
    $error = curl_error($ch);
    curl_close($ch);
    die("can't download file list: " . $error . PHP_EOL);
}
curl_close($ch);

if (file_put_contents($myFile, $stringData) === false) {
    die("can't open file");
}

// The downloads are written below this directory, so make sure it exists.
if (!is_dir($targetDir) && !mkdir($targetDir, 0777, true)) {
    die("can't create directory " . $targetDir . PHP_EOL);
}

$dom = new DOMDocument;
if (!$dom->load($myFile)) {
    die('Error');
}
$xpath = new DOMXPath($dom);
$files = $xpath->query("//files/*/contents");
if (!$files->length) {
    die('Error');
}

foreach ($files as $file) {
    $link = $file->getAttribute('href');
    if ($link === '') {
        continue;
    }

    // The file name is everything after the first "=" in the href. When the
    // href has no "=", fall back to the last segment of the URL path instead
    // of silently slicing the string at a meaningless offset.
    $marker = strpos($link, '=');
    if ($marker === false) {
        $filename = basename((string) parse_url($link, PHP_URL_PATH));
    } else {
        $filename = substr($link, $marker + strlen('='));
    }
    // basename() keeps the download inside $targetDir even if the name
    // contains path separators.
    $filename = basename((string) $filename);
    if ($filename === '') {
        echo "Skipping " . $link . " (no file name)" . PHP_EOL;
        continue;
    }

    // Use "&" when the href already carries a query string.
    $separator = (strpos($link, '?') === false) ? '?' : '&';
    $dlink = $link . $separator . "apikey=" . $apikey;
    $path = $targetDir . '/' . $filename;

    echo "Downloading " . $filename . PHP_EOL;
    $fp = fopen($path, 'w');
    if ($fp === false) {
        echo "can't open " . $path . PHP_EOL;
        continue;
    }

    $ch = curl_init($dlink);
    curl_setopt($ch, CURLOPT_FILE, $fp);
    // Treat HTTP errors (>= 400) as failures instead of saving the error
    // page as if it were the file.
    curl_setopt($ch, CURLOPT_FAILONERROR, true);
    $ok = curl_exec($ch);
    $error = curl_error($ch);
    curl_close($ch);
    fclose($fp);

    if ($ok === false) {
        echo "Failed to download " . $filename . ": " . $error . PHP_EOL;
        // Remove the empty or partial file left behind by the failed transfer.
        unlink($path);
    }
}
?>
<?php
/**
 * Returns the text between the first occurrence of $start and the next
 * occurrence of $end that follows it. Used to parse the title from the
 * downloaded XML.
 *
 * @param string $string Text to search.
 * @param string $start  Delimiter that precedes the wanted text.
 * @param string $end    Delimiter that follows the wanted text.
 * @return string The text between the delimiters, or "" when either
 *                delimiter is empty or cannot be found.
 */
function get_string_between($string, $start, $end){
    $string = (string) $string;
    $start = (string) $start;
    $end = (string) $end;
    if ($start === '' || $end === '') {
        return "";
    }

    // Strict comparison distinguishes "not found" (false) from a match at
    // offset 0, so no padding of the haystack is needed.
    $ini = strpos($string, $start);
    if ($ini === false) {
        return "";
    }
    $ini += strlen($start);

    // Without this check a missing $end would yield a negative length and
    // substr() would return an arbitrary slice of the input.
    $stop = strpos($string, $end, $ini);
    if ($stop === false) {
        return "";
    }

    return substr($string, $ini, $stop - $ini);
}
/**
 * Fetches the contents of a URL with cURL.
 *
 * The connection attempt is limited to 5 seconds. No credentials are sent.
 *
 * @param string $url Address to request.
 * @return string|bool Whatever curl_exec() returns with
 *                     CURLOPT_RETURNTRANSFER enabled: the response body,
 *                     or false when the transfer fails.
 */
function get_data($url)
{
    $handle = curl_init();
    curl_setopt_array($handle, [
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_CONNECTTIMEOUT => 5,
    ]);
    $response = curl_exec($handle);
    curl_close($handle);

    return $response;
}
/*
 * Main export: downloads the pages.xml listing, then saves the contents of
 * every listed page as pages/<page title>.html.
 */

//add your username, password, site url and API key
$username = "USER";
$password = "PASSWORD";
$url = "http://sitename/@api/deki/pages";
/* The apikey allows authenticated requests for page contents */
$apikey = "APIKEYHERE";

$myFile = "pages.xml";
$targetDir = "pages";

/* Downloads the main pages.xml output locally so the script can loop through the results */
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_USERPWD, "$username:$password");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$stringData = curl_exec($ch);
if ($stringData === false) {
    $error = curl_error($ch);
    curl_close($ch);
    die("can't download page list: " . $error . PHP_EOL);
}
curl_close($ch);

if (file_put_contents($myFile, $stringData) === false) {
    die("can't open file");
}

/* The pages are written below this directory, so make sure it exists */
if (!is_dir($targetDir) && !mkdir($targetDir, 0777, true)) {
    die("can't create directory " . $targetDir . PHP_EOL);
}

/* Parses through downloaded XML to retrieve the locations of pages so that the script can download the pages */
$dom = new DOMDocument;
if (!$dom->load($myFile)) {
    die('Error');
}
$xpath = new DOMXPath($dom);
$pages = $xpath->query("//page");
if (!$pages->length) {
    die('Error');
}

$counter = 0;
foreach ($pages as $page) {
    $counter++;
    $link = $page->getAttribute('href');
    if ($link === '') {
        continue;
    }

    /* Drop any query string before appending the contents endpoint */
    $linkstrip = explode('?', $link);
    $linkclean = $linkstrip[0];
    $dlink = $linkclean . "/contents?apikey=" . $apikey;

    echo "Downloading html" . PHP_EOL;
    $download = get_data($dlink);
    if ($download === false) {
        echo "Failed to download " . $dlink . PHP_EOL;
        continue;
    }
    $decodedown = html_entity_decode($download);

    /* Finding title in XML so that the file can be named the same as the page title */
    $title = get_string_between($decodedown, 'title="', '">');
    echo $title . PHP_EOL;

    /*
     * "/" cannot appear in a file name. When no title is found, fall back to
     * a numbered name so such pages do not all end up as pages/.html,
     * each one overwriting the previous.
     */
    $name = str_replace('/', '-', $title);
    if ($name === '') {
        $name = 'page-' . $counter;
    }

    /* Saving HTML locally on server; pages sharing a title still overwrite each other */
    $location = $targetDir . '/' . $name . '.html';
    if (file_put_contents($location, $decodedown) === false) {
        echo "can't write " . $location . PHP_EOL;
    }
}
?>
Please feel free to take these scripts and reuse them to fit your needs.
| Images 0 | ||
|---|---|---|
| No images to display in the gallery. |
Copyright © 2011 MindTouch, Inc. Powered by