#!/usr/bin/perl;
=pod
########################################
###                                  ###
###  IMPORT Twiki to Mindtouch       ###
###  MODULE                          ###
########################################
## Copyright 2009 Tim Hunt / Australian Bureau of Meteorology
## To interact with a DekiWiki / Mindtouch instance.

=cut
package Dream;
use strict;
use warnings;

#-- modules
use Carp;
use DateTime;
use LWP;
use HTTP::Cookies;
use HTTP::Request::Common qw{GET POST};
use URI::Escape;
use MIME::Types qw {by_suffix};

use Data::Dumper;

## we will use an autoloader to generate the object accessor methods.
## Here are the methods allowed (for data storage)
our $AUTOLOAD;
our $fields = {};
my @attr = qw{
	user
	pass
	page_name
	page_title
	mode
	site
	agent
	api_url
	api_call
	cookie
	page_data
	parent
	edittime
	timezone
	page_set_url
	filename
	processed_file
	do_once
	twiki_host
	first_page
	new_home
	twiki_root
	twiki_api
	twiki_web
	twiki_list
	twiki_pages
};
# stuff them into the keys of a hashref
map $fields->{$_}++, @attr;

sub new {
	my $proto  = shift;
	my $class  = ref($proto) || $proto;
	my $parent = ref($proto) && $proto;
	our $fields;
	my $self = {
		'_permitted' => $fields,
	};

	bless($self, $class);
	$self->parent($parent);
	return $self;
}

sub make_agent
{
	my $self = shift;
	# LWP user agent.
	## Set up a UserAgent with Cookie Jar
	my $ua = LWP::UserAgent->new();
	my $cookie_jar = HTTP::Cookies->new();
	$ua->cookie_jar($cookie_jar);
	# get the auth cookie for the user;
	my $auth_url = $self->mode . $self->user .':'. $self->pass .'@'. $self->site . $self->api_url . '/users/authenticate';
	#print $auth_url."\n";
	my $request = HTTP::Request->new(GET => $auth_url);
	my $result = $ua->request( $request );
	
	## Deconstruct the cookies in the jar.
	## We want to ensure that there are no quotes around the
	## authtoken ($key) values.
	my ($domain, $path, $key);
	$domain = ( keys %{ $cookie_jar->{'COOKIES'} })[0];
	$path   = ( keys %{ $cookie_jar->{'COOKIES'}{$domain} })[0];
	$key    = ( keys %{ $cookie_jar->{'COOKIES'}{$domain}{$path} })[0];
	
	my @values = @{ $cookie_jar->{'COOKIES'}{$domain}{$path}{$key} };
	foreach my $value ( @values )
	{
		$value =~ s/\"//g if defined $value;
	}
	$cookie_jar->{'COOKIES'}{$domain}{$path}{$key} = \@values;
	
	$self->agent($ua);
	return 1;
}

sub get_page_list
{
	my $self = shift;
	my $get_twiki_request = HTTP::Request->new(
				GET => $self->twiki_host().$self->twiki_api().$self->twiki_web().$self->twiki_list(),
	);
	my $get_twiki_result = $self->agent->request($get_twiki_request);
	my $page = $get_twiki_result->{_content};
	   $page = $self->html_to_fragment($page);
	my $Twiki_Link_re = q{<li>.*<a.*href=".*}.$self->twiki_web().q{/(\w*)};
	   $Twiki_Link_re = qr{$Twiki_Link_re};
	
	my @pages = $page =~ m{$Twiki_Link_re}mgxs ;
	
	# use a grep to get all of the 'first_page' pages
	#$self->first_page('RadarGroup');
	#$self->new_home('Radar Engineering');
	my $want_re = $self->first_page();
	my @wanted_pages = grep {/^$want_re/} @pages;
	@pages = @wanted_pages;
	
	## Want to shift root page to the front - it's going to be the parent page
	my %magic_map;
	map $magic_map{$_} = $_ , sort @pages;
	delete $magic_map{$self->first_page()};
	@pages  = @magic_map{@pages};
	unshift @pages , $self->first_page();

	$self->twiki_pages(\@pages);

}

sub grab_and_upload
{
	my ($self, $page_to_get, $page) = @_;
	my $twiki_page  = HTTP::Request->new(
			GET => $page_to_get,
	);
	my $twiki_content = $self->agent->request($twiki_page);
	$twiki_content    = $twiki_content->{_content};

## Find any images and other attachments to the page;
#  store them here in a hash and attach them later
#  To attach them, we need to know the page id, not its
#  name.
	my $attachments = 0;
	if (grep /twikiAttachments/, $twiki_content)
	{
		## There are attachments in the page, as we have found
		#  the attachment class.
		$attachments = 1;
	}
	my @attachments;
	my %att_hash;
	## Build this once
	my $attachments_regex = q{/twiki/pub/}.$self->twiki_root().'/'.$self->first_page().q{.+?/\w+\.\w{3}};
	   $attachments_regex = qr{($attachments_regex)};
	if ($attachments == 1)
	{
		## get the images and other attachments
		foreach my $line (split /\n/, $twiki_content)
		{
			my ($att) = $line =~ m{$attachments_regex};
			push @attachments, $att if defined $att;
		}
		print "Attachments!\n";
		## Use a hash slice  to populate the array into a hash
		#  without a loop and also remove duplicates.
		@att_hash{@attachments} = @attachments;
	}
	$twiki_content    = $self->html_to_fragment($twiki_content);
	## Change the links internally to reflect the new structure.
	my $parent   = $self->parent();
	my $o_parent = $self->twiki_root();
	my $first    = $self->first_page();
	$twiki_content    =~ s/$o_parent/$parent/g;
	$twiki_content    =~ s/$first//g;

	## Change the page name to remove 'firstPage'
	if( $page !~ /^$first()$/)
	{
		$page =~ s/$first//;
	}
	else
	{
		$page = $self->new_home();
	}
	
	$self->page_name($self->parent().'/'.$page);
	$self->page_title($page);
	if($self->do_once() == 1)
	{
		my ($short_parent) = $self->parent() =~ m{(.*)/};
		$self->page_name($short_parent.'/'.$page);
		$self->do_once(0);
	}
	# URI encode the page name TWICE
	$self->page_name( uri_escape( uri_escape( $self->page_name ) ) );
	my $deki_page = $self->mode . $self->site . $self->api_url . '/pages/=' .$self->page_name ;
	my $deki_image_url = $deki_page . '/files/=';
	# Set up the post of a data page
	$self->page_set_url( $deki_page .  '/contents?edittime=' . $self->edit_time() );
	my $post_res = $self->post_page($twiki_content);

	# Then loop through the %att_hash and upload all of the attachments
	foreach my $att_link (keys %att_hash)
	{
		my ($filename) = $att_link =~ m{.*/(.*)$};
		my ($mime_type, $encoding) = by_suffix($filename);
		$filename = uri_escape(uri_escape($filename));
		print " + Attach file: $filename with mime-type $mime_type\n";
		my $att_link_uri = $self->twiki_host().$att_link;
		my $twiki_page  = HTTP::Request->new(
			GET => $att_link_uri,
		);
		my $twiki_content = $self->agent->request($twiki_page)->{_content};
		# attach this content to the page found above
		# API call - PUT: pages/{pageid}/files/{filename}
		my $upload_url = $deki_image_url . $filename ;
		
		my $att_upload_req =  HTTP::Request->new(
					PUT   => $upload_url,
		);
		$att_upload_req->content($twiki_content);
		$att_upload_req->content_type($mime_type);
		my $att_result = $self->agent->request($att_upload_req);
		my ($file_id) = $att_result->{_content} =~ m{^.*file\ id\=\"(\d+)};
		$att_hash{$att_link} = $file_id;
		sleep 3;
	}
	# if there were attachments, re-upload the page with modified links
	if ($attachments == 1)
	{
		## if we have atachments, we need to change links to them
		$twiki_content = $self->change_attachment_links(
					'attachments' => \%att_hash,
					'page_content' => $twiki_content,
		);
		my $post_res = $self->post_page($twiki_content);	
	}
}

sub post_page
{
	my $self = shift;
	my $twiki_content = shift;
	my $page_set_req = HTTP::Request->new(
				POST => $self->page_set_url(),
	);
	$self->agent->default_header('Content_Type' => 'text/plain; charset=iso-8859-1');
	$page_set_req->content($twiki_content);
	my $post_res = $self->agent->request($page_set_req);
	return $post_res;
}

sub change_attachment_links
{
	my $self = shift;
	my %attr = @_;
	my $att_list = $attr{'attachments'};
	my $content  = $attr{'page_content'};
	
	# if the attachment list is empty, just return the content
	return $content if scalar keys %$att_list == 0;
	
	# Otherwise, itterate over the attachments and replace them with
	# links to the new location.
	foreach my $link (keys %$att_list)
	{
		my $file_number = $att_list->{$link};
		my ($file_name) = $link =~ m{.*/(.*)};
		#http://example.com/@api/deki/files/748/=ImageName.png
		my $old_link    = q{src=.*/}.$file_name.q{"}; 
		my $new_link    = q{src="}.$self->api_url().'/files/'. $file_number .'/='.$file_name.q{" };
		$content        =~ s{$old_link}{$new_link}g;
	}

	return $content;
}

sub edit_time
{
	my $self = shift;

	my $dt = DateTime->now();
	$dt->set_time_zone($self->timezone());
	my $date = $dt->ymd('').$dt->hms('');

	$self->edittime($date);
	return $date;
}

## We need to get a TWiki page and render it to XHTML
## No 'body' tags, nor a H1, but all else.
sub html_to_fragment
{
	my $self = shift;
	my $text  = shift;

	($text) = $text =~ m{<body.*?>(.*)</body.*?>}smx;
	## strip out the following tags:
	my @reg_exes = (
		qr{</?div.*?>},    # DIV
		qr{class=".*?"},   # class modifiers
		qr{name=".*?"},    # names...
		qr{ID=".*?"},      # ID modifiers
		qr{</?span.*?>},   # SPAN
		qr{<!--.*?-->},    # HTML comments
		qr{</?font.*?>},   # FONT tags
		qr{</?center.*?>}, # center tags
	);
	foreach my $regex (@reg_exes)
	{
		$text =~ s{$regex}{}smxg;
	}
	## change the URL to local:
	my $old_uri = qr{".*/twiki/bin/view/};
	my $new_uri = q{"/};
	$text =~ s{$old_uri}{$new_uri}smxg;
	## Change the h1 to a h2
	$text =~ s{([<|</])h1>}{$1h2>}smxg;
	
	# we get 'double spacing'. Kill it.
	# This is due to a <p /> in the TWiki output; the paras are not correctly
	# enclosed. I need to replace :
	#
	#  lorem ipsum
	#  <p />
	#  lorem ipsum
	#  <p />
	#
	# with
	#  <p>lorem ipsum</p>
	#  <p>lorem ipsum</p>
	# !!
	
	## Split the text, re-build it
	my @lines_of_text = split /\n/, $text;
	my $processed_text = '';
	my $first_para = 0;
	foreach my $in_line (@lines_of_text)
	{
		chomp( $in_line );
		if( $in_line =~ m{<p\s/>})
		{
			if( $first_para == 0)
			{
				$processed_text .= '<p>';
				$first_para = 1;
			}
			else
			{
				$processed_text .= '</p><p>'
			}
		}
		else
		{
			$processed_text .= $in_line . "\n";
		}
	}
	# we need to close the para, but where?
	$processed_text .= '</p>';
	$text = $processed_text;
	$self->processed_file($text);
	return $text;
}

sub dump{
	my $self = shift;
	print Dumper $self;
}


###########################################################################
## We are autoloading accessors. They must exist in the array at the top! #
###########################################################################
sub AUTOLOAD
{
	my $self = shift;
	my $type = ref($self) or croak "$self is not an object";
	our $AUTOLOAD;

	my $name =  $AUTOLOAD;
	$name =~ s/.*://;
	unless ( exists $self->{'_permitted'}->{$name} )
	{
		croak "Can't access `$name' field in class $type";
	}
	if (@_)
	{
		$self->{$name} = shift;
	}

	return $self->{$name};

}

1; # because we do

