#!/usr/bin/perl

# Read in a Labyrinth exported "mapz" file.  Output something usable
# in Emacs org-mode.

# Usage: lab2org mapzfile orgfile

# where mapzfile is an /exported/ labyrinth file, and orgfile is
# output suitable for use with org-mode.

# The Perl modules XML::Simple and Data::Dumper are required. Get them
# from your Linux distro's repositories, or from a CPAN site.

# ---------------------

# All in all, Labyrinth is a great tool for very rapid development of
# ideas in mind-map form.  I use it mostly for sketching out fiction:
# plots, characters, locations, etc. I make good use of both images
# and extended information. I also find it useful for high-level
# project and workload management. Labyrinth is much simpler and has
# fewer features than most of the competition, but I really like the
# speed and ease of use, which seem just about unrivaled.

# But in the end I do all my work with Emacs org-mode, and sooner or
# later I want and need to get my map into an outline format. Other
# mind-mapping tools like VYM and SimpleMind export nicely to org-mode
# and other outline formats. If I am to use Labyrinth I need to have
# at least org-mode export capability.

# So I made this small utility, cobbled together in a fairly short
# time. It works on /exported/ Labyrinth .mapz files. It could easily
# be made to work on Labyrinth's internal .map files (just get rid of
# some of the conversion stuff at the top) but Labyrinth uses really
# goofy superlong filenames, and it's just too much trouble to mess
# with. (Note that the .mapz files are simply tar files.)

# If you don't want org-mode output, hacking this to produce a
# different outline format would be very simple.

# Labyrinth allows arbitrary linking. That's great flexibility but
# when generating a hierarchy there will be problems if there are
# circular links or if a node has more than one parent. I deal with
# this in a simple way. If I find a second parent for a node, I
# discard that link and just keep the link to the first parent. Also,
# if I find a circular structure, I break the link chain to remove the
# circularity. These solutions are stopgap but not as crude as they
# sound and result in minimum loss of information. In any case, the
# need for circular linking or multiple parenting is usually not
# great.

# This is currently alpha software, meaning it's preliminary, may be
# missing features, have features that don't work or don't work as
# advertised, may (and likely will) give erroneous output, and has
# bugs that are probably many and serious. It is free software,
# released into the public domain, and it is provided with no warranty
# or promise of support and with no liabilities accepted. Use at your
# own risk.

# ------------------------

# Some further notes about using Labyrinth and XML::Simple follow.

# 1. Labyrinth stores files in an odd location. On my system it's

#    ~/home/userid/.local/share/labyrinth 

#    and these files (.map) have very long hexadecimal like names.
#    This is pretty inconvenient, so I set up a directory

#    ~/home/userid/data/laby 

#    and symlink that to the default location above. So my .map files
#    are stored in a place where I can easily do backups and sync with
#    other computers, but labyrinth can still find them. I also set up

#    ~/home/userid/data/laby/images

#    and I store all image files here so that when I sync from
#    computer to computer, I'm not missing graphics.

# 2. Labyrinth sometimes messes up its own internal data structure,
#    and this can cause some issues, especially when exporting with
#    laby2org. If you add and delete nodes and especially links, there
#    are sometimes residuals, and once in a while node text is
#    lost. This manifests in an output org file having empty
#    headlines, or in error messages about multiple parents for a
#    node. Sometimes closing and reopening the map file will help show
#    where the text problems lie, and drop some of the phantom
#    links. You can also turn on debug within laby2org and examine the
#    data structure dumped out to the file l2odebug. A full fix will
#    require patches to labyrinth, something I have yet to
#    investigate (and ultimately, likely won't "get around to").

# 3. Never, never, NEVER close a Labyrinth map with the 'x'
#    widget. ALWAYS use File-Close. But even this isn't good enough,
#    because Labyrinth doesn't understand save on close and I've lost
#    a lot of content this way. You have to wait for the labyrinth
#    autosave interval (I think 60 seconds). Or, you can export a mapz
#    file, which apparently forces a save. This needs some looking
#    into!

# 4. XML::Simple is a great tool that saves a lot of work, but it has
#    one annoying quirk (it's actually a feature that causes trouble
#    for laby2org). If there are multiple instances (meaning more than
#    one) of a given type (node, link, graphic), then XML::Simple
#    outputs a list with each instance being an element of the
#    list. However, if there is exactly one (for instance exactly one
#    graphic) XML::Simple outputs a scalar. You'll see some ugly code
#    below that determines whether we're dealing with a scalar or a
#    list.

# ------------------------------

# I'm a casual coder and the "quality" of my code reflects that. So if
# you wish to criticize my coding skills and style, feel free to do so
# as long as you send me improvements. Constructive comments,
# suggestions, great job offers, and said improvements are welcomed. I
# have to cover myself by saying that I don't promise anything, but
# please send your input to:

#   laby2org@bobnewell.net .

# ------------------------------

# Changelog:

# 2016-12-11 0.02 Added extensive notes and did some cleanup prior
#                 to publishing an alpha release.
#                 Put in previously omitted drawing_thought processing.
#                 Vastly improved reading of tar file.

# 2016-12-10 0.01 Completed initial coding. Bob Newell, Honolulu,
#                 Hawai`i.

# --------------------------

use XML::Simple;
use Data::Dumper;

$version = "0.02 alpha";
print "\n=== laby2org $version ===\n\n";

# Set to 1 to dump data structure for debugging.
$debug = 0;

# Check for two args.
if (@ARGV != 2) {
   die "Usage: laby2org mapzfile orgfile\n";
}

# Open Labyrinth file.

if (open(lyin, $ARGV[0])) {
  binmode(lyin);
  print "Opened $ARGV[0] for input.\n";
} else {
  die "Can't open input file $ARGV[0]\n";
}

# Start progress indicator.
print "Working.";

# Read the input mapz file, which is in tar format. The map file comes
# first. The size is in the first header at offset 124.

# Skip over unwanted map file info.
unless (sysread lyin, $ljunk, 124) {die "Defective mapz file";}
# Grab the map file size. It's a string representing an octal number!
unless (sysread lyin, $lsize, 12) {die "Defective mapz file";}
# Convert to a decimal number.
$msize = oct($lsize);
# Skip rest of header.
unless (sysread lyin, $ljunk, 376) {die "Defective mapz file";}
# Read the map file.
unless (sysread lyin, $lstr, $msize) {die "Defective mapz file";}
# Done with input file.
close lyin;

# Separate the map XML into lines.
$lstr =~ s/>/>\n/g;
# Convert any stray \r into \n (labyrinth throws a few in sometimes).
$lstr =~ s/\r/\n/g;

print ".";

# Now use XML module to parse data structure.
$xml=new XML::Simple;
$dxml=$xml->XMLin($lstr);

# Debugging, dump data structure.
if ($debug > 0) {
  open(foob,">l2odebug");
  print foob Dumper($dxml);
}

# Dereference and process each record type.  There is an issue in that
# if there is only ONE record of a given type, it will not be returned
# as an array.  In such a case we make it into a one-element array.

# 'thought' -> 'identity' 'content' 'Extended'->'content' 
# Build a table with id, content, extended.

print ".";
$tcount = -1;

eval { my $a = @{$dxml->{thought}}; };
if ($@=~/^Not an ARRAY reference/) {
        $dxml->{thought} = [ $dxml->{thought} ] ;
}

foreach $t (@{$dxml->{thought}})
{
  $tcount++;
  $tid[$tcount] = $t->{identity};
  $tcontent[$tcount] = $t->{content};
# Content has an extra \n at start.
  $tcontent[$tcount] = substr($tcontent[$tcount],1);
  $textended[$tcount] = $t->{Extended}->{content};
  $tparent[$tcount] = -1;
  $tchildcount[$tcount] = -1;
  $tused[$tcount] = 0;
}

print ".";

eval { my $a = @{$dxml->{image_thought}}; };
if ($@=~/^Not an ARRAY reference/) {
        $dxml->{image_thought} = [ $dxml->{image_thought} ] ;
}

# 'image_thought' -> 'identity' 'file'
# Treat these as thoughts with file name as content.

foreach $t (@{$dxml->{image_thought}})
{
  $tcount++;
  $tid[$tcount] = $t->{identity};
# Add org-mode link syntax.
  $tcontent[$tcount] = "[[".$t->{file}."]]";
  $textended[$tcount] = $t->{Extended}->{content};
  $tparent[$tcount] = -1;
  $tchildcount[$tcount] = -1;
  $tused[$tcount] = 0;
}

print ".";

eval { my $a = @{$dxml->{drawing_thought}}; };
if ($@=~/^Not an ARRAY reference/) {
        $dxml->{drawing_thought} = [ $dxml->{drawing_thought} ] ;
}

# 'drawing_thought' -> 'identity'
# Treat these as thoughts with arbitrary string as content.

foreach $t (@{$dxml->{drawing_thought}})
{
  $tcount++;
  $tid[$tcount] = $t->{identity};
# Add some text. Can't save the artwork!
  $tcontent[$tcount] = "Drawing thought, node $tid[$tcount]";
  $textended[$tcount] = $t->{Extended}->{content};
  $tparent[$tcount] = -1;
  $tchildcount[$tcount] = -1;
  $tused[$tcount] = 0;
}

print ".";

eval { my $a = @{$dxml->{link}}; };
if ($@=~/^Not an ARRAY reference/) {
        $dxml->{link} = [ $dxml->{link} ] ;
}

# 'link' -> 'child' 'parent'
# Build link table.

$lcount = -1;
foreach $l (@{$dxml->{link}}) {
  $lcount++;
  $lchild = $l->{child};
  $node = $lchild;
  $jc = findnode();
  $lparent = $l->{parent};
  $node = $lparent;
  $jp = findnode();

# Unless there are circular links there can only be one parent.  But
# there can be multiple children.  $jp is the parent of $jc; $jc is
# the child of $jp.  Assign the parent. If circular warn and don't use
# link.

  if ($tparent[$jc] == -1) {
    $tparent[$jc]=$jp;
# Build out the child chain.
    $tchildcount[$jp]++;
    $tchild[$jp][$tchildcount[$jp]] = $jc;
  } else {
# Warn about a node that has multiple parents. Ignore the problematic
# link.
      print "Multiple parents for $lchild, ignoring $lparent -> $lchild\n";
  }
}

print ".\n";

# Open output file.

if (open(lyout, ">$ARGV[1]")) {
 print "Opened $ARGV[1] for output.\n";
 } else {
 die "Cannot open output file $ARGV[1]";
}

# Now the hard part, build the hierarchical output.  If a node has a
# parent of -1, it is top level.  Process top level first, following
# children recursively.

for ($j=0;$j<=$tcount;$j++) {
    if ($tused[$j] == 0) {
	if ($tparent[$j] == -1) {
	    $tused[$j] = 1;
# Output top-level entry.
	    print lyout "* $tcontent[$j]",$textended[$j],"\n";
	    $tused[$j] = 1;
# Now recursively trace children.
	    $level = 0;
	    $tcj[$level] = $j;
	    cursekids();
	}
    }
}

# Check for unprocessed nodes and warn.

for ($j=0;$j<=$tcount;$j++) {
  if ($tused[$j] == 0) {
	print "Node $tid[$j] was not processed.\n";
  }
}

close lyout;
print "Done.\n";
exit 0;

sub findnode
{
  my $n;
  for ($n=0;$n<=$tcount;$n++) {
    if ($tid[$n] == $node) {
	return $n;
    }
  }
  die "Error - node $node not found.\n";
}

sub cursekids
{
    my $mychildcount = $tchildcount[$tcj[$level]];
    unless ($mychildcount == -1) {
    for (my $k=0;$k<=$mychildcount;$k++) {
	for (my $m=0;$m<=$level+1;$m++) {
	    print lyout "*";
	}
	my $mychild = $tchild[$tcj[$level]][$k];
	if ($tused[$mychild] == 0) {
	    print lyout " $tcontent[$mychild]",$textended[$mychild],"\n";
	    $tused[$mychild] = 1;
	    $level++;
	    $tcj[$level] = $mychild;
	    cursekids();
	    $level--;
	} else {
# Somehow we have doubled back on a node, maybe a circular link.
	    print "Already traced $mychild\n";
	}
    }
  }
}
