#! /usr/bin/env python -- # -*- python -*-
# -*- coding: iso-8859-1 -*-
#-----------------------------------------------------------------------------
#	file	MiniWiki.py
#	role	Simple translation engine from Wiki to XHTML
#
#	Look at '__rcs_info__' section and 'docstring' below for more
#	information
#
#  Copyright (c) 2004 - UBS-Valoria
#
#  This software is distributed under FSF GPL -- read details in
#  GNU_LICENSE and COPYING files provided with this distribution.
#
#-----------------------------------------------------------------------------
#
# DISCLAIMER
#
#   This software is provided by 'UBS - Valoria' "as is" and any
#   expressed or implied warranties, including, but not limited to, the
#   implied warranties of merchantability and fitness for a particular
#   purpose are disclaimed.  In no event shall 'UBS - Valoria' or its
#   contributors be liable for any direct, indirect, incidental,
#   special, exemplary, or consequential damages (including, but not
#   limited to, procurement of substitute goods or services; loss of
#   use, data, or profits; or business interruption) however caused and
#   on any theory of liability, whether in contract, strict liability,
#   or tort (including negligence or otherwise) arising in any way out
#   of the use of this software, even if advised of the possibility of
#   such damage.
#
#-----------------------------------------------------------------------------
#
#   The module, classes and methods 'docinfo' strings and meta-information
#   defined below are used to produce documentation through the 'HappyDoc'
#   documentation tool: complete them carefully.
#
"""text accumulator with substitutions

    This module defines the class 'ubs.text.MiniWiki'. This class defines a
    string accumulator handled as a StringIO; each write operation implies
    a replacement tokens substitution.

    Look at class documentation for an overview of functionalities and all
    features.

    This module uses 'cStringIO' standard module

"""
__version__ = "$Revision: 1.1.1.1 $"[11:-2]
__copyr__ = """Copyright (c) 2004 - UBS-Valoria"""
__rcs_info__ = {
    #  Creation Information
    #
    'module_name'  : '$RCSfile: MiniWiki.py,v $',
    'rcs_id'       : '$Id: MiniWiki.py,v 1.1.1.1 2005/06/13 00:22:27 deveaux Exp $',
    'creator'      : 'D.Deveaux <daniel.deveaux@univ-ubs.fr>',
    'project'      : 'CMS SSI',
    'created'      : '2004/11/19',

    #  Current Information
    #
    'author'       : '$Author: deveaux $'[11:-2],
    'state'	   : '$State: Exp $'[8:-2],
    'date'         : '$Date: 2005/06/13 00:22:27 $'[7:-2],
}

#
#   Standard modules importation
#
import string, re, os
from StringIO import StringIO

#
#   Specific modules importation
#

#
#   Module-level informations
#


# --------------------------------------------------------------------------

# Block-level syntax: each pattern is tried against a whole source line
RE_header = re.compile (r'^(\+{3,6})\s*(.*)$')      # '+++' .. '++++++' title
RE_list = re.compile (r'^([#\*]+)\s*(.*)$')         # '*' / '#' list item
RE_dlist = re.compile (r'^:\s*(.*?)\s*:\s*(.*)$')   # ':term: description'
RE_blockq = re.compile (r'^((&gt;)+)\s*(.*)$')  ## ^> is replaced by &gt;
RE_pre = re.compile (r'^``(.*)$')                   # preformatted line
RE_hr = re.compile (r'^-{4,}')                      # horizontal rule

# Inline elements
RE_emph = re.compile (r"''(.*?)''")
RE_stro = re.compile (r'\*\*(.*?)\*\*')
RE_emst = re.compile (r"''\*\*(.*?)\*\*''")
RE_tt = re.compile (r'\{\{(.*?)\}\}')
RE_delins = re.compile (r'@@\-\-\-\s*([^@]*?\s*)\+\+\+(\s*[^@]*?)\s*@@')
RE_del = re.compile (r'@@\-\-\-\s*([^@]*?)\s*?@@')
RE_ins = re.compile (r'@@\+\+\+\s*([^@]*?)\s*?@@')

# links (WikiWord links are not implemented)
RE_genlnk = re.compile (r'\[([a-zA-Z0-9:\._\-/@\{\}]+?)\s+(.*?)\]')
RE_simlnk = re.compile (r'\[([a-zA-Z0-9:\._\-/@\{\}]+?)]')
# A bare URL must be preceded by a character other than a quote, so that the
# href='...' attributes generated for bracketed links are not matched again.
# The address character class used to start with a stray '[' ('[[a-z...'),
# which let a literal '[' be swallowed into the URL (and makes recent Python
# versions warn about a possible nested set); it has been removed.
RE_url = re.compile (r'([^\'])((http|https|mailto|ftp):([a-zA-Z0-9:\._\-/@]+))')
#RE_img1 = re.compile (r'\[([^\s]+\.(png|gif|jpg))\s*(.*?)\]')
RE_img1 = re.compile (r'(([a-zA-Z0-9:\._\-/]+)\.(png|gif|jpg))\s*')
RE_img2 = re.compile (r'\[<img src=\'(.*?)\' alt=\'.*?\' />\s+(.*?)\]')
# Arrays are not implemented now

# Special pattern and function to handle blockquote wiki command
#   see _removeHTML command
# NOTE(review): '%gt;' looks like a typo for the '&gt;' entity produced by
#   MiniWiki._removeHTML; neither RE_bq nor _gt_rep is referenced by the code
#   visible in this file -- confirm the intent before relying on them.
RE_bq = re.compile (r'^((%gt;)+)', re.MULTILINE)
def _gt_rep (self, matchobj):
    """Return the text matched by 'matchobj' with each '%gt;' turned into '>'.

    self     -- unused; kept so the signature stays unchanged.
    matchobj -- a match object, typically produced by 'RE_bq'.

    The previous implementation called a non-existent 'sub' method on the
    matched string and therefore always raised AttributeError.
    """
    return matchobj.group(0).replace ("%gt;", ">")


# --------------------------------------------------------------------------

class MiniWiki:
    """'ubs.text.MiniWiki' class.

    This class handles a simple wiki to XHTML converter.  The source text
    is processed line by line: each line is either a block-level command
    (heading, list item, description list item, blockquote, preformatted
    line, horizontal rule) or a plain paragraph; inline markup and links
    are translated inside headings, paragraphs, list items and blockquotes.

    The main entry point is 'transform()'.
    """

    def __init__(self):
        """MiniWiki constructor.
        """
        # error messages, indexed by the error numbers stored in '_error'
        self._ErrorMsgs = ('',
            'bad list imbrication level',
            'different lists should be separated by a least a blank line',
            'an empty term can not start a description list',
            )

        # for each kind of list: (list start, item start, item end, list end)
        self._ListsTags = {
            '*':  ('\n<ul>\n', '<li>', '</li>\n', '</ul>\n'),
            '#':  ('\n<ol>\n', '<li>', '</li>\n', '</ol>\n'),
            'd':  ('\n<dl>\n', '<dt>%s</dt>\n<dd>', '</dd>\n', '</dl>\n')
            }
        self.init()
        # --------------------------------------------------------- __init__()

    def init(self):
        """Initialisation method called before each transformation.
        """
        self._cb = None         # the text accumulation buffer
        self._cptpar = 0        # paragraph counter
        self._llevels = []      # stack of list levels ('*' or '#')
        self._indlist = 0       # in description list?
        self._inpre = 0         # in preformatted paragraph?
        self._blocklevel = 0    # current blockquote imbrication level
        self._divtext = 0       # is in a <div class='text'> section?

        self._error = 0         # current error state (Ok)
        # ------------------------------------------------------------- init()

    #
    #   Accessors ------------------------------------------------------------
    #
    def transform(self,
        wikitext                # text to translate in XHTML
        ):
        """Translate the wiki text in XHTML.

        Returns a '(result, status)' tuple: 'result' is the generated XHTML
        fragment and 'status' holds one "    line N: message" line per error
        detected ('' when no error was found).
        """
        self.init()
        self._cb = StringIO()
        # neutralise the HTML markup present in the source text
        wikitext = self._removeHTML(wikitext)
        # join continuation lines (with '\' at the end)
        wikitext = wikitext.replace("\\\n", " ")

        errors = []             # (line number, error number) couples
        for index, line in enumerate(wikitext.split("\n")):
            self._error = 0
            self._dispatch_line(line)
            if self._error:
                errors.append((index + 1, self._error))
                self._error = 0

        # Close every construct still open at the end of the text.  The
        # description list and the preformatted block must be closed too,
        # otherwise a text that does not end with a blank line leaves them
        # open in the result.
        self._unstackblocks(0)
        self._close_lists(0)
        self._close_dlist()
        self._end_pre()
        if self._divtext:
            self._cb.write("</div>\n")
        result = self._cb.getvalue()

        # A text reduced to a single paragraph is returned bare.  Both
        # wrappers are removed together or not at all, so the result can
        # never be left with only half of its enclosing tags.
        head = "<div class='text'>\n<p>"
        tail = "</p>\n</div>\n"
        if (self._cptpar < 2 and result.startswith(head)
                and result.endswith(tail)):
            result = result[len(head):-len(tail)]

        status = ''.join(["    line %d: %s\n" % (lineno, self._ErrorMsgs[errn])
                          for (lineno, errn) in errors])
        return (result, status)
        # ---------------------------------------------------------- transform()

    #
    #   Local Function -------------------------------------------------------
    #
    def _dispatch_line(self, line):
        """Send 'line' to the handler of the first block syntax it matches.

        The block-level patterns are tried in a fixed order; a line that
        matches none of them is handled as a plain paragraph.
        """
        m = RE_header.search(line)
        if m:
            self._apply_heading(m.group(1), m.group(2))
            return
        m = RE_list.search(line)
        if m:
            self._apply_list(m.group(1), m.group(2))
            return
        m = RE_dlist.search(line)
        if m:
            self._apply_dlist(m.group(1), m.group(2))
            return
        m = RE_blockq.search(line)
        if m:
            # group(2) is only the last '&gt;' of the run: skip it
            self._apply_blockq(m.group(1), m.group(3))
            return
        m = RE_pre.search(line)
        if m:
            self._apply_pre(m.group(1))
            return
        if RE_hr.search(line):
            self._cb.write("<hr />\n")
            return
        self._parag(line)
        # --------------------------------------------------- _dispatch_line()

    def _removeHTML(self, txt):
        """Convert the HTML special characters of 'txt' in entities.

        '&' is replaced first so that the entities generated for '<' and
        '>' are not escaped a second time.
        """
        txt = txt.replace('&', '&amp;')
        txt = txt.replace('<', '&lt;')
        txt = txt.replace('>', '&gt;')
        return txt
        # ------------------------------------------------------ _removeHTML()

    def _add_divtext(self):
        """Open a <div class='text'> section if none is currently open.
        """
        if not self._divtext:
            self._cb.write("<div class='text'>\n")
            self._divtext = 1
        # ----------------------------------------------------- _add_divtext()

    def _parag(self, txt):
        """Handle a paragraph (ie a line).

        Every open list, blockquote, description list or preformatted block
        is closed first; an empty line therefore only acts as a separator.
        """
        self._close_lists(0)
        self._unstackblocks(0)
        self._close_dlist()
        self._end_pre()
        if txt:
            self._cptpar += 1
            self._add_divtext()
            self._cb.write("<p>")
            self._cb.write(self._inline_format(txt))
            self._cb.write("</p>\n")
        # ----------------------------------------------------------- _parag()

    def _apply_heading(self, pilot, text):
        """Handle a heading command.

        pilot -- the run of '+' characters; its length gives the <hN> level.
        text  -- the title text; nothing is written when it is empty.
        """
        self._close_lists(0)
        self._unstackblocks(0)
        self._close_dlist()
        self._end_pre()
        if text:
            if self._divtext:
                self._cb.write("</div>\n")
            level = len(pilot)
            self._cb.write("<h%d>" % level)
            self._cb.write(self._inline_format(text))
            # a heading always opens a new text section
            self._cb.write("</h%d>\n<div class='text'>\n" % level)
            self._divtext = 1
        # --------------------------------------------------- _apply_heading()

    def _apply_list(self, pilot, text):
        """Handle a list command.

        pilot -- the run of '*' / '#' characters: its length is the nesting
                 depth of the item and its last character the kind of list.
        text  -- the item text; nothing is written when it is empty.

        Sets error 1 when the item is nested more than one level deeper than
        the current list, and error 2 when an item of the current depth is
        of another kind than the open list; the item is dropped in both
        cases.
        """
        self._unstackblocks(0)
        self._close_dlist()
        self._end_pre()
        if not text:
            return
        self._cptpar += 1
        self._add_divtext()
        depth = len(pilot)
        ltype = pilot[-1]
        current = len(self._llevels)
        list_start, item_start, item_end = self._ListsTags[ltype][:3]
        if depth > current:
            if depth != current + 1:
                self._error = 1
                return
            #  new list imbrication
            self._llevels.append(ltype)
            self._cb.write(list_start)
        elif depth < current:
            # close higher level lists, then end the item left open
            self._close_lists(depth)
            self._cb.write(item_end)
        else:
            # current list continuation
            if ltype != self._llevels[-1]:
                self._error = 2
                return
            self._cb.write(item_end)
        # the item is left open: it is closed by the next item or by
        # _close_lists()
        self._cb.write(item_start)
        self._cb.write(self._inline_format(text))
        # ------------------------------------------------------ _apply_list()

    def _close_lists(self, level=0):
        """Close all the imbricated lists higher than 'level'.
        """
        while len(self._llevels) > level:
            ltype = self._llevels.pop()
            self._cb.write(self._ListsTags[ltype][2])
            self._cb.write(self._ListsTags[ltype][3])
        # ----------------------------------------------------- _close_lists()

    def _apply_dlist(self, term, text):
        """Handle a description list command.

        term -- the described term; when empty, 'text' is added as a new
                paragraph of the current description.
        text -- the description text; nothing is written when it is empty.

        Sets error 3 when an empty term is found outside a description list.
        """
        self._close_lists(0)
        self._unstackblocks(0)
        self._end_pre()
        if not text:
            return
        self._cptpar += 1
        self._add_divtext()
        list_start, item_start, item_end = self._ListsTags['d'][:3]
        if term:
            if self._indlist:
                # end the previous description
                self._cb.write(item_end)
            else:
                self._cb.write(list_start)
                self._indlist = 1
            self._cb.write(item_start % term)
            self._cb.write(self._inline_format(text))
        elif self._indlist:
            self._cb.write("\n<p>")
            self._cb.write(self._inline_format(text))
            self._cb.write("</p>\n")
        else:
            self._error = 3  # not empty term to start a list
        # ----------------------------------------------------- _apply_dlist()

    def _close_dlist(self, level=0):
        """Close the current description list, if any.

        'level' is not used.
        """
        if self._indlist:
            self._cb.write(self._ListsTags['d'][2])
            self._cb.write(self._ListsTags['d'][3])
            self._indlist = 0
        # ----------------------------------------------------- _close_dlist()

    def _apply_blockq(self, pilot, text):
        """Handle a blockquote command.

        pilot -- the run of '&gt;' entities starting the line (each '>' of
                 the source has been converted by _removeHTML).
        text  -- the quoted text; nothing is written when it is empty.
        """
        self._close_lists(0)
        self._close_dlist()
        self._end_pre()
        if text:
            self._cptpar += 1
            self._add_divtext()
            # '&gt;' is 4 characters long; '//' keeps the level an integer
            # whatever the division semantics in force
            level = len(pilot) // 4
            if level > self._blocklevel:
                while level > self._blocklevel:
                    self._blocklevel += 1
                    self._cb.write("<blockquote>\n")
            else:
                self._unstackblocks(level)
            self._cb.write("<p>" + self._inline_format(text) + "</p>\n")
        # ---------------------------------------------------- _apply_blockq()

    def _unstackblocks(self, final=0):
        """Close all opened indent blocks until 'final' level is reached.
        """
        while self._blocklevel > final:
            self._blocklevel -= 1
            self._cb.write("</blockquote>\n")
        # --------------------------------------------------- _unstackblocks()

    def _apply_pre(self, text):
        """Handle a preformatted line.

        The first line of a run opens a boxed <pre> block; the following
        ones are appended to it.  'text' is written as is, without inline
        formatting; nothing is written when it is empty.
        """
        self._close_lists(0)
        self._close_dlist()
        self._unstackblocks(0)
        if text:
            self._cptpar += 1
            self._add_divtext()
            if not self._inpre:
                self._cb.write("<div class='box'>\n")
                self._cb.write("<pre style='font-size: 90%; "
                               "font-family: monospace;'>\n")
                self._inpre = 1
            self._cb.write("%s\n" % text)
        # ------------------------------------------------------- _apply_pre()

    def _end_pre(self):
        """Close the current preformatted block, if any.
        """
        if self._inpre:
            self._inpre = 0
            self._cb.write("</pre>\n</div>\n")
        # --------------------------------------------------------- _end_pre()

    def _inline_format(self, text):
        """Handle the inline format of the paragraph.

        The substitutions are applied in a fixed order: the combined
        emphasis+strong and delete+insert forms come before the simple ones
        they contain.
        """
        rules = (
            (RE_emst, r"<em><b>\g<1></b></em>"),
            (RE_emph, r"<em>\g<1></em>"),
            (RE_stro, r"<b>\g<1></b>"),
            (RE_tt, r"<tt>\g<1></tt>"),
            (RE_delins, r"<del>\g<1></del><ins>\g<2></ins>"),
            (RE_ins, r"<ins>\g<1></ins>"),
            (RE_del, r"<del>\g<1></del>"),
            )
        for (regex, template) in rules:
            text = regex.sub(template, text)
        text = self._handle_links(text)
        ## text = self._handle_arrays (text)    # not implemented
        return text
        # --------------------------------------------------- _inline_format()

    def _handle_links(self, text):
        """Handle the inline links constructions.

        Images are converted first; RE_img2 then matches the <img> tag
        generated inside a bracketed construct to give it its alternate
        text.
        """
        rules = (
            (RE_img1, r"<img src='\g<1>' alt='\g<1>' /> "),
            (RE_img2, r"<img src='\g<1>' alt='\g<2>' /> "),
            (RE_genlnk, r"<a target='_new' href='\g<1>'>\g<2></a>"),
            (RE_simlnk, r"<a href='\g<1>' target='_new'><tt>\g<1></tt></a>"),
            (RE_url, r"\g<1><a href='\g<2>' target='_new'>\g<2></a> "),
            )
        for (regex, template) in rules:
            text = regex.sub(template, text)
        return text
        # ---------------------------------------------------- _handle_links()

    # ------------------------------------------------------------------------
    #   Testing Units implementing methods
    #

    def TST_transform(self, text):
        """Transformation testing unit: print the XHTML produced for 'text'.
        """
        (txt, status) = self.transform(text)
        ##print "status = '%s'" % status
        print(txt)
        # ---------------------------------------------------- TST_transform()


# ------------------------------------------------------------- class MiniWiki
#
#   Main testing function called for self-test
#
def test():
    """Main testing function

    Launched by direct execution of module: "python MiniWiki.py"

    Prints on standard output an HTML page holding the translation of a
    few sample wiki texts.
    """

    text1 = "Une simple chane"
    text2 = """+++ Un titre
Un essai tout ''simple'' qui va se **compliquer** beaucoup
----
Un paragraphe\\
qui se ''**poursuit sur une autre ligne**''
Un autre
>
> Avec une identation
>> Une autre
Retour  la normale
Une <b>tentative</b> de foutre le <em>bordel</em> avec HTML
Alors que c'est si {{simple}} de bien faire
Des essais @@---de suppression et +++ d'ajout @@ ensemble
Et spars @@---suppression @@ et @@+++ ajout @@
Et spars @@+++ ajout @@ et @@---suppression@@
Tout peut arriver
Les liens http://pouevretseu.free.fr et [pes.free.fr]
encore [http://pouevretseu.free.fr PES] et mailto:pes@online.fr pour voir
encore **[http://pouevretseu.free.fr PES]** et ''mailto:pes@online.fr'' pour voir
et une url en bout de ligne http://perso.free.fr/
images images/toto.gif ou encore [dudule.png la fleur]
"""
    text3 = """++++Essai de listes
# d'abord
# simple

* toujours
* simple
Un autre imbrique maintenant
* un item
** sous-item 1
** sous-item 2
* item suivant
*# avec une enum
*# pour voir
*## et un
*## et deux
* pour finir
* pour finir

"""
    text4 = """++++ Essai de liste de description
:une commande: c'est beau
:deux: c'est mieux
:: surtout si on peut mettre des paragraphes
:trois: a voir
``    def TST_transform (self, text):
``        "Creation testing unit."
``        (txt, status) = self.transform (text)
``        print "status = '%s'" % status
``        print txt
``        # ---------------------------- TST_transform()

"""
    # page header, with the style sheet used by the generated markup
    print("""<html>
<head>
<title>Un test</title>
<style type='text/css'>
 body { background-color: white; font-family: sans-serif;}
 .text {margin: 0.3ex 1em 0.3ex 2em; text-align: justify;}
 .box {margin: 0.3ex 2em; padding: 0.5ex 1em;
       border: 1px solid black; background-color: #d9e9e9; }
 p {margin: 0.1ex 0ex; text-align: justify;}
</style>
</head>
<body>
""")
    # the same converter is reused: transform() resets its state each time
    wiki = MiniWiki()
    for sample in (text1, text2, text3, text4):
        wiki.TST_transform (sample)
    print("</body>\n</html>")
    # ----------------------------------------------------------------- test()

#   Launch test if called standalone
if __name__=="__main__": test()

# ------------------------------------------------------------ module MiniWiki

