<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.1"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform" 
    xmlns="http://www.tei-c.org/ns/1.0"
    xmlns:tei="http://www.tei-c.org/ns/1.0"
    xmlns:exsl="http://exslt.org/common"
    xmlns:msxsl="urn:schemas-microsoft-com:xslt"
    xmlns:ui="http://oreo.grainger.uiuc.edu/stylesheets/"
    >
    <!-- Serialisation settings: indented UTF-8 XML with a DOCTYPE whose system identifier is the teisimple DTD. -->
    <xsl:output doctype-system="http://ariadne.northwestern.edu/monk/dtds/teisimple.dtd"
        indent="yes" method="xml" encoding="UTF-8" media-type="text/xml"/>
    <!-- pbAlign is the offset between book pagination and leaf (page-image) position: the page template computes
         book page = position of the page OBJECT minus pbAlign. If page 7 of the book is page-image 15, this value is 8. -->
    <xsl:param name="pbAlign" select="12"/>
    <!-- firstBodyPage is the number of the first page break treated as part of the TEI text/body; expressed in book pagination. -->
    <xsl:param name="firstBodyPage" select="3"/>
    <!-- lastBodyPage is the number of the last page break that is part of the TEI text/body; expressed in book pagination.
         DjVu pages after it are not processed. -->
    <xsl:param name="lastBodyPage" select="367"/>
    <!-- page no.292, image no.312-->
    <!-- Odd-numbered pages: a paragraph is ignored when the 2nd coord of its first word is less than or equal to
         ignoreAboveOdd (i.e., to suppress a running head). Could default to a typical header margin. -->
    <xsl:param name="ignoreAboveOdd" select="210"/>
    <!-- Odd-numbered pages: a paragraph is ignored when the 4th coord of its first word is greater than or equal to
         ignoreBelowOdd (i.e., to suppress a running footer). Could default to the page-height attribute value minus a typical footer margin. -->
    <xsl:param name="ignoreBelowOdd" select="3100"/>
    <!-- Even-numbered pages: a paragraph is ignored when the 2nd coord of its first word is less than or equal to
         ignoreAboveEven (i.e., to suppress a running head). Could default to a typical header margin. -->
    <xsl:param name="ignoreAboveEven" select="210"/>
    <!-- Even-numbered pages: a paragraph is ignored when the 4th coord of its first word is greater than or equal to
         ignoreBelowEven (i.e., to suppress a running footer). Could default to the page-height attribute value minus a typical footer margin. -->
    <xsl:param name="ignoreBelowEven" select="3100"/>
    <!-- If the string value of this param is 'True', a TEI div is started each time a paragraph is recognised as a CHAPTER heading.
         The first such heading creates the opening div; the last div is closed at the end of the last page in the body (see lastBodyPage above). -->
    <xsl:param name="chapterDiv" select="&quot;False&quot;"/>
    <!-- Name of the TEI Header fragment file, as generated by transforming MARC XML to TEI Header XML.
         For OCA the filename is derivable from the djvu.xml filename. The MARC transform could probably be combined into this XSL later. -->
    <!--<xsl:param name="TEIHeaderFragFileName" select="&quot;..\results\adventuresoftoms00twai2_tei_header.xml&quot; "/>-->
    <xsl:param name="TEIHeaderFragFileName"
        select="&quot;SampleXMLFiles\ghostseer01schiuoft_tei_header.xml&quot; "/>
    <!-- Entry point. Builds the TEI root element (version 5.0), copies in the teiHeader element found at the top of
         the external header fragment document, then emits text/body by processing every page OBJECT of the DjVuXML
         input. Elements are created with xsl:element so that the stylesheet's extension namespaces are not copied
         onto the result. -->
    <xsl:template match="/">
        <xsl:element name="TEI">
            <xsl:attribute name="version">
                <xsl:text>5.0</xsl:text>
            </xsl:attribute>
            <!-- line break before the header copied from the fragment file -->
            <xsl:text>&#xA;</xsl:text>
            <xsl:copy-of select="document($TEIHeaderFragFileName)/child::tei:teiHeader"/>
            <xsl:element name="text">
                <xsl:element name="body">
                    <!-- the context node is the document root, so this relative path selects the same
                         page objects, in the same order, as the absolute path would -->
                    <xsl:apply-templates select="DjVuXML/BODY/OBJECT[PARAM/@name = 'PAGE']"/>
                </xsl:element>
            </xsl:element>
        </xsl:element>
    </xsl:template>
    <!-- One DjVu page OBJECT.
         The book page number is the position of this OBJECT in the list selected by the caller minus $pbAlign.
         Pages outside firstBodyPage..lastBodyPage produce no output. For pages in range a pb element is written
         (xml:id, n, facs) followed by the page's paragraphs. When $chapterDiv is 'True' the closing div tag is
         written as raw text after the last body page.
         NOTE(review): that closing tag is written even if no CHAPTER heading ever opened a div; confirm the
         input always contains at least one chapter heading when chapterDiv is enabled. -->
    <xsl:template match="OBJECT[PARAM/@name='PAGE']">
        <xsl:variable name="pagenum" select="position()-number($pbAlign)"/>
        <xsl:if
            test="$pagenum&gt;=number($firstBodyPage) and $pagenum&lt;=number($lastBodyPage)">
            <xsl:text>&#10;</xsl:text>
            <xsl:element name="pb">
                <xsl:attribute name="xml:id">
                    <xsl:value-of select="concat(&quot;page&quot;,string($pagenum))"/>
                </xsl:attribute>
                <xsl:attribute name="n">
                    <xsl:number value="$pagenum"/>
                </xsl:attribute>
                <xsl:attribute name="facs">
                    <!-- Read the value of the PAGE param explicitly. A bare PARAM/@value would use whichever
                         PARAM happens to come first in the OBJECT, which is only right when PAGE is listed first. -->
                    <xsl:value-of select="concat('images/', substring-before(PARAM[@name='PAGE'][1]/@value,'.djvu'),'.jpg')"/>
                </xsl:attribute>
            </xsl:element>
            <!-- applying templates to an empty selection does nothing, so no existence test is needed -->
            <xsl:apply-templates select="HIDDENTEXT/PAGECOLUMN/REGION/PARAGRAPH">
                <xsl:with-param name="page-num" select="$pagenum"/>
            </xsl:apply-templates>
            <xsl:if test="$pagenum=number($lastBodyPage) and $chapterDiv=&quot;True&quot;">
                <xsl:text disable-output-escaping="yes">&lt;/div>&#10;</xsl:text>
            </xsl:if>
        </xsl:if>
    </xsl:template>
    <!-- One OCR paragraph.
         Param page-num: book page number supplied by the page template (defaults to 0).
         The paragraph is dropped unless the 2nd coord of its first word is greater than the ignoreAbove value and
         the 4th coord is less than the ignoreBelow value for the page's parity (odd or even page-num).
         A kept paragraph becomes either
           * a chapter heading: when $chapterDiv is 'True' and the first word is CHAPTER, a div start tag is
             written as raw text (preceded by a div end tag unless this is the first body page) followed by a
             head element; or
           * a p element wrapping a seg element whose rend attribute holds the bounding box
             "min 1st coord, min 4th coord, max 3rd coord, max 2nd coord" computed from the word coords.
         xml:id values are derived from page-num * 100 (plus position() * 2 for p and seg). -->
    <xsl:template match="PARAGRAPH">
        <xsl:param name="page-num" select="0"/>
        <!-- Only the first word of the paragraph. Selecting every WORD here would make the comparison with
             'CHAPTER' below succeed when ANY word of the paragraph is CHAPTER, because a node-set = string
             comparison is true as soon as one node matches. -->
        <xsl:variable name="firstWord" select="(LINE/WORD)[1]"/>
        <!-- coords of the first word that carries a coords attribute -->
        <xsl:variable name="coords" select="(LINE/WORD/@coords)[1]"/>
        <!-- peel the comma separated coords apart: coord2 is the 2nd value, coord4 everything after the 3rd comma -->
        <xsl:variable name="coord1" select="substring-after($coords, ',')"/>
        <xsl:variable name="coord2" select="substring-before($coord1, ',')"/>
        <xsl:variable name="coord3" select="substring-after($coord1, ',')"/>
        <xsl:variable name="coord4" select="substring-after($coord3, ',')"/>
        <!-- 0 for even pages, non-zero (true) for odd pages -->
        <xsl:variable name="odd" select="$page-num mod 2"/>
        <!-- The following 4 variables are lists of coord values (one val element each) from which a max or min is
             selected to define the box bounding the paragraph.
             Note the need for the xmlns attribute: the default namespace of this stylesheet is TEI, and the val
             elements must be in no namespace so that the unprefixed val steps below find them. -->
        <!-- 1st coord of the first word of every line -->
        <xsl:variable name="rend1" xmlns="">
            <xsl:for-each select="./LINE">
                <xsl:element name="val">
                    <xsl:value-of
                        select="substring-before(string(./WORD[1]/@coords), ',')"
                    />
                </xsl:element>
            </xsl:for-each>
        </xsl:variable>
        <!-- 4th coord of every word of the first line -->
        <xsl:variable name="rend2" xmlns="">
            <xsl:for-each select="./LINE[1]/WORD">
                <xsl:element name="val">
                    <xsl:value-of
                        select="substring-after(substring-after(substring-after(string(./@coords), ','), ','), ',')"
                    />
                </xsl:element>
            </xsl:for-each>
        </xsl:variable>
        <!-- 3rd coord of the last word of every line -->
        <xsl:variable name="rend3" xmlns="">
            <xsl:for-each select="./LINE">
                <xsl:element name="val">
                    <xsl:value-of
                        select="substring-before(substring-after(substring-after(string(./WORD[last()]/@coords), ','), ','), ',')"
                    />
                </xsl:element>
            </xsl:for-each>
        </xsl:variable>
        <!-- 2nd coord of every word of the last line -->
        <xsl:variable name="rend4" xmlns="">
            <xsl:for-each select="./LINE[last()]/WORD">
                <xsl:element name="val">
                    <xsl:value-of
                        select="substring-before(substring-after(string(./@coords), ','), ',')"
                    />
                </xsl:element>
            </xsl:for-each>
        </xsl:variable>
        <xsl:if
            test="($odd and number($coord2)&gt;number($ignoreAboveOdd) and number($coord4)&lt;number($ignoreBelowOdd)) or
            (not($odd ) and number($coord2)&gt;number($ignoreAboveEven) and number($coord4)&lt;number($ignoreBelowEven))">
            <xsl:choose>
                <xsl:when
                    test="$firstWord='CHAPTER' and $chapterDiv='True'">
                    <xsl:text>&#10;</xsl:text>
                    <!-- close the previous chapter div, except on the first body page where none is assumed open -->
                    <xsl:if test="number($page-num)!=number($firstBodyPage)">
                        <xsl:text disable-output-escaping="yes">&lt;/div>&#10;</xsl:text>
                    </xsl:if>
                    <!-- the div start tag is emitted as raw text because it is closed by a later template call -->
                    <xsl:text disable-output-escaping="yes">&lt;div xml:id="</xsl:text>
                    <xsl:value-of
                        select="concat('div', string((number($page-num)*100)), '&quot; ')"/>
                    <xsl:text disable-output-escaping="yes">org="uniform" part="N" sample="complete" type="Chapter">&#10;</xsl:text>
                    <xsl:element name="head">
                        <xsl:apply-templates select="LINE"/>
                        <xsl:text>&#10;&#09;</xsl:text>
                    </xsl:element>
                </xsl:when>
                <xsl:otherwise>
                    <xsl:element name="p">
                        <xsl:attribute name="xml:id">
                            <xsl:value-of
                                select="concat('para', string((number($page-num)*100)+(position()*2)))"
                            />
                        </xsl:attribute>
                        <xsl:element name="seg">
                            <xsl:attribute name="xml:id">
                                <xsl:value-of
                                    select="concat('seg',string((number($page-num)*100)+(position()*2)))"
                                />
                            </xsl:attribute>

                            <xsl:choose>
                                <!-- These are XPath 2.0 functions, but if you set xsl:stylesheet version='2.0', warnings are generated in oXygen when using a 1.0 parser.
                                     Originally tested with Saxon 9B version 9.1.0.3.
                                     The aggregate runs over ALL val entries (not just the first one) so that this branch yields the
                                     same box as the recursive min/max templates used by the other branches. Entries that are not
                                     numbers are filtered out (number(.) = number(.) is false for NaN), mirroring the templates,
                                     which never pick a NaN value. -->
                                <xsl:when test="function-available('min')">
                                    <xsl:attribute name="rend">
                                        <xsl:value-of select="min($rend1/val[number(.) = number(.)])"/>
                                        <xsl:text>,</xsl:text>
                                        <xsl:value-of select="min($rend2/val[number(.) = number(.)])"/>
                                        <xsl:text>,</xsl:text>
                                        <xsl:value-of select="max($rend3/val[number(.) = number(.)])"/>
                                        <xsl:text>,</xsl:text>
                                        <xsl:value-of select="max($rend4/val[number(.) = number(.)])"/>
                                    </xsl:attribute>
                                </xsl:when>

                                <!-- The node-set function included with the MS XML 4 parser allows graceful fallback to XSLT 1.0.
                                     Tested with the MSXML 4 parser. Actually seems a bit faster than the Saxon 9B implementation. -->
                                <xsl:when test="function-available('msxsl:node-set')">
                                    <xsl:attribute name="rend">
                                        <xsl:call-template name="min">
                                            <xsl:with-param name="currnode" select="msxsl:node-set($rend1)/val[1]"/>
                                        </xsl:call-template>
                                        <xsl:text>,</xsl:text>
                                        <xsl:call-template name="min">
                                            <xsl:with-param name="currnode" select="msxsl:node-set($rend2)/val[1]"/>
                                        </xsl:call-template>
                                        <xsl:text>,</xsl:text>
                                        <xsl:call-template name="max">
                                            <xsl:with-param name="currnode" select="msxsl:node-set($rend3)/val[1]"/>
                                        </xsl:call-template>
                                        <xsl:text>,</xsl:text>
                                        <xsl:call-template name="max">
                                            <xsl:with-param name="currnode" select="msxsl:node-set($rend4)/val[1]"/>
                                        </xsl:call-template>
                                    </xsl:attribute>
                                </xsl:when>

                                <!-- This uses the VBScript extension instead of the node-set function and the self-recursive templates min and max to calculate the rend value.
                                     Tested with MS XML 4, but the VBScript approach turned out to be noticeably slower than the non-script solution.
                                     Should never be chosen, since the preceding case takes priority for MS XML parsers. -->
                                <xsl:when test="function-available('ui:getCoords')">
                                    <xsl:variable name="pcoords" select="ui:getCoords(./LINE)"/>
                                    <xsl:attribute name="rend">
                                        <xsl:value-of select="$pcoords"/>
                                    </xsl:attribute>
                                </xsl:when>

                                <!-- This leverages the exsl node-set extension function, which is built into Saxon parsers.
                                     But when tested with Saxon 6.5.5, only the first page or two could be processed before running out of memory. -->
                                <xsl:when test="function-available('exsl:node-set')">
                                    <xsl:attribute name="rend">
                                        <xsl:call-template name="min">
                                            <xsl:with-param name="currnode" select="exsl:node-set($rend1)/val[1]"/>
                                        </xsl:call-template>
                                        <xsl:text>,</xsl:text>
                                        <xsl:call-template name="min">
                                            <xsl:with-param name="currnode" select="exsl:node-set($rend2)/val[1]"/>
                                        </xsl:call-template>
                                        <xsl:text>,</xsl:text>
                                        <xsl:call-template name="max">
                                            <xsl:with-param name="currnode" select="exsl:node-set($rend3)/val[1]"/>
                                        </xsl:call-template>
                                        <xsl:text>,</xsl:text>
                                        <xsl:call-template name="max">
                                            <xsl:with-param name="currnode" select="exsl:node-set($rend4)/val[1]"/>
                                        </xsl:call-template>
                                    </xsl:attribute>
                                </xsl:when>

                                <!-- As long as the xsl:stylesheet version is 1.1 (not 1.0), Saxon 6.5.5 also accepts this approach without the node-set function.
                                     The separator is a bare comma, the same as in every other branch, so the rend format does not depend on the processor. -->
                                <xsl:otherwise>
                                    <xsl:attribute name="rend">
                                        <xsl:call-template name="min">
                                            <xsl:with-param name="currnode" select="$rend1/val[1]"/>
                                        </xsl:call-template>
                                        <xsl:text>,</xsl:text>
                                        <xsl:call-template name="min">
                                            <xsl:with-param name="currnode" select="$rend2/val[1]"/>
                                        </xsl:call-template>
                                        <xsl:text>,</xsl:text>
                                        <xsl:call-template name="max">
                                            <xsl:with-param name="currnode" select="$rend3/val[1]"/>
                                        </xsl:call-template>
                                        <xsl:text>,</xsl:text>
                                        <xsl:call-template name="max">
                                            <xsl:with-param name="currnode" select="$rend4/val[1]"/>
                                        </xsl:call-template>
                                    </xsl:attribute>
                                </xsl:otherwise>
                            </xsl:choose>

                            <xsl:apply-templates select="LINE"/>
                            <xsl:text>&#10;&#09;</xsl:text>
                        </xsl:element>
                    </xsl:element>
                </xsl:otherwise>
            </xsl:choose>
        </xsl:if>
    </xsl:template>
    <!-- One OCR line: writes a newline and a tab, an empty lb element, then the line's words. -->
    <xsl:template match="LINE">
        <xsl:text>&#xA;&#x9;</xsl:text>
        <xsl:element name="lb"/>
        <xsl:apply-templates select="child::WORD"/>
    </xsl:template>
    <!-- One OCR word: outputs the word's content, then a single space unless this is the last word of the
         current node list or the word is a lone double-quote or apostrophe character.
         The two text() comparisons are node-set comparisons, so a WORD without any text node never gets a
         trailing space. -->
    <xsl:template match="WORD">
        <xsl:apply-templates select="node()"/>
        <xsl:if test="position() &lt; last() and text() != '&quot;' and text() != &quot;'&quot;">
            <xsl:text> </xsl:text>
        </xsl:if>
    </xsl:template>
    <!-- Named template max: writes the largest numeric value found in the element passed as currnode and in
         all of its following siblings.
         Param max:      largest value seen so far (defaults to 0, so the result is never below 0).
         Param currnode: element to examine on this step (defaults to the context node).
         Values that are not numbers are skipped, because any comparison with NaN is false.
         Each step hands only the NEXT sibling to the recursive call rather than the whole set of remaining
         siblings; the values visited and the result are the same, but the node-set carried through the
         recursion stays at one node. -->
    <xsl:template name="max">
        <xsl:param name="max" select="0"/>
        <xsl:param name="currnode" select="."/>
        <xsl:variable name="currnum" select="number($currnode)"/>
        <xsl:variable name="next" select="$currnode[1]/following-sibling::*[1]"/>
        <xsl:choose>
            <!-- new maximum found and more siblings to visit -->
            <xsl:when test="$currnum>$max and $next">
                <xsl:call-template name="max">
                    <xsl:with-param name="max" select="$currnum"/>
                    <xsl:with-param name="currnode" select="$next"/>
                </xsl:call-template>
            </xsl:when>
            <!-- maximum unchanged, more siblings to visit -->
            <xsl:when test="$next">
                <xsl:call-template name="max">
                    <xsl:with-param name="max" select="$max"/>
                    <xsl:with-param name="currnode" select="$next"/>
                </xsl:call-template>
            </xsl:when>
            <!-- last sibling: emit whichever value is larger -->
            <xsl:when test="$currnum>$max">
                <xsl:value-of select="$currnum"/>
            </xsl:when>
            <xsl:otherwise>
                <xsl:value-of select="$max"/>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>
    <!-- Named template min: writes the smallest numeric value found in the element passed as currnode and in
         all of its following siblings.
         Param min:      smallest value seen so far (defaults to 10000, so the result is never above 10000).
         Param currnode: element to examine on this step (defaults to the context node).
         Values that are not numbers are skipped, because any comparison with NaN is false.
         Each step hands only the NEXT sibling to the recursive call rather than the whole set of remaining
         siblings; the values visited and the result are the same, but the node-set carried through the
         recursion stays at one node. -->
    <xsl:template name="min">
        <xsl:param name="min" select="10000"/>
        <xsl:param name="currnode" select="."/>
        <xsl:variable name="currnum" select="number($currnode)"/>
        <xsl:variable name="next" select="$currnode[1]/following-sibling::*[1]"/>
        <xsl:choose>
            <!-- new minimum found and more siblings to visit -->
            <xsl:when test="$currnum&lt;$min and $next">
                <xsl:call-template name="min">
                    <xsl:with-param name="min" select="$currnum"/>
                    <xsl:with-param name="currnode" select="$next"/>
                </xsl:call-template>
            </xsl:when>
            <!-- minimum unchanged, more siblings to visit -->
            <xsl:when test="$next">
                <xsl:call-template name="min">
                    <xsl:with-param name="min" select="$min"/>
                    <xsl:with-param name="currnode" select="$next"/>
                </xsl:call-template>
            </xsl:when>
            <!-- last sibling: emit whichever value is smaller -->
            <xsl:when test="$currnum&lt;$min">
                <xsl:value-of select="$currnum"/>
            </xsl:when>
            <xsl:otherwise>
                <xsl:value-of select="$min"/>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>
    <!-- VBScript extension function ui:getCoords(lines), bound to the ui prefix through implements-prefix.
         It takes the LINE nodes of a paragraph and returns the string "x1,y2,x2,y1", where
           x1 = smallest 1st coord among the first WORD of each line,
           y2 = smallest value after the last comma among the WORD coords of the first line,
           x2 = largest 3rd coord among the last WORD of each line,
           y1 = largest 2nd coord among the WORD coords of the last line.
         All values are converted with cint.
         Inside the last inner loop the first comma is located in lastwordcoords.item(0) rather than in the
         loop variable; the node list there comes from WORD[position()=last()] on a single line, so it holds
         at most one node and both refer to the same text.
         NOTE(review): item(0) is dereferenced without a check, so a LINE that contains no WORD looks like it
         would raise a script error; confirm against real input. -->
    <msxsl:script language="VBSCRIPT" implements-prefix="ui">
        <![CDATA[
	function getCoords(lines)
		'find the smallest y2 in the first line
		Set firstLine = lines.item(0)
		Set coords = firstLine.selectNodes("WORD/@coords")
		
		Set firstCoord = coords.item(0)
		lastcomma = InStrRev(firstCoord.text, ",")
		y2 = cint(Mid(firstCoord.text, lastcomma+1))
		For Each coord In coords
			lastcomma = InStrRev(coord.text, ",")
			y2new = cint(Mid(coord.text, lastcomma+1))
			If y2new < y2 Then y2 = y2new
		Next 
		
		'find the largest y1 in the last line
		Set lastLine = lines.item(lines.length-1)
		Set coords = lastLine.selectNodes("WORD/@coords")
		
		Set lastCoord = coords.item(coords.length-1)
		firstcomma = InStr(lastCoord.text, ",")
		secondcomma = InStr(firstcomma+1, lastCoord.text, ",")
		y1 = cint(Mid(lastCoord.text, firstcomma+1, secondcomma-firstcomma-1))
		For Each coord In coords
			firstcomma = InStr(coord.text, ",")
			secondcomma = InStr(firstcomma+1, coord.text, ",")
			y1new = cint(Mid(coord.text, firstcomma+1, secondcomma-firstcomma-1))
			If y1new > y1 Then y1 = y1new
		Next 
		
		'find the smallest x1 in the first word of any line
		' and the largetst x2 in the last word of any line
		Set firstwordcoords = firstLine.selectNodes("WORD[1]/@coords")
		firstcomma = InStr(firstwordcoords.item(0).text, ",")
		x1 = cint(Mid(firstwordcoords.item(0).text, 1, firstcomma-1))
		
		Set lastwordcoords = firstLine.selectNodes("WORD[position()=last()]/@coords")
		firstcomma = InStr(lastwordcoords.item(0).text, ",")
		secondcomma = InStr(firstcomma+1, lastwordcoords.item(0).text, ",")
		thirdcomma = InStr(secondcomma+1, lastwordcoords.item(0).text, ",")
		x2 = cint(Mid(lastwordcoords.item(0).text, secondcomma+1, thirdcomma-secondcomma-1))
		For Each line In lines
			Set firstwordcoords = line.selectNodes("WORD[1]/@coords")
			firstcomma = InStr(firstwordcoords.item(0).text, ",")
			x1temp = cint(Mid(firstwordcoords.item(0).text, 1, firstcomma-1))
			For Each coord In firstwordcoords
				firstcomma = InStr(coord.text, ",")
				x1new = cint(Mid(coord.text, 1, firstcomma-1))
				If x1new < x1temp Then x1temp = x1new
			Next 
			If x1temp < x1 Then x1 = x1temp
			
			Set lastwordcoords = line.selectNodes("WORD[position()=last()]/@coords")
			firstcomma = InStr(lastwordcoords.item(0).text, ",")
			secondcomma = InStr(firstcomma+1, lastwordcoords.item(0).text, ",")
			thirdcomma = InStr(secondcomma+1, lastwordcoords.item(0).text, ",")
			x2temp = cint(Mid(lastwordcoords.item(0).text, secondcomma+1, thirdcomma-secondcomma-1))
			For Each coord In lastwordcoords
				firstcomma = InStr(lastwordcoords.item(0).text, ",")
				secondcomma = InStr(firstcomma+1, coord.text, ",")
				thirdcomma = InStr(secondcomma+1, coord.text, ",")
				x2new = cint(Mid(coord.text, secondcomma+1, thirdcomma-secondcomma-1))
				If x2new > x2temp Then x2temp = x2new
			Next 
			If x2temp > x2 Then x2 = x2temp
		Next 
		getCoords= cstr(x1) + "," + cstr(y2) + "," + cstr(x2) + "," + cstr(y1) 
	end function
	]]>
    </msxsl:script>
</xsl:stylesheet>
