Project

Profile

Help

Bug #1498 » convert-xml.xsl

A 'naive' XML Parser - Philip Fearon, 2012-04-25 20:59

 
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="2.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
exclude-result-prefixes="xs f"
xmlns:f="internal">

<xsl:template match="/">
  <!-- Entry point: emits a minimal HTML page whose body is whatever f:render() returns.
       Nothing from the matched document node is read in this template. -->
  <html>
    <head>
      <title>3</title>
    </head>
    <body>
      <xsl:copy-of select="f:render()"/>
    </body>
  </html>
</xsl:template>

<xsl:function name="f:render">
  <!-- Loads test-render3.xml as plain text, cuts it at every '<' character and
       passes the resulting pieces to f:iterateTokens, starting at piece 1 with
       no delimiter awaited ('n'). -->
  <xsl:variable name="pieces" as="xs:string*"
                select="tokenize(unparsed-text('test-render3.xml'), '&lt;')"/>
  <!-- Diagnostic output: the number of pieces produced by the split. -->
  <xsl:message><xsl:value-of select="count($pieces)"/></xsl:message>
  <xsl:sequence select="f:iterateTokens($pieces, 1, 'n', 0, 0)"/>
</xsl:function>

<xsl:function name="f:getTagType">
  <!-- Classifies a token (the text that followed a '<') by its leading characters
       and returns a two-letter code:
         '?'   : pi
         '!-'  : cm
         '!['  : cd
         '!'   : dt   (any other '!' construct)
         '/'   : cl
         other : tg   (open tag, may be self-closing)
       An empty or absent token falls through to 'tg'. -->
  <xsl:param name="token" as="xs:string?"/>
  <xsl:choose>
    <xsl:when test="starts-with($token, '?')"><xsl:text>pi</xsl:text></xsl:when>
    <!-- The two-character prefixes must be tested before the bare '!' case. -->
    <xsl:when test="starts-with($token, '!-')"><xsl:text>cm</xsl:text></xsl:when>
    <xsl:when test="starts-with($token, '![')"><xsl:text>cd</xsl:text></xsl:when>
    <xsl:when test="starts-with($token, '!')"><xsl:text>dt</xsl:text></xsl:when>
    <xsl:when test="starts-with($token, '/')"><xsl:text>cl</xsl:text></xsl:when>
    <xsl:otherwise><xsl:text>tg</xsl:text></xsl:otherwise>
  </xsl:choose>
</xsl:function>

<xsl:function name="f:iterateTokens" as="element()*">
  <!--
    Processes the token at $index and then recurses to the next one, returning the
    highlighting elements for all tokens from $index onwards.

    tokens   : the source text split at every '<' (see f:render).
    index    : 1-based position of the token handled by this call.
    expected : close delimiter still being looked for, or 'n' when none is awaited.
    beganAt  : index of the token at which the awaited construct started; used to
               pick the CSS class and to rebuild the full text once it is closed.
    level    : passed through but not otherwise used; every recursive call passes 0.

    Two modes:
      * awaiting (expected is not 'n'): look for the delimiter in the current token.
        If present, emit one span for everything from token beganAt up to and
        including the delimiter, followed by a span.tx with the trailing text.
      * not awaiting: treat the token as the start of a new construct and either
        emit its spans or record which close delimiter is still missing.
  -->
  <xsl:param name="tokens" as="xs:string*"/>
  <xsl:param name="index" as="xs:integer"/>
  <xsl:param name="expected" as="xs:string"/>
  <xsl:param name="beganAt" as="xs:integer"/>
  <xsl:param name="level" as="xs:integer"/>
  <xsl:variable name="token" select="$tokens[$index]" as="xs:string?"/>
  <xsl:variable name="awaiting" select="$expected ne 'n'" as="xs:boolean"/>
  <!--
  <trace>token: <xsl:value-of select="$token"/></trace>
  -->

  <!-- Awaiting mode: output produced when the awaited delimiter occurs in this token. -->
  <xsl:variable name="expectedOutput" as="element()*">
    <!-- contains() is used rather than inspecting substring-before(), because the
         latter returns '' both when the delimiter is missing and when it sits at
         the very start of the token. -->
    <xsl:if test="$awaiting and contains($token, $expected)">
      <!-- consider: <!DOCTYPE person [<!ELEMENT ... ]> as well as reference only -->
      <xsl:variable name="beforeFind" select="substring-before($token, $expected)"/>
      <span class="{f:getTagType($tokens[$beganAt])}" close="{$expected}">
        <!-- Re-insert the '<' characters that tokenize() removed. -->
        <xsl:value-of
          select="string-join(
                    ((for $x in $beganAt to ($index - 1)
                        return concat('&lt;', $tokens[$x])),
                     '&lt;', $beforeFind, $expected),
                    '')"/>
      </span>
      <span class="tx">
        <xsl:value-of select="substring($token, string-length($beforeFind) + string-length($expected) + 1)"/>
      </span>
    </xsl:if>
  </xsl:variable>

  <!-- Not-awaiting mode. The result is one of:
         * <x/>        : the token is empty;
         * <required>  : the close delimiter is not in this token; its text is the
                         delimiter to wait for;
         * two spans   : the construct and the text after its close delimiter;
         * the spans returned by f:getAttributes for an open tag. -->
  <xsl:variable name="parseStrings" as="element()*">
    <xsl:if test="not($awaiting)">
      <xsl:variable name="char1" as="xs:string?" select="substring($token,1,1)"/>
      <xsl:variable name="requiredClose" as="xs:string">
        <xsl:variable name="char2" as="xs:string?" select="substring($token,2,1)"/>
        <xsl:choose>
          <xsl:when test="$char1 eq '?'">?&gt;</xsl:when>
          <xsl:when test="$char1 eq '!' and $char2 eq '-'">--&gt;</xsl:when>
          <xsl:when test="$char1 eq '!' and $char2 eq '['">]]&gt;</xsl:when> <!-- assume cdata: <![CDATA[]]> -->
          <xsl:when test="$char1 eq '!'">
            <xsl:value-of select="if (contains($token,'[')) then ']>' else '>'"/>
          </xsl:when>
          <xsl:otherwise>&gt;</xsl:otherwise>
        </xsl:choose>
      </xsl:variable>

      <xsl:variable name="beforeClose" select="substring-before($token, $requiredClose)" as="xs:string"/>

      <xsl:choose>
        <xsl:when test="string-length($token) eq 0">
          <x/>
        </xsl:when>
        <xsl:when test="$char1 = ('?','!','/')">
          <!-- cdata, dtd, pi, comment, or close-tag -->
          <xsl:choose>
            <xsl:when test="contains($token, $requiredClose)">
              <span class="{f:getTagType($token)}" close="{$requiredClose}">
                <xsl:value-of select="$beforeClose"/>
              </span>
              <span class="tx">
                <xsl:value-of select="substring($token, string-length($beforeClose) + string-length($requiredClose) + 1)"/>
              </span>
            </xsl:when>
            <xsl:otherwise>
              <required>
                <xsl:value-of select="$requiredClose"/>
              </required>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:when>
        <xsl:otherwise>
          <!-- must be an open tag, so check for attributes -->
          <!-- Split into alternating unquoted / quoted parts. The 's' flag lets '.'
               match newlines so that a quoted value spanning several lines stays
               in one part. -->
          <xsl:variable name="parts" as="xs:string*">
            <xsl:analyze-string regex="&quot;.*?&quot;|'.*?'|[^'&quot;]+|['&quot;]" flags="s" select="$token">
              <xsl:matching-substring>
                <xsl:value-of select="."/>
              </xsl:matching-substring>
              <xsl:non-matching-substring>
                <xsl:value-of select="."/>
              </xsl:non-matching-substring>
            </xsl:analyze-string>
          </xsl:variable>

          <xsl:sequence select="f:getAttributes($parts, 1)"/>

          <!--
          <dummy><xsl:value-of select="$parts[1]"/></dummy>

          -->
        </xsl:otherwise>
      </xsl:choose>
    </xsl:if>
  </xsl:variable>

  <!-- A lone <required> or <x/> is a state marker, not output. Identifying it by
       element name (instead of by "exactly one element") keeps an open tag that
       produced a single span from being mistaken for an awaited delimiter. -->
  <xsl:variable name="closeMarker" as="element()?"
                select="$parseStrings[self::required or self::x][1]"/>

  <xsl:variable name="stillAwaiting" as="xs:boolean"
                select="$awaiting and empty($expectedOutput)"/>

  <!-- Token 1 is the text before the first '<'; a single-span result for it is
       not emitted, matching how token 1 is always followed by the 'n' state. -->
  <xsl:if test="count($parseStrings) gt 1 or ($index gt 1 and empty($closeMarker))">
    <xsl:sequence select="$parseStrings"/>
  </xsl:if>

  <xsl:sequence select="$expectedOutput"/>

  <!-- Delimiter to look for in the next token. Note that an <x/> marker has an
       empty string value, so it makes the next call await ''. -->
  <xsl:variable name="newExpected" as="xs:string"
                select="if ($index eq 1) then 'n'
                        else if ($stillAwaiting) then $expected
                        else if (exists($closeMarker)) then string($closeMarker)
                        else 'n'"/>

  <xsl:variable name="newBeganAt" as="xs:integer"
                select="if ($stillAwaiting) then $beganAt else $index"/>

  <!-- Stop after the last token; a call past the end would produce no output. -->
  <xsl:if test="$index lt count($tokens)">
    <xsl:sequence select="f:iterateTokens($tokens, $index + 1, $newExpected, $newBeganAt, 0)"/>
  </xsl:if>
</xsl:function>

<xsl:function name="f:getAttributes" as="element()*">
  <!--
    Turns the parts of an open-tag token into highlighting spans, two parts per call.

    parts : the token split into alternating unquoted and quoted pieces, e.g.
            ('a href=', '"x"', '>').
    index : 1-based position of the unquoted part handled by this call; callers
            start at 1 and the function recurses with index + 2.

    Output per call:
      * at index 1, a span.en holding the text of the first part up to its first
        whitespace run;
      * if a following (quoted) part exists, span.eq-sep, span.eq-atn and
        span.eq-equ taken from the first three pieces of the unquoted part, then
        a span.av with the quoted part.
    An unquoted part with no quoted part after it yields nothing further.

    NOTE(review): the element name is cut only at whitespace, so for a tag without
    attributes the closing '>' and any following text end up inside span.en.
    This looks unfinished; confirm the intended handling.
  -->
  <xsl:param name="parts" as="xs:string*"/>
  <xsl:param name="index" as="xs:integer"/>
  <xsl:variable name="part1" as="xs:string?" select="$parts[$index]"/>
  <xsl:variable name="part2" as="xs:string?" select="$parts[$index + 1]"/>

  <!-- \s covers tab, CR and LF as well as space, so tab- or CRLF-separated
       attributes are split the same way as space-separated ones. -->
  <xsl:variable name="elementName" as="xs:string?"
                select="if ($index eq 1)
                        then tokenize($part1, '[\s\p{Zs}]+')[1]
                        else ()"/>

  <xsl:if test="$index eq 1">
    <span class="en">
      <xsl:value-of select="$elementName"/>
    </span>
  </xsl:if>

  <xsl:if test="exists($part2)">
    <!-- On the first call the element name is skipped; what remains is the
         separator, attribute name and equals sign. -->
    <xsl:variable name="left" as="xs:string"
                  select="if ($index eq 1)
                          then substring($part1, string-length($elementName) + 1)
                          else $part1"/>
    <!--
    <left><xsl:value-of select="$left"/></left>
    -->
    <!-- Alternating runs of "whitespace or '='" and everything else.
         Braces are doubled because the regex attribute is an attribute value template. -->
    <xsl:variable name="equals" as="element()*">
      <xsl:analyze-string select="$left" regex="([=\s\p{{Zs}}])+">
        <xsl:matching-substring>
          <m class="match">
            <xsl:value-of select="."/>
          </m>
        </xsl:matching-substring>
        <xsl:non-matching-substring>
          <m>
            <xsl:value-of select="."/>
          </m>
        </xsl:non-matching-substring>
      </xsl:analyze-string>
    </xsl:variable>

    <span class="eq-sep"><xsl:value-of select="$equals[1]"/></span>
    <span class="eq-atn"><xsl:value-of select="$equals[2]"/></span>
    <span class="eq-equ"><xsl:value-of select="$equals[3]"/></span>

    <span class="av">
      <xsl:value-of select="$part2"/>
    </span>
  </xsl:if>

  <xsl:if test="$index + 2 le count($parts)">
    <xsl:sequence select="f:getAttributes($parts, $index + 2)"/>
  </xsl:if>
</xsl:function>

<!-- Placeholder: declares three parameters but has no body, so a call returns the
     empty sequence. No call to it appears in the visible stylesheet.
     NOTE(review): 'tokens' is typed as a single xs:string here, whereas
     f:iterateTokens takes xs:string* for its parameter of the same name; confirm
     the intended type before implementing. -->
<xsl:function name="f:findExpected">
<xsl:param name="tokens" as="xs:string"/>
<xsl:param name="index" as="xs:integer"/>
<xsl:param name="expected" as="xs:string"/>
</xsl:function>



</xsl:stylesheet>
(1-1/4)