|
<xsl:stylesheet
|
|
version="2.0"
|
|
xmlns:d="data:,dpc"
|
|
xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
|
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
|
exclude-result-prefixes="d xs">
|
|
|
|
<!--
|
|
HTML Parser in XSLT2 Copyright 2004-2009 David Carlisle
|
|
Distribution, use and modification of this code permited so long as original is cited.
|
|
-->
|
|
|
|
<!-- $Id: htmlparse.xsl,v 1.33 2009-05-08 13:52:36 David Carlisle Exp $-->
|
|
|
|
<!--
|
|
|
|
d:htmlparse(string)
|
|
d:htmlparse(string,namespace,html-mode)
|
|
|
|
The one argument form is equivalent to
|
|
d:htmlparse(string,'http://www.w3.org/1999/xhtml',true())
|
|
|
|
|
|
Parses the string as HTML and/or XML using some inbuilt heuristics to
|
|
control implied opening and closing of elements.
|
|
|
|
It doesn't have full knowledge of HTML DTD but does have full list of
|
|
empty elements and full list of entity definitions. HTML entities, and
|
|
decimal and hex character references are all accepted. Note html-entities
|
|
are recognised even if html-mode=false().
|
|
|
|
Element names are lowercased (if html-mode is true()) and placed into the
|
|
namespace specified by the namespace parameter (which may be "" to denote
|
|
no-namespace) unless the input has explicit namespace declarations, in
|
|
which case these will be honoured.
|
|
|
|
Attribute names are lowercased if html-mode=true()
|
|
|
|
Four styles of attribute value are supported
|
|
a="double quote delimited, including possibly unquoted < and >"
|
|
a='single quote delimited, including possibly unquoted < and >'
|
|
a=unquotedtoken
|
|
a
|
|
which parse as the XML
|
|
a="double quote delimited, including possibly unquoted < and >"
|
|
a="single quote delimited, including possibly unquoted < and >"
|
|
a="unquotedtoken"
|
|
a="a"
|
|
unquotedtoken here means any non empty sequence that is not white space
|
|
a single or double quote or a > character (earlier versions were more
|
|
restrictive but browsers are very forgiving...)
|
|
|
|
Doctype declarations are accepted but ignored
|
|
Comments and processing instructions produce equivalent constructs in the
|
|
result tree
|
|
CDATA sections are parsed correctly (barring bugs)
|
|
HTML Script and Style elements have some support as CDATA elements if
|
|
html-mode=true()
|
|
|
|
If html-mode is true() then the parser will restart certain elements if
|
|
their closure has been forced. This feature (based on an idea
|
|
described in some slides on John Cowan's TagSoup parser) affects
|
|
elements named in the variable d:restart (which may be redefined by
|
|
any importing stylesheet as required). So an input of
|
|
normal <b> bold <i> bold italic </b> italic </i> normal
|
|
parses as
|
|
normal <b> bold <i> bold italic </i></b><i> italic </i> normal
|
|
in html mode. If html-mode is false() it would parse as
|
|
normal <b> bold <i> bold italic </i></b> italic normal
|
|
with the <i> being closed at the </b> and the later </i> being ignored
|
|
(with a warning message).
|
|
|
|
XML "/>" empty element syntax is also accepted as are XML Namespace
|
|
declarations, resulting elements will be in the specified namespaces
|
|
(So Microsoft Style embedding of XML inside HTML should be parsed
|
|
correctly, note however that if html-mode=true() all elements are
|
|
lowercased even in "XML" sections, and html element names such as
|
|
li will be recognised and treated specially even in XML sections.)
|
|
|
|
|
|
Note: very long attribute values (in excess of 800 characters) can cause
|
|
java regexp stack overflow in Saxon (it may be possible to avoid this
|
|
by using a larger JVM, haven't checked.)
|
|
|
|
|
|
Examples
|
|
|
|
"<body><a>1<BR>2</a><p>zzz<p>www</body>"
|
|
|
|
parses as
|
|
|
|
<body xmlns="http://www.w3.org/1999/xhtml"><a>1<br/>2</a><p>zzz</p><p>www</p></body>
|
|
|
|
With the default (one argument form) and parses as
|
|
|
|
<body><a>1<BR>2</BR></a><p>zzz<p>www</p></p></body>
|
|
|
|
if namespace="" and html-mode=false()
|
|
|
|
Typical use:
|
|
|
|
1)
|
|
To produce a tree corresponding to the external HTML file
|
|
file.html:
|
|
|
|
<xsl:copy-of select="d:htmlparse(unparsed-text('file.html','ISO-8859-1'))"/>
|
|
|
|
2)
|
|
To parse a CDATA section quoted snippet of HTML in an element foo:
|
|
<foo><![CDATA[...<a href="#x">click here</a> ...]]></foo>
|
|
|
|
<xsl:template match="foo">
|
|
<xsl:copy-of select="d:htmlparse(.)"/>
|
|
</xsl:template>
|
|
|
|
|
|
-->
|
|
|
|
|
|
<!-- avoid using a (...)* here to avoid renumbering all the groups.
|
|
will need to do that one day -->
|
|
<!-- Regex fragment for one attribute and any white space after it.
     Capture groups when the fragment is used on its own (as in the inner
     analyze-string of d:htmlparse): 1 = attribute name, 2 = the optional
     "=value" part, 3 = the value, still carrying its quotes if quoted. -->
<xsl:variable name="d:attr"
|
|
select="'(\i\c*)\s*(=\s*("[^"]*"|''[^'']*''|[^ \t\n\r''">]+))?\s*'"/>
|
|
|
|
<!-- Regex fragment for an element name: one name-start character (\i)
     followed by any number of name characters (\c), captured as a group. -->
<xsl:variable name="d:elem"
|
|
select="'(\i\c*)'"/>
|
|
|
|
<!-- Regex for a comment token. The body may contain single hyphens but
     never two hyphens in a row, so the match ends at the first comment
     close delimiter. -->
<xsl:variable name="d:comment"
|
|
select="'<!\-\-[^\-]*(\-[^\-]+)*\-\->'"/>
|
|
|
|
<!-- Regex for a processing instruction token: "<?", a name, then
     everything up to and including the first ">". -->
<xsl:variable name="d:pi"
|
|
select="'<\?\i\c*[^>]*>'"/>
|
|
|
|
<!-- Regex for a declaration starting "<!D" (a DOCTYPE), optionally with
     one bracketed internal subset that itself contains no "]". -->
<xsl:variable name="d:doctype"
|
|
select="'<!D[^\[<>]*(\[[^\]]*\])?>'"/>
|
|
|
|
|
|
<!-- Regex for markers of the form "<![if ...]>" and "<![endif ...]>"
     (presumably the Microsoft conditional markup the name refers to; confirm).
     The reluctant ".*?" stops at the first "]>". -->
<xsl:variable name="d:msif"
|
|
select="'<!\[(end)?if.*?\]>'"/>
|
|
|
|
<!-- Regex for a CDATA section.
     "." does not match line ends (no "s" flag is used), hence the (.|\s)
     alternation. The quantifier is reluctant so the match ends at the
     FIRST "]]>"; a greedy quantifier would run on to the last "]]>" in the
     input and swallow all markup between two CDATA sections.
     The capture group is kept so that the group numbering of the combined
     regex built in d:htmlparse does not change. -->
<xsl:variable name="d:cdata"
              select="'&lt;!\[CDATA(.|\s)*?\]\]>'"/>
|
|
|
|
<!-- One-argument convenience form: parses $string in html-mode and uses
     the XHTML namespace as the namespace argument of the
     three-argument d:htmlparse. -->
<xsl:function name="d:htmlparse">
  <xsl:param name="string" as="xs:string"/>
  <xsl:variable name="xhtml" select="'http://www.w3.org/1999/xhtml'"/>
  <xsl:sequence select="d:htmlparse($string, $xhtml, true())"/>
</xsl:function>
|
|
|
|
<!--
  d:htmlparse(string, namespace, html-mode)

  Parses $string as HTML and/or XML and returns the resulting tree.
    string    : the markup to parse.
    namespace : namespace URI given to the d:tree pass as the default
                namespace ("" for no namespace).
    html-mode : when true() element and attribute names are lower-cased and
                the d:html templates are applied; otherwise the d:gxml
                templates are applied.

  The work is done in three passes, each held in a temporary tree:
    $x : flat token list (text, start, end, comment, pi elements) produced
         by the regex scan below;
    $y : the token list after the d:html or d:gxml pass, in which start and
         end elements carry an "s" attribute;
    $z : the nested result tree built by the d:tree templates.
-->
<xsl:function name="d:htmlparse">
  <xsl:param name="string" as="xs:string"/>
  <xsl:param name="namespace" as="xs:string"/> <!-- anyURI -->
  <xsl:param name="html-mode" as="xs:boolean"/>

  <xsl:variable name="x">
    <!-- NOTE(review): as it appears here both string literals of replace()
         look like a single white space character, which would make the call
         a no-op; presumably a line-end normalisation was intended. Confirm. -->
    <!-- Groups of the combined regex used below: 1 = "/" of an end tag,
         2 = element name, 3 = the complete attribute list,
         8 = "/" of an empty-element tag. -->
    <xsl:analyze-string select="replace($string,' ',' ')"
                        regex="&lt;(/?){$d:elem}\s*(({$d:attr})*)(/?)>|{$d:comment}|{$d:pi}|{$d:doctype}|{$d:cdata}|({$d:msif})">
      <xsl:matching-substring>
        <xsl:choose>
          <xsl:when test="starts-with(.,'&lt;![CDATA')">
            <!-- The match is "<![CDATA[" (9 characters) + content + "]]>"
                 (3 characters), so the content starts at position 10 and is
                 string-length - 12 characters long. -->
            <xsl:value-of select="substring(.,10,string-length(.)-12)"/>
          </xsl:when>
          <!-- DOCTYPE declarations are accepted but produce no token. -->
          <xsl:when test="starts-with(.,'&lt;!D')"/>
          <xsl:when test="starts-with(.,'&lt;!-')">
            <comment>
              <xsl:value-of select="substring(.,5,string-length(.)-7)"/>
            </comment>
          </xsl:when>
          <!-- Remaining "<![" markup (the d:msif alternative) is dropped. -->
          <xsl:when test="starts-with(.,'&lt;![')"/>
          <xsl:when test="starts-with(.,'&lt;?')">
            <pi>
              <xsl:value-of select="normalize-space((substring(.,3,string-length(.)-4)))"/>
            </pi>
          </xsl:when>
          <xsl:when test="(regex-group(1)='/')">
            <end name="{if ($html-mode) then lower-case(regex-group(2)) else regex-group(2)}"/>
          </xsl:when>
          <xsl:otherwise>
            <start name="{if ($html-mode) then lower-case(regex-group(2)) else regex-group(2)}">
              <attrib>
                <!-- Inside this analyze-string the groups are those of
                     d:attr alone: 1 = name, 2 = "=value", 3 = value. -->
                <xsl:analyze-string regex="{$d:attr}" select="regex-group(3)">
                  <xsl:matching-substring>
                    <xsl:choose>
                      <xsl:when test="starts-with(regex-group(1),'xmlns')">
                        <!-- Namespace declarations are stored as namespace
                             nodes on a d:ns element; an empty URI is stored
                             as the marker 'data:,dpc'. -->
                        <d:ns>
                          <xsl:variable name="n"
                                        select="d:chars(substring(regex-group(3),2,string-length(regex-group(3))-2))"/>
                          <xsl:namespace name="{substring-after(regex-group(1),'xmlns:')}"
                                         select="if ($n) then $n else 'data:,dpc'"/>
                        </d:ns>
                      </xsl:when>
                      <xsl:otherwise>
                        <attribute name="{if ($html-mode) then lower-case(regex-group(1)) else regex-group(1)}">
                          <xsl:choose>
                            <!-- double-quoted value: strip the quotes, expand references -->
                            <xsl:when test="starts-with(regex-group(3),'&quot;')">
                              <xsl:value-of select="d:chars(substring(regex-group(3),2,string-length(regex-group(3))-2))"/>
                            </xsl:when>
                            <!-- single-quoted value: strip the quotes, expand references -->
                            <xsl:when test="starts-with(regex-group(3),'''')">
                              <xsl:value-of select="d:chars(substring(regex-group(3),2,string-length(regex-group(3))-2))"/>
                            </xsl:when>
                            <!-- unquoted value: used as it stands -->
                            <xsl:when test="string(regex-group(2))">
                              <xsl:value-of select="regex-group(3)"/>
                            </xsl:when>
                            <!-- name only: the value is the attribute name -->
                            <xsl:otherwise>
                              <xsl:value-of select="regex-group(1)"/>
                            </xsl:otherwise>
                          </xsl:choose>
                        </attribute>
                      </xsl:otherwise>
                    </xsl:choose>
                  </xsl:matching-substring>
                </xsl:analyze-string>
              </attrib>
            </start>
            <!-- XML empty-element syntax: emit the matching end token too. -->
            <xsl:if test="regex-group(8)='/'">
              <end name="{if ($html-mode) then lower-case(regex-group(2)) else regex-group(2)}"/>
            </xsl:if>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:matching-substring>
      <xsl:non-matching-substring>
        <xsl:value-of select="."/>
      </xsl:non-matching-substring>
    </xsl:analyze-string>
  </xsl:variable>

  <xsl:variable name="y">
    <xsl:choose>
      <xsl:when test="$html-mode">
        <xsl:apply-templates mode="d:html" select="$x/node()[1]"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:apply-templates mode="d:gxml" select="$x/node()[1]"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <!-- Dummy element whose default namespace node (if any) is handed to the
       d:tree pass as the initial namespace context. -->
  <xsl:variable name="j">
    <xsl:element name="x" namespace="{if ($namespace) then $namespace else ''}"/>
  </xsl:variable>

  <xsl:variable name="z">
    <xsl:apply-templates mode="d:tree" select="$y/node()[1]">
      <xsl:with-param name="ns" select="$j/*/namespace::*[name()='']"/>
    </xsl:apply-templates>
  </xsl:variable>

  <!-- Debugging aid: copy $x and $y to the output here to inspect the
       intermediate token lists. -->

  <xsl:copy-of select="$z"/>

</xsl:function>
|
|
|
|
<!--
  d:chars(s): returns $s with character and entity references expanded.

    &#NNN;           decimal character reference
    &#xHHH; &#XHHH;  hexadecimal character reference
    &name;           entity looked up in $d:ents through the d:ents key

  Group 2 of the regex only marks a hexadecimal reference when "#" is
  present. Without "#" a leading "x" is part of the entity name, so that
  names such as &xi; are looked up instead of being decoded as hex digits.
  Malformed numeric references, references to code points that are not XML
  characters, and unknown entity names are reported with xsl:message and
  copied to the result unchanged rather than raising a dynamic error.
-->
<xsl:function name="d:chars">
  <xsl:param name="s" as="xs:string"/>
  <xsl:value-of>
    <xsl:analyze-string select="$s" regex="&amp;(#?)(x?)([0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);">
      <xsl:matching-substring>
        <xsl:variable name="numeric" as="xs:boolean" select="regex-group(1)='#'"/>
        <!-- Everything between the optional "#" and the ";". -->
        <xsl:variable name="body" as="xs:string" select="concat(regex-group(2),regex-group(3))"/>
        <xsl:variable name="code" as="xs:integer?"
                      select="if (not($numeric)) then ()
                              else if (matches($body,'^[xX][0-9a-fA-F]+$'))
                              then d:hex(for $i in string-to-codepoints(upper-case(substring($body,2)))
                                         return if ($i > 64) then $i - 55 else $i - 48)
                              else if (matches($body,'^[0-9]+$')) then xs:integer($body)
                              else ()"/>
        <!-- Code points allowed by the XML 1.0 Char production. -->
        <xsl:variable name="valid" as="xs:boolean"
                      select="exists($code) and
                              ($code = (9,10,13)
                               or ($code ge 32 and $code le 55295)
                               or ($code ge 57344 and $code le 65533)
                               or ($code ge 65536 and $code le 1114111))"/>
        <xsl:variable name="entity"
                      select="if ($numeric) then () else $d:ents/key('d:ents',$body)"/>
        <xsl:choose>
          <xsl:when test="$valid">
            <xsl:value-of select="codepoints-to-string($code)"/>
          </xsl:when>
          <xsl:when test="$numeric">
            <xsl:message>htmlparse: Invalid character reference: <xsl:value-of select="."/></xsl:message>
            <xsl:value-of select="."/>
          </xsl:when>
          <xsl:when test="$entity">
            <xsl:value-of select="$entity"/>
          </xsl:when>
          <xsl:otherwise>
            <xsl:message>htmlparse: Unknown entity: <xsl:value-of select="$body"/></xsl:message>
            <xsl:value-of select="."/>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:matching-substring>
      <xsl:non-matching-substring>
        <xsl:value-of select="."/>
      </xsl:non-matching-substring>
    </xsl:analyze-string>
  </xsl:value-of>
</xsl:function>
|
|
|
|
|
|
<!--
  d:hex(x): combines a sequence of base-16 digit values (most significant
  digit first, each already converted to its numeric value) into one integer.
  The empty sequence yields 0.
  xsl:sequence returns the xs:integer itself; xsl:value-of would build a
  text node that has to be atomized and cast back at every recursion step.
-->
<xsl:function name="d:hex" as="xs:integer">
  <xsl:param name="x" as="xs:integer*"/>
  <xsl:sequence
      select="if (empty($x)) then 0 else ($x[last()] + 16 * d:hex($x[position() != last()]))"/>
</xsl:function>
|
|
|
|
<!-- d:cdata mode, text token: the text is written out unchanged (no
     reference expansion) and processing moves on to the next sibling in
     the same mode, handing the $s parameter on. -->
<xsl:template match="text()" mode="d:cdata">
  <xsl:param name="s" as="xs:string*" select="()"/>
  <xsl:variable name="following" select="following-sibling::node()[1]"/>
  <xsl:value-of select="."/>
  <xsl:apply-templates select="$following" mode="#current">
    <xsl:with-param name="s" select="$s"/>
  </xsl:apply-templates>
</xsl:template>
|
|
|
|
<!-- d:html and d:gxml modes, text token: character and entity references
     are expanded with d:chars, then the next sibling is processed in the
     current mode with the same stack $s. -->
<xsl:template match="text()" mode="d:html d:gxml">
  <xsl:param name="s" as="xs:string*" select="()"/>
  <xsl:variable name="following" select="following-sibling::node()[1]"/>
  <xsl:value-of select="d:chars(.)"/>
  <xsl:apply-templates select="$following" mode="#current">
    <xsl:with-param name="s" select="$s"/>
  </xsl:apply-templates>
</xsl:template>
|
|
|
|
<!-- d:html and d:gxml modes, comment and pi tokens: copied through as they
     are; the stack $s is passed on untouched to the next sibling. -->
<xsl:template match="comment|pi" mode="d:html d:gxml">
  <xsl:param name="s" as="xs:string*" select="()"/>
  <xsl:variable name="following" select="following-sibling::node()[1]"/>
  <xsl:copy-of select="."/>
  <xsl:apply-templates select="$following" mode="#current">
    <xsl:with-param name="s" select="$s"/>
  </xsl:apply-templates>
</xsl:template>
|
|
|
|
|
|
<!-- d:html mode, script and style start tokens.
     Emits the start token, processes what follows in d:cdata mode, emits
     the end token itself, and resumes d:html processing at the node after
     the first following end token that carries the same name. The stack $s
     is not extended: start and end both carry the caller's stack. -->
<xsl:template match="start[@name=('script','style')]" mode="d:html">
  <xsl:param name="s" as="xs:string*" select="()"/>
  <xsl:variable name="tag" select="@name"/>
  <xsl:variable name="close" select="following-sibling::end[@name=$tag][1]"/>
  <start name="{$tag}" s="{$s}">
    <xsl:copy-of select="attrib"/>
  </start>
  <xsl:apply-templates select="following-sibling::node()[1]" mode="d:cdata"/>
  <end name="{$tag}" s="{$s}"/>
  <xsl:apply-templates select="$close/following-sibling::node()[1]" mode="d:html">
    <xsl:with-param name="s" select="$s"/>
  </xsl:apply-templates>
</xsl:template>
|
|
|
|
|
|
|
|
<!-- d:cdata mode, start token: the tag is written back out as text.
     Output is "<", then the token's name and its string value (separated by
     a space, the default separator of xsl:value-of with select), then ">".
     Processing continues with the next sibling, still in d:cdata mode. -->
<xsl:template mode="d:cdata" match="start">
|
|
<xsl:text><</xsl:text>
|
|
<xsl:value-of select="(@name,.)"/>
|
|
<xsl:text>></xsl:text>
|
|
<xsl:apply-templates mode="d:cdata" select="following-sibling::node()[1]"/>
|
|
</xsl:template>
|
|
|
|
<!-- d:html and d:gxml modes, ordinary start token.
     The token is re-emitted with the current stack in its "s" attribute and
     its attrib child; the element name is then pushed on the front of the
     stack for the tokens that follow. -->
<xsl:template match="start" mode="d:html d:gxml">
  <xsl:param name="s" as="xs:string*" select="()"/>
  <xsl:variable name="pushed" as="xs:string*" select="(string(@name),$s)"/>
  <start name="{@name}" s="{$s}">
    <xsl:copy-of select="attrib"/>
  </start>
  <xsl:apply-templates select="following-sibling::node()[1]" mode="#current">
    <xsl:with-param name="s" select="$pushed"/>
  </xsl:apply-templates>
</xsl:template>
|
|
|
|
<!-- d:html mode, start token of an HTML empty element.
     The start token is followed immediately by a generated end token, the
     stack is left unchanged, and processing continues with the first
     following sibling that is not an end token of the same name. -->
<xsl:template match="start[@name=('br','hr','basefont','area','link','img','param','input','col','frame','isindex','base','meta')]" mode="d:html">
  <xsl:param name="s" as="xs:string*" select="()"/>
  <xsl:variable name="tag" select="@name"/>
  <start name="{$tag}" s="{$s}">
    <xsl:copy-of select="attrib"/>
  </start>
  <end name="{$tag}" s="{$s}"/>
  <xsl:apply-templates select="following-sibling::node()[not(self::end/@name=$tag)][1]"
                       mode="d:html">
    <xsl:with-param name="s" select="$s"/>
  </xsl:apply-templates>
</xsl:template>
|
|
|
|
<!-- Element names treated as list containers, and the element names
     treated as their items, by the d:html list-item template. -->
<xsl:variable name="d:lists" as="xs:string+" select="'ul', 'ol', 'dl'"/>
<xsl:variable name="d:listitems" as="xs:string+" select="'li', 'dt', 'dd'"/>
|
|
|
|
<!-- d:html mode, start token of a list item (names in $d:listitems).
     If some list is open but the innermost open element is not a list, the
     innermost element is closed through d:end and this token is looked at
     again. Otherwise the item is opened and pushed on the stack. -->
<xsl:template match="start[@name=$d:listitems]" mode="d:html">
  <xsl:param name="s" as="xs:string*" select="()"/>
  <xsl:choose>
    <xsl:when test="$d:lists=$s and not($d:lists=$s[1])">
      <xsl:call-template name="d:end">
        <xsl:with-param name="s" select="$s"/>
        <xsl:with-param name="n" select="$s[1]"/>
        <xsl:with-param name="next" select="."/>
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <start name="{@name}" s="{$s}">
        <xsl:copy-of select="attrib"/>
      </start>
      <xsl:apply-templates select="following-sibling::node()[1]" mode="d:html">
        <xsl:with-param name="s" select="(string(@name),$s)"/>
      </xsl:apply-templates>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
|
|
|
|
|
|
|
|
<!-- d:html mode, td start token.
     If a tr is open but is not the innermost open element, the innermost
     element is closed through d:end and this token is looked at again.
     Otherwise the cell is opened and pushed on the stack. -->
<xsl:template match="start[@name='td']" mode="d:html">
  <xsl:param name="s" as="xs:string*" select="()"/>
  <xsl:choose>
    <xsl:when test="'tr'=$s and not('tr'=$s[1])">
      <xsl:call-template name="d:end">
        <xsl:with-param name="s" select="$s"/>
        <xsl:with-param name="n" select="$s[1]"/>
        <xsl:with-param name="next" select="."/>
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <start name="{@name}" s="{$s}">
        <xsl:copy-of select="attrib"/>
      </start>
      <xsl:apply-templates select="following-sibling::node()[1]" mode="d:html">
        <xsl:with-param name="s" select="(string(@name),$s)"/>
      </xsl:apply-templates>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
|
|
|
|
|
|
<!-- d:html mode, p start token.
     While a p is anywhere on the stack the innermost open element is closed
     through d:end and this token is looked at again; once no p is open the
     paragraph is opened and pushed on the stack. -->
<xsl:template match="start[@name='p']" mode="d:html">
  <xsl:param name="s" as="xs:string*" select="()"/>
  <xsl:choose>
    <xsl:when test="'p'=$s">
      <xsl:call-template name="d:end">
        <xsl:with-param name="s" select="$s"/>
        <xsl:with-param name="n" select="$s[1]"/>
        <xsl:with-param name="next" select="."/>
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <start name="{@name}" s="{$s}">
        <xsl:copy-of select="attrib"/>
      </start>
      <xsl:apply-templates select="following-sibling::node()[1]" mode="d:html">
        <xsl:with-param name="s" select="(string(@name),$s)"/>
      </xsl:apply-templates>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
|
|
|
|
|
|
<!-- d:gxml mode, end token.
     n    : name being closed (defaults to this token's name).
     s    : stack of open element names, innermost first.
     next : node to continue with once the element is closed.
     Three cases, which exclude one another:
       * $n is not open at all: the end tag is ignored with a message;
       * $n is the innermost open element: it is closed;
       * $n is open further out: the innermost element is closed and this
         same token is processed again with the shortened stack. -->
<xsl:template match="end" mode="d:gxml">
  <xsl:param name="n" as="xs:string" select="@name"/>
  <xsl:param name="s" as="xs:string*" select="()"/>
  <xsl:param name="next" as="node()?" select="following-sibling::node()[1]"/>
  <xsl:variable name="outer" select="subsequence($s,2)"/>
  <xsl:choose>
    <xsl:when test="not($n=$s)">
      <xsl:message>htmlparse: Not well formed (ignoring /<xsl:value-of select="$n"/>)</xsl:message>
      <xsl:apply-templates select="$next" mode="#current">
        <xsl:with-param name="s" select="$s"/>
      </xsl:apply-templates>
    </xsl:when>
    <xsl:when test="$s[1]=$n">
      <end name="{$n}" s="{$outer}"/>
      <xsl:apply-templates select="$next" mode="#current">
        <xsl:with-param name="s" select="$outer"/>
      </xsl:apply-templates>
    </xsl:when>
    <xsl:otherwise>
      <end name="{$s[1]}" s="{$outer}"/>
      <xsl:apply-templates select="." mode="#current">
        <xsl:with-param name="s" select="$outer"/>
      </xsl:apply-templates>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
|
|
|
|
|
|
<!-- Names of the elements that d:end re-opens after their closure has
     been forced by the end tag of an enclosing element. -->
<xsl:variable name="d:restart" as="xs:string+" select="'i', 'b', 'font'"/>
|
|
|
|
<!-- d:html mode, end token; also callable by name as d:end.
     n    : name being closed (defaults to this token's name).
     s    : stack of open element names, innermost first.
     r    : names of force-closed elements waiting to be re-opened.
     next : node to continue with once the element is closed.
     Three cases, which exclude one another:
       * $n is not open at all: the end tag is ignored with a message;
       * $n is the innermost open element: it is closed, then every name in
         $r is re-opened (last entry first, without attributes) and pushed
         back on the stack;
       * $n is open further out: the innermost element is closed and this
         same token is processed again; the closed name is appended to $r
         if it is listed in $d:restart, otherwise $r is reset to empty. -->
<xsl:template match="end" mode="d:html" name="d:end">
  <xsl:param name="n" as="xs:string" select="@name"/>
  <xsl:param name="s" as="xs:string*" select="()"/>
  <xsl:param name="r" as="xs:string*" select="()"/>
  <xsl:param name="next" as="node()?" select="following-sibling::node()[1]"/>
  <xsl:variable name="outer" select="subsequence($s,2)"/>
  <xsl:choose>
    <xsl:when test="not($n=$s)">
      <xsl:message>htmlparse: Not well formed (ignoring /<xsl:value-of select="$n"/>)</xsl:message>
      <xsl:apply-templates select="$next" mode="#current">
        <xsl:with-param name="s" select="$s"/>
      </xsl:apply-templates>
    </xsl:when>
    <xsl:when test="$s[1]=$n">
      <end name="{$n}" s="{$outer}"/>
      <!-- walk $r from its last entry back to its first -->
      <xsl:for-each select="reverse(1 to count($r))">
        <xsl:variable name="rp" as="xs:integer" select="."/>
        <start name="{$r[$rp]}" s="{(subsequence($r,$rp + 1),$outer)}"/>
      </xsl:for-each>
      <xsl:apply-templates select="$next" mode="#current">
        <xsl:with-param name="s" select="($r,$outer)"/>
      </xsl:apply-templates>
    </xsl:when>
    <xsl:otherwise>
      <end name="{$s[1]}" s="{$outer}"/>
      <xsl:apply-templates select="." mode="#current">
        <xsl:with-param name="s" select="$outer"/>
        <xsl:with-param name="r" select="if ($s[1] = $d:restart) then ($r,$s[1]) else ()"/>
      </xsl:apply-templates>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
|
|
|
|
|
|
|
|
<!-- d:tree mode, text token: copied to the result; the namespace context
     $ns is handed on to the next sibling. -->
<xsl:template match="text()" mode="d:tree">
  <xsl:param name="ns" as="node()*"/>
  <xsl:variable name="following" select="following-sibling::node()[1]"/>
  <xsl:copy-of select="."/>
  <xsl:apply-templates select="$following" mode="d:tree">
    <xsl:with-param name="ns" select="$ns"/>
  </xsl:apply-templates>
</xsl:template>
|
|
|
|
<!-- d:tree mode, comment token: turned into a real comment node holding
     the token's text; $ns is handed on to the next sibling. -->
<xsl:template match="comment" mode="d:tree">
  <xsl:param name="ns" as="node()*"/>
  <xsl:variable name="following" select="following-sibling::node()[1]"/>
  <xsl:comment select="."/>
  <xsl:apply-templates select="$following" mode="d:tree">
    <xsl:with-param name="ns" select="$ns"/>
  </xsl:apply-templates>
</xsl:template>
|
|
|
|
<!-- d:tree mode, pi token: turned into a real processing instruction.
     The token text is "target" optionally followed by a space and the data.
     A token without data has no space, so the whole text is the target.
     xsl:processing-instruction raises an error unless the target is an
     NCName other than "xml" (in any case), so XML declarations and
     instructions with an unusable target are skipped with a message
     instead of aborting the transformation.
     $ns is handed on to the next sibling. -->
<xsl:template match="pi" mode="d:tree">
  <xsl:param name="ns" as="node()*"/>
  <xsl:variable name="target" as="xs:string"
                select="if (contains(.,' ')) then substring-before(.,' ') else string(.)"/>
  <xsl:choose>
    <xsl:when test="matches($target,'^[\i-[:]][\c-[:]]*$') and lower-case($target) ne 'xml'">
      <xsl:processing-instruction name="{$target}">
        <xsl:value-of select="substring-after(.,' ')"/>
      </xsl:processing-instruction>
    </xsl:when>
    <xsl:otherwise>
      <xsl:message>htmlparse: Ignoring processing instruction with target '<xsl:value-of select="$target"/>'</xsl:message>
    </xsl:otherwise>
  </xsl:choose>
  <xsl:apply-templates select="following-sibling::node()[1]" mode="d:tree">
    <xsl:with-param name="ns" select="$ns"/>
  </xsl:apply-templates>
</xsl:template>
|
|
|
|
|
|
|
|
<!-- d:tree mode, start token: builds the element.
     ns : namespace nodes in scope from the enclosing elements.
     The matching end token is the first following end whose "s" attribute
     equals this token's. Namespace nodes declared on this element (stored
     on attrib/d:ns) are appended to $ns; the element's namespace is the
     last node in that list whose name is the element's prefix, unless that
     node holds the marker 'data:,dpc', which stands for no namespace.
     Prefixed attributes are resolved the same way, unprefixed attributes
     are in no namespace. Content up to the matching end token is processed
     with the extended namespace list, and whatever follows the end token is
     processed with the caller's list. -->
<xsl:template match="start" mode="d:tree">
  <xsl:param name="ns" as="node()*"/>
  <xsl:variable name="close" select="following-sibling::end[@s=current()/@s][1]"/>
  <xsl:variable name="declared" as="node()*" select="attrib/d:ns/namespace::*"/>
  <xsl:variable name="scope" as="node()*" select="($ns,$declared)"/>
  <xsl:variable name="prefix" select="substring-before(@name,':')"/>
  <xsl:element name="{if(string(@name))then @name else 'xml'}"
               namespace="{$scope[name()=$prefix][last()][not(.='data:,dpc')]}">
    <xsl:for-each select="attrib/attribute">
      <xsl:variable name="aprefix" select="substring-before(@name,':')"/>
      <xsl:attribute name="{@name}"
                     namespace="{if(contains(@name,':')) then $scope[name()=$aprefix][last()][not(.='data:,dpc')] else ''}"
                     select="."/>
    </xsl:for-each>
    <xsl:apply-templates select="following-sibling::node()[1][not(. is $close)]" mode="d:tree">
      <xsl:with-param name="ns" select="$scope"/>
    </xsl:apply-templates>
  </xsl:element>
  <xsl:apply-templates select="$close/following-sibling::node()[1]" mode="d:tree">
    <xsl:with-param name="ns" select="$ns"/>
  </xsl:apply-templates>
</xsl:template>
|
|
|
|
<!--
|
|
Old version without NS support
|
|
<xsl:template mode="d:tree" match="start">
|
|
<xsl:variable name="n" select="following-sibling::end[@s=current()/@s][1]"/>
|
|
<xsl:element name="{@name}" namespace="http://www.w3.org/1999/xhtml">
|
|
<xsl:copy-of select="attrib/@*"/>
|
|
<xsl:apply-templates select="following-sibling::node()[1][not(. is $n)]" mode="d:tree"/>
|
|
</xsl:element>
|
|
<xsl:apply-templates select="$n/following-sibling::node()[1]" mode="d:tree"/>
|
|
</xsl:template>
|
|
-->
|
|
|
|
|
|
<!-- Entity table used by d:chars: one entity element per name, whose
     content is the replacement text. Looked up through the d:ents key.
     Markup characters are escaped, and replacement characters that are
     invisible or white space (nbsp, shy, emsp, ensp, thinsp, lrm, rlm, zwj,
     zwnj) are written as numeric character references so that editors and
     white space normalisation cannot silently change or strip them.
     "apos" is included because it is one of the XML predefined entities. -->
<xsl:variable name="d:ents">

  <entity name="Aacute">Á</entity>
  <entity name="aacute">á</entity>
  <entity name="Acirc">Â</entity>
  <entity name="acirc">â</entity>
  <entity name="acute">´</entity>
  <entity name="AElig">Æ</entity>
  <entity name="aelig">æ</entity>
  <entity name="Agrave">À</entity>
  <entity name="agrave">à</entity>
  <entity name="Aring">Å</entity>
  <entity name="aring">å</entity>
  <entity name="Atilde">Ã</entity>
  <entity name="atilde">ã</entity>
  <entity name="Auml">Ä</entity>
  <entity name="auml">ä</entity>
  <entity name="brvbar">¦</entity>
  <entity name="Ccedil">Ç</entity>
  <entity name="ccedil">ç</entity>
  <entity name="cedil">¸</entity>
  <entity name="cent">¢</entity>
  <entity name="copy">©</entity>
  <entity name="COPY">©</entity>
  <entity name="curren">¤</entity>
  <entity name="deg">°</entity>
  <entity name="divide">÷</entity>
  <entity name="Eacute">É</entity>
  <entity name="eacute">é</entity>
  <entity name="Ecirc">Ê</entity>
  <entity name="ecirc">ê</entity>
  <entity name="Egrave">È</entity>
  <entity name="egrave">è</entity>
  <entity name="ETH">Ð</entity>
  <entity name="eth">ð</entity>
  <entity name="Euml">Ë</entity>
  <entity name="euml">ë</entity>
  <entity name="frac12">½</entity>
  <entity name="frac14">¼</entity>
  <entity name="frac34">¾</entity>
  <entity name="Iacute">Í</entity>
  <entity name="iacute">í</entity>
  <entity name="Icirc">Î</entity>
  <entity name="icirc">î</entity>
  <entity name="iexcl">¡</entity>
  <entity name="Igrave">Ì</entity>
  <entity name="igrave">ì</entity>
  <entity name="iquest">¿</entity>
  <entity name="Iuml">Ï</entity>
  <entity name="iuml">ï</entity>
  <entity name="laquo">«</entity>
  <entity name="macr">¯</entity>
  <entity name="micro">µ</entity>
  <entity name="middot">·</entity>
  <entity name="nbsp">&#160;</entity>
  <entity name="not">¬</entity>
  <entity name="Ntilde">Ñ</entity>
  <entity name="ntilde">ñ</entity>
  <entity name="Oacute">Ó</entity>
  <entity name="oacute">ó</entity>
  <entity name="Ocirc">Ô</entity>
  <entity name="ocirc">ô</entity>
  <entity name="Ograve">Ò</entity>
  <entity name="ograve">ò</entity>
  <entity name="ordf">ª</entity>
  <entity name="ordm">º</entity>
  <entity name="Oslash">Ø</entity>
  <entity name="oslash">ø</entity>
  <entity name="Otilde">Õ</entity>
  <entity name="otilde">õ</entity>
  <entity name="Ouml">Ö</entity>
  <entity name="ouml">ö</entity>
  <entity name="para">¶</entity>
  <entity name="plusmn">±</entity>
  <entity name="pound">£</entity>
  <entity name="raquo">»</entity>
  <entity name="reg">®</entity>
  <entity name="REG">®</entity>
  <entity name="sect">§</entity>
  <entity name="shy">&#173;</entity>
  <entity name="sup1">¹</entity>
  <entity name="sup2">²</entity>
  <entity name="sup3">³</entity>
  <entity name="szlig">ß</entity>
  <entity name="THORN">Þ</entity>
  <entity name="thorn">þ</entity>
  <entity name="times">×</entity>
  <entity name="Uacute">Ú</entity>
  <entity name="uacute">ú</entity>
  <entity name="Ucirc">Û</entity>
  <entity name="ucirc">û</entity>
  <entity name="Ugrave">Ù</entity>
  <entity name="ugrave">ù</entity>
  <entity name="uml">¨</entity>
  <entity name="Uuml">Ü</entity>
  <entity name="uuml">ü</entity>
  <entity name="Yacute">Ý</entity>
  <entity name="yacute">ý</entity>
  <entity name="yen">¥</entity>
  <entity name="yuml">ÿ</entity>

  <entity name="bdquo">„</entity>
  <entity name="circ">ˆ</entity>
  <entity name="Dagger">‡</entity>
  <entity name="dagger">†</entity>
  <entity name="emsp">&#8195;</entity>
  <entity name="ensp">&#8194;</entity>
  <entity name="euro">€</entity>
  <entity name="gt">&gt;</entity>
  <entity name="GT">&gt;</entity>
  <entity name="ldquo">“</entity>
  <entity name="lrm">&#8206;</entity>
  <entity name="lsaquo">‹</entity>
  <entity name="lsquo">‘</entity>
  <entity name="lt">&lt;</entity>
  <entity name="LT">&lt;</entity>
  <entity name="amp">&amp;</entity>
  <entity name="AMP">&amp;</entity>
  <entity name="apos">'</entity>
  <entity name="mdash">—</entity>
  <entity name="ndash">–</entity>
  <entity name="OElig">Œ</entity>
  <entity name="oelig">œ</entity>
  <entity name="permil">‰</entity>
  <entity name="quot">"</entity>
  <entity name="QUOT">"</entity>
  <entity name="rdquo">”</entity>
  <entity name="rlm">&#8207;</entity>
  <entity name="rsaquo">›</entity>
  <entity name="rsquo">’</entity>
  <entity name="sbquo">‚</entity>
  <entity name="Scaron">Š</entity>
  <entity name="scaron">š</entity>
  <entity name="thinsp">&#8201;</entity>
  <entity name="tilde">˜</entity>
  <entity name="Yuml">Ÿ</entity>
  <entity name="zwj">&#8205;</entity>
  <entity name="zwnj">&#8204;</entity>

  <entity name="alefsym">ℵ</entity>
  <entity name="Alpha">Α</entity>
  <entity name="alpha">α</entity>
  <entity name="and">∧</entity>
  <entity name="ang">∠</entity>
  <entity name="asymp">≈</entity>
  <entity name="Beta">Β</entity>
  <entity name="beta">β</entity>
  <entity name="bull">•</entity>
  <entity name="cap">∩</entity>
  <entity name="Chi">Χ</entity>
  <entity name="chi">χ</entity>
  <entity name="clubs">♣</entity>
  <entity name="cong">≅</entity>
  <entity name="crarr">↵</entity>
  <entity name="cup">∪</entity>
  <entity name="dArr">⇓</entity>
  <entity name="darr">↓</entity>
  <entity name="Delta">Δ</entity>
  <entity name="delta">δ</entity>
  <entity name="diams">♦</entity>
  <entity name="empty">∅</entity>
  <entity name="Epsilon">Ε</entity>
  <entity name="epsilon">ε</entity>
  <entity name="equiv">≡</entity>
  <entity name="Eta">Η</entity>
  <entity name="eta">η</entity>
  <entity name="exist">∃</entity>
  <entity name="fnof">ƒ</entity>
  <entity name="forall">∀</entity>
  <entity name="frasl">⁄</entity>
  <entity name="Gamma">Γ</entity>
  <entity name="gamma">γ</entity>
  <entity name="ge">≥</entity>
  <entity name="hArr">⇔</entity>
  <entity name="harr">↔</entity>
  <entity name="hearts">♥</entity>
  <entity name="hellip">…</entity>
  <entity name="image">ℑ</entity>
  <entity name="infin">∞</entity>
  <entity name="int">∫</entity>
  <entity name="Iota">Ι</entity>
  <entity name="iota">ι</entity>
  <entity name="isin">∈</entity>
  <entity name="Kappa">Κ</entity>
  <entity name="kappa">κ</entity>
  <entity name="Lambda">Λ</entity>
  <entity name="lambda">λ</entity>
  <entity name="lang">〈</entity>
  <entity name="lArr">⇐</entity>
  <entity name="larr">←</entity>
  <entity name="lceil">⌈</entity>
  <entity name="le">≤</entity>
  <entity name="lfloor">⌊</entity>
  <entity name="lowast">∗</entity>
  <entity name="loz">◊</entity>
  <entity name="minus">−</entity>
  <entity name="Mu">Μ</entity>
  <entity name="mu">μ</entity>
  <entity name="nabla">∇</entity>
  <entity name="ne">≠</entity>
  <entity name="ni">∋</entity>
  <entity name="notin">∉</entity>
  <entity name="nsub">⊄</entity>
  <entity name="Nu">Ν</entity>
  <entity name="nu">ν</entity>
  <entity name="oline">‾</entity>
  <entity name="Omega">Ω</entity>
  <entity name="omega">ω</entity>
  <entity name="Omicron">Ο</entity>
  <entity name="omicron">ο</entity>
  <entity name="oplus">⊕</entity>
  <entity name="or">∨</entity>
  <entity name="otimes">⊗</entity>
  <entity name="part">∂</entity>
  <entity name="perp">⊥</entity>
  <entity name="Phi">Φ</entity>
  <entity name="phi">ϕ</entity>
  <entity name="Pi">Π</entity>
  <entity name="pi">π</entity>
  <entity name="piv">ϖ</entity>
  <entity name="Prime">″</entity>
  <entity name="prime">′</entity>
  <entity name="prod">∏</entity>
  <entity name="prop">∝</entity>
  <entity name="Psi">Ψ</entity>
  <entity name="psi">ψ</entity>
  <entity name="radic">√</entity>
  <entity name="rang">〉</entity>
  <entity name="rArr">⇒</entity>
  <entity name="rarr">→</entity>
  <entity name="rceil">⌉</entity>
  <entity name="real">ℜ</entity>
  <entity name="rfloor">⌋</entity>
  <entity name="Rho">Ρ</entity>
  <entity name="rho">ρ</entity>
  <entity name="sdot">⋅</entity>
  <entity name="Sigma">Σ</entity>
  <entity name="sigma">σ</entity>
  <entity name="sigmaf">ς</entity>
  <entity name="sim">∼</entity>
  <entity name="spades">♠</entity>
  <entity name="sub">⊂</entity>
  <entity name="sube">⊆</entity>
  <entity name="sum">∑</entity>
  <entity name="sup">⊃</entity>
  <entity name="supe">⊇</entity>
  <entity name="Tau">Τ</entity>
  <entity name="tau">τ</entity>
  <entity name="there4">∴</entity>
  <entity name="Theta">Θ</entity>
  <entity name="theta">θ</entity>
  <entity name="thetasym">ϑ</entity>
  <entity name="trade">™</entity>
  <entity name="TRADE">™</entity>
  <entity name="uArr">⇑</entity>
  <entity name="uarr">↑</entity>
  <entity name="upsih">ϒ</entity>
  <entity name="Upsilon">Υ</entity>
  <entity name="upsilon">υ</entity>
  <entity name="weierp">℘</entity>
  <entity name="Xi">Ξ</entity>
  <entity name="xi">ξ</entity>
  <entity name="Zeta">Ζ</entity>
  <entity name="zeta">ζ</entity>

</xsl:variable>
|
|
|
|
<!-- Index of the entity elements by their name attribute; d:chars uses it
     to look up entity names in $d:ents. -->
<xsl:key match="entity" name="d:ents" use="@name"/>
|
|
|
|
</xsl:stylesheet>
|