Project

Profile

Help

Bug #4212 » XMLtoCSV.xslt

Transformation - Juan Lopez, 2019-05-10 12:35

 
<?xml version="1.0" encoding="UTF-8"?>
<!--
This XSLT stylesheet creates a CSV file following RFC 4180.

Definition of the CSV Format

While there are various specifications and implementations for the
CSV format (for ex. [4], [5], [6] and [7]), there is no formal
specification in existence, which allows for a wide variety of
interpretations of CSV files. This section documents the format that
seems to be followed by most implementations:

1. Each record is located on a separate line, delimited by a line
break (CRLF). For example:

aaa,bbb,ccc CRLF
zzz,yyy,xxx CRLF

2. The last record in the file may or may not have an ending line
break. For example:

aaa,bbb,ccc CRLF
zzz,yyy,xxx

3. There maybe an optional header line appearing as the first line
of the file with the same format as normal record lines. This
header will contain names corresponding to the fields in the file
and should contain the same number of fields as the records in
the rest of the file (the presence or absence of the header line
should be indicated via the optional "header" parameter of this
MIME type). For example:

field_name,field_name,field_name CRLF
aaa,bbb,ccc CRLF
zzz,yyy,xxx CRLF

4. Within the header and each record, there may be one or more
fields, separated by commas. Each line should contain the same
number of fields throughout the file. Spaces are considered part
of a field and should not be ignored. The last field in the
record must not be followed by a comma. For example:

aaa,bbb,ccc

5. Each field may or may not be enclosed in double quotes (however
some programs, such as Microsoft Excel, do not use double quotes
at all). If fields are not enclosed with double quotes, then
double quotes may not appear inside the fields. For example:

"aaa","bbb","ccc" CRLF
zzz,yyy,xxx

6. Fields containing line breaks (CRLF), double quotes, and commas
should be enclosed in double-quotes. For example:

"aaa","b CRLF
bb","ccc" CRLF
zzz,yyy,xxx

7. If double-quotes are used to enclose fields, then a double-quote
appearing inside a field must be escaped by preceding it with
another double quote. For example:

"aaa","b""bb","ccc"

Enhance features:

Parameter sep: the separator placed between fields. The default is a semicolon; some regions use a comma instead.
Parameter line1: set to "Y" to write a header as the first line. By default the header is written.
Parameter forceq: set to "Y" to force double quotes around every header and row field. By default quoting is not forced.

Variable maxlength: maximum field length, also used as the default length when a column does not define one.

Works with XML documents that are two levels deep; the names of the level tags do not matter.
The order of the fields is chosen in the configuration file; fields can also be duplicated or ignored.

-->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:fn="http://www.w3.org/2005/02/xpath-functions" xmlns:xdt="http://www.w3.org/2005/02/xpath-datatypes" xmlns:csv="csv:csv" xmlns:my="my:my" exclude-result-prefixes="my csv xsl xs fn xdt">
<xsl:import href="XMLtoCSV_CONF.xslt"/>
<!-- The result is serialized as plain text encoded in UTF-8 -->
<xsl:output method="text" encoding="utf-8"/>
<!-- Whitespace-only text nodes are stripped from every element of the source document -->
<xsl:strip-space elements="*"/>
<!-- Field separator; when empty or not supplied, the "delimiter" variable falls back to ';' -->
<xsl:param name="sep"/>
<!-- Header switch; when empty or not supplied, the "lineh" variable falls back to 'Y'. The header is written for Y, y, TRUE or true -->
<xsl:param name="line1"/>
<!-- Forced-quoting switch; when empty or not supplied, the "force" variable falls back to 'N'. Quoting is forced for Y, y, TRUE or true -->
<xsl:param name="forceq"/>
<!--
my:reverse-string($arg)
Returns the characters of $arg in reverse order, working on Unicode codepoints.
An empty sequence yields the zero-length string.
-->
<xsl:function name="my:reverse-string" as="xs:string">
<xsl:param name="arg" as="xs:string?"/>
<xsl:variable name="codepoints" as="xs:integer*" select="string-to-codepoints($arg)"/>
<xsl:sequence select="codepoints-to-string(reverse($codepoints))"/>
</xsl:function>
<!--
my:pad-string-to-length($stringToPad, $padChar, $length, $pos)
Whitespace-normalizes $stringToPad and returns a string of at most $length characters:
  $pos = 'L' : the value followed by $padChar fill, keeping the first $length characters
  $pos = 'R' : $padChar fill followed by the value, keeping the last $length characters
  otherwise  : the first $length characters of the value, without any fill
-->
<xsl:function name="my:pad-string-to-length" as="xs:string">
<xsl:param name="stringToPad" as="xs:string?"/>
<xsl:param name="padChar" as="xs:string"/>
<xsl:param name="length" as="xs:integer"/>
<xsl:param name="pos" as="xs:string"/>
<xsl:variable name="trimmed" as="xs:string" select="normalize-space($stringToPad)"/>
<!-- $length copies of the fill string -->
<xsl:variable name="fill" as="xs:string" select="string-join(for $i in (1 to $length) return $padChar, '')"/>
<xsl:variable name="leftFilled" as="xs:string" select="concat($fill, $trimmed)"/>
<xsl:sequence select="
if ($pos eq 'L') then
substring(concat($trimmed, $fill), 1, $length)
else if ($pos eq 'R') then
substring($leftFilled, string-length($leftFilled) - $length + 1)
else
substring($trimmed, 1, $length)
"/>
</xsl:function>
<!--
my:string-to-CSV($stringVal, $delim, $length)
Turns one value into one CSV field.
  $stringVal : raw value; it is whitespace-normalized and cut to $length characters
  $delim     : field separator in use; treated as a literal string, never as a regular expression
  $length    : maximum number of characters kept from the value
Every double quote inside the value is doubled. The field is wrapped in double quotes
when the global $force switch is on (Y, y, TRUE, true) or when the value contains a
double quote or the separator, so a separator inside a value can no longer split the field.
Line breaks need no check here: normalize-space() has already turned them into spaces.
-->
<xsl:function name="my:string-to-CSV" as="xs:string">
<xsl:param name="stringVal" as="xs:string?"/>
<xsl:param name="delim" as="xs:string"/>
<xsl:param name="length" as="xs:integer"/>
<xsl:variable name="val" as="xs:string" select="substring(normalize-space($stringVal),1,$length)"/>
<!-- The pattern is a plain double quote, which has no special meaning in a regular expression -->
<xsl:variable name="escaped" as="xs:string" select="replace($val, '&quot;', '&quot;&quot;')"/>
<!-- contains() is true for a zero-length search string, hence the explicit guard on $delim -->
<xsl:variable name="mustQuote" as="xs:boolean" select="
$force = ('Y','y','TRUE','true')
or contains($val, '&quot;')
or ($delim ne '' and contains($val, $delim))
"/>
<xsl:sequence select="if ($mustQuote) then concat('&quot;',$escaped,'&quot;') else $escaped"/>
</xsl:function>
<!--
my:length($stringVal)
Returns $stringVal as an integer when it is a non-empty string,
otherwise the value of the global $maxlength variable.
-->
<xsl:function name="my:length" as="xs:integer">
<xsl:param name="stringVal" as="xs:string?"/>
<xsl:choose>
<xsl:when test="$stringVal">
<xsl:sequence select="xs:integer($stringVal)"/>
</xsl:when>
<xsl:otherwise>
<xsl:sequence select="xs:integer($maxlength)"/>
</xsl:otherwise>
</xsl:choose>
</xsl:function>
<!-- Effective field separator: the "sep" parameter when it is non-empty, otherwise a semicolon -->
<xsl:variable name="delimiter">
<xsl:value-of select="if ($sep != '') then $sep else ';'"/>
</xsl:variable>
<!-- Effective header switch: the "line1" parameter when it is non-empty, otherwise 'Y' -->
<xsl:variable name="lineh">
<xsl:value-of select="if ($line1 != '') then $line1 else 'Y'"/>
</xsl:variable>
<!-- Effective forced-quoting switch: the "forceq" parameter when it is non-empty, otherwise 'N' -->
<xsl:variable name="force">
<xsl:value-of select="if ($forceq != '') then $forceq else 'N'"/>
</xsl:variable>
<!-- Field length used by my:length when no length is given, and for the header fields -->
<xsl:variable name="maxlength">
<xsl:value-of select="'2000'"/>
</xsl:variable>
<!-- Fill character for text: a single space -->
<xsl:variable name="padSpace">
<xsl:value-of select="'&#32;'"/>
</xsl:variable>
<!-- Fill character for numbers: zero -->
<xsl:variable name="padNum">
<xsl:value-of select="'0'"/>
</xsl:variable>
<!-- end-of-line = CRLF (carriage return followed by line feed), the record delimiter described by RFC 4180; change as needed -->
<xsl:variable name="eol">
<xsl:text>&#13;&#10;</xsl:text>
</xsl:variable>
<!-- Indexes every "col" element under the single constant key value 'all', so that key('COLUMNS','all',$CONF) returns all "col" elements in the tree rooted at $CONF. NOTE(review): $CONF is not declared in this file; presumably it comes from the imported XMLtoCSV_CONF.xslt (confirm) -->
<xsl:key name="COLUMNS" match="col" use="'all'"/>
<!-- Document-node template: optionally writes the header record, then applies templates to the child elements of the document node -->
<xsl:template match="/">
<xsl:if test="$lineh = ('Y','y','TRUE','true')">
<!-- Header record: one field per configured column, named by its "header" attribute when that is non-empty and by its "key" attribute otherwise; the fields are joined with the delimiter -->
<xsl:value-of select="
for $column in key('COLUMNS','all',$CONF) return
my:string-to-CSV(if ($column/@header != '') then $column/@header else $column/@key, $delimiter, $maxlength)
" separator="{$delimiter}"/>
<!-- Close the header record with the end-of-line sequence -->
<xsl:value-of select="$eol"/>
</xsl:if>
<!-- The data rows are written by whichever templates match below the document node -->
<xsl:apply-templates select="*"/>
</xsl:template>
<!--
Row template: writes one CSV record for the matched element.
For every configured column ("col" element returned by the COLUMNS key in $CONF):
  1. the raw value is the child of the matched element whose name equals the column's "key";
     when that is empty or missing, the column's "default" is used, and failing that
     numeric columns (typ = 'N') fall back to the zero fill character;
  2. numeric columns are filled with zeros on the left up to "len" characters;
     when "len" is missing or empty the value is written unpadded instead of raising a type error;
  3. every other column is optionally upper-cased (case = 'U') or lower-cased (case = 'L')
     and cut to "len" characters, or to the default length when "len" is not given.
Fields are separated by the delimiter. The end-of-line sequence is written after every
record except the last one in the current node list.
-->
<xsl:template match="node()/*">
<!-- The element whose children supply the field values of this record -->
<xsl:variable name="property" select="."/>
<xsl:for-each select="key('COLUMNS','all',$CONF)">
<xsl:variable name="id" select="@key"/>
<xsl:variable name="source" select="$property/*[name() = $id]"/>
<!-- Raw value before formatting; it stays empty when no rule below applies -->
<xsl:variable name="value">
<xsl:choose>
<xsl:when test="$source != ''">
<xsl:value-of select="$source"/>
</xsl:when>
<xsl:when test="@default != ''">
<xsl:value-of select="@default"/>
</xsl:when>
<xsl:when test="@typ eq 'N'">
<xsl:value-of select="$padNum"/>
</xsl:when>
</xsl:choose>
</xsl:variable>
<!-- Field width: the column's "len", or the default length when "len" is missing or empty -->
<xsl:variable name="width" as="xs:integer" select="my:length(@len)"/>
<!-- TRANSFORMATION -->
<xsl:choose>
<xsl:when test="@typ eq 'N'">
<xsl:choose>
<!-- Zero-fill only when the column really declares a width -->
<xsl:when test="@len != ''">
<xsl:value-of select="my:string-to-CSV(my:pad-string-to-length($value,$padNum,$width,'R'),$delimiter,$width)"/>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="my:string-to-CSV($value,$delimiter,$width)"/>
</xsl:otherwise>
</xsl:choose>
</xsl:when>
<!-- <xsl:when test="@typ eq 'D'">
<xsl:value-of select="my:pad-string-to-length(translate($value,'/',''),$padSpace,@len,@pos)"/>
</xsl:when> -->
<!-- Text columns (typ = 'S') and columns with any other or no type are handled identically -->
<xsl:when test="@case eq 'U'">
<xsl:value-of select="my:string-to-CSV(upper-case($value),$delimiter,$width)"/>
</xsl:when>
<xsl:when test="@case eq 'L'">
<xsl:value-of select="my:string-to-CSV(lower-case($value),$delimiter,$width)"/>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="my:string-to-CSV($value,$delimiter,$width)"/>
</xsl:otherwise>
</xsl:choose>
<!-- Add the delimiter unless this is the last column -->
<xsl:if test="position() != last()">
<xsl:value-of select="$delimiter"/>
</xsl:if>
</xsl:for-each>
<!-- Here position() and last() refer to the node list this template was applied to, so the last record gets no trailing line break -->
<xsl:if test="position() != last()">
<xsl:value-of select="$eol"/>
</xsl:if>
</xsl:template>
</xsl:stylesheet>
<!-- EXAMPLE OF XML
<ROOT>
<RA>
<NAME> Juan </NAME>
<JOB> Ingeniero de caminos</JOB>
<DUMMY/>
<AGE>22</AGE>
<DOB>2004/12/01</DOB>
</RA>
<R>
<NAME>Luis</NAME>
<JOB>Cartero </JOB>
<AGE>31</AGE>
<DOB>2014/12/06</DOB>
<DUMMY>d</DUMMY>
</R>
<R>
<NAME>Ana</NAME>
<JOB> Secretaria </JOB>
<AGE>43</AGE>
<DOB>2014/12/07</DOB>
</R>
<R>
<NAME>Maria</NAME>
<JOB>Presidenta</JOB>
<DUMMY>3</DUMMY>
<AGE>55</AGE>
<DOB>2014/12/09</DOB>
</R>
<R>
<NAME>Mar "IA FIN</NAME>
<JOB>Presidenta "ESP"</JOB>
<DUMMY>3</DUMMY>
<AGE>27</AGE>
<DOB>2014/12/09</DOB>
</R>
<R>
<NAME>Ana; END</NAME>
<JOB>Secretario, Secretary</JOB>
<DUMMY>3</DUMMY>
<DOB>2014/12/09</DOB>
</R>
</ROOT>
-->
(3-3/5)