Project

Profile

Help

Bug #4982 » SaxonBugDemo.java

small demo app - Tomas Vanhala, 2021-05-04 10:01

 
1
package fi.tulli.tesb.core.util.schema;
2

    
3
import java.io.ByteArrayInputStream;
4
import java.io.ByteArrayOutputStream;
5
import java.io.File;
6
import java.io.IOException;
7
import java.io.InputStream;
8
import java.io.InputStreamReader;
9
import java.nio.file.Files;
10
import java.nio.file.Path;
11
import java.nio.file.Paths;
12
import java.util.ArrayList;
13
import java.util.List;
14
import java.util.zip.ZipEntry;
15
import java.util.zip.ZipInputStream;
16

    
17
import javax.xml.XMLConstants;
18
import javax.xml.transform.Source;
19
import javax.xml.transform.stream.StreamSource;
20
import javax.xml.validation.SchemaFactory;
21

    
22
import org.slf4j.Logger;
23
import org.slf4j.LoggerFactory;
24
import org.w3c.dom.ls.LSInput;
25
import org.w3c.dom.ls.LSResourceResolver;
26
import org.xml.sax.SAXException;
27

    
28
import com.saxonica.config.EnterpriseConfiguration;
29

    
30
import fi.tulli.tesb.core.util.xml.SchemaFactoryUtil;
31
import net.sf.saxon.lib.Feature;
32
import net.sf.saxon.lib.FeatureKeys;
33

    
34
/**
35
 * A demo program for TESB-4972. In the end the Saxonica's implementation didn't have a bug, but
36
 * Apache's implementation is more robust. Saxonica's schema load seems to be over five times slower
37
 * than Apache's. Consider adding rather quickly evicting cache to zip content fetch.
38
 * 
39
 * @author ext-jarttpet
40
 */
41
public class SaxonBugDemo {
42
    
43
    private static final Logger LOG = LoggerFactory.getLogger(SchemaValidatorImpl.class);
44
    
45
    private static byte[] schemasInZip;
46
    static {
47
        File schemaZip = new File(
48
                "/home/coder/workspace/tp_tesb/core2/util/schema/src/test/resources/fi/tulli/tesb/core/util/schema/SchemaValidatorImplTest_zipped_schemas.zip");
49
        try {
50
            schemasInZip = Files.readAllBytes(schemaZip.toPath());
51
        } catch (IOException e) {
52
            throw new RuntimeException(e);
53
        }
54
    }
55
    
56
    public static void main(String args[]) {
57
        SaxonBugDemo application = new SaxonBugDemo();
58
        long startTime = System.currentTimeMillis();
59
        LOG.info("Loading schemas from zip with Apache's implementation...");
60
        application.run(getApacheSchemaFactory());
61
        LOG.info("Took {} milliseconds.", System.currentTimeMillis() - startTime);
62
        
63
        startTime = System.currentTimeMillis();
64
        LOG.info("Loading schemas from zip with Saxonica's implementation...");
65
        application.run(getSaxonSchemaFactory());
66
        LOG.info("Took {} milliseconds.", System.currentTimeMillis() - startTime);
67
    }
68
    
69
    private void run(SchemaFactory schemaFactory) {
70
        List<Source> sources = new ArrayList<>();
71
        try {
72
            addZippedSchemas(sources);
73
        } catch (IOException e) {
74
            throw new RuntimeException(e);
75
        }
76
        
77
        schemaFactory.setResourceResolver(resourceResolver);
78
        
79
        try {
80
            schemaFactory.newSchema(sources.toArray(new Source[0]));
81
        } catch (SAXException e) {
82
            throw new RuntimeException(e);
83
        }
84
    }
85
    
86
    /**
87
     * Resolve an external resource. Sources have system identifiers of the xsd files indicating
88
     * where files are found as follows: "zip:/path/to/file.xsd".
89
     * 
90
     * Schema factory calls <code>resolveResource</code> method with systemId of the source file in
91
     * baseURI parameter, and the imported xsd that needs to be resolved in systemId parameter.
92
     */
93
    private static LSResourceResolver resourceResolver = new LSResourceResolver() {
94
        @Override
95
        public LSInput resolveResource(String type, String namespaceURI, String publicId, String systemId,
96
                String baseURI) {
97
            
98
            LOG.debug(
99
                    "Resolver called with parameters type {}, "
100
                            + "namespaceURI {}, publicId {}, systemId {}, baseURI {}",
101
                    type, namespaceURI, publicId, systemId, baseURI);
102
            
103
            String searchedSchemaFile = null;
104
            
105
            if (baseURI != null && baseURI.startsWith("zip:")) {
106
                String[] splittedBaseURI = baseURI.split(":");
107
                Path baseFileFolder = Paths.get(splittedBaseURI[1]).getParent();
108
                searchedSchemaFile = baseFileFolder.resolve(systemId).normalize().toString();
109
            } else {
110
                String errorLine = String.format(
111
                        "The baseURI is lost! Unable to determine where to find xsd file %s as baseURI is now %s",
112
                        systemId, baseURI);
113
                throw new RuntimeException(errorLine);
114
            }
115
            
116
            byte[] uncompressedSchema = getZippedSchema(searchedSchemaFile);
117
            if (uncompressedSchema == null) {
118
                String errorLine = String.format(
119
                        "Schema file %s is supposed to be inside %s, but it was not found!",
120
                        searchedSchemaFile);
121
                throw new RuntimeException(errorLine);
122
            }
123
            
124
            systemId = "zip:" + searchedSchemaFile;
125
            baseURI = null;
126
            
127
            InputStream stream = new ByteArrayInputStream(uncompressedSchema);
128
            LSInputImpl input = new LSInputImpl();
129
            input.setPublicId(publicId);
130
            input.setSystemId(systemId);
131
            input.setBaseURI(baseURI);
132
            input.setCharacterStream(new InputStreamReader(stream));
133
            
134
            LOG.info("Resolved xsd file content: publicId {} systemId {} baseURI {}", publicId, systemId, baseURI);
135
            
136
            return input;
137
        }
138
    };
139
    
140
    private static SchemaFactory getSaxonSchemaFactory() {
141
        EnterpriseConfiguration conf = new EnterpriseConfiguration();
142
        conf.setConfigurationProperty(FeatureKeys.LICENSE_FILE_LOCATION, "/opt/tulli/licenses/saxon-license.lic");
143
        conf.setConfigurationProperty(Feature.MULTIPLE_SCHEMA_IMPORTS, Boolean.TRUE);
144
        return new com.saxonica.ee.jaxp.SchemaFactoryImpl(conf);
145
    }
146
    
147
    private static SchemaFactory getApacheSchemaFactory() {
148
        SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI,
149
                "org.apache.xerces.jaxp.validation.XMLSchemaFactory", SchemaFactoryUtil.class.getClassLoader());
150
        return schemaFactory;
151
    }
152
    
153
    /**
154
     * Search a xsd file from the zip. Return an uncompressed byte array of the file content.
155
     */
156
    private static byte[] getZippedSchema(String searchedSchemaFile) {
157
        try (ZipInputStream zi = new ZipInputStream(new ByteArrayInputStream(schemasInZip))) {
158
            ZipEntry zipEntry = null;
159
            while ((zipEntry = zi.getNextEntry()) != null) {
160
                if (!zipEntry.isDirectory()) {
161
                    String normalizedName = Paths.get("/", zipEntry.getName()).normalize().toString();
162
                    if (normalizedName.equals(searchedSchemaFile)) {
163
                        ByteArrayOutputStream os = new ByteArrayOutputStream();
164
                        int b = zi.read();
165
                        while (b >= 0) {
166
                            os.write(b);
167
                            b = zi.read();
168
                        }
169
                        os.flush();
170
                        
171
                        return os.toByteArray();
172
                    }
173
                }
174
            }
175
        } catch (IOException e) {
176
            throw new RuntimeException(e);
177
        }
178
        return null;
179
    }
180
    
181
    /**
182
     * Read through the zip file and make <code>Source</code> entries from every xsd-file. Generate
183
     * a systemId of the xsd file indicating where the file is found as follows:
184
     * "zip:/path/to/file.xsd"
185
     */
186
    private static void addZippedSchemas(List<Source> sources) throws IOException {
187
        try (ZipInputStream zi = new ZipInputStream(new ByteArrayInputStream(schemasInZip))) {
188
            ZipEntry zipEntry = null;
189
            while ((zipEntry = zi.getNextEntry()) != null) {
190
                if (!zipEntry.isDirectory() && zipEntry.getName().toLowerCase().endsWith(".xsd")) {
191
                    String normalizedName = Paths.get("/", zipEntry.getName()).normalize().toString();
192
                    
193
                    try (ByteArrayOutputStream os = new ByteArrayOutputStream()) {
194
                        int b = zi.read();
195
                        while (b >= 0) {
196
                            os.write(b);
197
                            b = zi.read();
198
                        }
199
                        os.flush();
200
                        
201
                        byte[] schemaDoc = os.toByteArray();
202
                        String systemId = String.format("zip:%s", normalizedName);
203
                        sources.add(new StreamSource(new ByteArrayInputStream(schemaDoc), systemId));
204
                        LOG.debug("Found {} from the zip. Added source with systemId {}", normalizedName, systemId);
205
                    }
206
                }
207
            }
208
        }
209
    }
210
}
(1-1/6)