|
package fi.tulli.tesb.core.util.schema;
|
|
|
|
import java.io.ByteArrayInputStream;
|
|
import java.io.ByteArrayOutputStream;
|
|
import java.io.File;
|
|
import java.io.IOException;
|
|
import java.io.InputStream;
|
|
import java.io.InputStreamReader;
|
|
import java.nio.file.Files;
|
|
import java.nio.file.Path;
|
|
import java.nio.file.Paths;
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
import java.util.zip.ZipEntry;
|
|
import java.util.zip.ZipInputStream;
|
|
|
|
import javax.xml.XMLConstants;
|
|
import javax.xml.transform.Source;
|
|
import javax.xml.transform.stream.StreamSource;
|
|
import javax.xml.validation.SchemaFactory;
|
|
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
import org.w3c.dom.ls.LSInput;
|
|
import org.w3c.dom.ls.LSResourceResolver;
|
|
import org.xml.sax.SAXException;
|
|
|
|
import com.saxonica.config.EnterpriseConfiguration;
|
|
|
|
import fi.tulli.tesb.core.util.xml.SchemaFactoryUtil;
|
|
import net.sf.saxon.lib.Feature;
|
|
import net.sf.saxon.lib.FeatureKeys;
|
|
|
|
/**
|
|
* A demo program for TESB-4972. In the end the Saxonica's implementation didn't have a bug, but
|
|
* Apache's implementation is more robust. Saxonica's schema load seems to be over five times slower
|
|
* than Apache's. Consider adding rather quickly evicting cache to zip content fetch.
|
|
*
|
|
* @author ext-jarttpet
|
|
*/
|
|
public class SaxonBugDemo {
|
|
|
|
private static final Logger LOG = LoggerFactory.getLogger(SchemaValidatorImpl.class);
|
|
|
|
private static byte[] schemasInZip;
|
|
static {
|
|
File schemaZip = new File(
|
|
"/home/coder/workspace/tp_tesb/core2/util/schema/src/test/resources/fi/tulli/tesb/core/util/schema/SchemaValidatorImplTest_zipped_schemas.zip");
|
|
try {
|
|
schemasInZip = Files.readAllBytes(schemaZip.toPath());
|
|
} catch (IOException e) {
|
|
throw new RuntimeException(e);
|
|
}
|
|
}
|
|
|
|
public static void main(String args[]) {
|
|
SaxonBugDemo application = new SaxonBugDemo();
|
|
long startTime = System.currentTimeMillis();
|
|
LOG.info("Loading schemas from zip with Apache's implementation...");
|
|
application.run(getApacheSchemaFactory());
|
|
LOG.info("Took {} milliseconds.", System.currentTimeMillis() - startTime);
|
|
|
|
startTime = System.currentTimeMillis();
|
|
LOG.info("Loading schemas from zip with Saxonica's implementation...");
|
|
application.run(getSaxonSchemaFactory());
|
|
LOG.info("Took {} milliseconds.", System.currentTimeMillis() - startTime);
|
|
}
|
|
|
|
private void run(SchemaFactory schemaFactory) {
|
|
List<Source> sources = new ArrayList<>();
|
|
try {
|
|
addZippedSchemas(sources);
|
|
} catch (IOException e) {
|
|
throw new RuntimeException(e);
|
|
}
|
|
|
|
schemaFactory.setResourceResolver(resourceResolver);
|
|
|
|
try {
|
|
schemaFactory.newSchema(sources.toArray(new Source[0]));
|
|
} catch (SAXException e) {
|
|
throw new RuntimeException(e);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Resolve an external resource. Sources have system identifiers of the xsd files indicating
|
|
* where files are found as follows: "zip:/path/to/file.xsd".
|
|
*
|
|
* Schema factory calls <code>resolveResource</code> method with systemId of the source file in
|
|
* baseURI parameter, and the imported xsd that needs to be resolved in systemId parameter.
|
|
*/
|
|
private static LSResourceResolver resourceResolver = new LSResourceResolver() {
|
|
@Override
|
|
public LSInput resolveResource(String type, String namespaceURI, String publicId, String systemId,
|
|
String baseURI) {
|
|
|
|
LOG.debug(
|
|
"Resolver called with parameters type {}, "
|
|
+ "namespaceURI {}, publicId {}, systemId {}, baseURI {}",
|
|
type, namespaceURI, publicId, systemId, baseURI);
|
|
|
|
String searchedSchemaFile = null;
|
|
|
|
if (baseURI != null && baseURI.startsWith("zip:")) {
|
|
String[] splittedBaseURI = baseURI.split(":");
|
|
Path baseFileFolder = Paths.get(splittedBaseURI[1]).getParent();
|
|
searchedSchemaFile = baseFileFolder.resolve(systemId).normalize().toString();
|
|
} else {
|
|
String errorLine = String.format(
|
|
"The baseURI is lost! Unable to determine where to find xsd file %s as baseURI is now %s",
|
|
systemId, baseURI);
|
|
throw new RuntimeException(errorLine);
|
|
}
|
|
|
|
byte[] uncompressedSchema = getZippedSchema(searchedSchemaFile);
|
|
if (uncompressedSchema == null) {
|
|
String errorLine = String.format(
|
|
"Schema file %s is supposed to be inside %s, but it was not found!",
|
|
searchedSchemaFile);
|
|
throw new RuntimeException(errorLine);
|
|
}
|
|
|
|
systemId = "zip:" + searchedSchemaFile;
|
|
baseURI = null;
|
|
|
|
InputStream stream = new ByteArrayInputStream(uncompressedSchema);
|
|
LSInputImpl input = new LSInputImpl();
|
|
input.setPublicId(publicId);
|
|
input.setSystemId(systemId);
|
|
input.setBaseURI(baseURI);
|
|
input.setCharacterStream(new InputStreamReader(stream));
|
|
|
|
LOG.info("Resolved xsd file content: publicId {} systemId {} baseURI {}", publicId, systemId, baseURI);
|
|
|
|
return input;
|
|
}
|
|
};
|
|
|
|
private static SchemaFactory getSaxonSchemaFactory() {
|
|
EnterpriseConfiguration conf = new EnterpriseConfiguration();
|
|
conf.setConfigurationProperty(FeatureKeys.LICENSE_FILE_LOCATION, "/opt/tulli/licenses/saxon-license.lic");
|
|
conf.setConfigurationProperty(Feature.MULTIPLE_SCHEMA_IMPORTS, Boolean.TRUE);
|
|
return new com.saxonica.ee.jaxp.SchemaFactoryImpl(conf);
|
|
}
|
|
|
|
private static SchemaFactory getApacheSchemaFactory() {
|
|
SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI,
|
|
"org.apache.xerces.jaxp.validation.XMLSchemaFactory", SchemaFactoryUtil.class.getClassLoader());
|
|
return schemaFactory;
|
|
}
|
|
|
|
/**
|
|
* Search a xsd file from the zip. Return an uncompressed byte array of the file content.
|
|
*/
|
|
private static byte[] getZippedSchema(String searchedSchemaFile) {
|
|
try (ZipInputStream zi = new ZipInputStream(new ByteArrayInputStream(schemasInZip))) {
|
|
ZipEntry zipEntry = null;
|
|
while ((zipEntry = zi.getNextEntry()) != null) {
|
|
if (!zipEntry.isDirectory()) {
|
|
String normalizedName = Paths.get("/", zipEntry.getName()).normalize().toString();
|
|
if (normalizedName.equals(searchedSchemaFile)) {
|
|
ByteArrayOutputStream os = new ByteArrayOutputStream();
|
|
int b = zi.read();
|
|
while (b >= 0) {
|
|
os.write(b);
|
|
b = zi.read();
|
|
}
|
|
os.flush();
|
|
|
|
return os.toByteArray();
|
|
}
|
|
}
|
|
}
|
|
} catch (IOException e) {
|
|
throw new RuntimeException(e);
|
|
}
|
|
return null;
|
|
}
|
|
|
|
/**
|
|
* Read through the zip file and make <code>Source</code> entries from every xsd-file. Generate
|
|
* a systemId of the xsd file indicating where the file is found as follows:
|
|
* "zip:/path/to/file.xsd"
|
|
*/
|
|
private static void addZippedSchemas(List<Source> sources) throws IOException {
|
|
try (ZipInputStream zi = new ZipInputStream(new ByteArrayInputStream(schemasInZip))) {
|
|
ZipEntry zipEntry = null;
|
|
while ((zipEntry = zi.getNextEntry()) != null) {
|
|
if (!zipEntry.isDirectory() && zipEntry.getName().toLowerCase().endsWith(".xsd")) {
|
|
String normalizedName = Paths.get("/", zipEntry.getName()).normalize().toString();
|
|
|
|
try (ByteArrayOutputStream os = new ByteArrayOutputStream()) {
|
|
int b = zi.read();
|
|
while (b >= 0) {
|
|
os.write(b);
|
|
b = zi.read();
|
|
}
|
|
os.flush();
|
|
|
|
byte[] schemaDoc = os.toByteArray();
|
|
String systemId = String.format("zip:%s", normalizedName);
|
|
sources.add(new StreamSource(new ByteArrayInputStream(schemaDoc), systemId));
|
|
LOG.debug("Found {} from the zip. Added source with systemId {}", normalizedName, systemId);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|