package fi.tulli.tesb.core.util.schema;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import javax.xml.XMLConstants;
import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.SchemaFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.ls.LSInput;
import org.w3c.dom.ls.LSResourceResolver;
import org.xml.sax.SAXException;
import com.saxonica.config.EnterpriseConfiguration;
import fi.tulli.tesb.core.util.xml.SchemaFactoryUtil;
import net.sf.saxon.lib.Feature;
import net.sf.saxon.lib.FeatureKeys;
/**
 * A demo program for TESB-4972. In the end, Saxonica's implementation turned out not to have a
 * bug, but Apache's implementation is more robust. Saxonica's schema loading seems to be over five
 * times slower than Apache's. Consider adding a quickly evicting cache to the zip content fetch.
 *
 * @author ext-jarttpet
 */
public class SaxonBugDemo {
// Logger bound to this class so that log lines are attributed to the demo itself.
private static final Logger LOG = LoggerFactory.getLogger(SaxonBugDemo.class);
// Raw bytes of the schema zip archive; assigned by the static initializer.
private static byte[] schemasInZip;
// Reads the whole schema zip into memory once, when the class is initialized.
static {
File schemaZip = new File(
// NOTE(review): the constructor argument (the location of the zip) is not visible in this
// excerpt - confirm against the full file.
try {
schemasInZip = Files.readAllBytes(schemaZip.toPath());
} catch (IOException e) {
// An unreadable zip fails class initialization; the I/O failure is kept as the cause.
throw new RuntimeException(e);
public static void main(String args[]) {
SaxonBugDemo application = new SaxonBugDemo();
long startTime = System.currentTimeMillis();
LOG.info("Loading schemas from zip with Apache's implementation...");
LOG.info("Took {} milliseconds.", System.currentTimeMillis() - startTime);
startTime = System.currentTimeMillis();
LOG.info("Loading schemas from zip with Saxonica's implementation...");
LOG.info("Took {} milliseconds.", System.currentTimeMillis() - startTime);
private void run(SchemaFactory schemaFactory) {
List<Source> sources = new ArrayList<>();
try {
} catch (IOException e) {
throw new RuntimeException(e);
try {
schemaFactory.newSchema(sources.toArray(new Source[0]));
} catch (SAXException e) {
throw new RuntimeException(e);
/**
 * Resolves an external resource. Sources have system identifiers that indicate where the xsd
 * files are found, in the form "zip:/path/to/file.xsd".
 * The schema factory calls the <code>resolveResource</code> method with the systemId of the
 * source file in the baseURI parameter, and the imported xsd that needs to be resolved in the
 * systemId parameter.
 */
// Resolver that answers schema imports from the in-memory zip instead of the file system.
private static LSResourceResolver resourceResolver = new LSResourceResolver() {
public LSInput resolveResource(String type, String namespaceURI, String publicId, String systemId,
String baseURI) {
// NOTE(review): the next three lines are the arguments of a call whose opening line is not
// visible in this excerpt - presumably a log statement; confirm against the full file.
"Resolver called with parameters type {}, "
+ "namespaceURI {}, publicId {}, systemId {}, baseURI {}",
type, namespaceURI, publicId, systemId, baseURI);
String searchedSchemaFile = null;
if (baseURI != null && baseURI.startsWith("zip:")) {
// Take the path that follows the "zip:" prefix and resolve the imported file relative to the
// folder of the importing schema; normalize() collapses any "." and ".." segments.
// NOTE(review): split(":") cuts at every colon, so a path that itself contains a colon loses
// its tail, and getParent() returns null for a path without a parent - TODO confirm inputs.
String[] splittedBaseURI = baseURI.split(":");
Path baseFileFolder = Paths.get(splittedBaseURI[1]).getParent();
searchedSchemaFile = baseFileFolder.resolve(systemId).normalize().toString();
} else {
// Without a "zip:" baseURI there is nothing to resolve the relative systemId against.
String errorLine = String.format(
"The baseURI is lost! Unable to determine where to find xsd file %s as baseURI is now %s",
systemId, baseURI);
throw new RuntimeException(errorLine);
// getZippedSchema returns null when no entry in the zip matches the searched path.
byte[] uncompressedSchema = getZippedSchema(searchedSchemaFile);
if (uncompressedSchema == null) {
String errorLine = String.format(
"Schema file %s is supposed to be inside %s, but it was not found!",
// NOTE(review): the arguments for the two %s placeholders are not visible in this excerpt.
throw new RuntimeException(errorLine);
// Re-point systemId at the resolved zip entry and clear baseURI.
// NOTE(review): in the visible code these two values are only used by the log statement
// below; no visible line passes them to input - confirm against the full file.
systemId = "zip:" + searchedSchemaFile;
baseURI = null;
InputStream stream = new ByteArrayInputStream(uncompressedSchema);
LSInputImpl input = new LSInputImpl();
// NOTE(review): InputStreamReader without an explicit charset decodes with the platform
// default, which may disagree with the encoding declared in the xsd - consider handing the
// bytes over as a byte stream instead.
input.setCharacterStream(new InputStreamReader(stream));
LOG.info("Resolved xsd file content: publicId {} systemId {} baseURI {}", publicId, systemId, baseURI);
return input;
private static SchemaFactory getSaxonSchemaFactory() {
EnterpriseConfiguration conf = new EnterpriseConfiguration();
conf.setConfigurationProperty(FeatureKeys.LICENSE_FILE_LOCATION, "/opt/tulli/licenses/saxon-license.lic");
conf.setConfigurationProperty(Feature.MULTIPLE_SCHEMA_IMPORTS, Boolean.TRUE);
return new com.saxonica.ee.jaxp.SchemaFactoryImpl(conf);
private static SchemaFactory getApacheSchemaFactory() {
SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI,
"org.apache.xerces.jaxp.validation.XMLSchemaFactory", SchemaFactoryUtil.class.getClassLoader());
return schemaFactory;
* Search a xsd file from the zip. Return an uncompressed byte array of the file content.
private static byte[] getZippedSchema(String searchedSchemaFile) {
try (ZipInputStream zi = new ZipInputStream(new ByteArrayInputStream(schemasInZip))) {
ZipEntry zipEntry = null;
while ((zipEntry = zi.getNextEntry()) != null) {
if (!zipEntry.isDirectory()) {
String normalizedName = Paths.get("/", zipEntry.getName()).normalize().toString();
if (normalizedName.equals(searchedSchemaFile)) {
ByteArrayOutputStream os = new ByteArrayOutputStream();
int b = zi.read();
while (b >= 0) {
b = zi.read();
return os.toByteArray();
} catch (IOException e) {
throw new RuntimeException(e);
return null;
/**
 * Reads through the zip file and creates a <code>Source</code> entry for every xsd file. Each
 * source gets a systemId that indicates where the xsd file is found, in the form
 * "zip:/path/to/file.xsd".
 */
private static void addZippedSchemas(List<Source> sources) throws IOException {
// Open a fresh stream over the in-memory zip and walk its entries from the start.
try (ZipInputStream zi = new ZipInputStream(new ByteArrayInputStream(schemasInZip))) {
ZipEntry zipEntry = null;
while ((zipEntry = zi.getNextEntry()) != null) {
// Only non-directory entries whose lower-cased name ends with ".xsd" become sources.
// NOTE(review): toLowerCase() without a Locale uses the default locale - consider Locale.ROOT.
if (!zipEntry.isDirectory() && zipEntry.getName().toLowerCase().endsWith(".xsd")) {
// Anchor the entry name at "/" and normalize it; the result is the path part of the systemId.
String normalizedName = Paths.get("/", zipEntry.getName()).normalize().toString();
try (ByteArrayOutputStream os = new ByteArrayOutputStream()) {
// NOTE(review): the entry is read one byte at a time, and no line visible in this excerpt
// writes the byte into os - confirm against the full file.
int b = zi.read();
while (b >= 0) {
b = zi.read();
byte[] schemaDoc = os.toByteArray();
// The "zip:" prefix is what the resource resolver checks for in baseURI.
String systemId = String.format("zip:%s", normalizedName);
sources.add(new StreamSource(new ByteArrayInputStream(schemaDoc), systemId));
LOG.debug("Found {} from the zip. Added source with systemId {}", normalizedName, systemId);