Bug #6612
Status: open — Memory leak under sustained load (12.5)
0%
Description
We're using Saxonche in a Python application (running in Docker on AWS ECS) to process many XML messages under sustained load. Currently there are about 3-6 events per second, and this might increase to 100-200 events per second.
We're observing that the Saxonche PySaxonProcessor allocates 0.5–4.0 MB per run and does not release that memory as long as the load is sustained. If we pause the event processing, we see the memory being released after a few minutes. Unfortunately this is not workable for us, because the container runs out of memory before that happens.
Our code looks like this:
@profile
def transform_with_xslt(
    self,
    input: bytes,
    xslt_path: str | Path,
    xpath_to_root: str | None = None,
    mapping_input_parameter_name: str | None = None,
    namespace_of_inputmessageschema: str | None = None,
) -> str:
    """Transform an XML message with an XSLT stylesheet and return the output.

    A new ``PySaxonProcessor`` is created on every call. The message is
    parsed, the node selected by the XPath ``/{xpath_to_root}`` is handed to
    the stylesheet as a parameter, and templates are applied to the parsed
    document.

    Args:
        input: The XML message as bytes; decoded with ``bytes.decode()``
            defaults before parsing.
        xslt_path: Stylesheet location, forwarded to ``self._compiled_xslt``.
        xpath_to_root: Path appended to ``/`` to build the XPath expression.
            NOTE(review): the default ``None`` would produce the expression
            ``/None`` -- confirm callers always supply a value.
        mapping_input_parameter_name: Name of the stylesheet parameter that
            receives the selected node. NOTE(review): the default ``None`` is
            passed straight to ``set_parameter`` -- confirm callers always
            supply a value.
        namespace_of_inputmessageschema: When truthy, declared on the XPath
            processor with the empty prefix ``""``.

    Returns:
        The string returned by ``apply_templates_returning_string``.

    Raises:
        MappingError: If the transformation returns ``None``.
    """
    # A fresh processor is created for each invocation and used as a
    # context manager for the duration of the transformation.
    with PySaxonProcessor(license=False) as processor:
        xml_doc = processor.parse_xml(xml_text=input.decode())
        xslt_processor = processor.new_xslt30_processor()
        # Compile the enhanced template and cache it
        # NOTE(review): compilation/caching happens inside _compiled_xslt,
        # which is not shown here -- verify how it interacts with a
        # per-call xslt_processor.
        xslt = self._compiled_xslt(xslt_processor, xslt_path)
        # Use the configured XPath expression
        xpath_proc = processor.new_xpath_processor()
        if namespace_of_inputmessageschema:
            # If the namespace is provided, set it as the default namespace. This allows us to define
            # XPath expressions without specifying the namespace. The system will default to the namespace
            # provided here. Also see:
            # https://www.saxonica.com/saxon-c/doc12/html/saxonc.html#PyXPathProcessor-declare_namespace
            xpath_proc.declare_namespace("", namespace_of_inputmessageschema)
        # Evaluate the XPath against the parsed message and pass the selected
        # node to the stylesheet as a parameter.
        xpath_proc.set_context(xdm_item=xml_doc)
        root_node = xpath_proc.evaluate_single(f"/{xpath_to_root}")
        xslt.set_parameter(mapping_input_parameter_name, root_node)
        result = xslt.apply_templates_returning_string(xdm_node=xml_doc)
        # A None result is treated as a failed mapping rather than returned.
        if result is None:
            raise MappingError("XSLT transformation produced no output")
        return result
If we send a single event, the memory profiler (memory_profiler) shows us the following output:
Line # Mem usage Increment Occurrences Line Contents
=============================================================
37 147.3 MiB 147.3 MiB 1 @profile
38 def transform_with_xslt(
39 self,
40 input: bytes,
41 xslt_path: str | Path,
42 xpath_to_root: str | None = None,
43 mapping_input_parameter_name: str | None = None,
44 namespace_of_inputmessageschema: str | None = None,
45 ) -> str:
46 149.2 MiB 0.2 MiB 2 with PySaxonProcessor(license=False) as processor:
47 148.0 MiB 0.5 MiB 1 xml_doc = processor.parse_xml(xml_text=input.decode())
48 148.0 MiB 0.0 MiB 1 xslt_processor = processor.new_xslt30_processor()
49
50 # Compile the enhanced template and cache it
51 148.2 MiB 0.2 MiB 1 xslt = self._compiled_xslt(xslt_processor, xslt_path)
52
53 # Use the configured XPath expression
54 148.2 MiB 0.0 MiB 1 xpath_proc = processor.new_xpath_processor()
55
56 148.2 MiB 0.0 MiB 1 if namespace_of_inputmessageschema:
57 # If the namespaces is provided, set it as the default namespace. This allows us to define
58 # XPath expressions without specifying the namespace. The system will default to the namespace
59 # provided here. Also see:
60 # https://www.saxonica.com/saxon-c/doc12/html/saxonc.html#PyXPathProcessor-declare_namespace
61 xpath_proc.declare_namespace("", namespace_of_inputmessageschema)
62
63 148.2 MiB 0.0 MiB 1 xpath_proc.set_context(xdm_item=xml_doc)
64 148.3 MiB 0.0 MiB 1 root_node = xpath_proc.evaluate_single(f"/{xpath_to_root}")
65 148.3 MiB 0.0 MiB 1 xslt.set_parameter(mapping_input_parameter_name, root_node)
66
67 149.2 MiB 0.9 MiB 1 result = xslt.apply_templates_returning_string(xdm_node=xml_doc)
68
69 149.2 MiB 0.0 MiB 1 if result is None:
70 raise MappingError("XSLT transformation produced no output")
71
72 149.2 MiB 0.0 MiB 1 return result
If we run it again, this is the output (please note the 2.5 MiB increase in starting memory usage, from 147.3 MiB to 149.8 MiB):
Line # Mem usage Increment Occurrences Line Contents
=============================================================
37 149.8 MiB 149.8 MiB 1 @profile
38 def transform_with_xslt(
39 self,
40 input: bytes,
41 xslt_path: str | Path,
42 xpath_to_root: str | None = None,
43 mapping_input_parameter_name: str | None = None,
44 namespace_of_inputmessageschema: str | None = None,
45 ) -> str:
46 151.8 MiB 0.2 MiB 2 with PySaxonProcessor(license=False) as processor:
47 150.6 MiB 0.6 MiB 1 xml_doc = processor.parse_xml(xml_text=input.decode())
48 150.6 MiB 0.0 MiB 1 xslt_processor = processor.new_xslt30_processor()
49
50 # Compile the enhanced template and cache it
51 150.8 MiB 0.2 MiB 1 xslt = self._compiled_xslt(xslt_processor, xslt_path)
52
53 # Use the configured XPath expression
54 150.8 MiB 0.0 MiB 1 xpath_proc = processor.new_xpath_processor()
55
56 150.8 MiB 0.0 MiB 1 if namespace_of_inputmessageschema:
57 # If the namespaces is provided, set it as the default namespace. This allows us to define
58 # XPath expressions without specifying the namespace. The system will default to the namespace
59 # provided here. Also see:
60 # https://www.saxonica.com/saxon-c/doc12/html/saxonc.html#PyXPathProcessor-declare_namespace
61 xpath_proc.declare_namespace("", namespace_of_inputmessageschema)
62
63 150.8 MiB 0.0 MiB 1 xpath_proc.set_context(xdm_item=xml_doc)
64 150.8 MiB 0.0 MiB 1 root_node = xpath_proc.evaluate_single(f"/{xpath_to_root}")
65 150.8 MiB 0.0 MiB 1 xslt.set_parameter(mapping_input_parameter_name, root_node)
66
67 151.8 MiB 0.9 MiB 1 result = xslt.apply_templates_returning_string(xdm_node=xml_doc)
68
69 151.8 MiB 0.0 MiB 1 if result is None:
70 raise MappingError("XSLT transformation produced no output")
71
72 151.8 MiB 0.0 MiB 1 return result
When we run a thousand invocations, the number just keeps increasing:
Line # Mem usage Increment Occurrences Line Contents
=============================================================
37 420.6 MiB 420.6 MiB 1 @profile
38 def transform_with_xslt(
39 self,
40 input: bytes,
41 xslt_path: str | Path,
42 xpath_to_root: str | None = None,
43 mapping_input_parameter_name: str | None = None,
44 namespace_of_inputmessageschema: str | None = None,
45 ) -> str:
46 421.5 MiB 0.0 MiB 2 with PySaxonProcessor(license=False) as processor:
47 421.5 MiB 0.9 MiB 1 xml_doc = processor.parse_xml(xml_text=input.decode())
48 421.5 MiB 0.0 MiB 1 xslt_processor = processor.new_xslt30_processor()
49
50 # Compile the enhanced template and cache it
51 421.5 MiB 0.0 MiB 1 xslt = self._compiled_xslt(xslt_processor, xslt_path)
52
53 # Use the configured XPath expression
54 421.5 MiB 0.0 MiB 1 xpath_proc = processor.new_xpath_processor()
55
56 421.5 MiB 0.0 MiB 1 if namespace_of_inputmessageschema:
57 # If the namespaces is provided, set it as the default namespace. This allows us to define
58 # XPath expressions without specifying the namespace. The system will default to the namespace
59 # provided here. Also see:
60 # https://www.saxonica.com/saxon-c/doc12/html/saxonc.html#PyXPathProcessor-declare_namespace
61 xpath_proc.declare_namespace("", namespace_of_inputmessageschema)
62
63 421.5 MiB 0.0 MiB 1 xpath_proc.set_context(xdm_item=xml_doc)
64 421.5 MiB 0.0 MiB 1 root_node = xpath_proc.evaluate_single(f"/{xpath_to_root}")
65 421.5 MiB 0.0 MiB 1 xslt.set_parameter(mapping_input_parameter_name, root_node)
66
67 421.5 MiB 0.0 MiB 1 result = xslt.apply_templates_returning_string(xdm_node=xml_doc)
68
69 421.5 MiB 0.0 MiB 1 if result is None:
70 raise MappingError("XSLT transformation produced no output")
71
72 421.5 MiB 0.0 MiB 1 return result
Considering the sustained load, this is a major problem for us. It seems that garbage collection takes place when the event stream pauses, but this is outside of our control. We would like the memory to be released as soon as the system is done processing the event. Can you help us resolve this issue?
Please register to edit this issue