Julius Werner has submitted this change. ( https://review.coreboot.org/c/coreboot/+/83208?usp=email )
Change subject: commonlib/device_tree: Improve node and property allocation speed ......................................................................
commonlib/device_tree: Improve node and property allocation speed
Now that the device tree code has been made available in libpayload, we should reintroduce the node and property allocation optimization for libpayload's memory allocator that was originally dropped when porting this code from depthcharge to coreboot.
On a Qualcomm SC7180 unflattening a normal ChromeOS kernel device tree, this saves roughly ~145ms. The total scratch space used is about ~1350 nodes and ~5200 properties, so we leave a little room to grow with the constants hardcoded here.
Change-Id: I0f4d80a8b750febfb069b32ef47304ccecdc35af Signed-off-by: Julius Werner jwerner@chromium.org Reviewed-on: https://review.coreboot.org/c/coreboot/+/83208 Tested-by: build bot (Jenkins) no-reply@coreboot.org Reviewed-by: Maximilian Brune maximilian.brune@9elements.com Reviewed-by: Raul Rangel rrangel@chromium.org --- M src/commonlib/device_tree.c 1 file changed, 41 insertions(+), 8 deletions(-)
Approvals: Maximilian Brune: Looks good to me, approved build bot (Jenkins): Verified Raul Rangel: Looks good to me, approved
diff --git a/src/commonlib/device_tree.c b/src/commonlib/device_tree.c index f70aaf7..0ec6ea9 100644 --- a/src/commonlib/device_tree.c +++ b/src/commonlib/device_tree.c @@ -25,6 +25,41 @@ #define FDT_MAX_MEMORY_REGIONS 16 // should be a good enough upper bound
/* + * libpayload's malloc() has a linear allocation complexity, which means that it + * degrades massively if we make a few thousand small allocations. Preventing + * that problem with a custom scratchpad is well-worth some increase in BSS + * size (64 * 2000 + 40 * 10000 = ~1/2 MB). + */ + +/* Try to give these a healthy margin above what the average kernel DT needs. */ +#define LP_ALLOC_NODE_SCRATCH_COUNT 2000 +#define LP_ALLOC_PROP_SCRATCH_COUNT 10000 + +static struct device_tree_node *alloc_node(void) +{ +#ifndef __COREBOOT__ + static struct device_tree_node scratch[LP_ALLOC_NODE_SCRATCH_COUNT]; + static int counter = 0; + + if (counter < ARRAY_SIZE(scratch)) + return &scratch[counter++]; +#endif + return xzalloc(sizeof(struct device_tree_node)); +} + +static struct device_tree_property *alloc_prop(void) +{ +#ifndef __COREBOOT__ + static struct device_tree_property scratch[LP_ALLOC_PROP_SCRATCH_COUNT]; + static int counter = 0; + + if (counter < ARRAY_SIZE(scratch)) + return &scratch[counter++]; +#endif + return xzalloc(sizeof(struct device_tree_property)); +} + +/* * Functions for picking apart flattened trees. */
@@ -625,14 +660,14 @@ return 0; offset += size;
- struct device_tree_node *node = xzalloc(sizeof(*node)); + struct device_tree_node *node = alloc_node(); *new_node = node; node->name = name;
struct fdt_property fprop; last = &node->properties; while ((size = fdt_next_property(blob, offset, &fprop))) { - struct device_tree_property *prop = xzalloc(sizeof(*prop)); + struct device_tree_property *prop = alloc_prop(); prop->prop = fprop;
if (dt_prop_is_phandle(prop)) { @@ -1003,9 +1038,7 @@ if (!create) return NULL;
- found = calloc(1, sizeof(*found)); - if (!found) - return NULL; + found = alloc_node(); found->name = strdup(*path); if (!found->name) return NULL; @@ -1333,7 +1366,7 @@ } }
- prop = xzalloc(sizeof(*prop)); + prop = alloc_prop(); list_insert_after(&prop->list_node, &node->properties); prop->prop.name = name; prop->prop.data = data; @@ -1847,7 +1880,7 @@ continue; } } else { - dst_prop = xzalloc(sizeof(*dst_prop)); + dst_prop = alloc_prop(); list_insert_after(&dst_prop->list_node, &dst->properties); } @@ -1866,7 +1899,7 @@ }
if (!dst_node) { - dst_node = xzalloc(sizeof(*dst_node)); + dst_node = alloc_node(); *dst_node = *src_node; list_insert_after(&dst_node->list_node, &dst->children); } else {