Stream large JSON with multiple children using bun/node - Memory safe approach

3 days ago 9
ARTICLE AD BOX

I'm trying to fetch a huge JSON file — specifically, a JSON response from Figma.

Issues: I tried the regular approach, res.json(), which gives the whole payload at once but causes huge memory spikes. So I tried using a library called stream-json.

The problem is that, combined with a Bun child process, res.json() actually performs better than stream-json — but I'm fairly sure that's because of the way I've implemented the code. I've tried Googling and using AI, and I'm still not getting steady output.

// ── Figma batch-node ingestion ───────────────────────────────────────────────
// Two strategies for processing `GET /v1/files/:fileId/nodes` responses:
//  * processBatchOld — stream-json pipeline. NOTE: streamObject() still
//    materialises each value under `nodes` as a fully-built JS object, so a
//    single giant node allocates all at once anyway — this is why the
//    streaming version showed little memory benefit.
//  * processBatch    — plain response.json(); safe while batches stay small.

/**
 * Recursively reduce a raw Figma document node to the minimal set of fields
 * we persist, dropping everything else (geometry, constraints, etc.).
 *
 * @param {object|null|undefined} node - Raw Figma node (or nothing).
 * @returns {object|null} Minimal node, or null for falsy input. The
 *   `children` key is `undefined` when no children survive extraction.
 */
function extractMinimalNode(node) {
  if (!node) return null;
  const {
    id, name, type, visible, fills, strokes, styles, style,
    componentId, overrides, children,
  } = node;
  // Recurse into children; filter(Boolean) drops nulls from falsy entries.
  const minimalChildren = Array.isArray(children)
    ? children.map(extractMinimalNode).filter(Boolean)
    : [];
  return {
    id, name, type, visible, fills, strokes, styles, style,
    componentId, overrides,
    children: minimalChildren.length ? minimalChildren : undefined,
  };
}

/**
 * Resolve the page id a node should be stored under, from its batch entry.
 * A "page" entry is its own page; a "section" entry carries an explicit
 * pageID. Anything else (or a missing entry) yields undefined.
 *
 * @param {{type?: string, nodeID?: string, pageID?: string}|undefined} batchItem
 * @returns {string|undefined}
 */
function resolvePageId(batchItem) {
  if (batchItem?.type === 'page') return batchItem.nodeID;
  if (batchItem?.type === 'section') return batchItem.pageID;
  return undefined;
}

/**
 * Old approach: stream the response body through stream-json
 * (parser → pick('nodes') → streamObject).
 *
 * @param {Array<{nodeID: string, type?: string, pageID?: string}>} batch
 * @param {string} fileId - Figma file key.
 * @param {string} apiKey - Figma personal access token.
 * @throws {Error} When the Figma API responds with a non-2xx status.
 */
async function processBatchOld(batch, fileId, apiKey) {
  // Build the lookup table once: O(1) per node instead of batch.find()
  // inside the loop (which was O(batch²) overall).
  const batchByNodeId = new Map(batch.map((item) => [item.nodeID, item]));
  const nodeIds = batch.map((item) => item.nodeID).join(',');
  const batchFetchUrl = `https://api.figma.com/v1/files/${fileId}/nodes?ids=${nodeIds}`;

  const response = await fetch(batchFetchUrl, {
    headers: { 'X-Figma-Token': apiKey },
  });
  // fetch() only rejects on network failure; surface HTTP errors explicitly.
  if (!response.ok) {
    throw new Error(`Figma nodes request failed: ${response.status} ${response.statusText}`);
  }
  console.log('[worker] after fetch()', mem());

  const pipeline = Readable.fromWeb(response.body)
    .pipe(StreamJson.parser())
    .pipe(pick({ filter: 'nodes' }))
    .pipe(streamObject());
  console.log('[worker] after pipeline setup', mem());

  for await (const { value } of pipeline) {
    if (value?.document) {
      let minimal = extractMinimalNode(value.document);
      if (minimal) {
        const page_id = resolvePageId(batchByNodeId.get(minimal.id));
        if (page_id) {
          // figmaDB.addNodes([{ id: minimal.id, page_id, data: minimal }]);
        }
        minimal = null; // release the reference eagerly
      }
    }
    if (value?.components) {
      // figmaDB.insertComponent(generateRandomID(), fileId, value.components);
      value.components = null;
    }
    if (value?.componentSets) {
      // figmaDB.insertComponentSet(generateRandomID(), fileId, value.componentSets);
      value.componentSets = null;
    }
    if (value?.styles) {
      // figmaDB.insertStyle(generateRandomID(), fileId, value.styles);
      value.styles = null;
    }
    // Only available when run with --expose-gc. Forcing a collection on
    // every emitted node is itself expensive — kept for parity with the
    // original measurement runs.
    if (global.gc) {
      global.gc();
    }
  }
  console.log('[worker] after EMPTY loop', mem());
}

/**
 * Current approach: buffer the whole response with response.json().
 * Memory-safe only while the batch (and thus the payload) stays small.
 *
 * @param {Array<{nodeID: string, type?: string, pageID?: string}>} batch
 * @param {string} fileId - Figma file key.
 * @param {string} apiKey - Figma personal access token.
 * @throws {Error} When the Figma API responds with a non-2xx status.
 */
async function processBatch(batch, fileId, apiKey) {
  const batchByNodeId = new Map(batch.map((item) => [item.nodeID, item]));
  const nodeIds = batch.map((item) => item.nodeID).join(',');
  const batchFetchUrl = `https://api.figma.com/v1/files/${fileId}/nodes?ids=${nodeIds}`;

  const response = await fetch(batchFetchUrl, {
    headers: { 'X-Figma-Token': apiKey },
  });
  if (!response.ok) {
    throw new Error(`Figma nodes request failed: ${response.status} ${response.statusText}`);
  }
  console.log('[worker] after fetch()', mem());

  // ❗ Holds the entire payload in memory — safe only for small batches.
  const json = await response.json();
  console.log('[worker] after response.json()', mem());

  const nodes = json.nodes ?? {};
  for (const nodeData of Object.values(nodes)) {
    const { document, components, componentSets, styles } = nodeData ?? {};

    // --- document -> DB ---
    if (document) {
      const minimal = extractMinimalNode(document);
      if (minimal?.id) {
        const page_id = resolvePageId(batchByNodeId.get(minimal.id));
        if (page_id) {
          figmaDB.addNodes([{ id: minimal.id, page_id, data: minimal }]);
        }
      }
    }

    // --- components / sets / styles -> DB ---
    if (components) {
      figmaDB.insertComponent(generateRandomID(), fileId, components);
    }
    if (componentSets) {
      figmaDB.insertComponentSet(generateRandomID(), fileId, componentSets);
    }
    if (styles) {
      figmaDB.insertStyle(generateRandomID(), fileId, styles);
    }

    // Drop per-node references so they can be collected before the loop
    // ends. Guarded: the destructuring above admits nodeData may be null,
    // so assigning unconditionally (as before) could throw.
    if (nodeData) {
      nodeData.document = null;
      nodeData.components = null;
      nodeData.componentSets = null;
      nodeData.styles = null;
    }
  }

  if (global.gc) {
    global.gc();
    console.log('[worker] after GC', mem());
  }
}

Any help would be appreciated. Thanks in advance

Read Entire Article