From 61d387135fbec2f5782a60cbd0ce2cb9550f58ed Mon Sep 17 00:00:00 2001 From: Edwin Cheng Date: Wed, 18 Dec 2024 19:02:03 +0800 Subject: [PATCH] Improve emoji SVG parsing by caching Basically, when we first encounter the document, we parse it as before, but we also note the offsets of other glyphs and store the remaining XML. The next time we see another glyph, we can simply parse that glyph node and insert it back into the stored XML. --- modules/text_server_adv/thorvg_svg_in_ot.cpp | 151 ++++++++++++++----- modules/text_server_adv/thorvg_svg_in_ot.h | 13 ++ modules/text_server_fb/thorvg_svg_in_ot.cpp | 151 ++++++++++++++----- modules/text_server_fb/thorvg_svg_in_ot.h | 13 ++ 4 files changed, 246 insertions(+), 82 deletions(-) diff --git a/modules/text_server_adv/thorvg_svg_in_ot.cpp b/modules/text_server_adv/thorvg_svg_in_ot.cpp index cbccb190950..a64ae043a8c 100644 --- a/modules/text_server_adv/thorvg_svg_in_ot.cpp +++ b/modules/text_server_adv/thorvg_svg_in_ot.cpp @@ -74,6 +74,46 @@ void tvg_svg_in_ot_free(FT_Pointer *p_state) { memdelete(state); } +static void construct_xml(Ref &parser, double &r_embox_x, double &r_embox_y, String *p_xml, int64_t &r_tag_count) { + if (parser->get_node_type() == XMLParser::NODE_ELEMENT) { + *p_xml += vformat("<%s", parser->get_node_name()); + bool is_svg_tag = parser->get_node_name() == "svg"; + for (int i = 0; i < parser->get_attribute_count(); i++) { + String aname = parser->get_attribute_name(i); + String value = parser->get_attribute_value(i); + if (is_svg_tag && aname == "viewBox") { + PackedStringArray vb = value.split(" "); + if (vb.size() == 4) { + r_embox_x = vb[2].to_float(); + r_embox_y = vb[3].to_float(); + } + } else if (is_svg_tag && aname == "width") { + r_embox_x = value.to_float(); + } else if (is_svg_tag && aname == "height") { + r_embox_y = value.to_float(); + } else { + *p_xml += vformat(" %s=\"%s\"", aname, value); + } + } + + if (parser->is_empty()) { + *p_xml += "/>"; + } else { + *p_xml += ">"; + if (r_tag_count >= 0) { + r_tag_count++; + } + } + } else if (parser->get_node_type() == XMLParser::NODE_TEXT) { + *p_xml += parser->get_node_data(); + } else if (parser->get_node_type() == XMLParser::NODE_ELEMENT_END) { + *p_xml += vformat("", parser->get_node_name()); + if (r_tag_count > 0) { + r_tag_count--; + } + } +} + FT_Error tvg_svg_in_ot_preset_slot(FT_GlyphSlot p_slot, FT_Bool p_cache, FT_Pointer *p_state) { TVG_State *state = *reinterpret_cast(p_state); if (!state) { @@ -91,57 +131,86 @@ FT_Error tvg_svg_in_ot_preset_slot(FT_GlyphSlot p_slot, FT_Bool p_cache, FT_Poin parser->_open_buffer((const uint8_t *)document->svg_document, document->svg_document_length); String xml_body; + double embox_x = document->units_per_EM; double embox_y = document->units_per_EM; - while (parser->read() == OK) { - if (parser->has_attribute("id")) { - const String &gl_name = parser->get_named_attribute_value("id"); - if (gl_name.begins_with("glyph")) { - int dot_pos = gl_name.find_char('.'); - int64_t gl_idx = gl_name.substr(5, (dot_pos > 0) ? dot_pos - 5 : -1).to_int(); - if (p_slot->glyph_index != gl_idx) { - parser->skip_section(); - continue; - } - } - } - if (parser->get_node_type() == XMLParser::NODE_ELEMENT && parser->get_node_name() == "svg") { - if (parser->has_attribute("viewBox")) { - PackedStringArray vb = parser->get_named_attribute_value("viewBox").split(" "); - if (vb.size() == 4) { - embox_x = vb[2].to_float(); - embox_y = vb[3].to_float(); - } - } - if (parser->has_attribute("width")) { - embox_x = parser->get_named_attribute_value("width").to_float(); - } - if (parser->has_attribute("height")) { - embox_y = parser->get_named_attribute_value("height").to_float(); + TVG_DocumentCache &cache = state->document_map[document->svg_document]; + + if (!cache.xml_body.is_empty()) { + // If we have a cached document, that means we have already parsed it. + // All node cache should be available. + + xml_body = cache.xml_body; + embox_x = cache.embox_x; + embox_y = cache.embox_y; + + ERR_FAIL_COND_V(!cache.node_caches.has(p_slot->glyph_index), FT_Err_Invalid_SVG_Document); + Vector &ncs = cache.node_caches[p_slot->glyph_index]; + + uint64_t offset = 0; + for (TVG_NodeCache &nc : ncs) { + // Seek will call read() internally. + if (parser->seek(nc.document_offset) == OK) { + int64_t tag_count = 0; + String xml_node; + + // We only parse the glyph node. + do { + construct_xml(parser, embox_x, embox_y, &xml_node, tag_count); + } while (tag_count != 0 && parser->read() == OK); + + xml_body = xml_body.insert(nc.body_offset + offset, xml_node); + offset += xml_node.length(); } } - if (parser->get_node_type() == XMLParser::NODE_ELEMENT) { - xml_body += vformat("<%s", parser->get_node_name()); - bool is_svg_tag = parser->get_node_name() == "svg"; - for (int i = 0; i < parser->get_attribute_count(); i++) { - String aname = parser->get_attribute_name(i); - if (is_svg_tag && (aname == "viewBox" || aname == "width" || aname == "height")) { - continue; + } else { + String xml_node; + String xml_body_temp; + + String *p_xml = &xml_body_temp; + int64_t tag_count = -1; + + while (parser->read() == OK) { + if (parser->has_attribute("id")) { + const String &gl_name = parser->get_named_attribute_value("id"); + if (gl_name.begins_with("glyph")) { + int dot_pos = gl_name.find_char('.'); + int64_t gl_idx = gl_name.substr(5, (dot_pos > 0) ? dot_pos - 5 : -1).to_int(); + + TVG_NodeCache node_cache = TVG_NodeCache(); + node_cache.document_offset = parser->get_node_offset(), + node_cache.body_offset = (uint64_t)cache.xml_body.length(); + cache.node_caches[gl_idx].push_back(node_cache); + + if (p_slot->glyph_index != gl_idx) { + parser->skip_section(); + continue; + } + tag_count = 0; + xml_node = ""; + p_xml = &xml_node; } - xml_body += vformat(" %s=\"%s\"", aname, parser->get_attribute_value(i)); } - if (parser->is_empty()) { - xml_body += "/>"; - } else { - xml_body += ">"; + xml_body_temp = ""; + construct_xml(parser, embox_x, embox_y, p_xml, tag_count); + + if (xml_body_temp.length() > 0) { + xml_body += xml_body_temp; + cache.xml_body += xml_body_temp; + continue; + } + + if (tag_count == 0) { + p_xml = &xml_body_temp; + tag_count = -1; + xml_body += xml_node; } - } else if (parser->get_node_type() == XMLParser::NODE_TEXT) { - xml_body += parser->get_node_data(); - } else if (parser->get_node_type() == XMLParser::NODE_ELEMENT_END) { - xml_body += vformat("", parser->get_node_name()); } + + cache.embox_x = embox_x; + cache.embox_y = embox_y; } std::unique_ptr picture = tvg::Picture::gen(); diff --git a/modules/text_server_adv/thorvg_svg_in_ot.h b/modules/text_server_adv/thorvg_svg_in_ot.h index a0e7e3a1d2f..77a00d175af 100644 --- a/modules/text_server_adv/thorvg_svg_in_ot.h +++ b/modules/text_server_adv/thorvg_svg_in_ot.h @@ -68,9 +68,22 @@ struct GL_State { tvg::Matrix m; }; +struct TVG_NodeCache { + uint64_t document_offset; + uint64_t body_offset; +}; + +struct TVG_DocumentCache { + String xml_body; + double embox_x; + double embox_y; + HashMap> node_caches; +}; + struct TVG_State { Mutex mutex; HashMap glyph_map; + HashMap document_map; }; FT_Error tvg_svg_in_ot_init(FT_Pointer *p_state); diff --git a/modules/text_server_fb/thorvg_svg_in_ot.cpp b/modules/text_server_fb/thorvg_svg_in_ot.cpp index cbccb190950..a64ae043a8c 100644 --- a/modules/text_server_fb/thorvg_svg_in_ot.cpp +++ b/modules/text_server_fb/thorvg_svg_in_ot.cpp @@ -74,6 +74,46 @@ void tvg_svg_in_ot_free(FT_Pointer *p_state) { memdelete(state); } +static void construct_xml(Ref &parser, double &r_embox_x, double &r_embox_y, String *p_xml, int64_t &r_tag_count) { + if (parser->get_node_type() == XMLParser::NODE_ELEMENT) { + *p_xml += vformat("<%s", parser->get_node_name()); + bool is_svg_tag = parser->get_node_name() == "svg"; + for (int i = 0; i < parser->get_attribute_count(); i++) { + String aname = parser->get_attribute_name(i); + String value = parser->get_attribute_value(i); + if (is_svg_tag && aname == "viewBox") { + PackedStringArray vb = value.split(" "); + if (vb.size() == 4) { + r_embox_x = vb[2].to_float(); + r_embox_y = vb[3].to_float(); + } + } else if (is_svg_tag && aname == "width") { + r_embox_x = value.to_float(); + } else if (is_svg_tag && aname == "height") { + r_embox_y = value.to_float(); + } else { + *p_xml += vformat(" %s=\"%s\"", aname, value); + } + } + + if (parser->is_empty()) { + *p_xml += "/>"; + } else { + *p_xml += ">"; + if (r_tag_count >= 0) { + r_tag_count++; + } + } + } else if (parser->get_node_type() == XMLParser::NODE_TEXT) { + *p_xml += parser->get_node_data(); + } else if (parser->get_node_type() == XMLParser::NODE_ELEMENT_END) { + *p_xml += vformat("", parser->get_node_name()); + if (r_tag_count > 0) { + r_tag_count--; + } + } +} + FT_Error tvg_svg_in_ot_preset_slot(FT_GlyphSlot p_slot, FT_Bool p_cache, FT_Pointer *p_state) { TVG_State *state = *reinterpret_cast(p_state); if (!state) { @@ -91,57 +131,86 @@ FT_Error tvg_svg_in_ot_preset_slot(FT_GlyphSlot p_slot, FT_Bool p_cache, FT_Poin parser->_open_buffer((const uint8_t *)document->svg_document, document->svg_document_length); String xml_body; + double embox_x = document->units_per_EM; double embox_y = document->units_per_EM; - while (parser->read() == OK) { - if (parser->has_attribute("id")) { - const String &gl_name = parser->get_named_attribute_value("id"); - if (gl_name.begins_with("glyph")) { - int dot_pos = gl_name.find_char('.'); - int64_t gl_idx = gl_name.substr(5, (dot_pos > 0) ? dot_pos - 5 : -1).to_int(); - if (p_slot->glyph_index != gl_idx) { - parser->skip_section(); - continue; - } - } - } - if (parser->get_node_type() == XMLParser::NODE_ELEMENT && parser->get_node_name() == "svg") { - if (parser->has_attribute("viewBox")) { - PackedStringArray vb = parser->get_named_attribute_value("viewBox").split(" "); - if (vb.size() == 4) { - embox_x = vb[2].to_float(); - embox_y = vb[3].to_float(); - } - } - if (parser->has_attribute("width")) { - embox_x = parser->get_named_attribute_value("width").to_float(); - } - if (parser->has_attribute("height")) { - embox_y = parser->get_named_attribute_value("height").to_float(); + TVG_DocumentCache &cache = state->document_map[document->svg_document]; + + if (!cache.xml_body.is_empty()) { + // If we have a cached document, that means we have already parsed it. + // All node cache should be available. + + xml_body = cache.xml_body; + embox_x = cache.embox_x; + embox_y = cache.embox_y; + + ERR_FAIL_COND_V(!cache.node_caches.has(p_slot->glyph_index), FT_Err_Invalid_SVG_Document); + Vector &ncs = cache.node_caches[p_slot->glyph_index]; + + uint64_t offset = 0; + for (TVG_NodeCache &nc : ncs) { + // Seek will call read() internally. + if (parser->seek(nc.document_offset) == OK) { + int64_t tag_count = 0; + String xml_node; + + // We only parse the glyph node. + do { + construct_xml(parser, embox_x, embox_y, &xml_node, tag_count); + } while (tag_count != 0 && parser->read() == OK); + + xml_body = xml_body.insert(nc.body_offset + offset, xml_node); + offset += xml_node.length(); } } - if (parser->get_node_type() == XMLParser::NODE_ELEMENT) { - xml_body += vformat("<%s", parser->get_node_name()); - bool is_svg_tag = parser->get_node_name() == "svg"; - for (int i = 0; i < parser->get_attribute_count(); i++) { - String aname = parser->get_attribute_name(i); - if (is_svg_tag && (aname == "viewBox" || aname == "width" || aname == "height")) { - continue; + } else { + String xml_node; + String xml_body_temp; + + String *p_xml = &xml_body_temp; + int64_t tag_count = -1; + + while (parser->read() == OK) { + if (parser->has_attribute("id")) { + const String &gl_name = parser->get_named_attribute_value("id"); + if (gl_name.begins_with("glyph")) { + int dot_pos = gl_name.find_char('.'); + int64_t gl_idx = gl_name.substr(5, (dot_pos > 0) ? dot_pos - 5 : -1).to_int(); + + TVG_NodeCache node_cache = TVG_NodeCache(); + node_cache.document_offset = parser->get_node_offset(), + node_cache.body_offset = (uint64_t)cache.xml_body.length(); + cache.node_caches[gl_idx].push_back(node_cache); + + if (p_slot->glyph_index != gl_idx) { + parser->skip_section(); + continue; + } + tag_count = 0; + xml_node = ""; + p_xml = &xml_node; } - xml_body += vformat(" %s=\"%s\"", aname, parser->get_attribute_value(i)); } - if (parser->is_empty()) { - xml_body += "/>"; - } else { - xml_body += ">"; + xml_body_temp = ""; + construct_xml(parser, embox_x, embox_y, p_xml, tag_count); + + if (xml_body_temp.length() > 0) { + xml_body += xml_body_temp; + cache.xml_body += xml_body_temp; + continue; + } + + if (tag_count == 0) { + p_xml = &xml_body_temp; + tag_count = -1; + xml_body += xml_node; } - } else if (parser->get_node_type() == XMLParser::NODE_TEXT) { - xml_body += parser->get_node_data(); - } else if (parser->get_node_type() == XMLParser::NODE_ELEMENT_END) { - xml_body += vformat("", parser->get_node_name()); } + + cache.embox_x = embox_x; + cache.embox_y = embox_y; } std::unique_ptr picture = tvg::Picture::gen(); diff --git a/modules/text_server_fb/thorvg_svg_in_ot.h b/modules/text_server_fb/thorvg_svg_in_ot.h index a0e7e3a1d2f..77a00d175af 100644 --- a/modules/text_server_fb/thorvg_svg_in_ot.h +++ b/modules/text_server_fb/thorvg_svg_in_ot.h @@ -68,9 +68,22 @@ struct GL_State { tvg::Matrix m; }; +struct TVG_NodeCache { + uint64_t document_offset; + uint64_t body_offset; +}; + +struct TVG_DocumentCache { + String xml_body; + double embox_x; + double embox_y; + HashMap> node_caches; +}; + struct TVG_State { Mutex mutex; HashMap glyph_map; + HashMap document_map; }; FT_Error tvg_svg_in_ot_init(FT_Pointer *p_state);