All of lore.kernel.org
 help / color / mirror / Atom feed
From: Bernd Kuhls <bernd.kuhls@t-online.de>
To: buildroot@busybox.net
Subject: [Buildroot] [PATCH/next v2 1/1] package/icu: Add support to generate a subset of ICU data
Date: Tue,  1 Jun 2021 08:06:08 +0200	[thread overview]
Message-ID: <20210601060608.5531-1-bernd.kuhls@t-online.de> (raw)

Recent versions of ICU (64+) provide a tool for configuring the ICU
locale data file with finer granularity [1].

The default generated size of libicudata.so is ~27M, which is quite large
for embedded systems, and not all of them even need all the locale data.

This patch adds support for a custom data filter file in json format to
reduce the size of libicudata.so, e.g.

{
  "localeFilter": {
    "filterType": "language",
    "includelist": [
      "en",
      "de",
      "it"
    ]
  }
}

would only generate the locale data for English/German/Italian.

This would reduce the size of libicudata.so to 12M.

[1] https://github.com/unicode-org/icu/blob/master/docs/userguide/icu_data/buildtool.md

Signed-off-by: Bernd Kuhls <bernd.kuhls@t-online.de>
---
v2: switch ICU_COPY_CUSTOM_DATA to ICU_PRE_CONFIGURE_HOOKS to fix
    parallel build issues

 package/icu/Config.in |  9 +++++++++
 package/icu/icu.hash  |  1 +
 package/icu/icu.mk    | 24 +++++++++++++++++++++++-
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/package/icu/Config.in b/package/icu/Config.in
index b0c9eac83d..564e509fa0 100644
--- a/package/icu/Config.in
+++ b/package/icu/Config.in
@@ -25,6 +25,15 @@ config BR2_PACKAGE_ICU_CUSTOM_DATA_PATH
 	  provided by buildroot.
 	  Leave empty to not use this functionality.
 
+config BR2_PACKAGE_ICU_DATA_FILTER_FILE
+	string "Path to custom data configuration file"
+	help
+	  The ICU Data Build Tool enables you to write a configuration
+	  file that specifies what features and locales to include in a
+	  custom data bundle:
+	  https://github.com/unicode-org/icu/blob/main/docs/userguide/icu_data/buildtool.md
+	  Leave empty to not use this functionality.
+
 endif
 
 comment "icu needs a toolchain w/ C++, wchar, threads, gcc >= 4.9, host gcc >= 4.9"
diff --git a/package/icu/icu.hash b/package/icu/icu.hash
index 5ed7cc4217..ab0a642259 100644
--- a/package/icu/icu.hash
+++ b/package/icu/icu.hash
@@ -1,3 +1,4 @@
 # Locally computed
 sha256  4cba7b7acd1d3c42c44bb0c14be6637098c7faf2b330ce876bc5f3b915d09745  icu4c-69_1-src.tgz
+sha256  4fc2d8cfc3343673123586fca3967404abd4e346fba5515829204533b3bae4bf  icu4c-69_1-data.zip
 sha256  7915b19db903070778581ae05d8bf4ea241b34a05deb51ca4f5cbb15ea1cbba3  LICENSE
diff --git a/package/icu/icu.mk b/package/icu/icu.mk
index 0a17c61462..3acc0d266e 100644
--- a/package/icu/icu.mk
+++ b/package/icu/icu.mk
@@ -7,7 +7,8 @@
 # Git tags (and therefore versions on release-monitoring.org) use the
 # XX-Y format, but the tarballs are named XX_Y and the containing
 # directories XX.Y.
-ICU_VERSION = 69-1
+ICU_VERSION_MAJOR = 69
+ICU_VERSION = $(ICU_VERSION_MAJOR)-1
 ICU_SOURCE = icu4c-$(subst -,_,$(ICU_VERSION))-src.tgz
 ICU_SITE = \
 	https://github.com/unicode-org/icu/releases/download/release-$(ICU_VERSION)
@@ -56,6 +57,27 @@ endef
 ICU_POST_PATCH_HOOKS += ICU_COPY_CUSTOM_DATA
 endif
 
+ICU_DATA_FILTER_FILE = $(call qstrip,$(BR2_PACKAGE_ICU_DATA_FILTER_FILE))
+
+ifneq ($(ICU_DATA_FILTER_FILE),)
+HOST_ICU_DATA_SOURCE = $(subst src.tgz,data.zip,$(ICU_SOURCE))
+HOST_ICU_EXTRA_DOWNLOADS += $(HOST_ICU_SITE)/$(HOST_ICU_DATA_SOURCE)
+
+define HOST_ICU_EXTRACT_DATA
+	rm -rf $(@D)/$(HOST_ICU_SUBDIR)/data
+	$(UNZIP) $(ICU_DL_DIR)/$(HOST_ICU_DATA_SOURCE) -d $(@D)/$(HOST_ICU_SUBDIR)
+endef
+HOST_ICU_POST_EXTRACT_HOOKS += HOST_ICU_EXTRACT_DATA
+
+HOST_ICU_CONF_ENV = ICU_DATA_FILTER_FILE=$(ICU_DATA_FILTER_FILE)
+HOST_ICU_CONF_OPTS += --with-data-packaging=archive
+
+define ICU_COPY_CUSTOM_DATA
+	$(INSTALL) -D -m 644 $(HOST_ICU_DIR)/$(HOST_ICU_SUBDIR)/data/out/icudt$(ICU_VERSION_MAJOR)l.dat $(@D)/$(ICU_SUBDIR)/data/in/
+endef
+ICU_PRE_CONFIGURE_HOOKS += ICU_COPY_CUSTOM_DATA
+endif
+
 define ICU_REMOVE_DEV_FILES
 	rm -f $(addprefix $(TARGET_DIR)/usr/bin/,derb genbrk gencfu gencnval gendict genrb icuinfo makeconv uconv)
 	rm -f $(addprefix $(TARGET_DIR)/usr/sbin/,genccode gencmn gennorm2 gensprep icupkg)
-- 
2.29.2

             reply	other threads:[~2021-06-01  6:06 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-01  6:06 Bernd Kuhls [this message]
2021-07-18 21:11 ` [Buildroot] [PATCH/next v2 1/1] package/icu: Add support to generate a subset of ICU data Thomas Petazzoni
     [not found] ` <20210718231120.76adc4e3__4803.22810342244$1626642706$gmane$org@windsurf>
2021-07-19  5:45   ` Bernd Kuhls
2021-07-19  7:46     ` Thomas Petazzoni

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210601060608.5531-1-bernd.kuhls@t-online.de \
    --to=bernd.kuhls@t-online.de \
    --cc=buildroot@busybox.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.