[virt-tools-list] [libosinfo 7/8] rfc: Infer ISO language from label

Christophe Fergeau cfergeau at redhat.com
Mon Dec 3 11:23:36 UTC 2012


Now that libosinfo has an osinfo_db_identify_media method which
modifies the media it was passed, we can generate properties which
need both information from the media stored in the OsinfoDB and
information from the actual media (ISO volume ID).
This is useful to guess what languages are supported by a given
Windows ISO: the end of the ISO volume ID has a language code, which
we can translate to a locale identifier.

This commit adds a lang-regex property to the OsinfoDB database to
extract the language code from Windows ISO volume IDs, and
then adds mapping tables to turn it into a locale identifier.
---
 data/oses/windows.xml.in   |   2 +
 data/schemas/libosinfo.rng |   5 ++
 osinfo/libosinfo.syms      |   4 +-
 osinfo/osinfo_db.c         | 177 +++++++++++++++++++++++++++++++++++++++++++++
 osinfo/osinfo_loader.c     |   4 +-
 osinfo/osinfo_media.c      |  67 ++++++++++++++++-
 osinfo/osinfo_media.h      |   3 +
 7 files changed, 258 insertions(+), 4 deletions(-)

diff --git a/data/oses/windows.xml.in b/data/oses/windows.xml.in
index d09e873..e8c29f9 100644
--- a/data/oses/windows.xml.in
+++ b/data/oses/windows.xml.in
@@ -739,12 +739,14 @@
       <iso>
         <volume-id>(HB1_CCPA_X86FRE|HRM_CCSA_X86FRE|HRM_CCSA_X86CHK|HRM_CCSNA_X86CHK|HRM_CCSNA_X86FRE|HRM_CENA_X86FREV|HRM_CENA_X86CHKV|HRM_CENNA_X86FREV|HRM_CENNA_X86CHKV|HRM_CPRA_X86FREV|HRM_CPRNA_X86FREV)_</volume-id>
         <publisher-id>MICROSOFT CORPORATION</publisher-id>
+        <lang-regex>[[:upper:][:digit:]_]*_([[:upper:]]*-[[:upper:]]*)</lang-regex>
       </iso>
     </media>
     <media arch="x86_64">
       <iso>
         <volume-id>(HB1_CCPA_X64FRE|HRM_CCSA_X64FRE|HRM_CCSA_X64CHK|HRM_CCSNA_X64FRE|HRM_CCSNA_X64CHK|HRM_CENNA_X64FREV|HRM_CENNA_X64CHKV|HRM_CENA_X64FREV|HRM_CENA_X64CHKV|HRM_CPRA_X64FREV|HRM_CPRNA_X64FREV)_</volume-id>
         <publisher-id>MICROSOFT CORPORATION</publisher-id>
+        <lang-regex>[[:upper:][:digit:]_]*_([[:upper:]]*-[[:upper:]]*)</lang-regex>
       </iso>
     </media>
 
diff --git a/data/schemas/libosinfo.rng b/data/schemas/libosinfo.rng
index 87635dd..36fa1a1 100644
--- a/data/schemas/libosinfo.rng
+++ b/data/schemas/libosinfo.rng
@@ -281,6 +281,11 @@
             <text/>
           </element>
         </optional>
+        <optional>
+          <element name='lang-regex'>
+            <text/>
+          </element>
+        </optional>
       </interleave>
     </element>
   </define>
diff --git a/osinfo/libosinfo.syms b/osinfo/libosinfo.syms
index d45e58e..7c3efe1 100644
--- a/osinfo/libosinfo.syms
+++ b/osinfo/libosinfo.syms
@@ -341,11 +341,11 @@ LIBOSINFO_0.2.2 {
 	osinfo_install_config_set_target_disk;
 	osinfo_install_config_get_script_disk;
 	osinfo_install_config_set_script_disk;
-
 	osinfo_install_script_get_avatar_format;
 	osinfo_install_script_get_path_format;
-
 	osinfo_install_script_get_product_key_format;
+
+	osinfo_media_get_languages;
 } LIBOSINFO_0.2.1;
 
 /* Symbols in next release...
diff --git a/osinfo/osinfo_db.c b/osinfo/osinfo_db.c
index 46101d6..2c2eb5a 100644
--- a/osinfo/osinfo_db.c
+++ b/osinfo/osinfo_db.c
@@ -38,6 +38,177 @@ G_DEFINE_TYPE (OsinfoDb, osinfo_db, G_TYPE_OBJECT);
      (((str) != NULL) &&                                                \
       g_regex_match_simple((pattern), (str), 0, 0)))
 
+/*
+ * get_raw_lang:
+ * @volume_id: volume ID string to inspect
+ * @regex_str: pattern whose first capture group is the language code
+ *
+ * Compiles @regex_str as an anchored pattern, matches it against
+ * @volume_id and fetches the text of capture group 1.
+ *
+ * Returns: a newly allocated string to be released with g_free(), or
+ * NULL when the pattern fails to compile or does not match.
+ */
+static gchar *get_raw_lang(const char *volume_id, const gchar *regex_str)
+{
+    GMatchInfo *info = NULL;
+    gchar *lang = NULL;
+    GRegex *pattern;
+
+    pattern = g_regex_new(regex_str, G_REGEX_ANCHORED,
+                          G_REGEX_MATCH_ANCHORED, NULL);
+    if (pattern == NULL)
+        return NULL;
+
+    if (g_regex_match(pattern, volume_id, G_REGEX_MATCH_ANCHORED, &info) &&
+        g_match_info_matches(info))
+        lang = g_match_info_fetch(info, 1);
+
+    /* g_regex_match() hands out a match info even when nothing matched,
+     * so it is released on both paths. */
+    g_match_info_unref(info);
+    g_regex_unref(pattern);
+
+    return lang;
+}
+
+/* One row of the table translating the language code found in an ISO
+ * volume label into a locale identifier. */
+struct LanguageMapping {
+    const char *iso_label_lang; /* code as it appears in the label, e.g. "EN" or "EN-US" */
+    const char *gettext_lang;   /* matching locale identifier, e.g. "en_US" */
+};
+
+/*
+ * init_win_lang_map:
+ *
+ * Builds the hash table mapping the language codes used in Windows ISO
+ * volume labels to locale identifiers.
+ *
+ * Keys and values point straight into the static table below and the hash
+ * table is created without destroy functions, so entries must never be
+ * freed by callers.
+ *
+ * Returns: a newly created #GHashTable.
+ */
+static GHashTable *init_win_lang_map(void)
+{
+    GHashTable *lang_map;
+    /* static: the table is read-only and its strings are referenced by the
+     * hash table for as long as the latter exists */
+    static const struct LanguageMapping lang_table[] = {
+        /* ISO label strings up to Windows 7 */
+        { "EN", "en_US" },
+        { "AR", "ar_SA" },
+        { "BG", "bg_BG" },
+        { "HK", "zh_HK" },
+        { "CN", "zh_CN" },
+        { "TW", "zh_TW" },
+        { "HR", "hr_HR" },
+        { "CS", "cs_CZ" },
+        { "DA", "da_DK" },
+        { "NL", "nl_NL" },
+        { "ET", "et_EE" },
+        { "FI", "fi_FI" },
+        { "FR", "fr_FR" },
+        { "DE", "de_DE" },
+        { "EL", "el_GR" },
+        { "HE", "he_IL" },
+        { "HU", "hu_HU" },
+        { "IT", "it_IT" },
+        { "JA", "ja_JP" },
+        { "KO", "ko_KR" },
+        { "LV", "lv_LV" },
+        { "LT", "lt_LT" },
+        { "NO", "nb_NO" },
+        { "PL", "pl_PL" },
+        { "BR", "pt_BR" },
+        { "PT", "pt_PT" },
+        { "RO", "ro_RO" },
+        { "RU", "ru_RU" },
+        { "SRL", "sr_RS@latin" },
+        { "SK", "sk_SK" },
+        { "SL", "sl_SI" },
+        { "ES", "es_ES" },
+        { "SV", "sv_SE" },
+        { "TH", "th_TH" },
+        { "TR", "tr_TR" },
+        { "UK", "uk_UA" },
+
+        /* starting from Windows 8, the ISO label contains both
+         * language and country code */
+        { "EN-US", "en_US" },
+        { "EN-GB", "en_GB" },
+        { "AR-SA", "ar_SA" },
+        { "BG-BG", "bg_BG" },
+        { "ZH-HK", "zh_HK" },
+        { "ZH-CN", "zh_CN" },
+        { "ZH-TW", "zh_TW" },
+        { "HR-HR", "hr_HR" },
+        { "CS-CZ", "cs_CZ" },
+        { "DA-DK", "da_DK" },
+        { "NL-NL", "nl_NL" },
+        { "ET-EE", "et_EE" },
+        { "FI-FI", "fi_FI" },
+        { "FR-FR", "fr_FR" },
+        { "DE-DE", "de_DE" },
+        { "EL-GR", "el_GR" },
+        { "HE-IL", "he_IL" },
+        { "HU-HU", "hu_HU" },
+        { "IT-IT", "it_IT" },
+        { "JA-JP", "ja_JP" },
+        { "KO-KR", "ko_KR" },
+        { "LV-LV", "lv_LV" },
+        { "LT-LT", "lt_LT" },
+        { "NB-NO", "nb_NO" },
+        { "PL-PL", "pl_PL" },
+        { "PT-BR", "pt_BR" },
+        { "PT-PT", "pt_PT" },
+        { "RO-RO", "ro_RO" },
+        { "RU-RU", "ru_RU" },
+        { "SR-LATN-CS", "sr_RS@latin" },
+        { "SK-SK", "sk_SK" },
+        { "SL-SI", "sl_SI" },
+        { "ES-ES", "es_ES" },
+        { "SV-SE", "sv_SE" },
+        { "TH-TH", "th_TH" },
+        { "TR-TR", "tr_TR" },
+        { "UK-UA", "uk_UA" },
+
+        { "EU-ES", "eu_ES" }, //language pack
+        { "CA-ES", "ca_ES" }, //language pack
+        { "GL-ES", "gl_ES" }, //language pack
+        { "KY-KG", "ky_KG" }, //language pack
+
+        { NULL, NULL }
+    };
+    const struct LanguageMapping *it;
+
+    lang_map = g_hash_table_new(g_str_hash, g_str_equal);
+
+    /* The { NULL, NULL } row terminates the table */
+    for (it = lang_table; it->iso_label_lang != NULL; it++)
+        g_hash_table_insert(lang_map, (gpointer)it->iso_label_lang,
+                            (gpointer)it->gettext_lang);
+
+    return lang_map;
+}
+
+/*
+ * language_code_from_raw:
+ * @raw_lang: language code extracted from an ISO volume label, or NULL
+ *
+ * Looks @raw_lang up in the Windows language map, which is built on first
+ * use through g_once().
+ *
+ * Returns: the locale identifier stored in the map for @raw_lang, or NULL
+ * when @raw_lang is NULL or has no entry. The string belongs to the map
+ * and must not be freed.
+ */
+static const gchar *language_code_from_raw(const char *raw_lang)
+{
+    static GOnce win_lang_map_once = G_ONCE_INIT;
+    GHashTable *lang_map;
+
+    /* The map hashes keys with g_str_hash(), which reads the string, so a
+     * NULL code must not reach the lookup. */
+    if (raw_lang == NULL)
+        return NULL;
+
+    lang_map = g_once (&win_lang_map_once,
+                       (GThreadFunc)init_win_lang_map,
+                       NULL);
+
+    return g_hash_table_lookup(lang_map, raw_lang);
+}
+
+/*
+ * match_languages:
+ * @media: the media being identified; its volume ID is inspected
+ * @db_media: the matching database media; supplies the lang-regex param
+ *
+ * Applies the lang-regex of @db_media to the volume ID of @media and
+ * translates the extracted code into a locale identifier.
+ *
+ * Returns: a one-element list holding the locale identifier, or NULL when
+ * there is no lang-regex, no volume ID, the regex does not match, or the
+ * extracted code is not in the language map. The list must be released
+ * with g_list_free(); the string it holds is not owned by the list.
+ */
+static GList *match_languages(OsinfoMedia *media, OsinfoMedia *db_media)
+{
+    const gchar *volume_id;
+    const gchar *regex;
+    const gchar *lang;
+    gchar *raw_lang;
+
+    g_return_val_if_fail(OSINFO_IS_MEDIA(media), NULL);
+    g_return_val_if_fail(OSINFO_IS_MEDIA(db_media), NULL);
+
+    regex = osinfo_entity_get_param_value(OSINFO_ENTITY(db_media),
+                                          OSINFO_MEDIA_PROP_LANG_REGEX);
+    if (regex == NULL)
+        return NULL;
+
+    volume_id = osinfo_media_get_volume_id(media);
+    if (volume_id == NULL)
+        return NULL;
+
+    /* NULL here means the regex did not match (or did not compile); the
+     * lookup below must not be handed a NULL key. */
+    raw_lang = get_raw_lang(volume_id, regex);
+    if (raw_lang == NULL)
+        return NULL;
+
+    lang = language_code_from_raw(raw_lang);
+    g_free(raw_lang);
+
+    /* An unknown code yields no list at all rather than a list whose only
+     * element is NULL. */
+    if (lang == NULL)
+        return NULL;
+
+    return g_list_append(NULL, (gpointer)lang);
+}
+
 /**
  * SECTION:osinfo_db
  * @short_description: Database of all entities
@@ -470,6 +641,7 @@ OsinfoOs *osinfo_db_guess_os_from_media(OsinfoDb *db,
 
 static void fill_media (OsinfoMedia *media, OsinfoMedia *matched_media, OsinfoOs *os)
 {
+    GList *languages;
     gboolean is_installer;
     gboolean is_live;
     gint reboots;
@@ -478,6 +650,11 @@ static void fill_media (OsinfoMedia *media, OsinfoMedia *matched_media, OsinfoOs
     const gchar *arch;
     const gchar *url;
 
+    languages = match_languages(media, matched_media);
+    if (languages != NULL)
+        osinfo_media_set_languages(media, languages);
+    g_list_free(languages);
+
     arch = osinfo_media_get_architecture(matched_media);
     if (arch != NULL)
         g_object_set(G_OBJECT(media), "architecture", arch, NULL);
diff --git a/osinfo/osinfo_loader.c b/osinfo/osinfo_loader.c
index c49d303..7e69818 100644
--- a/osinfo/osinfo_loader.c
+++ b/osinfo/osinfo_loader.c
@@ -764,7 +764,9 @@ static OsinfoMedia *osinfo_loader_media (OsinfoLoader *loader,
              strcmp((const gchar *)nodes[i]->name,
                     OSINFO_MEDIA_PROP_PUBLISHER_ID) != 0 &&
              strcmp((const gchar *)nodes[i]->name,
-                    OSINFO_MEDIA_PROP_APPLICATION_ID) != 0))
+                    OSINFO_MEDIA_PROP_APPLICATION_ID) != 0 &&
+             strcmp((const gchar *)nodes[i]->name,
+                    OSINFO_MEDIA_PROP_LANG_REGEX) != 0))
             continue;
 
         osinfo_entity_set_param(OSINFO_ENTITY(media),
diff --git a/osinfo/osinfo_media.c b/osinfo/osinfo_media.c
index c25ca5a..6f2c56b 100644
--- a/osinfo/osinfo_media.c
+++ b/osinfo/osinfo_media.c
@@ -137,6 +137,7 @@ G_DEFINE_TYPE (OsinfoMedia, osinfo_media, OSINFO_TYPE_ENTITY);
 struct _OsinfoMediaPrivate
 {
     GWeakRef os;
+    /* List set through osinfo_media_set_languages(), which stores a
+     * g_list_copy() of its argument; released with g_list_free() in
+     * finalize. */
+    GList *languages;
 };
 
 enum {
@@ -153,7 +154,8 @@ enum {
     PROP_INSTALLER,
     PROP_LIVE,
     PROP_INSTALLER_REBOOTS,
-    PROP_OS
+    PROP_OS,
+    PROP_LANGUAGES,
 };
 
 static void
@@ -220,10 +222,15 @@ osinfo_media_get_property (GObject    *object,
                          osinfo_media_get_installer_reboots (media));
         break;
 
+
     case PROP_OS:
         g_value_take_object (value, osinfo_media_get_os (media));
         break;
 
+    case PROP_LANGUAGES:
+        g_value_set_pointer (value, osinfo_media_get_languages (media));
+        break;
+
     default:
         /* We don't have any other property... */
         G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
@@ -310,6 +317,10 @@ osinfo_media_set_property(GObject      *object,
         osinfo_media_set_os(media, g_value_get_object(value));
         break;
 
+    case PROP_LANGUAGES:
+        osinfo_media_set_languages(media, g_value_get_pointer(value));
+        break;
+
     default:
         /* We don't have any other property... */
         G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
@@ -320,6 +331,10 @@ osinfo_media_set_property(GObject      *object,
 static void
 osinfo_media_finalize (GObject *object)
 {
+    /* Only the list links are released here; the elements themselves
+     * are left untouched. */
+    g_list_free(OSINFO_MEDIA(object)->priv->languages);
+
     /* Chain up to the parent class */
     G_OBJECT_CLASS (osinfo_media_parent_class)->finalize (object);
 }
@@ -514,6 +529,24 @@ osinfo_media_class_init (OsinfoMediaClass *klass)
                                   G_PARAM_READWRITE |
                                   G_PARAM_STATIC_STRINGS);
     g_object_class_install_property (g_klass, PROP_OS, pspec);
+
+    /*
+     * OsinfoMedia::languages:
+     *
+     * If media is an installer, this property indicates the languages that
+     * can be used during automatic installations.
+     *
+     * On media that are not installers, this property will indicate the
+     * languages that the user interface can be displayed in.
+     * Use #osinfo_media_get_installer (or OsinfoMedia::installer) to know
+     * if the media is an installer or not.
+     */
+    pspec = g_param_spec_pointer ("languages",
+                                  "Languages",
+                                  _("Supported languages"),
+                                  G_PARAM_READABLE |
+                                  G_PARAM_STATIC_STRINGS);
+    g_object_class_install_property (g_klass, PROP_LANGUAGES, pspec);
 }
 
 static void
@@ -1099,6 +1132,38 @@ void osinfo_media_set_os(OsinfoMedia *media, OsinfoOs *os)
     g_weak_ref_set(&media->priv->os, os);
     g_object_unref(os);
 }
+
+/**
+ * osinfo_media_get_languages:
+ * @media: a #OsinfoMedia instance
+ *
+ * Retrieves the languages associated with @media.
+ *
+ * For installer media, these are the languages that can be used during
+ * automatic installations. For media that are not installers, these are
+ * the languages the user interface can be displayed in.
+ * Use #osinfo_media_get_installer (or OsinfoMedia::installer) to find out
+ * whether the media is an installer.
+ *
+ * Returns: (transfer none)(element-type utf8): a #GList
+ * containing the supported languages, which must not be freed as it is
+ * owned by libosinfo, or NULL if the supported languages are unknown
+ */
+GList *osinfo_media_get_languages(OsinfoMedia *media)
+{
+    OsinfoMediaPrivate *priv;
+
+    g_return_val_if_fail(OSINFO_IS_MEDIA(media), NULL);
+
+    priv = media->priv;
+    return priv->languages;
+}
+
+/**
+ * osinfo_media_set_languages:
+ * @media: a #OsinfoMedia instance
+ * @languages: (element-type utf8): the list of languages to store, may be NULL
+ *
+ * Replaces the languages stored on @media with a shallow copy of
+ * @languages. Only the list links are copied, so the element strings must
+ * stay valid for as long as @media holds them. The caller keeps ownership
+ * of @languages.
+ */
+void osinfo_media_set_languages(OsinfoMedia *media, GList *languages)
+{
+    GList *copy;
+
+    g_return_if_fail(OSINFO_IS_MEDIA(media));
+
+    /* Copy before freeing: @languages may be the very list returned by
+     * osinfo_media_get_languages(), which is the one about to be freed. */
+    copy = g_list_copy(languages);
+    g_list_free(media->priv->languages);
+    media->priv->languages = copy;
+}
 /*
  * Local variables:
  *  indent-tabs-mode: nil
diff --git a/osinfo/osinfo_media.h b/osinfo/osinfo_media.h
index 3052abe..e74fbe0 100644
--- a/osinfo/osinfo_media.h
+++ b/osinfo/osinfo_media.h
@@ -81,6 +81,7 @@ typedef struct _OsinfoMediaPrivate OsinfoMediaPrivate;
 #define OSINFO_MEDIA_PROP_LIVE           "live"
 #define OSINFO_MEDIA_PROP_INSTALLER      "installer"
 #define OSINFO_MEDIA_PROP_INSTALLER_REBOOTS "installer-reboots"
+#define OSINFO_MEDIA_PROP_LANG_REGEX     "lang-regex"
 
 /* object */
 struct _OsinfoMedia
@@ -125,6 +126,8 @@ const gchar *osinfo_media_get_kernel_path(OsinfoMedia *media);
 const gchar *osinfo_media_get_initrd_path(OsinfoMedia *media);
 OsinfoOs *osinfo_media_get_os(OsinfoMedia *media);
 void osinfo_media_set_os(OsinfoMedia *media, OsinfoOs *os);
+GList *osinfo_media_get_languages(OsinfoMedia *media);
+void osinfo_media_set_languages(OsinfoMedia *media, GList *languages);
 gboolean osinfo_media_get_installer(OsinfoMedia *media);
 gboolean osinfo_media_get_live(OsinfoMedia *media);
 gint osinfo_media_get_installer_reboots(OsinfoMedia *media);
-- 
1.8.0.1




More information about the virt-tools-list mailing list