Python helper tools for Hebrew characters

The following Python code generates a Python module called hebrew_dict.py. The module contains a dict called hebrew that stores the Unicode names and characters of the Hebrew block (code points 1425–1524, i.e. U+0591–U+05F4) as key-value pairs. Compatibility: Python 2.7:

#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Generate hebrew_dict.py: a module holding a dict called ``hebrew`` that maps
# the Unicode name of every named Hebrew character to the character itself.

import io
import unicodedata

try:
    unichr  # Python 2 builtin
except NameError:
    unichr = chr  # Python 3: chr() already returns a unicode character

head = u'''#!/usr/bin/python
# -*- coding: utf-8 -*-
'''


def write_hebrew_dict(path='hebrew_dict.py', start=1425, end=1524):
    """Write a Python module defining the ``hebrew`` name -> char dict.

    path  -- file to (over)write; defaults to hebrew_dict.py in the cwd.
    start -- first code point to include (1425 == U+0591).
    end   -- last code point to include, inclusive (1524 == U+05F4).

    Code points without a Unicode name are skipped and reported on stdout.
    """
    with io.open(path, 'w', encoding='utf-8') as out:
        out.write(head)
        out.write(u'hebrew = {')
        for code_point in range(start, end + 1):
            char = unichr(code_point)
            try:
                # unicodedata.name() raises ValueError for unnamed code points.
                char_name = unicodedata.name(char)
            except ValueError:
                # Report the code point in plain ASCII: printing the raw
                # character can raise UnicodeEncodeError on a non-UTF-8
                # stdout, and an unassigned character has no glyph anyway.
                print('U+%04X is not a unicode char with a name.' % code_point)
                continue
            out.write(u"\tu'" + char_name + u"' : u'" + char + u"', \n")
        out.write(u'}\n')


if __name__ == '__main__':
    write_hebrew_dict()

The following Python code then takes that output and creates further helper dicts, grouping the characters by the category word in their Unicode names. It writes the result back into the hebrew_dict.py file:

#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Read the ``hebrew`` dict from hebrew_dict.py, group its characters by the
# second word of their Unicode name (LETTER, POINT, ACCENT, ...) and rewrite
# hebrew_dict.py so that it contains ``hebrew`` plus one dict per group.

import io
import itertools

head = u'''#!/usr/bin/python
# -*- coding: utf-8 -*-
'''


def group_by_category(names_to_chars):
    """Group characters by the category word of their Unicode name.

    names_to_chars -- dict mapping full names such as u'HEBREW LETTER ALEF'
                      to the corresponding character.

    Returns a dict mapping each category (the second word of the name, e.g.
    u'LETTER') to a list of ``(remaining_name_words, char)`` tuples, e.g.
    ``([u'FINAL', u'KAF'], u'\u05da')``.
    """
    collection_dict = {}
    for full_name, char in names_to_chars.items():
        # Drop the leading "HEBREW"; the next word is the category.
        words = full_name.split(u' ')[1:]
        collection_dict.setdefault(words[0], []).append((words[1:], char))
    return collection_dict


def write_hebrew_dicts(hebrew, path='hebrew_dict.py'):
    """Write ``hebrew`` and its per-category helper dicts to *path*.

    hebrew -- dict of full Unicode name -> character.
    path   -- file to (over)write; defaults to hebrew_dict.py in the cwd.
    """
    collection_dict = group_by_category(hebrew)
    with io.open(path, 'w', encoding='utf-8') as out:
        out.write(head)
        out.write(u'hebrew = {\n')
        for full_name, char in hebrew.items():
            out.write(u"\tu'" + full_name + u"' : u'" + char + u"', \n")
        out.write(u'}\n')
        # One dict per category, named after the category word itself.
        for category, members in collection_dict.items():
            out.write(category + u' = {\n')
            for name_words, char in members:
                out.write(u'\tu"' + u' '.join(name_words) + u'" : u"' + char + u'",\n')
            out.write(u'}\n')


if __name__ == '__main__':
    # Imported here, before the file is reopened for writing, so that the
    # existing contents are loaded into memory first.
    from hebrew_dict import hebrew
    write_hebrew_dicts(hebrew)

And here is the output:

#!/usr/bin/python
# -*- coding: utf-8 -*-
hebrew = {	u'HEBREW ACCENT ETNAHTA' : u'֑', 
	u'HEBREW ACCENT SEGOL' : u'֒', 
	u'HEBREW ACCENT SHALSHELET' : u'֓', 
	u'HEBREW ACCENT ZAQEF QATAN' : u'֔', 
	u'HEBREW ACCENT ZAQEF GADOL' : u'֕', 
	u'HEBREW ACCENT TIPEHA' : u'֖', 
	u'HEBREW ACCENT REVIA' : u'֗', 
	u'HEBREW ACCENT ZARQA' : u'֘', 
	u'HEBREW ACCENT PASHTA' : u'֙', 
	u'HEBREW ACCENT YETIV' : u'֚', 
	u'HEBREW ACCENT TEVIR' : u'֛', 
	u'HEBREW ACCENT GERESH' : u'֜', 
	u'HEBREW ACCENT GERESH MUQDAM' : u'֝', 
	u'HEBREW ACCENT GERSHAYIM' : u'֞', 
	u'HEBREW ACCENT QARNEY PARA' : u'֟', 
	u'HEBREW ACCENT TELISHA GEDOLA' : u'֠', 
	u'HEBREW ACCENT PAZER' : u'֡', 
	u'HEBREW ACCENT ATNAH HAFUKH' : u'֢', 
	u'HEBREW ACCENT MUNAH' : u'֣', 
	u'HEBREW ACCENT MAHAPAKH' : u'֤', 
	u'HEBREW ACCENT MERKHA' : u'֥', 
	u'HEBREW ACCENT MERKHA KEFULA' : u'֦', 
	u'HEBREW ACCENT DARGA' : u'֧', 
	u'HEBREW ACCENT QADMA' : u'֨', 
	u'HEBREW ACCENT TELISHA QETANA' : u'֩', 
	u'HEBREW ACCENT YERAH BEN YOMO' : u'֪', 
	u'HEBREW ACCENT OLE' : u'֫', 
	u'HEBREW ACCENT ILUY' : u'֬', 
	u'HEBREW ACCENT DEHI' : u'֭', 
	u'HEBREW ACCENT ZINOR' : u'֮', 
	u'HEBREW MARK MASORA CIRCLE' : u'֯', 
	u'HEBREW POINT SHEVA' : u'ְ', 
	u'HEBREW POINT HATAF SEGOL' : u'ֱ', 
	u'HEBREW POINT HATAF PATAH' : u'ֲ', 
	u'HEBREW POINT HATAF QAMATS' : u'ֳ', 
	u'HEBREW POINT HIRIQ' : u'ִ', 
	u'HEBREW POINT TSERE' : u'ֵ', 
	u'HEBREW POINT SEGOL' : u'ֶ', 
	u'HEBREW POINT PATAH' : u'ַ', 
	u'HEBREW POINT QAMATS' : u'ָ', 
	u'HEBREW POINT HOLAM' : u'ֹ', 
	u'HEBREW POINT HOLAM HASER FOR VAV' : u'ֺ', 
	u'HEBREW POINT QUBUTS' : u'ֻ', 
	u'HEBREW POINT DAGESH OR MAPIQ' : u'ּ', 
	u'HEBREW POINT METEG' : u'ֽ', 
	u'HEBREW PUNCTUATION MAQAF' : u'־', 
	u'HEBREW POINT RAFE' : u'ֿ', 
	u'HEBREW PUNCTUATION PASEQ' : u'׀', 
	u'HEBREW POINT SHIN DOT' : u'ׁ', 
	u'HEBREW POINT SIN DOT' : u'ׂ', 
	u'HEBREW PUNCTUATION SOF PASUQ' : u'׃', 
	u'HEBREW MARK UPPER DOT' : u'ׄ', 
	u'HEBREW MARK LOWER DOT' : u'ׅ', 
	u'HEBREW PUNCTUATION NUN HAFUKHA' : u'׆', 
	u'HEBREW POINT QAMATS QATAN' : u'ׇ', 
	u'HEBREW LETTER ALEF' : u'א', 
	u'HEBREW LETTER BET' : u'ב', 
	u'HEBREW LETTER GIMEL' : u'ג', 
	u'HEBREW LETTER DALET' : u'ד', 
	u'HEBREW LETTER HE' : u'ה', 
	u'HEBREW LETTER VAV' : u'ו', 
	u'HEBREW LETTER ZAYIN' : u'ז', 
	u'HEBREW LETTER HET' : u'ח', 
	u'HEBREW LETTER TET' : u'ט', 
	u'HEBREW LETTER YOD' : u'י', 
	u'HEBREW LETTER FINAL KAF' : u'ך', 
	u'HEBREW LETTER KAF' : u'כ', 
	u'HEBREW LETTER LAMED' : u'ל', 
	u'HEBREW LETTER FINAL MEM' : u'ם', 
	u'HEBREW LETTER MEM' : u'מ', 
	u'HEBREW LETTER FINAL NUN' : u'ן', 
	u'HEBREW LETTER NUN' : u'נ', 
	u'HEBREW LETTER SAMEKH' : u'ס', 
	u'HEBREW LETTER AYIN' : u'ע', 
	u'HEBREW LETTER FINAL PE' : u'ף', 
	u'HEBREW LETTER PE' : u'פ', 
	u'HEBREW LETTER FINAL TSADI' : u'ץ', 
	u'HEBREW LETTER TSADI' : u'צ', 
	u'HEBREW LETTER QOF' : u'ק', 
	u'HEBREW LETTER RESH' : u'ר', 
	u'HEBREW LETTER SHIN' : u'ש', 
	u'HEBREW LETTER TAV' : u'ת', 
	u'HEBREW LIGATURE YIDDISH DOUBLE VAV' : u'װ', 
	u'HEBREW LIGATURE YIDDISH VAV YOD' : u'ױ', 
	u'HEBREW LIGATURE YIDDISH DOUBLE YOD' : u'ײ', 
	u'HEBREW PUNCTUATION GERESH' : u'׳', 
	u'HEBREW PUNCTUATION GERSHAYIM' : u'״', 
}
LIGATURE = {
	u"YIDDISH VAV YOD" : u"ױ",
	u"YIDDISH DOUBLE YOD" : u"ײ",
	u"YIDDISH DOUBLE VAV" : u"װ",
}
POINT = {
	u"PATAH" : u"ַ",
	u"SHEVA" : u"ְ",
	u"SIN DOT" : u"ׂ",
	u"QUBUTS" : u"ֻ",
	u"HATAF SEGOL" : u"ֱ",
	u"SEGOL" : u"ֶ",
	u"HATAF PATAH" : u"ֲ",
	u"HATAF QAMATS" : u"ֳ",
	u"RAFE" : u"ֿ",
	u"TSERE" : u"ֵ",
	u"HOLAM HASER FOR VAV" : u"ֺ",
	u"SHIN DOT" : u"ׁ",
	u"METEG" : u"ֽ",
	u"QAMATS QATAN" : u"ׇ",
	u"HIRIQ" : u"ִ",
	u"DAGESH OR MAPIQ" : u"ּ",
	u"QAMATS" : u"ָ",
	u"HOLAM" : u"ֹ",
}
PUNCTUATION = {
	u"SOF PASUQ" : u"׃",
	u"PASEQ" : u"׀",
	u"GERSHAYIM" : u"״",
	u"GERESH" : u"׳",
	u"NUN HAFUKHA" : u"׆",
	u"MAQAF" : u"־",
}
ACCENT = {
	u"QADMA" : u"֨",
	u"MERKHA" : u"֥",
	u"GERESH MUQDAM" : u"֝",
	u"MAHAPAKH" : u"֤",
	u"MUNAH" : u"֣",
	u"GERESH" : u"֜",
	u"ZAQEF QATAN" : u"֔",
	u"SEGOL" : u"֒",
	u"ZARQA" : u"֘",
	u"REVIA" : u"֗",
	u"ETNAHTA" : u"֑",
	u"OLE" : u"֫",
	u"ILUY" : u"֬",
	u"DARGA" : u"֧",
	u"TEVIR" : u"֛",
	u"YERAH BEN YOMO" : u"֪",
	u"ATNAH HAFUKH" : u"֢",
	u"DEHI" : u"֭",
	u"YETIV" : u"֚",
	u"QARNEY PARA" : u"֟",
	u"TELISHA GEDOLA" : u"֠",
	u"ZAQEF GADOL" : u"֕",
	u"TELISHA QETANA" : u"֩",
	u"PASHTA" : u"֙",
	u"SHALSHELET" : u"֓",
	u"PAZER" : u"֡",
	u"MERKHA KEFULA" : u"֦",
	u"GERSHAYIM" : u"֞",
	u"ZINOR" : u"֮",
	u"TIPEHA" : u"֖",
}
MARK = {
	u"UPPER DOT" : u"ׄ",
	u"MASORA CIRCLE" : u"֯",
	u"LOWER DOT" : u"ׅ",
}
LETTER = {
	u"FINAL KAF" : u"ך",
	u"YOD" : u"י",
	u"NUN" : u"נ",
	u"QOF" : u"ק",
	u"BET" : u"ב",
	u"FINAL TSADI" : u"ץ",
	u"ZAYIN" : u"ז",
	u"SHIN" : u"ש",
	u"AYIN" : u"ע",
	u"TSADI" : u"צ",
	u"MEM" : u"מ",
	u"LAMED" : u"ל",
	u"FINAL MEM" : u"ם",
	u"VAV" : u"ו",
	u"TET" : u"ט",
	u"DALET" : u"ד",
	u"KAF" : u"כ",
	u"HET" : u"ח",
	u"PE" : u"פ",
	u"FINAL NUN" : u"ן",
	u"SAMEKH" : u"ס",
	u"ALEF" : u"א",
	u"GIMEL" : u"ג",
	u"HE" : u"ה",
	u"RESH" : u"ר",
	u"FINAL PE" : u"ף",
	u"TAV" : u"ת",
}
Creative Commons License
This work by pete is licensed under a Creative Commons Attribution 4.0 International License.

References   [ + ]

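1. key_words[1:],item (this is not a real footnote: the double parentheses in the second script were mistaken for footnote markup; the affected line should read `collection_dict[key_words[0]].append((key_words[1:],item))`)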