import re
import unicodedata

from method import Method, Argument, arg

# Descriptions of the Unicode general-category codes returned by
# unicodedata.category().
category_map = {
    'Lu': 'Letter, Uppercase',
    'Ll': 'Letter, Lowercase',
    'Lt': 'Letter, Titlecase',
    'Lm': 'Letter, Modifier',
    'Lo': 'Letter, Other',
    'Mn': 'Mark, Nonspacing',
    'Mc': 'Mark, Spacing Combining',
    'Me': 'Mark, Enclosing',
    'Nd': 'Number, Decimal Digit',
    'Nl': 'Number, Letter',
    'No': 'Number, Other',
    'Pc': 'Punctuation, Connector',
    'Pd': 'Punctuation, Dash',
    'Ps': 'Punctuation, Open',
    'Pe': 'Punctuation, Close',
    'Pi': 'Punctuation, Initial quote (may behave like Ps or Pe depending on usage)',
    'Pf': 'Punctuation, Final quote (may behave like Ps or Pe depending on usage)',
    'Po': 'Punctuation, Other',
    'Sm': 'Symbol, Math',
    'Sc': 'Symbol, Currency',
    'Sk': 'Symbol, Modifier',
    'So': 'Symbol, Other',
    'Zs': 'Separator, Space',
    'Zl': 'Separator, Line',
    'Zp': 'Separator, Paragraph',
    'Cc': 'Other, Control',
    'Cf': 'Other, Format',
    'Cs': 'Other, Surrogate',
    'Co': 'Other, Private Use',
    'Cn': 'Other, Not Assigned (no characters in the file have this property)',
}

# Descriptions of the bidirectional classes returned by
# unicodedata.bidirectional().
bidirect_map = {
    'L': 'Left-to-Right',
    'LRE': 'Left-to-Right Embedding',
    'LRO': 'Left-to-Right Override',
    'R': 'Right-to-Left',
    'AL': 'Right-to-Left Arabic',
    'RLE': 'Right-to-Left Embedding',
    'RLO': 'Right-to-Left Override',
    'PDF': 'Pop Directional Format',
    'EN': 'European Number',
    'ES': 'European Number Separator',
    'ET': 'European Number Terminator',
    'AN': 'Arabic Number',
    'CS': 'Common Number Separator',
    'NSM': 'Nonspacing Mark',
    'BN': 'Boundary Neutral',
    'B': 'Paragraph Separator',
    'S': 'Segment Separator',
    'WS': 'Whitespace',
    'ON': 'Other Neutrals',
    # Directional isolate classes (present in newer Unicode databases).
    'LRI': 'Left-to-Right Isolate',
    'RLI': 'Right-to-Left Isolate',
    'FSI': 'First Strong Isolate',
    'PDI': 'Pop Directional Isolate',
}

# Descriptions of the canonical combining classes returned by
# unicodedata.combining(); the keys are integers.
combine_map = {
    0: 'Spacing, split, enclosing, reordrant, and Tibetan subjoined',
    1: 'Overlays and interior',
    7: 'Nuktas',
    8: 'Hiragana/Katakana voicing marks',
    9: 'Viramas',
    10: 'Start of fixed position classes',
    199: 'End of fixed position classes',
    200: 'Below left attached',
    202: 'Below attached',
    204: 'Below right attached',
    208: 'Left attached (reordrant around single base character)',
    210: 'Right attached',
    212: 'Above left attached',
    214: 'Above attached',
    216: 'Above right attached',
    218: 'Below left',
    220: 'Below',
    222: 'Below right',
    224: 'Left (reordrant around single base character)',
    226: 'Right',
    228: 'Above right',
    230: 'Above',
    232: 'Above left',
    233: 'Double below',
    234: 'Double above',
    240: 'Below (iota subscript)',
}

# Descriptions of the East Asian Width codes returned by
# unicodedata.east_asian_width().
width_map = {
    'W': 'East Asian Wide',
    'F': 'East Asian Full-width',
    'A': 'East Asian Ambiguous',
    'H': 'East Asian Half-width',
    'Na': 'East Asian Narrow',
    # 'N' is the Neutral class; Narrow is 'Na' above.
    'N': 'Neutral (Not East Asian)',
}


def hex2(i):
    '''return the hex digits of i, zero-padded on the left to an even length'''
    h = hex(i)[2:]
    return '0' + h if len(h) % 2 == 1 else h


def uniesc(i):
    '''return i formatted as a backslash-x escape, e.g. 10 -> "\\x0a"'''
    return '\\x' + hex2(i)


def unichar(s):
    '''evaluate s as the body of a u'...' literal

    Returns the resulting string, or None when s does not form a valid
    literal (or evaluating it fails for any other reason).
    '''
    literal = "u'" + s + "'"
    # SECURITY NOTE(review): eval() runs on text typed by the user. Empty
    # globals/locals do not sandbox it (builtins are still reachable), and
    # input containing a quote can escape the literal. Consider a
    # literal-only parser before exposing this to untrusted input.
    try:
        return eval(literal, {}, {})
    except Exception:
        return None


def unicodeget(u, fname, fallback):
    '''call unicodedata.<fname>(u), returning fallback when there is no value

    u        -- the character to query
    fname    -- name of the unicodedata function to call
    fallback -- returned when the call raises, or yields None or ''

    Zero is a real answer (combining class 0, decimal/digit 0, not
    mirrored), so it is returned as-is rather than treated as missing.
    '''
    try:
        value = getattr(unicodedata, fname)(u)
    except Exception:
        # unicodedata signals "no such property" by raising; other failures
        # (bad fname, multi-character input) are likewise best-effort.
        return fallback
    if value is None or value == '':
        return fallback
    return value


class Utf8Describe(Method):
    '''get detailed utf-8 data about a particular utf-8 code point'''
    args = [
        arg("code", t=type(""), p="Code Point: ", h="UTF-8 code point to use"),
    ]
    # Raw string so the pattern matches a literal backslash followed by "u";
    # a non-raw '\\u' reaches the regex engine as the escape \u instead.
    cpt_re = re.compile(r'^\\u(?:[0-9a-fA-F]{2})+$')
    # One "label value" row per property; filled in positionally by _execute.
    format = (
        'Glyph          %s\n'
        'Name           %s\n'
        'Code           %s\n'
        'Category       %s\n'
        'Bidirectional  %s\n'
        'Combining      %s\n'
        'Width          %s\n'
        'Mirroring      %s\n'
        'Decomposition  %s\n'
        'Decimal        %s\n'
        'Digit          %s\n'
        'Lookup         %s\n'
        'Normalize      %s\n'
        'Numeric        %s'
    )

    def _execute(self, w, **vargs):
        '''show the properties of vargs['code'] in the *Utf8-Info* buffer

        Reports "invalid: ..." through w.set_error and returns early when
        the code cannot be turned into a character.
        '''
        u = unichar(vargs['code'])
        if u is None:
            w.set_error("invalid: %s" % vargs['code'])
            return

        # Raw property codes, used both as lookup keys and in the output.
        cat_code = unicodeget(u, 'category', '??')
        bidi_code = unicodeget(u, 'bidirectional', '?')
        comb_class = unicodeget(u, 'combining', '?')
        width_code = unicodeget(u, 'east_asian_width', '?')

        # Strip the u'...' wrapper from the repr to get the escaped form.
        code = repr(u)[2:-1]
        name = unicodeget(u, 'name', 'Unnamed')
        category = category_map.get(cat_code, 'No Category') + ' (%s)' % cat_code
        bidirect = (bidirect_map.get(bidi_code, 'No Directional Info')
                    + ' (%s)' % bidi_code)
        combine = (combine_map.get(comb_class, 'No Combining Info')
                   + ' (%s)' % comb_class)
        mirror = unicodeget(u, 'mirrored', 'Unknown Mirroring')
        width = width_map.get(width_code, 'Unknown Width') + ' (%s)' % width_code
        decomposition = unicodeget(u, 'decomposition', 'No Decomposition Info')
        decimal = unicodeget(u, 'decimal', 'n/a')
        digit = unicodeget(u, 'digit', 'n/a')
        # NOTE(review): unicodedata.lookup() expects a character *name* and
        # unicodedata.normalize() also needs a form argument, so with a bare
        # character both calls raise and these fields show 'n/a' -- confirm
        # what was intended here.
        lookup = unicodeget(u, 'lookup', 'n/a')
        normalize = unicodeget(u, 'normalize', 'n/a')
        numeric = unicodeget(u, 'numeric', 'n/a')

        data = self.format % (
            u, name, code, category, bidirect, combine, width, mirror,
            decomposition, decimal, digit, lookup, normalize, numeric,
        )
        w.application.data_buffer('*Utf8-Info*', data.strip(), switch_to=True)


class Utf8DescribeChar(Utf8Describe):
    '''get utf-8 representation of the highlighted character'''
    args = []

    def _execute(self, w, **vargs):
        '''describe the single character at the logical cursor'''
        p = w.logical_cursor()
        u = w.buffer.get_substring(p, p.add(1, 0))
        Utf8Describe._execute(self, w, code=u)


class Utf8Query(Method):
    '''look up a glyph by its unicode name and report it'''
    args = [
        arg("name", t=type(""), p="Glpyh Name: ", h="the name of the UTF-8 Glpyh"),
    ]

    def _execute(self, w, **vargs):
        '''report the glyph called vargs['name'], or that it was not found

        Both outcomes are reported through w.set_error.
        '''
        name = vargs['name']
        try:
            u = unicodedata.lookup(name)
        except KeyError:
            w.set_error("glpyh %r was not found" % name)
        else:
            w.set_error("glyph %s (%s)" % (u, repr(u)[2:-1]))


class Utf8Insert(Method):
    '''insert UTF-8 data into the buffer'''
    args = [
        arg("data", t=type(""), p="UTF-8 Data: ", h="the UTF-8 data to use"),
    ]

    def _execute(self, w, **vargs):
        '''validate vargs['data'], reporting "invalid: ..." when it is malformed'''
        # The argument is declared as "data" above, so that is the key to read.
        u = unichar(vargs['data'])
        # NOTE(review): only the failure path is handled by the statements
        # here; nothing shown inserts u into the buffer -- confirm.
        if u is None:
            w.set_error("invalid: %s" % vargs['data'])