From 3ac5fe7775ee5b666bf610abbca8e54f1f6cab53 Mon Sep 17 00:00:00 2001
From: Erik Osheim
Date: Thu, 11 Jun 2009 23:08:57 -0400
Subject: [PATCH] allow utf-8 data in error msgs, etc

--HG--
branch : pmacs2
---
Review notes (free-form text between the "---" line and the diff; not part
of the commit message):

 * Application.addstr now encodes only unicode objects. Calling
   str.encode('utf-8') on a Python 2 byte string first decodes it as ASCII,
   so bytes >= 0x80 (e.g. from chr(inch & 255)) raised UnicodeDecodeError.
 * unicodeget fell off the end and returned None whenever the unicodedata
   result was falsy (combining class 0, decimal/digit 0, not mirrored).
   It now returns the fallback only on an exception or an empty string.
 * Utf8Describe reported bad input through vargs['data'], but its argument
   is named 'code', so the error path raised KeyError. It also inserted the
   glyph into the buffer, which made Utf8DescribeChar duplicate the
   character under the cursor. Both are fixed.
 * NOTE(review): draw_minibuffer's "except: raise" is a no-op handler and
   is left as committed. Confirm whether curses errors should be ignored
   there as they are in the prompt-drawing loop below it.
 * NOTE(review): unicodedata.lookup() takes a character name and
   unicodedata.normalize() takes (form, string), so with a single character
   argument the Lookup and Normalize fields presumably always show 'n/a'.
 * NOTE(review): the indentation of context lines and the column alignment
   inside the tables/format string were reconstructed from a
   whitespace-collapsed copy of this patch. Verify against the tree before
   applying.

 application.py |   33 ++++++---
 method/utf8.py |  179 ++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 186 insertions(+), 26 deletions(-)

diff --git a/application.py b/application.py
index ec0e7e8..1835657 100755
--- a/application.py
+++ b/application.py
@@ -526,9 +526,9 @@ class Application(object):
         self.error_timestamp = time.time()
     def set_error(self, s):
         self.set_msg(s)
-        self.log.append_lines([s, ""], act=buffer.ACT_NONE, force=True)
+        self.log.append_lines([s, u""], act=buffer.ACT_NONE, force=True)
     def clear_error(self):
-        self.error_string = ""
+        self.error_string = u""
         self.error_timestamp = None
     def try_manual_resize(self):
         y, x = self.stdscr.getmaxyx()
@@ -606,7 +606,13 @@ class Application(object):
             pass
         else:
             self.highlight_mark = False
-    
+
+    def addstr(self, y, x, s, attr=curses.A_NORMAL):
+        '''draw s at (y, x); unicode is sent as UTF-8, str is sent as-is'''
+        if isinstance(s, unicode):
+            s = s.encode('utf-8')
+        self.win.addstr(y, x, s, attr)
+
     # the mighty run-loop!
     def run(self):
         self.done = False
@@ -645,7 +651,8 @@ class Application(object):
 
                 # clear the error line; it might look confusing to the user
                 try:
-                    self.win.addstr(self.y-1, 0, ' ' * self.x)
+                    #self.win.addstr(self.y-1, 0, ' ' * self.x)
+                    self.addstr(self.y-1, 0, ' ' * self.x)
                 except:
                     pass
                 self.win.refresh()
@@ -783,7 +790,8 @@ class Application(object):
                     char = chr(junk & 255)
                     attr = color.build(fg, bg)
                     try:
-                        self.win.addstr(sy, sx, char, attr)
+                        #self.win.addstr(sy, sx, char, attr)
+                        self.addstr(sy, sx, char, attr)
                     except Exception, e:
                         raise Exception, "(%d, %d, %r, %r) v. (%d, %d)" % \
                             (sy, sx, fg, bg, self.y, self.x)
@@ -914,8 +922,8 @@ class Application(object):
             for j in range(0, slot.height):
                 char = chr(self.win.inch(j + slot.y_offset, limit) & 255)
                 attr = color.build('default', shade, 'bold')
-                self.win.addstr(j + slot.y_offset, limit + w.mode.lmargin,
-                                char, attr)
+                #self.win.addstr(j + slot.y_offset, limit + w.mode.lmargin, char, attr)
+                self.addstr(j + slot.y_offset, limit + w.mode.lmargin, char, attr)
 
     def _draw_slot(self, i):
         slot = self.bufferlist.slots[i]
@@ -967,7 +975,8 @@ class Application(object):
             return
         status = slot.window.mode.get_status_bar()
         status = status.ljust(slot.width)[:slot.width]
-        self.win.addstr(slot.height + slot.y_offset, 0, status, curses.A_REVERSE)
+        #self.win.addstr(slot.height + slot.y_offset, 0, status, curses.A_REVERSE)
+        self.addstr(slot.height + slot.y_offset, 0, status, curses.A_REVERSE)
 
     # input bar drawing
     def draw_minibuffer(self):
@@ -978,9 +987,10 @@ class Application(object):
         for i in range(0, len(lines)):
             line = lines[i]
             try:
-                self.win.addstr(self.y - len(lines) + i, 0, line, attr)
+                #self.win.addstr(self.y - len(lines) + i, 0, line, attr)
+                self.addstr(self.y - len(lines) + i, 0, line, attr)
             except:
-                pass
+                raise
         if self.error_string or not self.mini_buffer_is_open():
             return
         pattr = color.build('cyan', 'default', 'bold')
@@ -988,7 +998,8 @@ class Application(object):
         for i in range(0, len(plines)):
             pline = plines[i]
             try:
-                self.win.addstr(self.y - len(lines) + i, 0, pline, pattr)
+                #self.win.addstr(self.y - len(lines) + i, 0, pline, pattr)
+                self.addstr(self.y - len(lines) + i, 0, pline, pattr)
             except:
                 pass
 
diff --git a/method/utf8.py b/method/utf8.py
index a009f1d..80480e1 100644
--- a/method/utf8.py
+++ b/method/utf8.py
@@ -1,25 +1,174 @@
-import os, commands, re, tempfile
-from subprocess import Popen, PIPE, STDOUT
-
-import buffer, default, dirutil, lex, regex, util, window
-from point import Point
-
+import re
+import unicodedata
 from method import Method, Argument, arg
 
-class Utf8Get(Method):
-    def _execute(self, w, **vargs):
-        p = w.logical_cursor()
-        c = w.buffer.get_substring(p, p.add(1, 0))
-        w.set_error(repr(c))
+category_map = {
+    'Lu': 'Letter, Uppercase',
+    'Ll': 'Letter, Lowercase',
+    'Lt': 'Letter, Titlecase',
+    'Lm': 'Letter, Modifier',
+    'Lo': 'Letter, Other',
+    'Mn': 'Mark, Nonspacing',
+    'Mc': 'Mark, Spacing Combining',
+    'Me': 'Mark, Enclosing',
+    'Nd': 'Number, Decimal Digit',
+    'Nl': 'Number, Letter',
+    'No': 'Number, Other',
+    'Pc': 'Punctuation, Connector',
+    'Pd': 'Punctuation, Dash',
+    'Ps': 'Punctuation, Open',
+    'Pe': 'Punctuation, Close',
+    'Pi': 'Punctuation, Initial quote (may behave like Ps or Pe depending on usage)',
+    'Pf': 'Punctuation, Final quote (may behave like Ps or Pe depending on usage)',
+    'Po': 'Punctuation, Other',
+    'Sm': 'Symbol, Math',
+    'Sc': 'Symbol, Currency',
+    'Sk': 'Symbol, Modifier',
+    'So': 'Symbol, Other',
+    'Zs': 'Separator, Space',
+    'Zl': 'Separator, Line',
+    'Zp': 'Separator, Paragraph',
+    'Cc': 'Other, Control',
+    'Cf': 'Other, Format',
+    'Cs': 'Other, Surrogate',
+    'Co': 'Other, Private Use',
+    'Cn': 'Other, Not Assigned (no characters in the file have this property)',
+}
+
+bidirect_map = {
+    'L': 'Left-to-Right',
+    'LRE': 'Left-to-Right Embedding',
+    'LRO': 'Left-to-Right Override',
+    'R': 'Right-to-Left',
+    'AL': 'Right-to-Left Arabic',
+    'RLE': 'Right-to-Left Embedding',
+    'RLO': 'Right-to-Left Override',
+    'PDF': 'Pop Directional Format',
+    'EN': 'European Number',
+    'ES': 'European Number Separator',
+    'ET': 'European Number Terminator',
+    'AN': 'Arabic Number',
+    'CS': 'Common Number Separator',
+    'NSM': 'Nonspacing Mark',
+    'BN': 'Boundary Neutral',
+    'B': 'Paragraph Separator',
+    'S': 'Segment Separator',
+    'WS': 'Whitespace',
+    'ON': 'Other Neutrals',
+}
+
+combine_map = {
+    0: 'Spacing, split, enclosing, reordrant, and Tibetan subjoined',
+    1: 'Overlays and interior',
+    7: 'Nuktas',
+    8: 'Hiragana/Katakana voicing marks',
+    9: 'Viramas',
+    10: 'Start of fixed position classes',
+    199: 'End of fixed position classes',
+    200: 'Below left attached',
+    202: 'Below attached',
+    204: 'Below right attached',
+    208: 'Left attached (reordrant around single base character)',
+    210: 'Right attached',
+    212: 'Above left attached',
+    214: 'Above attached',
+    216: 'Above right attached',
+    218: 'Below left',
+    220: 'Below',
+    222: 'Below right',
+    224: 'Left (reordrant around single base character)',
+    226: 'Right',
+    228: 'Above right',
+    230: 'Above',
+    232: 'Above left',
+    233: 'Double below',
+    234: 'Double above',
+    240: 'Below (iota subscript)',
+}
+
+width_map = {
+    'W': 'East Asian Wide',
+    'F': 'East Asian Full-width',
+    'A': 'East Asian Ambiguous',
+    'H': 'East Asian Half-width',
+    'Na': 'East Asian Narrow',
+    'N': 'Narrow',
+}
+
+def unicodeget(u, fname, fallback):
+    try:
+        value = getattr(unicodedata, fname)(u)
+    except Exception:
+        return fallback
+    if value == '':  # 0 is a real answer (e.g. combining class 0)
+        return fallback
+    return value
 
 class Utf8Describe(Method):
+    '''get detailed utf-8 data about a particular utf-8 code point'''
+    args = [arg("code", t=type(""), p="Code Point: ", h="UTF-8 code point to use")]
+    cpt_re = re.compile('^\\u(?:[0-9a-fA-F]{2})+$')
+    format = '''
+Glyph %s
+Name %s
+Category %s
+
+Bidirectional %s
+Combining %s
+Width %s
+Mirroring %s
+Decomposition %s
+
+Decimal %s
+Digit %s
+Lookup %s
+Normalize %s
+Numeric %s'''
     def _execute(self, w, **vargs):
-        w.set_error("not implemented")
+        code = vargs['code']
+        try:
+            # NOTE(review): eval() on user/buffer text; consider a safe decoder
+            u = eval("u'" + code + "'", {}, {})
+        except Exception:
+            w.set_error("invalid: %s" % code)
+            return
+
+        a = unicodeget(u, 'category', '??')
+        b = unicodeget(u, 'bidirectional', '?')
+        c = unicodeget(u, 'combining', '?')
+        d = unicodeget(u, 'east_asian_width', '?')
+
+        name = unicodeget(u, 'name', 'Unnamed')
+        category = category_map.get(a, 'No Category') + ' (%s)' % a
+        bidirect = bidirect_map.get(b, 'No Directional Info') + ' (%s)' % b
+        combine = combine_map.get(c, 'No Combining Info') + ' (%s)' % c
+
+        mirror = unicodeget(u, 'mirrored', 'Unknown Mirroring')
+        width = width_map.get(d, 'Unknown Width') + ' (%s)' % d
+
+        decomposition = unicodeget(u, 'decomposition', 'No Decomposition Info')
+        decimal = unicodeget(u, 'decimal', 'n/a')
+        digit = unicodeget(u, 'digit', 'n/a')
+        lookup = unicodeget(u, 'lookup', 'n/a')
+        normalize = unicodeget(u, 'normalize', 'n/a')
+        numeric = unicodeget(u, 'numeric', 'n/a')
+
+        data = self.format % (u, name, category, bidirect, combine, width,
+                              mirror, decomposition, decimal, digit, lookup,
+                              normalize, numeric)
+        w.application.data_buffer('*Utf8-Info*', data.strip(), switch_to=True)
+
+class Utf8DescribeChar(Utf8Describe):
+    '''get utf-8 representation of the highlighted character'''
+    args = []
+    def _execute(self, w, **vargs):
+        p = w.logical_cursor()
+        u = w.buffer.get_substring(p, p.add(1, 0))
+        Utf8Describe._execute(self, w, code=u)
 
 class Utf8Insert(Method):
-    '''insert the specified UTF-8 character into the buffer'''
-    args = [arg("data", t=type(""), p="Data: ",
-                h="the UTF-8 escaped data to use to use")]
+    '''insert UTF-8 data into the buffer'''
+    args = [arg("data", t=type(""), p="UTF-8 Data: ", h="the UTF-8 data to use")]
     def _execute(self, w, **vargs):
         s = "u'" + vargs['data'] + "'"
         try: