From 3ac5fe7775ee5b666bf610abbca8e54f1f6cab53 Mon Sep 17 00:00:00 2001
From: Erik Osheim <erik@osheim.org>
Date: Thu, 11 Jun 2009 23:08:57 -0400
Subject: [PATCH] allow utf-8 data in error msgs, etc

--HG--
branch : pmacs2
---
 application.py |  30 ++++++---
 method/utf8.py | 179 ++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 183 insertions(+), 26 deletions(-)

diff --git a/application.py b/application.py
index ec0e7e8..1835657 100755
--- a/application.py
+++ b/application.py
@@ -526,9 +526,9 @@ class Application(object):
         self.error_timestamp = time.time()
     def set_error(self, s):
         self.set_msg(s)
-        self.log.append_lines([s, ""], act=buffer.ACT_NONE, force=True)
+        self.log.append_lines([s, u""], act=buffer.ACT_NONE, force=True)
     def clear_error(self):
-        self.error_string = ""
+        self.error_string = u""
         self.error_timestamp = None
     def try_manual_resize(self):
         y, x = self.stdscr.getmaxyx()
@@ -606,7 +606,10 @@ class Application(object):
                 pass
             else:
                 self.highlight_mark = False
-        
+
+    def addstr(self, y, x, s, attr=curses.A_NORMAL):  # utf-8 encode unicode only; str is already bytes
+        self.win.addstr(y, x, isinstance(s, unicode) and s.encode('utf-8') or s, attr)
+
     # the mighty run-loop!
     def run(self):
         self.done = False
@@ -645,7 +648,8 @@ class Application(object):
 
         # clear the error line; it might look confusing to the user
         try:
-            self.win.addstr(self.y-1, 0, ' ' * self.x)
+            #self.win.addstr(self.y-1, 0, ' ' * self.x)
+            self.addstr(self.y-1, 0, ' ' * self.x)
         except:
             pass
         self.win.refresh()
@@ -783,7 +787,8 @@ class Application(object):
         char = chr(junk & 255)
         attr = color.build(fg, bg)
         try:
-            self.win.addstr(sy, sx, char, attr)
+            #self.win.addstr(sy, sx, char, attr)
+            self.addstr(sy, sx, char, attr)
         except Exception, e:
             raise Exception, "(%d, %d, %r, %r) v. (%d, %d)" % \
                 (sy, sx, fg, bg, self.y, self.x)
@@ -914,8 +919,8 @@ class Application(object):
                 for j in range(0, slot.height):
                     char = chr(self.win.inch(j + slot.y_offset, limit) & 255)
                     attr = color.build('default', shade, 'bold')
-                    self.win.addstr(j + slot.y_offset, limit + w.mode.lmargin,
-                                    char, attr)
+                    #self.win.addstr(j + slot.y_offset, limit + w.mode.lmargin, char, attr)
+                    self.addstr(j + slot.y_offset, limit + w.mode.lmargin, char, attr)
         
     def _draw_slot(self, i):
         slot     = self.bufferlist.slots[i]
@@ -967,7 +972,8 @@ class Application(object):
             return
         status = slot.window.mode.get_status_bar()
         status = status.ljust(slot.width)[:slot.width]
-        self.win.addstr(slot.height + slot.y_offset, 0, status, curses.A_REVERSE)
+        #self.win.addstr(slot.height + slot.y_offset, 0, status, curses.A_REVERSE)
+        self.addstr(slot.height + slot.y_offset, 0, status, curses.A_REVERSE)
 
     # input bar drawing
     def draw_minibuffer(self):
@@ -978,9 +984,10 @@ class Application(object):
         for i in range(0, len(lines)):
             line = lines[i]
             try:
-                self.win.addstr(self.y - len(lines) + i, 0, line, attr)
+                #self.win.addstr(self.y - len(lines) + i, 0, line, attr)
+                self.addstr(self.y - len(lines) + i, 0, line, attr)
             except:
-                pass
+                raise
         if self.error_string or not self.mini_buffer_is_open():
             return
         pattr = color.build('cyan', 'default', 'bold')
@@ -988,7 +995,8 @@ class Application(object):
         for i in range(0, len(plines)):
             pline = plines[i]
             try:
-                self.win.addstr(self.y - len(lines) + i, 0, pline, pattr)
+                #self.win.addstr(self.y - len(lines) + i, 0, pline, pattr)
+                self.addstr(self.y - len(lines) + i, 0, pline, pattr)
             except:
                 pass
 
diff --git a/method/utf8.py b/method/utf8.py
index a009f1d..80480e1 100644
--- a/method/utf8.py
+++ b/method/utf8.py
@@ -1,25 +1,174 @@
-import os, commands, re, tempfile
-from subprocess import Popen, PIPE, STDOUT
-
-import buffer, default, dirutil, lex, regex, util, window
-from point import Point
-
+import re
+import unicodedata
 from method import Method, Argument, arg
 
-class Utf8Get(Method):
-    def _execute(self, w, **vargs):
-        p = w.logical_cursor()
-        c = w.buffer.get_substring(p, p.add(1, 0))
-        w.set_error(repr(c))
+category_map = {  # unicodedata.category() code -> human-readable description
+    'Lu': 'Letter, Uppercase',
+    'Ll': 'Letter, Lowercase',
+    'Lt': 'Letter, Titlecase',
+    'Lm': 'Letter, Modifier',
+    'Lo': 'Letter, Other',
+    'Mn': 'Mark, Nonspacing',
+    'Mc': 'Mark, Spacing Combining',
+    'Me': 'Mark, Enclosing',
+    'Nd': 'Number, Decimal Digit',
+    'Nl': 'Number, Letter',
+    'No': 'Number, Other',
+    'Pc': 'Punctuation, Connector',
+    'Pd': 'Punctuation, Dash',
+    'Ps': 'Punctuation, Open',
+    'Pe': 'Punctuation, Close',
+    'Pi': 'Punctuation, Initial quote (may behave like Ps or Pe depending on usage)',
+    'Pf': 'Punctuation, Final quote (may behave like Ps or Pe depending on usage)',
+    'Po': 'Punctuation, Other',
+    'Sm': 'Symbol, Math',
+    'Sc': 'Symbol, Currency',
+    'Sk': 'Symbol, Modifier',
+    'So': 'Symbol, Other',
+    'Zs': 'Separator, Space',
+    'Zl': 'Separator, Line',
+    'Zp': 'Separator, Paragraph',
+    'Cc': 'Other, Control',
+    'Cf': 'Other, Format',
+    'Cs': 'Other, Surrogate',
+    'Co': 'Other, Private Use',
+    'Cn': 'Other, Not Assigned (no characters in the file have this property)',
+}
+
+bidirect_map = {  # unicodedata.bidirectional() code -> human-readable description
+    'L':   'Left-to-Right',
+    'LRE': 'Left-to-Right Embedding',
+    'LRO': 'Left-to-Right Override',
+    'R':   'Right-to-Left',
+    'AL':  'Right-to-Left Arabic',
+    'RLE': 'Right-to-Left Embedding',
+    'RLO': 'Right-to-Left Override',
+    'PDF': 'Pop Directional Format',
+    'EN':  'European Number',
+    'ES':  'European Number Separator',
+    'ET':  'European Number Terminator',
+    'AN':  'Arabic Number',
+    'CS':  'Common Number Separator',
+    'NSM': 'Nonspacing Mark',
+    'BN':  'Boundary Neutral',
+    'B':   'Paragraph Separator',
+    'S':   'Segment Separator',
+    'WS':  'Whitespace',
+    'ON':  'Other Neutrals',
+}
+
+combine_map = {  # unicodedata.combining() class number -> human-readable description
+    0:   'Spacing, split, enclosing, reordrant, and Tibetan subjoined',
+    1:   'Overlays and interior',
+    7:   'Nuktas',
+    8:   'Hiragana/Katakana voicing marks',
+    9:   'Viramas',
+    10:  'Start of fixed position classes',
+    199: 'End of fixed position classes',
+    200: 'Below left attached',
+    202: 'Below attached',
+    204: 'Below right attached',
+    208: 'Left attached (reordrant around single base character)',
+    210: 'Right attached',
+    212: 'Above left attached',
+    214: 'Above attached',
+    216: 'Above right attached',
+    218: 'Below left',
+    220: 'Below',
+    222: 'Below right',
+    224: 'Left (reordrant around single base character)',
+    226: 'Right',
+    228: 'Above left',
+    230: 'Above',
+    232: 'Above right',
+    233: 'Double below',
+    234: 'Double above',
+    240: 'Below (iota subscript)',
+}
+
+width_map = {  # unicodedata.east_asian_width() code -> human-readable description
+    'W':  'East Asian Wide',
+    'F':  'East Asian Full-width',
+    'A':  'East Asian Ambiguous',
+    'H':  'East Asian Half-width',
+    'Na': 'East Asian Narrow',
+    'N':  'Neutral',
+}
+
+def unicodeget(u, fname, fallback):
+    '''return unicodedata.<fname>(u), or fallback if that fails or is empty'''
+    try:
+        value = getattr(unicodedata, fname)(u)
+    except Exception:
+        return fallback
+    if value == '': return fallback  # 0 is real data; only '' means missing
+    return value
 
 class Utf8Describe(Method):
+    '''get detailed utf-8 data about a particular utf-8 code point'''
+    args = [arg("code", t=type(""), p="Code Point: ", h="UTF-8 code point to use")]
+    cpt_re = re.compile('^\\u(?:[0-9a-fA-F]{2})+$')
+    format = '''
+Glyph          %s
+Name           %s
+Category       %s
+
+Bidirectional  %s
+Combining      %s
+Width          %s
+Mirroring      %s
+Decomposition  %s
+
+Decimal        %s
+Digit          %s
+Lookup         %s
+Normalize      %s
+Numeric        %s'''
     def _execute(self, w, **vargs):
-        w.set_error("not implemented")
+        s = "u'" + vargs['code'] + "'"  # wrap the input as a unicode literal
+        try:
+            u = eval(s, {}, {})  # SECURITY: evals user-typed text; a quote in it can run code
+            w.insert_string_at_cursor(u)  # NOTE(review): describe also inserts u; confirm intended
+        except Exception:
+            w.set_error("invalid: %s" % vargs['code'])  # the argument is 'code', not 'data'
+            return
+
+        a = unicodeget(u, 'category', '??')
+        b = unicodeget(u, 'bidirectional', '?')
+        c = unicodeget(u, 'combining', '?')
+        d = unicodeget(u, 'east_asian_width', '?')
+
+        name     = unicodeget(u, 'name', 'Unnamed')
+        category = category_map.get(a, 'No Category') + ' (%s)' % a
+        bidirect = bidirect_map.get(b, 'No Directional Info') + ' (%s)' % b
+        combine  = combine_map.get(c, 'No Combining Info') + ' (%s)' % c
+
+        mirror = unicodeget(u, 'mirrored', 'Unknown Mirroring')
+        width  = width_map.get(d, 'Unknown Width') + ' (%s)' % d
+
+        decomposition = unicodeget(u, 'decomposition', 'No Decomposition Info')
+        decimal       = unicodeget(u, 'decimal', 'n/a')
+        digit         = unicodeget(u, 'digit', 'n/a')
+        lookup        = unicodeget(u, 'lookup', 'n/a')
+        normalize     = unicodeget(u, 'normalize', 'n/a')
+        numeric       = unicodeget(u, 'numeric', 'n/a')
+
+        data = self.format % (u, name, category, bidirect, combine, width,
+                              mirror, decomposition, decimal, digit, lookup,
+                              normalize, numeric)
+        w.application.data_buffer('*Utf8-Info*', data.strip(), switch_to=True)
+
+class Utf8DescribeChar(Utf8Describe):
+    '''get utf-8 representation of the highlighted character'''
+    args = []
+    def _execute(self, w, **vargs):
+        cursor = w.logical_cursor()  # describe the substring starting at the cursor
+        glyph = w.buffer.get_substring(cursor, cursor.add(1, 0))
+        Utf8Describe._execute(self, w, code=glyph)
 
 class Utf8Insert(Method):
-    '''insert the specified UTF-8 character into the buffer'''
-    args = [arg("data", t=type(""), p="Data: ",
-                h="the UTF-8 escaped data to use to use")]
+    '''insert UTF-8 data into the buffer'''
+    args = [arg("data", t=type(""), p="UTF-8 Data: ", h="the UTF-8 data to use")]
     def _execute(self, w, **vargs):
         s = "u'" + vargs['data'] + "'"
         try: