2009-06-11 23:08:57 -04:00
|
|
|
import re
|
|
|
|
import unicodedata
|
|
|
|
from method import Method, Argument, arg
|
2009-06-11 18:52:57 -04:00
|
|
|
|
2009-06-11 23:08:57 -04:00
|
|
|
# Human-readable descriptions for the two-letter Unicode general category
# codes; Utf8Describe looks up the result of unicodedata.category() here.
category_map = {
    # letters
    'Lu': 'Letter, Uppercase',
    'Ll': 'Letter, Lowercase',
    'Lt': 'Letter, Titlecase',
    'Lm': 'Letter, Modifier',
    'Lo': 'Letter, Other',

    # marks
    'Mn': 'Mark, Nonspacing',
    'Mc': 'Mark, Spacing Combining',
    'Me': 'Mark, Enclosing',

    # numbers
    'Nd': 'Number, Decimal Digit',
    'Nl': 'Number, Letter',
    'No': 'Number, Other',

    # punctuation
    'Pc': 'Punctuation, Connector',
    'Pd': 'Punctuation, Dash',
    'Ps': 'Punctuation, Open',
    'Pe': 'Punctuation, Close',
    'Pi': 'Punctuation, Initial quote (may behave like Ps or Pe depending on usage)',
    'Pf': 'Punctuation, Final quote (may behave like Ps or Pe depending on usage)',
    'Po': 'Punctuation, Other',

    # symbols
    'Sm': 'Symbol, Math',
    'Sc': 'Symbol, Currency',
    'Sk': 'Symbol, Modifier',
    'So': 'Symbol, Other',

    # separators
    'Zs': 'Separator, Space',
    'Zl': 'Separator, Line',
    'Zp': 'Separator, Paragraph',

    # everything else
    'Cc': 'Other, Control',
    'Cf': 'Other, Format',
    'Cs': 'Other, Surrogate',
    'Co': 'Other, Private Use',
    'Cn': 'Other, Not Assigned (no characters in the file have this property)',
}
|
2009-06-11 18:52:57 -04:00
|
|
|
|
2009-06-11 23:08:57 -04:00
|
|
|
# Human-readable descriptions for the bidirectional class codes returned by
# unicodedata.bidirectional(); Utf8Describe looks the code up here.
bidirect_map = {
    # strong types
    'L': 'Left-to-Right',
    'LRE': 'Left-to-Right Embedding',
    'LRO': 'Left-to-Right Override',
    'R': 'Right-to-Left',
    'AL': 'Right-to-Left Arabic',
    'RLE': 'Right-to-Left Embedding',
    'RLO': 'Right-to-Left Override',
    'PDF': 'Pop Directional Format',

    # weak types
    'EN': 'European Number',
    'ES': 'European Number Separator',
    'ET': 'European Number Terminator',
    'AN': 'Arabic Number',
    'CS': 'Common Number Separator',
    'NSM': 'Nonspacing Mark',
    'BN': 'Boundary Neutral',

    # neutral types
    'B': 'Paragraph Separator',
    'S': 'Segment Separator',
    'WS': 'Whitespace',
    'ON': 'Other Neutrals',

    # Directional isolates (added in Unicode 6.3).  Newer unicodedata
    # databases return these codes, so without them such characters would
    # be reported as having no directional info.
    'LRI': 'Left-to-Right Isolate',
    'RLI': 'Right-to-Left Isolate',
    'FSI': 'First Strong Isolate',
    'PDI': 'Pop Directional Isolate',
}
|
2009-06-11 18:52:57 -04:00
|
|
|
|
2009-06-11 23:08:57 -04:00
|
|
|
# Human-readable descriptions for the canonical combining class numbers
# returned by unicodedata.combining(); Utf8Describe looks the number up here.
combine_map = {
    0: 'Spacing, split, enclosing, reordrant, and Tibetan subjoined',
    1: 'Overlays and interior',
    7: 'Nuktas',
    8: 'Hiragana/Katakana voicing marks',
    9: 'Viramas',
    10: 'Start of fixed position classes',
    199: 'End of fixed position classes',
    200: 'Below left attached',
    202: 'Below attached',
    204: 'Below right attached',
    208: 'Left attached (reordrant around single base character)',
    210: 'Right attached',
    212: 'Above left attached',
    214: 'Above attached',
    216: 'Above right attached',
    218: 'Below left',
    220: 'Below',
    222: 'Below right',
    224: 'Left (reordrant around single base character)',
    226: 'Right',
    # 228 is "above left" and 232 is "above right" in the Unicode tables
    # (e.g. U+0315 COMBINING COMMA ABOVE RIGHT has class 232); the two
    # descriptions used to be swapped here.
    228: 'Above left',
    230: 'Above',
    232: 'Above right',
    233: 'Double below',
    234: 'Double above',
    240: 'Below (iota subscript)',
}
|
|
|
|
|
|
|
|
# Human-readable descriptions for the East Asian Width codes returned by
# unicodedata.east_asian_width(); Utf8Describe looks the code up here.
width_map = {
    'W': 'East Asian Wide',
    'F': 'East Asian Full-width',
    'A': 'East Asian Ambiguous',
    'H': 'East Asian Half-width',
    'Na': 'East Asian Narrow',
    # 'N' means Neutral (not an East Asian character); 'Na' is the narrow
    # class.  This entry used to read 'Narrow', which mislabelled it.
    'N': 'Neutral (Not East Asian)',
}
|
|
|
|
|
2009-09-05 20:10:13 -04:00
|
|
|
def hex2(i):
    """Return the hex digits of i, left-padded with '0' to an even length.

    The digits are taken from hex(i) with its first two characters (the
    '0x' prefix for non-negative integers) removed.
    """
    digits = hex(i)[2:]
    # an odd number of digits gets one leading zero so they pair into bytes
    return digits if len(digits) % 2 == 0 else '0' + digits
|
|
|
|
|
|
|
|
def uniesc(i):
    """Return a backslash-x escape string for i, built from hex2(i)."""
    return '\\x%s' % hex2(i)
|
|
|
|
|
|
|
|
def unichar(s):
    """Interpret s as the body of a unicode string literal.

    s is wrapped as u'<s>' and evaluated, so escape sequences such as
    \\xNN or \\uNNNN in s are expanded.

    Returns the resulting unicode string, or None when s does not form a
    valid string literal.
    """
    # Local import keeps this function self-contained.
    import ast

    literal = "u'" + s + "'"
    try:
        # SECURITY: this used to be eval(), which would execute arbitrary
        # expressions smuggled in through the prompt (e.g. by closing the
        # quote early).  literal_eval only accepts literals, so such input
        # is rejected instead of run.
        value = ast.literal_eval(literal)
    except Exception:
        # Malformed literal (bad escape, stray quote, non-literal
        # expression...).  Exception rather than a bare except, so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        return None
    # Input such as "a', 'b" is a valid literal but yields a tuple; only a
    # unicode string is a usable result.
    if not isinstance(value, type(u'')):
        return None
    return value
|
|
|
|
|
2009-06-11 23:08:57 -04:00
|
|
|
def unicodeget(u, fname, fallback):
    """Call the unicodedata function named fname on u, with a fallback.

    u        -- the unicode string passed to the function
    fname    -- name of an attribute of the unicodedata module
    fallback -- value returned when no usable result is available

    Returns the function's result, or fallback when the function does not
    exist, raises, or returns None or an empty string.
    """
    try:
        value = getattr(unicodedata, fname)(u)
    except Exception:
        # unknown function name, wrong arity, or no such property for u
        return fallback
    # Only "no data" results are replaced.  Zero is a real answer (combining
    # class 0, decimal value 0, not mirrored); testing plain truthiness used
    # to drop it and fall off the end of the function, returning None.
    if value is None or value == '':
        return fallback
    return value
|
2009-06-11 18:52:57 -04:00
|
|
|
|
|
|
|
class Utf8Describe(Method):
    '''get detailed utf-8 data about a particular utf-8 code point'''
    args = [arg("code", t=type(""), p="Code Point: ", h="UTF-8 code point to use")]

    # Matches a literal backslash-u followed by one or more hex digit pairs.
    # Written as a raw string: the previous non-raw '^\\u...' handed the
    # regex engine a bare \u escape, which newer versions of re reject and
    # older ones treat as a plain 'u'.
    cpt_re = re.compile(r'^\\u(?:[0-9a-fA-F]{2})+$')

    # Report template; filled with 14 values in _execute, in this order.
    format = '''
Glyph %s
Name %s
Code %s
Category %s

Bidirectional %s
Combining %s
Width %s
Mirroring %s
Decomposition %s

Decimal %s
Digit %s
Lookup %s
Normalize %s
Numeric %s'''

    def _execute(self, w, **vargs):
        """Show a report about the character given by vargs['code'].

        The argument is converted with unichar(); when that fails an error
        is set on w and nothing else happens.  Otherwise the properties are
        gathered through unicodeget() and the formatted report is opened in
        the '*Utf8-Info*' data buffer.
        """
        code_arg = vargs['code']
        u = unichar(code_arg)
        if u is None:
            w.set_error("invalid: %s" % code_arg)
            return

        # raw property codes, used below as keys into the description maps
        a = unicodeget(u, 'category', '??')
        b = unicodeget(u, 'bidirectional', '?')
        c = unicodeget(u, 'combining', '?')
        d = unicodeget(u, 'east_asian_width', '?')

        # NOTE(review): slicing [2:-1] assumes repr(u) has the u'...' form
        # (Python 2 unicode repr) -- confirm the target interpreter.
        code = repr(u)[2:-1]
        name = unicodeget(u, 'name', 'Unnamed')
        category = category_map.get(a, 'No Category') + ' (%s)' % a
        bidirect = bidirect_map.get(b, 'No Directional Info') + ' (%s)' % b
        combine = combine_map.get(c, 'No Combining Info') + ' (%s)' % c

        mirror = unicodeget(u, 'mirrored', 'Unknown Mirroring')
        width = width_map.get(d, 'Unknown Width') + ' (%s)' % d

        decomposition = unicodeget(u, 'decomposition', 'No Decomposition Info')
        decimal = unicodeget(u, 'decimal', 'n/a')
        digit = unicodeget(u, 'digit', 'n/a')
        # NOTE(review): unicodedata.lookup() expects a character name and
        # unicodedata.normalize() also requires a form argument, so these
        # two calls look like they always end up as 'n/a' -- confirm intent.
        lookup = unicodeget(u, 'lookup', 'n/a')
        normalize = unicodeget(u, 'normalize', 'n/a')
        numeric = unicodeget(u, 'numeric', 'n/a')

        data = self.format % (u, name, code, category, bidirect, combine, width,
                              mirror, decomposition, decimal, digit, lookup,
                              normalize, numeric)
        w.application.data_buffer('*Utf8-Info*', data.strip(), switch_to=True)
|
|
|
|
|
|
|
|
class Utf8DescribeChar(Utf8Describe):
    '''get utf-8 representation of the highlighted character'''
    args = []

    def _execute(self, w, **vargs):
        """Describe the buffer text that starts at the logical cursor.

        Reads the substring between the cursor and cursor.add(1, 0) and
        hands it to Utf8Describe._execute as the 'code' argument.
        """
        start = w.logical_cursor()
        # presumably add(1, 0) yields the position one character further on
        # -- TODO confirm against the point class
        end = start.add(1, 0)
        char = w.buffer.get_substring(start, end)
        Utf8Describe._execute(self, w, code=char)
|
2009-06-11 18:52:57 -04:00
|
|
|
|
2009-06-12 10:53:32 -04:00
|
|
|
class Utf8Query(Method):
    '''look up a UTF-8 glyph by its unicode character name'''
    args = [arg("name", t=type(""), p="Glyph Name: ", h="the name of the UTF-8 Glyph")]

    def _execute(self, w, **vargs):
        """Report the character whose name is vargs['name'].

        The name is resolved with unicodedata.lookup().  The outcome, either
        the glyph with its escaped form or a not-found message, is reported
        through w.set_error().
        """
        name = vargs['name']
        try:
            # only the lookup can raise the KeyError handled below
            u = unicodedata.lookup(name)
        except KeyError:
            w.set_error("glyph %r was not found" % name)
        else:
            # NOTE(review): slicing [2:-1] assumes repr(u) has the u'...'
            # form (Python 2 unicode repr) -- confirm.
            w.set_error("glyph %s (%s)" % (u, repr(u)[2:-1]))
|
|
|
|
|
2009-06-11 18:52:57 -04:00
|
|
|
class Utf8Insert(Method):
|
2009-06-11 23:08:57 -04:00
|
|
|
'''insert UTF-8 data into the buffer'''
|
|
|
|
args = [arg("data", t=type(""), p="UTF-8 Data: ", h="the UTF-8 data to use")]
|
2009-06-11 18:52:57 -04:00
|
|
|
def _execute(self, w, **vargs):
|
|
|
|
s = "u'" + vargs['data'] + "'"
|
2009-09-05 20:10:13 -04:00
|
|
|
u = unichar(vargs['code'])
|
|
|
|
if u is None:
|
2009-06-11 18:52:57 -04:00
|
|
|
w.set_error("invalid: %s" % vargs['data'])
|