From 377c1775f835c9edcf6e58213110602d8002b2c0 Mon Sep 17 00:00:00 2001 From: Erik Osheim Date: Tue, 9 Jun 2009 23:37:43 -0400 Subject: [PATCH] initial unicode support --HG-- branch : pmacs2 --- BUGS | 11 +++++++++++ application.py | 3 +++ buffer/__init__.py | 11 ++++++----- code_examples/georgian.txt | 3 +++ code_examples/greek.txt | 3 +++ code_examples/hebrew.txt | 1 + code_examples/japanese.txt | 4 ++++ code_examples/runes.txt | 3 +++ code_examples/russian.txt | 5 +++++ render.py | 3 ++- 10 files changed, 41 insertions(+), 6 deletions(-) create mode 100644 code_examples/georgian.txt create mode 100644 code_examples/greek.txt create mode 100644 code_examples/hebrew.txt create mode 100644 code_examples/japanese.txt create mode 100644 code_examples/runes.txt create mode 100644 code_examples/russian.txt diff --git a/BUGS b/BUGS index b1a0be1..5d81551 100644 --- a/BUGS +++ b/BUGS @@ -1,5 +1,16 @@ === OUSTANDING BUGS === +2009/06/09: UNICODE ISSUES + 1. some double-width characters don't work right (see japanese.txt) + + 2. need input methods for unicode characters + + 3. regexes need to be made unicode-aware + + 4. make ad-hoc strings in application u'' strings + + 5. handling terminal output type (utf-8, latin-1, ascii) + 2009/06/09: 1. many of the dir-mode methods need better names, to make it clear they aren't general purpose. diff --git a/application.py b/application.py index dd0818c..2f2613d 100755 --- a/application.py +++ b/application.py @@ -1,5 +1,6 @@ #!/usr/bin/env python import curses, curses.ascii, getpass, os, re, string, sys, termios, time +import locale import math import traceback from subprocess import Popen, PIPE, STDOUT @@ -1046,6 +1047,8 @@ if __name__ == "__main__": 'aes': open_aes_file, } + locale.setlocale(locale.LC_ALL, '') + # preprocess args argv = list(sys.argv[1:]) goto_line = None diff --git a/buffer/__init__.py b/buffer/__init__.py index a9ff49c..709f88b 100644 --- a/buffer/__init__.py +++ b/buffer/__init__.py @@ -631,11 +631,12 @@ class FileBuffer(Buffer): self.nl = self._detect_nl_type(data) data = self.read_filter(data) - data = data.replace("\t", " ") - for i in range(0, min(len(data), 128)): - if data[i] not in string.printable: - raise BinaryDataException("binary files are not supported") - return data + try: + data = data.decode('utf-8') + data = data.replace("\t", " ") + return data + except UnicodeDecodeError: + raise BinaryDataException("binary files are not supported") def open(self): data = self.read() self.lines = data.split(self.nl) diff --git a/code_examples/georgian.txt b/code_examples/georgian.txt new file mode 100644 index 0000000..c4ef3ed --- /dev/null +++ b/code_examples/georgian.txt @@ -0,0 +1,3 @@ +ვეპხის ტყაოსანი შოთა რუსთაველი + +ღმერთსი შემვედრე, ნუთუ კვლა დამხსნას სოფლისა შრომასა, ცეცხლს, წყალსა და მიწასა, ჰაერთა თანა მრომასა; მომცნეს ფრთენი და აღვფრინდე, მივჰხვდე მას ჩემსა ნდომასა, დღისით და ღამით ვჰხედვიდე მზისა ელვათა კრთომაასა. diff --git a/code_examples/greek.txt b/code_examples/greek.txt new file mode 100644 index 0000000..7480f90 --- /dev/null +++ b/code_examples/greek.txt @@ -0,0 +1,3 @@ +Τη γλώσσα μου έδωσαν ελληνική +το σπίτι φτωχικό στις αμμουδιές του Ομήρου. +Μονάχη έγνοια η γλώσσα μου στις αμμουδιές του Ομήρου. diff --git a/code_examples/hebrew.txt b/code_examples/hebrew.txt new file mode 100644 index 0000000..5bbe7d4 --- /dev/null +++ b/code_examples/hebrew.txt @@ -0,0 +1 @@ +זה כיף סתם לשמוע איך תנצח קרפד עץ טוב בגן diff --git a/code_examples/japanese.txt b/code_examples/japanese.txt new file mode 100644 index 0000000..245f556 --- /dev/null +++ b/code_examples/japanese.txt @@ -0,0 +1,4 @@ +いろはにほへど ちりぬるを +わがよたれぞ つねならむ +うゐのおくやま けふこえて +あさきゆめみじ ゑひもせず diff --git a/code_examples/runes.txt b/code_examples/runes.txt new file mode 100644 index 0000000..dc561e3 --- /dev/null +++ b/code_examples/runes.txt @@ -0,0 +1,3 @@ +ᚠᛇᚻ᛫ᛒᛦᚦ᛫ᚠᚱᚩᚠᚢᚱ᛫ᚠᛁᚱᚪ᛫ᚷᛖᚻᚹᛦᛚᚳᚢᛗ +ᛋᚳᛖᚪᛚ᛫ᚦᛖᚪᚻ᛫ᛗᚪᚾᚾᚪ᛫ᚷᛖᚻᚹᛦᛚᚳ᛫ᛗᛁᚳᛚᚢᚾ᛫ᚻᛦᛏ᛫ᛞᚫᛚᚪᚾ +ᚷᛁᚠ᛫ᚻᛖ᛫ᚹᛁᛚᛖ᛫ᚠᚩᚱ᛫ᛞᚱᛁᚻᛏᚾᛖ᛫ᛞᚩᛗᛖᛋ᛫ᚻᛚᛇᛏᚪᚾ᛬ diff --git a/code_examples/russian.txt b/code_examples/russian.txt new file mode 100644 index 0000000..46fffcf --- /dev/null +++ b/code_examples/russian.txt @@ -0,0 +1,5 @@ +абвгдеёжзийклмно +прстуфхцчшчьыъэюя +АБВГДЕЁЖЗИЙ +КЛМНОПРСТУФ +ХЦЧШЩЬЫЪЭЮЯ diff --git a/render.py b/render.py index 4801bae..b578ec5 100644 --- a/render.py +++ b/render.py @@ -35,7 +35,8 @@ class RenderString(object): s = '%-*s' % (x2 - x, self.string) else: s = self.string - cwin.addstr(self.y + y, self.x + x, s, self.attrs) + s2 = s.encode('utf-8') + cwin.addstr(self.y + y, self.x + x, s2, self.attrs) except Exception, e: raise #v = (self.y, y, self.x, x, self.string, self.attrs, str(e))