some buffer cleanup... better encoding support

--HG-- branch : pmacs2
2010-03-01 01:20:31 -05:00 · 2010-03-01 01:20:31 -05:00 · 03ab1d3984
parent ec83e87680
commit 03ab1d3984
2 changed files with 65 additions and 41 deletions
--- a/buffer/init.py
+++ b/buffer/init.py
@ -1,6 +1,7 @@
 from util import defaultdict
 import codecs, datetime, grp, os, pwd, re, shutil, stat, string
 import fcntl, select, pty, threading
 import chardet
 #import aes, dirutil, regex, highlight, lex, term
 import dirutil, regex, highlight, lex, term
 from point import Point
@ -8,10 +9,11 @@ from subprocess import Popen, PIPE, STDOUT
 from keyinput import MAP
 # undo/redo stack constants
-ACT_NONE    = -1
+ACT_NONE = 'none'
-ACT_NORM    = 0
+ACT_NORM = 'norm'
-ACT_UNDO    = 1
+ACT_UNDO = 'undo'
-ACT_REDO    = 2
+ACT_REDO = 'redo'
 STACK_LIMIT = 1024
 def hasher(data):
@ -34,8 +36,8 @@ class AddMove(object):
        self.p       = p
        self.lines   = lines
        self.undo_id = buffer.undo_id
-    def restore(self, act=ACT_UNDO):
+    def restore(self, act):
-        assert act == ACT_UNDO or act == ACT_REDO
+        assert act in (ACT_UNDO, ACT_REDO)
        self.buffer.insert_lines(self.p, self.lines, act)
    def getpos(self):
        '''Return self.p, the point restore() passes to buffer.insert_lines().'''
        return self.p
@ -48,7 +50,7 @@ class DelMove(object):
        self.p2      = p2
        self.undo_id = buffer.undo_id
    def restore(self, act):
-        assert act == ACT_UNDO or act == ACT_REDO
+        assert act in (ACT_UNDO, ACT_REDO)
        self.buffer.delete(self.p1, self.p2, act)
    def getpos(self):
        '''Return self.p1, the first of the two points restore() passes to buffer.delete().'''
        return self.p1
@ -76,9 +78,20 @@ class Buffer(object):
        self.metadata    = {}
    def _detect_nl_type(self, data):
-        mac_c  = len(self.mac_re.findall(data))
+        mac_c = unix_c = win_c = 0
-        unix_c = len(self.unix_re.findall(data))
+        l = len(data)
-        win_c  = len(self.win_re.findall(data))
+        i = 0
        while i < l:
            if data[i:i + 2] == '\r\n':
                win_c += 1
                i += 2
            else:
                if data[i] == '\n':
                    unix_c += 1
                elif data[i] == '\r':
                    mac_c += 1
                i += 1
        if (unix_c and mac_c) or (unix_c and win_c) or (mac_c and win_c):
            # warn the user?
            pass
@ -264,6 +277,12 @@ class Buffer(object):
        # the file has not been modified now
        self.modified = False
    def backup(self):
        '''Copy the file at self.path into a new temporary file and return
        the temporary file's path.

        The temporary file is created with tempfile.mkstemp(prefix='pmc') and
        is not removed here.  Errors raised while opening or reading
        self.path propagate to the caller.
        '''
        # imported locally so this method does not rely on a module-level
        # import of tempfile
        import tempfile
        # mkstemp() returns an OS-level file descriptor (an int), not a file
        # object, so wrap it before writing to it
        fd, tpath = tempfile.mkstemp(prefix='pmc')
        dst = os.fdopen(fd, 'wb')
        try:
            src = open(self.path, 'rb')
            try:
                # stream the bytes across instead of reading the whole file
                # into memory at once
                shutil.copyfileobj(src, dst)
            finally:
                src.close()
        finally:
            dst.close()
        return tpath
    def readonly(self):
        '''Report whether the buffer is read-only; this implementation always returns False.'''
        return False
    def read_filter(self, data):
@ -562,16 +581,10 @@ class FileBuffer(Buffer):
        Buffer.__init__(self)
        self.path      = os.path.realpath(path)
        self.checksum  = None
        self.bytemark = ''
        self.codec     = 'utf-8'
-        if name is None:
+        self._name     = name or os.path.basename(path)
-            self._name = os.path.basename(self.path)
+        self._readonly = os.path.exists(path) and not os.access(path, os.W_OK)
-        else:
+
            self._name = name
        if os.path.exists(self.path) and not os.access(self.path, os.W_OK):
            self._readonly = True
        else:
            self._readonly = False
    def readonly(self):
        '''Return self._readonly, the flag derived from the os.access(..., os.W_OK) check on the file path.'''
        return self._readonly
@ -624,30 +637,29 @@ class FileBuffer(Buffer):
        if self.path_exists():
            f = self._open_file_r()
            data = f.read()
            if '\t' in data:
                self.writetabs = True
            f.close()
            self.store_checksum(data)
            self.codec = chardet.detect(data)['encoding'].lower()
        else:
            data = ''
            self.codec = 'utf-8'
-        if data.startswith('\xEF\xBB\xBF'):
+        if self.codec == 'utf-8' and data.startswith(codecs.BOM_UTF8):
-            # utf-8 bytemark
+            self.codec = 'utf-8-sig'
-            self.bytemark = data[:3]
+        elif self.codec.startswith('utf-16'):
-            data          = data[3:]
+            self.codec = 'utf-16'
        data = data.decode(self.codec)
        if '\t' in data: self.writetabs = True
        self.nl = self._detect_nl_type(data)
        data = self.read_filter(data)
        if '\x00' in data[:8192]:
            raise BinaryDataException("binary files are not supported")
-        for codec in ('utf-8', 'latin-1'):
+        return data
            data2 = self.decode(data, codec)
            if data2 is not None: return data2
        raise BinaryDataException("binary files are not supported")
    def open(self):
        data = self.read()
@ -687,7 +699,7 @@ class FileBuffer(Buffer):
            data = self.write_filter(data.encode(self.codec))
            f2 = self._open_file_w(self.path, preserve=False)
-            f2.write(self.bytemark + data)
+            f2.write(data)
            f2.close()
        #except Exception, e:
        except NameError, e:
--- a/method/init.py
+++ b/method/init.py
@ -1,3 +1,4 @@
 import codecs
 import os, commands, re, tempfile
 from subprocess import Popen, PIPE, STDOUT
@ -1151,3 +1152,14 @@ class SetTokenColors(Method):
            a.cached_colors = {}
            a.token_colors[name] = colors
            w.set_error('Color for %s set to %r' % (name, colors))
 class SetCodec(Method):
    # Command that stores a new codec name on the window's buffer
    # (w.buffer.codec) after checking that Python knows the codec.
    args = [arg('codec', p='Codec: ', h='')]
    def _execute(self, w, **vargs):
        # vargs['codec'] is the codec name supplied for the 'codec' argument
        codec = vargs['codec']
        # Guard only the lookup itself: KeyError and IndexError are
        # subclasses of LookupError, so keeping the other calls inside the
        # try would misreport their failures as an unknown codec.
        try:
            codecs.lookup(codec)
        except LookupError:
            w.set_error('Codec %r was not found' % codec)
            return
        w.buffer.codec = codec
        w.set_error('setting %r encoding to %r' % (w.buffer.name(), codec))