some buffer cleanup... better encoding support

--HG--
branch : pmacs2
This commit is contained in:
Erik Osheim 2010-03-01 01:20:31 -05:00
parent ec83e87680
commit 03ab1d3984
2 changed files with 65 additions and 41 deletions

View File

@ -1,6 +1,7 @@
from util import defaultdict
import codecs, datetime, grp, os, pwd, re, shutil, stat, string
import fcntl, select, pty, threading
import chardet
#import aes, dirutil, regex, highlight, lex, term
import dirutil, regex, highlight, lex, term
from point import Point
@ -8,10 +9,11 @@ from subprocess import Popen, PIPE, STDOUT
from keyinput import MAP
# undo/redo stack constants
ACT_NONE = -1
ACT_NORM = 0
ACT_UNDO = 1
ACT_REDO = 2
ACT_NONE = 'none'
ACT_NORM = 'norm'
ACT_UNDO = 'undo'
ACT_REDO = 'redo'
STACK_LIMIT = 1024
def hasher(data):
@ -34,8 +36,8 @@ class AddMove(object):
self.p = p
self.lines = lines
self.undo_id = buffer.undo_id
def restore(self, act=ACT_UNDO):
assert act == ACT_UNDO or act == ACT_REDO
def restore(self, act):
assert act in (ACT_UNDO, ACT_REDO)
self.buffer.insert_lines(self.p, self.lines, act)
def getpos(self):
    '''Return the point at which this move inserts its lines on restore.'''
    position = self.p
    return position
@ -48,7 +50,7 @@ class DelMove(object):
self.p2 = p2
self.undo_id = buffer.undo_id
def restore(self, act):
assert act == ACT_UNDO or act == ACT_REDO
assert act in (ACT_UNDO, ACT_REDO)
self.buffer.delete(self.p1, self.p2, act)
def getpos(self):
    '''Return the first of the two points this move deletes between.'''
    start = self.p1
    return start
@ -76,9 +78,20 @@ class Buffer(object):
self.metadata = {}
def _detect_nl_type(self, data):
mac_c = len(self.mac_re.findall(data))
unix_c = len(self.unix_re.findall(data))
win_c = len(self.win_re.findall(data))
mac_c = unix_c = win_c = 0
l = len(data)
i = 0
while i < l:
if data[i:i + 2] == '\r\n':
win_c += 1
i += 2
else:
if data[i] == '\n':
unix_c += 1
elif data[i] == '\r':
mac_c += 1
i += 1
if (unix_c and mac_c) or (unix_c and win_c) or (mac_c and win_c):
# warn the user?
pass
@ -264,6 +277,12 @@ class Buffer(object):
# the file has not been modified now
self.modified = False
def backup(self):
    '''Copy the file at self.path into a new temporary file.

    Returns the path of the temporary copy (its name starts with 'pmc').
    The copy is not removed here.
    '''
    # Imported locally: tempfile is not among the imports visible at the
    # top of this module.
    import tempfile
    # mkstemp() returns a raw OS-level descriptor (an int), not a file
    # object, so it has to be wrapped before it can be written or closed.
    fd, tpath = tempfile.mkstemp(prefix='pmc')
    dst = os.fdopen(fd, 'wb')
    try:
        src = open(self.path, 'rb')
        try:
            dst.write(src.read())
        finally:
            # Close the source explicitly instead of leaking the handle.
            src.close()
    finally:
        dst.close()
    return tpath
def readonly(self):
    '''Report whether this buffer is read-only; this version always says no.'''
    is_readonly = False
    return is_readonly
def read_filter(self, data):
@ -562,16 +581,10 @@ class FileBuffer(Buffer):
Buffer.__init__(self)
self.path = os.path.realpath(path)
self.checksum = None
self.bytemark = ''
self.codec = 'utf-8'
if name is None:
self._name = os.path.basename(self.path)
else:
self._name = name
if os.path.exists(self.path) and not os.access(self.path, os.W_OK):
self._readonly = True
else:
self._readonly = False
self._name = name or os.path.basename(path)
self._readonly = os.path.exists(path) and not os.access(path, os.W_OK)
def readonly(self):
    '''Return the read-only flag stored on this buffer as _readonly.'''
    flag = self._readonly
    return flag
@ -624,30 +637,29 @@ class FileBuffer(Buffer):
if self.path_exists():
f = self._open_file_r()
data = f.read()
if '\t' in data:
self.writetabs = True
f.close()
self.store_checksum(data)
self.codec = chardet.detect(data)['encoding'].lower()
else:
data = ''
self.codec = 'utf-8'
if data.startswith('\xEF\xBB\xBF'):
# utf-8 bytemark
self.bytemark = data[:3]
data = data[3:]
if self.codec == 'utf-8' and data.startswith(codecs.BOM_UTF8):
self.codec = 'utf-8-sig'
elif self.codec.startswith('utf-16'):
self.codec = 'utf-16'
data = data.decode(self.codec)
if '\t' in data: self.writetabs = True
self.nl = self._detect_nl_type(data)
data = self.read_filter(data)
if '\x00' in data[:8192]:
raise BinaryDataException("binary files are not supported")
for codec in ('utf-8', 'latin-1'):
data2 = self.decode(data, codec)
if data2 is not None: return data2
raise BinaryDataException("binary files are not supported")
return data
def open(self):
data = self.read()
@ -687,7 +699,7 @@ class FileBuffer(Buffer):
data = self.write_filter(data.encode(self.codec))
f2 = self._open_file_w(self.path, preserve=False)
f2.write(self.bytemark + data)
f2.write(data)
f2.close()
#except Exception, e:
except NameError, e:

View File

@ -1,3 +1,4 @@
import codecs
import os, commands, re, tempfile
from subprocess import Popen, PIPE, STDOUT
@ -1151,3 +1152,14 @@ class SetTokenColors(Method):
a.cached_colors = {}
a.token_colors[name] = colors
w.set_error('Color for %s set to %r' % (name, colors))
class SetCodec(Method):
    '''Set the codec name stored on the current window's buffer.'''
    args = [arg('codec', p='Codec: ', h='')]

    def _execute(self, w, **vargs):
        '''Validate vargs['codec'] and store it as w.buffer.codec.

        An unknown codec name leaves the buffer untouched and is reported
        through w.set_error.
        '''
        codec = vargs['codec']
        # Guard only the lookup. LookupError is also the base class of
        # KeyError and IndexError, so wrapping the lines below as well
        # would misreport an unrelated failure as a missing codec.
        try:
            codecs.lookup(codec)
        except LookupError:
            w.set_error('Codec %r was not found' % codec)
            return
        w.buffer.codec = codec
        w.set_error('setting %r encoding to %r' % (w.buffer.name(), codec))