some buffer cleanup... better encoding support

--HG--
branch : pmacs2
This commit is contained in:
Erik Osheim 2010-03-01 01:20:31 -05:00
parent ec83e87680
commit 03ab1d3984
2 changed files with 65 additions and 41 deletions

View File

@ -1,6 +1,7 @@
from util import defaultdict from util import defaultdict
import codecs, datetime, grp, os, pwd, re, shutil, stat, string import codecs, datetime, grp, os, pwd, re, shutil, stat, string
import fcntl, select, pty, threading import fcntl, select, pty, threading
import chardet
#import aes, dirutil, regex, highlight, lex, term #import aes, dirutil, regex, highlight, lex, term
import dirutil, regex, highlight, lex, term import dirutil, regex, highlight, lex, term
from point import Point from point import Point
@ -8,10 +9,11 @@ from subprocess import Popen, PIPE, STDOUT
from keyinput import MAP from keyinput import MAP
# undo/redo stack constants # undo/redo stack constants
ACT_NONE = -1 ACT_NONE = 'none'
ACT_NORM = 0 ACT_NORM = 'norm'
ACT_UNDO = 1 ACT_UNDO = 'undo'
ACT_REDO = 2 ACT_REDO = 'redo'
STACK_LIMIT = 1024 STACK_LIMIT = 1024
def hasher(data): def hasher(data):
@ -34,8 +36,8 @@ class AddMove(object):
self.p = p self.p = p
self.lines = lines self.lines = lines
self.undo_id = buffer.undo_id self.undo_id = buffer.undo_id
def restore(self, act=ACT_UNDO): def restore(self, act):
assert act == ACT_UNDO or act == ACT_REDO assert act in (ACT_UNDO, ACT_REDO)
self.buffer.insert_lines(self.p, self.lines, act) self.buffer.insert_lines(self.p, self.lines, act)
def getpos(self): def getpos(self):
return self.p return self.p
@ -48,7 +50,7 @@ class DelMove(object):
self.p2 = p2 self.p2 = p2
self.undo_id = buffer.undo_id self.undo_id = buffer.undo_id
def restore(self, act): def restore(self, act):
assert act == ACT_UNDO or act == ACT_REDO assert act in (ACT_UNDO, ACT_REDO)
self.buffer.delete(self.p1, self.p2, act) self.buffer.delete(self.p1, self.p2, act)
def getpos(self): def getpos(self):
return self.p1 return self.p1
@ -76,9 +78,20 @@ class Buffer(object):
self.metadata = {} self.metadata = {}
def _detect_nl_type(self, data): def _detect_nl_type(self, data):
mac_c = len(self.mac_re.findall(data)) mac_c = unix_c = win_c = 0
unix_c = len(self.unix_re.findall(data)) l = len(data)
win_c = len(self.win_re.findall(data)) i = 0
while i < l:
if data[i:i + 2] == '\r\n':
win_c += 1
i += 2
else:
if data[i] == '\n':
unix_c += 1
elif data[i] == '\r':
mac_c += 1
i += 1
if (unix_c and mac_c) or (unix_c and win_c) or (mac_c and win_c): if (unix_c and mac_c) or (unix_c and win_c) or (mac_c and win_c):
# warn the user? # warn the user?
pass pass
@ -264,6 +277,12 @@ class Buffer(object):
# the file has not been modified now # the file has not been modified now
self.modified = False self.modified = False
def backup(self):
    '''Copy the file at self.path into a new temporary file.

    Returns the path of the temporary copy (its name is prefixed with
    'pmc'). If the copy fails, the partially written temporary file is
    removed and the original exception is re-raised.
    '''
    # Imported locally so the method does not depend on the module's
    # top-level import list.
    import os
    import shutil
    import tempfile

    # mkstemp() returns an OS-level file descriptor (an int), not a file
    # object, so it has to be wrapped before it can be written to.
    fd, tpath = tempfile.mkstemp(prefix='pmc')
    try:
        with os.fdopen(fd, 'wb') as dst:
            with open(self.path, 'rb') as src:
                shutil.copyfileobj(src, dst)
    except Exception:
        # Do not leave a stray temp file behind when the source is
        # unreadable or the copy is interrupted.
        os.unlink(tpath)
        raise
    return tpath
def readonly(self): def readonly(self):
return False return False
def read_filter(self, data): def read_filter(self, data):
@ -562,16 +581,10 @@ class FileBuffer(Buffer):
Buffer.__init__(self) Buffer.__init__(self)
self.path = os.path.realpath(path) self.path = os.path.realpath(path)
self.checksum = None self.checksum = None
self.bytemark = ''
self.codec = 'utf-8' self.codec = 'utf-8'
if name is None: self._name = name or os.path.basename(path)
self._name = os.path.basename(self.path) self._readonly = os.path.exists(path) and not os.access(path, os.W_OK)
else:
self._name = name
if os.path.exists(self.path) and not os.access(self.path, os.W_OK):
self._readonly = True
else:
self._readonly = False
def readonly(self): def readonly(self):
return self._readonly return self._readonly
@ -624,30 +637,29 @@ class FileBuffer(Buffer):
if self.path_exists(): if self.path_exists():
f = self._open_file_r() f = self._open_file_r()
data = f.read() data = f.read()
if '\t' in data:
self.writetabs = True
f.close() f.close()
self.store_checksum(data) self.store_checksum(data)
self.codec = chardet.detect(data)['encoding'].lower()
else: else:
data = '' data = ''
self.codec = 'utf-8'
if data.startswith('\xEF\xBB\xBF'): if self.codec == 'utf-8' and data.startswith(codecs.BOM_UTF8):
# utf-8 bytemark self.codec = 'utf-8-sig'
self.bytemark = data[:3] elif self.codec.startswith('utf-16'):
data = data[3:] self.codec = 'utf-16'
data = data.decode(self.codec)
if '\t' in data: self.writetabs = True
self.nl = self._detect_nl_type(data) self.nl = self._detect_nl_type(data)
data = self.read_filter(data) data = self.read_filter(data)
if '\x00' in data[:8192]: if '\x00' in data[:8192]:
raise BinaryDataException("binary files are not supported") raise BinaryDataException("binary files are not supported")
for codec in ('utf-8', 'latin-1'): return data
data2 = self.decode(data, codec)
if data2 is not None: return data2
raise BinaryDataException("binary files are not supported")
def open(self): def open(self):
data = self.read() data = self.read()
@ -687,7 +699,7 @@ class FileBuffer(Buffer):
data = self.write_filter(data.encode(self.codec)) data = self.write_filter(data.encode(self.codec))
f2 = self._open_file_w(self.path, preserve=False) f2 = self._open_file_w(self.path, preserve=False)
f2.write(self.bytemark + data) f2.write(data)
f2.close() f2.close()
#except Exception, e: #except Exception, e:
except NameError, e: except NameError, e:

View File

@ -1,3 +1,4 @@
import codecs
import os, commands, re, tempfile import os, commands, re, tempfile
from subprocess import Popen, PIPE, STDOUT from subprocess import Popen, PIPE, STDOUT
@ -1151,3 +1152,14 @@ class SetTokenColors(Method):
a.cached_colors = {} a.cached_colors = {}
a.token_colors[name] = colors a.token_colors[name] = colors
w.set_error('Color for %s set to %r' % (name, colors)) w.set_error('Color for %s set to %r' % (name, colors))
class SetCodec(Method):
    '''Set the codec attribute of the current window's buffer.

    The codec name is checked with codecs.lookup() first; an unknown name
    leaves the buffer untouched and reports an error on the window.
    '''
    args = [arg('codec', p='Codec: ', h='')]
    def _execute(self, w, **vargs):
        codec = vargs['codec']
        # Guard only the lookup itself: LookupError is also the base class of
        # KeyError and IndexError, so a wider try block would misreport
        # unrelated failures below as an unknown codec.
        try:
            codecs.lookup(codec)
        except LookupError:
            w.set_error('Codec %r was not found' % codec)
            return
        w.buffer.codec = codec
        w.set_error('setting %r encoding to %r' % (w.buffer.name(), codec))