diff --git a/application.py b/application.py index f59614f..1ef10a6 100755 --- a/application.py +++ b/application.py @@ -12,7 +12,7 @@ import mode2 import mode_mini, mode_search, mode_replace, mode_which import mode_console, mode_consolemini import mode_blame, mode_diff -import mode_c, mode_python, mode_perl, mode_xml, mode_nasm, mode_sh, mode_javascript +import mode_c, mode_python, mode_perl, mode_xml, mode_nasm, mode_sh, mode_javascript, mode_sql import mode_life, mode_text, mode_mutt def run(buffers, jump_to_line=None, init_mode=None): @@ -100,7 +100,7 @@ class Application(object): 'life': mode_life.Life, 'mutt': mode_mutt.Mutt, 'javascript': mode_javascript.Javascript, - #'sql': mode_sql.Sql, + 'sql': mode_sql.Sql, #'template': mode_tt.Template, } @@ -129,7 +129,7 @@ class Application(object): '.html': 'xml', '.htm': 'xml', '.js': 'javascript', - #'.sql': 'sql', + '.sql': 'sql', #'.tt': 'template' } self.mode_detection = { diff --git a/code_examples/MakeEfileServer.sql b/code_examples/MakeEfileServer.sql new file mode 100644 index 0000000..5beb830 --- /dev/null +++ b/code_examples/MakeEfileServer.sql @@ -0,0 +1,202 @@ +-- this file is used to initialize the efile server tables +-- these tables are distinct from the esub tables +-- they are used to proxy communications between the IRS and the BDS + +-- this table stores all the various username/password pairs we use +-- the one with the most recent date is assumed to be current +CREATE TABLE e2_password ( + created_date TIMESTAMPTZ DEFAULT current_timestamp, + username TEXT NOT NULL, + password TEXT NOT NULL +); + +-- these are the username/passwords which we use or have used with the IRS +-- NOTE: the order we build these is very important; the most recent should be +-- the last insert to run +INSERT INTO e2_password (username, password) VALUES ('GM8PR643', 'a9G?x3N#'); +INSERT INTO e2_password (username, password) VALUES ('GM8PR643', '!Q2w#E4r'); + +-- this table stores all the IRS processing centers we send to +CREATE TABLE e2_center ( + code VARCHAR(1) PRIMARY KEY, + city TEXT NOT NULL, + url TEXT NOT NULL, + letter TEXT NOT NULL, + CHECK(letter IN ('Andover', 'Austin')) +); + +-- these are the processing centers we know about +INSERT INTO e2_center VALUES ('C', 'Andover', 'efileA.ems.irs.gov', 'Andover'); +INSERT INTO e2_center VALUES ('E', 'Austin', 'efileA.ems.irs.gov', 'Austin'); +INSERT INTO e2_center VALUES ('F', 'Kansas City', 'efileB.ems.irs.gov', 'Andover'); +INSERT INTO e2_center VALUES ('G', 'Philadelphia', 'efileA.ems.irs.gov', 'Austin'); +INSERT INTO e2_center VALUES ('H', 'Fresno', 'efileB.ems.irs.gov', 'Austin'); + +-- this table maps each state to its particular processing center +CREATE TABLE e2_state ( + abbrev VARCHAR(2) NOT NULL UNIQUE, + name TEXT NOT NULL UNIQUE, + code VARCHAR(1) REFERENCES e2_center (code) +); + +-- this data is from 2005 and needs to be updated +INSERT INTO e2_state VALUES ('AL', 'Alabama', 'E'); +INSERT INTO e2_state VALUES ('AK', 'Arkansas', 'E'); +INSERT INTO e2_state VALUES ('AZ', 'Arizona', 'H'); +INSERT INTO e2_state VALUES ('AR', 'Alaska', 'H'); +INSERT INTO e2_state VALUES ('CA', 'Califorina', 'H'); +INSERT INTO e2_state VALUES ('CO', 'Colorado', 'E'); +INSERT INTO e2_state VALUES ('CN', 'Connecticut', 'C'); +INSERT INTO e2_state VALUES ('DC', 'District of Columbia', 'C'); +INSERT INTO e2_state VALUES ('DE', 'Delaware', 'C'); +INSERT INTO e2_state VALUES ('FL', 'Florida', 'G'); +INSERT INTO e2_state VALUES ('GA', 'Georgia', 'G'); +INSERT INTO e2_state VALUES ('HI', 'Hawaii', 'H'); +INSERT INTO e2_state VALUES ('IA', 'Iowa', 'E'); +INSERT INTO e2_state VALUES ('ID', 'Idaho', 'H'); +INSERT INTO e2_state VALUES ('IL', 'Illinois', 'F'); +INSERT INTO e2_state VALUES ('IN', 'Indiana', 'F'); +INSERT INTO e2_state VALUES ('KS', 'Kansas', 'F'); +INSERT INTO e2_state VALUES ('KY', 'Kentucky', 'G'); +INSERT INTO e2_state VALUES ('LA', 'Louisiana', 'E'); +INSERT INTO e2_state VALUES ('MA', 'Massachusets', 'C'); +INSERT INTO e2_state VALUES ('MD', 'Maryland', 'C'); +INSERT INTO e2_state VALUES ('ME', 'Maine', 'C'); +INSERT INTO e2_state VALUES ('MI', 'Michigan', 'F'); +INSERT INTO e2_state VALUES ('MN', 'Minnesota', 'F'); +INSERT INTO e2_state VALUES ('MO', 'Missouri', 'F'); +INSERT INTO e2_state VALUES ('MS', 'Mississippi', 'E'); +INSERT INTO e2_state VALUES ('MT', 'Montana', 'H'); +INSERT INTO e2_state VALUES ('NE', 'Nebraska', 'E'); +INSERT INTO e2_state VALUES ('NH', 'New Hampshire', 'C'); +INSERT INTO e2_state VALUES ('NJ', 'New Jersey', 'C'); +INSERT INTO e2_state VALUES ('NM', 'New Mexico', 'E'); +INSERT INTO e2_state VALUES ('NV', 'Nevada', 'H'); +INSERT INTO e2_state VALUES ('NC', 'North Carolina', 'G'); +INSERT INTO e2_state VALUES ('ND', 'North Dakota', 'E'); +INSERT INTO e2_state VALUES ('NY', 'New York', 'C'); +INSERT INTO e2_state VALUES ('OH', 'Ohio', 'F'); +INSERT INTO e2_state VALUES ('OK', 'Oklahoma', 'E'); +INSERT INTO e2_state VALUES ('OR', 'Oregon', 'H'); +INSERT INTO e2_state VALUES ('PA', 'Pennsylvania', 'C'); +INSERT INTO e2_state VALUES ('RI', 'Rhode Island', 'C'); +INSERT INTO e2_state VALUES ('SC', 'South Carolina', 'G'); +INSERT INTO e2_state VALUES ('SD', 'South Dakota', 'E'); +INSERT INTO e2_state VALUES ('TN', 'Tennessee', 'G'); +INSERT INTO e2_state VALUES ('TX', 'Texas', 'E'); +INSERT INTO e2_state VALUES ('UT', 'Utah', 'H'); +INSERT INTO e2_state VALUES ('VA', 'Virginia', 'C'); +INSERT INTO e2_state VALUES ('VT', 'Vermont', 'C'); +INSERT INTO e2_state VALUES ('WA', 'Washington', 'H'); +INSERT INTO e2_state VALUES ('WI', 'Wisconsin', 'F'); +INSERT INTO e2_state VALUES ('WV', 'West Virginia', 'F'); +INSERT INTO e2_state VALUES ('WY', 'Wyoming', 'H'); + +-- the batch table is used to keep track of batches, both those currently +-- being built, and those which have been sent off. +CREATE TABLE e2_batch ( + -- database key + batch_id SERIAL PRIMARY KEY, + -- when we created the batch + created_timestamp TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, + -- when we sent the batch; if NULL, then it wasn't sent + sent_timestamp TIMESTAMPTZ, + -- the statuses are as follows: + -- 1. active: returns are actively being added to the batch + -- 2. waiting: returns are not being added; batch is ready to be sent + -- 2. failed: batch was abandoned and never sent + -- (all efiles in the batch need to be re-batched and resent) + -- 3. sent: the batch has been sent + status TEXT NOT NULL DEFAULT 'pending', + -- processing center code to use + code VARCHAR(1) REFERENCES e2_center (code), + -- efin is different than us for vita sites, and each vita site has its own + -- batch, so we need to know which efin the batch is for + efin TEXT NOT NULL, + -- our own generated batch number that we generate according to irs rules + batch_num INTEGER NOT NULL, + -- whether this is a pats batch + pats BOOLEAN DEFAULT FALSE, + -- the gtx key we got when we sent this batch, starts out as NULL + gtx TEXT, + -- the lowest unused serial number for related returns + curr_serial_num INTEGER NOT NULL DEFAULT 0, + --is vita or not-needed to know how to generate the tranA record + is_vita INTEGER, + -- the actual batch data + data TEXT, + UNIQUE(batch_num, efin), + CHECK(efin ~ '^[0-9]{6}$'), + -- gtx is of the form *YYYYMMDDhhmmss.xxxx, the irs gives them to us on + -- batch transmission + CHECK(gtx IS NULL OR gtx ~ '^[A-Z][0-9]{8}[0-9]{6}\.[0-9]{4}$'), + CHECK(status IN ('pending', 'queued', 'failed', 'sent', 'accepted', 'rejected')) +); + +-- determines for each code/efin combination the id of the active batch, if any +CREATE TABLE e2_current_batch ( + batch_id INTEGER REFERENCES e2_batch (batch_id), + code VARCHAR(1) REFERENCES e2_center (code), + efin TEXT NOT NULL, + pats BOOLEAN, + UNIQUE(code, efin, pats), + CHECK(e2_verify_current_batch(batch_id, code, efin)) +); + +-- this needs to be recreated each day. it keeps track of how many batches have +-- been sent on a particular day. +CREATE SEQUENCE e2_batch_transmission; + +-- each client who efiles a return will have a record in this table each time +-- we attempt to batch that return and send it to the IRS +-- +-- some of these records will be auto-generated by the efile server, but can't +-- be set in a DEFAULT statement: serial +CREATE TABLE e2_return ( + return_id SERIAL PRIMARY KEY, + created_timestamp TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, + batch_id INTEGER REFERENCES e2_batch (batch_id), + state VARCHAR(2) REFERENCES e2_state (abbrev), + fed_status TEXT NOT NULL DEFAULT 'unknown', + state_status TEXT NOT NULL DEFAULT 'unknown', + fed_response DATE, + state_response DATE, + efin TEXT NOT NULL, + pats BOOLEAN DEFAULT FALSE, + serial_num INTEGER NOT NULL, + sandbox TEXT NOT NULL, + client_id INTEGER NOT NULL, + edoc_id INTEGER NOT NULL, + dcn TEXT NOT NULL, + is_vita INTEGER, + data TEXT, + UNIQUE(batch_id, serial_num), + CHECK(state_status IN ('accepted', 'rejected', 'conditional', 'unknown', 'fed_return_rejected', 'failed')), + CHECK(fed_status IN ('accepted', 'rejected', 'conditional', 'unknown', 'failed')), + CHECK(dcn ~ '^00[0-9]{6}[0-9]{3}[0-9]{2}7$') +); + +-- acks are the irs' responses to our batches. in addition to being parsed into +-- "return ack" objects the data from the ack itself is placed here +CREATE TABLE e2_ack ( + ack_id SERIAL PRIMARY KEY, + created_timestamp TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, + gtx TEXT NOT NULL, + extension TEXT NOT NULL, + data TEXT NOT NULL + -- gtx keys correspond to those stored in batches *YYYYMMDDhhmmss.xxxx + CHECK(gtx ~ '^[A-Z][0-9]{8}[0-9]{6}\.[0-9]{4}$'), + CHECK(extension ~ '^(NAK|ACK|S[A-Z][A-Z])$') +); + +-- error codes are associted with return_acks, and describe the errors that a +-- rejected ack will have +CREATE TABLE e2_error_code ( + error_code_id SERIAL PRIMARY KEY, + dcn TEXT, + recipient VARCHAR(2) NOT NULL, + error_code VARCHAR(4) NOT NULL, + efile_line VARCHAR(4) NOT NULL, + efile_form VARCHAR(32) NOT NULL, + form_instance VARCHAR(8) NOT NULL +); diff --git a/lex2.py b/lex2.py index 4ab541b..5f47c8b 100755 --- a/lex2.py +++ b/lex2.py @@ -86,7 +86,9 @@ class PatternRule(Rule): assert name not in reserved_names, "reserved rule name: %r" % name self.name = name self.pattern = pattern - self.re = re.compile(pattern) + self._compile() + def _compile(self): + self.re = re.compile(self.pattern) def _match(self, lexer, parent, m): s = m.group(0) token = self.make_token(lexer, s, self.name, parent) @@ -100,6 +102,10 @@ class PatternRule(Rule): else: return False +class NocasePatternRule(PatternRule): + def _compile(self): + self.re = re.compile(self.pattern, re.IGNORECASE) + class ContextPatternRule(PatternRule): def __init__(self, name, pattern, fallback): assert valid_name_re.match(name), 'invalid name %r' % name diff --git a/mode_sql.py b/mode_sql.py index 1729c76..1689a11 100644 --- a/mode_sql.py +++ b/mode_sql.py @@ -1,37 +1,60 @@ -import commands, os.path, sets, sys +import color, mode2 +from lex2 import Grammar, PatternRule, NocasePatternRule, RegionRule -import color, default, mode, lex, lex_sql, method, tab, tab_sql +class StringGrammar(Grammar): + rules = [ + PatternRule( + name=r'octal', + pattern=r'\\[0-7]{3}', + ), + PatternRule( + name=r'escaped', + pattern=r'\\.', + ), + ] -class Sql(mode.Fundamental): +class SqlGrammar(Grammar): + rules = [ + PatternRule(name=r'comment', pattern=r'--.*$'), + RegionRule(name=r'comment', start='/\*', grammar=Grammar(), end='\*/'), + PatternRule(name=r'delimiter', pattern=r'[();,\.:\$\[\]]'), + NocasePatternRule(name=r'attribute', pattern=r'(?:check|exists|unique|not null|default|primary key|minvalue|foreign key|references)(?![A-Za-z0-9_])'), + NocasePatternRule(name=r'operator', pattern=r'(?:case|when|then|else|end|not|and|or|is not|is|in|not in)(?![A-Za-z0-9_])'), + NocasePatternRule(name=r'keyword', pattern=r'(?:create database|create index|create sequence|create table|create trigger|create view|select|insert|update|delete|drop database|drop index|drop sequence|drop table|drop trigger|drop view|create user|alter user|drop user|drop function|grant|revoke|create function|create or replace function|create or replace view|create language|create operator|create type)(?![A-Za-z0-9_])'), + NocasePatternRule(name=r'pseudokeyword', pattern=r'(?:returns|language|right join|left join|inner join|outer join|join|where|null|true|false|into|values|as|from|order by|asc|desc|limit|distinct|cascade|using|on)(?![A-Za-z0-9_])'), + NocasePatternRule(name=r'type', pattern=r'(?:void|row|serial|varchar|float|integer|int|text|timestamptz|timestamp|datetz|date|timetz|time|boolean|bool)(?![A-Za-z0-9_])'), + PatternRule(name=r'function', pattern=r'(?:nextval|current_timestamp|current_time|current_date)(?![A-Za-z0-9_])'), + RegionRule(name=r'string', start="'", grammar=StringGrammar(), end="'"), + RegionRule(name=r'quoted', start='"', grammar=StringGrammar(), end='"'), + PatternRule(name=r'bareword', pattern=r'[A-Za-z0-9_]+'), + ] + +class Sql(mode2.Fundamental): + grammar = SqlGrammar() + opentoken = 'delimiter' + opentags = {'(': ')', '[': ']', '{': '}'} + closetoken = 'delimiter' + closetags = {')': '(', ']': '[', '}': '{'} def __init__(self, w): - mode.Fundamental.__init__(self, w) - - self.tag_matching = True - self.grammar = lex_sql.SqlGrammar() - self.lexer = lex.Lexer(self.grammar) - + mode2.Fundamental.__init__(self, w) + self.add_bindings('close-paren', (')',)) + self.add_bindings('close-brace', ('}',)) + self.add_bindings('close-bracket', (']',)) self.colors = { - 'sql comment': color.build('red', 'default', 'bold'), - 'c comment': color.build('red', 'default', 'bold'), - 'operator1': color.build('yellow', 'default', 'bold'), - 'operator2': color.build('yellow', 'default', 'bold'), - 'attribute1': color.build('magenta', 'default', 'bold'), - 'attribute2': color.build('magenta', 'default', 'bold'), - 'keyword1': color.build('cyan', 'default', 'bold'), - 'keyword2': color.build('cyan', 'default', 'bold'), - 'pseudo-keyword1': color.build('cyan', 'default', 'bold'), - 'pseudo-keyword2': color.build('cyan', 'default', 'bold'), - 'type1': color.build('green', 'default', 'bold'), - 'type2': color.build('green', 'default', 'bold'), - 'function': color.build('yellow', 'default', 'bold'), - 'quoted': color.build('yellow', 'default', 'bold'), - 'string': color.build('green', 'default', 'bold'), - 'bareword': color.build('default', 'default', 'bold'), + 'comment': color.build('red', 'default'), + 'operator': color.build('yellow', 'default'), + 'attribute': color.build('magenta', 'default'), + 'keyword': color.build('cyan', 'default'), + 'pseudokeyword': color.build('cyan', 'default'), + 'type': color.build('green', 'default'), + 'function': color.build('yellow', 'default'), + 'quoted': color.build('yellow', 'default'), + 'string.start': color.build('green', 'default'), + 'string.null': color.build('green', 'default'), + 'string.escaped': color.build('magenta', 'default'), + 'string.octal': color.build('magenta', 'default'), + 'string.end': color.build('green', 'default'), + 'bareword': color.build('default', 'default'), } - - #self.highlighter.lex_buffer() - #self.get_regions() - self.tabber = tab_sql.SQLTabber(self) - def name(self): return "Sql"