From 1e9e66adb4c94f409bd6fb90dfec3c7b95017095 Mon Sep 17 00:00:00 2001
From: moculus <none@none>
Date: Mon, 25 Jun 2007 15:49:04 +0000
Subject: [PATCH] sql fixed

--HG--
branch : pmacs2
---
 application.py                    |   6 +-
 code_examples/MakeEfileServer.sql | 202 ++++++++++++++++++++++++++++++
 lex2.py                           |   8 +-
 mode_sql.py                       |  83 +++++++-----
 4 files changed, 265 insertions(+), 34 deletions(-)
 create mode 100644 code_examples/MakeEfileServer.sql

diff --git a/application.py b/application.py
index f59614f..1ef10a6 100755
--- a/application.py
+++ b/application.py
@@ -12,7 +12,7 @@ import mode2
 import mode_mini, mode_search, mode_replace, mode_which
 import mode_console, mode_consolemini
 import mode_blame, mode_diff
-import mode_c, mode_python, mode_perl, mode_xml, mode_nasm, mode_sh, mode_javascript
+import mode_c, mode_python, mode_perl, mode_xml, mode_nasm, mode_sh, mode_javascript, mode_sql
 import mode_life, mode_text, mode_mutt
 
 def run(buffers, jump_to_line=None, init_mode=None):
@@ -100,7 +100,7 @@ class Application(object):
             'life':        mode_life.Life,
             'mutt':        mode_mutt.Mutt,
             'javascript':  mode_javascript.Javascript,
-            #'sql':        mode_sql.Sql,
+            'sql':         mode_sql.Sql,
             #'template':   mode_tt.Template,
         }
 
@@ -129,7 +129,7 @@ class Application(object):
             '.html':   'xml',
             '.htm':    'xml',
             '.js':     'javascript',
-            #'.sql':   'sql',
+            '.sql':    'sql',
             #'.tt':    'template'
         }
         self.mode_detection = {
diff --git a/code_examples/MakeEfileServer.sql b/code_examples/MakeEfileServer.sql
new file mode 100644
index 0000000..5beb830
--- /dev/null
+++ b/code_examples/MakeEfileServer.sql
@@ -0,0 +1,202 @@
+-- this file is used to initialize the efile server tables
+-- these tables are distinct from the esub tables
+-- they are used to proxy communications between the IRS and the BDS
+
+-- this table stores all the various username/password pairs we use
+-- the one with the most recent date is assumed to be current
+CREATE TABLE e2_password (
+    created_date TIMESTAMPTZ DEFAULT current_timestamp,
+    username     TEXT        NOT NULL,
+    password     TEXT        NOT NULL
+);
+
+-- these are the username/passwords which we use or have used with the IRS
+-- NOTE: the order we build these is very important; the most recent should be
+-- the last insert to run
+INSERT INTO e2_password (username, password) VALUES ('GM8PR643', 'a9G?x3N#');
+INSERT INTO e2_password (username, password) VALUES ('GM8PR643', '!Q2w#E4r');
+
+-- this table stores all the IRS processing centers we send to
+CREATE TABLE e2_center (
+    code   VARCHAR(1) PRIMARY KEY,
+    city   TEXT       NOT NULL,
+    url    TEXT       NOT NULL,
+    letter TEXT       NOT NULL,
+    CHECK(letter IN ('Andover', 'Austin'))
+);
+
+-- these are the processing centers we know about
+INSERT INTO e2_center VALUES ('C', 'Andover', 'efileA.ems.irs.gov', 'Andover');
+INSERT INTO e2_center VALUES ('E', 'Austin', 'efileA.ems.irs.gov', 'Austin');
+INSERT INTO e2_center VALUES ('F', 'Kansas City', 'efileB.ems.irs.gov', 'Andover');
+INSERT INTO e2_center VALUES ('G', 'Philadelphia', 'efileA.ems.irs.gov', 'Austin');
+INSERT INTO e2_center VALUES ('H', 'Fresno', 'efileB.ems.irs.gov', 'Austin');
+
+-- this table maps each state to its particular processing center
+CREATE TABLE e2_state (
+    abbrev VARCHAR(2) NOT NULL UNIQUE,
+    name   TEXT       NOT NULL UNIQUE,
+    code   VARCHAR(1) REFERENCES e2_center (code)
+);
+
+-- this data is from 2005 and needs to be updated
+INSERT INTO e2_state VALUES ('AL', 'Alabama', 'E');
+INSERT INTO e2_state VALUES ('AK', 'Alaska', 'H');
+INSERT INTO e2_state VALUES ('AZ', 'Arizona', 'H');
+INSERT INTO e2_state VALUES ('AR', 'Arkansas', 'E');
+INSERT INTO e2_state VALUES ('CA', 'California', 'H');
+INSERT INTO e2_state VALUES ('CO', 'Colorado', 'E');
+INSERT INTO e2_state VALUES ('CT', 'Connecticut', 'C');
+INSERT INTO e2_state VALUES ('DC', 'District of Columbia', 'C');
+INSERT INTO e2_state VALUES ('DE', 'Delaware', 'C');
+INSERT INTO e2_state VALUES ('FL', 'Florida', 'G');
+INSERT INTO e2_state VALUES ('GA', 'Georgia', 'G');
+INSERT INTO e2_state VALUES ('HI', 'Hawaii', 'H');
+INSERT INTO e2_state VALUES ('IA', 'Iowa', 'E');
+INSERT INTO e2_state VALUES ('ID', 'Idaho', 'H');
+INSERT INTO e2_state VALUES ('IL', 'Illinois', 'F');
+INSERT INTO e2_state VALUES ('IN', 'Indiana', 'F');
+INSERT INTO e2_state VALUES ('KS', 'Kansas', 'F');
+INSERT INTO e2_state VALUES ('KY', 'Kentucky', 'G');
+INSERT INTO e2_state VALUES ('LA', 'Louisiana', 'E');
+INSERT INTO e2_state VALUES ('MA', 'Massachusetts', 'C');
+INSERT INTO e2_state VALUES ('MD', 'Maryland', 'C');
+INSERT INTO e2_state VALUES ('ME', 'Maine', 'C');
+INSERT INTO e2_state VALUES ('MI', 'Michigan', 'F');
+INSERT INTO e2_state VALUES ('MN', 'Minnesota', 'F');
+INSERT INTO e2_state VALUES ('MO', 'Missouri', 'F');
+INSERT INTO e2_state VALUES ('MS', 'Mississippi', 'E');
+INSERT INTO e2_state VALUES ('MT', 'Montana', 'H');
+INSERT INTO e2_state VALUES ('NE', 'Nebraska', 'E');
+INSERT INTO e2_state VALUES ('NH', 'New Hampshire', 'C');
+INSERT INTO e2_state VALUES ('NJ', 'New Jersey', 'C');
+INSERT INTO e2_state VALUES ('NM', 'New Mexico', 'E');
+INSERT INTO e2_state VALUES ('NV', 'Nevada', 'H');
+INSERT INTO e2_state VALUES ('NC', 'North Carolina', 'G');
+INSERT INTO e2_state VALUES ('ND', 'North Dakota', 'E');
+INSERT INTO e2_state VALUES ('NY', 'New York', 'C');
+INSERT INTO e2_state VALUES ('OH', 'Ohio', 'F');
+INSERT INTO e2_state VALUES ('OK', 'Oklahoma', 'E');
+INSERT INTO e2_state VALUES ('OR', 'Oregon', 'H');
+INSERT INTO e2_state VALUES ('PA', 'Pennsylvania', 'C');
+INSERT INTO e2_state VALUES ('RI', 'Rhode Island', 'C');
+INSERT INTO e2_state VALUES ('SC', 'South Carolina', 'G');
+INSERT INTO e2_state VALUES ('SD', 'South Dakota', 'E');
+INSERT INTO e2_state VALUES ('TN', 'Tennessee', 'G');
+INSERT INTO e2_state VALUES ('TX', 'Texas', 'E');
+INSERT INTO e2_state VALUES ('UT', 'Utah', 'H');
+INSERT INTO e2_state VALUES ('VA', 'Virginia', 'C');
+INSERT INTO e2_state VALUES ('VT', 'Vermont', 'C');
+INSERT INTO e2_state VALUES ('WA', 'Washington', 'H');
+INSERT INTO e2_state VALUES ('WI', 'Wisconsin', 'F');
+INSERT INTO e2_state VALUES ('WV', 'West Virginia', 'F');
+INSERT INTO e2_state VALUES ('WY', 'Wyoming', 'H');
+
+-- the batch table is used to keep track of batches, both those currently
+-- being built, and those which have been sent off.
+CREATE TABLE e2_batch (
+    -- database key
+    batch_id          SERIAL      PRIMARY KEY,
+    -- when we created the batch
+    created_timestamp TIMESTAMPTZ NOT NULL DEFAULT current_timestamp,
+    -- when we sent the batch; if NULL, then it wasn't sent
+    sent_timestamp    TIMESTAMPTZ,
+    -- the statuses are as follows:
+    -- 1. active:   returns are actively being added to the batch
+    -- 2. waiting:  returns are not being added; batch is ready to be sent
+    -- 3. failed:   batch was abandoned and never sent
+    --              (all efiles in the batch need to be re-batched and resent)
+    -- 4. sent:     the batch has been sent
+    status            TEXT        NOT NULL DEFAULT 'pending',
+    -- processing center code to use
+    code              VARCHAR(1)  REFERENCES e2_center (code),
+    -- efin is different than us for vita sites, and each vita site has its own
+    -- batch, so we need to know which efin the batch is for
+    efin              TEXT        NOT NULL,
+    -- our own batch number, generated according to irs rules
+    batch_num         INTEGER     NOT NULL,
+    -- whether this is a pats batch
+    pats              BOOLEAN     DEFAULT FALSE,
+    -- the gtx key we got when we sent this batch, starts out as NULL
+    gtx               TEXT,
+    -- the lowest unused serial number for related returns
+    curr_serial_num   INTEGER     NOT NULL DEFAULT 0,
+    -- is vita or not; needed to know how to generate the tranA record
+    is_vita           INTEGER,
+    -- the actual batch data
+    data              TEXT,
+    UNIQUE(batch_num, efin),
+    CHECK(efin ~ '^[0-9]{6}$'),
+    -- gtx is of the form *YYYYMMDDhhmmss.xxxx, the irs gives them to us on
+    -- batch transmission
+    CHECK(gtx IS NULL OR gtx ~ '^[A-Z][0-9]{8}[0-9]{6}\.[0-9]{4}$'),
+    CHECK(status IN ('pending', 'queued', 'failed', 'sent', 'accepted', 'rejected'))
+);
+
+-- determines for each code/efin combination the id of the active batch, if any
+CREATE TABLE e2_current_batch (
+    batch_id INTEGER    REFERENCES e2_batch (batch_id),
+    code     VARCHAR(1) REFERENCES e2_center (code),
+    efin     TEXT       NOT NULL,
+    pats     BOOLEAN,
+    UNIQUE(code, efin, pats),
+    CHECK(e2_verify_current_batch(batch_id, code, efin))
+);
+
+-- this needs to be recreated each day. it keeps track of how many batches have
+-- been sent on a particular day.
+CREATE SEQUENCE e2_batch_transmission;
+
+-- each client who efiles a return will have a record in this table each time
+-- we attempt to batch that return and send it to the IRS
+--
+-- some of these records will be auto-generated by the efile server, but can't
+-- be set in a DEFAULT statement: serial
+CREATE TABLE e2_return (
+    return_id         SERIAL      PRIMARY KEY,
+    created_timestamp TIMESTAMPTZ NOT NULL DEFAULT current_timestamp,
+    batch_id          INTEGER     REFERENCES e2_batch (batch_id),
+    state             VARCHAR(2)  REFERENCES e2_state (abbrev),
+    fed_status        TEXT        NOT NULL DEFAULT 'unknown',
+    state_status      TEXT        NOT NULL DEFAULT 'unknown',
+    fed_response      DATE,
+    state_response    DATE,
+    efin              TEXT        NOT NULL,
+    pats              BOOLEAN     DEFAULT FALSE,
+    serial_num        INTEGER     NOT NULL,
+    sandbox           TEXT        NOT NULL,
+    client_id         INTEGER     NOT NULL,
+    edoc_id           INTEGER     NOT NULL,
+    dcn               TEXT        NOT NULL,
+    is_vita           INTEGER,
+    data              TEXT,
+    UNIQUE(batch_id, serial_num),
+    CHECK(state_status IN ('accepted', 'rejected', 'conditional', 'unknown', 'fed_return_rejected', 'failed')),
+    CHECK(fed_status IN ('accepted', 'rejected', 'conditional', 'unknown', 'failed')),
+    CHECK(dcn ~ '^00[0-9]{6}[0-9]{3}[0-9]{2}7$')
+);
+
+-- acks are the irs' responses to our batches. in addition to being parsed into
+-- "return ack" objects the data from the ack itself is placed here
+CREATE TABLE e2_ack (
+    ack_id            SERIAL      PRIMARY KEY,
+    created_timestamp TIMESTAMPTZ NOT NULL DEFAULT current_timestamp,
+    gtx               TEXT        NOT NULL,
+    extension         TEXT        NOT NULL,
+    data              TEXT        NOT NULL
+    -- gtx keys correspond to those stored in batches *YYYYMMDDhhmmss.xxxx
+    CHECK(gtx ~ '^[A-Z][0-9]{8}[0-9]{6}\.[0-9]{4}$'),
+    CHECK(extension ~ '^(NAK|ACK|S[A-Z][A-Z])$')
+);
+
+-- error codes are associated with return_acks, and describe the errors that a
+-- rejected ack will have
+CREATE TABLE e2_error_code (
+    error_code_id   SERIAL      PRIMARY KEY,
+    dcn             TEXT,
+    recipient       VARCHAR(2)  NOT NULL,
+    error_code      VARCHAR(4)  NOT NULL,
+    efile_line      VARCHAR(4)  NOT NULL,
+    efile_form      VARCHAR(32) NOT NULL,
+    form_instance   VARCHAR(8)  NOT NULL
+);
diff --git a/lex2.py b/lex2.py
index 4ab541b..5f47c8b 100755
--- a/lex2.py
+++ b/lex2.py
@@ -86,7 +86,9 @@ class PatternRule(Rule):
         assert name not in reserved_names, "reserved rule name: %r" % name
         self.name    = name
         self.pattern = pattern
-        self.re      = re.compile(pattern)
+        self._compile()
+    def _compile(self):
+        self.re = re.compile(self.pattern)
     def _match(self, lexer, parent, m):
         s = m.group(0)
         token = self.make_token(lexer, s, self.name, parent)
@@ -100,6 +102,10 @@ class PatternRule(Rule):
         else:
             return False
 
+class NocasePatternRule(PatternRule):
+    def _compile(self):
+        self.re = re.compile(self.pattern, re.IGNORECASE)
+
 class ContextPatternRule(PatternRule):
     def __init__(self, name, pattern, fallback):
         assert valid_name_re.match(name), 'invalid name %r' % name
diff --git a/mode_sql.py b/mode_sql.py
index 1729c76..1689a11 100644
--- a/mode_sql.py
+++ b/mode_sql.py
@@ -1,37 +1,60 @@
-import commands, os.path, sets, sys
+import color, mode2
+from lex2 import Grammar, PatternRule, NocasePatternRule, RegionRule
 
-import color, default, mode, lex, lex_sql, method, tab, tab_sql
+class StringGrammar(Grammar):
+    rules = [
+        PatternRule(
+            name=r'octal',
+            pattern=r'\\[0-7]{3}',
+        ),
+        PatternRule(
+            name=r'escaped',
+            pattern=r'\\.',
+        ),
+    ]
 
-class Sql(mode.Fundamental):
+class SqlGrammar(Grammar):
+    rules = [
+        PatternRule(name=r'comment', pattern=r'--.*$'),
+        RegionRule(name=r'comment', start='/\*', grammar=Grammar(), end='\*/'),
+        PatternRule(name=r'delimiter', pattern=r'[();,\.:\$\[\]]'),
+        NocasePatternRule(name=r'attribute', pattern=r'(?:check|exists|unique|not null|default|primary key|minvalue|foreign key|references)(?![A-Za-z0-9_])'),
+        NocasePatternRule(name=r'operator', pattern=r'(?:case|when|then|else|end|not|and|or|is not|is|in|not in)(?![A-Za-z0-9_])'),
+        NocasePatternRule(name=r'keyword', pattern=r'(?:create database|create index|create sequence|create table|create trigger|create view|select|insert|update|delete|drop database|drop index|drop sequence|drop table|drop trigger|drop view|create user|alter user|drop user|drop function|grant|revoke|create function|create or replace function|create or replace view|create language|create operator|create type)(?![A-Za-z0-9_])'),
+        NocasePatternRule(name=r'pseudokeyword', pattern=r'(?:returns|language|right join|left join|inner join|outer join|join|where|null|true|false|into|values|as|from|order by|asc|desc|limit|distinct|cascade|using|on)(?![A-Za-z0-9_])'),
+        NocasePatternRule(name=r'type', pattern=r'(?:void|row|serial|varchar|float|integer|int|text|timestamptz|timestamp|datetz|date|timetz|time|boolean|bool)(?![A-Za-z0-9_])'),
+        PatternRule(name=r'function', pattern=r'(?:nextval|current_timestamp|current_time|current_date)(?![A-Za-z0-9_])'),
+        RegionRule(name=r'string', start="'", grammar=StringGrammar(), end="'"),
+        RegionRule(name=r'quoted', start='"', grammar=StringGrammar(), end='"'),
+        PatternRule(name=r'bareword', pattern=r'[A-Za-z0-9_]+'),
+    ]
+
+class Sql(mode2.Fundamental):
+    grammar = SqlGrammar()
+    opentoken  = 'delimiter'
+    opentags   = {'(': ')', '[': ']', '{': '}'}
+    closetoken = 'delimiter'
+    closetags  = {')': '(', ']': '[', '}': '{'}
     def __init__(self, w):
-        mode.Fundamental.__init__(self, w)
-
-        self.tag_matching = True
-        self.grammar = lex_sql.SqlGrammar()
-        self.lexer = lex.Lexer(self.grammar)
-
+        mode2.Fundamental.__init__(self, w)
+        self.add_bindings('close-paren', (')',))
+        self.add_bindings('close-brace', ('}',))
+        self.add_bindings('close-bracket', (']',))
         self.colors = {
-            'sql comment':     color.build('red', 'default', 'bold'),
-            'c comment':       color.build('red', 'default', 'bold'),
-            'operator1':       color.build('yellow', 'default', 'bold'),
-            'operator2':       color.build('yellow', 'default', 'bold'),
-            'attribute1':      color.build('magenta', 'default', 'bold'),
-            'attribute2':      color.build('magenta', 'default', 'bold'),
-            'keyword1':        color.build('cyan', 'default', 'bold'),
-            'keyword2':        color.build('cyan', 'default', 'bold'),
-            'pseudo-keyword1': color.build('cyan', 'default', 'bold'),
-            'pseudo-keyword2': color.build('cyan', 'default', 'bold'),
-            'type1':           color.build('green', 'default', 'bold'),
-            'type2':           color.build('green', 'default', 'bold'),
-            'function':        color.build('yellow', 'default', 'bold'),
-            'quoted':          color.build('yellow', 'default', 'bold'),
-            'string':          color.build('green', 'default', 'bold'),
-            'bareword':        color.build('default', 'default', 'bold'),
+            'comment':        color.build('red', 'default'),
+            'operator':       color.build('yellow', 'default'),
+            'attribute':      color.build('magenta', 'default'),
+            'keyword':        color.build('cyan', 'default'),
+            'pseudokeyword':  color.build('cyan', 'default'),
+            'type':           color.build('green', 'default'),
+            'function':       color.build('yellow', 'default'),
+            'quoted':         color.build('yellow', 'default'),
+            'string.start':   color.build('green', 'default'),
+            'string.null':    color.build('green', 'default'),
+            'string.escaped': color.build('magenta', 'default'),
+            'string.octal':   color.build('magenta', 'default'),
+            'string.end':     color.build('green', 'default'),
+            'bareword':       color.build('default', 'default'),
         }
-
-        #self.highlighter.lex_buffer()
-        #self.get_regions()
-        self.tabber = tab_sql.SQLTabber(self)
-
     def name(self):
         return "Sql"