HsSqlPpp-0.4.0



Parsing

Here is a program to parse some SQL from a file and print the ast:

> import System.Environment

> import Database.HsSqlPpp.Parser

> main :: IO ()
> main = do
>   [f] <- getArgs
>   ast <- parseStatementsFromFile f
>   print ast

Given the SQL file x.sql:

select * from t;

the result is something like:

$ Parse x.sql
Right [QueryStatement (Annotation {asrc = Just ("x.sql",1,1), atype =
Nothing, errs = [], stType = Nothing, catUpd = [], fnProt = Nothing,
infType = Nothing}) (Select (Annotation {asrc = Just ("x.sql",1,1),
atype = Nothing, errs = [], stType = Nothing, catUpd = [], fnProt =
Nothing, infType = Nothing}) Dupes (SelectList (Annotation {asrc =
Just ("x.sql",1,8), atype = Nothing, errs = [], stType = Nothing,
catUpd = [], fnProt = Nothing, infType = Nothing}) [SelExp (Annotation
{asrc = Just ("x.sql",1,8), atype = Nothing, errs = [], stType =
Nothing, catUpd = [], fnProt = Nothing, infType = Nothing})
(Identifier (Annotation {asrc = Just ("x.sql",1,8), atype = Nothing,
errs = [], stType = Nothing, catUpd = [], fnProt = Nothing, infType =
Nothing}) "*")]) [Tref (Annotation {asrc = Just ("x.sql",1,15), atype
= Nothing, errs = [], stType = Nothing, catUpd = [], fnProt = Nothing,
infType = Nothing}) (SQIdentifier (Annotation {asrc = Just
("x.sql",1,15), atype = Nothing, errs = [], stType = Nothing, catUpd =
[], fnProt = Nothing, infType = Nothing}) ["t"]) (NoAlias (Annotation
{asrc = Just ("x.sql",1,16), atype = Nothing, errs = [], stType =
Nothing, catUpd = [], fnProt = Nothing, infType = Nothing}))] Nothing
[] Nothing [] Nothing Nothing)]

More readable output from this variation:

> import System.Environment

> import Language.Haskell.Exts
> import Data.Generics.Uniplate.Data

> import Database.HsSqlPpp.Parser

> main :: IO ()
> main = do
>   [f] <- getArgs
>   ast <- parseStatementsFromFile f
>   putStrLn $ showNoAnns ast

> showNoAnns :: Show a => a -> String
> showNoAnns = p stripA
>   where
>     stripA :: Exp -> Exp
>     stripA = transformBi $ \x ->
>                case x of
>                  (Paren (RecConstr (UnQual (Ident "Annotation")) _)) ->
>                           Con $ UnQual $ Ident "Ann"
>                  x1 -> x1
>     p f s =
>         case parseExp (show s) of
>           ParseOk ast -> prettyPrint (f ast)
>           x -> error $ show x

The annotation values have been replace with the string 'Ann', and the output indented using haskell-src-exts:

$ Parse2 x.sql
Right
  [QueryStatement Ann
     (Select Ann Dupes
        (SelectList Ann [SelExp Ann (Identifier Ann "*")])
        [Tref Ann (SQIdentifier Ann ["t"]) (NoAlias Ann)]
        Nothing
        []
        Nothing
        []
        Nothing
        Nothing)]

You can see the various parsing functions in the haddock docs here: Database.HsSqlPpp.Parser

Typechecking

Here is a program which parses and typechecks a query and outputs the result type:

> import Database.HsSqlPpp.Parser
> import Database.HsSqlPpp.TypeChecker
> import Database.HsSqlPpp.Catalog
> import Database.HsSqlPpp.Types
> import Database.HsSqlPpp.Annotation
> import Database.HsSqlPpp.Ast

> main :: IO ()
> main = do
>   let query = "select * from t"
>       ast :: QueryExpr
>       Right ast = parseQueryExpr "" query
>       aast :: QueryExpr
>       aast = typeCheckQueryExpr cat ast
>       ann :: Annotation
>       ann = getAnnotation aast
>       ty :: Maybe Type
>       ty = atype ann
>   print ty
>   where
>     Right cat = updateCatalog defaultTemplate1Catalog
>                   [CatCreateTable "t" [("a", typeInt)
>                                       ,("b", typeInt)
>                                       ] []]

Running gives:

$ src-extra/examples/TypeCheck
Just (SetOfType (CompositeType [("a",ScalarType "int4"),("b",ScalarType "int4")]))

Typecheck against an existing database:

> import System.Environment

> import Database.HsSqlPpp.Parser
> import Database.HsSqlPpp.TypeChecker
> import Database.HsSqlPpp.Catalog
> import Database.HsSqlPpp.Types
> import Database.HsSqlPpp.Annotation
> import Database.HsSqlPpp.Ast

> import Database.HsSqlPpp.Utils.CatalogReader

> main :: IO ()
> main = do
>   [cs] <- getArgs
>   cus <- readCatalogFromDatabase cs
>   let Right cat = updateCatalog defaultCatalog cus
>       query = "select * from t"
>       ast :: QueryExpr
>       Right ast = parseQueryExpr "" query
>       aast :: QueryExpr
>       aast = typeCheckQueryExpr cat ast
>       ann :: Annotation
>       ann = getAnnotation aast
>       ty :: Maybe Type
>       ty = atype ann
>   print ty

Assume the database 'db' has the following table:

create table t (a int, b float);
$ TypeCheckDB "dbname=db"
Just (SetOfType (CompositeType [("a",ScalarType "int4"),("b",ScalarType "float8")]))

This uses some extra utils which are only available in the repo at the moment: CatalogReader

Show catalog

Bit rough at the moment:

> import System.Environment
> import Data.List
> import Text.Groom
> import Database.HsSqlPpp.Parser
> import Database.HsSqlPpp.TypeChecker
> import Database.HsSqlPpp.Catalog

> main :: IO ()
> main = do
>   [f] <- getArgs
>   Right ast <- parseStatementsFromFile f
>   let (cat,_) = typeCheckStatements defaultTemplate1Catalog ast
>       cc = deconstructCatalog cat \\ deconstructCatalog defaultTemplate1Catalog
>   putStrLn $ groom cc

Pass in the following sql file:

create table t (a int, b float);
create view v as select a from t;
$ ./ShowCatalog sc.sql
[CatCreateTable "t"
   [("a", ScalarType "int4"), ("b", ScalarType "float8")]
   [("tableoid", ScalarType "oid"), ("cmax", ScalarType "cid"),
    ("xmax", ScalarType "xid"), ("cmin", ScalarType "cid"),
    ("xmin", ScalarType "xid"), ("ctid", ScalarType "tid")],
 CatCreateView "v" [("a", ScalarType "int4")]]

Generating SQL

Here is a program which generates SQL:

> import Database.HsSqlPpp.Annotation
> import Database.HsSqlPpp.Ast
> import Database.HsSqlPpp.Pretty

> data MakeSelect = MakeSelect [String] String

> sqlGen :: MakeSelect -> QueryExpr
> sqlGen (MakeSelect cols tb) =
>   Select emptyAnnotation Dupes
>          sl tr
>          Nothing [] Nothing [] Nothing Nothing
>   where
>     sl = SelectList emptyAnnotation
>                     (map si cols)
>     tr = [Tref emptyAnnotation
>                (Name emptyAnnotation [Nmc tb])
>                (NoAlias emptyAnnotation)]
>     si i = SelExp emptyAnnotation
>                   (Identifier emptyAnnotation
>                               (Nmc i))

> main :: IO ()
> main = do
>   let s = MakeSelect ["a", "b"] "t"
>   putStrLn $ printQueryExpr $ sqlGen s
$ MakeSelect
select
    a, b
  from
    t;

Using quasiquotes

> {-# LANGUAGE QuasiQuotes #-}
> import Database.HsSqlPpp.Ast
> import Database.HsSqlPpp.Quote
> import Database.HsSqlPpp.Annotation
> import Database.HsSqlPpp.Pretty

> test :: Statement
> test = [sqlStmt|
>
>   create table $(tablename) (
>    $(varname) $(typename)
>   );
>
>         |]
>   where
>     tablename = "my_table"
>     varname = "my_field"
>     typename = "text"

> main :: IO ()
> main = putStrLn $ printStatements [test]

The output is:

$ QQ
create table my_table (
  my_field text
);

Transforming SQL

The TPC-H qgen program generates the SQL queries for the TPC-H benchmarks and includes an option for SQL Server. For some reason, it doesn't output the queries in syntax which SQL Server recognises. Here is a short program to take the qgen output and convert it into a format which SQL Server understands. (I haven't checked carefully to see if the produced SQL is correct.)

Convert qgen output into sql server format

> {-# LANGUAGE QuasiQuotes #-}
> import Data.Generics.Uniplate.Data

> import System.Environment
> import Data.Data

> import Database.HsSqlPpp.Parser
> import Database.HsSqlPpp.Ast
> import Database.HsSqlPpp.Pretty
> import Database.HsSqlPpp.Quote
> import Database.HsSqlPpp.Annotation

> main :: IO ()
> main = do
>   [fn] <- getArgs
>   f <- readFile fn
>   putStrLn $ fixSql f

> fixSql :: String -> String
> fixSql sql =
>   let qe = either (error . show) id $ parseStatements "" sql
>       qe' = fixSqlAst qe
>   in printStatements qe'

> fixSqlAst :: Data a => a -> a
> fixSqlAst = fixDate . fixSubstring . fixExtract . fixIntervals

 dateadd(day, -90,1998-12-01)
Instead of:
 date1998-12-01- interval90day

> fixIntervals :: Data a => a -> a
> fixIntervals = transformBi $ \x -> case x of
>   [sqlExpr| $(a) + $(b) |] | Just (i,v,d) <- dateInfo a b ->
>      [sqlExpr| dateAdd($i(i),$(v),$s(d))|]
>   [sqlExpr| $(a) - $(b) |]| Just (i,v,d) <- dateInfo a b ->
>      [sqlExpr| dateAdd($i(i),-$(v),$s(d))|]
>   x' -> x'
>   where
>     dateInfo (TypedStringLit _ (SimpleTypeName _ "date") d)
>              (Interval _ v i _)
>              | Just i' <- lookup i [(IntervalDay,"day")
>                                    ,(IntervalMonth,"month")
>                                    ,(IntervalYear,"year")]
>              = Just (i',NumberLit emptyAnnotation v,d)
>     dateInfo _ _ = Nothing

 datepart(year,l_shipdate)
Instead of:
 extract(year from l_shipdate)

> fixExtract :: Data a => a -> a
> fixExtract = transformBi $ \x -> case x of
>   [sqlExpr| extract(year from $(expr) ) |] ->
>       [sqlExpr| datepart(year,$(expr)) |]
>   x' -> x'


 substring(c_phone,1,2)
Instead of:
 substring(c_phone from 1 for 2)

> fixSubstring :: Data a => a -> a
> fixSubstring = transformBi $ \x -> case x of
>   [sqlExpr| substring($(i) from $(a) for $(b)) |] ->
>       [sqlExpr| substring($(i),$(a),$(b)) |]
>   x' -> x'1998-12-01Instead of:
 date1998-12-01> fixDate :: Data a => a -> a
> fixDate = transformBi $ \x -> case x of
>    TypedStringLit a (SimpleTypeName _ "date") d -> StringLit a d
>    x' -> x'

Example, a Q1 query:

select
        l_returnflag,
        l_linestatus,
        sum(l_quantity) as sum_qty,
        sum(l_extendedprice) as sum_base_price,
        sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
        sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
        avg(l_quantity) as avg_qty,
        avg(l_extendedprice) as avg_price,
        avg(l_discount) as avg_disc,
        count(*) as count_order
from
        lineitem
where
        l_shipdate <= date '1998-12-01' - interval '63' day (3)
group by
        l_returnflag,
        l_linestatus
order by
        l_returnflag,
        l_linestatus;

Output from running the program on this sql:

select
    l_returnflag,
    l_linestatus,
    sum(l_quantity) as sum_qty,
    sum(l_extendedprice) as sum_base_price,
    sum((l_extendedprice * (1 - l_discount))) as sum_disc_price,
    sum(((l_extendedprice * (1 - l_discount)) * (1 + l_tax))) as sum_charge,
    avg(l_quantity) as avg_qty,
    avg(l_extendedprice) as avg_price,
    avg(l_discount) as avg_disc,
    count(*) as count_order
  from
    lineitem
  where
    (l_shipdate <= dateAdd(day,(- (63)),'1998-12-01'))
  group by
    l_returnflag, l_linestatus
  order by
    l_returnflag asc, l_linestatus asc;