org.jhuapl.edu.sages.etl.strategy
Class SagesOpenCsvJar

java.lang.Object
  extended by org.jhuapl.edu.sages.etl.strategy.SagesOpenCsvJar
Direct Known Subclasses:
DumbTestOpenCsvJar

public abstract class SagesOpenCsvJar
extends Object

SagesOpenCsvJar is the domain class and controls the execution of the overall ETL process. It contains an ETLStrategyTemplate object that implements most of the ETL processing logic.

Author:
POKUAM1

Field Summary
protected  File[] csvFiles
          List of files that ETL loads into the production table
protected  ArrayList<String[]> currentEntries
          Current file records that the ETL will load into the production table via SQL statements
protected  int currentRecNum
          TODO: not used yet
protected  String dbms
          target database connection settings
protected  String dbName
           
protected  Map<String,String> DEST_COLTYPE_MAP
          maps the destination columns to their sql-datatype qualifier for generating the schema
protected  Map<String,Integer> DEST_SQLTYPE_MAP
          maps the destination columns to their java.sql.Types for setting ? parameters on prepared statements
protected  String dst_table_name
           
protected  int errorFlag
          errorFlag to control what to do on certain errors
protected static String ETL_CLEANSE_TABLE
           
protected static String ETL_STAGING_DB
           
protected  ETLStrategyTemplate etlStrategy
          The ETLStrategyTemplate object
protected  File fileMarkedForDeletion
          TODO: not used yet.
protected  String[] header_src
          header columns used to define the CLEANSE table schema
protected  String inputdir_csvfiles
          ETL looks at the input directory for files to process
protected  Map<String,String> MAPPING_MAP
          maps the source:destination columns
protected  Map<String,String> MAPPING_REV_MAP
          maps the destination:source columns
protected  String outputdir_csvfiles
          ETL moves successfully processed files to the output directory
protected  Map<String,Integer> PARAMINDX_DST
          maps destination column name to its parameter index in the destination table, indexing starts at 1
protected  Map<String,Integer> PARAMINDX_SRC
          maps source column name to its parameter index in the source table, indexing starts at 1
protected  String password
           
protected  int portNumber
           
protected  String prod_table_name
           
protected  Properties props_customsql_cleanse
           
protected  Properties props_customsql_final_to_prod
           
protected  Properties props_customsql_staging
           
protected  Properties props_dateformats
           
protected  Properties props_etlconfig
          properties holders
protected  Properties props_mappings
           
protected  Map<String,Savepoint> savepoints
          The savepoints
protected  String serverName
           
protected  String src_table_name
           
protected  String userName
           
 
Constructor Summary
SagesOpenCsvJar()
           
 
Method Summary
static SagesEtlException abort(String msg, Throwable e)
          returns a SagesEtlException that wraps the original exception
 void alterCleanseTableAddFlagColumn(Connection c, Savepoint save1, Savepoint createCleanseSavepoint)
           
 void alterStagingTableAddFlagColumn(Connection c, Savepoint save1, Savepoint createCleanseSavepoint)
           
 Savepoint buildCleanseTable(Connection c, SagesOpenCsvJar socj, Savepoint save1)
           
 Savepoint buildEtlStatusTable(Connection c, SagesOpenCsvJar socj, Savepoint save1)
           
 String buildInsertIntoCleansingTableSql(Connection c, SagesOpenCsvJar socj)
           
 Savepoint buildStagingTable(Connection c, SagesOpenCsvJar socj, Savepoint save1)
           
static void closeDbConnection(Connection c)
           
 void copyFromCleanseToStaging(Connection c, SagesOpenCsvJar socj, Savepoint save2)
           
 int errorCleanup(SagesOpenCsvJar socj, Savepoint savepoint, Connection connection, File currentCsv, String failedDirPath, Exception e)
           
protected static File etlMoveFile(File file, File destinationDir)
          Copies file to the designated destination directory, and then deletes it from its original location.
 void extractHeaderColumns(SagesOpenCsvJar socj)
           
 void generateSourceDestMappings(SagesOpenCsvJar socj)
           
 Connection getConnection()
          Establishes database connection to the target database
 File getCurrentFile()
           
static List<File> getFailedCsvFiles()
           
 String getFaileddir_csvfiles()
           
static SimpleDateFormat getSimpleDateFormat()
           
protected  void initializeProperties(ETLProperties etlProperties)
          Initializes the SagesOpenCsvJar's ETL properties
 boolean isSuccess()
           
static void logFileOutcome(SagesOpenCsvJar socj, Connection c, File file, String outcome)
           
protected static void runCustomSql(Connection c, Properties customSql, String targetTableName)
           
 void setAndExecuteInsertIntoCleansingTablePreparedStatement(Connection c, SagesOpenCsvJar socj, ArrayList<String[]> entries_rawdata, Savepoint save2, PreparedStatement ps_INSERT_CLEANSE)
           
 void setCurrentFile(File currentFile)
           
 void setEtlStrategy(ETLStrategyTemplate strategy)
           
static void setFailedCsvFiles(List<File> failedCsvFiles)
           
 void setFaileddir_csvfiles(String faileddir_csvfiles)
           
 void setSuccess(boolean success)
           
 void truncateCleanseAndStagingTables(DumbTestOpenCsvJar socj_dumb, Connection c, File file, Savepoint baseLine)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

etlStrategy

protected ETLStrategyTemplate etlStrategy
The ETLStrategyTemplate object


savepoints

protected Map<String,Savepoint> savepoints
The savepoints


csvFiles

protected File[] csvFiles
List of files that ETL loads into the production table


currentEntries

protected ArrayList<String[]> currentEntries
Current file records that the ETL will load into the production table via SQL statements


fileMarkedForDeletion

protected File fileMarkedForDeletion
TODO: not used yet. The current file marked for deletion due to an error in processing


currentRecNum

protected int currentRecNum
TODO: not used yet


inputdir_csvfiles

protected String inputdir_csvfiles
ETL looks at the input directory for files to process


outputdir_csvfiles

protected String outputdir_csvfiles
ETL moves successfully processed files to the output directory


ETL_CLEANSE_TABLE

protected static final String ETL_CLEANSE_TABLE
See Also:
Constant Field Values

ETL_STAGING_DB

protected static final String ETL_STAGING_DB
See Also:
Constant Field Values

src_table_name

protected String src_table_name

dst_table_name

protected String dst_table_name

prod_table_name

protected String prod_table_name

DEST_COLTYPE_MAP

protected Map<String,String> DEST_COLTYPE_MAP
maps the destination columns to their sql-datatype qualifier for generating the schema


DEST_SQLTYPE_MAP

protected Map<String,Integer> DEST_SQLTYPE_MAP
maps the destination columns to their java.sql.Types for setting ? parameters on prepared statements


MAPPING_MAP

protected Map<String,String> MAPPING_MAP
maps the source:destination columns


MAPPING_REV_MAP

protected Map<String,String> MAPPING_REV_MAP
maps the destination:source columns


props_etlconfig

protected Properties props_etlconfig
properties holders


props_mappings

protected Properties props_mappings

props_dateformats

protected Properties props_dateformats

props_customsql_cleanse

protected Properties props_customsql_cleanse

props_customsql_staging

protected Properties props_customsql_staging

props_customsql_final_to_prod

protected Properties props_customsql_final_to_prod

dbms

protected String dbms
target database connection settings


portNumber

protected int portNumber

serverName

protected String serverName

dbName

protected String dbName

userName

protected String userName

password

protected String password

PARAMINDX_SRC

protected Map<String,Integer> PARAMINDX_SRC
maps source column name to its parameter index in the source table, indexing starts at 1


PARAMINDX_DST

protected Map<String,Integer> PARAMINDX_DST
maps destination column name to its parameter index in the destination table, indexing starts at 1


header_src

protected String[] header_src
header columns used to define the CLEANSE table schema


errorFlag

protected int errorFlag
errorFlag to control what to do on certain errors

Constructor Detail

SagesOpenCsvJar

public SagesOpenCsvJar()
                throws SagesEtlException
Throws:
SagesEtlException
Method Detail

setEtlStrategy

public void setEtlStrategy(ETLStrategyTemplate strategy)

extractHeaderColumns

public void extractHeaderColumns(SagesOpenCsvJar socj)
                          throws FileNotFoundException,
                                 IOException
Throws:
FileNotFoundException
IOException

buildCleanseTable

public Savepoint buildCleanseTable(Connection c,
                                   SagesOpenCsvJar socj,
                                   Savepoint save1)
                            throws SQLException,
                                   SagesEtlException
Throws:
SQLException
SagesEtlException

buildEtlStatusTable

public Savepoint buildEtlStatusTable(Connection c,
                                     SagesOpenCsvJar socj,
                                     Savepoint save1)
                              throws SQLException,
                                     SagesEtlException
Throws:
SQLException
SagesEtlException

truncateCleanseAndStagingTables

public void truncateCleanseAndStagingTables(DumbTestOpenCsvJar socj_dumb,
                                            Connection c,
                                            File file,
                                            Savepoint baseLine)
                                     throws SagesEtlException,
                                            SQLException
Throws:
SagesEtlException
SQLException

buildStagingTable

public Savepoint buildStagingTable(Connection c,
                                   SagesOpenCsvJar socj,
                                   Savepoint save1)
                            throws SQLException,
                                   SagesEtlException
Throws:
SQLException
SagesEtlException

generateSourceDestMappings

public void generateSourceDestMappings(SagesOpenCsvJar socj)

setAndExecuteInsertIntoCleansingTablePreparedStatement

public void setAndExecuteInsertIntoCleansingTablePreparedStatement(Connection c,
                                                                   SagesOpenCsvJar socj,
                                                                   ArrayList<String[]> entries_rawdata,
                                                                   Savepoint save2,
                                                                   PreparedStatement ps_INSERT_CLEANSE)
                                                            throws SQLException
Throws:
SQLException

buildInsertIntoCleansingTableSql

public String buildInsertIntoCleansingTableSql(Connection c,
                                               SagesOpenCsvJar socj)
                                        throws SQLException
Throws:
SQLException

copyFromCleanseToStaging

public void copyFromCleanseToStaging(Connection c,
                                     SagesOpenCsvJar socj,
                                     Savepoint save2)
                              throws SQLException,
                                     SagesEtlException
Throws:
SQLException
SagesEtlException

errorCleanup

public int errorCleanup(SagesOpenCsvJar socj,
                        Savepoint savepoint,
                        Connection connection,
                        File currentCsv,
                        String failedDirPath,
                        Exception e)

alterCleanseTableAddFlagColumn

public void alterCleanseTableAddFlagColumn(Connection c,
                                           Savepoint save1,
                                           Savepoint createCleanseSavepoint)
                                    throws SQLException,
                                           SagesEtlException
Throws:
SQLException
SagesEtlException

alterStagingTableAddFlagColumn

public void alterStagingTableAddFlagColumn(Connection c,
                                           Savepoint save1,
                                           Savepoint createCleanseSavepoint)
                                    throws SQLException,
                                           SagesEtlException
Throws:
SQLException
SagesEtlException

initializeProperties

protected void initializeProperties(ETLProperties etlProperties)
                             throws SagesEtlException
Initializes the SagesOpenCsvJar's ETL properties

Parameters:
etlProperties - the ETL properties; these are configured by updating the set of ETL properties files
Throws:
SagesEtlException - if a property doesn't exist, or an issue occurs while loading a properties file

getConnection

public Connection getConnection()
                         throws SagesEtlException
Establishes database connection to the target database

Returns:
Connection
Throws:
SQLException
SagesEtlException

etlMoveFile

protected static File etlMoveFile(File file,
                                  File destinationDir)
                           throws IOException
Copies file to the designated destination directory, and then deletes it from its original location. On failure, the file is copied to the FAILED directory; on success, it is copied to the OUT directory.

Parameters:
file - the file to copy and then delete from its original location
destinationDir - the directory the file is copied into
Throws:
IOException

runCustomSql

protected static void runCustomSql(Connection c,
                                   Properties customSql,
                                   String targetTableName)
                            throws SQLException
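Parameters:
c - database connection
customSql - properties holding the custom SQL to run (TODO: confirm expected key/value format)
targetTableName - name of the target table (presumably the table the custom SQL is run against; TODO confirm)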
Throws:
SQLException

logFileOutcome

public static void logFileOutcome(SagesOpenCsvJar socj,
                                  Connection c,
                                  File file,
                                  String outcome)
                           throws SagesEtlException
Parameters:
socj - TODO
c - database connection
file - the file whose outcome is being logged
outcome - the outcome to log for the file
Throws:
SagesEtlException
SQLException

closeDbConnection

public static void closeDbConnection(Connection c)
Parameters:
c - database connection

abort

public static SagesEtlException abort(String msg,
                                      Throwable e)
returns a SagesEtlException that wraps the original exception

Parameters:
msg - SAGES ETL message to display
e - the original exception
Returns:
SagesEtlException

getFaileddir_csvfiles

public String getFaileddir_csvfiles()

setFaileddir_csvfiles

public void setFaileddir_csvfiles(String faileddir_csvfiles)

getCurrentFile

public File getCurrentFile()

setCurrentFile

public void setCurrentFile(File currentFile)

getFailedCsvFiles

public static List<File> getFailedCsvFiles()

setFailedCsvFiles

public static void setFailedCsvFiles(List<File> failedCsvFiles)

isSuccess

public boolean isSuccess()

setSuccess

public void setSuccess(boolean success)

getSimpleDateFormat

public static SimpleDateFormat getSimpleDateFormat()


Copyright © 2013 Johns Hopkins University Applied Physics Laboratory. All rights reserved.