[go: up one dir, main page]

Menu

[r1]: / trunk / ImportXML.py  Maximize  Restore  History

Download this file

142 lines (125 with data), 5.0 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
"""Import data from an XML file into a table in an Oracle database."""
import cx_LoggingOptions
import cx_OptionParser
import cx_Oracle
import cx_OracleUtils
import cx_XML
import _strptime
import sys
import time
import xml.sax
import Options
# parse command line
parser = cx_OptionParser.OptionParser("ImportXML")
parser.AddOption(cx_OracleUtils.SchemaOption())
parser.AddOption(Options.ARRAY_SIZE)
parser.AddOption(Options.COMMIT_POINT)
parser.AddOption(Options.REPORT_POINT)
parser.AddOption(Options.SET_ROLE)
parser.AddOption(Options.DATE_FORMAT)
cx_LoggingOptions.AddOptions(parser)
parser.AddArgument("fileName", required = True,
help = "the name of the file from which to read the data or "
"'-' to read the exported data from stdin")
parser.AddArgument("tableName", required = True,
help = "the name of the table into which to import the data")
options = parser.Parse()
cx_LoggingOptions.ProcessOptions(options)
# define class for managing the import
class Handler(cx_XML.Parser):
def __init__(self, options):
self.connection = cx_OracleUtils.Connect(options.schema,
options.setRole)
self.cursor = self.connection.cursor()
if options.arraySize:
self.cursor.arraysize = options.arraySize
self.commitPoint = options.commitPoint
self.reportPoint = options.reportPoint
self.dateFormat = options.dateFormat
self.cursor.execute("select * from %s" % options.tableName)
self.columnIndexes = {}
self.dateColumns = {}
bindVars = []
bindVarNames = []
columnNames = []
for item in self.cursor.description:
name, dataType, size, internalSize, prec, scale, nullsOk = item
if dataType == cx_Oracle.DATETIME:
self.dateColumns[name.upper()] = None
else:
dataType = cx_Oracle.STRING
self.columnIndexes[name.upper()] = len(bindVars)
bindVars.append(self.cursor.var(dataType, size))
columnNames.append(name)
bindVarNames.append(":%s" % len(bindVars))
sql = "insert into %s (%s) values (%s)" % \
(options.tableName, ",".join(columnNames),
",".join(bindVarNames))
self.cursor.prepare(sql)
self.cursor.setinputsizes(*bindVars)
self.allowCustomTags = False
self.columnValue = None
self.rowsImported = 0
def characters(self, data):
if self.columnValue is not None:
self.columnValue += data
def endElement(self, name):
if self.columnValue is not None:
value = self.columnValue
if not value:
value = None
name = name.upper()
columnIndex = self.columnIndexes[name]
if value is not None and name in self.dateColumns:
dateValue = time.strptime(self.columnValue, self.dateFormat)
value = cx_Oracle.Timestamp(*dateValue[:6])
else:
value = str(self.columnValue)
self.row[columnIndex] = value
self.columnValue = None
self.allowCustomTags = True
else:
cx_XML.Parser.endElement(self, name)
def end_ROW(self):
self.rowsImported += 1
commit = (self.commitPoint \
and self.rowsImported % self.commitPoint == 0)
if commit or len(self.rowsToInsert) == self.cursor.arraysize:
self.cursor.executemany(None, self.rowsToInsert)
self.rowsToInsert = []
if commit:
self.connection.commit()
if self.reportPoint and self.rowsImported % self.reportPoint == 0:
print "%d rows imported." % self.rowsImported
self.allowCustomTags = False
def end_ROWSET(self):
if self.rowsToInsert:
self.cursor.executemany(None, self.rowsToInsert)
if self.commitPoint is None \
or self.rowsImported % self.commitPoint != 0:
self.connection.commit()
if self.reportPoint is None \
or self.rowsImported % self.reportPoint != 0:
print "%d rows imported." % self.rowsImported
def startElement(self, name, attrs):
if self.allowCustomTags:
self.columnValue = ""
self.allowCustomTags = False
else:
cx_XML.Parser.startElement(self, name, attrs)
def start_ROW(self, num = None):
self.row = [None] * len(self.columnIndexes)
self.rowsToInsert.append(self.row)
self.allowCustomTags = True
def start_ROWSET(self):
self.rowsToInsert = []
# parse the XML data stream
if options.fileName == "-":
inputFile = sys.stdin
else:
inputFile = file(options.fileName, "r")
handler = Handler(options)
parser = xml.sax.make_parser()
parser.setContentHandler(handler)
parser.parse(inputFile)
print >> sys.stderr, "Done."