| Modifier and Type | Class and Description |
|---|---|
static class |
ReaderImpl.StripeInformationImpl |
Reader.Options| Modifier and Type | Field and Description |
|---|---|
protected int |
bufferSize |
protected CompressionKind |
compressionKind |
protected Configuration |
conf |
protected FileSystem |
fileSystem |
protected OrcProto.Metadata |
metadata |
protected Path |
path |
protected int |
rowIndexStride |
protected OrcTail |
tail |
protected List<OrcProto.Type> |
types |
protected boolean |
useUTCTimestamp |
| Constructor and Description |
|---|
ReaderImpl(Path path,
OrcFile.ReaderOptions options)
Constructor that lets the user specify additional options.
|
| Modifier and Type | Method and Description |
|---|---|
protected static void |
checkOrcVersion(Path path,
OrcProto.PostScript postscript)
Check to see if this ORC file is from a future version and if so,
warn the user that we may not be able to read all of the column encodings.
|
static ColumnStatistics[] |
deserializeStats(TypeDescription schema,
List<OrcProto.ColumnStatistics> fileStats) |
protected static void |
ensureOrcFooter(ByteBuffer buffer,
int psLen)
Ensure this is an ORC file to prevent users from trying to read text
files or RC files as ORC files.
|
protected static void |
ensureOrcFooter(FSDataInputStream in,
Path path,
int psLen,
ByteBuffer buffer)
Ensure this is an ORC file to prevent users from trying to read text
files or RC files as ORC files.
|
static OrcTail |
extractFileTail(ByteBuffer buffer) |
static OrcTail |
extractFileTail(ByteBuffer buffer,
long fileLength,
long modificationTime) |
protected OrcTail |
extractFileTail(FileSystem fs,
Path path,
long maxFileLength) |
static OrcProto.Metadata |
extractMetadata(ByteBuffer bb,
int metadataAbsPos,
int metadataSize,
CompressionCodec codec,
int bufferSize) |
CompressionKind |
getCompressionKind()
Get the compression kind.
|
int |
getCompressionSize()
Get the buffer size for the compression.
|
long |
getContentLength()
Get the length of the file.
|
OrcProto.FileTail |
getFileTail()
Get the file tail (footer + postscript)
|
OrcFile.Version |
getFileVersion()
Get the file format version.
|
static OrcFile.Version |
getFileVersion(List<Integer> versionList) |
List<String> |
getMetadataKeys()
Get the user metadata keys.
|
int |
getMetadataSize() |
ByteBuffer |
getMetadataValue(String key)
Get a user metadata value.
|
long |
getNumberOfRows()
Get the number of rows in the file.
|
List<OrcProto.ColumnStatistics> |
getOrcProtoFileStatistics() |
List<OrcProto.StripeStatistics> |
getOrcProtoStripeStatistics() |
List<OrcProto.UserMetadataItem> |
getOrcProtoUserMetadata() |
long |
getRawDataSize()
Get the deserialized data size of the file
|
long |
getRawDataSizeFromColIndices(List<Integer> colIndices)
Get the deserialized data size of the specified column ids.
|
static long |
getRawDataSizeFromColIndices(List<Integer> colIndices,
List<OrcProto.Type> types,
List<OrcProto.ColumnStatistics> stats) |
long |
getRawDataSizeOfColumns(List<String> colNames)
Get the deserialized data size of the specified columns
|
int |
getRowIndexStride()
Get the number of rows per entry in the row index.
|
TypeDescription |
getSchema()
Get the type of rows in this ORC file.
|
ByteBuffer |
getSerializedFileFooter() |
ColumnStatistics[] |
getStatistics()
Get the statistics about the columns in the file.
|
List<StripeInformation> |
getStripes()
Get the list of stripes.
|
List<StripeStatistics> |
getStripeStatistics() |
List<OrcProto.Type> |
getTypes()
Get the list of types contained in the file.
|
List<Integer> |
getVersionList() |
OrcFile.WriterVersion |
getWriterVersion()
Get the version of the writer of this file.
|
static OrcFile.WriterVersion |
getWriterVersion(int writerVersion)
Get the WriterVersion based on the ORC file postscript.
|
boolean |
hasMetadataValue(String key)
Did the user set the given metadata value.
|
Reader.Options |
options()
Create a default options object that can be customized for creating
a RecordReader.
|
RecordReader |
rows()
Create a RecordReader that reads everything with the default options.
|
RecordReader |
rows(Reader.Options options)
Create a RecordReader that uses the options given.
|
String |
toString() |
protected final FileSystem fileSystem
protected final Path path
protected final CompressionKind compressionKind
protected int bufferSize
protected OrcProto.Metadata metadata
protected final List<OrcProto.Type> types
protected final int rowIndexStride
protected final Configuration conf
protected final boolean useUTCTimestamp
protected OrcTail tail
public ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException
path - pathname for file; options - options for reading; throws IOException. public long getNumberOfRows()
ReadergetNumberOfRows in interface Readerpublic List<String> getMetadataKeys()
ReadergetMetadataKeys in interface Readerpublic ByteBuffer getMetadataValue(String key)
ReadergetMetadataValue in interface Readerkey - a key given by the userpublic boolean hasMetadataValue(String key)
ReaderhasMetadataValue in interface Readerkey - the key to checkpublic CompressionKind getCompressionKind()
ReadergetCompressionKind in interface Readerpublic int getCompressionSize()
ReadergetCompressionSize in interface Readerpublic List<StripeInformation> getStripes()
ReadergetStripes in interface Readerpublic long getContentLength()
ReadergetContentLength in interface Readerpublic List<OrcProto.Type> getTypes()
Readerpublic static OrcFile.Version getFileVersion(List<Integer> versionList)
public OrcFile.Version getFileVersion()
ReadergetFileVersion in interface Readerpublic OrcFile.WriterVersion getWriterVersion()
ReadergetWriterVersion in interface Readerpublic OrcProto.FileTail getFileTail()
ReadergetFileTail in interface Readerpublic int getRowIndexStride()
ReadergetRowIndexStride in interface Readerpublic ColumnStatistics[] getStatistics()
ReadergetStatistics in interface Readerpublic static ColumnStatistics[] deserializeStats(TypeDescription schema, List<OrcProto.ColumnStatistics> fileStats)
public TypeDescription getSchema()
Readerprotected static void ensureOrcFooter(FSDataInputStream in, Path path, int psLen, ByteBuffer buffer) throws IOException
in - the file being readpath - the filename for error messagespsLen - the postscript lengthbuffer - the tail of the fileIOExceptionprotected static void ensureOrcFooter(ByteBuffer buffer, int psLen) throws IOException
psLen - the postscript lengthbuffer - the tail of the fileIOExceptionprotected static void checkOrcVersion(Path path, OrcProto.PostScript postscript) throws IOException
path - the data source path for error messagespostscript - the parsed postscriptIOExceptionpublic static OrcFile.WriterVersion getWriterVersion(int writerVersion)
writerVersion - the integer writer versionpublic static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos, int metadataSize, CompressionCodec codec, int bufferSize) throws IOException
IOExceptionpublic static OrcTail extractFileTail(ByteBuffer buffer) throws IOException
IOExceptionpublic static OrcTail extractFileTail(ByteBuffer buffer, long fileLength, long modificationTime) throws IOException
IOExceptionprotected OrcTail extractFileTail(FileSystem fs, Path path, long maxFileLength) throws IOException
IOExceptionpublic ByteBuffer getSerializedFileFooter()
getSerializedFileFooter in interface Readerpublic Reader.Options options()
Readerpublic RecordReader rows() throws IOException
Readerrows in interface ReaderIOExceptionpublic RecordReader rows(Reader.Options options) throws IOException
Readerrows in interface Readeroptions - the options to read withIOExceptionpublic long getRawDataSize()
ReadergetRawDataSize in interface Readerpublic long getRawDataSizeFromColIndices(List<Integer> colIndices)
Reader: getRawDataSizeFromColIndices in interface Reader; colIndices - internal column id (check orcfiledump for column ids). public static long getRawDataSizeFromColIndices(List<Integer> colIndices, List<OrcProto.Type> types, List<OrcProto.ColumnStatistics> stats)
public long getRawDataSizeOfColumns(List<String> colNames)
ReadergetRawDataSizeOfColumns in interface Readerpublic List<OrcProto.StripeStatistics> getOrcProtoStripeStatistics()
getOrcProtoStripeStatistics in interface Readerpublic List<OrcProto.ColumnStatistics> getOrcProtoFileStatistics()
getOrcProtoFileStatistics in interface Readerpublic List<StripeStatistics> getStripeStatistics() throws IOException
getStripeStatistics in interface ReaderIOExceptionpublic List<OrcProto.UserMetadataItem> getOrcProtoUserMetadata()
public List<Integer> getVersionList()
getVersionList in interface Readerpublic int getMetadataSize()
getMetadataSize in interface ReaderCopyright © 2013–2018 The Apache Software Foundation. All rights reserved.