diff --git a/CHANGELOG b/CHANGELOG index 4081f7c7aaa72bca40a5615f1925907e8186a71b..32f2232fae739540a9f612ac618cff4076a9f09f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,70 @@ +orcus 0.18.0 + +* general + + * fixed the flat output mode to properly calculate the lengths of UTF-8 + encoded strings. + + * replaced all uses of std::strtol() to parse_integer() to properly parse + strings that are not necessarily null-terminated. + + * added a new output format type 'debug-state' which dumps the internal + state of the populated document model in detail. This can be useful + during debugging. + + * separated the import_shared_string interface implementation from the + backend shared strings store per separation of responsibility. + + * merged the foo_t and foo_active_t struct pair, such as font_t and + font_active_t, in the styles store into a single type using std::optional. + +* ods + + * reimplemented the number format styles import to correctly keep track of + element stacks and correctly perform structure checks to detect malformed + documents. + + * added new interface to import named styles applied to columns. + + * added new interface to import attributes for asian and complex scripts for + the folloiwng font attributes: + + * font name + + * font size + + * font style + + * font weight + + * re-designed the styles import interface to make it multi-level. + + * re-worked the import of the style:text-underline-width attribute to make + its handling more in line with the specifications. + +* xls-xml + + * added support for importing wrap-text and shrink-to-fit cell format + attributes. + + * added support for importing cell-hidden and locked attributes. + + * added support for importing direct and named cell formats applied to + columns and rows. + +* xlsx + + * added support for importing wrap-text and shrink-to-fit cell format + attributes. + + * added support for importing direct and named cell formats applied to + columns and rows. 
+ +* xml-map + + * added a new interface to pass the encoding information to the document + model so that it can correctly decode non-UTF-8-encoded string values. + orcus 0.17.2 * ods diff --git a/CMakeLists.txt b/CMakeLists.txt index 5547ee34a003ec36bb3ddd00b4abc82d2bb44cd7..e2915061d4b566df1f1370a97dcd7d99aa80ff53 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 3.12) cmake_policy(SET CMP0048 NEW) set(ORCUS_MAJOR_VERSION 0) -set(ORCUS_MINOR_VERSION 17) -set(ORCUS_MICRO_VERSION 99) +set(ORCUS_MINOR_VERSION 18) +set(ORCUS_MICRO_VERSION 0) set(ORCUS_MAJOR_API_VERSION 0) set(ORCUS_MINOR_API_VERSION 18) set(ORCUS_VERSION ${ORCUS_MAJOR_VERSION}.${ORCUS_MINOR_VERSION}.${ORCUS_MICRO_VERSION}) diff --git a/Makefile.am b/Makefile.am index 04ffeb35ae567bf1b786a508c947874db2599d3b..f9faa49a0004e4e805b556f4d744120ba10beda1 100644 --- a/Makefile.am +++ b/Makefile.am @@ -154,129 +154,135 @@ test_data = \ test/json-structure/repeat-objects/input.json \ test/json-structure/arrays-in-object/check.txt \ test/json-structure/arrays-in-object/input.json \ - test/xlsx/date-cell/input.xlsx \ + test/xlsx/borders/directions.xlsx \ + test/xlsx/borders/grid-box.xlsx \ + test/xlsx/borders/single-cells.xlsx \ + test/xlsx/borders/colors.xlsx \ + test/xlsx/boolean-values/check.txt \ + test/xlsx/boolean-values/input.xlsx \ + test/xlsx/hidden-rows-columns/input.xlsx \ + test/xlsx/raw-values-1/check.txt \ + test/xlsx/raw-values-1/input.xlsx \ + test/xlsx/revision/cell-change-basic.xlsx \ + test/xlsx/styles/column-styles.xlsx \ + test/xlsx/styles/direct-format.xlsx \ + test/xlsx/formula-shared/check.txt \ + test/xlsx/formula-shared/input.xlsx \ + test/xlsx/text-alignment/input.xlsx \ + test/xlsx/matrix-results/input.xlsx \ test/xlsx/named-expression/check.txt \ test/xlsx/named-expression/input.xlsx \ - test/xlsx/revision/cell-change-basic.xlsx \ test/xlsx/formula-array-1/check.txt \ test/xlsx/formula-array-1/input.xlsx \ + test/xlsx/test.xlsx \ + 
test/xlsx/view/cursor-split-pane.xlsx \ + test/xlsx/view/frozen-pane.xlsx \ + test/xlsx/view/cursor-per-sheet.xlsx \ test/xlsx/formula-cells/check.txt \ test/xlsx/formula-cells/input.xlsx \ - test/xlsx/empty-shared-strings/check.txt \ - test/xlsx/empty-shared-strings/input.xlsx \ + test/xlsx/cell-properties/wrap-and-shrink.xlsx \ + test/xlsx/formula-with-string-results/check.txt \ + test/xlsx/formula-with-string-results/input.xlsx \ + test/xlsx/named-expression-relative/input.xlsx \ test/xlsx/data-table/one-variable.xlsx \ test/xlsx/data-table/multi-table.xlsx \ + test/xlsx/number-format/date-time.xlsx \ + test/xlsx/empty-shared-strings/check.txt \ + test/xlsx/empty-shared-strings/input.xlsx \ + test/xlsx/column-width-row-height/input.xlsx \ + test/xlsx/merged-cells/simple.xlsx \ + test/xlsx/date-time/input.xlsx \ + test/xlsx/date-cell/input.xlsx \ test/xlsx/doc-structure/unordered-sheet-positions.xlsx \ - test/xlsx/formatted-text/colored-text.xlsx \ - test/xlsx/formatted-text/bold-and-italic.xlsx \ - test/xlsx/matrix-results/input.xlsx \ - test/xlsx/text-alignment/input.xlsx \ - test/xlsx/boolean-values/check.txt \ - test/xlsx/boolean-values/input.xlsx \ test/xlsx/named-expression-sheet-local/check.txt \ test/xlsx/named-expression-sheet-local/input.xlsx \ - test/xlsx/hidden-rows-columns/input.xlsx \ - test/xlsx/borders/directions.xlsx \ - test/xlsx/borders/colors.xlsx \ - test/xlsx/borders/single-cells.xlsx \ - test/xlsx/borders/grid-box.xlsx \ - test/xlsx/number-format/date-time.xlsx \ - test/xlsx/formula-shared/check.txt \ - test/xlsx/formula-shared/input.xlsx \ - test/xlsx/formula-with-string-results/check.txt \ - test/xlsx/formula-with-string-results/input.xlsx \ - test/xlsx/merged-cells/simple.xlsx \ - test/xlsx/raw-values-1/check.txt \ - test/xlsx/raw-values-1/input.xlsx \ - test/xlsx/column-width-row-height/input.xlsx \ - test/xlsx/test.xlsx \ - test/xlsx/formula-simple.xlsx \ - test/xlsx/pivot-table/two-pivot-caches.xlsx \ - 
test/xlsx/pivot-table/group-by-numbers.xlsx \ - test/xlsx/pivot-table/three-pivot-tables-on-one-sheet.xlsx \ - test/xlsx/pivot-table/group-by-dates.xlsx \ + test/xlsx/background-color/standard.xlsx \ + test/xlsx/table/table-1.xlsx \ + test/xlsx/table/table-2.xlsx \ + test/xlsx/table/autofilter.xlsx \ + test/xlsx/table/autofilter-text-filter-1.xlsx \ + test/xlsx/formatted-text/colored-text.xlsx \ + test/xlsx/formatted-text/bold-and-italic.xlsx \ + test/xlsx/pivot-table/error-values.xlsx \ test/xlsx/pivot-table/chart-simple.xlsx \ - test/xlsx/pivot-table/group-field.xlsx \ test/xlsx/pivot-table/mixed-type-field.xlsx \ - test/xlsx/pivot-table/error-values.xlsx \ + test/xlsx/pivot-table/group-field.xlsx \ + test/xlsx/pivot-table/three-pivot-tables-on-one-sheet.xlsx \ + test/xlsx/pivot-table/two-pivot-caches.xlsx \ + test/xlsx/pivot-table/group-by-dates.xlsx \ + test/xlsx/pivot-table/group-by-numbers.xlsx \ test/xlsx/pivot-table/two-tables-one-source.xlsx \ test/xlsx/pivot-table/many-fields.xlsx \ - test/xlsx/background-color/standard.xlsx \ - test/xlsx/view/cursor-per-sheet.xlsx \ - test/xlsx/view/cursor-split-pane.xlsx \ - test/xlsx/view/frozen-pane.xlsx \ - test/xlsx/table/table-2.xlsx \ - test/xlsx/table/autofilter-text-filter-1.xlsx \ - test/xlsx/table/table-1.xlsx \ - test/xlsx/table/autofilter.xlsx \ - test/xlsx/date-time/input.xlsx \ - test/xlsx/named-expression-relative/input.xlsx \ - test/xml-mapped/attribute-basic/check-nomap.txt \ - test/xml-mapped/attribute-basic/check.txt \ + test/xlsx/formula-simple.xlsx \ test/xml-mapped/attribute-basic/input.xml \ + test/xml-mapped/attribute-basic/check.txt \ test/xml-mapped/attribute-basic/map.xml \ - test/xml-mapped/attribute-namespace-2/check-nomap.txt \ - test/xml-mapped/attribute-namespace-2/check.txt \ - test/xml-mapped/attribute-namespace-2/input.xml \ - test/xml-mapped/attribute-namespace-2/map.xml \ - test/xml-mapped/attribute-namespace/check-nomap.txt \ - test/xml-mapped/attribute-namespace/check.txt \ - 
test/xml-mapped/attribute-namespace/input.xml \ - test/xml-mapped/attribute-namespace/map.xml \ - test/xml-mapped/attribute-range-self-close/check-nomap.txt \ - test/xml-mapped/attribute-range-self-close/check.txt \ - test/xml-mapped/attribute-range-self-close/input.xml \ - test/xml-mapped/attribute-range-self-close/map.xml \ - test/xml-mapped/attribute-single-element-2/check.txt \ - test/xml-mapped/attribute-single-element-2/input.xml \ - test/xml-mapped/attribute-single-element-2/map.xml \ - test/xml-mapped/attribute-single-element/check.txt \ - test/xml-mapped/attribute-single-element/input.xml \ - test/xml-mapped/attribute-single-element/map.xml \ - test/xml-mapped/content-basic/check-nomap.txt \ - test/xml-mapped/content-basic/check.txt \ - test/xml-mapped/content-basic/input.xml \ - test/xml-mapped/content-basic/map.xml \ - test/xml-mapped/content-namespace-2/check-nomap.txt \ - test/xml-mapped/content-namespace-2/check.txt \ + test/xml-mapped/attribute-basic/check-nomap.txt \ test/xml-mapped/content-namespace-2/input.xml \ + test/xml-mapped/content-namespace-2/check.txt \ test/xml-mapped/content-namespace-2/map.xml \ - test/xml-mapped/content-namespace-3/check-nomap.txt \ - test/xml-mapped/content-namespace-3/check.txt \ - test/xml-mapped/content-namespace-3/input.xml \ - test/xml-mapped/content-namespace-3/map.xml \ - test/xml-mapped/content-namespace/check-nomap.txt \ - test/xml-mapped/content-namespace/check.txt \ - test/xml-mapped/content-namespace/input.xml \ - test/xml-mapped/content-namespace/map.xml \ - test/xml-mapped/custom-labels-2/check.txt \ + test/xml-mapped/content-namespace-2/check-nomap.txt \ test/xml-mapped/custom-labels-2/input.xml \ + test/xml-mapped/custom-labels-2/check.txt \ test/xml-mapped/custom-labels-2/map.xml \ - test/xml-mapped/custom-labels/check.txt \ - test/xml-mapped/custom-labels/input.xml \ - test/xml-mapped/custom-labels/map.xml \ - test/xml-mapped/fuel-economy/check-nomap.txt \ - test/xml-mapped/fuel-economy/check.txt \ - 
test/xml-mapped/fuel-economy/input.xml \ - test/xml-mapped/fuel-economy/map.xml \ - test/xml-mapped/invalids/map-defs/non-leaf-element-linked.xml \ - test/xml-mapped/invalids/map-defs/not-xml.xml \ - test/xml-mapped/nested-repeats-2/check-nomap.txt \ - test/xml-mapped/nested-repeats-2/check.txt \ - test/xml-mapped/nested-repeats-2/input.xml \ - test/xml-mapped/nested-repeats-2/map.xml \ - test/xml-mapped/nested-repeats-3/check-nomap.txt \ - test/xml-mapped/nested-repeats-3/check.txt \ + test/xml-mapped/content-namespace-3/input.xml \ + test/xml-mapped/content-namespace-3/check.txt \ + test/xml-mapped/content-namespace-3/map.xml \ + test/xml-mapped/content-namespace-3/check-nomap.txt \ + test/xml-mapped/attribute-single-element/input.xml \ + test/xml-mapped/attribute-single-element/check.txt \ + test/xml-mapped/attribute-single-element/map.xml \ + test/xml-mapped/attribute-range-self-close/input.xml \ + test/xml-mapped/attribute-range-self-close/check.txt \ + test/xml-mapped/attribute-range-self-close/map.xml \ + test/xml-mapped/attribute-range-self-close/check-nomap.txt \ test/xml-mapped/nested-repeats-3/input.xml \ + test/xml-mapped/nested-repeats-3/check.txt \ test/xml-mapped/nested-repeats-3/map.xml \ - test/xml-mapped/nested-repeats-4/check-nomap.txt \ - test/xml-mapped/nested-repeats-4/check.txt \ + test/xml-mapped/nested-repeats-3/check-nomap.txt \ + test/xml-mapped/encoding/euc-jp.xml \ + test/xml-mapped/encoding/gbk.xml \ + test/xml-mapped/encoding/utf-8.xml \ + test/xml-mapped/invalids/map-defs/non-leaf-element-linked.xml \ + test/xml-mapped/invalids/map-defs/not-xml.xml \ + test/xml-mapped/attribute-namespace-2/input.xml \ + test/xml-mapped/attribute-namespace-2/check.txt \ + test/xml-mapped/attribute-namespace-2/map.xml \ + test/xml-mapped/attribute-namespace-2/check-nomap.txt \ + test/xml-mapped/content-namespace/input.xml \ + test/xml-mapped/content-namespace/check.txt \ + test/xml-mapped/content-namespace/map.xml \ + 
test/xml-mapped/content-namespace/check-nomap.txt \ test/xml-mapped/nested-repeats-4/input.xml \ + test/xml-mapped/nested-repeats-4/check.txt \ test/xml-mapped/nested-repeats-4/map.xml \ - test/xml-mapped/nested-repeats/check-nomap.txt \ - test/xml-mapped/nested-repeats/check.txt \ + test/xml-mapped/nested-repeats-4/check-nomap.txt \ + test/xml-mapped/attribute-single-element-2/input.xml \ + test/xml-mapped/attribute-single-element-2/check.txt \ + test/xml-mapped/attribute-single-element-2/map.xml \ test/xml-mapped/nested-repeats/input.xml \ + test/xml-mapped/nested-repeats/check.txt \ test/xml-mapped/nested-repeats/map.xml \ + test/xml-mapped/nested-repeats/check-nomap.txt \ + test/xml-mapped/fuel-economy/input.xml \ + test/xml-mapped/fuel-economy/check.txt \ + test/xml-mapped/fuel-economy/map.xml \ + test/xml-mapped/fuel-economy/check-nomap.txt \ + test/xml-mapped/content-basic/input.xml \ + test/xml-mapped/content-basic/check.txt \ + test/xml-mapped/content-basic/map.xml \ + test/xml-mapped/content-basic/check-nomap.txt \ + test/xml-mapped/nested-repeats-2/input.xml \ + test/xml-mapped/nested-repeats-2/check.txt \ + test/xml-mapped/nested-repeats-2/map.xml \ + test/xml-mapped/nested-repeats-2/check-nomap.txt \ + test/xml-mapped/custom-labels/input.xml \ + test/xml-mapped/custom-labels/check.txt \ + test/xml-mapped/custom-labels/map.xml \ + test/xml-mapped/attribute-namespace/input.xml \ + test/xml-mapped/attribute-namespace/check.txt \ + test/xml-mapped/attribute-namespace/map.xml \ + test/xml-mapped/attribute-namespace/check-nomap.txt \ test/xml/simple/check.txt \ test/xml/simple/input.xml \ test/xml/no-decl-1/check.txt \ @@ -690,86 +696,96 @@ test_data = \ test/yaml/invalids/2.yaml \ test/yaml/invalids/1.yaml \ test/yaml/literal-block-2/input.yaml \ - test/xls-xml/background-color/standard.xml \ - test/xls-xml/basic-utf-16-be/check.txt \ - test/xls-xml/basic-utf-16-be/input.xml \ - test/xls-xml/basic-utf-16-le/check.txt \ - 
test/xls-xml/basic-utf-16-le/input.xml \ - test/xls-xml/basic/check.txt \ - test/xls-xml/basic/input.xml \ - test/xls-xml/bold-and-italic/check.txt \ - test/xls-xml/bold-and-italic/input.xml \ test/xls-xml/borders/colors.xml \ - test/xls-xml/borders/directions.xml \ - test/xls-xml/borders/grid-box.xml \ test/xls-xml/borders/single-cells.xml \ - test/xls-xml/character-set/input.xml \ - test/xls-xml/colored-text/check.txt \ - test/xls-xml/colored-text/input.xml \ - test/xls-xml/column-width-row-height/input.xml \ - test/xls-xml/date-time/input.xml \ - test/xls-xml/empty-rows/check.txt \ - test/xls-xml/empty-rows/input.xml \ - test/xls-xml/formula-array-1/check.txt \ - test/xls-xml/formula-array-1/input.xml \ - test/xls-xml/formula-cells-1/check.txt \ + test/xls-xml/borders/grid-box.xml \ + test/xls-xml/borders/directions.xml \ test/xls-xml/formula-cells-1/input.xml \ - test/xls-xml/formula-cells-2/check.txt \ - test/xls-xml/formula-cells-2/config.yaml \ - test/xls-xml/formula-cells-2/input.xml \ - test/xls-xml/formula-cells-3/check.txt \ - test/xls-xml/formula-cells-3/input.xml \ + test/xls-xml/formula-cells-1/check.txt \ test/xls-xml/formula-cells-parse-error/input.xml \ + test/xls-xml/colored-text/input.xml \ + test/xls-xml/colored-text/check.txt \ test/xls-xml/hidden-rows-columns/input.xml \ - test/xls-xml/invalid-sub-structure/check.txt \ + test/xls-xml/raw-values-1/input.xml \ + test/xls-xml/raw-values-1/check.txt \ + test/xls-xml/styles/direct-format.xml \ + test/xls-xml/styles/column-styles.xml \ test/xls-xml/invalid-sub-structure/input.xml \ - test/xls-xml/leading-whitespace/check.txt \ - test/xls-xml/leading-whitespace/input.xml \ + test/xls-xml/invalid-sub-structure/check.txt \ + test/xls-xml/basic-utf-16-be/input.xml \ + test/xls-xml/basic-utf-16-be/check.txt \ + test/xls-xml/text-alignment/input.xml \ test/xls-xml/matrix-results/input.xml \ - test/xls-xml/merged-cells/check.txt \ - test/xls-xml/merged-cells/input.xml \ - 
test/xls-xml/named-colors/check.txt \ - test/xls-xml/named-colors/input-upper.xml \ + test/xls-xml/named-expression/input.xml \ + test/xls-xml/named-expression/check.txt \ + test/xls-xml/formula-array-1/input.xml \ + test/xls-xml/formula-array-1/check.txt \ + test/xls-xml/empty-rows/input.xml \ + test/xls-xml/empty-rows/check.txt \ + test/xls-xml/basic/input.xml \ + test/xls-xml/basic/check.txt \ + test/xls-xml/view/cursor-per-sheet.xml \ + test/xls-xml/view/frozen-pane.xml \ + test/xls-xml/view/cursor-split-pane.xml \ + test/xls-xml/bold-and-italic/input.xml \ + test/xls-xml/bold-and-italic/check.txt \ + test/xls-xml/cell-properties/locked-and-hidden.xml \ + test/xls-xml/cell-properties/default-style.xml \ + test/xls-xml/cell-properties/wrap-and-shrink.xml \ test/xls-xml/named-colors/input.xml \ - test/xls-xml/named-colors/run.py \ + test/xls-xml/named-colors/input-upper.xml \ test/xls-xml/named-colors/saved-by-excel.xml \ - test/xls-xml/named-expression-sheet-local/check.txt \ - test/xls-xml/named-expression-sheet-local/input.xml \ - test/xls-xml/named-expression/check.txt \ - test/xls-xml/named-expression/input.xml \ + test/xls-xml/named-colors/check.txt \ + test/xls-xml/named-colors/run.py \ + test/xls-xml/formula-cells-3/input.xml \ + test/xls-xml/formula-cells-3/check.txt \ + test/xls-xml/leading-whitespace/input.xml \ + test/xls-xml/leading-whitespace/check.txt \ + test/xls-xml/basic-utf-16-le/input.xml \ + test/xls-xml/basic-utf-16-le/check.txt \ test/xls-xml/number-format/date-time.xml \ - test/xls-xml/raw-values-1/check.txt \ - test/xls-xml/raw-values-1/input.xml \ - test/xls-xml/table-offset/check.txt \ + test/xls-xml/formula-cells-2/input.xml \ + test/xls-xml/formula-cells-2/config.yaml \ + test/xls-xml/formula-cells-2/check.txt \ test/xls-xml/table-offset/input.xml \ + test/xls-xml/table-offset/check.txt \ + test/xls-xml/column-width-row-height/input.xml \ + test/xls-xml/character-set/input.xml \ + test/xls-xml/merged-cells/input.xml \ + 
test/xls-xml/merged-cells/check.txt \ + test/xls-xml/date-time/input.xml \ + test/xls-xml/named-expression-sheet-local/input.xml \ + test/xls-xml/named-expression-sheet-local/check.txt \ + test/xls-xml/background-color/standard.xml \ test/xls-xml/table/autofilter.xml \ - test/xls-xml/text-alignment/input.xml \ - test/xls-xml/view/cursor-per-sheet.xml \ - test/xls-xml/view/cursor-split-pane.xml \ - test/xls-xml/view/frozen-pane.xml \ - test/ods/named-range/input.ods \ - test/ods/named-range/check.txt \ - test/ods/date-cell/input.ods \ - test/ods/named-expression/input.ods \ - test/ods/named-expression/check.txt \ - test/ods/formula-2/input.ods \ - test/ods/formula-2/check.txt \ - test/ods/test.ods \ - test/ods/formatted-text/bold-and-italic.ods \ - test/ods/japanese.ods \ - test/ods/named-expression-sheet-local/input.ods \ - test/ods/named-expression-sheet-local/check.txt \ test/ods/borders/grid-box.ods \ test/ods/borders/single-cells.ods \ - test/ods/raw-values-1/input.ods \ + test/ods/import-styles/cell-protection.xml \ + test/ods/import-styles/standard-styles.xml \ + test/ods/import-styles/cell-styles.xml \ test/ods/raw-values-1/check.txt \ - test/ods/column-width-row-height/input.ods \ - test/ods/styles/cell-protection.xml \ - test/ods/styles/cell-styles.xml \ - test/ods/styles/number-format.xml \ - test/ods/styles/standard-styles.xml \ + test/ods/raw-values-1/input.ods \ + test/ods/styles/text-underlines.ods \ + test/ods/styles/column-styles.ods \ + test/ods/styles/direct-format.ods \ + test/ods/styles/asian-complex.ods \ + test/ods/test.ods \ + test/ods/named-expression/check.txt \ + test/ods/named-expression/input.ods \ + test/ods/formula-1/check.txt \ test/ods/formula-1/input.ods \ - test/ods/formula-1/check.txt + test/ods/cell-properties/wrap-and-shrink.ods \ + test/ods/formula-2/check.txt \ + test/ods/formula-2/input.ods \ + test/ods/number-format/basic-set.ods \ + test/ods/column-width-row-height/input.ods \ + test/ods/date-cell/input.ods \ + 
test/ods/japanese.ods \ + test/ods/named-expression-sheet-local/check.txt \ + test/ods/named-expression-sheet-local/input.ods \ + test/ods/named-range/check.txt \ + test/ods/named-range/input.ods \ + test/ods/formatted-text/bold-and-italic.ods EXTRA_DIST = \ CHANGELOG \ @@ -803,5 +819,8 @@ doc-sphinx: doc: doc-doxygen doc-sphinx +doc-clean: + @rm -rf ./doc/_build ./doc/_doxygen + diff --git a/README.md b/README.md index 0ad9c80ed5bb1fcd389cfca849ce89912c9e67b4..e7128f1a35ed6059f8f24e85551922f7225917ca 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ The library also includes low-level parsers for the following: * CSS * XML * JSON -* YAML +* YAML (experimental) These parsers are all implemented as C++ templates and require a handler class passed as a template argument so that the handler class receives various diff --git a/benchmark/json_parser.cpp b/benchmark/json_parser.cpp index 5f9837f776074e37dfe08d12e7f7f73fef39510c..1917b60869482d8ee14d857e1fb0fe7970f8628e 100644 --- a/benchmark/json_parser.cpp +++ b/benchmark/json_parser.cpp @@ -96,9 +96,8 @@ public: do_work(); } - void object_key(const char* p, size_t len, bool transient) + void object_key(std::string_view s, bool transient) { - std::string_view s{p, len}; if (transient) s = m_pool.intern(s).first; @@ -130,9 +129,8 @@ public: do_work(); } - void string(const char* p, size_t len, bool transient) + void string(std::string_view s, bool transient) { - std::string_view s{p, len}; if (transient) s = m_pool.intern(s).first; @@ -171,7 +169,7 @@ int main(int argc, char** argv) try { stack_printer __stack_printer__("parsing"); - orcus::json_parser parser(content.data(), content.size(), hdl); + orcus::json_parser parser(content, hdl); parser.parse(); } diff --git a/configure.ac b/configure.ac index 62ee3f581b0a63591c1fad8f56131b551eab1ba8..8a65dee0a996d20872856b9168ae80c9f7bbd429 100644 --- a/configure.ac +++ b/configure.ac @@ -5,8 +5,8 @@ # Version information # =================== 
m4_define([orcus_major_version], [0]) -m4_define([orcus_minor_version], [17]) -m4_define([orcus_micro_version], [99]) +m4_define([orcus_minor_version], [18]) +m4_define([orcus_micro_version], [0]) m4_define([orcus_version], [orcus_major_version.orcus_minor_version.orcus_micro_version]) # =============== diff --git a/doc/conf.py b/doc/conf.py index f90713ddea8b40f76b046db4036070ed244b2e4c..4ad6082eff0d2a4b049fa94f63eaa564a0cf38ed 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -58,16 +58,16 @@ master_doc = 'index' # General information about the project. project = 'Orcus' -copyright = '2021, Kohei Yoshida' +copyright = '2022, Kohei Yoshida' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '0.17' +version = '0.18' # The full version, including alpha/beta/rc tags. -release = '0.17.2' +release = '0.18.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -117,7 +117,9 @@ html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -html_theme_options = {} +html_theme_options = { + "navigation_depth": 5, +} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] diff --git a/doc/cpp/filter/config.rst b/doc/cpp/filter/config.rst new file mode 100644 index 0000000000000000000000000000000000000000..163889612a3fb316ccdf8bd3e3fadac43ee6cab3 --- /dev/null +++ b/doc/cpp/filter/config.rst @@ -0,0 +1,13 @@ + +Configuration classes +===================== + +.. doxygenstruct:: orcus::config + :members: + +.. doxygenstruct:: orcus::json_config + :members: + +.. 
doxygenstruct:: orcus::yaml_config + :members: + diff --git a/doc/cpp/filter/index.rst b/doc/cpp/filter/index.rst index 900942c3f2d678307b413964850eb5568c1b0951..1971c6ebea6c80aad7ea99dc826a664261e8dbb9 100644 --- a/doc/cpp/filter/index.rst +++ b/doc/cpp/filter/index.rst @@ -1,53 +1,10 @@ -Spreadsheet Import Filters -========================== - - -Plain Text (CSV) ----------------- - -.. doxygenclass:: orcus::orcus_csv - :members: - - -Open Document Spreadsheet -------------------------- - -.. doxygenclass:: orcus::orcus_ods - :members: - -.. doxygenclass:: orcus::import_ods - :members: - - -Microsoft Excel 2003 XML ------------------------- - -.. doxygenclass:: orcus::orcus_xls_xml - :members: - - -Microsoft Excel 2007 XML ------------------------- - -.. doxygenclass:: orcus::orcus_xlsx - :members: - -.. doxygenclass:: orcus::import_xlsx - :members: - - -Gnumeric XML ------------- - -.. doxygenclass:: orcus::orcus_gnumeric - :members: - - -Generic XML ------------ - -.. doxygenclass:: orcus::orcus_xml - :members: +Import filters +============== +.. toctree:: + :maxdepth: 1 + config.rst + interface/index.rst + spreadsheet.rst diff --git a/doc/cpp/filter/interface.rst b/doc/cpp/filter/interface.rst deleted file mode 100644 index 0d15f0c9365fafdb9e67f1e4f3922c91e793d155..0000000000000000000000000000000000000000 --- a/doc/cpp/filter/interface.rst +++ /dev/null @@ -1,257 +0,0 @@ - -Types and Interfaces -==================== - - -Global Interface ----------------- - -.. doxygenclass:: orcus::iface::import_filter - :members: - -.. doxygenclass:: orcus::iface::document_dumper - :members: - -.. _spreadsheet-interface: - -Spreadsheet Interface ---------------------- - -import_array_formula -^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_array_formula - :members: - -import_auto_filter -^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_auto_filter - :members: - -import_border_style -^^^^^^^^^^^^^^^^^^^ - -.. 
doxygenclass:: orcus::spreadsheet::iface::import_border_style - :members: - -import_cell_protection -^^^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_cell_protection - :members: - -import_cell_style -^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_cell_style - :members: - -import_conditional_format -^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_conditional_format - :members: - -import_data_table -^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_data_table - :members: - -import_factory -^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_factory - :members: - -import_fill_style -^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_fill_style - :members: - -import_font_style -^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_font_style - :members: - -import_formula -^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_formula - :members: - -import_global_settings -^^^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_global_settings - :members: - -import_named_expression -^^^^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_named_expression - :members: - -import_number_format -^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_number_format - :members: - -import_pivot_cache_definition -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_definition - :members: - -import_pivot_cache_records -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_records - :members: - -import_reference_resolver -^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_reference_resolver - :members: - -import_shared_strings -^^^^^^^^^^^^^^^^^^^^^ - -.. 
doxygenclass:: orcus::spreadsheet::iface::import_shared_strings - :members: - -import_sheet -^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_sheet - :members: - -import_sheet_properties -^^^^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_sheet_properties - :members: - -import_sheet_view -^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_sheet_view - :members: - -import_styles -^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_styles - :members: - -import_table -^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_table - :members: - -import_xf -^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_xf - :members: - -export_factory -^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::export_factory - :members: - -export_sheet -^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::export_sheet - :members: - - -Spreadsheet Types ------------------ - -Type Aliases -^^^^^^^^^^^^ - -.. doxygentypedef:: orcus::spreadsheet::row_t -.. doxygentypedef:: orcus::spreadsheet::col_t -.. doxygentypedef:: orcus::spreadsheet::sheet_t -.. doxygentypedef:: orcus::spreadsheet::color_elem_t -.. doxygentypedef:: orcus::spreadsheet::col_width_t -.. doxygentypedef:: orcus::spreadsheet::row_height_t -.. doxygentypedef:: orcus::spreadsheet::pivot_cache_id_t - - -Structs -^^^^^^^ - -.. doxygenstruct:: orcus::spreadsheet::underline_attrs_t - :members: - -.. doxygenstruct:: orcus::spreadsheet::address_t - :members: - -.. doxygenstruct:: orcus::spreadsheet::range_size_t - :members: - -.. doxygenstruct:: orcus::spreadsheet::range_t - :members: - -.. doxygenstruct:: orcus::spreadsheet::color_rgb_t - :members: - -.. doxygenstruct:: orcus::spreadsheet::split_pane_t - :members: - -.. doxygenstruct:: orcus::spreadsheet::frozen_pane_t - :members: - - -Enums -^^^^^ - -.. doxygenenum:: orcus::spreadsheet::error_value_t -.. 
doxygenenum:: orcus::spreadsheet::border_direction_t -.. doxygenenum:: orcus::spreadsheet::border_style_t -.. doxygenenum:: orcus::spreadsheet::fill_pattern_t -.. doxygenenum:: orcus::spreadsheet::strikethrough_style_t -.. doxygenenum:: orcus::spreadsheet::strikethrough_type_t -.. doxygenenum:: orcus::spreadsheet::strikethrough_width_t -.. doxygenenum:: orcus::spreadsheet::strikethrough_text_t -.. doxygenenum:: orcus::spreadsheet::formula_grammar_t -.. doxygenenum:: orcus::spreadsheet::formula_t -.. doxygenenum:: orcus::spreadsheet::underline_t -.. doxygenenum:: orcus::spreadsheet::underline_width_t -.. doxygenenum:: orcus::spreadsheet::underline_mode_t -.. doxygenenum:: orcus::spreadsheet::underline_type_t -.. doxygenenum:: orcus::spreadsheet::hor_alignment_t -.. doxygenenum:: orcus::spreadsheet::ver_alignment_t -.. doxygenenum:: orcus::spreadsheet::data_table_type_t -.. doxygenenum:: orcus::spreadsheet::xf_category_t -.. doxygenenum:: orcus::spreadsheet::totals_row_function_t -.. doxygenenum:: orcus::spreadsheet::conditional_format_t -.. doxygenenum:: orcus::spreadsheet::condition_operator_t -.. doxygenenum:: orcus::spreadsheet::condition_type_t -.. doxygenenum:: orcus::spreadsheet::condition_date_t -.. doxygenenum:: orcus::spreadsheet::databar_axis_t -.. doxygenenum:: orcus::spreadsheet::pivot_cache_group_by_t -.. doxygenenum:: orcus::spreadsheet::sheet_pane_t -.. doxygenenum:: orcus::spreadsheet::pane_state_t - - -Spreadsheet Global Functions ----------------------------- - -.. doxygenfunction:: orcus::spreadsheet::get_default_column_width -.. doxygenfunction:: orcus::spreadsheet::get_default_row_height -.. doxygenfunction:: orcus::spreadsheet::to_totals_row_function_enum -.. doxygenfunction:: orcus::spreadsheet::to_pivot_cache_group_by_enum -.. doxygenfunction:: orcus::spreadsheet::to_error_value_enum -.. 
doxygenfunction:: orcus::spreadsheet::to_color_rgb diff --git a/doc/cpp/filter/interface/functions.rst b/doc/cpp/filter/interface/functions.rst new file mode 100644 index 0000000000000000000000000000000000000000..8aaff512e721eb1c480569df7699563753e75287 --- /dev/null +++ b/doc/cpp/filter/interface/functions.rst @@ -0,0 +1,12 @@ + +Spreadsheet utility functions +============================= + +.. doxygenfunction:: orcus::spreadsheet::get_default_column_width +.. doxygenfunction:: orcus::spreadsheet::get_default_row_height +.. doxygenfunction:: orcus::spreadsheet::to_totals_row_function_enum +.. doxygenfunction:: orcus::spreadsheet::to_pivot_cache_group_by_enum +.. doxygenfunction:: orcus::spreadsheet::to_error_value_enum +.. doxygenfunction:: orcus::spreadsheet::to_color_rgb +.. doxygenfunction:: orcus::spreadsheet::to_rc_address +.. doxygenfunction:: orcus::spreadsheet::to_rc_range diff --git a/doc/cpp/filter/interface/global.rst b/doc/cpp/filter/interface/global.rst new file mode 100644 index 0000000000000000000000000000000000000000..0e73e470d819c6d5096dd3f37f5012b51cf661b9 --- /dev/null +++ b/doc/cpp/filter/interface/global.rst @@ -0,0 +1,12 @@ + +Global interfaces +================= + +The following global interfaces are used to abstract the concrete filter and +document classes from orcus's CLI framework. + +.. doxygenclass:: orcus::iface::import_filter + :members: + +.. doxygenclass:: orcus::iface::document_dumper + :members: diff --git a/doc/cpp/filter/interface/index.rst b/doc/cpp/filter/interface/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..67f93e9436dca05a676f698876c67e7a4766c762 --- /dev/null +++ b/doc/cpp/filter/interface/index.rst @@ -0,0 +1,12 @@ + +Types and interfaces +==================== + +.. 
toctree:: + :maxdepth: 1 + + global.rst + spreadsheet/index.rst + types.rst + view_types.rst + functions.rst diff --git a/doc/cpp/filter/interface/spreadsheet/document.rst b/doc/cpp/filter/interface/spreadsheet/document.rst new file mode 100644 index 0000000000000000000000000000000000000000..95eed76ea513f69266f9656ffec95f7814918311 --- /dev/null +++ b/doc/cpp/filter/interface/spreadsheet/document.rst @@ -0,0 +1,21 @@ + +Document import +=============== + +The following interfaces handle importing of contents and properties related to +the entire document store. + +.. doxygenclass:: orcus::spreadsheet::iface::import_factory + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_global_settings + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_shared_strings + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_named_expression + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_reference_resolver + :members: diff --git a/doc/cpp/filter/interface/spreadsheet/document_export.rst b/doc/cpp/filter/interface/spreadsheet/document_export.rst new file mode 100644 index 0000000000000000000000000000000000000000..5759d81ff7c76cbc4ce7b0f4f6324d9b7b7fb8cc --- /dev/null +++ b/doc/cpp/filter/interface/spreadsheet/document_export.rst @@ -0,0 +1,9 @@ + +Document export +=============== + +.. doxygenclass:: orcus::spreadsheet::iface::export_sheet + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::export_factory + :members: diff --git a/doc/cpp/filter/interface/spreadsheet/index.rst b/doc/cpp/filter/interface/spreadsheet/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..4254c22fd4f7170cdd1e7851b631455354811f86 --- /dev/null +++ b/doc/cpp/filter/interface/spreadsheet/index.rst @@ -0,0 +1,15 @@ + +.. _spreadsheet-interfaces: + +Spreadsheet interfaces +====================== + +.. 
toctree:: + :maxdepth: 1 + + document.rst + sheet.rst + pivot.rst + styles.rst + view.rst + document_export.rst diff --git a/doc/cpp/filter/interface/spreadsheet/pivot.rst b/doc/cpp/filter/interface/spreadsheet/pivot.rst new file mode 100644 index 0000000000000000000000000000000000000000..28d8df7746b9963fdd808c635131ea84eb785293 --- /dev/null +++ b/doc/cpp/filter/interface/spreadsheet/pivot.rst @@ -0,0 +1,14 @@ + +Pivot table import +================== + +The following interfaces handle importing of contents related to pivot tables. + +.. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_definition + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_field_group + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_records + :members: diff --git a/doc/cpp/filter/interface/spreadsheet/sheet.rst b/doc/cpp/filter/interface/spreadsheet/sheet.rst new file mode 100644 index 0000000000000000000000000000000000000000..6482ac6924f6990adbc76fc27326db2ee89f4bda --- /dev/null +++ b/doc/cpp/filter/interface/spreadsheet/sheet.rst @@ -0,0 +1,30 @@ + +Sheet import +============ + +The following interfaces handle importing of contents and properties related to +individual sheets. + +.. doxygenclass:: orcus::spreadsheet::iface::import_sheet + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_sheet_properties + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_data_table + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_auto_filter + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_conditional_format + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_table + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_formula + :members: + +.. 
doxygenclass:: orcus::spreadsheet::iface::import_array_formula + :members: diff --git a/doc/cpp/filter/interface/spreadsheet/styles.rst b/doc/cpp/filter/interface/spreadsheet/styles.rst new file mode 100644 index 0000000000000000000000000000000000000000..75752c0d98786551aab6d5efb68c3ba5fd24a711 --- /dev/null +++ b/doc/cpp/filter/interface/spreadsheet/styles.rst @@ -0,0 +1,27 @@ + +Styles import +============= + +.. doxygenclass:: orcus::spreadsheet::iface::import_styles + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_font_style + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_fill_style + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_border_style + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_cell_protection + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_number_format + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_xf + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_cell_style + :members: diff --git a/doc/cpp/filter/interface/spreadsheet/view.rst b/doc/cpp/filter/interface/spreadsheet/view.rst new file mode 100644 index 0000000000000000000000000000000000000000..1c4e97f9040df274641d35526d8dd9fd537b7266 --- /dev/null +++ b/doc/cpp/filter/interface/spreadsheet/view.rst @@ -0,0 +1,6 @@ + +View properties import +====================== + +.. doxygenclass:: orcus::spreadsheet::iface::import_sheet_view + :members: diff --git a/doc/cpp/filter/interface/types.rst b/doc/cpp/filter/interface/types.rst new file mode 100644 index 0000000000000000000000000000000000000000..5fd3811743cc814b860cd3d667e1d2ec1fa1f798 --- /dev/null +++ b/doc/cpp/filter/interface/types.rst @@ -0,0 +1,73 @@ + +Spreadsheet types +================= + +These types are used throughout the spreadsheet import and export interfaces. + +Integral types +-------------- + +.. doxygentypedef:: orcus::spreadsheet::row_t +.. doxygentypedef:: orcus::spreadsheet::col_t +.. 
doxygentypedef:: orcus::spreadsheet::sheet_t +.. doxygentypedef:: orcus::spreadsheet::color_elem_t +.. doxygentypedef:: orcus::spreadsheet::col_width_t +.. doxygentypedef:: orcus::spreadsheet::row_height_t +.. doxygentypedef:: orcus::spreadsheet::pivot_cache_id_t + + +Structs +------- + +.. doxygenstruct:: orcus::spreadsheet::underline_attrs_t + :members: + +.. doxygenstruct:: orcus::spreadsheet::address_t + :members: + +.. doxygenstruct:: orcus::spreadsheet::range_size_t + :members: + +.. doxygenstruct:: orcus::spreadsheet::range_t + :members: + +.. doxygenstruct:: orcus::spreadsheet::src_address_t + :members: + +.. doxygenstruct:: orcus::spreadsheet::src_range_t + :members: + +.. doxygenstruct:: orcus::spreadsheet::color_rgb_t + :members: + + +Enums +----- + +.. doxygenenum:: orcus::spreadsheet::error_value_t +.. doxygenenum:: orcus::spreadsheet::border_direction_t +.. doxygenenum:: orcus::spreadsheet::border_style_t +.. doxygenenum:: orcus::spreadsheet::fill_pattern_t +.. doxygenenum:: orcus::spreadsheet::strikethrough_style_t +.. doxygenenum:: orcus::spreadsheet::strikethrough_type_t +.. doxygenenum:: orcus::spreadsheet::strikethrough_width_t +.. doxygenenum:: orcus::spreadsheet::strikethrough_text_t +.. doxygenenum:: orcus::spreadsheet::formula_grammar_t +.. doxygenenum:: orcus::spreadsheet::formula_t +.. doxygenenum:: orcus::spreadsheet::formula_ref_context_t +.. doxygenenum:: orcus::spreadsheet::formula_error_policy_t +.. doxygenenum:: orcus::spreadsheet::underline_t +.. doxygenenum:: orcus::spreadsheet::underline_width_t +.. doxygenenum:: orcus::spreadsheet::underline_mode_t +.. doxygenenum:: orcus::spreadsheet::underline_type_t +.. doxygenenum:: orcus::spreadsheet::hor_alignment_t +.. doxygenenum:: orcus::spreadsheet::ver_alignment_t +.. doxygenenum:: orcus::spreadsheet::xf_category_t +.. doxygenenum:: orcus::spreadsheet::data_table_type_t +.. doxygenenum:: orcus::spreadsheet::totals_row_function_t +.. 
doxygenenum:: orcus::spreadsheet::conditional_format_t +.. doxygenenum:: orcus::spreadsheet::condition_operator_t +.. doxygenenum:: orcus::spreadsheet::condition_type_t +.. doxygenenum:: orcus::spreadsheet::condition_date_t +.. doxygenenum:: orcus::spreadsheet::databar_axis_t +.. doxygenenum:: orcus::spreadsheet::pivot_cache_group_by_t diff --git a/doc/cpp/filter/interface/view_types.rst b/doc/cpp/filter/interface/view_types.rst new file mode 100644 index 0000000000000000000000000000000000000000..413be92d44697b76082fe78e9dec40768db7eb8f --- /dev/null +++ b/doc/cpp/filter/interface/view_types.rst @@ -0,0 +1,20 @@ + +Spreadsheet view types +====================== + +Structs +------- + +.. doxygenstruct:: orcus::spreadsheet::split_pane_t + :members: + +.. doxygenstruct:: orcus::spreadsheet::frozen_pane_t + :members: + + +Enums +----- + +.. doxygenenum:: orcus::spreadsheet::sheet_pane_t +.. doxygenenum:: orcus::spreadsheet::pane_state_t + diff --git a/doc/cpp/filter/spreadsheet.rst b/doc/cpp/filter/spreadsheet.rst new file mode 100644 index 0000000000000000000000000000000000000000..e53d19fd9c72960298d2ccd0e65e9a5374a82913 --- /dev/null +++ b/doc/cpp/filter/spreadsheet.rst @@ -0,0 +1,63 @@ + +Spreadsheet import filters +========================== + +Plain text (CSV) +---------------- + +.. doxygenclass:: orcus::orcus_csv + :members: + + +Open document spreadsheet +------------------------- + +.. doxygenclass:: orcus::orcus_ods + :members: + +.. doxygenclass:: orcus::import_ods + :members: + + +Microsoft Excel 2003 XML +------------------------ + +.. doxygenclass:: orcus::orcus_xls_xml + :members: + + +Microsoft Excel 2007 XML +------------------------ + +.. doxygenclass:: orcus::orcus_xlsx + :members: + +.. doxygenclass:: orcus::import_xlsx + :members: + + +Gnumeric XML +------------ + +.. doxygenclass:: orcus::orcus_gnumeric + :members: + + +Generic XML +----------- + +.. doxygenclass:: orcus::orcus_xml + :members: + + +Generic JSON +------------ + +.. 
doxygenclass:: orcus::orcus_json + :members: + + +Utility functions +----------------- + +.. doxygenfunction:: orcus::detect diff --git a/doc/cpp/index.rst b/doc/cpp/index.rst index 024d9d81fa42d40f348052d8f478cc50719d15af..eef3edef1136bdc72aec1601c04b67fe94518181 100644 --- a/doc/cpp/index.rst +++ b/doc/cpp/index.rst @@ -6,6 +6,5 @@ C++ API :maxdepth: 2 parser/index.rst - filter/interface.rst filter/index.rst model/index.rst diff --git a/doc/cpp/model/index.rst b/doc/cpp/model/index.rst index f85eac7685b01e54f8d361102b6bb33583e7baf0..6dd0ccc63bc2738086ea4defd2126eb6b2a233a3 100644 --- a/doc/cpp/model/index.rst +++ b/doc/cpp/model/index.rst @@ -1,6 +1,6 @@ -Document Model -============== +Document models +=============== .. toctree:: :maxdepth: 1 diff --git a/doc/cpp/model/json.rst b/doc/cpp/model/json.rst index d2385b6cec6ac4f80a750c7aa89df9f88a85c99d..607b7490ddfe9219b37bbac08cfe8ce5643ea370 100644 --- a/doc/cpp/model/json.rst +++ b/doc/cpp/model/json.rst @@ -1,5 +1,5 @@ -JSON Document Tree +JSON document tree ================== Document tree @@ -7,9 +7,6 @@ Document tree .. doxygenclass:: orcus::json::document_tree :members: -.. doxygenstruct:: orcus::json_config - :members: - .. doxygenclass:: orcus::json::const_node :members: diff --git a/doc/cpp/model/spreadsheet.rst b/doc/cpp/model/spreadsheet.rst index 8df640fb8404458bc9f3a62eb8f5306a753e6482..3550e643d5fa8faf350079e5842af1d00861b8d8 100644 --- a/doc/cpp/model/spreadsheet.rst +++ b/doc/cpp/model/spreadsheet.rst @@ -1,8 +1,16 @@ -Spreadsheet Document +Spreadsheet document ==================== +Document types +-------------- + +.. doxygenstruct:: orcus::spreadsheet::color_t +.. doxygenstruct:: orcus::spreadsheet::format_run +.. doxygentypedef:: orcus::spreadsheet::format_runs_t + + Document -------- @@ -17,7 +25,7 @@ Sheet :members: -Pivot Table +Pivot table ----------- .. 
doxygenstruct:: orcus::spreadsheet::pivot_cache_record_value_t @@ -39,7 +47,7 @@ Pivot Table :members: -Import Factory +Import factory -------------- .. doxygenclass:: orcus::spreadsheet::import_factory diff --git a/doc/cpp/model/yaml.rst b/doc/cpp/model/yaml.rst index c5193881d485023acbab74c075737eda0cdd1c08..06bb91b0a030285a31ddbc99c2ee6633696c1967 100644 --- a/doc/cpp/model/yaml.rst +++ b/doc/cpp/model/yaml.rst @@ -1,4 +1,4 @@ -YAML Document Tree +YAML document tree ================== diff --git a/doc/cpp/parser/archive.rst b/doc/cpp/parser/archive.rst new file mode 100644 index 0000000000000000000000000000000000000000..e5f7a25ab9468cac4e3827e720420c33061c5494 --- /dev/null +++ b/doc/cpp/parser/archive.rst @@ -0,0 +1,21 @@ + +Archive +======= + +Zip archive +----------- + +.. doxygenstruct:: orcus::zip_file_entry_header + :members: + +.. doxygenclass:: orcus::zip_archive + :members: + +.. doxygenclass:: orcus::zip_archive_stream + :members: + +.. doxygenclass:: orcus::zip_archive_stream_fd + :members: + +.. doxygenclass:: orcus::zip_archive_stream_blob + :members: diff --git a/doc/cpp/parser/base64.rst b/doc/cpp/parser/base64.rst new file mode 100644 index 0000000000000000000000000000000000000000..617db375788a852e1ff26d2530e7456b437d2a4d --- /dev/null +++ b/doc/cpp/parser/base64.rst @@ -0,0 +1,5 @@ + +Base64 +====== + +.. doxygenfile:: base64.hpp diff --git a/doc/cpp/parser/css.rst b/doc/cpp/parser/css.rst index efbf545ccd69104c585c8ded5960fe77c7301e87..beb6cb217dff1be67ad0377e4c68ca09d420f563 100644 --- a/doc/cpp/parser/css.rst +++ b/doc/cpp/parser/css.rst @@ -1,27 +1,84 @@ .. highlight:: cpp -CSS Parser +CSS parser ========== .. doxygenclass:: orcus::css_parser :members: -Parser Handler +Parser handler -------------- .. doxygenclass:: orcus::css_handler :members: -CSS Types +CSS types --------- .. doxygenenum:: orcus::css::combinator_t - .. doxygenenum:: orcus::css::property_function_t - .. doxygenenum:: orcus::css::property_value_t .. 
doxygentypedef:: orcus::css::pseudo_element_t - .. doxygentypedef:: orcus::css::pseudo_class_t +.. doxygenstruct:: orcus::css::rgba_color_t +.. doxygenstruct:: orcus::css::hsla_color_t + + +Constants +--------- + +Pseudo elements +^^^^^^^^^^^^^^^ + +.. doxygenvariable:: orcus::css::pseudo_element_after +.. doxygenvariable:: orcus::css::pseudo_element_before +.. doxygenvariable:: orcus::css::pseudo_element_first_letter +.. doxygenvariable:: orcus::css::pseudo_element_first_line +.. doxygenvariable:: orcus::css::pseudo_element_selection +.. doxygenvariable:: orcus::css::pseudo_element_backdrop + +Pseudo classes +^^^^^^^^^^^^^^ + +.. doxygenvariable:: orcus::css::pseudo_class_active +.. doxygenvariable:: orcus::css::pseudo_class_checked +.. doxygenvariable:: orcus::css::pseudo_class_default +.. doxygenvariable:: orcus::css::pseudo_class_dir +.. doxygenvariable:: orcus::css::pseudo_class_disabled +.. doxygenvariable:: orcus::css::pseudo_class_empty +.. doxygenvariable:: orcus::css::pseudo_class_enabled +.. doxygenvariable:: orcus::css::pseudo_class_first +.. doxygenvariable:: orcus::css::pseudo_class_first_child +.. doxygenvariable:: orcus::css::pseudo_class_first_of_type +.. doxygenvariable:: orcus::css::pseudo_class_fullscreen +.. doxygenvariable:: orcus::css::pseudo_class_focus +.. doxygenvariable:: orcus::css::pseudo_class_hover +.. doxygenvariable:: orcus::css::pseudo_class_indeterminate +.. doxygenvariable:: orcus::css::pseudo_class_in_range +.. doxygenvariable:: orcus::css::pseudo_class_invalid +.. doxygenvariable:: orcus::css::pseudo_class_lang +.. doxygenvariable:: orcus::css::pseudo_class_last_child +.. doxygenvariable:: orcus::css::pseudo_class_last_of_type +.. doxygenvariable:: orcus::css::pseudo_class_left +.. doxygenvariable:: orcus::css::pseudo_class_link +.. doxygenvariable:: orcus::css::pseudo_class_not +.. doxygenvariable:: orcus::css::pseudo_class_nth_child +.. doxygenvariable:: orcus::css::pseudo_class_nth_last_child +.. 
doxygenvariable:: orcus::css::pseudo_class_nth_last_of_type +.. doxygenvariable:: orcus::css::pseudo_class_nth_of_type +.. doxygenvariable:: orcus::css::pseudo_class_only_child +.. doxygenvariable:: orcus::css::pseudo_class_only_of_type +.. doxygenvariable:: orcus::css::pseudo_class_optional +.. doxygenvariable:: orcus::css::pseudo_class_out_of_range +.. doxygenvariable:: orcus::css::pseudo_class_read_only +.. doxygenvariable:: orcus::css::pseudo_class_read_write +.. doxygenvariable:: orcus::css::pseudo_class_required +.. doxygenvariable:: orcus::css::pseudo_class_right +.. doxygenvariable:: orcus::css::pseudo_class_root +.. doxygenvariable:: orcus::css::pseudo_class_scope +.. doxygenvariable:: orcus::css::pseudo_class_target +.. doxygenvariable:: orcus::css::pseudo_class_valid +.. doxygenvariable:: orcus::css::pseudo_class_visited + diff --git a/doc/cpp/parser/csv.rst b/doc/cpp/parser/csv.rst index 560cb73c43b753321016e319d5b7a83e4566e8dd..67e708ac9f884cb5d5bf79a7105ad27b7f6169a7 100644 --- a/doc/cpp/parser/csv.rst +++ b/doc/cpp/parser/csv.rst @@ -1,6 +1,6 @@ .. highlight:: cpp -CSV Parser +CSV parser ========== .. doxygenclass:: orcus::csv_parser @@ -9,7 +9,7 @@ CSV Parser .. doxygenstruct:: orcus::csv::parser_config :members: -Parser Handler +Parser handler -------------- .. doxygenclass:: orcus::csv_handler diff --git a/doc/cpp/parser/exception.rst b/doc/cpp/parser/exception.rst new file mode 100644 index 0000000000000000000000000000000000000000..e6b14ccb105630810cf4d8107f79183ff1b38824 --- /dev/null +++ b/doc/cpp/parser/exception.rst @@ -0,0 +1,37 @@ +.. highlight:: cpp + +Exceptions +========== + +.. doxygenclass:: orcus::general_error + :members: + +.. doxygenclass:: orcus::invalid_arg_error + :members: + +.. doxygenclass:: orcus::xml_structure_error + :members: + +.. doxygenclass:: orcus::json_structure_error + :members: + +.. doxygenclass:: orcus::invalid_map_error + :members: + +.. doxygenclass:: orcus::value_error + :members: + +.. 
doxygenclass:: orcus::xpath_error + :members: + +.. doxygenclass:: orcus::interface_error + :members: + +.. doxygenclass:: orcus::parse_error + :members: + +.. doxygenclass:: orcus::malformed_xml_error + :members: + +.. doxygenclass:: orcus::zip_error + :members: diff --git a/doc/cpp/parser/index.rst b/doc/cpp/parser/index.rst index 8a12be2f3e188a1b4451aca2f1791ff50fbf1946..c91d7925c735743ee62f0e360218bb96022fa1eb 100644 --- a/doc/cpp/parser/index.rst +++ b/doc/cpp/parser/index.rst @@ -1,14 +1,19 @@ -Low-Level Parsers and Utilities +Low-level parsers and utilities =============================== .. toctree:: :maxdepth: 1 - util.rst + xml.rst + xml_writer.rst + json.rst css.rst csv.rst - json.rst - xml.rst yaml.rst - xml_writer.rst + types.rst + util.rst + stream.rst + base64.rst + archive.rst + exception.rst diff --git a/doc/cpp/parser/json.rst b/doc/cpp/parser/json.rst index 10688b7183bd7695968de30e8d75bc3de9389f76..8aa402b189305e4dc8324f35690a7e73602da357 100644 --- a/doc/cpp/parser/json.rst +++ b/doc/cpp/parser/json.rst @@ -1,12 +1,12 @@ .. highlight:: cpp -JSON Parser +JSON parser =========== .. doxygenclass:: orcus::json_parser :members: -Parser Handler +Parser handler -------------- .. doxygenclass:: orcus::json_handler diff --git a/doc/cpp/parser/stream.rst b/doc/cpp/parser/stream.rst new file mode 100644 index 0000000000000000000000000000000000000000..6f8ecde50c6cff268484d84c25004783ae556e96 --- /dev/null +++ b/doc/cpp/parser/stream.rst @@ -0,0 +1,24 @@ +.. highlight:: cpp + +Stream +====== + +Stream buffers +-------------- + +.. doxygenclass:: orcus::file_content + :members: + +.. doxygenclass:: orcus::memory_content + :members: + +Utility functions +----------------- + +.. doxygenstruct:: orcus::line_with_offset + :members: + +.. doxygenfunction:: orcus::create_parse_error_output +.. doxygenfunction:: orcus::locate_line_with_offset +.. doxygenfunction:: orcus::locate_first_different_char +.. 
doxygenfunction:: orcus::calc_logical_string_length diff --git a/doc/cpp/parser/types.rst b/doc/cpp/parser/types.rst new file mode 100644 index 0000000000000000000000000000000000000000..6c5326736de2895eb98965fdab37df3833320a56 --- /dev/null +++ b/doc/cpp/parser/types.rst @@ -0,0 +1,43 @@ + +Basic types +=========== + +Constants +--------- + +.. doxygenvariable:: orcus::INDEX_NOT_FOUND +.. doxygenvariable:: orcus::XMLNS_UNKNOWN_ID +.. doxygenvariable:: orcus::XML_UNKNOWN_TOKEN + +Type aliases +------------ + +.. doxygentypedef:: orcus::xml_token_attrs_t +.. doxygentypedef:: orcus::xml_token_t +.. doxygentypedef:: orcus::xmlns_id_t + +Structs +------- + +.. doxygenstruct:: orcus::date_time_t +.. doxygenstruct:: orcus::length_t +.. doxygenstruct:: orcus::parse_error_value_t +.. doxygenstruct:: orcus::xml_declaration_t +.. doxygenstruct:: orcus::xml_name_t +.. doxygenstruct:: orcus::xml_token_attr_t +.. doxygenstruct:: orcus::xml_token_element_t + +Enums +----- + +.. doxygenenum:: orcus::character_set_t +.. doxygenenum:: orcus::dump_format_t +.. doxygenenum:: orcus::format_t +.. doxygenenum:: orcus::length_unit_t + +Utility functions +----------------- + +.. doxygenfunction:: orcus::get_dump_format_entries +.. doxygenfunction:: orcus::to_character_set +.. doxygenfunction:: orcus::to_dump_format_enum diff --git a/doc/cpp/parser/util.rst b/doc/cpp/parser/util.rst index 4363a9ea0b9a7510e823b6bb4c10f880fd06f43d..2d3ec0d534320c92d89cbf5e1338ccf4c8ea7c74 100644 --- a/doc/cpp/parser/util.rst +++ b/doc/cpp/parser/util.rst @@ -11,28 +11,3 @@ Utilities .. doxygenclass:: orcus::cell_buffer :members: - -.. doxygenclass:: orcus::zip_archive - :members: - - -XML Types -========= - -.. doxygentypedef:: orcus::xml_token_t - -.. doxygentypedef:: orcus::xmlns_id_t - -.. doxygenstruct:: orcus::xml_name_t - -.. doxygenstruct:: orcus::xml_token_attr_t - - -Other Types -=========== - -.. doxygenenum:: orcus::length_unit_t - -.. 
doxygenstruct:: orcus::date_time_t - - diff --git a/doc/cpp/parser/xml.rst b/doc/cpp/parser/xml.rst index fdcf348a3836c0867a652b77d125e13bf79ac704..462c466191f23703fc3b56804f79e00097e2639b 100644 --- a/doc/cpp/parser/xml.rst +++ b/doc/cpp/parser/xml.rst @@ -1,30 +1,49 @@ .. highlight:: cpp -XML Parsers +XML parsers =========== +SAX base parser +--------------- + .. doxygenclass:: orcus::sax_parser :members: -.. doxygenclass:: orcus::sax_ns_parser +.. doxygenstruct:: orcus::sax_parser_default_config :members: -.. doxygenclass:: orcus::sax_token_parser +.. doxygenclass:: orcus::sax_handler :members: +.. doxygenstruct:: orcus::sax::parser_element + :members: -Parser Handlers ---------------- +.. doxygenstruct:: orcus::sax::parser_attribute + :members: -.. doxygenclass:: orcus::sax_handler +SAX namespace parser +-------------------- + +.. doxygenclass:: orcus::sax_ns_parser :members: .. doxygenclass:: orcus::sax_ns_handler :members: -.. doxygenclass:: orcus::sax_token_handler +.. doxygenstruct:: orcus::sax_ns_parser_element :members: +.. doxygenstruct:: orcus::sax_ns_parser_attribute + :members: + +SAX token parser +---------------- + +.. doxygenclass:: orcus::sax_token_parser + :members: + +.. doxygenclass:: orcus::sax_token_handler + :members: Namespace --------- @@ -34,3 +53,13 @@ Namespace .. doxygenclass:: orcus::xmlns_context :members: + +Common +------ + +.. doxygenstruct:: orcus::sax::doctype_declaration + :members: + +.. doxygenfunction:: orcus::sax::decode_xml_encoded_char + +.. doxygenfunction:: orcus::sax::decode_xml_unicode_char diff --git a/doc/cpp/parser/xml_writer.rst b/doc/cpp/parser/xml_writer.rst index 18395d960642d5a073fa02aa0c37c74dae90169b..1092ac4c0b814e05776bb504e4bce761af282b96 100644 --- a/doc/cpp/parser/xml_writer.rst +++ b/doc/cpp/parser/xml_writer.rst @@ -1,6 +1,6 @@ .. highlight:: cpp -XML Writer +XML writer ========== .. 
doxygenclass:: orcus::xml_writer diff --git a/doc/cpp/parser/yaml.rst b/doc/cpp/parser/yaml.rst index 1835b732c80e73046290df797ca9986e3ddad81d..0a1107ab30acc376f552921390c01ac5927e632f 100644 --- a/doc/cpp/parser/yaml.rst +++ b/doc/cpp/parser/yaml.rst @@ -1,6 +1,6 @@ .. highlight:: cpp -YAML Parser +YAML parser =========== .. doxygenclass:: orcus::yaml_parser diff --git a/doc/overview/doc-user.rst b/doc/overview/doc-user.rst index e6317a5c22d72f55e21cf18d51689dffd639d5a2..a1292e5cd3b2617afdc30ca8e02321574afb52d5 100644 --- a/doc/overview/doc-user.rst +++ b/doc/overview/doc-user.rst @@ -570,5 +570,5 @@ Implement more interfaces ------------------------- This section has covered only a part of the available spreadsheet interfaces -you can implement in your code. Refer to the :ref:`spreadsheet-interface` +you can implement in your code. Refer to the :ref:`spreadsheet-interfaces` section to see the complete list of interfaces. diff --git a/doc_example/json_parser_1.cpp b/doc_example/json_parser_1.cpp index 832e51768b5bf7be423c54a07eae3feafd83f838..322316a13bf0a73a7e6f6f7d0a3ee88db9db716e 100644 --- a/doc_example/json_parser_1.cpp +++ b/doc_example/json_parser_1.cpp @@ -8,14 +8,14 @@ using namespace std; class json_parser_handler : public orcus::json_handler { public: - void object_key(const char* p, size_t len, bool /*transient*/) + void object_key(std::string_view key, bool /*transient*/) { - cout << "object key: " << std::string_view(p, len) << endl; + cout << "object key: " << key << endl; } - void string(const char* p, size_t len, bool /*transient*/) + void string(std::string_view val, bool /*transient*/) { - cout << "string: " << std::string_view(p, len) << endl; + cout << "string: " << val << endl; } void number(double val) @@ -27,13 +27,12 @@ public: int main() { const char* test_code = "{\"key1\": [1,2,3,4,5], \"key2\": 12.3}"; - size_t n_test_code = strlen(test_code); cout << "JSON string: " << test_code << endl; // Instantiate the parser with an own handler. 
json_parser_handler hdl; - orcus::json_parser parser(test_code, n_test_code, hdl); + orcus::json_parser parser(test_code, hdl); // Parse the string. parser.parse(); diff --git a/doc_example/spreadsheet_doc_1.cpp b/doc_example/spreadsheet_doc_1.cpp index 33f164589f9e5ab7f0ac58f5245d16bcf7f77f24..91dcbeaec77fc191fc05e7c27cdf239acc67e6a7 100644 --- a/doc_example/spreadsheet_doc_1.cpp +++ b/doc_example/spreadsheet_doc_1.cpp @@ -24,7 +24,8 @@ int main() // Pass the factory to the document loader, and read the content from a file // to populate the document. orcus_ods loader(&factory); - loader.read_file(input_dir / "document.ods"); + auto filepath = input_dir / "document.ods"; + loader.read_file(filepath.native()); // Now that the document is fully populated, access its content. const ixion::model_context& model = doc.get_model_context(); diff --git a/doc_example/spreadsheet_doc_1_num_and_formula.cpp b/doc_example/spreadsheet_doc_1_num_and_formula.cpp index 6ac6d1d96299f003c4afb08711228dd132436e9b..f8476cbf61c4eb05b1d26c46cc9c99bcff4cf40a 100644 --- a/doc_example/spreadsheet_doc_1_num_and_formula.cpp +++ b/doc_example/spreadsheet_doc_1_num_and_formula.cpp @@ -25,7 +25,8 @@ int main() // Pass the factory to the document loader, and read the content from a file // to populate the document. orcus_ods loader(&factory); - loader.read_file(input_dir / "document.ods"); + auto filepath = input_dir / "document.ods"; + loader.read_file(filepath.native()); doc.recalc_formula_cells(); // Now that the document is fully populated, access its content. 
diff --git a/doc_example/spreadsheet_doc_2.cpp b/doc_example/spreadsheet_doc_2.cpp index 3420ae609985507482c7bb89dd542af2f9043e47..614b50b8512f8650ac208000ed2c965ec4cf8fbf 100644 --- a/doc_example/spreadsheet_doc_2.cpp +++ b/doc_example/spreadsheet_doc_2.cpp @@ -34,10 +34,11 @@ public: int main() { std::filesystem::path input_dir = std::getenv("INPUTDIR"); + auto filepath = input_dir / "multi-sheets.ods"; my_empty_import_factory factory; orcus::orcus_ods loader(&factory); - loader.read_file(input_dir / "multi-sheets.ods"); + loader.read_file(filepath.native()); return EXIT_SUCCESS; } diff --git a/doc_example/spreadsheet_doc_2_sheets_no_string_pool.cpp b/doc_example/spreadsheet_doc_2_sheets_no_string_pool.cpp index 94049910f11ac182351a796fff22846050798c7f..ea58d5e1db3478b8a548b25732c07f63b5c1a966 100644 --- a/doc_example/spreadsheet_doc_2_sheets_no_string_pool.cpp +++ b/doc_example/spreadsheet_doc_2_sheets_no_string_pool.cpp @@ -113,10 +113,11 @@ public: int main() { std::filesystem::path input_dir = std::getenv("INPUTDIR"); + auto filepath = input_dir / "multi-sheets.ods"; my_import_factory factory; orcus::orcus_ods loader(&factory); - loader.read_file(input_dir / "multi-sheets.ods"); + loader.read_file(filepath.native()); return EXIT_SUCCESS; } diff --git a/doc_example/spreadsheet_doc_2_sheets_with_formula.cpp b/doc_example/spreadsheet_doc_2_sheets_with_formula.cpp index 6f44aaace977449c97ca3fc60db5b6e00d74c626..29511e47247c17e4d396233d803337764c2e1eda 100644 --- a/doc_example/spreadsheet_doc_2_sheets_with_formula.cpp +++ b/doc_example/spreadsheet_doc_2_sheets_with_formula.cpp @@ -280,10 +280,11 @@ public: int main() { std::filesystem::path input_dir = std::getenv("INPUTDIR"); + auto filepath = input_dir / "multi-sheets.ods"; my_import_factory factory; orcus::orcus_ods loader(&factory); - loader.read_file(input_dir / "multi-sheets.ods"); + loader.read_file(filepath.native()); return EXIT_SUCCESS; } diff --git 
a/doc_example/spreadsheet_doc_2_sheets_with_string_pool.cpp b/doc_example/spreadsheet_doc_2_sheets_with_string_pool.cpp index 2888a580253730f554bcdf88bdc49897414c3104..76fcf807e57a081abe54e4a2cabd29e5ca8cfeb7 100644 --- a/doc_example/spreadsheet_doc_2_sheets_with_string_pool.cpp +++ b/doc_example/spreadsheet_doc_2_sheets_with_string_pool.cpp @@ -194,10 +194,11 @@ public: int main() { std::filesystem::path input_dir = std::getenv("INPUTDIR"); + auto filepath = input_dir / "multi-sheets.ods"; my_import_factory factory; orcus::orcus_ods loader(&factory); - loader.read_file(input_dir / "multi-sheets.ods"); + loader.read_file(filepath.native()); return EXIT_SUCCESS; } diff --git a/include/orcus/cell_buffer.hpp b/include/orcus/cell_buffer.hpp index e6b82ec3db9b06188bf5a5638e6834a36ba0eea4..60df72887d527fe9267cda8eb1c3e5dbdf109218 100644 --- a/include/orcus/cell_buffer.hpp +++ b/include/orcus/cell_buffer.hpp @@ -15,20 +15,24 @@ namespace orcus { /** - * Temporary cell buffer used to convert cell values when needed. This is - * used in the sax and csv parsers. + * Temporary cell buffer used to decode encoded cell values. This is used in + * the sax, json and csv parsers. */ class ORCUS_PSR_DLLPUBLIC cell_buffer { std::string m_buffer; - size_t m_buf_size; /// Logical buffer size. May differ from the actual buffer size. 
+ size_t m_buf_size; public: + cell_buffer(const cell_buffer&) = delete; + cell_buffer(); + ~cell_buffer(); void append(const char* p, size_t len); void reset(); - const char* get() const; - size_t size() const; + + std::string_view str() const; + bool empty() const; }; diff --git a/include/orcus/css_parser.hpp b/include/orcus/css_parser.hpp index 53a5aadccf173c057b992b42b90f52dc839a9e92..93bbc146a8d23955104527630a1ce8ab4303c708 100644 --- a/include/orcus/css_parser.hpp +++ b/include/orcus/css_parser.hpp @@ -31,40 +31,139 @@ namespace orcus { class css_handler { public: - void at_rule_name(const char* p, size_t n) + /** + * Called upon encountering an at-rule. + * + * @param name name of the at-rule. + */ + void at_rule_name(std::string_view name) { - (void)p; (void)n; + (void)name; } - void simple_selector_type(const char* p, size_t n) + /** + * Called upon encountering a simple selector type. A simple selector may + * consist of + * + * @code{.txt} + * <type>.<class>#<id> + * @endcode + * + * and this function only passes the type part of the simple selector + * expression. + * + * @param type simple selector type. + */ + void simple_selector_type(std::string_view type) { - (void)p; (void)n; + (void)type; } - void simple_selector_class(const char* p, size_t n) + /** + * Called upon encountering a simple selector class. A simple selector may + * consist of + * + * @code{.txt} + * <type>.<class>#<id> + * @endcode + * + * and this function only passes the class part of the simple selector + * expression. + * + * @param cls simple selector class. + */ + void simple_selector_class(std::string_view cls) { - (void)p; (void)n; + (void)cls; } + /** + * Called upon encountering a pseudo element of a simple selector. For + * instance, given the following CSS block: + * + * @code{.css} + * p::first-line { + * color: blue; + * text-transform: uppercase; + * } + * @endcode + * + * the `first-line` part is the pseudo element of the selector named `p`. + * + * @param pe pseudo element of a simple selector. 
+ */ void simple_selector_pseudo_element(orcus::css::pseudo_element_t pe) { (void)pe; } + /** + * Called upon encountering a pseudo class of a simple selector. For + * instance, given the following CSS block: + * + * @code{.css} + * button:hover { + * color: blue; + * } + * @endcode + * + * the `hover` part is the pseudo class of the selector named `button`. + * + * @param pc pseudo class of a simple selector. + */ void simple_selector_pseudo_class(orcus::css::pseudo_class_t pc) { (void)pc; } - void simple_selector_id(const char* p, size_t n) + /** + * Called upon encountering a simple selector id. A simple selector may + * consist of + * + * @code{.txt} + * <type>.<class>#<id> + * @endcode + * + * and this function only passes the id part of the simple selector + * expression. + * + * @param id simple selector id. + */ + void simple_selector_id(std::string_view id) { - (void)p; (void)n; + (void)id; } + /** + * Called at the end of a simple selector expression. + * + * @todo find out the difference between a simple selector and a selector, + * and document it. + */ void end_simple_selector() {} + /** + * Called at the end of a selector expression. + * + * @todo find out the difference between a simple selector and a selector, + * and document it. + */ void end_selector() {} + /** + * Called upon encountering a combinator. A combinator is an operator that + * combines other selectors. Given the following CSS block: + * + * @code{.css} + * div > p { + * background-color: yellow; + * } + * @endcode + * + * the `>` is the combinator that combines the `div` and `p` selectors. + * + * @param combinator type of combinator encountered. + */ void combinator(orcus::css::combinator_t combinator) { (void)combinator; @@ -73,23 +172,21 @@ public: /** * Called at each property name. * - * @param p pointer to the char-array containing the property name string. - * @param n length of the property name string. + * @param name property name string. 
*/ - void property_name(const char* p, size_t n) + void property_name(std::string_view name) { - (void)p; (void)n; + (void)name; } /** * Called at each ordinary property value string. * - * @param p pointer to the char-array containing the value string. - * @param n length of the value string. + * @param value value string. */ - void value(const char* p, size_t n) + void value(std::string_view value) { - (void)p; (void)n; + (void)value; } /** @@ -147,12 +244,11 @@ public: /** * Called at each URL value of a property. * - * @param p pointer to the char-array containing the URL value string. - * @param n length of the URL value string. + * @param url URL value string. */ - void url(const char* p, size_t n) + void url(std::string_view url) { - (void)p; (void)n; + (void)url; } /** @@ -178,23 +274,36 @@ public: void end_block() {} /** - * Called at the beginning of each property. + * Called at the beginning of a single property expression. Each property + * expression may consist of + * + * @code{.txt} + * : , ..., + * @endcode + * + * terminated by either a `;` or `}`. */ void begin_property() {} /** - * Called at the end of each property. + * Called at the end of a single property expression. */ void end_property() {} }; -template +/** + * Parser for CSS documents. + * + * @tparam HandlerT Handler type with member functions for event callbacks. + * Refer to css_handler. 
+ */ +template class css_parser : public css::parser_base { public: - typedef _Handler handler_type; + typedef HandlerT handler_type; - css_parser(const char* p, size_t n, handler_type& hdl); + css_parser(std::string_view content, handler_type& hdl); void parse(); private: @@ -221,8 +330,8 @@ private: }; template -css_parser<_Handler>::css_parser(const char* p, size_t n, handler_type& hdl) : - css::parser_base(p, n), m_handler(hdl) {} +css_parser<_Handler>::css_parser(std::string_view content, handler_type& hdl) : + css::parser_base(content), m_handler(hdl) {} template void css_parser<_Handler>::parse() @@ -279,7 +388,7 @@ void css_parser<_Handler>::rule() block(); break; default: - css::parse_error::throw_with("rule: failed to parse '", c, "'"); + parse_error::throw_with("rule: failed to parse '", c, "'", offset()); } } } @@ -292,14 +401,14 @@ void css_parser<_Handler>::at_rule_name() next(); char c = cur_char(); if (!is_alpha(c)) - throw css::parse_error("at_rule_name: first character of an at-rule name must be an alphabet."); + throw parse_error("at_rule_name: first character of an at-rule name must be an alphabet.", offset()); const char* p; size_t len; identifier(p, len); skip_blanks(); - m_handler.at_rule_name(p, len); + m_handler.at_rule_name({p, len}); #if ORCUS_DEBUG_CSS std::string foo(p, len); std::cout << "at-rule name: " << foo.c_str() << std::endl; @@ -342,7 +451,7 @@ void css_parser<_Handler>::simple_selector_name() std::string s(p, n); cout << " type=" << s; #endif - m_handler.simple_selector_type(p, n); + m_handler.simple_selector_type({p, n}); } bool in_loop = true; @@ -354,7 +463,7 @@ void css_parser<_Handler>::simple_selector_name() { next(); identifier(p, n); - m_handler.simple_selector_class(p, n); + m_handler.simple_selector_class({p, n}); #if ORCUS_DEBUG_CSS std::string s(p, n); std::cout << " class=" << s; @@ -365,7 +474,7 @@ void css_parser<_Handler>::simple_selector_name() { next(); identifier(p, n); - m_handler.simple_selector_id(p, n); 
+ m_handler.simple_selector_id({p, n}); #if ORCUS_DEBUG_CSS std::string s(p, n); std::cout << " id=" << s; @@ -383,8 +492,8 @@ void css_parser<_Handler>::simple_selector_name() identifier(p, n); css::pseudo_element_t elem = css::to_pseudo_element({p, n}); if (!elem) - css::parse_error::throw_with( - "selector_name: unknown pseudo element '", p, n, "'"); + parse_error::throw_with( + "selector_name: unknown pseudo element '", {p, n}, "'", offset()); m_handler.simple_selector_pseudo_element(elem); } @@ -394,8 +503,8 @@ void css_parser<_Handler>::simple_selector_name() identifier(p, n); css::pseudo_class_t pc = css::to_pseudo_class({p, n}); if (!pc) - css::parse_error::throw_with( - "selector_name: unknown pseudo class '", p, n, "'"); + parse_error::throw_with( + "selector_name: unknown pseudo class '", {p, n}, "'", offset()); m_handler.simple_selector_pseudo_class(pc); } @@ -424,15 +533,15 @@ void css_parser<_Handler>::property_name() assert(has_char()); char c = cur_char(); if (!is_alpha(c) && c != '.') - css::parse_error::throw_with( - "property_name: first character of a name must be an alphabet or a dot, but found '", c, "'"); + parse_error::throw_with( + "property_name: first character of a name must be an alphabet or a dot, but found '", c, "'", offset()); const char* p; size_t len; identifier(p, len); skip_comments_and_blanks(); - m_handler.property_name(p, len); + m_handler.property_name({p, len}); #if ORCUS_DEBUG_CSS std::string foo(p, len); std::cout << "property name: " << foo.c_str() << std::endl; @@ -447,7 +556,7 @@ void css_parser<_Handler>::property() m_handler.begin_property(); property_name(); if (cur_char() != ':') - throw css::parse_error("property: ':' expected."); + throw parse_error("property: ':' expected.", offset()); next(); skip_comments_and_blanks(); @@ -488,7 +597,7 @@ void css_parser<_Handler>::quoted_value(char c) next(); skip_blanks(); - m_handler.value(p, len); + m_handler.value({p, len}); #if ORCUS_DEBUG_CSS std::string foo(p, len); 
std::cout << "quoted value: " << foo.c_str() << std::endl; @@ -516,7 +625,7 @@ void css_parser<_Handler>::value() return; } - m_handler.value(v.data(), v.size()); + m_handler.value(v); skip_comments_and_blanks(); @@ -531,7 +640,7 @@ void css_parser<_Handler>::function_value(std::string_view v) assert(cur_char() == '('); css::property_function_t func = css::to_property_function(v); if (func == css::property_function_t::unknown) - css::parse_error::throw_with("function_value: unknown function '", v, "'"); + parse_error::throw_with("function_value: unknown function '", v, "'", offset()); // Move to the first character of the first argument. next(); @@ -555,12 +664,12 @@ void css_parser<_Handler>::function_value(std::string_view v) function_url(); break; default: - css::parse_error::throw_with("function_value: unhandled function '", v, "'"); + parse_error::throw_with("function_value: unhandled function '", v, "'", offset()); } char c = cur_char(); if (c != ')') - css::parse_error::throw_with("function_value: ')' expected but '", c, "' found."); + parse_error::throw_with("function_value: ')' expected but '", c, "' found.", offset()); next(); skip_comments_and_blanks(); @@ -588,7 +697,7 @@ void css_parser<_Handler>::function_rgb(bool alpha) c = cur_char(); if (c != ',') - css::parse_error::throw_with("function_rgb: ',' expected but '", c, "' found."); + parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.", offset()); next(); skip_comments_and_blanks(); @@ -598,7 +707,7 @@ void css_parser<_Handler>::function_rgb(bool alpha) { c = cur_char(); if (c != ',') - css::parse_error::throw_with("function_rgb: ',' expected but '", c, "' found."); + parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.", offset()); next(); skip_comments_and_blanks(); @@ -635,7 +744,7 @@ void css_parser<_Handler>::function_hsl(bool alpha) char c = cur_char(); if (c != ',') - css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found."); + 
parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.", offset()); next(); skip_comments_and_blanks(); @@ -646,7 +755,7 @@ void css_parser<_Handler>::function_hsl(bool alpha) c = cur_char(); if (c != ',') - css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found."); + parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.", offset()); next(); skip_comments_and_blanks(); @@ -663,7 +772,7 @@ void css_parser<_Handler>::function_hsl(bool alpha) c = cur_char(); if (c != ',') - css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found."); + parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.", offset()); next(); skip_comments_and_blanks(); @@ -687,7 +796,7 @@ void css_parser<_Handler>::function_url() literal(p, len, c); next(); skip_comments_and_blanks(); - m_handler.url(p, len); + m_handler.url({p, len}); #if ORCUS_DEBUG_CSS std::cout << "url(" << std::string(p, len) << ")" << std::endl; #endif @@ -699,7 +808,7 @@ void css_parser<_Handler>::function_url() size_t len; skip_to_or_blank(p, len, ")"); skip_comments_and_blanks(); - m_handler.url(p, len); + m_handler.url({p, len}); #if ORCUS_DEBUG_CSS std::cout << "url(" << std::string(p, len) << ")" << std::endl; #endif @@ -755,7 +864,7 @@ void css_parser<_Handler>::block() } if (cur_char() != '}') - throw css::parse_error("block: '}' expected."); + throw parse_error("block: '}' expected.", offset()); m_handler.end_block(); diff --git a/include/orcus/css_parser_base.hpp b/include/orcus/css_parser_base.hpp index 0964d7c17f1967afcdbf7d85e78acbf393b27e21..451426911585430034193f19ccbed19a9536037a 100644 --- a/include/orcus/css_parser_base.hpp +++ b/include/orcus/css_parser_base.hpp @@ -18,20 +18,10 @@ namespace orcus { namespace css { -class ORCUS_PSR_DLLPUBLIC parse_error : public ::orcus::parse_error -{ -public: - parse_error(const std::string& msg); - - static void throw_with(const char* msg_before, char c, const char* 
msg_after); - static void throw_with(const char* msg_before, const char* p, size_t n, const char* msg_after); - static void throw_with(const char* msg_before, std::string_view s, const char* msg_after); -}; - class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base { public: - parser_base(const char* p, size_t n); + parser_base(std::string_view content); protected: diff --git a/include/orcus/css_types.hpp b/include/orcus/css_types.hpp index 43f8c41fc4a4a592927f4809d73b5bf7ab80aae0..75386ea3ceb14bb5459b10f74c483db411635da0 100644 --- a/include/orcus/css_types.hpp +++ b/include/orcus/css_types.hpp @@ -18,11 +18,11 @@ namespace orcus { namespace css { enum class combinator_t { - /// 'E F' where F is a descendant of E. + /// `E F` where `F` is a descendant of `E`. descendant, - /// 'E > F' where F is a direct child of E. + /// `E > F` where `F` is a direct child of `E`. direct_child, - /// 'E + F' where F is a direct sibling of E where E precedes F. + /// `E + F` where `F` is a direct sibling of `E` where `E` precedes `F`. next_sibling }; diff --git a/include/orcus/csv_parser.hpp b/include/orcus/csv_parser.hpp index 27b4f92403fb851e7a5148cd62ffffe065cd7228..ae1dcd0effccf127110e97fd5bca0fae7ddc66e6 100644 --- a/include/orcus/csv_parser.hpp +++ b/include/orcus/csv_parser.hpp @@ -38,8 +38,7 @@ public: /** * Called after every cell is parsed. * - * @param p pointer to the first character of a cell content. - * @param n number of characters the cell content consists of. + * @param value cell content. * @param transient when true, the text content has been converted and is * stored in a temporary buffer. In such case, there is * no guarantee that the text content remain available @@ -47,19 +46,25 @@ public: * the text content is guaranteed to be valid so long as * the original CSV stream content is valid. 
*/ - void cell(const char* p, size_t n, bool transient) + void cell(std::string_view value, bool transient) { - (void)p; (void)n; (void)transient; + (void)value; (void)transient; } }; -template +/** + * Parser for CSV documents. + * + * @tparam HandlerT Handler type with member functions for event callbacks. + * Refer to csv_handler. + */ +template class csv_parser : public csv::parser_base { public: - typedef _Handler handler_type; + typedef HandlerT handler_type; - csv_parser(const char* p, size_t n, handler_type& hdl, const csv::parser_config& config); + csv_parser(std::string_view content, handler_type& hdl, const csv::parser_config& config); void parse(); private: @@ -82,8 +87,8 @@ private: template csv_parser<_Handler>::csv_parser( - const char* p, size_t n, handler_type& hdl, const csv::parser_config& config) : - csv::parser_base(p, n, config), m_handler(hdl) {} + std::string_view content, handler_type& hdl, const csv::parser_config& config) : + csv::parser_base(content, config), m_handler(hdl) {} template void csv_parser<_Handler>::parse() @@ -129,7 +134,7 @@ void csv_parser<_Handler>::row() } if (!is_delim(c)) - throw orcus::csv::parse_error("expected a delimiter"); + throw orcus::parse_error("expected a delimiter", offset()); next(); @@ -199,14 +204,14 @@ void csv_parser<_Handler>::quoted_cell() } // Closing quote. - m_handler.cell(p0, len-1, false); + m_handler.cell({p0, len-1}, false); next(); skip_blanks(); return; } // Stream ended prematurely. Handle it gracefully. - m_handler.cell(p0, len, false); + m_handler.cell({p0, len}, false); } template @@ -250,14 +255,14 @@ void csv_parser<_Handler>::parse_cell_with_quote(const char* p0, size_t len0) // buffer, push the value to the handler, and exit normally. m_cell_buf.append(p_cur, cur_len); - m_handler.cell(m_cell_buf.get(), m_cell_buf.size(), true); + m_handler.cell(m_cell_buf.str(), true); next(); skip_blanks(); return; } // Stream ended prematurely. 
- throw csv::parse_error("stream ended prematurely while parsing quoted cell."); + throw parse_error("stream ended prematurely while parsing quoted cell.", offset()); } template @@ -286,7 +291,7 @@ void csv_parser<_Handler>::push_cell_value(const char* p, size_t n) } } - m_handler.cell(p, len, false); + m_handler.cell({p, len}, false); #if ORCUS_DEBUG_CSV if (len) cout << "(cell:'" << std::string(p, len) << "')" << endl; diff --git a/include/orcus/csv_parser_base.hpp b/include/orcus/csv_parser_base.hpp index e7d4be41184ee1fb31aab4fd5ea7e6692cec07be..d1bb25a407b4ae72e7f872fef5d28512314de628 100644 --- a/include/orcus/csv_parser_base.hpp +++ b/include/orcus/csv_parser_base.hpp @@ -31,7 +31,7 @@ using std::endl; namespace orcus { namespace csv { /** - * Run-time configuration object for orcus::csv_parser. + * Run-time configuration object for csv_parser. */ struct ORCUS_PSR_DLLPUBLIC parser_config { @@ -54,15 +54,6 @@ struct ORCUS_PSR_DLLPUBLIC parser_config parser_config(); }; -class ORCUS_PSR_DLLPUBLIC parse_error : public std::exception -{ - std::string m_msg; -public: - parse_error(const std::string& msg); - virtual ~parse_error() throw(); - virtual const char* what() const throw(); -}; - class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base { protected: @@ -70,7 +61,7 @@ protected: cell_buffer m_cell_buf; protected: - parser_base(const char* p, size_t n, const parser_config& config); + parser_base(std::string_view content, const parser_config& config); /** * This is different from the global 'is_blank' in that it doesn't treat diff --git a/include/orcus/exception.hpp b/include/orcus/exception.hpp index 3d82535bee6f9488e1a786a36fbb35ace1dbfc88..5d1aa827b9fe8b168aa8263e62264b98f229240b 100644 --- a/include/orcus/exception.hpp +++ b/include/orcus/exception.hpp @@ -18,8 +18,8 @@ namespace orcus { class ORCUS_PSR_DLLPUBLIC general_error : public std::exception { public: - explicit general_error(const std::string& msg); - explicit general_error(const 
std::string& cls, const std::string& msg); + explicit general_error(std::string msg); + explicit general_error(std::string_view cls, std::string_view msg); virtual ~general_error() noexcept; virtual const char* what() const noexcept; @@ -27,7 +27,7 @@ protected: void append_msg(const std::string& s); private: - ::std::string m_msg; + std::string m_msg; }; class ORCUS_PSR_DLLPUBLIC invalid_arg_error : public std::invalid_argument @@ -40,28 +40,28 @@ public: class ORCUS_PSR_DLLPUBLIC xml_structure_error : public general_error { public: - explicit xml_structure_error(const ::std::string& msg); + explicit xml_structure_error(std::string msg); virtual ~xml_structure_error() noexcept; }; class ORCUS_PSR_DLLPUBLIC json_structure_error : public general_error { public: - explicit json_structure_error(const ::std::string& msg); + explicit json_structure_error(std::string msg); virtual ~json_structure_error() noexcept; }; class ORCUS_PSR_DLLPUBLIC invalid_map_error : public general_error { public: - explicit invalid_map_error(const ::std::string& msg); + explicit invalid_map_error(std::string msg); virtual ~invalid_map_error() noexcept; }; class ORCUS_PSR_DLLPUBLIC value_error : public general_error { public: - explicit value_error(const std::string& msg); + explicit value_error(std::string msg); virtual ~value_error() noexcept; }; @@ -71,7 +71,7 @@ public: class ORCUS_PSR_DLLPUBLIC xpath_error : public general_error { public: - xpath_error(const std::string& msg); + xpath_error(std::string msg); virtual ~xpath_error() noexcept; }; @@ -82,10 +82,59 @@ public: class ORCUS_PSR_DLLPUBLIC interface_error : public general_error { public: - interface_error(const std::string& msg); + interface_error(std::string msg); virtual ~interface_error() noexcept; }; +/** + * Exception related to a parsing error that includes an offset in the stream + * where the error occurred. 
+ */ +class ORCUS_PSR_DLLPUBLIC parse_error : public general_error +{ + std::ptrdiff_t m_offset; /// offset in the stream where the error occurred. + +protected: + parse_error(std::string_view cls, std::string_view msg, std::ptrdiff_t offset); + +public: + parse_error(std::string msg, std::ptrdiff_t offset); + + /** + * Get the offset in a stream associated with the error. + * + * @return offset in a stream where the error occurred. + */ + std::ptrdiff_t offset() const; + + static void throw_with( + std::string_view msg_before, char c, std::string_view msg_after, std::ptrdiff_t offset); + + static void throw_with( + std::string_view msg_before, std::string_view msg, std::string_view msg_after, std::ptrdiff_t offset); +}; + +/** + * This exception is thrown when SAX parser detects a malformed XML document. + */ +class ORCUS_PSR_DLLPUBLIC malformed_xml_error : public parse_error +{ +public: + malformed_xml_error() = delete; + malformed_xml_error(std::string_view msg, std::ptrdiff_t offset); + virtual ~malformed_xml_error(); +}; + +/** + * Exception related to parsing of zip archive stream. + */ +class ORCUS_PSR_DLLPUBLIC zip_error : public general_error +{ +public: + zip_error(std::string_view msg); + virtual ~zip_error(); +}; + namespace detail { /** diff --git a/include/orcus/format_detection.hpp b/include/orcus/format_detection.hpp index ee01b8e63de2b9d2dfc61d24c4a7fd7255f34681..049b9da92847c65e838f39ad92e43f3c363604d8 100644 --- a/include/orcus/format_detection.hpp +++ b/include/orcus/format_detection.hpp @@ -15,7 +15,12 @@ namespace orcus { -ORCUS_DLLPUBLIC format_t detect(const unsigned char* buffer, size_t length); +/** + * Detect the format of a given document stream. + * + * @param strm document stream to detect the format of. 
+ */ +ORCUS_DLLPUBLIC format_t detect(std::string_view strm); } diff --git a/include/orcus/interface.hpp b/include/orcus/interface.hpp index 7db68e999a380810adcefc8b42bf9da0eae02457..e6237929110e27930d89a8caa121c0c015bcc5b0 100644 --- a/include/orcus/interface.hpp +++ b/include/orcus/interface.hpp @@ -20,6 +20,9 @@ struct config; namespace iface { +/** + * Base interface for import filters. + */ class ORCUS_DLLPUBLIC import_filter { struct impl; @@ -29,23 +32,57 @@ public: import_filter(format_t input); virtual ~import_filter(); - /// expects a system path to a local file - virtual void read_file(const std::string& filepath) = 0; + /** + * Read the content of a file. + * + * @param filepath path to a local file. It must be a system path. + */ + virtual void read_file(std::string_view filepath) = 0; - /// expects the whole content of the file + /** + * Read the content of an in-memory stream. + * + * @param stream in-memory stream to read from. + */ virtual void read_stream(std::string_view stream) = 0; + /** + * Get the name of a filter. + * + * @return name of a filter. + */ virtual std::string_view get_name() const = 0; void set_config(const orcus::config& v); const orcus::config& get_config() const; }; +/** + * Base interface for document content dumpers. + */ class ORCUS_DLLPUBLIC document_dumper { public: virtual ~document_dumper(); + + /** + * Dump the content of a document in a specified format, either into a set of + * multiple files, or a single file. + * + * @param format Output format type in which to dump the content. + * @param output Depending on the output format type, this can be either an + * output directory path where multiple output files get + * created, or an output file path where the content of the + * entire document gets dumped into. + */ virtual void dump(dump_format_t format, const std::string& output) const = 0; + + /** + * Dump the content of a document in a special "check" format suitable as + * unit testing controls. 
+ * + * @param os output stream to write the transformed content to. + */ virtual void dump_check(std::ostream& os) const = 0; }; diff --git a/include/orcus/json_document_tree.hpp b/include/orcus/json_document_tree.hpp index ffba35b8c1ead3de2a8e7f1600e2bccd12a61d62..e558c38cbcdc22fc5054b76ea429435167aeb32e 100644 --- a/include/orcus/json_document_tree.hpp +++ b/include/orcus/json_document_tree.hpp @@ -32,7 +32,7 @@ class ORCUS_DLLPUBLIC document_error : public general_error { public: document_error(const std::string& msg); - virtual ~document_error() throw(); + virtual ~document_error(); }; /** @@ -44,7 +44,7 @@ class ORCUS_DLLPUBLIC key_value_error : public document_error { public: key_value_error(const std::string& msg); - virtual ~key_value_error() throw(); + virtual ~key_value_error(); }; enum class node_t : uint8_t diff --git a/include/orcus/json_parser.hpp b/include/orcus/json_parser.hpp index ef22b3a8b98b9a80b33b7eeeb9c446be85a89860..e37d50a9440bb64a5f05d650b8db4b885129e202 100644 --- a/include/orcus/json_parser.hpp +++ b/include/orcus/json_parser.hpp @@ -8,7 +8,7 @@ #ifndef INCLUDED_ORCUS_JSON_PARSER_HPP #define INCLUDED_ORCUS_JSON_PARSER_HPP -#include "orcus/json_parser_base.hpp" +#include "json_parser_base.hpp" #include #include @@ -46,17 +46,16 @@ public: /** * Called when a key value string of an object is encountered. * - * @param p pointer to the first character of the key value string. - * @param len length of the key value string. + * @param key key value string. * @param transient true if the string value is stored in a temporary * buffer which is not guaranteed to hold the string * value after the end of this callback. When false, the * pointer points to somewhere in the JSON stream being * parsed. 
*/ - void object_key(const char* p, size_t len, bool transient) + void object_key(std::string_view key, bool transient) { - (void)p; (void)len; (void)transient; + (void)key; (void)transient; } /** @@ -82,17 +81,16 @@ public: /** * Called when a string value is encountered. * - * @param p pointer to the first character of the string value. - * @param len length of the string value. + * @param val string value. * @param transient true if the string value is stored in a temporary * buffer which is not guaranteed to hold the string * value after the end of this callback. When false, the * pointer points to somewhere in the JSON stream being * parsed. */ - void string(const char* p, size_t len, bool transient) + void string(std::string_view val, bool transient) { - (void)p; (void)len; (void)transient; + (void)val; (void)transient; } /** @@ -107,23 +105,24 @@ public: }; /** - * Low-level JSON parser. The caller must provide a handler class to - * receive callbacks. + * Parser for JSON documents. + * + * @tparam HandlerT Handler type with member functions for event callbacks. + * Refer to json_handler. */ -template +template class json_parser : public json::parser_base { public: - typedef _Handler handler_type; + typedef HandlerT handler_type; /** * Constructor. * - * @param p pointer to a string stream containing JSON string. - * @param n size of the stream. + * @param content string stream containing JSON string. * @param hdl handler class instance. */ - json_parser(const char* p, size_t n, handler_type& hdl); + json_parser(std::string_view content, handler_type& hdl); /** * Call this method to start parsing. 
@@ -145,8 +144,8 @@ private: template json_parser<_Handler>::json_parser( - const char* p, size_t n, handler_type& hdl) : - json::parser_base(p, n), m_handler(hdl) {} + std::string_view content, handler_type& hdl) : + json::parser_base(content), m_handler(hdl) {} template void json_parser<_Handler>::parse() @@ -157,10 +156,10 @@ void json_parser<_Handler>::parse() if (has_char()) root_value(); else - throw json::parse_error("parse: no json content could be found in file", offset()); + throw parse_error("parse: no json content could be found in file", offset()); if (has_char()) - throw json::parse_error("parse: unexpected trailing string segment.", offset()); + throw parse_error("parse: unexpected trailing string segment.", offset()); m_handler.end_parse(); } @@ -179,7 +178,7 @@ void json_parser<_Handler>::root_value() object(); break; default: - json::parse_error::throw_with( + parse_error::throw_with( "root_value: either '[' or '{' was expected, but '", cur_char(), "' was found.", offset()); } } @@ -221,7 +220,7 @@ void json_parser<_Handler>::value() string(); break; default: - json::parse_error::throw_with("value: failed to parse '", cur_char(), "'.", offset()); + parse_error::throw_with("value: failed to parse '", cur_char(), "'.", offset()); } } @@ -254,12 +253,12 @@ void json_parser<_Handler>::array() case ',': if (next_char() == ']') { - json::parse_error::throw_with( + parse_error::throw_with( "array: ']' expected but '", cur_char(), "' found.", offset() ); } continue; default: - json::parse_error::throw_with( + parse_error::throw_with( "array: either ']' or ',' expected, but '", cur_char(), "' found.", offset()); } } @@ -272,7 +271,7 @@ void json_parser<_Handler>::array() } } - throw json::parse_error("array: failed to parse array.", offset()); + throw parse_error("array: failed to parse array.", offset()); } template @@ -294,14 +293,14 @@ void json_parser<_Handler>::object() { skip_ws(); if (!has_char()) - throw json::parse_error("object: stream ended 
prematurely before reaching a key.", offset()); + throw parse_error("object: stream ended prematurely before reaching a key.", offset()); switch (cur_char()) { case '}': if (require_new_key) { - json::parse_error::throw_with( + parse_error::throw_with( "object: new key expected, but '", cur_char(), "' found.", offset()); } m_handler.end_object(); @@ -311,7 +310,7 @@ void json_parser<_Handler>::object() case '"': break; default: - json::parse_error::throw_with( + parse_error::throw_with( "object: '\"' was expected, but '", cur_char(), "' found.", offset()); } require_new_key = false; @@ -321,32 +320,32 @@ void json_parser<_Handler>::object() { // Parsing was unsuccessful. if (res.length == parse_quoted_string_state::error_no_closing_quote) - throw json::parse_error("object: stream ended prematurely before reaching the closing quote of a key.", offset()); + throw parse_error("object: stream ended prematurely before reaching the closing quote of a key.", offset()); else if (res.length == parse_quoted_string_state::error_illegal_escape_char) - json::parse_error::throw_with( + parse_error::throw_with( "object: illegal escape character '", cur_char(), "' in key value.", offset()); else - throw json::parse_error("object: unknown error while parsing a key value.", offset()); + throw parse_error("object: unknown error while parsing a key value.", offset()); } - m_handler.object_key(res.str, res.length, res.transient); + m_handler.object_key({res.str, res.length}, res.transient); skip_ws(); if (cur_char() != ':') - json::parse_error::throw_with( + parse_error::throw_with( "object: ':' was expected, but '", cur_char(), "' found.", offset()); next(); skip_ws(); if (!has_char()) - throw json::parse_error("object: stream ended prematurely before reaching a value.", offset()); + throw parse_error("object: stream ended prematurely before reaching a value.", offset()); value(); skip_ws(); if (!has_char()) - throw json::parse_error("object: stream ended prematurely before reaching 
either '}' or ','.", offset()); + throw parse_error("object: stream ended prematurely before reaching either '}' or ','.", offset()); switch (cur_char()) { @@ -359,12 +358,12 @@ void json_parser<_Handler>::object() require_new_key = true; continue; default: - json::parse_error::throw_with( + parse_error::throw_with( "object: either '}' or ',' expected, but '", cur_char(), "' found.", offset()); } } - throw json::parse_error("object: closing '}' was never reached.", offset()); + throw parse_error("object: closing '}' was never reached.", offset()); } template @@ -383,17 +382,17 @@ void json_parser<_Handler>::string() parse_quoted_string_state res = parse_string(); if (res.str) { - m_handler.string(res.str, res.length, res.transient); + m_handler.string({res.str, res.length}, res.transient); return; } // Parsing was unsuccessful. if (res.length == parse_quoted_string_state::error_no_closing_quote) - throw json::parse_error("string: stream ended prematurely before reaching the closing quote.", offset()); + throw parse_error("string: stream ended prematurely before reaching the closing quote.", offset()); else if (res.length == parse_quoted_string_state::error_illegal_escape_char) - json::parse_error::throw_with("string: illegal escape character '", cur_char(), "'.", offset()); + parse_error::throw_with("string: illegal escape character '", cur_char(), "'.", offset()); else - throw json::parse_error("string: unknown error.", offset()); + throw parse_error("string: unknown error.", offset()); } } diff --git a/include/orcus/json_parser_base.hpp b/include/orcus/json_parser_base.hpp index 4e70f46977f3797ae8dcfd5b2c3f55c6ac6f6305..461808ebdddf1a6dcc588fc5d91ebf7fa68b607d 100644 --- a/include/orcus/json_parser_base.hpp +++ b/include/orcus/json_parser_base.hpp @@ -8,25 +8,14 @@ #ifndef INCLUDED_ORCUS_JSON_PARSER_BASE_HPP #define INCLUDED_ORCUS_JSON_PARSER_BASE_HPP -#include "orcus/parser_base.hpp" -#include "orcus/parser_global.hpp" +#include "parser_base.hpp" +#include 
"parser_global.hpp" +#include "exception.hpp" #include namespace orcus { namespace json { -class ORCUS_PSR_DLLPUBLIC parse_error : public ::orcus::parse_error -{ -public: - parse_error(const std::string& msg, std::ptrdiff_t offset); - - static void throw_with( - const char* msg_before, char c, const char* msg_after, std::ptrdiff_t offset); - - static void throw_with( - const char* msg_before, const char* p, size_t n, const char* msg_after, std::ptrdiff_t offset); -}; - class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base { struct impl; @@ -38,7 +27,7 @@ protected: parser_base(const parser_base&) = delete; parser_base& operator=(const parser_base&) = delete; - parser_base(const char* p, size_t n); + parser_base(std::string_view content); ~parser_base(); void skip_ws(); diff --git a/include/orcus/orcus_csv.hpp b/include/orcus/orcus_csv.hpp index f3f07f24f9d5a82bdd9faac457e4cc23aa519ab4..3e34c15407f201efb6df2ec00d31faccf7364043 100644 --- a/include/orcus/orcus_csv.hpp +++ b/include/orcus/orcus_csv.hpp @@ -29,7 +29,7 @@ public: orcus_csv(spreadsheet::iface::import_factory* factory); ~orcus_csv(); - virtual void read_file(const std::string& filepath) override; + virtual void read_file(std::string_view filepath) override; virtual void read_stream(std::string_view stream) override; virtual std::string_view get_name() const override; diff --git a/include/orcus/orcus_gnumeric.hpp b/include/orcus/orcus_gnumeric.hpp index a8bf3b3879050d2801ebe54dafa49c49eadd1792..54f74a27abadc7a943c7194843113c1132257061 100644 --- a/include/orcus/orcus_gnumeric.hpp +++ b/include/orcus/orcus_gnumeric.hpp @@ -30,7 +30,7 @@ public: static bool detect(const unsigned char* blob, size_t size); - virtual void read_file(const std::string& filepath) override; + virtual void read_file(std::string_view filepath) override; virtual void read_stream(std::string_view stream) override; diff --git a/include/orcus/orcus_ods.hpp b/include/orcus/orcus_ods.hpp index 
74d385a22473eeaf4974437aa0dc48e219996593..08eb19726b32733c7c31671f631e917223c78781 100644 --- a/include/orcus/orcus_ods.hpp +++ b/include/orcus/orcus_ods.hpp @@ -33,7 +33,7 @@ public: static bool detect(const unsigned char* blob, size_t size); - virtual void read_file(const std::string& filepath) override; + virtual void read_file(std::string_view filepath) override; virtual void read_stream(std::string_view stream) override; diff --git a/include/orcus/orcus_xls_xml.hpp b/include/orcus/orcus_xls_xml.hpp index 14300b6a8ef3f0370f3b90cb16fe3822a4c191d2..4534bfc66355a639951f3eea215663bc8e5e5b90 100644 --- a/include/orcus/orcus_xls_xml.hpp +++ b/include/orcus/orcus_xls_xml.hpp @@ -31,7 +31,7 @@ public: static bool detect(const unsigned char* blob, size_t size); - virtual void read_file(const std::string& filepath) override; + virtual void read_file(std::string_view filepath) override; virtual void read_stream(std::string_view stream) override; virtual std::string_view get_name() const override; diff --git a/include/orcus/orcus_xlsx.hpp b/include/orcus/orcus_xlsx.hpp index 6ad0ca24cb0eb68be81577c3a044f21cb8393c6e..68b01c09a3ae63591689ad75fbbae921e2a2e0e1 100644 --- a/include/orcus/orcus_xlsx.hpp +++ b/include/orcus/orcus_xlsx.hpp @@ -38,7 +38,7 @@ public: static bool detect(const unsigned char* blob, size_t size); - virtual void read_file(const std::string& filepath) override; + virtual void read_file(std::string_view filepath) override; virtual void read_stream(std::string_view stream) override; virtual std::string_view get_name() const override; diff --git a/include/orcus/parser_base.hpp b/include/orcus/parser_base.hpp index 26752849598845f358fb2bad1ca143f4e7d8cd14..7eb8d81e72d23e45c5c8ba8edddb03765547b324 100644 --- a/include/orcus/parser_base.hpp +++ b/include/orcus/parser_base.hpp @@ -8,8 +8,8 @@ #ifndef INCLUDED_ORCUS_PARSER_BASE_HPP #define INCLUDED_ORCUS_PARSER_BASE_HPP -#include "orcus/env.hpp" -#include "orcus/exception.hpp" +#include "env.hpp" +#include 
"exception.hpp" #include #include @@ -19,24 +19,6 @@ namespace orcus { -/** - * Exception related to parsing error that includes the offset in the stream - * where the error occurred. - */ -class ORCUS_PSR_DLLPUBLIC parse_error : public general_error -{ - std::ptrdiff_t m_offset; /// offset in the stream where the error occurred. -protected: - parse_error(const std::string& msg, std::ptrdiff_t offset); - parse_error(const std::string& cls, const std::string& msg, std::ptrdiff_t offset); - - static std::string build_message(const char* msg_before, char c, const char* msg_after); - static std::string build_message(const char* msg_before, const char* p, size_t n, const char* msg_after); - -public: - std::ptrdiff_t offset() const; -}; - class ORCUS_PSR_DLLPUBLIC parser_base { protected: @@ -45,21 +27,18 @@ protected: const char* const mp_begin; const char* mp_char; const char* mp_end; - const bool m_transient_stream; private: numeric_parser_type m_func_parse_numeric; protected: - parser_base(const char* p, size_t n, bool transient_stream); + parser_base(const char* p, size_t n); void set_numeric_parser(const numeric_parser_type& func) { m_func_parse_numeric = func; } - bool transient_stream() const { return m_transient_stream; } - bool has_char() const { assert(mp_char <= mp_end); diff --git a/include/orcus/sax_ns_parser.hpp b/include/orcus/sax_ns_parser.hpp index 5bd6283678f6138e92f7a9db02ffeb118d2c367f..f888fa2cd40f385636a7dbdc8028c974647d28d4 100644 --- a/include/orcus/sax_ns_parser.hpp +++ b/include/orcus/sax_ns_parser.hpp @@ -13,30 +13,39 @@ #include #include -#include #include namespace orcus { struct sax_ns_parser_element { - xmlns_id_t ns; // element namespace - std::string_view ns_alias; // element namespace alias - std::string_view name; // element name - std::ptrdiff_t begin_pos; // position of the opening brace '<'. - std::ptrdiff_t end_pos; // position of the char after the closing brace '>'. + /** Element namespace identifier. 
*/ + xmlns_id_t ns; + /** Element namespace alias. */ + std::string_view ns_alias; + /** Element name. */ + std::string_view name; + /** Position of the opening brace '<'. */ + std::ptrdiff_t begin_pos; + /** Position immediately after the closing brace '>'. */ + std::ptrdiff_t end_pos; }; struct sax_ns_parser_attribute { - xmlns_id_t ns; // attribute namespace - std::string_view ns_alias; // attribute namespace alias - std::string_view name; // attribute name - std::string_view value; // attribute value - bool transient; // whether or not the attribute value is transient. + /** Attribute namespace identifier. */ + xmlns_id_t ns; + /** Attribute namespace alias. */ + std::string_view ns_alias; + /** Attribute name. */ + std::string_view name; + /** Attribute value. */ + std::string_view value; + /** Whether or not the attribute value is transient. */ + bool transient; }; -namespace __sax { +namespace sax { namespace detail { struct entity_name { @@ -69,57 +78,151 @@ struct elem_scope xmlns_id_t ns; std::string_view name; ns_keys_type ns_keys; -}; - -typedef std::vector> elem_scopes_type; -class pop_ns_by_key -{ - xmlns_context& m_cxt; -public: - pop_ns_by_key(xmlns_context& cxt) : m_cxt(cxt) {} - void operator() (std::string_view key) - { - m_cxt.pop(key); - } + elem_scope() {} + elem_scope(const elem_scope&) = delete; + elem_scope(elem_scope&& other) = default; }; -} +using elem_scopes_type = std::vector; + +}} // namespace sax::detail class sax_ns_handler { public: - void doctype(const orcus::sax::doctype_declaration& /*dtd*/) {} + /** + * Called when a doctype declaration <!DOCTYPE ... > is encountered. + * + * @param dtd struct containing doctype declaration data. + */ + void doctype(const orcus::sax::doctype_declaration& dtd) + { + (void)dtd; + } - void start_declaration(std::string_view /*decl*/) {} + /** + * Called when <?... is encountered, where the '...' may be an + * arbitrary identifier.
One common declaration is <?xml which is + * typically given at the start of an XML stream. + * + * @param decl name of the identifier. + */ + void start_declaration(std::string_view decl) + { + (void)decl; + } - void end_declaration(std::string_view /*decl*/) {} + /** + * Called when the closing tag (>) of a <?... ?> is encountered. + * + * @param decl name of the identifier. + */ + void end_declaration(std::string_view decl) + { + (void)decl; + } - void start_element(const orcus::sax_ns_parser_element& /*elem*/) {} + /** + * Called at the start of each element. + * + * @param elem information of the element being parsed. + */ + void start_element(const orcus::sax_ns_parser_element& elem) + { + (void)elem; + } - void end_element(const orcus::sax_ns_parser_element& /*elem*/) {} + /** + * Called at the end of each element. + * + * @param elem information of the element being parsed. + */ + void end_element(const orcus::sax_ns_parser_element& elem) + { + (void)elem; + } - void characters(std::string_view /*val*/, bool /*transient*/) {} + /** + * Called when a segment of a text content is parsed. Each text content + * is a direct child of an element, which may have multiple child contents + * when the element also has child elements that are direct siblings of + * the text contents or when the text contents are split by a comment. + * + * @param val value of the text content. + * @param transient when true, the text content has been converted and is + * stored in a temporary buffer due to presence of one or + * more encoded characters, in which case the passed + * text value needs to be either immediately converted to + * a non-text value or be interned within the scope of + * the callback. + */ + void characters(std::string_view val, bool transient) + { + (void)val; + (void)transient; + } - void attribute(std::string_view /*name*/, std::string_view /*val*/) {} + /** + * Called upon parsing of an attribute of a declaration.
The value of an + * attribute is assumed to be transient thus should be consumed within the + * scope of this callback. + * + * @param name name of an attribute. + * @param val value of an attribute. + * + * @todo Perhaps we should pass the transient flag here as well like all the + * other places. + */ + void attribute(std::string_view name, std::string_view val) + { + (void)name; + (void)val; + } - void attribute(const orcus::sax_ns_parser_attribute& /*attr*/) {} + /** + * Called upon parsing of an attribute of an element. Note that when + * the attribute's transient flag is set, the attribute value is stored in + * a temporary buffer due to a presence of encoded characters, and must be + * processed within the scope of the callback. + * + * @param attr struct containing attribute information. + */ + void attribute(const orcus::sax_ns_parser_attribute& attr) + { + (void)attr; + } }; /** - * SAX based XML parser with proper namespace handling. + * SAX based XML parser with extra namespace handling. + * + * It uses an instance of xmlns_context passed by the caller to validate and + * convert namespace values into identifiers. The namespace identifier of + * each encountered element is always given even if one is not explicitly + * given. + * + * This parser keeps track of element scopes and detects non-matching element + * pairs. + * + * @tparam HandlerT Handler type with member functions for event callbacks. + * Refer to @ref sax_ns_handler. */ -template +template class sax_ns_parser { public: - typedef _Handler handler_type; + typedef HandlerT handler_type; - sax_ns_parser(const char* content, const size_t size, xmlns_context& ns_cxt, handler_type& handler); - sax_ns_parser(const char* content, const size_t size, bool transient_stream, - xmlns_context& ns_cxt, handler_type& handler); - ~sax_ns_parser(); + sax_ns_parser(std::string_view content, xmlns_context& ns_cxt, handler_type& handler); + ~sax_ns_parser() = default; + /** + * Start parsing the document. 
+ * + * @exception orcus::malformed_xml_error when it encounters a + * non-matching closing element. + */ void parse(); private: @@ -129,9 +232,9 @@ private: */ class handler_wrapper { - __sax::elem_scopes_type m_scopes; - __sax::ns_keys_type m_ns_keys; - __sax::entity_names_type m_attrs; + sax::detail::elem_scopes_type m_scopes; + sax::detail::ns_keys_type m_ns_keys; + sax::detail::entity_names_type m_attrs; sax_ns_parser_element m_elem; sax_ns_parser_attribute m_attr; @@ -163,8 +266,8 @@ private: void start_element(const sax::parser_element& elem) { - m_scopes.push_back(std::make_unique<__sax::elem_scope>()); - __sax::elem_scope& scope = *m_scopes.back(); + m_scopes.emplace_back(); + sax::detail::elem_scope& scope = m_scopes.back(); scope.ns = m_ns_cxt.get(elem.ns); scope.name = elem.name; scope.ns_keys.swap(m_ns_keys); @@ -181,9 +284,9 @@ private: void end_element(const sax::parser_element& elem) { - __sax::elem_scope& scope = *m_scopes.back(); + sax::detail::elem_scope& scope = m_scopes.back(); if (scope.ns != m_ns_cxt.get(elem.ns) || scope.name != elem.name) - throw sax::malformed_xml_error("mis-matching closing element.", -1); + throw malformed_xml_error("mis-matching closing element.", -1); m_elem.ns = scope.ns; m_elem.ns_alias = elem.ns; @@ -193,7 +296,8 @@ private: m_handler.end_element(m_elem); // Pop all namespaces declared in this scope. 
- std::for_each(scope.ns_keys.begin(), scope.ns_keys.end(), __sax::pop_ns_by_key(m_ns_cxt)); + for (const std::string_view& key : scope.ns_keys) + m_ns_cxt.pop(key); m_scopes.pop_back(); } @@ -212,11 +316,11 @@ private: return; } - if (m_attrs.count(__sax::entity_name(attr.ns, attr.name)) > 0) - throw sax::malformed_xml_error( + if (m_attrs.count(sax::detail::entity_name(attr.ns, attr.name)) > 0) + throw malformed_xml_error( "You can't define two attributes of the same name in the same element.", -1); - m_attrs.insert(__sax::entity_name(attr.ns, attr.name)); + m_attrs.insert(sax::detail::entity_name(attr.ns, attr.name)); if (attr.ns.empty() && attr.name == "xmlns") { @@ -251,27 +355,15 @@ private: sax_parser m_parser; }; -template -sax_ns_parser<_Handler>::sax_ns_parser( - const char* content, const size_t size, xmlns_context& ns_cxt, handler_type& handler) : - m_wrapper(ns_cxt, handler), m_parser(content, size, m_wrapper) -{ -} - -template -sax_ns_parser<_Handler>::sax_ns_parser( - const char* content, const size_t size, bool transient_stream, xmlns_context& ns_cxt, handler_type& handler) : - m_wrapper(ns_cxt, handler), m_parser(content, size, transient_stream, m_wrapper) -{ -} - -template -sax_ns_parser<_Handler>::~sax_ns_parser() +template +sax_ns_parser::sax_ns_parser( + std::string_view content, xmlns_context& ns_cxt, handler_type& handler) : + m_wrapper(ns_cxt, handler), m_parser(content, m_wrapper) { } -template -void sax_ns_parser<_Handler>::parse() +template +void sax_ns_parser::parse() { m_parser.parse(); } diff --git a/include/orcus/sax_parser.hpp b/include/orcus/sax_parser.hpp index c15ba48fae936d9928ebf668242ec69d45531e5b..1158a70da8dd694d27bda8d027761b6172be1c90 100644 --- a/include/orcus/sax_parser.hpp +++ b/include/orcus/sax_parser.hpp @@ -21,7 +21,7 @@ struct sax_parser_default_config * corresponds with version 1.0 whereas a value of 11 corresponds with * version 1.1. 
*/ - static const uint8_t baseline_version = 10; + static constexpr uint8_t baseline_version = 10; }; class sax_handler @@ -30,11 +30,11 @@ public: /** * Called when a doctype declaration <!DOCTYPE ... > is encountered. * - * @param param struct containing doctype declaration data. + * @param dtd struct containing doctype declaration data. */ - void doctype(const orcus::sax::doctype_declaration& param) + void doctype(const orcus::sax::doctype_declaration& dtd) { - (void)param; + (void)dtd; } /** @@ -113,19 +113,29 @@ public: }; /** - * Template-based sax parser that doesn't use function pointer for - * callbacks for better performance, especially on large XML streams. + * SAX parser for XML documents. + * + * This parser is barebone in that it only parses the document and picks up + * all encountered elements and attributes without checking proper element + * pairs. The user is responsible for checking whether or not the document is + * well-formed in terms of element scopes. + * + * This parser additionally records the begin and end offset positions of each + * element. + * + * @tparam HandlerT Handler type with member functions for event callbacks. + * Refer to @ref sax_handler. + * @tparam ConfigT Parser configuration. 
*/ -template +template class sax_parser : public sax::parser_base { public: - typedef _Handler handler_type; - typedef _Config config_type; + typedef HandlerT handler_type; + typedef ConfigT config_type; - sax_parser(const char* content, const size_t size, handler_type& handler); - sax_parser(const char* content, const size_t size, bool transient_stream, handler_type& handler); - ~sax_parser(); + sax_parser(std::string_view content, handler_type& handler); + ~sax_parser() = default; void parse(); @@ -151,29 +161,15 @@ private: handler_type& m_handler; }; -template -sax_parser<_Handler,_Config>::sax_parser( - const char* content, const size_t size, handler_type& handler) : - sax::parser_base(content, size, false), +template +sax_parser::sax_parser(std::string_view content, handler_type& handler) : + sax::parser_base(content.data(), content.size()), m_handler(handler) { } -template -sax_parser<_Handler,_Config>::sax_parser( - const char* content, const size_t size, bool transient_stream, handler_type& handler) : - sax::parser_base(content, size, transient_stream), - m_handler(handler) -{ -} - -template -sax_parser<_Handler,_Config>::~sax_parser() -{ -} - -template -void sax_parser<_Handler,_Config>::parse() +template +void sax_parser::parse() { m_nest_level = 0; mp_char = mp_begin; @@ -184,28 +180,28 @@ void sax_parser<_Handler,_Config>::parse() assert(m_buffer_pos == 0); } -template -void sax_parser<_Handler,_Config>::header() +template +void sax_parser::header() { // we don't handle multi byte encodings so we can just skip bom entry if exists. skip_bom(); skip_space_and_control(); if (!has_char() || cur_char() != '<') - throw sax::malformed_xml_error("xml file must begin with '<'.", offset()); + throw malformed_xml_error("xml file must begin with '<'.", offset()); if (config_type::baseline_version >= 11) { // XML version 1.1 requires a header declaration whereas in 1.0 it's // optional. 
if (next_char_checked() != '?') - throw sax::malformed_xml_error("xml file must begin with ' -void sax_parser<_Handler,_Config>::body() +template +void sax_parser::body() { while (has_char()) { @@ -224,8 +220,8 @@ void sax_parser<_Handler,_Config>::body() } } -template -void sax_parser<_Handler,_Config>::element() +template +void sax_parser::element() { assert(cur_char() == '<'); std::ptrdiff_t pos = offset(); @@ -246,8 +242,8 @@ void sax_parser<_Handler,_Config>::element() element_open(pos); } -template -void sax_parser<_Handler,_Config>::element_open(std::ptrdiff_t begin_pos) +template +void sax_parser::element_open(std::ptrdiff_t begin_pos) { sax::parser_element elem; element_name(elem, begin_pos); @@ -260,7 +256,7 @@ void sax_parser<_Handler,_Config>::element_open(std::ptrdiff_t begin_pos) { // Self-closing element: if (next_and_char() != '>') - throw sax::malformed_xml_error("expected '/>' to self-close the element.", offset()); + throw malformed_xml_error("expected '/>' to self-close the element.", offset()); next(); elem.end_pos = offset(); m_handler.start_element(elem); @@ -291,8 +287,8 @@ void sax_parser<_Handler,_Config>::element_open(std::ptrdiff_t begin_pos) } } -template -void sax_parser<_Handler,_Config>::element_close(std::ptrdiff_t begin_pos) +template +void sax_parser::element_close(std::ptrdiff_t begin_pos) { assert(cur_char() == '/'); nest_down(); @@ -301,7 +297,7 @@ void sax_parser<_Handler,_Config>::element_close(std::ptrdiff_t begin_pos) element_name(elem, begin_pos); if (cur_char() != '>') - throw sax::malformed_xml_error("expected '>' to close the element.", offset()); + throw malformed_xml_error("expected '>' to close the element.", offset()); next(); elem.end_pos = offset(); @@ -313,14 +309,14 @@ void sax_parser<_Handler,_Config>::element_close(std::ptrdiff_t begin_pos) m_root_elem_open = false; } -template -void sax_parser<_Handler,_Config>::special_tag() +template +void sax_parser::special_tag() { assert(cur_char() == '!'); // This can 
be either ::special_tag() { // Possibly comment. if (next_and_char() != '-') - throw sax::malformed_xml_error("comment expected.", offset()); + throw malformed_xml_error("comment expected.", offset()); len -= 2; if (len < 3) - throw sax::malformed_xml_error("malformed comment.", offset()); + throw malformed_xml_error("malformed comment.", offset()); next(); comment(); @@ -356,12 +352,12 @@ void sax_parser<_Handler,_Config>::special_tag() } break; default: - throw sax::malformed_xml_error("failed to parse special tag.", offset()); + throw malformed_xml_error("failed to parse special tag.", offset()); } } -template -void sax_parser<_Handler,_Config>::declaration(const char* name_check) +template +void sax_parser::declaration(const char* name_check) { assert(cur_char() == '?'); next_check(); @@ -377,7 +373,7 @@ void sax_parser<_Handler,_Config>::declaration(const char* name_check) { std::ostringstream os; os << "declaration name of '" << name_check << "' was expected, but '" << decl_name << "' was found instead."; - throw sax::malformed_xml_error(os.str(), offset()); + throw malformed_xml_error(os.str(), offset()); } m_handler.start_declaration(decl_name); @@ -390,7 +386,7 @@ void sax_parser<_Handler,_Config>::declaration(const char* name_check) skip_space_and_control(); } if (next_char_checked() != '>') - throw sax::malformed_xml_error("declaration must end with '?>'.", offset()); + throw malformed_xml_error("declaration must end with '?>'.", offset()); m_handler.end_declaration(decl_name); reset_buffer_pos(); @@ -400,8 +396,8 @@ void sax_parser<_Handler,_Config>::declaration(const char* name_check) #endif } -template -void sax_parser<_Handler,_Config>::cdata() +template +void sax_parser::cdata() { size_t len = remains(); assert(len > 3); @@ -427,18 +423,18 @@ void sax_parser<_Handler,_Config>::cdata() { // Found ']]>'. 
size_t cdata_len = i - 2; - m_handler.characters(std::string_view(p0, cdata_len), transient_stream()); + m_handler.characters(std::string_view(p0, cdata_len), false); next(); return; } else match = 0; } - throw sax::malformed_xml_error("malformed CDATA section.", offset()); + throw malformed_xml_error("malformed CDATA section.", offset()); } -template -void sax_parser<_Handler,_Config>::doctype() +template +void sax_parser::doctype() { // Parse the root element first. sax::doctype_declaration param; @@ -448,21 +444,21 @@ void sax_parser<_Handler,_Config>::doctype() // Either PUBLIC or SYSTEM. size_t len = remains(); if (len < 6) - throw sax::malformed_xml_error("DOCTYPE section too short.", offset()); + throw malformed_xml_error("DOCTYPE section too short.", offset()); param.keyword = sax::doctype_declaration::keyword_type::dtd_private; char c = cur_char(); if (c == 'P') { if (next_and_char() != 'U' || next_and_char() != 'B' || next_and_char() != 'L' || next_and_char() != 'I' || next_and_char() != 'C') - throw sax::malformed_xml_error("malformed DOCTYPE section.", offset()); + throw malformed_xml_error("malformed DOCTYPE section.", offset()); param.keyword = sax::doctype_declaration::keyword_type::dtd_public; } else if (c == 'S') { if (next_and_char() != 'Y' || next_and_char() != 'S' || next_and_char() != 'T' || next_and_char() != 'E' || next_and_char() != 'M') - throw sax::malformed_xml_error("malformed DOCTYPE section.", offset()); + throw malformed_xml_error("malformed DOCTYPE section.", offset()); } next_check(); @@ -494,7 +490,7 @@ void sax_parser<_Handler,_Config>::doctype() has_char_throw("DOCTYPE section too short."); if (cur_char() != '>') - throw sax::malformed_xml_error("malformed DOCTYPE section - closing '>' expected but not found.", offset()); + throw malformed_xml_error("malformed DOCTYPE section - closing '>' expected but not found.", offset()); #if ORCUS_DEBUG_SAX_PARSER cout << "sax_parser::doctype: root='" << param.root_element << "', fpi='" << 
param.fpi << "' uri='" << param.uri << "'" << endl; @@ -503,8 +499,8 @@ void sax_parser<_Handler,_Config>::doctype() next(); } -template -void sax_parser<_Handler,_Config>::characters() +template +void sax_parser::characters() { const char* p0 = mp_char; for (; has_char(); next()) @@ -520,9 +516,9 @@ void sax_parser<_Handler,_Config>::characters() buf.append(p0, mp_char-p0); characters_with_encoded_char(buf); if (buf.empty()) - m_handler.characters(std::string_view{}, transient_stream()); + m_handler.characters(std::string_view{}, false); else - m_handler.characters(std::string_view(buf.get(), buf.size()), true); + m_handler.characters(buf.str(), true); return; } } @@ -530,12 +526,12 @@ void sax_parser<_Handler,_Config>::characters() if (mp_char > p0) { std::string_view val(p0, mp_char-p0); - m_handler.characters(val, transient_stream()); + m_handler.characters(val, false); } } -template -void sax_parser<_Handler,_Config>::attribute() +template +void sax_parser::attribute() { sax::parser_attribute attr; attribute_name(attr.ns, attr.name); @@ -551,7 +547,7 @@ void sax_parser<_Handler,_Config>::attribute() { std::ostringstream os; os << "Attribute must begin with 'name=..'. (ns='" << attr.ns << "', name='" << attr.name << "')"; - throw sax::malformed_xml_error(os.str(), offset()); + throw malformed_xml_error(os.str(), offset()); } next_check(); // skip the '='. 
diff --git a/include/orcus/sax_parser_base.hpp b/include/orcus/sax_parser_base.hpp index bf12f5809a5771695ac36098d47712dfc4a1a2b6..596df447a8a83a492990c30a2f27d73b0db549b5 100644 --- a/include/orcus/sax_parser_base.hpp +++ b/include/orcus/sax_parser_base.hpp @@ -29,14 +29,6 @@ using std::endl; namespace orcus { namespace sax { -class ORCUS_PSR_DLLPUBLIC malformed_xml_error : public ::orcus::parse_error -{ -public: - malformed_xml_error() = delete; - malformed_xml_error(const std::string& msg, std::ptrdiff_t offset); - virtual ~malformed_xml_error() throw(); -}; - /** * Document type declaration passed by sax_parser to its handler's doctype() * call. @@ -83,25 +75,33 @@ ORCUS_PSR_DLLPUBLIC std::string decode_xml_unicode_char(const char* p, size_t n) */ struct parser_element { - std::string_view ns; // element namespace (optional) - std::string_view name; // element name - std::ptrdiff_t begin_pos; // position of the opening brace '<'. - std::ptrdiff_t end_pos; // position of the char after the closing brace '>'. + /** Optional element namespace. It may be empty if it's not given. */ + std::string_view ns; + /** Element name. */ + std::string_view name; + /** Position of the opening brace '<'. */ + std::ptrdiff_t begin_pos; + /** Position immediately after the closing brace '>'. */ + std::ptrdiff_t end_pos; }; /** * Attribute properties passed by sax_parser to its handler's attribute() - * call. When an attribute value is transient, it has been converted due to - * presence of encoded character(s) and stored in a temporary buffer. The - * handler must assume that the value will not survive beyond the scope of - * the callback. + * call. When an attribute value is "transient", it has been converted due to + * presence of encoded character(s) and has been stored in a temporary buffer. + * The handler must assume that the value will not survive after the callback + * function ends. 
*/ struct parser_attribute { - std::string_view ns; // attribute namespace (optional) - std::string_view name; // attribute name - std::string_view value; // attribute value - bool transient; // whether or not the attribute value is on a temporary buffer. + /** Optional attribute namespace. It may be empty if it's not given. */ + std::string_view ns; + /** Attribute name. */ + std::string_view name; + /** Attribute value. */ + std::string_view value; + /** Whether or not the attribute value is in a temporary buffer. */ + bool transient; }; class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base @@ -118,7 +118,7 @@ protected: bool m_root_elem_open:1; protected: - parser_base(const char* content, size_t size, bool transient_stream); + parser_base(const char* content, size_t size); ~parser_base(); void next_check() diff --git a/include/orcus/sax_token_parser.hpp b/include/orcus/sax_token_parser.hpp index 5e86af3e91007d63f26cc39104aef9314c99b3ed..867c8b5b7386f6750a2ca50cf9c13d53e63e2c3a 100644 --- a/include/orcus/sax_token_parser.hpp +++ b/include/orcus/sax_token_parser.hpp @@ -19,32 +19,6 @@ namespace orcus { class tokens; -namespace sax { - -#if ORCUS_DEBUG_SAX_PARSER -template -class attr_printer -{ -public: - attr_printer(const _Tokens& tokens, const ::std::string& indent) : - m_tokens(tokens), m_indent(indent) {} - - void operator() (const _Attr& attr) const - { - using namespace std; - cout << m_indent << " attribute: " - << attr.ns << ":" - << m_tokens.get_token_name(attr.name) << "=\"" - << attr.value.str() << "\"" << endl; - } -private: - const _Tokens& m_tokens; - ::std::string m_indent; -}; -#endif - -} - class ORCUS_PSR_DLLPUBLIC sax_token_handler_wrapper_base { protected: @@ -119,23 +93,28 @@ public: }; /** - * XML parser that tokenizes element and attribute names while parsing. + * SAX parser that tokenizes element and attribute names while parsing. 
All + * pre-defined elements and attribute names are translated into integral + * identifiers via use of @ref tokens. The user of this class needs to + * provide a pre-defined set of element and attribute names at construction + * time. + * + * This parser internally uses @ref sax_ns_parser. + * + * @tparam HandlerT Handler type with member functions for event callbacks. + * Refer to @ref sax_token_handler. */ -template +template class sax_token_parser { public: - typedef _Handler handler_type; + typedef HandlerT handler_type; sax_token_parser( - const char* content, const size_t size, const tokens& _tokens, + std::string_view content, const tokens& _tokens, xmlns_context& ns_cxt, handler_type& handler); - sax_token_parser( - const char* content, const size_t size, bool transient_stream, - const tokens& _tokens, xmlns_context& ns_cxt, handler_type& handler); - - ~sax_token_parser(); + ~sax_token_parser() = default; void parse(); @@ -187,35 +166,21 @@ private: sax_ns_parser m_parser; }; -template -sax_token_parser<_Handler>::sax_token_parser( - const char* content, const size_t size, const tokens& _tokens, xmlns_context& ns_cxt, handler_type& handler) : +template +sax_token_parser::sax_token_parser( + std::string_view content, const tokens& _tokens, xmlns_context& ns_cxt, handler_type& handler) : m_wrapper(_tokens, handler), - m_parser(content, size, ns_cxt, m_wrapper) + m_parser(content, ns_cxt, m_wrapper) { } -template -sax_token_parser<_Handler>::sax_token_parser( - const char* content, const size_t size, bool transient_stream, - const tokens& _tokens, xmlns_context& ns_cxt, handler_type& handler) : - m_wrapper(_tokens, handler), - m_parser(content, size, transient_stream, ns_cxt, m_wrapper) -{ -} - -template -sax_token_parser<_Handler>::~sax_token_parser() -{ -} - -template -void sax_token_parser<_Handler>::parse() +template +void sax_token_parser::parse() { m_parser.parse(); } -} +} // namespace orcus #endif /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ 
diff --git a/include/orcus/spreadsheet/Makefile.am b/include/orcus/spreadsheet/Makefile.am index d49cbb911ae652642a89a42281f69a0c41d5b5af..b01bce74c87b10a331b37f31d919a4cab132c75c 100644 --- a/include/orcus/spreadsheet/Makefile.am +++ b/include/orcus/spreadsheet/Makefile.am @@ -15,6 +15,7 @@ liborcus_HEADERS += \ auto_filter.hpp \ config.hpp \ document.hpp \ + document_types.hpp \ factory.hpp \ pivot.hpp \ shared_strings.hpp \ diff --git a/include/orcus/spreadsheet/document_types.hpp b/include/orcus/spreadsheet/document_types.hpp new file mode 100644 index 0000000000000000000000000000000000000000..28d8c22f8366d05a68fe6c7af9f787b6bf470c5c --- /dev/null +++ b/include/orcus/spreadsheet/document_types.hpp @@ -0,0 +1,55 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#pragma once + +#include "types.hpp" +#include + +namespace orcus { namespace spreadsheet { + +/** + * Stores a color value in ARGB format. 
+ */ +struct ORCUS_SPM_DLLPUBLIC color_t +{ + color_elem_t alpha; + color_elem_t red; + color_elem_t green; + color_elem_t blue; + + color_t(); + color_t(color_elem_t _red, color_elem_t _green, color_elem_t _blue); + color_t(color_elem_t _alpha, color_elem_t _red, color_elem_t _green, color_elem_t _blue); + + void reset(); + + bool operator==(const color_t& other) const; + bool operator!=(const color_t& other) const; +}; + +struct ORCUS_SPM_DLLPUBLIC format_run +{ + size_t pos; + size_t size; + std::string_view font; + double font_size; + color_t color; + bool bold:1; + bool italic:1; + + format_run(); + + void reset(); + bool formatted() const; +}; + +using format_runs_t = std::vector; + +}} // namespace orcus::spreadsheet + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/orcus/spreadsheet/import_interface.hpp b/include/orcus/spreadsheet/import_interface.hpp index e0aea72e4a78e410481ca578874da09e2e6c5e56..f340a276fc14e6aaf51878246cc5134bcbde1c9a 100644 --- a/include/orcus/spreadsheet/import_interface.hpp +++ b/include/orcus/spreadsheet/import_interface.hpp @@ -26,7 +26,47 @@ class import_pivot_cache_records; class import_sheet_view; /** - * Interface class designed to be derived by the implementor. + * Interface for importing raw string values shared in string cells. String + * values may be either with or without formatted segments. + * + * To insert an unformatted string, simply use either append() or add() + * method. The string will then be immediately pushed to the pool. + * + * To insert a string with mixed formatted segments, you need to first use one + * or more of: + * + * @li set_segment_font() + * @li set_segment_bold() + * @li set_segment_italic() + * @li set_segment_font_name() + * @li set_segment_font_size() + * @li set_segment_font_color() + * + * to define the format attribute(s) of a string segment followed by a call to + * append_segment(). This may be repeated as many times as necessary. 
Then + * as the final step, call commit_segments() to insert the entire series of + * formatted segments to the pool as a single string entry. The following + * example demonstrates what the code may look like: + * + * @code{.cpp} + * import_shared_strings* iface = ...; + * + * // store a segment with specific font, size and boldness. + * iface->set_segment_font_name("FreeMono"); + * iface->set_segment_font_size(14); + * iface->set_segment_bold(true); + * iface->append_segment("a bold and big segment"); + * + * // store an unformatted segment. + * iface->append_segment(" followed by "); + * + * // store a segment with smaller, italic font. + * iface->set_segment_font_size(7); + * iface->set_segment_italic(true); + * iface->append_segment("a small and italic segment"); + * + * iface->commit_segments(); // commit the whole formatted string to the pool. + * @endcode */ class ORCUS_DLLPUBLIC import_shared_strings { @@ -34,67 +74,69 @@ public: virtual ~import_shared_strings(); /** - * Append new string to the string list. Order of insertion is important - * since that determines the numerical ID values of inserted strings. - * Note that this method assumes that the caller knows the string being - * appended is not yet in the pool. + * Append a new string to the sequence of strings. Order of insertion + * determines the numerical ID value of an inserted string. Note that this + * method assumes that the caller knows the string being appended is not yet + * in the pool; it does not check on duplicated strings. * * @param s string to append to the pool. * - * @return ID of the string just inserted. + * @return ID of the inserted string. 
*/ virtual size_t append(std::string_view s) = 0; /** - * Similar to the append method, it adds new string to the string pool; + * Similar to the append() method, it adds a new string to the string pool; * however, this method checks if the string being added is already in the * pool before each insertion, to avoid duplicated strings. * * @param s string to add to the pool. * - * @return ID of the string just inserted. + * @return ID of the inserted string. */ virtual size_t add(std::string_view s) = 0; /** - * Set the index of a font to apply to the current format attributes. + * Set the index of a font to apply to the current format attributes. Refer + * to the import_font_style interface on how to obtain a font index. Note + * that a single font index is associated with multiple font-related + * formatting attributes, such as font name, font color, boldness and + * italics. * * @param font_index positive integer representing the font to use. */ virtual void set_segment_font(size_t font_index) = 0; /** - * Set whether or not to make the font bold to the current format - * attributes. + * Set whether or not to make the current segment bold. * * @param b true if it's bold, false otherwise. */ virtual void set_segment_bold(bool b) = 0; /** - * Set whether or not to set the font italic font to the current format - * attributes. + * Set whether or not to make the current segment italic. * * @param b true if it's italic, false otherwise. */ virtual void set_segment_italic(bool b) = 0; /** - * Set the name of a font to the current format attributes. + * Set the name of a font to the current segment. * * @param s font name. */ virtual void set_segment_font_name(std::string_view s) = 0; /** - * Set a font size to the current format attributes. + * Set a font size to the current segment. * * @param point font size in points. */ virtual void set_segment_font_size(double point) = 0; /** - * Set the color of a font in ARGB to the current format attributes. 
+ * Set the color of a font in ARGB format to the current segment. * * @param alpha alpha component value (0-255). * @param red red component value (0-255). @@ -104,29 +146,33 @@ public: virtual void set_segment_font_color(color_elem_t alpha, color_elem_t red, color_elem_t green, color_elem_t blue) = 0; /** - * Append a string segment with the current format attributes to the - * formatted string buffer. + * Push the current string segment to the buffer. Any formatting attributes + * defined so far will be applied to this segment. * - * @param s string segment value. + * @param s string value for the segment. */ virtual void append_segment(std::string_view s) = 0; /** - * Store the formatted string in the current buffer to the shared strings - * store. The implementation may choose to unconditionally append the - * string to the store, or choose to look for an existing indentical - * formatted string to reuse and discard the new one if one exists. + * Store the entire formatted string in the current buffer to the shared + * strings pool. The implementor may choose to unconditionally append the + * string to the pool, or choose to find an existing duplicate and reuse + * it instead. * * @return ID of the string just inserted, or the ID of an existing string - * with identical formatting attributes. + * with identical formatting. */ virtual size_t commit_segments() = 0; }; /** - * Interface for importing sheet properties. Sheet properties are those - * that are used for decorative purposes but are not necessarily a part of - * the sheet cell values. + * Interface for importing sheet properties. Sheet properties include: + * + * @li column widths and row heights, + * @li hidden flags for columns and rows, and + * @li merged cell ranges. + * + * These properties are independent of the cell contents of a sheet. 
*/ class ORCUS_DLLPUBLIC import_sheet_properties { @@ -134,7 +180,7 @@ public: virtual ~import_sheet_properties(); /** - * Set column width to specified column(s). + * Set a column width to one or more columns. * * @param col 0-based position of the first column. * @param col_span number of contiguous columns to apply the width to. @@ -144,7 +190,7 @@ public: virtual void set_column_width(col_t col, col_t col_span, double width, orcus::length_unit_t unit) = 0; /** - * Set column hidden flag to specified column(s). + * Set a column hidden flag to one or more columns. * * @param col 0-based position of the first column. * @param col_span number of contiguous columns to apply the flag to. @@ -152,15 +198,32 @@ public: */ virtual void set_column_hidden(col_t col, col_t col_span, bool hidden) = 0; + /** + * Set a row height to a specified row. + * + * @param row 0-based position of a row. + * @param height new row height value to set. + * @param unit unit of the new row height value. + * + * @todo Convert this to take a row span. + */ virtual void set_row_height(row_t row, double height, orcus::length_unit_t unit) = 0; + /** + * Set a row hidden flag to a specified row. + * + * @param row 0-based position of a row. + * @param hidden flag indicating whether or not the row is hidden. + * + * @todo Convert this to take a row span. + */ virtual void set_row_hidden(row_t row, bool hidden) = 0; /** - * Specify merged cell range. + * Set a merged cell range. * * @param range structure containing the top-left and bottom-right - * positions of the merged cell range. + * positions of a merged cell range. */ virtual void set_merge_cell_range(const range_t& range) = 0; }; @@ -168,14 +231,36 @@ public: /** * Interface for importing named expressions or ranges. * - * Note that this interface has two different methods for defining named - * expressions - set_named_expression() and set_named_range(). 
+ * This interface has two different methods for defining named expressions: + * + * @li set_named_expression() and + * @li set_named_range(). + * + * Generally speaking, set_named_expression() can be used to define both named + * expression and named range. However, the implementor may choose to apply a + * different syntax rule to parse an expression passed to set_named_range(), + * depending on the formula grammar defined via @ref + * import_global_settings::set_default_formula_grammar(). For instance, the + * OpenDocument Spreadsheet format is known to use different syntax rules + * between named expressions and named ranges. + * + * A named range is a special case of a named expression where the expression + * consists of only one single cell range token. + * + * Here is a code example of how a named expression is defined: * - * The set_named_expression() method is generally used to pass named - * expression strings. The set_named_range() method is used only when the - * format uses a different syntax to express a named range. A named range - * is a special case of named expression where the expression consists of - * one range token. + * @code{.cpp} + * import_named_expression* iface = ...; + * + * // set the A1 on the first sheet as its origin (optional). + * src_address_t origin{0, 0, 0}; + * iface->set_base_position(origin); + * iface->set_named_expression("MyExpression", "SUM(A1:B10)+SUM(D1:D4)"); + * iface->commit(); + * @endcode + * + * Replace the above set_named_expression() call with set_named_range() if you + * wish to define a named range instead. */ class ORCUS_DLLPUBLIC import_named_expression { @@ -183,16 +268,16 @@ public: virtual ~import_named_expression(); /** - * Specify an optional base position from which to evaluate a named - * expression. If not specified, the implementor should use the top-left - * cell position on the first sheet as its implied base position. 
+ * Specify an optional base position, or origin, from which to evaluate a + * named expression. If not specified, the implementor should use the + * top-left corner cell on the first sheet as its origin. * - * @param pos cell position to be used as the base. + * @param pos cell position to be used as the origin. */ virtual void set_base_position(const src_address_t& pos) = 0; /** - * Define a new named expression or overwrite an existing one. + * Set a named expression to the buffer. * * @param name name of the expression to be defined. * @param expression expression to be associated with the name. @@ -200,14 +285,17 @@ public: virtual void set_named_expression(std::string_view name, std::string_view expression) = 0; /** - * Define a new named range or overwrite an existin gone. Note that you - * can only define one named range or expression per single commit. + * Set a named range to the buffer. * * @param name name of the expression to be defined. * @param range range to be associated with the name. */ virtual void set_named_range(std::string_view name, std::string_view range) = 0; + /** + * Commit the named expression or range currently in the buffer to the + * document. + */ virtual void commit() = 0; }; @@ -219,17 +307,80 @@ class ORCUS_DLLPUBLIC import_data_table public: virtual ~import_data_table(); + /** + * Set the type of a data table. A data table can either: + * + * @li be a single-variable column-oriented, + * @li be a single-variable row-oriented, or + * @li use two variables that use both column and row. + * + * @param type type of a data table. + */ virtual void set_type(data_table_type_t type) = 0; + /** + * Set the range of a data table. + * + * @param range range of a data table. + */ virtual void set_range(const range_t& range) = 0; + /** + * Set the reference of the first input cell. + * + * @param ref reference of the first input cell. + * @param deleted whether or not this input cell has been deleted. 
+ */ virtual void set_first_reference(std::string_view ref, bool deleted) = 0; + /** + * Set the reference of the second input cell but only if the data table + * uses two variables. + * + * @note This method gets called only if the data table uses two variables. + * + * @param ref reference of the second input cell. + * @param deleted whether or not this input cell has been deleted. + */ virtual void set_second_reference(std::string_view ref, bool deleted) = 0; + /** + * Store the current data table data in the buffer to the backend sheet + * storage. + */ virtual void commit() = 0; }; +/** + * Interface for importing auto filters. + * + * Importing a single auto filter would roughly follow the following flow: + * + * @code{.cpp} + * import_auto_filter* iface = ... ; + * + * range_t range; + * range.first.column = 0; + * range.first.row = 0; + * range.last.column = 3; + * range.last.row = 1000; + * iface->set_range(range); // Auto filter is applied for A1:D1001. + * + * // Column A is filtered for a value of "A". + * iface->set_column(0); + * iface->append_column_match_value("A"); + * iface->commit_column(); + * + * // Column D is filtered for values of 1 and 4. + * iface->set_column(3); + * iface->append_column_match_value("1"); + * iface->append_column_match_value("4"); + * iface->commit_column(); + * + * // Push the autofilter data in the current buffer to the sheet store. + * iface->commit(); + * @endcode + */ class ORCUS_DLLPUBLIC import_auto_filter { public: @@ -245,27 +396,35 @@ public: /** * Specify the column position of a filter. The position is relative to - * the first column in the auto filter range. + * the first column in the auto filter range. This method gets called at + * the beginning of each column filter data. The implementor may initialize + * the column filter data buffer when this method is called. + * + * @note This column position is relative to the first column in the + * autofilter range. 
* * @param col 0-based column position of a filter relative to the first - * column. + * column of the auto filter range. */ virtual void set_column(col_t col) = 0; /** - * Add a match value to the current column filter. + * Append a match value to the current column filter. A single column + * filter may have one or more match values. * - * @param value match value. + * @param value match value to append to the current column filter. */ virtual void append_column_match_value(std::string_view value) = 0; /** - * Commit current column filter to the current auto filter. + * Commit the current column filter data to the current auto filter buffer. + * The implementor may clear the current column filter buffer after this + * call. */ virtual void commit_column() = 0; /** - * Commit current auto filter to the model. + * Commit current auto filter data stored in the buffer to the sheet store. */ virtual void commit() = 0; }; @@ -273,24 +432,45 @@ public: /** * This is an optional interface to import conditional formatting. * - * A conditional format consists of: - *
<ul> - * <li>a range</li> - * <li>several entries</li> - * </ul> - * - * Each entry consists of: - * <ul> - * <li>a type</li> - * <li>a few properties depending on the type (optional)</li> - * <li>zero or more conditions depending on the type</li> - * </ul> - * - * Each condition consists of: - * <ul> - * <li>a formula/value/string</li> - * <li>a color (optional)</li> - * </ul>
+ * In general, a single conditional format consists of: + * + * @li a cell range the format is applied to, and + * @li one or more rule entries. + * + * Each rule entry consists of: + * + * @li a type of rule, + * @li zero or more rule properties, and + * @li zero or more conditions depending on the rule type. + * + * Lastly, each condition consists of: + * + * @li a formula, value, or string, + * @li an optional color. + * + * The flow of the import process varies depending on the type of the + * conditional formatting being imported. The following is an example of + * importing a conditional formatting that consists of a rule that applies a + * format when the cell value is greater than 2: + * + * @code{.cpp} + * import_conditional_format* iface = ... ; + * + * iface->set_range("A2:A13"); + * iface->set_xf_id(14); // apply differential format (dxf) whose ID is 14 + * iface->set_type(conditional_format_t::condition); // rule entry type + * iface->set_operator(condition_operator_t::expression); + * iface->set_operator(condition_operator_t::greater); + * + * iface->set_formula("2"); + * iface->commit_condition(); + * + * iface->commit_entry(); + * + * iface->commit_format(); + * @endcode + * + * @todo Revise this API for simplification. */ class ORCUS_DLLPUBLIC import_conditional_format { @@ -405,43 +585,157 @@ public: }; /** - * Interface for table. A table is a range within a sheet that consists of - * one or more data columns with a header row that contains their labels. + * Interface for table. A table is a range of cells within a sheet that + * consists of one or more data columns with a header row that contains their + * labels. */ class ORCUS_DLLPUBLIC import_table { public: virtual ~import_table(); + /** + * Get an optional interface for importing auto filter data stored as part + * of a table. + * + * The implementor should initialize the internal state of the temporary + * auto filter object when this method is called. 
+ * + * @return pointer to the auto filter interface object, or a @p nullptr if + * the implementor doesn't support it. + */ virtual import_auto_filter* get_auto_filter(); + /** + * Set an integral identifier unique to the table. + * + * @param id identifier associated with the table. + */ virtual void set_identifier(size_t id) = 0; - virtual void set_range(std::string_view ref) = 0; + /** + * Set a 2-dimensional cell range associated with the table. + * + * @param range cell range associated with the table. + */ + virtual void set_range(const range_t& range) = 0; + /** + * Set the number of totals rows. + * + * @param row_count number of totals rows. + */ virtual void set_totals_row_count(size_t row_count) = 0; + /** + * Set the internal name of the table. + * + * @param name name of the table. + */ virtual void set_name(std::string_view name) = 0; + /** + * Set the displayed name of the table. + * + * @param name displayed name of the table. + */ virtual void set_display_name(std::string_view name) = 0; + /** + * Set the number of columns the table contains. + * + * @param n number of columns in the table. + * + * @note This method gets called before the column data gets imported. The + * implementor can use this call to initialize the buffer for storing + * the column data. + */ virtual void set_column_count(size_t n) = 0; + /** + * Set an integral identifier for a column. + * + * @param id integral identifier for a column. + */ virtual void set_column_identifier(size_t id) = 0; + + /** + * Set a name of a column. + * + * @param name name of a column. + */ virtual void set_column_name(std::string_view name) = 0; + + /** + * Set the totals row label for a column. + * + * @param label row label for a column. + */ virtual void set_column_totals_row_label(std::string_view label) = 0; + + /** + * Set the totals row function for a column. + * + * @param func totals row function for a column. 
+ */ virtual void set_column_totals_row_function(totals_row_function_t func) = 0; + + /** + * Push and append the column data stored in the current column data buffer + * into the table buffer. + */ virtual void commit_column() = 0; + /** + * Set the name of a style to apply to the table. + * + * @param name name of a style to apply to the table. + */ virtual void set_style_name(std::string_view name) = 0; + + /** + * Specify whether or not the first column in the table should have the + * style applied. + * + * @param b whether or not the first column in the table should have the + * style applied. + */ virtual void set_style_show_first_column(bool b) = 0; + + /** + * Specify whether or not the last column in the table should have the style + * applied. + * + * @param b whether or not the last column in the table should have the + * style applied. + */ virtual void set_style_show_last_column(bool b) = 0; + + /** + * Specify whether or not row stripe formatting is applied. + * + * @param b whether or not row stripe formatting is applied. + */ virtual void set_style_show_row_stripes(bool b) = 0; + + /** + * Specify whether or not column stripe formatting is applied. + * + * @param b whether or not column stripe formatting is applied. + */ virtual void set_style_show_column_stripes(bool b) = 0; + /** + * Push the data stored in the table buffer into the document store. + */ virtual void commit() = 0; }; +/** + * Interface for importing the properties of a single formula cell. A formula + * cell contains a formula expression that can be computed, and optionally a + * cached result of the last computation performed on the expression. + */ class ORCUS_DLLPUBLIC import_formula { public: @@ -503,90 +797,166 @@ public: virtual void commit() = 0; }; +/** + * Interface for importing the properties of an array formula which occupies a + * range of cells. Cells that are part of an array formula share the same + * formula expression but may have different calculation results. 
+ */ class ORCUS_DLLPUBLIC import_array_formula { public: virtual ~import_array_formula(); + /** + * Set the range of an array formula. + * + * @param range range of an array formula. + */ virtual void set_range(const range_t& range) = 0; + /** + * Set the formula expression of an array formula. + * + * @param grammar grammar to use to compile the formula string into + * tokens. + * @param formula formula expression of an array formula. + */ virtual void set_formula(formula_grammar_t grammar, std::string_view formula) = 0; + /** + * Set a cached string result of a cell within the array formula range. + * + * @param row 0-based row position of a cell. + * @param col 0-based column position of a cell. + * @param value cached string value to set. + */ virtual void set_result_string(row_t row, col_t col, std::string_view value) = 0; + /** + * Set a cached numeric result of a cell within the array formula range. + * + * @param row 0-based row position of a cell. + * @param col 0-based column position of a cell. + * @param value cached numeric value to set. + */ virtual void set_result_value(row_t row, col_t col, double value) = 0; + /** + * Set a cached boolean result of a cell within the array formula range. + * + * @param row 0-based row position of a cell. + * @param col 0-based column position of a cell. + * @param value cached boolean value to set. + */ virtual void set_result_bool(row_t row, col_t col, bool value) = 0; + /** + * Set an empty value as a cached result to a cell within the array formula + * range. + * + * @param row 0-based row position of a cell. + * @param col 0-based column position of a cell. + */ virtual void set_result_empty(row_t row, col_t col) = 0; + /** + * Push the properties of an array formula currently stored in the buffer to + * the sheet store. + */ virtual void commit() = 0; }; /** - * Interface for sheet. + * Interface for importing the content and properties of a sheet. 
*/ class ORCUS_DLLPUBLIC import_sheet { public: virtual ~import_sheet(); + /** + * Get an optional interface for importing properties that are specific to a + * view of a sheet. + * + * @return pointer to the interface for importing view properties, or a @p + * nullptr if the implementor doesn't support it. + */ virtual import_sheet_view* get_sheet_view(); + /** + * Get an optional interface for importing sheet properties. + * + * @return pointer to the interface for importing sheet properties, or a @p + * nullptr if the implementor doesn't support it. + */ virtual import_sheet_properties* get_sheet_properties(); /** - * Get an interface for importing data tables. Note that the implementer - * may decide not to support this feature in which case this method - * returns NULL. The implementer is responsible for managing the life - * cycle of the returned interface object. + * Get an optional interface for importing data tables. Note that the + * implementer may decide not to support this feature in which case this + * method should return a @p nullptr. * - * The implementor should also initialize the internal state of the - * temporary data table object when this method is called. + * The implementor should initialize the internal state of the temporary + * data table object when this method is called. * - * @return pointer to the data table interface object. + * @return pointer to the data table interface object, or a @p nullptr if + * the implementor doesn't support it. */ virtual import_data_table* get_data_table(); /** - * Get an interface for importing auto filter ranges. + * Get an optional interface for importing auto filter ranges. * - * The implementor should also initialize the internal state of the - * temporary auto filter object when this method is called. + * The implementor should initialize the internal state of the temporary + * auto filter object when this method is called. * - * @return pointer to the auto filter interface object. 
+ * @return pointer to the auto filter interface object, or a @p nullptr if + * the implementor doesn't support it. */ virtual import_auto_filter* get_auto_filter(); /** - * Get an interface for importing tables. The implementer is responsible - * for managing the life cycle of the returned interface object. + * Get an interface for importing tables. * - * The implementor should also initialize the internal state of the - * temporary table object when this method is called. + * The implementor should initialize the internal state of the temporary + * table object when this method is called. * - * @return pointer to the table interface object, or NULL if the + * @return pointer to the table interface object, or @p nullptr if the * implementer doesn't support importing of tables. */ virtual import_table* get_table(); /** - * get an interface for importing conditional formats. The implementer is responsible - * for managing the life cycle of the returned interface object. + * Get an optional interface for importing conditional formats. * - * @return pointer to the conditional format interface object, or NULL - * if the implementer doesn't support importing conditional formats. + * @return pointer to the conditional format interface object, or @p nullptr + * if the implementer doesn't support importing conditional + * formats. */ virtual import_conditional_format* get_conditional_format(); + /** + * Get an optional interface for importing sheet-local named expressions. + * + * @return pointer to the sheet-local named expression interface, or a @p + * nullptr if the implementor doesn't support it. + */ virtual import_named_expression* get_named_expression(); + /** + * Get an optional interface for importing array formulas. An array formula + * is a formula expression applied to a range of cells where each cell may + * have a different result value. + * + * @return pointer to the array formula import interface, or a @p nullptr if + * the implementor doesn't support it. 
+ */ virtual import_array_formula* get_array_formula(); /** - * Get an interface for importing formula cells. + * Get an optional interface for importing formula cells. * - * @return pointer to the formula interface object, or nullptr if the + * @return pointer to the formula interface object, or a @p nullptr if the * implementer doesn't support importing of formula cells. */ virtual import_formula* get_formula(); @@ -717,6 +1087,10 @@ public: virtual range_size_t get_sheet_size() const = 0; }; +/** + * Interface for specifying global settings that may affect how the + * implementor should process certain values and properties. + */ class ORCUS_DLLPUBLIC import_global_settings { public: @@ -724,7 +1098,8 @@ public: /** * Set the date that is to be represented by a value of 0. All date - * values will be internally represented relative to this date afterward. + * values should be represented relative to this date. This may affect, for + * instance, values imported via @ref import_sheet::set_date_time(). * * @param year 1-based value representing year * @param month 1-based value representing month, varying from 1 through @@ -734,30 +1109,44 @@ public: virtual void set_origin_date(int year, int month, int day) = 0; /** - * Set formula grammar to be used globally when parsing formulas if the - * grammar is not specified. This grammar will also be used when parsing + * Set the formula grammar to be used globally when parsing formulas if the + * grammar is not specified. This grammar should also be used when parsing * range strings associated with shared formula ranges, array formula * ranges, autofilter ranges etc. * - * @param grammar default formula grammar + * Note that the import filter may specify what formula grammar to use + * locally when importing formula expressions for cells via @ref + * import_formula::set_formula(), in which case the implementor should honor + * that one instead. 
+ * + * @param grammar default formula grammar to use globally unless otherwise + * specified. */ virtual void set_default_formula_grammar(formula_grammar_t grammar) = 0; /** - * Get current default formula grammar. + * Get current global formula grammar. The import filter may use this + * method to query the current global formula grammar. * * @return current default formula grammar. */ virtual formula_grammar_t get_default_formula_grammar() const = 0; /** - * Set the character set to be used when parsing string values. + * Set the character set to use when parsing encoded string values. * - * @param charset character set to apply when parsing string values. + * @param charset character set to use when parsing encoded string values. */ virtual void set_character_set(character_set_t charset) = 0; }; +/** + * This is an interface to allow the implementor to provide its own reference + * address parsers, for both single cell references and cell range references. + * The implementor may choose to provide a different parser depending on the + * type of formula_ref_context_t argument given to the @ref + * import_factory::get_reference_resolver() call. + */ class ORCUS_DLLPUBLIC import_reference_resolver { public: @@ -792,101 +1181,140 @@ public: }; /** - * This interface provides the filters a means to instantiate concrete - * classes that implement the above interfaces. The client code never has - * to manually delete objects returned by its methods; the implementor of - * this interface must manage the life cycles of objects it returns. - * - * The implementor of this interface normally wraps the document instance - * inside it and have the document instance manage the life cycles of - * various objects it creates. + * This interface is the entry point for the import filter code to instantiate + * other, more specialized interfaces. The life cycles of any specialized + * interfaces returned from this interface shall be managed by the implementor + * of this interface. 
+ * + * The implementer of this interface may wrap a backend document store that + * needs to be populated. */ class ORCUS_DLLPUBLIC import_factory { public: virtual ~import_factory(); + /** + * Obtain an optional interface for global settings, which the import filter + * uses to specify global filter settings that may affect how certain values + * and properties should be processed. The implementor can use this + * interface to decide how to process relevant values and properties. + * + * @return pointer to the global settings interface, or a @p nullptr if the + * implementor doesn't support it. + */ virtual import_global_settings* get_global_settings(); /** - * @return pointer to the shared strings instance. It may return NULL if - * the client app doesn't support shared strings. + * Obtain an optional interface for importing shared strings for string + * cells. Implementing this interface is required in order to import string + * cell values. + * + * @return pointer to the shared strings interface, or a @p nullptr if the + * implementor doesn't support it. */ virtual import_shared_strings* get_shared_strings(); + /** + * Obtain an optional interface for importing global named expressions. + * + * Note that @ref import_sheet also provides the same interface, but its + * interface is for importing sheet-local named expressions. + * + * @return pointer to the global named expression interface, or a @p nullptr + * if the implementor doesn't support it. + */ virtual import_named_expression* get_named_expression(); /** - * @return pointer to the styles instance. It may return NULL if the - * client app doesn't support styles. + * Obtain an optional interface for importing styles used to add formatting + * properties to cell values. + * + * @return pointer to the styles interface, or a @p nullptr if the + * implementor doesn't support it. 
*/ virtual import_styles* get_styles(); + /** + * Obtain an optional interface for resolving cell and cell-range references + * from string values. + * + * @param cxt context in which the formula expression containing the + * references to be resolved occurs. + * + * @return pointer to the reference resolver interface, or a @p nullptr if + * the implementor doesn't support it. + */ virtual import_reference_resolver* get_reference_resolver(formula_ref_context_t cxt); /** - * Create an interface for pivot cache definition import for a specified - * cache ID. In case a pivot cache alrady exists for the passed ID, the - * client app should overwrite the existing cache with a brand-new cache - * instance. + * Obtain an optional interface for pivot cache definition import for a + * specified cache ID. In case a pivot cache already exists for the passed + * ID, the implementor should overwrite the existing cache with a brand-new + * cache instance. * * @param cache_id numeric ID associated with the pivot cache. * - * @return pointer to the pivot cache interface instance. If may return - * NULL if the client app doesn't support pivot tables. + * @return pointer to the pivot cache interface, or a @p nullptr if the + * implementor doesn't support pivot cache import. */ virtual import_pivot_cache_definition* create_pivot_cache_definition( pivot_cache_id_t cache_id); /** - * Create an interface for pivot cache records import for a specified - * cache ID. + * Obtain an optional interface for pivot cache records import for a + * specified cache ID. * * @param cache_id numeric ID associated with the pivot cache. * - * @return pointer to the pivot cache records interface instance. If may - * return nullptr if the client app doesn't support pivot tables. + * @return pointer to the pivot cache records interface, or a @p nullptr if + * the implementor doesn't support pivot cache import.
*/ virtual import_pivot_cache_records* create_pivot_cache_records( pivot_cache_id_t cache_id); /** - * Append a sheet with specified sheet position index and name. + * Append a sheet with a specified sheet position index and name and return + * an interface for importing its content. The implementor can use a call + * to this method as a signal to create and append a new sheet instance to + * the document store. * * @param sheet_index position index of the sheet to be appended. It is * 0-based i.e. the first sheet to be appended will * have an index value of 0. * @param name sheet name. * - * @return pointer to the sheet instance. It may return nullptr if the - * client app fails to append a new sheet. + * @return pointer to the sheet instance, or a @p nullptr if the implementor + * doesn't support it. Note, however, that if the implementor + * doesn't support this interface, no cell values will get imported. */ virtual import_sheet* append_sheet(sheet_t sheet_index, std::string_view name) = 0; /** - * Get a sheet instance by name. + * Get a sheet instance by name. The import filter may use this method to + * get access to an existing sheet after it has been created. * * @param name sheet name. * * @return pointer to the sheet instance whose name matches the name - * passed to this method. It returns nullptr if no sheet instance - * exists by the specified name. + * passed to this method. It returns a @p nullptr if no sheet + * instance exists by the specified name. */ virtual import_sheet* get_sheet(std::string_view name) = 0; /** - * Retrieve sheet instance by specified numerical sheet index. + * Retrieve a sheet instance by a specified numerical sheet index. * - * @param sheet_index sheet index + * @param sheet_index sheet index. * - * @return pointer to the sheet instance, or nullptr if no sheet instance - * exists at specified sheet index position. 
+ * @return pointer to the sheet instance, or a @p nullptr if no sheet + * instance exists at the specified sheet index. */ virtual import_sheet* get_sheet(sheet_t sheet_index) = 0; /** - * This method is called at the end of import, to give the implementor a - * chance to perform post-processing if necessary. + * The import filter calls this method after completing its import, to give + * the implementor a chance to perform post-processing. */ virtual void finalize() = 0; }; diff --git a/include/orcus/spreadsheet/import_interface_pivot.hpp b/include/orcus/spreadsheet/import_interface_pivot.hpp index 8fd533347928df633222b3a68b8bb3865d37c1ee..275ed444152566ef546286745f0fcf627a40c88e 100644 --- a/include/orcus/spreadsheet/import_interface_pivot.hpp +++ b/include/orcus/spreadsheet/import_interface_pivot.hpp @@ -23,7 +23,7 @@ namespace orcus { namespace spreadsheet { namespace iface { class import_pivot_cache_field_group; /** - * Interface for importing pivot cache definition. + * Interface for importing pivot cache definitions. */ class ORCUS_DLLPUBLIC import_pivot_cache_definition { @@ -36,6 +36,8 @@ public: * * @param ref range string specifying the source range. * @param sheet_name name of the worksheet where the source data is located. + * + * @todo use the ref resolver to resolve the range. */ virtual void set_worksheet_source(std::string_view ref, std::string_view sheet_name) = 0; @@ -90,16 +92,17 @@ public: virtual void set_field_max_date(const date_time_t& dt) = 0; /** - * Mark the current field as a group field. + * Mark the current field as a group field and initiate its import. * - * This method gets called first to signify that the current field is a - * group field. + * The implementor should create an internal storage to prepare for the + * importing of field group data when this method gets called. * - * @param base_index 0-based index of the field this field is the parent - * group of. - * @return interface for importing group field data. 
+ * @param base_index 0-based index of the field this group field uses as its + * base. + * @return interface for importing group field data, or a @p nullptr if the + * implementor doesn't support it. */ - virtual import_pivot_cache_field_group* create_field_group(size_t base_index) = 0; + virtual import_pivot_cache_field_group* start_field_group(size_t base_index) = 0; /** * Commit the field in the current field buffer to the pivot cache model. @@ -147,7 +150,7 @@ public: }; /** - * Interface for importing group field settings in a pivot cache. + * Interface for importing group field settings in a pivot cache definition. */ class ORCUS_DLLPUBLIC import_pivot_cache_field_group { @@ -291,19 +294,53 @@ class ORCUS_DLLPUBLIC import_pivot_cache_records public: virtual ~import_pivot_cache_records(); + /** + * Set the number of records included in pivot cache records. + * + * @note This method gets called before the very first record gets imported. + * The implementor can use this call as an opportunity to initialize + * any internal buffers used to store the imported records. + * + * @param n number of records included in pivot cache records. + */ virtual void set_record_count(size_t n) = 0; + /** + * Append to the current record buffer a numeric value as a column value. + * + * @param v numeric value to append to the current record buffer as a column + * value. + */ virtual void append_record_value_numeric(double v) = 0; + /** + * Append to the current record buffer a character value as a column value. + * + * @param s character value to append to the current record buffer as a + * column value. + */ virtual void append_record_value_character(std::string_view s) = 0; + /** + * Append to the current record buffer a column value referenced by an index + * into the shared items table of a pivot cache field. The corresponding + * field in the pivot cache definition should provide the shared items table + * that this index references. 
+ * + * @param index index into the shared items table of a pivot cache field. + */ virtual void append_record_value_shared_item(size_t index) = 0; /** - * Commit the record in the current buffer, and clears the buffer. + * Commit the record in the current record buffer. + * + * The implementor can clear the buffer afterward. */ virtual void commit_record() = 0; + /** + * Commit the entire records set to the document store. + */ virtual void commit() = 0; }; diff --git a/include/orcus/spreadsheet/shared_strings.hpp b/include/orcus/spreadsheet/shared_strings.hpp index 9c3a257cfdfea0c001764171a1d21b53bd68f964..28799b9c6e8c58e2e38ed78a19a55ddceda8eb94 100644 --- a/include/orcus/spreadsheet/shared_strings.hpp +++ b/include/orcus/spreadsheet/shared_strings.hpp @@ -8,7 +8,7 @@ #ifndef INCLUDED_ORCUS_SPREADSHEET_SHARED_STRINGS_HPP #define INCLUDED_ORCUS_SPREADSHEET_SHARED_STRINGS_HPP -#include "types.hpp" +#include "document_types.hpp" #include #include diff --git a/include/orcus/spreadsheet/styles.hpp b/include/orcus/spreadsheet/styles.hpp index 62340a8e4086adc69ecf127aa928dcf11941b98b..f60977877ade7bacd00fe9de69201d217b844c87 100644 --- a/include/orcus/spreadsheet/styles.hpp +++ b/include/orcus/spreadsheet/styles.hpp @@ -10,7 +10,7 @@ #include "../env.hpp" #include "../measurement.hpp" -#include "types.hpp" +#include "document_types.hpp" #include #include diff --git a/include/orcus/spreadsheet/types.hpp b/include/orcus/spreadsheet/types.hpp index c588f892d3834f15dc91d9e4b80d6db00f4f3121..c71044c344e200ec9fdc0ddd385e2446cbddb8a1 100644 --- a/include/orcus/spreadsheet/types.hpp +++ b/include/orcus/spreadsheet/types.hpp @@ -21,43 +21,95 @@ namespace orcus { namespace spreadsheet { -typedef int32_t row_t; -typedef int32_t col_t; -typedef int32_t sheet_t; -typedef uint8_t color_elem_t; -typedef uint16_t col_width_t; -typedef uint16_t row_height_t; -typedef uint32_t string_id_t; - -typedef uint32_t pivot_cache_id_t; +/** Row ID type. 
*/ +using row_t = int32_t; +/** Column ID type. */ +using col_t = int32_t; +/** Sheet ID type. */ +using sheet_t = int32_t; +/** Individual color element type. */ +using color_elem_t = uint8_t; +/** Type for column width values. Column width values are stored in twips. */ +using col_width_t = uint16_t; +/** Type for row height values. Row height values are stored in twips. */ +using row_height_t = uint16_t; +/** Type for string IDs for string cells. */ +using string_id_t = uint32_t; +/** Pivot cache ID type. */ +using pivot_cache_id_t = uint32_t; +/** + * Get the special column width value that represents the default column + * width. The value itself is not to be used as an actual width value. + * + * @return value that represents the default column width. + */ ORCUS_DLLPUBLIC col_width_t get_default_column_width(); + +/** + * Get the special row height value that represents the default row height. + * The value itself is not to be used as an actual row height value. + * + * @return value that represents the default row height. + */ ORCUS_DLLPUBLIC row_height_t get_default_row_height(); +/** + * Type of error value in cells. + */ enum class error_value_t { + /** + * Error type unknown, typically used as an initial error value or generic + * default value. + */ unknown = 0, - null, // #NULL! - div0, // #DIV/0! - value, // #VALUE! - ref, // #REF! - name, // #NAME? - num, // #NUM! - na // #N/A! + /** Null reference error, displayed as `#NULL!`. */ + null, + /** Division-by-zero error, displayed as `#DIV/0!`. */ + div0, + /** Formula expression error, displayed as `#VALUE!`. */ + value, + /** Reference error, displayed as `#REF!`. */ + ref, + /** Invalid named-expression error, displayed as `#NAME?`. */ + name, + /** Invalid numeric value error, displayed as `#NUM!`. */ + num, + /** No value is available error, displayed as `#N/A!`. */ + na }; +/** + * Type of border direction, used to reference the position of a border in a + * cell.
+ */ enum class border_direction_t { + /** Unknown or uninitialized border direction value. */ unknown = 0, + /** Top border of a cell. */ top, + /** Bottom border of a cell. */ bottom, + /** Left border of a cell. */ left, + /** Right border of a cell. */ right, + /** + * Cross-diagonal borders of a cell. This is equivalent of both + * @p diagonal_bl_tr and @p diagonal_tl_br combined. + */ diagonal, + /** Diagonal border of a cell that runs from bottom-left to top-right. */ diagonal_bl_tr, + /** Diagonal border of a cell that runs from top-left to bottom-right. */ diagonal_tl_br }; +/** + * Type of border style. + */ enum class border_style_t { unknown = 0, @@ -80,6 +132,9 @@ enum class border_style_t fine_dashed }; +/** + * Type of fill pattern for cell background. + */ enum class fill_pattern_t { none = 0, @@ -103,6 +158,11 @@ enum class fill_pattern_t medium_gray }; +/** + * Strikethrough style as applied to a cell value. + * + * @note This is specific to ODS format. + */ enum class strikethrough_style_t { none = 0, @@ -115,6 +175,11 @@ enum class strikethrough_style_t wave }; +/** + * Strikethrough type as applied to a cell value. + * + * @note This is specific to ODS format. + */ enum class strikethrough_type_t { unknown = 0, @@ -123,6 +188,11 @@ enum class strikethrough_type_t double_type }; +/** + * Width of strikethrough applied to a cell value. + * + * @note This is specific to ODS format. + */ enum class strikethrough_width_t { unknown = 0, @@ -133,10 +203,17 @@ enum class strikethrough_width_t bold }; +/** + * Text used for strike-through. + * + * @note This is specific to ODS format. + */ enum class strikethrough_text_t { unknown = 0, + /** `/` is used as the text. */ slash, + /** `X` is used as the text. */ cross }; @@ -158,12 +235,20 @@ enum class formula_grammar_t gnumeric }; +/** + * Type of formula expression. + */ enum class formula_t { + /** Formula expression type unknown, or generic default value. 
*/ unknown = 0, + /** Formula expression in an array of cells. */ array, + /** Formula expression in a data table. */ data_table, + /** Formula expression in a normal formula cell. */ normal, + /** Formula expression in a shared formula cell. */ shared }; @@ -180,36 +265,49 @@ enum class formula_ref_context_t */ global = 0, - /** base cell position of either a named range or expression. */ + /** Base cell position of either a named range or expression. */ named_expression_base, /** - * named range is a special case of named expression where the expression + * Named range is a special case of named expression where the expression * consists of only one range token. */ named_range, }; /** - * Policy on how to handle a formula cell containing an expression that has - * not been successfully parsed. + * Type of policy on how to handle a formula cell with an erroneous expression + * that has been parsed unsuccessfully. */ enum class formula_error_policy_t { unknown, - /** loading of the document will be halted. */ + /** Loading of the document will be halted. */ fail, - /** the error cell will be skipped. */ + /** The error cell will be skipped. */ skip }; +/** + * Underline type for a cell value. + */ enum class underline_t { none = 0, single_line, - single_accounting, // unique to xlsx + /** + * Single line for accounting format. + * + * @note This is unique to xlsx format. + */ + single_accounting, double_line, - double_accounting, // unique to xlsx + /** + * Double line for accounting format. + * + * @note This is unique to xlsx format. + */ + double_accounting, dotted, dash, long_dash, @@ -240,19 +338,33 @@ enum class underline_width_t positive_length }; +/** + * Underline mode that determines whether an underline is applied to both + * words and spaces, or words only. + */ enum class underline_mode_t { + /** Underline is applied to both words and spaces. */ continuous = 0, + /** Underline is applied only to words. 
*/ skip_white_space }; +/** + * Whether a single line or a double line is used as an underline. + */ enum class underline_type_t { none = 0, + /** A single line is used as an underline. */ single_type, + /** A double line is used as an underline. */ double_type }; +/** + * Collection of various underline attributes. + */ struct underline_attrs_t { underline_t underline_style; @@ -261,6 +373,9 @@ struct underline_attrs_t underline_type_t underline_type; }; +/** + * Type of horizontal alignment applied to a cell content. + */ enum class hor_alignment_t { unknown = 0, @@ -272,6 +387,9 @@ enum class hor_alignment_t filled }; +/** + * Type of vertical alignment applied to a cell content. + */ enum class ver_alignment_t { unknown = 0, @@ -283,17 +401,17 @@ enum class ver_alignment_t }; /** - * Cell format categories. The abbrevaition "xf" refers to "cell format" where - * the "x" stands for cell. + * Cell format categories. The abbreviation "xf" stands for "cell format" + * where the "x" is short for cell. */ enum class xf_category_t { unknown, - /** Direct cell format, also abbreviated as xf */ + /** Direct cell format, also often referenced as xf. */ cell, - /** Cell format for named styles */ + /** Cell format for named styles. */ cell_style, - /** Incremental cell format, also abbreviated as dxf */ + /** Incremental cell format, also referenced as dxf. */ differential, }; @@ -326,6 +444,9 @@ enum class totals_row_function_t custom }; +/** + * Type of conditional format. + */ enum class conditional_format_t { unknown = 0, @@ -337,6 +458,9 @@ enum class conditional_format_t iconset }; +/** + * Operator type associated with a conditional format rule. + */ enum class condition_operator_t { unknown = 0, @@ -366,6 +490,14 @@ enum class condition_operator_t expression }; +/** + * Type of a condition in a conditional format rule. 
This is applicable only + * when the type of a conditional format entry is either: + * + * @li @p colorscale, + * @li @p databar or + * @li @p iconset. + */ enum class condition_type_t { unknown = 0, @@ -378,6 +510,10 @@ enum class condition_type_t percentile }; +/** + * Type of a date condition when the type of a conditional format entry is + * @p date. + */ enum class condition_date_t { unknown = 0, @@ -396,6 +532,10 @@ enum class condition_date_t last_year, }; +/** + * Databar axis type, applicable only when the type of a conditional format + * entry is @p databar. + */ enum class databar_axis_t { none = 0, @@ -403,31 +543,57 @@ enum class databar_axis_t automatic }; +/** + * Type of range grouping in a group field of a pivot table cache. + */ enum class pivot_cache_group_by_t { + /** + * Type of range grouping is unknown. + * + * This is an implicit default value of this type. + */ unknown = 0, - days, // grouping on "days" for date values. - hours, // grouping on "hours" for date values. - minutes, // grouping on "minutes" for date values. - months, // grouping on "months" for date values. - quarters, // grouping on "quarters" for date values. - range, // grouping by numeric ranges for numeric values. - seconds, // grouping on "seconds" for date values. - years // grouping on "years" for date values. + /** Grouping on "days" for date values. */ + days, + /** Grouping on "hours" for date values. */ + hours, + /** Grouping on "minutes" for date values. */ + minutes, + /** Grouping on "months" for date values. */ + months, + /** Grouping on "quarters" for date values. */ + quarters, + /** Grouping by numeric ranges for numeric values. */ + range, + /** Grouping on "seconds" for date values. */ + seconds, + /** Grouping on "years" for date values. */ + years }; +/** + * Stores a 2-dimensional cell address. + */ struct address_t { row_t row; col_t column; }; +/** + * Stores the size of a range of a spreadsheet. 
+ */ struct range_size_t { row_t rows; col_t columns; }; +/** + * Stores a 2-dimensional cell range by storing the positions of the top-left + * and bottom-right corners of the range. + */ struct range_t { address_t first; @@ -435,7 +601,7 @@ struct range_t }; /** - * Stores 3-dimensional cell address. The 'src' stands for + * Stores 3-dimensional cell address. The 'src' abbreviation stands for * sheet-row-column. */ struct src_address_t @@ -446,7 +612,7 @@ struct src_address_t }; /** - * Stores 3-dimensional range address. The 'src' stands for + * Stores 3-dimensional cell range address. The 'src' abbreviation stands for * sheet-row-column. */ struct src_range_t @@ -455,7 +621,16 @@ struct src_range_t src_address_t last; }; +/** + * Convert a 3-dimensional cell address to a 2-dimensional counterpart by + * dropping the sheet index. + */ ORCUS_DLLPUBLIC address_t to_rc_address(const src_address_t& r); + +/** + * Convert a 3-dimensional cell range address to a 2-dimensional counterpart + * by dropping the sheet indices. + */ ORCUS_DLLPUBLIC range_t to_rc_range(const src_range_t& r); ORCUS_DLLPUBLIC bool operator== (const address_t& left, const address_t& right); @@ -480,23 +655,9 @@ ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const address_t& v); ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const src_address_t& v); ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const range_t& v); -struct ORCUS_SPM_DLLPUBLIC color_t -{ - color_elem_t alpha; - color_elem_t red; - color_elem_t green; - color_elem_t blue; - - color_t(); - color_t(color_elem_t _red, color_elem_t _green, color_elem_t _blue); - color_t(color_elem_t _alpha, color_elem_t _red, color_elem_t _green, color_elem_t _blue); - - void reset(); - - bool operator==(const color_t& other) const; - bool operator!=(const color_t& other) const; -}; - +/** + * Stores a color value in RGB format. 
+ */ struct color_rgb_t { color_elem_t red; @@ -504,24 +665,6 @@ struct color_rgb_t color_elem_t blue; }; -struct ORCUS_SPM_DLLPUBLIC format_run -{ - size_t pos; - size_t size; - std::string_view font; - double font_size; - color_t color; - bool bold:1; - bool italic:1; - - format_run(); - - void reset(); - bool formatted() const; -}; - -using format_runs_t = std::vector; - /** * Convert a string representation of a totals row function name to its * equivalent enum value. diff --git a/include/orcus/stream.hpp b/include/orcus/stream.hpp index 1e24942f03005021b3b5f91f2b3e83de74a58c91..dd094bb97710c6727fd81e4d3fea72b726a96447 100644 --- a/include/orcus/stream.hpp +++ b/include/orcus/stream.hpp @@ -16,9 +16,10 @@ namespace orcus { /** - * Represents the content of a file. The file content may be either - * in-memory, or memory-mapped; it is initially memory-mapped, but it may - * become in-memory when converted to a different encoding. + * Represents the content of a file. + * + * The file content is memory-mapped initially, but may later become in-memory + * if the non-utf-8 content gets converted to utf-8. */ class ORCUS_PSR_DLLPUBLIC file_content { @@ -117,14 +118,20 @@ public: struct ORCUS_PSR_DLLPUBLIC line_with_offset { + /** content of the entire line. */ std::string line; - size_t line_number; - size_t offset_on_line; + /** 0-based line number. */ + std::size_t line_number; + /** 0-based offset within the line. 
*/ + std::size_t offset_on_line; - line_with_offset(std::string _line, size_t _line_number, size_t _offset_on_line); + line_with_offset(std::string _line, std::size_t _line_number, std::size_t _offset_on_line); line_with_offset(const line_with_offset& other); line_with_offset(line_with_offset&& other); ~line_with_offset(); + + bool operator== (const line_with_offset& other) const; + bool operator!= (const line_with_offset& other) const; }; /** @@ -142,11 +149,14 @@ ORCUS_PSR_DLLPUBLIC std::string create_parse_error_output(std::string_view strm, * Given a string consisting of multiple lines i.e. multiple line breaks, * find the line that contains the specified offset position. * - * @param strm string buffer containing multiple lines. + * @param strm string stream containing multiple lines to search. * @param offset offset position. * * @return structure containing information about the line containing the * offset position. + * + * @exception std::invalid_argument if the offset value equals or exceeds the + * length of the string stream being searched. */ ORCUS_PSR_DLLPUBLIC line_with_offset locate_line_with_offset(std::string_view strm, std::ptrdiff_t offset); diff --git a/include/orcus/string_pool.hpp b/include/orcus/string_pool.hpp index 9b2a66744b00f3c16c45dcda00889970feb3578a..12419bc7154651af36eb8da8b6f7dff9b68e6a59 100644 --- a/include/orcus/string_pool.hpp +++ b/include/orcus/string_pool.hpp @@ -17,7 +17,10 @@ namespace orcus { /** - * Implements string hash map. + * This class implements a shared string pool with the ability to merge with + * other pools. + * + * @note This class is not copy-constructible, but is move-constructible. */ class ORCUS_PSR_DLLPUBLIC string_pool { @@ -26,6 +29,7 @@ public: string_pool& operator=(const string_pool&) = delete; string_pool(); + string_pool(string_pool&& other); ~string_pool(); /** @@ -46,12 +50,31 @@ public: */ std::vector get_interned_strings() const; + /** + * Dump pool's content to stdout. 
+ * + * @todo This needs to be reworked to make it more generally usable. + */ void dump() const; + /** + * Clear pool's content. + */ void clear(); + /** + * Query the total number of strings stored in the pool. + * + * @return size_t total number of strings in the pool. + */ size_t size() const; + /** + * Swap the content with another string-pool instance. + * + * + * @param other string-pool instance to swap contents with. + */ void swap(string_pool& other); /** diff --git a/include/orcus/threaded_json_parser.hpp b/include/orcus/threaded_json_parser.hpp index 51cdaced2010f6f8dcdfe83704ac2c6620ca13d9..09bddfac3e1627fc941184c9bc79ec71ac1f0e68 100644 --- a/include/orcus/threaded_json_parser.hpp +++ b/include/orcus/threaded_json_parser.hpp @@ -168,7 +168,7 @@ void threaded_json_parser<_Handler>::process_tokens(json::parse_tokens_t& tokens case json::parse_token_t::parse_error: { auto v = std::get(t.value); - throw json::parse_error(std::string{v.str}, v.offset); + throw parse_error(std::string{v.str}, v.offset); } case json::parse_token_t::unknown: default: diff --git a/include/orcus/threaded_sax_token_parser.hpp b/include/orcus/threaded_sax_token_parser.hpp index 59ea967ab04986d46f1c8e50aad5df780180bc11..bad4761c9d9f1fe499010b9e4351d3c98dfc3c0c 100644 --- a/include/orcus/threaded_sax_token_parser.hpp +++ b/include/orcus/threaded_sax_token_parser.hpp @@ -150,7 +150,7 @@ void threaded_sax_token_parser<_Handler>::process_tokens(const sax::parse_tokens case sax::parse_token_t::parse_error: { auto v = std::get(t.value); - throw sax::malformed_xml_error(std::string{v.str}, v.offset); + throw malformed_xml_error(std::string{v.str}, v.offset); } default: throw general_error("unknown token type encountered."); diff --git a/include/orcus/tokens.hpp b/include/orcus/tokens.hpp index a5dc9a441537304270dd1b5650ceaa4e1d0ace01..9edc8771068a7b8d8b753307f82c9a1e8975b34a 100644 --- a/include/orcus/tokens.hpp +++ b/include/orcus/tokens.hpp @@ -15,11 +15,24 @@ namespace orcus { +/** + 
* XML token store that provides a mapping between integral token identifiers and + * their original names. Instances of this class are typically used as global + * constants. + * + * @note The string values for the original token names should be static + * values whose values and memory addresses remain unchanged during the + * life cycle of the instance that references them. + * + * @note This class is not copy-constructible. + */ class ORCUS_PSR_DLLPUBLIC tokens { public: tokens() = delete; + tokens(const tokens&) = delete; tokens(const char** token_names, size_t token_name_count); + ~tokens(); /** * Check if a token returned from get_token() method is valid. diff --git a/include/orcus/types.hpp b/include/orcus/types.hpp index 3117d8d686f1813996d5aacfdaf629901a6e31af..6b85206817f7c13b3a89fab95d71eca331fbe317 100644 --- a/include/orcus/types.hpp +++ b/include/orcus/types.hpp @@ -21,41 +21,43 @@ class xmlns_context; class xmlns_repository; /** - * Generic constant to be used to indicate that a valid index value is - * expected but not found. + * Integral type that represents a tokenized XML element name. */ -ORCUS_PSR_DLLPUBLIC extern const size_t index_not_found; - -// XML specific types +using xml_token_t = std::size_t; -using xml_token_t = size_t; +/** + * Type that represents a normalized XML namespace identifier. Internally it + * is a pointer value that points to a static char buffer that stores a + * namespace name.
+ */ using xmlns_id_t = const char*; -using xml_token_pair_t = std::pair; - -struct ORCUS_PSR_DLLPUBLIC xml_token_pair_hash -{ - size_t operator()(const xml_token_pair_t& v) const; -}; - -using xml_elem_stack_t = std::vector; -using xml_elem_set_t = std::unordered_set; - -ORCUS_PSR_DLLPUBLIC extern const xmlns_id_t XMLNS_UNKNOWN_ID; -ORCUS_PSR_DLLPUBLIC extern const xml_token_t XML_UNKNOWN_TOKEN; - +/** + * Parser token that represents the state of a parse error, used by + * threaded_json_parser and threaded_sax_token_parser when transferring + * parse status between threads. + */ struct ORCUS_PSR_DLLPUBLIC parse_error_value_t { + /** error message associated with the parse error. */ std::string_view str; + /** offset in stream where the error occurred. */ std::ptrdiff_t offset; parse_error_value_t(); + parse_error_value_t(const parse_error_value_t& other); parse_error_value_t(std::string_view _str, std::ptrdiff_t _offset); + parse_error_value_t& operator=(const parse_error_value_t& other); + bool operator==(const parse_error_value_t& other) const; bool operator!=(const parse_error_value_t& other) const; }; +/** + * Represents a name with a normalized namespace in XML documents. This can + * be used either as an element name or as an attribute name. 
+ */ struct ORCUS_PSR_DLLPUBLIC xml_name_t { enum to_string_type { use_alias, use_short_name }; @@ -63,20 +65,47 @@ struct ORCUS_PSR_DLLPUBLIC xml_name_t xmlns_id_t ns; std::string_view name; - xml_name_t(); + xml_name_t() noexcept; xml_name_t(xmlns_id_t _ns, std::string_view _name); - xml_name_t(const xml_name_t& r); + xml_name_t(const xml_name_t& other); xml_name_t& operator= (const xml_name_t& other); - bool operator== (const xml_name_t& other) const; - bool operator!= (const xml_name_t& other) const; + bool operator== (const xml_name_t& other) const noexcept; + bool operator!= (const xml_name_t& other) const noexcept; + /** + * Convert a namespace-name value pair to a string representation with the + * namespace value converted to either an alias or a unique "short name". + * Refer to @link xmlns_context::get_alias() get_alias() @endlink and + * @link xmlns_context::get_short_name() get_short_name() @endlink + * for the explanations of an alias and short name. + * + * @param cxt namespace context object associated with the XML stream + * currently being parsed. + * @param type policy on how to convert a namespace identifier to a string + * representation. + * + * @return string representation of a namespace-name value pair. + */ std::string to_string(const xmlns_context& cxt, to_string_type type) const; + /** + * Convert a namespace-name value pair to a string representation with the + * namespace value converted to a unique "short name". Refer to @link + * xmlns_repository::get_short_name() get_short_name() @endlink for the + * explanations of a short name. + * + * @param repo namespace repository. + * + * @return string representation of a namespace-name value pair. + */ std::string to_string(const xmlns_repository& repo) const; }; +/** + * Struct containing properties of a tokenized XML attribute. 
+ */ struct ORCUS_PSR_DLLPUBLIC xml_token_attr_t { xmlns_id_t ns; @@ -94,23 +123,33 @@ struct ORCUS_PSR_DLLPUBLIC xml_token_attr_t bool transient; xml_token_attr_t(); + xml_token_attr_t(const xml_token_attr_t& other); xml_token_attr_t( xmlns_id_t _ns, xml_token_t _name, std::string_view _value, bool _transient); xml_token_attr_t( xmlns_id_t _ns, xml_token_t _name, std::string_view _raw_name, std::string_view _value, bool _transient); + + xml_token_attr_t& operator=(const xml_token_attr_t& other); }; +using xml_token_attrs_t = std::vector; + /** - * Element properties passed to its handler via start_element() and - * end_element() calls. + * Struct containing XML element properties passed to the handler of + * sax_token_parser via its @p start_element() and @p end_element() + * calls. + * + * @see + * @li sax_token_handler::start_element + * @li sax_token_handler::end_element */ struct ORCUS_PSR_DLLPUBLIC xml_token_element_t { xmlns_id_t ns; xml_token_t name; std::string_view raw_name; - std::vector attrs; + xml_token_attrs_t attrs; xml_token_element_t& operator= (xml_token_element_t) = delete; @@ -121,7 +160,7 @@ struct ORCUS_PSR_DLLPUBLIC xml_token_element_t }; /** - * Character set types. + * Character set types, generated from IANA character-sets specifications. * * @see https://www.iana.org/assignments/character-sets/character-sets.xhtml */ @@ -388,6 +427,9 @@ enum class character_set_t windows_874, }; +/** + * Struct holding XML declaration properties. + */ struct ORCUS_PSR_DLLPUBLIC xml_declaration_t { uint8_t version_major; @@ -406,22 +448,32 @@ struct ORCUS_PSR_DLLPUBLIC xml_declaration_t bool operator!= (const xml_declaration_t& other) const; }; -// Other types - +/** + * Unit of length, as used in length_t. + */ enum class length_unit_t { unknown = 0, centimeter, millimeter, + /** + * Special unit of length used by Excel, defined as the maximum digit width + * of font used as the "Normal" style font. 
+ * + * @note Since it's not possible to determine the actual length using this + * unit, it is approximated by 1.9 millimeters. + */ xlsx_column_digit, inch, point, + /** One twip is a twentieth of a point equal to 1/1440 of an inch. */ twip, pixel - - // TODO: Add more. }; +/** + * Input formats that orcus can import. + */ enum class format_t { unknown = 0, @@ -432,9 +484,12 @@ enum class format_t csv }; +/** + * Formats supported by orcus as output formats. + */ enum class dump_format_t { - unknown, + unknown = 0, none, check, csv, @@ -446,6 +501,9 @@ enum class dump_format_t debug_state }; +/** + * Holds a length value with unit of measurement. + */ struct ORCUS_PSR_DLLPUBLIC length_t { length_unit_t unit; @@ -462,6 +520,9 @@ struct ORCUS_PSR_DLLPUBLIC length_t bool operator!= (const length_t& other) const noexcept; }; +/** + * Struct that holds a date or date-time value. + */ struct ORCUS_PSR_DLLPUBLIC date_time_t { int year; @@ -483,17 +544,30 @@ struct ORCUS_PSR_DLLPUBLIC date_time_t bool operator!= (const date_time_t& other) const; bool operator< (const date_time_t& other) const; + /** + * Convert the date-time value to an ISO-formatted string representation. + * + * @return ISO-formatted string representation of the date-time value. + */ std::string to_string() const; + /** + * Swap the value with another instance. + * + * @param other another instance to swap values with. + */ void swap(date_time_t& other); /** - * Parse a string representation of a date-time value, and convert it into a - * date_time_t value. A string representation allows either a date only or - * a date and time value, but it does not allow a time only value. + * Parse an ISO-formatted string representation of a date-time value, and + * convert it into a date_time_t value. A string representation allows + * either a date only or a date and time value, but it does not allow a time + * only value. 
* - * date only: 2013-04-09 - * date and time: 2013-04-09T21:34:09.55 + * Here are some examples of ISO-formatted date and date-time values: + * + * @li 2013-04-09 (date only) + * @li 2013-04-09T21:34:09.55 (date and time) * * @param str string representation of a date-time value. * @return converted date-time value consisting of a set of numeric values. @@ -501,16 +575,57 @@ struct ORCUS_PSR_DLLPUBLIC date_time_t static date_time_t from_chars(std::string_view str); }; +/** + * Parse a string that represents an output format type and convert it to a + * corresponding enum value. + * + * @param s string representing an output format type. + * + * @return enum value representing an output format type, or + * dump_format_t::unknown in case it cannot be + * determined. + */ ORCUS_PSR_DLLPUBLIC dump_format_t to_dump_format_enum(std::string_view s); + +/** + * Parse a string that represents a character set and convert it to a + * corresponding enum value. + * + * @param s string representing a character set. + * + * @return enum value representing a character set, or + * character_set_t::unspecified in case it cannot be + * determined. + */ ORCUS_PSR_DLLPUBLIC character_set_t to_character_set(std::string_view s); +/** + * Get a list of available output format entries. Each entry consists of the + * name of a format and its enum value equivalent. + * + * @return list of available output format entries. + */ ORCUS_PSR_DLLPUBLIC std::vector> get_dump_format_entries(); ORCUS_PSR_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const length_t& v); ORCUS_PSR_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const date_time_t& v); ORCUS_PSR_DLLPUBLIC std::ostream& operator<< (std::ostream& os, format_t v); -typedef ::std::vector xml_attrs_t; +/** + * Generic constant to be used to indicate that a valid index value is + * expected but not found. + */ +ORCUS_PSR_DLLPUBLIC extern const std::size_t INDEX_NOT_FOUND; + +/** + * Value associated with an unknown XML namespace.
+ */ +ORCUS_PSR_DLLPUBLIC extern const xmlns_id_t XMLNS_UNKNOWN_ID; + +/** + * Value associated with an unknown XML token. + */ +ORCUS_PSR_DLLPUBLIC extern const xml_token_t XML_UNKNOWN_TOKEN; } diff --git a/include/orcus/xml_namespace.hpp b/include/orcus/xml_namespace.hpp index fd5d9ff598296bc081d32ef5b86041b589133c80..cf9b2700e2eaf59a7de4955dfda17584eefd2f3e 100644 --- a/include/orcus/xml_namespace.hpp +++ b/include/orcus/xml_namespace.hpp @@ -22,6 +22,9 @@ struct xmlns_context_impl; /** * Central XML namespace repository that stores all namespaces that are used * in the current session. + * + * @warning this class is not copyable, but is movable; however, the + * moved-from object will not be usable after the move. */ class ORCUS_PSR_DLLPUBLIC xmlns_repository { @@ -32,15 +35,18 @@ class ORCUS_PSR_DLLPUBLIC xmlns_repository xmlns_id_t intern(std::string_view uri); - xmlns_repository(const xmlns_repository&); // disabled - xmlns_repository& operator= (const xmlns_repository&); // disabled - size_t get_index(xmlns_id_t ns_id) const; public: + xmlns_repository(const xmlns_repository&) = delete; + xmlns_repository& operator= (const xmlns_repository&) = delete; + xmlns_repository(); + xmlns_repository(xmlns_repository&& other); ~xmlns_repository(); + xmlns_repository& operator= (xmlns_repository&&); + /** * Add a set of predefined namespace values to the repository. * @@ -55,6 +61,15 @@ public: */ void add_predefined_values(const xmlns_id_t* predefined_ns); + /** + * Create a context object associated with this namespace repository. + * + * @warning Since this context object references values stored in the repo, + * make sure that it will not out-live the repository object + * itself. + * + * @return context object to use for a new XML stream. 
+ */ xmlns_context create_context(); /** @@ -66,8 +81,11 @@ public: */ xmlns_id_t get_identifier(size_t index) const; + /** + * See xmlns_context::get_short_name() for the explanation of this method, + * which works identically to it. + */ std::string get_short_name(xmlns_id_t ns_id) const; - std::string get_short_name(size_t index) const; }; /** @@ -76,7 +94,7 @@ public: * instance of this class any longer than the life cycle of the xml stream * it is used in. * - * An empty key value is associated with a default namespace. + * An empty key value i.e. `""` is associated with a default namespace. */ class ORCUS_PSR_DLLPUBLIC xmlns_context { @@ -95,17 +113,33 @@ public: xmlns_context& operator= (const xmlns_context& r); xmlns_context& operator= (xmlns_context&& r); - xmlns_id_t push(std::string_view key, std::string_view uri); - void pop(std::string_view key); + /** + * Push a new namespace alias-value pair to the stack. + * + * @param alias namespace alias to push onto the stack. If the same alias + * is already present, this overwrites it until it gets popped + * off the stack. + * @param uri namespace name to associate with the alias. + * + * @return normalized namespace identifier for the namespace name. + */ + xmlns_id_t push(std::string_view alias, std::string_view uri); + + /** + * Pop a namespace alias from the stack. + * + * @param alias namespace alias to pop from the stack. + */ + void pop(std::string_view alias); /** * Get the currnet namespace identifier for a specified namespace alias. * - * @param key namespace alias to get the current namespace identifier for. + * @param alias namespace alias to get the current namespace identifier for. * * @return current namespace identifier associated with the alias. */ - xmlns_id_t get(std::string_view key) const; + xmlns_id_t get(std::string_view alias) const; /** * Get a unique index value associated with a specified identifier. 
An @@ -123,8 +157,8 @@ public: * but still guaranteed to be unique to the identifier it is associated * with. * - *

Note that the xmlns_repository class has method of the same - * name, and that method works identically to this method.

+ * @note The xmlns_repository class has a method of the same name, and that + * method works identically to this method. * * @param ns_id a namespace identifier to obtain short name for. * diff --git a/include/orcus/yaml_document_tree.hpp b/include/orcus/yaml_document_tree.hpp index 6e8d55bdf6b7f7aadab61ddffd67916b4d79fc03..d22a588db0d507cb4bbb0a5b98985d63d6d45923 100644 --- a/include/orcus/yaml_document_tree.hpp +++ b/include/orcus/yaml_document_tree.hpp @@ -25,7 +25,7 @@ class ORCUS_DLLPUBLIC document_error : public general_error { public: document_error(const std::string& msg); - virtual ~document_error() throw(); + virtual ~document_error(); }; enum class node_t : uint8_t diff --git a/include/orcus/yaml_parser.hpp b/include/orcus/yaml_parser.hpp index 123e1793cf1f3ecbe81fa9b72202d097032b90c1..836a9021acb1bc3c9164b958853aa40e73f55038 100644 --- a/include/orcus/yaml_parser.hpp +++ b/include/orcus/yaml_parser.hpp @@ -13,6 +13,10 @@ namespace orcus { +/** + * Blank handler class for yaml_parser. One can sub-class this and overwrite + * callback functions one needs to handle. + */ class yaml_handler { public: @@ -69,12 +73,11 @@ public: /** * Called when a string value is encountered. * - * @param p pointer to the first character of the string value. - * @param n length of the string value. + * @param value string value. */ - void string(const char* p, size_t n) + void string(std::string_view value) { - (void)p; (void)n; + (void)value; } /** @@ -103,13 +106,21 @@ public: void null() {} }; -template +/** + * Parser for YAML documents. + * + * @tparam HandlerT Handler type with member functions for event callbacks. + * Refer to yaml_handler. + * + * @warning This parser is still highly experimental. Use with caution.
+ */ +template class yaml_parser : public yaml::parser_base { public: - typedef _Handler handler_type; + typedef HandlerT handler_type; - yaml_parser(const char* p, size_t n, handler_type& hdl); + yaml_parser(std::string_view content, handler_type& hdl); void parse(); @@ -217,7 +228,7 @@ template void yaml_parser<_Handler>::handler_string(const char* p, size_t n) { push_parse_token(yaml::detail::parse_token_t::string); - m_handler.string(p, n); + m_handler.string({p, n}); } template @@ -249,8 +260,8 @@ void yaml_parser<_Handler>::handler_null() } template -yaml_parser<_Handler>::yaml_parser(const char* p, size_t n, handler_type& hdl) : - yaml::parser_base(p, n), m_handler(hdl) {} +yaml_parser<_Handler>::yaml_parser(std::string_view content, handler_type& hdl) : + yaml::parser_base(content), m_handler(hdl) {} template void yaml_parser<_Handler>::parse() @@ -290,7 +301,7 @@ void yaml_parser<_Handler>::parse() if (cur_scope == scope_empty) { if (indent > 0) - throw yaml::parse_error( + throw parse_error( "first node of the document should not be indented.", offset()); push_scope(indent); @@ -306,7 +317,7 @@ void yaml_parser<_Handler>::parse() { cur_scope = end_scope(); if (cur_scope < indent) - throw yaml::parse_error("parse: invalid indent level.", offset()); + throw parse_error("parse: invalid indent level.", offset()); } while (indent < cur_scope); } @@ -503,10 +514,10 @@ void yaml_parser<_Handler>::parse_line(const char* p, size_t len) // start of a document ++p; if (p == p_end) - throw yaml::parse_error("parse_line: line ended with '--'.", offset_last_char_of_line()); + throw parse_error("parse_line: line ended with '--'.", offset_last_char_of_line()); if (*p != '-') - yaml::parse_error::throw_with( + parse_error::throw_with( "parse_line: '-' expected but '", *p, "' found.", offset_last_char_of_line() - std::ptrdiff_t(p_end-p)); @@ -533,7 +544,7 @@ void yaml_parser<_Handler>::parse_line(const char* p, size_t len) // list item start with inline first item content. 
++p; if (p == p_end) - throw yaml::parse_error( + throw parse_error( "parse_line: list item expected, but the line ended prematurely.", offset_last_char_of_line() - std::ptrdiff_t(p_end-p)); @@ -552,7 +563,7 @@ void yaml_parser<_Handler>::parse_line(const char* p, size_t len) } if (get_scope_type() == yaml::detail::scope_t::sequence) - yaml::parse_error::throw_with( + parse_error::throw_with( "'-' was expected for a sequence element, but '", *p, "' was found.", offset_last_char_of_line()-len+1); @@ -581,7 +592,7 @@ void yaml_parser<_Handler>::parse_map_key(const char* p, size_t len) skip_blanks(p, p_end-p); if (*p != ':') - throw yaml::parse_error( + throw parse_error( "parse_map_key: ':' is expected after the quoted string key.", offset() - std::ptrdiff_t(p_end-p+1)); @@ -611,7 +622,7 @@ void yaml_parser<_Handler>::parse_map_key(const char* p, size_t len) skip_blanks(p, p_end-p); if (*p != ':') - throw yaml::parse_error( + throw parse_error( "parse_map_key: ':' is expected after the quoted string key.", offset() - std::ptrdiff_t(p_end-p+1)); @@ -664,7 +675,7 @@ void yaml_parser<_Handler>::parse_map_key(const char* p, size_t len) // inline map item. 
if (*p == '-') - throw yaml::parse_error( + throw parse_error( "parse_map_key: sequence entry is not allowed as an inline map item.", offset() - std::ptrdiff_t(p_end-p+1)); diff --git a/include/orcus/yaml_parser_base.hpp b/include/orcus/yaml_parser_base.hpp index ec13d22dfd791547923225900b37d6e8067c8705..13b4c915658a6dc886f977faf691fe91bc5cab8b 100644 --- a/include/orcus/yaml_parser_base.hpp +++ b/include/orcus/yaml_parser_base.hpp @@ -15,15 +15,6 @@ namespace orcus { namespace yaml { -class ORCUS_PSR_DLLPUBLIC parse_error : public ::orcus::parse_error -{ -public: - parse_error(const std::string& msg, std::ptrdiff_t offset); - - static void throw_with(const char* msg_before, char c, const char* msg_after, std::ptrdiff_t offset); - static void throw_with(const char* msg_before, const char* p, size_t n, const char* msg_after, std::ptrdiff_t offset); -}; - namespace detail { enum class scope_t @@ -96,7 +87,7 @@ protected: parser_base(const parser_base&) = delete; parser_base& operator=(const parser_base&) = delete; - parser_base(const char* p, size_t n); + parser_base(std::string_view content); ~parser_base(); void push_parse_token(detail::parse_token_t t); diff --git a/include/orcus/zip_archive.hpp b/include/orcus/zip_archive.hpp index 47ceb0c71977568e0c9f10589046254c43382fbd..8896bd88e079d917dd948b0d595d463be5f664b0 100644 --- a/include/orcus/zip_archive.hpp +++ b/include/orcus/zip_archive.hpp @@ -9,37 +9,57 @@ #define INCLUDED_ORCUS_ZIP_ARCHIVE_HPP #include "env.hpp" +#include "exception.hpp" -#include -#include -#include +#include #include +#include +#include namespace orcus { -class zip_archive_stream; -class zip_archive_impl; - -class ORCUS_PSR_DLLPUBLIC zip_error : public std::exception +/** + * Structure containing file entry header attributes. 
+ */ +struct ORCUS_PSR_DLLPUBLIC zip_file_entry_header { - std::string m_msg; -public: - zip_error(); - zip_error(const std::string& msg); - virtual ~zip_error() throw(); - - virtual const char* what() const throw(); + uint32_t header_signature = 0; + uint16_t required_version = 0; + uint16_t flag = 0; + uint16_t compression_method = 0; + uint16_t last_modified_time = 0; + uint16_t last_modified_date = 0; + uint32_t crc32 = 0; + uint32_t compressed_size = 0; + uint32_t uncompressed_size = 0; + + std::string filename; + std::vector extra_field; + + zip_file_entry_header(); + zip_file_entry_header(const zip_file_entry_header& other); + zip_file_entry_header(zip_file_entry_header&& other); + ~zip_file_entry_header(); + + zip_file_entry_header& operator=(const zip_file_entry_header& other); + zip_file_entry_header& operator=(zip_file_entry_header&& other); }; +ORCUS_PSR_DLLPUBLIC std::ostream& operator<<(std::ostream& os, const zip_file_entry_header& header); + +class zip_archive_stream; + class ORCUS_PSR_DLLPUBLIC zip_archive { - zip_archive_impl* mp_impl; + struct impl; + std::unique_ptr mp_impl; + +public: zip_archive() = delete; zip_archive(const zip_archive&) = delete; zip_archive& operator= (const zip_archive) = delete; -public: zip_archive(zip_archive_stream* stream); ~zip_archive(); @@ -51,19 +71,22 @@ public: void load(); /** - * Dump the content of a specified file entry to stdout. + * Retrieve the header information for a file entry specified by index. * - * @param index file entry index + * @param index file entry index. + * + * @return header information for a file entry. */ - void dump_file_entry(size_t index) const; + zip_file_entry_header get_file_entry_header(std::size_t index) const; /** - * Dump the content of a specified file entry to stdout. + * Retrieve the header information for a file entry specified by name. * + * @param name file entry name. * - * @param entry_name file entry name. + * @return header information for a file entry. 
*/ - void dump_file_entry(std::string_view entry_name) const; + zip_file_entry_header get_file_entry_header(std::string_view name) const; /** * Get file entry name from its index. @@ -84,17 +107,17 @@ public: size_t get_file_entry_count() const; /** - * Retrieve data stream of specified file entry into buffer. The retrieved - * data stream gets uncompressed if the original stream is compressed. - * The method will overwrite the content of passed buffer if there is any - * pre-existing data in it. + * Retrieve data stream of specified file entry. The retrieved data stream + * gets uncompressed if the original stream is compressed. + * + * @param entry_name file entry name. * - * @param entry_name file entry name - * @param buf buffer to put the retrieved data stream into. + * @return buffer containing the data stream for specified entry. * - * @return true if successful, false otherwise. + * @exception zip_error thrown when any problem is encountered during data + * stream retrieval. */ - bool read_file_entry(std::string_view entry_name, std::vector& buf) const; + std::vector read_file_entry(std::string_view entry_name) const; }; } diff --git a/slickedit/cpp.vpj b/slickedit/cpp.vpj index c34a764f6ee2528fac48067f810728dbaf2eef17..e78690750b98e5c438e96e066441ad1bc2830f01 100644 --- a/slickedit/cpp.vpj +++ b/slickedit/cpp.vpj @@ -144,8 +144,6 @@ Name="Other Files" Filters=""> - - @@ -173,6 +171,7 @@ + @@ -424,6 +423,8 @@ + + @@ -472,6 +473,7 @@ + @@ -549,6 +551,7 @@ + @@ -619,7 +622,8 @@ - + + diff --git a/slickedit/doc.vpj b/slickedit/doc.vpj index 252ca4b42db5821c692fcbcff208ff8c8a6b40cd..0486dff506c13239321b23aa37aaa35bc518c7c9 100644 --- a/slickedit/doc.vpj +++ b/slickedit/doc.vpj @@ -70,60 +70,9 @@ Name="Other Files" Filters="" GUID="{58BF30C7-CBF2-4454-ABD3-F8D694316022}"> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + diff --git a/src/include/test_global.hpp b/src/include/test_global.hpp index 
3aa508eb010739a313daf2ca01cdbc33e30740ea..cd8d32c158bae9486e71be636680c1c24f3cda37 100644 --- a/src/include/test_global.hpp +++ b/src/include/test_global.hpp @@ -51,6 +51,8 @@ void verify_content( }} // namespace orcus::test +#define ORCUS_TEST_FUNC_SCOPE orcus::test::stack_printer __sp__(__func__) + #endif /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/src/liborcus/CMakeLists.txt b/src/liborcus/CMakeLists.txt index 40a0bc0274faaf06396e81521d15d0b01128ba0b..9eb3808045bee05008aa36506871e253218fe66c 100644 --- a/src/liborcus/CMakeLists.txt +++ b/src/liborcus/CMakeLists.txt @@ -24,6 +24,7 @@ add_library(orcus-${ORCUS_API_VERSION} SHARED measurement.cpp xml_context_base.cpp xml_context_global.cpp + xml_element_types.cpp xml_element_validator.cpp xml_empty_context.cpp xml_map_tree.cpp diff --git a/src/liborcus/Makefile.am b/src/liborcus/Makefile.am index b547bb0a74bdc182bbb0b07349c9a2833473492a..dd374743aaf26b7878c81ac1408a72a2fc8af7da 100644 --- a/src/liborcus/Makefile.am +++ b/src/liborcus/Makefile.am @@ -19,6 +19,9 @@ EXTRA_PROGRAMS = \ TESTS = +EXTRA_DIST = \ + xml_element_types.hpp + if HAVE_STATIC_LIB AM_CPPFLAGS += -D__ORCUS_STATIC_LIB=1 else @@ -71,6 +74,7 @@ liborcus_@ORCUS_API_VERSION@_la_SOURCES = \ xml_context_base.cpp \ xml_context_global.hpp \ xml_context_global.cpp \ + xml_element_types.cpp \ xml_element_validator.hpp \ xml_element_validator.cpp \ xml_empty_context.hpp \ diff --git a/src/liborcus/css_document_tree.cpp b/src/liborcus/css_document_tree.cpp index 46bf7e912435e749be6eca05c9bb7e0074ce162a..c4176450fa0aa3c9aec6a3da8dbbab8882cde6a3 100644 --- a/src/liborcus/css_document_tree.cpp +++ b/src/liborcus/css_document_tree.cpp @@ -36,7 +36,7 @@ class parser_handler css_document_tree& m_doc; std::vector m_cur_selector_group; css_properties_t m_cur_properties; - pstring m_cur_prop_name; + std::string_view m_cur_prop_name; std::vector m_cur_prop_values; css_selector_t m_cur_selector; /// current selector css_simple_selector_t 
m_cur_simple_selector; @@ -50,24 +50,23 @@ public: m_cur_combinator(css::combinator_t::descendant), m_in_prop(false) {} - void at_rule_name(const char* p, size_t n) + void at_rule_name(std::string_view name) { #if ORCUS_DEBUG_CSS_DOCTREE - cout << "@" << string(p, n).c_str(); + cout << "@" << name; #else - (void)p; - (void)n; + (void)name; #endif } - void simple_selector_type(const char* p, size_t n) + void simple_selector_type(std::string_view type) { - m_cur_simple_selector.name = pstring(p, n); + m_cur_simple_selector.name = type; } - void simple_selector_class(const char* p, size_t n) + void simple_selector_class(std::string_view cls) { - m_cur_simple_selector.classes.insert(pstring(p, n)); + m_cur_simple_selector.classes.insert(cls); } void simple_selector_pseudo_element(css::pseudo_element_t pe) @@ -81,9 +80,9 @@ public: m_cur_simple_selector.pseudo_classes |= pc; } - void simple_selector_id(const char* p, size_t n) + void simple_selector_id(std::string_view id) { - m_cur_simple_selector.id = std::string_view(p, n); + m_cur_simple_selector.id = id; } void end_simple_selector() @@ -119,20 +118,19 @@ public: m_cur_combinator = combinator; } - void property_name(const char* p, size_t n) + void property_name(std::string_view name) { - m_cur_prop_name = pstring(p, n); + m_cur_prop_name = name; #if ORCUS_DEBUG_CSS_DOCTREE - cout << string(p, n).c_str() << ":"; + cout << name << ":"; #endif } - void value(const char* p, size_t n) + void value(std::string_view s) { - std::string_view s(p, n); m_cur_prop_values.push_back(s); #if ORCUS_DEBUG_CSS_DOCTREE - cout << " '" << string(p, n).c_str() << "'"; + cout << " '" << s << "'"; #endif } @@ -180,14 +178,14 @@ public: m_cur_prop_values.push_back(val); } - void url(const char* p, size_t n) + void url(std::string_view url) { #if ORCUS_DEBUG_CSS_DOCTREE - cout << " url(" << pstring(p, n) << ")"; + cout << " url(" << url << ")"; #endif css_property_value_t val; val.type = orcus::css::property_value_t::url; - val.value = 
std::string_view(p, n); + val.value = url; m_cur_prop_values.push_back(val); } @@ -243,7 +241,7 @@ public: { m_cur_properties.insert( css_properties_t::value_type(m_cur_prop_name, m_cur_prop_values)); - m_cur_prop_name.clear(); + m_cur_prop_name = std::string_view{}; m_cur_prop_values.clear(); #if ORCUS_DEBUG_CSS_DOCTREE cout << endl; @@ -558,7 +556,7 @@ void css_document_tree::load(std::string_view stream) return; parser_handler handler(*this); - css_parser parser(stream.data(), stream.size(), handler); + css_parser parser(stream, handler); parser.parse(); } diff --git a/src/liborcus/css_document_tree_test.cpp b/src/liborcus/css_document_tree_test.cpp index ca164acf643b59e483a82cd9f2a8c0145cf4c98a..09b98d2f93dc882f726eb3e2f0824b9b9953ad61 100644 --- a/src/liborcus/css_document_tree_test.cpp +++ b/src/liborcus/css_document_tree_test.cpp @@ -108,7 +108,7 @@ void test_css_invalids() doc.load(content.str()); assert(!"css::parse_error was not thrown, but expected to be."); } - catch (const css::parse_error&) + catch (const parse_error&) { // This is expected. 
} diff --git a/src/liborcus/dom_tree.cpp b/src/liborcus/dom_tree.cpp index f841443727cbfeff7c3d18d106ac98eb4c84b521..8419c6aa119186d4dcb569c4dc5a3432b3b84789 100644 --- a/src/liborcus/dom_tree.cpp +++ b/src/liborcus/dom_tree.cpp @@ -119,7 +119,7 @@ void print(std::ostream& os, const entity_name& name, const xmlns_context& cxt) if (name.ns) { size_t index = cxt.get_index(name.ns); - if (index != index_not_found) + if (index != INDEX_NOT_FOUND) os << "ns" << index << ':'; } os << name.name; @@ -580,8 +580,7 @@ document_tree::~document_tree() {} void document_tree::load(std::string_view strm) { - sax_ns_parser parser( - strm.data(), strm.size(), mp_impl->m_ns_cxt, *mp_impl); + sax_ns_parser parser(strm, mp_impl->m_ns_cxt, *mp_impl); parser.parse(); } diff --git a/src/liborcus/format_detection.cpp b/src/liborcus/format_detection.cpp index 0bb02777e08514969ac85fe592099a04734fe3f3..c3a31b1c31c4326504727cdc90c49273319d043b 100644 --- a/src/liborcus/format_detection.cpp +++ b/src/liborcus/format_detection.cpp @@ -48,22 +48,24 @@ namespace orcus { -format_t detect(const unsigned char* buffer, size_t length) try +format_t detect(std::string_view strm) try { + const auto* p = reinterpret_cast(strm.data()); + #if ODS_ENABLED - if (orcus_ods::detect(buffer, length)) + if (orcus_ods::detect(p, strm.size())) return format_t::ods; #endif #if XLSX_ENABLED - if (orcus_xlsx::detect(buffer, length)) + if (orcus_xlsx::detect(p, strm.size())) return format_t::xlsx; #endif #if GNUMERIC_ENABLED - if (orcus_gnumeric::detect(buffer, length)) + if (orcus_gnumeric::detect(p, strm.size())) return format_t::gnumeric; #endif #if XLS_XML_ENABLED - if (orcus_xls_xml::detect(buffer, length)) + if (orcus_xls_xml::detect(p, strm.size())) return format_t::xls_xml; #endif diff --git a/src/liborcus/format_detection_test.cpp b/src/liborcus/format_detection_test.cpp index 378eff3d4c585b0fe8e3f13dc41fd02497bebe23..74b4e51e0b370b9f619f596ca84277126b6ad1f1 100644 --- a/src/liborcus/format_detection_test.cpp 
+++ b/src/liborcus/format_detection_test.cpp @@ -34,8 +34,7 @@ void test_detect_formats() { orcus::file_content content(tests[i].path.string()); assert(!content.empty()); - orcus::format_t detected = orcus::detect( - reinterpret_cast(content.data()), content.size()); + orcus::format_t detected = orcus::detect(content.str()); assert(detected == tests[i].format); } @@ -54,9 +53,7 @@ void test_invalids() orcus::file_content content(p.string()); assert(!content.empty()); - orcus::format_t detected = orcus::detect( - reinterpret_cast(content.data()), content.size()); - + orcus::format_t detected = orcus::detect(content.str()); assert(detected == orcus::format_t::unknown); } } diff --git a/src/liborcus/gnumeric_cell_context.cpp b/src/liborcus/gnumeric_cell_context.cpp index 17c3a008165640dc920c511fd908f40b016989ff..83f435356160e5dcaefaaea432e330224167da85 100644 --- a/src/liborcus/gnumeric_cell_context.cpp +++ b/src/liborcus/gnumeric_cell_context.cpp @@ -121,7 +121,7 @@ gnumeric_cell_context::gnumeric_cell_context(session_context& session_cxt, const gnumeric_cell_context::~gnumeric_cell_context() = default; -void gnumeric_cell_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void gnumeric_cell_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { push_stack(ns, name); @@ -172,7 +172,7 @@ void gnumeric_cell_context::reset(ss::iface::import_sheet* sheet) mp_sheet = sheet; } -void gnumeric_cell_context::start_cell(const xml_attrs_t& attrs) +void gnumeric_cell_context::start_cell(const xml_token_attrs_t& attrs) { mp_cell_data.reset(new gnumeric_cell_data); cell_attr_parser parser = for_each(attrs.begin(), attrs.end(), cell_attr_parser()); diff --git a/src/liborcus/gnumeric_cell_context.hpp b/src/liborcus/gnumeric_cell_context.hpp index e6f6a76e517ffd143c11b87301b20dc6549be3a9..ae4727ab32065497dc5c59e40d02d9a5797d91c4 100644 --- a/src/liborcus/gnumeric_cell_context.hpp +++ 
b/src/liborcus/gnumeric_cell_context.hpp @@ -31,14 +31,14 @@ public: spreadsheet::iface::import_factory* factory); virtual ~gnumeric_cell_context() override; - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) override; + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) override; virtual bool end_element(xmlns_id_t ns, xml_token_t name) override; virtual void characters(std::string_view str, bool transient) override; void reset(spreadsheet::iface::import_sheet* sheet); private: - void start_cell(const xml_attrs_t& attrs); + void start_cell(const xml_token_attrs_t& attrs); void end_cell(); private: spreadsheet::iface::import_factory* mp_factory; diff --git a/src/liborcus/gnumeric_cell_context_test.cpp b/src/liborcus/gnumeric_cell_context_test.cpp index eca5f4ebe1733374ef91d8e137a08517d126f1a6..188cea1c6477bbd0036f4306e89453fdc618371e 100644 --- a/src/liborcus/gnumeric_cell_context_test.cpp +++ b/src/liborcus/gnumeric_cell_context_test.cpp @@ -173,7 +173,7 @@ void test_cell_value() orcus::xmlns_id_t ns = NS_gnumeric_gnm; orcus::xml_token_t elem = XML_Cell; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Row, "1", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Col, "2", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_ValueType, "40", false)); @@ -193,7 +193,7 @@ void test_cell_bool() orcus::xmlns_id_t ns = NS_gnumeric_gnm; orcus::xml_token_t elem = XML_Cell; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Row, "31", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Col, "32", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_ValueType, "20", false)); @@ -213,7 +213,7 @@ void test_cell_string() orcus::xmlns_id_t ns = NS_gnumeric_gnm; orcus::xml_token_t elem = XML_Cell; - orcus::xml_attrs_t attrs; + 
orcus::xml_token_attrs_t attrs; attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Row, "10", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Col, "321", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_ValueType, "60", false)); @@ -268,7 +268,7 @@ void test_shared_formula_with_string() orcus::xmlns_id_t ns = NS_gnumeric_gnm; orcus::xml_token_t elem = XML_Cell; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Row, "5", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Col, "15", false)); @@ -319,7 +319,7 @@ void test_shared_formula_without_string() orcus::xmlns_id_t ns = NS_gnumeric_gnm; orcus::xml_token_t elem = XML_Cell; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Row, "6", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Col, "16", false)); @@ -340,7 +340,7 @@ void test_cell_formula() orcus::xmlns_id_t ns = NS_gnumeric_gnm; orcus::xml_token_t elem = XML_Cell; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Row, "9", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Col, "11", false)); context.start_element(ns, elem, attrs); @@ -359,7 +359,7 @@ void test_cell_array_formula() orcus::xmlns_id_t ns = NS_gnumeric_gnm; orcus::xml_token_t elem = XML_Cell; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Row, "19", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Col, "111", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Rows, "2", false)); diff --git a/src/liborcus/gnumeric_context.cpp b/src/liborcus/gnumeric_context.cpp index 8050e4fd54f623905b5a47abdbc644d536356266..353d60596c95838de415e4847a475f08953809f7 100644 --- a/src/liborcus/gnumeric_context.cpp +++ b/src/liborcus/gnumeric_context.cpp @@ 
-52,7 +52,7 @@ void gnumeric_content_xml_context::end_child_context(xmlns_id_t /*ns*/, xml_toke { } -void gnumeric_content_xml_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& /*attrs*/) +void gnumeric_content_xml_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& /*attrs*/) { push_stack(ns, name); diff --git a/src/liborcus/gnumeric_context.hpp b/src/liborcus/gnumeric_context.hpp index d43bda7968ed87c3975132a1dad95ff95eeadd35..73f2bb9240f8282119d1742b669b260235830aed 100644 --- a/src/liborcus/gnumeric_context.hpp +++ b/src/liborcus/gnumeric_context.hpp @@ -32,7 +32,7 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name); virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child); - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs); + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs); virtual bool end_element(xmlns_id_t ns, xml_token_t name); virtual void characters(std::string_view str, bool transient); diff --git a/src/liborcus/gnumeric_sheet_context.cpp b/src/liborcus/gnumeric_sheet_context.cpp index 261490f5836463180f795d7d017f6060cab7a9e9..d9ded623c4caa0a59bf90053cdfaec7e75d92097 100644 --- a/src/liborcus/gnumeric_sheet_context.cpp +++ b/src/liborcus/gnumeric_sheet_context.cpp @@ -355,7 +355,7 @@ xml_context_base* gnumeric_sheet_context::create_child_context(xmlns_id_t ns, xm return nullptr; } -void gnumeric_sheet_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void gnumeric_sheet_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); if (ns == NS_gnumeric_gnm) @@ -527,7 +527,7 @@ void gnumeric_sheet_context::reset(spreadsheet::sheet_t sheet_index) m_chars = std::string_view{}; } -void gnumeric_sheet_context::start_font(const xml_attrs_t& attrs) 
+void gnumeric_sheet_context::start_font(const xml_token_attrs_t& attrs) { auto* styles = mp_factory->get_styles(); if (!styles) @@ -579,7 +579,7 @@ void gnumeric_sheet_context::start_font(const xml_attrs_t& attrs) } } -void gnumeric_sheet_context::start_col(const xml_attrs_t& attrs) +void gnumeric_sheet_context::start_col(const xml_token_attrs_t& attrs) { gnumeric_col_row_info col_info = for_each(attrs.begin(), attrs.end(), gnumeric_col_row_info()); @@ -593,7 +593,7 @@ void gnumeric_sheet_context::start_col(const xml_attrs_t& attrs) p_sheet_props->set_column_hidden(col, col_span, hidden); } -void gnumeric_sheet_context::start_row(const xml_attrs_t& attrs) +void gnumeric_sheet_context::start_row(const xml_token_attrs_t& attrs) { gnumeric_col_row_info row_info = for_each(attrs.begin(), attrs.end(), gnumeric_col_row_info()); @@ -608,7 +608,7 @@ void gnumeric_sheet_context::start_row(const xml_attrs_t& attrs) } } -void gnumeric_sheet_context::start_style(const xml_attrs_t& attrs) +void gnumeric_sheet_context::start_style(const xml_token_attrs_t& attrs) { auto* styles = mp_factory->get_styles(); if (!styles) @@ -736,13 +736,13 @@ void gnumeric_sheet_context::start_style(const xml_attrs_t& attrs) } } -void gnumeric_sheet_context::start_style_region(const xml_attrs_t& attrs) +void gnumeric_sheet_context::start_style_region(const xml_token_attrs_t& attrs) { mp_region_data.reset(new gnumeric_style_region()); for_each(attrs.begin(), attrs.end(), gnumeric_style_region_attr_parser(*mp_region_data)); } -void gnumeric_sheet_context::start_condition(const xml_attrs_t& attrs) +void gnumeric_sheet_context::start_condition(const xml_token_attrs_t& attrs) { spreadsheet::iface::import_conditional_format* cond_format = mp_sheet->get_conditional_format(); diff --git a/src/liborcus/gnumeric_sheet_context.hpp b/src/liborcus/gnumeric_sheet_context.hpp index 9dc652cb884ebf645fc69c14b1fe896149ceed56..e44a96635660f2a9305972be96da4ff232d6d74a 100644 --- 
a/src/liborcus/gnumeric_sheet_context.hpp +++ b/src/liborcus/gnumeric_sheet_context.hpp @@ -67,19 +67,19 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name) override; - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) override; + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) override; virtual bool end_element(xmlns_id_t ns, xml_token_t name) override; virtual void characters(std::string_view str, bool transient) override; void reset(spreadsheet::sheet_t sheet_index); private: - void start_style_region(const xml_attrs_t& attrs); - void start_style(const xml_attrs_t& attrs); - void start_font(const xml_attrs_t& attrs); - void start_col(const xml_attrs_t& attrs); - void start_row(const xml_attrs_t& attrs); - void start_condition(const xml_attrs_t& attrs); + void start_style_region(const xml_token_attrs_t& attrs); + void start_style(const xml_token_attrs_t& attrs); + void start_font(const xml_token_attrs_t& attrs); + void start_col(const xml_token_attrs_t& attrs); + void start_row(const xml_token_attrs_t& attrs); + void start_condition(const xml_token_attrs_t& attrs); void end_table(); void end_style(bool conditional_format); diff --git a/src/liborcus/gnumeric_sheet_context_test.cpp b/src/liborcus/gnumeric_sheet_context_test.cpp index 436d95eb3c3a8fa1fea2b58e33362019f6294784..147e56ad231fa2eec1b0480cbd24ae677b14901e 100644 --- a/src/liborcus/gnumeric_sheet_context_test.cpp +++ b/src/liborcus/gnumeric_sheet_context_test.cpp @@ -88,18 +88,18 @@ void test_column_width() context.reset(0); orcus::xmlns_id_t ns = NS_gnumeric_gnm; orcus::xml_token_t parent = XML_Sheet; - orcus::xml_attrs_t parent_attr; + orcus::xml_token_attrs_t parent_attr; context.start_element(ns, parent, parent_attr); { orcus::xml_token_t elem = XML_Name; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; context.start_element(ns, elem, attrs); context.characters("test", 
false); context.end_element(ns, elem); } { orcus::xml_token_t elem = XML_ColInfo; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(xml_token_attr_t(ns, XML_No, "2", false)); attrs.push_back(xml_token_attr_t(ns, XML_Unit, "37.3", false)); attrs.push_back(xml_token_attr_t(ns, XML_Unit, "37.3", false)); diff --git a/src/liborcus/interface.cpp b/src/liborcus/interface.cpp index f61df5b61dccde37c9537b0fe1f675804b0d1e92..2b743a2d8ff267d1b34ee8ab2ea9f45e134fd4bf 100644 --- a/src/liborcus/interface.cpp +++ b/src/liborcus/interface.cpp @@ -19,7 +19,7 @@ struct import_filter::impl import_filter::import_filter(format_t input) : mp_impl(std::make_unique(input)) {} -import_filter::~import_filter() {} +import_filter::~import_filter() = default; void import_filter::set_config(const config& v) { @@ -31,7 +31,7 @@ const config& import_filter::get_config() const return mp_impl->m_config; } -document_dumper::~document_dumper() {} +document_dumper::~document_dumper() = default; }} /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/src/liborcus/json_document_tree.cpp b/src/liborcus/json_document_tree.cpp index e23521ea4eeb814a47a169db05732ebe3df46061..071198b5d71200456eaeaecaa1ff58f47f9c7d03 100644 --- a/src/liborcus/json_document_tree.cpp +++ b/src/liborcus/json_document_tree.cpp @@ -106,12 +106,12 @@ std::ostream& operator<< (std::ostream& os, node_t nt) document_error::document_error(const std::string& msg) : general_error("json::document_error", msg) {} -document_error::~document_error() throw() {} +document_error::~document_error() = default; key_value_error::key_value_error(const std::string& msg) : document_error(msg) {} -key_value_error::~key_value_error() throw() {} +key_value_error::~key_value_error() = default; struct json_value final { @@ -750,10 +750,10 @@ public: } } - void object_key(const char* p, size_t len, bool transient) + void object_key(std::string_view key, bool transient) { parser_stack& cur = m_stack.back(); - cur.key 
= std::string_view(p, len); + cur.key = key; if (m_config.persistent_string_values || transient) // The tree manages the life cycle of this string value. cur.key = m_res.str_pool.intern(cur.key).first; @@ -780,9 +780,8 @@ public: push_value(m_res.obj_pool.construct(detail::node_t::null)); } - void string(const char* p, size_t len, bool transient) + void string(std::string_view s, bool transient) { - std::string_view s(p, len); if (m_config.persistent_string_values || transient) // The tree manages the life cycle of this string value. s = m_res.str_pool.intern(s).first; @@ -1674,7 +1673,7 @@ document_tree& document_tree::operator= (object obj) void document_tree::load(std::string_view stream, const json_config& config) { json::parser_handler hdl(config, mp_impl->m_res); - json_parser parser(stream.data(), stream.size(), hdl); + json_parser parser(stream, hdl); parser.parse(); mp_impl->m_root = hdl.get_root(); @@ -1701,7 +1700,7 @@ void document_tree::load(std::string_view stream, const json_config& config) { doc.load(ext_content.str(), ext_config); } - catch (const json::parse_error& e) + catch (const parse_error& e) { std::ostringstream os; os << "Error while parsing " << extpath.string() << std::endl; diff --git a/src/liborcus/json_document_tree_test.cpp b/src/liborcus/json_document_tree_test.cpp index b4e430dc2b3eb0e1904f41f687861d319d8ec19d..018702c0bad6e4b36bd7aff3acfa92eba1fec4e2 100644 --- a/src/liborcus/json_document_tree_test.cpp +++ b/src/liborcus/json_document_tree_test.cpp @@ -189,7 +189,7 @@ void test_json_parse_empty() { doc.load(test, test_config); } - catch (const json::parse_error& e) + catch (const parse_error& e) { std::cout << create_parse_error_output(test, e.offset()) << std::endl; std::cout << e.what() << std::endl; @@ -221,7 +221,7 @@ void test_json_parse_invalid() std::cerr << "Invalid JSON expression is parsed as valid: '" << invalid_json << "'" << std::endl; assert(false); } - catch (const json::parse_error& e) + catch (const parse_error& 
e) { // works as expected. std::cout << "invalid expression tested: " << invalid_json << std::endl; diff --git a/src/liborcus/json_structure_tree.cpp b/src/liborcus/json_structure_tree.cpp index 8f03a6fa7659fa5564699c02388ece6655f11150..0f20c71a6bf095d9eebc74f1edd4b78a21df0422 100644 --- a/src/liborcus/json_structure_tree.cpp +++ b/src/liborcus/json_structure_tree.cpp @@ -64,7 +64,7 @@ struct structure_node */ int32_t child_count = 0; - pstring name; //< value of a key for a object key node. + std::string_view name; //< value of a key for a object key node. /** * For a value node that is an immediate child of an array node, these @@ -203,10 +203,10 @@ struct structure_tree::impl push_stack(node_type::object); } - void object_key(const char* p, size_t len, bool transient) + void object_key(std::string_view key, bool transient) { structure_node node(node_type::object_key); - node.name = pstring(p, len); + node.name = key; if (transient) node.name = m_pool.intern(node.name).first; @@ -234,7 +234,7 @@ struct structure_tree::impl push_value(); } - void string(const char* /*p*/, size_t /*len*/, bool /*transient*/) + void string(std::string_view /*val*/, bool /*transient*/) { push_value(); } @@ -666,7 +666,7 @@ structure_tree::~structure_tree() {} void structure_tree::parse(std::string_view stream) { - json_parser parser(stream.data(), stream.size(), *mp_impl); + json_parser parser(stream, *mp_impl); parser.parse(); } diff --git a/src/liborcus/odf_para_context.cpp b/src/liborcus/odf_para_context.cpp index b8f8242c5515867ed6dfdf8c9459630433acdff0..2bd96dcbc3bd5e99e9374e4e3cd13c0204854c96 100644 --- a/src/liborcus/odf_para_context.cpp +++ b/src/liborcus/odf_para_context.cpp @@ -43,7 +43,7 @@ void text_para_context::end_child_context(xmlns_id_t /*ns*/, xml_token_t /*name* // not implemented yet. 
} -void text_para_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void text_para_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); if (ns == NS_odf_text) diff --git a/src/liborcus/odf_para_context.hpp b/src/liborcus/odf_para_context.hpp index 1614b9455d1ff45637f16ff7c6b06756f43c7539..bc44988b3547b28db26b5a2fc137624d04dec657 100644 --- a/src/liborcus/odf_para_context.hpp +++ b/src/liborcus/odf_para_context.hpp @@ -34,7 +34,7 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name); virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child); - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs); + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs); virtual bool end_element(xmlns_id_t ns, xml_token_t name); virtual void characters(std::string_view str, bool transient); diff --git a/src/liborcus/odf_style_context.cpp b/src/liborcus/odf_style_context.cpp index 7a338f1de9cea482bcd355cb113a62fbfb242d87..70953a104b51ce029ebbbc232088e9349ab8e9ea 100644 --- a/src/liborcus/odf_style_context.cpp +++ b/src/liborcus/odf_style_context.cpp @@ -251,7 +251,7 @@ void style_context::characters(std::string_view /*str*/, bool /*transient*/) { } -void style_context::start_paragraph_properties(const xml_token_pair_t& parent, const xml_attrs_t& attrs) +void style_context::start_paragraph_properties(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { xml_element_expected(parent, NS_odf_style, XML_style); @@ -298,7 +298,7 @@ void style_context::start_paragraph_properties(const xml_token_pair_t& parent, c } } -void style_context::start_text_properties(const xml_token_pair_t& parent, const xml_attrs_t& attrs) +void style_context::start_text_properties(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { static 
const xml_elem_set_t expected = { { NS_odf_style, XML_style }, @@ -548,7 +548,7 @@ void style_context::start_text_properties(const xml_token_pair_t& parent, const } } -void style_context::start_table_cell_properties(const xml_token_pair_t& parent, const xml_attrs_t& attrs) +void style_context::start_table_cell_properties(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { xml_element_expected(parent, NS_odf_style, XML_style); diff --git a/src/liborcus/odf_style_context.hpp b/src/liborcus/odf_style_context.hpp index 81e88ee53b6e40a6f2f06978bdb5e06de05b7163..4a3394850285257e60783ff8626b806320958240 100644 --- a/src/liborcus/odf_style_context.hpp +++ b/src/liborcus/odf_style_context.hpp @@ -37,9 +37,9 @@ public: std::unique_ptr pop_style(); private: - void start_paragraph_properties(const xml_token_pair_t& parent, const xml_attrs_t& attrs); - void start_text_properties(const xml_token_pair_t& parent, const xml_attrs_t& attrs); - void start_table_cell_properties(const xml_token_pair_t& parent, const xml_attrs_t& attrs); + void start_paragraph_properties(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); + void start_text_properties(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); + void start_table_cell_properties(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); private: spreadsheet::iface::import_styles* mp_styles = nullptr; diff --git a/src/liborcus/odf_styles_context.cpp b/src/liborcus/odf_styles_context.cpp index 33647e85260149bc96e207b28c39ee3cf2aa79cd..cf2589da4b1bd9a947ed54a3f8b04615210820a3 100644 --- a/src/liborcus/odf_styles_context.cpp +++ b/src/liborcus/odf_styles_context.cpp @@ -230,7 +230,7 @@ void styles_context::end_child_context(xmlns_id_t ns, xml_token_t name, xml_cont } } -void styles_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& /*attrs*/) +void styles_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& /*attrs*/) { xml_token_pair_t 
parent = push_stack(ns, name); if (ns == NS_odf_office) diff --git a/src/liborcus/odf_styles_context.hpp b/src/liborcus/odf_styles_context.hpp index 38dc6bfa7fe806ecbe6329fb39999696a0fa59c8..01b2ac48cc166706a6cd9bcf7146c1fd2a4d08e7 100644 --- a/src/liborcus/odf_styles_context.hpp +++ b/src/liborcus/odf_styles_context.hpp @@ -32,7 +32,7 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name) override; virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child) override; - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) override; + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) override; virtual bool end_element(xmlns_id_t ns, xml_token_t name) override; virtual void characters(std::string_view str, bool transient) override; diff --git a/src/liborcus/ods_content_xml_context.cpp b/src/liborcus/ods_content_xml_context.cpp index 65f25b80ef1fe94be1a32519cdbde21c82275648..5f07594ad4cb4ae8fc95c2c7648ad8ab0b8c61da 100644 --- a/src/liborcus/ods_content_xml_context.cpp +++ b/src/liborcus/ods_content_xml_context.cpp @@ -51,7 +51,7 @@ const map_type& get() } // namespace cell_value void pick_up_named_range_or_expression( - session_context& cxt, const xml_attrs_t& attrs, xmlns_id_t exp_attr_ns, xml_token_t exp_attr_name, + session_context& cxt, const xml_token_attrs_t& attrs, xmlns_id_t exp_attr_ns, xml_token_t exp_attr_name, ods_session_data::named_exp_type name_type, ss::sheet_t scope) { std::string_view name; @@ -193,7 +193,7 @@ void ods_content_xml_context::end_child_context(xmlns_id_t ns, xml_token_t name, } } -void ods_content_xml_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void ods_content_xml_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); @@ -326,7 +326,7 @@ bool 
ods_content_xml_context::end_element(xmlns_id_t ns, xml_token_t name) return pop_stack(ns, name); } -void ods_content_xml_context::start_null_date(const xml_attrs_t& attrs) +void ods_content_xml_context::start_null_date(const xml_token_attrs_t& attrs) { spreadsheet::iface::import_global_settings* gs = mp_factory->get_global_settings(); if (!gs) @@ -346,7 +346,7 @@ void ods_content_xml_context::start_null_date(const xml_attrs_t& attrs) gs->set_origin_date(val.year, val.month, val.day); } -void ods_content_xml_context::start_table(const xml_token_pair_t& parent, const xml_attrs_t& attrs) +void ods_content_xml_context::start_table(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { static const xml_elem_set_t expected = { { NS_odf_office, XML_spreadsheet }, @@ -391,7 +391,7 @@ void ods_content_xml_context::end_table() } } -void ods_content_xml_context::start_named_range(const xml_token_pair_t& parent, const xml_attrs_t& attrs) +void ods_content_xml_context::start_named_range(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { xml_element_expected(parent, NS_odf_table, XML_named_expressions); @@ -404,7 +404,7 @@ void ods_content_xml_context::end_named_range() { } -void ods_content_xml_context::start_named_expression(const xml_token_pair_t& parent, const xml_attrs_t& attrs) +void ods_content_xml_context::start_named_expression(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { xml_element_expected(parent, NS_odf_table, XML_named_expressions); @@ -417,7 +417,7 @@ void ods_content_xml_context::end_named_expression() { } -void ods_content_xml_context::start_column(const xml_attrs_t& attrs) +void ods_content_xml_context::start_column(const xml_token_attrs_t& attrs) { if (!m_cur_sheet.sheet) return; @@ -469,7 +469,7 @@ void ods_content_xml_context::end_column() m_col += m_col_repeated; } -void ods_content_xml_context::start_row(const xml_attrs_t& attrs) +void ods_content_xml_context::start_row(const xml_token_attrs_t& attrs) { m_col 
= 0; m_row_attr = row_attr(); @@ -528,7 +528,7 @@ void ods_content_xml_context::end_row() m_row += m_row_attr.number_rows_repeated; } -void ods_content_xml_context::start_cell(const xml_attrs_t& attrs) +void ods_content_xml_context::start_cell(const xml_token_attrs_t& attrs) { m_cell_attr = cell_attr(); diff --git a/src/liborcus/ods_content_xml_context.hpp b/src/liborcus/ods_content_xml_context.hpp index 499833b733b7ff178ea2030734e63bc783ac5a30..592175294b543fe2b8c6b8f7fb5ebc5419984b4c 100644 --- a/src/liborcus/ods_content_xml_context.hpp +++ b/src/liborcus/ods_content_xml_context.hpp @@ -70,28 +70,28 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name) override; virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child) override; - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) override; + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) override; virtual bool end_element(xmlns_id_t ns, xml_token_t name) override; private: - void start_null_date(const xml_attrs_t& attrs); + void start_null_date(const xml_token_attrs_t& attrs); - void start_table(const xml_token_pair_t& parent, const xml_attrs_t& attrs); + void start_table(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); void end_table(); - void start_named_range(const xml_token_pair_t& parent, const xml_attrs_t& attrs); + void start_named_range(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); void end_named_range(); - void start_named_expression(const xml_token_pair_t& parent, const xml_attrs_t& attrs); + void start_named_expression(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); void end_named_expression(); - void start_column(const xml_attrs_t& attrs); + void start_column(const xml_token_attrs_t& attrs); void end_column(); - void start_row(const xml_attrs_t& attrs); + void start_row(const xml_token_attrs_t& attrs); 
void end_row(); - void start_cell(const xml_attrs_t& attrs); + void start_cell(const xml_token_attrs_t& attrs); void end_cell(); /** diff --git a/src/liborcus/opc_reader.cpp b/src/liborcus/opc_reader.cpp index a98cecca39f1e30a7a9a2feda6d0aba41832ea98..8056969efac9cc0aad9394baaecf3c5c2090eb4a 100644 --- a/src/liborcus/opc_reader.cpp +++ b/src/liborcus/opc_reader.cpp @@ -71,7 +71,16 @@ void opc_reader::read_file(std::unique_ptr&& stream) bool opc_reader::open_zip_stream(const string& path, vector& buf) { - return m_archive->read_file_entry(path.c_str(), buf); + try + { + std::vector entry = m_archive->read_file_entry(path.c_str()); + buf.swap(entry); + return true; + } + catch (const std::exception&) + { + return false; + } } void opc_reader::read_part(const pstring& path, const schema_t type, opc_rel_extra* data) diff --git a/src/liborcus/orcus_csv.cpp b/src/liborcus/orcus_csv.cpp index d16e040dce23b78990805df82a0f1c511d86a45f..dff2b2d4e261fe56f65956e1e3063c02be49b961 100644 --- a/src/liborcus/orcus_csv.cpp +++ b/src/liborcus/orcus_csv.cpp @@ -89,20 +89,19 @@ public: m_col = 0; } - void cell(const char* p, size_t n, bool transient) + void cell(std::string_view v, bool transient) { auto csv = std::get(m_app_config.data); if (m_sheet == 0 && size_t(m_row) < csv.header_row_size) { - std::string_view v{p, n}; if (transient) v = m_pool.intern(v).first; m_header_cells.emplace_back(m_row, m_col, v); } - mp_sheet->set_auto(m_row, m_col, {p, n}); + mp_sheet->set_auto(m_row, m_col, v); ++m_col; } @@ -148,7 +147,7 @@ struct orcus_csv::impl csv::parser_config config; config.delimiters.push_back(','); config.text_qualifier = '"'; - csv_parser parser(stream.data(), stream.size(), handler, config); + csv_parser parser(stream, handler, config); try { parser.parse(); @@ -158,9 +157,9 @@ struct orcus_csv::impl // The parser has decided to end the import due to the destination // sheet being full. 
} - catch (const csv::parse_error& e) + catch (const parse_error& e) { - cout << "parse failed: " << e.what() << endl; + cout << "parse failed at offset " << e.offset() << ": " << e.what() << endl; } } }; @@ -171,9 +170,9 @@ orcus_csv::orcus_csv(spreadsheet::iface::import_factory* factory) : orcus_csv::~orcus_csv() {} -void orcus_csv::read_file(const string& filepath) +void orcus_csv::read_file(std::string_view filepath) { - file_content fc(filepath.data()); + file_content fc(filepath); mp_impl->parse(fc.str(), get_config()); mp_impl->factory->finalize(); } diff --git a/src/liborcus/orcus_gnumeric.cpp b/src/liborcus/orcus_gnumeric.cpp index 51755609645429daff0172baf6e140a475242ea5..76c136827b39b57e9d59ac8f038b8f41b737257b 100644 --- a/src/liborcus/orcus_gnumeric.cpp +++ b/src/liborcus/orcus_gnumeric.cpp @@ -122,13 +122,13 @@ bool orcus_gnumeric::detect(const unsigned char* buffer, size_t size) return false; } -void orcus_gnumeric::read_file(const string& filepath) +void orcus_gnumeric::read_file(std::string_view filepath) { #if ORCUS_DEBUG_GNUMERIC cout << "reading " << filepath << endl; #endif - file_content content(filepath.data()); + file_content content(filepath); if (content.empty()) return; diff --git a/src/liborcus/orcus_json.cpp b/src/liborcus/orcus_json.cpp index 48a2d563275716db177630c779c04c99b79dc5f4..c1fa0e7c8e80c9caf2dcd35d495ebca23e22f894 100644 --- a/src/liborcus/orcus_json.cpp +++ b/src/liborcus/orcus_json.cpp @@ -136,9 +136,9 @@ public: push_node(json_map_tree::input_node_type::object); } - void object_key(const char* p, size_t len, bool /*transient*/) + void object_key(std::string_view key, bool /*transient*/) { - m_walker.set_object_key(p, len); + m_walker.set_object_key(key.data(), key.size()); } void end_object() @@ -167,10 +167,10 @@ public: pop_node(json_map_tree::input_node_type::value); } - void string(const char* p, size_t len, bool /*transient*/) + void string(std::string_view val, bool /*transient*/) { 
push_node(json_map_tree::input_node_type::value); - commit_value(json_value(p, len)); + commit_value(json_value(val.data(), val.size())); pop_node(json_map_tree::input_node_type::value); } @@ -386,7 +386,7 @@ void orcus_json::read_stream(std::string_view stream) } json_content_handler hdl(mp_impl->map_tree, *mp_impl->im_factory); - json_parser parser(stream.data(), stream.size(), hdl); + json_parser parser(stream, hdl); parser.parse(); mp_impl->im_factory->finalize(); @@ -466,7 +466,7 @@ void orcus_json::read_map_definition(std::string_view stream) } } } - catch (const json::parse_error& e) + catch (const parse_error& e) { std::ostringstream os; os << "Error parsing the map definition file:" << std::endl diff --git a/src/liborcus/orcus_ods.cpp b/src/liborcus/orcus_ods.cpp index 16a5a481d33b121c20b578b4782d7994e8b81742..15785e8d6abce467177305027a1534aefaf57d73 100644 --- a/src/liborcus/orcus_ods.cpp +++ b/src/liborcus/orcus_ods.cpp @@ -69,9 +69,14 @@ void orcus_ods::read_styles(const zip_archive& archive) return; std::vector buf; - if (!archive.read_file_entry("styles.xml", buf)) + + try + { + buf = archive.read_file_entry("styles.xml"); + } + catch (const std::exception& e) { - std::cout << "failed to get stat on styles.xml" << std::endl; + std::cerr << "failed to get stat on styles.xml (reason: " << e.what() << ")" << std::endl; return; } @@ -95,13 +100,18 @@ void orcus_ods::read_styles(const zip_archive& archive) void orcus_ods::read_content(const zip_archive& archive) { vector buf; - if (!archive.read_file_entry("content.xml", buf)) + + try { - cout << "failed to get stat on content.xml" << endl; + buf = archive.read_file_entry("content.xml"); + } + catch (const std::exception& e) + { + std::cerr << "failed to get stat on content.xml (reason: " << e.what() << ")" << std::endl; return; } - read_content_xml(&buf[0], buf.size()); + read_content_xml(buf.data(), buf.size()); } void orcus_ods::read_content_xml(const unsigned char* p, size_t size) @@ -144,9 +154,25 @@ 
bool orcus_ods::detect(const unsigned char* blob, size_t size) { zip_archive_stream_blob stream(blob, size); zip_archive archive(&stream); + try { archive.load(); + + std::vector buf = archive.read_file_entry("mimetype"); + + if (buf.empty()) + // mimetype is empty. + return false; + + const char* mimetype = "application/vnd.oasis.opendocument.spreadsheet"; + size_t n = strlen(mimetype); + if (buf.size() < n) + return false; + + if (strncmp(mimetype, reinterpret_cast(buf.data()), n)) + // The mimetype content differs. + return false; } catch (const zip_error&) { @@ -154,30 +180,12 @@ bool orcus_ods::detect(const unsigned char* blob, size_t size) return false; } - vector buf; - if (!archive.read_file_entry("mimetype", buf)) - // Failed to read 'mimetype' entry. - return false; - - if (buf.empty()) - // mimetype is empty. - return false; - - const char* mimetype = "application/vnd.oasis.opendocument.spreadsheet"; - size_t n = strlen(mimetype); - if (buf.size() < n) - return false; - - if (strncmp(mimetype, reinterpret_cast(&buf[0]), n)) - // The mimetype content differs. 
- return false; - return true; } -void orcus_ods::read_file(const std::string& filepath) +void orcus_ods::read_file(std::string_view filepath) { - zip_archive_stream_fd stream(filepath.data()); + zip_archive_stream_fd stream(std::string{filepath}.c_str()); read_file_impl(&stream); } diff --git a/src/liborcus/orcus_xls_xml.cpp b/src/liborcus/orcus_xls_xml.cpp index 7c59d2dfe514987408a849de60e5889bccd99419..06e323d5251b67b040f9c2092a3c200c91635264 100644 --- a/src/liborcus/orcus_xls_xml.cpp +++ b/src/liborcus/orcus_xls_xml.cpp @@ -102,7 +102,7 @@ bool orcus_xls_xml::detect(const unsigned char* buffer, size_t size) return false; } -void orcus_xls_xml::read_file(const string& filepath) +void orcus_xls_xml::read_file(std::string_view filepath) { file_content content(filepath.data()); if (content.empty()) diff --git a/src/liborcus/orcus_xlsx.cpp b/src/liborcus/orcus_xlsx.cpp index a6d998133deef34aac5506a5190dada12caaf5c1..b0f3ac4be18ba27838e399452b2561e2098bdeb7 100644 --- a/src/liborcus/orcus_xlsx.cpp +++ b/src/liborcus/orcus_xlsx.cpp @@ -154,55 +154,53 @@ bool orcus_xlsx::detect(const unsigned char* blob, size_t size) { zip_archive_stream_blob stream(blob, size); zip_archive archive(&stream); + try { archive.load(); - } - catch (const zip_error&) - { - // Not a valid zip archive. - return false; - } - // Find and parse [Content_Types].xml which is required for OPC package. - vector buf; - if (!archive.read_file_entry("[Content_Types].xml", buf)) - // Failed to read the contnet types entry. - return false; + // Find and parse [Content_Types].xml which is required for OPC package. 
+ std::vector buf = archive.read_file_entry("[Content_Types].xml"); - if (buf.empty()) - return false; + if (buf.empty()) + return false; - config opt(format_t::xlsx); - xmlns_repository ns_repo; - ns_repo.add_predefined_values(NS_opc_all); - session_context session_cxt; - xml_stream_parser parser( - opt, ns_repo, opc_tokens, reinterpret_cast(&buf[0]), buf.size()); + config opt(format_t::xlsx); + xmlns_repository ns_repo; + ns_repo.add_predefined_values(NS_opc_all); + session_context session_cxt; + xml_stream_parser parser( + opt, ns_repo, opc_tokens, reinterpret_cast(&buf[0]), buf.size()); - xml_simple_stream_handler handler( - session_cxt, opc_tokens, - std::make_unique(session_cxt, opc_tokens)); - parser.set_handler(&handler); - parser.parse(); + xml_simple_stream_handler handler( + session_cxt, opc_tokens, + std::make_unique(session_cxt, opc_tokens)); + parser.set_handler(&handler); + parser.parse(); - opc_content_types_context& context = - static_cast(handler.get_context()); + opc_content_types_context& context = + static_cast(handler.get_context()); - std::vector parts; - context.pop_parts(parts); + std::vector parts; + context.pop_parts(parts); - if (parts.empty()) - return false; + if (parts.empty()) + return false; - // See if we can find the workbook stream. - xml_part_t workbook_part("/xl/workbook.xml", CT_ooxml_xlsx_sheet_main); - return std::find(parts.begin(), parts.end(), workbook_part) != parts.end(); + // See if we can find the workbook stream. 
+ xml_part_t workbook_part("/xl/workbook.xml", CT_ooxml_xlsx_sheet_main); + return std::find(parts.begin(), parts.end(), workbook_part) != parts.end(); + } + catch (const std::exception&) + { + return false; + } } -void orcus_xlsx::read_file(const string& filepath) +void orcus_xlsx::read_file(std::string_view filepath) { - std::unique_ptr stream(new zip_archive_stream_fd(filepath.c_str())); + std::unique_ptr stream( + new zip_archive_stream_fd(std::string{filepath}.c_str())); mp_impl->m_opc_reader.read_file(std::move(stream)); // Formulas need to be inserted to the document after the shared string diff --git a/src/liborcus/orcus_xml.cpp b/src/liborcus/orcus_xml.cpp index 1de1169e5778ca69a4895af6aa1b70bcbcc96e44..cb45f12d77232bf7f1ea6a248f72885241fe7d0a 100644 --- a/src/liborcus/orcus_xml.cpp +++ b/src/liborcus/orcus_xml.cpp @@ -585,7 +585,7 @@ void orcus_xml::read_stream(std::string_view stream) xml_data_sax_handler handler( *mp_impl->im_factory, mp_impl->link_positions, mp_impl->map_tree); - sax_ns_parser parser(stream.data(), stream.size(), ns_cxt, handler); + sax_ns_parser parser(stream, ns_cxt, handler); parser.parse(); } diff --git a/src/liborcus/orcus_xml_map_def.cpp b/src/liborcus/orcus_xml_map_def.cpp index bfe5dabeb344728d92943e2a13aa61eb8d294b4e..7766cca00ba6cac5803627a1d108c230faf001bf 100644 --- a/src/liborcus/orcus_xml_map_def.cpp +++ b/src/liborcus/orcus_xml_map_def.cpp @@ -189,7 +189,7 @@ void orcus_xml::read_map_definition(std::string_view stream) try { xml_map_sax_handler handler(*this); - sax_parser parser(stream.data(), stream.size(), handler); + sax_parser parser(stream, handler); parser.parse(); } catch (const parse_error& e) diff --git a/src/liborcus/spreadsheet_types.cpp b/src/liborcus/spreadsheet_types.cpp index 5a313d82cae7ee5004a7d7be50877de89c37f861..bd7c0cc9d455819797441fbf3dc75b7d6fe4ba54 100644 --- a/src/liborcus/spreadsheet_types.cpp +++ b/src/liborcus/spreadsheet_types.cpp @@ -294,36 +294,6 @@ std::ostream& write_name_for_pos( } // 
anonymous namespace -color_t::color_t() : - alpha(0), red(0), green(0), blue(0) -{ -} - -color_t::color_t(color_elem_t _red, color_elem_t _green, color_elem_t _blue) : - alpha(255), red(_red), green(_green), blue(_blue) -{ -} - -color_t::color_t(color_elem_t _alpha, color_elem_t _red, color_elem_t _green, color_elem_t _blue) : - alpha(_alpha), red(_red), green(_green), blue(_blue) -{ -} - -void color_t::reset() -{ - *this = color_t(); -} - -bool color_t::operator==(const color_t& other) const -{ - return alpha == other.alpha && red == other.red && green == other.green && blue == other.blue; -} - -bool color_t::operator!=(const color_t& other) const -{ - return !operator==(other); -} - address_t to_rc_address(const src_address_t& r) { address_t ret; @@ -448,39 +418,6 @@ std::ostream& operator<< (std::ostream& os, const range_t& v) return os; } -format_run::format_run() : - pos(0), size(0), - font_size(0), - bold(false), italic(false) {} - -void format_run::reset() -{ - pos = 0; - size = 0; - font = std::string_view{}; - font_size = 0; - bold = false; - italic = false; - color = color_t(); -} - -bool format_run::formatted() const -{ - if (bold || italic) - return true; - - if (font_size) - return true; - - if (!font.empty()) - return true; - - if (color.alpha || color.red || color.green || color.blue) - return true; - - return false; -} - col_width_t get_default_column_width() { return std::numeric_limits::max(); diff --git a/src/liborcus/xls_xml_context.cpp b/src/liborcus/xls_xml_context.cpp index d468cf04258b90c0087cfdb240f96c7811bed1e5..85916fc71d53bd619088cf35f8ffbf969c9a1f98 100644 --- a/src/liborcus/xls_xml_context.cpp +++ b/src/liborcus/xls_xml_context.cpp @@ -262,7 +262,7 @@ void xls_xml_data_context::reset() } void xls_xml_data_context::start_element_data( - const xml_token_pair_t& /*parent*/, const xml_attrs_t& attrs) + const xml_token_pair_t& /*parent*/, const xml_token_attrs_t& attrs) { m_cell_type = ct_unknown; m_cell_string.clear(); @@ -851,7 +851,7 @@ 
void xls_xml_context::end_child_context(xmlns_id_t /*ns*/, xml_token_t /*name*/, { } -void xls_xml_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void xls_xml_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { push_stack(ns, name); @@ -1283,12 +1283,12 @@ void xls_xml_context::characters(std::string_view str, bool /*transient*/) } } -void xls_xml_context::start_element_borders(const xml_attrs_t& /*attrs*/) +void xls_xml_context::start_element_borders(const xml_token_attrs_t& /*attrs*/) { m_current_style->borders.clear(); } -void xls_xml_context::start_element_border(const xml_attrs_t& attrs) +void xls_xml_context::start_element_border(const xml_token_attrs_t& attrs) { ss::border_direction_t dir = ss::border_direction_t::unknown; ss::border_style_t style = ss::border_style_t::unknown; @@ -1376,7 +1376,7 @@ void xls_xml_context::start_element_border(const xml_attrs_t& attrs) } } -void xls_xml_context::start_element_number_format(const xml_attrs_t& attrs) +void xls_xml_context::start_element_number_format(const xml_token_attrs_t& attrs) { m_current_style->number_format = std::string_view{}; @@ -1399,7 +1399,7 @@ void xls_xml_context::start_element_number_format(const xml_attrs_t& attrs) } } -void xls_xml_context::start_element_cell(const xml_attrs_t& attrs) +void xls_xml_context::start_element_cell(const xml_token_attrs_t& attrs) { long col_index = 0; std::string_view formula; @@ -1467,7 +1467,7 @@ void xls_xml_context::start_element_cell(const xml_attrs_t& attrs) } } -void xls_xml_context::start_element_column(const xml_attrs_t& attrs) +void xls_xml_context::start_element_column(const xml_token_attrs_t& attrs) { if (!mp_sheet_props && !mp_cur_sheet) return; @@ -1537,7 +1537,7 @@ void xls_xml_context::start_element_column(const xml_attrs_t& attrs) m_cur_prop_col = col_index + span + 1; } -void xls_xml_context::start_element_row(const xml_attrs_t& attrs) +void 
xls_xml_context::start_element_row(const xml_token_attrs_t& attrs) { m_cur_col = m_table_props.pos.column; ss::row_t row_index = -1; @@ -1604,7 +1604,7 @@ void xls_xml_context::start_element_row(const xml_attrs_t& attrs) } } -void xls_xml_context::start_element_table(const xml_attrs_t& attrs) +void xls_xml_context::start_element_table(const xml_token_attrs_t& attrs) { ss::row_t row_index = -1; ss::col_t col_index = -1; @@ -1642,7 +1642,7 @@ void xls_xml_context::start_element_table(const xml_attrs_t& attrs) m_table_props.pos.column = col_index - 1; } -void xls_xml_context::start_element_worksheet(const xml_attrs_t& attrs) +void xls_xml_context::start_element_worksheet(const xml_token_attrs_t& attrs) { ++m_cur_sheet; std::string_view sheet_name; diff --git a/src/liborcus/xls_xml_context.hpp b/src/liborcus/xls_xml_context.hpp index 9a2ef6a9ccbe3b3944188aa27ed0e374b31080ee..9f00bb072fd4a98ea838aa621ec16eaae763d599 100644 --- a/src/liborcus/xls_xml_context.hpp +++ b/src/liborcus/xls_xml_context.hpp @@ -92,7 +92,7 @@ public: private: - void start_element_data(const xml_token_pair_t& parent, const xml_attrs_t& attrs); + void start_element_data(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); void end_element_data(); bool handle_array_formula_result(); @@ -241,20 +241,20 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name) override; virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child) override; - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) override; + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) override; virtual bool end_element(xmlns_id_t ns, xml_token_t name) override; virtual void characters(std::string_view str, bool transient) override; private: - void start_element_borders(const xml_attrs_t& attrs); - void start_element_border(const xml_attrs_t& attrs); - void 
start_element_number_format(const xml_attrs_t& attrs); - void start_element_cell(const xml_attrs_t& attrs); - void start_element_column(const xml_attrs_t& attrs); - void start_element_row(const xml_attrs_t& attrs); - void start_element_table(const xml_attrs_t& attrs); - void start_element_worksheet(const xml_attrs_t& attrs); + void start_element_borders(const xml_token_attrs_t& attrs); + void start_element_border(const xml_token_attrs_t& attrs); + void start_element_number_format(const xml_token_attrs_t& attrs); + void start_element_cell(const xml_token_attrs_t& attrs); + void start_element_column(const xml_token_attrs_t& attrs); + void start_element_row(const xml_token_attrs_t& attrs); + void start_element_table(const xml_token_attrs_t& attrs); + void start_element_worksheet(const xml_token_attrs_t& attrs); void end_element_borders(); void end_element_border(); diff --git a/src/liborcus/xlsx_autofilter_context.cpp b/src/liborcus/xlsx_autofilter_context.cpp index c04268e9cc6ec42339f3057cfc4ebb944551226b..7a4d5ab63833f516a94c25110203a3debb6c5ad3 100644 --- a/src/liborcus/xlsx_autofilter_context.cpp +++ b/src/liborcus/xlsx_autofilter_context.cpp @@ -37,7 +37,7 @@ void xlsx_autofilter_context::end_child_context( { } -void xlsx_autofilter_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void xlsx_autofilter_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); diff --git a/src/liborcus/xlsx_autofilter_context.hpp b/src/liborcus/xlsx_autofilter_context.hpp index 5803a9238065427fc61d4be4fda78ba5a6f2044b..27f5e7bdd0cd4a7146f46a411543dd889f47136f 100644 --- a/src/liborcus/xlsx_autofilter_context.hpp +++ b/src/liborcus/xlsx_autofilter_context.hpp @@ -38,7 +38,7 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name); virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child); - virtual void 
start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs); + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs); virtual bool end_element(xmlns_id_t ns, xml_token_t name); virtual void characters(std::string_view str, bool transient); diff --git a/src/liborcus/xlsx_conditional_format_context.cpp b/src/liborcus/xlsx_conditional_format_context.cpp index 7a023a945318b824715f87a1d7ef89c8ea3a93c6..e667d8340d748c53aa1ed1a1f17a54f90dff8e17 100644 --- a/src/liborcus/xlsx_conditional_format_context.cpp +++ b/src/liborcus/xlsx_conditional_format_context.cpp @@ -655,7 +655,7 @@ void xlsx_conditional_format_context::end_child_context(xmlns_id_t /*ns*/, xml_t { } -void xlsx_conditional_format_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void xlsx_conditional_format_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); diff --git a/src/liborcus/xlsx_conditional_format_context.hpp b/src/liborcus/xlsx_conditional_format_context.hpp index 9f1103db0fa26fdea2b11d46c277f04c8ac6457d..f03d53dd43773e1d75fe2f7bbf32509422725535 100644 --- a/src/liborcus/xlsx_conditional_format_context.hpp +++ b/src/liborcus/xlsx_conditional_format_context.hpp @@ -45,7 +45,7 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name) override; virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child) override; - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) override; + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) override; virtual bool end_element(xmlns_id_t ns, xml_token_t name) override; virtual void characters(std::string_view str, bool transient) override; diff --git a/src/liborcus/xlsx_context.cpp b/src/liborcus/xlsx_context.cpp index 
a41d9862a319090e02c204e46d226a3a59a15c49..6b4bf93973a28f9503f484a9e7cadee3a677fd60 100644 --- a/src/liborcus/xlsx_context.cpp +++ b/src/liborcus/xlsx_context.cpp @@ -105,7 +105,7 @@ void xlsx_shared_strings_context::end_child_context(xmlns_id_t /*ns*/, xml_token { } -void xlsx_shared_strings_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void xlsx_shared_strings_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); switch (name) @@ -267,7 +267,7 @@ void xlsx_shared_strings_context::characters(std::string_view str, bool transien // Append the tail end. m_cell_buffer.append(p0, std::distance(p0, p)); - m_cur_str = m_pool.intern({m_cell_buffer.get(), m_cell_buffer.size()}).first; + m_cur_str = m_pool.intern(m_cell_buffer.str()).first; transient = false; } @@ -504,7 +504,7 @@ void xlsx_styles_context::end_child_context(xmlns_id_t /*ns*/, xml_token_t /*nam { } -void xlsx_styles_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void xlsx_styles_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); @@ -1041,7 +1041,7 @@ void xlsx_styles_context::characters(std::string_view /*str*/, bool /*transient* // not used in the styles.xml part. 
} -void xlsx_styles_context::start_element_number_format(const xml_attrs_t& attrs) +void xlsx_styles_context::start_element_number_format(const xml_token_attrs_t& attrs) { if (!mp_styles) return; @@ -1071,7 +1071,7 @@ void xlsx_styles_context::start_element_number_format(const xml_attrs_t& attrs) } } -void xlsx_styles_context::start_element_border(const xml_attrs_t& attrs) +void xlsx_styles_context::start_element_border(const xml_token_attrs_t& attrs) { bool diagonal_up = false; bool diagonal_down = false; @@ -1102,7 +1102,7 @@ void xlsx_styles_context::start_element_border(const xml_attrs_t& attrs) m_diagonal_down = diagonal_down; } -void xlsx_styles_context::start_element_diagonal(const xml_attrs_t& attrs) +void xlsx_styles_context::start_element_diagonal(const xml_token_attrs_t& attrs) { assert(mp_border); @@ -1128,7 +1128,7 @@ void xlsx_styles_context::start_element_diagonal(const xml_attrs_t& attrs) for_each(attrs.begin(), attrs.end(), func); } -void xlsx_styles_context::start_border_color(const xml_attrs_t& attrs) +void xlsx_styles_context::start_border_color(const xml_token_attrs_t& attrs) { assert(mp_border); @@ -1143,7 +1143,7 @@ void xlsx_styles_context::start_border_color(const xml_attrs_t& attrs) mp_border->set_color(m_cur_border_dir, alpha, red, green, blue); } -void xlsx_styles_context::start_font_color(const xml_attrs_t& attrs) +void xlsx_styles_context::start_font_color(const xml_token_attrs_t& attrs) { assert(mp_font); diff --git a/src/liborcus/xlsx_context.hpp b/src/liborcus/xlsx_context.hpp index 021e135b2ec9b3fe3a5792bb7adff6285b75d714..56fb702c65e68e01d7a2450d15e53b61fe845167 100644 --- a/src/liborcus/xlsx_context.hpp +++ b/src/liborcus/xlsx_context.hpp @@ -42,7 +42,7 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name); virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child); - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs); + virtual 
void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs); virtual bool end_element(xmlns_id_t ns, xml_token_t name); virtual void characters(std::string_view str, bool transient); @@ -67,17 +67,17 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name); virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child); - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs); + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs); virtual bool end_element(xmlns_id_t ns, xml_token_t name); virtual void characters(std::string_view str, bool transient); private: - void start_element_number_format(const xml_attrs_t& attrs); + void start_element_number_format(const xml_token_attrs_t& attrs); - void start_element_border(const xml_attrs_t& attrs); - void start_element_diagonal(const xml_attrs_t& attrs); - void start_border_color(const xml_attrs_t& attrs); - void start_font_color(const xml_attrs_t& attrs); + void start_element_border(const xml_token_attrs_t& attrs); + void start_element_diagonal(const xml_token_attrs_t& attrs); + void start_border_color(const xml_token_attrs_t& attrs); + void start_font_color(const xml_token_attrs_t& attrs); void end_element_number_format(); diff --git a/src/liborcus/xlsx_pivot_context.cpp b/src/liborcus/xlsx_pivot_context.cpp index f351a0e2f44f403b3bfe64148b14e7a8486f1fc8..48d1d2504b574bc4eb171e39018de38a2bbf688b 100644 --- a/src/liborcus/xlsx_pivot_context.cpp +++ b/src/liborcus/xlsx_pivot_context.cpp @@ -298,7 +298,7 @@ void xlsx_pivot_cache_def_context::start_element(xmlns_id_t ns, xml_token_t name if (group_base >= 0) { // This is a group field. 
- m_pcache_field_group = m_pcache.create_field_group(group_base); + m_pcache_field_group = m_pcache.start_field_group(group_base); } break; } @@ -385,18 +385,21 @@ void xlsx_pivot_cache_def_context::start_element(xmlns_id_t ns, xml_token_t name } ); - // Pass the values to the interface. - m_pcache_field_group->set_range_grouping_type(group_by); - m_pcache_field_group->set_range_auto_start(auto_start); - m_pcache_field_group->set_range_auto_end(auto_end); - m_pcache_field_group->set_range_start_number(start); - m_pcache_field_group->set_range_end_number(end); - m_pcache_field_group->set_range_interval(interval); + if (m_pcache_field_group) + { + // Pass the values to the interface. + m_pcache_field_group->set_range_grouping_type(group_by); + m_pcache_field_group->set_range_auto_start(auto_start); + m_pcache_field_group->set_range_auto_end(auto_end); + m_pcache_field_group->set_range_start_number(start); + m_pcache_field_group->set_range_end_number(end); + m_pcache_field_group->set_range_interval(interval); - if (start_date) - m_pcache_field_group->set_range_start_date(*start_date); - if (end_date) - m_pcache_field_group->set_range_end_date(*end_date); + if (start_date) + m_pcache_field_group->set_range_start_date(*start_date); + if (end_date) + m_pcache_field_group->set_range_end_date(*end_date); + } if (get_config().debug) { @@ -1102,10 +1105,6 @@ bool xlsx_pivot_cache_rec_context::end_element(xmlns_id_t ns, xml_token_t name) return pop_stack(ns, name); } -void xlsx_pivot_cache_rec_context::characters(std::string_view /*str*/, bool /*transient*/) -{ -} - xlsx_pivot_table_context::xlsx_pivot_table_context(session_context& cxt, const tokens& tokens) : xml_context_base(cxt, tokens) {} diff --git a/src/liborcus/xlsx_pivot_context.hpp b/src/liborcus/xlsx_pivot_context.hpp index c292fe757aee75ddfc9807e6d6402ef0a6133f93..f9610ba11b9d8a955be41b3a0acc3c23a1fa00a7 100644 --- a/src/liborcus/xlsx_pivot_context.hpp +++ b/src/liborcus/xlsx_pivot_context.hpp @@ -93,7 +93,6 @@ 
public: virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child); virtual void start_element(xmlns_id_t ns, xml_token_t name, const::std::vector& attrs); virtual bool end_element(xmlns_id_t ns, xml_token_t name); - virtual void characters(std::string_view str, bool transient); }; /** diff --git a/src/liborcus/xlsx_sheet_context.cpp b/src/liborcus/xlsx_sheet_context.cpp index 878a6a72fc9ac4b7a8d6c82bcebcfa4081e65334..a6ce7fc701d56d6ce341642755b6be8b05ef76f1 100644 --- a/src/liborcus/xlsx_sheet_context.cpp +++ b/src/liborcus/xlsx_sheet_context.cpp @@ -169,7 +169,7 @@ void xlsx_sheet_context::end_child_context(xmlns_id_t ns, xml_token_t name, xml_ } } -void xlsx_sheet_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void xlsx_sheet_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); @@ -312,7 +312,7 @@ void xlsx_sheet_context::characters(std::string_view str, bool transient) m_cur_str = intern_in_context(str, transient); } -void xlsx_sheet_context::start_element_formula(const xml_token_pair_t& parent, const xml_attrs_t& attrs) +void xlsx_sheet_context::start_element_formula(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { const xml_elem_set_t expected = { { NS_ooxml_xlsx, XML_c }, @@ -363,7 +363,7 @@ void xlsx_sheet_context::start_element_formula(const xml_token_pair_t& parent, c } void xlsx_sheet_context::start_element_sheet_view( - const xml_token_pair_t& parent, const xml_attrs_t& attrs) + const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { xml_element_expected(parent, NS_ooxml_xlsx, XML_sheetViews); @@ -393,7 +393,7 @@ void xlsx_sheet_context::start_element_sheet_view( } void xlsx_sheet_context::start_element_selection( - const xml_token_pair_t& parent, const xml_attrs_t& attrs) + const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { xml_elem_stack_t elems; 
elems.emplace_back(NS_ooxml_xlsx, XML_sheetView); @@ -446,7 +446,7 @@ void xlsx_sheet_context::start_element_selection( } void xlsx_sheet_context::start_element_pane( - const xml_token_pair_t& parent, const xml_attrs_t& attrs) + const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { xml_elem_stack_t elems; elems.emplace_back(NS_ooxml_xlsx, XML_sheetView); @@ -513,7 +513,7 @@ void xlsx_sheet_context::start_element_pane( } } -void xlsx_sheet_context::start_element_cell(const xml_token_pair_t& parent, const xml_attrs_t& attrs) +void xlsx_sheet_context::start_element_cell(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { xlsx_cell_t cell_type = xlsx_ct_numeric; ss::address_t address; @@ -566,7 +566,7 @@ void xlsx_sheet_context::start_element_cell(const xml_token_pair_t& parent, cons m_cur_cell_xf = xf; } -void xlsx_sheet_context::start_element_col(const xml_attrs_t& attrs) +void xlsx_sheet_context::start_element_col(const xml_token_attrs_t& attrs) { long col_min = 0; // 1-based long col_max = 0; // 1-based @@ -621,7 +621,7 @@ void xlsx_sheet_context::start_element_col(const xml_attrs_t& attrs) } } -void xlsx_sheet_context::start_element_row(const xml_attrs_t& attrs) +void xlsx_sheet_context::start_element_row(const xml_token_attrs_t& attrs) { std::optional row; length_t height; diff --git a/src/liborcus/xlsx_sheet_context.hpp b/src/liborcus/xlsx_sheet_context.hpp index b7cf43b4cedea7afbdc28d7a45f7d4d80b312853..3f26ea0a2bd946284df59991cf70c5a73a20af44 100644 --- a/src/liborcus/xlsx_sheet_context.hpp +++ b/src/liborcus/xlsx_sheet_context.hpp @@ -70,20 +70,20 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name); virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child); - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs); + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs); virtual bool 
end_element(xmlns_id_t ns, xml_token_t name); virtual void characters(std::string_view str, bool transient); void pop_rel_extras(opc_rel_extras_t& other); private: - void start_element_formula(const xml_token_pair_t& parent, const xml_attrs_t& attrs); - void start_element_sheet_view(const xml_token_pair_t& parent, const xml_attrs_t& attrs); - void start_element_selection(const xml_token_pair_t& parent, const xml_attrs_t& attrs); - void start_element_pane(const xml_token_pair_t& parent, const xml_attrs_t& attrs); - void start_element_cell(const xml_token_pair_t& parent, const xml_attrs_t& attrs); - void start_element_col(const xml_attrs_t& attrs); - void start_element_row(const xml_attrs_t& attrs); + void start_element_formula(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); + void start_element_sheet_view(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); + void start_element_selection(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); + void start_element_pane(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); + void start_element_cell(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); + void start_element_col(const xml_token_attrs_t& attrs); + void start_element_row(const xml_token_attrs_t& attrs); void end_element_cell(); void push_raw_cell_value(); diff --git a/src/liborcus/xlsx_sheet_context_test.cpp b/src/liborcus/xlsx_sheet_context_test.cpp index d961fc4ac41488f767737c7b2cd8daf4a4db2158..20492b53552699926c54162e16e52a5e14c1dcb8 100644 --- a/src/liborcus/xlsx_sheet_context_test.cpp +++ b/src/liborcus/xlsx_sheet_context_test.cpp @@ -152,11 +152,11 @@ void test_cell_value() orcus::xmlns_id_t ns = NS_ooxml_xlsx; orcus::xml_token_t elem = XML_c; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; context.start_element(ns, elem, attrs); { - xml_attrs_t val_attrs; + xml_token_attrs_t val_attrs; context.start_element(ns, XML_v, val_attrs); context.characters("5", false); 
context.end_element(ns, XML_v); @@ -178,12 +178,12 @@ void test_cell_bool() orcus::xmlns_id_t ns = NS_ooxml_xlsx; orcus::xml_token_t elem = XML_c; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(xml_token_attr_t(NS_ooxml_xlsx, XML_t, "b", false)); context.start_element(ns, elem, attrs); { - xml_attrs_t val_attrs; + xml_token_attrs_t val_attrs; context.start_element(ns, XML_v, val_attrs); context.characters("1", false); context.end_element(ns, XML_v); @@ -205,11 +205,11 @@ void test_array_formula() orcus::xmlns_id_t ns = NS_ooxml_xlsx; orcus::xml_token_t elem = XML_c; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; context.start_element(ns, elem, attrs); { - xml_attrs_t formula_attrs; + xml_token_attrs_t formula_attrs; formula_attrs.push_back(xml_token_attr_t(NS_ooxml_xlsx, XML_t, "array", false)); formula_attrs.push_back(xml_token_attr_t(NS_ooxml_xlsx, XML_ref, "B3:B4", false)); context.start_element(ns, XML_f, formula_attrs); @@ -217,7 +217,7 @@ void test_array_formula() context.end_element(ns, XML_f); } { - xml_attrs_t val_attrs; + xml_token_attrs_t val_attrs; context.start_element(ns, XML_v, val_attrs); context.characters("5", false); context.end_element(ns, XML_v); @@ -239,7 +239,7 @@ void test_hidden_col() orcus::xmlns_id_t ns = NS_ooxml_xlsx; orcus::xml_token_t elem = XML_col; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(orcus::xml_token_attr_t(ns, XML_min, "2", false)); attrs.push_back(orcus::xml_token_attr_t(ns, XML_max, "2", false)); attrs.push_back(orcus::xml_token_attr_t(ns, XML_hidden, "1", false)); @@ -260,7 +260,7 @@ void test_hidden_row() orcus::xmlns_id_t ns = NS_ooxml_xlsx; orcus::xml_token_t elem = XML_row; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(orcus::xml_token_attr_t(ns, XML_r, "4", false)); attrs.push_back(orcus::xml_token_attr_t(ns, XML_hidden, "1", false)); context.start_element(ns, elem, attrs); diff --git 
a/src/liborcus/xlsx_table_context.cpp b/src/liborcus/xlsx_table_context.cpp index e13aa69a625c582c526cc9b64457b8ca3906702b..47e2f7c54ad146a34626d3b60898ec4bfa3c7b74 100644 --- a/src/liborcus/xlsx_table_context.cpp +++ b/src/liborcus/xlsx_table_context.cpp @@ -17,67 +17,14 @@ #include "orcus/spreadsheet/import_interface.hpp" #include +#include -using namespace std; +namespace ss = orcus::spreadsheet; namespace orcus { namespace { -class table_attr_parser -{ - string_pool* m_pool; - - long m_id; - long m_totals_row_count; - - pstring m_name; - pstring m_display_name; - pstring m_ref; - -public: - table_attr_parser(string_pool* pool) : m_pool(pool), m_id(-1), m_totals_row_count(-1) {} - - void operator() (const xml_token_attr_t& attr) - { - if (attr.ns && attr.ns != NS_ooxml_xlsx) - return; - - switch (attr.name) - { - case XML_id: - m_id = to_long(attr.value); - break; - case XML_totalsRowCount: - m_totals_row_count = to_long(attr.value); - break; - case XML_name: - m_name = attr.value; - if (attr.transient) - m_name = m_pool->intern(m_name).first; - break; - case XML_displayName: - m_display_name = attr.value; - if (attr.transient) - m_display_name = m_pool->intern(m_display_name).first; - break; - case XML_ref: - m_ref = attr.value; - if (attr.transient) - m_ref = m_pool->intern(m_ref).first; - break; - default: - ; - } - } - - long get_id() const { return m_id; } - long get_totals_row_count() const { return m_totals_row_count; } - pstring get_name() const { return m_name; } - pstring get_display_name() const { return m_display_name; } - pstring get_ref() const { return m_ref; } -}; - class table_column_attr_parser { string_pool* m_pool; @@ -146,31 +93,31 @@ public: case XML_name: mp_table->set_style_name(attr.value); if (m_debug) - cout << " * table style info (name=" << attr.value << ")" << endl; + std::cout << " * table style info (name=" << attr.value << ")" << std::endl; break; case XML_showFirstColumn: b = to_bool(attr.value); 
mp_table->set_style_show_first_column(b); if (m_debug) - cout << " * show first column: " << b << endl; + std::cout << " * show first column: " << b << std::endl; break; case XML_showLastColumn: b = to_bool(attr.value); mp_table->set_style_show_last_column(b); if (m_debug) - cout << " * show last column: " << b << endl; + std::cout << " * show last column: " << b << std::endl; break; case XML_showRowStripes: b = to_bool(attr.value); mp_table->set_style_show_row_stripes(b); if (m_debug) - cout << " * show row stripes: " << b << endl; + std::cout << " * show row stripes: " << b << std::endl; break; case XML_showColumnStripes: b = to_bool(attr.value); mp_table->set_style_show_column_stripes(b); if (m_debug) - cout << " * show column stripes: " << b << endl; + std::cout << " * show column stripes: " << b << std::endl; break; default: ; @@ -219,47 +166,29 @@ void xlsx_table_context::end_child_context(xmlns_id_t ns, xml_token_t name, xml_ } } -void xlsx_table_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void xlsx_table_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); if (ns != NS_ooxml_xlsx) return; - pstring str; + std::string_view str; switch (name) { case XML_table: { xml_element_expected(parent, XMLNS_UNKNOWN_ID, XML_UNKNOWN_TOKEN); - table_attr_parser func(&get_session_context().spool); - func = for_each(attrs.begin(), attrs.end(), func); - - if (get_config().debug) - { - cout << "* table (range=" << func.get_ref() << "; id=" << func.get_id() - << "; name=" << func.get_name() << "; display name=" - << func.get_display_name() << ")" << endl; - cout << " * totals row count: " << func.get_totals_row_count() << endl; - } - - m_table.set_identifier(func.get_id()); - str = func.get_ref(); - m_table.set_range(str); - str = func.get_name(); - m_table.set_name(str); - str = func.get_display_name(); - m_table.set_display_name(str); - 
m_table.set_totals_row_count(func.get_totals_row_count()); + start_element_table(attrs); + break; } - break; case XML_tableColumns: { xml_element_expected(parent, NS_ooxml_xlsx, XML_table); single_long_attr_getter func(NS_ooxml_xlsx, XML_count); long column_count = for_each(attrs.begin(), attrs.end(), func).get_value(); if (get_config().debug) - cout << " * column count: " << column_count << endl; + std::cout << " * column count: " << column_count << std::endl; m_table.set_column_count(column_count); } @@ -271,9 +200,9 @@ void xlsx_table_context::start_element(xmlns_id_t ns, xml_token_t name, const xm func = for_each(attrs.begin(), attrs.end(), func); if (get_config().debug) { - cout << " * table column (id=" << func.get_id() << "; name=" << func.get_name() << ")" << endl; - cout << " * totals row label: " << func.get_totals_row_label() << endl; - cout << " * totals func: " << static_cast(func.get_totals_row_function()) << endl; + std::cout << " * table column (id=" << func.get_id() << "; name=" << func.get_name() << ")" << std::endl; + std::cout << " * totals row label: " << func.get_totals_row_label() << std::endl; + std::cout << " * totals func: " << static_cast(func.get_totals_row_function()) << std::endl; } m_table.set_column_identifier(func.get_id()); @@ -317,8 +246,72 @@ bool xlsx_table_context::end_element(xmlns_id_t ns, xml_token_t name) return pop_stack(ns, name); } -void xlsx_table_context::characters(std::string_view /*str*/, bool /*transient*/) +void xlsx_table_context::start_element_table(const xml_token_attrs_t& attrs) { + long id = -1; + long totals_row_count = -1; + + std::optional name; + std::optional display_name; + std::optional ref; + + for (const xml_token_attr_t& attr : attrs) + { + if (attr.ns) + continue; + + switch (attr.name) + { + case XML_id: + id = to_long(attr.value); + break; + case XML_totalsRowCount: + totals_row_count = to_long(attr.value); + break; + case XML_name: + name = attr.value; + break; + case XML_displayName: + 
display_name = attr.value; + break; + case XML_ref: + ref = attr.value; + break; + } + } + + if (get_config().debug) + { + auto str_or_not = [](const auto& v) -> std::string_view + { + return v ? *v : "-"; + }; + + std::cout << "* table (range=" << str_or_not(ref) + << "; id=" << id + << "; name=" << str_or_not(name) + << "; display name=" << str_or_not(display_name) << ")" << std::endl; + + std::cout << " * totals row count: " << totals_row_count << std::endl; + } + + if (id >= 0) + m_table.set_identifier(id); + + if (ref) + { + ss::range_t range = to_rc_range(m_resolver.resolve_range(*ref)); + m_table.set_range(range); + } + + if (name) + m_table.set_name(*name); + + if (display_name) + m_table.set_display_name(*display_name); + + if (totals_row_count >= 0) + m_table.set_totals_row_count(totals_row_count); } } diff --git a/src/liborcus/xlsx_table_context.hpp b/src/liborcus/xlsx_table_context.hpp index 39b33b615cf4cd6c6f12c02b09395c1d6b2f723e..8b3e629a1f16d8bbda077d62e6c78f8b96fb5ac5 100644 --- a/src/liborcus/xlsx_table_context.hpp +++ b/src/liborcus/xlsx_table_context.hpp @@ -32,9 +32,11 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name); virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child); - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs); + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs); virtual bool end_element(xmlns_id_t ns, xml_token_t name); - virtual void characters(std::string_view str, bool transient); + +private: + void start_element_table(const xml_token_attrs_t& attrs); private: spreadsheet::iface::import_table& m_table; diff --git a/src/liborcus/xlsx_workbook_context.cpp b/src/liborcus/xlsx_workbook_context.cpp index 9924ddd5a75c443e5d3ecd6c33e14298e6bebcf1..0e3a6815f4a2bad6a2ac9bd3054b2a720f2f6ac6 100644 --- a/src/liborcus/xlsx_workbook_context.cpp +++ b/src/liborcus/xlsx_workbook_context.cpp 
@@ -43,7 +43,7 @@ void xlsx_workbook_context::end_child_context(xmlns_id_t /*ns*/, xml_token_t /*n { } -void xlsx_workbook_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void xlsx_workbook_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); session_context& cxt = get_session_context(); diff --git a/src/liborcus/xlsx_workbook_context.hpp b/src/liborcus/xlsx_workbook_context.hpp index f3f346bba01960404174aa58d0e9eea3eb3f99c6..08716700db4be3d8b202067b998b0ef0306d286c 100644 --- a/src/liborcus/xlsx_workbook_context.hpp +++ b/src/liborcus/xlsx_workbook_context.hpp @@ -41,7 +41,7 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name); virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child); - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs); + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs); virtual bool end_element(xmlns_id_t ns, xml_token_t name); virtual void characters(std::string_view str, bool transient); diff --git a/src/liborcus/xml_element_types.cpp b/src/liborcus/xml_element_types.cpp new file mode 100644 index 0000000000000000000000000000000000000000..967b4d4d87e20e146a968ef78535f3ac5df31764 --- /dev/null +++ b/src/liborcus/xml_element_types.cpp @@ -0,0 +1,19 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#include "xml_element_types.hpp" + +namespace orcus { + +size_t xml_token_pair_hash::operator()(const xml_token_pair_t& v) const +{ + return std::hash()(v.first) ^ std::hash()(v.second); +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/src/liborcus/xml_element_types.hpp b/src/liborcus/xml_element_types.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8bf391ff1241c03ceec152189b23b57c80f59fac --- /dev/null +++ b/src/liborcus/xml_element_types.hpp @@ -0,0 +1,30 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#pragma once + +#include + +namespace orcus { + +/** + * Holds a pair of XML namespace identifier and an element token. Typically + * used when managing the element stack inside element context classes. 
+ */ +using xml_token_pair_t = std::pair; + +struct ORCUS_PSR_DLLPUBLIC xml_token_pair_hash +{ + size_t operator()(const xml_token_pair_t& v) const; +}; + +using xml_elem_stack_t = std::vector; +using xml_elem_set_t = std::unordered_set; + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/src/liborcus/xml_element_validator.hpp b/src/liborcus/xml_element_validator.hpp index ed2a47a1c6fa9feec18820cc82b2b954a0ebd6e1..43a44e0d1b481eecdf73f29312e777b3e72577d1 100644 --- a/src/liborcus/xml_element_validator.hpp +++ b/src/liborcus/xml_element_validator.hpp @@ -7,7 +7,7 @@ #pragma once -#include +#include "xml_element_types.hpp" #include diff --git a/src/liborcus/xml_stream_parser.cpp b/src/liborcus/xml_stream_parser.cpp index daa565dfb2c9ab5ad9b980157f129472e2b772d5..895821c45a5faebea4eb85a26c8a544e991373ef 100644 --- a/src/liborcus/xml_stream_parser.cpp +++ b/src/liborcus/xml_stream_parser.cpp @@ -64,7 +64,7 @@ void xml_stream_parser::parse() if (!mp_handler) return; - sax_token_parser sax(m_content, m_size, m_tokens, m_ns_cxt, *mp_handler); + sax_token_parser sax({m_content, m_size}, m_tokens, m_ns_cxt, *mp_handler); sax.parse(); } diff --git a/src/liborcus/xml_structure_tree.cpp b/src/liborcus/xml_structure_tree.cpp index e14932e2ef009413e8f56afa1b393fbf4239910e..dbdb029d4a5c328248a73206c7afff1dedfaa7f2 100644 --- a/src/liborcus/xml_structure_tree.cpp +++ b/src/liborcus/xml_structure_tree.cpp @@ -263,7 +263,7 @@ void print_scope(std::ostream& os, const scopes_type& scopes, const xmlns_contex { os << "/"; size_t num_id = cxt.get_index((*it)->name.ns); - if (num_id != index_not_found) + if (num_id != INDEX_NOT_FOUND) os << "ns" << num_id << ":"; os << (*it)->name.name; if ((*it)->repeat) @@ -292,7 +292,7 @@ struct xml_structure_tree::impl std::string to_string(const xml_structure_tree::entity_name& name) const { std::ostringstream ss; - if (m_xmlns_cxt.get_index(name.ns) != index_not_found) + if (m_xmlns_cxt.get_index(name.ns) != INDEX_NOT_FOUND) ss 
<< m_xmlns_cxt.get_short_name(name.ns) << ":"; ss << name.name; return ss.str(); @@ -523,7 +523,7 @@ xml_structure_tree::~xml_structure_tree() {} void xml_structure_tree::parse(std::string_view s) { xml_sax_handler hdl(mp_impl->m_pool); - sax_ns_parser parser(s.data(), s.size(), mp_impl->m_xmlns_cxt, hdl); + sax_ns_parser parser(s, mp_impl->m_xmlns_cxt, hdl); parser.parse(); mp_impl->mp_root = hdl.release_root_element(); } @@ -555,7 +555,7 @@ void xml_structure_tree::dump_compact(std::ostream& os) const ss << "/"; size_t num_id = cxt.get_index(this_elem.name.ns); - if (num_id != index_not_found) + if (num_id != INDEX_NOT_FOUND) ss << "ns" << num_id << ":"; ss << this_elem.name.name; if (this_elem.prop->repeat) diff --git a/src/liborcus/xml_util.cpp b/src/liborcus/xml_util.cpp index c2a0c9dbcbd58de287945e345d882cc193a39d5b..7c15adfd6d90b679276ea8c0aa3cd5323233ce9b 100644 --- a/src/liborcus/xml_util.cpp +++ b/src/liborcus/xml_util.cpp @@ -53,7 +53,7 @@ void xml_element_printer::print_element(std::ostream& os, xmlns_id_t ns, xml_tok os << m_tokens.get_token_name(name) << '>'; } -void print_attrs(const tokens& tokens, const xml_attrs_t& attrs) +void print_attrs(const tokens& tokens, const xml_token_attrs_t& attrs) { for (const auto& attr : attrs) { diff --git a/src/liborcus/xml_util.hpp b/src/liborcus/xml_util.hpp index 4f67015376793526319cef2c9744ae2486285002..1021ccf4eb55d2a43a610b36ff0148baea86e6c4 100644 --- a/src/liborcus/xml_util.hpp +++ b/src/liborcus/xml_util.hpp @@ -36,7 +36,7 @@ void print_element(std::ostream& os, const tokens& t, xmlns_id_t ns, xml_token_t /** * Print attributes to stdout for debugging purposes. 
*/ -void print_attrs(const tokens& tokens, const xml_attrs_t& attrs); +void print_attrs(const tokens& tokens, const xml_token_attrs_t& attrs); } // namespace orcus diff --git a/src/liborcus/yaml_document_tree.cpp b/src/liborcus/yaml_document_tree.cpp index 7a5804835a4f16b2672c8e912a7577280f2bfa9d..eac35c706c23af504fb322359bca6dda9ac0c58f 100644 --- a/src/liborcus/yaml_document_tree.cpp +++ b/src/liborcus/yaml_document_tree.cpp @@ -30,7 +30,7 @@ namespace orcus { namespace yaml { document_error::document_error(const std::string& msg) : general_error("yaml_document_error", msg) {} -document_error::~document_error() throw() {} +document_error::~document_error() = default; struct yaml_value { @@ -96,8 +96,7 @@ struct yaml_value_string : public yaml_value std::string value_string; yaml_value_string() : yaml_value(node_t::string) {} - yaml_value_string(const std::string& s) : yaml_value(node_t::string), value_string(s) {} - yaml_value_string(const char* p, size_t n) : yaml_value(node_t::string), value_string(p, n) {} + yaml_value_string(std::string_view s) : yaml_value(node_t::string), value_string(s) {} virtual ~yaml_value_string() {} virtual std::string print() const @@ -302,17 +301,17 @@ public: m_stack.pop_back(); } - void string(const char* p, size_t n) + void string(std::string_view v) { assert(m_in_document); if (m_root) { - yaml_value* yv = push_value(std::make_unique(p, n)); + yaml_value* yv = push_value(std::make_unique(v)); assert(yv && yv->type == node_t::string); } else - m_root = std::make_unique(p, n); + m_root = std::make_unique(v); } void number(double val) @@ -543,7 +542,7 @@ document_tree::~document_tree() {} void document_tree::load(std::string_view s) { handler hdl; - yaml_parser parser(s.data(), s.size(), hdl); + yaml_parser parser(s, hdl); parser.parse(); hdl.swap(mp_impl->m_docs); } diff --git a/src/liborcus/yaml_document_tree_test.cpp b/src/liborcus/yaml_document_tree_test.cpp index 
b4e68d9ddb076a5000b326973274cff0dee8706a..7a732ac5917c675fc100aa994a876727a16d3cde 100644 --- a/src/liborcus/yaml_document_tree_test.cpp +++ b/src/liborcus/yaml_document_tree_test.cpp @@ -96,7 +96,7 @@ void test_yaml_invalids() doc.load(strm.str()); assert(!"yaml::parse_error was not thrown, but expected to be."); } - catch (const yaml::parse_error&) + catch (const parse_error&) { // This is expected. } diff --git a/src/mso/encryption_info.cpp b/src/mso/encryption_info.cpp index 3ebfc15da95cbbfb53b0ce055d80408b79e7c3d9..bca2bcd2a76c078b26de0c64222f520068379b8a 100644 --- a/src/mso/encryption_info.cpp +++ b/src/mso/encryption_info.cpp @@ -219,7 +219,7 @@ void encryption_info_reader::read(const char* p, size_t n) #endif orcus::xmlns_context cxt = mp_impl->m_ns_repo.create_context(); sax_handler hdl(cxt); - orcus::sax_ns_parser parser(p, n, cxt, hdl); + orcus::sax_ns_parser parser({p, n}, cxt, hdl); parser.parse(); } diff --git a/src/orcus_detect_main.cpp b/src/orcus_detect_main.cpp index 2a19931c7a5ee8fcdee3b62cbd0abb105cba53ea..addf9b092f566197a1fb3695a5a350f078367927 100644 --- a/src/orcus_detect_main.cpp +++ b/src/orcus_detect_main.cpp @@ -30,7 +30,7 @@ int main(int argc, char** argv) try return EXIT_FAILURE; } - format_t detected_type = detect(reinterpret_cast(content.data()), content.size()); + format_t detected_type = detect(content.str()); cout << "type: "; switch (detected_type) diff --git a/src/orcus_json_cli.cpp b/src/orcus_json_cli.cpp index ca6f682f4e3bde0eb6c06042c708668d2eed8a99..5818612cdd91bdf8fbb68088edc9c3090a4ab12b 100644 --- a/src/orcus_json_cli.cpp +++ b/src/orcus_json_cli.cpp @@ -425,7 +425,7 @@ int main(int argc, char** argv) return EXIT_FAILURE; } } - catch (const json::parse_error& e) + catch (const parse_error& e) { cerr << create_parse_error_output(content.str(), e.offset()) << endl; cerr << e.what() << endl; diff --git a/src/orcus_test_xml.cpp b/src/orcus_test_xml.cpp index 
5def53fb331ae518aa08f267b38b933ba5f92b7a..eb6bc954df99b0c4eda2b3ee83818b415fbf2710 100644 --- a/src/orcus_test_xml.cpp +++ b/src/orcus_test_xml.cpp @@ -203,7 +203,7 @@ void test_xml_encoded_attrs() assert(!content.empty()); sax_handler_encoded_attrs hdl; - sax_parser parser(content.data(), content.size(), hdl); + sax_parser parser(content.str(), hdl); parser.parse(); vector expected; diff --git a/src/orcus_xml_main.cpp b/src/orcus_xml_main.cpp index 4056939f25958675bb366c153f1f0ea484520bc0..617dc6589d4fc53f3a2be7fe243df82dbe12da8a 100644 --- a/src/orcus_xml_main.cpp +++ b/src/orcus_xml_main.cpp @@ -332,7 +332,7 @@ int main(int argc, char** argv) try ; } } - catch (const sax::malformed_xml_error& e) + catch (const malformed_xml_error& e) { cerr << create_parse_error_output(content.str(), e.offset()) << endl; cerr << e.what() << endl; diff --git a/src/orcus_yaml_main.cpp b/src/orcus_yaml_main.cpp index 9f750dd43248cd402cd0b868444910fda600e466..09dc48bc5bf319b98fba26f725405631149f591f 100644 --- a/src/orcus_yaml_main.cpp +++ b/src/orcus_yaml_main.cpp @@ -146,7 +146,7 @@ std::unique_ptr load_doc(const char* p, size_t n) { doc->load({p, n}); } - catch (const yaml::parse_error& e) + catch (const parse_error& e) { cerr << create_parse_error_output(std::string_view(p, n), e.offset()) << endl; throw; diff --git a/src/orcus_zip_dump.cpp b/src/orcus_zip_dump.cpp index 6563c43e2c5cf65090541c3ac5957ca87d8fc9c3..1cf81e77c0f39258be1a8a5c5aee6ecf822987c8 100644 --- a/src/orcus_zip_dump.cpp +++ b/src/orcus_zip_dump.cpp @@ -29,12 +29,16 @@ int main(int argc, char** argv) if (argc < 3) { for (size_t i = 0; i < n; ++i) - archive.dump_file_entry(i); + { + auto header = archive.get_file_entry_header(i); + std::cout << "--" << std::endl; + std::cout << header << std::endl; + } return EXIT_SUCCESS; } - const char* entry_name = argv[2]; - archive.dump_file_entry(entry_name); + auto header = archive.get_file_entry_header(argv[2]); + std::cout << header << std::endl; } catch (const 
std::exception& e) { diff --git a/src/parser/.gitignore b/src/parser/.gitignore index 34791331a698d0f51ba60a28536a7f1a95912a26..3590d47b6cd5dd8ea1507af6af1ae38fed498ace 100644 --- a/src/parser/.gitignore +++ b/src/parser/.gitignore @@ -2,3 +2,4 @@ parser-test-* parser-global-test sax-parser-test utf8-test +types-test diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am index 708f842385fc5ed2210b48f37a20de30abd479a1..d9822de7dff6e4e940111a47e14fb0ecc1883561 100644 --- a/src/parser/Makefile.am +++ b/src/parser/Makefile.am @@ -86,7 +86,9 @@ parser_test_xml_namespace_SOURCES = \ xml_namespace.cpp \ xml_namespace_test.cpp -parser_test_xml_namespace_LDADD = liborcus-parser-@ORCUS_API_VERSION@.la +parser_test_xml_namespace_LDADD = \ + liborcus-parser-@ORCUS_API_VERSION@.la \ + ../test/liborcus-test.a parser_test_xml_namespace_CPPFLAGS = $(AM_CPPFLAGS) # parser-test-xml-validation @@ -200,8 +202,15 @@ parser_test_stream_CPPFLAGS = $(AM_CPPFLAGS) parser_test_zip_archive_SOURCES = \ zip_archive_test.cpp -parser_test_zip_archive_LDADD = liborcus-parser-@ORCUS_API_VERSION@.la parser_test_zip_archive_CPPFLAGS = $(AM_CPPFLAGS) +parser_test_zip_archive_LDADD = \ + liborcus-parser-@ORCUS_API_VERSION@.la \ + ../test/liborcus-test.a \ + $(BOOST_SYSTEM_LIBS) \ + $(BOOST_FILESYSTEM_LIBS) +parser_test_zip_archive_LDFLAGS = \ + $(BOOST_SYSTEM_LDFLAGS) \ + $(BOOST_FILESYSTEM_LDFLAGS) # parser-test-base diff --git a/src/parser/cell_buffer.cpp b/src/parser/cell_buffer.cpp index 54bc4d5dd982590b94f3d9df390a322010bf547d..6815d71b814774c1e7b706f800f66dac9112ffbf 100644 --- a/src/parser/cell_buffer.cpp +++ b/src/parser/cell_buffer.cpp @@ -21,6 +21,8 @@ namespace orcus { cell_buffer::cell_buffer() : m_buf_size(0) {} +cell_buffer::~cell_buffer() = default; + void cell_buffer::append(const char* p, size_t len) { if (!len) @@ -44,14 +46,9 @@ void cell_buffer::reset() m_buf_size = 0; } -const char* cell_buffer::get() const -{ - return &m_buffer[0]; -} - -size_t cell_buffer::size() const 
+std::string_view cell_buffer::str() const { - return m_buf_size; + return std::string_view{m_buffer.data(), m_buf_size}; } bool cell_buffer::empty() const diff --git a/src/parser/css_parser_base.cpp b/src/parser/css_parser_base.cpp index 34f7d6ef480dfe65a42c23777e870a581d467de1..7d2b22829fd7a832e51298fa212800d1fc9b8c50 100644 --- a/src/parser/css_parser_base.cpp +++ b/src/parser/css_parser_base.cpp @@ -15,31 +15,10 @@ #include #include -using namespace std; - namespace orcus { namespace css { -parse_error::parse_error(const std::string& msg) : - orcus::parse_error(msg, 0) {} - -void parse_error::throw_with(const char* msg_before, char c, const char* msg_after) -{ - throw parse_error(build_message(msg_before, c, msg_after)); -} - -void parse_error::throw_with( - const char* msg_before, const char* p, size_t n, const char* msg_after) -{ - throw parse_error(build_message(msg_before, p, n, msg_after)); -} - -void parse_error::throw_with(const char* msg_before, std::string_view s, const char* msg_after) -{ - throw parse_error(build_message(msg_before, s.data(), s.size(), msg_after)); -} - -parser_base::parser_base(const char* p, size_t n) : - ::orcus::parser_base(p, n, false), +parser_base::parser_base(std::string_view content) : + orcus::parser_base(content.data(), content.size()), m_simple_selector_count(0), m_combinator(combinator_t::descendant) {} @@ -80,7 +59,7 @@ uint8_t parser_base::parse_uint8() } if (!len) - throw css::parse_error("parse_uint8: no digit encountered."); + throw parse_error("parse_uint8: no digit encountered.", offset()); int maxval = std::numeric_limits::max(); if (val > maxval) @@ -91,20 +70,20 @@ uint8_t parser_base::parse_uint8() std::string_view parser_base::parse_value() { - auto throw_invalid = [](uint8_t n_bytes) + auto throw_invalid = [this](uint8_t n_bytes) { std::ostringstream os; os << "parse_value: invalid utf-8 byte length (" << int(n_bytes) << ")"; - throw css::parse_error(os.str()); + throw parse_error(os.str(), offset()); }; - 
auto check_byte_length_or_throw = [](uint8_t n_bytes, std::size_t max_size) + auto check_byte_length_or_throw = [this](uint8_t n_bytes, std::size_t max_size) { if (std::size_t(n_bytes) > max_size) { std::ostringstream os; os << "parse_value: utf-8 byte length is " << int(n_bytes) << " but only " << max_size << " bytes remaining."; - throw css::parse_error(os.str()); + throw parse_error(os.str(), offset()); } }; @@ -126,7 +105,7 @@ std::string_view parser_base::parse_value() case 1: { if (!is_alpha(c) && !is_numeric(c) && !is_in(c, "-+.#")) - css::parse_error::throw_with("parse_value: illegal first character of a value '", c, "'"); + parse_error::throw_with("parse_value: illegal first character of a value '", c, "'", offset()); break; } case 2: @@ -180,8 +159,8 @@ double parser_base::parse_percent() double v = parse_double_or_throw(); if (*mp_char != '%') - css::parse_error::throw_with( - "parse_percent: '%' expected after the numeric value, but '", *mp_char, "' found."); + parse_error::throw_with( + "parse_percent: '%' expected after the numeric value, but '", *mp_char, "' found.", offset()); next(); // skip the '%'. 
return v; @@ -191,7 +170,7 @@ double parser_base::parse_double_or_throw() { double v = parse_double(); if (std::isnan(v)) - throw css::parse_error("parse_double: failed to parse double precision value."); + throw parse_error("parse_double: failed to parse double precision value.", offset()); return v; } @@ -202,7 +181,7 @@ void parser_base::literal(const char*& p, size_t& len, char quote) skip_to(p, len, quote); if (cur_char() != quote) - throw css::parse_error("literal: end quote has never been reached."); + throw parse_error("literal: end quote has never been reached.", offset()); } void parser_base::skip_to(const char*&p, size_t& len, char c) @@ -339,8 +318,8 @@ void parser_base::skip_comments_and_blanks() void parser_base::set_combinator(char c, css::combinator_t combinator) { if (!m_simple_selector_count) - css::parse_error::throw_with( - "set_combinator: combinator '", c, "' encountered without parent element."); + parse_error::throw_with( + "set_combinator: combinator '", c, "' encountered without parent element.", offset()); m_combinator = combinator; next(); diff --git a/src/parser/css_parser_test.cpp b/src/parser/css_parser_test.cpp index 6c6c7831fae50513a67a2dddcfe6a52be0c9ac01..95f4b1cc3fdcfa227f8873127c14a17680023292 100644 --- a/src/parser/css_parser_test.cpp +++ b/src/parser/css_parser_test.cpp @@ -12,10 +12,9 @@ void test_handler() { const char* test_code = "p { background-color: white; }"; - size_t n = strlen(test_code); orcus::css_handler hdl; - orcus::css_parser parser(test_code, n, hdl); + orcus::css_parser parser(test_code, hdl); parser.parse(); } diff --git a/src/parser/css_types.cpp b/src/parser/css_types.cpp index b1289d92ca753650ce82b6012aa84d57e5498c56..907915933660f51fa6c3303d71f3d669f0de74a7 100644 --- a/src/parser/css_types.cpp +++ b/src/parser/css_types.cpp @@ -22,26 +22,33 @@ const pseudo_element_t pseudo_element_backdrop = 0x0020; namespace { -typedef mdds::sorted_string_map pe_map_type; +namespace pseudo_elem { + +using map_type = 
mdds::sorted_string_map; // Keys must be sorted. -pe_map_type::entry pseudo_elem_type_entries[] = { - { MDDS_ASCII("after"), pseudo_element_after }, - { MDDS_ASCII("backdrop"), pseudo_element_backdrop }, - { MDDS_ASCII("before"), pseudo_element_before }, - { MDDS_ASCII("first-letter"), pseudo_element_first_letter }, - { MDDS_ASCII("first-line"), pseudo_element_first_line }, - { MDDS_ASCII("selection"), pseudo_element_selection }, +constexpr map_type::entry entries[] = { + { "after", pseudo_element_after }, + { "backdrop", pseudo_element_backdrop }, + { "before", pseudo_element_before }, + { "first-letter", pseudo_element_first_letter }, + { "first-line", pseudo_element_first_line }, + { "selection", pseudo_element_selection }, }; +const map_type& get() +{ + static map_type map(entries, std::size(entries), 0); + return map; +} + +} // namespace pseudo_elem + } pseudo_element_t to_pseudo_element(std::string_view s) { - static pe_map_type elem_map( - pseudo_elem_type_entries, std::size(pseudo_elem_type_entries), 0); - - return elem_map.find(s.data(), s.size()); + return pseudo_elem::get().find(s); } const pseudo_class_t pseudo_class_active = 0x0000000000000001; @@ -86,66 +93,74 @@ const pseudo_class_t pseudo_class_visited = 0x0000004000000000; namespace { -typedef mdds::sorted_string_map pc_map_type; +namespace pseudo_class { + +using map_type = mdds::sorted_string_map; // Keys must be sorted. 
-pc_map_type::entry pseudo_class_type_entries[] = { - { MDDS_ASCII("active"), pseudo_class_active }, - { MDDS_ASCII("checked"), pseudo_class_checked }, - { MDDS_ASCII("default"), pseudo_class_default }, - { MDDS_ASCII("dir"), pseudo_class_dir }, - { MDDS_ASCII("disabled"), pseudo_class_disabled }, - { MDDS_ASCII("empty"), pseudo_class_empty }, - { MDDS_ASCII("enabled"), pseudo_class_enabled }, - { MDDS_ASCII("first"), pseudo_class_first }, - { MDDS_ASCII("first-child"), pseudo_class_first_child }, - { MDDS_ASCII("first-of-type"), pseudo_class_first_of_type }, - { MDDS_ASCII("focus"), pseudo_class_focus }, - { MDDS_ASCII("fullscreen"), pseudo_class_fullscreen }, - { MDDS_ASCII("hover"), pseudo_class_hover }, - { MDDS_ASCII("in-range"), pseudo_class_in_range }, - { MDDS_ASCII("indeterminate"), pseudo_class_indeterminate }, - { MDDS_ASCII("invalid"), pseudo_class_invalid }, - { MDDS_ASCII("lang"), pseudo_class_lang }, - { MDDS_ASCII("last-child"), pseudo_class_last_child }, - { MDDS_ASCII("last-of-type"), pseudo_class_last_of_type }, - { MDDS_ASCII("left"), pseudo_class_left }, - { MDDS_ASCII("link"), pseudo_class_link }, - { MDDS_ASCII("not"), pseudo_class_not }, - { MDDS_ASCII("nth-child"), pseudo_class_nth_child }, - { MDDS_ASCII("nth-last-child"), pseudo_class_nth_last_child }, - { MDDS_ASCII("nth-last-of-type"), pseudo_class_nth_last_of_type }, - { MDDS_ASCII("nth-of-type"), pseudo_class_nth_of_type }, - { MDDS_ASCII("only-child"), pseudo_class_only_child }, - { MDDS_ASCII("only-of-type"), pseudo_class_only_of_type }, - { MDDS_ASCII("optional"), pseudo_class_optional }, - { MDDS_ASCII("out-of-range"), pseudo_class_out_of_range }, - { MDDS_ASCII("read-only"), pseudo_class_read_only }, - { MDDS_ASCII("read-write"), pseudo_class_read_write }, - { MDDS_ASCII("required"), pseudo_class_required }, - { MDDS_ASCII("right"), pseudo_class_right }, - { MDDS_ASCII("root"), pseudo_class_root }, - { MDDS_ASCII("scope"), pseudo_class_scope }, - { MDDS_ASCII("target"), 
pseudo_class_target }, - { MDDS_ASCII("valid"), pseudo_class_valid }, - { MDDS_ASCII("visited"), pseudo_class_visited }, +constexpr map_type::entry entries[] = { + { "active", pseudo_class_active }, + { "checked", pseudo_class_checked }, + { "default", pseudo_class_default }, + { "dir", pseudo_class_dir }, + { "disabled", pseudo_class_disabled }, + { "empty", pseudo_class_empty }, + { "enabled", pseudo_class_enabled }, + { "first", pseudo_class_first }, + { "first-child", pseudo_class_first_child }, + { "first-of-type", pseudo_class_first_of_type }, + { "focus", pseudo_class_focus }, + { "fullscreen", pseudo_class_fullscreen }, + { "hover", pseudo_class_hover }, + { "in-range", pseudo_class_in_range }, + { "indeterminate", pseudo_class_indeterminate }, + { "invalid", pseudo_class_invalid }, + { "lang", pseudo_class_lang }, + { "last-child", pseudo_class_last_child }, + { "last-of-type", pseudo_class_last_of_type }, + { "left", pseudo_class_left }, + { "link", pseudo_class_link }, + { "not", pseudo_class_not }, + { "nth-child", pseudo_class_nth_child }, + { "nth-last-child", pseudo_class_nth_last_child }, + { "nth-last-of-type", pseudo_class_nth_last_of_type }, + { "nth-of-type", pseudo_class_nth_of_type }, + { "only-child", pseudo_class_only_child }, + { "only-of-type", pseudo_class_only_of_type }, + { "optional", pseudo_class_optional }, + { "out-of-range", pseudo_class_out_of_range }, + { "read-only", pseudo_class_read_only }, + { "read-write", pseudo_class_read_write }, + { "required", pseudo_class_required }, + { "right", pseudo_class_right }, + { "root", pseudo_class_root }, + { "scope", pseudo_class_scope }, + { "target", pseudo_class_target }, + { "valid", pseudo_class_valid }, + { "visited", pseudo_class_visited }, }; +const map_type& get() +{ + static map_type map(entries, std::size(entries), 0); + return map; +} + +} // namespace pseudo_class + } pseudo_class_t to_pseudo_class(std::string_view s) { - static pc_map_type class_map(pseudo_class_type_entries, 
std::size(pseudo_class_type_entries), 0); - - return class_map.find(s.data(), s.size()); + return pseudo_class::get().find(s); } std::string pseudo_class_to_string(pseudo_class_t val) { std::ostringstream os; - std::size_t n = std::size(pseudo_class_type_entries); - const pc_map_type::entry* p = pseudo_class_type_entries; - const pc_map_type::entry* p_end = p + n; + std::size_t n = std::size(pseudo_class::entries); + const pseudo_class::map_type::entry* p = pseudo_class::entries; + const pseudo_class::map_type::entry* p_end = p + n; for (; p != p_end; ++p) { if (val & p->value) diff --git a/src/parser/csv_parser_base.cpp b/src/parser/csv_parser_base.cpp index 517e20734b8e95611a9ec13b1ac1648f609316e6..a5055bc0d71ddfc6c6fb43295236b5ccd893ad9c 100644 --- a/src/parser/csv_parser_base.cpp +++ b/src/parser/csv_parser_base.cpp @@ -15,18 +15,9 @@ parser_config::parser_config() : text_qualifier('\0'), trim_cell_value(false) {} -parse_error::parse_error(const std::string& msg) : m_msg(msg) {} - -parse_error::~parse_error() throw() {} - -const char* parse_error::what() const throw() -{ - return m_msg.c_str(); -} - parser_base::parser_base( - const char* p, size_t n, const csv::parser_config& config) : - ::orcus::parser_base(p, n, false), m_config(config) + std::string_view content, const csv::parser_config& config) : + ::orcus::parser_base(content.data(), content.size()), m_config(config) { maybe_skip_bom(); } diff --git a/src/parser/csv_parser_test.cpp b/src/parser/csv_parser_test.cpp index 66b81d0b3c971339aa0ed8367aa70f14ce6f156d..18470f008732c14455a58dc86b05def2a0203e39 100644 --- a/src/parser/csv_parser_test.cpp +++ b/src/parser/csv_parser_test.cpp @@ -12,11 +12,10 @@ void test_handler() { const char* test_code = "1,2,3,4,5\n6,7,8,9,10\n"; - size_t n = strlen(test_code); orcus::csv_handler hdl; orcus::csv::parser_config config; - orcus::csv_parser parser(test_code, n, hdl, config); + orcus::csv_parser parser(test_code, hdl, config); parser.parse(); } diff --git 
a/src/parser/exception.cpp b/src/parser/exception.cpp index 71e1f33637014902f62df5e1480ea846d8a33952..1d958fdb64c0f3e2749d6ac75874a2e24c0e3821 100644 --- a/src/parser/exception.cpp +++ b/src/parser/exception.cpp @@ -13,21 +13,19 @@ using namespace std; namespace orcus { -general_error::general_error(const string& msg) : - m_msg(msg) +general_error::general_error(std::string msg) : + m_msg(std::move(msg)) { } -general_error::general_error(const std::string& cls, const std::string& msg) +general_error::general_error(std::string_view cls, std::string_view msg) { - ostringstream os; + std::ostringstream os; os << cls << ": " << msg; m_msg = os.str(); } -general_error::~general_error() noexcept -{ -} +general_error::~general_error() noexcept = default; const char* general_error::what() const noexcept { @@ -44,33 +42,100 @@ invalid_arg_error::invalid_arg_error(const std::string& msg) : invalid_arg_error::~invalid_arg_error() noexcept {} -xml_structure_error::xml_structure_error(const string& msg) : - general_error(msg) {} +xml_structure_error::xml_structure_error(std::string msg) : + general_error(std::move(msg)) {} + +xml_structure_error::~xml_structure_error() noexcept = default; + +json_structure_error::json_structure_error(std::string msg) : + general_error(std::move(msg)) {} + +json_structure_error::~json_structure_error() noexcept = default; -xml_structure_error::~xml_structure_error() noexcept {} +invalid_map_error::invalid_map_error(std::string msg) : + general_error(std::move(msg)) {} -json_structure_error::json_structure_error(const string& msg) : - general_error(msg) {} +invalid_map_error::~invalid_map_error() noexcept = default; -json_structure_error::~json_structure_error() noexcept {} +value_error::value_error(std::string msg) : + general_error(std::move(msg)) {} -invalid_map_error::invalid_map_error(const string& msg) : - general_error(msg) {} +value_error::~value_error() noexcept = default; -invalid_map_error::~invalid_map_error() noexcept {} 
+xpath_error::xpath_error(std::string msg) : general_error(std::move(msg)) {} -value_error::value_error(const string& msg) : - general_error(msg) {} +xpath_error::~xpath_error() noexcept = default; -value_error::~value_error() noexcept {} +interface_error::interface_error(std::string msg) : general_error(std::move(msg)) {} + +interface_error::~interface_error() noexcept = default; + +namespace { + +std::string build_offset_msg(std::ptrdiff_t offset) +{ + std::ostringstream os; + os << " (offset=" << offset << ')'; + return os.str(); +} -xpath_error::xpath_error(const string& msg) : general_error(msg) {} +std::string build_message(std::string_view msg_before, char c, std::string_view msg_after) +{ + std::ostringstream os; + os << msg_before << c << msg_after; + return os.str(); +} -xpath_error::~xpath_error() noexcept {} +std::string build_message( + std::string_view msg_before, std::string_view msg, std::string_view msg_after) +{ + std::ostringstream os; + os << msg_before << msg << msg_after; + return os.str(); +} -interface_error::interface_error(const std::string& msg) : general_error(msg) {} +} -interface_error::~interface_error() noexcept {} +parse_error::parse_error(std::string_view cls, std::string_view msg, std::ptrdiff_t offset) : + general_error(cls, msg), m_offset(offset) +{ + append_msg(build_offset_msg(offset)); +} +parse_error::parse_error(std::string msg, std::ptrdiff_t offset) : + general_error(std::move(msg)), m_offset(offset) +{ + append_msg(build_offset_msg(offset)); } + +std::ptrdiff_t parse_error::offset() const +{ + return m_offset; +} + +void parse_error::throw_with( + std::string_view msg_before, char c, std::string_view msg_after, std::ptrdiff_t offset) +{ + throw parse_error(build_message(msg_before, c, msg_after), offset); +} + +void parse_error::throw_with( + std::string_view msg_before, std::string_view msg, std::string_view msg_after, std::ptrdiff_t offset) +{ + throw parse_error(build_message(msg_before, msg, msg_after), offset); +} + 
+malformed_xml_error::malformed_xml_error(std::string_view msg, std::ptrdiff_t offset) : + orcus::parse_error("malformed_xml_error", msg, offset) {} + +malformed_xml_error::~malformed_xml_error() = default; + +zip_error::zip_error(std::string_view msg) : general_error("zip_error", msg) +{ +} + +zip_error::~zip_error() = default; + +} // namespace orcus + /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/src/parser/json_parser_base.cpp b/src/parser/json_parser_base.cpp index 1e1e1ee6f3325b04b5b8e1aeb68a61190bb6302e..0dcdc3b75e62fee58fb67d9b2d48382c9abbb31b 100644 --- a/src/parser/json_parser_base.cpp +++ b/src/parser/json_parser_base.cpp @@ -31,28 +31,13 @@ const char* parse_numeric_json(const char* p, const char* p_end, double& value) } // anonymous namespace -parse_error::parse_error(const std::string& msg, std::ptrdiff_t offset) : - ::orcus::parse_error(msg, offset) {} - -void parse_error::throw_with( - const char* msg_before, char c, const char* msg_after, std::ptrdiff_t offset) -{ - throw parse_error(build_message(msg_before, c, msg_after), offset); -} - -void parse_error::throw_with( - const char* msg_before, const char* p, size_t n, const char* msg_after, std::ptrdiff_t offset) -{ - throw parse_error(build_message(msg_before, p, n, msg_after), offset); -} - struct parser_base::impl { cell_buffer m_buffer; }; -parser_base::parser_base(const char* p, size_t n) : - ::orcus::parser_base(p, n, false), mp_impl(std::make_unique()) +parser_base::parser_base(std::string_view content) : + orcus::parser_base(content.data(), content.size()), mp_impl(std::make_unique()) { set_numeric_parser(parse_numeric_json); diff --git a/src/parser/json_parser_test.cpp b/src/parser/json_parser_test.cpp index 470b71c19de1d777e0094f640ec06cd199993617..db6486171d231d7b11452ef59761dddfe540e61e 100644 --- a/src/parser/json_parser_test.cpp +++ b/src/parser/json_parser_test.cpp @@ -12,10 +12,9 @@ void test_handler() { const char* test_code = "{\"key1\": [1,2,3,4,5], \"key2\": 
12.3}"; - size_t n = strlen(test_code); orcus::json_handler hdl; - orcus::json_parser parser(test_code, n, hdl); + orcus::json_parser parser(test_code, hdl); parser.parse(); } diff --git a/src/parser/json_parser_thread.cpp b/src/parser/json_parser_thread.cpp index 3e5b56e2a6617623ddd09c84d8139408bda7f9fd..c9eea8d99a1f1a131d528f0c0adc28bf81d152d6 100644 --- a/src/parser/json_parser_thread.cpp +++ b/src/parser/json_parser_thread.cpp @@ -77,7 +77,7 @@ struct parser_thread::impl { try { - json_parser parser(mp_char, m_size, *this); + json_parser parser({mp_char, m_size}, *this); parser.parse(); } catch (const parse_error& e) @@ -119,9 +119,8 @@ struct parser_thread::impl check_and_notify(); } - void object_key(const char* p, size_t len, bool transient) + void object_key(std::string_view s, bool transient) { - std::string_view s{p, len}; if (transient) s = m_pool.intern(s).first; @@ -153,9 +152,8 @@ struct parser_thread::impl check_and_notify(); } - void string(const char* p, size_t len, bool transient) + void string(std::string_view s, bool transient) { - std::string_view s{p, len}; if (transient) s = m_pool.intern(s).first; diff --git a/src/parser/parser_base.cpp b/src/parser/parser_base.cpp index 8a3928606c1d5b52423b53b9946b1c2cbb82d32c..20587a9e70ebbc01fc55ec9900fff59c10e85512 100644 --- a/src/parser/parser_base.cpp +++ b/src/parser/parser_base.cpp @@ -21,68 +21,8 @@ namespace orcus { -namespace { - -std::string build_offset_msg(std::ptrdiff_t offset) -{ - std::ostringstream os; - os << " (offset=" << offset << ')'; - return os.str(); -} - -} - -parse_error::parse_error(const std::string& msg, std::ptrdiff_t offset) : - general_error(msg), m_offset(offset) -{ - append_msg(build_offset_msg(offset)); -} - -parse_error::parse_error(const std::string& cls, const std::string& msg, std::ptrdiff_t offset) : - general_error(cls, msg), m_offset(offset) -{ - append_msg(build_offset_msg(offset)); -} - -std::ptrdiff_t parse_error::offset() const -{ - return m_offset; -} - 
-std::string parse_error::build_message(const char* msg_before, char c, const char* msg_after) -{ - std::ostringstream os; - - if (msg_before) - os << msg_before; - - os << c; - - if (msg_after) - os << msg_after; - - return os.str(); -} - -std::string parse_error::build_message( - const char* msg_before, const char* p, size_t n, const char* msg_after) -{ - std::ostringstream os; - - if (msg_before) - os << msg_before; - - os << std::string_view(p, n); - - if (msg_after) - os << msg_after; - - return os.str(); -} - -parser_base::parser_base(const char* p, size_t n, bool transient_stream) : +parser_base::parser_base(const char* p, size_t n) : mp_begin(p), mp_char(p), mp_end(p+n), - m_transient_stream(transient_stream), m_func_parse_numeric(parse_numeric) { } diff --git a/src/parser/parser_base_test.cpp b/src/parser/parser_base_test.cpp index 6675f24ed740866e0e256db4f0c620340e4297eb..74994da3fcaeb982b30a7536a128bb8a49249e60 100644 --- a/src/parser/parser_base_test.cpp +++ b/src/parser/parser_base_test.cpp @@ -16,7 +16,7 @@ void test_skip_space_and_control() class _test_type : public orcus::parser_base { public: - _test_type(const char* p, size_t n) : orcus::parser_base(p, n, false) {} + _test_type(const char* p, size_t n) : orcus::parser_base(p, n) {} void run() { diff --git a/src/parser/parser_global.cpp b/src/parser/parser_global.cpp index 59ae93b68abc67266c885f62763231a12c66d219..5489e2163360ada85661bb811f1a469ea1325c8b 100644 --- a/src/parser/parser_global.cpp +++ b/src/parser/parser_global.cpp @@ -184,19 +184,20 @@ parse_quoted_string_state parse_string_with_escaped_char( switch (*p) { case '"': + { // closing quote. buffer.append(p_head, len); ++p; // skip the quote. 
- ret.str = buffer.get(); - ret.length = buffer.size(); + std::string_view s = buffer.str(); + ret.str = s.data(); + ret.length = s.size(); return ret; - break; + } case '\\': { escape = true; continue; } - break; default: ; } @@ -243,8 +244,9 @@ parse_quoted_string_state parse_single_quoted_string_buffered( if (last == '\'') { buffer.append(p0, len-1); - ret.str = buffer.get(); - ret.length = buffer.size(); + auto s = buffer.str(); + ret.str = s.data(); + ret.length = s.size(); return ret; } } @@ -257,8 +259,9 @@ parse_quoted_string_state parse_single_quoted_string_buffered( if (last == '\'') { buffer.append(p0, len-1); - ret.str = buffer.get(); - ret.length = buffer.size(); + auto s = buffer.str(); + ret.str = s.data(); + ret.length = s.size(); return ret; } diff --git a/src/parser/parser_test_json_validation.cpp b/src/parser/parser_test_json_validation.cpp index 11588bd567fcadfeefb1d9ac4a96fcbfc99f4c9c..a001226988a04f384625e685b54c136b117468df 100644 --- a/src/parser/parser_test_json_validation.cpp +++ b/src/parser/parser_test_json_validation.cpp @@ -369,10 +369,10 @@ void test_pass() std::cout << test_file_name << std::endl; try { - orcus::json_parser parser(content.c_str(), content.size(), hdl); + orcus::json_parser parser(content, hdl); parser.parse(); } - catch (const orcus::json::parse_error& e) + catch (const orcus::parse_error& e) { std::cout << e.what() << std::endl; std::cout << orcus::create_parse_error_output(content, e.offset()) << std::endl; @@ -391,7 +391,7 @@ void test_fail() std::cout << test_file_name << std::endl; bool failed = false; try { - orcus::json_parser parser(content.c_str(), content.size(), hdl); + orcus::json_parser parser(content, hdl); parser.parse(); } catch (const orcus::parse_error&) @@ -416,7 +416,7 @@ void test_indeterminate() std::string content = load_file(test_file_name); std::cout << test_file_name << std::endl; try { - orcus::json_parser parser(content.c_str(), content.size(), hdl); + orcus::json_parser parser(content, 
hdl); parser.parse(); } catch (const orcus::parse_error&) diff --git a/src/parser/parser_test_xml_validation.cpp b/src/parser/parser_test_xml_validation.cpp index 84fc26d15a0c41903fb0d1a6998621fc9d26e9b4..0e74e0aaab56e60de1ee31cc0445c82c79d0518f 100644 --- a/src/parser/parser_test_xml_validation.cpp +++ b/src/parser/parser_test_xml_validation.cpp @@ -31,14 +31,14 @@ void test_invalid() orcus::file_content content(entry.string()); _handler hdl; - orcus::sax_parser<_handler> parser(content.data(), content.size(), hdl); + orcus::sax_parser<_handler> parser(content.str(), hdl); try { parser.parse(); assert(!"exception was expected, but one was not thrown."); } - catch (const orcus::sax::malformed_xml_error& e) + catch (const orcus::malformed_xml_error& e) { std::cerr << orcus::create_parse_error_output(content.str(), e.offset()) << std::endl; std::cerr << e.what() << std::endl; diff --git a/src/parser/sax_ns_parser_test.cpp b/src/parser/sax_ns_parser_test.cpp index 40ef8b081e461f1bf3c384babb7e935ba207e3b2..eb7443f7db5ce2e2831cd6986dd50f0dcdc50117 100644 --- a/src/parser/sax_ns_parser_test.cpp +++ b/src/parser/sax_ns_parser_test.cpp @@ -13,12 +13,11 @@ void test_handler() { const char* test_code = ""; - size_t len = std::strlen(test_code); orcus::sax_ns_handler hdl; orcus::xmlns_repository repo; orcus::xmlns_context cxt = repo.create_context(); - orcus::sax_ns_parser parser(test_code, len, cxt, hdl); + orcus::sax_ns_parser parser(test_code, cxt, hdl); parser.parse(); } @@ -53,7 +52,6 @@ void test_default_attr_ns() }; const char* test_code = ""; - size_t len = strlen(test_code); const orcus::xmlns_id_t predefined[] = { default_ns, nullptr }; @@ -65,7 +63,7 @@ void test_default_attr_ns() _handler hdl; hdl.default_ns_expected = default_ns; - orcus::sax_ns_parser<_handler> parser(test_code, len, cxt, hdl); + orcus::sax_ns_parser<_handler> parser(test_code, cxt, hdl); parser.parse(); } diff --git a/src/parser/sax_parser_base.cpp b/src/parser/sax_parser_base.cpp index 
54dd89aebbc4389cd2c2557daccd0ba864d82042..95d8c6e7733c7671610b2e0a46ecbbde3e8a069d 100644 --- a/src/parser/sax_parser_base.cpp +++ b/src/parser/sax_parser_base.cpp @@ -19,11 +19,6 @@ namespace orcus { namespace sax { -malformed_xml_error::malformed_xml_error(const std::string& msg, std::ptrdiff_t offset) : - ::orcus::parse_error("malformed_xml_error", msg, offset) {} - -malformed_xml_error::~malformed_xml_error() throw() {} - char decode_xml_encoded_char(const char* p, size_t n) { if (n == 2) @@ -113,8 +108,8 @@ struct parser_base::impl std::vector> m_cell_buffers; }; -parser_base::parser_base(const char* content, size_t size, bool transient_stream) : - ::orcus::parser_base(content, size, transient_stream), +parser_base::parser_base(const char* content, size_t size) : + ::orcus::parser_base(content, size), mp_impl(std::make_unique()), m_nest_level(0), m_buffer_pos(0), @@ -290,7 +285,7 @@ void parser_base::value_with_encoded_char(cell_buffer& buf, std::string_view& st buf.append(p0, mp_char-p0); if (!buf.empty()) - str = std::string_view(buf.get(), buf.size()); + str = buf.str(); // Skip the closing quote. assert(!has_char() || cur_char() == quote_char); @@ -327,7 +322,7 @@ bool parser_base::value(std::string_view& str, bool decode) // Skip the closing quote. 
next(); - return transient_stream(); + return false; } void parser_base::name(std::string_view& str) diff --git a/src/parser/sax_parser_test.cpp b/src/parser/sax_parser_test.cpp index 6476ff8aa1c9392965f5d5716d625d9d0a84438a..ec8b1f1913cfae00bd403fe36dca4d61e0816059 100644 --- a/src/parser/sax_parser_test.cpp +++ b/src/parser/sax_parser_test.cpp @@ -14,73 +14,12 @@ using namespace std; void test_handler() { const char* test_code = ""; - size_t len = std::strlen(test_code); orcus::sax_handler hdl; - orcus::sax_parser parser(test_code, len, hdl); + orcus::sax_parser parser(test_code, hdl); parser.parse(); } -void test_transient_stream() -{ - struct _handler : public orcus::sax_handler - { - void characters(std::string_view val, bool transient) - { - cout << "characters: '" << val << "' (transient=" << transient << ")" << endl; - - if (transient_stream) - // When parsing a transient stream, this flag is always set. - assert(transient); - else if (val == "non-transient") - assert(!transient); - else if (val == "(&&&)") - assert(transient); - else if (val == " ") - assert(!transient); - } - - void attribute(const orcus::sax::parser_attribute& attr) - { - cout << "attribute: " << attr.name << "=\"" << attr.value << "\" (transient=" << attr.transient << ")" << endl; - - if (transient_stream) - // When parsing a transient stream, this flag is always set. 
- assert(attr.transient); - else if (attr.name == "attr1") - assert(!attr.transient); - else if (attr.name == "attr2") - assert(attr.transient); - else if (attr.name == "version") - assert(!attr.transient); - } - - bool transient_stream = false; - }; - - const char* content = - "" - "" - " non-transient" - " (&&&)" - "" - ; - - { - _handler hdl; - hdl.transient_stream = false; - orcus::sax_parser<_handler> parser(content, strlen(content), hdl.transient_stream, hdl); - parser.parse(); - } - - { - _handler hdl; - hdl.transient_stream = true; - orcus::sax_parser<_handler> parser(content, strlen(content), hdl.transient_stream, hdl); - parser.parse(); - } -} - void test_attr_equal_with_whitespace() { struct _handler : public orcus::sax_handler {}; @@ -91,7 +30,7 @@ void test_attr_equal_with_whitespace() ; _handler hdl; - orcus::sax_parser<_handler> parser(content, strlen(content), hdl); + orcus::sax_parser<_handler> parser(content, hdl); parser.parse(); } @@ -112,14 +51,13 @@ void test_attr_with_encoded_chars_single_quotes() ; _handler hdl; - orcus::sax_parser<_handler> parser(content, strlen(content), hdl); + orcus::sax_parser<_handler> parser(content, hdl); parser.parse(); } int main() { test_handler(); - test_transient_stream(); test_attr_equal_with_whitespace(); test_attr_with_encoded_chars_single_quotes(); diff --git a/src/parser/sax_token_parser_test.cpp b/src/parser/sax_token_parser_test.cpp index f3ad592781938a79e1bd279fcd9f09759938cf5e..d473196c40f1f2e5024876a7b233edb03c27a95e 100644 --- a/src/parser/sax_token_parser_test.cpp +++ b/src/parser/sax_token_parser_test.cpp @@ -18,13 +18,12 @@ using namespace orcus; void test_handler() { const char* test_code = ""; - size_t len = strlen(test_code); orcus::sax_token_handler hdl; orcus::tokens token_map(nullptr, 0); orcus::xmlns_repository repo; orcus::xmlns_context cxt = repo.create_context(); - orcus::sax_token_parser parser(test_code, len, token_map, cxt, hdl); + orcus::sax_token_parser parser(test_code, token_map, 
cxt, hdl); parser.parse(); } @@ -32,7 +31,6 @@ void test_sax_token_parser_1() { // Test XML content. const char* content = ""; - size_t content_size = strlen(content); // Array of tokens to define for this test. const char* token_names[] = { @@ -115,7 +113,7 @@ void test_sax_token_parser_1() tokens token_map(token_names, token_count); xmlns_repository ns_repo; xmlns_context ns_cxt = ns_repo.create_context(); - sax_token_parser parser(content, content_size, token_map, ns_cxt, hdl); + sax_token_parser parser(content, token_map, ns_cxt, hdl); parser.parse(); assert(hdl.get_token_count() == std::size(checks)); @@ -162,13 +160,13 @@ void test_unicode_string() xmlns_repository ns_repo; xmlns_context ns_cxt = ns_repo.create_context(); handler hdl(u8"\u0021"); - sax_token_parser parser1(content1, strlen(content1), token_map, ns_cxt, hdl); + sax_token_parser parser1(content1, token_map, ns_cxt, hdl); parser1.parse(); hdl = handler(u8"\u00B6"); - sax_token_parser parser2(content2, strlen(content2), token_map, ns_cxt, hdl); + sax_token_parser parser2(content2, token_map, ns_cxt, hdl); parser2.parse(); hdl = handler(u8"\u20B9"); - sax_token_parser parser3(content3, strlen(content3), token_map, ns_cxt, hdl); + sax_token_parser parser3(content3, token_map, ns_cxt, hdl); parser3.parse(); } @@ -221,7 +219,7 @@ void test_declaration() { xml_declaration_t decl; handler hdl(decl); - sax_token_parser parser(c.content.data(), c.content.size(), token_map, ns_cxt, hdl); + sax_token_parser parser(c.content, token_map, ns_cxt, hdl); parser.parse(); assert(decl == c.decl); diff --git a/src/parser/sax_token_parser_thread.cpp b/src/parser/sax_token_parser_thread.cpp index 36cef8f786a6c81ab50474618ddec66d71b2ace2..3d7b16b37eeeaa90f355ba8afa6c1a7c40a31933 100644 --- a/src/parser/sax_token_parser_thread.cpp +++ b/src/parser/sax_token_parser_thread.cpp @@ -53,7 +53,7 @@ bool parse_token::operator!= (const parse_token& other) const struct parser_thread::impl { - detail::thread::parser_token_buffer 
m_token_buffer; + orcus::detail::thread::parser_token_buffer m_token_buffer; string_pool m_pool; std::vector> m_element_store; @@ -135,7 +135,7 @@ struct parser_thread::impl { try { - orcus::sax_token_parser parser(mp_char, m_size, m_tokens, m_ns_cxt, *this); + orcus::sax_token_parser parser({mp_char, m_size}, m_tokens, m_ns_cxt, *this); parser.parse(); } catch (const malformed_xml_error& e) diff --git a/src/parser/stream.cpp b/src/parser/stream.cpp index 889258b8446ecbaaebb006302159c0cde651326b..3964e4b5f15d6fd60397c28828a48834442ce99c 100644 --- a/src/parser/stream.cpp +++ b/src/parser/stream.cpp @@ -106,8 +106,15 @@ std::tuple find_line_with_offset(std::string_v const char* p_end = p0 + strm.size(); const char* p_offset = p0 + offset; + if (p_offset >= p_end) + { + std::ostringstream os; + os << "offset value of " << offset << " is out-of-bound for a stream of length " << strm.size(); + throw std::invalid_argument(os.str()); + } + // Determine the line number. - size_t line_num = 1; + std::size_t line_num = 0; for (const char* p = p0; p != p_offset; ++p) { if (*p == '\n') @@ -175,16 +182,12 @@ struct file_content::impl file_content::file_content() : mp_impl(std::make_unique()) {} -file_content::file_content(file_content&& other) : - mp_impl(std::move(other.mp_impl)) -{ - other.mp_impl = std::make_unique(); -} +file_content::file_content(file_content&& other) = default; file_content::file_content(std::string_view filepath) : mp_impl(std::make_unique(filepath)) {} -file_content::~file_content() {} +file_content::~file_content() = default; const char* file_content::data() const { @@ -251,13 +254,8 @@ memory_content::memory_content() : mp_impl(std::make_unique()) {} memory_content::memory_content(std::string_view s) : mp_impl(std::make_unique(s)) {} -memory_content::memory_content(memory_content&& other) : - mp_impl(std::move(other.mp_impl)) -{ - other.mp_impl = std::make_unique(); -} - -memory_content::~memory_content() {} 
+memory_content::memory_content(memory_content&& other) = default; +memory_content::~memory_content() = default; const char* memory_content::data() const { @@ -303,32 +301,33 @@ std::string_view memory_content::str() const return mp_impl->content; } -line_with_offset::line_with_offset(std::string _line, size_t _line_number, size_t _offset_on_line) : +line_with_offset::line_with_offset(std::string _line, std::size_t _line_number, std::size_t _offset_on_line) : line(std::move(_line)), line_number(_line_number), offset_on_line(_offset_on_line) {} -line_with_offset::line_with_offset(const line_with_offset& other) : - line(other.line), - line_number(other.line_number), - offset_on_line(other.offset_on_line) -{} +line_with_offset::line_with_offset(const line_with_offset& other) = default; +line_with_offset::line_with_offset(line_with_offset&& other) = default; +line_with_offset::~line_with_offset() = default; -line_with_offset::line_with_offset(line_with_offset&& other) : - line(std::move(other.line)), - line_number(other.line_number), - offset_on_line(other.offset_on_line) -{} +bool line_with_offset::operator== (const line_with_offset& other) const +{ + return line == other.line && line_number == other.line_number && offset_on_line == other.offset_on_line; +} -line_with_offset::~line_with_offset() {} +bool line_with_offset::operator!= (const line_with_offset& other) const +{ + return !operator==(other); +} std::string create_parse_error_output(std::string_view strm, std::ptrdiff_t offset) { - if (offset < 0) + if (strm.empty() || offset < 0) return std::string(); const size_t max_line_length = 60; + offset = std::min(strm.size() - 1, offset); auto line_info = find_line_with_offset(strm, offset); std::string_view line = std::get<0>(line_info); @@ -338,7 +337,7 @@ std::string create_parse_error_output(std::string_view strm, std::ptrdiff_t offs if (offset_on_line < 30) { std::ostringstream os; - os << line_num << ":" << (offset_on_line+1) << ": "; + os << (line_num+1) << 
":" << (offset_on_line+1) << ": "; size_t line_num_width = os.str().size(); // Truncate line if it's too long. diff --git a/src/parser/stream_test.cpp b/src/parser/stream_test.cpp index cc6b875fedfd33eed9b44aeb7f9fd2a86418d922..1a6e9fbcb48ca1d6bb3cce318fb9623261a7cf30 100644 --- a/src/parser/stream_test.cpp +++ b/src/parser/stream_test.cpp @@ -85,11 +85,58 @@ void test_stream_logical_string_length() } } +void test_stream_locate_line_with_offset() +{ + test::stack_printer __sp__(__func__); + + std::string strm = "one\ntwo\nthree"; + + struct check + { + std::ptrdiff_t offset; + line_with_offset expected; + }; + + const std::vector checks = { + { 0, { "one", 0, 0 } }, + { 1, { "one", 0, 1 } }, + { 2, { "one", 0, 2 } }, + { 3, { "one", 0, 3 } }, // on line break + { 4, { "two", 1, 0 } }, + { 5, { "two", 1, 1 } }, + { 6, { "two", 1, 2 } }, + { 7, { "two", 1, 3 } }, // on line break + { 8, { "three", 2, 0 } }, + { 9, { "three", 2, 1 } }, + { 10, { "three", 2, 2 } }, + { 11, { "three", 2, 3 } }, + { 12, { "three", 2, 4 } }, + }; + + for (const auto& c : checks) + { + auto res = locate_line_with_offset(strm, c.offset); + assert(res == c.expected); + } + + try + { + auto res = locate_line_with_offset(strm, strm.size()); + assert(!"exception should have been thrown for out-of-bound offset!"); + } + catch (const std::invalid_argument& e) + { + // expected + cout << "exception thrown as expected: '" << e.what() << "'" << endl; + } +} + int main() { test_stream_create_error_output(); test_stream_locate_first_different_char(); test_stream_logical_string_length(); + test_stream_locate_line_with_offset(); return EXIT_SUCCESS; } diff --git a/src/parser/string_pool.cpp b/src/parser/string_pool.cpp index 27b38d01db7485a6b94ccbdc19feb9924fd72725..e438da564e31c6cea0aec3597f641d7e622df3bc 100644 --- a/src/parser/string_pool.cpp +++ b/src/parser/string_pool.cpp @@ -41,6 +41,8 @@ struct string_pool::impl string_pool::string_pool() : mp_impl(std::make_unique()) {} 
+string_pool::string_pool(string_pool&& other) : mp_impl(std::move(other.mp_impl)) {} + string_pool::~string_pool() = default; std::pair string_pool::intern(std::string_view str) diff --git a/src/parser/string_pool_test.cpp b/src/parser/string_pool_test.cpp index acb68f5d56784afe27d24f8917c849dbccd24194..56a4980acc32522efca6bc10e89bb09614c31cdd 100644 --- a/src/parser/string_pool_test.cpp +++ b/src/parser/string_pool_test.cpp @@ -8,6 +8,8 @@ #include "test_global.hpp" #include +#include + using namespace orcus; void test_basic() @@ -105,10 +107,27 @@ void test_merge() assert(entries.size() == pool1.size()); } +void test_move() +{ + static_assert(!std::is_copy_constructible_v); + static_assert(std::is_move_constructible_v); + + string_pool pool1; + pool1.intern("A"); + pool1.intern("B"); + pool1.intern("C"); + pool1.intern("D"); + pool1.intern("E"); + + string_pool pool2 = std::move(pool1); + assert(pool2.size() == 5); +} + int main() { test_basic(); test_merge(); + test_move(); return EXIT_SUCCESS; } diff --git a/src/parser/threaded_json_parser_test.cpp b/src/parser/threaded_json_parser_test.cpp index ecee3984f1bb8e6550761fcbe635fcea69c32b0c..5ac8053bccc06829522393f85d84225de76e1ee8 100644 --- a/src/parser/threaded_json_parser_test.cpp +++ b/src/parser/threaded_json_parser_test.cpp @@ -169,7 +169,7 @@ void test_threaded_json_parser_invalid() parser.parse(); assert(false); } - catch (const json::parse_error&) + catch (const parse_error&) { // works as expected. 
cout << "invalid source: " << src << endl; diff --git a/src/parser/threaded_sax_token_parser_test.cpp b/src/parser/threaded_sax_token_parser_test.cpp index 2f2cf1a43704b4ab32b4fbc0758c3f0a86289abc..1338b2acca7f99eaa6e8b267357766da31cea39b 100644 --- a/src/parser/threaded_sax_token_parser_test.cpp +++ b/src/parser/threaded_sax_token_parser_test.cpp @@ -134,7 +134,7 @@ void test_sax_token_parser_1() parser.parse(); assert(!"An exception was expected, but one was not thrown."); } - catch (const sax::malformed_xml_error& e) + catch (const malformed_xml_error& e) { assert(e.offset() == 28u); } diff --git a/src/parser/tokens.cpp b/src/parser/tokens.cpp index 846082c03ba348a11ffdfa76de9d0cee440fa9f6..5d3c5333df45ce3c7c2c35717039839437b98dce 100644 --- a/src/parser/tokens.cpp +++ b/src/parser/tokens.cpp @@ -17,6 +17,8 @@ tokens::tokens(const char** token_names, size_t token_name_count) : m_tokens.emplace(m_token_names[i], xml_token_t(i)); } +tokens::~tokens() = default; + bool tokens::is_valid_token(xml_token_t token) const { return token != XML_UNKNOWN_TOKEN; diff --git a/src/parser/types.cpp b/src/parser/types.cpp index c7c17fe74175ce1dfd8630d16b42685f3fd9a6af..5d469c5f51e678a14068a9b600054e990ee6b419 100644 --- a/src/parser/types.cpp +++ b/src/parser/types.cpp @@ -19,26 +19,20 @@ namespace orcus { -const xmlns_id_t XMLNS_UNKNOWN_ID = nullptr; -const xml_token_t XML_UNKNOWN_TOKEN = 0; - -size_t xml_token_pair_hash::operator()(const xml_token_pair_t& v) const -{ - return std::hash()(v.first) ^ std::hash()(v.second); -} - -const size_t index_not_found = std::numeric_limits::max(); - parse_error_value_t::parse_error_value_t() : offset(0) { } +parse_error_value_t::parse_error_value_t(const parse_error_value_t& other) = default; + parse_error_value_t::parse_error_value_t(std::string_view _str, std::ptrdiff_t _offset) : str(_str), offset(_offset) { } +parse_error_value_t& parse_error_value_t::operator=(const parse_error_value_t& other) = default; + bool 
parse_error_value_t::operator==(const parse_error_value_t& other) const { return str == other.str && offset == other.offset; @@ -49,23 +43,18 @@ bool parse_error_value_t::operator!=(const parse_error_value_t& other) const return !operator==(other); } -xml_name_t::xml_name_t() : ns(XMLNS_UNKNOWN_ID), name() {} +xml_name_t::xml_name_t() noexcept : ns(XMLNS_UNKNOWN_ID), name() {} xml_name_t::xml_name_t(xmlns_id_t _ns, std::string_view _name) : ns(_ns), name(_name) {} -xml_name_t::xml_name_t(const xml_name_t& r) : ns(r.ns), name(r.name) {} +xml_name_t::xml_name_t(const xml_name_t& other) = default; -xml_name_t& xml_name_t::operator= (const xml_name_t& other) -{ - ns = other.ns; - name = other.name; - return *this; -} +xml_name_t& xml_name_t::operator= (const xml_name_t& other) = default; -bool xml_name_t::operator== (const xml_name_t& other) const +bool xml_name_t::operator== (const xml_name_t& other) const noexcept { return ns == other.ns && name == other.name; } -bool xml_name_t::operator!= (const xml_name_t& other) const +bool xml_name_t::operator!= (const xml_name_t& other) const noexcept { return !operator==(other); } @@ -113,6 +102,8 @@ std::string xml_name_t::to_string(const xmlns_repository& repo) const xml_token_attr_t::xml_token_attr_t() : ns(XMLNS_UNKNOWN_ID), name(XML_UNKNOWN_TOKEN), transient(false) {} +xml_token_attr_t::xml_token_attr_t(const xml_token_attr_t& other) = default; + xml_token_attr_t::xml_token_attr_t( xmlns_id_t _ns, xml_token_t _name, std::string_view _value, bool _transient) : ns(_ns), name(_name), value(_value), transient(_transient) {} @@ -121,6 +112,8 @@ xml_token_attr_t::xml_token_attr_t( xmlns_id_t _ns, xml_token_t _name, std::string_view _raw_name, std::string_view _value, bool _transient) : ns(_ns), name(_name), raw_name(_raw_name), value(_value), transient(_transient) {} +xml_token_attr_t& xml_token_attr_t::operator=(const xml_token_attr_t& other) = default; + xml_token_element_t::xml_token_element_t() : ns(nullptr), 
name(XML_UNKNOWN_TOKEN) {} xml_token_element_t::xml_token_element_t( @@ -224,15 +217,8 @@ date_time_t::date_time_t(int _year, int _month, int _day) : date_time_t::date_time_t(int _year, int _month, int _day, int _hour, int _minute, double _second) : year(_year), month(_month), day(_day), hour(_hour), minute(_minute), second(_second) {} -date_time_t::date_time_t(const date_time_t& other) : - year(other.year), - month(other.month), - day(other.day), - hour(other.hour), - minute(other.minute), - second(other.second) {} - -date_time_t::~date_time_t() {} +date_time_t::date_time_t(const date_time_t& other) = default; +date_time_t::~date_time_t() = default; date_time_t& date_time_t::operator= (date_time_t other) { @@ -1459,6 +1445,10 @@ std::ostream& operator<< (std::ostream& os, format_t v) return os; } +const std::size_t INDEX_NOT_FOUND = std::numeric_limits::max(); +const xmlns_id_t XMLNS_UNKNOWN_ID = nullptr; +const xml_token_t XML_UNKNOWN_TOKEN = 0; + } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/src/parser/xml_namespace.cpp b/src/parser/xml_namespace.cpp index 9c371ca8354b326070e96ec18eb7d5a9b9cfc23e..2aafea3d8dba55642cd20205e63489c42d36e8f6 100644 --- a/src/parser/xml_namespace.cpp +++ b/src/parser/xml_namespace.cpp @@ -63,7 +63,14 @@ struct xmlns_repository::impl }; xmlns_repository::xmlns_repository() : mp_impl(std::make_unique()) {} -xmlns_repository::~xmlns_repository() {} +xmlns_repository::xmlns_repository(xmlns_repository&& other) : mp_impl(std::move(other.mp_impl)) {} +xmlns_repository::~xmlns_repository() = default; + +xmlns_repository& xmlns_repository::operator= (xmlns_repository&& other) +{ + mp_impl = std::move(other.mp_impl); + return *this; +} xmlns_id_t xmlns_repository::intern(std::string_view uri) { @@ -145,15 +152,11 @@ xmlns_id_t xmlns_repository::get_identifier(size_t index) const string xmlns_repository::get_short_name(xmlns_id_t ns_id) const { size_t index = get_index(ns_id); - return get_short_name(index); -} -string 
xmlns_repository::get_short_name(size_t index) const -{ - if (index == index_not_found) + if (index == INDEX_NOT_FOUND) return string("???"); - ostringstream os; + std::ostringstream os; os << "ns" << index; return os.str(); } @@ -161,11 +164,11 @@ string xmlns_repository::get_short_name(size_t index) const size_t xmlns_repository::get_index(xmlns_id_t ns_id) const { if (!ns_id) - return index_not_found; + return INDEX_NOT_FOUND; auto it = mp_impl->m_strid_map.find(std::string_view(ns_id)); if (it == mp_impl->m_strid_map.end()) - return index_not_found; + return INDEX_NOT_FOUND; return it->second; } @@ -196,7 +199,7 @@ xmlns_context::xmlns_context(xmlns_context&& r) : mp_impl(std::move(r.mp_impl)) r.mp_impl = std::make_unique(); } -xmlns_context::~xmlns_context() {} +xmlns_context::~xmlns_context() = default; xmlns_context& xmlns_context::operator= (const xmlns_context& r) { @@ -212,37 +215,37 @@ xmlns_context& xmlns_context::operator= (xmlns_context&& r) return *this; } -xmlns_id_t xmlns_context::push(std::string_view key, std::string_view uri) +xmlns_id_t xmlns_context::push(std::string_view alias, std::string_view uri) { if (!mp_impl->repo) throw general_error("this context is not associated with any repo."); #if ORCUS_DEBUG_XML_NAMESPACE - cout << "xmlns_context::push: key='" << key << "', uri='" << uri << "'" << endl; + cout << "xmlns_context::push: key='" << alias << "', uri='" << uri << "'" << endl; #endif mp_impl->m_trim_all_ns = true; xmlns_id_t id = mp_impl->repo->intern(uri); std::string_view uri_interned = id ? std::string_view(id) : std::string_view(); - if (key.empty()) + if (alias.empty()) { - // empty key value is associated with default namespace. + // empty alias value is associated with default namespace. mp_impl->m_default.push_back(uri_interned.data()); mp_impl->m_all_ns.push_back(uri_interned.data()); return mp_impl->m_default.back(); } - // See if this key already exists. 
- alias_map_type::iterator it = mp_impl->m_map.find(key); + // See if this alias already exists. + alias_map_type::iterator it = mp_impl->m_map.find(alias); if (it == mp_impl->m_map.end()) { - // This is the first time this key is used. + // This is the first time this alias is used. xmlns_list_type nslist; nslist.push_back(uri_interned.data()); mp_impl->m_all_ns.push_back(uri_interned.data()); std::pair r = - mp_impl->m_map.insert(alias_map_type::value_type(key, nslist)); + mp_impl->m_map.insert(alias_map_type::value_type(alias, nslist)); if (!r.second) // insertion failed. @@ -251,21 +254,21 @@ xmlns_id_t xmlns_context::push(std::string_view key, std::string_view uri) return nslist.back(); } - // The key already exists. + // The alias already exists. xmlns_list_type& nslist = it->second; nslist.push_back(uri_interned.data()); mp_impl->m_all_ns.push_back(uri_interned.data()); return nslist.back(); } -void xmlns_context::pop(std::string_view key) +void xmlns_context::pop(std::string_view alias) { #if ORCUS_DEBUG_XML_NAMESPACE - cout << "xmlns_context::pop: key='" << key << "'" << endl; + cout << "xmlns_context::pop: alias='" << alias << "'" << endl; #endif - if (key.empty()) + if (alias.empty()) { - // empty key value is associated with default namespace. + // empty alias value is associated with default namespace. if (mp_impl->m_default.empty()) throw general_error("default namespace stack is empty."); @@ -273,10 +276,14 @@ void xmlns_context::pop(std::string_view key) return; } - // See if this key really exists. - alias_map_type::iterator it = mp_impl->m_map.find(key); + // See if this alias really exists. 
+ alias_map_type::iterator it = mp_impl->m_map.find(alias); if (it == mp_impl->m_map.end()) - throw general_error("failed to find the key."); + { + std::ostringstream os; + os << "alias named '" << alias << "' was attempted to be popped, but was not found in the stack"; + throw general_error(os.str()); + } xmlns_list_type& nslist = it->second; if (nslist.empty()) @@ -285,19 +292,19 @@ void xmlns_context::pop(std::string_view key) nslist.pop_back(); } -xmlns_id_t xmlns_context::get(std::string_view key) const +xmlns_id_t xmlns_context::get(std::string_view alias) const { #if ORCUS_DEBUG_XML_NAMESPACE - cout << "xmlns_context::get: alias='" << key << "', default ns stack size=" + cout << "xmlns_context::get: alias='" << alias << "', default ns stack size=" << mp_impl->m_default.size() << ", non-default alias count=" << mp_impl->m_map.size(); cout << ", "; print_map_keys(mp_impl->m_map); cout << endl; #endif - if (key.empty()) + if (alias.empty()) return mp_impl->m_default.empty() ? XMLNS_UNKNOWN_ID : mp_impl->m_default.back(); - alias_map_type::const_iterator it = mp_impl->m_map.find(key); + alias_map_type::const_iterator it = mp_impl->m_map.find(alias); if (it == mp_impl->m_map.end()) { #if ORCUS_DEBUG_XML_NAMESPACE @@ -382,7 +389,7 @@ public: void operator() (xmlns_id_t ns) { size_t num_id = m_cxt.get_index(ns); - if (num_id != index_not_found) + if (num_id != INDEX_NOT_FOUND) m_store.push_back(ns_item(num_id, ns)); } }; @@ -444,7 +451,7 @@ void xmlns_context::dump(std::ostream& os) const { xmlns_id_t ns_id = *it; size_t num_id = get_index(ns_id); - if (num_id == index_not_found) + if (num_id == INDEX_NOT_FOUND) continue; os << "ns" << num_id << "=\"" << ns_id << '"' << endl; @@ -457,7 +464,7 @@ void xmlns_context::dump_state(std::ostream& os) const for (xmlns_id_t ns_id : get_all_namespaces()) { size_t num_id = get_index(ns_id); - if (num_id == index_not_found) + if (num_id == INDEX_NOT_FOUND) continue; os << " ns" << num_id << ": \"" << ns_id << '"' << std::endl; 
diff --git a/src/parser/xml_namespace_test.cpp b/src/parser/xml_namespace_test.cpp index 440296159d8800b4ff4dda072eec16215cfa263b..38551be8710d3dd45845199e6b84642666110276 100644 --- a/src/parser/xml_namespace_test.cpp +++ b/src/parser/xml_namespace_test.cpp @@ -20,6 +20,8 @@ namespace { void test_basic() { + ORCUS_TEST_FUNC_SCOPE; + pstring xmlns1("http://some.xmlns/"); pstring xmlns2("http://other.xmlns/"); @@ -53,6 +55,8 @@ void test_basic() void test_all_namespaces() { + ORCUS_TEST_FUNC_SCOPE; + pstring key1("a"), key2("b"), key3("c"); pstring ns1("foo"), ns2("baa"), ns3("hmm"); @@ -110,6 +114,8 @@ void test_predefined_ns() void test_xml_name_t() { + ORCUS_TEST_FUNC_SCOPE; + xml_name_t name1; name1.ns = NS_test_name1; name1.name = "foo"; @@ -127,6 +133,8 @@ void test_xml_name_t() void test_ns_context() { + ORCUS_TEST_FUNC_SCOPE; + xmlns_repository repo; repo.add_predefined_values(NS_test_all); @@ -193,6 +201,30 @@ void test_ns_context() assert(id1 == id2); } +void test_repo_move() +{ + ORCUS_TEST_FUNC_SCOPE; + + static_assert(!std::is_copy_constructible_v); + static_assert(std::is_move_constructible_v); + + xmlns_repository repo; + repo.add_predefined_values(NS_test_all); + + xmlns_repository repo_moved = std::move(repo); // move construction + xmlns_repository repo_moved2; + repo_moved2 = std::move(repo_moved); // move assignment + + xmlns_id_t ns_id = repo_moved2.get_identifier(0); + assert(ns_id != XMLNS_UNKNOWN_ID); + ns_id = repo_moved2.get_identifier(1); + assert(ns_id != XMLNS_UNKNOWN_ID); + ns_id = repo_moved2.get_identifier(2); + assert(ns_id != XMLNS_UNKNOWN_ID); + ns_id = repo_moved2.get_identifier(3); + assert(ns_id == XMLNS_UNKNOWN_ID); +} + } // anonymous namespace int main() @@ -202,6 +234,7 @@ int main() test_predefined_ns(); test_xml_name_t(); test_ns_context(); + test_repo_move(); return EXIT_SUCCESS; } diff --git a/src/parser/xml_writer_test.cpp b/src/parser/xml_writer_test.cpp index 
8687db322f08c4036f77ea6dd08c2f1ae5d87201..a6e4bed9f4bdbf00d686e3d37dec8c6f0c59438b 100644 --- a/src/parser/xml_writer_test.cpp +++ b/src/parser/xml_writer_test.cpp @@ -49,7 +49,7 @@ void test_encoded_content() _handler hdl; - sax_parser<_handler> parser(stream.data(), stream.size(), hdl); + sax_parser<_handler> parser(stream, hdl); parser.parse(); std::string content_read = hdl.os_content.str(); diff --git a/src/parser/yaml_parser_base.cpp b/src/parser/yaml_parser_base.cpp index 2ae028caa40cb53259c7aa6a1a29ed53c2eb1882..df4db23f190d79559ebec462ffce89e75bfb2604 100644 --- a/src/parser/yaml_parser_base.cpp +++ b/src/parser/yaml_parser_base.cpp @@ -19,20 +19,6 @@ namespace orcus { namespace yaml { -parse_error::parse_error(const std::string& msg, std::ptrdiff_t offset) : - ::orcus::parse_error(msg, offset) {} - -void parse_error::throw_with(const char* msg_before, char c, const char* msg_after, std::ptrdiff_t offset) -{ - throw parse_error(build_message(msg_before, c, msg_after), offset); -} - -void parse_error::throw_with( - const char* msg_before, const char* p, size_t n, const char* msg_after, std::ptrdiff_t offset) -{ - throw parse_error(build_message(msg_before, p, n, msg_after), offset); -} - struct scope { size_t width; @@ -67,10 +53,10 @@ const size_t parser_base::parse_indent_blank_line = std::numeric_limits::max() - 1; const size_t parser_base::scope_empty = std::numeric_limits::max() - 2; -parser_base::parser_base(const char* p, size_t n) : - ::orcus::parser_base(p, n, false), mp_impl(std::make_unique()) {} +parser_base::parser_base(std::string_view content) : + orcus::parser_base(content.data(), content.size()), mp_impl(std::make_unique()) {} -parser_base::~parser_base() {} +parser_base::~parser_base() = default; void parser_base::push_parse_token(detail::parse_token_t t) { @@ -301,7 +287,7 @@ std::string_view parser_base::merge_line_buffer() mp_impl->m_line_buffer.clear(); mp_impl->m_in_literal_block = false; - return std::string_view(buf.get(), 
buf.size()); + return buf.str(); } const char* parser_base::get_doc_hash() const @@ -439,7 +425,7 @@ parser_base::key_value parser_base::parse_key_value(const char* p, size_t len) // Key has not been found. detail::scope_t st = get_scope_type(); if (st == detail::scope_t::map) - throw yaml::parse_error("key was expected, but not found.", offset_last_char_of_line()); + throw parse_error("key was expected, but not found.", offset_last_char_of_line()); } return kv; @@ -493,7 +479,7 @@ void parser_base::handle_line_in_literal(size_t indent) // Start a new multi-line string scope. if (indent == cur_scope) - throw yaml::parse_error("parse: first line of a literal block must be indented.", offset()); + throw parse_error("parse: first line of a literal block must be indented.", offset()); push_scope(indent); set_scope_type(yaml::detail::scope_t::multi_line_string); diff --git a/src/parser/zip_archive.cpp b/src/parser/zip_archive.cpp index ee827a2f17e143d1b9abff7268cf4038148daf06..50d5da5be3f4b57c01b50eefa8c6f75bf9b77bee 100644 --- a/src/parser/zip_archive.cpp +++ b/src/parser/zip_archive.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -24,21 +25,6 @@ namespace orcus { -zip_error::zip_error() {} -zip_error::zip_error(const std::string& msg) : m_msg() -{ - std::ostringstream os; - os << "zip error: " << msg; - m_msg = os.str(); -} - -zip_error::~zip_error() throw() {} - -const char* zip_error::what() const throw() -{ - return m_msg.c_str(); -} - namespace { struct zip_file_param @@ -136,6 +122,18 @@ public: return std::string(reinterpret_cast(&buf[0])); } + std::vector read_bytes(std::size_t n) + { + if (!n) + throw zip_error("attempt to read string of zero size."); + + std::vector buf; + m_stream->seek(m_pos+m_pos_internal); + m_stream->read(buf.data(), n); + m_pos_internal += n; + return buf; + } + std::string_view read_string(size_t n, string_pool& pool) { std::vector buf(n+1, '\0'); @@ -199,7 +197,33 @@ struct central_dir_end } // 
anonymous namespace -class zip_archive_impl + +zip_file_entry_header::zip_file_entry_header() = default; +zip_file_entry_header::zip_file_entry_header(const zip_file_entry_header& other) = default; +zip_file_entry_header::zip_file_entry_header(zip_file_entry_header&& other) = default; +zip_file_entry_header::~zip_file_entry_header() = default; + +zip_file_entry_header& zip_file_entry_header::operator=(const zip_file_entry_header& other) = default; +zip_file_entry_header& zip_file_entry_header::operator=(zip_file_entry_header&& other) = default; + +std::ostream& operator<<(std::ostream& os, const zip_file_entry_header& header) +{ + os << "header signature: 0x" << std::hex << std::setfill('0') << std::setw(8) << header.header_signature << "\n" + << "version needed to extract: " << header.required_version << "\n" + << "general purpose bit flag: 0x" << std::hex << std::setfill('0') << std::setw(4) << header.flag << "\n" + << "compression method: " << header.compression_method << "\n" + << "last modified time: " << header.last_modified_time << "\n" + << "last modified date: " << header.last_modified_date << "\n" + << "crc32: 0x" << std::hex << std::setfill('0') << std::setw(8) << header.crc32 << "\n" + << "compressed size: " << header.compressed_size << "\n" + << "uncompressed size: " << header.uncompressed_size << "\n" + << "filename: " << header.filename << "\n" + << "extra field length: " << header.extra_field.size(); + + return os; +} + +class zip_archive::impl { typedef std::vector file_params_type; typedef std::unordered_map filename_map_type; @@ -215,12 +239,11 @@ class zip_archive_impl filename_map_type m_filenames; public: - zip_archive_impl(zip_archive_stream* stream); - ~zip_archive_impl(); + impl(zip_archive_stream* stream); void load(); - void dump_file_entry(size_t pos) const; - void dump_file_entry(std::string_view entry_name) const; + zip_file_entry_header get_file_entry_header(std::size_t index) const; + zip_file_entry_header 
get_file_entry_header(std::string_view name) const; std::string_view get_file_entry_name(size_t pos) const; size_t get_file_entry_count() const @@ -228,7 +251,7 @@ public: return m_file_params.size(); } - bool read_file_entry(std::string_view entry_name, std::vector& buf) const; + std::vector read_file_entry(std::string_view entry_name) const; private: @@ -242,7 +265,7 @@ private: void read_file_entries(); }; -zip_archive_impl::zip_archive_impl(zip_archive_stream* stream) : +zip_archive::impl::impl(zip_archive_stream* stream) : m_stream(stream), m_stream_size(0), m_central_dir_pos(0) { if (!m_stream) @@ -251,15 +274,11 @@ zip_archive_impl::zip_archive_impl(zip_archive_stream* stream) : m_stream_size = m_stream->size(); } -zip_archive_impl::~zip_archive_impl() -{ -} - -void zip_archive_impl::load() +void zip_archive::impl::load() { size_t central_dir_end_pos = seek_central_dir(); if (!central_dir_end_pos) - throw zip_error(); + throw zip_error("failed to seek the end position of the central directory"); m_central_dir_end = zip_stream_parser(m_stream, central_dir_end_pos); @@ -270,7 +289,51 @@ void zip_archive_impl::load() read_file_entries(); } -void zip_archive_impl::read_file_entries() +zip_file_entry_header zip_archive::impl::get_file_entry_header(std::size_t index) const +{ + if (index >= m_file_params.size()) + throw zip_error("invalid file entry index."); + + const zip_file_param& param = m_file_params[index]; + zip_stream_parser file_header(m_stream, param.offset_file_header); + + zip_file_entry_header header; + + header.header_signature = file_header.read_4bytes(); + header.required_version = file_header.read_2bytes(); + header.flag = file_header.read_2bytes(); + header.compression_method = file_header.read_2bytes(); + header.last_modified_time = file_header.read_2bytes(); + header.last_modified_date = file_header.read_2bytes(); + header.crc32 = file_header.read_4bytes(); + header.compressed_size = file_header.read_4bytes(); + header.uncompressed_size = 
file_header.read_4bytes(); + uint16_t filename_len = file_header.read_2bytes(); + uint16_t extra_field_len = file_header.read_2bytes(); + + if (filename_len) + header.filename = file_header.read_string(filename_len); + + if (extra_field_len) + header.extra_field = file_header.read_bytes(extra_field_len); + + return header; +} + +zip_file_entry_header zip_archive::impl::get_file_entry_header(std::string_view name) const +{ + auto it = m_filenames.find(name); + if (it == m_filenames.end()) + { + std::ostringstream os; + os << "file entry named '" << name << "' not found"; + throw zip_error(os.str()); + } + + return get_file_entry_header(it->second); +} + +void zip_archive::impl::read_file_entries() { m_file_params.clear(); @@ -344,94 +407,28 @@ void zip_archive_impl::read_file_entries() } } -void zip_archive_impl::dump_file_entry(size_t pos) const +std::string_view zip_archive::impl::get_file_entry_name(std::size_t pos) const { if (pos >= m_file_params.size()) - throw zip_error("invalid file entry index."); - - const zip_file_param& param = m_file_params[pos]; - std::cout << "-- filename: " << param.filename << std::endl; - - zip_stream_parser file_header(m_stream, param.offset_file_header); - uint32_t v32 = file_header.read_4bytes(); - printf(" header signature: 0x%8.8x\n", v32); - uint16_t v16 = file_header.read_2bytes(); - std::cout << " version needed to extract: " << v16 << std::endl; - v16 = file_header.read_2bytes(); - printf(" general purpose bit flag: 0x%4.4x\n", v16); - v16 = file_header.read_2bytes(); - std::cout << " compression method: " << v16 << std::endl; - v16 = file_header.read_2bytes(); - std::cout << " file last modified time: " << v16 << std::endl; - v16 = file_header.read_2bytes(); - std::cout << " file last modified date: " << v16 << std::endl; - v32 = file_header.read_4bytes(); - printf(" crc32: 0x%8.8x\n", v32); - v32 = file_header.read_4bytes(); - std::cout << " compressed size: " << v32 << std::endl; - v32 = file_header.read_4bytes(); - 
std::cout << " uncompressed size: " << v32 << std::endl; - size_t filename_len = file_header.read_2bytes(); - std::cout << " filename length: " << filename_len << std::endl; - uint16_t extra_field_len = file_header.read_2bytes(); - std::cout << " extra field length: " << extra_field_len << std::endl; - if (filename_len) - { - std::string filename = file_header.read_string(filename_len); - std::cout << " filename: '" << filename << "'" << std::endl; - } - - if (extra_field_len) - { - // Ignore extra field. - file_header.skip_bytes(extra_field_len); - } - - // Header followed by the actual data bytes. - - m_stream->seek(file_header.tell()); + return std::string_view{}; - std::vector buf; - if (read_file_entry(param.filename, buf)) - { - std::cout << "-- data section" << std::endl; - std::cout << &buf[0] << std::endl; - std::cout << "--" << std::endl; - } + return m_file_params[pos].filename; } -void zip_archive_impl::dump_file_entry(std::string_view entry_name) const +std::vector zip_archive::impl::read_file_entry(std::string_view entry_name) const { filename_map_type::const_iterator it = m_filenames.find(entry_name); if (it == m_filenames.end()) { - // entry name not found. - std::cout << "file entry '" << entry_name << "' not found." << std::endl; - return; + std::ostringstream os; + os << "entry named '" << entry_name << "' not found"; + throw zip_error(os.str()); } - dump_file_entry(it->second); -} - -std::string_view zip_archive_impl::get_file_entry_name(std::size_t pos) const -{ - if (pos >= m_file_params.size()) - return std::string_view{}; - - return m_file_params[pos].filename; -} - -bool zip_archive_impl::read_file_entry(std::string_view entry_name, std::vector& buf) const -{ - filename_map_type::const_iterator it = m_filenames.find(entry_name); - if (it == m_filenames.end()) - // entry name not found. - return false; size_t index = it->second; if (index >= m_file_params.size()) - // entry index is out of bound. 
- return false; + throw zip_error("entry index is out-of-bound"); const zip_file_param& param = m_file_params[index]; @@ -455,36 +452,34 @@ bool zip_archive_impl::read_file_entry(std::string_view entry_name, std::vector< m_stream->seek(file_header.tell()); std::vector raw_buf(param.size_compressed+1, 0); - m_stream->read(&raw_buf[0], param.size_compressed); + m_stream->read(raw_buf.data(), param.size_compressed); switch (param.compress_method) { case zip_file_param::stored: + { // Not compressed at all. - buf.swap(raw_buf); - return true; + return raw_buf; + } case zip_file_param::deflated: { // deflate compression std::vector zip_buf(param.size_uncompressed+1, 0); // null-terminated zip_inflater inflater(raw_buf, zip_buf, param); if (!inflater.init()) - break; + throw zip_error("error during initialization of inflater"); if (!inflater.inflate()) throw zip_error("error during inflate."); - buf.swap(zip_buf); - return true; + return zip_buf; } - default: - ; } - return false; + throw std::logic_error("compress method can be either 'stored' or 'deflated', but neither has happened"); } -size_t zip_archive_impl::seek_central_dir() +size_t zip_archive::impl::seek_central_dir() { // Search for the position of 0x06054b50 (read in little endian order - so // it's 0x50, 0x4b, 0x05, 0x06 in this order) somewhere near the end of @@ -538,7 +533,7 @@ size_t zip_archive_impl::seek_central_dir() return 0; } -void zip_archive_impl::read_central_dir_end() +void zip_archive::impl::read_central_dir_end() { central_dir_end content; content.magic_number = m_central_dir_end.read_4bytes(); @@ -566,34 +561,30 @@ void zip_archive_impl::read_central_dir_end() #endif } -zip_archive::zip_archive(zip_archive_stream* stream) : - mp_impl(new zip_archive_impl(stream)) +zip_archive::zip_archive(zip_archive_stream* stream) : mp_impl(std::make_unique(stream)) { } -zip_archive::~zip_archive() -{ - delete mp_impl; -} +zip_archive::~zip_archive() = default; void zip_archive::load() { mp_impl->load(); } 
-void zip_archive::dump_file_entry(size_t index) const +zip_file_entry_header zip_archive::get_file_entry_header(std::size_t index) const { - mp_impl->dump_file_entry(index); + return mp_impl->get_file_entry_header(index); } -std::string_view zip_archive::get_file_entry_name(std::size_t index) const +zip_file_entry_header zip_archive::get_file_entry_header(std::string_view name) const { - return mp_impl->get_file_entry_name(index); + return mp_impl->get_file_entry_header(name); } -void zip_archive::dump_file_entry(std::string_view entry_name) const +std::string_view zip_archive::get_file_entry_name(std::size_t index) const { - mp_impl->dump_file_entry(entry_name); + return mp_impl->get_file_entry_name(index); } size_t zip_archive::get_file_entry_count() const @@ -601,9 +592,9 @@ size_t zip_archive::get_file_entry_count() const return mp_impl->get_file_entry_count(); } -bool zip_archive::read_file_entry(std::string_view entry_name, std::vector& buf) const +std::vector zip_archive::read_file_entry(std::string_view entry_name) const { - return mp_impl->read_file_entry(entry_name, buf); + return mp_impl->read_file_entry(entry_name); } } diff --git a/src/parser/zip_archive_test.cpp b/src/parser/zip_archive_test.cpp index feef3b593e7b20f59f2b48f5f034e7ccfca62594..5f6fea8605af5365090ad30e345be427baa3315a 100644 --- a/src/parser/zip_archive_test.cpp +++ b/src/parser/zip_archive_test.cpp @@ -10,7 +10,10 @@ #include #include -#include "orcus/zip_archive_stream.hpp" +#include +#include + +#include #define ASSERT_THROW(expr) \ try \ @@ -22,10 +25,10 @@ catch (...) 
\ { \ } -using namespace std; using namespace orcus; +namespace fs = boost::filesystem; -void test_zip_archive_stream(zip_archive_stream* const strm, const unsigned char* const data, size_t const length) +void test_zip_archive_stream(zip_archive_stream* const strm, const unsigned char* const data, std::size_t const length) { assert(strm->size() == length); assert(strm->tell() == 0); @@ -34,17 +37,17 @@ void test_zip_archive_stream(zip_archive_stream* const strm, const unsigned char unsigned char* buf = buffer.data(); strm->read(buf, 2); - assert(equal(data, data + 2, buf)); + assert(std::equal(data, data + 2, buf)); assert(strm->tell() == 0); strm->read(buf, length); - assert(equal(data, data + length, buf)); + assert(std::equal(data, data + length, buf)); ASSERT_THROW(strm->read(buf, length + 1)); strm->read(buf, 0); strm->seek(2); assert(strm->tell() == 2); strm->read(buf, 2); - assert(equal(data + 2, data + 4, buf)); + assert(std::equal(data + 2, data + 4, buf)); strm->seek(length); assert(strm->tell() == length); ASSERT_THROW(strm->seek(length + 1)); @@ -53,14 +56,42 @@ void test_zip_archive_stream(zip_archive_stream* const strm, const unsigned char void test_zip_archive_stream_blob() { + ORCUS_TEST_FUNC_SCOPE; + const unsigned char data[] = "My hovercraft is full of eels."; zip_archive_stream_blob strm(data, sizeof(data)); test_zip_archive_stream(&strm, data, sizeof(data)); } +void test_zip_archive_file_entry_header() +{ + ORCUS_TEST_FUNC_SCOPE; + + fs::path filepath{SRCDIR"/test/ods/raw-values-1/input.ods"}; + assert(fs::is_regular_file(filepath)); + + zip_archive_stream_fd strm(filepath.string().c_str()); + + zip_archive archive(&strm); + archive.load(); + std::size_t n_entries = archive.get_file_entry_count(); + for (std::size_t i = 0; i < n_entries; ++i) + { + std::string_view name = archive.get_file_entry_name(i); + std::cout << "* entry name: " << name << std::endl; + zip_file_entry_header header = archive.get_file_entry_header(i); + 
assert(header.filename == name); + assert(header.header_signature == 0x04034b50); + + // 0 = none; 8 = deflate + assert(header.compression_method == 0 || header.compression_method == 8); + } +} + int main() { test_zip_archive_stream_blob(); + test_zip_archive_file_entry_header(); return EXIT_SUCCESS; } diff --git a/src/python/json.cpp b/src/python/json.cpp index f22cf666d9626c872c214ddbfa8e55233e43b37e..f64032b12eb2cd9c42c0687df8b6c04276edcd29 100644 --- a/src/python/json.cpp +++ b/src/python/json.cpp @@ -177,10 +177,10 @@ public: } } - void object_key(const char* p, size_t len, bool /*transient*/) + void object_key(std::string_view key, bool /*transient*/) { parser_stack& cur = m_stack.back(); - cur.key = PyUnicode_FromStringAndSize(p, len); + cur.key = PyUnicode_FromStringAndSize(key.data(), key.size()); } void end_object() @@ -213,9 +213,9 @@ public: push_value(Py_None); } - void string(const char* p, size_t len, bool /*transient*/) + void string(std::string_view val, bool /*transient*/) { - push_value(PyUnicode_FromStringAndSize(p, len)); + push_value(PyUnicode_FromStringAndSize(val.data(), val.size())); } void number(double val) @@ -242,13 +242,13 @@ PyObject* json_loads(PyObject* /*module*/, PyObject* args, PyObject* kwargs) } json_parser_handler hdl; - orcus::json_parser parser(stream, strlen(stream), hdl); + orcus::json_parser parser(stream, hdl); try { parser.parse(); return hdl.get_root(); } - catch (const orcus::json::parse_error& e) + catch (const orcus::parse_error& e) { PyErr_SetString(PyExc_TypeError, e.what()); } diff --git a/src/python/root.cpp b/src/python/root.cpp index 4b1b6b9daa22eee6defca0e408585a3810c00ade..91e7c18f6fc354970725391efee06dc3c160cbb0 100644 --- a/src/python/root.cpp +++ b/src/python/root.cpp @@ -34,7 +34,7 @@ PyObject* detect_format(PyObject* /*module*/, PyObject* args, PyObject* kwargs) try { - format_t ft = orcus::detect(reinterpret_cast(p), n); + format_t ft = orcus::detect({p, n}); switch (ft) { diff --git 
a/src/spreadsheet/CMakeLists.txt b/src/spreadsheet/CMakeLists.txt index 27482defe2720204a1427341a545339ffd2fa33a..6641511161e58aeb6252e9868cb5cf7bf0cbbf8c 100644 --- a/src/spreadsheet/CMakeLists.txt +++ b/src/spreadsheet/CMakeLists.txt @@ -7,6 +7,7 @@ add_library(orcus-spreadsheet-model-${ORCUS_API_VERSION} SHARED debug_state_dumper.cpp document.cpp document_impl.cpp + document_types.cpp dumper_global.cpp factory.cpp factory_pivot.cpp diff --git a/src/spreadsheet/Makefile.am b/src/spreadsheet/Makefile.am index 9f42104989b5023d27620ff85d3c8ca6f6b666c4..17e731e69d2557acd964175eb9241ede10e10cd5 100644 --- a/src/spreadsheet/Makefile.am +++ b/src/spreadsheet/Makefile.am @@ -24,6 +24,7 @@ liborcus_spreadsheet_model_@ORCUS_API_VERSION@_la_SOURCES = \ document.cpp \ document_impl.hpp \ document_impl.cpp \ + document_types.cpp \ dumper_global.hpp \ dumper_global.cpp \ factory.cpp \ diff --git a/src/spreadsheet/document_types.cpp b/src/spreadsheet/document_types.cpp new file mode 100644 index 0000000000000000000000000000000000000000..88e0724335ee947831ce8a1834c54db4314fcdcb --- /dev/null +++ b/src/spreadsheet/document_types.cpp @@ -0,0 +1,77 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#include + +namespace orcus { namespace spreadsheet { + +color_t::color_t() : + alpha(0), red(0), green(0), blue(0) +{ +} + +color_t::color_t(color_elem_t _red, color_elem_t _green, color_elem_t _blue) : + alpha(255), red(_red), green(_green), blue(_blue) +{ +} + +color_t::color_t(color_elem_t _alpha, color_elem_t _red, color_elem_t _green, color_elem_t _blue) : + alpha(_alpha), red(_red), green(_green), blue(_blue) +{ +} + +void color_t::reset() +{ + *this = color_t(); +} + +bool color_t::operator==(const color_t& other) const +{ + return alpha == other.alpha && red == other.red && green == other.green && blue == other.blue; +} + +bool color_t::operator!=(const color_t& other) const +{ + return !operator==(other); +} + +format_run::format_run() : + pos(0), size(0), + font_size(0), + bold(false), italic(false) {} + +void format_run::reset() +{ + pos = 0; + size = 0; + font = std::string_view{}; + font_size = 0; + bold = false; + italic = false; + color = color_t(); +} + +bool format_run::formatted() const +{ + if (bold || italic) + return true; + + if (font_size) + return true; + + if (!font.empty()) + return true; + + if (color.alpha || color.red || color.green || color.blue) + return true; + + return false; +} + +}} // namespace orcus::spreadsheet + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/src/spreadsheet/factory_pivot.cpp b/src/spreadsheet/factory_pivot.cpp index 518c3aa8b0d763d8d8f4a3d11a1c1fec0f688f4e..2761da55738399560b0babee16242aca0af926f6 100644 --- a/src/spreadsheet/factory_pivot.cpp +++ b/src/spreadsheet/factory_pivot.cpp @@ -171,7 +171,7 @@ void import_pivot_cache_def::set_field_name(std::string_view name) m_current_field.name = intern(name); } -iface::import_pivot_cache_field_group* import_pivot_cache_def::create_field_group(size_t base_index) +iface::import_pivot_cache_field_group* import_pivot_cache_def::start_field_group(size_t base_index) { m_current_field_group = std::make_unique(m_doc, m_current_field, 
base_index); diff --git a/src/spreadsheet/factory_pivot.hpp b/src/spreadsheet/factory_pivot.hpp index 01173745b63fb17ace9df23567e0b49e4aa7e1a1..465fef1ba37a3d8ccf5f7a0e05978108de0cffdf 100644 --- a/src/spreadsheet/factory_pivot.hpp +++ b/src/spreadsheet/factory_pivot.hpp @@ -58,7 +58,7 @@ public: virtual void set_field_name(std::string_view name) override; - virtual iface::import_pivot_cache_field_group* create_field_group(size_t base_index) override; + virtual iface::import_pivot_cache_field_group* start_field_group(size_t base_index) override; virtual void set_field_min_value(double v) override; diff --git a/src/spreadsheet/factory_shared_strings.cpp b/src/spreadsheet/factory_shared_strings.cpp index f155fc4caf3978f8e182eade2669d1fd770a5048..a8375c0b724e4de85e32de464bb84b44003b02be 100644 --- a/src/spreadsheet/factory_shared_strings.cpp +++ b/src/spreadsheet/factory_shared_strings.cpp @@ -7,7 +7,6 @@ #include "factory_shared_strings.hpp" -#include #include #include #include diff --git a/src/spreadsheet/factory_shared_strings.hpp b/src/spreadsheet/factory_shared_strings.hpp index 21eaf23014f4a04523e9ac9c95125075cc2341dc..b49d2744fb72d09b2d721b314f60bff5cd246d01 100644 --- a/src/spreadsheet/factory_shared_strings.hpp +++ b/src/spreadsheet/factory_shared_strings.hpp @@ -8,6 +8,7 @@ #pragma once #include +#include #include diff --git a/src/spreadsheet/factory_table.cpp b/src/spreadsheet/factory_table.cpp index 432228dcb031d932e1aace63c242b909933d88b3..a77b7af38b656a5856d626c04fd56e0f5eb38e71 100644 --- a/src/spreadsheet/factory_table.cpp +++ b/src/spreadsheet/factory_table.cpp @@ -105,17 +105,9 @@ iface::import_auto_filter* import_table::get_auto_filter() return &mp_impl->m_auto_filter; } -void import_table::set_range(std::string_view ref) +void import_table::set_range(const range_t& range) { - const ixion::formula_name_resolver* resolver = - mp_impl->m_doc.get_formula_name_resolver(spreadsheet::formula_ref_context_t::global); - if (!resolver) - return; - - 
ixion::abs_range_t& range = mp_impl->mp_data->range; - range = to_abs_range(*resolver, ref.data(), ref.size()); - if (range.valid()) - range.first.sheet = range.last.sheet = mp_impl->m_sheet.get_index(); + mp_impl->mp_data->range = to_abs_range(range, mp_impl->m_sheet.get_index()); } void import_table::set_identifier(size_t id) diff --git a/src/spreadsheet/factory_table.hpp b/src/spreadsheet/factory_table.hpp index 766f1e889bac1384e836e6bed54a84d36778d2e1..0a274b4cb8d61a019909fd706df2f5526a8344de 100644 --- a/src/spreadsheet/factory_table.hpp +++ b/src/spreadsheet/factory_table.hpp @@ -28,7 +28,7 @@ public: virtual iface::import_auto_filter* get_auto_filter() override; - virtual void set_range(std::string_view ref) override; + virtual void set_range(const range_t& range) override; virtual void set_identifier(size_t id) override; virtual void set_name(std::string_view name) override; virtual void set_display_name(std::string_view name) override; diff --git a/test/xlsx/conditional-format/basic.xlsx b/test/xlsx/conditional-format/basic.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..847bb474e6508e81ac1975c63af833cd758b84df --- /dev/null +++ b/test/xlsx/conditional-format/basic.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41916486884cd5f351e5f996445cd9301e4cf597311d933a3b50e740910414b7 +size 13482 diff --git a/test/xlsx/conditional-format/data-bars.xlsx b/test/xlsx/conditional-format/data-bars.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..a5b7718441ae29d750cb8ec6585b18935c3032d9 --- /dev/null +++ b/test/xlsx/conditional-format/data-bars.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a12ef557de31ea71d44b8e0fd6b020dbc49c22d158117df075a0f23125760408 +size 11334