From 4f7a433b201a6b396a7ab60a4bf936dc15b67abb Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 17 Nov 2022 19:24:25 -0500 Subject: [PATCH 01/91] Set the version number to 0.18.0 --- CMakeLists.txt | 4 ++-- configure.ac | 4 ++-- doc/conf.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5547ee34..e2915061 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 3.12) cmake_policy(SET CMP0048 NEW) set(ORCUS_MAJOR_VERSION 0) -set(ORCUS_MINOR_VERSION 17) -set(ORCUS_MICRO_VERSION 99) +set(ORCUS_MINOR_VERSION 18) +set(ORCUS_MICRO_VERSION 0) set(ORCUS_MAJOR_API_VERSION 0) set(ORCUS_MINOR_API_VERSION 18) set(ORCUS_VERSION ${ORCUS_MAJOR_VERSION}.${ORCUS_MINOR_VERSION}.${ORCUS_MICRO_VERSION}) diff --git a/configure.ac b/configure.ac index 62ee3f58..8a65dee0 100644 --- a/configure.ac +++ b/configure.ac @@ -5,8 +5,8 @@ # Version information # =================== m4_define([orcus_major_version], [0]) -m4_define([orcus_minor_version], [17]) -m4_define([orcus_micro_version], [99]) +m4_define([orcus_minor_version], [18]) +m4_define([orcus_micro_version], [0]) m4_define([orcus_version], [orcus_major_version.orcus_minor_version.orcus_micro_version]) # =============== diff --git a/doc/conf.py b/doc/conf.py index f90713dd..fcdce518 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -58,16 +58,16 @@ master_doc = 'index' # General information about the project. project = 'Orcus' -copyright = '2021, Kohei Yoshida' +copyright = '2022, Kohei Yoshida' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '0.17' +version = '0.18' # The full version, including alpha/beta/rc tags. -release = '0.17.2' +release = '0.18.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
-- GitLab From beefecd7b325db444be91f7f4ef0cc156a0cabe2 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 17 Nov 2022 19:32:18 -0500 Subject: [PATCH 02/91] Ignore this file too --- src/parser/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser/.gitignore b/src/parser/.gitignore index 34791331..3590d47b 100644 --- a/src/parser/.gitignore +++ b/src/parser/.gitignore @@ -2,3 +2,4 @@ parser-test-* parser-global-test sax-parser-test utf8-test +types-test -- GitLab From b9b499adf40d6cdc6a862eb313051eb46bab50b2 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 17 Nov 2022 20:42:33 -0500 Subject: [PATCH 03/91] Fix make distcheck --- Makefile.am | 336 +++++++++++++++++++++++++++------------------------- 1 file changed, 176 insertions(+), 160 deletions(-) diff --git a/Makefile.am b/Makefile.am index 04ffeb35..fe9aff9e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -154,129 +154,135 @@ test_data = \ test/json-structure/repeat-objects/input.json \ test/json-structure/arrays-in-object/check.txt \ test/json-structure/arrays-in-object/input.json \ - test/xlsx/date-cell/input.xlsx \ + test/xlsx/borders/directions.xlsx \ + test/xlsx/borders/grid-box.xlsx \ + test/xlsx/borders/single-cells.xlsx \ + test/xlsx/borders/colors.xlsx \ + test/xlsx/boolean-values/check.txt \ + test/xlsx/boolean-values/input.xlsx \ + test/xlsx/hidden-rows-columns/input.xlsx \ + test/xlsx/raw-values-1/check.txt \ + test/xlsx/raw-values-1/input.xlsx \ + test/xlsx/revision/cell-change-basic.xlsx \ + test/xlsx/styles/column-styles.xlsx \ + test/xlsx/styles/direct-format.xlsx \ + test/xlsx/formula-shared/check.txt \ + test/xlsx/formula-shared/input.xlsx \ + test/xlsx/text-alignment/input.xlsx \ + test/xlsx/matrix-results/input.xlsx \ test/xlsx/named-expression/check.txt \ test/xlsx/named-expression/input.xlsx \ - test/xlsx/revision/cell-change-basic.xlsx \ test/xlsx/formula-array-1/check.txt \ test/xlsx/formula-array-1/input.xlsx \ + test/xlsx/test.xlsx \ + 
test/xlsx/view/cursor-split-pane.xlsx \ + test/xlsx/view/frozen-pane.xlsx \ + test/xlsx/view/cursor-per-sheet.xlsx \ test/xlsx/formula-cells/check.txt \ test/xlsx/formula-cells/input.xlsx \ - test/xlsx/empty-shared-strings/check.txt \ - test/xlsx/empty-shared-strings/input.xlsx \ + test/xlsx/cell-properties/wrap-and-shrink.xlsx \ + test/xlsx/formula-with-string-results/check.txt \ + test/xlsx/formula-with-string-results/input.xlsx \ + test/xlsx/named-expression-relative/input.xlsx \ test/xlsx/data-table/one-variable.xlsx \ test/xlsx/data-table/multi-table.xlsx \ + test/xlsx/number-format/date-time.xlsx \ + test/xlsx/empty-shared-strings/check.txt \ + test/xlsx/empty-shared-strings/input.xlsx \ + test/xlsx/column-width-row-height/input.xlsx \ + test/xlsx/merged-cells/simple.xlsx \ + test/xlsx/date-time/input.xlsx \ + test/xlsx/date-cell/input.xlsx \ test/xlsx/doc-structure/unordered-sheet-positions.xlsx \ - test/xlsx/formatted-text/colored-text.xlsx \ - test/xlsx/formatted-text/bold-and-italic.xlsx \ - test/xlsx/matrix-results/input.xlsx \ - test/xlsx/text-alignment/input.xlsx \ - test/xlsx/boolean-values/check.txt \ - test/xlsx/boolean-values/input.xlsx \ test/xlsx/named-expression-sheet-local/check.txt \ test/xlsx/named-expression-sheet-local/input.xlsx \ - test/xlsx/hidden-rows-columns/input.xlsx \ - test/xlsx/borders/directions.xlsx \ - test/xlsx/borders/colors.xlsx \ - test/xlsx/borders/single-cells.xlsx \ - test/xlsx/borders/grid-box.xlsx \ - test/xlsx/number-format/date-time.xlsx \ - test/xlsx/formula-shared/check.txt \ - test/xlsx/formula-shared/input.xlsx \ - test/xlsx/formula-with-string-results/check.txt \ - test/xlsx/formula-with-string-results/input.xlsx \ - test/xlsx/merged-cells/simple.xlsx \ - test/xlsx/raw-values-1/check.txt \ - test/xlsx/raw-values-1/input.xlsx \ - test/xlsx/column-width-row-height/input.xlsx \ - test/xlsx/test.xlsx \ - test/xlsx/formula-simple.xlsx \ - test/xlsx/pivot-table/two-pivot-caches.xlsx \ - 
test/xlsx/pivot-table/group-by-numbers.xlsx \ - test/xlsx/pivot-table/three-pivot-tables-on-one-sheet.xlsx \ - test/xlsx/pivot-table/group-by-dates.xlsx \ + test/xlsx/background-color/standard.xlsx \ + test/xlsx/table/table-1.xlsx \ + test/xlsx/table/table-2.xlsx \ + test/xlsx/table/autofilter.xlsx \ + test/xlsx/table/autofilter-text-filter-1.xlsx \ + test/xlsx/formatted-text/colored-text.xlsx \ + test/xlsx/formatted-text/bold-and-italic.xlsx \ + test/xlsx/pivot-table/error-values.xlsx \ test/xlsx/pivot-table/chart-simple.xlsx \ - test/xlsx/pivot-table/group-field.xlsx \ test/xlsx/pivot-table/mixed-type-field.xlsx \ - test/xlsx/pivot-table/error-values.xlsx \ + test/xlsx/pivot-table/group-field.xlsx \ + test/xlsx/pivot-table/three-pivot-tables-on-one-sheet.xlsx \ + test/xlsx/pivot-table/two-pivot-caches.xlsx \ + test/xlsx/pivot-table/group-by-dates.xlsx \ + test/xlsx/pivot-table/group-by-numbers.xlsx \ test/xlsx/pivot-table/two-tables-one-source.xlsx \ test/xlsx/pivot-table/many-fields.xlsx \ - test/xlsx/background-color/standard.xlsx \ - test/xlsx/view/cursor-per-sheet.xlsx \ - test/xlsx/view/cursor-split-pane.xlsx \ - test/xlsx/view/frozen-pane.xlsx \ - test/xlsx/table/table-2.xlsx \ - test/xlsx/table/autofilter-text-filter-1.xlsx \ - test/xlsx/table/table-1.xlsx \ - test/xlsx/table/autofilter.xlsx \ - test/xlsx/date-time/input.xlsx \ - test/xlsx/named-expression-relative/input.xlsx \ - test/xml-mapped/attribute-basic/check-nomap.txt \ - test/xml-mapped/attribute-basic/check.txt \ + test/xlsx/formula-simple.xlsx \ test/xml-mapped/attribute-basic/input.xml \ + test/xml-mapped/attribute-basic/check.txt \ test/xml-mapped/attribute-basic/map.xml \ - test/xml-mapped/attribute-namespace-2/check-nomap.txt \ - test/xml-mapped/attribute-namespace-2/check.txt \ - test/xml-mapped/attribute-namespace-2/input.xml \ - test/xml-mapped/attribute-namespace-2/map.xml \ - test/xml-mapped/attribute-namespace/check-nomap.txt \ - test/xml-mapped/attribute-namespace/check.txt \ - 
test/xml-mapped/attribute-namespace/input.xml \ - test/xml-mapped/attribute-namespace/map.xml \ - test/xml-mapped/attribute-range-self-close/check-nomap.txt \ - test/xml-mapped/attribute-range-self-close/check.txt \ - test/xml-mapped/attribute-range-self-close/input.xml \ - test/xml-mapped/attribute-range-self-close/map.xml \ - test/xml-mapped/attribute-single-element-2/check.txt \ - test/xml-mapped/attribute-single-element-2/input.xml \ - test/xml-mapped/attribute-single-element-2/map.xml \ - test/xml-mapped/attribute-single-element/check.txt \ - test/xml-mapped/attribute-single-element/input.xml \ - test/xml-mapped/attribute-single-element/map.xml \ - test/xml-mapped/content-basic/check-nomap.txt \ - test/xml-mapped/content-basic/check.txt \ - test/xml-mapped/content-basic/input.xml \ - test/xml-mapped/content-basic/map.xml \ - test/xml-mapped/content-namespace-2/check-nomap.txt \ - test/xml-mapped/content-namespace-2/check.txt \ + test/xml-mapped/attribute-basic/check-nomap.txt \ test/xml-mapped/content-namespace-2/input.xml \ + test/xml-mapped/content-namespace-2/check.txt \ test/xml-mapped/content-namespace-2/map.xml \ - test/xml-mapped/content-namespace-3/check-nomap.txt \ - test/xml-mapped/content-namespace-3/check.txt \ - test/xml-mapped/content-namespace-3/input.xml \ - test/xml-mapped/content-namespace-3/map.xml \ - test/xml-mapped/content-namespace/check-nomap.txt \ - test/xml-mapped/content-namespace/check.txt \ - test/xml-mapped/content-namespace/input.xml \ - test/xml-mapped/content-namespace/map.xml \ - test/xml-mapped/custom-labels-2/check.txt \ + test/xml-mapped/content-namespace-2/check-nomap.txt \ test/xml-mapped/custom-labels-2/input.xml \ + test/xml-mapped/custom-labels-2/check.txt \ test/xml-mapped/custom-labels-2/map.xml \ - test/xml-mapped/custom-labels/check.txt \ - test/xml-mapped/custom-labels/input.xml \ - test/xml-mapped/custom-labels/map.xml \ - test/xml-mapped/fuel-economy/check-nomap.txt \ - test/xml-mapped/fuel-economy/check.txt \ - 
test/xml-mapped/fuel-economy/input.xml \ - test/xml-mapped/fuel-economy/map.xml \ - test/xml-mapped/invalids/map-defs/non-leaf-element-linked.xml \ - test/xml-mapped/invalids/map-defs/not-xml.xml \ - test/xml-mapped/nested-repeats-2/check-nomap.txt \ - test/xml-mapped/nested-repeats-2/check.txt \ - test/xml-mapped/nested-repeats-2/input.xml \ - test/xml-mapped/nested-repeats-2/map.xml \ - test/xml-mapped/nested-repeats-3/check-nomap.txt \ - test/xml-mapped/nested-repeats-3/check.txt \ + test/xml-mapped/content-namespace-3/input.xml \ + test/xml-mapped/content-namespace-3/check.txt \ + test/xml-mapped/content-namespace-3/map.xml \ + test/xml-mapped/content-namespace-3/check-nomap.txt \ + test/xml-mapped/attribute-single-element/input.xml \ + test/xml-mapped/attribute-single-element/check.txt \ + test/xml-mapped/attribute-single-element/map.xml \ + test/xml-mapped/attribute-range-self-close/input.xml \ + test/xml-mapped/attribute-range-self-close/check.txt \ + test/xml-mapped/attribute-range-self-close/map.xml \ + test/xml-mapped/attribute-range-self-close/check-nomap.txt \ test/xml-mapped/nested-repeats-3/input.xml \ + test/xml-mapped/nested-repeats-3/check.txt \ test/xml-mapped/nested-repeats-3/map.xml \ - test/xml-mapped/nested-repeats-4/check-nomap.txt \ - test/xml-mapped/nested-repeats-4/check.txt \ + test/xml-mapped/nested-repeats-3/check-nomap.txt \ + test/xml-mapped/encoding/euc-jp.xml \ + test/xml-mapped/encoding/gbk.xml \ + test/xml-mapped/encoding/utf-8.xml \ + test/xml-mapped/invalids/map-defs/non-leaf-element-linked.xml \ + test/xml-mapped/invalids/map-defs/not-xml.xml \ + test/xml-mapped/attribute-namespace-2/input.xml \ + test/xml-mapped/attribute-namespace-2/check.txt \ + test/xml-mapped/attribute-namespace-2/map.xml \ + test/xml-mapped/attribute-namespace-2/check-nomap.txt \ + test/xml-mapped/content-namespace/input.xml \ + test/xml-mapped/content-namespace/check.txt \ + test/xml-mapped/content-namespace/map.xml \ + 
test/xml-mapped/content-namespace/check-nomap.txt \ test/xml-mapped/nested-repeats-4/input.xml \ + test/xml-mapped/nested-repeats-4/check.txt \ test/xml-mapped/nested-repeats-4/map.xml \ - test/xml-mapped/nested-repeats/check-nomap.txt \ - test/xml-mapped/nested-repeats/check.txt \ + test/xml-mapped/nested-repeats-4/check-nomap.txt \ + test/xml-mapped/attribute-single-element-2/input.xml \ + test/xml-mapped/attribute-single-element-2/check.txt \ + test/xml-mapped/attribute-single-element-2/map.xml \ test/xml-mapped/nested-repeats/input.xml \ + test/xml-mapped/nested-repeats/check.txt \ test/xml-mapped/nested-repeats/map.xml \ + test/xml-mapped/nested-repeats/check-nomap.txt \ + test/xml-mapped/fuel-economy/input.xml \ + test/xml-mapped/fuel-economy/check.txt \ + test/xml-mapped/fuel-economy/map.xml \ + test/xml-mapped/fuel-economy/check-nomap.txt \ + test/xml-mapped/content-basic/input.xml \ + test/xml-mapped/content-basic/check.txt \ + test/xml-mapped/content-basic/map.xml \ + test/xml-mapped/content-basic/check-nomap.txt \ + test/xml-mapped/nested-repeats-2/input.xml \ + test/xml-mapped/nested-repeats-2/check.txt \ + test/xml-mapped/nested-repeats-2/map.xml \ + test/xml-mapped/nested-repeats-2/check-nomap.txt \ + test/xml-mapped/custom-labels/input.xml \ + test/xml-mapped/custom-labels/check.txt \ + test/xml-mapped/custom-labels/map.xml \ + test/xml-mapped/attribute-namespace/input.xml \ + test/xml-mapped/attribute-namespace/check.txt \ + test/xml-mapped/attribute-namespace/map.xml \ + test/xml-mapped/attribute-namespace/check-nomap.txt \ test/xml/simple/check.txt \ test/xml/simple/input.xml \ test/xml/no-decl-1/check.txt \ @@ -690,86 +696,96 @@ test_data = \ test/yaml/invalids/2.yaml \ test/yaml/invalids/1.yaml \ test/yaml/literal-block-2/input.yaml \ - test/xls-xml/background-color/standard.xml \ - test/xls-xml/basic-utf-16-be/check.txt \ - test/xls-xml/basic-utf-16-be/input.xml \ - test/xls-xml/basic-utf-16-le/check.txt \ - 
test/xls-xml/basic-utf-16-le/input.xml \ - test/xls-xml/basic/check.txt \ - test/xls-xml/basic/input.xml \ - test/xls-xml/bold-and-italic/check.txt \ - test/xls-xml/bold-and-italic/input.xml \ test/xls-xml/borders/colors.xml \ - test/xls-xml/borders/directions.xml \ - test/xls-xml/borders/grid-box.xml \ test/xls-xml/borders/single-cells.xml \ - test/xls-xml/character-set/input.xml \ - test/xls-xml/colored-text/check.txt \ - test/xls-xml/colored-text/input.xml \ - test/xls-xml/column-width-row-height/input.xml \ - test/xls-xml/date-time/input.xml \ - test/xls-xml/empty-rows/check.txt \ - test/xls-xml/empty-rows/input.xml \ - test/xls-xml/formula-array-1/check.txt \ - test/xls-xml/formula-array-1/input.xml \ - test/xls-xml/formula-cells-1/check.txt \ + test/xls-xml/borders/grid-box.xml \ + test/xls-xml/borders/directions.xml \ test/xls-xml/formula-cells-1/input.xml \ - test/xls-xml/formula-cells-2/check.txt \ - test/xls-xml/formula-cells-2/config.yaml \ - test/xls-xml/formula-cells-2/input.xml \ - test/xls-xml/formula-cells-3/check.txt \ - test/xls-xml/formula-cells-3/input.xml \ + test/xls-xml/formula-cells-1/check.txt \ test/xls-xml/formula-cells-parse-error/input.xml \ + test/xls-xml/colored-text/input.xml \ + test/xls-xml/colored-text/check.txt \ test/xls-xml/hidden-rows-columns/input.xml \ - test/xls-xml/invalid-sub-structure/check.txt \ + test/xls-xml/raw-values-1/input.xml \ + test/xls-xml/raw-values-1/check.txt \ + test/xls-xml/styles/direct-format.xml \ + test/xls-xml/styles/column-styles.xml \ test/xls-xml/invalid-sub-structure/input.xml \ - test/xls-xml/leading-whitespace/check.txt \ - test/xls-xml/leading-whitespace/input.xml \ + test/xls-xml/invalid-sub-structure/check.txt \ + test/xls-xml/basic-utf-16-be/input.xml \ + test/xls-xml/basic-utf-16-be/check.txt \ + test/xls-xml/text-alignment/input.xml \ test/xls-xml/matrix-results/input.xml \ - test/xls-xml/merged-cells/check.txt \ - test/xls-xml/merged-cells/input.xml \ - 
test/xls-xml/named-colors/check.txt \ - test/xls-xml/named-colors/input-upper.xml \ + test/xls-xml/named-expression/input.xml \ + test/xls-xml/named-expression/check.txt \ + test/xls-xml/formula-array-1/input.xml \ + test/xls-xml/formula-array-1/check.txt \ + test/xls-xml/empty-rows/input.xml \ + test/xls-xml/empty-rows/check.txt \ + test/xls-xml/basic/input.xml \ + test/xls-xml/basic/check.txt \ + test/xls-xml/view/cursor-per-sheet.xml \ + test/xls-xml/view/frozen-pane.xml \ + test/xls-xml/view/cursor-split-pane.xml \ + test/xls-xml/bold-and-italic/input.xml \ + test/xls-xml/bold-and-italic/check.txt \ + test/xls-xml/cell-properties/locked-and-hidden.xml \ + test/xls-xml/cell-properties/default-style.xml \ + test/xls-xml/cell-properties/wrap-and-shrink.xml \ test/xls-xml/named-colors/input.xml \ - test/xls-xml/named-colors/run.py \ + test/xls-xml/named-colors/input-upper.xml \ test/xls-xml/named-colors/saved-by-excel.xml \ - test/xls-xml/named-expression-sheet-local/check.txt \ - test/xls-xml/named-expression-sheet-local/input.xml \ - test/xls-xml/named-expression/check.txt \ - test/xls-xml/named-expression/input.xml \ + test/xls-xml/named-colors/check.txt \ + test/xls-xml/named-colors/run.py \ + test/xls-xml/formula-cells-3/input.xml \ + test/xls-xml/formula-cells-3/check.txt \ + test/xls-xml/leading-whitespace/input.xml \ + test/xls-xml/leading-whitespace/check.txt \ + test/xls-xml/basic-utf-16-le/input.xml \ + test/xls-xml/basic-utf-16-le/check.txt \ test/xls-xml/number-format/date-time.xml \ - test/xls-xml/raw-values-1/check.txt \ - test/xls-xml/raw-values-1/input.xml \ - test/xls-xml/table-offset/check.txt \ + test/xls-xml/formula-cells-2/input.xml \ + test/xls-xml/formula-cells-2/config.yaml \ + test/xls-xml/formula-cells-2/check.txt \ test/xls-xml/table-offset/input.xml \ + test/xls-xml/table-offset/check.txt \ + test/xls-xml/column-width-row-height/input.xml \ + test/xls-xml/character-set/input.xml \ + test/xls-xml/merged-cells/input.xml \ + 
test/xls-xml/merged-cells/check.txt \ + test/xls-xml/date-time/input.xml \ + test/xls-xml/named-expression-sheet-local/input.xml \ + test/xls-xml/named-expression-sheet-local/check.txt \ + test/xls-xml/background-color/standard.xml \ test/xls-xml/table/autofilter.xml \ - test/xls-xml/text-alignment/input.xml \ - test/xls-xml/view/cursor-per-sheet.xml \ - test/xls-xml/view/cursor-split-pane.xml \ - test/xls-xml/view/frozen-pane.xml \ - test/ods/named-range/input.ods \ - test/ods/named-range/check.txt \ - test/ods/date-cell/input.ods \ - test/ods/named-expression/input.ods \ - test/ods/named-expression/check.txt \ - test/ods/formula-2/input.ods \ - test/ods/formula-2/check.txt \ - test/ods/test.ods \ - test/ods/formatted-text/bold-and-italic.ods \ - test/ods/japanese.ods \ - test/ods/named-expression-sheet-local/input.ods \ - test/ods/named-expression-sheet-local/check.txt \ test/ods/borders/grid-box.ods \ test/ods/borders/single-cells.ods \ - test/ods/raw-values-1/input.ods \ + test/ods/import-styles/cell-protection.xml \ + test/ods/import-styles/standard-styles.xml \ + test/ods/import-styles/cell-styles.xml \ test/ods/raw-values-1/check.txt \ - test/ods/column-width-row-height/input.ods \ - test/ods/styles/cell-protection.xml \ - test/ods/styles/cell-styles.xml \ - test/ods/styles/number-format.xml \ - test/ods/styles/standard-styles.xml \ + test/ods/raw-values-1/input.ods \ + test/ods/styles/text-underlines.ods \ + test/ods/styles/column-styles.ods \ + test/ods/styles/direct-format.ods \ + test/ods/styles/asian-complex.ods \ + test/ods/test.ods \ + test/ods/named-expression/check.txt \ + test/ods/named-expression/input.ods \ + test/ods/formula-1/check.txt \ test/ods/formula-1/input.ods \ - test/ods/formula-1/check.txt + test/ods/cell-properties/wrap-and-shrink.ods \ + test/ods/formula-2/check.txt \ + test/ods/formula-2/input.ods \ + test/ods/number-format/basic-set.ods \ + test/ods/column-width-row-height/input.ods \ + test/ods/date-cell/input.ods \ + 
test/ods/japanese.ods \ + test/ods/named-expression-sheet-local/check.txt \ + test/ods/named-expression-sheet-local/input.ods \ + test/ods/named-range/check.txt \ + test/ods/named-range/input.ods \ + test/ods/formatted-text/bold-and-italic.ods EXTRA_DIST = \ CHANGELOG \ -- GitLab From 917043d66e8df345131a1f4d2397a05188358051 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 17 Nov 2022 21:34:35 -0500 Subject: [PATCH 04/91] Summarize the change in 0.18.0 --- CHANGELOG | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 4081f7c7..32f2232f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,70 @@ +orcus 0.18.0 + +* general + + * fixed the flat output mode to properly calculate the lengths of UTF-8 + encoded strings. + + * replaced all uses of std::strtol() with parse_integer() to properly parse + strings that are not necessarily null-terminated. + + * added a new output format type 'debug-state' which dumps the internal + state of the populated document model in detail. This can be useful + during debugging. + + * separated the import_shared_string interface implementation from the + backend shared strings store per separation of responsibility. + + * merged the foo_t and foo_active_t struct pair, such as font_t and + font_active_t, in the styles store into a single type using std::optional. + +* ods + + * reimplemented the number format styles import to correctly keep track of + element stacks and correctly perform structure checks to detect malformed + documents. + + * added new interface to import named styles applied to columns. + + * added new interface to import attributes for asian and complex scripts for + the following font attributes: + + * font name + + * font size + + * font style + + * font weight + + * re-designed the styles import interface to make it multi-level. 
+ + * re-worked the import of the style:text-underline-width attribute to make + its handling more in line with the specifications. + +* xls-xml + + * added support for importing wrap-text and shrink-to-fit cell format + attributes. + + * added support for importing cell-hidden and locked attributes. + + * added support for importing direct and named cell formats applied to + columns and rows. + +* xlsx + + * added support for importing wrap-text and shrink-to-fit cell format + attributes. + + * added support for importing direct and named cell formats applied to + columns and rows. + +* xml-map + + * added a new interface to pass the encoding information to the document + model so that it can correctly decode non-UTF-8-encoded string values. + orcus 0.17.2 * ods -- GitLab From fdc57fc043f4384d873f8e68a79898cf32e1a064 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 17 Nov 2022 22:12:21 -0500 Subject: [PATCH 05/91] Emphasize that the yaml_parser is still experimental Also add some docs while I'm at it. --- README.md | 2 +- include/orcus/yaml_parser.hpp | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0ad9c80e..e7128f1a 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ The library also includes low-level parsers for the following: * CSS * XML * JSON -* YAML +* YAML (experimental) These parsers are all implemented as C++ templates and require a handler class passed as a template argument so that the handler class receives various diff --git a/include/orcus/yaml_parser.hpp b/include/orcus/yaml_parser.hpp index 123e1793..035d7d72 100644 --- a/include/orcus/yaml_parser.hpp +++ b/include/orcus/yaml_parser.hpp @@ -13,6 +13,10 @@ namespace orcus { +/** + * Blank handler class for @p yaml_parser. One can sub-class this and + * overwrite callback functions one needs to handle. 
+ */ class yaml_handler { public: @@ -103,11 +107,19 @@ public: void null() {} }; -template +/** + * Parser for YAML documents. + * + * @tparam HandlerT Handler type with member functions for event callbacks. + * Refer to @p yaml_handler. + * + * @warning This parser is still highly experimental. Use with caution. + */ +template class yaml_parser : public yaml::parser_base { public: - typedef _Handler handler_type; + typedef HandlerT handler_type; yaml_parser(const char* p, size_t n, handler_type& hdl); -- GitLab From 6fc6fc45f576277d60cbbbf01c09dd2fca560bd6 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 17 Nov 2022 22:27:21 -0500 Subject: [PATCH 06/91] Document string_pool a bit more --- include/orcus/string_pool.hpp | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/include/orcus/string_pool.hpp b/include/orcus/string_pool.hpp index 9b2a6674..1edcf6e9 100644 --- a/include/orcus/string_pool.hpp +++ b/include/orcus/string_pool.hpp @@ -17,7 +17,10 @@ namespace orcus { /** - * Implements string hash map. + * This class implements a shared string pool with the ability to merge with + * other pools. + * + * @note Instances of this class are not copyable. */ class ORCUS_PSR_DLLPUBLIC string_pool { @@ -46,12 +49,31 @@ public: */ std::vector get_interned_strings() const; + /** + * Dump pool's content to stdout. + * + * @todo This needs to be reworked to make it more generally usable. + */ void dump() const; + /** + * Clear pool's content. + */ void clear(); + /** + * Query the total number of strings stored in the pool. + * + * @return size_t total number of strings in the pool. + */ size_t size() const; + /** + * Swap the content with another string-pool instance. + * + * + * @param other string-pool instance to swap contents with. 
+ */ void swap(string_pool& other); /** -- GitLab From c592f47fa4a7310ba423aff3becf89a6a911d76a Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 17 Nov 2022 22:41:29 -0500 Subject: [PATCH 07/91] Make string_pool move-constructible This will make string_pool a lot more usable. --- include/orcus/string_pool.hpp | 3 ++- src/parser/string_pool.cpp | 2 ++ src/parser/string_pool_test.cpp | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/include/orcus/string_pool.hpp b/include/orcus/string_pool.hpp index 1edcf6e9..12419bc7 100644 --- a/include/orcus/string_pool.hpp +++ b/include/orcus/string_pool.hpp @@ -20,7 +20,7 @@ namespace orcus { * This class implements a shared string pool with the ability to merge with * other pools. * - * @note Instances of this class are not copyable. + * @note This class is not copy-constructible, but is move-constructible. */ class ORCUS_PSR_DLLPUBLIC string_pool { @@ -29,6 +29,7 @@ public: string_pool& operator=(const string_pool&) = delete; string_pool(); + string_pool(string_pool&& other); ~string_pool(); /** diff --git a/src/parser/string_pool.cpp b/src/parser/string_pool.cpp index 27b38d01..e438da56 100644 --- a/src/parser/string_pool.cpp +++ b/src/parser/string_pool.cpp @@ -41,6 +41,8 @@ struct string_pool::impl string_pool::string_pool() : mp_impl(std::make_unique()) {} +string_pool::string_pool(string_pool&& other) : mp_impl(std::move(other.mp_impl)) {} + string_pool::~string_pool() = default; std::pair string_pool::intern(std::string_view str) diff --git a/src/parser/string_pool_test.cpp b/src/parser/string_pool_test.cpp index acb68f5d..56a4980a 100644 --- a/src/parser/string_pool_test.cpp +++ b/src/parser/string_pool_test.cpp @@ -8,6 +8,8 @@ #include "test_global.hpp" #include +#include + using namespace orcus; void test_basic() @@ -105,10 +107,27 @@ void test_merge() assert(entries.size() == pool1.size()); } +void test_move() +{ + static_assert(!std::is_copy_constructible_v); + 
static_assert(std::is_move_constructible_v); + + string_pool pool1; + pool1.intern("A"); + pool1.intern("B"); + pool1.intern("C"); + pool1.intern("D"); + pool1.intern("E"); + + string_pool pool2 = std::move(pool1); + assert(pool2.size() == 5); +} + int main() { test_basic(); test_merge(); + test_move(); return EXIT_SUCCESS; } -- GitLab From 639453a76a80c91f2ee6e7c10c02e1f67093e65e Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 17 Nov 2022 22:55:31 -0500 Subject: [PATCH 08/91] Document tokens class & make it officially non-copyable --- include/orcus/tokens.hpp | 13 +++++++++++++ src/parser/tokens.cpp | 2 ++ 2 files changed, 15 insertions(+) diff --git a/include/orcus/tokens.hpp b/include/orcus/tokens.hpp index a5dc9a44..9edc8771 100644 --- a/include/orcus/tokens.hpp +++ b/include/orcus/tokens.hpp @@ -15,11 +15,24 @@ namespace orcus { +/** + * XML token store that provides mapping of integral token identifiers and + * their original names. Instances of this class are typically used as global + * constants. + * + * @note The string values for the original token names should be static + * values whose values and memory addresses remain unchanged during the + * life cycle of the instance that references them. + * + * @note This class is not copy-constructible. + */ class ORCUS_PSR_DLLPUBLIC tokens { public: tokens() = delete; + tokens(const tokens&) = delete; tokens(const char** token_names, size_t token_name_count); + ~tokens(); /** * Check if a token returned from get_token() method is valid. 
diff --git a/src/parser/tokens.cpp b/src/parser/tokens.cpp index 846082c0..5d3c5333 100644 --- a/src/parser/tokens.cpp +++ b/src/parser/tokens.cpp @@ -17,6 +17,8 @@ tokens::tokens(const char** token_names, size_t token_name_count) : m_tokens.emplace(m_token_names[i], xml_token_t(i)); } +tokens::~tokens() = default; + bool tokens::is_valid_token(xml_token_t token) const { return token != XML_UNKNOWN_TOKEN; -- GitLab From 434be9437a08c6b95e381ad8cb6bc5192b8cfe43 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 17 Nov 2022 23:07:49 -0500 Subject: [PATCH 09/91] Revise the doc for cell_buffer just a tiny bit --- include/orcus/cell_buffer.hpp | 16 +++++++++++++--- src/parser/cell_buffer.cpp | 4 +++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/include/orcus/cell_buffer.hpp b/include/orcus/cell_buffer.hpp index e6b82ec3..4f3858cd 100644 --- a/include/orcus/cell_buffer.hpp +++ b/include/orcus/cell_buffer.hpp @@ -15,19 +15,29 @@ namespace orcus { /** - * Temporary cell buffer used to convert cell values when needed. This is - * used in the sax and csv parsers. + * Temporary cell buffer used to decode encoded cell values. This is used in + * the sax, json and csv parsers. */ class ORCUS_PSR_DLLPUBLIC cell_buffer { std::string m_buffer; - size_t m_buf_size; /// Logical buffer size. May differ from the actual buffer size. + size_t m_buf_size; public: + cell_buffer(const cell_buffer&) = delete; + cell_buffer(); + ~cell_buffer(); void append(const char* p, size_t len); void reset(); const char* get() const; + + /** + * Get the logical size of the buffer. This may differ from the actual + * buffer size. + * + * @return logical size of the buffer. 
+ */ size_t size() const; bool empty() const; }; diff --git a/src/parser/cell_buffer.cpp b/src/parser/cell_buffer.cpp index 54bc4d5d..1c4c5a39 100644 --- a/src/parser/cell_buffer.cpp +++ b/src/parser/cell_buffer.cpp @@ -21,6 +21,8 @@ namespace orcus { cell_buffer::cell_buffer() : m_buf_size(0) {} +cell_buffer::~cell_buffer() = default; + void cell_buffer::append(const char* p, size_t len) { if (!len) @@ -46,7 +48,7 @@ void cell_buffer::reset() const char* cell_buffer::get() const { - return &m_buffer[0]; + return m_buffer.data(); } size_t cell_buffer::size() const -- GitLab From 6522af789c041ef2cddb9459498098d7353f575f Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 17 Nov 2022 23:33:41 -0500 Subject: [PATCH 10/91] detect() to take std::string_view to simplify its use Also add it to the doc along with a whole bunch other types which were previously not included in the doc. --- doc/cpp/filter/index.rst | 4 ++++ doc/cpp/parser/util.rst | 15 +++++++++++++++ include/orcus/format_detection.hpp | 7 ++++++- src/liborcus/format_detection.cpp | 12 +++++++----- src/liborcus/format_detection_test.cpp | 7 ++----- src/orcus_detect_main.cpp | 2 +- src/python/root.cpp | 2 +- 7 files changed, 36 insertions(+), 13 deletions(-) diff --git a/doc/cpp/filter/index.rst b/doc/cpp/filter/index.rst index 900942c3..8d44ff40 100644 --- a/doc/cpp/filter/index.rst +++ b/doc/cpp/filter/index.rst @@ -51,3 +51,7 @@ Generic XML :members: +Utility Functions +----------------- + +.. doxygenfunction:: orcus::detect diff --git a/doc/cpp/parser/util.rst b/doc/cpp/parser/util.rst index 4363a9ea..fc1075de 100644 --- a/doc/cpp/parser/util.rst +++ b/doc/cpp/parser/util.rst @@ -27,12 +27,27 @@ XML Types .. doxygenstruct:: orcus::xml_token_attr_t +.. doxygenstruct:: orcus::xml_token_element_t + +.. doxygenstruct:: orcus::xml_declaration_t + +.. doxygentypedef:: orcus::xml_attrs_t Other Types =========== +.. doxygenenum:: orcus::character_set_t + +.. doxygenstruct:: orcus::parse_error_value_t + .. 
doxygenenum:: orcus::length_unit_t +.. doxygenenum:: orcus::format_t + +.. doxygenenum:: orcus::dump_format_t + +.. doxygenstruct:: orcus::length_t + .. doxygenstruct:: orcus::date_time_t diff --git a/include/orcus/format_detection.hpp b/include/orcus/format_detection.hpp index ee01b8e6..049b9da9 100644 --- a/include/orcus/format_detection.hpp +++ b/include/orcus/format_detection.hpp @@ -15,7 +15,12 @@ namespace orcus { -ORCUS_DLLPUBLIC format_t detect(const unsigned char* buffer, size_t length); +/** + * Detect the format of a given document stream. + * + * @param strm document stream to detect the format of. + */ +ORCUS_DLLPUBLIC format_t detect(std::string_view strm); } diff --git a/src/liborcus/format_detection.cpp b/src/liborcus/format_detection.cpp index 0bb02777..c3a31b1c 100644 --- a/src/liborcus/format_detection.cpp +++ b/src/liborcus/format_detection.cpp @@ -48,22 +48,24 @@ namespace orcus { -format_t detect(const unsigned char* buffer, size_t length) try +format_t detect(std::string_view strm) try { + const auto* p = reinterpret_cast(strm.data()); + #if ODS_ENABLED - if (orcus_ods::detect(buffer, length)) + if (orcus_ods::detect(p, strm.size())) return format_t::ods; #endif #if XLSX_ENABLED - if (orcus_xlsx::detect(buffer, length)) + if (orcus_xlsx::detect(p, strm.size())) return format_t::xlsx; #endif #if GNUMERIC_ENABLED - if (orcus_gnumeric::detect(buffer, length)) + if (orcus_gnumeric::detect(p, strm.size())) return format_t::gnumeric; #endif #if XLS_XML_ENABLED - if (orcus_xls_xml::detect(buffer, length)) + if (orcus_xls_xml::detect(p, strm.size())) return format_t::xls_xml; #endif diff --git a/src/liborcus/format_detection_test.cpp b/src/liborcus/format_detection_test.cpp index 378eff3d..74b4e51e 100644 --- a/src/liborcus/format_detection_test.cpp +++ b/src/liborcus/format_detection_test.cpp @@ -34,8 +34,7 @@ void test_detect_formats() { orcus::file_content content(tests[i].path.string()); assert(!content.empty()); - orcus::format_t detected = 
orcus::detect( - reinterpret_cast(content.data()), content.size()); + orcus::format_t detected = orcus::detect(content.str()); assert(detected == tests[i].format); } @@ -54,9 +53,7 @@ void test_invalids() orcus::file_content content(p.string()); assert(!content.empty()); - orcus::format_t detected = orcus::detect( - reinterpret_cast(content.data()), content.size()); - + orcus::format_t detected = orcus::detect(content.str()); assert(detected == orcus::format_t::unknown); } } diff --git a/src/orcus_detect_main.cpp b/src/orcus_detect_main.cpp index 2a19931c..addf9b09 100644 --- a/src/orcus_detect_main.cpp +++ b/src/orcus_detect_main.cpp @@ -30,7 +30,7 @@ int main(int argc, char** argv) try return EXIT_FAILURE; } - format_t detected_type = detect(reinterpret_cast(content.data()), content.size()); + format_t detected_type = detect(content.str()); cout << "type: "; switch (detected_type) diff --git a/src/python/root.cpp b/src/python/root.cpp index 4b1b6b9d..91e7c18f 100644 --- a/src/python/root.cpp +++ b/src/python/root.cpp @@ -34,7 +34,7 @@ PyObject* detect_format(PyObject* /*module*/, PyObject* args, PyObject* kwargs) try { - format_t ft = orcus::detect(reinterpret_cast(p), n); + format_t ft = orcus::detect({p, n}); switch (ft) { -- GitLab From 814eb27812c1c01cd8d94f807f251160951c946b Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 17 Nov 2022 23:41:34 -0500 Subject: [PATCH 11/91] Add file_content and memory_content to doc Also reword the description of file_content to make it flow a bit better. --- doc/cpp/parser/util.rst | 6 ++++++ include/orcus/stream.hpp | 7 ++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/doc/cpp/parser/util.rst b/doc/cpp/parser/util.rst index fc1075de..98e0d1f1 100644 --- a/doc/cpp/parser/util.rst +++ b/doc/cpp/parser/util.rst @@ -15,6 +15,12 @@ Utilities .. doxygenclass:: orcus::zip_archive :members: +.. doxygenclass:: orcus::file_content + :members: + +.. 
doxygenclass:: orcus::memory_content + :members: + XML Types ========= diff --git a/include/orcus/stream.hpp b/include/orcus/stream.hpp index 1e24942f..b864a14c 100644 --- a/include/orcus/stream.hpp +++ b/include/orcus/stream.hpp @@ -16,9 +16,10 @@ namespace orcus { /** - * Represents the content of a file. The file content may be either - * in-memory, or memory-mapped; it is initially memory-mapped, but it may - * become in-memory when converted to a different encoding. + * Represents the content of a file. + * + * The file content is memory-mapped initially, but may later become in-memory + * if the non-utf-8 content gets converted to utf-8. */ class ORCUS_PSR_DLLPUBLIC file_content { -- GitLab From 6d8086cc6b77c374f73a8ca9c660f9e56a66e114 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 17 Nov 2022 23:48:31 -0500 Subject: [PATCH 12/91] Add a separate section for stream classes and functions And add stream functions to that section. --- doc/cpp/parser/index.rst | 1 + doc/cpp/parser/stream.rst | 18 ++++++++++++++++++ doc/cpp/parser/util.rst | 6 ------ 3 files changed, 19 insertions(+), 6 deletions(-) create mode 100644 doc/cpp/parser/stream.rst diff --git a/doc/cpp/parser/index.rst b/doc/cpp/parser/index.rst index 8a12be2f..d970878d 100644 --- a/doc/cpp/parser/index.rst +++ b/doc/cpp/parser/index.rst @@ -5,6 +5,7 @@ Low-Level Parsers and Utilities .. toctree:: :maxdepth: 1 + stream.rst util.rst css.rst csv.rst diff --git a/doc/cpp/parser/stream.rst b/doc/cpp/parser/stream.rst new file mode 100644 index 00000000..a7fc2281 --- /dev/null +++ b/doc/cpp/parser/stream.rst @@ -0,0 +1,18 @@ +.. highlight:: cpp + +Stream +====== + +.. doxygenclass:: orcus::file_content + :members: + +.. doxygenclass:: orcus::memory_content + :members: + +.. doxygenstruct:: orcus::line_with_offset + :members: + +.. doxygenfunction:: orcus::create_parse_error_output +.. doxygenfunction:: orcus::locate_line_with_offset +.. doxygenfunction:: orcus::locate_first_different_char +.. 
doxygenfunction:: orcus::calc_logical_string_length diff --git a/doc/cpp/parser/util.rst b/doc/cpp/parser/util.rst index 98e0d1f1..fc1075de 100644 --- a/doc/cpp/parser/util.rst +++ b/doc/cpp/parser/util.rst @@ -15,12 +15,6 @@ Utilities .. doxygenclass:: orcus::zip_archive :members: -.. doxygenclass:: orcus::file_content - :members: - -.. doxygenclass:: orcus::memory_content - :members: - XML Types ========= -- GitLab From cd46ad28fdf6048631da605a85fff619f7bc1aff Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Fri, 18 Nov 2022 17:35:16 -0500 Subject: [PATCH 13/91] Use more compiler-default functions where applicable --- src/parser/stream.cpp | 33 +++++++-------------------------- 1 file changed, 7 insertions(+), 26 deletions(-) diff --git a/src/parser/stream.cpp b/src/parser/stream.cpp index 889258b8..33cc21dd 100644 --- a/src/parser/stream.cpp +++ b/src/parser/stream.cpp @@ -175,16 +175,12 @@ struct file_content::impl file_content::file_content() : mp_impl(std::make_unique()) {} -file_content::file_content(file_content&& other) : - mp_impl(std::move(other.mp_impl)) -{ - other.mp_impl = std::make_unique(); -} +file_content::file_content(file_content&& other) = default; file_content::file_content(std::string_view filepath) : mp_impl(std::make_unique(filepath)) {} -file_content::~file_content() {} +file_content::~file_content() = default; const char* file_content::data() const { @@ -251,13 +247,8 @@ memory_content::memory_content() : mp_impl(std::make_unique()) {} memory_content::memory_content(std::string_view s) : mp_impl(std::make_unique(s)) {} -memory_content::memory_content(memory_content&& other) : - mp_impl(std::move(other.mp_impl)) -{ - other.mp_impl = std::make_unique(); -} - -memory_content::~memory_content() {} +memory_content::memory_content(memory_content&& other) = default; +memory_content::~memory_content() = default; const char* memory_content::data() const { @@ -309,19 +300,9 @@ line_with_offset::line_with_offset(std::string _line, size_t 
_line_number, size_ offset_on_line(_offset_on_line) {} -line_with_offset::line_with_offset(const line_with_offset& other) : - line(other.line), - line_number(other.line_number), - offset_on_line(other.offset_on_line) -{} - -line_with_offset::line_with_offset(line_with_offset&& other) : - line(std::move(other.line)), - line_number(other.line_number), - offset_on_line(other.offset_on_line) -{} - -line_with_offset::~line_with_offset() {} +line_with_offset::line_with_offset(const line_with_offset& other) = default; +line_with_offset::line_with_offset(line_with_offset&& other) = default; +line_with_offset::~line_with_offset() = default; std::string create_parse_error_output(std::string_view strm, std::ptrdiff_t offset) { -- GitLab From f6d2ec51135efccf570104b59afc4e15591f00e1 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Fri, 18 Nov 2022 18:27:49 -0500 Subject: [PATCH 14/91] Document & add tests for locate_line_with_offset() Also ensure that the offset doesn't exceed the length of the stream being searched. --- include/orcus/stream.hpp | 17 ++++++++++---- slickedit/cpp.vpj | 1 + src/parser/stream.cpp | 26 +++++++++++++++++---- src/parser/stream_test.cpp | 47 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 8 deletions(-) diff --git a/include/orcus/stream.hpp b/include/orcus/stream.hpp index b864a14c..dd094bb9 100644 --- a/include/orcus/stream.hpp +++ b/include/orcus/stream.hpp @@ -118,14 +118,20 @@ public: struct ORCUS_PSR_DLLPUBLIC line_with_offset { + /** content of the entire line. */ std::string line; - size_t line_number; - size_t offset_on_line; + /** 0-based line number. */ + std::size_t line_number; + /** 0-based offset within the line. 
*/ + std::size_t offset_on_line; - line_with_offset(std::string _line, size_t _line_number, size_t _offset_on_line); + line_with_offset(std::string _line, std::size_t _line_number, std::size_t _offset_on_line); line_with_offset(const line_with_offset& other); line_with_offset(line_with_offset&& other); ~line_with_offset(); + + bool operator== (const line_with_offset& other) const; + bool operator!= (const line_with_offset& other) const; }; /** @@ -143,11 +149,14 @@ ORCUS_PSR_DLLPUBLIC std::string create_parse_error_output(std::string_view strm, * Given a string consisting of multiple lines i.e. multiple line breaks, * find the line that contains the specified offset position. * - * @param strm string buffer containing multiple lines. + * @param strm string stream containing multiple lines to search. * @param offset offset position. * * @return structure containing information about the line containing the * offset position. + * + * @exception std::invalid_argument if the offset value equals or exceeds the + * length of the string stream being searched. */ ORCUS_PSR_DLLPUBLIC line_with_offset locate_line_with_offset(std::string_view strm, std::ptrdiff_t offset); diff --git a/slickedit/cpp.vpj b/slickedit/cpp.vpj index c34a764f..f2566060 100644 --- a/slickedit/cpp.vpj +++ b/slickedit/cpp.vpj @@ -472,6 +472,7 @@ + diff --git a/src/parser/stream.cpp b/src/parser/stream.cpp index 33cc21dd..3964e4b5 100644 --- a/src/parser/stream.cpp +++ b/src/parser/stream.cpp @@ -106,8 +106,15 @@ std::tuple find_line_with_offset(std::string_v const char* p_end = p0 + strm.size(); const char* p_offset = p0 + offset; + if (p_offset >= p_end) + { + std::ostringstream os; + os << "offset value of " << offset << " is out-of-bound for a stream of length " << strm.size(); + throw std::invalid_argument(os.str()); + } + // Determine the line number. 
- size_t line_num = 1; + std::size_t line_num = 0; for (const char* p = p0; p != p_offset; ++p) { if (*p == '\n') @@ -294,7 +301,7 @@ std::string_view memory_content::str() const return mp_impl->content; } -line_with_offset::line_with_offset(std::string _line, size_t _line_number, size_t _offset_on_line) : +line_with_offset::line_with_offset(std::string _line, std::size_t _line_number, std::size_t _offset_on_line) : line(std::move(_line)), line_number(_line_number), offset_on_line(_offset_on_line) @@ -304,12 +311,23 @@ line_with_offset::line_with_offset(const line_with_offset& other) = default; line_with_offset::line_with_offset(line_with_offset&& other) = default; line_with_offset::~line_with_offset() = default; +bool line_with_offset::operator== (const line_with_offset& other) const +{ + return line == other.line && line_number == other.line_number && offset_on_line == other.offset_on_line; +} + +bool line_with_offset::operator!= (const line_with_offset& other) const +{ + return !operator==(other); +} + std::string create_parse_error_output(std::string_view strm, std::ptrdiff_t offset) { - if (offset < 0) + if (strm.empty() || offset < 0) return std::string(); const size_t max_line_length = 60; + offset = std::min(strm.size() - 1, offset); auto line_info = find_line_with_offset(strm, offset); std::string_view line = std::get<0>(line_info); @@ -319,7 +337,7 @@ std::string create_parse_error_output(std::string_view strm, std::ptrdiff_t offs if (offset_on_line < 30) { std::ostringstream os; - os << line_num << ":" << (offset_on_line+1) << ": "; + os << (line_num+1) << ":" << (offset_on_line+1) << ": "; size_t line_num_width = os.str().size(); // Truncate line if it's too long. 
diff --git a/src/parser/stream_test.cpp b/src/parser/stream_test.cpp index cc6b875f..1a6e9fbc 100644 --- a/src/parser/stream_test.cpp +++ b/src/parser/stream_test.cpp @@ -85,11 +85,58 @@ void test_stream_logical_string_length() } } +void test_stream_locate_line_with_offset() +{ + test::stack_printer __sp__(__func__); + + std::string strm = "one\ntwo\nthree"; + + struct check + { + std::ptrdiff_t offset; + line_with_offset expected; + }; + + const std::vector checks = { + { 0, { "one", 0, 0 } }, + { 1, { "one", 0, 1 } }, + { 2, { "one", 0, 2 } }, + { 3, { "one", 0, 3 } }, // on line break + { 4, { "two", 1, 0 } }, + { 5, { "two", 1, 1 } }, + { 6, { "two", 1, 2 } }, + { 7, { "two", 1, 3 } }, // on line break + { 8, { "three", 2, 0 } }, + { 9, { "three", 2, 1 } }, + { 10, { "three", 2, 2 } }, + { 11, { "three", 2, 3 } }, + { 12, { "three", 2, 4 } }, + }; + + for (const auto& c : checks) + { + auto res = locate_line_with_offset(strm, c.offset); + assert(res == c.expected); + } + + try + { + auto res = locate_line_with_offset(strm, strm.size()); + assert(!"exception should have been thrown for out-of-bound offset!"); + } + catch (const std::invalid_argument& e) + { + // expected + cout << "exception thrown as expected: '" << e.what() << "'" << endl; + } +} + int main() { test_stream_create_error_output(); test_stream_locate_first_different_char(); test_stream_logical_string_length(); + test_stream_locate_line_with_offset(); return EXIT_SUCCESS; } -- GitLab From b6a7dcd652532aacb0ecbcc99815d35c2d11c8ce Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Fri, 18 Nov 2022 20:29:36 -0500 Subject: [PATCH 15/91] Document XML parsers and related types & make trivial code cleanups --- doc/cpp/parser/xml.rst | 44 +++++++++++-- include/orcus/css_parser.hpp | 10 ++- include/orcus/csv_parser.hpp | 10 ++- include/orcus/csv_parser_base.hpp | 2 +- include/orcus/json_parser.hpp | 10 +-- include/orcus/sax_ns_parser.hpp | 102 ++++++++++++++++++++++-------- include/orcus/sax_parser.hpp | 
30 +++++---- include/orcus/sax_parser_base.hpp | 32 ++++++---- 8 files changed, 174 insertions(+), 66 deletions(-) diff --git a/doc/cpp/parser/xml.rst b/doc/cpp/parser/xml.rst index fdcf348a..d7edc31b 100644 --- a/doc/cpp/parser/xml.rst +++ b/doc/cpp/parser/xml.rst @@ -3,28 +3,47 @@ XML Parsers =========== +SAX base parser +--------------- + .. doxygenclass:: orcus::sax_parser :members: -.. doxygenclass:: orcus::sax_ns_parser +.. doxygenstruct:: orcus::sax_parser_default_config :members: -.. doxygenclass:: orcus::sax_token_parser +.. doxygenclass:: orcus::sax_handler :members: +.. doxygenstruct:: orcus::sax::parser_element + :members: -Parser Handlers ---------------- +.. doxygenstruct:: orcus::sax::parser_attribute + :members: -.. doxygenclass:: orcus::sax_handler +SAX namespace parser +-------------------- + +.. doxygenclass:: orcus::sax_ns_parser :members: .. doxygenclass:: orcus::sax_ns_handler :members: -.. doxygenclass:: orcus::sax_token_handler +.. doxygenstruct:: orcus::sax_ns_parser_element + :members: + +.. doxygenstruct:: orcus::sax_ns_parser_attribute :members: +SAX token parser +---------------- + +.. doxygenclass:: orcus::sax_token_parser + :members: + +.. doxygenclass:: orcus::sax_token_handler + :members: Namespace --------- @@ -34,3 +53,16 @@ Namespace .. doxygenclass:: orcus::xmlns_context :members: + +Common +------ + +.. doxygenclass:: orcus::sax::malformed_xml_error + :members: + +.. doxygenstruct:: orcus::sax::doctype_declaration + :members: + +.. doxygenfunction:: orcus::sax::decode_xml_encoded_char + +.. doxygenfunction:: orcus::sax::decode_xml_unicode_char diff --git a/include/orcus/css_parser.hpp b/include/orcus/css_parser.hpp index 53a5aadc..815c291f 100644 --- a/include/orcus/css_parser.hpp +++ b/include/orcus/css_parser.hpp @@ -188,11 +188,17 @@ public: void end_property() {} }; -template +/** + * Parser for CSS documents. + * + * @tparam HandlerT Hanlder type with member functions for event callbacks. + * Refer to @p css_handler. 
+ */ +template class css_parser : public css::parser_base { public: - typedef _Handler handler_type; + typedef HandlerT handler_type; css_parser(const char* p, size_t n, handler_type& hdl); void parse(); diff --git a/include/orcus/csv_parser.hpp b/include/orcus/csv_parser.hpp index 27b4f924..8f416ec9 100644 --- a/include/orcus/csv_parser.hpp +++ b/include/orcus/csv_parser.hpp @@ -53,11 +53,17 @@ public: } }; -template +/** + * Parser for CSV documents. + * + * @tparam HandlerT Hanlder type with member functions for event callbacks. + * Refer to @p csv_handler. + */ +template class csv_parser : public csv::parser_base { public: - typedef _Handler handler_type; + typedef HandlerT handler_type; csv_parser(const char* p, size_t n, handler_type& hdl, const csv::parser_config& config); void parse(); diff --git a/include/orcus/csv_parser_base.hpp b/include/orcus/csv_parser_base.hpp index e7d4be41..2784ab77 100644 --- a/include/orcus/csv_parser_base.hpp +++ b/include/orcus/csv_parser_base.hpp @@ -31,7 +31,7 @@ using std::endl; namespace orcus { namespace csv { /** - * Run-time configuration object for orcus::csv_parser. + * Run-time configuration object for @p orcus::csv_parser. */ struct ORCUS_PSR_DLLPUBLIC parser_config { diff --git a/include/orcus/json_parser.hpp b/include/orcus/json_parser.hpp index ef22b3a8..732d9dc3 100644 --- a/include/orcus/json_parser.hpp +++ b/include/orcus/json_parser.hpp @@ -107,14 +107,16 @@ public: }; /** - * Low-level JSON parser. The caller must provide a handler class to - * receive callbacks. + * Parser for JSON documents. + * + * @tparam HandlerT Hanlder type with member functions for event callbacks. + * Refer to @p json_handler. */ -template +template class json_parser : public json::parser_base { public: - typedef _Handler handler_type; + typedef HandlerT handler_type; /** * Constructor. 
diff --git a/include/orcus/sax_ns_parser.hpp b/include/orcus/sax_ns_parser.hpp index 5bd62836..6b955053 100644 --- a/include/orcus/sax_ns_parser.hpp +++ b/include/orcus/sax_ns_parser.hpp @@ -20,20 +20,30 @@ namespace orcus { struct sax_ns_parser_element { - xmlns_id_t ns; // element namespace - std::string_view ns_alias; // element namespace alias - std::string_view name; // element name - std::ptrdiff_t begin_pos; // position of the opening brace '<'. - std::ptrdiff_t end_pos; // position of the char after the closing brace '>'. + /** Element namespace identifier. */ + xmlns_id_t ns; + /** Element namespace alias. */ + std::string_view ns_alias; + /** Element name. */ + std::string_view name; + /** Position of the opening brace '<'. */ + std::ptrdiff_t begin_pos; + /** Position immediately after the closing brace '>'. */ + std::ptrdiff_t end_pos; }; struct sax_ns_parser_attribute { - xmlns_id_t ns; // attribute namespace - std::string_view ns_alias; // attribute namespace alias - std::string_view name; // attribute name - std::string_view value; // attribute value - bool transient; // whether or not the attribute value is transient. + /** Attribute namespace identifier. */ + xmlns_id_t ns; + /** Attribute namespace alias. */ + std::string_view ns_alias; + /** Attribute name. */ + std::string_view name; + /** Attribute value. */ + std::string_view value; + /** Whether or not the attribute value is transient. 
*/ + bool transient; }; namespace __sax { @@ -89,37 +99,80 @@ public: class sax_ns_handler { public: - void doctype(const orcus::sax::doctype_declaration& /*dtd*/) {} + void doctype(const orcus::sax::doctype_declaration& dtd) + { + (void)dtd; + } - void start_declaration(std::string_view /*decl*/) {} + void start_declaration(std::string_view decl) + { + (void)decl; + } - void end_declaration(std::string_view /*decl*/) {} + void end_declaration(std::string_view decl) + { + (void)decl; + } - void start_element(const orcus::sax_ns_parser_element& /*elem*/) {} + void start_element(const orcus::sax_ns_parser_element& elem) + { + (void)elem; + } - void end_element(const orcus::sax_ns_parser_element& /*elem*/) {} + void end_element(const orcus::sax_ns_parser_element& elem) + { + (void)elem; + } - void characters(std::string_view /*val*/, bool /*transient*/) {} + void characters(std::string_view val, bool transient) + { + (void)val; + (void)transient; + } - void attribute(std::string_view /*name*/, std::string_view /*val*/) {} + void attribute(std::string_view name, std::string_view val) + { + (void)name; + (void)val; + } - void attribute(const orcus::sax_ns_parser_attribute& /*attr*/) {} + void attribute(const orcus::sax_ns_parser_attribute& attr) + { + (void)attr; + } }; /** - * SAX based XML parser with proper namespace handling. + * SAX based XML parser with extra namespace handling. + * + * It uses an instance of xmlns_context passed by the caller to validate and + * convert namespace values into identifiers. The namespace identifier of + * each encountered element is always given even if one is not explicitly + * given. + * + * This parser keeps track of element scopes and detects non-matching element + * pairs. + * + * @tparam HandlerT Hanlder type with member functions for event callbacks. + * Refer to @p sax_ns_handler. 
*/ -template +template class sax_ns_parser { public: - typedef _Handler handler_type; + typedef Handler handler_type; sax_ns_parser(const char* content, const size_t size, xmlns_context& ns_cxt, handler_type& handler); sax_ns_parser(const char* content, const size_t size, bool transient_stream, xmlns_context& ns_cxt, handler_type& handler); - ~sax_ns_parser(); + ~sax_ns_parser() = default; + /** + * Start parsing the document. + * + * @exception orcus::sax::malformed_xml_error when it encounters a + * non-matching closing element. + */ void parse(); private: @@ -265,11 +318,6 @@ sax_ns_parser<_Handler>::sax_ns_parser( { } -template -sax_ns_parser<_Handler>::~sax_ns_parser() -{ -} - template void sax_ns_parser<_Handler>::parse() { diff --git a/include/orcus/sax_parser.hpp b/include/orcus/sax_parser.hpp index c15ba48f..b043e082 100644 --- a/include/orcus/sax_parser.hpp +++ b/include/orcus/sax_parser.hpp @@ -21,7 +21,7 @@ struct sax_parser_default_config * corresponds with version 1.0 whereas a value of 11 corresponds with * version 1.1. */ - static const uint8_t baseline_version = 10; + static constexpr uint8_t baseline_version = 10; }; class sax_handler @@ -113,19 +113,30 @@ public: }; /** - * Template-based sax parser that doesn't use function pointer for - * callbacks for better performance, especially on large XML streams. + * SAX parser for XML documents. + * + * This parser is barebone in that it only parses the document and picks up + * all encountered elements and attributes without checking proper element + * pairs. The user is responsible for checking whether or not the document is + * well-formed in terms of element scopes. + * + * This parser additionally records the begin and end offset positions of each + * element. + * + * @tparam HandlerT Hanlder type with member functions for event callbacks. + * Refer to @p sax_handler. + * @tparam ConfigT Parser configuration. 
*/ -template +template class sax_parser : public sax::parser_base { public: - typedef _Handler handler_type; - typedef _Config config_type; + typedef HandlerT handler_type; + typedef ConfigT config_type; sax_parser(const char* content, const size_t size, handler_type& handler); sax_parser(const char* content, const size_t size, bool transient_stream, handler_type& handler); - ~sax_parser(); + ~sax_parser() = default; void parse(); @@ -167,11 +178,6 @@ sax_parser<_Handler,_Config>::sax_parser( { } -template -sax_parser<_Handler,_Config>::~sax_parser() -{ -} - template void sax_parser<_Handler,_Config>::parse() { diff --git a/include/orcus/sax_parser_base.hpp b/include/orcus/sax_parser_base.hpp index bf12f580..5243e501 100644 --- a/include/orcus/sax_parser_base.hpp +++ b/include/orcus/sax_parser_base.hpp @@ -83,25 +83,33 @@ ORCUS_PSR_DLLPUBLIC std::string decode_xml_unicode_char(const char* p, size_t n) */ struct parser_element { - std::string_view ns; // element namespace (optional) - std::string_view name; // element name - std::ptrdiff_t begin_pos; // position of the opening brace '<'. - std::ptrdiff_t end_pos; // position of the char after the closing brace '>'. + /** Optional element namespace. It may be empty if it's not given. */ + std::string_view ns; + /** Element name. */ + std::string_view name; + /** Position of the opening brace '<'. */ + std::ptrdiff_t begin_pos; + /** Position immediately after the closing brace '>'. */ + std::ptrdiff_t end_pos; }; /** * Attribute properties passed by sax_parser to its handler's attribute() - * call. When an attribute value is transient, it has been converted due to - * presence of encoded character(s) and stored in a temporary buffer. The - * handler must assume that the value will not survive beyond the scope of - * the callback. + * call. When an attribute value is "transient", it has been converted due to + * presence of encoded character(s) and has been stored in a temporary buffer. 
+ * The handler must assume that the value will not survive after the callback + * function ends. */ struct parser_attribute { - std::string_view ns; // attribute namespace (optional) - std::string_view name; // attribute name - std::string_view value; // attribute value - bool transient; // whether or not the attribute value is on a temporary buffer. + /** Optional attribute namespace. It may be empty if it's not given. */ + std::string_view ns; + /** Attribute name. */ + std::string_view name; + /** Attribute value. */ + std::string_view value; + /** Whether or not the attribute value is in a temporary buffer. */ + bool transient; }; class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base -- GitLab From 2c306c9b21cbb10d4f77b27dcc7616f5e14ddc9e Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Fri, 18 Nov 2022 22:35:00 -0500 Subject: [PATCH 16/91] Use range-based for loop --- include/orcus/sax_ns_parser.hpp | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/include/orcus/sax_ns_parser.hpp b/include/orcus/sax_ns_parser.hpp index 6b955053..4278d317 100644 --- a/include/orcus/sax_ns_parser.hpp +++ b/include/orcus/sax_ns_parser.hpp @@ -83,17 +83,6 @@ struct elem_scope typedef std::vector> elem_scopes_type; -class pop_ns_by_key -{ - xmlns_context& m_cxt; -public: - pop_ns_by_key(xmlns_context& cxt) : m_cxt(cxt) {} - void operator() (std::string_view key) - { - m_cxt.pop(key); - } -}; - } class sax_ns_handler @@ -246,7 +235,8 @@ private: m_handler.end_element(m_elem); // Pop all namespaces declared in this scope. 
- std::for_each(scope.ns_keys.begin(), scope.ns_keys.end(), __sax::pop_ns_by_key(m_ns_cxt)); + for (const std::string_view& key : scope.ns_keys) + m_ns_cxt.pop(key); m_scopes.pop_back(); } -- GitLab From a030e4614cdc601cd4f21bf3bcffb592a5e75719 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Fri, 18 Nov 2022 22:40:25 -0500 Subject: [PATCH 17/91] Rename __sax namespace to sax::detail --- include/orcus/sax_ns_parser.hpp | 20 ++++++++++---------- src/parser/sax_token_parser_thread.cpp | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/orcus/sax_ns_parser.hpp b/include/orcus/sax_ns_parser.hpp index 4278d317..28f21410 100644 --- a/include/orcus/sax_ns_parser.hpp +++ b/include/orcus/sax_ns_parser.hpp @@ -46,7 +46,7 @@ struct sax_ns_parser_attribute bool transient; }; -namespace __sax { +namespace sax { namespace detail { struct entity_name { @@ -83,7 +83,7 @@ struct elem_scope typedef std::vector> elem_scopes_type; -} +}} // namespace sax::detail class sax_ns_handler { @@ -171,9 +171,9 @@ private: */ class handler_wrapper { - __sax::elem_scopes_type m_scopes; - __sax::ns_keys_type m_ns_keys; - __sax::entity_names_type m_attrs; + sax::detail::elem_scopes_type m_scopes; + sax::detail::ns_keys_type m_ns_keys; + sax::detail::entity_names_type m_attrs; sax_ns_parser_element m_elem; sax_ns_parser_attribute m_attr; @@ -205,8 +205,8 @@ private: void start_element(const sax::parser_element& elem) { - m_scopes.push_back(std::make_unique<__sax::elem_scope>()); - __sax::elem_scope& scope = *m_scopes.back(); + m_scopes.push_back(std::make_unique()); + sax::detail::elem_scope& scope = *m_scopes.back(); scope.ns = m_ns_cxt.get(elem.ns); scope.name = elem.name; scope.ns_keys.swap(m_ns_keys); @@ -223,7 +223,7 @@ private: void end_element(const sax::parser_element& elem) { - __sax::elem_scope& scope = *m_scopes.back(); + sax::detail::elem_scope& scope = *m_scopes.back(); if (scope.ns != m_ns_cxt.get(elem.ns) || scope.name != elem.name) throw 
sax::malformed_xml_error("mis-matching closing element.", -1); @@ -255,11 +255,11 @@ private: return; } - if (m_attrs.count(__sax::entity_name(attr.ns, attr.name)) > 0) + if (m_attrs.count(sax::detail::entity_name(attr.ns, attr.name)) > 0) throw sax::malformed_xml_error( "You can't define two attributes of the same name in the same element.", -1); - m_attrs.insert(__sax::entity_name(attr.ns, attr.name)); + m_attrs.insert(sax::detail::entity_name(attr.ns, attr.name)); if (attr.ns.empty() && attr.name == "xmlns") { diff --git a/src/parser/sax_token_parser_thread.cpp b/src/parser/sax_token_parser_thread.cpp index 36cef8f7..92ea0d6c 100644 --- a/src/parser/sax_token_parser_thread.cpp +++ b/src/parser/sax_token_parser_thread.cpp @@ -53,7 +53,7 @@ bool parse_token::operator!= (const parse_token& other) const struct parser_thread::impl { - detail::thread::parser_token_buffer m_token_buffer; + orcus::detail::thread::parser_token_buffer m_token_buffer; string_pool m_pool; std::vector> m_element_store; -- GitLab From ef2d0f6d0784ecb9eedd6ea314d21ee869d20d02 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Sat, 19 Nov 2022 11:17:08 -0500 Subject: [PATCH 18/91] No need to use std::unique_ptr for element scope structure --- include/orcus/sax_ns_parser.hpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/include/orcus/sax_ns_parser.hpp b/include/orcus/sax_ns_parser.hpp index 28f21410..1a9868a3 100644 --- a/include/orcus/sax_ns_parser.hpp +++ b/include/orcus/sax_ns_parser.hpp @@ -13,7 +13,6 @@ #include #include -#include #include namespace orcus { @@ -79,9 +78,13 @@ struct elem_scope xmlns_id_t ns; std::string_view name; ns_keys_type ns_keys; + + elem_scope() {} + elem_scope(const elem_scope&) = delete; + elem_scope(elem_scope&& other) = default; }; -typedef std::vector> elem_scopes_type; +using elem_scopes_type = std::vector; }} // namespace sax::detail @@ -205,8 +208,8 @@ private: void start_element(const sax::parser_element& elem) { - 
m_scopes.push_back(std::make_unique()); - sax::detail::elem_scope& scope = *m_scopes.back(); + m_scopes.emplace_back(); + sax::detail::elem_scope& scope = m_scopes.back(); scope.ns = m_ns_cxt.get(elem.ns); scope.name = elem.name; scope.ns_keys.swap(m_ns_keys); @@ -223,7 +226,7 @@ private: void end_element(const sax::parser_element& elem) { - sax::detail::elem_scope& scope = *m_scopes.back(); + sax::detail::elem_scope& scope = m_scopes.back(); if (scope.ns != m_ns_cxt.get(elem.ns) || scope.name != elem.name) throw sax::malformed_xml_error("mis-matching closing element.", -1); -- GitLab From 316eee7c98b0cfa8c0727e3a04a6539cd981a515 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 21 Nov 2022 18:47:57 -0500 Subject: [PATCH 19/91] Add docs to ns-handler members --- include/orcus/sax_ns_parser.hpp | 60 +++++++++++++++++++++++++++++++++ include/orcus/sax_parser.hpp | 6 ++-- 2 files changed, 63 insertions(+), 3 deletions(-) diff --git a/include/orcus/sax_ns_parser.hpp b/include/orcus/sax_ns_parser.hpp index 1a9868a3..40816df5 100644 --- a/include/orcus/sax_ns_parser.hpp +++ b/include/orcus/sax_ns_parser.hpp @@ -91,43 +91,103 @@ using elem_scopes_type = std::vector; class sax_ns_handler { public: + /** + * Called when a doctype declaration <!DOCTYPE ... > is encountered. + * + * @param dtd struct containing doctype declaration data. + */ void doctype(const orcus::sax::doctype_declaration& dtd) { (void)dtd; } + /** + * Called when <?... is encountered, where the '...' may be an + * arbitraray dentifier. One common declaration is <?xml which is + * typically given at the start of an XML stream. + * + * @param decl name of the identifier. + */ void start_declaration(std::string_view decl) { (void)decl; } + /** + * Called when the closing tag (>) of a <?... ?> is encountered. + * + * @param decl name of the identifier. + */ void end_declaration(std::string_view decl) { (void)decl; } + /** + * Called at the start of each element. 
+ * + * @param elem information of the element being parsed. + */ void start_element(const orcus::sax_ns_parser_element& elem) { (void)elem; } + /** + * Called at the end of each element. + * + * @param elem information of the element being parsed. + */ void end_element(const orcus::sax_ns_parser_element& elem) { (void)elem; } + /** + * Called when a segment of a text content is parsed. Each text content + * is a direct child of an element, which may have multiple child contents + * when the element also has a child element that are direct sibling to + * the text contents or the text contents are splitted by a comment. + * + * @param val value of the text content. + * @param transient when true, the text content has been converted and is + * stored in a temporary buffer due to presence of one or + * more encoded characters, in which case the passed + * text value needs to be either immediately converted to + * a non-text value or be interned within the scope of + * the callback. + */ void characters(std::string_view val, bool transient) { (void)val; (void)transient; } + /** + * Called upon parsing of an attribute of a declaration. The value of an + * attribute is assumed to be transient thus should be consumed within the + * scope of this callback. + * + * @param name name of an attribute. + * @param val value of an attribute. + * + * @todo Perhaps we should pass the transient flag here as well like all the + * other places. + */ void attribute(std::string_view name, std::string_view val) { (void)name; (void)val; } + /** + * Called upon parsing of an attribute of an element. Note that when + * the attribute's transient flag is set, the attribute value is stored in + * a temporary buffer due to a presence of encoded characters, and must be + * processed within the scope of the callback. + * + * @param attr struct containing attribute information. 
+ */ void attribute(const orcus::sax_ns_parser_attribute& attr) { (void)attr; diff --git a/include/orcus/sax_parser.hpp b/include/orcus/sax_parser.hpp index b043e082..2727bad3 100644 --- a/include/orcus/sax_parser.hpp +++ b/include/orcus/sax_parser.hpp @@ -30,11 +30,11 @@ public: /** * Called when a doctype declaration <!DOCTYPE ... > is encountered. * - * @param param struct containing doctype declaration data. + * @param dtd struct containing doctype declaration data. */ - void doctype(const orcus::sax::doctype_declaration& param) + void doctype(const orcus::sax::doctype_declaration& dtd) { - (void)param; + (void)dtd; } /** -- GitLab From b3e3dcbfa6cc0b79224916a812b902cbbdc21aac Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 21 Nov 2022 18:50:27 -0500 Subject: [PATCH 20/91] Apply the same renaming of template parameters to the definitions --- include/orcus/sax_ns_parser.hpp | 16 +++++----- include/orcus/sax_parser.hpp | 56 ++++++++++++++++----------------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/include/orcus/sax_ns_parser.hpp b/include/orcus/sax_ns_parser.hpp index 40816df5..68c72750 100644 --- a/include/orcus/sax_ns_parser.hpp +++ b/include/orcus/sax_ns_parser.hpp @@ -208,11 +208,11 @@ public: * @tparam HandlerT Hanlder type with member functions for event callbacks. * Refer to @p sax_ns_handler. 
*/ -template +template class sax_ns_parser { public: - typedef Handler handler_type; + typedef HandlerT handler_type; sax_ns_parser(const char* content, const size_t size, xmlns_context& ns_cxt, handler_type& handler); sax_ns_parser(const char* content, const size_t size, bool transient_stream, @@ -357,22 +357,22 @@ private: sax_parser m_parser; }; -template -sax_ns_parser<_Handler>::sax_ns_parser( +template +sax_ns_parser::sax_ns_parser( const char* content, const size_t size, xmlns_context& ns_cxt, handler_type& handler) : m_wrapper(ns_cxt, handler), m_parser(content, size, m_wrapper) { } -template -sax_ns_parser<_Handler>::sax_ns_parser( +template +sax_ns_parser::sax_ns_parser( const char* content, const size_t size, bool transient_stream, xmlns_context& ns_cxt, handler_type& handler) : m_wrapper(ns_cxt, handler), m_parser(content, size, transient_stream, m_wrapper) { } -template -void sax_ns_parser<_Handler>::parse() +template +void sax_ns_parser::parse() { m_parser.parse(); } diff --git a/include/orcus/sax_parser.hpp b/include/orcus/sax_parser.hpp index 2727bad3..6fa99789 100644 --- a/include/orcus/sax_parser.hpp +++ b/include/orcus/sax_parser.hpp @@ -162,24 +162,24 @@ private: handler_type& m_handler; }; -template -sax_parser<_Handler,_Config>::sax_parser( +template +sax_parser::sax_parser( const char* content, const size_t size, handler_type& handler) : sax::parser_base(content, size, false), m_handler(handler) { } -template -sax_parser<_Handler,_Config>::sax_parser( +template +sax_parser::sax_parser( const char* content, const size_t size, bool transient_stream, handler_type& handler) : sax::parser_base(content, size, transient_stream), m_handler(handler) { } -template -void sax_parser<_Handler,_Config>::parse() +template +void sax_parser::parse() { m_nest_level = 0; mp_char = mp_begin; @@ -190,8 +190,8 @@ void sax_parser<_Handler,_Config>::parse() assert(m_buffer_pos == 0); } -template -void sax_parser<_Handler,_Config>::header() +template +void 
sax_parser::header() { // we don't handle multi byte encodings so we can just skip bom entry if exists. skip_bom(); @@ -210,8 +210,8 @@ void sax_parser<_Handler,_Config>::header() } } -template -void sax_parser<_Handler,_Config>::body() +template +void sax_parser::body() { while (has_char()) { @@ -230,8 +230,8 @@ void sax_parser<_Handler,_Config>::body() } } -template -void sax_parser<_Handler,_Config>::element() +template +void sax_parser::element() { assert(cur_char() == '<'); std::ptrdiff_t pos = offset(); @@ -252,8 +252,8 @@ void sax_parser<_Handler,_Config>::element() element_open(pos); } -template -void sax_parser<_Handler,_Config>::element_open(std::ptrdiff_t begin_pos) +template +void sax_parser::element_open(std::ptrdiff_t begin_pos) { sax::parser_element elem; element_name(elem, begin_pos); @@ -297,8 +297,8 @@ void sax_parser<_Handler,_Config>::element_open(std::ptrdiff_t begin_pos) } } -template -void sax_parser<_Handler,_Config>::element_close(std::ptrdiff_t begin_pos) +template +void sax_parser::element_close(std::ptrdiff_t begin_pos) { assert(cur_char() == '/'); nest_down(); @@ -319,8 +319,8 @@ void sax_parser<_Handler,_Config>::element_close(std::ptrdiff_t begin_pos) m_root_elem_open = false; } -template -void sax_parser<_Handler,_Config>::special_tag() +template +void sax_parser::special_tag() { assert(cur_char() == '!'); // This can be either ::special_tag() } } -template -void sax_parser<_Handler,_Config>::declaration(const char* name_check) +template +void sax_parser::declaration(const char* name_check) { assert(cur_char() == '?'); next_check(); @@ -406,8 +406,8 @@ void sax_parser<_Handler,_Config>::declaration(const char* name_check) #endif } -template -void sax_parser<_Handler,_Config>::cdata() +template +void sax_parser::cdata() { size_t len = remains(); assert(len > 3); @@ -443,8 +443,8 @@ void sax_parser<_Handler,_Config>::cdata() throw sax::malformed_xml_error("malformed CDATA section.", offset()); } -template -void 
sax_parser<_Handler,_Config>::doctype() +template +void sax_parser::doctype() { // Parse the root element first. sax::doctype_declaration param; @@ -509,8 +509,8 @@ void sax_parser<_Handler,_Config>::doctype() next(); } -template -void sax_parser<_Handler,_Config>::characters() +template +void sax_parser::characters() { const char* p0 = mp_char; for (; has_char(); next()) @@ -540,8 +540,8 @@ void sax_parser<_Handler,_Config>::characters() } } -template -void sax_parser<_Handler,_Config>::attribute() +template +void sax_parser::attribute() { sax::parser_attribute attr; attribute_name(attr.ns, attr.name); -- GitLab From bb6ee140fc87cc53b86620a09211b326a5489d14 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 21 Nov 2022 21:04:59 -0500 Subject: [PATCH 21/91] Document sax_token_parser and related types --- doc/cpp/parser/exception.rst | 29 +++++++++++++++ doc/cpp/parser/index.rst | 1 + doc/cpp/parser/stream.rst | 12 +++++++ include/orcus/sax_ns_parser.hpp | 4 +-- include/orcus/sax_parser.hpp | 4 +-- include/orcus/sax_parser_base.hpp | 3 ++ include/orcus/sax_token_parser.hpp | 36 ++++++++++--------- slickedit/doc.vpj | 57 ++---------------------------- 8 files changed, 72 insertions(+), 74 deletions(-) create mode 100644 doc/cpp/parser/exception.rst diff --git a/doc/cpp/parser/exception.rst b/doc/cpp/parser/exception.rst new file mode 100644 index 00000000..71446f03 --- /dev/null +++ b/doc/cpp/parser/exception.rst @@ -0,0 +1,29 @@ +.. highlight:: cpp + +Exceptions +========== + +.. doxygenclass:: orcus::general_error + :members: + +.. doxygenclass:: orcus::invalid_arg_error + :members: + +.. doxygenclass:: orcus::xml_structure_error + :members: + +.. doxygenclass:: orcus::json_structure_error + :members: + +.. doxygenclass:: orcus::invalid_map_error + :members: + +.. doxygenclass:: orcus::value_error + :members: + +.. doxygenclass:: orcus::xpath_error + :members: + +.. 
doxygenclass:: orcus::interface_error + :members: + diff --git a/doc/cpp/parser/index.rst b/doc/cpp/parser/index.rst index d970878d..c55fa63a 100644 --- a/doc/cpp/parser/index.rst +++ b/doc/cpp/parser/index.rst @@ -5,6 +5,7 @@ Low-Level Parsers and Utilities .. toctree:: :maxdepth: 1 + exception.rst stream.rst util.rst css.rst diff --git a/doc/cpp/parser/stream.rst b/doc/cpp/parser/stream.rst index a7fc2281..3e12d896 100644 --- a/doc/cpp/parser/stream.rst +++ b/doc/cpp/parser/stream.rst @@ -3,12 +3,24 @@ Stream ====== +Stream buffers +-------------- + .. doxygenclass:: orcus::file_content :members: .. doxygenclass:: orcus::memory_content :members: +Exceptions +---------- + +.. doxygenclass:: orcus::parse_error + :members: + +Utility functions +----------------- + .. doxygenstruct:: orcus::line_with_offset :members: diff --git a/include/orcus/sax_ns_parser.hpp b/include/orcus/sax_ns_parser.hpp index 68c72750..15253298 100644 --- a/include/orcus/sax_ns_parser.hpp +++ b/include/orcus/sax_ns_parser.hpp @@ -205,8 +205,8 @@ public: * This parser keeps track of element scopes and detects non-matching element * pairs. * - * @tparam HandlerT Hanlder type with member functions for event callbacks. - * Refer to @p sax_ns_handler. + * @tparam HandlerT Handler type with member functions for event callbacks. + * Refer to @ref sax_ns_handler. */ template class sax_ns_parser diff --git a/include/orcus/sax_parser.hpp b/include/orcus/sax_parser.hpp index 6fa99789..a6ec042a 100644 --- a/include/orcus/sax_parser.hpp +++ b/include/orcus/sax_parser.hpp @@ -123,8 +123,8 @@ public: * This parser additionally records the begin and end offset positions of each * element. * - * @tparam HandlerT Hanlder type with member functions for event callbacks. - * Refer to @p sax_handler. + * @tparam HandlerT Handler type with member functions for event callbacks. + * Refer to @ref sax_handler. * @tparam ConfigT Parser configuration. 
*/ template diff --git a/include/orcus/sax_parser_base.hpp b/include/orcus/sax_parser_base.hpp index 5243e501..15897a18 100644 --- a/include/orcus/sax_parser_base.hpp +++ b/include/orcus/sax_parser_base.hpp @@ -29,6 +29,9 @@ using std::endl; namespace orcus { namespace sax { +/** + * This exception is thrown when SAX parser detects a malformed XML document. + */ class ORCUS_PSR_DLLPUBLIC malformed_xml_error : public ::orcus::parse_error { public: diff --git a/include/orcus/sax_token_parser.hpp b/include/orcus/sax_token_parser.hpp index 5e86af3e..2227a852 100644 --- a/include/orcus/sax_token_parser.hpp +++ b/include/orcus/sax_token_parser.hpp @@ -119,13 +119,22 @@ public: }; /** - * XML parser that tokenizes element and attribute names while parsing. + * SAX parser that tokenizes element and attribute names while parsing. All + * pre-defined elements and attribute names are translated into integral + * identifiers via use of @ref tokens. The user of this class needs to + * provide a pre-defined set of element and attribute names at construction + * time. + * + * This parser internally uses @ref sax_ns_parser. + * + * @tparam HandlerT Handler type with member functions for event callbacks. + * Refer to @ref sax_token_handler. 
*/ -template +template class sax_token_parser { public: - typedef _Handler handler_type; + typedef HandlerT handler_type; sax_token_parser( const char* content, const size_t size, const tokens& _tokens, @@ -135,7 +144,7 @@ public: const char* content, const size_t size, bool transient_stream, const tokens& _tokens, xmlns_context& ns_cxt, handler_type& handler); - ~sax_token_parser(); + ~sax_token_parser() = default; void parse(); @@ -187,16 +196,16 @@ private: sax_ns_parser m_parser; }; -template -sax_token_parser<_Handler>::sax_token_parser( +template +sax_token_parser::sax_token_parser( const char* content, const size_t size, const tokens& _tokens, xmlns_context& ns_cxt, handler_type& handler) : m_wrapper(_tokens, handler), m_parser(content, size, ns_cxt, m_wrapper) { } -template -sax_token_parser<_Handler>::sax_token_parser( +template +sax_token_parser::sax_token_parser( const char* content, const size_t size, bool transient_stream, const tokens& _tokens, xmlns_context& ns_cxt, handler_type& handler) : m_wrapper(_tokens, handler), @@ -204,18 +213,13 @@ sax_token_parser<_Handler>::sax_token_parser( { } -template -sax_token_parser<_Handler>::~sax_token_parser() -{ -} - -template -void sax_token_parser<_Handler>::parse() +template +void sax_token_parser::parse() { m_parser.parse(); } -} +} // namespace orcus #endif /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/slickedit/doc.vpj b/slickedit/doc.vpj index 252ca4b4..0486dff5 100644 --- a/slickedit/doc.vpj +++ b/slickedit/doc.vpj @@ -70,60 +70,9 @@ Name="Other Files" Filters="" GUID="{58BF30C7-CBF2-4454-ABD3-F8D694316022}"> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + -- GitLab From b89c98ed8917783fda0152a15197302ea67a582a Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 21 Nov 2022 21:07:32 -0500 Subject: [PATCH 22/91] Destructor with throw() is not a thing anymore --- include/orcus/csv_parser_base.hpp | 2 +- 
include/orcus/json_document_tree.hpp | 4 ++-- include/orcus/sax_parser_base.hpp | 2 +- include/orcus/yaml_document_tree.hpp | 2 +- include/orcus/zip_archive.hpp | 2 +- src/liborcus/json_document_tree.cpp | 4 ++-- src/liborcus/yaml_document_tree.cpp | 2 +- src/parser/csv_parser_base.cpp | 2 +- src/parser/sax_parser_base.cpp | 2 +- src/parser/zip_archive.cpp | 2 +- 10 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/orcus/csv_parser_base.hpp b/include/orcus/csv_parser_base.hpp index 2784ab77..40f14975 100644 --- a/include/orcus/csv_parser_base.hpp +++ b/include/orcus/csv_parser_base.hpp @@ -59,7 +59,7 @@ class ORCUS_PSR_DLLPUBLIC parse_error : public std::exception std::string m_msg; public: parse_error(const std::string& msg); - virtual ~parse_error() throw(); + virtual ~parse_error(); virtual const char* what() const throw(); }; diff --git a/include/orcus/json_document_tree.hpp b/include/orcus/json_document_tree.hpp index ffba35b8..e558c38c 100644 --- a/include/orcus/json_document_tree.hpp +++ b/include/orcus/json_document_tree.hpp @@ -32,7 +32,7 @@ class ORCUS_DLLPUBLIC document_error : public general_error { public: document_error(const std::string& msg); - virtual ~document_error() throw(); + virtual ~document_error(); }; /** @@ -44,7 +44,7 @@ class ORCUS_DLLPUBLIC key_value_error : public document_error { public: key_value_error(const std::string& msg); - virtual ~key_value_error() throw(); + virtual ~key_value_error(); }; enum class node_t : uint8_t diff --git a/include/orcus/sax_parser_base.hpp b/include/orcus/sax_parser_base.hpp index 15897a18..5d58624b 100644 --- a/include/orcus/sax_parser_base.hpp +++ b/include/orcus/sax_parser_base.hpp @@ -37,7 +37,7 @@ class ORCUS_PSR_DLLPUBLIC malformed_xml_error : public ::orcus::parse_error public: malformed_xml_error() = delete; malformed_xml_error(const std::string& msg, std::ptrdiff_t offset); - virtual ~malformed_xml_error() throw(); + virtual ~malformed_xml_error(); }; /** diff --git 
a/include/orcus/yaml_document_tree.hpp b/include/orcus/yaml_document_tree.hpp index 6e8d55bd..d22a588d 100644 --- a/include/orcus/yaml_document_tree.hpp +++ b/include/orcus/yaml_document_tree.hpp @@ -25,7 +25,7 @@ class ORCUS_DLLPUBLIC document_error : public general_error { public: document_error(const std::string& msg); - virtual ~document_error() throw(); + virtual ~document_error(); }; enum class node_t : uint8_t diff --git a/include/orcus/zip_archive.hpp b/include/orcus/zip_archive.hpp index 47ceb0c7..c7ea1d9a 100644 --- a/include/orcus/zip_archive.hpp +++ b/include/orcus/zip_archive.hpp @@ -26,7 +26,7 @@ class ORCUS_PSR_DLLPUBLIC zip_error : public std::exception public: zip_error(); zip_error(const std::string& msg); - virtual ~zip_error() throw(); + virtual ~zip_error(); virtual const char* what() const throw(); }; diff --git a/src/liborcus/json_document_tree.cpp b/src/liborcus/json_document_tree.cpp index e23521ea..508f7e3e 100644 --- a/src/liborcus/json_document_tree.cpp +++ b/src/liborcus/json_document_tree.cpp @@ -106,12 +106,12 @@ std::ostream& operator<< (std::ostream& os, node_t nt) document_error::document_error(const std::string& msg) : general_error("json::document_error", msg) {} -document_error::~document_error() throw() {} +document_error::~document_error() = default; key_value_error::key_value_error(const std::string& msg) : document_error(msg) {} -key_value_error::~key_value_error() throw() {} +key_value_error::~key_value_error() = default; struct json_value final { diff --git a/src/liborcus/yaml_document_tree.cpp b/src/liborcus/yaml_document_tree.cpp index 7a580483..7ac0735b 100644 --- a/src/liborcus/yaml_document_tree.cpp +++ b/src/liborcus/yaml_document_tree.cpp @@ -30,7 +30,7 @@ namespace orcus { namespace yaml { document_error::document_error(const std::string& msg) : general_error("yaml_document_error", msg) {} -document_error::~document_error() throw() {} +document_error::~document_error() = default; struct yaml_value { diff --git 
a/src/parser/csv_parser_base.cpp b/src/parser/csv_parser_base.cpp index 517e2073..9f56cb41 100644 --- a/src/parser/csv_parser_base.cpp +++ b/src/parser/csv_parser_base.cpp @@ -17,7 +17,7 @@ parser_config::parser_config() : parse_error::parse_error(const std::string& msg) : m_msg(msg) {} -parse_error::~parse_error() throw() {} +parse_error::~parse_error() = default; const char* parse_error::what() const throw() { diff --git a/src/parser/sax_parser_base.cpp b/src/parser/sax_parser_base.cpp index 54dd89ae..540f77f4 100644 --- a/src/parser/sax_parser_base.cpp +++ b/src/parser/sax_parser_base.cpp @@ -22,7 +22,7 @@ namespace orcus { namespace sax { malformed_xml_error::malformed_xml_error(const std::string& msg, std::ptrdiff_t offset) : ::orcus::parse_error("malformed_xml_error", msg, offset) {} -malformed_xml_error::~malformed_xml_error() throw() {} +malformed_xml_error::~malformed_xml_error() = default; char decode_xml_encoded_char(const char* p, size_t n) { diff --git a/src/parser/zip_archive.cpp b/src/parser/zip_archive.cpp index ee827a2f..386adbbd 100644 --- a/src/parser/zip_archive.cpp +++ b/src/parser/zip_archive.cpp @@ -32,7 +32,7 @@ zip_error::zip_error(const std::string& msg) : m_msg() m_msg = os.str(); } -zip_error::~zip_error() throw() {} +zip_error::~zip_error() = default; const char* zip_error::what() const throw() { -- GitLab From a1bfd021e0800cbcef7b9e284acc16b3b517a403 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 21 Nov 2022 21:24:45 -0500 Subject: [PATCH 23/91] Move generic parse_error into exception.hpp --- Makefile.am | 3 ++ doc/cpp/parser/exception.rst | 2 ++ doc/cpp/parser/stream.rst | 6 ---- include/orcus/exception.hpp | 19 +++++++++++ include/orcus/parser_base.hpp | 22 ++---------- src/parser/css_parser_base.cpp | 4 +-- src/parser/exception.cpp | 43 ++++++++++++++++++++++++ src/parser/json_parser_base.cpp | 2 +- src/parser/parser_base.cpp | 59 --------------------------------- src/parser/yaml_parser_base.cpp | 2 +- 10 files changed, 73 
insertions(+), 89 deletions(-) diff --git a/Makefile.am b/Makefile.am index fe9aff9e..f9faa49a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -819,5 +819,8 @@ doc-sphinx: doc: doc-doxygen doc-sphinx +doc-clean: + @rm -rf ./doc/_build ./doc/_doxygen + diff --git a/doc/cpp/parser/exception.rst b/doc/cpp/parser/exception.rst index 71446f03..b30ad09b 100644 --- a/doc/cpp/parser/exception.rst +++ b/doc/cpp/parser/exception.rst @@ -27,3 +27,5 @@ Exceptions .. doxygenclass:: orcus::interface_error :members: +.. doxygenclass:: orcus::parse_error + :members: diff --git a/doc/cpp/parser/stream.rst b/doc/cpp/parser/stream.rst index 3e12d896..6f8ecde5 100644 --- a/doc/cpp/parser/stream.rst +++ b/doc/cpp/parser/stream.rst @@ -12,12 +12,6 @@ Stream buffers .. doxygenclass:: orcus::memory_content :members: -Exceptions ----------- - -.. doxygenclass:: orcus::parse_error - :members: - Utility functions ----------------- diff --git a/include/orcus/exception.hpp b/include/orcus/exception.hpp index 3d82535b..239e0231 100644 --- a/include/orcus/exception.hpp +++ b/include/orcus/exception.hpp @@ -86,6 +86,25 @@ public: virtual ~interface_error() noexcept; }; +/** + * Exception related to parsing error that includes the offset in the stream + * where the error occurred. + */ +class ORCUS_PSR_DLLPUBLIC parse_error : public general_error +{ + std::ptrdiff_t m_offset; /// offset in the stream where the error occurred. 
+ +protected: + parse_error(const std::string& msg, std::ptrdiff_t offset); + parse_error(const std::string& cls, const std::string& msg, std::ptrdiff_t offset); + + static std::string build_message(std::string_view msg_before, char c, std::string_view msg_after); + static std::string build_message(std::string_view msg_before, std::string_view msg, std::string_view msg_after); + +public: + std::ptrdiff_t offset() const; +}; + namespace detail { /** diff --git a/include/orcus/parser_base.hpp b/include/orcus/parser_base.hpp index 26752849..eb12c63b 100644 --- a/include/orcus/parser_base.hpp +++ b/include/orcus/parser_base.hpp @@ -8,8 +8,8 @@ #ifndef INCLUDED_ORCUS_PARSER_BASE_HPP #define INCLUDED_ORCUS_PARSER_BASE_HPP -#include "orcus/env.hpp" -#include "orcus/exception.hpp" +#include "env.hpp" +#include "exception.hpp" #include #include @@ -19,24 +19,6 @@ namespace orcus { -/** - * Exception related to parsing error that includes the offset in the stream - * where the error occurred. - */ -class ORCUS_PSR_DLLPUBLIC parse_error : public general_error -{ - std::ptrdiff_t m_offset; /// offset in the stream where the error occurred. 
-protected: - parse_error(const std::string& msg, std::ptrdiff_t offset); - parse_error(const std::string& cls, const std::string& msg, std::ptrdiff_t offset); - - static std::string build_message(const char* msg_before, char c, const char* msg_after); - static std::string build_message(const char* msg_before, const char* p, size_t n, const char* msg_after); - -public: - std::ptrdiff_t offset() const; -}; - class ORCUS_PSR_DLLPUBLIC parser_base { protected: diff --git a/src/parser/css_parser_base.cpp b/src/parser/css_parser_base.cpp index 34f7d6ef..3ce101b8 100644 --- a/src/parser/css_parser_base.cpp +++ b/src/parser/css_parser_base.cpp @@ -30,12 +30,12 @@ void parse_error::throw_with(const char* msg_before, char c, const char* msg_aft void parse_error::throw_with( const char* msg_before, const char* p, size_t n, const char* msg_after) { - throw parse_error(build_message(msg_before, p, n, msg_after)); + throw parse_error(build_message(msg_before, {p, n}, msg_after)); } void parse_error::throw_with(const char* msg_before, std::string_view s, const char* msg_after) { - throw parse_error(build_message(msg_before, s.data(), s.size(), msg_after)); + throw parse_error(build_message(msg_before, s, msg_after)); } parser_base::parser_base(const char* p, size_t n) : diff --git a/src/parser/exception.cpp b/src/parser/exception.cpp index 71e1f336..d0906835 100644 --- a/src/parser/exception.cpp +++ b/src/parser/exception.cpp @@ -72,5 +72,48 @@ interface_error::interface_error(const std::string& msg) : general_error(msg) {} interface_error::~interface_error() noexcept {} +namespace { + +std::string build_offset_msg(std::ptrdiff_t offset) +{ + std::ostringstream os; + os << " (offset=" << offset << ')'; + return os.str(); +} + +} + +parse_error::parse_error(const std::string& msg, std::ptrdiff_t offset) : + general_error(msg), m_offset(offset) +{ + append_msg(build_offset_msg(offset)); +} + +parse_error::parse_error(const std::string& cls, const std::string& msg, std::ptrdiff_t 
offset) : + general_error(cls, msg), m_offset(offset) +{ + append_msg(build_offset_msg(offset)); +} + +std::ptrdiff_t parse_error::offset() const +{ + return m_offset; +} + +std::string parse_error::build_message(std::string_view msg_before, char c, std::string_view msg_after) +{ + std::ostringstream os; + os << msg_before << c << msg_after; + return os.str(); +} + +std::string parse_error::build_message( + std::string_view msg_before, std::string_view msg, std::string_view msg_after) +{ + std::ostringstream os; + os << msg_before << msg << msg_after; + return os.str(); +} + } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/src/parser/json_parser_base.cpp b/src/parser/json_parser_base.cpp index 1e1e1ee6..9c56c61c 100644 --- a/src/parser/json_parser_base.cpp +++ b/src/parser/json_parser_base.cpp @@ -43,7 +43,7 @@ void parse_error::throw_with( void parse_error::throw_with( const char* msg_before, const char* p, size_t n, const char* msg_after, std::ptrdiff_t offset) { - throw parse_error(build_message(msg_before, p, n, msg_after), offset); + throw parse_error(build_message(msg_before, {p, n}, msg_after), offset); } struct parser_base::impl diff --git a/src/parser/parser_base.cpp b/src/parser/parser_base.cpp index 8a392860..17924f5e 100644 --- a/src/parser/parser_base.cpp +++ b/src/parser/parser_base.cpp @@ -21,65 +21,6 @@ namespace orcus { -namespace { - -std::string build_offset_msg(std::ptrdiff_t offset) -{ - std::ostringstream os; - os << " (offset=" << offset << ')'; - return os.str(); -} - -} - -parse_error::parse_error(const std::string& msg, std::ptrdiff_t offset) : - general_error(msg), m_offset(offset) -{ - append_msg(build_offset_msg(offset)); -} - -parse_error::parse_error(const std::string& cls, const std::string& msg, std::ptrdiff_t offset) : - general_error(cls, msg), m_offset(offset) -{ - append_msg(build_offset_msg(offset)); -} - -std::ptrdiff_t parse_error::offset() const -{ - return m_offset; -} - -std::string 
parse_error::build_message(const char* msg_before, char c, const char* msg_after) -{ - std::ostringstream os; - - if (msg_before) - os << msg_before; - - os << c; - - if (msg_after) - os << msg_after; - - return os.str(); -} - -std::string parse_error::build_message( - const char* msg_before, const char* p, size_t n, const char* msg_after) -{ - std::ostringstream os; - - if (msg_before) - os << msg_before; - - os << std::string_view(p, n); - - if (msg_after) - os << msg_after; - - return os.str(); -} - parser_base::parser_base(const char* p, size_t n, bool transient_stream) : mp_begin(p), mp_char(p), mp_end(p+n), m_transient_stream(transient_stream), diff --git a/src/parser/yaml_parser_base.cpp b/src/parser/yaml_parser_base.cpp index 2ae028ca..e4633ea5 100644 --- a/src/parser/yaml_parser_base.cpp +++ b/src/parser/yaml_parser_base.cpp @@ -30,7 +30,7 @@ void parse_error::throw_with(const char* msg_before, char c, const char* msg_aft void parse_error::throw_with( const char* msg_before, const char* p, size_t n, const char* msg_after, std::ptrdiff_t offset) { - throw parse_error(build_message(msg_before, p, n, msg_after), offset); + throw parse_error(build_message(msg_before, {p, n}, msg_after), offset); } struct scope -- GitLab From cd48c621213b3d2b56e95a5c8123c696ce31e9ac Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 21 Nov 2022 21:40:00 -0500 Subject: [PATCH 24/91] Remove json::parse_error; just use orcus::parse_error --- include/orcus/exception.hpp | 12 ++++-- include/orcus/json_parser.hpp | 44 +++++++++++----------- include/orcus/json_parser_base.hpp | 17 ++------- include/orcus/threaded_json_parser.hpp | 2 +- src/liborcus/json_document_tree.cpp | 2 +- src/liborcus/json_document_tree_test.cpp | 4 +- src/liborcus/orcus_json.cpp | 2 +- src/orcus_json_cli.cpp | 2 +- src/parser/exception.cpp | 13 +++++++ src/parser/json_parser_base.cpp | 15 -------- src/parser/parser_test_json_validation.cpp | 2 +- src/parser/threaded_json_parser_test.cpp | 2 +- 
src/python/json.cpp | 2 +- 13 files changed, 56 insertions(+), 63 deletions(-) diff --git a/include/orcus/exception.hpp b/include/orcus/exception.hpp index 239e0231..79411a31 100644 --- a/include/orcus/exception.hpp +++ b/include/orcus/exception.hpp @@ -95,14 +95,20 @@ class ORCUS_PSR_DLLPUBLIC parse_error : public general_error std::ptrdiff_t m_offset; /// offset in the stream where the error occurred. protected: - parse_error(const std::string& msg, std::ptrdiff_t offset); - parse_error(const std::string& cls, const std::string& msg, std::ptrdiff_t offset); - static std::string build_message(std::string_view msg_before, char c, std::string_view msg_after); static std::string build_message(std::string_view msg_before, std::string_view msg, std::string_view msg_after); public: + parse_error(const std::string& msg, std::ptrdiff_t offset); + parse_error(const std::string& cls, const std::string& msg, std::ptrdiff_t offset); + std::ptrdiff_t offset() const; + + static void throw_with( + std::string_view msg_before, char c, std::string_view msg_after, std::ptrdiff_t offset); + + static void throw_with( + std::string_view msg_before, std::string_view msg, std::string_view msg_after, std::ptrdiff_t offset); }; namespace detail { diff --git a/include/orcus/json_parser.hpp b/include/orcus/json_parser.hpp index 732d9dc3..6eeb6f0d 100644 --- a/include/orcus/json_parser.hpp +++ b/include/orcus/json_parser.hpp @@ -8,7 +8,7 @@ #ifndef INCLUDED_ORCUS_JSON_PARSER_HPP #define INCLUDED_ORCUS_JSON_PARSER_HPP -#include "orcus/json_parser_base.hpp" +#include "json_parser_base.hpp" #include #include @@ -159,10 +159,10 @@ void json_parser<_Handler>::parse() if (has_char()) root_value(); else - throw json::parse_error("parse: no json content could be found in file", offset()); + throw parse_error("parse: no json content could be found in file", offset()); if (has_char()) - throw json::parse_error("parse: unexpected trailing string segment.", offset()); + throw parse_error("parse: 
unexpected trailing string segment.", offset()); m_handler.end_parse(); } @@ -181,7 +181,7 @@ void json_parser<_Handler>::root_value() object(); break; default: - json::parse_error::throw_with( + parse_error::throw_with( "root_value: either '[' or '{' was expected, but '", cur_char(), "' was found.", offset()); } } @@ -223,7 +223,7 @@ void json_parser<_Handler>::value() string(); break; default: - json::parse_error::throw_with("value: failed to parse '", cur_char(), "'.", offset()); + parse_error::throw_with("value: failed to parse '", cur_char(), "'.", offset()); } } @@ -256,12 +256,12 @@ void json_parser<_Handler>::array() case ',': if (next_char() == ']') { - json::parse_error::throw_with( + parse_error::throw_with( "array: ']' expected but '", cur_char(), "' found.", offset() ); } continue; default: - json::parse_error::throw_with( + parse_error::throw_with( "array: either ']' or ',' expected, but '", cur_char(), "' found.", offset()); } } @@ -274,7 +274,7 @@ void json_parser<_Handler>::array() } } - throw json::parse_error("array: failed to parse array.", offset()); + throw parse_error("array: failed to parse array.", offset()); } template @@ -296,14 +296,14 @@ void json_parser<_Handler>::object() { skip_ws(); if (!has_char()) - throw json::parse_error("object: stream ended prematurely before reaching a key.", offset()); + throw parse_error("object: stream ended prematurely before reaching a key.", offset()); switch (cur_char()) { case '}': if (require_new_key) { - json::parse_error::throw_with( + parse_error::throw_with( "object: new key expected, but '", cur_char(), "' found.", offset()); } m_handler.end_object(); @@ -313,7 +313,7 @@ void json_parser<_Handler>::object() case '"': break; default: - json::parse_error::throw_with( + parse_error::throw_with( "object: '\"' was expected, but '", cur_char(), "' found.", offset()); } require_new_key = false; @@ -323,32 +323,32 @@ void json_parser<_Handler>::object() { // Parsing was unsuccessful. 
if (res.length == parse_quoted_string_state::error_no_closing_quote) - throw json::parse_error("object: stream ended prematurely before reaching the closing quote of a key.", offset()); + throw parse_error("object: stream ended prematurely before reaching the closing quote of a key.", offset()); else if (res.length == parse_quoted_string_state::error_illegal_escape_char) - json::parse_error::throw_with( + parse_error::throw_with( "object: illegal escape character '", cur_char(), "' in key value.", offset()); else - throw json::parse_error("object: unknown error while parsing a key value.", offset()); + throw parse_error("object: unknown error while parsing a key value.", offset()); } m_handler.object_key(res.str, res.length, res.transient); skip_ws(); if (cur_char() != ':') - json::parse_error::throw_with( + parse_error::throw_with( "object: ':' was expected, but '", cur_char(), "' found.", offset()); next(); skip_ws(); if (!has_char()) - throw json::parse_error("object: stream ended prematurely before reaching a value.", offset()); + throw parse_error("object: stream ended prematurely before reaching a value.", offset()); value(); skip_ws(); if (!has_char()) - throw json::parse_error("object: stream ended prematurely before reaching either '}' or ','.", offset()); + throw parse_error("object: stream ended prematurely before reaching either '}' or ','.", offset()); switch (cur_char()) { @@ -361,12 +361,12 @@ void json_parser<_Handler>::object() require_new_key = true; continue; default: - json::parse_error::throw_with( + parse_error::throw_with( "object: either '}' or ',' expected, but '", cur_char(), "' found.", offset()); } } - throw json::parse_error("object: closing '}' was never reached.", offset()); + throw parse_error("object: closing '}' was never reached.", offset()); } template @@ -391,11 +391,11 @@ void json_parser<_Handler>::string() // Parsing was unsuccessful. 
if (res.length == parse_quoted_string_state::error_no_closing_quote) - throw json::parse_error("string: stream ended prematurely before reaching the closing quote.", offset()); + throw parse_error("string: stream ended prematurely before reaching the closing quote.", offset()); else if (res.length == parse_quoted_string_state::error_illegal_escape_char) - json::parse_error::throw_with("string: illegal escape character '", cur_char(), "'.", offset()); + parse_error::throw_with("string: illegal escape character '", cur_char(), "'.", offset()); else - throw json::parse_error("string: unknown error.", offset()); + throw parse_error("string: unknown error.", offset()); } } diff --git a/include/orcus/json_parser_base.hpp b/include/orcus/json_parser_base.hpp index 4e70f469..3e49753a 100644 --- a/include/orcus/json_parser_base.hpp +++ b/include/orcus/json_parser_base.hpp @@ -8,25 +8,14 @@ #ifndef INCLUDED_ORCUS_JSON_PARSER_BASE_HPP #define INCLUDED_ORCUS_JSON_PARSER_BASE_HPP -#include "orcus/parser_base.hpp" -#include "orcus/parser_global.hpp" +#include "parser_base.hpp" +#include "parser_global.hpp" +#include "exception.hpp" #include namespace orcus { namespace json { -class ORCUS_PSR_DLLPUBLIC parse_error : public ::orcus::parse_error -{ -public: - parse_error(const std::string& msg, std::ptrdiff_t offset); - - static void throw_with( - const char* msg_before, char c, const char* msg_after, std::ptrdiff_t offset); - - static void throw_with( - const char* msg_before, const char* p, size_t n, const char* msg_after, std::ptrdiff_t offset); -}; - class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base { struct impl; diff --git a/include/orcus/threaded_json_parser.hpp b/include/orcus/threaded_json_parser.hpp index 51cdaced..09bddfac 100644 --- a/include/orcus/threaded_json_parser.hpp +++ b/include/orcus/threaded_json_parser.hpp @@ -168,7 +168,7 @@ void threaded_json_parser<_Handler>::process_tokens(json::parse_tokens_t& tokens case 
json::parse_token_t::parse_error: { auto v = std::get(t.value); - throw json::parse_error(std::string{v.str}, v.offset); + throw parse_error(std::string{v.str}, v.offset); } case json::parse_token_t::unknown: default: diff --git a/src/liborcus/json_document_tree.cpp b/src/liborcus/json_document_tree.cpp index 508f7e3e..8db5c42b 100644 --- a/src/liborcus/json_document_tree.cpp +++ b/src/liborcus/json_document_tree.cpp @@ -1701,7 +1701,7 @@ void document_tree::load(std::string_view stream, const json_config& config) { doc.load(ext_content.str(), ext_config); } - catch (const json::parse_error& e) + catch (const parse_error& e) { std::ostringstream os; os << "Error while parsing " << extpath.string() << std::endl; diff --git a/src/liborcus/json_document_tree_test.cpp b/src/liborcus/json_document_tree_test.cpp index b4e430dc..018702c0 100644 --- a/src/liborcus/json_document_tree_test.cpp +++ b/src/liborcus/json_document_tree_test.cpp @@ -189,7 +189,7 @@ void test_json_parse_empty() { doc.load(test, test_config); } - catch (const json::parse_error& e) + catch (const parse_error& e) { std::cout << create_parse_error_output(test, e.offset()) << std::endl; std::cout << e.what() << std::endl; @@ -221,7 +221,7 @@ void test_json_parse_invalid() std::cerr << "Invalid JSON expression is parsed as valid: '" << invalid_json << "'" << std::endl; assert(false); } - catch (const json::parse_error& e) + catch (const parse_error& e) { // works as expected. 
std::cout << "invalid expression tested: " << invalid_json << std::endl; diff --git a/src/liborcus/orcus_json.cpp b/src/liborcus/orcus_json.cpp index 48a2d563..f60912a7 100644 --- a/src/liborcus/orcus_json.cpp +++ b/src/liborcus/orcus_json.cpp @@ -466,7 +466,7 @@ void orcus_json::read_map_definition(std::string_view stream) } } } - catch (const json::parse_error& e) + catch (const parse_error& e) { std::ostringstream os; os << "Error parsing the map definition file:" << std::endl diff --git a/src/orcus_json_cli.cpp b/src/orcus_json_cli.cpp index ca6f682f..5818612c 100644 --- a/src/orcus_json_cli.cpp +++ b/src/orcus_json_cli.cpp @@ -425,7 +425,7 @@ int main(int argc, char** argv) return EXIT_FAILURE; } } - catch (const json::parse_error& e) + catch (const parse_error& e) { cerr << create_parse_error_output(content.str(), e.offset()) << endl; cerr << e.what() << endl; diff --git a/src/parser/exception.cpp b/src/parser/exception.cpp index d0906835..81839953 100644 --- a/src/parser/exception.cpp +++ b/src/parser/exception.cpp @@ -115,5 +115,18 @@ std::string parse_error::build_message( return os.str(); } +void parse_error::throw_with( + std::string_view msg_before, char c, std::string_view msg_after, std::ptrdiff_t offset) +{ + throw parse_error(build_message(msg_before, c, msg_after), offset); +} + +void parse_error::throw_with( + std::string_view msg_before, std::string_view msg, std::string_view msg_after, std::ptrdiff_t offset) +{ + throw parse_error(build_message(msg_before, msg, msg_after), offset); } + +} + /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/src/parser/json_parser_base.cpp b/src/parser/json_parser_base.cpp index 9c56c61c..b123817a 100644 --- a/src/parser/json_parser_base.cpp +++ b/src/parser/json_parser_base.cpp @@ -31,21 +31,6 @@ const char* parse_numeric_json(const char* p, const char* p_end, double& value) } // anonymous namespace -parse_error::parse_error(const std::string& msg, std::ptrdiff_t offset) : - 
::orcus::parse_error(msg, offset) {} - -void parse_error::throw_with( - const char* msg_before, char c, const char* msg_after, std::ptrdiff_t offset) -{ - throw parse_error(build_message(msg_before, c, msg_after), offset); -} - -void parse_error::throw_with( - const char* msg_before, const char* p, size_t n, const char* msg_after, std::ptrdiff_t offset) -{ - throw parse_error(build_message(msg_before, {p, n}, msg_after), offset); -} - struct parser_base::impl { cell_buffer m_buffer; diff --git a/src/parser/parser_test_json_validation.cpp b/src/parser/parser_test_json_validation.cpp index 11588bd5..65056bb6 100644 --- a/src/parser/parser_test_json_validation.cpp +++ b/src/parser/parser_test_json_validation.cpp @@ -372,7 +372,7 @@ void test_pass() orcus::json_parser parser(content.c_str(), content.size(), hdl); parser.parse(); } - catch (const orcus::json::parse_error& e) + catch (const orcus::parse_error& e) { std::cout << e.what() << std::endl; std::cout << orcus::create_parse_error_output(content, e.offset()) << std::endl; diff --git a/src/parser/threaded_json_parser_test.cpp b/src/parser/threaded_json_parser_test.cpp index ecee3984..5ac8053b 100644 --- a/src/parser/threaded_json_parser_test.cpp +++ b/src/parser/threaded_json_parser_test.cpp @@ -169,7 +169,7 @@ void test_threaded_json_parser_invalid() parser.parse(); assert(false); } - catch (const json::parse_error&) + catch (const parse_error&) { // works as expected. 
cout << "invalid source: " << src << endl; diff --git a/src/python/json.cpp b/src/python/json.cpp index f22cf666..265ac1c4 100644 --- a/src/python/json.cpp +++ b/src/python/json.cpp @@ -248,7 +248,7 @@ PyObject* json_loads(PyObject* /*module*/, PyObject* args, PyObject* kwargs) parser.parse(); return hdl.get_root(); } - catch (const orcus::json::parse_error& e) + catch (const orcus::parse_error& e) { PyErr_SetString(PyExc_TypeError, e.what()); } -- GitLab From 300d583cf5c0d51ee76740ae83bbbb2890a9e4af Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 21 Nov 2022 21:48:56 -0500 Subject: [PATCH 25/91] Remove csv::parse_error in favor of generic parse_error --- include/orcus/csv_parser.hpp | 4 ++-- include/orcus/csv_parser_base.hpp | 9 --------- src/liborcus/orcus_csv.cpp | 4 ++-- src/parser/csv_parser_base.cpp | 9 --------- 4 files changed, 4 insertions(+), 22 deletions(-) diff --git a/include/orcus/csv_parser.hpp b/include/orcus/csv_parser.hpp index 8f416ec9..f6c7d3bd 100644 --- a/include/orcus/csv_parser.hpp +++ b/include/orcus/csv_parser.hpp @@ -135,7 +135,7 @@ void csv_parser<_Handler>::row() } if (!is_delim(c)) - throw orcus::csv::parse_error("expected a delimiter"); + throw orcus::parse_error("expected a delimiter", offset()); next(); @@ -263,7 +263,7 @@ void csv_parser<_Handler>::parse_cell_with_quote(const char* p0, size_t len0) } // Stream ended prematurely. 
- throw csv::parse_error("stream ended prematurely while parsing quoted cell."); + throw parse_error("stream ended prematurely while parsing quoted cell.", offset()); } template diff --git a/include/orcus/csv_parser_base.hpp b/include/orcus/csv_parser_base.hpp index 40f14975..d6a59d6d 100644 --- a/include/orcus/csv_parser_base.hpp +++ b/include/orcus/csv_parser_base.hpp @@ -54,15 +54,6 @@ struct ORCUS_PSR_DLLPUBLIC parser_config parser_config(); }; -class ORCUS_PSR_DLLPUBLIC parse_error : public std::exception -{ - std::string m_msg; -public: - parse_error(const std::string& msg); - virtual ~parse_error(); - virtual const char* what() const throw(); -}; - class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base { protected: diff --git a/src/liborcus/orcus_csv.cpp b/src/liborcus/orcus_csv.cpp index d16e040d..9bb4fa5a 100644 --- a/src/liborcus/orcus_csv.cpp +++ b/src/liborcus/orcus_csv.cpp @@ -158,9 +158,9 @@ struct orcus_csv::impl // The parser has decided to end the import due to the destination // sheet being full. 
} - catch (const csv::parse_error& e) + catch (const parse_error& e) { - cout << "parse failed: " << e.what() << endl; + cout << "parse failed at offset " << e.offset() << ": " << e.what() << endl; } } }; diff --git a/src/parser/csv_parser_base.cpp b/src/parser/csv_parser_base.cpp index 9f56cb41..38fb8648 100644 --- a/src/parser/csv_parser_base.cpp +++ b/src/parser/csv_parser_base.cpp @@ -15,15 +15,6 @@ parser_config::parser_config() : text_qualifier('\0'), trim_cell_value(false) {} -parse_error::parse_error(const std::string& msg) : m_msg(msg) {} - -parse_error::~parse_error() = default; - -const char* parse_error::what() const throw() -{ - return m_msg.c_str(); -} - parser_base::parser_base( const char* p, size_t n, const csv::parser_config& config) : ::orcus::parser_base(p, n, false), m_config(config) -- GitLab From 406a6e0630c96915ec7ccdd7c2bb6597443658e5 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 21 Nov 2022 21:56:10 -0500 Subject: [PATCH 26/91] Remove css::parse_error in favor of generic parse_error --- include/orcus/css_parser.hpp | 36 ++++++++++---------- include/orcus/css_parser_base.hpp | 10 ------ src/liborcus/css_document_tree_test.cpp | 2 +- src/parser/css_parser_base.cpp | 45 +++++++------------------ 4 files changed, 31 insertions(+), 62 deletions(-) diff --git a/include/orcus/css_parser.hpp b/include/orcus/css_parser.hpp index 815c291f..1633453b 100644 --- a/include/orcus/css_parser.hpp +++ b/include/orcus/css_parser.hpp @@ -285,7 +285,7 @@ void css_parser<_Handler>::rule() block(); break; default: - css::parse_error::throw_with("rule: failed to parse '", c, "'"); + parse_error::throw_with("rule: failed to parse '", c, "'", offset()); } } } @@ -298,7 +298,7 @@ void css_parser<_Handler>::at_rule_name() next(); char c = cur_char(); if (!is_alpha(c)) - throw css::parse_error("at_rule_name: first character of an at-rule name must be an alphabet."); + throw parse_error("at_rule_name: first character of an at-rule name must be an alphabet.", 
offset()); const char* p; size_t len; @@ -389,8 +389,8 @@ void css_parser<_Handler>::simple_selector_name() identifier(p, n); css::pseudo_element_t elem = css::to_pseudo_element({p, n}); if (!elem) - css::parse_error::throw_with( - "selector_name: unknown pseudo element '", p, n, "'"); + parse_error::throw_with( + "selector_name: unknown pseudo element '", {p, n}, "'", offset()); m_handler.simple_selector_pseudo_element(elem); } @@ -400,8 +400,8 @@ void css_parser<_Handler>::simple_selector_name() identifier(p, n); css::pseudo_class_t pc = css::to_pseudo_class({p, n}); if (!pc) - css::parse_error::throw_with( - "selector_name: unknown pseudo class '", p, n, "'"); + parse_error::throw_with( + "selector_name: unknown pseudo class '", {p, n}, "'", offset()); m_handler.simple_selector_pseudo_class(pc); } @@ -430,8 +430,8 @@ void css_parser<_Handler>::property_name() assert(has_char()); char c = cur_char(); if (!is_alpha(c) && c != '.') - css::parse_error::throw_with( - "property_name: first character of a name must be an alphabet or a dot, but found '", c, "'"); + parse_error::throw_with( + "property_name: first character of a name must be an alphabet or a dot, but found '", c, "'", offset()); const char* p; size_t len; @@ -453,7 +453,7 @@ void css_parser<_Handler>::property() m_handler.begin_property(); property_name(); if (cur_char() != ':') - throw css::parse_error("property: ':' expected."); + throw parse_error("property: ':' expected.", offset()); next(); skip_comments_and_blanks(); @@ -537,7 +537,7 @@ void css_parser<_Handler>::function_value(std::string_view v) assert(cur_char() == '('); css::property_function_t func = css::to_property_function(v); if (func == css::property_function_t::unknown) - css::parse_error::throw_with("function_value: unknown function '", v, "'"); + parse_error::throw_with("function_value: unknown function '", v, "'", offset()); // Move to the first character of the first argument. 
next(); @@ -561,12 +561,12 @@ void css_parser<_Handler>::function_value(std::string_view v) function_url(); break; default: - css::parse_error::throw_with("function_value: unhandled function '", v, "'"); + parse_error::throw_with("function_value: unhandled function '", v, "'", offset()); } char c = cur_char(); if (c != ')') - css::parse_error::throw_with("function_value: ')' expected but '", c, "' found."); + parse_error::throw_with("function_value: ')' expected but '", c, "' found.", offset()); next(); skip_comments_and_blanks(); @@ -594,7 +594,7 @@ void css_parser<_Handler>::function_rgb(bool alpha) c = cur_char(); if (c != ',') - css::parse_error::throw_with("function_rgb: ',' expected but '", c, "' found."); + parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.", offset()); next(); skip_comments_and_blanks(); @@ -604,7 +604,7 @@ void css_parser<_Handler>::function_rgb(bool alpha) { c = cur_char(); if (c != ',') - css::parse_error::throw_with("function_rgb: ',' expected but '", c, "' found."); + parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.", offset()); next(); skip_comments_and_blanks(); @@ -641,7 +641,7 @@ void css_parser<_Handler>::function_hsl(bool alpha) char c = cur_char(); if (c != ',') - css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found."); + parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.", offset()); next(); skip_comments_and_blanks(); @@ -652,7 +652,7 @@ void css_parser<_Handler>::function_hsl(bool alpha) c = cur_char(); if (c != ',') - css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found."); + parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.", offset()); next(); skip_comments_and_blanks(); @@ -669,7 +669,7 @@ void css_parser<_Handler>::function_hsl(bool alpha) c = cur_char(); if (c != ',') - css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found."); + 
parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.", offset()); next(); skip_comments_and_blanks(); @@ -761,7 +761,7 @@ void css_parser<_Handler>::block() } if (cur_char() != '}') - throw css::parse_error("block: '}' expected."); + throw parse_error("block: '}' expected.", offset()); m_handler.end_block(); diff --git a/include/orcus/css_parser_base.hpp b/include/orcus/css_parser_base.hpp index 0964d7c1..e372d39c 100644 --- a/include/orcus/css_parser_base.hpp +++ b/include/orcus/css_parser_base.hpp @@ -18,16 +18,6 @@ namespace orcus { namespace css { -class ORCUS_PSR_DLLPUBLIC parse_error : public ::orcus::parse_error -{ -public: - parse_error(const std::string& msg); - - static void throw_with(const char* msg_before, char c, const char* msg_after); - static void throw_with(const char* msg_before, const char* p, size_t n, const char* msg_after); - static void throw_with(const char* msg_before, std::string_view s, const char* msg_after); -}; - class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base { public: diff --git a/src/liborcus/css_document_tree_test.cpp b/src/liborcus/css_document_tree_test.cpp index ca164acf..09b98d2f 100644 --- a/src/liborcus/css_document_tree_test.cpp +++ b/src/liborcus/css_document_tree_test.cpp @@ -108,7 +108,7 @@ void test_css_invalids() doc.load(content.str()); assert(!"css::parse_error was not thrown, but expected to be."); } - catch (const css::parse_error&) + catch (const parse_error&) { // This is expected. 
} diff --git a/src/parser/css_parser_base.cpp b/src/parser/css_parser_base.cpp index 3ce101b8..e1c81cce 100644 --- a/src/parser/css_parser_base.cpp +++ b/src/parser/css_parser_base.cpp @@ -15,29 +15,8 @@ #include #include -using namespace std; - namespace orcus { namespace css { -parse_error::parse_error(const std::string& msg) : - orcus::parse_error(msg, 0) {} - -void parse_error::throw_with(const char* msg_before, char c, const char* msg_after) -{ - throw parse_error(build_message(msg_before, c, msg_after)); -} - -void parse_error::throw_with( - const char* msg_before, const char* p, size_t n, const char* msg_after) -{ - throw parse_error(build_message(msg_before, {p, n}, msg_after)); -} - -void parse_error::throw_with(const char* msg_before, std::string_view s, const char* msg_after) -{ - throw parse_error(build_message(msg_before, s, msg_after)); -} - parser_base::parser_base(const char* p, size_t n) : ::orcus::parser_base(p, n, false), m_simple_selector_count(0), @@ -80,7 +59,7 @@ uint8_t parser_base::parse_uint8() } if (!len) - throw css::parse_error("parse_uint8: no digit encountered."); + throw parse_error("parse_uint8: no digit encountered.", offset()); int maxval = std::numeric_limits::max(); if (val > maxval) @@ -91,20 +70,20 @@ uint8_t parser_base::parse_uint8() std::string_view parser_base::parse_value() { - auto throw_invalid = [](uint8_t n_bytes) + auto throw_invalid = [this](uint8_t n_bytes) { std::ostringstream os; os << "parse_value: invalid utf-8 byte length (" << int(n_bytes) << ")"; - throw css::parse_error(os.str()); + throw parse_error(os.str(), offset()); }; - auto check_byte_length_or_throw = [](uint8_t n_bytes, std::size_t max_size) + auto check_byte_length_or_throw = [this](uint8_t n_bytes, std::size_t max_size) { if (std::size_t(n_bytes) > max_size) { std::ostringstream os; os << "parse_value: utf-8 byte length is " << int(n_bytes) << " but only " << max_size << " bytes remaining."; - throw css::parse_error(os.str()); + throw 
parse_error(os.str(), offset()); } }; @@ -126,7 +105,7 @@ std::string_view parser_base::parse_value() case 1: { if (!is_alpha(c) && !is_numeric(c) && !is_in(c, "-+.#")) - css::parse_error::throw_with("parse_value: illegal first character of a value '", c, "'"); + parse_error::throw_with("parse_value: illegal first character of a value '", c, "'", offset()); break; } case 2: @@ -180,8 +159,8 @@ double parser_base::parse_percent() double v = parse_double_or_throw(); if (*mp_char != '%') - css::parse_error::throw_with( - "parse_percent: '%' expected after the numeric value, but '", *mp_char, "' found."); + parse_error::throw_with( + "parse_percent: '%' expected after the numeric value, but '", *mp_char, "' found.", offset()); next(); // skip the '%'. return v; @@ -191,7 +170,7 @@ double parser_base::parse_double_or_throw() { double v = parse_double(); if (std::isnan(v)) - throw css::parse_error("parse_double: failed to parse double precision value."); + throw parse_error("parse_double: failed to parse double precision value.", offset()); return v; } @@ -202,7 +181,7 @@ void parser_base::literal(const char*& p, size_t& len, char quote) skip_to(p, len, quote); if (cur_char() != quote) - throw css::parse_error("literal: end quote has never been reached."); + throw parse_error("literal: end quote has never been reached.", offset()); } void parser_base::skip_to(const char*&p, size_t& len, char c) @@ -339,8 +318,8 @@ void parser_base::skip_comments_and_blanks() void parser_base::set_combinator(char c, css::combinator_t combinator) { if (!m_simple_selector_count) - css::parse_error::throw_with( - "set_combinator: combinator '", c, "' encountered without parent element."); + parse_error::throw_with( + "set_combinator: combinator '", c, "' encountered without parent element.", offset()); m_combinator = combinator; next(); -- GitLab From bf114ce3903efeaed3f478ba6d08d2e192cf93fd Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 21 Nov 2022 22:00:29 -0500 Subject: [PATCH 
27/91] Remove yaml::parse_error in favor of generic parse_error --- include/orcus/yaml_parser.hpp | 18 +++++++++--------- include/orcus/yaml_parser_base.hpp | 9 --------- src/liborcus/yaml_document_tree_test.cpp | 2 +- src/orcus_yaml_main.cpp | 2 +- src/parser/yaml_parser_base.cpp | 18 ++---------------- 5 files changed, 13 insertions(+), 36 deletions(-) diff --git a/include/orcus/yaml_parser.hpp b/include/orcus/yaml_parser.hpp index 035d7d72..bb14ccb1 100644 --- a/include/orcus/yaml_parser.hpp +++ b/include/orcus/yaml_parser.hpp @@ -302,7 +302,7 @@ void yaml_parser<_Handler>::parse() if (cur_scope == scope_empty) { if (indent > 0) - throw yaml::parse_error( + throw parse_error( "first node of the document should not be indented.", offset()); push_scope(indent); @@ -318,7 +318,7 @@ void yaml_parser<_Handler>::parse() { cur_scope = end_scope(); if (cur_scope < indent) - throw yaml::parse_error("parse: invalid indent level.", offset()); + throw parse_error("parse: invalid indent level.", offset()); } while (indent < cur_scope); } @@ -515,10 +515,10 @@ void yaml_parser<_Handler>::parse_line(const char* p, size_t len) // start of a document ++p; if (p == p_end) - throw yaml::parse_error("parse_line: line ended with '--'.", offset_last_char_of_line()); + throw parse_error("parse_line: line ended with '--'.", offset_last_char_of_line()); if (*p != '-') - yaml::parse_error::throw_with( + parse_error::throw_with( "parse_line: '-' expected but '", *p, "' found.", offset_last_char_of_line() - std::ptrdiff_t(p_end-p)); @@ -545,7 +545,7 @@ void yaml_parser<_Handler>::parse_line(const char* p, size_t len) // list item start with inline first item content. 
++p; if (p == p_end) - throw yaml::parse_error( + throw parse_error( "parse_line: list item expected, but the line ended prematurely.", offset_last_char_of_line() - std::ptrdiff_t(p_end-p)); @@ -564,7 +564,7 @@ void yaml_parser<_Handler>::parse_line(const char* p, size_t len) } if (get_scope_type() == yaml::detail::scope_t::sequence) - yaml::parse_error::throw_with( + parse_error::throw_with( "'-' was expected for a sequence element, but '", *p, "' was found.", offset_last_char_of_line()-len+1); @@ -593,7 +593,7 @@ void yaml_parser<_Handler>::parse_map_key(const char* p, size_t len) skip_blanks(p, p_end-p); if (*p != ':') - throw yaml::parse_error( + throw parse_error( "parse_map_key: ':' is expected after the quoted string key.", offset() - std::ptrdiff_t(p_end-p+1)); @@ -623,7 +623,7 @@ void yaml_parser<_Handler>::parse_map_key(const char* p, size_t len) skip_blanks(p, p_end-p); if (*p != ':') - throw yaml::parse_error( + throw parse_error( "parse_map_key: ':' is expected after the quoted string key.", offset() - std::ptrdiff_t(p_end-p+1)); @@ -676,7 +676,7 @@ void yaml_parser<_Handler>::parse_map_key(const char* p, size_t len) // inline map item. 
if (*p == '-') - throw yaml::parse_error( + throw parse_error( "parse_map_key: sequence entry is not allowed as an inline map item.", offset() - std::ptrdiff_t(p_end-p+1)); diff --git a/include/orcus/yaml_parser_base.hpp b/include/orcus/yaml_parser_base.hpp index ec13d22d..94b2215a 100644 --- a/include/orcus/yaml_parser_base.hpp +++ b/include/orcus/yaml_parser_base.hpp @@ -15,15 +15,6 @@ namespace orcus { namespace yaml { -class ORCUS_PSR_DLLPUBLIC parse_error : public ::orcus::parse_error -{ -public: - parse_error(const std::string& msg, std::ptrdiff_t offset); - - static void throw_with(const char* msg_before, char c, const char* msg_after, std::ptrdiff_t offset); - static void throw_with(const char* msg_before, const char* p, size_t n, const char* msg_after, std::ptrdiff_t offset); -}; - namespace detail { enum class scope_t diff --git a/src/liborcus/yaml_document_tree_test.cpp b/src/liborcus/yaml_document_tree_test.cpp index b4e68d9d..7a732ac5 100644 --- a/src/liborcus/yaml_document_tree_test.cpp +++ b/src/liborcus/yaml_document_tree_test.cpp @@ -96,7 +96,7 @@ void test_yaml_invalids() doc.load(strm.str()); assert(!"yaml::parse_error was not thrown, but expected to be."); } - catch (const yaml::parse_error&) + catch (const parse_error&) { // This is expected. 
} diff --git a/src/orcus_yaml_main.cpp b/src/orcus_yaml_main.cpp index 9f750dd4..09dc48bc 100644 --- a/src/orcus_yaml_main.cpp +++ b/src/orcus_yaml_main.cpp @@ -146,7 +146,7 @@ std::unique_ptr load_doc(const char* p, size_t n) { doc->load({p, n}); } - catch (const yaml::parse_error& e) + catch (const parse_error& e) { cerr << create_parse_error_output(std::string_view(p, n), e.offset()) << endl; throw; diff --git a/src/parser/yaml_parser_base.cpp b/src/parser/yaml_parser_base.cpp index e4633ea5..53000ced 100644 --- a/src/parser/yaml_parser_base.cpp +++ b/src/parser/yaml_parser_base.cpp @@ -19,20 +19,6 @@ namespace orcus { namespace yaml { -parse_error::parse_error(const std::string& msg, std::ptrdiff_t offset) : - ::orcus::parse_error(msg, offset) {} - -void parse_error::throw_with(const char* msg_before, char c, const char* msg_after, std::ptrdiff_t offset) -{ - throw parse_error(build_message(msg_before, c, msg_after), offset); -} - -void parse_error::throw_with( - const char* msg_before, const char* p, size_t n, const char* msg_after, std::ptrdiff_t offset) -{ - throw parse_error(build_message(msg_before, {p, n}, msg_after), offset); -} - struct scope { size_t width; @@ -439,7 +425,7 @@ parser_base::key_value parser_base::parse_key_value(const char* p, size_t len) // Key has not been found. detail::scope_t st = get_scope_type(); if (st == detail::scope_t::map) - throw yaml::parse_error("key was expected, but not found.", offset_last_char_of_line()); + throw parse_error("key was expected, but not found.", offset_last_char_of_line()); } return kv; @@ -493,7 +479,7 @@ void parser_base::handle_line_in_literal(size_t indent) // Start a new multi-line string scope. 
if (indent == cur_scope) - throw yaml::parse_error("parse: first line of a literal block must be indented.", offset()); + throw parse_error("parse: first line of a literal block must be indented.", offset()); push_scope(indent); set_scope_type(yaml::detail::scope_t::multi_line_string); -- GitLab From 0929753d505222e4ecd4f19bc2167ee4e22292ab Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 21 Nov 2022 22:02:07 -0500 Subject: [PATCH 28/91] build_message() can be private --- include/orcus/exception.hpp | 4 ---- src/parser/exception.cpp | 30 +++++++++++++++--------------- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/include/orcus/exception.hpp b/include/orcus/exception.hpp index 79411a31..15dae315 100644 --- a/include/orcus/exception.hpp +++ b/include/orcus/exception.hpp @@ -94,10 +94,6 @@ class ORCUS_PSR_DLLPUBLIC parse_error : public general_error { std::ptrdiff_t m_offset; /// offset in the stream where the error occurred. -protected: - static std::string build_message(std::string_view msg_before, char c, std::string_view msg_after); - static std::string build_message(std::string_view msg_before, std::string_view msg, std::string_view msg_after); - public: parse_error(const std::string& msg, std::ptrdiff_t offset); parse_error(const std::string& cls, const std::string& msg, std::ptrdiff_t offset); diff --git a/src/parser/exception.cpp b/src/parser/exception.cpp index 81839953..485db1b6 100644 --- a/src/parser/exception.cpp +++ b/src/parser/exception.cpp @@ -81,6 +81,21 @@ std::string build_offset_msg(std::ptrdiff_t offset) return os.str(); } +std::string build_message(std::string_view msg_before, char c, std::string_view msg_after) +{ + std::ostringstream os; + os << msg_before << c << msg_after; + return os.str(); +} + +std::string build_message( + std::string_view msg_before, std::string_view msg, std::string_view msg_after) +{ + std::ostringstream os; + os << msg_before << msg << msg_after; + return os.str(); +} + } 
parse_error::parse_error(const std::string& msg, std::ptrdiff_t offset) : @@ -100,21 +115,6 @@ std::ptrdiff_t parse_error::offset() const return m_offset; } -std::string parse_error::build_message(std::string_view msg_before, char c, std::string_view msg_after) -{ - std::ostringstream os; - os << msg_before << c << msg_after; - return os.str(); -} - -std::string parse_error::build_message( - std::string_view msg_before, std::string_view msg, std::string_view msg_after) -{ - std::ostringstream os; - os << msg_before << msg << msg_after; - return os.str(); -} - void parse_error::throw_with( std::string_view msg_before, char c, std::string_view msg_after, std::ptrdiff_t offset) { -- GitLab From 8300189c8beaa2e7490feed0e204ff3f252bcc15 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 21 Nov 2022 22:11:03 -0500 Subject: [PATCH 29/91] Pass std::string as r-value when appropriate --- include/orcus/exception.hpp | 18 +++++++-------- src/parser/exception.cpp | 46 ++++++++++++++++++------------------- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/include/orcus/exception.hpp b/include/orcus/exception.hpp index 15dae315..61823661 100644 --- a/include/orcus/exception.hpp +++ b/include/orcus/exception.hpp @@ -18,7 +18,7 @@ namespace orcus { class ORCUS_PSR_DLLPUBLIC general_error : public std::exception { public: - explicit general_error(const std::string& msg); + explicit general_error(std::string msg); explicit general_error(const std::string& cls, const std::string& msg); virtual ~general_error() noexcept; virtual const char* what() const noexcept; @@ -27,7 +27,7 @@ protected: void append_msg(const std::string& s); private: - ::std::string m_msg; + std::string m_msg; }; class ORCUS_PSR_DLLPUBLIC invalid_arg_error : public std::invalid_argument @@ -40,28 +40,28 @@ public: class ORCUS_PSR_DLLPUBLIC xml_structure_error : public general_error { public: - explicit xml_structure_error(const ::std::string& msg); + explicit xml_structure_error(std::string 
msg); virtual ~xml_structure_error() noexcept; }; class ORCUS_PSR_DLLPUBLIC json_structure_error : public general_error { public: - explicit json_structure_error(const ::std::string& msg); + explicit json_structure_error(std::string msg); virtual ~json_structure_error() noexcept; }; class ORCUS_PSR_DLLPUBLIC invalid_map_error : public general_error { public: - explicit invalid_map_error(const ::std::string& msg); + explicit invalid_map_error(std::string msg); virtual ~invalid_map_error() noexcept; }; class ORCUS_PSR_DLLPUBLIC value_error : public general_error { public: - explicit value_error(const std::string& msg); + explicit value_error(std::string msg); virtual ~value_error() noexcept; }; @@ -71,7 +71,7 @@ public: class ORCUS_PSR_DLLPUBLIC xpath_error : public general_error { public: - xpath_error(const std::string& msg); + xpath_error(std::string msg); virtual ~xpath_error() noexcept; }; @@ -82,7 +82,7 @@ public: class ORCUS_PSR_DLLPUBLIC interface_error : public general_error { public: - interface_error(const std::string& msg); + interface_error(std::string msg); virtual ~interface_error() noexcept; }; @@ -95,7 +95,7 @@ class ORCUS_PSR_DLLPUBLIC parse_error : public general_error std::ptrdiff_t m_offset; /// offset in the stream where the error occurred. 
public: - parse_error(const std::string& msg, std::ptrdiff_t offset); + parse_error(std::string msg, std::ptrdiff_t offset); parse_error(const std::string& cls, const std::string& msg, std::ptrdiff_t offset); std::ptrdiff_t offset() const; diff --git a/src/parser/exception.cpp b/src/parser/exception.cpp index 485db1b6..e30821ca 100644 --- a/src/parser/exception.cpp +++ b/src/parser/exception.cpp @@ -13,21 +13,19 @@ using namespace std; namespace orcus { -general_error::general_error(const string& msg) : - m_msg(msg) +general_error::general_error(std::string msg) : + m_msg(std::move(msg)) { } general_error::general_error(const std::string& cls, const std::string& msg) { - ostringstream os; + std::ostringstream os; os << cls << ": " << msg; m_msg = os.str(); } -general_error::~general_error() noexcept -{ -} +general_error::~general_error() noexcept = default; const char* general_error::what() const noexcept { @@ -44,33 +42,33 @@ invalid_arg_error::invalid_arg_error(const std::string& msg) : invalid_arg_error::~invalid_arg_error() noexcept {} -xml_structure_error::xml_structure_error(const string& msg) : - general_error(msg) {} +xml_structure_error::xml_structure_error(std::string msg) : + general_error(std::move(msg)) {} -xml_structure_error::~xml_structure_error() noexcept {} +xml_structure_error::~xml_structure_error() noexcept = default; -json_structure_error::json_structure_error(const string& msg) : - general_error(msg) {} +json_structure_error::json_structure_error(std::string msg) : + general_error(std::move(msg)) {} -json_structure_error::~json_structure_error() noexcept {} +json_structure_error::~json_structure_error() noexcept = default; -invalid_map_error::invalid_map_error(const string& msg) : - general_error(msg) {} +invalid_map_error::invalid_map_error(std::string msg) : + general_error(std::move(msg)) {} -invalid_map_error::~invalid_map_error() noexcept {} +invalid_map_error::~invalid_map_error() noexcept = default; -value_error::value_error(const 
string& msg) : - general_error(msg) {} +value_error::value_error(std::string msg) : + general_error(std::move(msg)) {} -value_error::~value_error() noexcept {} +value_error::~value_error() noexcept = default; -xpath_error::xpath_error(const string& msg) : general_error(msg) {} +xpath_error::xpath_error(std::string msg) : general_error(std::move(msg)) {} -xpath_error::~xpath_error() noexcept {} +xpath_error::~xpath_error() noexcept = default; -interface_error::interface_error(const std::string& msg) : general_error(msg) {} +interface_error::interface_error(std::string msg) : general_error(std::move(msg)) {} -interface_error::~interface_error() noexcept {} +interface_error::~interface_error() noexcept = default; namespace { @@ -98,8 +96,8 @@ std::string build_message( } -parse_error::parse_error(const std::string& msg, std::ptrdiff_t offset) : - general_error(msg), m_offset(offset) +parse_error::parse_error(std::string msg, std::ptrdiff_t offset) : + general_error(std::move(msg)), m_offset(offset) { append_msg(build_offset_msg(offset)); } -- GitLab From cfb0ad6454422bc5db08acf7b911ce632dea14fd Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 21 Nov 2022 22:17:52 -0500 Subject: [PATCH 30/91] This variant of ctor is used in derived classes only --- include/orcus/exception.hpp | 11 +++++++++-- src/parser/exception.cpp | 8 ++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/include/orcus/exception.hpp b/include/orcus/exception.hpp index 61823661..e994be80 100644 --- a/include/orcus/exception.hpp +++ b/include/orcus/exception.hpp @@ -87,17 +87,24 @@ public: }; /** - * Exception related to parsing error that includes the offset in the stream + * Exception related to a parsing error that includes an offset in the stream * where the error occurred. */ class ORCUS_PSR_DLLPUBLIC parse_error : public general_error { std::ptrdiff_t m_offset; /// offset in the stream where the error occurred. 
+protected: + parse_error(const std::string& cls, const std::string& msg, std::ptrdiff_t offset); + public: parse_error(std::string msg, std::ptrdiff_t offset); - parse_error(const std::string& cls, const std::string& msg, std::ptrdiff_t offset); + /** + * Get the offset in a stream associated with the error. + * + * @return offset in a stream where the error occurred. + */ std::ptrdiff_t offset() const; static void throw_with( diff --git a/src/parser/exception.cpp b/src/parser/exception.cpp index e30821ca..c2e09616 100644 --- a/src/parser/exception.cpp +++ b/src/parser/exception.cpp @@ -96,14 +96,14 @@ std::string build_message( } -parse_error::parse_error(std::string msg, std::ptrdiff_t offset) : - general_error(std::move(msg)), m_offset(offset) +parse_error::parse_error(const std::string& cls, const std::string& msg, std::ptrdiff_t offset) : + general_error(cls, msg), m_offset(offset) { append_msg(build_offset_msg(offset)); } -parse_error::parse_error(const std::string& cls, const std::string& msg, std::ptrdiff_t offset) : - general_error(cls, msg), m_offset(offset) +parse_error::parse_error(std::string msg, std::ptrdiff_t offset) : + general_error(std::move(msg)), m_offset(offset) { append_msg(build_offset_msg(offset)); } -- GitLab From 22e07945c0d826a80887af0f7b208e4c4ca11604 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 21 Nov 2022 22:31:35 -0500 Subject: [PATCH 31/91] Move malformed_xml_error to exception.hpp And take it out of the sax namespace. 
--- doc/cpp/parser/exception.rst | 3 ++ doc/cpp/parser/xml.rst | 3 -- include/orcus/exception.hpp | 15 +++++++-- include/orcus/sax_ns_parser.hpp | 6 ++-- include/orcus/sax_parser.hpp | 32 +++++++++---------- include/orcus/sax_parser_base.hpp | 11 ------- include/orcus/threaded_sax_token_parser.hpp | 2 +- slickedit/cpp.vpj | 1 - src/orcus_xml_main.cpp | 2 +- src/parser/exception.cpp | 11 +++++-- src/parser/parser_test_xml_validation.cpp | 2 +- src/parser/sax_parser_base.cpp | 5 --- src/parser/threaded_sax_token_parser_test.cpp | 2 +- 13 files changed, 47 insertions(+), 48 deletions(-) diff --git a/doc/cpp/parser/exception.rst b/doc/cpp/parser/exception.rst index b30ad09b..d524cf4d 100644 --- a/doc/cpp/parser/exception.rst +++ b/doc/cpp/parser/exception.rst @@ -29,3 +29,6 @@ Exceptions .. doxygenclass:: orcus::parse_error :members: + +.. doxygenclass:: orcus::malformed_xml_error + :members: diff --git a/doc/cpp/parser/xml.rst b/doc/cpp/parser/xml.rst index d7edc31b..2592f3fe 100644 --- a/doc/cpp/parser/xml.rst +++ b/doc/cpp/parser/xml.rst @@ -57,9 +57,6 @@ Namespace Common ------ -.. doxygenclass:: orcus::sax::malformed_xml_error - :members: - .. doxygenstruct:: orcus::sax::doctype_declaration :members: diff --git a/include/orcus/exception.hpp b/include/orcus/exception.hpp index e994be80..12c9e31a 100644 --- a/include/orcus/exception.hpp +++ b/include/orcus/exception.hpp @@ -19,7 +19,7 @@ class ORCUS_PSR_DLLPUBLIC general_error : public std::exception { public: explicit general_error(std::string msg); - explicit general_error(const std::string& cls, const std::string& msg); + explicit general_error(std::string_view cls, std::string_view msg); virtual ~general_error() noexcept; virtual const char* what() const noexcept; @@ -95,7 +95,7 @@ class ORCUS_PSR_DLLPUBLIC parse_error : public general_error std::ptrdiff_t m_offset; /// offset in the stream where the error occurred. 
protected: - parse_error(const std::string& cls, const std::string& msg, std::ptrdiff_t offset); + parse_error(std::string_view cls, std::string_view msg, std::ptrdiff_t offset); public: parse_error(std::string msg, std::ptrdiff_t offset); @@ -114,6 +114,17 @@ public: std::string_view msg_before, std::string_view msg, std::string_view msg_after, std::ptrdiff_t offset); }; +/** + * This exception is thrown when SAX parser detects a malformed XML document. + */ +class ORCUS_PSR_DLLPUBLIC malformed_xml_error : public orcus::parse_error +{ +public: + malformed_xml_error() = delete; + malformed_xml_error(std::string_view msg, std::ptrdiff_t offset); + virtual ~malformed_xml_error(); +}; + namespace detail { /** diff --git a/include/orcus/sax_ns_parser.hpp b/include/orcus/sax_ns_parser.hpp index 15253298..fe13b725 100644 --- a/include/orcus/sax_ns_parser.hpp +++ b/include/orcus/sax_ns_parser.hpp @@ -222,7 +222,7 @@ public: /** * Start parsing the document. * - * @exception orcus::sax::malformed_xml_error when it encounters a + * @exception orcus::malformed_xml_error when it encounters a * non-matching closing element. 
*/ void parse(); @@ -288,7 +288,7 @@ private: { sax::detail::elem_scope& scope = m_scopes.back(); if (scope.ns != m_ns_cxt.get(elem.ns) || scope.name != elem.name) - throw sax::malformed_xml_error("mis-matching closing element.", -1); + throw malformed_xml_error("mis-matching closing element.", -1); m_elem.ns = scope.ns; m_elem.ns_alias = elem.ns; @@ -319,7 +319,7 @@ private: } if (m_attrs.count(sax::detail::entity_name(attr.ns, attr.name)) > 0) - throw sax::malformed_xml_error( + throw malformed_xml_error( "You can't define two attributes of the same name in the same element.", -1); m_attrs.insert(sax::detail::entity_name(attr.ns, attr.name)); diff --git a/include/orcus/sax_parser.hpp b/include/orcus/sax_parser.hpp index a6ec042a..a8da8365 100644 --- a/include/orcus/sax_parser.hpp +++ b/include/orcus/sax_parser.hpp @@ -197,14 +197,14 @@ void sax_parser::header() skip_bom(); skip_space_and_control(); if (!has_char() || cur_char() != '<') - throw sax::malformed_xml_error("xml file must begin with '<'.", offset()); + throw malformed_xml_error("xml file must begin with '<'.", offset()); if (config_type::baseline_version >= 11) { // XML version 1.1 requires a header declaration whereas in 1.0 it's // optional. 
if (next_char_checked() != '?') - throw sax::malformed_xml_error("xml file must begin with '::element_open(std::ptrdiff_t begin_pos) { // Self-closing element: if (next_and_char() != '>') - throw sax::malformed_xml_error("expected '/>' to self-close the element.", offset()); + throw malformed_xml_error("expected '/>' to self-close the element.", offset()); next(); elem.end_pos = offset(); m_handler.start_element(elem); @@ -307,7 +307,7 @@ void sax_parser::element_close(std::ptrdiff_t begin_pos) element_name(elem, begin_pos); if (cur_char() != '>') - throw sax::malformed_xml_error("expected '>' to close the element.", offset()); + throw malformed_xml_error("expected '>' to close the element.", offset()); next(); elem.end_pos = offset(); @@ -326,7 +326,7 @@ void sax_parser::special_tag() // This can be either ::special_tag() { // Possibly comment. if (next_and_char() != '-') - throw sax::malformed_xml_error("comment expected.", offset()); + throw malformed_xml_error("comment expected.", offset()); len -= 2; if (len < 3) - throw sax::malformed_xml_error("malformed comment.", offset()); + throw malformed_xml_error("malformed comment.", offset()); next(); comment(); @@ -362,7 +362,7 @@ void sax_parser::special_tag() } break; default: - throw sax::malformed_xml_error("failed to parse special tag.", offset()); + throw malformed_xml_error("failed to parse special tag.", offset()); } } @@ -383,7 +383,7 @@ void sax_parser::declaration(const char* name_check) { std::ostringstream os; os << "declaration name of '" << name_check << "' was expected, but '" << decl_name << "' was found instead."; - throw sax::malformed_xml_error(os.str(), offset()); + throw malformed_xml_error(os.str(), offset()); } m_handler.start_declaration(decl_name); @@ -396,7 +396,7 @@ void sax_parser::declaration(const char* name_check) skip_space_and_control(); } if (next_char_checked() != '>') - throw sax::malformed_xml_error("declaration must end with '?>'.", offset()); + throw 
malformed_xml_error("declaration must end with '?>'.", offset()); m_handler.end_declaration(decl_name); reset_buffer_pos(); @@ -440,7 +440,7 @@ void sax_parser::cdata() else match = 0; } - throw sax::malformed_xml_error("malformed CDATA section.", offset()); + throw malformed_xml_error("malformed CDATA section.", offset()); } template @@ -454,21 +454,21 @@ void sax_parser::doctype() // Either PUBLIC or SYSTEM. size_t len = remains(); if (len < 6) - throw sax::malformed_xml_error("DOCTYPE section too short.", offset()); + throw malformed_xml_error("DOCTYPE section too short.", offset()); param.keyword = sax::doctype_declaration::keyword_type::dtd_private; char c = cur_char(); if (c == 'P') { if (next_and_char() != 'U' || next_and_char() != 'B' || next_and_char() != 'L' || next_and_char() != 'I' || next_and_char() != 'C') - throw sax::malformed_xml_error("malformed DOCTYPE section.", offset()); + throw malformed_xml_error("malformed DOCTYPE section.", offset()); param.keyword = sax::doctype_declaration::keyword_type::dtd_public; } else if (c == 'S') { if (next_and_char() != 'Y' || next_and_char() != 'S' || next_and_char() != 'T' || next_and_char() != 'E' || next_and_char() != 'M') - throw sax::malformed_xml_error("malformed DOCTYPE section.", offset()); + throw malformed_xml_error("malformed DOCTYPE section.", offset()); } next_check(); @@ -500,7 +500,7 @@ void sax_parser::doctype() has_char_throw("DOCTYPE section too short."); if (cur_char() != '>') - throw sax::malformed_xml_error("malformed DOCTYPE section - closing '>' expected but not found.", offset()); + throw malformed_xml_error("malformed DOCTYPE section - closing '>' expected but not found.", offset()); #if ORCUS_DEBUG_SAX_PARSER cout << "sax_parser::doctype: root='" << param.root_element << "', fpi='" << param.fpi << "' uri='" << param.uri << "'" << endl; @@ -557,7 +557,7 @@ void sax_parser::attribute() { std::ostringstream os; os << "Attribute must begin with 'name=..'. 
(ns='" << attr.ns << "', name='" << attr.name << "')"; - throw sax::malformed_xml_error(os.str(), offset()); + throw malformed_xml_error(os.str(), offset()); } next_check(); // skip the '='. diff --git a/include/orcus/sax_parser_base.hpp b/include/orcus/sax_parser_base.hpp index 5d58624b..aec94335 100644 --- a/include/orcus/sax_parser_base.hpp +++ b/include/orcus/sax_parser_base.hpp @@ -29,17 +29,6 @@ using std::endl; namespace orcus { namespace sax { -/** - * This exception is thrown when SAX parser detects a malformed XML document. - */ -class ORCUS_PSR_DLLPUBLIC malformed_xml_error : public ::orcus::parse_error -{ -public: - malformed_xml_error() = delete; - malformed_xml_error(const std::string& msg, std::ptrdiff_t offset); - virtual ~malformed_xml_error(); -}; - /** * Document type declaration passed by sax_parser to its handler's doctype() * call. diff --git a/include/orcus/threaded_sax_token_parser.hpp b/include/orcus/threaded_sax_token_parser.hpp index 59ea967a..bad4761c 100644 --- a/include/orcus/threaded_sax_token_parser.hpp +++ b/include/orcus/threaded_sax_token_parser.hpp @@ -150,7 +150,7 @@ void threaded_sax_token_parser<_Handler>::process_tokens(const sax::parse_tokens case sax::parse_token_t::parse_error: { auto v = std::get(t.value); - throw sax::malformed_xml_error(std::string{v.str}, v.offset); + throw malformed_xml_error(std::string{v.str}, v.offset); } default: throw general_error("unknown token type encountered."); diff --git a/slickedit/cpp.vpj b/slickedit/cpp.vpj index f2566060..46d39b5b 100644 --- a/slickedit/cpp.vpj +++ b/slickedit/cpp.vpj @@ -620,7 +620,6 @@ - diff --git a/src/orcus_xml_main.cpp b/src/orcus_xml_main.cpp index 4056939f..617dc658 100644 --- a/src/orcus_xml_main.cpp +++ b/src/orcus_xml_main.cpp @@ -332,7 +332,7 @@ int main(int argc, char** argv) try ; } } - catch (const sax::malformed_xml_error& e) + catch (const malformed_xml_error& e) { cerr << create_parse_error_output(content.str(), e.offset()) << endl; cerr << e.what() 
<< endl; diff --git a/src/parser/exception.cpp b/src/parser/exception.cpp index c2e09616..4cae6ee0 100644 --- a/src/parser/exception.cpp +++ b/src/parser/exception.cpp @@ -18,7 +18,7 @@ general_error::general_error(std::string msg) : { } -general_error::general_error(const std::string& cls, const std::string& msg) +general_error::general_error(std::string_view cls, std::string_view msg) { std::ostringstream os; os << cls << ": " << msg; @@ -96,7 +96,7 @@ std::string build_message( } -parse_error::parse_error(const std::string& cls, const std::string& msg, std::ptrdiff_t offset) : +parse_error::parse_error(std::string_view cls, std::string_view msg, std::ptrdiff_t offset) : general_error(cls, msg), m_offset(offset) { append_msg(build_offset_msg(offset)); @@ -125,6 +125,11 @@ void parse_error::throw_with( throw parse_error(build_message(msg_before, msg, msg_after), offset); } -} +malformed_xml_error::malformed_xml_error(std::string_view msg, std::ptrdiff_t offset) : + orcus::parse_error("malformed_xml_error", msg, offset) {} + +malformed_xml_error::~malformed_xml_error() = default; + +} // namespace orcus /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/src/parser/parser_test_xml_validation.cpp b/src/parser/parser_test_xml_validation.cpp index 84fc26d1..b1d50221 100644 --- a/src/parser/parser_test_xml_validation.cpp +++ b/src/parser/parser_test_xml_validation.cpp @@ -38,7 +38,7 @@ void test_invalid() parser.parse(); assert(!"exception was expected, but one was not thrown."); } - catch (const orcus::sax::malformed_xml_error& e) + catch (const orcus::malformed_xml_error& e) { std::cerr << orcus::create_parse_error_output(content.str(), e.offset()) << std::endl; std::cerr << e.what() << std::endl; diff --git a/src/parser/sax_parser_base.cpp b/src/parser/sax_parser_base.cpp index 540f77f4..d558166f 100644 --- a/src/parser/sax_parser_base.cpp +++ b/src/parser/sax_parser_base.cpp @@ -19,11 +19,6 @@ namespace orcus { namespace sax { 
-malformed_xml_error::malformed_xml_error(const std::string& msg, std::ptrdiff_t offset) : - ::orcus::parse_error("malformed_xml_error", msg, offset) {} - -malformed_xml_error::~malformed_xml_error() = default; - char decode_xml_encoded_char(const char* p, size_t n) { if (n == 2) diff --git a/src/parser/threaded_sax_token_parser_test.cpp b/src/parser/threaded_sax_token_parser_test.cpp index 2f2cf1a4..1338b2ac 100644 --- a/src/parser/threaded_sax_token_parser_test.cpp +++ b/src/parser/threaded_sax_token_parser_test.cpp @@ -134,7 +134,7 @@ void test_sax_token_parser_1() parser.parse(); assert(!"An exception was expected, but one was not thrown."); } - catch (const sax::malformed_xml_error& e) + catch (const malformed_xml_error& e) { assert(e.offset() == 28u); } -- GitLab From 556d35bdb9b9008090b6810a38eac99c5f23a128 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 21 Nov 2022 22:50:14 -0500 Subject: [PATCH 32/91] Kill the idea of "transient stream" That was mis-guided. --- include/orcus/parser_base.hpp | 5 +-- include/orcus/sax_ns_parser.hpp | 9 ----- include/orcus/sax_parser.hpp | 17 ++------- include/orcus/sax_parser_base.hpp | 2 +- include/orcus/sax_token_parser.hpp | 13 ------- src/parser/css_parser_base.cpp | 2 +- src/parser/csv_parser_base.cpp | 2 +- src/parser/json_parser_base.cpp | 2 +- src/parser/parser_base.cpp | 3 +- src/parser/parser_base_test.cpp | 2 +- src/parser/sax_parser_base.cpp | 6 +-- src/parser/sax_parser_test.cpp | 61 ------------------------------ src/parser/yaml_parser_base.cpp | 2 +- 13 files changed, 15 insertions(+), 111 deletions(-) diff --git a/include/orcus/parser_base.hpp b/include/orcus/parser_base.hpp index eb12c63b..7eb8d81e 100644 --- a/include/orcus/parser_base.hpp +++ b/include/orcus/parser_base.hpp @@ -27,21 +27,18 @@ protected: const char* const mp_begin; const char* mp_char; const char* mp_end; - const bool m_transient_stream; private: numeric_parser_type m_func_parse_numeric; protected: - parser_base(const char* p, 
size_t n, bool transient_stream); + parser_base(const char* p, size_t n); void set_numeric_parser(const numeric_parser_type& func) { m_func_parse_numeric = func; } - bool transient_stream() const { return m_transient_stream; } - bool has_char() const { assert(mp_char <= mp_end); diff --git a/include/orcus/sax_ns_parser.hpp b/include/orcus/sax_ns_parser.hpp index fe13b725..42323beb 100644 --- a/include/orcus/sax_ns_parser.hpp +++ b/include/orcus/sax_ns_parser.hpp @@ -215,8 +215,6 @@ public: typedef HandlerT handler_type; sax_ns_parser(const char* content, const size_t size, xmlns_context& ns_cxt, handler_type& handler); - sax_ns_parser(const char* content, const size_t size, bool transient_stream, - xmlns_context& ns_cxt, handler_type& handler); ~sax_ns_parser() = default; /** @@ -364,13 +362,6 @@ sax_ns_parser::sax_ns_parser( { } -template -sax_ns_parser::sax_ns_parser( - const char* content, const size_t size, bool transient_stream, xmlns_context& ns_cxt, handler_type& handler) : - m_wrapper(ns_cxt, handler), m_parser(content, size, transient_stream, m_wrapper) -{ -} - template void sax_ns_parser::parse() { diff --git a/include/orcus/sax_parser.hpp b/include/orcus/sax_parser.hpp index a8da8365..0295a511 100644 --- a/include/orcus/sax_parser.hpp +++ b/include/orcus/sax_parser.hpp @@ -135,7 +135,6 @@ public: typedef ConfigT config_type; sax_parser(const char* content, const size_t size, handler_type& handler); - sax_parser(const char* content, const size_t size, bool transient_stream, handler_type& handler); ~sax_parser() = default; void parse(); @@ -165,15 +164,7 @@ private: template sax_parser::sax_parser( const char* content, const size_t size, handler_type& handler) : - sax::parser_base(content, size, false), - m_handler(handler) -{ -} - -template -sax_parser::sax_parser( - const char* content, const size_t size, bool transient_stream, handler_type& handler) : - sax::parser_base(content, size, transient_stream), + sax::parser_base(content, size), 
m_handler(handler) { } @@ -433,7 +424,7 @@ void sax_parser::cdata() { // Found ']]>'. size_t cdata_len = i - 2; - m_handler.characters(std::string_view(p0, cdata_len), transient_stream()); + m_handler.characters(std::string_view(p0, cdata_len), false); next(); return; } @@ -526,7 +517,7 @@ void sax_parser::characters() buf.append(p0, mp_char-p0); characters_with_encoded_char(buf); if (buf.empty()) - m_handler.characters(std::string_view{}, transient_stream()); + m_handler.characters(std::string_view{}, false); else m_handler.characters(std::string_view(buf.get(), buf.size()), true); return; @@ -536,7 +527,7 @@ void sax_parser::characters() if (mp_char > p0) { std::string_view val(p0, mp_char-p0); - m_handler.characters(val, transient_stream()); + m_handler.characters(val, false); } } diff --git a/include/orcus/sax_parser_base.hpp b/include/orcus/sax_parser_base.hpp index aec94335..596df447 100644 --- a/include/orcus/sax_parser_base.hpp +++ b/include/orcus/sax_parser_base.hpp @@ -118,7 +118,7 @@ protected: bool m_root_elem_open:1; protected: - parser_base(const char* content, size_t size, bool transient_stream); + parser_base(const char* content, size_t size); ~parser_base(); void next_check() diff --git a/include/orcus/sax_token_parser.hpp b/include/orcus/sax_token_parser.hpp index 2227a852..cfefa039 100644 --- a/include/orcus/sax_token_parser.hpp +++ b/include/orcus/sax_token_parser.hpp @@ -140,10 +140,6 @@ public: const char* content, const size_t size, const tokens& _tokens, xmlns_context& ns_cxt, handler_type& handler); - sax_token_parser( - const char* content, const size_t size, bool transient_stream, - const tokens& _tokens, xmlns_context& ns_cxt, handler_type& handler); - ~sax_token_parser() = default; void parse(); @@ -204,15 +200,6 @@ sax_token_parser::sax_token_parser( { } -template -sax_token_parser::sax_token_parser( - const char* content, const size_t size, bool transient_stream, - const tokens& _tokens, xmlns_context& ns_cxt, handler_type& handler) : 
- m_wrapper(_tokens, handler), - m_parser(content, size, transient_stream, ns_cxt, m_wrapper) -{ -} - template void sax_token_parser::parse() { diff --git a/src/parser/css_parser_base.cpp b/src/parser/css_parser_base.cpp index e1c81cce..676d6f50 100644 --- a/src/parser/css_parser_base.cpp +++ b/src/parser/css_parser_base.cpp @@ -18,7 +18,7 @@ namespace orcus { namespace css { parser_base::parser_base(const char* p, size_t n) : - ::orcus::parser_base(p, n, false), + orcus::parser_base(p, n), m_simple_selector_count(0), m_combinator(combinator_t::descendant) {} diff --git a/src/parser/csv_parser_base.cpp b/src/parser/csv_parser_base.cpp index 38fb8648..b8b47bab 100644 --- a/src/parser/csv_parser_base.cpp +++ b/src/parser/csv_parser_base.cpp @@ -17,7 +17,7 @@ parser_config::parser_config() : parser_base::parser_base( const char* p, size_t n, const csv::parser_config& config) : - ::orcus::parser_base(p, n, false), m_config(config) + ::orcus::parser_base(p, n), m_config(config) { maybe_skip_bom(); } diff --git a/src/parser/json_parser_base.cpp b/src/parser/json_parser_base.cpp index b123817a..17018f59 100644 --- a/src/parser/json_parser_base.cpp +++ b/src/parser/json_parser_base.cpp @@ -37,7 +37,7 @@ struct parser_base::impl }; parser_base::parser_base(const char* p, size_t n) : - ::orcus::parser_base(p, n, false), mp_impl(std::make_unique()) + orcus::parser_base(p, n), mp_impl(std::make_unique()) { set_numeric_parser(parse_numeric_json); diff --git a/src/parser/parser_base.cpp b/src/parser/parser_base.cpp index 17924f5e..20587a9e 100644 --- a/src/parser/parser_base.cpp +++ b/src/parser/parser_base.cpp @@ -21,9 +21,8 @@ namespace orcus { -parser_base::parser_base(const char* p, size_t n, bool transient_stream) : +parser_base::parser_base(const char* p, size_t n) : mp_begin(p), mp_char(p), mp_end(p+n), - m_transient_stream(transient_stream), m_func_parse_numeric(parse_numeric) { } diff --git a/src/parser/parser_base_test.cpp b/src/parser/parser_base_test.cpp index 
6675f24e..74994da3 100644 --- a/src/parser/parser_base_test.cpp +++ b/src/parser/parser_base_test.cpp @@ -16,7 +16,7 @@ void test_skip_space_and_control() class _test_type : public orcus::parser_base { public: - _test_type(const char* p, size_t n) : orcus::parser_base(p, n, false) {} + _test_type(const char* p, size_t n) : orcus::parser_base(p, n) {} void run() { diff --git a/src/parser/sax_parser_base.cpp b/src/parser/sax_parser_base.cpp index d558166f..95ee9314 100644 --- a/src/parser/sax_parser_base.cpp +++ b/src/parser/sax_parser_base.cpp @@ -108,8 +108,8 @@ struct parser_base::impl std::vector> m_cell_buffers; }; -parser_base::parser_base(const char* content, size_t size, bool transient_stream) : - ::orcus::parser_base(content, size, transient_stream), +parser_base::parser_base(const char* content, size_t size) : + ::orcus::parser_base(content, size), mp_impl(std::make_unique()), m_nest_level(0), m_buffer_pos(0), @@ -322,7 +322,7 @@ bool parser_base::value(std::string_view& str, bool decode) // Skip the closing quote. next(); - return transient_stream(); + return false; } void parser_base::name(std::string_view& str) diff --git a/src/parser/sax_parser_test.cpp b/src/parser/sax_parser_test.cpp index 6476ff8a..b4227622 100644 --- a/src/parser/sax_parser_test.cpp +++ b/src/parser/sax_parser_test.cpp @@ -21,66 +21,6 @@ void test_handler() parser.parse(); } -void test_transient_stream() -{ - struct _handler : public orcus::sax_handler - { - void characters(std::string_view val, bool transient) - { - cout << "characters: '" << val << "' (transient=" << transient << ")" << endl; - - if (transient_stream) - // When parsing a transient stream, this flag is always set. 
- assert(transient); - else if (val == "non-transient") - assert(!transient); - else if (val == "(&&&)") - assert(transient); - else if (val == " ") - assert(!transient); - } - - void attribute(const orcus::sax::parser_attribute& attr) - { - cout << "attribute: " << attr.name << "=\"" << attr.value << "\" (transient=" << attr.transient << ")" << endl; - - if (transient_stream) - // When parsing a transient stream, this flag is always set. - assert(attr.transient); - else if (attr.name == "attr1") - assert(!attr.transient); - else if (attr.name == "attr2") - assert(attr.transient); - else if (attr.name == "version") - assert(!attr.transient); - } - - bool transient_stream = false; - }; - - const char* content = - "" - "" - " non-transient" - " (&&&)" - "" - ; - - { - _handler hdl; - hdl.transient_stream = false; - orcus::sax_parser<_handler> parser(content, strlen(content), hdl.transient_stream, hdl); - parser.parse(); - } - - { - _handler hdl; - hdl.transient_stream = true; - orcus::sax_parser<_handler> parser(content, strlen(content), hdl.transient_stream, hdl); - parser.parse(); - } -} - void test_attr_equal_with_whitespace() { struct _handler : public orcus::sax_handler {}; @@ -119,7 +59,6 @@ void test_attr_with_encoded_chars_single_quotes() int main() { test_handler(); - test_transient_stream(); test_attr_equal_with_whitespace(); test_attr_with_encoded_chars_single_quotes(); diff --git a/src/parser/yaml_parser_base.cpp b/src/parser/yaml_parser_base.cpp index 53000ced..3a5dc5ff 100644 --- a/src/parser/yaml_parser_base.cpp +++ b/src/parser/yaml_parser_base.cpp @@ -54,7 +54,7 @@ const size_t parser_base::parse_indent_end_of_stream = std::numeric_limits::max() - 2; parser_base::parser_base(const char* p, size_t n) : - ::orcus::parser_base(p, n, false), mp_impl(std::make_unique()) {} + orcus::parser_base(p, n), mp_impl(std::make_unique()) {} parser_base::~parser_base() {} -- GitLab From 2cf2a04d9faed476c62153ce75a3464c497b0af2 Mon Sep 17 00:00:00 2001 From: Kohei 
Yoshida Date: Mon, 21 Nov 2022 23:04:15 -0500 Subject: [PATCH 33/91] I have no use for this --- include/orcus/sax_token_parser.hpp | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/include/orcus/sax_token_parser.hpp b/include/orcus/sax_token_parser.hpp index cfefa039..a2598064 100644 --- a/include/orcus/sax_token_parser.hpp +++ b/include/orcus/sax_token_parser.hpp @@ -19,32 +19,6 @@ namespace orcus { class tokens; -namespace sax { - -#if ORCUS_DEBUG_SAX_PARSER -template -class attr_printer -{ -public: - attr_printer(const _Tokens& tokens, const ::std::string& indent) : - m_tokens(tokens), m_indent(indent) {} - - void operator() (const _Attr& attr) const - { - using namespace std; - cout << m_indent << " attribute: " - << attr.ns << ":" - << m_tokens.get_token_name(attr.name) << "=\"" - << attr.value.str() << "\"" << endl; - } -private: - const _Tokens& m_tokens; - ::std::string m_indent; -}; -#endif - -} - class ORCUS_PSR_DLLPUBLIC sax_token_handler_wrapper_base { protected: -- GitLab From befe0dcc6be5a27677d88041241bd18cbc4b7e66 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 21 Nov 2022 23:10:57 -0500 Subject: [PATCH 34/91] Create a section for zip archive related classes --- doc/cpp/parser/archive.rst | 18 ++++++++++++++++++ doc/cpp/parser/index.rst | 1 + doc/cpp/parser/util.rst | 3 --- 3 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 doc/cpp/parser/archive.rst diff --git a/doc/cpp/parser/archive.rst b/doc/cpp/parser/archive.rst new file mode 100644 index 00000000..da4fcd8b --- /dev/null +++ b/doc/cpp/parser/archive.rst @@ -0,0 +1,18 @@ + +Archive +======= + +Zip archive +----------- + +.. doxygenclass:: orcus::zip_archive + :members: + +.. doxygenclass:: orcus::zip_archive_stream + :members: + +.. doxygenclass:: orcus::zip_archive_stream_fd + :members: + +.. 
doxygenclass:: orcus::zip_archive_stream_blob + :members: diff --git a/doc/cpp/parser/index.rst b/doc/cpp/parser/index.rst index c55fa63a..c1b49787 100644 --- a/doc/cpp/parser/index.rst +++ b/doc/cpp/parser/index.rst @@ -7,6 +7,7 @@ Low-Level Parsers and Utilities exception.rst stream.rst + archive.rst util.rst css.rst csv.rst diff --git a/doc/cpp/parser/util.rst b/doc/cpp/parser/util.rst index fc1075de..e9d8f3fd 100644 --- a/doc/cpp/parser/util.rst +++ b/doc/cpp/parser/util.rst @@ -12,9 +12,6 @@ Utilities .. doxygenclass:: orcus::cell_buffer :members: -.. doxygenclass:: orcus::zip_archive - :members: - XML Types ========= -- GitLab From 24bd9006ddc22b27a75a4111ee47af9672c23c4f Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 22 Nov 2022 20:07:21 -0500 Subject: [PATCH 35/91] Move zip_error into exception.hpp --- doc/cpp/parser/exception.rst | 3 +++ include/orcus/exception.hpp | 12 +++++++++++- include/orcus/zip_archive.hpp | 13 +------------ src/parser/exception.cpp | 6 ++++++ src/parser/zip_archive.cpp | 17 +---------------- 5 files changed, 22 insertions(+), 29 deletions(-) diff --git a/doc/cpp/parser/exception.rst b/doc/cpp/parser/exception.rst index d524cf4d..e6b14ccb 100644 --- a/doc/cpp/parser/exception.rst +++ b/doc/cpp/parser/exception.rst @@ -32,3 +32,6 @@ Exceptions .. doxygenclass:: orcus::malformed_xml_error :members: + +.. doxygenclass:: orcus::zip_error + :members: diff --git a/include/orcus/exception.hpp b/include/orcus/exception.hpp index 12c9e31a..5d1aa827 100644 --- a/include/orcus/exception.hpp +++ b/include/orcus/exception.hpp @@ -117,7 +117,7 @@ public: /** * This exception is thrown when SAX parser detects a malformed XML document. 
*/ -class ORCUS_PSR_DLLPUBLIC malformed_xml_error : public orcus::parse_error +class ORCUS_PSR_DLLPUBLIC malformed_xml_error : public parse_error { public: malformed_xml_error() = delete; @@ -125,6 +125,16 @@ public: virtual ~malformed_xml_error(); }; +/** + * Exception related to parsing of zip archive stream. + */ +class ORCUS_PSR_DLLPUBLIC zip_error : public general_error +{ +public: + zip_error(std::string_view msg); + virtual ~zip_error(); +}; + namespace detail { /** diff --git a/include/orcus/zip_archive.hpp b/include/orcus/zip_archive.hpp index c7ea1d9a..159ba9e1 100644 --- a/include/orcus/zip_archive.hpp +++ b/include/orcus/zip_archive.hpp @@ -9,9 +9,9 @@ #define INCLUDED_ORCUS_ZIP_ARCHIVE_HPP #include "env.hpp" +#include "exception.hpp" #include -#include #include #include @@ -20,17 +20,6 @@ namespace orcus { class zip_archive_stream; class zip_archive_impl; -class ORCUS_PSR_DLLPUBLIC zip_error : public std::exception -{ - std::string m_msg; -public: - zip_error(); - zip_error(const std::string& msg); - virtual ~zip_error(); - - virtual const char* what() const throw(); -}; - class ORCUS_PSR_DLLPUBLIC zip_archive { zip_archive_impl* mp_impl; diff --git a/src/parser/exception.cpp b/src/parser/exception.cpp index 4cae6ee0..1d958fdb 100644 --- a/src/parser/exception.cpp +++ b/src/parser/exception.cpp @@ -130,6 +130,12 @@ malformed_xml_error::malformed_xml_error(std::string_view msg, std::ptrdiff_t of malformed_xml_error::~malformed_xml_error() = default; +zip_error::zip_error(std::string_view msg) : general_error("zip_error", msg) +{ +} + +zip_error::~zip_error() = default; + } // namespace orcus /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/src/parser/zip_archive.cpp b/src/parser/zip_archive.cpp index 386adbbd..5ad8d702 100644 --- a/src/parser/zip_archive.cpp +++ b/src/parser/zip_archive.cpp @@ -24,21 +24,6 @@ namespace orcus { -zip_error::zip_error() {} -zip_error::zip_error(const std::string& msg) : m_msg() -{ - std::ostringstream os; - 
os << "zip error: " << msg; - m_msg = os.str(); -} - -zip_error::~zip_error() = default; - -const char* zip_error::what() const throw() -{ - return m_msg.c_str(); -} - namespace { struct zip_file_param @@ -259,7 +244,7 @@ void zip_archive_impl::load() { size_t central_dir_end_pos = seek_central_dir(); if (!central_dir_end_pos) - throw zip_error(); + throw zip_error("failed to seek the end position of the central directory"); m_central_dir_end = zip_stream_parser(m_stream, central_dir_end_pos); -- GitLab From 5a38a5af5016898480e3b34fb5b9a70216479016 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 22 Nov 2022 20:12:22 -0500 Subject: [PATCH 36/91] Use std::unique_ptr for the pimpl idiom --- include/orcus/zip_archive.hpp | 11 ++++++----- src/parser/zip_archive.cpp | 35 +++++++++++++---------------------- 2 files changed, 19 insertions(+), 27 deletions(-) diff --git a/include/orcus/zip_archive.hpp b/include/orcus/zip_archive.hpp index 159ba9e1..2cbabe63 100644 --- a/include/orcus/zip_archive.hpp +++ b/include/orcus/zip_archive.hpp @@ -11,24 +11,25 @@ #include "env.hpp" #include "exception.hpp" -#include -#include +#include #include +#include namespace orcus { class zip_archive_stream; -class zip_archive_impl; class ORCUS_PSR_DLLPUBLIC zip_archive { - zip_archive_impl* mp_impl; + struct impl; + std::unique_ptr mp_impl; + +public: zip_archive() = delete; zip_archive(const zip_archive&) = delete; zip_archive& operator= (const zip_archive) = delete; -public: zip_archive(zip_archive_stream* stream); ~zip_archive(); diff --git a/src/parser/zip_archive.cpp b/src/parser/zip_archive.cpp index 5ad8d702..be2b08f8 100644 --- a/src/parser/zip_archive.cpp +++ b/src/parser/zip_archive.cpp @@ -184,7 +184,7 @@ struct central_dir_end } // anonymous namespace -class zip_archive_impl +class zip_archive::impl { typedef std::vector file_params_type; typedef std::unordered_map filename_map_type; @@ -200,8 +200,7 @@ class zip_archive_impl filename_map_type m_filenames; public: - 
zip_archive_impl(zip_archive_stream* stream); - ~zip_archive_impl(); + impl(zip_archive_stream* stream); void load(); void dump_file_entry(size_t pos) const; @@ -227,7 +226,7 @@ private: void read_file_entries(); }; -zip_archive_impl::zip_archive_impl(zip_archive_stream* stream) : +zip_archive::impl::impl(zip_archive_stream* stream) : m_stream(stream), m_stream_size(0), m_central_dir_pos(0) { if (!m_stream) @@ -236,11 +235,7 @@ zip_archive_impl::zip_archive_impl(zip_archive_stream* stream) : m_stream_size = m_stream->size(); } -zip_archive_impl::~zip_archive_impl() -{ -} - -void zip_archive_impl::load() +void zip_archive::impl::load() { size_t central_dir_end_pos = seek_central_dir(); if (!central_dir_end_pos) @@ -255,7 +250,7 @@ void zip_archive_impl::load() read_file_entries(); } -void zip_archive_impl::read_file_entries() +void zip_archive::impl::read_file_entries() { m_file_params.clear(); @@ -329,7 +324,7 @@ void zip_archive_impl::read_file_entries() } } -void zip_archive_impl::dump_file_entry(size_t pos) const +void zip_archive::impl::dump_file_entry(size_t pos) const { if (pos >= m_file_params.size()) throw zip_error("invalid file entry index."); @@ -385,7 +380,7 @@ void zip_archive_impl::dump_file_entry(size_t pos) const } } -void zip_archive_impl::dump_file_entry(std::string_view entry_name) const +void zip_archive::impl::dump_file_entry(std::string_view entry_name) const { filename_map_type::const_iterator it = m_filenames.find(entry_name); if (it == m_filenames.end()) @@ -398,7 +393,7 @@ void zip_archive_impl::dump_file_entry(std::string_view entry_name) const dump_file_entry(it->second); } -std::string_view zip_archive_impl::get_file_entry_name(std::size_t pos) const +std::string_view zip_archive::impl::get_file_entry_name(std::size_t pos) const { if (pos >= m_file_params.size()) return std::string_view{}; @@ -406,7 +401,7 @@ std::string_view zip_archive_impl::get_file_entry_name(std::size_t pos) const return m_file_params[pos].filename; } -bool 
zip_archive_impl::read_file_entry(std::string_view entry_name, std::vector& buf) const +bool zip_archive::impl::read_file_entry(std::string_view entry_name, std::vector& buf) const { filename_map_type::const_iterator it = m_filenames.find(entry_name); if (it == m_filenames.end()) @@ -469,7 +464,7 @@ bool zip_archive_impl::read_file_entry(std::string_view entry_name, std::vector< return false; } -size_t zip_archive_impl::seek_central_dir() +size_t zip_archive::impl::seek_central_dir() { // Search for the position of 0x06054b50 (read in little endian order - so // it's 0x50, 0x4b, 0x05, 0x06 in this order) somewhere near the end of @@ -523,7 +518,7 @@ size_t zip_archive_impl::seek_central_dir() return 0; } -void zip_archive_impl::read_central_dir_end() +void zip_archive::impl::read_central_dir_end() { central_dir_end content; content.magic_number = m_central_dir_end.read_4bytes(); @@ -551,15 +546,11 @@ void zip_archive_impl::read_central_dir_end() #endif } -zip_archive::zip_archive(zip_archive_stream* stream) : - mp_impl(new zip_archive_impl(stream)) +zip_archive::zip_archive(zip_archive_stream* stream) : mp_impl(std::make_unique(stream)) { } -zip_archive::~zip_archive() -{ - delete mp_impl; -} +zip_archive::~zip_archive() = default; void zip_archive::load() { -- GitLab From 559a1f116c3dce9c9c489d63af8ec4821de0e7a2 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 22 Nov 2022 20:53:51 -0500 Subject: [PATCH 37/91] read_file_entry() to return the retrieved data stream directly --- include/orcus/zip_archive.hpp | 15 +++++---- src/liborcus/opc_reader.cpp | 11 +++++- src/liborcus/orcus_ods.cpp | 54 +++++++++++++++++------------- src/liborcus/orcus_xlsx.cpp | 63 +++++++++++++++++------------------ src/parser/zip_archive.cpp | 41 ++++++++++++----------- 5 files changed, 100 insertions(+), 84 deletions(-) diff --git a/include/orcus/zip_archive.hpp b/include/orcus/zip_archive.hpp index 2cbabe63..9415c957 100644 --- a/include/orcus/zip_archive.hpp +++ 
b/include/orcus/zip_archive.hpp @@ -74,17 +74,18 @@ public: size_t get_file_entry_count() const; /** - * Retrieve data stream of specified file entry into buffer. The retrieved - * data stream gets uncompressed if the original stream is compressed. - * The method will overwrite the content of passed buffer if there is any - * pre-existing data in it. + * Retrieve data stream of specified file entry. The retrieved data stream + * gets uncompressed if the original stream is compressed. * - * @param entry_name file entry name + * @param entry_name file entry name. * @param buf buffer to put the retrieved data stream into. * - * @return true if successful, false otherwise. + * @return buffer containing the data stream for specified entry. + * + * @exception zip_error thrown when any problem is encountered during data + * stream retrieval. */ - bool read_file_entry(std::string_view entry_name, std::vector& buf) const; + std::vector read_file_entry(std::string_view entry_name) const; }; } diff --git a/src/liborcus/opc_reader.cpp b/src/liborcus/opc_reader.cpp index a98cecca..8056969e 100644 --- a/src/liborcus/opc_reader.cpp +++ b/src/liborcus/opc_reader.cpp @@ -71,7 +71,16 @@ void opc_reader::read_file(std::unique_ptr&& stream) bool opc_reader::open_zip_stream(const string& path, vector& buf) { - return m_archive->read_file_entry(path.c_str(), buf); + try + { + std::vector entry = m_archive->read_file_entry(path.c_str()); + buf.swap(entry); + return true; + } + catch (const std::exception&) + { + return false; + } } void opc_reader::read_part(const pstring& path, const schema_t type, opc_rel_extra* data) diff --git a/src/liborcus/orcus_ods.cpp b/src/liborcus/orcus_ods.cpp index 16a5a481..f6b24a34 100644 --- a/src/liborcus/orcus_ods.cpp +++ b/src/liborcus/orcus_ods.cpp @@ -69,9 +69,14 @@ void orcus_ods::read_styles(const zip_archive& archive) return; std::vector buf; - if (!archive.read_file_entry("styles.xml", buf)) + + try + { + buf = 
archive.read_file_entry("styles.xml"); + } + catch (const std::exception& e) { - std::cout << "failed to get stat on styles.xml" << std::endl; + std::cerr << "failed to get stat on styles.xml (reason: " << e.what() << ")" << std::endl; return; } @@ -95,13 +100,18 @@ void orcus_ods::read_styles(const zip_archive& archive) void orcus_ods::read_content(const zip_archive& archive) { vector buf; - if (!archive.read_file_entry("content.xml", buf)) + + try { - cout << "failed to get stat on content.xml" << endl; + buf = archive.read_file_entry("content.xml"); + } + catch (const std::exception& e) + { + std::cerr << "failed to get stat on content.xml (reason: " << e.what() << ")" << std::endl; return; } - read_content_xml(&buf[0], buf.size()); + read_content_xml(buf.data(), buf.size()); } void orcus_ods::read_content_xml(const unsigned char* p, size_t size) @@ -144,9 +154,25 @@ bool orcus_ods::detect(const unsigned char* blob, size_t size) { zip_archive_stream_blob stream(blob, size); zip_archive archive(&stream); + try { archive.load(); + + std::vector buf = archive.read_file_entry("mimetype"); + + if (buf.empty()) + // mimetype is empty. + return false; + + const char* mimetype = "application/vnd.oasis.opendocument.spreadsheet"; + size_t n = strlen(mimetype); + if (buf.size() < n) + return false; + + if (strncmp(mimetype, reinterpret_cast(buf.data()), n)) + // The mimetype content differs. + return false; } catch (const zip_error&) { @@ -154,24 +180,6 @@ bool orcus_ods::detect(const unsigned char* blob, size_t size) return false; } - vector buf; - if (!archive.read_file_entry("mimetype", buf)) - // Failed to read 'mimetype' entry. - return false; - - if (buf.empty()) - // mimetype is empty. - return false; - - const char* mimetype = "application/vnd.oasis.opendocument.spreadsheet"; - size_t n = strlen(mimetype); - if (buf.size() < n) - return false; - - if (strncmp(mimetype, reinterpret_cast(&buf[0]), n)) - // The mimetype content differs. 
- return false; - return true; } diff --git a/src/liborcus/orcus_xlsx.cpp b/src/liborcus/orcus_xlsx.cpp index a6d99813..01caec4c 100644 --- a/src/liborcus/orcus_xlsx.cpp +++ b/src/liborcus/orcus_xlsx.cpp @@ -154,50 +154,47 @@ bool orcus_xlsx::detect(const unsigned char* blob, size_t size) { zip_archive_stream_blob stream(blob, size); zip_archive archive(&stream); + try { archive.load(); - } - catch (const zip_error&) - { - // Not a valid zip archive. - return false; - } - // Find and parse [Content_Types].xml which is required for OPC package. - vector buf; - if (!archive.read_file_entry("[Content_Types].xml", buf)) - // Failed to read the contnet types entry. - return false; + // Find and parse [Content_Types].xml which is required for OPC package. + std::vector buf = archive.read_file_entry("[Content_Types].xml"); - if (buf.empty()) - return false; + if (buf.empty()) + return false; - config opt(format_t::xlsx); - xmlns_repository ns_repo; - ns_repo.add_predefined_values(NS_opc_all); - session_context session_cxt; - xml_stream_parser parser( - opt, ns_repo, opc_tokens, reinterpret_cast(&buf[0]), buf.size()); + config opt(format_t::xlsx); + xmlns_repository ns_repo; + ns_repo.add_predefined_values(NS_opc_all); + session_context session_cxt; + xml_stream_parser parser( + opt, ns_repo, opc_tokens, reinterpret_cast(&buf[0]), buf.size()); - xml_simple_stream_handler handler( - session_cxt, opc_tokens, - std::make_unique(session_cxt, opc_tokens)); - parser.set_handler(&handler); - parser.parse(); + xml_simple_stream_handler handler( + session_cxt, opc_tokens, + std::make_unique(session_cxt, opc_tokens)); + parser.set_handler(&handler); + parser.parse(); - opc_content_types_context& context = - static_cast(handler.get_context()); + opc_content_types_context& context = + static_cast(handler.get_context()); - std::vector parts; - context.pop_parts(parts); + std::vector parts; + context.pop_parts(parts); - if (parts.empty()) - return false; + if (parts.empty()) + return 
false; - // See if we can find the workbook stream. - xml_part_t workbook_part("/xl/workbook.xml", CT_ooxml_xlsx_sheet_main); - return std::find(parts.begin(), parts.end(), workbook_part) != parts.end(); + // See if we can find the workbook stream. + xml_part_t workbook_part("/xl/workbook.xml", CT_ooxml_xlsx_sheet_main); + return std::find(parts.begin(), parts.end(), workbook_part) != parts.end(); + } + catch (const std::exception&) + { + return false; + } } void orcus_xlsx::read_file(const string& filepath) diff --git a/src/parser/zip_archive.cpp b/src/parser/zip_archive.cpp index be2b08f8..f9889835 100644 --- a/src/parser/zip_archive.cpp +++ b/src/parser/zip_archive.cpp @@ -212,7 +212,7 @@ public: return m_file_params.size(); } - bool read_file_entry(std::string_view entry_name, std::vector& buf) const; + std::vector read_file_entry(std::string_view entry_name) const; private: @@ -371,11 +371,11 @@ void zip_archive::impl::dump_file_entry(size_t pos) const m_stream->seek(file_header.tell()); - std::vector buf; - if (read_file_entry(param.filename, buf)) + std::vector buf = read_file_entry(param.filename); + if (!buf.empty()) { std::cout << "-- data section" << std::endl; - std::cout << &buf[0] << std::endl; + std::cout << buf.data() << std::endl; std::cout << "--" << std::endl; } } @@ -401,17 +401,20 @@ std::string_view zip_archive::impl::get_file_entry_name(std::size_t pos) const return m_file_params[pos].filename; } -bool zip_archive::impl::read_file_entry(std::string_view entry_name, std::vector& buf) const +std::vector zip_archive::impl::read_file_entry(std::string_view entry_name) const { filename_map_type::const_iterator it = m_filenames.find(entry_name); if (it == m_filenames.end()) - // entry name not found. - return false; + { + std::ostringstream os; + os << "entry named '" << entry_name << "' not found"; + throw zip_error(os.str()); + } + size_t index = it->second; if (index >= m_file_params.size()) - // entry index is out of bound. 
- return false; + throw zip_error("entry index is out-of-bound"); const zip_file_param& param = m_file_params[index]; @@ -435,33 +438,31 @@ bool zip_archive::impl::read_file_entry(std::string_view entry_name, std::vector m_stream->seek(file_header.tell()); std::vector raw_buf(param.size_compressed+1, 0); - m_stream->read(&raw_buf[0], param.size_compressed); + m_stream->read(raw_buf.data(), param.size_compressed); switch (param.compress_method) { case zip_file_param::stored: + { // Not compressed at all. - buf.swap(raw_buf); - return true; + return raw_buf; + } case zip_file_param::deflated: { // deflate compression std::vector zip_buf(param.size_uncompressed+1, 0); // null-terminated zip_inflater inflater(raw_buf, zip_buf, param); if (!inflater.init()) - break; + throw zip_error("error during initialization of inflater"); if (!inflater.inflate()) throw zip_error("error during inflate."); - buf.swap(zip_buf); - return true; + return zip_buf; } - default: - ; } - return false; + throw std::logic_error("compress method can be either 'stored' or 'deflated', but neither has happened"); } size_t zip_archive::impl::seek_central_dir() @@ -577,9 +578,9 @@ size_t zip_archive::get_file_entry_count() const return mp_impl->get_file_entry_count(); } -bool zip_archive::read_file_entry(std::string_view entry_name, std::vector& buf) const +std::vector zip_archive::read_file_entry(std::string_view entry_name) const { - return mp_impl->read_file_entry(entry_name, buf); + return mp_impl->read_file_entry(entry_name); } } -- GitLab From fb598144d8ba4d49a25848537f2daaadf50b6e4d Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 22 Nov 2022 21:09:32 -0500 Subject: [PATCH 38/91] Remove using namespace std --- src/parser/zip_archive_test.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/parser/zip_archive_test.cpp b/src/parser/zip_archive_test.cpp index feef3b59..45393fa8 100644 --- a/src/parser/zip_archive_test.cpp +++ b/src/parser/zip_archive_test.cpp 
@@ -22,10 +22,9 @@ catch (...) \ { \ } -using namespace std; using namespace orcus; -void test_zip_archive_stream(zip_archive_stream* const strm, const unsigned char* const data, size_t const length) +void test_zip_archive_stream(zip_archive_stream* const strm, const unsigned char* const data, std::size_t const length) { assert(strm->size() == length); assert(strm->tell() == 0); @@ -34,17 +33,17 @@ void test_zip_archive_stream(zip_archive_stream* const strm, const unsigned char unsigned char* buf = buffer.data(); strm->read(buf, 2); - assert(equal(data, data + 2, buf)); + assert(std::equal(data, data + 2, buf)); assert(strm->tell() == 0); strm->read(buf, length); - assert(equal(data, data + length, buf)); + assert(std::equal(data, data + length, buf)); ASSERT_THROW(strm->read(buf, length + 1)); strm->read(buf, 0); strm->seek(2); assert(strm->tell() == 2); strm->read(buf, 2); - assert(equal(data + 2, data + 4, buf)); + assert(std::equal(data + 2, data + 4, buf)); strm->seek(length); assert(strm->tell() == length); ASSERT_THROW(strm->seek(length + 1)); -- GitLab From 254c5f34a6333796bb076e34161e1c98ba207517 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 22 Nov 2022 23:19:13 -0500 Subject: [PATCH 39/91] Remove dump_file_entry() which wasn't generally very useful Instead, add a method that returns the header info and add a function to print it to std::ostream. --- doc/cpp/parser/archive.rst | 3 + include/orcus/zip_archive.hpp | 45 +++++++-- src/include/test_global.hpp | 2 + src/orcus_zip_dump.cpp | 10 +- src/parser/Makefile.am | 9 +- src/parser/zip_archive.cpp | 168 +++++++++++++++++--------------- src/parser/zip_archive_test.cpp | 34 ++++++- 7 files changed, 183 insertions(+), 88 deletions(-) diff --git a/doc/cpp/parser/archive.rst b/doc/cpp/parser/archive.rst index da4fcd8b..e5f7a25a 100644 --- a/doc/cpp/parser/archive.rst +++ b/doc/cpp/parser/archive.rst @@ -5,6 +5,9 @@ Archive Zip archive ----------- +.. 
doxygenstruct:: orcus::zip_file_entry_header + :members: + .. doxygenclass:: orcus::zip_archive :members: diff --git a/include/orcus/zip_archive.hpp b/include/orcus/zip_archive.hpp index 9415c957..9714b17e 100644 --- a/include/orcus/zip_archive.hpp +++ b/include/orcus/zip_archive.hpp @@ -14,9 +14,39 @@ #include #include #include +#include namespace orcus { +/** + * Structure containing file entry header attributes. + */ +struct ORCUS_PSR_DLLPUBLIC zip_file_entry_header +{ + uint32_t header_signature = 0; + uint16_t required_version = 0; + uint16_t flag = 0; + uint16_t compression_method = 0; + uint16_t last_modified_time = 0; + uint16_t last_modified_date = 0; + uint32_t crc32 = 0; + uint32_t compressed_size = 0; + uint32_t uncompressed_size = 0; + + std::string filename; + std::vector extra_field; + + zip_file_entry_header(); + zip_file_entry_header(const zip_file_entry_header& other); + zip_file_entry_header(zip_file_entry_header&& other); + ~zip_file_entry_header(); + + zip_file_entry_header& operator=(const zip_file_entry_header& other); + zip_file_entry_header& operator=(zip_file_entry_header&& other); +}; + +ORCUS_PSR_DLLPUBLIC std::ostream& operator<<(std::ostream& os, const zip_file_entry_header& header); + class zip_archive_stream; class ORCUS_PSR_DLLPUBLIC zip_archive @@ -41,19 +71,22 @@ public: void load(); /** - * Dump the content of a specified file entry to stdout. + * Retrieve the header information for a file entry specified by index. * - * @param index file entry index + * @param index file entry index. + * + * @return header information for a file entry. */ - void dump_file_entry(size_t index) const; + zip_file_entry_header get_file_entry_header(std::size_t index) const; /** - * Dump the content of a specified file entry to stdout. + * Retrieve the header information for a file entry specified by name. * + * @param name file entry name. * - * @param entry_name file entry name. + * @return header information for a file entry. 
*/ - void dump_file_entry(std::string_view entry_name) const; + zip_file_entry_header get_file_entry_header(std::string_view name) const; /** * Get file entry name from its index. diff --git a/src/include/test_global.hpp b/src/include/test_global.hpp index 3aa508eb..cd8d32c1 100644 --- a/src/include/test_global.hpp +++ b/src/include/test_global.hpp @@ -51,6 +51,8 @@ void verify_content( }} // namespace orcus::test +#define ORCUS_TEST_FUNC_SCOPE orcus::test::stack_printer __sp__(__func__) + #endif /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/src/orcus_zip_dump.cpp b/src/orcus_zip_dump.cpp index 6563c43e..1cf81e77 100644 --- a/src/orcus_zip_dump.cpp +++ b/src/orcus_zip_dump.cpp @@ -29,12 +29,16 @@ int main(int argc, char** argv) if (argc < 3) { for (size_t i = 0; i < n; ++i) - archive.dump_file_entry(i); + { + auto header = archive.get_file_entry_header(i); + std::cout << "--" << std::endl; + std::cout << header << std::endl; + } return EXIT_SUCCESS; } - const char* entry_name = argv[2]; - archive.dump_file_entry(entry_name); + auto header = archive.get_file_entry_header(argv[2]); + std::cout << header << std::endl; } catch (const std::exception& e) { diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am index 708f8423..d68adb2e 100644 --- a/src/parser/Makefile.am +++ b/src/parser/Makefile.am @@ -200,8 +200,15 @@ parser_test_stream_CPPFLAGS = $(AM_CPPFLAGS) parser_test_zip_archive_SOURCES = \ zip_archive_test.cpp -parser_test_zip_archive_LDADD = liborcus-parser-@ORCUS_API_VERSION@.la parser_test_zip_archive_CPPFLAGS = $(AM_CPPFLAGS) +parser_test_zip_archive_LDADD = \ + liborcus-parser-@ORCUS_API_VERSION@.la \ + ../test/liborcus-test.a \ + $(BOOST_SYSTEM_LIBS) \ + $(BOOST_FILESYSTEM_LIBS) +parser_test_zip_archive_LDFLAGS = \ + $(BOOST_SYSTEM_LDFLAGS) \ + $(BOOST_FILESYSTEM_LDFLAGS) # parser-test-base diff --git a/src/parser/zip_archive.cpp b/src/parser/zip_archive.cpp index f9889835..50d5da5b 100644 --- a/src/parser/zip_archive.cpp +++ 
b/src/parser/zip_archive.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -121,6 +122,18 @@ public: return std::string(reinterpret_cast(&buf[0])); } + std::vector read_bytes(std::size_t n) + { + if (!n) + throw zip_error("attempt to read string of zero size."); + + std::vector buf; + m_stream->seek(m_pos+m_pos_internal); + m_stream->read(buf.data(), n); + m_pos_internal += n; + return buf; + } + std::string_view read_string(size_t n, string_pool& pool) { std::vector buf(n+1, '\0'); @@ -184,6 +197,32 @@ struct central_dir_end } // anonymous namespace + +zip_file_entry_header::zip_file_entry_header() = default; +zip_file_entry_header::zip_file_entry_header(const zip_file_entry_header& other) = default; +zip_file_entry_header::zip_file_entry_header(zip_file_entry_header&& other) = default; +zip_file_entry_header::~zip_file_entry_header() = default; + +zip_file_entry_header& zip_file_entry_header::operator=(const zip_file_entry_header& other) = default; +zip_file_entry_header& zip_file_entry_header::operator=(zip_file_entry_header&& other) = default; + +std::ostream& operator<<(std::ostream& os, const zip_file_entry_header& header) +{ + os << "header signature: 0x" << std::hex << std::setfill('0') << std::setw(8) << header.header_signature << "\n" + << "version needed to extract: " << header.required_version << "\n" + << "general purpose bit flag: 0x" << std::hex << std::setfill('0') << std::setw(4) << header.flag << "\n" + << "compression method: " << header.compression_method << "\n" + << "last modified time: " << header.last_modified_time << "\n" + << "last modified date: " << header.last_modified_date << "\n" + << "crc32: 0x" << std::hex << std::setfill('0') << std::setw(8) << header.crc32 << "\n" + << "compressed size: " << header.compressed_size << "\n" + << "uncompressed size: " << header.uncompressed_size << "\n" + << "filename: " << header.filename << "\n" + << "extra field length: " << header.extra_field.size(); + + return os; 
+} + class zip_archive::impl { typedef std::vector file_params_type; @@ -203,8 +242,8 @@ public: impl(zip_archive_stream* stream); void load(); - void dump_file_entry(size_t pos) const; - void dump_file_entry(std::string_view entry_name) const; + zip_file_entry_header get_file_entry_header(std::size_t index) const; + zip_file_entry_header get_file_entry_header(std::string_view name) const; std::string_view get_file_entry_name(size_t pos) const; size_t get_file_entry_count() const @@ -250,6 +289,50 @@ void zip_archive::impl::load() read_file_entries(); } +zip_file_entry_header zip_archive::impl::get_file_entry_header(std::size_t index) const +{ + if (index >= m_file_params.size()) + throw zip_error("invalid file entry index."); + + const zip_file_param& param = m_file_params[index]; + zip_stream_parser file_header(m_stream, param.offset_file_header); + + zip_file_entry_header header; + + header.header_signature = file_header.read_4bytes(); + header.required_version = file_header.read_2bytes(); + header.flag = file_header.read_2bytes(); + header.compression_method = file_header.read_2bytes(); + header.last_modified_time = file_header.read_2bytes(); + header.last_modified_date = file_header.read_2bytes(); + header.crc32 = file_header.read_4bytes(); + header.compressed_size = file_header.read_4bytes(); + header.uncompressed_size = file_header.read_4bytes(); + uint16_t filename_len = file_header.read_2bytes(); + uint16_t extra_field_len = file_header.read_2bytes(); + + if (filename_len) + header.filename = file_header.read_string(filename_len); + + if (extra_field_len) + header.extra_field = file_header.read_bytes(extra_field_len); + + return header; +} + +zip_file_entry_header zip_archive::impl::get_file_entry_header(std::string_view name) const +{ + auto it = m_filenames.find(name); + if (it == m_filenames.end()) + { + std::ostringstream os; + os << "file entry named '" << name << "' not found"; + throw zip_error(os.str()); + } + + return 
get_file_entry_header(it->second); +} + void zip_archive::impl::read_file_entries() { m_file_params.clear(); @@ -324,75 +407,6 @@ void zip_archive::impl::read_file_entries() } } -void zip_archive::impl::dump_file_entry(size_t pos) const -{ - if (pos >= m_file_params.size()) - throw zip_error("invalid file entry index."); - - const zip_file_param& param = m_file_params[pos]; - std::cout << "-- filename: " << param.filename << std::endl; - - zip_stream_parser file_header(m_stream, param.offset_file_header); - uint32_t v32 = file_header.read_4bytes(); - printf(" header signature: 0x%8.8x\n", v32); - uint16_t v16 = file_header.read_2bytes(); - std::cout << " version needed to extract: " << v16 << std::endl; - v16 = file_header.read_2bytes(); - printf(" general purpose bit flag: 0x%4.4x\n", v16); - v16 = file_header.read_2bytes(); - std::cout << " compression method: " << v16 << std::endl; - v16 = file_header.read_2bytes(); - std::cout << " file last modified time: " << v16 << std::endl; - v16 = file_header.read_2bytes(); - std::cout << " file last modified date: " << v16 << std::endl; - v32 = file_header.read_4bytes(); - printf(" crc32: 0x%8.8x\n", v32); - v32 = file_header.read_4bytes(); - std::cout << " compressed size: " << v32 << std::endl; - v32 = file_header.read_4bytes(); - std::cout << " uncompressed size: " << v32 << std::endl; - size_t filename_len = file_header.read_2bytes(); - std::cout << " filename length: " << filename_len << std::endl; - uint16_t extra_field_len = file_header.read_2bytes(); - std::cout << " extra field length: " << extra_field_len << std::endl; - if (filename_len) - { - std::string filename = file_header.read_string(filename_len); - std::cout << " filename: '" << filename << "'" << std::endl; - } - - if (extra_field_len) - { - // Ignore extra field. - file_header.skip_bytes(extra_field_len); - } - - // Header followed by the actual data bytes. 
- - m_stream->seek(file_header.tell()); - - std::vector buf = read_file_entry(param.filename); - if (!buf.empty()) - { - std::cout << "-- data section" << std::endl; - std::cout << buf.data() << std::endl; - std::cout << "--" << std::endl; - } -} - -void zip_archive::impl::dump_file_entry(std::string_view entry_name) const -{ - filename_map_type::const_iterator it = m_filenames.find(entry_name); - if (it == m_filenames.end()) - { - // entry name not found. - std::cout << "file entry '" << entry_name << "' not found." << std::endl; - return; - } - - dump_file_entry(it->second); -} - std::string_view zip_archive::impl::get_file_entry_name(std::size_t pos) const { if (pos >= m_file_params.size()) @@ -558,19 +572,19 @@ void zip_archive::load() mp_impl->load(); } -void zip_archive::dump_file_entry(size_t index) const +zip_file_entry_header zip_archive::get_file_entry_header(std::size_t index) const { - mp_impl->dump_file_entry(index); + return mp_impl->get_file_entry_header(index); } -std::string_view zip_archive::get_file_entry_name(std::size_t index) const +zip_file_entry_header zip_archive::get_file_entry_header(std::string_view name) const { - return mp_impl->get_file_entry_name(index); + return mp_impl->get_file_entry_header(name); } -void zip_archive::dump_file_entry(std::string_view entry_name) const +std::string_view zip_archive::get_file_entry_name(std::size_t index) const { - mp_impl->dump_file_entry(entry_name); + return mp_impl->get_file_entry_name(index); } size_t zip_archive::get_file_entry_count() const diff --git a/src/parser/zip_archive_test.cpp b/src/parser/zip_archive_test.cpp index 45393fa8..5f6fea86 100644 --- a/src/parser/zip_archive_test.cpp +++ b/src/parser/zip_archive_test.cpp @@ -10,7 +10,10 @@ #include #include -#include "orcus/zip_archive_stream.hpp" +#include +#include + +#include #define ASSERT_THROW(expr) \ try \ @@ -23,6 +26,7 @@ catch (...) 
\ } using namespace orcus; +namespace fs = boost::filesystem; void test_zip_archive_stream(zip_archive_stream* const strm, const unsigned char* const data, std::size_t const length) { @@ -52,14 +56,42 @@ void test_zip_archive_stream(zip_archive_stream* const strm, const unsigned char void test_zip_archive_stream_blob() { + ORCUS_TEST_FUNC_SCOPE; + const unsigned char data[] = "My hovercraft is full of eels."; zip_archive_stream_blob strm(data, sizeof(data)); test_zip_archive_stream(&strm, data, sizeof(data)); } +void test_zip_archive_file_entry_header() +{ + ORCUS_TEST_FUNC_SCOPE; + + fs::path filepath{SRCDIR"/test/ods/raw-values-1/input.ods"}; + assert(fs::is_regular_file(filepath)); + + zip_archive_stream_fd strm(filepath.string().c_str()); + + zip_archive archive(&strm); + archive.load(); + std::size_t n_entries = archive.get_file_entry_count(); + for (std::size_t i = 0; i < n_entries; ++i) + { + std::string_view name = archive.get_file_entry_name(i); + std::cout << "* entry name: " << name << std::endl; + zip_file_entry_header header = archive.get_file_entry_header(i); + assert(header.filename == name); + assert(header.header_signature == 0x04034b50); + + // 0 = none; 8 = deflate + assert(header.compression_method == 0 || header.compression_method == 8); + } +} + int main() { test_zip_archive_stream_blob(); + test_zip_archive_file_entry_header(); return EXIT_SUCCESS; } -- GitLab From 25b25d646a3919e78b70bf53a10a8937e9ef0a46 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 22 Nov 2022 23:31:21 -0500 Subject: [PATCH 40/91] Use sentence casing for section headers --- doc/cpp/filter/index.rst | 8 ++++---- doc/cpp/filter/interface.rst | 14 +++++++------- doc/cpp/model/index.rst | 2 +- doc/cpp/model/json.rst | 2 +- doc/cpp/model/spreadsheet.rst | 6 +++--- doc/cpp/model/yaml.rst | 2 +- doc/cpp/parser/css.rst | 6 +++--- doc/cpp/parser/csv.rst | 4 ++-- doc/cpp/parser/index.rst | 2 +- doc/cpp/parser/json.rst | 4 ++-- doc/cpp/parser/util.rst | 4 ++-- 
doc/cpp/parser/xml.rst | 2 +- doc/cpp/parser/xml_writer.rst | 2 +- doc/cpp/parser/yaml.rst | 2 +- 14 files changed, 30 insertions(+), 30 deletions(-) diff --git a/doc/cpp/filter/index.rst b/doc/cpp/filter/index.rst index 8d44ff40..51bd2d0e 100644 --- a/doc/cpp/filter/index.rst +++ b/doc/cpp/filter/index.rst @@ -1,16 +1,16 @@ -Spreadsheet Import Filters +Spreadsheet import filters ========================== -Plain Text (CSV) +Plain text (CSV) ---------------- .. doxygenclass:: orcus::orcus_csv :members: -Open Document Spreadsheet +Open document spreadsheet ------------------------- .. doxygenclass:: orcus::orcus_ods @@ -51,7 +51,7 @@ Generic XML :members: -Utility Functions +Utility functions ----------------- .. doxygenfunction:: orcus::detect diff --git a/doc/cpp/filter/interface.rst b/doc/cpp/filter/interface.rst index 0d15f0c9..1e0aa909 100644 --- a/doc/cpp/filter/interface.rst +++ b/doc/cpp/filter/interface.rst @@ -1,9 +1,9 @@ -Types and Interfaces -==================== +Spreadsheet types and interfaces +================================ -Global Interface +Global interface ---------------- .. doxygenclass:: orcus::iface::import_filter @@ -14,7 +14,7 @@ Global Interface .. _spreadsheet-interface: -Spreadsheet Interface +Spreadsheet interface --------------------- import_array_formula @@ -174,10 +174,10 @@ export_sheet :members: -Spreadsheet Types +Spreadsheet types ----------------- -Type Aliases +Type aliases ^^^^^^^^^^^^ .. doxygentypedef:: orcus::spreadsheet::row_t @@ -246,7 +246,7 @@ Enums .. doxygenenum:: orcus::spreadsheet::pane_state_t -Spreadsheet Global Functions +Spreadsheet global functions ---------------------------- .. doxygenfunction:: orcus::spreadsheet::get_default_column_width diff --git a/doc/cpp/model/index.rst b/doc/cpp/model/index.rst index f85eac76..8a0c3d51 100644 --- a/doc/cpp/model/index.rst +++ b/doc/cpp/model/index.rst @@ -1,5 +1,5 @@ -Document Model +Document model ============== .. 
toctree:: diff --git a/doc/cpp/model/json.rst b/doc/cpp/model/json.rst index d2385b6c..12e9e354 100644 --- a/doc/cpp/model/json.rst +++ b/doc/cpp/model/json.rst @@ -1,5 +1,5 @@ -JSON Document Tree +JSON document tree ================== Document tree diff --git a/doc/cpp/model/spreadsheet.rst b/doc/cpp/model/spreadsheet.rst index 8df640fb..e940c2b1 100644 --- a/doc/cpp/model/spreadsheet.rst +++ b/doc/cpp/model/spreadsheet.rst @@ -1,5 +1,5 @@ -Spreadsheet Document +Spreadsheet document ==================== @@ -17,7 +17,7 @@ Sheet :members: -Pivot Table +Pivot table ----------- .. doxygenstruct:: orcus::spreadsheet::pivot_cache_record_value_t @@ -39,7 +39,7 @@ Pivot Table :members: -Import Factory +Import factory -------------- .. doxygenclass:: orcus::spreadsheet::import_factory diff --git a/doc/cpp/model/yaml.rst b/doc/cpp/model/yaml.rst index c5193881..06bb91b0 100644 --- a/doc/cpp/model/yaml.rst +++ b/doc/cpp/model/yaml.rst @@ -1,4 +1,4 @@ -YAML Document Tree +YAML document tree ================== diff --git a/doc/cpp/parser/css.rst b/doc/cpp/parser/css.rst index efbf545c..ce875f63 100644 --- a/doc/cpp/parser/css.rst +++ b/doc/cpp/parser/css.rst @@ -1,18 +1,18 @@ .. highlight:: cpp -CSS Parser +CSS parser ========== .. doxygenclass:: orcus::css_parser :members: -Parser Handler +Parser handler -------------- .. doxygenclass:: orcus::css_handler :members: -CSS Types +CSS types --------- .. doxygenenum:: orcus::css::combinator_t diff --git a/doc/cpp/parser/csv.rst b/doc/cpp/parser/csv.rst index 560cb73c..67e708ac 100644 --- a/doc/cpp/parser/csv.rst +++ b/doc/cpp/parser/csv.rst @@ -1,6 +1,6 @@ .. highlight:: cpp -CSV Parser +CSV parser ========== .. doxygenclass:: orcus::csv_parser @@ -9,7 +9,7 @@ CSV Parser .. doxygenstruct:: orcus::csv::parser_config :members: -Parser Handler +Parser handler -------------- .. 
doxygenclass:: orcus::csv_handler diff --git a/doc/cpp/parser/index.rst b/doc/cpp/parser/index.rst index c1b49787..4ed221cc 100644 --- a/doc/cpp/parser/index.rst +++ b/doc/cpp/parser/index.rst @@ -1,5 +1,5 @@ -Low-Level Parsers and Utilities +Low-level parsers and utilities =============================== .. toctree:: diff --git a/doc/cpp/parser/json.rst b/doc/cpp/parser/json.rst index 10688b71..8aa402b1 100644 --- a/doc/cpp/parser/json.rst +++ b/doc/cpp/parser/json.rst @@ -1,12 +1,12 @@ .. highlight:: cpp -JSON Parser +JSON parser =========== .. doxygenclass:: orcus::json_parser :members: -Parser Handler +Parser handler -------------- .. doxygenclass:: orcus::json_handler diff --git a/doc/cpp/parser/util.rst b/doc/cpp/parser/util.rst index e9d8f3fd..48fac1fe 100644 --- a/doc/cpp/parser/util.rst +++ b/doc/cpp/parser/util.rst @@ -13,7 +13,7 @@ Utilities :members: -XML Types +XML types ========= .. doxygentypedef:: orcus::xml_token_t @@ -30,7 +30,7 @@ XML Types .. doxygentypedef:: orcus::xml_attrs_t -Other Types +Other types =========== .. doxygenenum:: orcus::character_set_t diff --git a/doc/cpp/parser/xml.rst b/doc/cpp/parser/xml.rst index 2592f3fe..462c4661 100644 --- a/doc/cpp/parser/xml.rst +++ b/doc/cpp/parser/xml.rst @@ -1,6 +1,6 @@ .. highlight:: cpp -XML Parsers +XML parsers =========== SAX base parser diff --git a/doc/cpp/parser/xml_writer.rst b/doc/cpp/parser/xml_writer.rst index 18395d96..1092ac4c 100644 --- a/doc/cpp/parser/xml_writer.rst +++ b/doc/cpp/parser/xml_writer.rst @@ -1,6 +1,6 @@ .. highlight:: cpp -XML Writer +XML writer ========== .. doxygenclass:: orcus::xml_writer diff --git a/doc/cpp/parser/yaml.rst b/doc/cpp/parser/yaml.rst index 1835b732..0a1107ab 100644 --- a/doc/cpp/parser/yaml.rst +++ b/doc/cpp/parser/yaml.rst @@ -1,6 +1,6 @@ .. highlight:: cpp -YAML Parser +YAML parser =========== .. 
doxygenclass:: orcus::yaml_parser -- GitLab From 9f1d7074002b6adbee6dfc98f5200bd9f97c0ae2 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 22 Nov 2022 23:37:21 -0500 Subject: [PATCH 41/91] Add orcus_json to the doc --- doc/cpp/filter/index.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/cpp/filter/index.rst b/doc/cpp/filter/index.rst index 51bd2d0e..85991d46 100644 --- a/doc/cpp/filter/index.rst +++ b/doc/cpp/filter/index.rst @@ -51,6 +51,13 @@ Generic XML :members: +Generic JSON +------------ + +.. doxygenclass:: orcus::orcus_json + :members: + + Utility functions ----------------- -- GitLab From 5813ef70363a0eadb073b2afb60a1cfadda4c5fc Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Wed, 23 Nov 2022 17:06:08 -0500 Subject: [PATCH 42/91] Organic doc items in types.hpp & reorder sections under parser --- doc/cpp/parser/index.rst | 15 +++++++------ doc/cpp/parser/types.rst | 47 ++++++++++++++++++++++++++++++++++++++++ doc/cpp/parser/util.rst | 37 ------------------------------- 3 files changed, 55 insertions(+), 44 deletions(-) create mode 100644 doc/cpp/parser/types.rst diff --git a/doc/cpp/parser/index.rst b/doc/cpp/parser/index.rst index 4ed221cc..9bc17645 100644 --- a/doc/cpp/parser/index.rst +++ b/doc/cpp/parser/index.rst @@ -5,13 +5,14 @@ Low-level parsers and utilities .. toctree:: :maxdepth: 1 - exception.rst - stream.rst - archive.rst - util.rst + xml.rst + xml_writer.rst + json.rst css.rst csv.rst - json.rst - xml.rst yaml.rst - xml_writer.rst + types.rst + util.rst + stream.rst + archive.rst + exception.rst diff --git a/doc/cpp/parser/types.rst b/doc/cpp/parser/types.rst new file mode 100644 index 00000000..5b70b82b --- /dev/null +++ b/doc/cpp/parser/types.rst @@ -0,0 +1,47 @@ + +Basic types +=========== + +Constants +--------- + +.. doxygenvariable:: orcus::XMLNS_UNKNOWN_ID +.. doxygenvariable:: orcus::XML_UNKNOWN_TOKEN +.. doxygenvariable:: orcus::index_not_found + +Type aliases +------------ + +.. 
doxygentypedef:: orcus::xml_attrs_t +.. doxygentypedef:: orcus::xml_elem_set_t +.. doxygentypedef:: orcus::xml_elem_stack_t +.. doxygentypedef:: orcus::xml_token_pair_t +.. doxygentypedef:: orcus::xml_token_t +.. doxygentypedef:: orcus::xmlns_id_t + +Structs +------- + +.. doxygenstruct:: orcus::date_time_t +.. doxygenstruct:: orcus::length_t +.. doxygenstruct:: orcus::parse_error_value_t +.. doxygenstruct:: orcus::xml_declaration_t +.. doxygenstruct:: orcus::xml_name_t +.. doxygenstruct:: orcus::xml_token_attr_t +.. doxygenstruct:: orcus::xml_token_element_t +.. doxygenstruct:: orcus::xml_token_pair_hash + +Enums +----- + +.. doxygenenum:: orcus::character_set_t +.. doxygenenum:: orcus::dump_format_t +.. doxygenenum:: orcus::format_t +.. doxygenenum:: orcus::length_unit_t + +Utility functions +----------------- + +.. doxygenfunction:: orcus::get_dump_format_entries +.. doxygenfunction:: orcus::to_character_set +.. doxygenfunction:: orcus::to_dump_format_enum diff --git a/doc/cpp/parser/util.rst b/doc/cpp/parser/util.rst index 48fac1fe..2d3ec0d5 100644 --- a/doc/cpp/parser/util.rst +++ b/doc/cpp/parser/util.rst @@ -11,40 +11,3 @@ Utilities .. doxygenclass:: orcus::cell_buffer :members: - - -XML types -========= - -.. doxygentypedef:: orcus::xml_token_t - -.. doxygentypedef:: orcus::xmlns_id_t - -.. doxygenstruct:: orcus::xml_name_t - -.. doxygenstruct:: orcus::xml_token_attr_t - -.. doxygenstruct:: orcus::xml_token_element_t - -.. doxygenstruct:: orcus::xml_declaration_t - -.. doxygentypedef:: orcus::xml_attrs_t - -Other types -=========== - -.. doxygenenum:: orcus::character_set_t - -.. doxygenstruct:: orcus::parse_error_value_t - -.. doxygenenum:: orcus::length_unit_t - -.. doxygenenum:: orcus::format_t - -.. doxygenenum:: orcus::dump_format_t - -.. doxygenstruct:: orcus::length_t - -.. 
doxygenstruct:: orcus::date_time_t - - -- GitLab From 5979effb63e6663b387f591919d7eef66f00e7a1 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Wed, 23 Nov 2022 17:34:19 -0500 Subject: [PATCH 43/91] Upcase index_not_found for consistency as a global constant --- doc/cpp/parser/types.rst | 2 +- include/orcus/types.hpp | 25 ++++++++++++++++--------- src/liborcus/dom_tree.cpp | 2 +- src/liborcus/xml_structure_tree.cpp | 6 +++--- src/parser/types.cpp | 9 ++++----- src/parser/xml_namespace.cpp | 12 ++++++------ 6 files changed, 31 insertions(+), 25 deletions(-) diff --git a/doc/cpp/parser/types.rst b/doc/cpp/parser/types.rst index 5b70b82b..133e292f 100644 --- a/doc/cpp/parser/types.rst +++ b/doc/cpp/parser/types.rst @@ -5,9 +5,9 @@ Basic types Constants --------- +.. doxygenvariable:: orcus::INDEX_NOT_FOUND .. doxygenvariable:: orcus::XMLNS_UNKNOWN_ID .. doxygenvariable:: orcus::XML_UNKNOWN_TOKEN -.. doxygenvariable:: orcus::index_not_found Type aliases ------------ diff --git a/include/orcus/types.hpp b/include/orcus/types.hpp index 3117d8d6..e4148050 100644 --- a/include/orcus/types.hpp +++ b/include/orcus/types.hpp @@ -20,12 +20,6 @@ namespace orcus { class xmlns_context; class xmlns_repository; -/** - * Generic constant to be used to indicate that a valid index value is - * expected but not found. 
- */ -ORCUS_PSR_DLLPUBLIC extern const size_t index_not_found; - // XML specific types using xml_token_t = size_t; @@ -41,9 +35,6 @@ struct ORCUS_PSR_DLLPUBLIC xml_token_pair_hash using xml_elem_stack_t = std::vector; using xml_elem_set_t = std::unordered_set; -ORCUS_PSR_DLLPUBLIC extern const xmlns_id_t XMLNS_UNKNOWN_ID; -ORCUS_PSR_DLLPUBLIC extern const xml_token_t XML_UNKNOWN_TOKEN; - struct ORCUS_PSR_DLLPUBLIC parse_error_value_t { std::string_view str; @@ -512,6 +503,22 @@ ORCUS_PSR_DLLPUBLIC std::ostream& operator<< (std::ostream& os, format_t v); typedef ::std::vector xml_attrs_t; +/** + * Generic constant to be used to indicate that a valid index value is + * expected but not found. + */ +ORCUS_PSR_DLLPUBLIC extern const std::size_t INDEX_NOT_FOUND; + +/** + * Value associated with an unknown XML namespace. + */ +ORCUS_PSR_DLLPUBLIC extern const xmlns_id_t XMLNS_UNKNOWN_ID; + +/** + * Value associated with an unknown XML token. + */ +ORCUS_PSR_DLLPUBLIC extern const xml_token_t XML_UNKNOWN_TOKEN; + } #endif diff --git a/src/liborcus/dom_tree.cpp b/src/liborcus/dom_tree.cpp index f8414437..f5e84122 100644 --- a/src/liborcus/dom_tree.cpp +++ b/src/liborcus/dom_tree.cpp @@ -119,7 +119,7 @@ void print(std::ostream& os, const entity_name& name, const xmlns_context& cxt) if (name.ns) { size_t index = cxt.get_index(name.ns); - if (index != index_not_found) + if (index != INDEX_NOT_FOUND) os << "ns" << index << ':'; } os << name.name; diff --git a/src/liborcus/xml_structure_tree.cpp b/src/liborcus/xml_structure_tree.cpp index e14932e2..c7828387 100644 --- a/src/liborcus/xml_structure_tree.cpp +++ b/src/liborcus/xml_structure_tree.cpp @@ -263,7 +263,7 @@ void print_scope(std::ostream& os, const scopes_type& scopes, const xmlns_contex { os << "/"; size_t num_id = cxt.get_index((*it)->name.ns); - if (num_id != index_not_found) + if (num_id != INDEX_NOT_FOUND) os << "ns" << num_id << ":"; os << (*it)->name.name; if ((*it)->repeat) @@ -292,7 +292,7 @@ struct 
xml_structure_tree::impl std::string to_string(const xml_structure_tree::entity_name& name) const { std::ostringstream ss; - if (m_xmlns_cxt.get_index(name.ns) != index_not_found) + if (m_xmlns_cxt.get_index(name.ns) != INDEX_NOT_FOUND) ss << m_xmlns_cxt.get_short_name(name.ns) << ":"; ss << name.name; return ss.str(); @@ -555,7 +555,7 @@ void xml_structure_tree::dump_compact(std::ostream& os) const ss << "/"; size_t num_id = cxt.get_index(this_elem.name.ns); - if (num_id != index_not_found) + if (num_id != INDEX_NOT_FOUND) ss << "ns" << num_id << ":"; ss << this_elem.name.name; if (this_elem.prop->repeat) diff --git a/src/parser/types.cpp b/src/parser/types.cpp index c7c17fe7..322f3cac 100644 --- a/src/parser/types.cpp +++ b/src/parser/types.cpp @@ -19,16 +19,11 @@ namespace orcus { -const xmlns_id_t XMLNS_UNKNOWN_ID = nullptr; -const xml_token_t XML_UNKNOWN_TOKEN = 0; - size_t xml_token_pair_hash::operator()(const xml_token_pair_t& v) const { return std::hash()(v.first) ^ std::hash()(v.second); } -const size_t index_not_found = std::numeric_limits::max(); - parse_error_value_t::parse_error_value_t() : offset(0) { @@ -1459,6 +1454,10 @@ std::ostream& operator<< (std::ostream& os, format_t v) return os; } +const std::size_t INDEX_NOT_FOUND = std::numeric_limits::max(); +const xmlns_id_t XMLNS_UNKNOWN_ID = nullptr; +const xml_token_t XML_UNKNOWN_TOKEN = 0; + } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/src/parser/xml_namespace.cpp b/src/parser/xml_namespace.cpp index 9c371ca8..be3f2934 100644 --- a/src/parser/xml_namespace.cpp +++ b/src/parser/xml_namespace.cpp @@ -150,7 +150,7 @@ string xmlns_repository::get_short_name(xmlns_id_t ns_id) const string xmlns_repository::get_short_name(size_t index) const { - if (index == index_not_found) + if (index == INDEX_NOT_FOUND) return string("???"); ostringstream os; @@ -161,11 +161,11 @@ string xmlns_repository::get_short_name(size_t index) const size_t xmlns_repository::get_index(xmlns_id_t ns_id) const 
{ if (!ns_id) - return index_not_found; + return INDEX_NOT_FOUND; auto it = mp_impl->m_strid_map.find(std::string_view(ns_id)); if (it == mp_impl->m_strid_map.end()) - return index_not_found; + return INDEX_NOT_FOUND; return it->second; } @@ -382,7 +382,7 @@ public: void operator() (xmlns_id_t ns) { size_t num_id = m_cxt.get_index(ns); - if (num_id != index_not_found) + if (num_id != INDEX_NOT_FOUND) m_store.push_back(ns_item(num_id, ns)); } }; @@ -444,7 +444,7 @@ void xmlns_context::dump(std::ostream& os) const { xmlns_id_t ns_id = *it; size_t num_id = get_index(ns_id); - if (num_id == index_not_found) + if (num_id == INDEX_NOT_FOUND) continue; os << "ns" << num_id << "=\"" << ns_id << '"' << endl; @@ -457,7 +457,7 @@ void xmlns_context::dump_state(std::ostream& os) const for (xmlns_id_t ns_id : get_all_namespaces()) { size_t num_id = get_index(ns_id); - if (num_id == index_not_found) + if (num_id == INDEX_NOT_FOUND) continue; os << " ns" << num_id << ": \"" << ns_id << '"' << std::endl; -- GitLab From 131eece503963b4687b2a0664883ab0d3fcd831c Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Wed, 23 Nov 2022 17:37:33 -0500 Subject: [PATCH 44/91] Rename xml_attrs_t to xml_token_attrs_t --- doc/cpp/parser/types.rst | 2 +- include/orcus/types.hpp | 4 ++-- src/liborcus/gnumeric_cell_context.cpp | 4 ++-- src/liborcus/gnumeric_cell_context.hpp | 4 ++-- src/liborcus/gnumeric_cell_context_test.cpp | 14 ++++++------- src/liborcus/gnumeric_context.cpp | 2 +- src/liborcus/gnumeric_context.hpp | 2 +- src/liborcus/gnumeric_sheet_context.cpp | 14 ++++++------- src/liborcus/gnumeric_sheet_context.hpp | 14 ++++++------- src/liborcus/gnumeric_sheet_context_test.cpp | 6 +++--- src/liborcus/odf_para_context.cpp | 2 +- src/liborcus/odf_para_context.hpp | 2 +- src/liborcus/odf_style_context.cpp | 6 +++--- src/liborcus/odf_style_context.hpp | 6 +++--- src/liborcus/odf_styles_context.cpp | 2 +- src/liborcus/odf_styles_context.hpp | 2 +- src/liborcus/ods_content_xml_context.cpp | 18 
++++++++--------- src/liborcus/ods_content_xml_context.hpp | 16 +++++++-------- src/liborcus/xls_xml_context.cpp | 20 +++++++++---------- src/liborcus/xls_xml_context.hpp | 20 +++++++++---------- src/liborcus/xlsx_autofilter_context.cpp | 2 +- src/liborcus/xlsx_autofilter_context.hpp | 2 +- .../xlsx_conditional_format_context.cpp | 2 +- .../xlsx_conditional_format_context.hpp | 2 +- src/liborcus/xlsx_context.cpp | 14 ++++++------- src/liborcus/xlsx_context.hpp | 14 ++++++------- src/liborcus/xlsx_sheet_context.cpp | 16 +++++++-------- src/liborcus/xlsx_sheet_context.hpp | 16 +++++++-------- src/liborcus/xlsx_sheet_context_test.cpp | 18 ++++++++--------- src/liborcus/xlsx_table_context.cpp | 2 +- src/liborcus/xlsx_table_context.hpp | 2 +- src/liborcus/xlsx_workbook_context.cpp | 2 +- src/liborcus/xlsx_workbook_context.hpp | 2 +- src/liborcus/xml_util.cpp | 2 +- src/liborcus/xml_util.hpp | 2 +- 35 files changed, 129 insertions(+), 129 deletions(-) diff --git a/doc/cpp/parser/types.rst b/doc/cpp/parser/types.rst index 133e292f..a1c3a44c 100644 --- a/doc/cpp/parser/types.rst +++ b/doc/cpp/parser/types.rst @@ -12,9 +12,9 @@ Constants Type aliases ------------ -.. doxygentypedef:: orcus::xml_attrs_t .. doxygentypedef:: orcus::xml_elem_set_t .. doxygentypedef:: orcus::xml_elem_stack_t +.. doxygentypedef:: orcus::xml_token_attrs_t .. doxygentypedef:: orcus::xml_token_pair_t .. doxygentypedef:: orcus::xml_token_t .. doxygentypedef:: orcus::xmlns_id_t diff --git a/include/orcus/types.hpp b/include/orcus/types.hpp index e4148050..a5cfd684 100644 --- a/include/orcus/types.hpp +++ b/include/orcus/types.hpp @@ -92,6 +92,8 @@ struct ORCUS_PSR_DLLPUBLIC xml_token_attr_t std::string_view _value, bool _transient); }; +using xml_token_attrs_t = std::vector; + /** * Element properties passed to its handler via start_element() and * end_element() calls. 
@@ -501,8 +503,6 @@ ORCUS_PSR_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const length_t& ORCUS_PSR_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const date_time_t& v); ORCUS_PSR_DLLPUBLIC std::ostream& operator<< (std::ostream& os, format_t v); -typedef ::std::vector xml_attrs_t; - /** * Generic constant to be used to indicate that a valid index value is * expected but not found. diff --git a/src/liborcus/gnumeric_cell_context.cpp b/src/liborcus/gnumeric_cell_context.cpp index 17c3a008..83f43535 100644 --- a/src/liborcus/gnumeric_cell_context.cpp +++ b/src/liborcus/gnumeric_cell_context.cpp @@ -121,7 +121,7 @@ gnumeric_cell_context::gnumeric_cell_context(session_context& session_cxt, const gnumeric_cell_context::~gnumeric_cell_context() = default; -void gnumeric_cell_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void gnumeric_cell_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { push_stack(ns, name); @@ -172,7 +172,7 @@ void gnumeric_cell_context::reset(ss::iface::import_sheet* sheet) mp_sheet = sheet; } -void gnumeric_cell_context::start_cell(const xml_attrs_t& attrs) +void gnumeric_cell_context::start_cell(const xml_token_attrs_t& attrs) { mp_cell_data.reset(new gnumeric_cell_data); cell_attr_parser parser = for_each(attrs.begin(), attrs.end(), cell_attr_parser()); diff --git a/src/liborcus/gnumeric_cell_context.hpp b/src/liborcus/gnumeric_cell_context.hpp index e6f6a76e..ae4727ab 100644 --- a/src/liborcus/gnumeric_cell_context.hpp +++ b/src/liborcus/gnumeric_cell_context.hpp @@ -31,14 +31,14 @@ public: spreadsheet::iface::import_factory* factory); virtual ~gnumeric_cell_context() override; - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) override; + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) override; virtual bool end_element(xmlns_id_t ns, xml_token_t name) override; virtual void 
characters(std::string_view str, bool transient) override; void reset(spreadsheet::iface::import_sheet* sheet); private: - void start_cell(const xml_attrs_t& attrs); + void start_cell(const xml_token_attrs_t& attrs); void end_cell(); private: spreadsheet::iface::import_factory* mp_factory; diff --git a/src/liborcus/gnumeric_cell_context_test.cpp b/src/liborcus/gnumeric_cell_context_test.cpp index eca5f4eb..188cea1c 100644 --- a/src/liborcus/gnumeric_cell_context_test.cpp +++ b/src/liborcus/gnumeric_cell_context_test.cpp @@ -173,7 +173,7 @@ void test_cell_value() orcus::xmlns_id_t ns = NS_gnumeric_gnm; orcus::xml_token_t elem = XML_Cell; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Row, "1", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Col, "2", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_ValueType, "40", false)); @@ -193,7 +193,7 @@ void test_cell_bool() orcus::xmlns_id_t ns = NS_gnumeric_gnm; orcus::xml_token_t elem = XML_Cell; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Row, "31", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Col, "32", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_ValueType, "20", false)); @@ -213,7 +213,7 @@ void test_cell_string() orcus::xmlns_id_t ns = NS_gnumeric_gnm; orcus::xml_token_t elem = XML_Cell; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Row, "10", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Col, "321", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_ValueType, "60", false)); @@ -268,7 +268,7 @@ void test_shared_formula_with_string() orcus::xmlns_id_t ns = NS_gnumeric_gnm; orcus::xml_token_t elem = XML_Cell; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, 
XML_Row, "5", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Col, "15", false)); @@ -319,7 +319,7 @@ void test_shared_formula_without_string() orcus::xmlns_id_t ns = NS_gnumeric_gnm; orcus::xml_token_t elem = XML_Cell; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Row, "6", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Col, "16", false)); @@ -340,7 +340,7 @@ void test_cell_formula() orcus::xmlns_id_t ns = NS_gnumeric_gnm; orcus::xml_token_t elem = XML_Cell; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Row, "9", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Col, "11", false)); context.start_element(ns, elem, attrs); @@ -359,7 +359,7 @@ void test_cell_array_formula() orcus::xmlns_id_t ns = NS_gnumeric_gnm; orcus::xml_token_t elem = XML_Cell; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Row, "19", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Col, "111", false)); attrs.push_back(xml_token_attr_t(NS_gnumeric_gnm, XML_Rows, "2", false)); diff --git a/src/liborcus/gnumeric_context.cpp b/src/liborcus/gnumeric_context.cpp index 8050e4fd..353d6059 100644 --- a/src/liborcus/gnumeric_context.cpp +++ b/src/liborcus/gnumeric_context.cpp @@ -52,7 +52,7 @@ void gnumeric_content_xml_context::end_child_context(xmlns_id_t /*ns*/, xml_toke { } -void gnumeric_content_xml_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& /*attrs*/) +void gnumeric_content_xml_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& /*attrs*/) { push_stack(ns, name); diff --git a/src/liborcus/gnumeric_context.hpp b/src/liborcus/gnumeric_context.hpp index d43bda79..73f2bb92 100644 --- a/src/liborcus/gnumeric_context.hpp +++ b/src/liborcus/gnumeric_context.hpp @@ -32,7 +32,7 @@ 
public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name); virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child); - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs); + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs); virtual bool end_element(xmlns_id_t ns, xml_token_t name); virtual void characters(std::string_view str, bool transient); diff --git a/src/liborcus/gnumeric_sheet_context.cpp b/src/liborcus/gnumeric_sheet_context.cpp index 261490f5..d9ded623 100644 --- a/src/liborcus/gnumeric_sheet_context.cpp +++ b/src/liborcus/gnumeric_sheet_context.cpp @@ -355,7 +355,7 @@ xml_context_base* gnumeric_sheet_context::create_child_context(xmlns_id_t ns, xm return nullptr; } -void gnumeric_sheet_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void gnumeric_sheet_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); if (ns == NS_gnumeric_gnm) @@ -527,7 +527,7 @@ void gnumeric_sheet_context::reset(spreadsheet::sheet_t sheet_index) m_chars = std::string_view{}; } -void gnumeric_sheet_context::start_font(const xml_attrs_t& attrs) +void gnumeric_sheet_context::start_font(const xml_token_attrs_t& attrs) { auto* styles = mp_factory->get_styles(); if (!styles) @@ -579,7 +579,7 @@ void gnumeric_sheet_context::start_font(const xml_attrs_t& attrs) } } -void gnumeric_sheet_context::start_col(const xml_attrs_t& attrs) +void gnumeric_sheet_context::start_col(const xml_token_attrs_t& attrs) { gnumeric_col_row_info col_info = for_each(attrs.begin(), attrs.end(), gnumeric_col_row_info()); @@ -593,7 +593,7 @@ void gnumeric_sheet_context::start_col(const xml_attrs_t& attrs) p_sheet_props->set_column_hidden(col, col_span, hidden); } -void gnumeric_sheet_context::start_row(const xml_attrs_t& attrs) +void 
gnumeric_sheet_context::start_row(const xml_token_attrs_t& attrs) { gnumeric_col_row_info row_info = for_each(attrs.begin(), attrs.end(), gnumeric_col_row_info()); @@ -608,7 +608,7 @@ void gnumeric_sheet_context::start_row(const xml_attrs_t& attrs) } } -void gnumeric_sheet_context::start_style(const xml_attrs_t& attrs) +void gnumeric_sheet_context::start_style(const xml_token_attrs_t& attrs) { auto* styles = mp_factory->get_styles(); if (!styles) @@ -736,13 +736,13 @@ void gnumeric_sheet_context::start_style(const xml_attrs_t& attrs) } } -void gnumeric_sheet_context::start_style_region(const xml_attrs_t& attrs) +void gnumeric_sheet_context::start_style_region(const xml_token_attrs_t& attrs) { mp_region_data.reset(new gnumeric_style_region()); for_each(attrs.begin(), attrs.end(), gnumeric_style_region_attr_parser(*mp_region_data)); } -void gnumeric_sheet_context::start_condition(const xml_attrs_t& attrs) +void gnumeric_sheet_context::start_condition(const xml_token_attrs_t& attrs) { spreadsheet::iface::import_conditional_format* cond_format = mp_sheet->get_conditional_format(); diff --git a/src/liborcus/gnumeric_sheet_context.hpp b/src/liborcus/gnumeric_sheet_context.hpp index 9dc652cb..e44a9663 100644 --- a/src/liborcus/gnumeric_sheet_context.hpp +++ b/src/liborcus/gnumeric_sheet_context.hpp @@ -67,19 +67,19 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name) override; - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) override; + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) override; virtual bool end_element(xmlns_id_t ns, xml_token_t name) override; virtual void characters(std::string_view str, bool transient) override; void reset(spreadsheet::sheet_t sheet_index); private: - void start_style_region(const xml_attrs_t& attrs); - void start_style(const xml_attrs_t& attrs); - void start_font(const xml_attrs_t& attrs); - void start_col(const 
xml_attrs_t& attrs); - void start_row(const xml_attrs_t& attrs); - void start_condition(const xml_attrs_t& attrs); + void start_style_region(const xml_token_attrs_t& attrs); + void start_style(const xml_token_attrs_t& attrs); + void start_font(const xml_token_attrs_t& attrs); + void start_col(const xml_token_attrs_t& attrs); + void start_row(const xml_token_attrs_t& attrs); + void start_condition(const xml_token_attrs_t& attrs); void end_table(); void end_style(bool conditional_format); diff --git a/src/liborcus/gnumeric_sheet_context_test.cpp b/src/liborcus/gnumeric_sheet_context_test.cpp index 436d95eb..147e56ad 100644 --- a/src/liborcus/gnumeric_sheet_context_test.cpp +++ b/src/liborcus/gnumeric_sheet_context_test.cpp @@ -88,18 +88,18 @@ void test_column_width() context.reset(0); orcus::xmlns_id_t ns = NS_gnumeric_gnm; orcus::xml_token_t parent = XML_Sheet; - orcus::xml_attrs_t parent_attr; + orcus::xml_token_attrs_t parent_attr; context.start_element(ns, parent, parent_attr); { orcus::xml_token_t elem = XML_Name; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; context.start_element(ns, elem, attrs); context.characters("test", false); context.end_element(ns, elem); } { orcus::xml_token_t elem = XML_ColInfo; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(xml_token_attr_t(ns, XML_No, "2", false)); attrs.push_back(xml_token_attr_t(ns, XML_Unit, "37.3", false)); attrs.push_back(xml_token_attr_t(ns, XML_Unit, "37.3", false)); diff --git a/src/liborcus/odf_para_context.cpp b/src/liborcus/odf_para_context.cpp index b8f8242c..2bd96dcb 100644 --- a/src/liborcus/odf_para_context.cpp +++ b/src/liborcus/odf_para_context.cpp @@ -43,7 +43,7 @@ void text_para_context::end_child_context(xmlns_id_t /*ns*/, xml_token_t /*name* // not implemented yet. 
} -void text_para_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void text_para_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); if (ns == NS_odf_text) diff --git a/src/liborcus/odf_para_context.hpp b/src/liborcus/odf_para_context.hpp index 1614b945..bc44988b 100644 --- a/src/liborcus/odf_para_context.hpp +++ b/src/liborcus/odf_para_context.hpp @@ -34,7 +34,7 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name); virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child); - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs); + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs); virtual bool end_element(xmlns_id_t ns, xml_token_t name); virtual void characters(std::string_view str, bool transient); diff --git a/src/liborcus/odf_style_context.cpp b/src/liborcus/odf_style_context.cpp index 7a338f1d..70953a10 100644 --- a/src/liborcus/odf_style_context.cpp +++ b/src/liborcus/odf_style_context.cpp @@ -251,7 +251,7 @@ void style_context::characters(std::string_view /*str*/, bool /*transient*/) { } -void style_context::start_paragraph_properties(const xml_token_pair_t& parent, const xml_attrs_t& attrs) +void style_context::start_paragraph_properties(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { xml_element_expected(parent, NS_odf_style, XML_style); @@ -298,7 +298,7 @@ void style_context::start_paragraph_properties(const xml_token_pair_t& parent, c } } -void style_context::start_text_properties(const xml_token_pair_t& parent, const xml_attrs_t& attrs) +void style_context::start_text_properties(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { static const xml_elem_set_t expected = { { NS_odf_style, XML_style }, @@ -548,7 +548,7 @@ void 
style_context::start_text_properties(const xml_token_pair_t& parent, const } } -void style_context::start_table_cell_properties(const xml_token_pair_t& parent, const xml_attrs_t& attrs) +void style_context::start_table_cell_properties(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { xml_element_expected(parent, NS_odf_style, XML_style); diff --git a/src/liborcus/odf_style_context.hpp b/src/liborcus/odf_style_context.hpp index 81e88ee5..4a339485 100644 --- a/src/liborcus/odf_style_context.hpp +++ b/src/liborcus/odf_style_context.hpp @@ -37,9 +37,9 @@ public: std::unique_ptr pop_style(); private: - void start_paragraph_properties(const xml_token_pair_t& parent, const xml_attrs_t& attrs); - void start_text_properties(const xml_token_pair_t& parent, const xml_attrs_t& attrs); - void start_table_cell_properties(const xml_token_pair_t& parent, const xml_attrs_t& attrs); + void start_paragraph_properties(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); + void start_text_properties(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); + void start_table_cell_properties(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); private: spreadsheet::iface::import_styles* mp_styles = nullptr; diff --git a/src/liborcus/odf_styles_context.cpp b/src/liborcus/odf_styles_context.cpp index 33647e85..cf2589da 100644 --- a/src/liborcus/odf_styles_context.cpp +++ b/src/liborcus/odf_styles_context.cpp @@ -230,7 +230,7 @@ void styles_context::end_child_context(xmlns_id_t ns, xml_token_t name, xml_cont } } -void styles_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& /*attrs*/) +void styles_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& /*attrs*/) { xml_token_pair_t parent = push_stack(ns, name); if (ns == NS_odf_office) diff --git a/src/liborcus/odf_styles_context.hpp b/src/liborcus/odf_styles_context.hpp index 38dc6bfa..01b2ac48 100644 --- a/src/liborcus/odf_styles_context.hpp 
+++ b/src/liborcus/odf_styles_context.hpp @@ -32,7 +32,7 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name) override; virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child) override; - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) override; + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) override; virtual bool end_element(xmlns_id_t ns, xml_token_t name) override; virtual void characters(std::string_view str, bool transient) override; diff --git a/src/liborcus/ods_content_xml_context.cpp b/src/liborcus/ods_content_xml_context.cpp index 65f25b80..5f07594a 100644 --- a/src/liborcus/ods_content_xml_context.cpp +++ b/src/liborcus/ods_content_xml_context.cpp @@ -51,7 +51,7 @@ const map_type& get() } // namespace cell_value void pick_up_named_range_or_expression( - session_context& cxt, const xml_attrs_t& attrs, xmlns_id_t exp_attr_ns, xml_token_t exp_attr_name, + session_context& cxt, const xml_token_attrs_t& attrs, xmlns_id_t exp_attr_ns, xml_token_t exp_attr_name, ods_session_data::named_exp_type name_type, ss::sheet_t scope) { std::string_view name; @@ -193,7 +193,7 @@ void ods_content_xml_context::end_child_context(xmlns_id_t ns, xml_token_t name, } } -void ods_content_xml_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void ods_content_xml_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); @@ -326,7 +326,7 @@ bool ods_content_xml_context::end_element(xmlns_id_t ns, xml_token_t name) return pop_stack(ns, name); } -void ods_content_xml_context::start_null_date(const xml_attrs_t& attrs) +void ods_content_xml_context::start_null_date(const xml_token_attrs_t& attrs) { spreadsheet::iface::import_global_settings* gs = mp_factory->get_global_settings(); if (!gs) @@ -346,7 +346,7 @@ void 
ods_content_xml_context::start_null_date(const xml_attrs_t& attrs) gs->set_origin_date(val.year, val.month, val.day); } -void ods_content_xml_context::start_table(const xml_token_pair_t& parent, const xml_attrs_t& attrs) +void ods_content_xml_context::start_table(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { static const xml_elem_set_t expected = { { NS_odf_office, XML_spreadsheet }, @@ -391,7 +391,7 @@ void ods_content_xml_context::end_table() } } -void ods_content_xml_context::start_named_range(const xml_token_pair_t& parent, const xml_attrs_t& attrs) +void ods_content_xml_context::start_named_range(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { xml_element_expected(parent, NS_odf_table, XML_named_expressions); @@ -404,7 +404,7 @@ void ods_content_xml_context::end_named_range() { } -void ods_content_xml_context::start_named_expression(const xml_token_pair_t& parent, const xml_attrs_t& attrs) +void ods_content_xml_context::start_named_expression(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { xml_element_expected(parent, NS_odf_table, XML_named_expressions); @@ -417,7 +417,7 @@ void ods_content_xml_context::end_named_expression() { } -void ods_content_xml_context::start_column(const xml_attrs_t& attrs) +void ods_content_xml_context::start_column(const xml_token_attrs_t& attrs) { if (!m_cur_sheet.sheet) return; @@ -469,7 +469,7 @@ void ods_content_xml_context::end_column() m_col += m_col_repeated; } -void ods_content_xml_context::start_row(const xml_attrs_t& attrs) +void ods_content_xml_context::start_row(const xml_token_attrs_t& attrs) { m_col = 0; m_row_attr = row_attr(); @@ -528,7 +528,7 @@ void ods_content_xml_context::end_row() m_row += m_row_attr.number_rows_repeated; } -void ods_content_xml_context::start_cell(const xml_attrs_t& attrs) +void ods_content_xml_context::start_cell(const xml_token_attrs_t& attrs) { m_cell_attr = cell_attr(); diff --git a/src/liborcus/ods_content_xml_context.hpp 
b/src/liborcus/ods_content_xml_context.hpp index 499833b7..59217529 100644 --- a/src/liborcus/ods_content_xml_context.hpp +++ b/src/liborcus/ods_content_xml_context.hpp @@ -70,28 +70,28 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name) override; virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child) override; - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) override; + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) override; virtual bool end_element(xmlns_id_t ns, xml_token_t name) override; private: - void start_null_date(const xml_attrs_t& attrs); + void start_null_date(const xml_token_attrs_t& attrs); - void start_table(const xml_token_pair_t& parent, const xml_attrs_t& attrs); + void start_table(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); void end_table(); - void start_named_range(const xml_token_pair_t& parent, const xml_attrs_t& attrs); + void start_named_range(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); void end_named_range(); - void start_named_expression(const xml_token_pair_t& parent, const xml_attrs_t& attrs); + void start_named_expression(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); void end_named_expression(); - void start_column(const xml_attrs_t& attrs); + void start_column(const xml_token_attrs_t& attrs); void end_column(); - void start_row(const xml_attrs_t& attrs); + void start_row(const xml_token_attrs_t& attrs); void end_row(); - void start_cell(const xml_attrs_t& attrs); + void start_cell(const xml_token_attrs_t& attrs); void end_cell(); /** diff --git a/src/liborcus/xls_xml_context.cpp b/src/liborcus/xls_xml_context.cpp index d468cf04..85916fc7 100644 --- a/src/liborcus/xls_xml_context.cpp +++ b/src/liborcus/xls_xml_context.cpp @@ -262,7 +262,7 @@ void xls_xml_data_context::reset() } void 
xls_xml_data_context::start_element_data( - const xml_token_pair_t& /*parent*/, const xml_attrs_t& attrs) + const xml_token_pair_t& /*parent*/, const xml_token_attrs_t& attrs) { m_cell_type = ct_unknown; m_cell_string.clear(); @@ -851,7 +851,7 @@ void xls_xml_context::end_child_context(xmlns_id_t /*ns*/, xml_token_t /*name*/, { } -void xls_xml_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void xls_xml_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { push_stack(ns, name); @@ -1283,12 +1283,12 @@ void xls_xml_context::characters(std::string_view str, bool /*transient*/) } } -void xls_xml_context::start_element_borders(const xml_attrs_t& /*attrs*/) +void xls_xml_context::start_element_borders(const xml_token_attrs_t& /*attrs*/) { m_current_style->borders.clear(); } -void xls_xml_context::start_element_border(const xml_attrs_t& attrs) +void xls_xml_context::start_element_border(const xml_token_attrs_t& attrs) { ss::border_direction_t dir = ss::border_direction_t::unknown; ss::border_style_t style = ss::border_style_t::unknown; @@ -1376,7 +1376,7 @@ void xls_xml_context::start_element_border(const xml_attrs_t& attrs) } } -void xls_xml_context::start_element_number_format(const xml_attrs_t& attrs) +void xls_xml_context::start_element_number_format(const xml_token_attrs_t& attrs) { m_current_style->number_format = std::string_view{}; @@ -1399,7 +1399,7 @@ void xls_xml_context::start_element_number_format(const xml_attrs_t& attrs) } } -void xls_xml_context::start_element_cell(const xml_attrs_t& attrs) +void xls_xml_context::start_element_cell(const xml_token_attrs_t& attrs) { long col_index = 0; std::string_view formula; @@ -1467,7 +1467,7 @@ void xls_xml_context::start_element_cell(const xml_attrs_t& attrs) } } -void xls_xml_context::start_element_column(const xml_attrs_t& attrs) +void xls_xml_context::start_element_column(const xml_token_attrs_t& attrs) { if (!mp_sheet_props && !mp_cur_sheet) 
return; @@ -1537,7 +1537,7 @@ void xls_xml_context::start_element_column(const xml_attrs_t& attrs) m_cur_prop_col = col_index + span + 1; } -void xls_xml_context::start_element_row(const xml_attrs_t& attrs) +void xls_xml_context::start_element_row(const xml_token_attrs_t& attrs) { m_cur_col = m_table_props.pos.column; ss::row_t row_index = -1; @@ -1604,7 +1604,7 @@ void xls_xml_context::start_element_row(const xml_attrs_t& attrs) } } -void xls_xml_context::start_element_table(const xml_attrs_t& attrs) +void xls_xml_context::start_element_table(const xml_token_attrs_t& attrs) { ss::row_t row_index = -1; ss::col_t col_index = -1; @@ -1642,7 +1642,7 @@ void xls_xml_context::start_element_table(const xml_attrs_t& attrs) m_table_props.pos.column = col_index - 1; } -void xls_xml_context::start_element_worksheet(const xml_attrs_t& attrs) +void xls_xml_context::start_element_worksheet(const xml_token_attrs_t& attrs) { ++m_cur_sheet; std::string_view sheet_name; diff --git a/src/liborcus/xls_xml_context.hpp b/src/liborcus/xls_xml_context.hpp index 9a2ef6a9..9f00bb07 100644 --- a/src/liborcus/xls_xml_context.hpp +++ b/src/liborcus/xls_xml_context.hpp @@ -92,7 +92,7 @@ public: private: - void start_element_data(const xml_token_pair_t& parent, const xml_attrs_t& attrs); + void start_element_data(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); void end_element_data(); bool handle_array_formula_result(); @@ -241,20 +241,20 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name) override; virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child) override; - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) override; + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) override; virtual bool end_element(xmlns_id_t ns, xml_token_t name) override; virtual void characters(std::string_view str, bool transient) override; 
private: - void start_element_borders(const xml_attrs_t& attrs); - void start_element_border(const xml_attrs_t& attrs); - void start_element_number_format(const xml_attrs_t& attrs); - void start_element_cell(const xml_attrs_t& attrs); - void start_element_column(const xml_attrs_t& attrs); - void start_element_row(const xml_attrs_t& attrs); - void start_element_table(const xml_attrs_t& attrs); - void start_element_worksheet(const xml_attrs_t& attrs); + void start_element_borders(const xml_token_attrs_t& attrs); + void start_element_border(const xml_token_attrs_t& attrs); + void start_element_number_format(const xml_token_attrs_t& attrs); + void start_element_cell(const xml_token_attrs_t& attrs); + void start_element_column(const xml_token_attrs_t& attrs); + void start_element_row(const xml_token_attrs_t& attrs); + void start_element_table(const xml_token_attrs_t& attrs); + void start_element_worksheet(const xml_token_attrs_t& attrs); void end_element_borders(); void end_element_border(); diff --git a/src/liborcus/xlsx_autofilter_context.cpp b/src/liborcus/xlsx_autofilter_context.cpp index c04268e9..7a4d5ab6 100644 --- a/src/liborcus/xlsx_autofilter_context.cpp +++ b/src/liborcus/xlsx_autofilter_context.cpp @@ -37,7 +37,7 @@ void xlsx_autofilter_context::end_child_context( { } -void xlsx_autofilter_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void xlsx_autofilter_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); diff --git a/src/liborcus/xlsx_autofilter_context.hpp b/src/liborcus/xlsx_autofilter_context.hpp index 5803a923..27f5e7bd 100644 --- a/src/liborcus/xlsx_autofilter_context.hpp +++ b/src/liborcus/xlsx_autofilter_context.hpp @@ -38,7 +38,7 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name); virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child); - virtual void 
start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs); + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs); virtual bool end_element(xmlns_id_t ns, xml_token_t name); virtual void characters(std::string_view str, bool transient); diff --git a/src/liborcus/xlsx_conditional_format_context.cpp b/src/liborcus/xlsx_conditional_format_context.cpp index 7a023a94..e667d834 100644 --- a/src/liborcus/xlsx_conditional_format_context.cpp +++ b/src/liborcus/xlsx_conditional_format_context.cpp @@ -655,7 +655,7 @@ void xlsx_conditional_format_context::end_child_context(xmlns_id_t /*ns*/, xml_t { } -void xlsx_conditional_format_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void xlsx_conditional_format_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); diff --git a/src/liborcus/xlsx_conditional_format_context.hpp b/src/liborcus/xlsx_conditional_format_context.hpp index 9f1103db..f03d53dd 100644 --- a/src/liborcus/xlsx_conditional_format_context.hpp +++ b/src/liborcus/xlsx_conditional_format_context.hpp @@ -45,7 +45,7 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name) override; virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child) override; - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) override; + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) override; virtual bool end_element(xmlns_id_t ns, xml_token_t name) override; virtual void characters(std::string_view str, bool transient) override; diff --git a/src/liborcus/xlsx_context.cpp b/src/liborcus/xlsx_context.cpp index a41d9862..77df51cc 100644 --- a/src/liborcus/xlsx_context.cpp +++ b/src/liborcus/xlsx_context.cpp @@ -105,7 +105,7 @@ void 
xlsx_shared_strings_context::end_child_context(xmlns_id_t /*ns*/, xml_token { } -void xlsx_shared_strings_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void xlsx_shared_strings_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); switch (name) @@ -504,7 +504,7 @@ void xlsx_styles_context::end_child_context(xmlns_id_t /*ns*/, xml_token_t /*nam { } -void xlsx_styles_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void xlsx_styles_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); @@ -1041,7 +1041,7 @@ void xlsx_styles_context::characters(std::string_view /*str*/, bool /*transient* // not used in the styles.xml part. } -void xlsx_styles_context::start_element_number_format(const xml_attrs_t& attrs) +void xlsx_styles_context::start_element_number_format(const xml_token_attrs_t& attrs) { if (!mp_styles) return; @@ -1071,7 +1071,7 @@ void xlsx_styles_context::start_element_number_format(const xml_attrs_t& attrs) } } -void xlsx_styles_context::start_element_border(const xml_attrs_t& attrs) +void xlsx_styles_context::start_element_border(const xml_token_attrs_t& attrs) { bool diagonal_up = false; bool diagonal_down = false; @@ -1102,7 +1102,7 @@ void xlsx_styles_context::start_element_border(const xml_attrs_t& attrs) m_diagonal_down = diagonal_down; } -void xlsx_styles_context::start_element_diagonal(const xml_attrs_t& attrs) +void xlsx_styles_context::start_element_diagonal(const xml_token_attrs_t& attrs) { assert(mp_border); @@ -1128,7 +1128,7 @@ void xlsx_styles_context::start_element_diagonal(const xml_attrs_t& attrs) for_each(attrs.begin(), attrs.end(), func); } -void xlsx_styles_context::start_border_color(const xml_attrs_t& attrs) +void xlsx_styles_context::start_border_color(const xml_token_attrs_t& attrs) { 
assert(mp_border); @@ -1143,7 +1143,7 @@ void xlsx_styles_context::start_border_color(const xml_attrs_t& attrs) mp_border->set_color(m_cur_border_dir, alpha, red, green, blue); } -void xlsx_styles_context::start_font_color(const xml_attrs_t& attrs) +void xlsx_styles_context::start_font_color(const xml_token_attrs_t& attrs) { assert(mp_font); diff --git a/src/liborcus/xlsx_context.hpp b/src/liborcus/xlsx_context.hpp index 021e135b..56fb702c 100644 --- a/src/liborcus/xlsx_context.hpp +++ b/src/liborcus/xlsx_context.hpp @@ -42,7 +42,7 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name); virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child); - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs); + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs); virtual bool end_element(xmlns_id_t ns, xml_token_t name); virtual void characters(std::string_view str, bool transient); @@ -67,17 +67,17 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name); virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child); - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs); + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs); virtual bool end_element(xmlns_id_t ns, xml_token_t name); virtual void characters(std::string_view str, bool transient); private: - void start_element_number_format(const xml_attrs_t& attrs); + void start_element_number_format(const xml_token_attrs_t& attrs); - void start_element_border(const xml_attrs_t& attrs); - void start_element_diagonal(const xml_attrs_t& attrs); - void start_border_color(const xml_attrs_t& attrs); - void start_font_color(const xml_attrs_t& attrs); + void start_element_border(const xml_token_attrs_t& attrs); + void start_element_diagonal(const 
xml_token_attrs_t& attrs); + void start_border_color(const xml_token_attrs_t& attrs); + void start_font_color(const xml_token_attrs_t& attrs); void end_element_number_format(); diff --git a/src/liborcus/xlsx_sheet_context.cpp b/src/liborcus/xlsx_sheet_context.cpp index 878a6a72..a6ce7fc7 100644 --- a/src/liborcus/xlsx_sheet_context.cpp +++ b/src/liborcus/xlsx_sheet_context.cpp @@ -169,7 +169,7 @@ void xlsx_sheet_context::end_child_context(xmlns_id_t ns, xml_token_t name, xml_ } } -void xlsx_sheet_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void xlsx_sheet_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); @@ -312,7 +312,7 @@ void xlsx_sheet_context::characters(std::string_view str, bool transient) m_cur_str = intern_in_context(str, transient); } -void xlsx_sheet_context::start_element_formula(const xml_token_pair_t& parent, const xml_attrs_t& attrs) +void xlsx_sheet_context::start_element_formula(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { const xml_elem_set_t expected = { { NS_ooxml_xlsx, XML_c }, @@ -363,7 +363,7 @@ void xlsx_sheet_context::start_element_formula(const xml_token_pair_t& parent, c } void xlsx_sheet_context::start_element_sheet_view( - const xml_token_pair_t& parent, const xml_attrs_t& attrs) + const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { xml_element_expected(parent, NS_ooxml_xlsx, XML_sheetViews); @@ -393,7 +393,7 @@ void xlsx_sheet_context::start_element_sheet_view( } void xlsx_sheet_context::start_element_selection( - const xml_token_pair_t& parent, const xml_attrs_t& attrs) + const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { xml_elem_stack_t elems; elems.emplace_back(NS_ooxml_xlsx, XML_sheetView); @@ -446,7 +446,7 @@ void xlsx_sheet_context::start_element_selection( } void xlsx_sheet_context::start_element_pane( - const xml_token_pair_t& parent, const xml_attrs_t& 
attrs) + const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { xml_elem_stack_t elems; elems.emplace_back(NS_ooxml_xlsx, XML_sheetView); @@ -513,7 +513,7 @@ void xlsx_sheet_context::start_element_pane( } } -void xlsx_sheet_context::start_element_cell(const xml_token_pair_t& parent, const xml_attrs_t& attrs) +void xlsx_sheet_context::start_element_cell(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs) { xlsx_cell_t cell_type = xlsx_ct_numeric; ss::address_t address; @@ -566,7 +566,7 @@ void xlsx_sheet_context::start_element_cell(const xml_token_pair_t& parent, cons m_cur_cell_xf = xf; } -void xlsx_sheet_context::start_element_col(const xml_attrs_t& attrs) +void xlsx_sheet_context::start_element_col(const xml_token_attrs_t& attrs) { long col_min = 0; // 1-based long col_max = 0; // 1-based @@ -621,7 +621,7 @@ void xlsx_sheet_context::start_element_col(const xml_attrs_t& attrs) } } -void xlsx_sheet_context::start_element_row(const xml_attrs_t& attrs) +void xlsx_sheet_context::start_element_row(const xml_token_attrs_t& attrs) { std::optional row; length_t height; diff --git a/src/liborcus/xlsx_sheet_context.hpp b/src/liborcus/xlsx_sheet_context.hpp index b7cf43b4..3f26ea0a 100644 --- a/src/liborcus/xlsx_sheet_context.hpp +++ b/src/liborcus/xlsx_sheet_context.hpp @@ -70,20 +70,20 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name); virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child); - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs); + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs); virtual bool end_element(xmlns_id_t ns, xml_token_t name); virtual void characters(std::string_view str, bool transient); void pop_rel_extras(opc_rel_extras_t& other); private: - void start_element_formula(const xml_token_pair_t& parent, const xml_attrs_t& attrs); - void start_element_sheet_view(const 
xml_token_pair_t& parent, const xml_attrs_t& attrs); - void start_element_selection(const xml_token_pair_t& parent, const xml_attrs_t& attrs); - void start_element_pane(const xml_token_pair_t& parent, const xml_attrs_t& attrs); - void start_element_cell(const xml_token_pair_t& parent, const xml_attrs_t& attrs); - void start_element_col(const xml_attrs_t& attrs); - void start_element_row(const xml_attrs_t& attrs); + void start_element_formula(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); + void start_element_sheet_view(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); + void start_element_selection(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); + void start_element_pane(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); + void start_element_cell(const xml_token_pair_t& parent, const xml_token_attrs_t& attrs); + void start_element_col(const xml_token_attrs_t& attrs); + void start_element_row(const xml_token_attrs_t& attrs); void end_element_cell(); void push_raw_cell_value(); diff --git a/src/liborcus/xlsx_sheet_context_test.cpp b/src/liborcus/xlsx_sheet_context_test.cpp index d961fc4a..20492b53 100644 --- a/src/liborcus/xlsx_sheet_context_test.cpp +++ b/src/liborcus/xlsx_sheet_context_test.cpp @@ -152,11 +152,11 @@ void test_cell_value() orcus::xmlns_id_t ns = NS_ooxml_xlsx; orcus::xml_token_t elem = XML_c; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; context.start_element(ns, elem, attrs); { - xml_attrs_t val_attrs; + xml_token_attrs_t val_attrs; context.start_element(ns, XML_v, val_attrs); context.characters("5", false); context.end_element(ns, XML_v); @@ -178,12 +178,12 @@ void test_cell_bool() orcus::xmlns_id_t ns = NS_ooxml_xlsx; orcus::xml_token_t elem = XML_c; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(xml_token_attr_t(NS_ooxml_xlsx, XML_t, "b", false)); context.start_element(ns, elem, attrs); { - xml_attrs_t val_attrs; + xml_token_attrs_t 
val_attrs; context.start_element(ns, XML_v, val_attrs); context.characters("1", false); context.end_element(ns, XML_v); @@ -205,11 +205,11 @@ void test_array_formula() orcus::xmlns_id_t ns = NS_ooxml_xlsx; orcus::xml_token_t elem = XML_c; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; context.start_element(ns, elem, attrs); { - xml_attrs_t formula_attrs; + xml_token_attrs_t formula_attrs; formula_attrs.push_back(xml_token_attr_t(NS_ooxml_xlsx, XML_t, "array", false)); formula_attrs.push_back(xml_token_attr_t(NS_ooxml_xlsx, XML_ref, "B3:B4", false)); context.start_element(ns, XML_f, formula_attrs); @@ -217,7 +217,7 @@ void test_array_formula() context.end_element(ns, XML_f); } { - xml_attrs_t val_attrs; + xml_token_attrs_t val_attrs; context.start_element(ns, XML_v, val_attrs); context.characters("5", false); context.end_element(ns, XML_v); @@ -239,7 +239,7 @@ void test_hidden_col() orcus::xmlns_id_t ns = NS_ooxml_xlsx; orcus::xml_token_t elem = XML_col; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(orcus::xml_token_attr_t(ns, XML_min, "2", false)); attrs.push_back(orcus::xml_token_attr_t(ns, XML_max, "2", false)); attrs.push_back(orcus::xml_token_attr_t(ns, XML_hidden, "1", false)); @@ -260,7 +260,7 @@ void test_hidden_row() orcus::xmlns_id_t ns = NS_ooxml_xlsx; orcus::xml_token_t elem = XML_row; - orcus::xml_attrs_t attrs; + orcus::xml_token_attrs_t attrs; attrs.push_back(orcus::xml_token_attr_t(ns, XML_r, "4", false)); attrs.push_back(orcus::xml_token_attr_t(ns, XML_hidden, "1", false)); context.start_element(ns, elem, attrs); diff --git a/src/liborcus/xlsx_table_context.cpp b/src/liborcus/xlsx_table_context.cpp index e13aa69a..2d476100 100644 --- a/src/liborcus/xlsx_table_context.cpp +++ b/src/liborcus/xlsx_table_context.cpp @@ -219,7 +219,7 @@ void xlsx_table_context::end_child_context(xmlns_id_t ns, xml_token_t name, xml_ } } -void xlsx_table_context::start_element(xmlns_id_t ns, xml_token_t name, const 
xml_attrs_t& attrs) +void xlsx_table_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); if (ns != NS_ooxml_xlsx) diff --git a/src/liborcus/xlsx_table_context.hpp b/src/liborcus/xlsx_table_context.hpp index 39b33b61..a277d2b8 100644 --- a/src/liborcus/xlsx_table_context.hpp +++ b/src/liborcus/xlsx_table_context.hpp @@ -32,7 +32,7 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name); virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child); - virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs); + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs); virtual bool end_element(xmlns_id_t ns, xml_token_t name); virtual void characters(std::string_view str, bool transient); diff --git a/src/liborcus/xlsx_workbook_context.cpp b/src/liborcus/xlsx_workbook_context.cpp index 9924ddd5..0e3a6815 100644 --- a/src/liborcus/xlsx_workbook_context.cpp +++ b/src/liborcus/xlsx_workbook_context.cpp @@ -43,7 +43,7 @@ void xlsx_workbook_context::end_child_context(xmlns_id_t /*ns*/, xml_token_t /*n { } -void xlsx_workbook_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_attrs_t& attrs) +void xlsx_workbook_context::start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs) { xml_token_pair_t parent = push_stack(ns, name); session_context& cxt = get_session_context(); diff --git a/src/liborcus/xlsx_workbook_context.hpp b/src/liborcus/xlsx_workbook_context.hpp index f3f346bb..08716700 100644 --- a/src/liborcus/xlsx_workbook_context.hpp +++ b/src/liborcus/xlsx_workbook_context.hpp @@ -41,7 +41,7 @@ public: virtual xml_context_base* create_child_context(xmlns_id_t ns, xml_token_t name); virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child); - virtual void start_element(xmlns_id_t ns, xml_token_t 
name, const xml_attrs_t& attrs); + virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs); virtual bool end_element(xmlns_id_t ns, xml_token_t name); virtual void characters(std::string_view str, bool transient); diff --git a/src/liborcus/xml_util.cpp b/src/liborcus/xml_util.cpp index c2a0c9db..7c15adfd 100644 --- a/src/liborcus/xml_util.cpp +++ b/src/liborcus/xml_util.cpp @@ -53,7 +53,7 @@ void xml_element_printer::print_element(std::ostream& os, xmlns_id_t ns, xml_tok os << m_tokens.get_token_name(name) << '>'; } -void print_attrs(const tokens& tokens, const xml_attrs_t& attrs) +void print_attrs(const tokens& tokens, const xml_token_attrs_t& attrs) { for (const auto& attr : attrs) { diff --git a/src/liborcus/xml_util.hpp b/src/liborcus/xml_util.hpp index 4f670153..1021ccf4 100644 --- a/src/liborcus/xml_util.hpp +++ b/src/liborcus/xml_util.hpp @@ -36,7 +36,7 @@ void print_element(std::ostream& os, const tokens& t, xmlns_id_t ns, xml_token_t /** * Print attributes to stdout for debugging purposes. */ -void print_attrs(const tokens& tokens, const xml_attrs_t& attrs); +void print_attrs(const tokens& tokens, const xml_token_attrs_t& attrs); } // namespace orcus -- GitLab From b3a9486c0b05162fc97477994a445006291660ea Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Wed, 23 Nov 2022 17:57:17 -0500 Subject: [PATCH 45/91] Organize docs on date_time_t & use default keywords where appropriate --- include/orcus/types.hpp | 26 +++++++++++++++++++++----- src/parser/types.cpp | 11 ++--------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/include/orcus/types.hpp b/include/orcus/types.hpp index a5cfd684..1fbf25ef 100644 --- a/include/orcus/types.hpp +++ b/include/orcus/types.hpp @@ -455,6 +455,9 @@ struct ORCUS_PSR_DLLPUBLIC length_t bool operator!= (const length_t& other) const noexcept; }; +/** + * Struct that holds a date or date-time value. 
+ */ struct ORCUS_PSR_DLLPUBLIC date_time_t { int year; @@ -476,17 +479,30 @@ struct ORCUS_PSR_DLLPUBLIC date_time_t bool operator!= (const date_time_t& other) const; bool operator< (const date_time_t& other) const; + /** + * Convert the date-time value to an ISO-formatted string representation. + * + * @return ISO-formatted string representation of the date-time value. + */ std::string to_string() const; + /** + * Swap the value with another instance. + * + * @param other another instance to swap values with. + */ void swap(date_time_t& other); /** - * Parse a string representation of a date-time value, and convert it into a - * date_time_t value. A string representation allows either a date only or - * a date and time value, but it does not allow a time only value. + * Parse an ISO-formatted string representation of a date-time value, and + * convert it into a date_time_t value. A string representation allows + * either a date only or a date and time value, but it does not allow a time + * only value. + * + * Here are some examples of ISO-formatted date and date-time values: * - * date only: 2013-04-09 - * date and time: 2013-04-09T21:34:09.55 + * @li 2013-04-09 (date only) + * @li 2013-04-09T21:34:09.55 (date and time) * * @param str string representation of a date-time value. * @return converted date-time value consisting of a set of numeric values. 
diff --git a/src/parser/types.cpp b/src/parser/types.cpp index 322f3cac..706f79e4 100644 --- a/src/parser/types.cpp +++ b/src/parser/types.cpp @@ -219,15 +219,8 @@ date_time_t::date_time_t(int _year, int _month, int _day) : date_time_t::date_time_t(int _year, int _month, int _day, int _hour, int _minute, double _second) : year(_year), month(_month), day(_day), hour(_hour), minute(_minute), second(_second) {} -date_time_t::date_time_t(const date_time_t& other) : - year(other.year), - month(other.month), - day(other.day), - hour(other.hour), - minute(other.minute), - second(other.second) {} - -date_time_t::~date_time_t() {} +date_time_t::date_time_t(const date_time_t& other) = default; +date_time_t::~date_time_t() = default; date_time_t& date_time_t::operator= (date_time_t other) { -- GitLab From 0c522e8de8d86b3000ea2c2ac48b3e95a6093768 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 24 Nov 2022 10:50:05 -0500 Subject: [PATCH 46/91] Move previously public types into liborcus only location These types are only used within liborcus --- doc/cpp/parser/types.rst | 4 ---- include/orcus/types.hpp | 10 --------- slickedit/cpp.vpj | 2 ++ src/liborcus/CMakeLists.txt | 1 + src/liborcus/Makefile.am | 4 ++++ src/liborcus/xml_element_types.cpp | 19 ++++++++++++++++ src/liborcus/xml_element_types.hpp | 30 ++++++++++++++++++++++++++ src/liborcus/xml_element_validator.hpp | 2 +- src/parser/types.cpp | 5 ----- 9 files changed, 57 insertions(+), 20 deletions(-) create mode 100644 src/liborcus/xml_element_types.cpp create mode 100644 src/liborcus/xml_element_types.hpp diff --git a/doc/cpp/parser/types.rst b/doc/cpp/parser/types.rst index a1c3a44c..6c532673 100644 --- a/doc/cpp/parser/types.rst +++ b/doc/cpp/parser/types.rst @@ -12,10 +12,7 @@ Constants Type aliases ------------ -.. doxygentypedef:: orcus::xml_elem_set_t -.. doxygentypedef:: orcus::xml_elem_stack_t .. doxygentypedef:: orcus::xml_token_attrs_t -.. doxygentypedef:: orcus::xml_token_pair_t .. 
doxygentypedef:: orcus::xml_token_t .. doxygentypedef:: orcus::xmlns_id_t @@ -29,7 +26,6 @@ Structs .. doxygenstruct:: orcus::xml_name_t .. doxygenstruct:: orcus::xml_token_attr_t .. doxygenstruct:: orcus::xml_token_element_t -.. doxygenstruct:: orcus::xml_token_pair_hash Enums ----- diff --git a/include/orcus/types.hpp b/include/orcus/types.hpp index 1fbf25ef..f5df3aa0 100644 --- a/include/orcus/types.hpp +++ b/include/orcus/types.hpp @@ -25,16 +25,6 @@ class xmlns_repository; using xml_token_t = size_t; using xmlns_id_t = const char*; -using xml_token_pair_t = std::pair; - -struct ORCUS_PSR_DLLPUBLIC xml_token_pair_hash -{ - size_t operator()(const xml_token_pair_t& v) const; -}; - -using xml_elem_stack_t = std::vector; -using xml_elem_set_t = std::unordered_set; - struct ORCUS_PSR_DLLPUBLIC parse_error_value_t { std::string_view str; diff --git a/slickedit/cpp.vpj b/slickedit/cpp.vpj index 46d39b5b..9141b986 100644 --- a/slickedit/cpp.vpj +++ b/slickedit/cpp.vpj @@ -424,6 +424,8 @@ + + diff --git a/src/liborcus/CMakeLists.txt b/src/liborcus/CMakeLists.txt index 40a0bc02..9eb38080 100644 --- a/src/liborcus/CMakeLists.txt +++ b/src/liborcus/CMakeLists.txt @@ -24,6 +24,7 @@ add_library(orcus-${ORCUS_API_VERSION} SHARED measurement.cpp xml_context_base.cpp xml_context_global.cpp + xml_element_types.cpp xml_element_validator.cpp xml_empty_context.cpp xml_map_tree.cpp diff --git a/src/liborcus/Makefile.am b/src/liborcus/Makefile.am index b547bb0a..dd374743 100644 --- a/src/liborcus/Makefile.am +++ b/src/liborcus/Makefile.am @@ -19,6 +19,9 @@ EXTRA_PROGRAMS = \ TESTS = +EXTRA_DIST = \ + xml_element_types.hpp + if HAVE_STATIC_LIB AM_CPPFLAGS += -D__ORCUS_STATIC_LIB=1 else @@ -71,6 +74,7 @@ liborcus_@ORCUS_API_VERSION@_la_SOURCES = \ xml_context_base.cpp \ xml_context_global.hpp \ xml_context_global.cpp \ + xml_element_types.cpp \ xml_element_validator.hpp \ xml_element_validator.cpp \ xml_empty_context.hpp \ diff --git a/src/liborcus/xml_element_types.cpp 
b/src/liborcus/xml_element_types.cpp new file mode 100644 index 00000000..967b4d4d --- /dev/null +++ b/src/liborcus/xml_element_types.cpp @@ -0,0 +1,19 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include "xml_element_types.hpp" + +namespace orcus { + +size_t xml_token_pair_hash::operator()(const xml_token_pair_t& v) const +{ + return std::hash()(v.first) ^ std::hash()(v.second); +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/src/liborcus/xml_element_types.hpp b/src/liborcus/xml_element_types.hpp new file mode 100644 index 00000000..8bf391ff --- /dev/null +++ b/src/liborcus/xml_element_types.hpp @@ -0,0 +1,30 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#pragma once + +#include + +namespace orcus { + +/** + * Holds a pair of XML namespace identifier and an element token. Typically + * used when managing the element stack inside element context classes. 
+ */ +using xml_token_pair_t = std::pair; + +struct ORCUS_PSR_DLLPUBLIC xml_token_pair_hash +{ + size_t operator()(const xml_token_pair_t& v) const; +}; + +using xml_elem_stack_t = std::vector; +using xml_elem_set_t = std::unordered_set; + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/src/liborcus/xml_element_validator.hpp b/src/liborcus/xml_element_validator.hpp index ed2a47a1..43a44e0d 100644 --- a/src/liborcus/xml_element_validator.hpp +++ b/src/liborcus/xml_element_validator.hpp @@ -7,7 +7,7 @@ #pragma once -#include +#include "xml_element_types.hpp" #include diff --git a/src/parser/types.cpp b/src/parser/types.cpp index 706f79e4..7a57c5a9 100644 --- a/src/parser/types.cpp +++ b/src/parser/types.cpp @@ -19,11 +19,6 @@ namespace orcus { -size_t xml_token_pair_hash::operator()(const xml_token_pair_t& v) const -{ - return std::hash()(v.first) ^ std::hash()(v.second); -} - parse_error_value_t::parse_error_value_t() : offset(0) { -- GitLab From 3118ce31a7f5d4818ad73d876d28d8924980e916 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 24 Nov 2022 10:58:08 -0500 Subject: [PATCH 47/91] Document these type aliases --- include/orcus/types.hpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/include/orcus/types.hpp b/include/orcus/types.hpp index f5df3aa0..34f1e597 100644 --- a/include/orcus/types.hpp +++ b/include/orcus/types.hpp @@ -20,9 +20,16 @@ namespace orcus { class xmlns_context; class xmlns_repository; -// XML specific types +/** + * Integral type that represents a tokenized XML element name. + */ +using xml_token_t = std::size_t; -using xml_token_t = size_t; +/** + * Type that represents a normalized XML namespace identifier. Internally it + * is a pointer value that points to a static char buffer that stores a + * namespace name. 
+ */ using xmlns_id_t = const char*; struct ORCUS_PSR_DLLPUBLIC parse_error_value_t -- GitLab From 7e82df8d3d80a959fe54ce12fc059bd24bf61397 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 24 Nov 2022 11:15:29 -0500 Subject: [PATCH 48/91] Use type alias here --- include/orcus/types.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/orcus/types.hpp b/include/orcus/types.hpp index 34f1e597..9ae7f503 100644 --- a/include/orcus/types.hpp +++ b/include/orcus/types.hpp @@ -100,7 +100,7 @@ struct ORCUS_PSR_DLLPUBLIC xml_token_element_t xmlns_id_t ns; xml_token_t name; std::string_view raw_name; - std::vector attrs; + xml_token_attrs_t attrs; xml_token_element_t& operator= (xml_token_element_t) = delete; -- GitLab From d2931aa472065b8c12abdc16ddc84e50e62585d9 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 24 Nov 2022 12:12:54 -0500 Subject: [PATCH 49/91] More on documenting the basic types and adding default member functions --- include/orcus/types.hpp | 102 ++++++++++++++++++++++++++++++---- include/orcus/zip_archive.hpp | 1 - src/parser/types.cpp | 23 ++++---- 3 files changed, 103 insertions(+), 23 deletions(-) diff --git a/include/orcus/types.hpp b/include/orcus/types.hpp index 9ae7f503..c1e47a9f 100644 --- a/include/orcus/types.hpp +++ b/include/orcus/types.hpp @@ -32,18 +32,32 @@ using xml_token_t = std::size_t; */ using xmlns_id_t = const char*; +/** + * Parser token that represents the state of a parse error, used by + * threaded_json_parser and threaded_sax_token_parser when transferring + * parse status between threads. + */ struct ORCUS_PSR_DLLPUBLIC parse_error_value_t { + /** error message associated with the parse error. */ std::string_view str; + /** offset in stream where the error occurred. 
*/ std::ptrdiff_t offset; parse_error_value_t(); + parse_error_value_t(const parse_error_value_t& other); parse_error_value_t(std::string_view _str, std::ptrdiff_t _offset); + parse_error_value_t& operator=(const parse_error_value_t& other); + bool operator==(const parse_error_value_t& other) const; bool operator!=(const parse_error_value_t& other) const; }; +/** + * Represents a name with a normalized namespace in XML documents. This can + * be used either as an element name or as an attribute name. + */ struct ORCUS_PSR_DLLPUBLIC xml_name_t { enum to_string_type { use_alias, use_short_name }; @@ -51,20 +65,47 @@ struct ORCUS_PSR_DLLPUBLIC xml_name_t xmlns_id_t ns; std::string_view name; - xml_name_t(); + xml_name_t() noexcept; xml_name_t(xmlns_id_t _ns, std::string_view _name); - xml_name_t(const xml_name_t& r); + xml_name_t(const xml_name_t& other); xml_name_t& operator= (const xml_name_t& other); - bool operator== (const xml_name_t& other) const; - bool operator!= (const xml_name_t& other) const; + bool operator== (const xml_name_t& other) const noexcept; + bool operator!= (const xml_name_t& other) const noexcept; + /** + * Convert a namespace-name value pair to a string representation with the + * namespace value converted to either an alias or a unique "short name". + * Refer to @link xmlns_context::get_alias() get_alias() @endlink and + * @link xmlns_context::get_short_name() get_short_name() @endlink + * for the explanations of an alias and short name. + * + * @param cxt namespace context object associated with the XML stream + * currently being parsed. + * @param type policy on how to convert a namespace identifier to a string + * representation. + * + * @return string representation of a namespace-name value pair. + */ std::string to_string(const xmlns_context& cxt, to_string_type type) const; + /** + * Convert a namespace-name value pair to a string representation with the + * namespace value converted to a unique "short name". 
Refer to + * orcus::xmlns_repository::get_short_name() get_short_name() @endlink for the + * explanations of a short name. + * + * @param repo namespace repository. + * + * @return string representation of a namespace-name value pair. + */ std::string to_string(const xmlns_repository& repo) const; }; +/** + * Struct containing properties of a tokenized XML attribute. + */ struct ORCUS_PSR_DLLPUBLIC xml_token_attr_t { xmlns_id_t ns; @@ -82,18 +123,26 @@ struct ORCUS_PSR_DLLPUBLIC xml_token_attr_t bool transient; xml_token_attr_t(); + xml_token_attr_t(const xml_token_attr_t& other); xml_token_attr_t( xmlns_id_t _ns, xml_token_t _name, std::string_view _value, bool _transient); xml_token_attr_t( xmlns_id_t _ns, xml_token_t _name, std::string_view _raw_name, std::string_view _value, bool _transient); + + xml_token_attr_t& operator=(const xml_token_attr_t& other); }; using xml_token_attrs_t = std::vector; /** - * Element properties passed to its handler via start_element() and - * end_element() calls. + * Struct containing XML element properties passed to the handler of + * sax_token_parser via its @p start_element() and @p end_element() + * calls. + * + * @see + * @li sax_token_handler::start_element + * @li sax_token_handler::end_element */ struct ORCUS_PSR_DLLPUBLIC xml_token_element_t { @@ -111,7 +160,7 @@ struct ORCUS_PSR_DLLPUBLIC xml_token_element_t }; /** - * Character set types. + * Character set types, generated from IANA character-sets specifications. * * @see https://www.iana.org/assignments/character-sets/character-sets.xhtml */ @@ -378,6 +427,9 @@ enum class character_set_t windows_874, }; +/** + * Struct holding XML declaration properties. 
+ */ struct ORCUS_PSR_DLLPUBLIC xml_declaration_t { uint8_t version_major; @@ -396,8 +448,6 @@ struct ORCUS_PSR_DLLPUBLIC xml_declaration_t bool operator!= (const xml_declaration_t& other) const; }; -// Other types - enum class length_unit_t { unknown = 0, @@ -408,8 +458,6 @@ enum class length_unit_t point, twip, pixel - - // TODO: Add more. }; enum class format_t @@ -424,7 +472,7 @@ enum class format_t enum class dump_format_t { - unknown, + unknown = 0, none, check, csv, @@ -436,6 +484,9 @@ enum class dump_format_t debug_state }; +/** + * Represents a length with unit of measurement. + */ struct ORCUS_PSR_DLLPUBLIC length_t { length_unit_t unit; @@ -507,9 +558,36 @@ struct ORCUS_PSR_DLLPUBLIC date_time_t static date_time_t from_chars(std::string_view str); }; +/** + * Parse a string that represents an output format type and convert it to a + * corresponding enum value. + * + * @param s string representing an output format type. + * + * @return enum value representing a character set, or + * character_set_t::unknown in case it cannot be + * determined. + */ ORCUS_PSR_DLLPUBLIC dump_format_t to_dump_format_enum(std::string_view s); + +/** + * Parse a string that represents a character set and convert it to a + * corresponding enum value. + * + * @param s string representing a character set. + * + * @return enum value representing a character set, or + * character_set_t::unspecified in case it cannot be + * determined. + */ ORCUS_PSR_DLLPUBLIC character_set_t to_character_set(std::string_view s); +/** + * Get a list of available output format entries. Each entry consists of the + * name of a format and its enum value equivalent. + * + * @return list of available output format entries. 
+ */ ORCUS_PSR_DLLPUBLIC std::vector> get_dump_format_entries(); ORCUS_PSR_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const length_t& v); diff --git a/include/orcus/zip_archive.hpp b/include/orcus/zip_archive.hpp index 9714b17e..8896bd88 100644 --- a/include/orcus/zip_archive.hpp +++ b/include/orcus/zip_archive.hpp @@ -111,7 +111,6 @@ public: * gets uncompressed if the original stream is compressed. * * @param entry_name file entry name. - * @param buf buffer to put the retrieved data stream into. * * @return buffer containing the data stream for specified entry. * diff --git a/src/parser/types.cpp b/src/parser/types.cpp index 7a57c5a9..5d469c5f 100644 --- a/src/parser/types.cpp +++ b/src/parser/types.cpp @@ -24,11 +24,15 @@ parse_error_value_t::parse_error_value_t() : { } +parse_error_value_t::parse_error_value_t(const parse_error_value_t& other) = default; + parse_error_value_t::parse_error_value_t(std::string_view _str, std::ptrdiff_t _offset) : str(_str), offset(_offset) { } +parse_error_value_t& parse_error_value_t::operator=(const parse_error_value_t& other) = default; + bool parse_error_value_t::operator==(const parse_error_value_t& other) const { return str == other.str && offset == other.offset; @@ -39,23 +43,18 @@ bool parse_error_value_t::operator!=(const parse_error_value_t& other) const return !operator==(other); } -xml_name_t::xml_name_t() : ns(XMLNS_UNKNOWN_ID), name() {} +xml_name_t::xml_name_t() noexcept : ns(XMLNS_UNKNOWN_ID), name() {} xml_name_t::xml_name_t(xmlns_id_t _ns, std::string_view _name) : ns(_ns), name(_name) {} -xml_name_t::xml_name_t(const xml_name_t& r) : ns(r.ns), name(r.name) {} +xml_name_t::xml_name_t(const xml_name_t& other) = default; -xml_name_t& xml_name_t::operator= (const xml_name_t& other) -{ - ns = other.ns; - name = other.name; - return *this; -} +xml_name_t& xml_name_t::operator= (const xml_name_t& other) = default; -bool xml_name_t::operator== (const xml_name_t& other) const +bool xml_name_t::operator== 
(const xml_name_t& other) const noexcept { return ns == other.ns && name == other.name; } -bool xml_name_t::operator!= (const xml_name_t& other) const +bool xml_name_t::operator!= (const xml_name_t& other) const noexcept { return !operator==(other); } @@ -103,6 +102,8 @@ std::string xml_name_t::to_string(const xmlns_repository& repo) const xml_token_attr_t::xml_token_attr_t() : ns(XMLNS_UNKNOWN_ID), name(XML_UNKNOWN_TOKEN), transient(false) {} +xml_token_attr_t::xml_token_attr_t(const xml_token_attr_t& other) = default; + xml_token_attr_t::xml_token_attr_t( xmlns_id_t _ns, xml_token_t _name, std::string_view _value, bool _transient) : ns(_ns), name(_name), value(_value), transient(_transient) {} @@ -111,6 +112,8 @@ xml_token_attr_t::xml_token_attr_t( xmlns_id_t _ns, xml_token_t _name, std::string_view _raw_name, std::string_view _value, bool _transient) : ns(_ns), name(_name), raw_name(_raw_name), value(_value), transient(_transient) {} +xml_token_attr_t& xml_token_attr_t::operator=(const xml_token_attr_t& other) = default; + xml_token_element_t::xml_token_element_t() : ns(nullptr), name(XML_UNKNOWN_TOKEN) {} xml_token_element_t::xml_token_element_t( -- GitLab From a0d9574612084f2ef576066723c86a496d376800 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 24 Nov 2022 12:52:59 -0500 Subject: [PATCH 50/91] xmlns_repository::get_short_name(size_t) is not used at all Only the other variant is used. --- include/orcus/types.hpp | 4 ++-- include/orcus/xml_namespace.hpp | 9 ++++++--- src/parser/xml_namespace.cpp | 6 +----- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/orcus/types.hpp b/include/orcus/types.hpp index c1e47a9f..40aa8e0b 100644 --- a/include/orcus/types.hpp +++ b/include/orcus/types.hpp @@ -92,8 +92,8 @@ struct ORCUS_PSR_DLLPUBLIC xml_name_t /** * Convert a namespace-name value pair to a string representation with the - * namespace value converted to a unique "short name". 
Refer to - * orcus::xmlns_repository::get_short_name() get_short_name() @endlink for the + * namespace value converted to a unique "short name". Refer to @link + * xmlns_repository::get_short_name() get_short_name() @endlink for the * explanations of a short name. * * @param repo namespace repository. diff --git a/include/orcus/xml_namespace.hpp b/include/orcus/xml_namespace.hpp index fd5d9ff5..1cfa1e89 100644 --- a/include/orcus/xml_namespace.hpp +++ b/include/orcus/xml_namespace.hpp @@ -66,8 +66,11 @@ public: */ xmlns_id_t get_identifier(size_t index) const; + /** + * See xmlns_context::get_short_name() for the explanation of this method, + * which works identically to it. + */ std::string get_short_name(xmlns_id_t ns_id) const; - std::string get_short_name(size_t index) const; }; /** @@ -123,8 +126,8 @@ public: * but still guaranteed to be unique to the identifier it is associated * with. * - *

Note that the xmlns_repository class has method of the same - * name, and that method works identically to this method.

+ * @note The xmlns_repository class has method of the same name, and that + * method works identically to this method. * * @param ns_id a namespace identifier to obtain short name for. * diff --git a/src/parser/xml_namespace.cpp b/src/parser/xml_namespace.cpp index be3f2934..66bb6697 100644 --- a/src/parser/xml_namespace.cpp +++ b/src/parser/xml_namespace.cpp @@ -145,15 +145,11 @@ xmlns_id_t xmlns_repository::get_identifier(size_t index) const string xmlns_repository::get_short_name(xmlns_id_t ns_id) const { size_t index = get_index(ns_id); - return get_short_name(index); -} -string xmlns_repository::get_short_name(size_t index) const -{ if (index == INDEX_NOT_FOUND) return string("???"); - ostringstream os; + std::ostringstream os; os << "ns" << index; return os.str(); } -- GitLab From 7b9eeb082ce8ef31c7be8e71a312adc4838ae01c Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 24 Nov 2022 13:15:47 -0500 Subject: [PATCH 51/91] Add more details to the docs of XML namespace classes Also rename 'key' with 'alias', as the latter sounds more apt. 
--- include/orcus/xml_namespace.hpp | 40 +++++++++++++++++++++------ src/parser/xml_namespace.cpp | 48 ++++++++++++++++++--------------- 2 files changed, 58 insertions(+), 30 deletions(-) diff --git a/include/orcus/xml_namespace.hpp b/include/orcus/xml_namespace.hpp index 1cfa1e89..f64245e5 100644 --- a/include/orcus/xml_namespace.hpp +++ b/include/orcus/xml_namespace.hpp @@ -32,12 +32,12 @@ class ORCUS_PSR_DLLPUBLIC xmlns_repository xmlns_id_t intern(std::string_view uri); - xmlns_repository(const xmlns_repository&); // disabled - xmlns_repository& operator= (const xmlns_repository&); // disabled - size_t get_index(xmlns_id_t ns_id) const; public: + xmlns_repository(const xmlns_repository&) = delete; + xmlns_repository& operator= (const xmlns_repository&) = delete; + xmlns_repository(); ~xmlns_repository(); @@ -55,6 +55,14 @@ public: */ void add_predefined_values(const xmlns_id_t* predefined_ns); + /** + * Create a context object associated with this namespace repository. + * + * @warning Since this context object references values in this repo, make + * sure that it will not out-live the repository object itself. + * + * @return context object to use for a new XML stream. + */ xmlns_context create_context(); /** @@ -79,7 +87,7 @@ public: * instance of this class any longer than the life cycle of the xml stream * it is used in. * - * An empty key value is associated with a default namespace. + * An empty key value i.e. `""` is associated with a default namespace. */ class ORCUS_PSR_DLLPUBLIC xmlns_context { @@ -98,17 +106,33 @@ public: xmlns_context& operator= (const xmlns_context& r); xmlns_context& operator= (xmlns_context&& r); - xmlns_id_t push(std::string_view key, std::string_view uri); - void pop(std::string_view key); + /** + * Push a new namespace alias-value pair to the stack. + * + * @param alias namespace alias to push onto the stack. If the same alias + * is already present, this overwrites it until it gets popped + * off the stack. 
+ * @param uri namespace name to associate with the alias. + * + * @return normalized namespace identifier for the namespace name. + */ + xmlns_id_t push(std::string_view alias, std::string_view uri); + + /** + * Pop a namespace alias from the stack. + * + * @param alias namespace alias to pop from the stack. + */ + void pop(std::string_view alias); /** * Get the currnet namespace identifier for a specified namespace alias. * - * @param key namespace alias to get the current namespace identifier for. + * @param alias namespace alias to get the current namespace identifier for. * * @return current namespace identifier associated with the alias. */ - xmlns_id_t get(std::string_view key) const; + xmlns_id_t get(std::string_view alias) const; /** * Get a unique index value associated with a specified identifier. An diff --git a/src/parser/xml_namespace.cpp b/src/parser/xml_namespace.cpp index 66bb6697..79295f70 100644 --- a/src/parser/xml_namespace.cpp +++ b/src/parser/xml_namespace.cpp @@ -63,7 +63,7 @@ struct xmlns_repository::impl }; xmlns_repository::xmlns_repository() : mp_impl(std::make_unique()) {} -xmlns_repository::~xmlns_repository() {} +xmlns_repository::~xmlns_repository() = default; xmlns_id_t xmlns_repository::intern(std::string_view uri) { @@ -192,7 +192,7 @@ xmlns_context::xmlns_context(xmlns_context&& r) : mp_impl(std::move(r.mp_impl)) r.mp_impl = std::make_unique(); } -xmlns_context::~xmlns_context() {} +xmlns_context::~xmlns_context() = default; xmlns_context& xmlns_context::operator= (const xmlns_context& r) { @@ -208,37 +208,37 @@ xmlns_context& xmlns_context::operator= (xmlns_context&& r) return *this; } -xmlns_id_t xmlns_context::push(std::string_view key, std::string_view uri) +xmlns_id_t xmlns_context::push(std::string_view alias, std::string_view uri) { if (!mp_impl->repo) throw general_error("this context is not associated with any repo."); #if ORCUS_DEBUG_XML_NAMESPACE - cout << "xmlns_context::push: key='" << key << "', uri='" << uri << "'" 
<< endl; + cout << "xmlns_context::push: key='" << alias << "', uri='" << uri << "'" << endl; #endif mp_impl->m_trim_all_ns = true; xmlns_id_t id = mp_impl->repo->intern(uri); std::string_view uri_interned = id ? std::string_view(id) : std::string_view(); - if (key.empty()) + if (alias.empty()) { - // empty key value is associated with default namespace. + // empty alias value is associated with default namespace. mp_impl->m_default.push_back(uri_interned.data()); mp_impl->m_all_ns.push_back(uri_interned.data()); return mp_impl->m_default.back(); } - // See if this key already exists. - alias_map_type::iterator it = mp_impl->m_map.find(key); + // See if this alias already exists. + alias_map_type::iterator it = mp_impl->m_map.find(alias); if (it == mp_impl->m_map.end()) { - // This is the first time this key is used. + // This is the first time this alias is used. xmlns_list_type nslist; nslist.push_back(uri_interned.data()); mp_impl->m_all_ns.push_back(uri_interned.data()); std::pair r = - mp_impl->m_map.insert(alias_map_type::value_type(key, nslist)); + mp_impl->m_map.insert(alias_map_type::value_type(alias, nslist)); if (!r.second) // insertion failed. @@ -247,21 +247,21 @@ xmlns_id_t xmlns_context::push(std::string_view key, std::string_view uri) return nslist.back(); } - // The key already exists. + // The alias already exists. xmlns_list_type& nslist = it->second; nslist.push_back(uri_interned.data()); mp_impl->m_all_ns.push_back(uri_interned.data()); return nslist.back(); } -void xmlns_context::pop(std::string_view key) +void xmlns_context::pop(std::string_view alias) { #if ORCUS_DEBUG_XML_NAMESPACE - cout << "xmlns_context::pop: key='" << key << "'" << endl; + cout << "xmlns_context::pop: alias='" << alias << "'" << endl; #endif - if (key.empty()) + if (alias.empty()) { - // empty key value is associated with default namespace. + // empty alias value is associated with default namespace. 
if (mp_impl->m_default.empty()) throw general_error("default namespace stack is empty."); @@ -269,10 +269,14 @@ void xmlns_context::pop(std::string_view key) return; } - // See if this key really exists. - alias_map_type::iterator it = mp_impl->m_map.find(key); + // See if this alias really exists. + alias_map_type::iterator it = mp_impl->m_map.find(alias); if (it == mp_impl->m_map.end()) - throw general_error("failed to find the key."); + { + std::ostringstream os; + os << "alias named '" << alias << "' was attempted to be popped, but was not found in the stack"; + throw general_error(os.str()); + } xmlns_list_type& nslist = it->second; if (nslist.empty()) @@ -281,19 +285,19 @@ void xmlns_context::pop(std::string_view key) nslist.pop_back(); } -xmlns_id_t xmlns_context::get(std::string_view key) const +xmlns_id_t xmlns_context::get(std::string_view alias) const { #if ORCUS_DEBUG_XML_NAMESPACE - cout << "xmlns_context::get: alias='" << key << "', default ns stack size=" + cout << "xmlns_context::get: alias='" << alias << "', default ns stack size=" << mp_impl->m_default.size() << ", non-default alias count=" << mp_impl->m_map.size(); cout << ", "; print_map_keys(mp_impl->m_map); cout << endl; #endif - if (key.empty()) + if (alias.empty()) return mp_impl->m_default.empty() ? 
XMLNS_UNKNOWN_ID : mp_impl->m_default.back(); - alias_map_type::const_iterator it = mp_impl->m_map.find(key); + alias_map_type::const_iterator it = mp_impl->m_map.find(alias); if (it == mp_impl->m_map.end()) { #if ORCUS_DEBUG_XML_NAMESPACE -- GitLab From f43f16a399e9c5dc13f4ec1d7ec2b94873b40f5d Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 24 Nov 2022 13:37:29 -0500 Subject: [PATCH 52/91] Make xmlns_repository movable --- include/orcus/xml_namespace.hpp | 11 +++++++++-- src/parser/Makefile.am | 4 +++- src/parser/xml_namespace.cpp | 7 +++++++ src/parser/xml_namespace_test.cpp | 33 +++++++++++++++++++++++++++++++ 4 files changed, 52 insertions(+), 3 deletions(-) diff --git a/include/orcus/xml_namespace.hpp b/include/orcus/xml_namespace.hpp index f64245e5..cf9b2700 100644 --- a/include/orcus/xml_namespace.hpp +++ b/include/orcus/xml_namespace.hpp @@ -22,6 +22,9 @@ struct xmlns_context_impl; /** * Central XML namespace repository that stores all namespaces that are used * in the current session. + * + * @warning this class is not copyable, but is movable; however, the + * moved-from object will not be usable after the move. */ class ORCUS_PSR_DLLPUBLIC xmlns_repository { @@ -39,8 +42,11 @@ public: xmlns_repository& operator= (const xmlns_repository&) = delete; xmlns_repository(); + xmlns_repository(xmlns_repository&& other); ~xmlns_repository(); + xmlns_repository& operator= (xmlns_repository&&); + /** * Add a set of predefined namespace values to the repository. * @@ -58,8 +64,9 @@ public: /** * Create a context object associated with this namespace repository. * - * @warning Since this context object references values in this repo, make - * sure that it will not out-live the repository object itself. + * @warning Since this context object references values stored in the repo, + * make sure that it will not out-live the repository object + * itself. * * @return context object to use for a new XML stream. 
*/ diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am index d68adb2e..d9822de7 100644 --- a/src/parser/Makefile.am +++ b/src/parser/Makefile.am @@ -86,7 +86,9 @@ parser_test_xml_namespace_SOURCES = \ xml_namespace.cpp \ xml_namespace_test.cpp -parser_test_xml_namespace_LDADD = liborcus-parser-@ORCUS_API_VERSION@.la +parser_test_xml_namespace_LDADD = \ + liborcus-parser-@ORCUS_API_VERSION@.la \ + ../test/liborcus-test.a parser_test_xml_namespace_CPPFLAGS = $(AM_CPPFLAGS) # parser-test-xml-validation diff --git a/src/parser/xml_namespace.cpp b/src/parser/xml_namespace.cpp index 79295f70..2aafea3d 100644 --- a/src/parser/xml_namespace.cpp +++ b/src/parser/xml_namespace.cpp @@ -63,8 +63,15 @@ struct xmlns_repository::impl }; xmlns_repository::xmlns_repository() : mp_impl(std::make_unique()) {} +xmlns_repository::xmlns_repository(xmlns_repository&& other) : mp_impl(std::move(other.mp_impl)) {} xmlns_repository::~xmlns_repository() = default; +xmlns_repository& xmlns_repository::operator= (xmlns_repository&& other) +{ + mp_impl = std::move(other.mp_impl); + return *this; +} + xmlns_id_t xmlns_repository::intern(std::string_view uri) { // See if the uri is already registered. 
diff --git a/src/parser/xml_namespace_test.cpp b/src/parser/xml_namespace_test.cpp index 44029615..38551be8 100644 --- a/src/parser/xml_namespace_test.cpp +++ b/src/parser/xml_namespace_test.cpp @@ -20,6 +20,8 @@ namespace { void test_basic() { + ORCUS_TEST_FUNC_SCOPE; + pstring xmlns1("http://some.xmlns/"); pstring xmlns2("http://other.xmlns/"); @@ -53,6 +55,8 @@ void test_basic() void test_all_namespaces() { + ORCUS_TEST_FUNC_SCOPE; + pstring key1("a"), key2("b"), key3("c"); pstring ns1("foo"), ns2("baa"), ns3("hmm"); @@ -110,6 +114,8 @@ void test_predefined_ns() void test_xml_name_t() { + ORCUS_TEST_FUNC_SCOPE; + xml_name_t name1; name1.ns = NS_test_name1; name1.name = "foo"; @@ -127,6 +133,8 @@ void test_xml_name_t() void test_ns_context() { + ORCUS_TEST_FUNC_SCOPE; + xmlns_repository repo; repo.add_predefined_values(NS_test_all); @@ -193,6 +201,30 @@ void test_ns_context() assert(id1 == id2); } +void test_repo_move() +{ + ORCUS_TEST_FUNC_SCOPE; + + static_assert(!std::is_copy_constructible_v); + static_assert(std::is_move_constructible_v); + + xmlns_repository repo; + repo.add_predefined_values(NS_test_all); + + xmlns_repository repo_moved = std::move(repo); // move construction + xmlns_repository repo_moved2; + repo_moved2 = std::move(repo_moved); // move assignment + + xmlns_id_t ns_id = repo_moved2.get_identifier(0); + assert(ns_id != XMLNS_UNKNOWN_ID); + ns_id = repo_moved2.get_identifier(1); + assert(ns_id != XMLNS_UNKNOWN_ID); + ns_id = repo_moved2.get_identifier(2); + assert(ns_id != XMLNS_UNKNOWN_ID); + ns_id = repo_moved2.get_identifier(3); + assert(ns_id == XMLNS_UNKNOWN_ID); +} + } // anonymous namespace int main() @@ -202,6 +234,7 @@ int main() test_predefined_ns(); test_xml_name_t(); test_ns_context(); + test_repo_move(); return EXIT_SUCCESS; } -- GitLab From c1adf7bf4ea777e4fc6612b264cae531471204e3 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 24 Nov 2022 13:56:15 -0500 Subject: [PATCH 53/91] Document the rest of basic enum types --- 
include/orcus/types.hpp | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/include/orcus/types.hpp b/include/orcus/types.hpp index 40aa8e0b..6b852068 100644 --- a/include/orcus/types.hpp +++ b/include/orcus/types.hpp @@ -448,18 +448,32 @@ struct ORCUS_PSR_DLLPUBLIC xml_declaration_t bool operator!= (const xml_declaration_t& other) const; }; +/** + * Unit of length, as used in length_t. + */ enum class length_unit_t { unknown = 0, centimeter, millimeter, + /** + * Special unit of length used by Excel, defined as the maximum digit width + * of font used as the "Normal" style font. + * + * @note Since it's not possible to determine the actual length using this + * unit, it is approximated by 1.9 millimeters. + */ xlsx_column_digit, inch, point, + /** One twip is a twentieth of a point equal to 1/1440 of an inch. */ twip, pixel }; +/** + * Input formats that orcus can import. + */ enum class format_t { unknown = 0, @@ -470,6 +484,9 @@ enum class format_t csv }; +/** + * Formats supported by orcus as output formats. + */ enum class dump_format_t { unknown = 0, @@ -485,7 +502,7 @@ enum class dump_format_t }; /** - * Represents a length with unit of measurement. + * Holds a length value with unit of measurement. */ struct ORCUS_PSR_DLLPUBLIC length_t { -- GitLab From a06ed2d664a12ffcdb85f74b7c2a8209d715863b Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 24 Nov 2022 14:03:13 -0500 Subject: [PATCH 54/91] Document base64 utility functions These are currently not actively used, but may be used in the future. --- doc/cpp/parser/base64.rst | 5 +++++ doc/cpp/parser/index.rst | 1 + 2 files changed, 6 insertions(+) create mode 100644 doc/cpp/parser/base64.rst diff --git a/doc/cpp/parser/base64.rst b/doc/cpp/parser/base64.rst new file mode 100644 index 00000000..617db375 --- /dev/null +++ b/doc/cpp/parser/base64.rst @@ -0,0 +1,5 @@ + +Base64 +====== + +.. 
doxygenfile:: base64.hpp diff --git a/doc/cpp/parser/index.rst b/doc/cpp/parser/index.rst index 9bc17645..c91d7925 100644 --- a/doc/cpp/parser/index.rst +++ b/doc/cpp/parser/index.rst @@ -14,5 +14,6 @@ Low-level parsers and utilities types.rst util.rst stream.rst + base64.rst archive.rst exception.rst -- GitLab From 79407cf53f80fce872d0829e13f950ba017c3fbf Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 24 Nov 2022 14:14:39 -0500 Subject: [PATCH 55/91] Remove unnecessary @p tags --- include/orcus/css_parser.hpp | 2 +- include/orcus/csv_parser.hpp | 2 +- include/orcus/csv_parser_base.hpp | 2 +- include/orcus/json_parser.hpp | 2 +- include/orcus/yaml_parser.hpp | 6 +++--- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/orcus/css_parser.hpp b/include/orcus/css_parser.hpp index 1633453b..063508b1 100644 --- a/include/orcus/css_parser.hpp +++ b/include/orcus/css_parser.hpp @@ -192,7 +192,7 @@ public: * Parser for CSS documents. * * @tparam HandlerT Hanlder type with member functions for event callbacks. - * Refer to @p css_handler. + * Refer to css_handler. */ template class css_parser : public css::parser_base diff --git a/include/orcus/csv_parser.hpp b/include/orcus/csv_parser.hpp index f6c7d3bd..01c55cb3 100644 --- a/include/orcus/csv_parser.hpp +++ b/include/orcus/csv_parser.hpp @@ -57,7 +57,7 @@ public: * Parser for CSV documents. * * @tparam HandlerT Hanlder type with member functions for event callbacks. - * Refer to @p csv_handler. + * Refer to csv_handler. */ template class csv_parser : public csv::parser_base diff --git a/include/orcus/csv_parser_base.hpp b/include/orcus/csv_parser_base.hpp index d6a59d6d..d7ceaad1 100644 --- a/include/orcus/csv_parser_base.hpp +++ b/include/orcus/csv_parser_base.hpp @@ -31,7 +31,7 @@ using std::endl; namespace orcus { namespace csv { /** - * Run-time configuration object for @p orcus::csv_parser. + * Run-time configuration object for csv_parser. 
*/ struct ORCUS_PSR_DLLPUBLIC parser_config { diff --git a/include/orcus/json_parser.hpp b/include/orcus/json_parser.hpp index 6eeb6f0d..ae9e228e 100644 --- a/include/orcus/json_parser.hpp +++ b/include/orcus/json_parser.hpp @@ -110,7 +110,7 @@ public: * Parser for JSON documents. * * @tparam HandlerT Hanlder type with member functions for event callbacks. - * Refer to @p json_handler. + * Refer to json_handler. */ template class json_parser : public json::parser_base diff --git a/include/orcus/yaml_parser.hpp b/include/orcus/yaml_parser.hpp index bb14ccb1..b8d1a027 100644 --- a/include/orcus/yaml_parser.hpp +++ b/include/orcus/yaml_parser.hpp @@ -14,8 +14,8 @@ namespace orcus { /** - * Blank handler class for @p yaml_parser. One can sub-class this and - * overwrite callback functions one needs to handle. + * Blank handler class for yaml_parser. One can sub-class this and overwrite + * callback functions one needs to handle. */ class yaml_handler { @@ -111,7 +111,7 @@ public: * Parser for YAML documents. * * @tparam HandlerT Hanlder type with member functions for event callbacks. - * Refer to @p yaml_handler. + * Refer to yaml_handler. * * @warning This parser is still highly experimental. Use with caution. 
*/ -- GitLab From 76cdac58f46b3c1647ec658c1abac9fda647d1ed Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 29 Nov 2022 18:22:04 -0500 Subject: [PATCH 56/91] All sax parsers to take contents as std::string_view --- include/orcus/sax_ns_parser.hpp | 6 +++--- include/orcus/sax_parser.hpp | 7 +++---- include/orcus/sax_token_parser.hpp | 6 +++--- src/liborcus/dom_tree.cpp | 3 +-- src/liborcus/orcus_xml.cpp | 2 +- src/liborcus/orcus_xml_map_def.cpp | 2 +- src/liborcus/xml_stream_parser.cpp | 2 +- src/liborcus/xml_structure_tree.cpp | 2 +- src/mso/encryption_info.cpp | 2 +- src/orcus_test_xml.cpp | 2 +- src/parser/parser_test_xml_validation.cpp | 2 +- src/parser/sax_ns_parser_test.cpp | 6 ++---- src/parser/sax_parser_test.cpp | 7 +++---- src/parser/sax_token_parser_test.cpp | 14 ++++++-------- src/parser/sax_token_parser_thread.cpp | 2 +- src/parser/xml_writer_test.cpp | 2 +- 16 files changed, 30 insertions(+), 37 deletions(-) diff --git a/include/orcus/sax_ns_parser.hpp b/include/orcus/sax_ns_parser.hpp index 42323beb..f888fa2c 100644 --- a/include/orcus/sax_ns_parser.hpp +++ b/include/orcus/sax_ns_parser.hpp @@ -214,7 +214,7 @@ class sax_ns_parser public: typedef HandlerT handler_type; - sax_ns_parser(const char* content, const size_t size, xmlns_context& ns_cxt, handler_type& handler); + sax_ns_parser(std::string_view content, xmlns_context& ns_cxt, handler_type& handler); ~sax_ns_parser() = default; /** @@ -357,8 +357,8 @@ private: template sax_ns_parser::sax_ns_parser( - const char* content, const size_t size, xmlns_context& ns_cxt, handler_type& handler) : - m_wrapper(ns_cxt, handler), m_parser(content, size, m_wrapper) + std::string_view content, xmlns_context& ns_cxt, handler_type& handler) : + m_wrapper(ns_cxt, handler), m_parser(content, m_wrapper) { } diff --git a/include/orcus/sax_parser.hpp b/include/orcus/sax_parser.hpp index 0295a511..c26df428 100644 --- a/include/orcus/sax_parser.hpp +++ b/include/orcus/sax_parser.hpp @@ -134,7 +134,7 @@ public: 
typedef HandlerT handler_type; typedef ConfigT config_type; - sax_parser(const char* content, const size_t size, handler_type& handler); + sax_parser(std::string_view content, handler_type& handler); ~sax_parser() = default; void parse(); @@ -162,9 +162,8 @@ private: }; template -sax_parser::sax_parser( - const char* content, const size_t size, handler_type& handler) : - sax::parser_base(content, size), +sax_parser::sax_parser(std::string_view content, handler_type& handler) : + sax::parser_base(content.data(), content.size()), m_handler(handler) { } diff --git a/include/orcus/sax_token_parser.hpp b/include/orcus/sax_token_parser.hpp index a2598064..867c8b5b 100644 --- a/include/orcus/sax_token_parser.hpp +++ b/include/orcus/sax_token_parser.hpp @@ -111,7 +111,7 @@ public: typedef HandlerT handler_type; sax_token_parser( - const char* content, const size_t size, const tokens& _tokens, + std::string_view content, const tokens& _tokens, xmlns_context& ns_cxt, handler_type& handler); ~sax_token_parser() = default; @@ -168,9 +168,9 @@ private: template sax_token_parser::sax_token_parser( - const char* content, const size_t size, const tokens& _tokens, xmlns_context& ns_cxt, handler_type& handler) : + std::string_view content, const tokens& _tokens, xmlns_context& ns_cxt, handler_type& handler) : m_wrapper(_tokens, handler), - m_parser(content, size, ns_cxt, m_wrapper) + m_parser(content, ns_cxt, m_wrapper) { } diff --git a/src/liborcus/dom_tree.cpp b/src/liborcus/dom_tree.cpp index f5e84122..8419c6aa 100644 --- a/src/liborcus/dom_tree.cpp +++ b/src/liborcus/dom_tree.cpp @@ -580,8 +580,7 @@ document_tree::~document_tree() {} void document_tree::load(std::string_view strm) { - sax_ns_parser parser( - strm.data(), strm.size(), mp_impl->m_ns_cxt, *mp_impl); + sax_ns_parser parser(strm, mp_impl->m_ns_cxt, *mp_impl); parser.parse(); } diff --git a/src/liborcus/orcus_xml.cpp b/src/liborcus/orcus_xml.cpp index 1de1169e..cb45f12d 100644 --- a/src/liborcus/orcus_xml.cpp +++ 
b/src/liborcus/orcus_xml.cpp @@ -585,7 +585,7 @@ void orcus_xml::read_stream(std::string_view stream) xml_data_sax_handler handler( *mp_impl->im_factory, mp_impl->link_positions, mp_impl->map_tree); - sax_ns_parser parser(stream.data(), stream.size(), ns_cxt, handler); + sax_ns_parser parser(stream, ns_cxt, handler); parser.parse(); } diff --git a/src/liborcus/orcus_xml_map_def.cpp b/src/liborcus/orcus_xml_map_def.cpp index bfe5dabe..7766cca0 100644 --- a/src/liborcus/orcus_xml_map_def.cpp +++ b/src/liborcus/orcus_xml_map_def.cpp @@ -189,7 +189,7 @@ void orcus_xml::read_map_definition(std::string_view stream) try { xml_map_sax_handler handler(*this); - sax_parser parser(stream.data(), stream.size(), handler); + sax_parser parser(stream, handler); parser.parse(); } catch (const parse_error& e) diff --git a/src/liborcus/xml_stream_parser.cpp b/src/liborcus/xml_stream_parser.cpp index daa565df..895821c4 100644 --- a/src/liborcus/xml_stream_parser.cpp +++ b/src/liborcus/xml_stream_parser.cpp @@ -64,7 +64,7 @@ void xml_stream_parser::parse() if (!mp_handler) return; - sax_token_parser sax(m_content, m_size, m_tokens, m_ns_cxt, *mp_handler); + sax_token_parser sax({m_content, m_size}, m_tokens, m_ns_cxt, *mp_handler); sax.parse(); } diff --git a/src/liborcus/xml_structure_tree.cpp b/src/liborcus/xml_structure_tree.cpp index c7828387..dbdb029d 100644 --- a/src/liborcus/xml_structure_tree.cpp +++ b/src/liborcus/xml_structure_tree.cpp @@ -523,7 +523,7 @@ xml_structure_tree::~xml_structure_tree() {} void xml_structure_tree::parse(std::string_view s) { xml_sax_handler hdl(mp_impl->m_pool); - sax_ns_parser parser(s.data(), s.size(), mp_impl->m_xmlns_cxt, hdl); + sax_ns_parser parser(s, mp_impl->m_xmlns_cxt, hdl); parser.parse(); mp_impl->mp_root = hdl.release_root_element(); } diff --git a/src/mso/encryption_info.cpp b/src/mso/encryption_info.cpp index 3ebfc15d..bca2bcd2 100644 --- a/src/mso/encryption_info.cpp +++ b/src/mso/encryption_info.cpp @@ -219,7 +219,7 @@ void 
encryption_info_reader::read(const char* p, size_t n) #endif orcus::xmlns_context cxt = mp_impl->m_ns_repo.create_context(); sax_handler hdl(cxt); - orcus::sax_ns_parser parser(p, n, cxt, hdl); + orcus::sax_ns_parser parser({p, n}, cxt, hdl); parser.parse(); } diff --git a/src/orcus_test_xml.cpp b/src/orcus_test_xml.cpp index 5def53fb..eb6bc954 100644 --- a/src/orcus_test_xml.cpp +++ b/src/orcus_test_xml.cpp @@ -203,7 +203,7 @@ void test_xml_encoded_attrs() assert(!content.empty()); sax_handler_encoded_attrs hdl; - sax_parser parser(content.data(), content.size(), hdl); + sax_parser parser(content.str(), hdl); parser.parse(); vector expected; diff --git a/src/parser/parser_test_xml_validation.cpp b/src/parser/parser_test_xml_validation.cpp index b1d50221..0e74e0aa 100644 --- a/src/parser/parser_test_xml_validation.cpp +++ b/src/parser/parser_test_xml_validation.cpp @@ -31,7 +31,7 @@ void test_invalid() orcus::file_content content(entry.string()); _handler hdl; - orcus::sax_parser<_handler> parser(content.data(), content.size(), hdl); + orcus::sax_parser<_handler> parser(content.str(), hdl); try { diff --git a/src/parser/sax_ns_parser_test.cpp b/src/parser/sax_ns_parser_test.cpp index 40ef8b08..eb7443f7 100644 --- a/src/parser/sax_ns_parser_test.cpp +++ b/src/parser/sax_ns_parser_test.cpp @@ -13,12 +13,11 @@ void test_handler() { const char* test_code = ""; - size_t len = std::strlen(test_code); orcus::sax_ns_handler hdl; orcus::xmlns_repository repo; orcus::xmlns_context cxt = repo.create_context(); - orcus::sax_ns_parser parser(test_code, len, cxt, hdl); + orcus::sax_ns_parser parser(test_code, cxt, hdl); parser.parse(); } @@ -53,7 +52,6 @@ void test_default_attr_ns() }; const char* test_code = ""; - size_t len = strlen(test_code); const orcus::xmlns_id_t predefined[] = { default_ns, nullptr }; @@ -65,7 +63,7 @@ void test_default_attr_ns() _handler hdl; hdl.default_ns_expected = default_ns; - orcus::sax_ns_parser<_handler> parser(test_code, len, cxt, hdl); + 
orcus::sax_ns_parser<_handler> parser(test_code, cxt, hdl); parser.parse(); } diff --git a/src/parser/sax_parser_test.cpp b/src/parser/sax_parser_test.cpp index b4227622..ec8b1f19 100644 --- a/src/parser/sax_parser_test.cpp +++ b/src/parser/sax_parser_test.cpp @@ -14,10 +14,9 @@ using namespace std; void test_handler() { const char* test_code = ""; - size_t len = std::strlen(test_code); orcus::sax_handler hdl; - orcus::sax_parser parser(test_code, len, hdl); + orcus::sax_parser parser(test_code, hdl); parser.parse(); } @@ -31,7 +30,7 @@ void test_attr_equal_with_whitespace() ; _handler hdl; - orcus::sax_parser<_handler> parser(content, strlen(content), hdl); + orcus::sax_parser<_handler> parser(content, hdl); parser.parse(); } @@ -52,7 +51,7 @@ void test_attr_with_encoded_chars_single_quotes() ; _handler hdl; - orcus::sax_parser<_handler> parser(content, strlen(content), hdl); + orcus::sax_parser<_handler> parser(content, hdl); parser.parse(); } diff --git a/src/parser/sax_token_parser_test.cpp b/src/parser/sax_token_parser_test.cpp index f3ad5927..d473196c 100644 --- a/src/parser/sax_token_parser_test.cpp +++ b/src/parser/sax_token_parser_test.cpp @@ -18,13 +18,12 @@ using namespace orcus; void test_handler() { const char* test_code = ""; - size_t len = strlen(test_code); orcus::sax_token_handler hdl; orcus::tokens token_map(nullptr, 0); orcus::xmlns_repository repo; orcus::xmlns_context cxt = repo.create_context(); - orcus::sax_token_parser parser(test_code, len, token_map, cxt, hdl); + orcus::sax_token_parser parser(test_code, token_map, cxt, hdl); parser.parse(); } @@ -32,7 +31,6 @@ void test_sax_token_parser_1() { // Test XML content. const char* content = ""; - size_t content_size = strlen(content); // Array of tokens to define for this test. 
const char* token_names[] = { @@ -115,7 +113,7 @@ void test_sax_token_parser_1() tokens token_map(token_names, token_count); xmlns_repository ns_repo; xmlns_context ns_cxt = ns_repo.create_context(); - sax_token_parser parser(content, content_size, token_map, ns_cxt, hdl); + sax_token_parser parser(content, token_map, ns_cxt, hdl); parser.parse(); assert(hdl.get_token_count() == std::size(checks)); @@ -162,13 +160,13 @@ void test_unicode_string() xmlns_repository ns_repo; xmlns_context ns_cxt = ns_repo.create_context(); handler hdl(u8"\u0021"); - sax_token_parser parser1(content1, strlen(content1), token_map, ns_cxt, hdl); + sax_token_parser parser1(content1, token_map, ns_cxt, hdl); parser1.parse(); hdl = handler(u8"\u00B6"); - sax_token_parser parser2(content2, strlen(content2), token_map, ns_cxt, hdl); + sax_token_parser parser2(content2, token_map, ns_cxt, hdl); parser2.parse(); hdl = handler(u8"\u20B9"); - sax_token_parser parser3(content3, strlen(content3), token_map, ns_cxt, hdl); + sax_token_parser parser3(content3, token_map, ns_cxt, hdl); parser3.parse(); } @@ -221,7 +219,7 @@ void test_declaration() { xml_declaration_t decl; handler hdl(decl); - sax_token_parser parser(c.content.data(), c.content.size(), token_map, ns_cxt, hdl); + sax_token_parser parser(c.content, token_map, ns_cxt, hdl); parser.parse(); assert(decl == c.decl); diff --git a/src/parser/sax_token_parser_thread.cpp b/src/parser/sax_token_parser_thread.cpp index 92ea0d6c..3d7b16b3 100644 --- a/src/parser/sax_token_parser_thread.cpp +++ b/src/parser/sax_token_parser_thread.cpp @@ -135,7 +135,7 @@ struct parser_thread::impl { try { - orcus::sax_token_parser parser(mp_char, m_size, m_tokens, m_ns_cxt, *this); + orcus::sax_token_parser parser({mp_char, m_size}, m_tokens, m_ns_cxt, *this); parser.parse(); } catch (const malformed_xml_error& e) diff --git a/src/parser/xml_writer_test.cpp b/src/parser/xml_writer_test.cpp index 8687db32..a6e4bed9 100644 --- a/src/parser/xml_writer_test.cpp +++ 
b/src/parser/xml_writer_test.cpp @@ -49,7 +49,7 @@ void test_encoded_content() _handler hdl; - sax_parser<_handler> parser(stream.data(), stream.size(), hdl); + sax_parser<_handler> parser(stream, hdl); parser.parse(); std::string content_read = hdl.os_content.str(); -- GitLab From 0ede4c01a6d5fd7ae6b8378db4436a4ded07decf Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 29 Nov 2022 19:13:02 -0500 Subject: [PATCH 57/91] json_parser and its handler to use std::string_view over char*-size pair --- benchmark/json_parser.cpp | 8 +++---- doc_example/json_parser_1.cpp | 11 ++++----- include/orcus/json_parser.hpp | 27 ++++++++++------------ include/orcus/json_parser_base.hpp | 2 +- src/liborcus/json_document_tree.cpp | 9 ++++---- src/liborcus/json_structure_tree.cpp | 10 ++++---- src/liborcus/orcus_json.cpp | 10 ++++---- src/parser/json_parser_base.cpp | 4 ++-- src/parser/json_parser_test.cpp | 3 +-- src/parser/json_parser_thread.cpp | 8 +++---- src/parser/parser_test_json_validation.cpp | 6 ++--- src/python/json.cpp | 10 ++++---- 12 files changed, 49 insertions(+), 59 deletions(-) diff --git a/benchmark/json_parser.cpp b/benchmark/json_parser.cpp index 5f9837f7..1917b608 100644 --- a/benchmark/json_parser.cpp +++ b/benchmark/json_parser.cpp @@ -96,9 +96,8 @@ public: do_work(); } - void object_key(const char* p, size_t len, bool transient) + void object_key(std::string_view s, bool transient) { - std::string_view s{p, len}; if (transient) s = m_pool.intern(s).first; @@ -130,9 +129,8 @@ public: do_work(); } - void string(const char* p, size_t len, bool transient) + void string(std::string_view s, bool transient) { - std::string_view s{p, len}; if (transient) s = m_pool.intern(s).first; @@ -171,7 +169,7 @@ int main(int argc, char** argv) try { stack_printer __stack_printer__("parsing"); - orcus::json_parser parser(content.data(), content.size(), hdl); + orcus::json_parser parser(content, hdl); parser.parse(); } diff --git a/doc_example/json_parser_1.cpp 
b/doc_example/json_parser_1.cpp index 832e5176..322316a1 100644 --- a/doc_example/json_parser_1.cpp +++ b/doc_example/json_parser_1.cpp @@ -8,14 +8,14 @@ using namespace std; class json_parser_handler : public orcus::json_handler { public: - void object_key(const char* p, size_t len, bool /*transient*/) + void object_key(std::string_view key, bool /*transient*/) { - cout << "object key: " << std::string_view(p, len) << endl; + cout << "object key: " << key << endl; } - void string(const char* p, size_t len, bool /*transient*/) + void string(std::string_view val, bool /*transient*/) { - cout << "string: " << std::string_view(p, len) << endl; + cout << "string: " << val << endl; } void number(double val) @@ -27,13 +27,12 @@ public: int main() { const char* test_code = "{\"key1\": [1,2,3,4,5], \"key2\": 12.3}"; - size_t n_test_code = strlen(test_code); cout << "JSON string: " << test_code << endl; // Instantiate the parser with an own handler. json_parser_handler hdl; - orcus::json_parser parser(test_code, n_test_code, hdl); + orcus::json_parser parser(test_code, hdl); // Parse the string. parser.parse(); diff --git a/include/orcus/json_parser.hpp b/include/orcus/json_parser.hpp index ae9e228e..e37d50a9 100644 --- a/include/orcus/json_parser.hpp +++ b/include/orcus/json_parser.hpp @@ -46,17 +46,16 @@ public: /** * Called when a key value string of an object is encountered. * - * @param p pointer to the first character of the key value string. - * @param len length of the key value string. + * @param key key value string. * @param transient true if the string value is stored in a temporary * buffer which is not guaranteed to hold the string * value after the end of this callback. When false, the * pointer points to somewhere in the JSON stream being * parsed. 
*/ - void object_key(const char* p, size_t len, bool transient) + void object_key(std::string_view key, bool transient) { - (void)p; (void)len; (void)transient; + (void)key; (void)transient; } /** @@ -82,17 +81,16 @@ public: /** * Called when a string value is encountered. * - * @param p pointer to the first character of the string value. - * @param len length of the string value. + * @param val string value. * @param transient true if the string value is stored in a temporary * buffer which is not guaranteed to hold the string * value after the end of this callback. When false, the * pointer points to somewhere in the JSON stream being * parsed. */ - void string(const char* p, size_t len, bool transient) + void string(std::string_view val, bool transient) { - (void)p; (void)len; (void)transient; + (void)val; (void)transient; } /** @@ -121,11 +119,10 @@ public: /** * Constructor. * - * @param p pointer to a string stream containing JSON string. - * @param n size of the stream. + * @param content string stream containing JSON string. * @param hdl handler class instance. */ - json_parser(const char* p, size_t n, handler_type& hdl); + json_parser(std::string_view content, handler_type& hdl); /** * Call this method to start parsing. 
@@ -147,8 +144,8 @@ private: template json_parser<_Handler>::json_parser( - const char* p, size_t n, handler_type& hdl) : - json::parser_base(p, n), m_handler(hdl) {} + std::string_view content, handler_type& hdl) : + json::parser_base(content), m_handler(hdl) {} template void json_parser<_Handler>::parse() @@ -331,7 +328,7 @@ void json_parser<_Handler>::object() throw parse_error("object: unknown error while parsing a key value.", offset()); } - m_handler.object_key(res.str, res.length, res.transient); + m_handler.object_key({res.str, res.length}, res.transient); skip_ws(); if (cur_char() != ':') @@ -385,7 +382,7 @@ void json_parser<_Handler>::string() parse_quoted_string_state res = parse_string(); if (res.str) { - m_handler.string(res.str, res.length, res.transient); + m_handler.string({res.str, res.length}, res.transient); return; } diff --git a/include/orcus/json_parser_base.hpp b/include/orcus/json_parser_base.hpp index 3e49753a..461808eb 100644 --- a/include/orcus/json_parser_base.hpp +++ b/include/orcus/json_parser_base.hpp @@ -27,7 +27,7 @@ protected: parser_base(const parser_base&) = delete; parser_base& operator=(const parser_base&) = delete; - parser_base(const char* p, size_t n); + parser_base(std::string_view content); ~parser_base(); void skip_ws(); diff --git a/src/liborcus/json_document_tree.cpp b/src/liborcus/json_document_tree.cpp index 8db5c42b..071198b5 100644 --- a/src/liborcus/json_document_tree.cpp +++ b/src/liborcus/json_document_tree.cpp @@ -750,10 +750,10 @@ public: } } - void object_key(const char* p, size_t len, bool transient) + void object_key(std::string_view key, bool transient) { parser_stack& cur = m_stack.back(); - cur.key = std::string_view(p, len); + cur.key = key; if (m_config.persistent_string_values || transient) // The tree manages the life cycle of this string value. 
cur.key = m_res.str_pool.intern(cur.key).first; @@ -780,9 +780,8 @@ public: push_value(m_res.obj_pool.construct(detail::node_t::null)); } - void string(const char* p, size_t len, bool transient) + void string(std::string_view s, bool transient) { - std::string_view s(p, len); if (m_config.persistent_string_values || transient) // The tree manages the life cycle of this string value. s = m_res.str_pool.intern(s).first; @@ -1674,7 +1673,7 @@ document_tree& document_tree::operator= (object obj) void document_tree::load(std::string_view stream, const json_config& config) { json::parser_handler hdl(config, mp_impl->m_res); - json_parser parser(stream.data(), stream.size(), hdl); + json_parser parser(stream, hdl); parser.parse(); mp_impl->m_root = hdl.get_root(); diff --git a/src/liborcus/json_structure_tree.cpp b/src/liborcus/json_structure_tree.cpp index 8f03a6fa..0f20c71a 100644 --- a/src/liborcus/json_structure_tree.cpp +++ b/src/liborcus/json_structure_tree.cpp @@ -64,7 +64,7 @@ struct structure_node */ int32_t child_count = 0; - pstring name; //< value of a key for a object key node. + std::string_view name; //< value of a key for a object key node. 
/** * For a value node that is an immediate child of an array node, these @@ -203,10 +203,10 @@ struct structure_tree::impl push_stack(node_type::object); } - void object_key(const char* p, size_t len, bool transient) + void object_key(std::string_view key, bool transient) { structure_node node(node_type::object_key); - node.name = pstring(p, len); + node.name = key; if (transient) node.name = m_pool.intern(node.name).first; @@ -234,7 +234,7 @@ struct structure_tree::impl push_value(); } - void string(const char* /*p*/, size_t /*len*/, bool /*transient*/) + void string(std::string_view /*val*/, bool /*transient*/) { push_value(); } @@ -666,7 +666,7 @@ structure_tree::~structure_tree() {} void structure_tree::parse(std::string_view stream) { - json_parser parser(stream.data(), stream.size(), *mp_impl); + json_parser parser(stream, *mp_impl); parser.parse(); } diff --git a/src/liborcus/orcus_json.cpp b/src/liborcus/orcus_json.cpp index f60912a7..c1fa0e7c 100644 --- a/src/liborcus/orcus_json.cpp +++ b/src/liborcus/orcus_json.cpp @@ -136,9 +136,9 @@ public: push_node(json_map_tree::input_node_type::object); } - void object_key(const char* p, size_t len, bool /*transient*/) + void object_key(std::string_view key, bool /*transient*/) { - m_walker.set_object_key(p, len); + m_walker.set_object_key(key.data(), key.size()); } void end_object() @@ -167,10 +167,10 @@ public: pop_node(json_map_tree::input_node_type::value); } - void string(const char* p, size_t len, bool /*transient*/) + void string(std::string_view val, bool /*transient*/) { push_node(json_map_tree::input_node_type::value); - commit_value(json_value(p, len)); + commit_value(json_value(val.data(), val.size())); pop_node(json_map_tree::input_node_type::value); } @@ -386,7 +386,7 @@ void orcus_json::read_stream(std::string_view stream) } json_content_handler hdl(mp_impl->map_tree, *mp_impl->im_factory); - json_parser parser(stream.data(), stream.size(), hdl); + json_parser parser(stream, hdl); parser.parse(); 
mp_impl->im_factory->finalize(); diff --git a/src/parser/json_parser_base.cpp b/src/parser/json_parser_base.cpp index 17018f59..0dcdc3b7 100644 --- a/src/parser/json_parser_base.cpp +++ b/src/parser/json_parser_base.cpp @@ -36,8 +36,8 @@ struct parser_base::impl cell_buffer m_buffer; }; -parser_base::parser_base(const char* p, size_t n) : - orcus::parser_base(p, n), mp_impl(std::make_unique()) +parser_base::parser_base(std::string_view content) : + orcus::parser_base(content.data(), content.size()), mp_impl(std::make_unique()) { set_numeric_parser(parse_numeric_json); diff --git a/src/parser/json_parser_test.cpp b/src/parser/json_parser_test.cpp index 470b71c1..db648617 100644 --- a/src/parser/json_parser_test.cpp +++ b/src/parser/json_parser_test.cpp @@ -12,10 +12,9 @@ void test_handler() { const char* test_code = "{\"key1\": [1,2,3,4,5], \"key2\": 12.3}"; - size_t n = strlen(test_code); orcus::json_handler hdl; - orcus::json_parser parser(test_code, n, hdl); + orcus::json_parser parser(test_code, hdl); parser.parse(); } diff --git a/src/parser/json_parser_thread.cpp b/src/parser/json_parser_thread.cpp index 3e5b56e2..c9eea8d9 100644 --- a/src/parser/json_parser_thread.cpp +++ b/src/parser/json_parser_thread.cpp @@ -77,7 +77,7 @@ struct parser_thread::impl { try { - json_parser parser(mp_char, m_size, *this); + json_parser parser({mp_char, m_size}, *this); parser.parse(); } catch (const parse_error& e) @@ -119,9 +119,8 @@ struct parser_thread::impl check_and_notify(); } - void object_key(const char* p, size_t len, bool transient) + void object_key(std::string_view s, bool transient) { - std::string_view s{p, len}; if (transient) s = m_pool.intern(s).first; @@ -153,9 +152,8 @@ struct parser_thread::impl check_and_notify(); } - void string(const char* p, size_t len, bool transient) + void string(std::string_view s, bool transient) { - std::string_view s{p, len}; if (transient) s = m_pool.intern(s).first; diff --git a/src/parser/parser_test_json_validation.cpp 
b/src/parser/parser_test_json_validation.cpp index 65056bb6..a0012269 100644 --- a/src/parser/parser_test_json_validation.cpp +++ b/src/parser/parser_test_json_validation.cpp @@ -369,7 +369,7 @@ void test_pass() std::cout << test_file_name << std::endl; try { - orcus::json_parser parser(content.c_str(), content.size(), hdl); + orcus::json_parser parser(content, hdl); parser.parse(); } catch (const orcus::parse_error& e) @@ -391,7 +391,7 @@ void test_fail() std::cout << test_file_name << std::endl; bool failed = false; try { - orcus::json_parser parser(content.c_str(), content.size(), hdl); + orcus::json_parser parser(content, hdl); parser.parse(); } catch (const orcus::parse_error&) @@ -416,7 +416,7 @@ void test_indeterminate() std::string content = load_file(test_file_name); std::cout << test_file_name << std::endl; try { - orcus::json_parser parser(content.c_str(), content.size(), hdl); + orcus::json_parser parser(content, hdl); parser.parse(); } catch (const orcus::parse_error&) diff --git a/src/python/json.cpp b/src/python/json.cpp index 265ac1c4..f64032b1 100644 --- a/src/python/json.cpp +++ b/src/python/json.cpp @@ -177,10 +177,10 @@ public: } } - void object_key(const char* p, size_t len, bool /*transient*/) + void object_key(std::string_view key, bool /*transient*/) { parser_stack& cur = m_stack.back(); - cur.key = PyUnicode_FromStringAndSize(p, len); + cur.key = PyUnicode_FromStringAndSize(key.data(), key.size()); } void end_object() @@ -213,9 +213,9 @@ public: push_value(Py_None); } - void string(const char* p, size_t len, bool /*transient*/) + void string(std::string_view val, bool /*transient*/) { - push_value(PyUnicode_FromStringAndSize(p, len)); + push_value(PyUnicode_FromStringAndSize(val.data(), val.size())); } void number(double val) @@ -242,7 +242,7 @@ PyObject* json_loads(PyObject* /*module*/, PyObject* args, PyObject* kwargs) } json_parser_handler hdl; - orcus::json_parser parser(stream, strlen(stream), hdl); + orcus::json_parser parser(stream, 
hdl); try { parser.parse(); -- GitLab From a2d774549211b49254f1af447648e32b0ab4bb75 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 29 Nov 2022 19:56:11 -0500 Subject: [PATCH 58/91] Use std::string_view in public functions --- include/orcus/css_parser.hpp | 61 ++++++++++++++---------------- include/orcus/css_parser_base.hpp | 2 +- src/liborcus/css_document_tree.cpp | 42 ++++++++++---------- src/parser/css_parser_base.cpp | 4 +- src/parser/css_parser_test.cpp | 3 +- 5 files changed, 53 insertions(+), 59 deletions(-) diff --git a/include/orcus/css_parser.hpp b/include/orcus/css_parser.hpp index 063508b1..eba08265 100644 --- a/include/orcus/css_parser.hpp +++ b/include/orcus/css_parser.hpp @@ -31,19 +31,19 @@ namespace orcus { class css_handler { public: - void at_rule_name(const char* p, size_t n) + void at_rule_name(std::string_view name) { - (void)p; (void)n; + (void)name; } - void simple_selector_type(const char* p, size_t n) + void simple_selector_type(std::string_view type) { - (void)p; (void)n; + (void)type; } - void simple_selector_class(const char* p, size_t n) + void simple_selector_class(std::string_view cls) { - (void)p; (void)n; + (void)cls; } void simple_selector_pseudo_element(orcus::css::pseudo_element_t pe) @@ -56,9 +56,9 @@ public: (void)pc; } - void simple_selector_id(const char* p, size_t n) + void simple_selector_id(std::string_view id) { - (void)p; (void)n; + (void)id; } void end_simple_selector() {} @@ -73,23 +73,21 @@ public: /** * Called at each property name. * - * @param p pointer to the char-array containing the property name string. - * @param n length of the property name string. + * @param name property name string. */ - void property_name(const char* p, size_t n) + void property_name(std::string_view name) { - (void)p; (void)n; + (void)name; } /** * Called at each ordinary property value string. * - * @param p pointer to the char-array containing the value string. - * @param n length of the value string. 
+ * @param value value string. */ - void value(const char* p, size_t n) + void value(std::string_view value) { - (void)p; (void)n; + (void)value; } /** @@ -147,12 +145,11 @@ public: /** * Called at each URL value of a property. * - * @param p pointer to the char-array containing the URL value string. - * @param n length of the URL value string. + * @param url URL value string. */ - void url(const char* p, size_t n) + void url(std::string_view url) { - (void)p; (void)n; + (void)url; } /** @@ -200,7 +197,7 @@ class css_parser : public css::parser_base public: typedef HandlerT handler_type; - css_parser(const char* p, size_t n, handler_type& hdl); + css_parser(std::string_view content, handler_type& hdl); void parse(); private: @@ -227,8 +224,8 @@ private: }; template -css_parser<_Handler>::css_parser(const char* p, size_t n, handler_type& hdl) : - css::parser_base(p, n), m_handler(hdl) {} +css_parser<_Handler>::css_parser(std::string_view content, handler_type& hdl) : + css::parser_base(content), m_handler(hdl) {} template void css_parser<_Handler>::parse() @@ -305,7 +302,7 @@ void css_parser<_Handler>::at_rule_name() identifier(p, len); skip_blanks(); - m_handler.at_rule_name(p, len); + m_handler.at_rule_name({p, len}); #if ORCUS_DEBUG_CSS std::string foo(p, len); std::cout << "at-rule name: " << foo.c_str() << std::endl; @@ -348,7 +345,7 @@ void css_parser<_Handler>::simple_selector_name() std::string s(p, n); cout << " type=" << s; #endif - m_handler.simple_selector_type(p, n); + m_handler.simple_selector_type({p, n}); } bool in_loop = true; @@ -360,7 +357,7 @@ void css_parser<_Handler>::simple_selector_name() { next(); identifier(p, n); - m_handler.simple_selector_class(p, n); + m_handler.simple_selector_class({p, n}); #if ORCUS_DEBUG_CSS std::string s(p, n); std::cout << " class=" << s; @@ -371,7 +368,7 @@ void css_parser<_Handler>::simple_selector_name() { next(); identifier(p, n); - m_handler.simple_selector_id(p, n); + m_handler.simple_selector_id({p, n}); 
#if ORCUS_DEBUG_CSS std::string s(p, n); std::cout << " id=" << s; @@ -438,7 +435,7 @@ void css_parser<_Handler>::property_name() identifier(p, len); skip_comments_and_blanks(); - m_handler.property_name(p, len); + m_handler.property_name({p, len}); #if ORCUS_DEBUG_CSS std::string foo(p, len); std::cout << "property name: " << foo.c_str() << std::endl; @@ -494,7 +491,7 @@ void css_parser<_Handler>::quoted_value(char c) next(); skip_blanks(); - m_handler.value(p, len); + m_handler.value({p, len}); #if ORCUS_DEBUG_CSS std::string foo(p, len); std::cout << "quoted value: " << foo.c_str() << std::endl; @@ -522,7 +519,7 @@ void css_parser<_Handler>::value() return; } - m_handler.value(v.data(), v.size()); + m_handler.value(v); skip_comments_and_blanks(); @@ -693,7 +690,7 @@ void css_parser<_Handler>::function_url() literal(p, len, c); next(); skip_comments_and_blanks(); - m_handler.url(p, len); + m_handler.url({p, len}); #if ORCUS_DEBUG_CSS std::cout << "url(" << std::string(p, len) << ")" << std::endl; #endif @@ -705,7 +702,7 @@ void css_parser<_Handler>::function_url() size_t len; skip_to_or_blank(p, len, ")"); skip_comments_and_blanks(); - m_handler.url(p, len); + m_handler.url({p, len}); #if ORCUS_DEBUG_CSS std::cout << "url(" << std::string(p, len) << ")" << std::endl; #endif diff --git a/include/orcus/css_parser_base.hpp b/include/orcus/css_parser_base.hpp index e372d39c..45142691 100644 --- a/include/orcus/css_parser_base.hpp +++ b/include/orcus/css_parser_base.hpp @@ -21,7 +21,7 @@ namespace orcus { namespace css { class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base { public: - parser_base(const char* p, size_t n); + parser_base(std::string_view content); protected: diff --git a/src/liborcus/css_document_tree.cpp b/src/liborcus/css_document_tree.cpp index 46bf7e91..c4176450 100644 --- a/src/liborcus/css_document_tree.cpp +++ b/src/liborcus/css_document_tree.cpp @@ -36,7 +36,7 @@ class parser_handler css_document_tree& m_doc; std::vector 
m_cur_selector_group; css_properties_t m_cur_properties; - pstring m_cur_prop_name; + std::string_view m_cur_prop_name; std::vector m_cur_prop_values; css_selector_t m_cur_selector; /// current selector css_simple_selector_t m_cur_simple_selector; @@ -50,24 +50,23 @@ public: m_cur_combinator(css::combinator_t::descendant), m_in_prop(false) {} - void at_rule_name(const char* p, size_t n) + void at_rule_name(std::string_view name) { #if ORCUS_DEBUG_CSS_DOCTREE - cout << "@" << string(p, n).c_str(); + cout << "@" << name; #else - (void)p; - (void)n; + (void)name; #endif } - void simple_selector_type(const char* p, size_t n) + void simple_selector_type(std::string_view type) { - m_cur_simple_selector.name = pstring(p, n); + m_cur_simple_selector.name = type; } - void simple_selector_class(const char* p, size_t n) + void simple_selector_class(std::string_view cls) { - m_cur_simple_selector.classes.insert(pstring(p, n)); + m_cur_simple_selector.classes.insert(cls); } void simple_selector_pseudo_element(css::pseudo_element_t pe) @@ -81,9 +80,9 @@ public: m_cur_simple_selector.pseudo_classes |= pc; } - void simple_selector_id(const char* p, size_t n) + void simple_selector_id(std::string_view id) { - m_cur_simple_selector.id = std::string_view(p, n); + m_cur_simple_selector.id = id; } void end_simple_selector() @@ -119,20 +118,19 @@ public: m_cur_combinator = combinator; } - void property_name(const char* p, size_t n) + void property_name(std::string_view name) { - m_cur_prop_name = pstring(p, n); + m_cur_prop_name = name; #if ORCUS_DEBUG_CSS_DOCTREE - cout << string(p, n).c_str() << ":"; + cout << name << ":"; #endif } - void value(const char* p, size_t n) + void value(std::string_view s) { - std::string_view s(p, n); m_cur_prop_values.push_back(s); #if ORCUS_DEBUG_CSS_DOCTREE - cout << " '" << string(p, n).c_str() << "'"; + cout << " '" << s << "'"; #endif } @@ -180,14 +178,14 @@ public: m_cur_prop_values.push_back(val); } - void url(const char* p, size_t n) + void 
url(std::string_view url) { #if ORCUS_DEBUG_CSS_DOCTREE - cout << " url(" << pstring(p, n) << ")"; + cout << " url(" << url << ")"; #endif css_property_value_t val; val.type = orcus::css::property_value_t::url; - val.value = std::string_view(p, n); + val.value = url; m_cur_prop_values.push_back(val); } @@ -243,7 +241,7 @@ public: { m_cur_properties.insert( css_properties_t::value_type(m_cur_prop_name, m_cur_prop_values)); - m_cur_prop_name.clear(); + m_cur_prop_name = std::string_view{}; m_cur_prop_values.clear(); #if ORCUS_DEBUG_CSS_DOCTREE cout << endl; @@ -558,7 +556,7 @@ void css_document_tree::load(std::string_view stream) return; parser_handler handler(*this); - css_parser parser(stream.data(), stream.size(), handler); + css_parser parser(stream, handler); parser.parse(); } diff --git a/src/parser/css_parser_base.cpp b/src/parser/css_parser_base.cpp index 676d6f50..7d2b2282 100644 --- a/src/parser/css_parser_base.cpp +++ b/src/parser/css_parser_base.cpp @@ -17,8 +17,8 @@ namespace orcus { namespace css { -parser_base::parser_base(const char* p, size_t n) : - orcus::parser_base(p, n), +parser_base::parser_base(std::string_view content) : + orcus::parser_base(content.data(), content.size()), m_simple_selector_count(0), m_combinator(combinator_t::descendant) {} diff --git a/src/parser/css_parser_test.cpp b/src/parser/css_parser_test.cpp index 6c6c7831..95f4b1cc 100644 --- a/src/parser/css_parser_test.cpp +++ b/src/parser/css_parser_test.cpp @@ -12,10 +12,9 @@ void test_handler() { const char* test_code = "p { background-color: white; }"; - size_t n = strlen(test_code); orcus::css_handler hdl; - orcus::css_parser parser(test_code, n, hdl); + orcus::css_parser parser(test_code, hdl); parser.parse(); } -- GitLab From ff7970aad468b31241b8aaff67bd98d14f95648f Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 29 Nov 2022 21:02:44 -0500 Subject: [PATCH 59/91] Add more content to our CSS doc --- doc/cpp/parser/css.rst | 63 +++++++++++++++++++++-- 
include/orcus/css_parser.hpp | 96 +++++++++++++++++++++++++++++++++++- 2 files changed, 154 insertions(+), 5 deletions(-) diff --git a/doc/cpp/parser/css.rst b/doc/cpp/parser/css.rst index ce875f63..beb6cb21 100644 --- a/doc/cpp/parser/css.rst +++ b/doc/cpp/parser/css.rst @@ -16,12 +16,69 @@ CSS types --------- .. doxygenenum:: orcus::css::combinator_t - .. doxygenenum:: orcus::css::property_function_t - .. doxygenenum:: orcus::css::property_value_t .. doxygentypedef:: orcus::css::pseudo_element_t - .. doxygentypedef:: orcus::css::pseudo_class_t +.. doxygenstruct:: orcus::css::rgba_color_t +.. doxygenstruct:: orcus::css::hsla_color_t + + +Constants +--------- + +Pseudo elements +^^^^^^^^^^^^^^^ + +.. doxygenvariable:: orcus::css::pseudo_element_after +.. doxygenvariable:: orcus::css::pseudo_element_before +.. doxygenvariable:: orcus::css::pseudo_element_first_letter +.. doxygenvariable:: orcus::css::pseudo_element_first_line +.. doxygenvariable:: orcus::css::pseudo_element_selection +.. doxygenvariable:: orcus::css::pseudo_element_backdrop + +Pseudo classes +^^^^^^^^^^^^^^ + +.. doxygenvariable:: orcus::css::pseudo_class_active +.. doxygenvariable:: orcus::css::pseudo_class_checked +.. doxygenvariable:: orcus::css::pseudo_class_default +.. doxygenvariable:: orcus::css::pseudo_class_dir +.. doxygenvariable:: orcus::css::pseudo_class_disabled +.. doxygenvariable:: orcus::css::pseudo_class_empty +.. doxygenvariable:: orcus::css::pseudo_class_enabled +.. doxygenvariable:: orcus::css::pseudo_class_first +.. doxygenvariable:: orcus::css::pseudo_class_first_child +.. doxygenvariable:: orcus::css::pseudo_class_first_of_type +.. doxygenvariable:: orcus::css::pseudo_class_fullscreen +.. doxygenvariable:: orcus::css::pseudo_class_focus +.. doxygenvariable:: orcus::css::pseudo_class_hover +.. doxygenvariable:: orcus::css::pseudo_class_indeterminate +.. doxygenvariable:: orcus::css::pseudo_class_in_range +.. doxygenvariable:: orcus::css::pseudo_class_invalid +.. 
doxygenvariable:: orcus::css::pseudo_class_lang +.. doxygenvariable:: orcus::css::pseudo_class_last_child +.. doxygenvariable:: orcus::css::pseudo_class_last_of_type +.. doxygenvariable:: orcus::css::pseudo_class_left +.. doxygenvariable:: orcus::css::pseudo_class_link +.. doxygenvariable:: orcus::css::pseudo_class_not +.. doxygenvariable:: orcus::css::pseudo_class_nth_child +.. doxygenvariable:: orcus::css::pseudo_class_nth_last_child +.. doxygenvariable:: orcus::css::pseudo_class_nth_last_of_type +.. doxygenvariable:: orcus::css::pseudo_class_nth_of_type +.. doxygenvariable:: orcus::css::pseudo_class_only_child +.. doxygenvariable:: orcus::css::pseudo_class_only_of_type +.. doxygenvariable:: orcus::css::pseudo_class_optional +.. doxygenvariable:: orcus::css::pseudo_class_out_of_range +.. doxygenvariable:: orcus::css::pseudo_class_read_only +.. doxygenvariable:: orcus::css::pseudo_class_read_write +.. doxygenvariable:: orcus::css::pseudo_class_required +.. doxygenvariable:: orcus::css::pseudo_class_right +.. doxygenvariable:: orcus::css::pseudo_class_root +.. doxygenvariable:: orcus::css::pseudo_class_scope +.. doxygenvariable:: orcus::css::pseudo_class_target +.. doxygenvariable:: orcus::css::pseudo_class_valid +.. doxygenvariable:: orcus::css::pseudo_class_visited + diff --git a/include/orcus/css_parser.hpp b/include/orcus/css_parser.hpp index eba08265..7623f898 100644 --- a/include/orcus/css_parser.hpp +++ b/include/orcus/css_parser.hpp @@ -31,38 +31,123 @@ namespace orcus { class css_handler { public: + /** + * Called upon encountering an at-rule. + * + * @param name name of the at-rule. + */ void at_rule_name(std::string_view name) { (void)name; } + /** + * Called upon encountering a simple selector type. A simple selector may + * consist of + * + * @code{.txt} + * .# + * @endcode + * + * and this function only passes the type part of the simple selector + * expression. + * + * @param type simple selector type. 
+ */ void simple_selector_type(std::string_view type) { (void)type; } + /** + * Called upon encountering a simple selector class. A simple selector may + * consist of + * + * @code{.txt} + * .# + * @endcode + * + * and this function only passes the class part of the simple selector + * expression. + * + * @param cls simple selector class. + */ void simple_selector_class(std::string_view cls) { (void)cls; } + /** + * Called upon encountering a pseudo element of a simple selector. For + * instance, given the following CSS block: + * + * @code{.css} + * p::first-line { + * color: blue; + * text-transform: uppercase; + * } + * @endcode + * + * the `first-line` part is the pseudo element of the selector named `p`. + * + * @param pe pseudo element of a simple selector. + */ void simple_selector_pseudo_element(orcus::css::pseudo_element_t pe) { (void)pe; } + /** + * Called upon encountering a pseudo class of a simple selector. For + * instance, given the following CSS block: + * + * @code{.css} + * button:hover { + * color: blue; + * } + * @endcode + * + * the `hover` part is the pseudo class of the selector named `button`. + * + * @param pc pseudo class of a simple selector. + */ void simple_selector_pseudo_class(orcus::css::pseudo_class_t pc) { (void)pc; } + /** + * Called upon encountering a simple selector id. A simple selector may + * consist of + * + * @code{.txt} + * .# + * @endcode + * + * and this function only passes the id part of the simple selector + * expression. + * + * @param id simple selector id. + */ void simple_selector_id(std::string_view id) { (void)id; } + /** + * Called at the end of a simple selector expression. + * + * @todo find out the difference between a simple selector and a selector, + * and document it. + */ void end_simple_selector() {} + /** + * Called at the end of a selector expression. + * + * @todo find out the difference between a simple selector and a selector, + * and document it. 
+ */ void end_selector() {} void combinator(orcus::css::combinator_t combinator) @@ -175,12 +260,19 @@ public: void end_block() {} /** - * Called at the beginning of each property. + * Called at the beginning of a single property expression. Each property + * expression may consist of + * + * @code{.txt} + * : , ..., + * @endcode + * + * terminated by either a `;` or `}`. */ void begin_property() {} /** - * Called at the end of each property. + * Called at the end of a single property expression. */ void end_property() {} }; -- GitLab From 1df0147193431ff99534a86411011fbe6d4ed4ec Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 29 Nov 2022 21:12:03 -0500 Subject: [PATCH 60/91] Use std::string_view as the key type for sorted_string_map --- src/parser/css_types.cpp | 133 ++++++++++++++++++++++----------------- 1 file changed, 74 insertions(+), 59 deletions(-) diff --git a/src/parser/css_types.cpp b/src/parser/css_types.cpp index b1289d92..90791593 100644 --- a/src/parser/css_types.cpp +++ b/src/parser/css_types.cpp @@ -22,26 +22,33 @@ const pseudo_element_t pseudo_element_backdrop = 0x0020; namespace { -typedef mdds::sorted_string_map pe_map_type; +namespace pseudo_elem { + +using map_type = mdds::sorted_string_map; // Keys must be sorted. 
-pe_map_type::entry pseudo_elem_type_entries[] = { - { MDDS_ASCII("after"), pseudo_element_after }, - { MDDS_ASCII("backdrop"), pseudo_element_backdrop }, - { MDDS_ASCII("before"), pseudo_element_before }, - { MDDS_ASCII("first-letter"), pseudo_element_first_letter }, - { MDDS_ASCII("first-line"), pseudo_element_first_line }, - { MDDS_ASCII("selection"), pseudo_element_selection }, +constexpr map_type::entry entries[] = { + { "after", pseudo_element_after }, + { "backdrop", pseudo_element_backdrop }, + { "before", pseudo_element_before }, + { "first-letter", pseudo_element_first_letter }, + { "first-line", pseudo_element_first_line }, + { "selection", pseudo_element_selection }, }; +const map_type& get() +{ + static map_type map(entries, std::size(entries), 0); + return map; +} + +} // namespace pseudo_elem + } pseudo_element_t to_pseudo_element(std::string_view s) { - static pe_map_type elem_map( - pseudo_elem_type_entries, std::size(pseudo_elem_type_entries), 0); - - return elem_map.find(s.data(), s.size()); + return pseudo_elem::get().find(s); } const pseudo_class_t pseudo_class_active = 0x0000000000000001; @@ -86,66 +93,74 @@ const pseudo_class_t pseudo_class_visited = 0x0000004000000000; namespace { -typedef mdds::sorted_string_map pc_map_type; +namespace pseudo_class { + +using map_type = mdds::sorted_string_map; // Keys must be sorted. 
-pc_map_type::entry pseudo_class_type_entries[] = { - { MDDS_ASCII("active"), pseudo_class_active }, - { MDDS_ASCII("checked"), pseudo_class_checked }, - { MDDS_ASCII("default"), pseudo_class_default }, - { MDDS_ASCII("dir"), pseudo_class_dir }, - { MDDS_ASCII("disabled"), pseudo_class_disabled }, - { MDDS_ASCII("empty"), pseudo_class_empty }, - { MDDS_ASCII("enabled"), pseudo_class_enabled }, - { MDDS_ASCII("first"), pseudo_class_first }, - { MDDS_ASCII("first-child"), pseudo_class_first_child }, - { MDDS_ASCII("first-of-type"), pseudo_class_first_of_type }, - { MDDS_ASCII("focus"), pseudo_class_focus }, - { MDDS_ASCII("fullscreen"), pseudo_class_fullscreen }, - { MDDS_ASCII("hover"), pseudo_class_hover }, - { MDDS_ASCII("in-range"), pseudo_class_in_range }, - { MDDS_ASCII("indeterminate"), pseudo_class_indeterminate }, - { MDDS_ASCII("invalid"), pseudo_class_invalid }, - { MDDS_ASCII("lang"), pseudo_class_lang }, - { MDDS_ASCII("last-child"), pseudo_class_last_child }, - { MDDS_ASCII("last-of-type"), pseudo_class_last_of_type }, - { MDDS_ASCII("left"), pseudo_class_left }, - { MDDS_ASCII("link"), pseudo_class_link }, - { MDDS_ASCII("not"), pseudo_class_not }, - { MDDS_ASCII("nth-child"), pseudo_class_nth_child }, - { MDDS_ASCII("nth-last-child"), pseudo_class_nth_last_child }, - { MDDS_ASCII("nth-last-of-type"), pseudo_class_nth_last_of_type }, - { MDDS_ASCII("nth-of-type"), pseudo_class_nth_of_type }, - { MDDS_ASCII("only-child"), pseudo_class_only_child }, - { MDDS_ASCII("only-of-type"), pseudo_class_only_of_type }, - { MDDS_ASCII("optional"), pseudo_class_optional }, - { MDDS_ASCII("out-of-range"), pseudo_class_out_of_range }, - { MDDS_ASCII("read-only"), pseudo_class_read_only }, - { MDDS_ASCII("read-write"), pseudo_class_read_write }, - { MDDS_ASCII("required"), pseudo_class_required }, - { MDDS_ASCII("right"), pseudo_class_right }, - { MDDS_ASCII("root"), pseudo_class_root }, - { MDDS_ASCII("scope"), pseudo_class_scope }, - { MDDS_ASCII("target"), 
pseudo_class_target }, - { MDDS_ASCII("valid"), pseudo_class_valid }, - { MDDS_ASCII("visited"), pseudo_class_visited }, +constexpr map_type::entry entries[] = { + { "active", pseudo_class_active }, + { "checked", pseudo_class_checked }, + { "default", pseudo_class_default }, + { "dir", pseudo_class_dir }, + { "disabled", pseudo_class_disabled }, + { "empty", pseudo_class_empty }, + { "enabled", pseudo_class_enabled }, + { "first", pseudo_class_first }, + { "first-child", pseudo_class_first_child }, + { "first-of-type", pseudo_class_first_of_type }, + { "focus", pseudo_class_focus }, + { "fullscreen", pseudo_class_fullscreen }, + { "hover", pseudo_class_hover }, + { "in-range", pseudo_class_in_range }, + { "indeterminate", pseudo_class_indeterminate }, + { "invalid", pseudo_class_invalid }, + { "lang", pseudo_class_lang }, + { "last-child", pseudo_class_last_child }, + { "last-of-type", pseudo_class_last_of_type }, + { "left", pseudo_class_left }, + { "link", pseudo_class_link }, + { "not", pseudo_class_not }, + { "nth-child", pseudo_class_nth_child }, + { "nth-last-child", pseudo_class_nth_last_child }, + { "nth-last-of-type", pseudo_class_nth_last_of_type }, + { "nth-of-type", pseudo_class_nth_of_type }, + { "only-child", pseudo_class_only_child }, + { "only-of-type", pseudo_class_only_of_type }, + { "optional", pseudo_class_optional }, + { "out-of-range", pseudo_class_out_of_range }, + { "read-only", pseudo_class_read_only }, + { "read-write", pseudo_class_read_write }, + { "required", pseudo_class_required }, + { "right", pseudo_class_right }, + { "root", pseudo_class_root }, + { "scope", pseudo_class_scope }, + { "target", pseudo_class_target }, + { "valid", pseudo_class_valid }, + { "visited", pseudo_class_visited }, }; +const map_type& get() +{ + static map_type map(entries, std::size(entries), 0); + return map; +} + +} // namespace pseudo_class + } pseudo_class_t to_pseudo_class(std::string_view s) { - static pc_map_type class_map(pseudo_class_type_entries, 
std::size(pseudo_class_type_entries), 0); - - return class_map.find(s.data(), s.size()); + return pseudo_class::get().find(s); } std::string pseudo_class_to_string(pseudo_class_t val) { std::ostringstream os; - std::size_t n = std::size(pseudo_class_type_entries); - const pc_map_type::entry* p = pseudo_class_type_entries; - const pc_map_type::entry* p_end = p + n; + std::size_t n = std::size(pseudo_class::entries); + const pseudo_class::map_type::entry* p = pseudo_class::entries; + const pseudo_class::map_type::entry* p_end = p + n; for (; p != p_end; ++p) { if (val & p->value) -- GitLab From badf216eb3909d9809de3c5c6c9e79a262d6daab Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 29 Nov 2022 21:43:25 -0500 Subject: [PATCH 61/91] Document combinators --- include/orcus/css_parser.hpp | 14 ++++++++++++++ include/orcus/css_types.hpp | 6 +++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/include/orcus/css_parser.hpp b/include/orcus/css_parser.hpp index 7623f898..93bbc146 100644 --- a/include/orcus/css_parser.hpp +++ b/include/orcus/css_parser.hpp @@ -150,6 +150,20 @@ public: */ void end_selector() {} + /** + * Called upon encountering a combinator. A combinator is an operator that + * combines other selectors. Given the following CSS block: + * + * @code{.css} + * div > p { + * background-color: yellow; + * } + * @endcode + * + * the `>` is the combinator that combines the `div` and `p` selectors. + * + * @param combinator type of combinator encountered. + */ void combinator(orcus::css::combinator_t combinator) { (void)combinator; diff --git a/include/orcus/css_types.hpp b/include/orcus/css_types.hpp index 43f8c41f..75386ea3 100644 --- a/include/orcus/css_types.hpp +++ b/include/orcus/css_types.hpp @@ -18,11 +18,11 @@ namespace orcus { namespace css { enum class combinator_t { - /// 'E F' where F is a descendant of E. + /// `E F` where `F` is a descendant of `E`. descendant, - /// 'E > F' where F is a direct child of E. 
+ /// `E > F` where `F` is a direct child of `E`. direct_child, - /// 'E + F' where F is a direct sibling of E where E precedes F. + /// `E + F` where `F` is a direct sibling of `E` where `E` precedes `F`. next_sibling }; -- GitLab From 1d1978d3d3ea10120e4ad5f5b7bd72d5a6bc7061 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 29 Nov 2022 22:05:59 -0500 Subject: [PATCH 62/91] csv_parser to adopt std::string_view where appropriate --- include/orcus/cell_buffer.hpp | 3 +++ include/orcus/csv_parser.hpp | 21 ++++++++++----------- include/orcus/csv_parser_base.hpp | 2 +- src/liborcus/orcus_csv.cpp | 7 +++---- src/parser/cell_buffer.cpp | 5 +++++ src/parser/csv_parser_base.cpp | 4 ++-- src/parser/csv_parser_test.cpp | 3 +-- 7 files changed, 25 insertions(+), 20 deletions(-) diff --git a/include/orcus/cell_buffer.hpp b/include/orcus/cell_buffer.hpp index 4f3858cd..edce8241 100644 --- a/include/orcus/cell_buffer.hpp +++ b/include/orcus/cell_buffer.hpp @@ -30,6 +30,9 @@ public: void append(const char* p, size_t len); void reset(); + + std::string_view str() const; + const char* get() const; /** diff --git a/include/orcus/csv_parser.hpp b/include/orcus/csv_parser.hpp index 01c55cb3..ae1dcd0e 100644 --- a/include/orcus/csv_parser.hpp +++ b/include/orcus/csv_parser.hpp @@ -38,8 +38,7 @@ public: /** * Called after every cell is parsed. * - * @param p pointer to the first character of a cell content. - * @param n number of characters the cell content consists of. + * @param value cell content. * @param transient when true, the text content has been converted and is * stored in a temporary buffer. In such case, there is * no guarantee that the text content remain available @@ -47,9 +46,9 @@ public: * the text content is guaranteed to be valid so long as * the original CSV stream content is valid. 
*/ - void cell(const char* p, size_t n, bool transient) + void cell(std::string_view value, bool transient) { - (void)p; (void)n; (void)transient; + (void)value; (void)transient; } }; @@ -65,7 +64,7 @@ class csv_parser : public csv::parser_base public: typedef HandlerT handler_type; - csv_parser(const char* p, size_t n, handler_type& hdl, const csv::parser_config& config); + csv_parser(std::string_view content, handler_type& hdl, const csv::parser_config& config); void parse(); private: @@ -88,8 +87,8 @@ private: template csv_parser<_Handler>::csv_parser( - const char* p, size_t n, handler_type& hdl, const csv::parser_config& config) : - csv::parser_base(p, n, config), m_handler(hdl) {} + std::string_view content, handler_type& hdl, const csv::parser_config& config) : + csv::parser_base(content, config), m_handler(hdl) {} template void csv_parser<_Handler>::parse() @@ -205,14 +204,14 @@ void csv_parser<_Handler>::quoted_cell() } // Closing quote. - m_handler.cell(p0, len-1, false); + m_handler.cell({p0, len-1}, false); next(); skip_blanks(); return; } // Stream ended prematurely. Handle it gracefully. - m_handler.cell(p0, len, false); + m_handler.cell({p0, len}, false); } template @@ -256,7 +255,7 @@ void csv_parser<_Handler>::parse_cell_with_quote(const char* p0, size_t len0) // buffer, push the value to the handler, and exit normally. 
m_cell_buf.append(p_cur, cur_len); - m_handler.cell(m_cell_buf.get(), m_cell_buf.size(), true); + m_handler.cell(m_cell_buf.str(), true); next(); skip_blanks(); return; @@ -292,7 +291,7 @@ void csv_parser<_Handler>::push_cell_value(const char* p, size_t n) } } - m_handler.cell(p, len, false); + m_handler.cell({p, len}, false); #if ORCUS_DEBUG_CSV if (len) cout << "(cell:'" << std::string(p, len) << "')" << endl; diff --git a/include/orcus/csv_parser_base.hpp b/include/orcus/csv_parser_base.hpp index d7ceaad1..d1bb25a4 100644 --- a/include/orcus/csv_parser_base.hpp +++ b/include/orcus/csv_parser_base.hpp @@ -61,7 +61,7 @@ protected: cell_buffer m_cell_buf; protected: - parser_base(const char* p, size_t n, const parser_config& config); + parser_base(std::string_view content, const parser_config& config); /** * This is different from the global 'is_blank' in that it doesn't treat diff --git a/src/liborcus/orcus_csv.cpp b/src/liborcus/orcus_csv.cpp index 9bb4fa5a..5ea5e983 100644 --- a/src/liborcus/orcus_csv.cpp +++ b/src/liborcus/orcus_csv.cpp @@ -89,20 +89,19 @@ public: m_col = 0; } - void cell(const char* p, size_t n, bool transient) + void cell(std::string_view v, bool transient) { auto csv = std::get(m_app_config.data); if (m_sheet == 0 && size_t(m_row) < csv.header_row_size) { - std::string_view v{p, n}; if (transient) v = m_pool.intern(v).first; m_header_cells.emplace_back(m_row, m_col, v); } - mp_sheet->set_auto(m_row, m_col, {p, n}); + mp_sheet->set_auto(m_row, m_col, v); ++m_col; } @@ -148,7 +147,7 @@ struct orcus_csv::impl csv::parser_config config; config.delimiters.push_back(','); config.text_qualifier = '"'; - csv_parser parser(stream.data(), stream.size(), handler, config); + csv_parser parser(stream, handler, config); try { parser.parse(); diff --git a/src/parser/cell_buffer.cpp b/src/parser/cell_buffer.cpp index 1c4c5a39..7bec4715 100644 --- a/src/parser/cell_buffer.cpp +++ b/src/parser/cell_buffer.cpp @@ -46,6 +46,11 @@ void cell_buffer::reset() 
m_buf_size = 0; } +std::string_view cell_buffer::str() const +{ + return std::string_view{m_buffer.data(), m_buf_size}; +} + const char* cell_buffer::get() const { return m_buffer.data(); diff --git a/src/parser/csv_parser_base.cpp b/src/parser/csv_parser_base.cpp index b8b47bab..a5055bc0 100644 --- a/src/parser/csv_parser_base.cpp +++ b/src/parser/csv_parser_base.cpp @@ -16,8 +16,8 @@ parser_config::parser_config() : trim_cell_value(false) {} parser_base::parser_base( - const char* p, size_t n, const csv::parser_config& config) : - ::orcus::parser_base(p, n), m_config(config) + std::string_view content, const csv::parser_config& config) : + ::orcus::parser_base(content.data(), content.size()), m_config(config) { maybe_skip_bom(); } diff --git a/src/parser/csv_parser_test.cpp b/src/parser/csv_parser_test.cpp index 66b81d0b..18470f00 100644 --- a/src/parser/csv_parser_test.cpp +++ b/src/parser/csv_parser_test.cpp @@ -12,11 +12,10 @@ void test_handler() { const char* test_code = "1,2,3,4,5\n6,7,8,9,10\n"; - size_t n = strlen(test_code); orcus::csv_handler hdl; orcus::csv::parser_config config; - orcus::csv_parser parser(test_code, n, hdl, config); + orcus::csv_parser parser(test_code, hdl, config); parser.parse(); } -- GitLab From 9bb243e944543a10522d0f20e36c2b87a1579869 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 29 Nov 2022 22:17:02 -0500 Subject: [PATCH 63/91] yaml_parser to adopt std::string_view --- include/orcus/yaml_parser.hpp | 15 +++++++-------- include/orcus/yaml_parser_base.hpp | 2 +- src/liborcus/yaml_document_tree.cpp | 11 +++++------ src/parser/yaml_parser_base.cpp | 6 +++--- 4 files changed, 16 insertions(+), 18 deletions(-) diff --git a/include/orcus/yaml_parser.hpp b/include/orcus/yaml_parser.hpp index b8d1a027..836a9021 100644 --- a/include/orcus/yaml_parser.hpp +++ b/include/orcus/yaml_parser.hpp @@ -73,12 +73,11 @@ public: /** * Called when a string value is encountered. 
* - * @param p pointer to the first character of the string value. - * @param n length of the string value. + * @param value string value. */ - void string(const char* p, size_t n) + void string(std::string_view value) { - (void)p; (void)n; + (void)value; } /** @@ -121,7 +120,7 @@ class yaml_parser : public yaml::parser_base public: typedef HandlerT handler_type; - yaml_parser(const char* p, size_t n, handler_type& hdl); + yaml_parser(std::string_view content, handler_type& hdl); void parse(); @@ -229,7 +228,7 @@ template void yaml_parser<_Handler>::handler_string(const char* p, size_t n) { push_parse_token(yaml::detail::parse_token_t::string); - m_handler.string(p, n); + m_handler.string({p, n}); } template @@ -261,8 +260,8 @@ void yaml_parser<_Handler>::handler_null() } template -yaml_parser<_Handler>::yaml_parser(const char* p, size_t n, handler_type& hdl) : - yaml::parser_base(p, n), m_handler(hdl) {} +yaml_parser<_Handler>::yaml_parser(std::string_view content, handler_type& hdl) : + yaml::parser_base(content), m_handler(hdl) {} template void yaml_parser<_Handler>::parse() diff --git a/include/orcus/yaml_parser_base.hpp b/include/orcus/yaml_parser_base.hpp index 94b2215a..13b4c915 100644 --- a/include/orcus/yaml_parser_base.hpp +++ b/include/orcus/yaml_parser_base.hpp @@ -87,7 +87,7 @@ protected: parser_base(const parser_base&) = delete; parser_base& operator=(const parser_base&) = delete; - parser_base(const char* p, size_t n); + parser_base(std::string_view content); ~parser_base(); void push_parse_token(detail::parse_token_t t); diff --git a/src/liborcus/yaml_document_tree.cpp b/src/liborcus/yaml_document_tree.cpp index 7ac0735b..eac35c70 100644 --- a/src/liborcus/yaml_document_tree.cpp +++ b/src/liborcus/yaml_document_tree.cpp @@ -96,8 +96,7 @@ struct yaml_value_string : public yaml_value std::string value_string; yaml_value_string() : yaml_value(node_t::string) {} - yaml_value_string(const std::string& s) : yaml_value(node_t::string), value_string(s) {} - 
yaml_value_string(const char* p, size_t n) : yaml_value(node_t::string), value_string(p, n) {} + yaml_value_string(std::string_view s) : yaml_value(node_t::string), value_string(s) {} virtual ~yaml_value_string() {} virtual std::string print() const @@ -302,17 +301,17 @@ public: m_stack.pop_back(); } - void string(const char* p, size_t n) + void string(std::string_view v) { assert(m_in_document); if (m_root) { - yaml_value* yv = push_value(std::make_unique(p, n)); + yaml_value* yv = push_value(std::make_unique(v)); assert(yv && yv->type == node_t::string); } else - m_root = std::make_unique(p, n); + m_root = std::make_unique(v); } void number(double val) @@ -543,7 +542,7 @@ document_tree::~document_tree() {} void document_tree::load(std::string_view s) { handler hdl; - yaml_parser parser(s.data(), s.size(), hdl); + yaml_parser parser(s, hdl); parser.parse(); hdl.swap(mp_impl->m_docs); } diff --git a/src/parser/yaml_parser_base.cpp b/src/parser/yaml_parser_base.cpp index 3a5dc5ff..d2bfa0a8 100644 --- a/src/parser/yaml_parser_base.cpp +++ b/src/parser/yaml_parser_base.cpp @@ -53,10 +53,10 @@ const size_t parser_base::parse_indent_blank_line = std::numeric_limits::max() - 1; const size_t parser_base::scope_empty = std::numeric_limits::max() - 2; -parser_base::parser_base(const char* p, size_t n) : - orcus::parser_base(p, n), mp_impl(std::make_unique()) {} +parser_base::parser_base(std::string_view content) : + orcus::parser_base(content.data(), content.size()), mp_impl(std::make_unique()) {} -parser_base::~parser_base() {} +parser_base::~parser_base() = default; void parser_base::push_parse_token(detail::parse_token_t t) { -- GitLab From 337592a0f77700c6b3d181f6ef854d90534aff83 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 29 Nov 2022 22:38:02 -0500 Subject: [PATCH 64/91] Remove get() and size() from cell_buffer in favor of str() --- include/orcus/cell_buffer.hpp | 9 --------- include/orcus/sax_parser.hpp | 2 +- src/liborcus/xlsx_context.cpp | 2 +- 
src/parser/cell_buffer.cpp | 10 ---------- src/parser/parser_global.cpp | 19 +++++++++++-------- src/parser/sax_parser_base.cpp | 2 +- src/parser/yaml_parser_base.cpp | 2 +- 7 files changed, 15 insertions(+), 31 deletions(-) diff --git a/include/orcus/cell_buffer.hpp b/include/orcus/cell_buffer.hpp index edce8241..60df7288 100644 --- a/include/orcus/cell_buffer.hpp +++ b/include/orcus/cell_buffer.hpp @@ -33,15 +33,6 @@ public: std::string_view str() const; - const char* get() const; - - /** - * Get the logical size of the buffer. This may differ from the actual - * buffer size. - * - * @return logical size of the buffer. - */ - size_t size() const; bool empty() const; }; diff --git a/include/orcus/sax_parser.hpp b/include/orcus/sax_parser.hpp index c26df428..1158a70d 100644 --- a/include/orcus/sax_parser.hpp +++ b/include/orcus/sax_parser.hpp @@ -518,7 +518,7 @@ void sax_parser::characters() if (buf.empty()) m_handler.characters(std::string_view{}, false); else - m_handler.characters(std::string_view(buf.get(), buf.size()), true); + m_handler.characters(buf.str(), true); return; } } diff --git a/src/liborcus/xlsx_context.cpp b/src/liborcus/xlsx_context.cpp index 77df51cc..6b4bf939 100644 --- a/src/liborcus/xlsx_context.cpp +++ b/src/liborcus/xlsx_context.cpp @@ -267,7 +267,7 @@ void xlsx_shared_strings_context::characters(std::string_view str, bool transien // Append the tail end. 
m_cell_buffer.append(p0, std::distance(p0, p)); - m_cur_str = m_pool.intern({m_cell_buffer.get(), m_cell_buffer.size()}).first; + m_cur_str = m_pool.intern(m_cell_buffer.str()).first; transient = false; } diff --git a/src/parser/cell_buffer.cpp b/src/parser/cell_buffer.cpp index 7bec4715..6815d71b 100644 --- a/src/parser/cell_buffer.cpp +++ b/src/parser/cell_buffer.cpp @@ -51,16 +51,6 @@ std::string_view cell_buffer::str() const return std::string_view{m_buffer.data(), m_buf_size}; } -const char* cell_buffer::get() const -{ - return m_buffer.data(); -} - -size_t cell_buffer::size() const -{ - return m_buf_size; -} - bool cell_buffer::empty() const { return m_buf_size == 0; diff --git a/src/parser/parser_global.cpp b/src/parser/parser_global.cpp index 59ae93b6..5489e216 100644 --- a/src/parser/parser_global.cpp +++ b/src/parser/parser_global.cpp @@ -184,19 +184,20 @@ parse_quoted_string_state parse_string_with_escaped_char( switch (*p) { case '"': + { // closing quote. buffer.append(p_head, len); ++p; // skip the quote. 
- ret.str = buffer.get(); - ret.length = buffer.size(); + std::string_view s = buffer.str(); + ret.str = s.data(); + ret.length = s.size(); return ret; - break; + } case '\\': { escape = true; continue; } - break; default: ; } @@ -243,8 +244,9 @@ parse_quoted_string_state parse_single_quoted_string_buffered( if (last == '\'') { buffer.append(p0, len-1); - ret.str = buffer.get(); - ret.length = buffer.size(); + auto s = buffer.str(); + ret.str = s.data(); + ret.length = s.size(); return ret; } } @@ -257,8 +259,9 @@ parse_quoted_string_state parse_single_quoted_string_buffered( if (last == '\'') { buffer.append(p0, len-1); - ret.str = buffer.get(); - ret.length = buffer.size(); + auto s = buffer.str(); + ret.str = s.data(); + ret.length = s.size(); return ret; } diff --git a/src/parser/sax_parser_base.cpp b/src/parser/sax_parser_base.cpp index 95ee9314..95d8c6e7 100644 --- a/src/parser/sax_parser_base.cpp +++ b/src/parser/sax_parser_base.cpp @@ -285,7 +285,7 @@ void parser_base::value_with_encoded_char(cell_buffer& buf, std::string_view& st buf.append(p0, mp_char-p0); if (!buf.empty()) - str = std::string_view(buf.get(), buf.size()); + str = buf.str(); // Skip the closing quote. 
assert(!has_char() || cur_char() == quote_char); diff --git a/src/parser/yaml_parser_base.cpp b/src/parser/yaml_parser_base.cpp index d2bfa0a8..df4db23f 100644 --- a/src/parser/yaml_parser_base.cpp +++ b/src/parser/yaml_parser_base.cpp @@ -287,7 +287,7 @@ std::string_view parser_base::merge_line_buffer() mp_impl->m_line_buffer.clear(); mp_impl->m_in_literal_block = false; - return std::string_view(buf.get(), buf.size()); + return buf.str(); } const char* parser_base::get_doc_hash() const -- GitLab From 0c7661c4a5f25b63335aa6ad43e1edac751a27ba Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 29 Nov 2022 23:11:29 -0500 Subject: [PATCH 65/91] Document the global filter interfaces & re-org the doc structure a bit --- doc/cpp/filter/config.rst | 13 +++++++ doc/cpp/filter/index.rst | 68 ++++------------------------------ doc/cpp/filter/spreadsheet.rst | 63 +++++++++++++++++++++++++++++++ doc/cpp/index.rst | 1 - doc/cpp/model/json.rst | 3 -- include/orcus/interface.hpp | 41 +++++++++++++++++++- src/liborcus/interface.cpp | 4 +- 7 files changed, 124 insertions(+), 69 deletions(-) create mode 100644 doc/cpp/filter/config.rst create mode 100644 doc/cpp/filter/spreadsheet.rst diff --git a/doc/cpp/filter/config.rst b/doc/cpp/filter/config.rst new file mode 100644 index 00000000..16388961 --- /dev/null +++ b/doc/cpp/filter/config.rst @@ -0,0 +1,13 @@ + +Configuration classes +===================== + +.. doxygenstruct:: orcus::config + :members: + +.. doxygenstruct:: orcus::json_config + :members: + +.. doxygenstruct:: orcus::yaml_config + :members: + diff --git a/doc/cpp/filter/index.rst b/doc/cpp/filter/index.rst index 85991d46..3aa7679b 100644 --- a/doc/cpp/filter/index.rst +++ b/doc/cpp/filter/index.rst @@ -1,64 +1,10 @@ -Spreadsheet import filters -========================== +Import filters +============== +.. toctree:: + :maxdepth: 1 -Plain text (CSV) ----------------- - -.. 
doxygenclass:: orcus::orcus_csv - :members: - - -Open document spreadsheet -------------------------- - -.. doxygenclass:: orcus::orcus_ods - :members: - -.. doxygenclass:: orcus::import_ods - :members: - - -Microsoft Excel 2003 XML ------------------------- - -.. doxygenclass:: orcus::orcus_xls_xml - :members: - - -Microsoft Excel 2007 XML ------------------------- - -.. doxygenclass:: orcus::orcus_xlsx - :members: - -.. doxygenclass:: orcus::import_xlsx - :members: - - -Gnumeric XML ------------- - -.. doxygenclass:: orcus::orcus_gnumeric - :members: - - -Generic XML ------------ - -.. doxygenclass:: orcus::orcus_xml - :members: - - -Generic JSON ------------- - -.. doxygenclass:: orcus::orcus_json - :members: - - -Utility functions ------------------ - -.. doxygenfunction:: orcus::detect + config.rst + interface.rst + spreadsheet.rst diff --git a/doc/cpp/filter/spreadsheet.rst b/doc/cpp/filter/spreadsheet.rst new file mode 100644 index 00000000..e53d19fd --- /dev/null +++ b/doc/cpp/filter/spreadsheet.rst @@ -0,0 +1,63 @@ + +Spreadsheet import filters +========================== + +Plain text (CSV) +---------------- + +.. doxygenclass:: orcus::orcus_csv + :members: + + +Open document spreadsheet +------------------------- + +.. doxygenclass:: orcus::orcus_ods + :members: + +.. doxygenclass:: orcus::import_ods + :members: + + +Microsoft Excel 2003 XML +------------------------ + +.. doxygenclass:: orcus::orcus_xls_xml + :members: + + +Microsoft Excel 2007 XML +------------------------ + +.. doxygenclass:: orcus::orcus_xlsx + :members: + +.. doxygenclass:: orcus::import_xlsx + :members: + + +Gnumeric XML +------------ + +.. doxygenclass:: orcus::orcus_gnumeric + :members: + + +Generic XML +----------- + +.. doxygenclass:: orcus::orcus_xml + :members: + + +Generic JSON +------------ + +.. doxygenclass:: orcus::orcus_json + :members: + + +Utility functions +----------------- + +.. 
doxygenfunction:: orcus::detect diff --git a/doc/cpp/index.rst b/doc/cpp/index.rst index 024d9d81..eef3edef 100644 --- a/doc/cpp/index.rst +++ b/doc/cpp/index.rst @@ -6,6 +6,5 @@ C++ API :maxdepth: 2 parser/index.rst - filter/interface.rst filter/index.rst model/index.rst diff --git a/doc/cpp/model/json.rst b/doc/cpp/model/json.rst index 12e9e354..607b7490 100644 --- a/doc/cpp/model/json.rst +++ b/doc/cpp/model/json.rst @@ -7,9 +7,6 @@ Document tree .. doxygenclass:: orcus::json::document_tree :members: -.. doxygenstruct:: orcus::json_config - :members: - .. doxygenclass:: orcus::json::const_node :members: diff --git a/include/orcus/interface.hpp b/include/orcus/interface.hpp index 7db68e99..494108ad 100644 --- a/include/orcus/interface.hpp +++ b/include/orcus/interface.hpp @@ -20,6 +20,9 @@ struct config; namespace iface { +/** + * Base interface for import filters. + */ class ORCUS_DLLPUBLIC import_filter { struct impl; @@ -29,23 +32,57 @@ public: import_filter(format_t input); virtual ~import_filter(); - /// expects a system path to a local file + /** + * Read the content of a file. + * + * @param filepath path to a local file. It must be a system path. + */ virtual void read_file(const std::string& filepath) = 0; - /// expects the whole content of the file + /** + * Read the content of an in-memory stream. + * + * @param stream in-memory stream to read from. + */ virtual void read_stream(std::string_view stream) = 0; + /** + * Get the name of a filter. + * + * @return name of a filter. + */ virtual std::string_view get_name() const = 0; void set_config(const orcus::config& v); const orcus::config& get_config() const; }; +/** + * Base interface for document content dumpers. + */ class ORCUS_DLLPUBLIC document_dumper { public: virtual ~document_dumper(); + + /** + * Dump the content of a document in a specified format, either into set of + * multiple files, or a single file. + * + * @param format Output format type in which to dump the content. 
+ * @param output Depending on the output format type, this can be either an + * output directory path where multiple output files get + * created, or an output file path where the content of the + * entire document gets dumped into. + */ virtual void dump(dump_format_t format, const std::string& output) const = 0; + + /** + * Dump the content of a document in a special "check" format suitable as + * unit testing controls. + * + * @param os output stream to write the transformed content to. + */ virtual void dump_check(std::ostream& os) const = 0; }; diff --git a/src/liborcus/interface.cpp b/src/liborcus/interface.cpp index f61df5b6..2b743a2d 100644 --- a/src/liborcus/interface.cpp +++ b/src/liborcus/interface.cpp @@ -19,7 +19,7 @@ struct import_filter::impl import_filter::import_filter(format_t input) : mp_impl(std::make_unique(input)) {} -import_filter::~import_filter() {} +import_filter::~import_filter() = default; void import_filter::set_config(const config& v) { @@ -31,7 +31,7 @@ const config& import_filter::get_config() const return mp_impl->m_config; } -document_dumper::~document_dumper() {} +document_dumper::~document_dumper() = default; }} /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ -- GitLab From 747342a40fbd7f6f2373c9b53a15d7dab876415c Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Wed, 30 Nov 2022 18:35:59 -0500 Subject: [PATCH 66/91] Split the interface section into multiple pages --- doc/cpp/filter/index.rst | 2 +- doc/cpp/filter/interface.rst | 257 ----------------------- doc/cpp/filter/interface/functions.rst | 10 + doc/cpp/filter/interface/global.rst | 9 + doc/cpp/filter/interface/index.rst | 12 ++ doc/cpp/filter/interface/spreadsheet.rst | 83 ++++++++ doc/cpp/filter/interface/types.rst | 71 +++++++ 7 files changed, 186 insertions(+), 258 deletions(-) delete mode 100644 doc/cpp/filter/interface.rst create mode 100644 doc/cpp/filter/interface/functions.rst create mode 100644 doc/cpp/filter/interface/global.rst create mode 100644 
doc/cpp/filter/interface/index.rst create mode 100644 doc/cpp/filter/interface/spreadsheet.rst create mode 100644 doc/cpp/filter/interface/types.rst diff --git a/doc/cpp/filter/index.rst b/doc/cpp/filter/index.rst index 3aa7679b..1971c6eb 100644 --- a/doc/cpp/filter/index.rst +++ b/doc/cpp/filter/index.rst @@ -6,5 +6,5 @@ Import filters :maxdepth: 1 config.rst - interface.rst + interface/index.rst spreadsheet.rst diff --git a/doc/cpp/filter/interface.rst b/doc/cpp/filter/interface.rst deleted file mode 100644 index 1e0aa909..00000000 --- a/doc/cpp/filter/interface.rst +++ /dev/null @@ -1,257 +0,0 @@ - -Spreadsheet types and interfaces -================================ - - -Global interface ----------------- - -.. doxygenclass:: orcus::iface::import_filter - :members: - -.. doxygenclass:: orcus::iface::document_dumper - :members: - -.. _spreadsheet-interface: - -Spreadsheet interface ---------------------- - -import_array_formula -^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_array_formula - :members: - -import_auto_filter -^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_auto_filter - :members: - -import_border_style -^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_border_style - :members: - -import_cell_protection -^^^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_cell_protection - :members: - -import_cell_style -^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_cell_style - :members: - -import_conditional_format -^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_conditional_format - :members: - -import_data_table -^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_data_table - :members: - -import_factory -^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_factory - :members: - -import_fill_style -^^^^^^^^^^^^^^^^^ - -.. 
doxygenclass:: orcus::spreadsheet::iface::import_fill_style - :members: - -import_font_style -^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_font_style - :members: - -import_formula -^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_formula - :members: - -import_global_settings -^^^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_global_settings - :members: - -import_named_expression -^^^^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_named_expression - :members: - -import_number_format -^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_number_format - :members: - -import_pivot_cache_definition -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_definition - :members: - -import_pivot_cache_records -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_records - :members: - -import_reference_resolver -^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_reference_resolver - :members: - -import_shared_strings -^^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_shared_strings - :members: - -import_sheet -^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_sheet - :members: - -import_sheet_properties -^^^^^^^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_sheet_properties - :members: - -import_sheet_view -^^^^^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_sheet_view - :members: - -import_styles -^^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_styles - :members: - -import_table -^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_table - :members: - -import_xf -^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::import_xf - :members: - -export_factory -^^^^^^^^^^^^^^ - -.. 
doxygenclass:: orcus::spreadsheet::iface::export_factory - :members: - -export_sheet -^^^^^^^^^^^^ - -.. doxygenclass:: orcus::spreadsheet::iface::export_sheet - :members: - - -Spreadsheet types ------------------ - -Type aliases -^^^^^^^^^^^^ - -.. doxygentypedef:: orcus::spreadsheet::row_t -.. doxygentypedef:: orcus::spreadsheet::col_t -.. doxygentypedef:: orcus::spreadsheet::sheet_t -.. doxygentypedef:: orcus::spreadsheet::color_elem_t -.. doxygentypedef:: orcus::spreadsheet::col_width_t -.. doxygentypedef:: orcus::spreadsheet::row_height_t -.. doxygentypedef:: orcus::spreadsheet::pivot_cache_id_t - - -Structs -^^^^^^^ - -.. doxygenstruct:: orcus::spreadsheet::underline_attrs_t - :members: - -.. doxygenstruct:: orcus::spreadsheet::address_t - :members: - -.. doxygenstruct:: orcus::spreadsheet::range_size_t - :members: - -.. doxygenstruct:: orcus::spreadsheet::range_t - :members: - -.. doxygenstruct:: orcus::spreadsheet::color_rgb_t - :members: - -.. doxygenstruct:: orcus::spreadsheet::split_pane_t - :members: - -.. doxygenstruct:: orcus::spreadsheet::frozen_pane_t - :members: - - -Enums -^^^^^ - -.. doxygenenum:: orcus::spreadsheet::error_value_t -.. doxygenenum:: orcus::spreadsheet::border_direction_t -.. doxygenenum:: orcus::spreadsheet::border_style_t -.. doxygenenum:: orcus::spreadsheet::fill_pattern_t -.. doxygenenum:: orcus::spreadsheet::strikethrough_style_t -.. doxygenenum:: orcus::spreadsheet::strikethrough_type_t -.. doxygenenum:: orcus::spreadsheet::strikethrough_width_t -.. doxygenenum:: orcus::spreadsheet::strikethrough_text_t -.. doxygenenum:: orcus::spreadsheet::formula_grammar_t -.. doxygenenum:: orcus::spreadsheet::formula_t -.. doxygenenum:: orcus::spreadsheet::underline_t -.. doxygenenum:: orcus::spreadsheet::underline_width_t -.. doxygenenum:: orcus::spreadsheet::underline_mode_t -.. doxygenenum:: orcus::spreadsheet::underline_type_t -.. doxygenenum:: orcus::spreadsheet::hor_alignment_t -.. 
doxygenenum:: orcus::spreadsheet::ver_alignment_t -.. doxygenenum:: orcus::spreadsheet::data_table_type_t -.. doxygenenum:: orcus::spreadsheet::xf_category_t -.. doxygenenum:: orcus::spreadsheet::totals_row_function_t -.. doxygenenum:: orcus::spreadsheet::conditional_format_t -.. doxygenenum:: orcus::spreadsheet::condition_operator_t -.. doxygenenum:: orcus::spreadsheet::condition_type_t -.. doxygenenum:: orcus::spreadsheet::condition_date_t -.. doxygenenum:: orcus::spreadsheet::databar_axis_t -.. doxygenenum:: orcus::spreadsheet::pivot_cache_group_by_t -.. doxygenenum:: orcus::spreadsheet::sheet_pane_t -.. doxygenenum:: orcus::spreadsheet::pane_state_t - - -Spreadsheet global functions ----------------------------- - -.. doxygenfunction:: orcus::spreadsheet::get_default_column_width -.. doxygenfunction:: orcus::spreadsheet::get_default_row_height -.. doxygenfunction:: orcus::spreadsheet::to_totals_row_function_enum -.. doxygenfunction:: orcus::spreadsheet::to_pivot_cache_group_by_enum -.. doxygenfunction:: orcus::spreadsheet::to_error_value_enum -.. doxygenfunction:: orcus::spreadsheet::to_color_rgb diff --git a/doc/cpp/filter/interface/functions.rst b/doc/cpp/filter/interface/functions.rst new file mode 100644 index 00000000..b7e62ed9 --- /dev/null +++ b/doc/cpp/filter/interface/functions.rst @@ -0,0 +1,10 @@ + +Spreadsheet global functions +============================ + +.. doxygenfunction:: orcus::spreadsheet::get_default_column_width +.. doxygenfunction:: orcus::spreadsheet::get_default_row_height +.. doxygenfunction:: orcus::spreadsheet::to_totals_row_function_enum +.. doxygenfunction:: orcus::spreadsheet::to_pivot_cache_group_by_enum +.. doxygenfunction:: orcus::spreadsheet::to_error_value_enum +.. 
doxygenfunction:: orcus::spreadsheet::to_color_rgb diff --git a/doc/cpp/filter/interface/global.rst b/doc/cpp/filter/interface/global.rst new file mode 100644 index 00000000..e401d9c9 --- /dev/null +++ b/doc/cpp/filter/interface/global.rst @@ -0,0 +1,9 @@ + +Global interface +================ + +.. doxygenclass:: orcus::iface::import_filter + :members: + +.. doxygenclass:: orcus::iface::document_dumper + :members: diff --git a/doc/cpp/filter/interface/index.rst b/doc/cpp/filter/interface/index.rst new file mode 100644 index 00000000..836b71ef --- /dev/null +++ b/doc/cpp/filter/interface/index.rst @@ -0,0 +1,12 @@ + +Spreadsheet types and interfaces +================================ + +.. toctree:: + :maxdepth: 1 + + global.rst + spreadsheet.rst + types.rst + functions.rst + diff --git a/doc/cpp/filter/interface/spreadsheet.rst b/doc/cpp/filter/interface/spreadsheet.rst new file mode 100644 index 00000000..fa229eda --- /dev/null +++ b/doc/cpp/filter/interface/spreadsheet.rst @@ -0,0 +1,83 @@ + +.. _spreadsheet-interface: + +Spreadsheet interface +===================== + +.. doxygenclass:: orcus::spreadsheet::iface::import_array_formula + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_auto_filter + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_border_style + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_cell_protection + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_cell_style + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_conditional_format + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_data_table + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_factory + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_fill_style + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_font_style + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_formula + :members: + +.. 
doxygenclass:: orcus::spreadsheet::iface::import_global_settings + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_named_expression + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_number_format + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_definition + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_records + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_reference_resolver + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_shared_strings + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_sheet + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_sheet_properties + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_sheet_view + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_styles + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_table + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_xf + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::export_factory + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::export_sheet + :members: diff --git a/doc/cpp/filter/interface/types.rst b/doc/cpp/filter/interface/types.rst new file mode 100644 index 00000000..01519092 --- /dev/null +++ b/doc/cpp/filter/interface/types.rst @@ -0,0 +1,71 @@ + +Spreadsheet types +================= + +Type aliases +------------ + +.. doxygentypedef:: orcus::spreadsheet::row_t +.. doxygentypedef:: orcus::spreadsheet::col_t +.. doxygentypedef:: orcus::spreadsheet::sheet_t +.. doxygentypedef:: orcus::spreadsheet::color_elem_t +.. doxygentypedef:: orcus::spreadsheet::col_width_t +.. doxygentypedef:: orcus::spreadsheet::row_height_t +.. doxygentypedef:: orcus::spreadsheet::pivot_cache_id_t + + +Structs +------- + +.. doxygenstruct:: orcus::spreadsheet::underline_attrs_t + :members: + +.. doxygenstruct:: orcus::spreadsheet::address_t + :members: + +.. 
doxygenstruct:: orcus::spreadsheet::range_size_t + :members: + +.. doxygenstruct:: orcus::spreadsheet::range_t + :members: + +.. doxygenstruct:: orcus::spreadsheet::color_rgb_t + :members: + +.. doxygenstruct:: orcus::spreadsheet::split_pane_t + :members: + +.. doxygenstruct:: orcus::spreadsheet::frozen_pane_t + :members: + + +Enums +----- + +.. doxygenenum:: orcus::spreadsheet::error_value_t +.. doxygenenum:: orcus::spreadsheet::border_direction_t +.. doxygenenum:: orcus::spreadsheet::border_style_t +.. doxygenenum:: orcus::spreadsheet::fill_pattern_t +.. doxygenenum:: orcus::spreadsheet::strikethrough_style_t +.. doxygenenum:: orcus::spreadsheet::strikethrough_type_t +.. doxygenenum:: orcus::spreadsheet::strikethrough_width_t +.. doxygenenum:: orcus::spreadsheet::strikethrough_text_t +.. doxygenenum:: orcus::spreadsheet::formula_grammar_t +.. doxygenenum:: orcus::spreadsheet::formula_t +.. doxygenenum:: orcus::spreadsheet::underline_t +.. doxygenenum:: orcus::spreadsheet::underline_width_t +.. doxygenenum:: orcus::spreadsheet::underline_mode_t +.. doxygenenum:: orcus::spreadsheet::underline_type_t +.. doxygenenum:: orcus::spreadsheet::hor_alignment_t +.. doxygenenum:: orcus::spreadsheet::ver_alignment_t +.. doxygenenum:: orcus::spreadsheet::data_table_type_t +.. doxygenenum:: orcus::spreadsheet::xf_category_t +.. doxygenenum:: orcus::spreadsheet::totals_row_function_t +.. doxygenenum:: orcus::spreadsheet::conditional_format_t +.. doxygenenum:: orcus::spreadsheet::condition_operator_t +.. doxygenenum:: orcus::spreadsheet::condition_type_t +.. doxygenenum:: orcus::spreadsheet::condition_date_t +.. doxygenenum:: orcus::spreadsheet::databar_axis_t +.. doxygenenum:: orcus::spreadsheet::pivot_cache_group_by_t +.. doxygenenum:: orcus::spreadsheet::sheet_pane_t +.. 
doxygenenum:: orcus::spreadsheet::pane_state_t -- GitLab From 3bd57018eaa7337808bf6cfc39a584d7e10a285a Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Wed, 30 Nov 2022 20:38:05 -0500 Subject: [PATCH 67/91] Tweak the global interface page & use std::string_view for read_file() --- doc/conf.py | 4 +- doc/cpp/filter/interface/global.rst | 7 +- doc/cpp/filter/interface/index.rst | 4 +- doc/cpp/filter/interface/spreadsheet.rst | 76 ++++++++++++------- doc_example/spreadsheet_doc_1.cpp | 3 +- .../spreadsheet_doc_1_num_and_formula.cpp | 3 +- doc_example/spreadsheet_doc_2.cpp | 3 +- ...preadsheet_doc_2_sheets_no_string_pool.cpp | 3 +- .../spreadsheet_doc_2_sheets_with_formula.cpp | 3 +- ...eadsheet_doc_2_sheets_with_string_pool.cpp | 3 +- include/orcus/interface.hpp | 2 +- include/orcus/orcus_csv.hpp | 2 +- include/orcus/orcus_gnumeric.hpp | 2 +- include/orcus/orcus_ods.hpp | 2 +- include/orcus/orcus_xls_xml.hpp | 2 +- include/orcus/orcus_xlsx.hpp | 2 +- src/liborcus/orcus_csv.cpp | 4 +- src/liborcus/orcus_gnumeric.cpp | 4 +- src/liborcus/orcus_ods.cpp | 4 +- src/liborcus/orcus_xls_xml.cpp | 2 +- src/liborcus/orcus_xlsx.cpp | 5 +- 21 files changed, 88 insertions(+), 52 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index fcdce518..4ad6082e 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -117,7 +117,9 @@ html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -html_theme_options = {} +html_theme_options = { + "navigation_depth": 5, +} # Add any paths that contain custom themes here, relative to this directory. 
#html_theme_path = [] diff --git a/doc/cpp/filter/interface/global.rst b/doc/cpp/filter/interface/global.rst index e401d9c9..0e73e470 100644 --- a/doc/cpp/filter/interface/global.rst +++ b/doc/cpp/filter/interface/global.rst @@ -1,6 +1,9 @@ -Global interface -================ +Global interfaces +================= + +The following global interfaces are used to abstract the concrete filter and +document classes from orcus's CLI framework. .. doxygenclass:: orcus::iface::import_filter :members: diff --git a/doc/cpp/filter/interface/index.rst b/doc/cpp/filter/interface/index.rst index 836b71ef..1f7ec154 100644 --- a/doc/cpp/filter/interface/index.rst +++ b/doc/cpp/filter/interface/index.rst @@ -1,6 +1,6 @@ -Spreadsheet types and interfaces -================================ +Types and interfaces +==================== .. toctree:: :maxdepth: 1 diff --git a/doc/cpp/filter/interface/spreadsheet.rst b/doc/cpp/filter/interface/spreadsheet.rst index fa229eda..bef3f9c3 100644 --- a/doc/cpp/filter/interface/spreadsheet.rst +++ b/doc/cpp/filter/interface/spreadsheet.rst @@ -1,83 +1,107 @@ -.. _spreadsheet-interface: +.. _spreadsheet-interfaces: -Spreadsheet interface -===================== +Spreadsheet interfaces +====================== -.. doxygenclass:: orcus::spreadsheet::iface::import_array_formula - :members: +Document import +--------------- -.. doxygenclass:: orcus::spreadsheet::iface::import_auto_filter +.. doxygenclass:: orcus::spreadsheet::iface::import_shared_strings :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_border_style +.. doxygenclass:: orcus::spreadsheet::iface::import_sheet_properties :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_cell_protection +.. doxygenclass:: orcus::spreadsheet::iface::import_named_expression :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_cell_style +.. doxygenclass:: orcus::spreadsheet::iface::import_data_table :members: -.. 
doxygenclass:: orcus::spreadsheet::iface::import_conditional_format +.. doxygenclass:: orcus::spreadsheet::iface::import_auto_filter :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_data_table +.. doxygenclass:: orcus::spreadsheet::iface::import_conditional_format :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_factory +.. doxygenclass:: orcus::spreadsheet::iface::import_table :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_fill_style +.. doxygenclass:: orcus::spreadsheet::iface::import_formula :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_font_style +.. doxygenclass:: orcus::spreadsheet::iface::import_array_formula :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_formula +.. doxygenclass:: orcus::spreadsheet::iface::import_sheet :members: .. doxygenclass:: orcus::spreadsheet::iface::import_global_settings :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_named_expression +.. doxygenclass:: orcus::spreadsheet::iface::import_reference_resolver :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_number_format +.. doxygenclass:: orcus::spreadsheet::iface::import_factory :members: + +Pivot table import +------------------ + .. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_definition :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_records +.. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_field_group :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_reference_resolver +.. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_records :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_shared_strings + +Styles import +------------- + +.. doxygenclass:: orcus::spreadsheet::iface::import_styles :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_sheet +.. doxygenclass:: orcus::spreadsheet::iface::import_font_style :members: -.. 
doxygenclass:: orcus::spreadsheet::iface::import_sheet_properties +.. doxygenclass:: orcus::spreadsheet::iface::import_fill_style :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_sheet_view +.. doxygenclass:: orcus::spreadsheet::iface::import_border_style :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_styles +.. doxygenclass:: orcus::spreadsheet::iface::import_cell_protection :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_table +.. doxygenclass:: orcus::spreadsheet::iface::import_number_format :members: .. doxygenclass:: orcus::spreadsheet::iface::import_xf :members: -.. doxygenclass:: orcus::spreadsheet::iface::export_factory +.. doxygenclass:: orcus::spreadsheet::iface::import_cell_style + :members: + + +View properties import +---------------------- + +.. doxygenclass:: orcus::spreadsheet::iface::import_sheet_view :members: + +Document export +--------------- + .. doxygenclass:: orcus::spreadsheet::iface::export_sheet :members: + +.. doxygenclass:: orcus::spreadsheet::iface::export_factory + :members: + + diff --git a/doc_example/spreadsheet_doc_1.cpp b/doc_example/spreadsheet_doc_1.cpp index 33f16458..91dcbeae 100644 --- a/doc_example/spreadsheet_doc_1.cpp +++ b/doc_example/spreadsheet_doc_1.cpp @@ -24,7 +24,8 @@ int main() // Pass the factory to the document loader, and read the content from a file // to populate the document. orcus_ods loader(&factory); - loader.read_file(input_dir / "document.ods"); + auto filepath = input_dir / "document.ods"; + loader.read_file(filepath.native()); // Now that the document is fully populated, access its content. 
const ixion::model_context& model = doc.get_model_context(); diff --git a/doc_example/spreadsheet_doc_1_num_and_formula.cpp b/doc_example/spreadsheet_doc_1_num_and_formula.cpp index 6ac6d1d9..f8476cbf 100644 --- a/doc_example/spreadsheet_doc_1_num_and_formula.cpp +++ b/doc_example/spreadsheet_doc_1_num_and_formula.cpp @@ -25,7 +25,8 @@ int main() // Pass the factory to the document loader, and read the content from a file // to populate the document. orcus_ods loader(&factory); - loader.read_file(input_dir / "document.ods"); + auto filepath = input_dir / "document.ods"; + loader.read_file(filepath.native()); doc.recalc_formula_cells(); // Now that the document is fully populated, access its content. diff --git a/doc_example/spreadsheet_doc_2.cpp b/doc_example/spreadsheet_doc_2.cpp index 3420ae60..614b50b8 100644 --- a/doc_example/spreadsheet_doc_2.cpp +++ b/doc_example/spreadsheet_doc_2.cpp @@ -34,10 +34,11 @@ public: int main() { std::filesystem::path input_dir = std::getenv("INPUTDIR"); + auto filepath = input_dir / "multi-sheets.ods"; my_empty_import_factory factory; orcus::orcus_ods loader(&factory); - loader.read_file(input_dir / "multi-sheets.ods"); + loader.read_file(filepath.native()); return EXIT_SUCCESS; } diff --git a/doc_example/spreadsheet_doc_2_sheets_no_string_pool.cpp b/doc_example/spreadsheet_doc_2_sheets_no_string_pool.cpp index 94049910..ea58d5e1 100644 --- a/doc_example/spreadsheet_doc_2_sheets_no_string_pool.cpp +++ b/doc_example/spreadsheet_doc_2_sheets_no_string_pool.cpp @@ -113,10 +113,11 @@ public: int main() { std::filesystem::path input_dir = std::getenv("INPUTDIR"); + auto filepath = input_dir / "multi-sheets.ods"; my_import_factory factory; orcus::orcus_ods loader(&factory); - loader.read_file(input_dir / "multi-sheets.ods"); + loader.read_file(filepath.native()); return EXIT_SUCCESS; } diff --git a/doc_example/spreadsheet_doc_2_sheets_with_formula.cpp b/doc_example/spreadsheet_doc_2_sheets_with_formula.cpp index 6f44aaac..29511e47 
100644 --- a/doc_example/spreadsheet_doc_2_sheets_with_formula.cpp +++ b/doc_example/spreadsheet_doc_2_sheets_with_formula.cpp @@ -280,10 +280,11 @@ public: int main() { std::filesystem::path input_dir = std::getenv("INPUTDIR"); + auto filepath = input_dir / "multi-sheets.ods"; my_import_factory factory; orcus::orcus_ods loader(&factory); - loader.read_file(input_dir / "multi-sheets.ods"); + loader.read_file(filepath.native()); return EXIT_SUCCESS; } diff --git a/doc_example/spreadsheet_doc_2_sheets_with_string_pool.cpp b/doc_example/spreadsheet_doc_2_sheets_with_string_pool.cpp index 2888a580..76fcf807 100644 --- a/doc_example/spreadsheet_doc_2_sheets_with_string_pool.cpp +++ b/doc_example/spreadsheet_doc_2_sheets_with_string_pool.cpp @@ -194,10 +194,11 @@ public: int main() { std::filesystem::path input_dir = std::getenv("INPUTDIR"); + auto filepath = input_dir / "multi-sheets.ods"; my_import_factory factory; orcus::orcus_ods loader(&factory); - loader.read_file(input_dir / "multi-sheets.ods"); + loader.read_file(filepath.native()); return EXIT_SUCCESS; } diff --git a/include/orcus/interface.hpp b/include/orcus/interface.hpp index 494108ad..e6237929 100644 --- a/include/orcus/interface.hpp +++ b/include/orcus/interface.hpp @@ -37,7 +37,7 @@ public: * * @param filepath path to a local file. It must be a system path. */ - virtual void read_file(const std::string& filepath) = 0; + virtual void read_file(std::string_view filepath) = 0; /** * Read the content of an in-memory stream. 
diff --git a/include/orcus/orcus_csv.hpp b/include/orcus/orcus_csv.hpp index f3f07f24..3e34c154 100644 --- a/include/orcus/orcus_csv.hpp +++ b/include/orcus/orcus_csv.hpp @@ -29,7 +29,7 @@ public: orcus_csv(spreadsheet::iface::import_factory* factory); ~orcus_csv(); - virtual void read_file(const std::string& filepath) override; + virtual void read_file(std::string_view filepath) override; virtual void read_stream(std::string_view stream) override; virtual std::string_view get_name() const override; diff --git a/include/orcus/orcus_gnumeric.hpp b/include/orcus/orcus_gnumeric.hpp index a8bf3b38..54f74a27 100644 --- a/include/orcus/orcus_gnumeric.hpp +++ b/include/orcus/orcus_gnumeric.hpp @@ -30,7 +30,7 @@ public: static bool detect(const unsigned char* blob, size_t size); - virtual void read_file(const std::string& filepath) override; + virtual void read_file(std::string_view filepath) override; virtual void read_stream(std::string_view stream) override; diff --git a/include/orcus/orcus_ods.hpp b/include/orcus/orcus_ods.hpp index 74d385a2..08eb1972 100644 --- a/include/orcus/orcus_ods.hpp +++ b/include/orcus/orcus_ods.hpp @@ -33,7 +33,7 @@ public: static bool detect(const unsigned char* blob, size_t size); - virtual void read_file(const std::string& filepath) override; + virtual void read_file(std::string_view filepath) override; virtual void read_stream(std::string_view stream) override; diff --git a/include/orcus/orcus_xls_xml.hpp b/include/orcus/orcus_xls_xml.hpp index 14300b6a..4534bfc6 100644 --- a/include/orcus/orcus_xls_xml.hpp +++ b/include/orcus/orcus_xls_xml.hpp @@ -31,7 +31,7 @@ public: static bool detect(const unsigned char* blob, size_t size); - virtual void read_file(const std::string& filepath) override; + virtual void read_file(std::string_view filepath) override; virtual void read_stream(std::string_view stream) override; virtual std::string_view get_name() const override; diff --git a/include/orcus/orcus_xlsx.hpp b/include/orcus/orcus_xlsx.hpp 
index 6ad0ca24..68b01c09 100644 --- a/include/orcus/orcus_xlsx.hpp +++ b/include/orcus/orcus_xlsx.hpp @@ -38,7 +38,7 @@ public: static bool detect(const unsigned char* blob, size_t size); - virtual void read_file(const std::string& filepath) override; + virtual void read_file(std::string_view filepath) override; virtual void read_stream(std::string_view stream) override; virtual std::string_view get_name() const override; diff --git a/src/liborcus/orcus_csv.cpp b/src/liborcus/orcus_csv.cpp index 5ea5e983..dff2b2d4 100644 --- a/src/liborcus/orcus_csv.cpp +++ b/src/liborcus/orcus_csv.cpp @@ -170,9 +170,9 @@ orcus_csv::orcus_csv(spreadsheet::iface::import_factory* factory) : orcus_csv::~orcus_csv() {} -void orcus_csv::read_file(const string& filepath) +void orcus_csv::read_file(std::string_view filepath) { - file_content fc(filepath.data()); + file_content fc(filepath); mp_impl->parse(fc.str(), get_config()); mp_impl->factory->finalize(); } diff --git a/src/liborcus/orcus_gnumeric.cpp b/src/liborcus/orcus_gnumeric.cpp index 51755609..76c13682 100644 --- a/src/liborcus/orcus_gnumeric.cpp +++ b/src/liborcus/orcus_gnumeric.cpp @@ -122,13 +122,13 @@ bool orcus_gnumeric::detect(const unsigned char* buffer, size_t size) return false; } -void orcus_gnumeric::read_file(const string& filepath) +void orcus_gnumeric::read_file(std::string_view filepath) { #if ORCUS_DEBUG_GNUMERIC cout << "reading " << filepath << endl; #endif - file_content content(filepath.data()); + file_content content(filepath); if (content.empty()) return; diff --git a/src/liborcus/orcus_ods.cpp b/src/liborcus/orcus_ods.cpp index f6b24a34..15785e8d 100644 --- a/src/liborcus/orcus_ods.cpp +++ b/src/liborcus/orcus_ods.cpp @@ -183,9 +183,9 @@ bool orcus_ods::detect(const unsigned char* blob, size_t size) return true; } -void orcus_ods::read_file(const std::string& filepath) +void orcus_ods::read_file(std::string_view filepath) { - zip_archive_stream_fd stream(filepath.data()); + zip_archive_stream_fd 
stream(std::string{filepath}.c_str()); read_file_impl(&stream); } diff --git a/src/liborcus/orcus_xls_xml.cpp b/src/liborcus/orcus_xls_xml.cpp index 7c59d2df..06e323d5 100644 --- a/src/liborcus/orcus_xls_xml.cpp +++ b/src/liborcus/orcus_xls_xml.cpp @@ -102,7 +102,7 @@ bool orcus_xls_xml::detect(const unsigned char* buffer, size_t size) return false; } -void orcus_xls_xml::read_file(const string& filepath) +void orcus_xls_xml::read_file(std::string_view filepath) { file_content content(filepath.data()); if (content.empty()) diff --git a/src/liborcus/orcus_xlsx.cpp b/src/liborcus/orcus_xlsx.cpp index 01caec4c..b0f3ac4b 100644 --- a/src/liborcus/orcus_xlsx.cpp +++ b/src/liborcus/orcus_xlsx.cpp @@ -197,9 +197,10 @@ bool orcus_xlsx::detect(const unsigned char* blob, size_t size) } } -void orcus_xlsx::read_file(const string& filepath) +void orcus_xlsx::read_file(std::string_view filepath) { - std::unique_ptr stream(new zip_archive_stream_fd(filepath.c_str())); + std::unique_ptr stream( + new zip_archive_stream_fd(std::string{filepath}.c_str())); mp_impl->m_opc_reader.read_file(std::move(stream)); // Formulas need to be inserted to the document after the shared string -- GitLab From 0d9926657e07b0700dc28054432b88beb0da542b Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Wed, 30 Nov 2022 22:29:07 -0500 Subject: [PATCH 68/91] Add more docs to spreadsheet types This is still on-going. --- doc/cpp/filter/interface/types.rst | 2 + include/orcus/spreadsheet/types.hpp | 154 ++++++++++++++++++++++++---- 2 files changed, 138 insertions(+), 18 deletions(-) diff --git a/doc/cpp/filter/interface/types.rst b/doc/cpp/filter/interface/types.rst index 01519092..46ab68c1 100644 --- a/doc/cpp/filter/interface/types.rst +++ b/doc/cpp/filter/interface/types.rst @@ -2,6 +2,8 @@ Spreadsheet types ================= +These types are used throughout the spreadsheet import and export interfaces. 
+ Type aliases ------------ diff --git a/include/orcus/spreadsheet/types.hpp b/include/orcus/spreadsheet/types.hpp index c588f892..5af0b45b 100644 --- a/include/orcus/spreadsheet/types.hpp +++ b/include/orcus/spreadsheet/types.hpp @@ -21,43 +21,95 @@ namespace orcus { namespace spreadsheet { -typedef int32_t row_t; -typedef int32_t col_t; -typedef int32_t sheet_t; -typedef uint8_t color_elem_t; -typedef uint16_t col_width_t; -typedef uint16_t row_height_t; -typedef uint32_t string_id_t; - -typedef uint32_t pivot_cache_id_t; +/** Row ID type. */ +using row_t = int32_t; +/** Column ID type. */ +using col_t = int32_t; +/** Sheet ID type. */ +using sheet_t = int32_t; +/** Individual color element type. */ +using color_elem_t = uint8_t; +/** Type for column width values. Column width values are stored in twips. */ +using col_width_t = uint16_t; +/** Type for row height values. Row height values are stored in twips. */ +using row_height_t = uint16_t; +/** Type for string ID's for string cells. */ +using string_id_t = uint32_t; +/** Pivot cache ID type. */ +using pivot_cache_id_t = uint32_t; +/** + * Get the special column width value that represents the default column + * width. The value itself is not to be used as an actual width value. + * + * @return value that represents the default column width. + */ ORCUS_DLLPUBLIC col_width_t get_default_column_width(); + +/** + * Get the special row height value that represents the default row height. + * The value itself is not to be used as an actual row height value. + * + * @return value that represents the default row height. + */ ORCUS_DLLPUBLIC row_height_t get_default_row_height(); +/** + * Type of error value in cells. + */ enum class error_value_t { + /** + * Error type unknown, typically used as an initial error value or generic + * default value. + */ unknown = 0, - null, // #NULL! - div0, // #DIV/0! - value, // #VALUE! - ref, // #REF! - name, // #NAME? - num, // #NUM! - na // #N/A! 
+ /** Null reference error, displayed as `#NULL!`. */ + null, + /** Division-by-zero error, displayed as `#DIV/0!`. */ + div0, + /** Formula expression error, displayed as `#VALUE!`. */ + value, + /** Reference error, displayed as `#REF!`. */ + ref, + /** Invalid named-expression error, displayed as `#NAME?`. */ + name, + /** Invalid numeric value error, displayed as `#NUM!`. */ + num, + /** No value is available error, displayed as `#N/A!`. */ + na }; +/** + * Type of border direction, used to reference the position of a border in a + * cell. + */ enum class border_direction_t { + /** Unknown or uninitialized border direction value. */ unknown = 0, + /** Top border of a cell. */ top, + /** Bottom border of a cell. */ bottom, + /** Left border of a cell. */ left, + /** Right border of a cell. */ right, + /** + * Cross-diagonal borders of a cell. This is equivalent to both + * @p diagonal_bl_tr and @p diagonal_tl_br combined. + */ diagonal, + /** Diagonal border of a cell that runs from bottom-left to top-right. */ diagonal_bl_tr, + /** Diagonal border of a cell that runs from top-left to bottom-right. */ diagonal_tl_br }; +/** + * Type of border style. + */ enum class border_style_t { unknown = 0, @@ -80,6 +132,9 @@ enum class border_style_t fine_dashed }; +/** + * Type of fill pattern for cell background. + */ enum class fill_pattern_t { none = 0, @@ -103,6 +158,11 @@ enum class fill_pattern_t medium_gray }; +/** + * Strikethrough style as applied to a cell value. + * + * @note This is specific to ODS format. + */ enum class strikethrough_style_t { none = 0, @@ -115,6 +175,11 @@ enum class strikethrough_style_t wave }; +/** + * Strikethrough type as applied to a cell value. + * + * @note This is specific to ODS format. + */ enum class strikethrough_type_t { unknown = 0, @@ -123,6 +188,11 @@ enum class strikethrough_type_t double_type }; +/** + * Width of strikethrough applied to a cell value. + * + * @note This is specific to ODS format. 
+ */ enum class strikethrough_width_t { unknown = 0, @@ -133,10 +203,17 @@ enum class strikethrough_width_t bold }; +/** + * Text used for strike-through. + * + * @note This is specific to ODS format. + */ enum class strikethrough_text_t { unknown = 0, + /** `/` is used as the text. */ slash, + /** `X` is used as the text. */ cross }; @@ -158,12 +235,20 @@ enum class formula_grammar_t gnumeric }; +/** + * Type of formula expression. + */ enum class formula_t { + /** Formula expression type unknown, or generic default value. */ unknown = 0, + /** Formula expression in an array of cells. */ array, + /** Formula expression in a data table. */ data_table, + /** Formula expression in a normal formula cell. */ normal, + /** Formula expression in a shared formula cell. */ shared }; @@ -203,13 +288,26 @@ enum class formula_error_policy_t skip }; +/** + * Underline type for a cell value. + */ enum class underline_t { none = 0, single_line, - single_accounting, // unique to xlsx + /** + * Single line for accounting format. + * + * @note This is unique to xlsx format. + */ + single_accounting, double_line, - double_accounting, // unique to xlsx + /** + * Double line for accounting format. + * + * @note This is unique to xlsx format. + */ + double_accounting, dotted, dash, long_dash, @@ -240,19 +338,33 @@ enum class underline_width_t positive_length }; +/** + * Underline mode that determines whether an underline is applied to both + * words and spaces, or words only. + */ enum class underline_mode_t { + /** Underline is applied to both words and spaces. */ continuous = 0, + /** Underline is applied only to words. */ skip_white_space }; +/** + * Whether a single line or a double line is used as an underline. + */ enum class underline_type_t { none = 0, + /** A single line is used as an underline. */ single_type, + /** A double line is used as an underline. */ double_type }; +/** + * Collection of various underline attributes. 
+ */ struct underline_attrs_t { underline_t underline_style; @@ -261,6 +373,9 @@ struct underline_attrs_t underline_type_t underline_type; }; +/** + * Type of horizontal alignment applied to a cell content. + */ enum class hor_alignment_t { unknown = 0, @@ -272,6 +387,9 @@ enum class hor_alignment_t filled }; +/** + * Type of vertical alignment applied to a cell content. + */ enum class ver_alignment_t { unknown = 0, -- GitLab From 7fdc89c2a0fbbd4f4b482d1567f54caa831fcd09 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 5 Dec 2022 20:03:14 -0500 Subject: [PATCH 69/91] More on documenting the base spreadsheet types Move color_t, format_run and format_runs_t to document_types.hpp, since technically these types are only used in the backend document model. --- doc/cpp/filter/interface/functions.rst | 6 +- doc/cpp/filter/interface/index.rst | 1 + doc/cpp/filter/interface/types.rst | 19 +-- doc/cpp/filter/interface/view_types.rst | 20 +++ doc/cpp/model/index.rst | 4 +- doc/cpp/model/spreadsheet.rst | 7 + doc/overview/doc-user.rst | 2 +- include/orcus/spreadsheet/Makefile.am | 1 + include/orcus/spreadsheet/document_types.hpp | 55 ++++++++ include/orcus/spreadsheet/shared_strings.hpp | 2 +- include/orcus/spreadsheet/styles.hpp | 2 +- include/orcus/spreadsheet/types.hpp | 141 +++++++++++-------- slickedit/cpp.vpj | 6 +- src/liborcus/spreadsheet_types.cpp | 63 --------- src/spreadsheet/CMakeLists.txt | 1 + src/spreadsheet/Makefile.am | 1 + src/spreadsheet/document_types.cpp | 77 ++++++++++ src/spreadsheet/factory_shared_strings.cpp | 1 - src/spreadsheet/factory_shared_strings.hpp | 1 + 19 files changed, 271 insertions(+), 139 deletions(-) create mode 100644 doc/cpp/filter/interface/view_types.rst create mode 100644 include/orcus/spreadsheet/document_types.hpp create mode 100644 src/spreadsheet/document_types.cpp diff --git a/doc/cpp/filter/interface/functions.rst b/doc/cpp/filter/interface/functions.rst index b7e62ed9..8aaff512 100644 --- 
a/doc/cpp/filter/interface/functions.rst +++ b/doc/cpp/filter/interface/functions.rst @@ -1,6 +1,6 @@ -Spreadsheet global functions -============================ +Spreadsheet utility functions +============================= .. doxygenfunction:: orcus::spreadsheet::get_default_column_width .. doxygenfunction:: orcus::spreadsheet::get_default_row_height @@ -8,3 +8,5 @@ Spreadsheet global functions .. doxygenfunction:: orcus::spreadsheet::to_pivot_cache_group_by_enum .. doxygenfunction:: orcus::spreadsheet::to_error_value_enum .. doxygenfunction:: orcus::spreadsheet::to_color_rgb +.. doxygenfunction:: orcus::spreadsheet::to_rc_address +.. doxygenfunction:: orcus::spreadsheet::to_rc_range diff --git a/doc/cpp/filter/interface/index.rst b/doc/cpp/filter/interface/index.rst index 1f7ec154..24c5d2ad 100644 --- a/doc/cpp/filter/interface/index.rst +++ b/doc/cpp/filter/interface/index.rst @@ -8,5 +8,6 @@ Types and interfaces global.rst spreadsheet.rst types.rst + view_types.rst functions.rst diff --git a/doc/cpp/filter/interface/types.rst b/doc/cpp/filter/interface/types.rst index 46ab68c1..8d7e89fb 100644 --- a/doc/cpp/filter/interface/types.rst +++ b/doc/cpp/filter/interface/types.rst @@ -4,8 +4,8 @@ Spreadsheet types These types are used throughout the spreadsheet import and export interfaces. -Type aliases ------------- +Integral types +-------------- .. doxygentypedef:: orcus::spreadsheet::row_t .. doxygentypedef:: orcus::spreadsheet::col_t @@ -31,13 +31,16 @@ Structs .. doxygenstruct:: orcus::spreadsheet::range_t :members: -.. doxygenstruct:: orcus::spreadsheet::color_rgb_t +.. doxygenstruct:: orcus::spreadsheet::src_address_t + :members: + +.. doxygenstruct:: orcus::spreadsheet::src_range_t :members: -.. doxygenstruct:: orcus::spreadsheet::split_pane_t +.. doxygenstruct:: orcus::spreadsheet::color_t :members: -.. doxygenstruct:: orcus::spreadsheet::frozen_pane_t +.. doxygenstruct:: orcus::spreadsheet::color_rgb_t :members: @@ -54,14 +57,16 @@ Enums .. 
doxygenenum:: orcus::spreadsheet::strikethrough_text_t .. doxygenenum:: orcus::spreadsheet::formula_grammar_t .. doxygenenum:: orcus::spreadsheet::formula_t +.. doxygenenum:: orcus::spreadsheet::formula_ref_context_t +.. doxygenenum:: orcus::spreadsheet::formula_error_policy_t .. doxygenenum:: orcus::spreadsheet::underline_t .. doxygenenum:: orcus::spreadsheet::underline_width_t .. doxygenenum:: orcus::spreadsheet::underline_mode_t .. doxygenenum:: orcus::spreadsheet::underline_type_t .. doxygenenum:: orcus::spreadsheet::hor_alignment_t .. doxygenenum:: orcus::spreadsheet::ver_alignment_t -.. doxygenenum:: orcus::spreadsheet::data_table_type_t .. doxygenenum:: orcus::spreadsheet::xf_category_t +.. doxygenenum:: orcus::spreadsheet::data_table_type_t .. doxygenenum:: orcus::spreadsheet::totals_row_function_t .. doxygenenum:: orcus::spreadsheet::conditional_format_t .. doxygenenum:: orcus::spreadsheet::condition_operator_t @@ -69,5 +74,3 @@ Enums .. doxygenenum:: orcus::spreadsheet::condition_date_t .. doxygenenum:: orcus::spreadsheet::databar_axis_t .. doxygenenum:: orcus::spreadsheet::pivot_cache_group_by_t -.. doxygenenum:: orcus::spreadsheet::sheet_pane_t -.. doxygenenum:: orcus::spreadsheet::pane_state_t diff --git a/doc/cpp/filter/interface/view_types.rst b/doc/cpp/filter/interface/view_types.rst new file mode 100644 index 00000000..413be92d --- /dev/null +++ b/doc/cpp/filter/interface/view_types.rst @@ -0,0 +1,20 @@ + +Spreadsheet view types +====================== + +Structs +------- + +.. doxygenstruct:: orcus::spreadsheet::split_pane_t + :members: + +.. doxygenstruct:: orcus::spreadsheet::frozen_pane_t + :members: + + +Enums +----- + +.. doxygenenum:: orcus::spreadsheet::sheet_pane_t +.. 
doxygenenum:: orcus::spreadsheet::pane_state_t + diff --git a/doc/cpp/model/index.rst b/doc/cpp/model/index.rst index 8a0c3d51..6dd0ccc6 100644 --- a/doc/cpp/model/index.rst +++ b/doc/cpp/model/index.rst @@ -1,6 +1,6 @@ -Document model -============== +Document models +=============== .. toctree:: :maxdepth: 1 diff --git a/doc/cpp/model/spreadsheet.rst b/doc/cpp/model/spreadsheet.rst index e940c2b1..b7cd4da7 100644 --- a/doc/cpp/model/spreadsheet.rst +++ b/doc/cpp/model/spreadsheet.rst @@ -3,6 +3,13 @@ Spreadsheet document ==================== +Document types +-------------- + +.. doxygenstruct:: orcus::spreadsheet::format_run +.. doxygentypedef:: orcus::spreadsheet::format_runs_t + + Document -------- diff --git a/doc/overview/doc-user.rst b/doc/overview/doc-user.rst index e6317a5c..a1292e5c 100644 --- a/doc/overview/doc-user.rst +++ b/doc/overview/doc-user.rst @@ -570,5 +570,5 @@ Implement more interfaces ------------------------- This section has covered only a part of the available spreadsheet interfaces -you can implement in your code. Refer to the :ref:`spreadsheet-interface` +you can implement in your code. Refer to the :ref:`spreadsheet-interfaces` section to see the complete list of interfaces. diff --git a/include/orcus/spreadsheet/Makefile.am b/include/orcus/spreadsheet/Makefile.am index d49cbb91..b01bce74 100644 --- a/include/orcus/spreadsheet/Makefile.am +++ b/include/orcus/spreadsheet/Makefile.am @@ -15,6 +15,7 @@ liborcus_HEADERS += \ auto_filter.hpp \ config.hpp \ document.hpp \ + document_types.hpp \ factory.hpp \ pivot.hpp \ shared_strings.hpp \ diff --git a/include/orcus/spreadsheet/document_types.hpp b/include/orcus/spreadsheet/document_types.hpp new file mode 100644 index 00000000..28d8c22f --- /dev/null +++ b/include/orcus/spreadsheet/document_types.hpp @@ -0,0 +1,55 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 
2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#pragma once + +#include "types.hpp" +#include + +namespace orcus { namespace spreadsheet { + +/** + * Stores a color value in ARGB format. + */ +struct ORCUS_SPM_DLLPUBLIC color_t +{ + color_elem_t alpha; + color_elem_t red; + color_elem_t green; + color_elem_t blue; + + color_t(); + color_t(color_elem_t _red, color_elem_t _green, color_elem_t _blue); + color_t(color_elem_t _alpha, color_elem_t _red, color_elem_t _green, color_elem_t _blue); + + void reset(); + + bool operator==(const color_t& other) const; + bool operator!=(const color_t& other) const; +}; + +struct ORCUS_SPM_DLLPUBLIC format_run +{ + size_t pos; + size_t size; + std::string_view font; + double font_size; + color_t color; + bool bold:1; + bool italic:1; + + format_run(); + + void reset(); + bool formatted() const; +}; + +using format_runs_t = std::vector; + +}} // namespace orcus::spreadsheet + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/orcus/spreadsheet/shared_strings.hpp b/include/orcus/spreadsheet/shared_strings.hpp index 9c3a257c..28799b9c 100644 --- a/include/orcus/spreadsheet/shared_strings.hpp +++ b/include/orcus/spreadsheet/shared_strings.hpp @@ -8,7 +8,7 @@ #ifndef INCLUDED_ORCUS_SPREADSHEET_SHARED_STRINGS_HPP #define INCLUDED_ORCUS_SPREADSHEET_SHARED_STRINGS_HPP -#include "types.hpp" +#include "document_types.hpp" #include #include diff --git a/include/orcus/spreadsheet/styles.hpp b/include/orcus/spreadsheet/styles.hpp index 62340a8e..f6097787 100644 --- a/include/orcus/spreadsheet/styles.hpp +++ b/include/orcus/spreadsheet/styles.hpp @@ -10,7 +10,7 @@ #include "../env.hpp" #include "../measurement.hpp" -#include "types.hpp" +#include "document_types.hpp" #include #include diff --git a/include/orcus/spreadsheet/types.hpp b/include/orcus/spreadsheet/types.hpp index 5af0b45b..c71044c3 100644 --- a/include/orcus/spreadsheet/types.hpp 
+++ b/include/orcus/spreadsheet/types.hpp @@ -265,26 +265,26 @@ enum class formula_ref_context_t */ global = 0, - /** base cell position of either a named range or expression. */ + /** Base cell position of either a named range or expression. */ named_expression_base, /** - * named range is a special case of named expression where the expression + * Named range is a special case of named expression where the expression * consists of only one range token. */ named_range, }; /** - * Policy on how to handle a formula cell containing an expression that has - * not been successfully parsed. + * Type of policy on how to handle a formula cell with an erroneous expression + * that has been parsed unsuccessfully. */ enum class formula_error_policy_t { unknown, - /** loading of the document will be halted. */ + /** Loading of the document will be halted. */ fail, - /** the error cell will be skipped. */ + /** The error cell will be skipped. */ skip }; @@ -401,17 +401,17 @@ enum class ver_alignment_t }; /** - * Cell format categories. The abbrevaition "xf" refers to "cell format" where - * the "x" stands for cell. + * Cell format categories. The abbreviation "xf" stands for "cell format" + * where the "x" is short for cell. */ enum class xf_category_t { unknown, - /** Direct cell format, also abbreviated as xf */ + /** Direct cell format, also often referenced as xf. */ cell, - /** Cell format for named styles */ + /** Cell format for named styles. */ cell_style, - /** Incremental cell format, also abbreviated as dxf */ + /** Incremental cell format, also referenced as dxf. */ differential, }; @@ -444,6 +444,9 @@ enum class totals_row_function_t custom }; +/** + * Type of conditional format. + */ enum class conditional_format_t { unknown = 0, @@ -455,6 +458,9 @@ enum class conditional_format_t iconset }; +/** + * Operator type associated with a conditional format rule. 
+ */ enum class condition_operator_t { unknown = 0, @@ -484,6 +490,14 @@ enum class condition_operator_t expression }; +/** + * Type of a condition in a conditional format rule. This is applicable only + * when the type of a conditional format entry is either: + * + * @li @p colorscale, + * @li @p databar or + * @li @p iconset. + */ enum class condition_type_t { unknown = 0, @@ -496,6 +510,10 @@ enum class condition_type_t percentile }; +/** + * Type of a date condition when the type of a conditional format entry is + * @p date. + */ enum class condition_date_t { unknown = 0, @@ -514,6 +532,10 @@ enum class condition_date_t last_year, }; +/** + * Databar axis type, applicable only when the type of a conditional format + * entry is @p databar. + */ enum class databar_axis_t { none = 0, @@ -521,31 +543,57 @@ enum class databar_axis_t automatic }; +/** + * Type of range grouping in a group field of a pivot table cache. + */ enum class pivot_cache_group_by_t { + /** + * Type of range grouping is unknown. + * + * This is an implicit default value of this type. + */ unknown = 0, - days, // grouping on "days" for date values. - hours, // grouping on "hours" for date values. - minutes, // grouping on "minutes" for date values. - months, // grouping on "months" for date values. - quarters, // grouping on "quarters" for date values. - range, // grouping by numeric ranges for numeric values. - seconds, // grouping on "seconds" for date values. - years // grouping on "years" for date values. -}; - + /** Grouping on "days" for date values. */ + days, + /** Grouping on "hours" for date values. */ + hours, + /** Grouping on "minutes" for date values. */ + minutes, + /** Grouping on "months" for date values. */ + months, + /** Grouping on "quarters" for date values. */ + quarters, + /** Grouping by numeric ranges for numeric values. */ + range, + /** Grouping on "seconds" for date values. */ + seconds, + /** Grouping on "years" for date values. 
*/ + years +}; + +/** + * Stores a 2-dimensional cell address. + */ struct address_t { row_t row; col_t column; }; +/** + * Stores the size of a range of a spreadsheet. + */ struct range_size_t { row_t rows; col_t columns; }; +/** + * Stores a 2-dimensional cell range by storing the positions of the top-left + * and bottom-right corners of the range. + */ struct range_t { address_t first; @@ -553,7 +601,7 @@ struct range_t }; /** - * Stores 3-dimensional cell address. The 'src' stands for + * Stores 3-dimensional cell address. The 'src' abbreviation stands for * sheet-row-column. */ struct src_address_t @@ -564,7 +612,7 @@ struct src_address_t }; /** - * Stores 3-dimensional range address. The 'src' stands for + * Stores 3-dimensional cell range address. The 'src' abbreviation stands for * sheet-row-column. */ struct src_range_t @@ -573,7 +621,16 @@ struct src_range_t src_address_t last; }; +/** + * Convert a 3-dimensional cell address to a 2-dimensional counterpart by + * dropping the sheet index. + */ ORCUS_DLLPUBLIC address_t to_rc_address(const src_address_t& r); + +/** + * Convert a 3-dimensional cell range address to a 2-dimensional counterpart + * by dropping the sheet indices. 
+ */ ORCUS_DLLPUBLIC range_t to_rc_range(const src_range_t& r); ORCUS_DLLPUBLIC bool operator== (const address_t& left, const address_t& right); @@ -598,23 +655,9 @@ ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const address_t& v); ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const src_address_t& v); ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const range_t& v); -struct ORCUS_SPM_DLLPUBLIC color_t -{ - color_elem_t alpha; - color_elem_t red; - color_elem_t green; - color_elem_t blue; - - color_t(); - color_t(color_elem_t _red, color_elem_t _green, color_elem_t _blue); - color_t(color_elem_t _alpha, color_elem_t _red, color_elem_t _green, color_elem_t _blue); - - void reset(); - - bool operator==(const color_t& other) const; - bool operator!=(const color_t& other) const; -}; - +/** + * Stores a color value in RGB format. + */ struct color_rgb_t { color_elem_t red; @@ -622,24 +665,6 @@ struct color_rgb_t color_elem_t blue; }; -struct ORCUS_SPM_DLLPUBLIC format_run -{ - size_t pos; - size_t size; - std::string_view font; - double font_size; - color_t color; - bool bold:1; - bool italic:1; - - format_run(); - - void reset(); - bool formatted() const; -}; - -using format_runs_t = std::vector; - /** * Convert a string representation of a totals row function name to its * equivalent enum value. 
diff --git a/slickedit/cpp.vpj b/slickedit/cpp.vpj index 9141b986..e7869075 100644 --- a/slickedit/cpp.vpj +++ b/slickedit/cpp.vpj @@ -144,8 +144,6 @@ Name="Other Files" Filters=""> - - @@ -173,6 +171,7 @@ + @@ -552,6 +551,7 @@ + @@ -624,4 +624,6 @@ + + diff --git a/src/liborcus/spreadsheet_types.cpp b/src/liborcus/spreadsheet_types.cpp index 5a313d82..bd7c0cc9 100644 --- a/src/liborcus/spreadsheet_types.cpp +++ b/src/liborcus/spreadsheet_types.cpp @@ -294,36 +294,6 @@ std::ostream& write_name_for_pos( } // anonymous namespace -color_t::color_t() : - alpha(0), red(0), green(0), blue(0) -{ -} - -color_t::color_t(color_elem_t _red, color_elem_t _green, color_elem_t _blue) : - alpha(255), red(_red), green(_green), blue(_blue) -{ -} - -color_t::color_t(color_elem_t _alpha, color_elem_t _red, color_elem_t _green, color_elem_t _blue) : - alpha(_alpha), red(_red), green(_green), blue(_blue) -{ -} - -void color_t::reset() -{ - *this = color_t(); -} - -bool color_t::operator==(const color_t& other) const -{ - return alpha == other.alpha && red == other.red && green == other.green && blue == other.blue; -} - -bool color_t::operator!=(const color_t& other) const -{ - return !operator==(other); -} - address_t to_rc_address(const src_address_t& r) { address_t ret; @@ -448,39 +418,6 @@ std::ostream& operator<< (std::ostream& os, const range_t& v) return os; } -format_run::format_run() : - pos(0), size(0), - font_size(0), - bold(false), italic(false) {} - -void format_run::reset() -{ - pos = 0; - size = 0; - font = std::string_view{}; - font_size = 0; - bold = false; - italic = false; - color = color_t(); -} - -bool format_run::formatted() const -{ - if (bold || italic) - return true; - - if (font_size) - return true; - - if (!font.empty()) - return true; - - if (color.alpha || color.red || color.green || color.blue) - return true; - - return false; -} - col_width_t get_default_column_width() { return std::numeric_limits::max(); diff --git a/src/spreadsheet/CMakeLists.txt 
b/src/spreadsheet/CMakeLists.txt index 27482def..66415111 100644 --- a/src/spreadsheet/CMakeLists.txt +++ b/src/spreadsheet/CMakeLists.txt @@ -7,6 +7,7 @@ add_library(orcus-spreadsheet-model-${ORCUS_API_VERSION} SHARED debug_state_dumper.cpp document.cpp document_impl.cpp + document_types.cpp dumper_global.cpp factory.cpp factory_pivot.cpp diff --git a/src/spreadsheet/Makefile.am b/src/spreadsheet/Makefile.am index 9f421049..17e731e6 100644 --- a/src/spreadsheet/Makefile.am +++ b/src/spreadsheet/Makefile.am @@ -24,6 +24,7 @@ liborcus_spreadsheet_model_@ORCUS_API_VERSION@_la_SOURCES = \ document.cpp \ document_impl.hpp \ document_impl.cpp \ + document_types.cpp \ dumper_global.hpp \ dumper_global.cpp \ factory.cpp \ diff --git a/src/spreadsheet/document_types.cpp b/src/spreadsheet/document_types.cpp new file mode 100644 index 00000000..88e07243 --- /dev/null +++ b/src/spreadsheet/document_types.cpp @@ -0,0 +1,77 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#include + +namespace orcus { namespace spreadsheet { + +color_t::color_t() : + alpha(0), red(0), green(0), blue(0) +{ +} + +color_t::color_t(color_elem_t _red, color_elem_t _green, color_elem_t _blue) : + alpha(255), red(_red), green(_green), blue(_blue) +{ +} + +color_t::color_t(color_elem_t _alpha, color_elem_t _red, color_elem_t _green, color_elem_t _blue) : + alpha(_alpha), red(_red), green(_green), blue(_blue) +{ +} + +void color_t::reset() +{ + *this = color_t(); +} + +bool color_t::operator==(const color_t& other) const +{ + return alpha == other.alpha && red == other.red && green == other.green && blue == other.blue; +} + +bool color_t::operator!=(const color_t& other) const +{ + return !operator==(other); +} + +format_run::format_run() : + pos(0), size(0), + font_size(0), + bold(false), italic(false) {} + +void format_run::reset() +{ + pos = 0; + size = 0; + font = std::string_view{}; + font_size = 0; + bold = false; + italic = false; + color = color_t(); +} + +bool format_run::formatted() const +{ + if (bold || italic) + return true; + + if (font_size) + return true; + + if (!font.empty()) + return true; + + if (color.alpha || color.red || color.green || color.blue) + return true; + + return false; +} + +}} // namespace orcus::spreadsheet + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/src/spreadsheet/factory_shared_strings.cpp b/src/spreadsheet/factory_shared_strings.cpp index f155fc4c..a8375c0b 100644 --- a/src/spreadsheet/factory_shared_strings.cpp +++ b/src/spreadsheet/factory_shared_strings.cpp @@ -7,7 +7,6 @@ #include "factory_shared_strings.hpp" -#include #include #include #include diff --git a/src/spreadsheet/factory_shared_strings.hpp b/src/spreadsheet/factory_shared_strings.hpp index 21eaf230..b49d2744 100644 --- a/src/spreadsheet/factory_shared_strings.hpp +++ b/src/spreadsheet/factory_shared_strings.hpp @@ -8,6 +8,7 @@ #pragma once #include +#include #include -- GitLab From 5d047a0df31cb4e5baafb7e6d4bd13335e219a5c 
Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 5 Dec 2022 20:25:30 -0500 Subject: [PATCH 70/91] Split the spreadsheet interface section into multiple sub-pages --- doc/cpp/filter/interface/index.rst | 3 +- doc/cpp/filter/interface/spreadsheet.rst | 107 ------------------ .../filter/interface/spreadsheet/document.rst | 42 +++++++ .../interface/spreadsheet/document_export.rst | 9 ++ .../filter/interface/spreadsheet/index.rst | 14 +++ .../filter/interface/spreadsheet/pivot.rst | 12 ++ .../filter/interface/spreadsheet/styles.rst | 27 +++++ doc/cpp/filter/interface/spreadsheet/view.rst | 6 + doc/cpp/filter/interface/types.rst | 3 - doc/cpp/model/spreadsheet.rst | 1 + 10 files changed, 112 insertions(+), 112 deletions(-) delete mode 100644 doc/cpp/filter/interface/spreadsheet.rst create mode 100644 doc/cpp/filter/interface/spreadsheet/document.rst create mode 100644 doc/cpp/filter/interface/spreadsheet/document_export.rst create mode 100644 doc/cpp/filter/interface/spreadsheet/index.rst create mode 100644 doc/cpp/filter/interface/spreadsheet/pivot.rst create mode 100644 doc/cpp/filter/interface/spreadsheet/styles.rst create mode 100644 doc/cpp/filter/interface/spreadsheet/view.rst diff --git a/doc/cpp/filter/interface/index.rst b/doc/cpp/filter/interface/index.rst index 24c5d2ad..67f93e94 100644 --- a/doc/cpp/filter/interface/index.rst +++ b/doc/cpp/filter/interface/index.rst @@ -6,8 +6,7 @@ Types and interfaces :maxdepth: 1 global.rst - spreadsheet.rst + spreadsheet/index.rst types.rst view_types.rst functions.rst - diff --git a/doc/cpp/filter/interface/spreadsheet.rst b/doc/cpp/filter/interface/spreadsheet.rst deleted file mode 100644 index bef3f9c3..00000000 --- a/doc/cpp/filter/interface/spreadsheet.rst +++ /dev/null @@ -1,107 +0,0 @@ - -.. _spreadsheet-interfaces: - -Spreadsheet interfaces -====================== - -Document import ---------------- - -.. doxygenclass:: orcus::spreadsheet::iface::import_shared_strings - :members: - -.. 
doxygenclass:: orcus::spreadsheet::iface::import_sheet_properties - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_named_expression - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_data_table - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_auto_filter - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_conditional_format - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_table - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_formula - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_array_formula - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_sheet - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_global_settings - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_reference_resolver - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_factory - :members: - - -Pivot table import ------------------- - -.. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_definition - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_field_group - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_records - :members: - - -Styles import -------------- - -.. doxygenclass:: orcus::spreadsheet::iface::import_styles - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_font_style - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_fill_style - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_border_style - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_cell_protection - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_number_format - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_xf - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_cell_style - :members: - - -View properties import ----------------------- - -.. 
doxygenclass:: orcus::spreadsheet::iface::import_sheet_view - :members: - - -Document export ---------------- - -.. doxygenclass:: orcus::spreadsheet::iface::export_sheet - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::export_factory - :members: - - diff --git a/doc/cpp/filter/interface/spreadsheet/document.rst b/doc/cpp/filter/interface/spreadsheet/document.rst new file mode 100644 index 00000000..25f56225 --- /dev/null +++ b/doc/cpp/filter/interface/spreadsheet/document.rst @@ -0,0 +1,42 @@ + +Document import +=============== + +.. doxygenclass:: orcus::spreadsheet::iface::import_shared_strings + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_sheet_properties + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_named_expression + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_data_table + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_auto_filter + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_conditional_format + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_table + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_formula + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_array_formula + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_sheet + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_global_settings + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_reference_resolver + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_factory + :members: diff --git a/doc/cpp/filter/interface/spreadsheet/document_export.rst b/doc/cpp/filter/interface/spreadsheet/document_export.rst new file mode 100644 index 00000000..5759d81f --- /dev/null +++ b/doc/cpp/filter/interface/spreadsheet/document_export.rst @@ -0,0 +1,9 @@ + +Document export +=============== + +.. doxygenclass:: orcus::spreadsheet::iface::export_sheet + :members: + +.. 
doxygenclass:: orcus::spreadsheet::iface::export_factory + :members: diff --git a/doc/cpp/filter/interface/spreadsheet/index.rst b/doc/cpp/filter/interface/spreadsheet/index.rst new file mode 100644 index 00000000..656875b1 --- /dev/null +++ b/doc/cpp/filter/interface/spreadsheet/index.rst @@ -0,0 +1,14 @@ + +.. _spreadsheet-interfaces: + +Spreadsheet interfaces +====================== + +.. toctree:: + :maxdepth: 1 + + document.rst + pivot.rst + styles.rst + view.rst + document_export.rst diff --git a/doc/cpp/filter/interface/spreadsheet/pivot.rst b/doc/cpp/filter/interface/spreadsheet/pivot.rst new file mode 100644 index 00000000..48a036c8 --- /dev/null +++ b/doc/cpp/filter/interface/spreadsheet/pivot.rst @@ -0,0 +1,12 @@ + +Pivot table import +================== + +.. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_definition + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_field_group + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_records + :members: diff --git a/doc/cpp/filter/interface/spreadsheet/styles.rst b/doc/cpp/filter/interface/spreadsheet/styles.rst new file mode 100644 index 00000000..75752c0d --- /dev/null +++ b/doc/cpp/filter/interface/spreadsheet/styles.rst @@ -0,0 +1,27 @@ + +Styles import +============= + +.. doxygenclass:: orcus::spreadsheet::iface::import_styles + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_font_style + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_fill_style + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_border_style + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_cell_protection + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_number_format + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_xf + :members: + +.. 
doxygenclass:: orcus::spreadsheet::iface::import_cell_style + :members: diff --git a/doc/cpp/filter/interface/spreadsheet/view.rst b/doc/cpp/filter/interface/spreadsheet/view.rst new file mode 100644 index 00000000..1c4e97f9 --- /dev/null +++ b/doc/cpp/filter/interface/spreadsheet/view.rst @@ -0,0 +1,6 @@ + +View properties import +====================== + +.. doxygenclass:: orcus::spreadsheet::iface::import_sheet_view + :members: diff --git a/doc/cpp/filter/interface/types.rst b/doc/cpp/filter/interface/types.rst index 8d7e89fb..5fd38117 100644 --- a/doc/cpp/filter/interface/types.rst +++ b/doc/cpp/filter/interface/types.rst @@ -37,9 +37,6 @@ Structs .. doxygenstruct:: orcus::spreadsheet::src_range_t :members: -.. doxygenstruct:: orcus::spreadsheet::color_t - :members: - .. doxygenstruct:: orcus::spreadsheet::color_rgb_t :members: diff --git a/doc/cpp/model/spreadsheet.rst b/doc/cpp/model/spreadsheet.rst index b7cd4da7..3550e643 100644 --- a/doc/cpp/model/spreadsheet.rst +++ b/doc/cpp/model/spreadsheet.rst @@ -6,6 +6,7 @@ Spreadsheet document Document types -------------- +.. doxygenstruct:: orcus::spreadsheet::color_t .. doxygenstruct:: orcus::spreadsheet::format_run .. 
doxygentypedef:: orcus::spreadsheet::format_runs_t -- GitLab From c8ce41c8321fb1d907357be37e1d9d995494df31 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 5 Dec 2022 20:35:45 -0500 Subject: [PATCH 71/91] Separate the sheet import interfaces into own section --- .../filter/interface/spreadsheet/document.rst | 32 +++---------------- .../filter/interface/spreadsheet/index.rst | 1 + .../filter/interface/spreadsheet/sheet.rst | 27 ++++++++++++++++ 3 files changed, 32 insertions(+), 28 deletions(-) create mode 100644 doc/cpp/filter/interface/spreadsheet/sheet.rst diff --git a/doc/cpp/filter/interface/spreadsheet/document.rst b/doc/cpp/filter/interface/spreadsheet/document.rst index 25f56225..cb5fe60d 100644 --- a/doc/cpp/filter/interface/spreadsheet/document.rst +++ b/doc/cpp/filter/interface/spreadsheet/document.rst @@ -2,41 +2,17 @@ Document import =============== -.. doxygenclass:: orcus::spreadsheet::iface::import_shared_strings - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_sheet_properties - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_named_expression - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_data_table - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_auto_filter - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_conditional_format - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_table - :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_formula +.. doxygenclass:: orcus::spreadsheet::iface::import_factory :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_array_formula +.. doxygenclass:: orcus::spreadsheet::iface::import_global_settings :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_sheet +.. doxygenclass:: orcus::spreadsheet::iface::import_shared_strings :members: -.. doxygenclass:: orcus::spreadsheet::iface::import_global_settings +.. 
doxygenclass:: orcus::spreadsheet::iface::import_named_expression :members: .. doxygenclass:: orcus::spreadsheet::iface::import_reference_resolver :members: - -.. doxygenclass:: orcus::spreadsheet::iface::import_factory - :members: diff --git a/doc/cpp/filter/interface/spreadsheet/index.rst b/doc/cpp/filter/interface/spreadsheet/index.rst index 656875b1..4254c22f 100644 --- a/doc/cpp/filter/interface/spreadsheet/index.rst +++ b/doc/cpp/filter/interface/spreadsheet/index.rst @@ -8,6 +8,7 @@ Spreadsheet interfaces :maxdepth: 1 document.rst + sheet.rst pivot.rst styles.rst view.rst diff --git a/doc/cpp/filter/interface/spreadsheet/sheet.rst b/doc/cpp/filter/interface/spreadsheet/sheet.rst new file mode 100644 index 00000000..840a7e9e --- /dev/null +++ b/doc/cpp/filter/interface/spreadsheet/sheet.rst @@ -0,0 +1,27 @@ + +Sheet import +============ + +.. doxygenclass:: orcus::spreadsheet::iface::import_sheet + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_sheet_properties + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_data_table + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_auto_filter + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_conditional_format + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_table + :members: + +.. doxygenclass:: orcus::spreadsheet::iface::import_formula + :members: + +.. 
doxygenclass:: orcus::spreadsheet::iface::import_array_formula + :members: -- GitLab From 2fa3581fef1845337eba4c4e2fc86f6d9b2f8bd9 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 5 Dec 2022 21:37:24 -0500 Subject: [PATCH 72/91] Edit documentation for import_factory --- .../orcus/spreadsheet/import_interface.hpp | 104 ++++++++++++------ 1 file changed, 71 insertions(+), 33 deletions(-) diff --git a/include/orcus/spreadsheet/import_interface.hpp b/include/orcus/spreadsheet/import_interface.hpp index e0aea72e..8507ccb3 100644 --- a/include/orcus/spreadsheet/import_interface.hpp +++ b/include/orcus/spreadsheet/import_interface.hpp @@ -792,101 +792,139 @@ public: }; /** - * This interface provides the filters a means to instantiate concrete - * classes that implement the above interfaces. The client code never has - * to manually delete objects returned by its methods; the implementor of - * this interface must manage the life cycles of objects it returns. + * This interface is the entry point for the import filter code to instantiate + * other, more specialized interfaces. The life cycles of any specialized + * interfaces returned from this interface shall be managed by the implementor + * of this interface. * - * The implementor of this interface normally wraps the document instance - * inside it and have the document instance manage the life cycles of - * various objects it creates. + * The implementer of this interface may wrap a backend document store that + * needs to be populated. */ class ORCUS_DLLPUBLIC import_factory { public: virtual ~import_factory(); + /** + * Obtain an optional interface for global settings, which is used to + * specify global filter settings that may affect how certain values and + * properties get imported by the filter. + * + * @return pointer to the global settings interface, or a @p nullptr if the + * implementor doesn't support it. 
+ */ virtual import_global_settings* get_global_settings(); /** - * @return pointer to the shared strings instance. It may return NULL if - * the client app doesn't support shared strings. + * Obtain an optional interface for importing shared strings for string + * cells. Implementing this interface is required in order to import string + * cell values. + * + * @return pointer to the shared strings interface, or a @p nullptr if the + * implementor doesn't support it. */ virtual import_shared_strings* get_shared_strings(); + /** + * Obtain an optional interface for importing global named expressions. + * + * Note that @ref import_sheet also provides the same interface, but its + * interface is for importing sheet-local named expressions. + * + * @return pointer to the global named expression interface, or a @p nullptr + * if the implementor doesn't support it. + */ virtual import_named_expression* get_named_expression(); /** - * @return pointer to the styles instance. It may return NULL if the - * client app doesn't support styles. + * Obtain an optional interface for importing styles used to add formatting + * properties to cell values. + * + * @return pointer to the styles interface, or a @p nullptr if the + * implementor doesn't support it. */ virtual import_styles* get_styles(); + /** + * Obtain an optional interface for resolving cell and cell-range references + * from string values. + * + * @param cxt context in which the formula expression containing the + * references to be resolved occurs. + * + * @return pointer to the reference resolver interface, or a @p nullptr if the + * implementor doesn't support it. + */ virtual import_reference_resolver* get_reference_resolver(formula_ref_context_t cxt); /** - * Create an interface for pivot cache definition import for a specified - * cache ID. In case a pivot cache alrady exists for the passed ID, the - * client app should overwrite the existing cache with a brand-new cache - * instance.
+ * Obtain an optional interface for pivot cache definition import for a + * specified cache ID. In case a pivot cache already exists for the passed + * ID, the implementor should overwrite the existing cache with a brand-new + * cache instance. * * @param cache_id numeric ID associated with the pivot cache. * - * @return pointer to the pivot cache interface instance. If may return - * NULL if the client app doesn't support pivot tables. + * @return pointer to the pivot cache interface, or a @p nullptr if the + * implementor doesn't support pivot cache import. */ virtual import_pivot_cache_definition* create_pivot_cache_definition( pivot_cache_id_t cache_id); /** - * Create an interface for pivot cache records import for a specified - * cache ID. + * Obtain an optional interface for pivot cache records import for a + * specified cache ID. * * @param cache_id numeric ID associated with the pivot cache. * - * @return pointer to the pivot cache records interface instance. If may - * return nullptr if the client app doesn't support pivot tables. + * @return pointer to the pivot cache records interface, or a @p nullptr if + * the implementor doesn't support pivot cache import. */ virtual import_pivot_cache_records* create_pivot_cache_records( pivot_cache_id_t cache_id); /** - * Append a sheet with specified sheet position index and name. + * Append a sheet with a specified sheet position index and name and return + * an interface for importing its content. The implementor can use a call + * to this method as a signal to create and append a new sheet instance to + * the document store. * * @param sheet_index position index of the sheet to be appended. It is * 0-based i.e. the first sheet to be appended will * have an index value of 0. * @param name sheet name. * - * @return pointer to the sheet instance. It may return nullptr if the - * client app fails to append a new sheet.
+ * @return pointer to the sheet instance, or a @p nullptr if the implementor + * doesn't support it. Note, however, that if the implementor + * doesn't support this interface, no cell values will get imported. */ virtual import_sheet* append_sheet(sheet_t sheet_index, std::string_view name) = 0; /** - * Get a sheet instance by name. + * Get a sheet instance by name. The import filter may use this method to + * get access to an existing sheet after it has been created. * * @param name sheet name. * * @return pointer to the sheet instance whose name matches the name - * passed to this method. It returns nullptr if no sheet instance - * exists by the specified name. + * passed to this method. It returns a @p nullptr if no sheet + * instance exists by the specified name. */ virtual import_sheet* get_sheet(std::string_view name) = 0; /** - * Retrieve sheet instance by specified numerical sheet index. + * Retrieve a sheet instance by a specified numerical sheet index. * - * @param sheet_index sheet index + * @param sheet_index sheet index. * - * @return pointer to the sheet instance, or nullptr if no sheet instance - * exists at specified sheet index position. + * @return pointer to the sheet instance, or a @p nullptr if no sheet + * instance exists at the specified sheet index. */ virtual import_sheet* get_sheet(sheet_t sheet_index) = 0; /** - * This method is called at the end of import, to give the implementor a - * chance to perform post-processing if necessary. + * The import filter calls this method after completing its import, to give + * the implementor a chance to perform post-processing. 
*/ virtual void finalize() = 0; }; -- GitLab From ebcab14008399e785f4b79d8021f8a5549e86660 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 5 Dec 2022 21:56:46 -0500 Subject: [PATCH 73/91] Go through and add or edit docs for global settings interface --- .../orcus/spreadsheet/import_interface.hpp | 33 +++++++++++++------ 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/include/orcus/spreadsheet/import_interface.hpp b/include/orcus/spreadsheet/import_interface.hpp index 8507ccb3..f8c0a4a8 100644 --- a/include/orcus/spreadsheet/import_interface.hpp +++ b/include/orcus/spreadsheet/import_interface.hpp @@ -717,6 +717,10 @@ public: virtual range_size_t get_sheet_size() const = 0; }; +/** + * Interface for specifying global settings that may affect how the + * implementor should process certain values and properties. + */ class ORCUS_DLLPUBLIC import_global_settings { public: @@ -724,7 +728,8 @@ public: /** * Set the date that is to be represented by a value of 0. All date - * values will be internally represented relative to this date afterward. + * values should be represented relative to this date. This may affect, for + * instance, values imported via @ref import_sheet::set_date_time(). * * @param year 1-based value representing year * @param month 1-based value representing month, varying from 1 through @@ -734,26 +739,33 @@ public: virtual void set_origin_date(int year, int month, int day) = 0; /** - * Set formula grammar to be used globally when parsing formulas if the - * grammar is not specified. This grammar will also be used when parsing + * Set the formula grammar to be used globally when parsing formulas if the + * grammar is not specified. This grammar should also be used when parsing * range strings associated with shared formula ranges, array formula * ranges, autofilter ranges etc. 
* - * @param grammar default formula grammar + * Note that the import filter may specify what formula grammar to use + * locally when importing formula expressions for cells via @ref + * import_formula::set_formula(), in which case the implementor should honor + * that one instead. + * + * @param grammar default formula grammar to use globally unless otherwise + * specified. */ virtual void set_default_formula_grammar(formula_grammar_t grammar) = 0; /** - * Get current default formula grammar. + * Get current global formula grammar. The import filter may use this + * method to query the current global formula grammar. * * @return current default formula grammar. */ virtual formula_grammar_t get_default_formula_grammar() const = 0; /** - * Set the character set to be used when parsing string values. + * Set the character set to use when parsing encoded string values. * - * @param charset character set to apply when parsing string values. + * @param charset character set to use when parsing encoded string values. */ virtual void set_character_set(character_set_t charset) = 0; }; @@ -806,9 +818,10 @@ public: virtual ~import_factory(); /** - * Obtain an optional interface for global settings, which is used to - * specify global filter settings that may affect how certain values and - * properties get imported by the filter. + * Obtain an optional interface for global settings, which the import filter + * uses to specify global filter settings that may affect how certain values + * and properties should be processed. The implementor can use this + * interface to decide how to process relevant values and properties. * * @return pointer to the global settings interface, or a @p nullptr if the * implementor doesn't support it. 
-- GitLab From 6b713551d8de28c83d107e35fa3de457e020ca8f Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Wed, 7 Dec 2022 20:36:43 -0500 Subject: [PATCH 74/91] Go through and edit the doc for import_shared_strings --- .../orcus/spreadsheet/import_interface.hpp | 88 ++++++++++++++----- 1 file changed, 64 insertions(+), 24 deletions(-) diff --git a/include/orcus/spreadsheet/import_interface.hpp b/include/orcus/spreadsheet/import_interface.hpp index f8c0a4a8..dceaaac0 100644 --- a/include/orcus/spreadsheet/import_interface.hpp +++ b/include/orcus/spreadsheet/import_interface.hpp @@ -26,7 +26,45 @@ class import_pivot_cache_records; class import_sheet_view; /** - * Interface class designed to be derived by the implementor. + * Interface for importing raw string values shared in string cells. String + * values may be either with or without formatted segments. + * + * To insert an unformatted string, simply use either append() or add() + * method. The string will then be immediately pushed to the pool. + * + * To insert a string with mixed formatted segments, you need to first use one + * or more of: + * + * @li set_segment_font() + * @li set_segment_bold() + * @li set_segment_italic() + * @li set_segment_font_name() + * @li set_segment_font_size() + * @li set_segment_font_color() + * + * to define the format attribute(s) of a string segment followed by a call to + * append_segment(). This may be repeated as many times as necessary. Then + * as the final step, call commit_segments() to insert the entire series of + * formatted segments to the pool as a single string entry. The following + * example demonstrates how the code may look like: + * + * @code{.cpp} + * // store a segment with specific font, size and boldness. + * set_segment_font_name("FreeMono"); + * set_segment_font_size(14); + * set_segment_font_bold(true); + * append_segment("a bold and big segment"); + * + * // store an unformatted segment. 
+ * append_segment(" followed by "); + * + * // store a segment with smaller, italic font. + * set_segment_font_size(7); + * set_segment_font_italic(true); + * append_segment("a small and italic segment"); + * + * commit_segments(); // commit the whole formatted string to the pool. + * @endcode */ class ORCUS_DLLPUBLIC import_shared_strings { @@ -34,67 +72,69 @@ public: virtual ~import_shared_strings(); /** - * Append new string to the string list. Order of insertion is important - * since that determines the numerical ID values of inserted strings. - * Note that this method assumes that the caller knows the string being - * appended is not yet in the pool. + * Append a new string to the sequence of strings. Order of insertion + * determines the numerical ID value of an inserted string. Note that this + * method assumes that the caller knows the string being appended is not yet + * in the pool; it does not check on duplicated strings. * * @param s string to append to the pool. * - * @return ID of the string just inserted. + * @return ID of the inserted string. */ virtual size_t append(std::string_view s) = 0; /** - * Similar to the append method, it adds new string to the string pool; + * Similar to the append() method, it adds a new string to the string pool; * however, this method checks if the string being added is already in the * pool before each insertion, to avoid duplicated strings. * * @param s string to add to the pool. * - * @return ID of the string just inserted. + * @return ID of the inserted string. */ virtual size_t add(std::string_view s) = 0; /** - * Set the index of a font to apply to the current format attributes. + * Set the index of a font to apply to the current format attributes. Refer + * to the import_font_style interface on how to obtain a font index. Note + * that a single font index is associated with multiple font-related + * formatting attributes, such as font name, font color, boldness and + * italics. 
* * @param font_index positive integer representing the font to use. */ virtual void set_segment_font(size_t font_index) = 0; /** - * Set whether or not to make the font bold to the current format - * attributes. + * Set whether or not to make the current segment bold. * * @param b true if it's bold, false otherwise. */ virtual void set_segment_bold(bool b) = 0; /** - * Set whether or not to set the font italic font to the current format - * attributes. + * Set whether or not to make the current segment italic. * * @param b true if it's italic, false otherwise. */ virtual void set_segment_italic(bool b) = 0; /** - * Set the name of a font to the current format attributes. + * Set the name of a font to the current segment. * * @param s font name. */ virtual void set_segment_font_name(std::string_view s) = 0; /** - * Set a font size to the current format attributes. + * Set a font size to the current segment. * * @param point font size in points. */ virtual void set_segment_font_size(double point) = 0; /** - * Set the color of a font in ARGB to the current format attributes. + * Set the color of a font in ARGB format to the current segment. * * @param alpha alpha component value (0-255). * @param red red component value (0-255). @@ -104,21 +144,21 @@ public: virtual void set_segment_font_color(color_elem_t alpha, color_elem_t red, color_elem_t green, color_elem_t blue) = 0; /** - * Append a string segment with the current format attributes to the - * formatted string buffer. + * Push the current string segment to the buffer. Any formatting attributes + * defined so far will be applied to this segment. * - * @param s string segment value. + * @param s string value for the segment. */ virtual void append_segment(std::string_view s) = 0; /** - * Store the formatted string in the current buffer to the shared strings - * store. 
The implementation may choose to unconditionally append the - * string to the store, or choose to look for an existing indentical - * formatted string to reuse and discard the new one if one exists. + * Store the entire formatted string in the current buffer to the shared + * strings pool. The implementor may choose to unconditionally append the + * string to the pool, or choose to find an existing duplicate and reuse + * it instead. * * @return ID of the string just inserted, or the ID of an existing string - * with identical formatting attributes. + * with identical formatting. */ virtual size_t commit_segments() = 0; }; -- GitLab From c0c1089a744ad31667b215b45c31f12d9acbd7bb Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Wed, 7 Dec 2022 22:20:31 -0500 Subject: [PATCH 75/91] Edit the doc for import_named_expression interface --- .../orcus/spreadsheet/import_interface.hpp | 73 +++++++++++++------ 1 file changed, 50 insertions(+), 23 deletions(-) diff --git a/include/orcus/spreadsheet/import_interface.hpp b/include/orcus/spreadsheet/import_interface.hpp index dceaaac0..b5002bb3 100644 --- a/include/orcus/spreadsheet/import_interface.hpp +++ b/include/orcus/spreadsheet/import_interface.hpp @@ -49,21 +49,23 @@ class import_sheet_view; * example demonstrates how the code may look like: * * @code{.cpp} + * import_shared_strings* iface = ...; + * * // store a segment with specific font, size and boldness. - * set_segment_font_name("FreeMono"); - * set_segment_font_size(14); - * set_segment_font_bold(true); - * append_segment("a bold and big segment"); + * iface->set_segment_font_name("FreeMono"); + * iface->set_segment_font_size(14); + * iface->set_segment_font_bold(true); + * iface->append_segment("a bold and big segment"); * * // store an unformatted segment. - * append_segment(" followed by "); + * iface->append_segment(" followed by "); * * // store a segment with smaller, italic font. 
- * set_segment_font_size(7); - * set_segment_font_italic(true); - * append_segment("a small and italic segment"); + * iface->set_segment_font_size(7); + * iface->set_segment_font_italic(true); + * iface->append_segment("a small and italic segment"); * - * commit_segments(); // commit the whole formatted string to the pool. + * iface->commit_segments(); // commit the whole formatted string to the pool. * @endcode */ class ORCUS_DLLPUBLIC import_shared_strings @@ -208,14 +210,36 @@ public: /** * Interface for importing named expressions or ranges. * - * Note that this interface has two different methods for defining named - * expressions - set_named_expression() and set_named_range(). + * This interface has two different methods for defining named expressions: + * + * @li set_named_expression() and + * @li set_named_range(). + * + * Generally speaking, set_named_expression() can be used to define both named + * expression and named range. However, the implementor may choose to apply a + * different syntax rule to parse an expression passed to set_named_range(), + * depending on the formula grammar defined via @ref + * import_global_settings::set_default_formula_grammar(). For instance, the + * OpenDocument Spreadsheet format is known to use different syntax rules + * between named expressions and named ranges. * - * The set_named_expression() method is generally used to pass named - * expression strings. The set_named_range() method is used only when the - * format uses a different syntax to express a named range. A named range - * is a special case of named expression where the expression consists of - * one range token. + * A named range is a special case of a named expression where the expression + * consists of only one single cell range token. + * + * Here is a code example of how a named expression is defined: + * + * @code{.cpp} + * import_named_expression* iface = ...; + * + * // set the A1 on the first sheet as its origin (optional). 
+ * src_address_t origin{0, 0, 0}; + * iface->set_base_position(origin); + * iface->set_named_expression("MyExpression", "SUM(A1:B10)+SUM(D1:D4)"); + * iface->commit(); + * @endcode + * + * Replace the above set_named_expression() call with set_named_range() if you + * wish to define a named range instead. */ class ORCUS_DLLPUBLIC import_named_expression { @@ -223,16 +247,16 @@ public: virtual ~import_named_expression(); /** - * Specify an optional base position from which to evaluate a named - * expression. If not specified, the implementor should use the top-left - * cell position on the first sheet as its implied base position. + * Specify an optional base position, or origin, from which to evaluate a + * named expression. If not specified, the implementor should use the + * top-left corner cell on the first sheet as its origin. * - * @param pos cell position to be used as the base. + * @param pos cell position to be used as the origin. */ virtual void set_base_position(const src_address_t& pos) = 0; /** - * Define a new named expression or overwrite an existing one. + * Set a named expression to the buffer. * * @param name name of the expression to be defined. * @param expression expression to be associated with the name. @@ -240,14 +264,17 @@ public: virtual void set_named_expression(std::string_view name, std::string_view expression) = 0; /** - * Define a new named range or overwrite an existin gone. Note that you - * can only define one named range or expression per single commit. + * Set a named range to the buffer. * * @param name name of the expression to be defined. * @param range range to be associated with the name. */ virtual void set_named_range(std::string_view name, std::string_view range) = 0; + /** + * Commit the named expression or range currently in the buffer to the + * document. 
+ */ virtual void commit() = 0; }; -- GitLab From 7abedd3166f0a0dbdad98b874885957091b1e90a Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Sat, 10 Dec 2022 14:30:42 -0500 Subject: [PATCH 76/91] Provide a class description for import_reference_resolver interface --- include/orcus/spreadsheet/import_interface.hpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/include/orcus/spreadsheet/import_interface.hpp b/include/orcus/spreadsheet/import_interface.hpp index b5002bb3..e543423a 100644 --- a/include/orcus/spreadsheet/import_interface.hpp +++ b/include/orcus/spreadsheet/import_interface.hpp @@ -837,6 +837,13 @@ public: virtual void set_character_set(character_set_t charset) = 0; }; +/** + * This is an interface to allow the implementor to provide its own reference + * address parsers, for both single cell references and cell range references. + * The implementor may choose to provide a different parser depending of the + * type of formula_ref_context_t argument given to the @ref + * import_factory::get_reference_resolver() call. + */ class ORCUS_DLLPUBLIC import_reference_resolver { public: @@ -932,8 +939,8 @@ public: * @param cxt context in which the formula expression containing the * references to be resolved occurs. * - * @return pointer to the reference resolve interfance, or a @nullptr if the - * implementor doesn't support it. + * @return pointer to the reference resolve interfance, or a @p nullptr if + * the implementor doesn't support it. 
*/ virtual import_reference_resolver* get_reference_resolver(formula_ref_context_t cxt); -- GitLab From 0e922fe148e8a027c03827a75f8c113a583e0cb9 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Sat, 10 Dec 2022 14:34:57 -0500 Subject: [PATCH 77/91] Give a brief introduction for the document import section --- doc/cpp/filter/interface/spreadsheet/document.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/cpp/filter/interface/spreadsheet/document.rst b/doc/cpp/filter/interface/spreadsheet/document.rst index cb5fe60d..95eed76e 100644 --- a/doc/cpp/filter/interface/spreadsheet/document.rst +++ b/doc/cpp/filter/interface/spreadsheet/document.rst @@ -2,6 +2,9 @@ Document import =============== +The following interfaces handle importing of contents and properties related to +the entire document store. + .. doxygenclass:: orcus::spreadsheet::iface::import_factory :members: -- GitLab From 6b8b218a81d1eed6ca47cdbe6a1f757af2732fde Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 12 Dec 2022 20:18:51 -0500 Subject: [PATCH 78/91] Go through and edit the doc on import_sheet --- .../orcus/spreadsheet/import_interface.hpp | 73 +++++++++++++------ 1 file changed, 50 insertions(+), 23 deletions(-) diff --git a/include/orcus/spreadsheet/import_interface.hpp b/include/orcus/spreadsheet/import_interface.hpp index e543423a..d8c155d4 100644 --- a/include/orcus/spreadsheet/import_interface.hpp +++ b/include/orcus/spreadsheet/import_interface.hpp @@ -591,69 +591,96 @@ public: }; /** - * Interface for sheet. + * Interface for importing the content and properties of a sheet. */ class ORCUS_DLLPUBLIC import_sheet { public: virtual ~import_sheet(); + /** + * Get an optional interface for importing properties that are specific to a + * view of a sheet. + * + * @return pointer to the interface for importing view properties, or a @p + * nullptr if the implementor doesn't support it. 
+ */ virtual import_sheet_view* get_sheet_view(); + /** + * Get an optional interface for importing sheet properties. + * + * @return pointer to the interface for importing sheet properties, or a @p + * nullptr if the implementor doesn't support it. + */ virtual import_sheet_properties* get_sheet_properties(); /** - * Get an interface for importing data tables. Note that the implementer - * may decide not to support this feature in which case this method - * returns NULL. The implementer is responsible for managing the life - * cycle of the returned interface object. + * Get an optional interface for importing data tables. Note that the + * implementer may decide not to support this feature in which case this + * method should return a @p nullptr. * - * The implementor should also initialize the internal state of the - * temporary data table object when this method is called. + * The implementor should initialize the internal state of the temporary + * data table object when this method is called. * - * @return pointer to the data table interface object. + * @return pointer to the data table interface object, or a @p nullptr if + * the implementor doesn't support it. */ virtual import_data_table* get_data_table(); /** - * Get an interface for importing auto filter ranges. + * Get an optional interface for importing auto filter ranges. * - * The implementor should also initialize the internal state of the - * temporary auto filter object when this method is called. + * The implementor should initialize the internal state of the temporary + * auto filter object when this method is called. * - * @return pointer to the auto filter interface object. + * @return pointer to the auto filter interface object, or a @p nullptr if + * the implementor doesn't support it. */ virtual import_auto_filter* get_auto_filter(); /** - * Get an interface for importing tables. The implementer is responsible - * for managing the life cycle of the returned interface object. 
+ * Get an interface for importing tables. * - * The implementor should also initialize the internal state of the - * temporary table object when this method is called. + * The implementor should initialize the internal state of the temporary + * table object when this method is called. * - * @return pointer to the table interface object, or NULL if the + * @return pointer to the table interface object, or @p nullptr if the * implementer doesn't support importing of tables. */ virtual import_table* get_table(); /** - * get an interface for importing conditional formats. The implementer is responsible - * for managing the life cycle of the returned interface object. + * Get an optional interface for importing conditional formats. * - * @return pointer to the conditional format interface object, or NULL - * if the implementer doesn't support importing conditional formats. + * @return pointer to the conditional format interface object, or @p nullptr + * if the implementer doesn't support importing conditional + * formats. */ virtual import_conditional_format* get_conditional_format(); + /** + * Get an optional interface for importing sheet-local named expressions. + * + * @return pointer to the sheet-local named expression interface, or a @p + * nullptr if the implementor doesn't support it. + */ virtual import_named_expression* get_named_expression(); + /** + * Get an optional interface for importing array formulas. An array formula + * is a formula expression applied to a range of cells where each cell may + * have a different result value. + * + * @return pointer to the array formula import interface, or a @p nullptr if + * the implementor doesn't support it. + */ virtual import_array_formula* get_array_formula(); /** - * Get an interface for importing formula cells. + * Get an optional interface for importing formula cells. 
* - * @return pointer to the formula interface object, or nullptr if the + * @return pointer to the formula interface object, or a @p nullptr if the * implementer doesn't support importing of formula cells. */ virtual import_formula* get_formula(); -- GitLab From bd3dd50edf12f7083729f9fc6b01138cd5830273 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 12 Dec 2022 20:42:29 -0500 Subject: [PATCH 79/91] Edit the doc on import_sheet_properties --- .../orcus/spreadsheet/import_interface.hpp | 35 +++++++++++++++---- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/include/orcus/spreadsheet/import_interface.hpp b/include/orcus/spreadsheet/import_interface.hpp index d8c155d4..57e03f95 100644 --- a/include/orcus/spreadsheet/import_interface.hpp +++ b/include/orcus/spreadsheet/import_interface.hpp @@ -166,9 +166,13 @@ public: }; /** - * Interface for importing sheet properties. Sheet properties are those - * that are used for decorative purposes but are not necessarily a part of - * the sheet cell values. + * Interface for importing sheet properties. Sheet properties include: + * + * @li column widths and row heights, + * @li hidden flags for columns and rows, and + * @li merged cell ranges. + * + * These properties are independent of the cell contents of a sheet. */ class ORCUS_DLLPUBLIC import_sheet_properties { @@ -176,7 +180,7 @@ public: virtual ~import_sheet_properties(); /** - * Set column width to specified column(s). + * Set a column width to one or more columns. * * @param col 0-based position of the first column. * @param col_span number of contiguous columns to apply the width to. @@ -186,7 +190,7 @@ public: virtual void set_column_width(col_t col, col_t col_span, double width, orcus::length_unit_t unit) = 0; /** - * Set column hidden flag to specified column(s). + * Set a column hidden flag to one or more columns. * * @param col 0-based position of the first column. * @param col_span number of contiguous columns to apply the flag to. 
@@ -194,15 +198,32 @@ public: */ virtual void set_column_hidden(col_t col, col_t col_span, bool hidden) = 0; + /** + * Set a row height to specified row. + * + * @param row 0-based position of a row. + * @param height new row height value to set. + * @param unit unit of the new row height value. + * + * @todo Convert this to take a raw span. + */ virtual void set_row_height(row_t row, double height, orcus::length_unit_t unit) = 0; + /** + * Set a row hidden flag to a specified row. + * + * @param row 0-based position of a row. + * @param hidden flag indicating whether or not the row is hidden. + * + * @todo Convert this to take a raw span. + */ virtual void set_row_hidden(row_t row, bool hidden) = 0; /** - * Specify merged cell range. + * Set a merged cell range. * * @param range structure containing the top-left and bottom-right - * positions of the merged cell range. + * positions of a merged cell range. */ virtual void set_merge_cell_range(const range_t& range) = 0; }; -- GitLab From 3732a9287ffba8ba3814e08b54f6b6aa74dec0da Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 12 Dec 2022 21:33:09 -0500 Subject: [PATCH 80/91] Document import_data_table interface --- .../orcus/spreadsheet/import_interface.hpp | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/include/orcus/spreadsheet/import_interface.hpp b/include/orcus/spreadsheet/import_interface.hpp index 57e03f95..eaed597b 100644 --- a/include/orcus/spreadsheet/import_interface.hpp +++ b/include/orcus/spreadsheet/import_interface.hpp @@ -307,14 +307,47 @@ class ORCUS_DLLPUBLIC import_data_table public: virtual ~import_data_table(); + /** + * Set the type of a data table. A data table can either: + * + * @li be a single-variable column-oriented, + * @li be a single-variable row-oriented, or + * @li use two variables that use both column and row. + * + * @param type type of a data table. + */ virtual void set_type(data_table_type_t type) = 0; + /** + * Set the range of a data table. 
+ * + * @param range range of a data table. + */ virtual void set_range(const range_t& range) = 0; + /** + * Set the reference of the first input cell. + * + * @param ref reference of the first input cell. + * @param deleted whether or not this input cell has been deleted. + */ virtual void set_first_reference(std::string_view ref, bool deleted) = 0; + /** + * Set the reference of the second input cell but only if the data table + * uses two variables. + * + * @note This method gets called only if the data table uses two variables. + * + * @param ref reference of the second input cell. + * @param deleted whether or not this input cell has been deleted. + */ virtual void set_second_reference(std::string_view ref, bool deleted) = 0; + /** + * Store the current data table data in the buffer to the backend sheet + * storage. + */ virtual void commit() = 0; }; -- GitLab From e0dc2777e10a281bc560c44c359c7c084ecc3c29 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Mon, 12 Dec 2022 22:07:28 -0500 Subject: [PATCH 81/91] Go through and edit the doc on import_auto_filter --- .../filter/interface/spreadsheet/sheet.rst | 3 ++ .../orcus/spreadsheet/import_interface.hpp | 50 ++++++++++++++++--- 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/doc/cpp/filter/interface/spreadsheet/sheet.rst b/doc/cpp/filter/interface/spreadsheet/sheet.rst index 840a7e9e..6482ac69 100644 --- a/doc/cpp/filter/interface/spreadsheet/sheet.rst +++ b/doc/cpp/filter/interface/spreadsheet/sheet.rst @@ -2,6 +2,9 @@ Sheet import ============ +The following interfaces handle importing of contents and properties related to +individual sheets. + .. 
doxygenclass:: orcus::spreadsheet::iface::import_sheet :members: diff --git a/include/orcus/spreadsheet/import_interface.hpp b/include/orcus/spreadsheet/import_interface.hpp index eaed597b..f8258cdb 100644 --- a/include/orcus/spreadsheet/import_interface.hpp +++ b/include/orcus/spreadsheet/import_interface.hpp @@ -351,6 +351,36 @@ public: virtual void commit() = 0; }; +/** + * Interface for importing auto filters. + * + * Importing a single auto filter would roughly follow the following flow: + * + * @code{.cpp} + * import_auto_filter* iface = ... ; + * + * range_t range; + * range.first.column = 0; + * range.first.row = 0; + * range.last.column = 3; + * range.last.row = 1000; + * iface->set_range(range); // Auto filter is applied for A1:D1001. + * + * // Column A is filtered for a value of "A". + * iface->set_column(0); + * iface->append_column_match_value("A"); + * iface->commit_column(); + * + * // Column D is filtered for values of 1 and 4. + * iface->set_column(3); + * iface->append_column_match_value("1"); + * iface->append_column_match_value("4"); + * iface->commit_column(); + * + * // Push the autofilter data in the current buffer to the sheet store. + * iface->commit(); + * @endcode + */ class ORCUS_DLLPUBLIC import_auto_filter { public: @@ -366,27 +396,35 @@ public: /** * Specify the column position of a filter. The position is relative to - * the first column in the auto filter range. + * the first column in the auto filter range. This method gets called at + * the beginning of each column filter data. The implementor may initialize + * the column filter data buffer when this method is called. + * + * @note This column position is relative to the first column in the + * autofilter range. * * @param col 0-based column position of a filter relative to the first - * column. + * column of the auto filter range. */ virtual void set_column(col_t col) = 0; /** - * Add a match value to the current column filter. 
+ * Append a match value to the current column filter. A single column + * filter may have one or more match values. * - * @param value match value. + * @param value match value to append to the current column filter. */ virtual void append_column_match_value(std::string_view value) = 0; /** - * Commit current column filter to the current auto filter. + * Commit the current column filter data to the current auto filter buffer. + * The implementor may clear the current column filter buffer after this + * call. */ virtual void commit_column() = 0; /** - * Commit current auto filter to the model. + * Commit current auto filter data stored in the buffer to the sheet store. */ virtual void commit() = 0; }; -- GitLab From 80676ec15b65ea6564d4a2466dfcaa456ce36d85 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 13 Dec 2022 20:13:09 -0500 Subject: [PATCH 82/91] Test file for conditional formatting import This one contains only the basic rule set. Other features such as databar will be coming later. --- test/xlsx/conditional-format/basic.xlsx | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 test/xlsx/conditional-format/basic.xlsx diff --git a/test/xlsx/conditional-format/basic.xlsx b/test/xlsx/conditional-format/basic.xlsx new file mode 100644 index 00000000..53a9fff7 --- /dev/null +++ b/test/xlsx/conditional-format/basic.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a916ae594ece9e574507c2d881adaaabdef80e75999e14d49da1d05a642abe4 +size 11666 -- GitLab From 014ffb66079ddca7b7e6fa8e07a0483dc4963523 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 13 Dec 2022 21:48:40 -0500 Subject: [PATCH 83/91] Edit the doc for import_conditional_format We should revise this interface for 0.19.0. For 0.18.0 it will be unchanged. 
--- .../orcus/spreadsheet/import_interface.hpp | 53 +++++++++++++------ 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/include/orcus/spreadsheet/import_interface.hpp b/include/orcus/spreadsheet/import_interface.hpp index f8258cdb..739a3b35 100644 --- a/include/orcus/spreadsheet/import_interface.hpp +++ b/include/orcus/spreadsheet/import_interface.hpp @@ -432,24 +432,45 @@ public: /** * This is an optional interface to import conditional formatting. * - * A conditional format consists of: - * <ul> - * <li>a range</li> - * <li>several entries</li> - * </ul> + * In general, a single conditional format consists of: * - * Each entry consists of: - * <ul> - * <li>a type</li> - * <li>a few properties depending on the type (optional)</li> - * <li>zero or more conditions depending on the type</li> - * </ul> + * @li a cell range the format is applied to, and + * @li one or more rule entries. * - * Each condition consists of: - * <ul> - * <li>a formula/value/string</li> - * <li>a color (optional)</li> - * </ul> + * Each rule entry consists of: + * + * @li a type of rule, + * @li zero or more rule properties, and + * @li zero or more conditions depending on the rule type. + * + * Lastly, each condition consists of: + * + * @li a formula, value, or string, + * @li an optional color. + * + * The flow of the import process varies depending on the type of the + * conditional formatting being imported. The following is an example of + * importing a conditional formatting that consists of a rule that applies a + * format when the cell value is greather than 2: + * + * @code{.cpp} + * import_conditional_format* iface = ... ; + * + * iface->set_range("A2:A13"); + * iface->set_xf_id(14); // apply differential format (dxf) whose ID is 14 + * iface->set_type(conditional_format_t::condition); // rule entry type + * iface->set_operator(condition_operator_t::expression); + * iface->set_operator(condition_operator_t::greater); + * + * iface->set_formula("2"); + * iface->commit_condition(); + * + * iface->commit_entry(); + * + * iface->commit_format(); + * @endcode + * + * @todo Revise this API for simplification.
*/ class ORCUS_DLLPUBLIC import_conditional_format { -- GitLab From 146300b1ba58382d1966cfc09b29e5c34d1bf2d2 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 13 Dec 2022 21:53:59 -0500 Subject: [PATCH 84/91] Update the test files for conditional formatting in xlsx --- test/xlsx/conditional-format/basic.xlsx | 4 ++-- test/xlsx/conditional-format/data-bars.xlsx | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 test/xlsx/conditional-format/data-bars.xlsx diff --git a/test/xlsx/conditional-format/basic.xlsx b/test/xlsx/conditional-format/basic.xlsx index 53a9fff7..847bb474 100644 --- a/test/xlsx/conditional-format/basic.xlsx +++ b/test/xlsx/conditional-format/basic.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a916ae594ece9e574507c2d881adaaabdef80e75999e14d49da1d05a642abe4 -size 11666 +oid sha256:41916486884cd5f351e5f996445cd9301e4cf597311d933a3b50e740910414b7 +size 13482 diff --git a/test/xlsx/conditional-format/data-bars.xlsx b/test/xlsx/conditional-format/data-bars.xlsx new file mode 100644 index 00000000..a5b77184 --- /dev/null +++ b/test/xlsx/conditional-format/data-bars.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a12ef557de31ea71d44b8e0fd6b020dbc49c22d158117df075a0f23125760408 +size 11334 -- GitLab From 63c56e1a1a00f4fa8a8d6cc422b1042cea314eb3 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 13 Dec 2022 23:12:44 -0500 Subject: [PATCH 85/91] Document import_table interface --- .../orcus/spreadsheet/import_interface.hpp | 115 +++++++++++++++++- 1 file changed, 113 insertions(+), 2 deletions(-) diff --git a/include/orcus/spreadsheet/import_interface.hpp b/include/orcus/spreadsheet/import_interface.hpp index 739a3b35..29c340eb 100644 --- a/include/orcus/spreadsheet/import_interface.hpp +++ b/include/orcus/spreadsheet/import_interface.hpp @@ -585,40 +585,151 @@ public: }; /** - * Interface for table. 
A table is a range within a sheet that consists of - * one or more data columns with a header row that contains their labels. + * Interface for table. A table is a range of cells within a sheet that + * consists of one or more data columns with a header row that contains their + * labels. */ class ORCUS_DLLPUBLIC import_table { public: virtual ~import_table(); + /** + * Get an optional interface for importing auto filter data stored as part + * of a table. + * + * The implementor should initialize the internal state of the temporary + * auto filter object when this method is called. + * + * @return pointer to the auto filter interface object, or a @p nullptr if + * the implementor doesn't support it. + */ virtual import_auto_filter* get_auto_filter(); + /** + * Set an integral identifier unique to the table. + * + * @param id identifier associated with the table. + */ virtual void set_identifier(size_t id) = 0; + /** + * Set a cell range associated with the table. + * + * @param ref cell range associated with the table. + * + * @todo use reference resolver for this. + */ virtual void set_range(std::string_view ref) = 0; + /** + * Set the number of totals rows. + * + * @param row_count number of totals rows. + */ virtual void set_totals_row_count(size_t row_count) = 0; + /** + * Set the internal name of the table. + * + * @param name name of the table. + */ virtual void set_name(std::string_view name) = 0; + /** + * Set the displayed name of the table. + * + * @param name displayed name of the table. + */ virtual void set_display_name(std::string_view name) = 0; + /** + * Set the number of columns the table contains. + * + * @param n number of columns in the table. + * + * @note This method gets called before the column data gets imported. The + * implementor can use this call to initialize the buffer for storing + * the column data. + */ virtual void set_column_count(size_t n) = 0; + /** + * Set an integral identifier for a column. 
+ * + * @param id integral identifier for a column. + */ virtual void set_column_identifier(size_t id) = 0; + + /** + * Set a name of a column. + * + * @param name name of a column. + */ virtual void set_column_name(std::string_view name) = 0; + + /** + * Set the totals row label for a column. + * + * @param label row label for a column. + */ virtual void set_column_totals_row_label(std::string_view label) = 0; + + /** + * Set the totals row function for a column. + * + * @param func totals row function for a column. + */ virtual void set_column_totals_row_function(totals_row_function_t func) = 0; + + /** + * Push and append the column data stored in the current column data buffer + * into the table buffer. + */ virtual void commit_column() = 0; + /** + * Set the name of a style to apply to the table. + * + * @param name name of a style to apply to the table. + */ virtual void set_style_name(std::string_view name) = 0; + + /** + * Specify whether or not the first column in the table should have the + * style applied. + * + * @param b whether or not the first column in the table should have the + * style applied. + */ virtual void set_style_show_first_column(bool b) = 0; + + /** + * Specify whether or not the last column in the table should have the style + * applied. + * + * @param b whether or not the last column in the table should have the + * style applied. + */ virtual void set_style_show_last_column(bool b) = 0; + + /** + * Specify whether or not row stripe formatting is applied. + * + * @param b whether or not row stripe formatting is applied. + */ virtual void set_style_show_row_stripes(bool b) = 0; + + /** + * Specify whether or not column stripe formatting is applied. + * + * @param b whether or not column stripe formatting is applied. + */ virtual void set_style_show_column_stripes(bool b) = 0; + /** + * Push the data stored in the table buffer into the document store. 
+ */ virtual void commit() = 0; }; -- GitLab From 1661fcfd0064f7d70052107104ac1ff8f0580d3e Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Tue, 13 Dec 2022 23:19:57 -0500 Subject: [PATCH 86/91] Add description to import_formula interface --- include/orcus/spreadsheet/import_interface.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/orcus/spreadsheet/import_interface.hpp b/include/orcus/spreadsheet/import_interface.hpp index 29c340eb..5dcf92c3 100644 --- a/include/orcus/spreadsheet/import_interface.hpp +++ b/include/orcus/spreadsheet/import_interface.hpp @@ -733,6 +733,11 @@ public: virtual void commit() = 0; }; +/** + * Interface for importing the properties of a single formula cell. A formula + * cell contains a formula expression that can be computed, and optionally a + * cached result of the last computation performed on the expression. + */ class ORCUS_DLLPUBLIC import_formula { public: -- GitLab From 61327a44622474ff1f9e92861fc3f4f433350f88 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Wed, 14 Dec 2022 20:17:36 -0500 Subject: [PATCH 87/91] Document import_array_formula --- .../orcus/spreadsheet/import_interface.hpp | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/include/orcus/spreadsheet/import_interface.hpp b/include/orcus/spreadsheet/import_interface.hpp index 5dcf92c3..542c4468 100644 --- a/include/orcus/spreadsheet/import_interface.hpp +++ b/include/orcus/spreadsheet/import_interface.hpp @@ -799,23 +799,72 @@ public: virtual void commit() = 0; }; +/** + * Interface for importing the properties of an array formula which occupies a + * range of cells. Cells that are part of an array formula share the same + * formula expression but may have different calculation results. + */ class ORCUS_DLLPUBLIC import_array_formula { public: virtual ~import_array_formula(); + /** + * Set the range of an array formula. + * + * @param range range of an array formula. 
+ */ virtual void set_range(const range_t& range) = 0; + /** + * Set the formula expression of an array formula. + * + * @param grammar grammar to use to compile the formula string into + * tokens. + * @param formula formula expression of an array formula. + */ virtual void set_formula(formula_grammar_t grammar, std::string_view formula) = 0; + /** + * Set a cached string result of a cell within the array formula range. + * + * @param row 0-based row position of a cell. + * @param col 0-based column position of a cell. + * @param value cached string value to set. + */ virtual void set_result_string(row_t row, col_t col, std::string_view value) = 0; + /** + * Set a cached numeric result of a cell within the array formula range. + * + * @param row 0-based row position of a cell. + * @param col 0-based column position of a cell. + * @param value cached numeric value to set. + */ virtual void set_result_value(row_t row, col_t col, double value) = 0; + /** + * Set a cached boolean result of a cell within the array formula range. + * + * @param row 0-based row position of a cell. + * @param col 0-based column position of a cell. + * @param value cached boolean value to set. + */ virtual void set_result_bool(row_t row, col_t col, bool value) = 0; + /** + * Set an empty value as a cached result to a cell within the array formula + * range. + * + * @param row 0-based row position of a cell. + * @param col 0-based column position of a cell. + */ virtual void set_result_empty(row_t row, col_t col) = 0; + /** + * Push the properties of an array formula currently stored in the buffer to + * the sheet store. 
+ */ virtual void commit() = 0; }; -- GitLab From 3ed31f183e8412b097cd51c090a72de129dbdb88 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Wed, 14 Dec 2022 21:11:09 -0500 Subject: [PATCH 88/91] import_table::set_range() to take a range_t value --- .../orcus/spreadsheet/import_interface.hpp | 8 +- src/liborcus/xlsx_table_context.cpp | 144 +++++++++--------- src/liborcus/xlsx_table_context.hpp | 4 +- src/spreadsheet/factory_table.cpp | 12 +- src/spreadsheet/factory_table.hpp | 2 +- 5 files changed, 78 insertions(+), 92 deletions(-) diff --git a/include/orcus/spreadsheet/import_interface.hpp b/include/orcus/spreadsheet/import_interface.hpp index 542c4468..f340a276 100644 --- a/include/orcus/spreadsheet/import_interface.hpp +++ b/include/orcus/spreadsheet/import_interface.hpp @@ -614,13 +614,11 @@ public: virtual void set_identifier(size_t id) = 0; /** - * Set a cell range associated with the table. + * Set a 2-dimensional cell range associated with the table. * - * @param ref cell range associated with the table. - * - * @todo use reference resolver for this. + * @param range cell range associated with the table. */ - virtual void set_range(std::string_view ref) = 0; + virtual void set_range(const range_t& range) = 0; /** * Set the number of totals rows. 
diff --git a/src/liborcus/xlsx_table_context.cpp b/src/liborcus/xlsx_table_context.cpp index 2d476100..108018b5 100644 --- a/src/liborcus/xlsx_table_context.cpp +++ b/src/liborcus/xlsx_table_context.cpp @@ -17,67 +17,15 @@ #include "orcus/spreadsheet/import_interface.hpp" #include +#include using namespace std; +namespace ss = orcus::spreadsheet; namespace orcus { namespace { -class table_attr_parser -{ - string_pool* m_pool; - - long m_id; - long m_totals_row_count; - - pstring m_name; - pstring m_display_name; - pstring m_ref; - -public: - table_attr_parser(string_pool* pool) : m_pool(pool), m_id(-1), m_totals_row_count(-1) {} - - void operator() (const xml_token_attr_t& attr) - { - if (attr.ns && attr.ns != NS_ooxml_xlsx) - return; - - switch (attr.name) - { - case XML_id: - m_id = to_long(attr.value); - break; - case XML_totalsRowCount: - m_totals_row_count = to_long(attr.value); - break; - case XML_name: - m_name = attr.value; - if (attr.transient) - m_name = m_pool->intern(m_name).first; - break; - case XML_displayName: - m_display_name = attr.value; - if (attr.transient) - m_display_name = m_pool->intern(m_display_name).first; - break; - case XML_ref: - m_ref = attr.value; - if (attr.transient) - m_ref = m_pool->intern(m_ref).first; - break; - default: - ; - } - } - - long get_id() const { return m_id; } - long get_totals_row_count() const { return m_totals_row_count; } - pstring get_name() const { return m_name; } - pstring get_display_name() const { return m_display_name; } - pstring get_ref() const { return m_ref; } -}; - class table_column_attr_parser { string_pool* m_pool; @@ -232,27 +180,9 @@ void xlsx_table_context::start_element(xmlns_id_t ns, xml_token_t name, const xm case XML_table: { xml_element_expected(parent, XMLNS_UNKNOWN_ID, XML_UNKNOWN_TOKEN); - table_attr_parser func(&get_session_context().spool); - func = for_each(attrs.begin(), attrs.end(), func); - - if (get_config().debug) - { - cout << "* table (range=" << func.get_ref() << "; id=" << 
func.get_id() - << "; name=" << func.get_name() << "; display name=" - << func.get_display_name() << ")" << endl; - cout << " * totals row count: " << func.get_totals_row_count() << endl; - } - - m_table.set_identifier(func.get_id()); - str = func.get_ref(); - m_table.set_range(str); - str = func.get_name(); - m_table.set_name(str); - str = func.get_display_name(); - m_table.set_display_name(str); - m_table.set_totals_row_count(func.get_totals_row_count()); + start_element_table(attrs); + break; } - break; case XML_tableColumns: { xml_element_expected(parent, NS_ooxml_xlsx, XML_table); @@ -317,8 +247,72 @@ bool xlsx_table_context::end_element(xmlns_id_t ns, xml_token_t name) return pop_stack(ns, name); } -void xlsx_table_context::characters(std::string_view /*str*/, bool /*transient*/) +void xlsx_table_context::start_element_table(const xml_token_attrs_t& attrs) { + long id = -1; + long totals_row_count = -1; + + std::optional<std::string_view> name; + std::optional<std::string_view> display_name; + std::optional<std::string_view> ref; + + for (const xml_token_attr_t& attr : attrs) + { + if (attr.ns) + continue; + + switch (attr.name) + { + case XML_id: + id = to_long(attr.value); + break; + case XML_totalsRowCount: + totals_row_count = to_long(attr.value); + break; + case XML_name: + name = attr.value; + break; + case XML_displayName: + display_name = attr.value; + break; + case XML_ref: + ref = attr.value; + break; + } + } + + if (get_config().debug) + { + auto str_or_not = [](const auto& v) -> std::string_view + { + return v ? 
*v : "-"; + }; + + cout << "* table (range=" << str_or_not(ref) + << "; id=" << id + << "; name=" << str_or_not(name) + << "; display name=" << str_or_not(display_name) << ")" << endl; + + cout << " * totals row count: " << totals_row_count << endl; + } + + if (id >= 0) + m_table.set_identifier(id); + + if (ref) + { + ss::range_t range = to_rc_range(m_resolver.resolve_range(*ref)); + m_table.set_range(range); + } + + if (name) + m_table.set_name(*name); + + if (display_name) + m_table.set_display_name(*display_name); + + if (totals_row_count >= 0) + m_table.set_totals_row_count(totals_row_count); } } diff --git a/src/liborcus/xlsx_table_context.hpp b/src/liborcus/xlsx_table_context.hpp index a277d2b8..8b3e629a 100644 --- a/src/liborcus/xlsx_table_context.hpp +++ b/src/liborcus/xlsx_table_context.hpp @@ -34,7 +34,9 @@ public: virtual void start_element(xmlns_id_t ns, xml_token_t name, const xml_token_attrs_t& attrs); virtual bool end_element(xmlns_id_t ns, xml_token_t name); - virtual void characters(std::string_view str, bool transient); + +private: + void start_element_table(const xml_token_attrs_t& attrs); private: spreadsheet::iface::import_table& m_table; diff --git a/src/spreadsheet/factory_table.cpp b/src/spreadsheet/factory_table.cpp index 432228dc..a77b7af3 100644 --- a/src/spreadsheet/factory_table.cpp +++ b/src/spreadsheet/factory_table.cpp @@ -105,17 +105,9 @@ iface::import_auto_filter* import_table::get_auto_filter() return &mp_impl->m_auto_filter; } -void import_table::set_range(std::string_view ref) +void import_table::set_range(const range_t& range) { - const ixion::formula_name_resolver* resolver = - mp_impl->m_doc.get_formula_name_resolver(spreadsheet::formula_ref_context_t::global); - if (!resolver) - return; - - ixion::abs_range_t& range = mp_impl->mp_data->range; - range = to_abs_range(*resolver, ref.data(), ref.size()); - if (range.valid()) - range.first.sheet = range.last.sheet = mp_impl->m_sheet.get_index(); + mp_impl->mp_data->range = 
to_abs_range(range, mp_impl->m_sheet.get_index()); } void import_table::set_identifier(size_t id) diff --git a/src/spreadsheet/factory_table.hpp b/src/spreadsheet/factory_table.hpp index 766f1e88..0a274b4c 100644 --- a/src/spreadsheet/factory_table.hpp +++ b/src/spreadsheet/factory_table.hpp @@ -28,7 +28,7 @@ public: virtual iface::import_auto_filter* get_auto_filter() override; - virtual void set_range(std::string_view ref) override; + virtual void set_range(const range_t& range) override; virtual void set_identifier(size_t id) override; virtual void set_name(std::string_view name) override; virtual void set_display_name(std::string_view name) override; -- GitLab From 8b47b0c44c24426d12b5a2ba2b838349dc135d3f Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Wed, 14 Dec 2022 21:37:00 -0500 Subject: [PATCH 89/91] Remove 'using namespace std' --- src/liborcus/xlsx_table_context.cpp | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/liborcus/xlsx_table_context.cpp b/src/liborcus/xlsx_table_context.cpp index 108018b5..47e2f7c5 100644 --- a/src/liborcus/xlsx_table_context.cpp +++ b/src/liborcus/xlsx_table_context.cpp @@ -19,7 +19,6 @@ #include #include -using namespace std; namespace ss = orcus::spreadsheet; namespace orcus { @@ -94,31 +93,31 @@ public: case XML_name: mp_table->set_style_name(attr.value); if (m_debug) - cout << " * table style info (name=" << attr.value << ")" << endl; + std::cout << " * table style info (name=" << attr.value << ")" << std::endl; break; case XML_showFirstColumn: b = to_bool(attr.value); mp_table->set_style_show_first_column(b); if (m_debug) - cout << " * show first column: " << b << endl; + std::cout << " * show first column: " << b << std::endl; break; case XML_showLastColumn: b = to_bool(attr.value); mp_table->set_style_show_last_column(b); if (m_debug) - cout << " * show last column: " << b << endl; + std::cout << " * show last column: " << b << std::endl; break; case 
XML_showRowStripes: b = to_bool(attr.value); mp_table->set_style_show_row_stripes(b); if (m_debug) - cout << " * show row stripes: " << b << endl; + std::cout << " * show row stripes: " << b << std::endl; break; case XML_showColumnStripes: b = to_bool(attr.value); mp_table->set_style_show_column_stripes(b); if (m_debug) - cout << " * show column stripes: " << b << endl; + std::cout << " * show column stripes: " << b << std::endl; break; default: ; @@ -173,7 +172,7 @@ void xlsx_table_context::start_element(xmlns_id_t ns, xml_token_t name, const xm if (ns != NS_ooxml_xlsx) return; - pstring str; + std::string_view str; switch (name) { @@ -189,7 +188,7 @@ void xlsx_table_context::start_element(xmlns_id_t ns, xml_token_t name, const xm single_long_attr_getter func(NS_ooxml_xlsx, XML_count); long column_count = for_each(attrs.begin(), attrs.end(), func).get_value(); if (get_config().debug) - cout << " * column count: " << column_count << endl; + std::cout << " * column count: " << column_count << std::endl; m_table.set_column_count(column_count); } @@ -201,9 +200,9 @@ void xlsx_table_context::start_element(xmlns_id_t ns, xml_token_t name, const xm func = for_each(attrs.begin(), attrs.end(), func); if (get_config().debug) { - cout << " * table column (id=" << func.get_id() << "; name=" << func.get_name() << ")" << endl; - cout << " * totals row label: " << func.get_totals_row_label() << endl; - cout << " * totals func: " << static_cast(func.get_totals_row_function()) << endl; + std::cout << " * table column (id=" << func.get_id() << "; name=" << func.get_name() << ")" << std::endl; + std::cout << " * totals row label: " << func.get_totals_row_label() << std::endl; + std::cout << " * totals func: " << static_cast(func.get_totals_row_function()) << std::endl; } m_table.set_column_identifier(func.get_id()); @@ -288,12 +287,12 @@ void xlsx_table_context::start_element_table(const xml_token_attrs_t& attrs) return v ? 
*v : "-"; }; - cout << "* table (range=" << str_or_not(ref) + std::cout << "* table (range=" << str_or_not(ref) << "; id=" << id << "; name=" << str_or_not(name) - << "; display name=" << str_or_not(display_name) << ")" << endl; + << "; display name=" << str_or_not(display_name) << ")" << std::endl; - cout << " * totals row count: " << totals_row_count << endl; + std::cout << " * totals row count: " << totals_row_count << std::endl; } if (id >= 0) -- GitLab From d9b740e1e5c0fa0aa0dd10dacf7748acb74dcd95 Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Wed, 14 Dec 2022 22:51:57 -0500 Subject: [PATCH 90/91] Document pivot table related interfaces Also rename create_field_group() to start_field_group(), and check for the field group interface pointer before dereferencing it. --- .../filter/interface/spreadsheet/pivot.rst | 2 + .../spreadsheet/import_interface_pivot.hpp | 53 ++++++++++++++++--- src/liborcus/xlsx_pivot_context.cpp | 31 ++++++----- src/liborcus/xlsx_pivot_context.hpp | 1 - src/spreadsheet/factory_pivot.cpp | 2 +- src/spreadsheet/factory_pivot.hpp | 2 +- 6 files changed, 64 insertions(+), 27 deletions(-) diff --git a/doc/cpp/filter/interface/spreadsheet/pivot.rst b/doc/cpp/filter/interface/spreadsheet/pivot.rst index 48a036c8..28d8df77 100644 --- a/doc/cpp/filter/interface/spreadsheet/pivot.rst +++ b/doc/cpp/filter/interface/spreadsheet/pivot.rst @@ -2,6 +2,8 @@ Pivot table import ================== +The following interfaces handle importing of contents related to pivot tables. + .. 
doxygenclass:: orcus::spreadsheet::iface::import_pivot_cache_definition :members: diff --git a/include/orcus/spreadsheet/import_interface_pivot.hpp b/include/orcus/spreadsheet/import_interface_pivot.hpp index 8fd53334..ada288d2 100644 --- a/include/orcus/spreadsheet/import_interface_pivot.hpp +++ b/include/orcus/spreadsheet/import_interface_pivot.hpp @@ -23,7 +23,7 @@ namespace orcus { namespace spreadsheet { namespace iface { class import_pivot_cache_field_group; /** - * Interface for importing pivot cache definition. + * Interface for importing pivot cache definitions. */ class ORCUS_DLLPUBLIC import_pivot_cache_definition { @@ -36,6 +36,8 @@ public: * * @param ref range string specifying the source range. * @param sheet_name name of the worksheet where the source data is located. + * + * @todo use the ref resolver to resolve the range. */ virtual void set_worksheet_source(std::string_view ref, std::string_view sheet_name) = 0; @@ -90,16 +92,17 @@ public: virtual void set_field_max_date(const date_time_t& dt) = 0; /** - * Mark the current field as a group field. + * Mark the current field as a group field and initiate its import. * - * This method gets called first to signify that the current field is a - * group field. + * The implementor should create an internal storage to prepare for the + * importing of field group data when this method gets called. * * @param base_index 0-based index of the field this field is the parent * group of. - * @return interface for importing group field data. + * @return interface for importing group field data, or a @p nullptr if the + * implementor doesn't support it. */ - virtual import_pivot_cache_field_group* create_field_group(size_t base_index) = 0; + virtual import_pivot_cache_field_group* start_field_group(size_t base_index) = 0; /** * Commit the field in the current field buffer to the pivot cache model. @@ -147,7 +150,7 @@ public: }; /** - * Interface for importing group field settings in a pivot cache. 
+ * Interface for importing group field settings in a pivot cache definition. */ class ORCUS_DLLPUBLIC import_pivot_cache_field_group { @@ -291,19 +294,53 @@ class ORCUS_DLLPUBLIC import_pivot_cache_records public: virtual ~import_pivot_cache_records(); + /** + * Set the number of records included in pivot cache records. + * + * @note This method gets called before the very first record gets imported. + * The implementor can use this call as an opportunity to initialize + * any internal buffers used to store the imported records. + * + * @param n number of records included in pivot cache records. + */ virtual void set_record_count(size_t n) = 0; + /** + * Append to the current record buffer a numeric value as a column value. + * + * @param v numeric value to append to the current record buffer as a column + * value. + */ virtual void append_record_value_numeric(double v) = 0; + /** + * Append to the current record buffer a character value as a column value. + * + * @param s character value to append to the current record buffer as a + * column value. + */ virtual void append_record_value_character(std::string_view s) = 0; + /** + * Append to the current record buffer a column value referenced by an index + * into the shared items table of a pivot cache field. The corresponding + * field in the pivot cache definition should provide the shared items table + * that this index references. + * + * @param index index into the shared items table of a pivot cache field. + */ virtual void append_record_value_shared_item(size_t index) = 0; /** - * Commit the record in the current buffer, and clears the buffer. + * Commit the record in the current record buffer. + * + * The implementor can clear the buffer afterward. */ virtual void commit_record() = 0; + /** + * Commit the entire records set to the document store. 
+ */ virtual void commit() = 0; }; diff --git a/src/liborcus/xlsx_pivot_context.cpp b/src/liborcus/xlsx_pivot_context.cpp index f351a0e2..48d1d250 100644 --- a/src/liborcus/xlsx_pivot_context.cpp +++ b/src/liborcus/xlsx_pivot_context.cpp @@ -298,7 +298,7 @@ void xlsx_pivot_cache_def_context::start_element(xmlns_id_t ns, xml_token_t name if (group_base >= 0) { // This is a group field. - m_pcache_field_group = m_pcache.create_field_group(group_base); + m_pcache_field_group = m_pcache.start_field_group(group_base); } break; } @@ -385,18 +385,21 @@ void xlsx_pivot_cache_def_context::start_element(xmlns_id_t ns, xml_token_t name } ); - // Pass the values to the interface. - m_pcache_field_group->set_range_grouping_type(group_by); - m_pcache_field_group->set_range_auto_start(auto_start); - m_pcache_field_group->set_range_auto_end(auto_end); - m_pcache_field_group->set_range_start_number(start); - m_pcache_field_group->set_range_end_number(end); - m_pcache_field_group->set_range_interval(interval); + if (m_pcache_field_group) + { + // Pass the values to the interface. 
+ m_pcache_field_group->set_range_grouping_type(group_by); + m_pcache_field_group->set_range_auto_start(auto_start); + m_pcache_field_group->set_range_auto_end(auto_end); + m_pcache_field_group->set_range_start_number(start); + m_pcache_field_group->set_range_end_number(end); + m_pcache_field_group->set_range_interval(interval); - if (start_date) - m_pcache_field_group->set_range_start_date(*start_date); - if (end_date) - m_pcache_field_group->set_range_end_date(*end_date); + if (start_date) + m_pcache_field_group->set_range_start_date(*start_date); + if (end_date) + m_pcache_field_group->set_range_end_date(*end_date); + } if (get_config().debug) { @@ -1102,10 +1105,6 @@ bool xlsx_pivot_cache_rec_context::end_element(xmlns_id_t ns, xml_token_t name) return pop_stack(ns, name); } -void xlsx_pivot_cache_rec_context::characters(std::string_view /*str*/, bool /*transient*/) -{ -} - xlsx_pivot_table_context::xlsx_pivot_table_context(session_context& cxt, const tokens& tokens) : xml_context_base(cxt, tokens) {} diff --git a/src/liborcus/xlsx_pivot_context.hpp b/src/liborcus/xlsx_pivot_context.hpp index c292fe75..f9610ba1 100644 --- a/src/liborcus/xlsx_pivot_context.hpp +++ b/src/liborcus/xlsx_pivot_context.hpp @@ -93,7 +93,6 @@ public: virtual void end_child_context(xmlns_id_t ns, xml_token_t name, xml_context_base* child); virtual void start_element(xmlns_id_t ns, xml_token_t name, const::std::vector& attrs); virtual bool end_element(xmlns_id_t ns, xml_token_t name); - virtual void characters(std::string_view str, bool transient); }; /** diff --git a/src/spreadsheet/factory_pivot.cpp b/src/spreadsheet/factory_pivot.cpp index 518c3aa8..2761da55 100644 --- a/src/spreadsheet/factory_pivot.cpp +++ b/src/spreadsheet/factory_pivot.cpp @@ -171,7 +171,7 @@ void import_pivot_cache_def::set_field_name(std::string_view name) m_current_field.name = intern(name); } -iface::import_pivot_cache_field_group* import_pivot_cache_def::create_field_group(size_t base_index) 
+iface::import_pivot_cache_field_group* import_pivot_cache_def::start_field_group(size_t base_index) { m_current_field_group = std::make_unique(m_doc, m_current_field, base_index); diff --git a/src/spreadsheet/factory_pivot.hpp b/src/spreadsheet/factory_pivot.hpp index 01173745..465fef1b 100644 --- a/src/spreadsheet/factory_pivot.hpp +++ b/src/spreadsheet/factory_pivot.hpp @@ -58,7 +58,7 @@ public: virtual void set_field_name(std::string_view name) override; - virtual iface::import_pivot_cache_field_group* create_field_group(size_t base_index) override; + virtual iface::import_pivot_cache_field_group* start_field_group(size_t base_index) override; virtual void set_field_min_value(double v) override; -- GitLab From 450b69e7c5f5e9a5984acf1f3c8535dd239eb10b Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Thu, 15 Dec 2022 17:08:08 -0500 Subject: [PATCH 91/91] Reword the description of the base_index parameter The old wording sounded weird and probably was not factual. --- include/orcus/spreadsheet/import_interface_pivot.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/orcus/spreadsheet/import_interface_pivot.hpp b/include/orcus/spreadsheet/import_interface_pivot.hpp index ada288d2..275ed444 100644 --- a/include/orcus/spreadsheet/import_interface_pivot.hpp +++ b/include/orcus/spreadsheet/import_interface_pivot.hpp @@ -97,8 +97,8 @@ public: * The implementor should create an internal storage to prepare for the * importing of field group data when this method gets called. * - * @param base_index 0-based index of the field this field is the parent - * group of. + * @param base_index 0-based index of the field this group field uses as its + * base. * @return interface for importing group field data, or a @p nullptr if the * implementor doesn't support it. */ -- GitLab