foray-commit Mailing List for FOray (Page 23)
Modular XSL-FO Implementation for Java.
Status: Alpha
Brought to you by:
victormote
You can subscribe to this list here.
| 2006 |
Jan
|
Feb
|
Mar
(139) |
Apr
(98) |
May
(250) |
Jun
(394) |
Jul
(84) |
Aug
(13) |
Sep
(420) |
Oct
(186) |
Nov
(1) |
Dec
(3) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2007 |
Jan
(108) |
Feb
(202) |
Mar
(291) |
Apr
(247) |
May
(374) |
Jun
(227) |
Jul
(231) |
Aug
(60) |
Sep
(31) |
Oct
(45) |
Nov
(18) |
Dec
|
| 2008 |
Jan
(38) |
Feb
(71) |
Mar
(142) |
Apr
|
May
(59) |
Jun
(6) |
Jul
(10) |
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2009 |
Jan
(12) |
Feb
(4) |
Mar
(88) |
Apr
(121) |
May
(17) |
Jun
(30) |
Jul
|
Aug
(5) |
Sep
|
Oct
(1) |
Nov
|
Dec
|
| 2010 |
Jan
(11) |
Feb
(76) |
Mar
(11) |
Apr
|
May
(11) |
Jun
|
Jul
|
Aug
(44) |
Sep
(14) |
Oct
(7) |
Nov
|
Dec
|
| 2011 |
Jan
|
Feb
|
Mar
|
Apr
|
May
(9) |
Jun
|
Jul
|
Aug
|
Sep
|
Oct
(10) |
Nov
|
Dec
|
| 2012 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
(3) |
Jul
(4) |
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2016 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
(168) |
| 2017 |
Jan
(77) |
Feb
(11) |
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2018 |
Jan
|
Feb
|
Mar
(1) |
Apr
(6) |
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2019 |
Jan
|
Feb
(88) |
Mar
(118) |
Apr
(1) |
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2020 |
Jan
|
Feb
|
Mar
|
Apr
|
May
(6) |
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
(141) |
| 2021 |
Jan
(170) |
Feb
(20) |
Mar
|
Apr
|
May
|
Jun
|
Jul
(1) |
Aug
|
Sep
|
Oct
(62) |
Nov
(189) |
Dec
(162) |
| 2022 |
Jan
(201) |
Feb
(118) |
Mar
(8) |
Apr
|
May
(2) |
Jun
(47) |
Jul
(19) |
Aug
(14) |
Sep
(3) |
Oct
|
Nov
(28) |
Dec
(235) |
| 2023 |
Jan
(112) |
Feb
(23) |
Mar
(2) |
Apr
(2) |
May
|
Jun
(1) |
Jul
|
Aug
(70) |
Sep
(92) |
Oct
(20) |
Nov
(1) |
Dec
(1) |
| 2024 |
Jan
|
Feb
|
Mar
(1) |
Apr
(1) |
May
(14) |
Jun
(11) |
Jul
(1) |
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2025 |
Jan
(10) |
Feb
(29) |
Mar
|
Apr
(162) |
May
(245) |
Jun
(83) |
Jul
|
Aug
(1) |
Sep
|
Oct
|
Nov
|
Dec
|
|
From: <vic...@us...> - 2024-05-03 16:55:31
|
Revision: 13326
http://sourceforge.net/p/foray/code/13326
Author: victormote
Date: 2024-05-03 16:55:28 +0000 (Fri, 03 May 2024)
Log Message:
-----------
Move dictionaryMap and related methods to the superclass, so that the variant class can use them.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aVariant.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java 2024-05-03 16:41:27 UTC (rev 13325)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java 2024-05-03 16:55:28 UTC (rev 13326)
@@ -30,7 +30,9 @@
import org.axsl.fotree.text.FoOrthographyServer;
import org.axsl.i18n.WritingSystem;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.regex.Pattern;
/**
@@ -38,6 +40,9 @@
*/
public abstract class OrthographyServer4a implements FoOrthographyServer {
+ /** Map whose key is a dictionary ID, and whose value is the matching dictionary resource. */
+ private Map<String, DictionaryResource> dictionaryMap = new HashMap<String, DictionaryResource>();
+
@Override
public abstract Orthography4a getOrthography(WritingSystem writingSystem);
@@ -58,4 +63,25 @@
*/
public abstract List<DerivativePattern> getDerivativePatterns(String id);
+ /**
+ * Registers a dictionary.
+ * @param id The id of the dictionary to be registered.
+ * @param resource The dictionary resource being registered.
+ */
+ public void registerDictionary(final String id, final DictionaryResource resource) {
+ if (this.dictionaryMap.get(id) != null) {
+ throw new IllegalArgumentException("DictionaryResource already exist for id: " + id);
+ }
+ this.dictionaryMap.put(id, resource);
+ }
+
+ /**
+ * Returns the dictionary resource for a given Id.
+ * @param id The id of the dictionary to be returned.
+ * @return The dictionary resource for {@code id}.
+ */
+ public DictionaryResource getDictionaryResource(final String id) {
+ return this.dictionaryMap.get(id);
+ }
+
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java 2024-05-03 16:41:27 UTC (rev 13325)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java 2024-05-03 16:55:28 UTC (rev 13326)
@@ -62,9 +62,6 @@
private Map<WritingSystem, Orthography4aStandard> orthographyMap =
new HashMap<WritingSystem, Orthography4aStandard>();
- /** Map whose key is a dictionary ID, and whose value is the matching dictionary resource. */
- private Map<String, DictionaryResource> dictionaryMap = new HashMap<String, DictionaryResource>();
-
/** The map of match rule lists, keyed by id. */
private Map<String, List<Pattern>> matchRuleLists = new HashMap<String, List<Pattern>>();
@@ -118,7 +115,7 @@
// }
/* Not every orthography has a dictionary resource. */
if (resource != null) {
- this.dictionaryMap.put(resource.getId(), resource);
+ registerDictionary(resource.getId(), resource);
}
}
@@ -161,30 +158,9 @@
return this.derivativeRuleLists.get(id);
}
- /**
- * Registers a dictionary.
- * @param id The id of the dictionary to be registered.
- * @param resource The dictionary resource being registered.
- */
- public void registerDictionary(final String id, final DictionaryResource resource) {
- if (this.dictionaryMap.get(id) != null) {
- throw new IllegalArgumentException("DictionaryResource already exist for id: " + id);
- }
- this.dictionaryMap.put(id, resource);
- }
-
- /**
- * Returns the dictionary resource for a given Id.
- * @param id The id of the dictionary to be returned.
- * @return The dictionary resource for {@code id}.
- */
- public DictionaryResource getDictionaryResource(final String id) {
- return this.dictionaryMap.get(id);
- }
-
@Override
public SegmentDictionary getDictionary(final String dictionaryId) {
- final DictionaryResource resource = this.dictionaryMap.get(dictionaryId);
+ final DictionaryResource resource = this.getDictionaryResource(dictionaryId);
if (resource == null) {
this.logger.warn("Dictionary not found: {}", dictionaryId);
return null;
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aVariant.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aVariant.java 2024-05-03 16:41:27 UTC (rev 13325)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aVariant.java 2024-05-03 16:55:28 UTC (rev 13326)
@@ -29,7 +29,6 @@
package org.foray.orthography;
import org.axsl.i18n.WritingSystem;
-import org.axsl.orthography.Dictionary;
import java.util.HashMap;
import java.util.List;
@@ -67,8 +66,13 @@
}
@Override
- public Dictionary getDictionary(final String dictionaryId) {
- return this.wrapped.getDictionary(dictionaryId);
+ public SegmentDictionary getDictionary(final String dictionaryId) {
+ final DictionaryResource resource = getDictionaryResource(dictionaryId);
+ if (resource == null) {
+ return this.wrapped.getDictionary(dictionaryId);
+ } else {
+ return resource.getResource();
+ }
}
@Override
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2024-05-03 16:41:29
|
Revision: 13325
http://sourceforge.net/p/foray/code/13325
Author: victormote
Date: 2024-05-03 16:41:27 +0000 (Fri, 03 May 2024)
Log Message:
-----------
Remove no-longer-needed code.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java 2024-05-03 15:26:53 UTC (rev 13324)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java 2024-05-03 16:41:27 UTC (rev 13325)
@@ -40,7 +40,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
@@ -59,12 +58,6 @@
/** The logger. */
private Logger logger = LoggerFactory.getLogger(OrthographyServer4aStandard.class);
-// /** The configuration for this server. */
-// private OrthographyServerConfig config;
-
- /** An EntityResolver to be used by XML parsers (for handling DTD catalogs, etc.). */
- private EntityResolver entityResolver = null;
-
/** Map of writing systems and their orthographies. */
private Map<WritingSystem, Orthography4aStandard> orthographyMap =
new HashMap<WritingSystem, Orthography4aStandard>();
@@ -110,22 +103,6 @@
}
/**
- * Returns the EntityResolver for this server.
- * @return The EntityResolver.
- */
- public EntityResolver getEntityResolver() {
- return this.entityResolver;
- }
-
- /**
- * Sets the EntityResolver for this server.
- * @param entityResolver The new EntityResolver.
- */
- public void setEntityResolver(final EntityResolver entityResolver) {
- this.entityResolver = entityResolver;
- }
-
- /**
* Registers a configuration for a given orthography.
* @param writingSystem The orthography for which the configuration should be registered.
* @param orthography The configuration for {@code orthography}.
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2024-05-03 15:26:55
|
Revision: 13324
http://sourceforge.net/p/foray/code/13324
Author: victormote
Date: 2024-05-03 15:26:53 +0000 (Fri, 03 May 2024)
Log Message:
-----------
Handle possibility of multiple writing system dictionaries being parsed from a single dictionary file.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aVariant.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aVariant.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aVariant.java 2024-05-03 12:48:17 UTC (rev 13323)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aVariant.java 2024-05-03 15:26:53 UTC (rev 13324)
@@ -76,13 +76,19 @@
return this.wrapped.getLexer();
}
+ /**
+ * Returns the registered orthography variant for a given writing system, if any.
+ * @param writingSystem The writing system for which the orthography is needed.
+ * @return The registered orthography variant for {@code writingSystem}, or null if none is found.
+ */
+ public Orthography4aVariant getOrthographyVariant(final WritingSystem writingSystem) {
+ return this.orthographyMap.get(writingSystem);
+ }
+
@Override
public Orthography4a getOrthography(final WritingSystem writingSystem) {
- final Orthography4aVariant variant = this.orthographyMap.get(writingSystem);
- if (variant != null) {
- return variant;
- }
- return this.wrapped.getOrthography(writingSystem);
+ final Orthography4aVariant variant = getOrthographyVariant(writingSystem);
+ return variant == null ? this.wrapped.getOrthography(writingSystem) : variant;
}
@Override
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java 2024-05-03 12:48:17 UTC (rev 13323)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java 2024-05-03 15:26:53 UTC (rev 13324)
@@ -137,8 +137,8 @@
/** The character that should actually be used in the word content as the hard hyphen characters. */
private char actualHardHyphenChar = '-';
- /** The current dictionary being parsed. (Not to be confused with {@link #currentDictionary}, the dictionary object
- * that will eventually be returned to the client code). */
+ /** The current dictionary element being parsed. (Not to be confused with {@link #parsedDictionaries}, the
+ * dictionary instances that will eventually be returned to the client code). */
private DictionaryElement currentDictionaryElement;
/** The list of dictionaries that have been parsed by this parser. */
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2024-05-03 12:48:17 UTC (rev 13323)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2024-05-03 15:26:53 UTC (rev 13324)
@@ -209,10 +209,12 @@
final List<SegmentDictionary> dictionaries = dictParser.parse(source);
for (SegmentDictionary dictionary : dictionaries) {
final WritingSystem writingSystem = dictionary.getWritingSystem();
- final Orthography4aStandard baseOrthography = baseServer.getOrthography(writingSystem);
- final Orthography4aVariant variantOrthography =
- new Orthography4aVariant(baseOrthography, dictionary);
- variantServer.registerOrthography(writingSystem, variantOrthography);
+ Orthography4aVariant variantOrthography = variantServer.getOrthographyVariant(writingSystem);
+ if (variantOrthography == null) {
+ final Orthography4aStandard baseOrthography = baseServer.getOrthography(writingSystem);
+ variantOrthography = new Orthography4aVariant(baseOrthography, dictionary);
+ variantServer.registerOrthography(writingSystem, variantOrthography);
+ }
}
}
this.server = variantServer;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2024-05-03 12:48:21
|
Revision: 13323
http://sourceforge.net/p/foray/code/13323
Author: victormote
Date: 2024-05-03 12:48:17 +0000 (Fri, 03 May 2024)
Log Message:
-----------
Improvements to dictionaries and orthographies.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
trunk/foray/foray-primitive/src/main/java/org/foray/primitive/CharacterUtils.java
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2024-05-03 10:58:13 UTC (rev 13322)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2024-05-03 12:48:17 UTC (rev 13323)
@@ -12103,6 +12103,7 @@
<w><t>b/f</t></w>
<w><t>b/l</t></w>
<w><t>b/s</t></w>
+<w><t>b.</t><abbrev referenced-word="book"/></w>
<w><t>b.c.</t><abbrev referenced-word="Before Christ"/></w>
<w><t>baa</t></w>
<w><t>BAA</t></w>
@@ -29883,7 +29884,7 @@
<phrase><t>co-i-tus res-er-va-tus</t></phrase>
<w><t>co-jo-nes</t></w>
<w><t>coke</t></w>
-<w><t>Coke</t></w>
+<w><t>Coke</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>coke-like</t></w>
<w><t>cok-er</t></w>
<w><t>co-ker-nut</t></w>
@@ -37432,7 +37433,7 @@
<w><t>Dal-ma-tian</t></w>
<w><t>dal-mat-ic</t></w>
<w><t>Dal-ny</t></w>
-<w><t>Dal-rym-ple</t></w>
+<w><t>Dal-rym-ple</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>dal seg-no</t></phrase>
<w><t>dal-ton</t></w>
<w><t>Dal-ton</t></w>
@@ -73503,6 +73504,7 @@
<w><t>i-bex-es</t></w>
<w><t>I-bib-i-o</t></w>
<w><t>ib-i-ces</t></w>
+<w><t>i-bid</t><abbrev referenced-word="ibidem (Latin)"/></w>
<w><t>i-bi-dem</t></w>
<w><t>i-bis</t></w>
<w><t>i-bis-es</t></w>
@@ -90429,7 +90431,7 @@
<w><t>Lyn-brook</t></w>
<w><t>lyn-ce-an</t></w>
<w><t>lynch</t></w>
-<w><t>Lynch</t></w>
+<w><t>Lynch</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Lynch-burg</t></w>
<w><t>lynch-er</t></w>
<w><t>lynch-et</t></w>
@@ -139053,7 +139055,7 @@
<phrase><t>Rob-ert I</t></phrase>
<w><t>Ro-ber-to</t></w>
<w><t>Ro-berts</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>Rob-ert-son</t></w>
+<w><t>Rob-ert-son</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Ro-ber-val</t></w>
<w><t>robes=de=cham-bre</t></w>
<w><t>Robe-son</t></w>
@@ -148052,7 +148054,7 @@
<w><t>sim-ply=fur-nished</t></w>
<phrase><t>Simp-son Des-ert</t></phrase>
<w><t>sim-pu-lum</t></w>
-<w><t>Sims</t></w>
+<w><t>Sims</t><noun><convertible-to-possessive/></noun></w>
<w><t>Sims-bur-y</t></w>
<w><t>sim-sim</t></w>
<w><t>sim-ul</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2024-05-03 10:58:13 UTC (rev 13322)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2024-05-03 12:48:17 UTC (rev 13323)
@@ -17,6 +17,7 @@
<w><t>ab-sent-i-a</t></w>
<w><t>ac-tu</t></w>
<w><t>ad</t></w>
+<w><t>al.</t><abbrev referenced-word="alia, alii, aliae, English 'others'."/></w>
<w><t>a-li-as</t></w>
<w><t>al-ter-i-us</t></w>
<w><t>am-or</t></w>
@@ -131,6 +132,7 @@
<w><t>hy-dro-ma-ni-a</t></w>
<w><t>i.e.</t><abbrev referenced-word="id est"/></w>
<w><t>i. e.</t><abbrev referenced-word="id est"/><comment>Contains embedded non-breaking space.</comment></w>
+<w><t>ib.</t><abbrev referenced-word="ibidem"/></w>
<w><t>ibid.</t><abbrev referenced-word="ibidem"/></w>
<w><t>idem</t></w>
<w><t>ig-nis</t></w>
@@ -285,6 +287,7 @@
<w><t>su-a</t></w>
<w><t>su-as</t></w>
<w><t>sub</t></w>
+<w><t>sub-stra-tum</t></w>
<w><t>sug-ges-tio</t></w>
<w><t>sui</t></w>
<w><t>sum-mi</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2024-05-03 10:58:13 UTC (rev 13322)
+++ trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2024-05-03 12:48:17 UTC (rev 13323)
@@ -10,8 +10,8 @@
<match desc="Arabic digits">^[0-9]+[¼½¾]?$</match>
<match desc="Formatted Arabic digits">^[0-9]{1,3}(,[0-9]{3})*(\.[0-9]*)?$</match>
<match desc="Lower-bounded Arabic digits">^[0-9]{1,3}(,[0-9]{3})*\+?$</match>
- <match desc="Uppercase Roman numerals">^C?M{0,3}C?D?X?C{0,3}L?I?X{0,3}V?I{0,3}\.?(’s)?$</match>
- <match desc="Lowercase Roman numerals">^c?m{0,3}c?d?x?c{0,3}l?i?x{0,3}v?i{0,3}\.?(’s)?$</match>
+ <match desc="Uppercase Roman numerals">^C?M{0,3}C?D?X?C{0,3}L?I?X{0,3}V?I{0,3}[VX]?\.?(’s)?$</match>
+ <match desc="Lowercase Roman numerals">^c?m{0,3}c?d?x?c{0,3}l?i?x{0,3}v?i{0,3}[vx]?\.?(’s)?$</match>
<match desc="Currency">^[$£][0-9]+[0-9,\.]*$</match>
<match desc="British Fractional Currency">^[0-9]+[sd]$</match>
<match desc="Percentage">^[0-9]*\.?[0-9]*%$</match>
Modified: trunk/foray/foray-primitive/src/main/java/org/foray/primitive/CharacterUtils.java
===================================================================
--- trunk/foray/foray-primitive/src/main/java/org/foray/primitive/CharacterUtils.java 2024-05-03 10:58:13 UTC (rev 13322)
+++ trunk/foray/foray-primitive/src/main/java/org/foray/primitive/CharacterUtils.java 2024-05-03 12:48:17 UTC (rev 13323)
@@ -77,6 +77,7 @@
private static final String POSSIBLE_INTRAWORD_PUNCTUATION = new String(new char[] {
U0000_Basic_Latin.APOSTROPHE, //English example: 'Tis the season
U2000_General_Punctuation.RIGHT_SINGLE_QUOTATION_MARK, //English example: ’Tis the season
+ U2000_General_Punctuation.LEFT_SINGLE_QUOTATION_MARK, //Scottish name: M‘Culloch
U0000_Basic_Latin.LEFT_PARENTHESIS, //English example of alternate spelling:
U0000_Basic_Latin.RIGHT_PARENTHESIS, // pa(e)leography
U0000_Basic_Latin.FULL_STOP, //English example: Section 8.16
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2024-05-03 10:58:16
|
Revision: 13322
http://sourceforge.net/p/foray/code/13322
Author: victormote
Date: 2024-05-03 10:58:13 +0000 (Fri, 03 May 2024)
Log Message:
-----------
Add left single quotation mark as possible intraword punctuation, to handle certain configurations of Scottish names, e.g. M‘Culloch.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2024-05-02 19:42:44 UTC (rev 13321)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2024-05-03 10:58:13 UTC (rev 13322)
@@ -139,11 +139,8 @@
/** Surrogate for {@link TokenType#AMBIGUOUS_TRAILING_PUNCTUATION}. */
AMBIGUOUS_TRAILING_PUNCTUATION(TokenType.AMBIGUOUS_TRAILING_PUNCTUATION),
+ /* ********** The items below are used during processing, do not map to any TokenType.********** */
-
-
- /* The items below are used during processing, do not map to any TokenType. */
-
/** Ambiguous leading punctuation that is currently thought to be resolvable when the context is considered. */
TRANSIENT_LEADING_PUNCTUATION(null),
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2024-05-02 19:42:47
|
Revision: 13321
http://sourceforge.net/p/foray/code/13321
Author: victormote
Date: 2024-05-02 19:42:44 +0000 (Thu, 02 May 2024)
Log Message:
-----------
Improvements to dictionaries/orthographies.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-epoch-01.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml 2024-04-03 13:09:29 UTC (rev 13320)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml 2024-05-02 19:42:44 UTC (rev 13321)
@@ -21,6 +21,8 @@
<w><t>be-hove</t><verb><regular-root/></verb></w>
<w><t>cen-tre</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>co=la-bour-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<word-placeholder><t>col-or</t><different-country country="USA"/></word-placeholder>
+<w><t>col-our</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dis-til</t><verb/></w>
<w><t>dis-tils</t><verb><lemma>distil</lemma></verb></w>
<w><t>draught</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -60,6 +62,10 @@
<w><t>mould-i-est</t></w>
<w><t>mould-ing</t></w>
<w><t>mould-y</t><adjective><extensible/></adjective></w>
+<word-placeholder><t>neighbor</t><different-country country="USA"/></word-placeholder>
+<word-placeholder><t>neighborhood</t><different-country country="USA"/></word-placeholder>
+<w><t>neigh-bour</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>neigh-bour-hood</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>rig-our</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<word-placeholder><t>superior</t><different-country country="USA"/></word-placeholder>
<w><t>su-pe-ri-our</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective/></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml 2024-04-03 13:09:29 UTC (rev 13320)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml 2024-05-02 19:42:44 UTC (rev 13321)
@@ -21,6 +21,8 @@
<w><t>be-hoove</t><verb><regular-root/></verb></w>
<w><t>cen-ter</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>co=la-bor-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>col-or</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<word-placeholder><t>col-our</t><different-country country="GBR"/></word-placeholder>
<w><t>dis-till</t><verb/></w>
<w><t>draft</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>draft-a-ble</t></w>
@@ -60,6 +62,10 @@
<w><t>mold-warp</t></w>
<w><t>mold-y</t><adjective><extensible/></adjective></w>
<word-placeholder><t>mould</t><different-country country="GBR"/></word-placeholder>
+<w><t>neigh-bor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>neigh-bor-hood</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<word-placeholder><t>neighbour</t><different-country country="GBR"/></word-placeholder>
+<word-placeholder><t>neighbourhood</t><different-country country="GBR"/></word-placeholder>
<w><t>rig-or</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>su-pe-ri-or</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective/></w>
<word-placeholder><t>superiour</t><different-country country="GBR"/></word-placeholder>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-epoch-01.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-epoch-01.dict.xml 2024-04-03 13:09:29 UTC (rev 13320)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-epoch-01.dict.xml 2024-05-02 19:42:44 UTC (rev 13321)
@@ -115,6 +115,8 @@
<w><t>se-cret</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb><adjective/></w>
<w><t>seek-est</t><verb><regular-root value="false"/></verb></w>
<w><t>se-lect-edst</t><verb><lemma>select</lemma></verb></w>
+<w><t>shew</t><verb/></w>
+<w><t>shewn</t><verb><lemma>shew</lemma></verb></w>
<w><t>show-eth</t><verb><lemma>show</lemma></verb></w>
<w><t>sin-neth</t></w>
<w><t>sleep-est</t><verb><regular-root value="false"/></verb></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2024-04-03 13:09:29 UTC (rev 13320)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2024-05-02 19:42:44 UTC (rev 13321)
@@ -13140,7 +13140,7 @@
<w><t>ban-ian</t></w>
<w><t>ban-ish</t><verb><regular-root/></verb></w>
<w><t>ban-ish-er</t></w>
-<w><t>ban-ish-ment</t></w>
+<w><t>ban-ish-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ban-is-ter</t></w>
<w><t>ban-is-ters</t></w>
<w><t>ban-iya</t></w>
@@ -17408,7 +17408,7 @@
<w><t>bloom-y</t></w>
<w><t>bloop-er</t></w>
<w><t>Blos-som</t></w>
-<w><t>blos-som</t></w>
+<w><t>blos-som</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>blos-som-less</t></w>
<w><t>blos-som-y</t></w>
<w><t>blot</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -20424,8 +20424,8 @@
<w><t>bu-cra-ni-um</t></w>
<w><t>Bu-cu-re</t></w>
<w><t>Bu-cu-resti</t></w>
-<w><t>bud</t></w>
<w><t>Bud</t></w>
+<w><t>bud</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Bu-da-pest</t></w>
<w><t>Budd</t></w>
<w><t>bud-ded</t></w>
@@ -30254,7 +30254,7 @@
<w><t>col-o-pho-ny</t></w>
<w><t>co-loph-o-ny</t></w>
<w><t>col-o-quin-ti-da</t></w>
-<w><t>col-or</t></w>
+<word-placeholder><t>color</t><country-specific country="USA"/></word-placeholder>
<w><t>col-or-a-bil-i-ty</t></w>
<w><t>col-or-a-ble</t></w>
<w><t>col-or-a-ble-ness</t></w>
@@ -30322,6 +30322,7 @@
<w><t>co-los-tral</t></w>
<w><t>co-los-trum</t></w>
<w><t>co-lot-o-my</t></w>
+<word-placeholder><t>col-our</t><country-specific country="GBR"/></word-placeholder>
<phrase><t>col-our bar</t></phrase>
<phrase><t>col-our code</t></phrase>
<phrase><t>col-our guard</t></phrase>
@@ -40756,7 +40757,7 @@
<w><t>des-tri-er</t></w>
<w><t>de-stroy</t><verb><regular-root/></verb></w>
<w><t>de-stroy-a-ble</t></w>
-<w><t>de-stroy-er</t></w>
+<w><t>de-stroy-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>de-stroy-er es-cort</t></phrase>
<w><t>de-struct</t></w>
<w><t>de-struct-i-bil-i-ty</t></w>
@@ -42575,7 +42576,7 @@
<w><t>dis-as-so-ci-at-ing</t></w>
<w><t>dis-as-so-ci-a-tion</t></w>
<w><t>dis-as-ter</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>dis-as-trous</t></w>
+<w><t>dis-as-trous</t><adjective/></w>
<w><t>dis-as-trous-ly</t></w>
<w><t>dis-as-trous-ness</t></w>
<w><t>dis-a-vow</t><verb><regular-root/></verb></w>
@@ -45402,7 +45403,7 @@
<w><t>Drei-ser</t></w>
<w><t>drek</t></w>
<w><t>D-ren</t></w>
-<w><t>drench</t></w>
+<w><t>drench</t><verb><regular-root/></verb></w>
<w><t>drench-er</t></w>
<w><t>drench-ing-ly</t></w>
<w><t>D-ren-matt</t></w>
@@ -45907,7 +45908,7 @@
<w><t>Dul-ci-tone</t></w>
<w><t>Dul-cy</t></w>
<w><t>du-li-a</t></w>
-<w><t>dull</t></w>
+<w><t>dull</t><adjective><extensible/></adjective></w>
<w><t>dull-ard</t></w>
<w><t>Dul-les</t></w>
<w><t>dull-ish</t></w>
@@ -49844,7 +49845,7 @@
<w><t>en-trée</t></w>
<w><t>en-tre-lac</t></w>
<w><t>en-tre-mets</t></w>
-<w><t>en-trench</t></w>
+<w><t>en-trench</t><verb><regular-root/></verb></w>
<w><t>en-trench-ment</t></w>
<phrase><t>en-tre nous</t></phrase>
<w><t>en-tre-p</t></w>
@@ -52589,7 +52590,7 @@
<phrase><t>ex hy-poth-e-si</t></phrase>
<w><t>ex-i-geant</t></w>
<w><t>ex-i-geante</t></w>
-<w><t>ex-i-gence</t></w>
+<w><t>ex-i-gence</t><noun><pluralizable/></noun></w>
<w><t>ex-i-gen-cy</t><noun><pluralizable/></noun></w>
<w><t>ex-i-gent</t></w>
<w><t>ex-i-gent-ly</t></w>
@@ -54210,7 +54211,7 @@
<w><t>fas-ci-nat-ed-ly</t></w>
<w><t>fas-ci-nat-ing</t></w>
<w><t>fas-ci-nat-ing-ly</t></w>
-<w><t>fas-ci-na-tion</t></w>
+<w><t>fas-ci-na-tion</t><noun><pluralizable/></noun></w>
<w><t>fas-ci-na-tive</t></w>
<w><t>fas-ci-na-tor</t></w>
<w><t>fas-cine</t></w>
@@ -55130,7 +55131,8 @@
<w><t>feu-da-to-ry</t></w>
<w><t>feud-ist</t></w>
<w><t>Feu-er-bach</t></w>
-<w><t>Feuil-lant</t></w>
+<w><t>Feuil-lant</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>feuil-lant</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Feuil-lée</t></w>
<w><t>feuil-le-ton</t></w>
<w><t>feuil-le-ton-ism</t></w>
@@ -62072,7 +62074,7 @@
<w><t>gi-ron</t></w>
<w><t>Gi-ronde</t></w>
<w><t>Gi-ron-dism</t></w>
-<w><t>Gi-ron-dist</t></w>
+<w><t>Gi-ron-dist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>gi-ron-ny</t></w>
<w><t>gir-o-sol</t></w>
<w><t>gi-rou-ette</t></w>
@@ -64439,7 +64441,7 @@
<w><t>Gren-del</t></w>
<w><t>Gren-fell</t></w>
<w><t>Gre-no-ble</t></w>
-<w><t>Gren-ville</t></w>
+<w><t>Gren-ville</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Gresh-am</t></w>
<phrase><t>Gresh-am’s law</t></phrase>
<w><t>gres-so-ri-al</t></w>
@@ -79148,7 +79150,7 @@
<w><t>in-tra-ve-nous-ly</t></w>
<w><t>in-tra-vi-tal</t></w>
<phrase><t>in-tra-zon-al soil</t></phrase>
-<w><t>in-treat</t></w>
+<w><t>in-treat</t><verb><regular-root/></verb></w>
<w><t>in-trench</t><verb><regular-root/></verb></w>
<w><t>in-trench-er</t></w>
<w><t>in-trench-ment</t></w>
@@ -80725,7 +80727,8 @@
<w><t>Ja-co-bi</t></w>
<w><t>Ja-co-bi-an</t></w>
<phrase><t>Ja-co-bi-an de-ter-mi-nant</t></phrase>
-<w><t>Jac-o-bin</t></w>
+<w><t>Jac-o-bin</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>jac-o-bin</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Ja-co-bi-na</t></w>
<w><t>Ja-co-bine</t></w>
<w><t>Jac-o-bin-ic</t></w>
@@ -89796,7 +89799,7 @@
<w><t>loy-al</t></w>
<w><t>loy-al-ism</t></w>
<w><t>Loy-al-ist</t></w>
-<w><t>loy-al-ist</t></w>
+<w><t>loy-al-ist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>loy-al-ly</t></w>
<w><t>loy-al-ness</t></w>
<w><t>loy-al-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -97442,7 +97445,7 @@
<w><t>mo-nar-chi-cal</t></w>
<w><t>mo-nar-chi-cal-ly</t></w>
<w><t>mon-ar-chism</t></w>
-<w><t>mon-ar-chist</t></w>
+<w><t>mon-ar-chist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mon-ar-chist-ic</t></w>
<w><t>mon-ar-chy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mo-nar-da</t></w>
@@ -100752,14 +100755,14 @@
<w><t>Ne-he-mi-as</t></w>
<w><t>Neh-ru</t></w>
<w><t>neigh</t></w>
-<w><t>neigh-bor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>neigh-bor-hood</t></w>
+<word-placeholder><t>neighbor</t><country-specific country="USA"/></word-placeholder>
+<word-placeholder><t>neighborhood</t><country-specific country="USA"/></word-placeholder>
<w><t>neigh-bor-ing</t></w>
<w><t>neigh-bor-less</t></w>
<w><t>neigh-bor-li-ness</t></w>
<w><t>neigh-bor-ly</t></w>
-<w><t>neigh-bour</t></w>
-<w><t>neigh-bour-hood</t></w>
+<word-placeholder><t>neighbour</t><country-specific country="GBR"/></word-placeholder>
+<word-placeholder><t>neighbourhood</t><country-specific country="GBR"/></word-placeholder>
<w><t>neigh-bour-ing</t></w>
<w><t>neigh-bour-less</t></w>
<w><t>neigh-bour-li-ness</t></w>
@@ -108468,6 +108471,7 @@
<w><t>ob-scu-ra-tion</t></w>
<w><t>ob-scure</t><verb><regular-root/></verb></w>
<w><t>ob-scur-ed-ly</t></w>
+<w><t>ob-scure-ly</t><adverb/></w>
<w><t>ob-scure-ness</t></w>
<w><t>ob-scu-ri-ty</t></w>
<phrase><t>ob-scur-um per ob-scur-i-us</t></phrase>
@@ -114381,7 +114385,7 @@
<w><t>pail-lettes</t></w>
<w><t>pai=loo</t></w>
<w><t>pain</t></w>
-<w><t>Paine</t></w>
+<w><t>Paine</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pained</t></w>
<w><t>Paines-ville</t></w>
<w><t>pain-ful</t></w>
@@ -116836,7 +116840,7 @@
<w><t>pe-dal-fer</t></w>
<w><t>ped-a-lo</t></w>
<phrase><t>ped-al point</t></phrase>
-<w><t>ped-ant</t></w>
+<w><t>ped-ant</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ped-ant-esque</t></w>
<w><t>ped-ant-hood</t></w>
<w><t>pe-dan-tic</t></w>
@@ -121278,6 +121282,7 @@
<w><t>plun-der</t><noun><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>plun-der-a-ble</t></w>
<w><t>plun-der-age</t></w>
+<w><t>plun-der-er</t><noun><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>plunge</t><noun><pluralizable/></noun><verb><regular-root/></verb></w>
<w><t>plung-er</t></w>
<w><t>plung-ing</t></w>
@@ -123982,7 +123987,7 @@
<w><t>pre-cir-cu-la-tion</t></w>
<w><t>pre-cis</t></w>
<w><t>pré-cis</t></w>
-<w><t>pre-cise</t></w>
+<w><t>pre-cise</t><adjective><extensible/></adjective></w>
<w><t>pre-cise-ly</t></w>
<w><t>pre-cise-ness</t></w>
<w><t>pre-ci-sian</t></w>
@@ -126331,7 +126336,7 @@
<w><t>pre-scrib-ing</t></w>
<w><t>pre-script</t></w>
<w><t>pre-scrip-ti-ble</t></w>
-<w><t>pre-scrip-tion</t></w>
+<w><t>pre-scrip-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pre-scrip-tive</t></w>
<w><t>pre-scrip-tive-ly</t></w>
<w><t>pre-scrip-tive-ness</t></w>
@@ -132876,8 +132881,9 @@
<w><t>rav-en-ing</t></w>
<w><t>rav-en-ing-ly</t></w>
<w><t>Ra-ven-na</t></w>
-<w><t>rav-en-ous</t></w>
-<w><t>rav-en-ous-ly</t></w>
+<w><t>rav-en-ous</t><adjective><extensible value="false"/></adjective></w>
+<w><t>rav-en-ous-ly</t><adverb/></w>
+<w><t>rav-en-ous-ness</t><noun/></w>
<w><t>Ra-vens-wood</t></w>
<w><t>rav-er</t></w>
<w><t>ra-vi-gote</t></w>
@@ -135718,7 +135724,7 @@
<w><t>re-lat-ed-ness</t></w>
<w><t>re-lat-er</t></w>
<w><t>re-lat-ing</t></w>
-<w><t>re-la-tion</t></w>
+<w><t>re-la-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>re-la-tion-al</t></w>
<w><t>re-la-tion-less</t></w>
<w><t>re-la-tions</t></w>
@@ -136906,8 +136912,9 @@
<w><t>re-scru-ti-nized</t></w>
<w><t>re-scru-ti-niz-ing</t></w>
<w><t>re-scru-ti-ny</t></w>
-<w><t>res-cue</t></w>
+<w><t>res-cue</t><noun><pluralizable/><convertible-to-possessive/></noun><verb/></w>
<w><t>res-cued</t></w>
+<w><t>res-cues</t></w>
<w><t>res-cu-ing</t></w>
<w><t>re-seal</t></w>
<w><t>re-seal-a-ble</t></w>
@@ -139132,7 +139139,7 @@
<w><t>rock-i-est</t></w>
<w><t>rock-i-ness</t></w>
<phrase><t>rock-ing chair</t></phrase>
-<w><t>Rock-ing-ham</t></w>
+<w><t>Rock-ing-ham</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>rock-ing horse</t></phrase>
<w><t>rock-ing-ly</t></w>
<phrase><t>rock-ing stone</t></phrase>
@@ -139893,7 +139900,7 @@
<w><t>roy-al-ised</t></w>
<w><t>roy-al-is-ing</t></w>
<w><t>roy-al-ism</t></w>
-<w><t>roy-al-ist</t></w>
+<w><t>roy-al-ist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>roy-al-is-tic</t></w>
<w><t>roy-al-i-za-tion</t></w>
<w><t>roy-al-ize</t></w>
@@ -173951,8 +173958,9 @@
<w><t>un-lack-eyed</t></w>
<w><t>un-la-con-ic</t></w>
<w><t>un-lac-quered</t></w>
-<w><t>un-lade</t></w>
+<w><t>un-lade</t><verb></verb></w>
<w><t>un-la-den</t></w>
+<w><t>un-la-ding</t><verb><lemma>unlade</lemma></verb></w>
<w><t>un-la-dled</t></w>
<w><t>un-lag-ging</t></w>
<w><t>un-laid</t></w>
@@ -179837,7 +179845,7 @@
<w><t>u-surp</t><verb><regular-root/></verb></w>
<w><t>u-sur-pa-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>u-surp-a-tive</t></w>
-<w><t>u-surp-er</t></w>
+<w><t>u-surp-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>u-surp-ing-ly</t></w>
<w><t>u-su-ry</t></w>
<w><t>USW</t></w>
@@ -181226,7 +181234,7 @@
<w><t>vet-ting</t></w>
<w><t>Ve-us-es</t></w>
<w><t>vex</t><verb><regular-root/></verb></w>
-<w><t>vex-a-tion</t></w>
+<w><t>vex-a-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>vex-a-tious</t></w>
<w><t>vex-a-tious-ly</t></w>
<w><t>vex-a-tious-ness</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml 2024-04-03 13:09:29 UTC (rev 13320)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml 2024-05-02 19:42:44 UTC (rev 13321)
@@ -6,7 +6,7 @@
<axsl-dictionary
id="org.foray.fre.Latn.ZZZ"
- language="ita" script="Latn"
+ language="fre" script="Latn" country="ZZZ"
hard-hyphen-char="=" soft-hyphen-char="-">
@@ -26,6 +26,7 @@
<w><t>coup</t></w>
<w><t>covert</t></w>
<w><t>crèche</t></w>
+<w><t>dans</t></w>
<w><t>de</t></w>
<w><t>der-nier</t></w>
<w><t>des</t></w>
@@ -35,6 +36,7 @@
<w><t>é-clat</t><noun/></w>
<w><t>en</t></w>
<w><t>es-prit</t></w>
+<w><t>etat</t><noun><pluralizable/></noun></w>
<w><t>feme</t><noun><pluralizable/></noun></w>
<w><t>femme</t></w>
<w><t>fi-let mi-gnon</t></w>
@@ -46,7 +48,9 @@
<w><t>ils</t></w>
<w><t>jér-é-mi-ade</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>la</t></w>
+<w><t>l’Amérique</t></w>
<w><t>l’égard</t></w>
+<w><t>les</t></w>
<w><t>lit-er-a-teur</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>loi</t></w>
<w><t>masse</t></w>
@@ -53,8 +57,10 @@
<w><t>moi</t></w>
<w><t>mo-tif</t></w>
<w><t>nom</t></w>
+<w><t>nou-veau</t></w>
<w><t>n’y</t></w>
<w><t>ont</t></w>
+<w><t>par</t></w>
<w><t>que</t></w>
<w><t>qui</t></w>
<w><t>rai-son</t></w>
@@ -66,6 +72,7 @@
<w><t>sujets</t></w>
<w><t>tels</t></w>
<w><t>tou-jours</t></w>
+<w><t>unis</t></w>
<w><t>voy-age</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2024-04-03 13:09:29 UTC (rev 13320)
+++ trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2024-05-02 19:42:44 UTC (rev 13321)
@@ -10,8 +10,8 @@
<match desc="Arabic digits">^[0-9]+[¼½¾]?$</match>
<match desc="Formatted Arabic digits">^[0-9]{1,3}(,[0-9]{3})*(\.[0-9]*)?$</match>
<match desc="Lower-bounded Arabic digits">^[0-9]{1,3}(,[0-9]{3})*\+?$</match>
- <match desc="Uppercase Roman numerals">^[IVXLCDM]+\.?(’s)?$</match>
- <match desc="Lowercase Roman numerals">^[ivxlcdm]+$</match>
+ <match desc="Uppercase Roman numerals">^C?M{0,3}C?D?X?C{0,3}L?I?X{0,3}V?I{0,3}\.?(’s)?$</match>
+ <match desc="Lowercase Roman numerals">^c?m{0,3}c?d?x?c{0,3}l?i?x{0,3}v?i{0,3}\.?(’s)?$</match>
<match desc="Currency">^[$£][0-9]+[0-9,\.]*$</match>
<match desc="British Fractional Currency">^[0-9]+[sd]$</match>
<match desc="Percentage">^[0-9]*\.?[0-9]*%$</match>
@@ -34,6 +34,10 @@
<match desc="Lowercase Roman numerals">^[ivxlcdm]+$</match>
</match-rule-list>
+ <match-rule-list id="fre-Latn-match-rules">
+ <match desc="A single capital letter, such as a person's initial">^[A-Z]$</match>
+ </match-rule-list>
+
<derivative-pattern-list id="eng-Latn-derivative-patterns">
<derivative-pattern desc="ends with /’s/">
<match>^([a-zæœëöA-ZÆŒ\-]+)’s$</match>
@@ -418,6 +422,7 @@
</orthography>
<orthography language-iso-3char="fre" script-iso-4char="Latn" country-iso-3char="ZZZ">
+ <match-rules reference="fre-Latn-match-rules"/>
<derivative-rules reference="fre-Latn-derivative-patterns"/>
<dictionary reference="org.foray.fre.Latn.ZZZ"/>
</orthography>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2024-04-03 13:09:32
|
Revision: 13320
http://sourceforge.net/p/foray/code/13320
Author: victormote
Date: 2024-04-03 13:09:29 +0000 (Wed, 03 Apr 2024)
Log Message:
-----------
Improvements to dictionaries and orthographies.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/grc-Grek-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/heb-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
Added Paths:
-----------
trunk/foray/foray-orthography/src/main/data/dictionaries/por-Latn-ZZZ.dict.xml
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml 2024-03-28 11:34:38 UTC (rev 13319)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml 2024-04-03 13:09:29 UTC (rev 13320)
@@ -38,6 +38,10 @@
<w><t>ful-fil</t><verb/></w>
<w><t>ful-fil-ment</t><noun/></w>
<w><t>ful-fils</t><verb><vf><singular/></vf></verb></w>
+<word-placeholder><t>hon-or</t><different-country country="USA"/></word-placeholder>
+<word-placeholder><t>hon-or-a-ble</t><different-country country="USA"/></word-placeholder>
+<w><t>hon-our</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
+<w><t>hon-our-a-ble</t><adjective/></w>
<word-placeholder><t>labor</t><different-country country="USA"/></word-placeholder>
<w><t>la-bour</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>la-boured</t><adjective/></w>
@@ -56,4 +60,7 @@
<w><t>mould-i-est</t></w>
<w><t>mould-ing</t></w>
<w><t>mould-y</t><adjective><extensible/></adjective></w>
+<w><t>rig-our</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<word-placeholder><t>superior</t><different-country country="USA"/></word-placeholder>
+<w><t>su-pe-ri-our</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective/></w>
</axsl-dictionary>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml 2024-03-28 11:34:38 UTC (rev 13319)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml 2024-04-03 13:09:29 UTC (rev 13320)
@@ -38,6 +38,10 @@
<word-placeholder><t>en-deav-our</t><different-country country="GBR"/></word-placeholder>
<w><t>ful-fill</t><verb><regular-root/></verb></w>
<w><t>ful-fill-ment</t><noun/></w>
+<w><t>hon-or</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
+<w><t>hon-or-a-ble</t><adjective/></w>
+<word-placeholder><t>hon-our</t><different-country country="GBR"/></word-placeholder>
+<word-placeholder><t>hon-our-a-ble</t><different-country country="GBR"/></word-placeholder>
<w><t>la-bor</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>la-bored</t><adjective/></w>
<w><t>la-bor-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -56,4 +60,7 @@
<w><t>mold-warp</t></w>
<w><t>mold-y</t><adjective><extensible/></adjective></w>
<word-placeholder><t>mould</t><different-country country="GBR"/></word-placeholder>
+<w><t>rig-or</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>su-pe-ri-or</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective/></w>
+<word-placeholder><t>superiour</t><different-country country="GBR"/></word-placeholder>
</axsl-dictionary>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2024-03-28 11:34:38 UTC (rev 13319)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2024-04-03 13:09:29 UTC (rev 13320)
@@ -10260,7 +10260,7 @@
<w><t>as-sail</t><verb><regular-root/></verb></w>
<w><t>as-sail-a-ble</t></w>
<w><t>as-sail-a-ble-ness</t></w>
-<w><t>as-sail-ant</t></w>
+<w><t>as-sail-ant</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>as-sail-er</t></w>
<w><t>as-sail-ment</t></w>
<w><t>As-sam</t></w>
@@ -16808,6 +16808,7 @@
<w><t>Bjoer-ling</t></w>
<w><t>Björn-son</t></w>
<w><t>Björn-stjer-ne</t></w>
+<w><t>bk</t><abbrev referenced-word="book"/></w>
<w><t>bkcy</t></w>
<w><t>bkg</t></w>
<w><t>bkpt</t></w>
@@ -22342,7 +22343,7 @@
<w><t>cam-pan-u-la-ceous</t></w>
<w><t>cam-pan-u-late</t></w>
<w><t>Cam-pan-us</t></w>
-<w><t>Camp-bell</t></w>
+<w><t>Camp-bell</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Camp-bell=Ban-ner-man</t></w>
<w><t>Camp-bell-ism</t></w>
<w><t>Camp-bell-ite</t></w>
@@ -52913,7 +52914,7 @@
<w><t>ex-plic-it</t></w>
<w><t>ex-plic-it-ly</t></w>
<w><t>ex-plic-it-ness</t></w>
-<w><t>ex-plode</t></w>
+<w><t>ex-plode</t><verb><regular-root/></verb></w>
<w><t>ex-plod-ed</t></w>
<phrase><t>ex-plod-ed view</t></phrase>
<w><t>ex-plod-ent</t></w>
@@ -54794,7 +54795,7 @@
<w><t>fem-o-ral</t></w>
<w><t>fe-mur</t></w>
<w><t>fe-murs</t></w>
-<w><t>fen</t></w>
+<w><t>fen</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>fe-na-gle</t></w>
<w><t>fe-na-gler</t></w>
<w><t>fence</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
@@ -69099,7 +69100,7 @@
<w><t>He-ro-di-as</t></w>
<w><t>He-rod-o-tus</t></w>
<w><t>he-roes</t><noun><plural/><convertible-to-possessive/></noun></w>
-<w><t>he-ro-ic</t></w>
+<w><t>he-ro-ic</t><adjective/></w>
<phrase><t>he-ro-ic age</t></phrase>
<w><t>he-ro-i-cal</t></w>
<w><t>he-ro-i-cal-ly</t></w>
@@ -70907,11 +70908,12 @@
<w><t>hon-ky</t></w>
<w><t>honk-y=tonk</t></w>
<w><t>Hon-o-lu-lu</t></w>
-<w><t>hon-or</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
+<word-placeholder><t>hon-or</t><country-specific country="USA"/></word-placeholder>
<w><t>Hon-or</t></w>
<w><t>Ho-no-ra</t></w>
<w><t>Hon-or-a-ble</t></w>
-<w><t>hon-or-a-ble</t></w>
+<word-placeholder><t>hon-or-a-ble</t><country-specific country="USA"/></word-placeholder>
+<w><t>hon-or-a-ble</t><adjective/></w>
<phrase><t>hon-or-a-ble dis-charge</t></phrase>
<w><t>hon-or-a-ble-ness</t></w>
<w><t>hon-or-a-bly</t></w>
@@ -70933,6 +70935,8 @@
<phrase><t>hon-or school</t></phrase>
<phrase><t>hon-ors list</t></phrase>
<phrase><t>hon-ors of war</t></phrase>
+<word-placeholder><t>hon-our</t><country-specific country="GBR"/></word-placeholder>
+<w><t>hon-our</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>Hon-shu</t></w>
<w><t>Ho-nus</t></w>
<w><t>hoo</t></w>
@@ -75985,7 +75989,7 @@
<w><t>in-dict-er</t></w>
<w><t>in-dic-tion</t></w>
<w><t>in-dic-tion-al</t></w>
-<w><t>in-dict-ment</t></w>
+<w><t>in-dict-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-dict-or</t></w>
<w><t>In-dienne</t></w>
<w><t>In-dies</t></w>
@@ -88889,7 +88893,7 @@
<w><t>lock-a-ble</t></w>
<w><t>lock-age</t></w>
<w><t>lock-box</t></w>
-<w><t>Locke</t></w>
+<w><t>Locke</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Lock-e-an</t></w>
<w><t>Lock-e-an-ism</t></w>
<w><t>lock-er</t></w>
@@ -98028,7 +98032,7 @@
<w><t>Moor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>moor-age</t></w>
<w><t>moor-cock</t></w>
-<w><t>Moore</t></w>
+<w><t>Moore</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Moores-ville</t></w>
<w><t>moor-fowl</t></w>
<w><t>moor-hen</t></w>
@@ -117507,7 +117511,7 @@
<w><t>Pe-pusch</t></w>
<w><t>Pepys</t></w>
<w><t>Pepys-i-an</t></w>
-<w><t>Pe-quot</t></w>
+<w><t>Pe-quot</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>per</t></w>
<w><t>Pe-r</t></w>
<w><t>Pe-ra</t></w>
@@ -129601,7 +129605,7 @@
<w><t>pu-bic</t></w>
<w><t>pu-bis</t></w>
<w><t>publ</t></w>
-<w><t>pub-lic</t></w>
+<w><t>pub-lic</t><adjective/></w>
<phrase><t>pub-lic=ad-dress sys-tem</t></phrase>
<w><t>pub-li-can</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>pub-lic as-sis-tance</t></phrase>
@@ -138699,8 +138703,7 @@
<w><t>rig-ol</t></w>
<w><t>rig-o-let</t></w>
<w><t>Rig-o-let-to</t></w>
-<w><t>rig-or</t></w>
-<w><t>ri-gor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<word-placeholder><t>rig-or</t><country-specific country="USA"/></word-placeholder>
<w><t>rig-or-ism</t></w>
<w><t>rig-or-ist</t></w>
<w><t>rig-or-is-tic</t></w>
@@ -138707,6 +138710,7 @@
<phrase><t>rig-or mor-tis</t></phrase>
<w><t>rig-or-ous</t></w>
<w><t>rig-or-ous-ly</t></w>
+<word-placeholder><t>rig-our</t><country-specific country="GBR"/></word-placeholder>
<w><t>Rigs-dag</t></w>
<w><t>rigs-da-ler</t></w>
<w><t>Rig-ve-da</t></w>
@@ -150084,7 +150088,7 @@
<w><t>soi-gn</t></w>
<w><t>soi-gné</t></w>
<w><t>soi-gnée</t></w>
-<w><t>soil</t></w>
+<w><t>soil</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>soil-age</t></w>
<phrase><t>soil con-ser-va-tion</t></phrase>
<w><t>soil-ure</t></w>
@@ -157591,12 +157595,13 @@
<w><t>su-per-in-tol-er-a-ble-ness</t></w>
<w><t>su-per-in-un-da-tion</t></w>
<w><t>su-per-in-vo-lu-tion</t></w>
-<w><t>su-pe-ri-or</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<word-placeholder><t>superior</t><country-specific country="USA"/></word-placeholder>
<w><t>Su-pe-ri-or</t></w>
<phrase><t>su-pe-ri-or court</t></phrase>
<w><t>su-pe-ri-or-i-ty</t></w>
<phrase><t>su-pe-ri-or-i-ty com-plex</t></phrase>
<phrase><t>su-pe-ri-or plan-et</t></phrase>
+<word-placeholder><t>superiour</t><country-specific country="GBR"/></word-placeholder>
<w><t>su-per-ir-ri-ta-bil-i-ty</t></w>
<w><t>su-per-ja-cent</t></w>
<w><t>su-per-ju-di-cial</t></w>
@@ -182022,7 +182027,7 @@
<w><t>vi-tu-per-ate</t></w>
<w><t>vi-tu-per-at-ed</t></w>
<w><t>vi-tu-per-at-ing</t></w>
-<w><t>vi-tu-per-a-tion</t></w>
+<w><t>vi-tu-per-a-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>vi-tu-per-a-tor</t></w>
<w><t>Vi-tus</t></w>
<w><t>vi-va</t></w>
@@ -185546,7 +185551,7 @@
<w><t>win-ter-weight</t></w>
<phrase><t>win-ter wheat</t></phrase>
<w><t>win-ter-y</t></w>
-<w><t>Win-throp</t></w>
+<w><t>Win-throp</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>win-tle</t></w>
<w><t>win-tri-er</t></w>
<w><t>win-tri-est</t></w>
@@ -186354,7 +186359,7 @@
<w><t>wrnt</t></w>
<w><t>Wro-claw</t></w>
<w><t>wrong</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
-<w><t>wrong-do-er</t></w>
+<w><t>wrong-do-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>wrong-do-ing</t></w>
<w><t>wrong-er</t></w>
<w><t>wrong-ful</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/grc-Grek-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/grc-Grek-ZZZ.dict.xml 2024-03-28 11:34:38 UTC (rev 13319)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/grc-Grek-ZZZ.dict.xml 2024-04-03 13:09:29 UTC (rev 13320)
@@ -37,6 +37,8 @@
<w><t>διώκετε</t></w>
<w><t>δοκίμιον</t></w>
<w><t>δόξα</t></w>
+<w><t>δουλοι</t></w>
+<w><t>δουλος</t></w>
<w><t>ἐὰν</t></w>
<w><t>ἐθέλω</t></w>
<w><t>ἐκκλησία</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/heb-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/heb-Latn-ZZZ.dict.xml 2024-03-28 11:34:38 UTC (rev 13319)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/heb-Latn-ZZZ.dict.xml 2024-04-03 13:09:29 UTC (rev 13320)
@@ -17,6 +17,7 @@
<w><t>beth</t></w>
<w><t>béth</t></w>
<w><t>Che-bar</t></w>
+<w><t>ebed</t></w>
<w><t>El</t></w>
<w><t>Gib-bor</t></w>
<w><t>hayin</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2024-03-28 11:34:38 UTC (rev 13319)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2024-04-03 13:09:29 UTC (rev 13320)
@@ -62,6 +62,7 @@
<w><t>cui</t></w>
<w><t>cus-tos</t></w>
<w><t>de</t></w>
+<w><t>deb-et</t></w>
<w><t>de-cem-vir</t></w>
<w><t>de-cem-vir-i</t></w>
<w><t>de-men-tant</t></w>
@@ -86,6 +87,7 @@
<w><t>e.g.</t><abbrev referenced-word="id est"/><comment>Latin "that is."</comment></w>
<w><t>e. g.</t><abbrev referenced-word="id est"/><comment>Latin "that is."</comment></w>
<w><t>e-go</t></w>
+<w><t>ei</t></w>
<w><t>e-o</t></w>
<w><t>e-o-dem</t></w>
<w><t>e-pis-co-pi</t></w>
@@ -97,8 +99,10 @@
<w><t>ex</t></w>
<w><t>ex-trem-um</t></w>
<w><t>fac</t></w>
+<w><t>fa-cere</t></w>
<w><t>fa-cias</t></w>
<w><t>fac-to</t></w>
+<w><t>fal-si</t></w>
<w><t>fa-to</t></w>
<w><t>fat-u-us</t></w>
<w><t>fe-lo</t></w>
@@ -112,10 +116,12 @@
<w><t>for-ti-o-ri</t></w>
<w><t>for-tis-si-mus</t></w>
<w><t>fu-gi-mus</t></w>
+<w><t>fuit</t></w>
<w><t>gen-er-a-ti-o</t></w>
<w><t>gen-er-is</t></w>
<w><t>ge-nus</t></w>
<w><t>glo-ri-a</t></w>
+<w><t>gross</t></w>
<w><t>ha-be-as</t></w>
<w><t>ha-bet</t></w>
<w><t>hac</t></w>
@@ -131,7 +137,9 @@
<w><t>im-mer-go</t></w>
<w><t>im-per-i-i</t></w>
<w><t>in</t></w>
+<w><t>in-cert-um</t></w>
<w><t>in-cog-ni-tum</t></w>
+<w><t>in-de-term-in-at-um</t></w>
<w><t>in-fi-del-i-um</t></w>
<w><t>in-fin-i-tum</t></w>
<w><t>in-san-i-a</t></w>
@@ -154,6 +162,7 @@
<w><t>mag-na</t></w>
<w><t>ma-jor-i</t></w>
<w><t>man-dam-us</t></w>
+<w><t>mane</t></w>
<w><t>max-im</t><noun><singular/></noun></w>
<w><t>max-ims</t><noun><plural/></noun></w>
<w><t>me-ro</t></w>
@@ -179,6 +188,7 @@
<w><t>non</t></w>
<w><t>nos</t></w>
<w><t>no-vo</t></w>
+<w><t>o-di-um</t></w>
<w><t>of-fi-cio</t></w>
<w><t>om-nes</t></w>
<w><t>om-nia</t></w>
@@ -213,6 +223,9 @@
<w><t>pos-sum-us</t></w>
<w><t>post</t></w>
<w><t>po-tent-i-æ</t></w>
+<w><t>po-test</t></w>
+<w><t>præ-cept-um</t></w>
+<w><t>præ-stat-ur</t></w>
<w><t>præ-ter-e-a</t></w>
<w><t>pre-amble</t></w>
<w><t>prin-ci-pii</t></w>
@@ -220,14 +233,18 @@
<w><t>pri-us</t></w>
<w><t>pro</t></w>
<w><t>prop-a-gan-da</t></w>
+<w><t>purum</t></w>
<w><t>qua</t></w>
<w><t>quad-ru-plex</t></w>
<w><t>quaere</t></w>
<w><t>quære</t></w>
+<w><t>quale</t></w>
<w><t>quan-tum</t></w>
<w><t>quem</t></w>
<w><t>qui</t></w>
+<w><t>quic-quid</t></w>
<w><t>quid</t></w>
+<w><t>quis</t></w>
<w><t>quo</t></w>
<w><t>quo-ad</t></w>
<w><t>quo-rum</t><noun><singular/></noun></w>
@@ -243,6 +260,7 @@
<w><t>sac-rae</t></w>
<w><t>sac-ris</t></w>
<w><t>sal-us</t></w>
+<w><t>scire</t></w>
<w><t>scrip-tur-a</t></w>
<w><t>se</t></w>
<w><t>se-cun-dum</t></w>
@@ -251,6 +269,8 @@
<w><t>se-quens</t></w>
<w><t>se-qui-tur</t></w>
<w><t>ser-vi-re</t></w>
+<w><t>serv-il-i-um</t></w>
+<w><t>serv-it-i-um</t></w>
<w><t>ser-vi-tus</t></w>
<w><t>sig-no</t></w>
<w><t>si-len-ti-o</t></w>
@@ -258,6 +278,7 @@
<w><t>si-ne</t></w>
<w><t>so-la</t></w>
<w><t>so-li</t></w>
+<w><t>spe-cies</t></w>
<w><t>sta-tu</t></w>
<w><t>sta-tus</t></w>
<w><t>stim-u-li</t></w>
@@ -264,8 +285,10 @@
<w><t>su-a</t></w>
<w><t>su-as</t></w>
<w><t>sub</t></w>
+<w><t>sug-ges-tio</t></w>
<w><t>sui</t></w>
<w><t>sum-mi</t></w>
+<w><t>sup-pres-sio</t></w>
<w><t>su-tor</t></w>
<w><t>sym-bol-um</t></w>
<w><t>ta-bu-la</t></w>
@@ -272,6 +295,7 @@
<w><t>te</t></w>
<w><t>tem.</t><abbrev referenced-word="tempore"/></w>
<w><t>tem-po-re</t></w>
+<w><t>tene-tur</t></w>
<w><t>ter-ræ</t></w>
<w><t>ter-ror-em</t></w>
<w><t>tes-te</t><comment>1. the witnessing or concluding clause of an
@@ -293,16 +317,21 @@
<w><t>va-lo-rem</t></w>
<w><t>ven-ue</t></w>
<w><t>ver-a</t></w>
+<w><t>ver-i</t></w>
<w><t>ver-ba-tim</t><adjective/><adverb/></w>
<w><t>ver-sa</t></w>
<w><t>ver-sus</t></w>
+<w><t>ves-pere</t></w>
<w><t>ve-to</t></w>
<w><t>vi-a</t></w>
<w><t>vice</t></w>
+<w><t>vil-lein</t></w>
+<w><t>vil-le-nag-i-um</t></w>
<w><t>vin-ces</t></w>
<w><t>vir</t></w>
<w><t>vi-va</t></w>
<w><t>vive</t></w>
+<w><t>viz</t><abbrev referenced-word="videlicet, a contraction of videre licet, meaning 'it is permitted to see'."/></w>
<w><t>vo-ce</t></w>
<w><t>vol-unt</t></w>
<w><t>vox</t></w>
Added: trunk/foray/foray-orthography/src/main/data/dictionaries/por-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/por-Latn-ZZZ.dict.xml (rev 0)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/por-Latn-ZZZ.dict.xml 2024-04-03 13:09:29 UTC (rev 13320)
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!DOCTYPE axsl-dictionary
+ PUBLIC "-//aXSL//DTD Dictionary V0.1//EN"
+ "http://www.axsl.org/dtds/0.1/en/axsl-dictionary.dtd">
+
+<axsl-dictionary
+ id="org.foray.por.Latn.ZZZ"
+ language="por" script="Latn"
+ hard-hyphen-char="=" soft-hyphen-char="-">
+
+<!--
+Dictionary of Portuguese words.
+-->
+
+<w><t>com-mer-ci-o</t></w>
+<w><t>do</t></w>
+<w><t>jour-nal</t></w>
+
+
+</axsl-dictionary>
Property changes on: trunk/foray/foray-orthography/src/main/data/dictionaries/por-Latn-ZZZ.dict.xml
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Modified: trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2024-03-28 11:34:38 UTC (rev 13319)
+++ trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2024-04-03 13:09:29 UTC (rev 13320)
@@ -370,6 +370,16 @@
</unparsed-dictionary>
</dictionary-resource>
+
+ <dictionary-resource id="org.foray.por.Latn.ZZZ">
+ <unparsed-dictionary>
+ <dictionary-element>
+ <resource-location type="url">../dictionaries/por-Latn-ZZZ.dict.xml</resource-location>
+ </dictionary-element>
+ </unparsed-dictionary>
+ </dictionary-resource>
+
+
<hyphenation-patterns-resource id="hyph-patterns-eng">
<parsed-resource>
<resource-location type="classpath">/resources/org/foray/orthography/hyphPatterns/eng.jbso</resource-location>
@@ -405,43 +415,48 @@
<orthography language-iso-3char="ita" script-iso-4char="Latn" country-iso-3char="ZZZ">
<dictionary reference="org.foray.ita.Latn.ZZZ"/>
- </orthography>
+ </orthography>
<orthography language-iso-3char="fre" script-iso-4char="Latn" country-iso-3char="ZZZ">
<derivative-rules reference="fre-Latn-derivative-patterns"/>
<dictionary reference="org.foray.fre.Latn.ZZZ"/>
- </orthography>
+ </orthography>
<orthography language-iso-3char="ger" script-iso-4char="Latn" country-iso-3char="ZZZ">
<derivative-rules reference="ger-Latn-derivative-patterns"/>
<dictionary reference="org.foray.ger.Latn.ZZZ"/>
- </orthography>
+ </orthography>
<orthography language-iso-3char="grc" script-iso-4char="Latn" country-iso-3char="ZZZ">
<dictionary reference="org.foray.grc.Latn.ZZZ"/>
- </orthography>
+ </orthography>
<orthography language-iso-3char="grc" script-iso-4char="Grek" country-iso-3char="ZZZ">
<dictionary reference="org.foray.grc.Grek.ZZZ"/>
- </orthography>
+ </orthography>
<orthography language-iso-3char="heb" script-iso-4char="Latn" country-iso-3char="ZZZ">
<dictionary reference="org.foray.heb.Latn.ZZZ"/>
- </orthography>
+ </orthography>
<!-- Aramaic. -->
<orthography language-iso-3char="arc" script-iso-4char="Latn" country-iso-3char="ZZZ">
<dictionary reference="org.foray.arc.Latn.ZZZ"/>
- </orthography>
+ </orthography>
<!-- Old Norse. -->
<orthography language-iso-3char="non" script-iso-4char="Latn" country-iso-3char="ZZZ">
<dictionary reference="org.foray.non.Latn.ZZZ"/>
- </orthography>
+ </orthography>
<!-- Polish. -->
<orthography language-iso-3char="pol" script-iso-4char="Latn" country-iso-3char="ZZZ">
<dictionary reference="org.foray.pol.Latn.ZZZ"/>
- </orthography>
+ </orthography>
+ <!-- Portuguese. -->
+ <orthography language-iso-3char="por" script-iso-4char="Latn" country-iso-3char="ZZZ">
+ <dictionary reference="org.foray.por.Latn.ZZZ"/>
+ </orthography>
+
</axsl-orthography-config>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2024-03-28 11:34:42
|
Revision: 13319
http://sourceforge.net/p/foray/code/13319
Author: victormote
Date: 2024-03-28 11:34:38 +0000 (Thu, 28 Mar 2024)
Log Message:
-----------
Address new checkstyle warnings.
Modified Paths:
--------------
trunk/foray/foray-areatree/src/main/java/org/foray/area/AreaNode4a.java
trunk/foray/foray-common/src/main/java/org/foray/common/data/TernaryTree.java
trunk/foray/foray-font/src/main/java/org/foray/font/config/FontConfigParser.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
trunk/foray/foray-ps/src/main/java/org/foray/ps/java2d/demo/DemoFrame.java
trunk/foray/foray-render/src/main/java/org/foray/render/awt/viewer/PreviewDialog.java
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/AreaNode4a.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/AreaNode4a.java 2023-12-14 15:07:35 UTC (rev 13318)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/AreaNode4a.java 2024-03-28 11:34:38 UTC (rev 13319)
@@ -639,7 +639,7 @@
/**
* Private inner class to provide an iterator over the children.
*/
- private class ChildIterator implements Iterator<AreaNodeG5> {
+ private final class ChildIterator implements Iterator<AreaNodeG5> {
/** Index to the next child element to be returned. */
private int nextIndex = 0;
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/data/TernaryTree.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/data/TernaryTree.java 2023-12-14 15:07:35 UTC (rev 13318)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/data/TernaryTree.java 2024-03-28 11:34:38 UTC (rev 13319)
@@ -688,7 +688,7 @@
/**
* Private inner class used for optimizing suffixes.
*/
- private class SuffixXref {
+ private final class SuffixXref {
/** The suffix being referenced from {@link #nodeIndex}. */
private String suffix;
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/config/FontConfigParser.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/config/FontConfigParser.java 2023-12-14 15:07:35 UTC (rev 13318)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/config/FontConfigParser.java 2024-03-28 11:34:38 UTC (rev 13319)
@@ -112,7 +112,7 @@
* Inner Data Transfer Object class storing parsed "font" elements whose related "font-content" element has not yet
* been parsed.
*/
- private class UnresolvedFont {
+ private final class UnresolvedFont {
/** The font that is unresolved. */
private RegisteredFont font;
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java 2023-12-14 15:07:35 UTC (rev 13318)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java 2024-03-28 11:34:38 UTC (rev 13319)
@@ -3300,7 +3300,7 @@
/**
* Private inner class to provide an iterator over the children.
*/
- private class ChildIterator implements Iterator<Fo> {
+ private final class ChildIterator implements Iterator<Fo> {
/** Index to the next child element to be returned. */
private int nextIndex = 0;
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2023-12-14 15:07:35 UTC (rev 13318)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2024-03-28 11:34:38 UTC (rev 13319)
@@ -177,7 +177,7 @@
/**
* A single break in an {@link InputItem}.
*/
- private class InputBreak {
+ private final class InputBreak {
/** The offset into an {@link InputItem} text, indicating the location of the break. */
private int offset;
@@ -189,7 +189,7 @@
/**
* Stores the items presented as input to this lexer.
*/
- private class InputItem {
+ private final class InputItem {
/** The text. */
private CharSequence text;
@@ -209,7 +209,7 @@
* Wrapper around the list of input items that provides a flattened view of the content of those items, allowing
* them to be treated as a single sequence of tokens.
*/
- private class Input {
+ private final class Input {
/** The list of input items that have been submitted for processing. */
private List<InputItem> items = new ArrayList<InputItem>();
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java 2023-12-14 15:07:35 UTC (rev 13318)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java 2024-03-28 11:34:38 UTC (rev 13319)
@@ -70,7 +70,7 @@
/**
* Container for the "axsl-dictionary" element as it is being parsed.
*/
- private class DictionaryElement {
+ private final class DictionaryElement {
/** The id of the dictionary. */
private String id;
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2023-12-14 15:07:35 UTC (rev 13318)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2024-03-28 11:34:38 UTC (rev 13319)
@@ -101,7 +101,7 @@
* This class is a SAX parser, but is designed to work with any(???) XML document, and is therefore agnostic about
* the elements or their content, so we capture that information in this class.
*/
- private class Element {
+ private final class Element {
/** The namespace URI, if specified. */
private String namespace;
Modified: trunk/foray/foray-ps/src/main/java/org/foray/ps/java2d/demo/DemoFrame.java
===================================================================
--- trunk/foray/foray-ps/src/main/java/org/foray/ps/java2d/demo/DemoFrame.java 2023-12-14 15:07:35 UTC (rev 13318)
+++ trunk/foray/foray-ps/src/main/java/org/foray/ps/java2d/demo/DemoFrame.java 2024-03-28 11:34:38 UTC (rev 13319)
@@ -77,7 +77,7 @@
/**
* Private inner class listening for window events.
*/
- private class WindowHandler extends WindowAdapter {
+ private final class WindowHandler extends WindowAdapter {
@Override
public void windowClosing(final WindowEvent we) {
Modified: trunk/foray/foray-render/src/main/java/org/foray/render/awt/viewer/PreviewDialog.java
===================================================================
--- trunk/foray/foray-render/src/main/java/org/foray/render/awt/viewer/PreviewDialog.java 2023-12-14 15:07:35 UTC (rev 13318)
+++ trunk/foray/foray-render/src/main/java/org/foray/render/awt/viewer/PreviewDialog.java 2024-03-28 11:34:38 UTC (rev 13319)
@@ -599,7 +599,7 @@
* This class is used to reload document in
* a thread safe way.
*/
- private class Reloader extends Thread {
+ private final class Reloader extends Thread {
@Override
public void run() {
PreviewDialog.this.previewImageLabel.setIcon(null);
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-12-14 15:07:38
|
Revision: 13318
http://sourceforge.net/p/foray/code/13318
Author: victormote
Date: 2023-12-14 15:07:35 +0000 (Thu, 14 Dec 2023)
Log Message:
-----------
Improvements to dictionaries and orthographies.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/heb-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
Added Paths:
-----------
trunk/foray/foray-orthography/src/main/data/dictionaries/ger-Latn-ZZZ.dict.xml
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2023-11-02 20:16:15 UTC (rev 13317)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2023-12-14 15:07:35 UTC (rev 13318)
@@ -720,7 +720,7 @@
<w><t>a-cad-e-mism</t></w>
<w><t>a-cad-e-mize</t></w>
<w><t>Ac-a-de-mus</t></w>
-<w><t>a-cad-e-my</t></w>
+<w><t>a-cad-e-my</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>A-cad-e-my</t></w>
<w><t>A-ca-di-a</t></w>
<w><t>A-ca-di-an</t></w>
@@ -2248,7 +2248,7 @@
<w><t>ad-verse-ly</t></w>
<w><t>ad-verse-ness</t></w>
<phrase><t>ad-verse pos-ses-sion</t></phrase>
-<w><t>ad-ver-si-ty</t></w>
+<w><t>ad-ver-si-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ad-vert</t><verb><regular-root/></verb></w>
<w><t>ad-vert-ence</t></w>
<w><t>ad-vert-en-cy</t></w>
@@ -2558,7 +2558,8 @@
<w><t>aes-cu-lin</t></w>
<w><t>Ae-se-pus</t></w>
<w><t>Ae-sir</t></w>
-<w><t>Ae-sop</t></w>
+<w><t>Ae-sop</t><noun><convertible-to-possessive/></noun></w>
+<w><t>Æ-sop</t><noun><convertible-to-possessive/></noun></w>
<w><t>Ae-so-pi-an</t></w>
<w><t>Ae-sta-tis</t></w>
<w><t>aes-the-sia</t></w>
@@ -2566,6 +2567,7 @@
<w><t>aes-the-sis</t></w>
<w><t>aes-thete</t></w>
<w><t>aes-thet-ic</t></w>
+<w><t>æs-thet-ic</t></w>
<w><t>aes-thet-i-cal</t></w>
<w><t>aes-thet-i-cal-ly</t></w>
<w><t>aes-the-ti-cian</t></w>
@@ -2651,7 +2653,7 @@
<w><t>af-fined</t></w>
<w><t>af-fine-ly</t></w>
<w><t>af-fin-i-tive</t></w>
-<w><t>af-fin-i-ty</t></w>
+<w><t>af-fin-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>af-firm</t><verb><regular-root/></verb></w>
<w><t>af-firm-a-ble</t></w>
<w><t>af-firm-a-bly</t></w>
@@ -3696,7 +3698,7 @@
<w><t>al-che-mise</t></w>
<w><t>al-che-mised</t></w>
<w><t>al-che-mis-ing</t></w>
-<w><t>al-che-mist</t></w>
+<w><t>al-che-mist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>al-che-mize</t></w>
<w><t>al-che-mized</t></w>
<w><t>al-che-miz-ing</t></w>
@@ -6229,7 +6231,7 @@
<w><t>Ang-kor</t></w>
<w><t>an-glaise</t></w>
<w><t>An-gle</t></w>
-<w><t>an-gle</t></w>
+<w><t>an-gle</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>an-gle brack-et</t></phrase>
<w><t>an-gled</t></w>
<w><t>An-gle-doz-er</t></w>
@@ -6745,7 +6747,7 @@
<w><t>an-tag-o-nise</t></w>
<w><t>an-tag-o-nised</t></w>
<w><t>an-tag-o-nis-ing</t></w>
-<w><t>an-tag-o-nism</t></w>
+<w><t>an-tag-o-nism</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>an-tag-o-nist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>an-tag-o-nis-tic</t></w>
<w><t>an-tag-o-nis-ti-cal-ly</t></w>
@@ -7875,7 +7877,8 @@
<w><t>an-ti-the-o-log-i-cal</t></w>
<w><t>an-ti-the-ol-o-giz-ing</t></w>
<w><t>an-ti-the-ol-o-gy</t></w>
-<w><t>an-tith-e-sis</t></w>
+<w><t>an-tith-e-ses</t><noun><plural/></noun></w>
+<w><t>an-tith-e-sis</t><noun><singular/></noun></w>
<w><t>an-ti-thet-ic</t></w>
<w><t>an-ti-thet-i-cal</t></w>
<w><t>an-ti-thet-i-cal-ly</t></w>
@@ -9594,7 +9597,7 @@
<w><t>a-round=the=clock</t></w>
<w><t>a-rous-a-ble</t></w>
<w><t>a-rous-al</t></w>
-<w><t>a-rouse</t></w>
+<w><t>a-rouse</t><verb><regular-root/></verb></w>
<w><t>a-roused</t></w>
<w><t>a-rous-er</t></w>
<w><t>a-rous-ing</t></w>
@@ -10649,7 +10652,7 @@
<phrase><t>As-wan High Dam</t></phrase>
<w><t>a-swarm</t></w>
<w><t>a-syl-lab-ic</t></w>
-<w><t>a-sy-lum</t></w>
+<w><t>a-sy-lum</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>a-sym-met-ric</t></w>
<w><t>a-sym-met-ri-cal</t></w>
<w><t>a-sym-met-ri-cal-ly</t></w>
@@ -11092,7 +11095,7 @@
<w><t>at-trit-ed</t></w>
<w><t>at-trite-ness</t></w>
<w><t>at-trit-ing</t></w>
-<w><t>at-tri-tion</t></w>
+<w><t>at-tri-tion</t><noun><pluralizable/></noun></w>
<w><t>at-tri-tion-al</t></w>
<w><t>at-tri-tive</t></w>
<w><t>At-tu</t></w>
@@ -11735,7 +11738,7 @@
<w><t>A-ve-lla-ne-da</t></w>
<phrase><t>A-ve Ma-ri-a</t></phrase>
<w><t>av-e-na-ceous</t></w>
-<w><t>a-venge</t></w>
+<w><t>a-venge</t><verb><regular-root/></verb></w>
<w><t>a-venged</t></w>
<w><t>a-venge-ful</t></w>
<w><t>a-veng-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -11773,7 +11776,7 @@
<w><t>a-verse</t></w>
<w><t>a-verse-ly</t></w>
<w><t>a-verse-ness</t></w>
-<w><t>a-ver-sion</t></w>
+<w><t>a-ver-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>a-ver-sion ther-a-py</t></phrase>
<w><t>a-ver-sive</t></w>
<w><t>a-vert</t><verb><regular-root/></verb></w>
@@ -11890,7 +11893,7 @@
<w><t>a-wak-en</t><verb><regular-root/></verb></w>
<w><t>a-wak-en-a-ble</t></w>
<w><t>a-wak-en-er</t></w>
-<w><t>a-wak-en-ing</t></w>
+<w><t>a-wak-en-ing</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>a-wak-en-ing-ly</t></w>
<w><t>a-wak-ing</t></w>
<w><t>a-ward</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
@@ -13028,7 +13031,7 @@
<w><t>Ban-croft</t></w>
<w><t>band</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Ban-da</t></w>
-<w><t>band-age</t></w>
+<w><t>band-age</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>band-aged</t></w>
<w><t>band-ag-er</t></w>
<w><t>band-ag-ing</t></w>
@@ -14567,7 +14570,7 @@
<w><t>bed-rid-den</t></w>
<w><t>bed-rock</t></w>
<w><t>bed-roll</t></w>
-<w><t>bed-room</t></w>
+<w><t>bed-room</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Beds</t></w>
<w><t>bed-side</t></w>
<w><t>bed-sit-ter</t></w>
@@ -15307,7 +15310,7 @@
<w><t>Be-re-a</t><noun><convertible-to-possessive/></noun></w>
<w><t>Be-re-an</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>be-reave</t><verb><regular-root/></verb></w>
-<w><t>be-reave-ment</t></w>
+<w><t>be-reave-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>be-reav-er</t></w>
<w><t>be-reav-ing</t></w>
<w><t>Ber-e-cyn-ti-a</t></w>
@@ -15374,7 +15377,7 @@
<w><t>ber-ley</t></w>
<w><t>Ber-lich-ing-en</t></w>
<w><t>ber-lin</t></w>
-<w><t>Ber-lin</t></w>
+<w><t>Ber-lin</t><noun><convertible-to-possessive/></noun></w>
<w><t>ber-line</t></w>
<w><t>Ber-lin-er</t></w>
<w><t>Ber-li-ner</t></w>
@@ -18656,7 +18659,7 @@
<w><t>bou-gain-vil-lae-a</t></w>
<w><t>Bou-gain-ville</t></w>
<w><t>bou-gain-vil-le-a</t></w>
-<w><t>bough</t></w>
+<w><t>bough</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>bough-less</t></w>
<w><t>bough-pot</t></w>
<w><t>bought</t></w>
@@ -20112,7 +20115,7 @@
<w><t>brow-beat-er</t></w>
<w><t>brow-beat-ing</t></w>
<w><t>brow-less</t></w>
-<w><t>Brown</t></w>
+<w><t>Brown</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>brown</t></w>
<phrase><t>brown al-gae</t></phrase>
<w><t>Browne</t></w>
@@ -20816,7 +20819,7 @@
<w><t>Bun-uel</t></w>
<w><t>Bu-nus</t></w>
<w><t>bun-ya=bun-ya</t></w>
-<w><t>Bun-yan</t></w>
+<w><t>Bun-yan</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Bun-yan-esque</t></w>
<w><t>bun-yip</t></w>
<w><t>Buo-na-par-te</t></w>
@@ -25767,7 +25770,7 @@
<w><t>chap-book</t></w>
<w><t>chape</t></w>
<w><t>cha-peau</t></w>
-<w><t>chap-el</t></w>
+<w><t>chap-el</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>chap-eled</t></w>
<w><t>chape-less</t></w>
<w><t>chap-el-ing</t></w>
@@ -30003,7 +30006,7 @@
<w><t>col-laps-a-bil-i-ty</t></w>
<w><t>col-laps-a-ble</t></w>
<w><t>col-lap-sar</t></w>
-<w><t>col-lapse</t></w>
+<w><t>col-lapse</t><verb><regular-root/></verb></w>
<w><t>col-lapsed</t></w>
<w><t>col-laps-i-bil-i-ty</t></w>
<w><t>col-laps-i-ble</t></w>
@@ -31211,7 +31214,7 @@
<w><t>com-prized</t></w>
<w><t>com-priz-ing</t></w>
<w><t>com-pro-mis</t></w>
-<w><t>com-pro-mise</t></w>
+<w><t>com-pro-mise</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>com-pro-mised</t></w>
<w><t>com-pro-mis-er</t></w>
<w><t>com-pro-mis-ing</t></w>
@@ -31261,7 +31264,7 @@
<w><t>com-put-ing</t></w>
<w><t>com-put-ist</t></w>
<w><t>Comr</t></w>
-<w><t>com-rade</t></w>
+<w><t>com-rade</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>com-rade-ry</t></w>
<w><t>com-rade-ship</t></w>
<w><t>com-sat</t></w>
@@ -32148,9 +32151,9 @@
<w><t>con-scious</t></w>
<w><t>con-scious-ly</t></w>
<w><t>con-scious-ness</t></w>
-<w><t>con-script</t></w>
+<w><t>con-script</t><verb><regular-root/></verb></w>
<phrase><t>con-script fa-thers</t></phrase>
-<w><t>con-scrip-tion</t></w>
+<w><t>con-scrip-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-scrip-tion-al</t></w>
<w><t>con-scrip-tion-ist</t></w>
<w><t>con-se-crate</t><verb><regular-root/></verb></w>
@@ -32817,7 +32820,7 @@
<phrase><t>con-trol tow-er</t></phrase>
<w><t>con-tro-ver-sial</t></w>
<w><t>con-tro-ver-sial-ism</t></w>
-<w><t>con-tro-ver-sial-ist</t></w>
+<w><t>con-tro-ver-sial-ist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-tro-ver-sial-ly</t></w>
<w><t>con-tro-ver-sy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-tro-vert</t><verb><regular-root/></verb></w>
@@ -33258,7 +33261,7 @@
<w><t>cop-per-ah</t></w>
<w><t>cop-per-as</t></w>
<phrase><t>Cop-per Belt</t></phrase>
-<w><t>cop-per-head</t></w>
+<w><t>cop-per-head</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Cop-per-head-ism</t></w>
<w><t>cop-per=leaf</t></w>
<w><t>Cop-per-mine</t></w>
@@ -34578,7 +34581,7 @@
<w><t>cov-e-nan-tor</t></w>
<phrase><t>Cov-ent Gar-den</t></phrase>
<w><t>Cov-en-try</t></w>
-<w><t>cov-er</t></w>
+<w><t>cov-er</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>cov-er-a-ble</t></w>
<w><t>cov-er-age</t></w>
<w><t>cov-er-all</t></w>
@@ -34588,7 +34591,7 @@
<phrase><t>cov-ered wag-on</t></phrase>
<w><t>cov-er-er</t></w>
<phrase><t>cov-er girl</t></phrase>
-<w><t>cov-er-ing</t></w>
+<w><t>cov-er-ing</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>cov-er-ing let-ter</t></phrase>
<w><t>cov-er-less</t></w>
<w><t>cov-er-let</t></w>
@@ -35356,7 +35359,7 @@
<w><t>crim-i-nate</t></w>
<w><t>crim-i-nat-ed</t></w>
<w><t>crim-i-nat-ing</t></w>
-<w><t>crim-i-na-tion</t></w>
+<w><t>crim-i-na-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>crim-i-na-tive</t></w>
<w><t>crim-i-na-tor</t></w>
<w><t>crim-i-na-to-ry</t></w>
@@ -35914,7 +35917,7 @@
<w><t>cru-ra</t></w>
<w><t>cru-ral</t></w>
<w><t>crus</t></w>
-<w><t>cru-sade</t></w>
+<w><t>cru-sade</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cru-sad-ed</t></w>
<w><t>cru-sad-er</t></w>
<w><t>cru-sad-ing</t></w>
@@ -37209,6 +37212,7 @@
<w><t>D/L</t></w>
<w><t>D/O</t></w>
<w><t>D/W</t></w>
+<w><t>D.C.</t><abbrev referenced-word="District of Columbia"/></w>
<w><t>dab</t></w>
<w><t>dab-ber</t></w>
<w><t>dab-bing</t></w>
@@ -37419,7 +37423,7 @@
<w><t>dal-li-er</t></w>
<w><t>Dal-lin</t></w>
<w><t>Dal-lo-way</t></w>
-<w><t>dal-ly</t></w>
+<w><t>dal-ly</t><verb><regular-root/></verb></w>
<w><t>dal-ly-ing</t></w>
<w><t>dal-ly-ing-ly</t></w>
<w><t>Dal-ma-tia</t></w>
@@ -37891,7 +37895,7 @@
<w><t>daugh-ters=in=law</t></w>
<w><t>dauk</t></w>
<w><t>Dau-mier</t></w>
-<w><t>daunt</t></w>
+<w><t>daunt</t><verb><regular-root/></verb></w>
<w><t>daunt-ing-ly</t></w>
<w><t>daunt-ing-ness</t></w>
<w><t>daunt-less</t></w>
@@ -39101,7 +39105,7 @@
<w><t>deg-ra-da-tion</t></w>
<w><t>deg-ra-da-tion-al</t></w>
<w><t>deg-ra-da-tive</t></w>
-<w><t>de-grade</t></w>
+<w><t>de-grade</t><verb><regular-root/></verb></w>
<w><t>de-grad-ed</t></w>
<w><t>de-grad-ed-ly</t></w>
<w><t>de-grad-ed-ness</t></w>
@@ -39651,7 +39655,7 @@
<w><t>de-mo-bi-liz-ing</t></w>
<w><t>De-moc-o-on</t></w>
<w><t>de-moc-ra-cy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>dem-o-crat</t></w>
+<w><t>dem-o-crat</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Dem-o-crat</t></w>
<w><t>dem-o-crat-ic</t></w>
<w><t>dem-o-crat-i-cal</t></w>
@@ -43046,7 +43050,7 @@
<w><t>dis-im-pris-on</t></w>
<w><t>dis-im-pris-on-ment</t></w>
<w><t>dis-in-cen-tive</t></w>
-<w><t>dis-in-cli-na-tion</t></w>
+<w><t>dis-in-cli-na-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dis-in-cline</t></w>
<w><t>dis-in-clined</t></w>
<w><t>dis-in-clin-ing</t></w>
@@ -43738,7 +43742,7 @@
<w><t>dis-u-ni-fy-ing</t></w>
<w><t>dis-un-ion</t></w>
<w><t>dis-un-ion-ism</t></w>
-<w><t>dis-un-ion-ist</t></w>
+<w><t>dis-un-ion-ist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dis-u-nite</t></w>
<w><t>dis-u-nit-ed</t></w>
<w><t>dis-u-nit-er</t></w>
@@ -44936,7 +44940,7 @@
<w><t>Dough-ty</t></w>
<w><t>dough-ty</t></w>
<w><t>dough-y</t></w>
-<w><t>Doug-las</t></w>
+<w><t>Doug-las</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Doug-las fir</t></phrase>
<w><t>Doug-las=Home</t></w>
<w><t>Doug-lass</t></w>
@@ -47161,7 +47165,7 @@
<w><t>ed-u-ca-ble</t></w>
<w><t>ed-u-cat-a-bil-i-ty</t></w>
<w><t>ed-u-cat-a-ble</t></w>
-<w><t>ed-u-cate</t></w>
+<w><t>ed-u-cate</t><verb><regular-root/></verb></w>
<w><t>ed-u-cat-ed</t></w>
<w><t>ed-u-cat-ee</t></w>
<w><t>ed-u-cat-ing</t></w>
@@ -48322,10 +48326,10 @@
<w><t>e-man-ci-pate</t></w>
<w><t>e-man-ci-pat-ed</t></w>
<w><t>e-man-ci-pat-ing</t></w>
-<w><t>e-man-ci-pa-tion</t></w>
+<w><t>e-man-ci-pa-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>e-man-ci-pa-tion-ist</t></w>
<w><t>e-man-ci-pa-tive</t></w>
-<w><t>e-man-ci-pa-tor</t></w>
+<w><t>e-man-ci-pa-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>e-man-ci-pa-to-ry</t></w>
<w><t>e-man-ci-pist</t></w>
<w><t>E-man-u-el</t></w>
@@ -48396,7 +48400,7 @@
<w><t>em-blazed</t></w>
<w><t>em-blaz-er</t></w>
<w><t>em-blaz-ing</t></w>
-<w><t>em-bla-zon</t></w>
+<w><t>em-bla-zon</t><verb><regular-root/></verb></w>
<w><t>em-bla-zon-er</t></w>
<w><t>em-bla-zon-ment</t></w>
<w><t>em-bla-zon-ry</t></w>
@@ -50132,7 +50136,7 @@
<w><t>Ep-i-daur-us</t></w>
<w><t>ep-i-deic-tic</t></w>
<w><t>Ep-i-dem-i-a-rum</t></w>
-<w><t>ep-i-dem-ic</t></w>
+<w><t>ep-i-dem-ic</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ep-i-dem-i-cal</t></w>
<w><t>ep-i-dem-i-cal-ly</t></w>
<phrase><t>ep-i-dem-ic en-ceph-a-li-tis</t></phrase>
@@ -51132,7 +51136,7 @@
<w><t>es-sence</t></w>
<w><t>Es-sene</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Es-se-ni-an</t></w>
-<w><t>es-sen-tial</t></w>
+<w><t>es-sen-tial</t><noun><pluralizable/></noun><adjective/></w>
<w><t>es-sen-tial-ism</t></w>
<w><t>es-sen-tial-ist</t></w>
<w><t>es-sen-ti-al-i-ty</t></w>
@@ -52829,7 +52833,7 @@
<w><t>ex-pe-ri-en-tial-ist</t></w>
<w><t>ex-pe-ri-en-tial-is-tic</t></w>
<w><t>ex-pe-ri-en-tial-ly</t></w>
-<w><t>ex-per-i-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>ex-per-i-ment</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>ex-per-i-men-tal</t></w>
<w><t>ex-per-i-men-tal-ise</t></w>
<w><t>ex-per-i-men-tal-ism</t></w>
@@ -53356,7 +53360,7 @@
<w><t>ex-ude</t></w>
<w><t>ex-ud-ed</t></w>
<w><t>ex-ud-ing</t></w>
-<w><t>ex-ult</t></w>
+<w><t>ex-ult</t><verb><regular-root/></verb></w>
<w><t>ex-ult-an-cy</t></w>
<w><t>ex-ult-ant</t></w>
<w><t>ex-ult-ant-ly</t></w>
@@ -54200,7 +54204,7 @@
<w><t>fas-ci-cule</t></w>
<w><t>fas-cic-u-lus</t></w>
<w><t>fas-ci-cu-lus</t></w>
-<w><t>fas-ci-nate</t></w>
+<w><t>fas-ci-nate</t><verb><regular-root/></verb></w>
<w><t>fas-ci-nat-ed</t></w>
<w><t>fas-ci-nat-ed-ly</t></w>
<w><t>fas-ci-nat-ing</t></w>
@@ -55108,7 +55112,7 @@
<w><t>feu-age</t></w>
<w><t>feu-ar</t></w>
<w><t>Feucht-wang-er</t></w>
-<w><t>feud</t></w>
+<w><t>feud</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>feu-dal</t></w>
<w><t>feu-dal-ise</t></w>
<w><t>feu-dal-ised</t></w>
@@ -55503,7 +55507,7 @@
<w><t>fil-i-at-ing</t></w>
<w><t>fil-i-a-tion</t></w>
<w><t>fil-i-beg</t></w>
-<w><t>fil-i-bus-ter</t></w>
+<w><t>fil-i-bus-ter</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>fil-i-bus-ter-er</t></w>
<w><t>fil-i-bus-ter-ism</t></w>
<w><t>fil-i-bus-ter-ous</t></w>
@@ -56257,7 +56261,7 @@
<w><t>flap-ping</t></w>
<w><t>flap-py</t></w>
<w><t>flaps</t></w>
-<w><t>flare</t></w>
+<w><t>flare</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>flare-back</t></w>
<w><t>flar-ing</t></w>
<w><t>flar-ing-ly</t></w>
@@ -58051,7 +58055,7 @@
<w><t>fos-sette</t></w>
<w><t>fos-sick</t></w>
<w><t>fos-sick-er</t></w>
-<w><t>fos-sil</t></w>
+<w><t>fos-sil</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>fos-sil fu-el</t></phrase>
<w><t>fos-sil-if-er-ous</t></w>
<w><t>fos-sil-i-sa-tion</t></w>
@@ -59608,7 +59612,7 @@
<w><t>fu-run-cle</t></w>
<w><t>fu-run-cu-lar</t></w>
<w><t>fu-run-cu-lo-sis</t></w>
-<w><t>fu-ry</t></w>
+<w><t>fu-ry</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>furze</t></w>
<w><t>furz-i-er</t></w>
<w><t>furz-i-est</t></w>
@@ -61485,7 +61489,7 @@
<w><t>Gé-ri-cault</t></w>
<w><t>Ge-ring</t></w>
<w><t>Ger-la-chov-ka</t></w>
-<w><t>germ</t></w>
+<w><t>germ</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Ger-main</t></w>
<w><t>Ger-maine</t></w>
<w><t>ger-man</t></w>
@@ -63378,7 +63382,7 @@
<w><t>Gor-cha-kov</t></w>
<w><t>gor-cock</t></w>
<w><t>Gor-di-an</t></w>
-<phrase><t>Gor-di-an knot</t></phrase>
+<w><t>Gor-di-an knot</t></w>
<w><t>Gor-don</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Gor-don set-ter</t></phrase>
<w><t>gore</t></w>
@@ -64915,7 +64919,7 @@
<w><t>Gru-is</t></w>
<w><t>Gruit-hui-sen</t></w>
<w><t>grum</t></w>
-<w><t>grum-ble</t></w>
+<w><t>grum-ble</t><verb><regular-root/></verb></w>
<w><t>grum-bler</t></w>
<w><t>grum-bling-ly</t></w>
<w><t>grum-bly</t></w>
@@ -65013,9 +65017,10 @@
<w><t>gua-ra-ni</t></w>
<w><t>Gua-ra-ni</t></w>
<w><t>Gua-ra-ní</t></w>
-<w><t>guar-an-tee</t></w>
-<w><t>guar-an-teed</t></w>
-<w><t>guar-an-tee-ing</t></w>
+<w><t>guar-an-tee</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root value="false"/></verb></w>
+<w><t>guar-an-teed</t><verb><lemma>guarantee</lemma></verb></w>
+<w><t>guar-an-tee-ing</t><verb><lemma>guarantee</lemma></verb></w>
+<w><t>guar-an-tees</t><verb><lemma>guarantee</lemma></verb></w>
<w><t>guar-an-tied</t></w>
<w><t>guar-an-tor</t></w>
<w><t>guar-an-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -66567,7 +66572,7 @@
<w><t>hal-looed</t></w>
<w><t>hal-loo-ing</t></w>
<w><t>hal-loth</t></w>
-<w><t>hal-low</t></w>
+<w><t>hal-low</t><verb><regular-root/></verb></w>
<w><t>hal-lowed</t></w>
<w><t>hal-lowed-ly</t></w>
<w><t>hal-lowed-ness</t></w>
@@ -67435,7 +67440,7 @@
<w><t>hast-y</t></w>
<w><t>has-ty</t></w>
<phrase><t>has-ty pud-ding</t></phrase>
-<w><t>hat</t></w>
+<w><t>hat</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>hat-a-ble</t></w>
<w><t>Ha-ta-su</t></w>
<w><t>hat-band</t></w>
@@ -68367,7 +68372,7 @@
<w><t>He-li-us</t></w>
<w><t>he-lix</t></w>
<w><t>he’ll</t></w>
-<w><t>hell</t></w>
+<w><t>hell</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Hell</t></w>
<w><t>Hel-la-di-an</t></w>
<w><t>Hel-lad-ic</t></w>
@@ -71312,7 +71317,7 @@
<w><t>hos-pi-ta-ble</t></w>
<w><t>hos-pi-ta-ble-ness</t></w>
<w><t>hos-pi-ta-bly</t></w>
-<w><t>hos-pi-tal</t></w>
+<w><t>hos-pi-tal</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>hos-pi-tal cor-ner</t></phrase>
<w><t>Hos-pi-tal-er</t></w>
<w><t>hos-pi-tal-er</t></w>
@@ -71552,7 +71557,7 @@
<w><t>hov-elled</t></w>
<w><t>hov-el-ling</t></w>
<w><t>ho-ven</t></w>
-<w><t>hov-er</t></w>
+<w><t>hov-er</t><verb><regular-root/></verb></w>
<w><t>Hov-er-craft</t></w>
<w><t>hov-er-craft</t></w>
<w><t>hov-er-er</t></w>
@@ -71718,7 +71723,7 @@
<w><t>Hug-li</t></w>
<w><t>Hu-go</t></w>
<w><t>Hu-go-ton</t></w>
-<w><t>Hu-gue-not</t></w>
+<w><t>Hu-gue-not</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Hu-gue-not-ic</t></w>
<w><t>Hu-gue-not-ism</t></w>
<w><t>huh</t></w>
@@ -72045,7 +72050,7 @@
<w><t>Hus</t></w>
<w><t>Hu-s</t></w>
<w><t>Hu-sain</t></w>
-<w><t>hus-band</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>hus-band</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>hus-band-age</t></w>
<w><t>hus-band-er</t></w>
<w><t>hus-band-less</t></w>
@@ -72053,7 +72058,7 @@
<w><t>hus-band-men</t><noun><plural/><convertible-to-possessive/></noun></w>
<w><t>hus-band-ry</t></w>
<phrase><t>Hu-sein ibn=A-li</t></phrase>
-<w><t>hush</t></w>
+<w><t>hush</t><verb><regular-root/></verb></w>
<w><t>hush-a-by</t></w>
<w><t>hush-ed-ly</t></w>
<w><t>hush-ful</t></w>
@@ -75216,7 +75221,7 @@
<w><t>in-ar-tis-tic</t></w>
<w><t>in-ar-tis-ti-cal</t></w>
<w><t>in-ar-tis-ti-cal-ly</t></w>
-<w><t>in-as-much</t></w>
+<w><t>in-as-much</t><adverb/></w>
<phrase><t>in-as-much as</t></phrase>
<w><t>in-at-ten-tion</t></w>
<w><t>in-at-ten-tive</t></w>
@@ -75694,7 +75699,7 @@
<w><t>in-cross</t></w>
<w><t>in-crust</t></w>
<w><t>in-crust-ant</t></w>
-<w><t>in-crus-ta-tion</t></w>
+<w><t>in-crus-ta-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-cu-bate</t></w>
<w><t>in-cu-bat-ed</t></w>
<w><t>in-cu-bat-ing</t></w>
@@ -77464,7 +77469,7 @@
<w><t>in-stalled</t></w>
<w><t>in-stall-er</t></w>
<w><t>in-stal-ling</t></w>
-<w><t>in-stall-ment</t></w>
+<w><t>in-stall-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>in-stall-ment plan</t></phrase>
<w><t>in-stal-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-stance</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -77557,7 +77562,7 @@
<phrase><t>in-stru-men-tal con-di-tion-ing</t></phrase>
<w><t>in-stru-men-tal-ism</t></w>
<w><t>in-stru-men-tal-ist</t></w>
-<w><t>in-stru-men-tal-i-ty</t></w>
+<w><t>in-stru-men-tal-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-stru-men-tal-ly</t></w>
<w><t>in-stru-men-ta-tion</t></w>
<phrase><t>in-stru-ment fly-ing</t></phrase>
@@ -77692,7 +77697,7 @@
<w><t>in-teg-ri-ty</t></w>
<w><t>in-teg-u-ment</t></w>
<w><t>in-teg-u-men-ta-ry</t></w>
-<w><t>in-tel-lect</t></w>
+<w><t>in-tel-lect</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-tel-lec-tion</t></w>
<w><t>in-tel-lec-tive</t></w>
<w><t>in-tel-lec-tive-ly</t></w>
@@ -79075,7 +79080,7 @@
<w><t>in-town</t></w>
<w><t>in-tox-i-ca-ble</t></w>
<w><t>in-tox-i-cant</t></w>
-<w><t>in-tox-i-cate</t></w>
+<w><t>in-tox-i-cate</t><verb><regular-root/></verb></w>
<w><t>in-tox-i-cat-ed</t></w>
<w><t>in-tox-i-cat-ed-ly</t></w>
<w><t>in-tox-i-cat-ing</t></w>
@@ -79293,8 +79298,10 @@
<w><t>in-vag-i-nat-ed</t></w>
<w><t>in-vag-i-nat-ing</t></w>
<w><t>in-vag-i-na-tion</t></w>
-<w><t>in-val-id</t></w>
-<w><t>in-va-lid</t></w>
+<word-group>
+ <w><t>in-val-id</t><adjective/></w>
+ <w><t>in-va-lid</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+</word-group>
<w><t>in-val-i-date</t></w>
<w><t>in-val-i-dat-ed</t></w>
<w><t>in-val-i-dat-ing</t></w>
@@ -79934,9 +79941,9 @@
<w><t>ir-re-proach-a-ble-ness</t></w>
<w><t>ir-re-proach-a-bly</t></w>
<w><t>ir-re-sist-i-bil-i-ty</t></w>
-<w><t>ir-re-sist-i-ble</t></w>
-<w><t>ir-re-sist-i-ble-ness</t></w>
-<w><t>ir-re-sist-i-bly</t></w>
+<w><t>ir-re-sist-i-ble</t><adjective/></w>
+<w><t>ir-re-sist-i-ble-ness</t><noun/></w>
+<w><t>ir-re-sist-i-bly</t><adverb/></w>
<w><t>ir-re-sol-u-bil-i-ty</t></w>
<w><t>ir-res-o-lu-ble</t></w>
<w><t>ir-res-o-lute</t></w>
@@ -81698,7 +81705,7 @@
<w><t>John-so-ni-an</t></w>
<w><t>John-so-ni-an-ism</t></w>
<w><t>John-so-ni-an-ly</t></w>
-<w><t>John-ston</t></w>
+<w><t>John-ston</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Johns-town</t></w>
<phrase><t>John the Bap-tist</t></phrase>
<w><t>Jo-hor</t></w>
@@ -81830,7 +81837,7 @@
<phrase><t>Jos-quin des Pr</t></phrase>
<w><t>joss</t></w>
<w><t>jos-ser</t></w>
-<w><t>jos-tle</t></w>
+<w><t>jos-tle</t><verb><regular-root/></verb></w>
<w><t>jos-tle-ment</t></w>
<w><t>jos-tler</t></w>
<w><t>Jos-u-e</t></w>
@@ -86255,7 +86262,8 @@
<phrase><t>lay fig-ure</t></phrase>
<w><t>lay-ing</t></w>
<phrase><t>lay in-to</t></phrase>
-<w><t>lay-man</t></w>
+<w><t>lay-man</t><noun><singular/><convertible-to-possessive/></noun></w>
+<w><t>lay-men</t><noun><plural/><convertible-to-possessive/></noun></w>
<w><t>lay-off</t></w>
<w><t>lay-out</t></w>
<w><t>lay-o-ver</t></w>
@@ -86614,7 +86622,7 @@
<w><t>left-wing-er</t></w>
<w><t>left-y</t></w>
<w><t>leg</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>leg-a-cy</t></w>
+<w><t>leg-a-cy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>le-gal</t></w>
<phrase><t>le-gal aid</t></phrase>
<phrase><t>le-gal cap</t></phrase>
@@ -86630,7 +86638,7 @@
<w><t>le-gal-is-ti-cal-ly</t></w>
<w><t>le-gal-i-ty</t></w>
<w><t>le-gal-i-za-tion</t></w>
-<w><t>le-gal-ize</t></w>
+<w><t>le-gal-ize</t><verb><regular-root/></verb></w>
<w><t>le-gal-ized</t></w>
<w><t>le-gal-iz-ing</t></w>
<w><t>le-gal-ly</t></w>
@@ -88612,7 +88620,7 @@
<w><t>lit-ur-gism</t></w>
<w><t>lit-ur-gist</t></w>
<w><t>lit-ur-gis-tic</t></w>
-<w><t>lit-ur-gy</t></w>
+<w><t>lit-ur-gy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>lit-u-us</t></w>
<w><t>Lit-vi-nov</t></w>
<w><t>Lit-y-er-ses</t></w>
@@ -90270,7 +90278,7 @@
<phrase><t>Lu-te-tia Pa-ris-i-o-rum</t></phrase>
<w><t>lu-te-ti-um</t></w>
<w><t>Luth</t></w>
-<w><t>Lu-ther</t></w>
+<w><t>Lu-ther</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Lu-ther-an</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Lu-ther-an-ism</t></w>
<w><t>Lu-ther-ism</t></w>
@@ -91560,7 +91568,8 @@
<w><t>mal-to-bi-ose</t></w>
<w><t>malt-ol</t></w>
<w><t>malt-ose</t></w>
-<w><t>mal-treat</t></w>
+<w><t>mal-treat</t><verb><regular-root/></verb></w>
+<w><t>mal-treat-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>malt-ster</t></w>
<w><t>malt-y</t></w>
<w><t>Ma-lu-ku</t></w>
@@ -92556,7 +92565,7 @@
<phrase><t>mar-su-pi-al mouse</t></phrase>
<w><t>mar-su-pi-um</t></w>
<w><t>Mar-sy-as</t></w>
-<w><t>mart</t></w>
+<w><t>mart</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Mar-ta</t></w>
<w><t>Mar-ta-ban</t></w>
<w><t>mar-ta-gon</t></w>
@@ -94645,7 +94654,8 @@
<w><t>mes-tee</t></w>
<w><t>mes-ter</t></w>
<w><t>Mes-thles</t></w>
-<w><t>mes-ti-zo</t></w>
+<w><t>mes-ti-zo</t><noun><singular/></noun></w>
+<w><t>mes-ti-zos</t><noun><plural/></noun></w>
<w><t>Mes-tor</t></w>
<w><t>mes-tra-nol</t></w>
<w><t>met</t></w>
@@ -96244,7 +96254,7 @@
<w><t>mis-cal-cu-late</t></w>
<w><t>mis-cal-cu-lat-ed</t></w>
<w><t>mis-cal-cu-lat-ing</t></w>
-<w><t>mis-cal-cu-la-tion</t></w>
+<w><t>mis-cal-cu-la-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mis-cal-cu-la-tor</t></w>
<w><t>mis-call</t></w>
<w><t>mis-call-er</t></w>
@@ -98095,7 +98105,7 @@
<w><t>Mo-ra-va</t></w>
<w><t>Mo-ra-via</t></w>
<w><t>Mo-ra-vi-a</t></w>
-<w><t>Mo-ra-vi-an</t></w>
+<w><t>Mo-ra-vi-an</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Mo-ra-vi-an Church</t></phrase>
<phrase><t>Mo-ra-vi-an Gate</t></phrase>
<w><t>Mor-ax-el-la</t></w>
@@ -98362,7 +98372,7 @@
<w><t>Mo-sul</t></w>
<w><t>Mosz-kow-ski</t></w>
<w><t>mot</t></w>
-<w><t>mote</t></w>
+<w><t>mote</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mo-tel</t></w>
<w><t>mo-tet</t></w>
<w><t>mote-y</t></w>
@@ -98821,7 +98831,8 @@
<w><t>mu-ka-de</t></w>
<w><t>Muk-den</t></w>
<w><t>muk-luk</t></w>
-<w><t>mu-lat-to</t></w>
+<w><t>mu-lat-to</t><noun><singular/></noun></w>
+<w><t>mu-lat-toes</t><noun><plural/></noun></w>
<w><t>mul-ber-ry</t></w>
<w><t>Mul-ber-ry</t></w>
<w><t>mulch</t></w>
@@ -99059,7 +99070,7 @@
<w><t>mul-ti-plic-i-ty</t></w>
<w><t>mul-ti-plied</t></w>
<w><t>mul-ti-pli-er</t></w>
-<w><t>mul-ti-ply</t></w>
+<w><t>mul-ti-ply</t><verb><regular-root/></verb></w>
<w><t>mul-ti-ply-ing</t></w>
<w><t>mul-ti-point-ed</t></w>
<w><t>mul-ti-po-lar</t></w>
@@ -99191,7 +99202,7 @@
<phrase><t>Mu-nich Pact</t></phrase>
<w><t>mu-nic-i-pal</t></w>
<w><t>mu-nic-i-pal-ise</t></w>
-<w><t>mu-nic-i-pal-i-ty</t></w>
+<w><t>mu-nic-i-pal-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mu-nic-i-pal-i-za-tion</t></w>
<w><t>mu-nic-i-pal-ize</t></w>
<w><t>mu-nic-i-pal-ized</t></w>
@@ -106612,7 +106623,7 @@
<w><t>non-skilled</t></w>
<w><t>non-skip-ping</t></w>
<w><t>non-slan-der-ous</t></w>
-<w><t>non-slave-hold-ing</t></w>
+<w><t>non-slave-hold-ing</t><adjective><extensible value="false"/></adjective></w>
<w><t>non=Slav-ic</t></w>
<w><t>non-slip</t></w>
<w><t>non-slip-per-y</t></w>
@@ -110325,7 +110336,7 @@
<w><t>Or-i-gen-is-tic</t></w>
<w><t>or-i-gin</t></w>
<w><t>o-rig-i-na-ble</t></w>
-<w><t>o-rig-i-nal</t></w>
+<w><t>o-rig-i-nal</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective/></w>
<w><t>o-rig-i-nal-i-ty</t></w>
<w><t>o-rig-i-nal-ly</t></w>
<phrase><t>o-rig-i-nal sin</t></phrase>
@@ -110437,7 +110448,7 @@
<w><t>o-ro-tun-di-ty</t></w>
<w><t>O-ro-ville</t></w>
<w><t>O-roz-co</t></w>
-<w><t>or-phan</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>or-phan</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>or-phan-age</t></w>
<w><t>or-phan-hood</t></w>
<w><t>or-phar-i-on</t></w>
@@ -111189,7 +111200,7 @@
<w><t>out-fig-ured</t></w>
<w><t>out-fig-ur-ing</t></w>
<w><t>out-fish</t></w>
-<w><t>out-fit</t></w>
+<w><t>out-fit</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>out-fit-ted</t></w>
<w><t>out-fit-ter</t></w>
<w><t>out-fit-ting</t></w>
@@ -111433,7 +111444,7 @@
<w><t>out-pop-u-lat-ing</t></w>
<w><t>out-port</t></w>
<w><t>out-por-ter</t></w>
-<w><t>out-post</t></w>
+<w><t>out-post</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>out-pour</t></w>
<w><t>out-pour-ing</t></w>
<w><t>out-prac-tice</t></w>
@@ -114422,7 +114433,7 @@
<w><t>pal</t></w>
<w><t>Pal</t></w>
<w><t>pa-la-bra</t></w>
-<w><t>pal-ace</t></w>
+<w><t>pal-ace</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pal-aced</t></w>
<w><t>pal-ace-like</t></w>
<phrase><t>pal-ace rev-o-lu-tion</t></phrase>
@@ -114539,6 +114550,7 @@
<w><t>pale-ly</t></w>
<w><t>Pa-lem-bang</t></w>
<w><t>Pa-len-cia</t></w>
+<w><t>pale-ness</t><noun><singular/></noun></w>
<w><t>Pa-len-que</t></w>
<w><t>Pa-le-o=A-si-at-ic</t></w>
<w><t>pa-le-o-bi-o-log-i-cal</t></w>
@@ -114791,7 +114803,7 @@
<w><t>pam-per-er</t></w>
<w><t>pam-pe-ro</t></w>
<w><t>pamph</t></w>
-<w><t>pam-phlet</t></w>
+<w><t>pam-phlet</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pam-phlet-ar-y</t></w>
<w><t>pam-phlet-eer</t></w>
<w><t>pam-phlet-ize</t></w>
@@ -115726,7 +115738,7 @@
<phrase><t>Par-lia-men-ta-ry Com-mis-sion-er</t></phrase>
<phrase><t>par-lia-men-ta-ry pri-vate sec-re-tar-y</t></phrase>
<phrase><t>par-lia-men-ta-ry sec-re-tar-y</t></phrase>
-<w><t>par-lor</t></w>
+<w><t>par-lor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>par-lor-maid</t></w>
<w><t>par-lour</t></w>
<phrase><t>par-lour car</t></phrase>
@@ -117838,7 +117850,7 @@
<w><t>per-i-od-ic</t></w>
<w><t>pe-ri-od-ic</t></w>
<phrase><t>per-i-od-ic ac-id</t></phrase>
-<w><t>pe-ri-od-i-cal</t></w>
+<w><t>pe-ri-od-i-cal</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pe-ri-od-i-cal-ism</t></w>
<w><t>pe-ri-od-i-cal-ist</t></w>
<w><t>pe-ri-od-i-cal-ly</t></w>
@@ -118867,7 +118879,7 @@
<w><t>phi-lan-thro-pise</t></w>
<w><t>phi-lan-thro-pised</t></w>
<w><t>phi-lan-thro-pis-ing</t></w>
-<w><t>phi-lan-thro-pist</t></w>
+<w><t>phi-lan-thro-pist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>phi-lan-thro-pis-tic</t></w>
<w><t>phi-lan-thro-pize</t></w>
<w><t>phi-lan-thro-pized</t></w>
@@ -122161,7 +122173,7 @@
<w><t>pon-cho</t></w>
<w><t>pon-choed</t></w>
<w><t>pond</t></w>
-<w><t>pon-der</t></w>
+<w><t>pon-der</t><verb><regular-root/></verb></w>
<w><t>pon-der-a-ble</t></w>
<w><t>pon-der-er</t></w>
<w><t>pon-der-os-i-ty</t></w>
@@ -127259,7 +127271,7 @@
<phrase><t>pri-vate school</t></phrase>
<phrase><t>pri-vate sec-re-tar-y</t></phrase>
<phrase><t>pri-vate trea-ty</t></phrase>
-<w><t>pri-va-tion</t></w>
+<w><t>pri-va-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>priv-a-tive</t></w>
<w><t>priv-a-tive-ly</t></w>
<w><t>priv-et</t></w>
@@ -127753,7 +127765,7 @@
<w><t>pro-gam-bling</t></w>
<w><t>pro-gen-i-tive</t></w>
<w><t>pro-gen-i-tive-ness</t></w>
-<w><t>pro-gen-i-tor</t></w>
+<w><t>pro-gen-i-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pro-gen-i-to-ri-al</t></w>
<w><t>pro=Gen-tile</t></w>
<w><t>prog-e-ny</t></w>
@@ -128039,6 +128051,7 @@
<w><t>prompt</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb><adjective/></w>
<w><t>prompt-book</t></w>
<w><t>prompt-er</t></w>
+<w><t>prompt-ing</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>promp-ti-tude</t></w>
<w><t>prompt-ly</t></w>
<w><t>prompt-ness</t></w>
@@ -128742,7 +128755,7 @@
<w><t>pro-ven-tric-u-lar</t></w>
<w><t>pro-ven-tric-u-lus</t></w>
<w><t>prov-er</t></w>
-<w><t>prov-erb</t></w>
+<w><t>prov-erb</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pro-ver-bi-al</t></w>
<w><t>pro-ver-bi-al-ly</t></w>
<w><t>prov-erb-like</t></w>
@@ -134677,7 +134690,7 @@
<w><t>re-e-rup-tion</t></w>
<w><t>reest</t></w>
<w><t>re-es-tab-lish</t><verb><regular-root/></verb></w>
-<w><t>re-es-tab-lish-ment</t></w>
+<w><t>re-es-tab-lish-ment</t><noun/></w>
<w><t>re-es-ti-mate</t></w>
<w><t>re-es-ti-mat-ed</t></w>
<w><t>re-es-ti-mat-ing</t></w>
@@ -136005,7 +136018,7 @@
<w><t>Rem-ing-ton</t></w>
<w><t>rem-i-nisce</t></w>
<w><t>rem-i-nisced</t></w>
-<w><t>rem-i-nis-cence</t></w>
+<w><t>rem-i-nis-cence</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>rem-i-nis-cent</t></w>
<w><t>rem-i-nisc-ing</t></w>
<w><t>re-mint</t></w>
@@ -143227,7 +143240,7 @@
<w><t>seal-like</t></w>
<w><t>seal-skin</t></w>
<phrase><t>Seal-y-ham ter-ri-er</t></phrase>
-<w><t>seam</t></w>
+<w><t>seam</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>sea-man</t><noun><singular/></noun></w>
<w><t>sea-man-like</t></w>
<w><t>sea-man-ship</t></w>
@@ -143345,7 +143358,7 @@
<w><t>se-ces-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>se-ces-sion-al</t></w>
<w><t>se-ces-sion-ism</t></w>
-<w><t>se-ces-sion-ist</t></w>
+<w><t>se-ces-sion-ist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>sech</t></w>
<w><t>se-clude</t></w>
<w><t>se-clud-ed</t></w>
@@ -145829,7 +145842,7 @@
<w><t>Se-ren-i-ta-tis</t></w>
<w><t>se-ren-i-ty</t></w>
<w><t>Se-ren-i-ty</t></w>
-<w><t>serf</t></w>
+<w><t>serf</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>serf-dom</t></w>
<w><t>serf-ish</t></w>
<w><t>serf-ish-ly</t></w>
@@ -148506,7 +148519,7 @@
<w><t>skeigh</t></w>
<w><t>skeigh-ish</t></w>
<w><t>skein</t></w>
-<w><t>skel-e-ton</t></w>
+<w><t>skel-e-ton</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>skel-e-ton-ise</t></w>
<w><t>skel-e-ton-ised</t></w>
<w><t>skel-e-ton-is-ing</t></w>
@@ -148671,7 +148684,7 @@
<w><t>Skir-o-pho-ri-a</t></w>
<w><t>skirr</t></w>
<w><t>skir-ret</t></w>
-<w><t>skirt</t></w>
+<w><t>skirt</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>skirt-er</t></w>
<w><t>skirt-ing</t></w>
<phrase><t>skirt-ing board</t></phrase>
@@ -149095,7 +149108,7 @@
<w><t>slok-ing</t></w>
<w><t>sloop</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>sloot</t></w>
-<w><t>slop</t></w>
+<w><t>slop</t><noun><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<phrase><t>slop a-round</t></phrase>
<w><t>slope</t></w>
<w><t>slop-ing</t></w>
@@ -150983,7 +150996,7 @@
<w><t>span-dril</t></w>
<w><t>spa-ne-mic</t></w>
<w><t>spang</t></w>
-<w><t>span-gle</t></w>
+<w><t>span-gle</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>Spang-ler</t></w>
<w><t>span-gly</t></w>
<w><t>Span-iard</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -151901,7 +151914,7 @@
<w><t>splic-ing</t></w>
<w><t>spline</t></w>
<w><t>splin-ing</t></w>
-<w><t>splint</t></w>
+<w><t>splint</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>splin-ter</t></w>
<phrase><t>splin-ter group</t></phrase>
<w><t>splin-ter-less</t></w>
@@ -153669,6 +153682,7 @@
<w><t>still=hunt-er</t></w>
<w><t>stil-li-cide</t></w>
<w><t>stil-li-form</t></w>
+<w><t>still-ness</t><noun/></w>
<phrase><t>Still-son wrench</t></phrase>
<w><t>Still-wa-ter</t></w>
<w><t>stil-ly</t></w>
@@ -154421,7 +154435,7 @@
<w><t>strid-u-lous</t></w>
<w><t>strid-u-lous-ly</t></w>
<w><t>strid-u-lous-ness</t></w>
-<w><t>strife</t></w>
+<w><t>strife</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>strife-ful</t></w>
<w><t>strife-less</t></w>
<w><t>stri-ges</t></w>
@@ -155602,7 +155616,7 @@
<w><t>sub-li-mate</t></w>
<w><t>sub-li-ma-tion</t></w>
<w><t>sub-li-ma-tion-al</t></w>
-<w><t>sub-lime</t></w>
+<w><t>sub-lime</t><adjective><extensible/></adjective></w>
<w><t>sub-limed</t></w>
<w><t>sub-lime-ly</t></w>
<w><t>sub-lime-ness</t></w>
@@ -158747,7 +158761,7 @@
<w><t>swine-herd-ship</t></w>
<w><t>swine-pox</t></w>
<phrase><t>swine ve-sic-u-lar dis-ease</t></phrase>
-<w><t>swing</t></w>
+<w><t>swing</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root value="false"/></verb></w>
<w><t>swing-a-ble</t></w>
<w><t>swing-back</t></w>
<w><t>swing-boat</t></w>
@@ -158763,6 +158777,7 @@
<w><t>swin-gle-bar</t></w>
<w><t>swin-gle-tree</t></w>
<w><t>swing-om-e-ter</t></w>
+<w><t>swings</t><verb><lemma>swing</lemma></verb></w>
<w><t>swing-tree</t></w>
<w><t>swin-ish</t></w>
<w><t>swin-ish-ly</t></w>
@@ -159035,7 +159050,7 @@
<w><t>sym-pa-this-er</t></w>
<w><t>sym-pa-this-ing</t></w>
<w><t>sym-pa-thize</t><verb><regular-root/></verb></w>
-<w><t>sym-pa-thiz-er</t></w>
+<w><t>sym-pa-thiz-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>sym-pa-thiz-ing-ly</t></w>
<w><t>sym-pa-tho-lyt-ic</t></w>
<w><t>sym-pa-tho-mi-met-ic</t></w>
@@ -159680,7 +159695,7 @@
<w><t>tail-less-ness</t></w>
<w><t>tail-light</t></w>
<w><t>tail-like</t></w>
-<w><t>tai-lor</t></w>
+<w><t>tai-lor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>tai-lor-bird</t></w>
<w><t>tai-lored</t></w>
<w><t>tai-lor=made</t></w>
@@ -160643,7 +160658,7 @@
<w><t>tee</t></w>
<w><t>tee-ing</t></w>
<w><t>teel</t></w>
-<w><t>teem</t></w>
+<w><t>teem</t><verb><regular-root/></verb></w>
<w><t>teem-er</t></w>
<w><t>teem-ing</t></w>
<w><t>teem-ing-ly</t></w>
@@ -161419,8 +161434,9 @@
<w><t>ter-ry</t></w>
<w><t>Ter-ry-ville</t></w>
<w><t>Ter-sanc-tus</t></w>
-<w><t>terse</t></w>
-<w><t>terse-ness</t></w>
+<w><t>terse</t><adjective><extensible/></adjective></w>
+<w><t>terse-ly</t><adverb/></w>
+<w><t>terse-ness</t><noun><singular/></noun></w>
<w><t>ters-er</t></w>
<w><t>ters-est</t></w>
<w><t>ter-tial</t></w>
@@ -161777,10 +161793,10 @@
<w><t>the-ar-chic</t></w>
<w><t>the-ar-chy</t></w>
<w><t>The-a-ri-ca</t></w>
-<w><t>the-a-ter</t></w>
+<w><t>the-a-ter</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>the-a-ter=in=the=round</t></w>
<w><t>The-a-tine</t></w>
-<w><t>the-a-tre</t></w>
+<w><t>the-a-tre</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Thé-â-tre=Fran-çais</t></w>
<w><t>the-a-tre-go-er</t></w>
<w><t>the-atre=in=the=round</t></w>
@@ -162606,7 +162622,7 @@
<w><t>throat-less</t></w>
<phrase><t>throat mi-cro-phone</t></phrase>
<w><t>throat-y</t></w>
-<w><t>throb</t></w>
+<w><t>throb</t><verb><regular-root/></verb></w>
<w><t>throb-bing</t></w>
<w><t>throb-less</t></w>
<w><t>throe</t></w>
@@ -164661,7 +164677,7 @@
<phrase><t>trans-fer RNA</t></phrase>
<w><t>trans-fig-u-ra-tion</t></w>
<w><t>Trans-fig-u-ra-tion</t></w>
-<w><t>trans-fig-ure</t></w>
+<w><t>trans-fig-ure</t><verb><regular-root/></verb></w>
<w><t>trans-fig-ure-ment</t></w>
<w><t>trans-fil-tra-tion</t></w>
<w><t>trans-fi-nite</t></w>
@@ -170879,6 +170895,7 @@
<w><t>un-dev-as-tat-ed</t></w>
<w><t>un-dev-as-tat-ing</t></w>
<w><t>un-de-vel-op-a-ble</t></w>
+<w><t>un-de-vel-oped</t><adjective/></w>
<w><t>un-de-vel-op-ing</t></w>
<w><t>un-de-vel-op-men-tal</t></w>
<w><t>un-de-vi-a-ble</t></w>
@@ -173654,7 +173671,7 @@
<w><t>Un-ion-ism</t></w>
<w><t>un-ion-ism</t></w>
<w><t>Un-ion-ist</t></w>
-<w><t>un-ion-ist</t></w>
+<w><t>un-ion-ist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>un-ion-is-tic</t></w>
<phrase><t>Un-ion-ist Par-ty</t></phrase>
<w><t>un-ion-ize</t></w>
@@ -173787,7 +173804,7 @@
<w><t>u-ni-verse</t></w>
<phrase><t>u-ni-verse of dis-course</t></phrase>
<w><t>u-ni-ver-si-tar-i-an</t></w>
-<w><t>u-ni-ver-si-ty</t></w>
+<w><t>u-ni-ver-si-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>u-ni-vo-cal</t></w>
<w><t>un-jack-et-ed</t></w>
<w><t>un-jad-ed</t></w>
@@ -179872,7 +179889,7 @@
<phrase><t>Ut-tar Pra-desh</t></phrase>
<w><t>ut-ter</t><verb><regular-root/></verb><adjective><extensible value="false"/></adjective></w>
<w><t>ut-ter-a-ble</t></w>
-<w><t>ut-ter-ance</t></w>
+<w><t>ut-ter-ance</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>ut-ter bar-ris-ter</t></phrase>
<w><t>ut-ter-er</t></w>
<w><t>ut-ter-less</t></w>
@@ -179921,7 +179938,7 @@
<phrase><t>va-cant pos-ses-sion</t></phrase>
<w><t>va-cat-a-ble</t></w>
<w><t>va-cate</t><verb><regular-root/></verb></w>
-<w><t>va-ca-tion</t></w>
+<w><t>va-ca-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>va-ca-tion-er</t></w>
<w><t>va-ca-tion-ist</t></w>
<w><t>va-ca-tion-less</t></w>
@@ -180813,7 +180830,7 @@
<w><t>ven-tril-o-quy</t></w>
<w><t>Ven-tris</t></w>
<w><t>ven-trot-o-my</t></w>
-<w><t>ven-ture</t></w>
+<w><t>ven-ture</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<phrase><t>ven-ture cap-i-tal</t></phrase>
<w><t>ven-tured</t></w>
<w><t>Ven-tur-er</t></w>
@@ -184413,7 +184430,7 @@
<w><t>Wes-ker</t></w>
<w><t>wes-kit</t></w>
<w><t>Wes-la-co</t></w>
-<w><t>Wes-ley</t></w>
+<w><t>Wes-ley</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Wes-ley-an</t></w>
<w><t>Wes-ley-an-ism</t></w>
<w><t>Wes-ley-ism</t></w>
@@ -184716,7 +184733,7 @@
<w><t>whif-fle</t></w>
<w><t>whif-fler</t></w>
<w><t>whif-fle-tree</t></w>
-<w><t>whig</t></w>
+<w><t>whig</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Whig</t></w>
<w><t>Whig-ger-y</t></w>
<w><t>whig-ging</t></w>
@@ -185941,8 +185958,9 @@
<w><t>Wood-lawn</t></w>
<w><t>wood-less</t></w>
<w><t>wood-louse</t></w>
-<w><t>wood-man</t></w>
+<w><t>wood-man</t><noun><singular/><convertible-to-possessive/></noun></w>
<w><t>wood-man-craft</t></w>
+<w><t>wood-men</t><noun><plural/><convertible-to-possessive/></noun></w>
<w><t>wood-note</t></w>
<w><t>wood-peck-er</t></w>
<phrase><t>wood pig-eon</t></phrase>
@@ -186107,7 +186125,7 @@
<w><t>work-room</t></w>
<w><t>works</t></w>
<phrase><t>works coun-cil</t></phrase>
-<w><t>work-shop</t></w>
+<w><t>work-shop</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>work-shy</t></w>
<w><t>Work-sop</t></w>
<w><t>work-ta-ble</t></w>
@@ -186634,7 +186652,7 @@
<w><t>Ya-ni-na</t></w>
<w><t>yank</t></w>
<w><t>Yank</t></w>
-<w><t>Yan-kee</t></w>
+<w><t>Yan-kee</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Yan-kee-dom</t></w>
<phrase><t>Yan-kee Doo-dle</t></phrase>
<w><t>Yan-kee-fied</t></w>
Added: trunk/foray/foray-orthography/src/main/data/dictionaries/ger-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/ger-Latn-ZZZ.dict.xml (rev 0)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/ger-Latn-ZZZ.dict.xml 2023-12-14 15:07:35 UTC (rev 13318)
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!DOCTYPE axsl-dictionary
+ PUBLIC "-//aXSL//DTD Dictionary V0.1//EN"
+ "http://www.axsl.org/dtds/0.1/en/axsl-dictionary.dtd">
+
+<axsl-dictionary
+ id="org.foray.ger.Latn.ZZZ"
+ language="ger" script="Latn"
+ hard-hyphen-char="=" soft-hyphen-char="-">
+
+<!--
+Dictionary of German words.
+-->
+
+
+<w><t>alle</t></w>
+<w><t>A-mer-i-can-ische</t></w>
+<w><t>Bot-schaf-ter</t></w>
+<w><t>bür-ger-krieg</t></w>
+<w><t>Christ-liche</t></w>
+<w><t>dan-ket</t></w>
+<w><t>das</t></w>
+<w><t>der</t></w>
+<w><t>Gott</t></w>
+<w><t>in</t></w>
+<w><t>leben</t></w>
+<w><t>Nord=A-mer-i-ka</t></w>
+<w><t>nun</t></w>
+<w><t>und</t></w>
+
+</axsl-dictionary>
Property changes on: trunk/foray/foray-orthography/src/main/data/dictionaries/ger-Latn-ZZZ.dict.xml
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/heb-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/heb-Latn-ZZZ.dict.xml 2023-11-02 20:16:15 UTC (rev 13317)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/heb-Latn-ZZZ.dict.xml 2023-12-14 15:07:35 UTC (rev 13318)
@@ -29,6 +29,7 @@
<w><t>Sab-a-oth</t></w>
<w><t>sha-lom</t></w>
<w><t>te-vi-lah</t></w>
+<w><t>to-hu-wha-bo-hu</t></w>
<w><t>yaw=raw</t><comment>Strong's 3384.</comment></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2023-11-02 20:16:15 UTC (rev 13317)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2023-12-14 15:07:35 UTC (rev 13318)
@@ -42,11 +42,14 @@
<w><t>car-men</t></w>
<w><t>car-pe</t></w>
<w><t>ca-sus</t></w>
+<w><t>ca-tech-u-men-or-um</t></w>
<w><t>cau-tel-a</t></w>
<w><t>cent</t><abbrev referenced-word="centum"/></w>
<w><t>cen-tum</t><comment>100, as in "per centum" or "percent."</comment></w>
+<w><t>char-ta</t></w>
<w><t>Christ</t></w>
<w><t>cir-ca</t></w>
+<w><t>clar-us</t></w>
<w><t>com-i-ta-tus</t></w>
<w><t>con</t><abbrev referenced-word="contradicente"/></w>
<w><t>con-trac-tus</t></w>
@@ -61,10 +64,13 @@
<w><t>de</t></w>
<w><t>de-cem-vir</t></w>
<w><t>de-cem-vir-i</t></w>
+<w><t>de-men-tant</t></w>
<w><t>De-o</t></w>
<w><t>de-sid-er-a-ta</t></w>
<w><t>de-sid-er-a-tum</t></w>
<w><t>de-struc-ti-o</t></w>
+<w><t>De-um</t></w>
+<w><t>De-us</t><noun/></w>
<w><t>dic-tus</t></w>
<w><t>die</t></w>
<w><t>die-bus</t></w>
@@ -82,6 +88,7 @@
<w><t>e-go</t></w>
<w><t>e-o</t></w>
<w><t>e-o-dem</t></w>
+<w><t>e-pis-co-pi</t></w>
<w><t>est</t></w>
<w><t>et</t></w>
<w><t>etc.</t><abbrev referenced-word="et cetera"/></w>
@@ -89,6 +96,7 @@
<w><t>e-van-gel-i-ar-i-um</t></w>
<w><t>ex</t></w>
<w><t>ex-trem-um</t></w>
+<w><t>fac</t></w>
<w><t>fa-cias</t></w>
<w><t>fac-to</t></w>
<w><t>fa-to</t></w>
@@ -95,6 +103,7 @@
<w><t>fat-u-us</t></w>
<w><t>fe-lo</t></w>
<w><t>fide</t></w>
+<w><t>fi-del-i-um</t></w>
<w><t>fi-eri</t></w>
<w><t>fit</t></w>
<w><t>foe-de-ris</t></w>
@@ -101,6 +110,7 @@
<w><t>fœ-de-ris</t></w>
<w><t>fœ-tus</t></w>
<w><t>for-ti-o-ri</t></w>
+<w><t>for-tis-si-mus</t></w>
<w><t>fu-gi-mus</t></w>
<w><t>gen-er-a-ti-o</t></w>
<w><t>gen-er-is</t></w>
@@ -111,6 +121,7 @@
<w><t>hac</t></w>
<w><t>hoc</t></w>
<w><t>ho-mi-nem</t></w>
+<w><t>hor-as</t></w>
<w><t>hy-dro-ma-ni-a</t></w>
<w><t>i.e.</t><abbrev referenced-word="id est"/></w>
<w><t>i. e.</t><abbrev referenced-word="id est"/><comment>Contains embedded non-breaking space.</comment></w>
@@ -134,11 +145,13 @@
<w><t>leg-em</t></w>
<w><t>lex</t></w>
<w><t>li-ber-or-um</t></w>
+<w><t>li-ber-tas</t></w>
<w><t>li-ber-um</t></w>
<w><t>lib-i-tum</t></w>
<w><t>lin-qui-mus</t></w>
<w><t>lo-ci</t></w>
<w><t>lo-co</t></w>
+<w><t>mag-na</t></w>
<w><t>ma-jor-i</t></w>
<w><t>man-dam-us</t></w>
<w><t>max-im</t><noun><singular/></noun></w>
@@ -147,7 +160,9 @@
<w><t>me-um</t></w>
<w><t>mi-nu-tiæ</t></w>
<w><t>mi-se-ra</t></w>
+<w><t>mis-sa</t></w>
<w><t>Mons Sacer</t></w>
+<w><t>mor-as</t></w>
<w><t>mort-main</t></w>
<w><t>mo-tu</t></w>
<w><t>nas-ci-tur</t></w>
@@ -165,6 +180,8 @@
<w><t>nos</t></w>
<w><t>no-vo</t></w>
<w><t>of-fi-cio</t></w>
+<w><t>om-nes</t></w>
+<w><t>om-nia</t></w>
<w><t>or-i-gin-es</t></w>
<w><t>o-vum</t></w>
<w><t>pacta</t></w>
@@ -182,7 +199,9 @@
<w><t>pa-tri-cian</t><noun><singular/></noun></w>
<w><t>pa-tri-cians</t><noun><plural/></noun></w>
<w><t>Pen-te-cost-it</t></w>
+<w><t>pen-um-bra</t><noun/></w>
<w><t>per</t></w>
+<w><t>per-de-re</t></w>
<w><t>pe-ri-ti</t></w>
<w><t>per-son-ae</t></w>
<w><t>pe-ti-tio</t></w>
@@ -191,6 +210,7 @@
<w><t>pop-u-li</t></w>
<w><t>pos-se</t></w>
<w><t>pos-se-det-is</t></w>
+<w><t>pos-sum-us</t></w>
<w><t>post</t></w>
<w><t>po-tent-i-æ</t></w>
<w><t>præ-ter-e-a</t></w>
@@ -197,6 +217,7 @@
<w><t>pre-amble</t></w>
<w><t>prin-ci-pii</t></w>
<w><t>pri-or-i</t></w>
+<w><t>pri-us</t></w>
<w><t>pro</t></w>
<w><t>prop-a-gan-da</t></w>
<w><t>qua</t></w>
@@ -204,6 +225,7 @@
<w><t>quaere</t></w>
<w><t>quære</t></w>
<w><t>quan-tum</t></w>
+<w><t>quem</t></w>
<w><t>qui</t></w>
<w><t>quid</t></w>
<w><t>quo</t></w>
@@ -228,9 +250,11 @@
<w><t>seq</t><abbrev referenced-word="sequens"/></w>
<w><t>se-quens</t></w>
<w><t>se-qui-tur</t></w>
+<w><t>ser-vi-re</t></w>
<w><t>ser-vi-tus</t></w>
<w><t>sig-no</t></w>
<w><t>si-len-ti-o</t></w>
+<w><t>sim-i-le</t></w>
<w><t>si-ne</t></w>
<w><t>so-la</t></w>
<w><t>so-li</t></w>
@@ -238,6 +262,7 @@
<w><t>sta-tus</t></w>
<w><t>stim-u-li</t></w>
<w><t>su-a</t></w>
+<w><t>su-as</t></w>
<w><t>sub</t></w>
<w><t>sui</t></w>
<w><t>sum-mi</t></w>
@@ -244,6 +269,7 @@
<w><t>su-tor</t></w>
<w><t>sym-bol-um</t></w>
<w><t>ta-bu-la</t></w>
+<w><t>te</t></w>
<w><t>tem.</t><abbrev referenced-word="tempore"/></w>
<w><t>tem-po-re</t></w>
<w><t>ter-ræ</t></w>
@@ -266,6 +292,7 @@
<w><t>vag-um</t></w>
<w><t>va-lo-rem</t></w>
<w><t>ven-ue</t></w>
+<w><t>ver-a</t></w>
<w><t>ver-ba-tim</t><adjective/><adverb/></w>
<w><t>ver-sa</t></w>
<w><t>ver-sus</t></w>
@@ -273,9 +300,11 @@
<w><t>vi-a</t></w>
<w><t>vice</t></w>
<w><t>vin-ces</t></w>
+<w><t>vir</t></w>
<w><t>vi-va</t></w>
<w><t>vive</t></w>
<w><t>vo-ce</t></w>
+<w><t>vol-unt</t></w>
<w><t>vox</t></w>
<w><t>vul-gus</t></w>
<w><t>war-rant-o</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2023-11-02 20:16:15 UTC (rev 13317)
+++ trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2023-12-14 15:07:35 UTC (rev 13318)
@@ -233,6 +233,10 @@
</derivative-pattern-list>
+ <derivative-pattern-list id="ger-Latn-derivative-patterns">
+ </derivative-pattern-list>
+
+
<derivative-factory-list id="eng-Latn-derivatives">
<derivative-factory class="org.foray.orthography.wrapper.LatinPlural1WordFactory"/>
<derivative-factory class="org.foray.orthography.wrapper.LatinPlural2WordFactory"/>
@@ -302,6 +306,14 @@
</unparsed-dictionary>
</dictionary-resource>
+ <dictionary-resource id="org.foray.ger.Latn.ZZZ">
+ <unparsed-dictionary>
+ <dictionary-element>
+ <resource-location type="url">../dictionaries/ger-Latn-ZZZ.dict.xml</resource-location>
+ </dictionary-element>
+ </unparsed-dictionary>
+ </dictionary-resource>
+
<dictionary-resource id="org.foray.grc.Latn.ZZZ">
<unparsed-dictionary>
<dictionary-element>
@@ -400,6 +412,11 @@
<dictionary reference="org.foray.fre.Latn.ZZZ"/>
</orthography>
+ <orthography language-iso-3char="ger" script-iso-4char="Latn" country-iso-3char="ZZZ">
+ <derivative-rules reference="ger-Latn-derivative-patterns"/>
+ <dictionary reference="org.foray.ger.Latn.ZZZ"/>
+ </orthography>
+
<orthography language-iso-3char="grc" script-iso-4char="Latn" country-iso-3char="ZZZ">
<dictionary reference="org.foray.grc.Latn.ZZZ"/>
</orthography>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-11-02 20:16:18
|
Revision: 13317
http://sourceforge.net/p/foray/code/13317
Author: victormote
Date: 2023-11-02 20:16:15 +0000 (Thu, 02 Nov 2023)
Log Message:
-----------
Improvements to dictionaries and orthographies.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-epoch-01.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml 2023-10-22 12:16:29 UTC (rev 13316)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml 2023-11-02 20:16:15 UTC (rev 13317)
@@ -33,6 +33,8 @@
<w><t>draughts-board</t></w>
<w><t>draughts-man</t></w>
<w><t>draught-y</t></w>
+<word-placeholder><t>en-deav-or</t><different-country country="USA"/></word-placeholder>
+<w><t>en-deav-our</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>ful-fil</t><verb/></w>
<w><t>ful-fil-ment</t><noun/></w>
<w><t>ful-fils</t><verb><vf><singular/></vf></verb></w>
@@ -45,6 +47,7 @@
<w><t>ma-nœu-vre</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>mod-elled</t><verb/></w>
<w><t>mod-ell-ing</t><noun><singular/><convertible-to-possessive/></noun><verb/></w>
+<word-placeholder><t>mold</t><different-country country="USA"/></word-placeholder>
<w><t>mould</t><noun><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>mould-board</t></w>
<w><t>mould-er</t><verb><regular-root/></verb></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml 2023-10-22 12:16:29 UTC (rev 13316)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml 2023-11-02 20:16:15 UTC (rev 13317)
@@ -34,6 +34,8 @@
<w><t>drafts-man</t></w>
<w><t>drafts-man-ship</t></w>
<w><t>draft-y</t></w>
+<w><t>en-deav-or</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
+<word-placeholder><t>en-deav-our</t><different-country country="GBR"/></word-placeholder>
<w><t>ful-fill</t><verb><regular-root/></verb></w>
<w><t>ful-fill-ment</t><noun/></w>
<w><t>la-bor</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
@@ -53,4 +55,5 @@
<w><t>mold-ing</t></w>
<w><t>mold-warp</t></w>
<w><t>mold-y</t><adjective><extensible/></adjective></w>
+<word-placeholder><t>mould</t><different-country country="GBR"/></word-placeholder>
</axsl-dictionary>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-epoch-01.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-epoch-01.dict.xml 2023-10-22 12:16:29 UTC (rev 13316)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-epoch-01.dict.xml 2023-11-02 20:16:15 UTC (rev 13317)
@@ -93,8 +93,10 @@
<w><t>pre-ëm-i-nence</t></w>
<w><t>pre-ëm-i-nent-ly</t><adverb/></w>
<w><t>pro-nounc-eth</t><verb><regular-root value="false"/></verb></w>
+<w><t>re-äd-just</t><verb><regular-root/></verb></w>
<w><t>re-ceiv-eth</t></w>
<w><t>re-ëch-o</t><verb><regular-root/></verb></w>
+<w><t>re-ëd-it</t><verb><regular-root/></verb></w>
<w><t>re-ë-lect</t><verb><regular-root/></verb></w>
<w><t>re-ë-lec-tion</t><noun><pluralizable/></noun></w>
<w><t>re-ël-i-gi-bil-i-ty</t><noun/></w>
@@ -102,6 +104,8 @@
<w><t>re-ën-act</t><verb><regular-root/></verb></w>
<w><t>re-ën-force</t><verb><regular-root/></verb></w>
<w><t>re-ën-ter</t><verb><regular-root/></verb></w>
+<w><t>re-ës-tab-lish</t><verb><regular-root/></verb></w>
+<w><t>re-ës-tab-lish-ment</t></w>
<w><t>re-ëx-am-ine</t><verb><regular-root/></verb></w>
<w><t>re-prov-eth</t><verb><lemma>reprove</lemma></verb></w>
<w><t>re-turn-eth</t><verb><lemma>return</lemma></verb></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2023-10-22 12:16:29 UTC (rev 13316)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2023-11-02 20:16:15 UTC (rev 13317)
@@ -134,7 +134,7 @@
<w><t>a-ba-tage</t></w>
<w><t>a-bate</t></w>
<w><t>a-bat-ed</t></w>
-<w><t>a-bate-ment</t></w>
+<w><t>a-bate-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>a-bat-er</t></w>
<w><t>a-bat-ing</t></w>
<w><t>ab-a-tis</t></w>
@@ -179,7 +179,7 @@
<w><t>ab-bre-vi-ate</t></w>
<w><t>ab-bre-vi-at-ed</t></w>
<w><t>ab-bre-vi-at-ing</t></w>
-<w><t>ab-bre-vi-a-tion</t></w>
+<w><t>ab-bre-vi-a-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ab-bre-vi-a-tor</t></w>
<w><t>Ab-by</t></w>
<w><t>ABC</t><noun><pluralizable/></noun><comment>basics, rudiments</comment></w>
@@ -499,10 +499,10 @@
<w><t>a-bridge</t><verb><regular-root/></verb></w>
<w><t>a-bridge-a-ble</t></w>
<w><t>a-bridged</t></w>
-<w><t>a-bridge-ment</t></w>
+<w><t>a-bridge-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>a-bridg-er</t></w>
<w><t>a-bridg-ing</t></w>
-<w><t>a-bridg-ment</t></w>
+<w><t>a-bridg-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>a-bris</t></w>
<w><t>a-broach</t></w>
<w><t>a-broad</t></w>
@@ -586,7 +586,7 @@
<w><t>ab-solv-er</t></w>
<w><t>ab-solv-ing</t></w>
<w><t>ab-so-nant</t></w>
-<w><t>ab-sorb</t></w>
+<w><t>ab-sorb</t><verb><regular-root/></verb></w>
<w><t>ab-sorb-a-bil-i-ty</t></w>
<w><t>ab-sorb-a-ble</t></w>
<w><t>ab-sorb-ance</t></w>
@@ -627,7 +627,7 @@
<w><t>ab-sti-nen-cy</t></w>
<w><t>ab-sti-nent</t></w>
<w><t>ab-sti-nent-ly</t></w>
-<w><t>ab-stract</t></w>
+<w><t>ab-stract</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb><adjective/></w>
<w><t>ab-stract-ed</t></w>
<w><t>ab-stract-ed-ly</t></w>
<w><t>ab-stract-ed-ness</t></w>
@@ -779,7 +779,7 @@
<w><t>a-cau-line</t></w>
<w><t>Ac-cad</t></w>
<w><t>Ac-ca-di-an</t></w>
-<w><t>ac-cede</t></w>
+<w><t>ac-cede</t><verb><regular-root/></verb></w>
<w><t>ac-ced-ed</t></w>
<w><t>ac-ced-ence</t></w>
<w><t>ac-ced-er</t></w>
@@ -813,7 +813,7 @@
<w><t>ac-cept-a-ble</t></w>
<w><t>ac-cept-a-ble-ness</t></w>
<w><t>ac-cept-a-bly</t></w>
-<w><t>ac-cept-ance</t></w>
+<w><t>ac-cept-ance</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ac-cept-an-cy</t></w>
<w><t>ac-cept-ant</t></w>
<w><t>ac-cep-ta-tion</t></w>
@@ -992,7 +992,7 @@
<w><t>ac-cu-mu-late</t></w>
<w><t>ac-cu-mu-lat-ed</t></w>
<w><t>ac-cu-mu-lat-ing</t></w>
-<w><t>ac-cu-mu-la-tion</t></w>
+<w><t>ac-cu-mu-la-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>ac-cu-mu-la-tion point</t></phrase>
<w><t>ac-cu-mu-la-tive</t></w>
<w><t>ac-cu-mu-la-tive-ly</t></w>
@@ -2266,7 +2266,7 @@
<w><t>ad-ver-tize-ment</t></w>
<w><t>ad-ver-tiz-er</t></w>
<w><t>ad-ver-tiz-ing</t></w>
-<w><t>ad-vice</t></w>
+<w><t>ad-vice</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ad-vis-a-bil-i-ty</t></w>
<w><t>ad-vis-a-ble</t></w>
<w><t>ad-vis-a-ble-ness</t></w>
@@ -2658,7 +2658,7 @@
<w><t>af-firm-ance</t></w>
<w><t>af-firm-ant</t></w>
<w><t>af-fir-ma-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>af-firm-a-tive</t></w>
+<w><t>af-firm-a-tive</t><noun><pluralizable/></noun><adjective/></w>
<w><t>af-firm-a-tive=ac-tion</t></w>
<w><t>af-firm-a-tive-ly</t></w>
<w><t>af-firm-a-to-ry</t></w>
@@ -2960,6 +2960,7 @@
<w><t>ag-grad-ing</t></w>
<w><t>ag-gran-dise</t></w>
<w><t>ag-gran-dised</t></w>
+<w><t>ag-gran-dise-ment</t><noun/></w>
<w><t>ag-gran-dis-er</t></w>
<w><t>ag-gran-dis-ing</t></w>
<w><t>ag-gran-dize</t></w>
@@ -4025,7 +4026,7 @@
<w><t>al-i-cy-clic</t></w>
<w><t>al-i-dad</t></w>
<w><t>al-i-dade</t></w>
-<w><t>al-ien</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>a-li-en</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective/></w>
<w><t>al-ien-a-bil-i-ty</t></w>
<w><t>al-ien-a-ble</t></w>
<w><t>al-ien-age</t></w>
@@ -4641,7 +4642,7 @@
<w><t>al-ter-cate</t></w>
<w><t>al-ter-cat-ed</t></w>
<w><t>al-ter-cat-ing</t></w>
-<w><t>al-ter-ca-tion</t></w>
+<w><t>al-ter-ca-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>al-tered chord</t></phrase>
<phrase><t>al-ter e-go</t></phrase>
<w><t>al-tern</t></w>
@@ -7582,7 +7583,7 @@
<w><t>an-ti-path-o-gen</t></w>
<w><t>an-ti-path-o-gene</t></w>
<w><t>an-ti-path-o-gen-ic</t></w>
-<w><t>an-tip-a-thy</t></w>
+<w><t>an-tip-a-thy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>an-ti-pa-tri-arch</t></w>
<w><t>an-ti-pa-tri-ar-chal</t></w>
<w><t>an-ti-pa-tri-ar-chy</t></w>
@@ -8565,7 +8566,7 @@
<w><t>ap-por-tion</t><verb><regular-root/></verb></w>
<w><t>ap-por-tion-a-ble</t></w>
<w><t>ap-por-tion-er</t></w>
-<w><t>ap-por-tion-ment</t></w>
+<w><t>ap-por-tion-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ap-pos-a-bil-i-ty</t></w>
<w><t>ap-pos-a-ble</t></w>
<w><t>ap-pose</t></w>
@@ -9098,7 +9099,7 @@
<w><t>ar-chi-pho-neme</t></w>
<w><t>ar-chi-plasm</t></w>
<w><t>ar-chi-plas-mic</t></w>
-<w><t>ar-chi-tect</t></w>
+<w><t>ar-chi-tect</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ar-chi-tec-ton-ic</t></w>
<w><t>ar-chi-tec-ton-i-cal-ly</t></w>
<w><t>ar-chi-tec-ton-ics</t></w>
@@ -9466,7 +9467,7 @@
<w><t>Ar-ma-gnac</t></w>
<w><t>ar-ma-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ar-ma-men-tar-i-um</t></w>
-<w><t>Ar-mand</t></w>
+<w><t>Ar-mand</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ar-mar-i-a</t></w>
<w><t>ar-mar-i-an</t></w>
<w><t>ar-mar-i-um</t></w>
@@ -11171,7 +11172,7 @@
<w><t>au-dit</t></w>
<w><t>au-di-tion</t></w>
<w><t>au-di-tive</t></w>
-<w><t>au-di-tor</t></w>
+<w><t>au-di-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>au-di-to-ri-a</t></w>
<w><t>au-di-to-ri-al-ly</t></w>
<w><t>au-di-to-ri-ly</t></w>
@@ -11203,7 +11204,7 @@
<w><t>au-git-ic</t></w>
<w><t>aug-ment</t><verb><regular-root/></verb></w>
<w><t>aug-ment-a-ble</t></w>
-<w><t>aug-men-ta-tion</t></w>
+<w><t>aug-men-ta-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>aug-ment-a-tive</t></w>
<w><t>aug-men-ta-tive-ly</t></w>
<w><t>aug-ment-ed</t></w>
@@ -11420,7 +11421,7 @@
<w><t>au-then-tic</t></w>
<w><t>au-then-ti-cal</t></w>
<w><t>au-then-ti-cal-ly</t></w>
-<w><t>au-then-ti-cate</t></w>
+<w><t>au-then-ti-cate</t><verb><regular-root/></verb></w>
<w><t>au-then-ti-cat-ed</t></w>
<w><t>au-then-ti-ca-tion</t></w>
<w><t>au-then-ti-ca-tor</t></w>
@@ -12774,7 +12775,7 @@
<w><t>bal-drick</t></w>
<w><t>bal-dricked</t></w>
<w><t>Bal-dur</t></w>
-<w><t>Bald-win</t></w>
+<w><t>Bald-win</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Bald-win I</t></phrase>
<w><t>Bald-wins-ville</t></w>
<w><t>bale</t></w>
@@ -14462,7 +14463,7 @@
<w><t>Beck-ford</t></w>
<w><t>Beck-ley</t></w>
<w><t>Beck-mann</t></w>
-<w><t>beck-on</t></w>
+<w><t>beck-on</t><verb><regular-root/></verb></w>
<w><t>beck-on-er</t></w>
<w><t>beck-on-ing-ly</t></w>
<w><t>Beck-y</t></w>
@@ -14517,7 +14518,7 @@
<w><t>be-dew</t><verb><regular-root/></verb></w>
<w><t>bed-fast</t></w>
<w><t>bed-fel-low</t></w>
-<w><t>Bed-ford</t></w>
+<w><t>Bed-ford</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Bed-ford cord</t></phrase>
<w><t>Bed-ford-shire</t></w>
<w><t>bed-frame</t></w>
@@ -15119,7 +15120,7 @@
<w><t>ben-e-fac-tor</t></w>
<w><t>ben-e-fac-tress</t></w>
<w><t>be-nef-ic</t></w>
-<w><t>ben-e-fice</t></w>
+<w><t>ben-e-fice</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ben-e-ficed</t></w>
<w><t>ben-e-fice-less</t></w>
<w><t>be-nef-i-cence</t></w>
@@ -15231,7 +15232,7 @@
<w><t>Be-nu-e</t></w>
<w><t>Be-nue=Con-go</t></w>
<w><t>Be-nu-e=Con-go</t></w>
-<w><t>be-numb</t></w>
+<w><t>be-numb</t><verb><regular-root/></verb></w>
<w><t>be-numbed-ness</t></w>
<w><t>be-numb-ing-ly</t></w>
<w><t>be-numb-ment</t></w>
@@ -16734,7 +16735,7 @@
<w><t>bit-te</t></w>
<w><t>bit-ted</t></w>
<w><t>bit-ten</t></w>
-<w><t>bit-ter</t></w>
+<w><t>bit-ter</t><adjective><extensible value="false"/></adjective></w>
<phrase><t>bit-ter ap-ple</t></phrase>
<phrase><t>bit-ter end</t></phrase>
<w><t>bit-ter-end-er</t></w>
@@ -19017,7 +19018,7 @@
<w><t>Brad-bur-y</t></w>
<w><t>Brad-dock</t></w>
<w><t>Bra-den-ton</t></w>
-<w><t>Brad-ford</t></w>
+<w><t>Brad-ford</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Brad-ley</t></w>
<w><t>Brad-man</t></w>
<w><t>bra-doon</t></w>
@@ -19184,7 +19185,7 @@
<w><t>bran-dreth</t></w>
<w><t>bran-drith</t></w>
<w><t>Brandt</t></w>
-<w><t>bran-dy</t></w>
+<w><t>bran-dy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>bran-dy bot-tle</t></phrase>
<phrase><t>bran-dy but-ter</t></phrase>
<w><t>bran-dy-ing</t></w>
@@ -19668,7 +19669,7 @@
<w><t>Brig</t></w>
<w><t>bri-gade</t></w>
<w><t>bri-gad-ed</t></w>
-<w><t>brig-a-dier</t></w>
+<w><t>brig-a-dier</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>brig-a-dier gen-er-al</t></phrase>
<w><t>brig-a-dier-ship</t></w>
<w><t>bri-gad-ing</t></w>
@@ -21188,7 +21189,7 @@
<w><t>Bu-tes</t></w>
<w><t>Bute-shire</t></w>
<w><t>but-ler</t></w>
-<w><t>But-ler</t></w>
+<w><t>But-ler</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>but-ler-age</t></w>
<w><t>but-ler-ies</t></w>
<w><t>but-ler-like</t></w>
@@ -23383,7 +23384,7 @@
<w><t>care-less-ness</t></w>
<w><t>Car-en</t></w>
<w><t>car-er</t></w>
-<w><t>ca-ress</t></w>
+<w><t>ca-ress</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>Ca-resse</t></w>
<w><t>ca-ress-er</t></w>
<w><t>ca-ress-ing-ly</t></w>
@@ -23710,7 +23711,7 @@
<w><t>Car-rière</t></w>
<w><t>car-ri-er=free</t></w>
<phrase><t>car-ri-er pig-eon</t></phrase>
-<w><t>Car-ring-ton</t></w>
+<w><t>Car-ring-ton</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>car-ri-ole</t></w>
<w><t>car-ri-on</t></w>
<phrase><t>car-ri-on crow</t></phrase>
@@ -23717,7 +23718,7 @@
<phrase><t>car-ri-on flow-er</t></phrase>
<w><t>car-ritch</t></w>
<w><t>Car-rol</t></w>
-<w><t>Car-roll</t></w>
+<w><t>Car-roll</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>car-roll-ite</t></w>
<w><t>Car-roll-ton</t></w>
<w><t>car-rom</t></w>
@@ -24770,7 +24771,7 @@
<w><t>cel-e-brat-ed-ness</t></w>
<w><t>cel-e-brat-er</t></w>
<w><t>cel-e-brat-ing</t></w>
-<w><t>cel-e-bra-tion</t></w>
+<w><t>cel-e-bra-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cel-e-bra-tive</t></w>
<w><t>cel-e-bra-tor</t></w>
<w><t>cel-e-bra-to-ry</t></w>
@@ -24913,7 +24914,7 @@
<w><t>cense</t></w>
<w><t>cen-ser</t></w>
<w><t>cen-ser-less</t></w>
-<w><t>cen-sor</t></w>
+<w><t>cen-sor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cen-sor-a-ble</t></w>
<w><t>cen-so-ri-al</t></w>
<w><t>Cen-sor-i-nus</t></w>
@@ -25423,9 +25424,9 @@
<phrase><t>Cha-gas’ dis-ease</t></phrase>
<w><t>Cha-ga-tai</t></w>
<w><t>Cha-gres</t></w>
-<w><t>cha-grin</t></w>
+<w><t>cha-grin</t><noun/></w>
+<w><t>cha-grined</t><verb/></w>
<w><t>cha-grin-ing</t></w>
-<w><t>cha-grinned</t></w>
<w><t>cha-grin-ning</t></w>
<w><t>cha-gul</t></w>
<w><t>Cha-har</t></w>
@@ -26038,7 +26039,7 @@
<w><t>chas-ing</t></w>
<w><t>Chas-ka</t></w>
<w><t>Chasles</t></w>
-<w><t>chasm</t></w>
+<w><t>chasm</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>chas-mal</t></w>
<w><t>chas-mic</t></w>
<w><t>chas-mo-gam-ic</t></w>
@@ -26487,7 +26488,7 @@
<w><t>Chet-nik</t></w>
<w><t>che-val=de=frise</t></w>
<phrase><t>che-val glass</t></phrase>
-<w><t>chev-a-lier</t></w>
+<w><t>chev-a-lier</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Che-va-lier</t></w>
<w><t>Che-va-lier=Mon-tra-chet</t></w>
<w><t>Che-val-lier</t></w>
@@ -28467,7 +28468,7 @@
<w><t>clar-i-o-net</t></w>
<w><t>Cla-ris-sa</t></w>
<w><t>clar-i-ty</t></w>
-<w><t>Clark</t></w>
+<w><t>Clark</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>clark-i-a</t></w>
<w><t>Clarks-burg</t></w>
<w><t>Clarks-dale</t></w>
@@ -28647,7 +28648,7 @@
<w><t>clean-up</t></w>
<w><t>clear</t><verb><regular-root/></verb><adjective><extensible/></adjective></w>
<w><t>clear-a-ble</t></w>
-<w><t>clear-ance</t></w>
+<w><t>clear-ance</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>clear a-way</t></phrase>
<w><t>Cle-ar-chus</t></w>
<w><t>clear-cole</t></w>
@@ -29270,7 +29271,7 @@
<w><t>co-ac-tiv-i-ty</t></w>
<w><t>co-ac-tor</t></w>
<w><t>co-ad-ju-tant</t></w>
-<w><t>co-ad-ju-tor</t></w>
+<w><t>co-ad-ju-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>co-ad-ju-tress</t></w>
<w><t>co-ad-ju-tri-ces</t></w>
<w><t>co-ad-ju-trix</t></w>
@@ -31045,7 +31046,7 @@
<phrase><t>com-ple-ment fix-a-tion test</t></phrase>
<w><t>com-ple-men-tiz-er</t></w>
<w><t>com-plet-a-ble</t></w>
-<w><t>com-plete</t></w>
+<w><t>com-plete</t><verb><regular-root/></verb><adjective/></w>
<w><t>com-plet-ed</t></w>
<w><t>com-plet-ed-ness</t></w>
<w><t>com-plete-ly</t></w>
@@ -31243,7 +31244,7 @@
<w><t>com-put-a-bil-i-ty</t></w>
<w><t>com-put-a-ble</t></w>
<w><t>com-put-a-bly</t></w>
-<w><t>com-pu-ta-tion</t></w>
+<w><t>com-pu-ta-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>com-pu-ta-tion-al</t></w>
<w><t>com-pu-ta-tive</t></w>
<w><t>com-pu-ta-tive-ly</t></w>
@@ -31324,7 +31325,7 @@
<w><t>con-cel-e-brat-ing</t></w>
<w><t>con-cel-e-bra-tion</t></w>
<w><t>con-cent</t></w>
-<w><t>con-cen-ter</t></w>
+<w><t>con-cen-ter</t><verb><regular-root/></verb></w>
<w><t>con-cen-trate</t><verb><regular-root/></verb></w>
<w><t>con-cen-trat-ed</t></w>
<w><t>con-cen-trat-ing</t></w>
@@ -31367,7 +31368,7 @@
<w><t>con-cern-ing-ly</t></w>
<w><t>con-cern-ing-ness</t></w>
<w><t>con-cern-ment</t></w>
-<w><t>con-cert</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>con-cert</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>con-cer-tan-te</t></w>
<w><t>con-cer-ta-to</t></w>
<w><t>con-cert-ed</t></w>
@@ -31715,7 +31716,7 @@
<w><t>con-fet-to</t></w>
<w><t>con-fi-dant</t></w>
<w><t>con-fi-dante</t></w>
-<w><t>con-fide</t></w>
+<w><t>con-fide</t><verb><regular-root/></verb></w>
<w><t>con-fid-ed</t></w>
<w><t>con-fi-dence</t></w>
<phrase><t>con-fi-dence game</t></phrase>
@@ -32543,7 +32544,7 @@
<w><t>con-temp-tu-ous-ness</t></w>
<w><t>con-tend</t><verb><regular-root/></verb></w>
<w><t>con-tend-ing-ly</t></w>
-<w><t>con-tent</t><noun><pluralizable/></noun><adjective/></w>
+<w><t>con-tent</t><noun><pluralizable/></noun><verb><regular-root/></verb><adjective/></w>
<w><t>con-tent-a-ble</t></w>
<w><t>con-tent-ed</t></w>
<w><t>con-tent-ed-ly</t></w>
@@ -32757,7 +32758,7 @@
<w><t>con-tra-sug-gest-i-ble</t></w>
<w><t>con-trate</t></w>
<w><t>con-tra-val-la-tion</t></w>
-<w><t>con-tra-vene</t></w>
+<w><t>con-tra-vene</t><verb><regular-root/></verb></w>
<w><t>con-tra-vened</t></w>
<w><t>con-tra-ven-er</t></w>
<w><t>con-tra-ven-ing</t></w>
@@ -34833,7 +34834,7 @@
<w><t>Cra-io-va</t></w>
<w><t>crake</t></w>
<w><t>cra-kow</t></w>
-<w><t>cram</t></w>
+<w><t>cram</t><verb><regular-root/></verb></w>
<w><t>Cram</t></w>
<w><t>cram-bo</t></w>
<w><t>cr-a-mel</t></w>
@@ -36575,7 +36576,7 @@
<w><t>cur-rant-worm</t></w>
<w><t>cur-ra-wong</t></w>
<w><t>cur-ren-cy</t></w>
-<w><t>cur-rent</t></w>
+<w><t>cur-rent</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective/></w>
<w><t>Cur-rent</t></w>
<phrase><t>cur-rent ac-count</t></phrase>
<phrase><t>cur-rent as-sets</t></phrase>
@@ -37491,7 +37492,7 @@
<w><t>dam-mer</t></w>
<w><t>dam-ming</t></w>
<w><t>dam-mit</t></w>
-<w><t>damn</t></w>
+<w><t>damn</t><verb><regular-root/></verb></w>
<w><t>dam-na-bil-i-ty</t></w>
<w><t>dam-na-ble</t></w>
<w><t>dam-na-ble-ness</t></w>
@@ -37592,6 +37593,7 @@
<w><t>dan-dy-ish</t></w>
<w><t>dan-dy-ism</t></w>
<phrase><t>dan-dy roll</t></phrase>
+<w><t>Dane</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Da-neen</t></w>
<w><t>dane-geld</t></w>
<w><t>Dane-geld</t></w>
@@ -38180,7 +38182,7 @@
<w><t>de-bat-er</t></w>
<w><t>de-bat-ing</t></w>
<w><t>de-bat-ing-ly</t></w>
-<w><t>de-bauch</t></w>
+<w><t>de-bauch</t><verb><regular-root/></verb></w>
<w><t>de-bauched</t></w>
<w><t>de-bauch-ed-ly</t></w>
<w><t>de-bauch-ed-ness</t></w>
@@ -38435,7 +38437,7 @@
<w><t>de-cen-tral-ized</t></w>
<w><t>de-cen-tral-iz-ing</t></w>
<w><t>de-cen-tre</t></w>
-<w><t>de-cep-tion</t></w>
+<w><t>de-cep-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-cep-tive</t></w>
<w><t>de-cep-tive-ly</t></w>
<w><t>de-cep-tive-ness</t></w>
@@ -38568,7 +38570,7 @@
<w><t>dec-li-na-tion-al</t></w>
<w><t>de-clin-a-tory</t></w>
<w><t>de-clin-a-ture</t></w>
-<w><t>de-cline</t></w>
+<w><t>de-cline</t><verb><regular-root/></verb></w>
<w><t>de-clined</t></w>
<w><t>de-clin-er</t></w>
<w><t>de-clin-ing</t></w>
@@ -38842,7 +38844,7 @@
<w><t>de-fal-cate</t></w>
<w><t>de-fal-cat-ed</t></w>
<w><t>de-fal-cat-ing</t></w>
-<w><t>de-fal-ca-tion</t></w>
+<w><t>de-fal-ca-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-fal-ca-tor</t></w>
<w><t>def-a-ma-tion</t></w>
<w><t>de-fam-a-to-ry</t></w>
@@ -40290,7 +40292,7 @@
<w><t>dep-u-ra-tive</t></w>
<w><t>dep-u-ra-tor</t></w>
<w><t>dep-u-ta-ble</t></w>
-<w><t>dep-u-ta-tion</t></w>
+<w><t>dep-u-ta-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-pute</t></w>
<w><t>de-put-ed</t></w>
<w><t>de-put-ing</t></w>
@@ -40691,7 +40693,7 @@
<w><t>de-spoil-er</t></w>
<w><t>de-spoil-ment</t></w>
<w><t>de-spo-li-a-tion</t></w>
-<w><t>de-spond</t></w>
+<w><t>de-spond</t><verb><regular-root/></verb></w>
<w><t>de-spond-ence</t></w>
<w><t>de-spond-en-cy</t></w>
<w><t>de-spond-ent</t></w>
@@ -41929,7 +41931,7 @@
<w><t>dig-ni-tar-i-al</t></w>
<w><t>dig-ni-tar-ies</t></w>
<w><t>dig-ni-tar-y</t></w>
-<w><t>dig-ni-ty</t></w>
+<w><t>dig-ni-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Di-go=Sua-rez</t></w>
<w><t>di-graph</t></w>
<w><t>di-graph-ic</t></w>
@@ -42475,7 +42477,7 @@
<w><t>Dis</t></w>
<w><t>dis-a-bil-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>dis-a-bil-i-ty clause</t></phrase>
-<w><t>dis-a-ble</t></w>
+<w><t>dis-a-ble</t><verb><regular-root/></verb></w>
<w><t>dis-a-bled</t></w>
<w><t>dis-a-ble-ment</t></w>
<phrase><t>dis-a-ble-ment ben-e-fit</t></phrase>
@@ -42530,7 +42532,7 @@
<w><t>dis-a-noint</t></w>
<w><t>dis-ap-pear</t><verb><regular-root/></verb></w>
<w><t>dis-ap-pear-ance</t></w>
-<w><t>dis-ap-point</t></w>
+<w><t>dis-ap-point</t><verb><regular-root/></verb></w>
<w><t>dis-ap-point-ed</t></w>
<w><t>dis-ap-point-ed-ly</t></w>
<w><t>dis-ap-point-er</t></w>
@@ -42597,7 +42599,7 @@
<w><t>dis-bud</t></w>
<w><t>dis-bud-ded</t></w>
<w><t>dis-bud-ding</t></w>
-<w><t>dis-bur-den</t></w>
+<w><t>dis-bur-den</t><verb><regular-root/></verb></w>
<w><t>dis-bur-den-ment</t></w>
<w><t>dis-burs-a-ble</t></w>
<w><t>dis-burse</t></w>
@@ -42661,7 +42663,7 @@
<w><t>dis-ci-plin-ing</t></w>
<w><t>dis-cis-sion</t></w>
<phrase><t>disc jock-ey</t></phrase>
-<w><t>dis-claim</t></w>
+<w><t>dis-claim</t><verb><regular-root/></verb></w>
<w><t>dis-claim-er</t></w>
<w><t>dis-cla-ma-tion</t></w>
<w><t>dis-clam-a-to-ry</t></w>
@@ -42749,7 +42751,7 @@
<w><t>dis-con-tin-u-ous-ness</t></w>
<w><t>dis-co-phil</t></w>
<w><t>dis-co-phile</t></w>
-<w><t>dis-cord</t></w>
+<w><t>dis-cord</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dis-cord-ance</t></w>
<w><t>dis-cord-an-cy</t></w>
<w><t>dis-cor-dan-cy</t></w>
@@ -42766,7 +42768,7 @@
<w><t>dis-coun-te-nanc-ing</t></w>
<w><t>dis-count-er</t></w>
<phrase><t>dis-count house</t></phrase>
-<w><t>dis-cour-age</t></w>
+<w><t>dis-cour-age</t><verb><regular-root/></verb></w>
<w><t>dis-cour-age-a-ble</t></w>
<w><t>dis-cour-aged</t></w>
<w><t>dis-cour-age-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -42801,7 +42803,7 @@
<w><t>dis-creet-ly</t></w>
<w><t>dis-creet-ness</t></w>
<w><t>dis-crep-ance</t></w>
-<w><t>dis-crep-an-cy</t></w>
+<w><t>dis-crep-an-cy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dis-crep-ant</t></w>
<w><t>dis-crep-ant-ly</t></w>
<w><t>dis-crete</t></w>
@@ -42939,7 +42941,7 @@
<w><t>dis-for-est</t></w>
<w><t>dis-fran-chise</t></w>
<w><t>dis-fran-chised</t></w>
-<w><t>dis-fran-chise-ment</t></w>
+<w><t>dis-fran-chise-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dis-fran-chis-ing</t></w>
<w><t>dis-frock</t></w>
<w><t>dis-fur-nish</t></w>
@@ -43488,7 +43490,7 @@
<w><t>dis-sil-i-en-cy</t></w>
<w><t>dis-sil-i-ent</t></w>
<w><t>dis-sim-i-lar</t></w>
-<w><t>dis-sim-i-lar-i-ty</t></w>
+<w><t>dis-sim-i-lar-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dis-sim-i-lar-ly</t></w>
<w><t>dis-sim-i-late</t></w>
<w><t>dis-sim-i-lat-ed</t></w>
@@ -43655,7 +43657,7 @@
<w><t>dis-tor-tion-al</t></w>
<w><t>dis-tor-tive</t></w>
<w><t>distr</t></w>
-<w><t>dis-tract</t></w>
+<w><t>dis-tract</t><verb><regular-root/></verb></w>
<w><t>dis-tract-ed</t></w>
<w><t>dis-tract-ed-ly</t></w>
<w><t>dis-tract-ed-ness</t></w>
@@ -45220,7 +45222,7 @@
<w><t>Dra-gui-gnan</t></w>
<w><t>draht-haar</t></w>
<w><t>drail</t></w>
-<w><t>drain</t></w>
+<w><t>drain</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>drain-a-ble</t></w>
<w><t>drain-age</t></w>
<phrase><t>drain-age ba-sin</t></phrase>
@@ -46237,7 +46239,7 @@
<w><t>du-ti-a-ble</t></w>
<w><t>du-ti-ful</t></w>
<w><t>du-ti-ful-ly</t></w>
-<w><t>du-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>du-ty</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>du-ty=bound</t></w>
<w><t>du-ty=free</t></w>
<phrase><t>du-ty=free shop</t></phrase>
@@ -46301,7 +46303,7 @@
<w><t>dye-a-ble</t></w>
<w><t>dye-ing</t></w>
<w><t>dye-line</t></w>
-<w><t>Dy-er</t></w>
+<w><t>Dy-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dy-er</t></w>
<w><t>dy-er’s=broom</t></w>
<w><t>dy-er’s=green-weed</t></w>
@@ -47060,7 +47062,7 @@
<w><t>Ed-gar</t></w>
<phrase><t>Ed-gar Ath-e-ling</t></phrase>
<w><t>Ed-gard</t></w>
-<w><t>edge</t></w>
+<w><t>edge</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>edge-bone</t></w>
<w><t>Edge-field</t></w>
<w><t>Edge-hill</t></w>
@@ -47093,7 +47095,7 @@
<w><t>E-die</t></w>
<w><t>ed-i-fi-ca-tion</t></w>
<w><t>e-dif-i-ca-to-ry</t></w>
-<w><t>ed-i-fice</t></w>
+<w><t>ed-i-fice</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ed-i-fi-cial</t></w>
<w><t>ed-i-fied</t></w>
<w><t>ed-i-fi-er</t></w>
@@ -48364,7 +48366,7 @@
<w><t>em-bar-ring</t></w>
<w><t>em-bas-sa-dor</t></w>
<w><t>em-bas-sage</t></w>
-<w><t>em-bas-sy</t></w>
+<w><t>em-bas-sy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>em-bat-tle</t></w>
<w><t>em-bat-tled</t></w>
<w><t>em-bat-tle-ment</t></w>
@@ -48809,7 +48811,7 @@
<w><t>en-act</t><verb><regular-root/></verb></w>
<w><t>en-act-a-ble</t></w>
<w><t>en-ac-tive</t></w>
-<w><t>en-act-ment</t></w>
+<w><t>en-act-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>en-ac-tor</t></w>
<w><t>en-ac-to-ry</t></w>
<w><t>en-al-la-ge</t></w>
@@ -49045,8 +49047,9 @@
<w><t>en-dear</t><verb><regular-root/></verb></w>
<w><t>en-dear-ing-ly</t></w>
<w><t>en-dear-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>en-deav-or</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
+<word-placeholder><t>en-deav-or</t><country-specific country="USA"/></word-placeholder>
<w><t>en-deav-or-er</t></w>
+<word-placeholder><t>en-deav-our</t><country-specific country="GBR"/></word-placeholder>
<w><t>En-de-cott</t></w>
<w><t>En-de-ïs</t></w>
<w><t>endemic</t></w>
@@ -49324,7 +49327,7 @@
<w><t>Eng-el-bert</t></w>
<w><t>En-gel-ber-ta</t></w>
<w><t>En-gels</t></w>
-<w><t>en-gen-der</t></w>
+<w><t>en-gen-der</t><verb><regular-root/></verb></w>
<w><t>en-gen-der-er</t></w>
<w><t>en-gen-der-ment</t></w>
<w><t>En-ghien</t></w>
@@ -49448,7 +49451,7 @@
<w><t>en-laced</t></w>
<w><t>en-lace-ment</t></w>
<w><t>en-lac-ing</t></w>
-<w><t>en-large</t></w>
+<w><t>en-large</t><verb><regular-root/></verb></w>
<w><t>en-large-a-ble</t></w>
<w><t>en-larged</t></w>
<w><t>en-larged-ly</t></w>
@@ -51635,7 +51638,7 @@
<w><t>Eu-ler=Chel-pin</t></w>
<w><t>Eu-lis</t></w>
<w><t>eu-lo-gi-a</t></w>
-<w><t>eu-lo-gise</t></w>
+<w><t>eu-lo-gise</t><verb><regular-root/></verb></w>
<w><t>eu-lo-gised</t></w>
<w><t>eu-lo-gis-er</t></w>
<w><t>eu-lo-gis-ing</t></w>
@@ -51646,7 +51649,7 @@
<w><t>eu-lo-gi-um</t></w>
<w><t>eu-lo-gi-ums</t></w>
<w><t>eu-lo-gi-za-tion</t></w>
-<w><t>eu-lo-gize</t></w>
+<w><t>eu-lo-gize</t><verb><regular-root/></verb></w>
<w><t>eu-lo-gized</t></w>
<w><t>eu-lo-giz-er</t></w>
<w><t>eu-lo-giz-ing</t></w>
@@ -52259,7 +52262,7 @@
<phrase><t>ex-cess sup-ply</t></phrase>
<w><t>exch</t></w>
<w><t>ex=Chan-cel-lor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>ex-change</t></w>
+<w><t>ex-change</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>ex-change-a-bil-i-ty</t></w>
<w><t>ex-change-a-ble</t></w>
<w><t>ex-change-a-bly</t></w>
@@ -52324,7 +52327,7 @@
<w><t>ex-clud-er</t></w>
<w><t>ex-clud-i-ble</t></w>
<w><t>ex-clud-ing</t></w>
-<w><t>ex-clu-sion</t></w>
+<w><t>ex-clu-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ex-clu-sion-ar-y</t></w>
<w><t>ex-clu-sion-er</t></w>
<w><t>ex-clu-sion-ism</t></w>
@@ -54576,7 +54579,7 @@
<w><t>fed-dan</t></w>
<w><t>fed-e-li-ni</t></w>
<w><t>fed-er-a-cy</t></w>
-<w><t>fed-er-al</t></w>
+<w><t>fed-er-al</t><adjective/></w>
<w><t>Fed-er-al</t></w>
<phrase><t>fed-er-al dis-trict</t></phrase>
<w><t>fed-er-al-ese</t></w>
@@ -54611,7 +54614,7 @@
<w><t>fe-do-ra</t></w>
<w><t>Fe-do-ra</t></w>
<w><t>fee</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>fee-ble</t></w>
+<w><t>fee-ble</t><adjective><extensible/></adjective></w>
<w><t>fee-ble=mind-ed</t></w>
<w><t>fee-ble=mind-ed-ness</t></w>
<w><t>fee-ble-ness</t></w>
@@ -55144,7 +55147,7 @@
<w><t>fe-ver-weed</t></w>
<w><t>fe-ver-wort</t></w>
<w><t>Fé-vri-er</t></w>
-<w><t>few</t></w>
+<w><t>few</t><noun><plural/></noun><adjective><extensible/></adjective></w>
<w><t>few-er</t></w>
<w><t>few-ness</t></w>
<w><t>few-ter-er</t></w>
@@ -55595,7 +55598,7 @@
<w><t>fil-trate</t></w>
<w><t>fil-trat-ed</t></w>
<w><t>fil-trat-ing</t></w>
-<w><t>fil-tra-tion</t></w>
+<w><t>fil-tra-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>fil-tre</t></w>
<w><t>fi-lum</t></w>
<w><t>F-i-ma</t></w>
@@ -56769,7 +56772,7 @@
<w><t>flo-rif-er-ous-ness</t></w>
<w><t>flo-ri-gen</t></w>
<w><t>flo-ri-le-gi-um</t></w>
-<w><t>flor-in</t></w>
+<w><t>flor-in</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Flo-ri-na</t></w>
<w><t>Flo-rine</t></w>
<w><t>Flo-ri-o</t></w>
@@ -56847,7 +56850,7 @@
<w><t>flow=on</t></w>
<w><t>Floy</t></w>
<w><t>Floyce</t></w>
-<w><t>Floyd</t></w>
+<w><t>Floyd</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Floyd-a-da</t></w>
<w><t>fl-rie</t></w>
<w><t>flu</t></w>
@@ -56972,7 +56975,7 @@
<w><t>flur-ried-ly</t></w>
<w><t>flur-ry</t></w>
<w><t>flur-ry-ing</t></w>
-<w><t>flush</t></w>
+<w><t>flush</t><verb><regular-root/></verb><adjective/></w>
<w><t>flush-er</t></w>
<w><t>Flush-ing</t></w>
<w><t>flush-ing-ly</t></w>
@@ -57258,7 +57261,7 @@
<w><t>fol-low=through</t></w>
<phrase><t>fol-low up</t></phrase>
<w><t>fol-low=up</t></w>
-<w><t>fol-ly</t></w>
+<w><t>fol-ly</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Fol-som</t></w>
<phrase><t>Fol-som man</t></phrase>
<w><t>Fo-mal-haut</t></w>
@@ -57355,7 +57358,7 @@
<w><t>foot-mak-er</t></w>
<w><t>foot-man</t></w>
<w><t>foot-mark</t></w>
-<w><t>foot-note</t></w>
+<w><t>foot-note</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>foot-not-ed</t></w>
<w><t>foot-not-ing</t></w>
<w><t>foot-pace</t></w>
@@ -57624,13 +57627,14 @@
<w><t>fore-sad-dle</t></w>
<w><t>fore-said</t></w>
<w><t>fore-sail</t></w>
-<w><t>fore-saw</t></w>
-<w><t>fore-see</t></w>
+<w><t>fore-saw</t><verb><lemma>foresee</lemma></verb></w>
+<w><t>fore-see</t><verb><regular-root value="false"/></verb></w>
<w><t>fore-see-a-bil-i-ty</t></w>
<w><t>fore-see-a-ble</t></w>
<w><t>fore-see-ing</t></w>
<w><t>fore-seen</t></w>
<w><t>fore-se-er</t></w>
+<w><t>fore-sees</t><verb><lemma>foresee</lemma></verb></w>
<w><t>fore-shad-ow</t><verb><regular-root/></verb></w>
<w><t>fore-shad-ow-er</t></w>
<w><t>fore-shank</t></w>
@@ -58838,7 +58842,7 @@
<w><t>frieze</t></w>
<w><t>friez-ing</t></w>
<w><t>frig</t></w>
-<w><t>frig-ate</t></w>
+<w><t>frig-ate</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>frig-ate bird</t></phrase>
<w><t>Frigg</t></w>
<w><t>Frig-ga</t></w>
@@ -59553,7 +59557,7 @@
<w><t>furl-er</t></w>
<w><t>fur-less</t></w>
<w><t>fur-long</t></w>
-<w><t>fur-lough</t></w>
+<w><t>fur-lough</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>fur-men-ty</t></w>
<w><t>fur-me-ty</t></w>
<w><t>fur-mint</t></w>
@@ -60831,7 +60835,7 @@
<w><t>ga-zelle=boy</t></w>
<w><t>ga-zelle-like</t></w>
<w><t>gaz-er</t></w>
-<w><t>ga-zette</t></w>
+<w><t>ga-zette</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ga-zet-ted</t></w>
<phrase><t>ga-zet-ted of-fic-er</t></phrase>
<w><t>gaz-et-teer</t></w>
@@ -61922,7 +61926,7 @@
<w><t>gil-ly</t></w>
<w><t>gil-ly-flow-er</t></w>
<w><t>gil-ly-ing</t></w>
-<w><t>Gil-man</t></w>
+<w><t>Gil-man</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Gil-mer</t></w>
<w><t>Gi-lo-lo</t></w>
<w><t>Gil-roy</t></w>
@@ -63910,7 +63914,7 @@
<w><t>Granth</t></w>
<w><t>Grant-land</t></w>
<phrase><t>grant of pro-bate</t></phrase>
-<w><t>gran-tor</t></w>
+<w><t>gran-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>gran tor-is-mo</t></phrase>
<phrase><t>Grant Sa-hib</t></phrase>
<w><t>gran-u-lar</t></w>
@@ -64313,7 +64317,7 @@
<w><t>Green-cas-tle</t></w>
<w><t>Green-dale</t></w>
<phrase><t>green drag-on</t></phrase>
-<w><t>Greene</t></w>
+<w><t>Greene</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>green-er-ies</t></w>
<w><t>green-er-y</t></w>
<w><t>Green-field</t></w>
@@ -65156,7 +65160,7 @@
<w><t>Gui-enne</t></w>
<w><t>guige</t></w>
<w><t>guild</t></w>
-<w><t>guil-der</t></w>
+<w><t>guil-der</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Guild-ford</t></w>
<w><t>guild-hall</t></w>
<w><t>guild-ship</t></w>
@@ -66817,7 +66821,7 @@
<w><t>hand-i-ness</t></w>
<w><t>hand-i-work</t></w>
<w><t>hand-ker-chief</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>han-dle</t></w>
+<w><t>han-dle</t><verb><regular-root/></verb></w>
<w><t>han-dle-a-ble</t></w>
<w><t>han-dle-bar</t></w>
<phrase><t>han-dle-bar mous-tache</t></phrase>
@@ -67518,7 +67522,7 @@
<w><t>Haunce</t></w>
<w><t>haunch</t></w>
<w><t>haunch-less</t></w>
-<w><t>haunt</t></w>
+<w><t>haunt</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>haunt-ed</t></w>
<w><t>haunt-er</t></w>
<w><t>haunt-ing</t></w>
@@ -69976,7 +69980,7 @@
<w><t>hire-a-ble</t></w>
<w><t>hire-ling</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>hire=pur-chase</t></w>
-<w><t>hir-er</t></w>
+<w><t>hir-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>hi-ri Mo-tu</t></phrase>
<w><t>hir-ing</t></w>
<w><t>Hi-ro-hi-to</t></w>
@@ -71478,7 +71482,7 @@
<phrase><t>house-hold name</t></phrase>
<phrase><t>house-hold troops</t></phrase>
<w><t>house-keep</t></w>
-<w><t>house-keep-er</t></w>
+<w><t>house-keep-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>house-keep-er-like</t></w>
<w><t>house-keep-ing</t></w>
<w><t>house-kept</t></w>
@@ -71536,7 +71540,7 @@
<w><t>Hous-man</t></w>
<w><t>Hous-say</t></w>
<w><t>Hous-ton</t></w>
-<w><t>Hou-ston</t></w>
+<w><t>Hou-ston</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>hous-to-ni-a</t></w>
<w><t>hout-ing</t></w>
<w><t>Hou-yhn-hnm</t></w>
@@ -71567,7 +71571,7 @@
<w><t>howe</t></w>
<w><t>how-e’er</t></w>
<w><t>how-el</t></w>
-<w><t>How-ell</t></w>
+<w><t>How-ell</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>How-ells</t></w>
<w><t>how-ev-er</t></w>
<w><t>howf</t></w>
@@ -73757,7 +73761,7 @@
<w><t>id-i-o-graph</t></w>
<w><t>id-i-o-graph-ic</t></w>
<w><t>id-i-o-lect</t></w>
-<w><t>id-i-om</t></w>
+<w><t>id-i-om</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>id-i-o-mat-ic</t></w>
<w><t>id-i-o-mat-i-cal</t></w>
<w><t>id-i-o-mat-i-cal-ly</t></w>
@@ -73794,11 +73798,11 @@
<w><t>id-i-ot-ized</t></w>
<w><t>id-i-ot-iz-ing</t></w>
<w><t>id-i-o-trop-ic</t></w>
-<w><t>i-dle</t></w>
+<w><t>i-dle</t><verb><regular-root/></verb><adjective></adjective></w>
<w><t>i-dled</t></w>
<w><t>i-dle-ness</t></w>
<phrase><t>i-dle pul-ley</t></phrase>
-<w><t>i-dler</t></w>
+<w><t>i-dler</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>i-dler pul-ley</t></phrase>
<w><t>i-dlesse</t></w>
<w><t>i-dlest</t></w>
@@ -74003,7 +74007,7 @@
<w><t>Il-kley</t></w>
<w><t>I’ll</t></w>
<w><t>Ill</t></w>
-<w><t>ill</t></w>
+<w><t>ill</t><noun><pluralizable/></noun><adjective/></w>
<w><t>ill=ad-vised</t></w>
<w><t>ill=af-fect-ed</t></w>
<w><t>il-la-mon</t></w>
@@ -75027,7 +75031,7 @@
<w><t>im-pro-pri-at-ing</t></w>
<w><t>im-pro-pri-a-tion</t></w>
<w><t>im-pro-pri-a-tor</t></w>
-<w><t>im-pro-pri-e-ty</t></w>
+<w><t>im-pro-pri-e-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>im-prov-a-bil-i-ty</t></w>
<w><t>im-prov-a-ble</t></w>
<w><t>im-prov-a-ble-ness</t></w>
@@ -75104,7 +75108,7 @@
<w><t>Im-re</t></w>
<w><t>Im-roz</t></w>
<w><t>IMS</t></w>
-<w><t>in</t></w>
+<w><t>in</t><noun><pluralizable/></noun></w>
<w><t>I-na</t></w>
<w><t>in-a-bil-i-ty</t></w>
<phrase><t>in ab-sen-ti-a</t></phrase>
@@ -75284,7 +75288,7 @@
<w><t>in-ca-pac-i-tate</t></w>
<w><t>in-ca-pac-i-tat-ed</t></w>
<w><t>in-ca-pac-i-tat-ing</t></w>
-<w><t>in-ca-pac-i-ta-tion</t></w>
+<w><t>in-ca-pac-i-ta-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-ca-pac-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>In-cap-a-ri-na</t></w>
<w><t>in-cap-su-late</t></w>
@@ -75457,7 +75461,7 @@
<w><t>in-com-bus-ti-ble</t></w>
<w><t>in-com-bus-ti-ble-ness</t></w>
<w><t>in-com-bus-ti-bly</t></w>
-<w><t>in-come</t></w>
+<w><t>in-come</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>in-come bond</t></phrase>
<phrase><t>in-come group</t></phrase>
<w><t>in-come-less</t></w>
@@ -75764,7 +75768,7 @@
<w><t>in-da-mine</t></w>
<w><t>In-dan-threne</t></w>
<w><t>in-de</t></w>
-<w><t>in-debt</t></w>
+<w><t>in-debt</t><verb><regular-root/></verb></w>
<w><t>in-debt-ed</t></w>
<w><t>in-debt-ed-ness</t></w>
<w><t>in-de-cen-cy</t></w>
@@ -77451,7 +77455,7 @@
<w><t>in-spis-sa-tor</t></w>
<w><t>inst</t></w>
<w><t>Inst</t></w>
-<w><t>in-sta-bil-i-ty</t></w>
+<w><t>in-sta-bil-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-sta-ble</t></w>
<w><t>in-stal</t></w>
<w><t>in-stall</t></w>
@@ -77880,13 +77884,13 @@
<w><t>in-ter-cat-e-nat-ed</t></w>
<w><t>in-ter-caus-a-tive</t></w>
<w><t>in-ter-cav-ern-ous</t></w>
-<w><t>in-ter-cede</t></w>
+<w><t>in-ter-cede</t><verb><regular-root/></verb></w>
<w><t>in-ter-ced-ed</t></w>
<w><t>in-ter-ced-er</t></w>
<w><t>in-ter-ced-ing</t></w>
<w><t>in-ter-cel-lu-lar</t></w>
<w><t>in-ter-cen-tral</t></w>
-<w><t>in-ter-cept</t></w>
+<w><t>in-ter-cept</t><verb><regular-root/></verb></w>
<w><t>in-ter-cept-er</t></w>
<w><t>in-ter-cep-tion</t></w>
<w><t>in-ter-cep-tive</t></w>
@@ -78428,7 +78432,7 @@
<w><t>in-ter-mi-na-ble</t></w>
<w><t>in-ter-mi-na-ble-ness</t></w>
<w><t>in-ter-mi-na-bly</t></w>
-<w><t>in-ter-min-gle</t></w>
+<w><t>in-ter-min-gle</t><verb><regular-root/></verb></w>
<w><t>in-ter-min-gle-ment</t></w>
<w><t>in-ter-min-is-te-ri-al</t></w>
<w><t>in-ter-mis-sion</t></w>
@@ -79017,7 +79021,7 @@
<w><t>in-ti-ma</t></w>
<w><t>in-ti-ma-cy</t></w>
<w><t>in-ti-mal</t></w>
-<w><t>in-ti-mate</t></w>
+<w><t>in-ti-mate</t><verb><regular-root/></verb><adjective></adjective></w>
<w><t>in-ti-mat-ed</t></w>
<w><t>in-ti-mate-ly</t></w>
<w><t>in-ti-mate-ness</t></w>
@@ -79992,7 +79996,7 @@
<w><t>ir-ri-tat-ed-ly</t></w>
<w><t>ir-ri-tat-ing</t></w>
<w><t>ir-ri-tat-ing-ly</t></w>
-<w><t>ir-ri-ta-tion</t></w>
+<w><t>ir-ri-ta-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ir-ri-ta-tive</t></w>
<w><t>ir-ri-ta-tive-ness</t></w>
<w><t>ir-ri-ta-tor</t></w>
@@ -85452,7 +85456,7 @@
<w><t>land-like</t></w>
<w><t>land-locked</t></w>
<w><t>land-lop-er</t></w>
-<w><t>land-lord</t></w>
+<w><t>land-lord</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>land-lord-ism</t></w>
<w><t>land-lord-ly</t></w>
<w><t>land-lord-ry</t></w>
@@ -85591,7 +85595,7 @@
<w><t>lan-sat</t></w>
<w><t>Lans-berg</t></w>
<w><t>Lans-ford</t></w>
-<w><t>Lan-sing</t></w>
+<w><t>Lan-sing</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>lans-que-net</t></w>
<w><t>Lan-tan-a</t></w>
<w><t>lan-ta-na</t></w>
@@ -85757,7 +85761,7 @@
<w><t>larg-en</t></w>
<w><t>large-ness</t></w>
<w><t>larg-er</t></w>
-<w><t>lar-gess</t></w>
+<w><t>lar-gess</t><noun><pluralizable/></noun></w>
<w><t>lar-gesse</t></w>
<w><t>larg-est</t></w>
<w><t>lar-ghet-to</t></w>
@@ -85850,7 +85854,7 @@
<w><t>las-civ-i-ous-ness</t></w>
<w><t>lase</t></w>
<w><t>la-ser</t></w>
-<w><t>lash</t></w>
+<w><t>lash</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>lashed</t></w>
<w><t>lash-er</t></w>
<w><t>lash-ing</t></w>
@@ -86008,7 +86012,7 @@
<w><t>La-trobe</t></w>
<w><t>Lat-ta-ki-a</t></w>
<w><t>lat-ten</t></w>
-<w><t>lat-ter</t></w>
+<w><t>lat-ter</t><noun><convertible-to-possessive/></noun></w>
<w><t>lat-ter=day</t></w>
<phrase><t>Lat-ter=day Saint</t></phrase>
<w><t>lat-ter-ly</t></w>
@@ -86107,7 +86111,7 @@
<w><t>Lau-rence</t></w>
<w><t>lau-rence</t></w>
<w><t>Lau-ren-cin</t></w>
-<w><t>Lau-rens</t></w>
+<w><t>Lau-rens</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Lau-rent</t></w>
<w><t>Lau-ren-tian</t></w>
<phrase><t>Lau-ren-tian Moun-tains</t></phrase>
@@ -86638,7 +86642,7 @@
<w><t>leg-a-tee</t></w>
<w><t>leg-ate-ship</t></w>
<w><t>leg-a-tine</t></w>
-<w><t>le-ga-tion</t></w>
+<w><t>le-ga-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>le-ga-tion-ar-y</t></w>
<w><t>le-ga-to</t></w>
<w><t>leg-a-tor</t></w>
@@ -88756,7 +88760,7 @@
<w><t>loam-i-ness</t></w>
<w><t>loam-less</t></w>
<w><t>loam-y</t></w>
-<w><t>loan</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>loan</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>loan-a-ble</t></w>
<w><t>loan-blend</t></w>
<phrase><t>loan col-lec-tion</t></phrase>
@@ -92512,7 +92516,7 @@
<w><t>mar-shaled</t></w>
<w><t>mar-shal-ing</t></w>
<w><t>mar-shall</t></w>
-<w><t>Mar-shall</t></w>
+<w><t>Mar-shall</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mar-shalled</t></w>
<w><t>Mar-shall-ese</t></w>
<w><t>mar-shal-ling</t></w>
@@ -92811,7 +92815,7 @@
<phrase><t>mass spec-trom-e-ter</t></phrase>
<w><t>mass-y</t></w>
<w><t>Mas-sys</t></w>
-<w><t>mast</t></w>
+<w><t>mast</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mas-ta-ba</t></w>
<w><t>mas-ta-bah</t></w>
<w><t>mas-tax</t></w>
@@ -94301,7 +94305,7 @@
<w><t>mer-cer-iz-er</t></w>
<w><t>mer-cer-iz-ing</t></w>
<w><t>mer-cer-y</t></w>
-<w><t>mer-chan-dise</t></w>
+<w><t>mer-chan-dise</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mer-chan-dised</t></w>
<w><t>mer-chan-dis-er</t></w>
<w><t>mer-chan-dis-ing</t></w>
@@ -96021,7 +96025,7 @@
<phrase><t>mi-nor suit</t></phrase>
<phrase><t>mi-nor term</t></phrase>
<w><t>Mi-nos</t></w>
-<w><t>Mi-not</t></w>
+<w><t>Mi-not</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Min-o-taur</t></w>
<w><t>Minsk</t></w>
<w><t>min-ster</t></w>
@@ -96470,7 +96474,7 @@
<w><t>mis-in-fer-ring</t></w>
<w><t>mis-in-form</t><verb><regular-root/></verb></w>
<w><t>mis-in-form-ant</t></w>
-<w><t>mis-in-for-ma-tion</t></w>
+<w><t>mis-in-for-ma-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mis-in-form-a-tive</t></w>
<w><t>mis-in-form-er</t></w>
<w><t>mis-in-struct</t></w>
@@ -96809,7 +96813,7 @@
<w><t>mis-trans-la-tion</t></w>
<w><t>mis-treat</t><verb><regular-root/></verb></w>
<w><t>mis-treat-ment</t></w>
-<w><t>mis-tress</t></w>
+<w><t>mis-tress</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Mis-tress</t></w>
<w><t>mis-tri-al</t></w>
<w><t>mis-trust</t></w>
@@ -97082,7 +97086,7 @@
<w><t>mod-at-ed</t></w>
<w><t>mod-at-ing</t></w>
<w><t>mode</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>mod-el</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>mod-el</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>mod-el-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<!--<w><t>mod-el-ler</t></w>-->
<w><t>mo-dem</t></w>
@@ -97268,6 +97272,7 @@
<w><t>mo-lar</t></w>
<w><t>mo-lar-i-ty</t></w>
<w><t>mo-las-ses</t></w>
+<word-placeholder><t>mold</t><country-specific country="USA"/></word-placeholder>
<w><t>Mol-dau</t></w>
<w><t>Mol-da-vi-a</t></w>
<w><t>Mol-da-vi-an</t></w>
@@ -98484,6 +98489,7 @@
<w><t>mou-jik</t></w>
<w><t>Mouk-den</t></w>
<w><t>mou-lage</t></w>
+<word-placeholder><t>mould</t><country-specific country="GBR"/></word-placeholder>
<phrase><t>mould-ing board</t></phrase>
<w><t>mould-warp</t></w>
<w><t>mould-y</t></w>
@@ -99480,7 +99486,7 @@
<w><t>mu-ti-la-tive</t></w>
<w><t>mu-ti-la-tor</t></w>
<w><t>mu-ti-la-to-ry</t></w>
-<w><t>mu-ti-neer</t></w>
+<w><t>mu-ti-neer</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mut-ing</t></w>
<w><t>mu-ti-nied</t></w>
<w><t>mu-ti-nous</t></w>
@@ -99673,7 +99679,7 @@
<w><t>myr-me-coph-i-lism</t></w>
<w><t>myr-me-coph-i-lous</t></w>
<w><t>myr-me-coph-i-ly</t></w>
-<w><t>Myr-mi-don</t></w>
+<w><t>myr-mi-don</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Myr-mid-o-nes</t></w>
<w><t>Myr-mi-dons</t></w>
<w><t>my-rob-a-lan</t></w>
@@ -107709,7 +107715,7 @@
<w><t>no-ti-fi-ca-tion</t></w>
<w><t>no-ti-fied</t></w>
<w><t>no-ti-fi-er</t></w>
-<w><t>no-ti-fy</t></w>
+<w><t>no-ti-fy</t><verb><regular-root/></verb></w>
<w><t>no-ti-fy-ing</t></w>
<w><t>no=till-age</t></w>
<w><t>not-ing</t></w>
@@ -108458,7 +108464,9 @@
<w><t>ob-se-quence</t></w>
<w><t>ob-se-quent</t></w>
<w><t>ob-se-quies</t></w>
-<w><t>ob-se-qui-ous</t></w>
+<w><t>ob-se-qui-ous</t><adjective/></w>
+<w><t>ob-se-qui-ous-ly</t><adverb/></w>
+<w><t>ob-se-qui-ous-ness</t><noun/></w>
<w><t>ob-serv-a-bil-i-ty</t></w>
<w><t>ob-serv-a-ble</t></w>
<w><t>ob-serv-a-ble-ness</t></w>
@@ -108517,7 +108525,7 @@
<w><t>ob-struct-ed-ly</t></w>
<w><t>ob-struct-er</t></w>
<w><t>ob-struct-ing-ly</t></w>
-<w><t>ob-struc-tion</t></w>
+<w><t>ob-struc-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ob-struc-tion-ism</t></w>
<w><t>ob-struc-tion-ist</t></w>
<w><t>ob-struc-tion-is-tic</t></w>
@@ -108620,7 +108628,7 @@
<w><t>oc-clud-ent</t></w>
<w><t>oc-clud-ing</t></w>
<w><t>oc-clu-sal</t></w>
-<w><t>oc-clu-sion</t></w>
+<w><t>oc-clu-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>oc-clu-sive</t></w>
<w><t>oc-clu-sive-ness</t></w>
<w><t>oc-cult</t></w>
@@ -109925,7 +109933,7 @@
<phrase><t>op-po-site num-ber</t></phrase>
<phrase><t>op-po-site prompt</t></phrase>
<phrase><t>op-po-site sex</t></phrase>
-<w><t>op-po-si-tion</t></w>
+<w><t>op-po-si-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>op-po-si-tion-al</t></w>
<w><t>op-po-si-tion-ar-y</t></w>
<w><t>op-po-si-tion-less</t></w>
@@ -110787,7 +110795,7 @@
<w><t>Ost-wald</t></w>
<w><t>Os-ty-ak</t></w>
<w><t>Os-val-do</t></w>
-<w><t>Os-wald</t></w>
+<w><t>Os-wald</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Os-wal-do</t></w>
<w><t>Os-we-go</t></w>
<w><t>O-tage</t></w>
@@ -110905,7 +110913,7 @@
<w><t>Ou-spen-sky</t></w>
<w><t>oust</t><verb><regular-root/></verb></w>
<w><t>oust-er</t></w>
-<w><t>out</t></w>
+<w><t>out</t><noun><pluralizable/></noun><verb><regular-root/></verb><adjective/><adverb/><preposition/></w>
<w><t>out-act</t></w>
<w><t>out-age</t></w>
<w><t>out-am-bush</t></w>
@@ -111033,7 +111041,7 @@
<w><t>out-car-ol-ing</t></w>
<w><t>out-car-olled</t></w>
<w><t>out-car-ol-ling</t></w>
-<w><t>out-cast</t></w>
+<w><t>out-cast</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>out-caste</t></w>
<w><t>out-cav-il</t></w>
<w><t>out-cav-iled</t></w>
@@ -111703,7 +111711,7 @@
<w><t>out-stat-ure</t></w>
<w><t>out-stat-ured</t></w>
<w><t>out-stat-ur-ing</t></w>
-<w><t>out-stay</t></w>
+<w><t>out-stay</t><verb><regular-root/></verb></w>
<w><t>out-steal</t></w>
<w><t>out-steal-ing</t></w>
<w><t>out-steam</t></w>
@@ -113028,7 +113036,7 @@
<w><t>o-ver-live-li-ness</t></w>
<w><t>o-ver-live-ly</t></w>
<w><t>o-ver-liv-ing</t></w>
-<w><t>o-ver-load</t></w>
+<w><t>o-ver-load</t><verb><regular-root/></verb></w>
<w><t>o-ver-loan</t></w>
<w><t>o-ver-loath</t></w>
<w><t>o-ver-loft-i-ly</t></w>
@@ -114378,7 +114386,7 @@
<phrase><t>Paint-ed Des-ert</t></phrase>
<phrase><t>paint-ed la-dy</t></phrase>
<phrase><t>paint-ed wom-an</t></phrase>
-<w><t>paint-er</t></w>
+<w><t>paint-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>paint-er-ly</t></w>
<phrase><t>paint-er’s col-ic</t></phrase>
<w><t>paint-ing</t></w>
@@ -115543,7 +115551,7 @@
<w><t>par-buck-le</t></w>
<w><t>Par-ca</t></w>
<w><t>Par-cae</t></w>
-<w><t>par-cel</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>par-cel</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>par-cel=gilt</t></w>
<w><t>par-cel-ing</t></w>
<w><t>par-ce-nar-y</t></w>
@@ -115896,7 +115904,7 @@
<w><t>par-tial-ise</t></w>
<w><t>par-tial-ised</t></w>
<w><t>par-tial-is-ing</t></w>
-<w><t>par-ti-al-i-ty</t></w>
+<w><t>par-ti-al-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>par-tial-ly</t><adverb/></w>
<phrase><t>par-tial pres-sure</t></phrase>
<phrase><t>par-tial prod-uct</t></phrase>
@@ -115955,7 +115963,7 @@
<w><t>par-ti-tion-ment</t></w>
<w><t>par-ti-tive</t></w>
<w><t>par-ti-tive-ly</t></w>
-<w><t>par-ti-zan</t></w>
+<w><t>par-ti-zan</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>par-ti-zan-ship</t></w>
<w><t>part-let</t></w>
<w><t>part-ly</t></w>
@@ -116769,9 +116777,11 @@
<phrase><t>pec-to-ral mus-cle</t></phrase>
<w><t>pec-tose</t></w>
<w><t>pec-tous</t></w>
-<w><t>pec-u-late</t></w>
+<w><t>pec-u-late</t><verb><regular-root/></verb></w>
<w><t>pec-u-lat-ed</t></w>
<w><t>pec-u-lat-ing</t></w>
+<w><t>pec-u-la-tion</t><noun><pluralizable/></noun></w>
+<w><t>pec-u-la-tor</t><noun><pluralizable/></noun></w>
<w><t>pe-cu-liar</t></w>
<w><t>pe-cu-li-ar</t></w>
<w><t>pe-cu-liar-ise</t></w>
@@ -117613,6 +117623,7 @@
<w><t>pe-rei-rine</t></w>
<w><t>Perel-man</t></w>
<w><t>per-emp-to-ri-ly</t><adverb/></w>
+<w><t>per-emp-to-ri-ness</t><noun/></w>
<w><t>per-emp-to-ry</t><adjective/></w>
<w><t>pe-ren-nate</t></w>
<w><t>per-en-ni-al</t></w>
@@ -118114,7 +118125,7 @@
<w><t>per-plex-ing</t><adjective/></w>
<w><t>per-plex-ing-ly</t></w>
<w><t>per-plex-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>per-qui-site</t></w>
+<w><t>per-qui-site</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Per-rault</t></w>
<w><t>Per-ret</t></w>
<w><t>Per-rin</t></w>
@@ -121040,7 +121051,7 @@
<w><t>plen-ti-ful</t></w>
<w><t>plen-ti-ful-ly</t></w>
<w><t>plen-ti-ful-ness</t></w>
-<w><t>plen-ty</t></w>
+<w><t>plen-ty</t><noun><singular/></noun><adjective><extensible/></adjective></w>
<w><t>Plen-ty</t></w>
<w><t>ple-num</t></w>
<w><t>ple-nums</t></w>
@@ -122539,6 +122550,7 @@
<w><t>pos-i-tive</t></w>
<phrase><t>pos-i-tive feed-back</t></phrase>
<w><t>pos-i-tive-ly</t></w>
+<w><t>pos-i-tive-ness</t><noun/></w>
<phrase><t>pos-i-tive po-lar-i-ty</t></phrase>
<w><t>pos-i-tiv-ism</t></w>
<w><t>pos-i-tiv-ist</t></w>
@@ -123252,7 +123264,7 @@
<w><t>pow</t></w>
<w><t>POW</t></w>
<w><t>pow-an</t></w>
-<w><t>pow-der</t></w>
+<w><t>pow-der</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<phrase><t>pow-der blue</t></phrase>
<w><t>pow-der=blue</t></w>
<phrase><t>pow-der burn</t></phrase>
@@ -124567,7 +124579,7 @@
<w><t>pre-di-ges-tion</t></w>
<w><t>pre-dig-i-tal</t></w>
<w><t>pred-i-kant</t></w>
-<w><t>pre-di-lec-tion</t></w>
+<w><t>pre-di-lec-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pre-dil-i-gent</t></w>
<w><t>pre-di-lu-vi-al</t></w>
<w><t>pre-di-min-ish</t></w>
@@ -125705,7 +125717,7 @@
<w><t>pre-mo-nar-chi-cal</t></w>
<w><t>pre-mon-e-tar-y</t></w>
<w><t>pre=Mon-go-li-an</t></w>
-<w><t>pre-mon-ish</t></w>
+<w><t>pre-mon-ish</t><verb><regular-root/></verb></w>
<w><t>prem-o-ni-tion</t></w>
<w><t>pre-mo-ni-tion</t></w>
<w><t>pre-mon-i-to-ry</t></w>
@@ -126589,7 +126601,7 @@
<w><t>pre-suit-a-ble</t></w>
<w><t>pre-sum-a-ble</t></w>
<w><t>pre-sum-a-bly</t></w>
-<w><t>pre-sume</t></w>
+<w><t>pre-sume</t><verb><regular-root/></verb></w>
<w><t>pre-sumed</t></w>
<w><t>pre-sum-ed-ly</t></w>
<w><t>pre-sum-er</t></w>
@@ -127609,8 +127621,9 @@
<w><t>pro-duc-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pro-duc-tion-al</t></w>
<phrase><t>pro-duc-tion line</t></phrase>
-<w><t>pro-duc-tive</t></w>
+<w><t>pro-duc-tive</t><adjective/></w>
<w><t>pro-duc-tive-ly</t></w>
+<w><t>pro-duc-tive-ness</t><noun><singular/></noun></w>
<w><t>pro-duc-tiv-i-ty</t></w>
<w><t>pro=East-ern</t></w>
<w><t>pro=Ec-ua-dor</t></w>
@@ -128231,8 +128244,9 @@
<w><t>pro-por-tion-a-ble</t></w>
<w><t>pro-por-tion-a-ble-ness</t></w>
<w><t>pro-por-tion-a-bly</t></w>
-<w><t>pro-por-tion-al</t></w>
+<w><t>pro-por-tion-al</t><adjective/></w>
<w><t>pro-por-tion-al-i-ty</t></w>
+<w><t>pro-por-tion-al-ly</t><adverb/></w>
<phrase><t>pro-por-tion-al rep-re-sen-ta-tion</t></phrase>
<w><t>pro-por-tion-ate</t></w>
<w><t>pro-por-tion-at-ed</t></w>
@@ -128764,7 +128778,7 @@
<w><t>pro-vi-sion-ar-y</t></w>
<w><t>pro-vi-sion-er</t></w>
<w><t>pro-vi-sion-less</t></w>
-<w><t>pro-vi-so</t></w>
+<w><t>pro-vi-so</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pro-vi-so-ri-ly</t></w>
<w><t>pro-vi-so-ry</t></w>
<w><t>pro-vit-a-min</t></w>
@@ -130458,6 +130472,7 @@
<w><t>pyx-ie</t></w>
<w><t>pyx-is</t></w>
<w><t>Pyx-is</t></w>
+<w><t>q. v.</t><abbrev referenced-word="quod vide (for which, see)"/></w>
<phrase><t>Qa-boos bin Said</t></phrase>
<w><t>Qad-a-rite</t></w>
<w><t>Qad-da-fi</t></w>
@@ -131563,7 +131578,7 @@
<w><t>Que-ré-ta-ro</t></w>
<w><t>quer-u-lous</t></w>
<w><t>quer-u-ry-ing</t></w>
-<w><t>que-ry</t></w>
+<w><t>que-ry</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>que-ry-ing-ly</t></w>
<w><t>ques</t></w>
<w><t>Ques-nay</t></w>
@@ -131828,7 +131843,7 @@
<w><t>quon-dam</t></w>
<phrase><t>Quon-set hut</t></phrase>
<w><t>quor-um</t></w>
-<w><t>quo-rum</t></w>
+<w><t>quo-rum</t><noun><singular/></noun></w>
<w><t>quot</t></w>
<w><t>quo-ta</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>quot-a-ble</t></w>
@@ -133290,7 +133305,7 @@
<w><t>rea-son-less</t></w>
<w><t>rea-son-sured</t></w>
<w><t>rea-son-sur-ing</t></w>
-<w><t>re-as-sem-ble</t></w>
+<w><t>re-as-sem-ble</t><verb><regular-root/></verb></w>
<w><t>re-as-sem-bly</t></w>
<w><t>re-as-sent</t></w>
<w><t>re-as-sert</t><verb><regular-root/></verb></w>
@@ -133403,7 +133418,7 @@
<w><t>re-bel</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>reb-el-dom</t></w>
<w><t>reb-el-like</t></w>
-<w><t>re-bel-lion</t></w>
+<w><t>re-bel-lion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>re-bel-lious</t></w>
<w><t>re-bel-lious-ly</t></w>
<w><t>re-bel-lious-ness</t></w>
@@ -133533,7 +133548,7 @@
<w><t>re-capped</t></w>
<w><t>re-cap-ping</t></w>
<w><t>re-cap-tion</t></w>
-<w><t>re-cap-ture</t></w>
+<w><t>re-cap-ture</t><verb><regular-root/></verb></w>
<w><t>re-cap-tured</t></w>
<w><t>re-cap-tur-ing</t></w>
<w><t>re-car-bu-ri-za-tion</t></w>
@@ -133677,7 +133692,7 @@
<w><t>rec-i-ta-tive</t></w>
<w><t>re-cit-a-tive</t></w>
<w><t>rec-i-ta-ti-vo</t></w>
-<w><t>re-cite</t></w>
+<w><t>re-cite</t><verb><regular-root/></verb></w>
<w><t>re-cit-ed</t></w>
<w><t>re-cit-er</t></w>
<w><t>re-cit-ing</t></w>
@@ -133885,7 +133900,7 @@
<w><t>re-con-se-cra-tion</t></w>
<w><t>re-con-sent</t></w>
<w><t>re-con-sid-er</t><verb><regular-root/></verb></w>
-<w><t>re-con-sid-er-a-tion</t></w>
+<w><t>re-con-sid-er-a-tion</t><noun/></w>
<w><t>re-con-sign</t></w>
<w><t>re-con-sign-ment</t></w>
<w><t>re-con-sole</t></w>
@@ -134211,7 +134226,7 @@
<w><t>re-dem-on-strat-ing</t></w>
<w><t>re-dem-on-stra-tion</t></w>
<w><t>re-demp-ti-ble</t></w>
-<w><t>re-demp-tion</t></w>
+<w><t>re-demp-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>re-demp-tion-al</t></w>
<w><t>re-demp-tion-er</t></w>
<w><t>re-demp-tion-less</t></w>
@@ -134507,7 +134522,7 @@
<w><t>re-ed-i-fy-ing</t></w>
<w><t>reed-i-ness</t></w>
<w><t>reed-ing</t></w>
-<w><t>re-ed-it</t></w>
+<w><t>re-ed-it</t><verb><regular-root/></verb></w>
<w><t>Reed-ley</t></w>
<w><t>reed-ling</t></w>
<phrase><t>reed or-gan</t></phrase>
@@ -134767,7 +134782,7 @@
<w><t>re-felt</t></w>
<w><t>refer</t></w>
<w><t>re-fer</t><verb><regular-root/></verb></w>
-<w><t>ref-er-ee</t></w>
+<w><t>ref-er-ee</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ref-er-ence</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>ref-er-ence book</t></phrase>
<w><t>ref-er-enced</t></w>
@@ -135497,7 +135512,7 @@
<w><t>re-in-scribe</t></w>
<w><t>re-in-scribed</t></w>
<w><t>re-in-scrib-ing</t></w>
-<w><t>re-in-sert</t></w>
+<w><t>re-in-sert</t><verb><regular-root/></verb></w>
<w><t>re-in-ser-tion</t></w>
<w><t>re-in-sist</t></w>
<w><t>re-in-spect</t></w>
@@ -135627,7 +135642,7 @@
<w><t>re-joice-ful</t></w>
<w><t>re-joic-ing</t></w>
<w><t>re-join</t><verb><regular-root/></verb></w>
-<w><t>re-join-der</t></w>
+<w><t>re-join-der</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>re-judge</t></w>
<w><t>re-judged</t></w>
<w><t>re-judge-ment</t></w>
@@ -135827,7 +135842,7 @@
<w><t>re...
[truncated message content] |
|
From: <vic...@us...> - 2023-10-22 12:16:32
|
Revision: 13316
http://sourceforge.net/p/foray/code/13316
Author: victormote
Date: 2023-10-22 12:16:29 +0000 (Sun, 22 Oct 2023)
Log Message:
-----------
Dictionary improvements.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2023-10-12 12:04:30 UTC (rev 13315)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2023-10-22 12:16:29 UTC (rev 13316)
@@ -7070,7 +7070,7 @@
<w><t>an-ti-church</t></w>
<w><t>an-tic-i-pant</t></w>
<w><t>an-tic-i-pat-a-ble</t></w>
-<w><t>an-tic-i-pate</t></w>
+<w><t>an-tic-i-pate</t><verb><regular-root/></verb></w>
<w><t>an-tic-i-pat-ed</t></w>
<w><t>an-tic-i-pat-ing</t></w>
<w><t>an-tic-i-pa-tion</t><noun><pluralizable/></noun></w>
@@ -12907,7 +12907,7 @@
<phrase><t>bal-loon sleeve</t></phrase>
<phrase><t>bal-loon tyre</t></phrase>
<phrase><t>bal-loon vine</t></phrase>
-<w><t>bal-lot</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>bal-lot</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>bal-lo-tade</t></w>
<phrase><t>bal-lot box</t></phrase>
<w><t>bal-lot-ed</t></w>
@@ -13166,7 +13166,7 @@
<w><t>bank=rid-ing</t></w>
<w><t>bank-roll</t></w>
<w><t>bank-roll-er</t></w>
-<w><t>bank-rupt</t></w>
+<w><t>bank-rupt</t><verb><regular-root/></verb><adjective></adjective></w>
<w><t>bank-rupt-cy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>bank-rupt-ly</t></w>
<w><t>Banks</t></w>
@@ -15277,7 +15277,7 @@
<w><t>Be-o-grad</t></w>
<w><t>Be-o-wulf</t></w>
<w><t>be-paint</t></w>
-<w><t>be-queath</t></w>
+<w><t>be-queath</t><verb><regular-root/></verb></w>
<w><t>be-queath-a-ble</t></w>
<w><t>be-queath-al</t></w>
<w><t>be-queath-er</t></w>
@@ -23316,7 +23316,7 @@
<phrase><t>Car-di-gan Bay</t></phrase>
<w><t>Car-di-gan-shire</t></w>
<w><t>Car-din</t></w>
-<w><t>car-di-nal</t></w>
+<w><t>car-di-nal</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>car-di-nal-ate</t></w>
<phrase><t>car-di-nal bee-tle</t></phrase>
<w><t>car-di-nal=bish-op</t></w>
@@ -27943,7 +27943,7 @@
<w><t>CIO</t></w>
<w><t>ci-on</t></w>
<w><t>Ci-pan-go</t></w>
-<w><t>ci-pher</t></w>
+<w><t>ci-pher</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>ci-pher-a-ble</t></w>
<w><t>ci-pher-er</t></w>
<w><t>cip-o-lin</t></w>
@@ -29090,7 +29090,7 @@
<w><t>clo-tured</t></w>
<w><t>clo-tur-ing</t></w>
<w><t>clou</t></w>
-<w><t>cloud</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>cloud</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>cloud-age</t></w>
<w><t>cloud-ber-ry</t></w>
<w><t>cloud-burst</t></w>
@@ -34989,7 +34989,7 @@
<w><t>craw</t></w>
<w><t>craw-fish</t></w>
<w><t>craw-fish-es</t></w>
-<w><t>Craw-ford</t></w>
+<w><t>Craw-ford</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>crawl</t></w>
<w><t>crawl-er</t></w>
<w><t>Craw-ley</t></w>
@@ -47016,6 +47016,7 @@
<w><t>ec-ze-ma</t></w>
<w><t>ec-ze-ma-toid</t></w>
<w><t>ec-zem-a-tous</t></w>
+<w><t>ed.</t><abbrev referenced-word="editor, edited"/></w>
<w><t>Ed-a</t></w>
<w><t>e-da-cious</t></w>
<w><t>e-dac-i-ty</t></w>
@@ -48380,7 +48381,7 @@
<phrase><t>Em-ber days</t></phrase>
<phrase><t>em-ber goose</t></phrase>
<phrase><t>Em-ber week</t></phrase>
-<w><t>em-bez-zle</t></w>
+<w><t>em-bez-zle</t><verb><regular-root/></verb></w>
<w><t>em-bez-zle-ment</t></w>
<w><t>em-bez-zler</t></w>
<w><t>em-bi-id</t></w>
@@ -52436,7 +52437,7 @@
<w><t>ex-e-crate</t></w>
<w><t>ex-e-crat-ed</t></w>
<w><t>ex-e-crat-ing</t></w>
-<w><t>ex-e-cra-tion</t></w>
+<w><t>ex-e-cra-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ex-e-cra-tive</t></w>
<w><t>ex-e-cra-tive-ly</t></w>
<w><t>ex-e-cra-tor</t></w>
@@ -53534,7 +53535,7 @@
<w><t>fac-ile-ly</t></w>
<w><t>fac-ile-ness</t></w>
<phrase><t>fac-ile prin-ceps</t></phrase>
-<w><t>fa-cil-i-tate</t></w>
+<w><t>fa-cil-i-tate</t><verb><regular-root/></verb></w>
<w><t>fa-cil-i-tat-ed</t></w>
<w><t>fa-cil-i-tat-ing</t></w>
<w><t>fa-cil-i-ta-tion</t></w>
@@ -54649,7 +54650,7 @@
<w><t>feet-first</t></w>
<w><t>feeze</t></w>
<w><t>feez-ing</t></w>
-<w><t>feign</t></w>
+<w><t>feign</t><verb><regular-root/></verb></w>
<w><t>feigned</t></w>
<w><t>feign-ed-ly</t></w>
<w><t>feign-ed-ness</t></w>
@@ -70146,7 +70147,7 @@
<w><t>hoa-gy</t></w>
<w><t>Hoang-ho</t></w>
<w><t>hoar</t></w>
-<w><t>hoard</t></w>
+<w><t>hoard</t><noun><pluralizable/></noun><verb><regular-root/></verb></w>
<w><t>hoard-er</t></w>
<w><t>hoard-ing</t></w>
<w><t>Hoare</t></w>
@@ -74637,7 +74638,7 @@
<w><t>im-pe-cu-ni-ous-ly</t></w>
<w><t>im-pe-cu-ni-ous-ness</t></w>
<w><t>im-ped-ance</t></w>
-<w><t>im-pede</t></w>
+<w><t>im-pede</t><verb><regular-root/></verb></w>
<w><t>im-ped-ed</t></w>
<w><t>im-ped-er</t></w>
<w><t>im-ped-i-bil-i-ty</t></w>
@@ -88037,7 +88038,7 @@
<w><t>Linc</t></w>
<w><t>linch</t></w>
<w><t>linch-pin</t></w>
-<w><t>Lin-coln</t></w>
+<w><t>Lin-coln</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Lin-coln-esque</t></w>
<phrase><t>Lin-coln green</t></phrase>
<w><t>Lin-coln-i-an</t></w>
@@ -88935,7 +88936,7 @@
<phrase><t>lo-cus clas-si-cus</t></phrase>
<phrase><t>lo-cus si-gil-li</t></phrase>
<phrase><t>lo-cus stan-di</t></phrase>
-<w><t>lo-cust</t></w>
+<w><t>lo-cust</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>lo-cus-ta</t></w>
<w><t>lo-cus-tal</t></w>
<phrase><t>lo-cust bird</t></phrase>
@@ -94405,7 +94406,7 @@
<w><t>mer-i-ste-mat-ic</t></w>
<w><t>mer-i-ste-mat-i-cal-ly</t></w>
<w><t>me-ris-tic</t></w>
-<w><t>mer-it</t></w>
+<w><t>mer-it</t><noun></noun><verb><regular-root/></verb></w>
<w><t>mer-i-ted</t></w>
<w><t>mer-it-ed-ly</t></w>
<w><t>mer-it-less</t></w>
@@ -96244,7 +96245,7 @@
<w><t>mis-call</t></w>
<w><t>mis-call-er</t></w>
<w><t>mis-car-riage</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>mis-car-ry</t></w>
+<w><t>mis-car-ry</t><verb><regular-root/></verb></w>
<w><t>mis-cast</t></w>
<w><t>mis-cast-ing</t></w>
<w><t>mis-cat-e-go-rize</t></w>
@@ -99785,6 +99786,7 @@
<w><t>mzun-gu</t></w>
<w><t>N/F</t></w>
<w><t>N/S/F</t></w>
+<w><t>N.C.</t><abbrev referenced-word="North Carolina"/></w>
<w><t>NAAFI</t></w>
<w><t>nab</t></w>
<w><t>Na-bal</t></w>
@@ -132623,7 +132625,7 @@
<w><t>ra-sa</t></w>
<w><t>ras-bo-ra</t></w>
<w><t>RASC</t></w>
-<w><t>ras-cal</t></w>
+<w><t>ras-cal</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ras-cal-i-ty</t></w>
<w><t>ras-cal-like</t></w>
<w><t>ras-cal-ly</t></w>
@@ -133569,7 +133571,7 @@
<w><t>re-ceiv-a-ble-ness</t></w>
<w><t>re-ceive</t><verb><regular-root/></verb></w>
<phrase><t>Re-ceived Pro-nun-ci-a-tion</t></phrase>
-<w><t>re-ceiv-er</t></w>
+<w><t>re-ceiv-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>re-ceiv-er-ship</t></w>
<w><t>re-ceiv-ing</t></w>
<phrase><t>re-ceiv-ing or-der</t></phrase>
@@ -140379,6 +140381,7 @@
<phrase><t>Ry-u-kyu Is-lands</t></phrase>
<w><t>Ryu-no-su-ke</t></w>
<w><t>Ryu-rik</t></w>
+<w><t>S.C.</t><abbrev referenced-word="South Carolina"/></w>
<w><t>Saa-di</t></w>
<w><t>Saar</t></w>
<w><t>Saar-br-en</t></w>
@@ -158714,7 +158717,7 @@
<w><t>swin-dle</t></w>
<w><t>swin-dle-a-ble</t></w>
<w><t>swin-dled</t></w>
-<w><t>swin-dler</t></w>
+<w><t>swin-dler</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>swin-dle sheet</t></phrase>
<w><t>swin-dling-ly</t></w>
<w><t>Swin-don</t></w>
@@ -160857,7 +160860,7 @@
<w><t>Tell</t></w>
<phrase><t>tell a-part</t></phrase>
<phrase><t>Tell el A-mar-na</t></phrase>
-<w><t>tell-er</t></w>
+<w><t>tell-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Tel-ler</t></w>
<w><t>tell-er-ship</t></w>
<w><t>Tél-lez</t></w>
@@ -169292,7 +169295,8 @@
<w><t>un-con-ju-gat-ed</t></w>
<w><t>un-con-junc-tive</t></w>
<w><t>un-con-jured</t></w>
-<w><t>un-con-nect-ed</t></w>
+<w><t>un-con-nect-ed</t><adjective/></w>
+<w><t>un-con-nect-ed-ly</t><adverb/></w>
<w><t>un-con-nect-ed-ness</t></w>
<w><t>un-conned</t></w>
<w><t>un-con-nived</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2023-10-12 12:04:30 UTC (rev 13315)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2023-10-22 12:16:29 UTC (rev 13316)
@@ -91,6 +91,7 @@
<w><t>fac-to</t></w>
<w><t>fa-to</t></w>
<w><t>fat-u-us</t></w>
+<w><t>fe-lo</t></w>
<w><t>fide</t></w>
<w><t>fi-eri</t></w>
<w><t>fit</t></w>
@@ -134,6 +135,8 @@
<w><t>lo-co</t></w>
<w><t>ma-jor-i</t></w>
<w><t>man-dam-us</t></w>
+<w><t>max-im</t><noun><singular/></noun></w>
+<w><t>max-ims</t><noun><plural/></noun></w>
<w><t>me-ro</t></w>
<w><t>me-um</t></w>
<w><t>mi-nu-tiæ</t></w>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-10-12 12:04:32
|
Revision: 13315
http://sourceforge.net/p/foray/code/13315
Author: victormote
Date: 2023-10-12 12:04:30 +0000 (Thu, 12 Oct 2023)
Log Message:
-----------
Handle new axsl-dictionary elements.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java 2023-10-12 12:03:22 UTC (rev 13314)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java 2023-10-12 12:04:30 UTC (rev 13315)
@@ -379,6 +379,9 @@
setTextParsingActive(false);
break;
}
+ case "word-placeholder": break;
+ case "country-specific": break;
+ case "different-country": break;
default: {
throw new IllegalStateException("Unknown element started: " + localName + ", " +
getLocationString(getLocator()));
@@ -524,6 +527,14 @@
setTextParsingActive(true);
break;
}
+ case "word-placeholder":
+ /* All we care about is whether it is in the right place. */
+ final StringWord word = new StringWord(this.currentPartsOfSpeech, this.currentSegments);
+ final String actualContent = word.getActualContent().toString();
+ checkCollation(actualContent, word.getCollatingContent().toString());
+ break;
+ case "country-specific": break;
+ case "different-country": break;
default: {
throw new IllegalStateException("Unknown element ended: " + localName);
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-10-12 12:03:26
|
Revision: 13314
http://sourceforge.net/p/foray/code/13314
Author: victormote
Date: 2023-10-12 12:03:22 +0000 (Thu, 12 Oct 2023)
Log Message:
-----------
Improvements to dictionaries and orthographies.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-epoch-01.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml 2023-10-05 16:38:34 UTC (rev 13313)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml 2023-10-12 12:03:22 UTC (rev 13314)
@@ -36,9 +36,13 @@
<w><t>ful-fil</t><verb/></w>
<w><t>ful-fil-ment</t><noun/></w>
<w><t>ful-fils</t><verb><vf><singular/></vf></verb></w>
+<word-placeholder><t>labor</t><different-country country="USA"/></word-placeholder>
<w><t>la-bour</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>la-boured</t><adjective/></w>
<w><t>la-bour-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<word-placeholder><t>ma-neu-ver</t><different-country country="USA"/></word-placeholder>
+<w><t>ma-noeu-vre</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
+<w><t>ma-nœu-vre</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>mod-elled</t><verb/></w>
<w><t>mod-ell-ing</t><noun><singular/><convertible-to-possessive/></noun><verb/></w>
<w><t>mould</t><noun><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml 2023-10-05 16:38:34 UTC (rev 13313)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml 2023-10-12 12:03:22 UTC (rev 13314)
@@ -39,6 +39,10 @@
<w><t>la-bor</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>la-bored</t><adjective/></w>
<w><t>la-bor-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<word-placeholder><t>labour</t><different-country country="GBR"/></word-placeholder>
+<w><t>ma-neu-ver</t><verb><regular-root/></verb></w>
+<word-placeholder><t>ma-noeu-vre</t><different-country country="GBR"/></word-placeholder>
+<word-placeholder><t>ma-nœu-vre</t><different-country country="GBR"/></word-placeholder>
<w><t>mod-eled</t><verb/></w>
<w><t>mod-el-ing</t><noun><singular/><convertible-to-possessive/></noun><verb/></w>
<w><t>mold</t><noun><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-epoch-01.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-epoch-01.dict.xml 2023-10-05 16:38:34 UTC (rev 13313)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-epoch-01.dict.xml 2023-10-12 12:03:22 UTC (rev 13314)
@@ -11,6 +11,7 @@
<w><t>a-bid-eth</t></w>
+<w><t>Abm</t><abbrev referenced-word="Abraham"/></w>
<w><t>ac-o-lyth</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>a-fore-go-ing</t></w>
<w><t>an-num</t></w>
@@ -33,6 +34,7 @@
<w><t>com-mand-eth</t></w>
<w><t>comp-trol</t><verb><regular-root/></verb></w>
<w><t>comp-trol-ler=gen-er-al</t><verb><regular-root/></verb></w>
+<w><t>couldst</t></w>
<w><t>de-sir-edst</t><verb><lemma>desire</lemma></verb></w>
<w><t>dis-pro-por-tion</t><verb><regular-root/></verb></w>
<w><t>dream-ing</t><noun><pluralizable/></noun></w>
@@ -97,6 +99,8 @@
<w><t>re-ë-lec-tion</t><noun><pluralizable/></noun></w>
<w><t>re-ël-i-gi-bil-i-ty</t><noun/></w>
<w><t>re-ël-i-gi-ble</t><adjective/></w>
+<w><t>re-ën-act</t><verb><regular-root/></verb></w>
+<w><t>re-ën-force</t><verb><regular-root/></verb></w>
<w><t>re-ën-ter</t><verb><regular-root/></verb></w>
<w><t>re-ëx-am-ine</t><verb><regular-root/></verb></w>
<w><t>re-prov-eth</t><verb><lemma>reprove</lemma></verb></w>
@@ -104,6 +108,7 @@
<w><t>ri-val-ship</t><noun/><comment>modern = rivalry?</comment></w>
<w><t>Robt</t><abbrev referenced-word="Robert"/></w>
<w><t>sanc-ti-fi-eth</t><verb><lemma>sanctify</lemma></verb></w>
+<w><t>se-cret</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb><adjective/></w>
<w><t>seek-est</t><verb><regular-root value="false"/></verb></w>
<w><t>se-lect-edst</t><verb><lemma>select</lemma></verb></w>
<w><t>show-eth</t><verb><lemma>show</lemma></verb></w>
@@ -114,6 +119,7 @@
<w><t>tax=gath-er-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ten-our</t></w>
<w><t>think-est</t><verb><lemma>think</lemma></verb></w>
+<w><t>Tho’s</t><abbrev referenced-word="Thomas"/></w>
<w><t>un-ap-prized</t></w>
<w><t>un-lade</t><verb><regular-root/></verb></w>
<w><t>vis-it-est</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2023-10-05 16:38:34 UTC (rev 13313)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2023-10-12 12:03:22 UTC (rev 13314)
@@ -1697,7 +1697,7 @@
<w><t>A-dam-i-cal-ly</t></w>
<w><t>Ad-am-ite</t></w>
<w><t>Ad-am-it-ic</t></w>
-<w><t>Ad-ams</t></w>
+<w><t>Ad-ams</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Ad-am’s ap-ple</t></phrase>
<w><t>ad-ams-ite</t></w>
<w><t>Ad-am’s=nee-dle</t></w>
@@ -2188,7 +2188,7 @@
<w><t>A-du-wa</t></w>
<w><t>adv</t></w>
<w><t>Ad-vai-ta</t></w>
-<phrase><t>ad va-lo-rem</t></phrase>
+<w><t>ad va-lo-rem</t></w>
<w><t>ad-vance</t></w>
<w><t>ad-vanced</t></w>
<phrase><t>ad-vanced gas=cooled re-ac-tor</t></phrase>
@@ -2967,7 +2967,7 @@
<w><t>ag-gran-dize-ment</t></w>
<w><t>ag-gran-diz-er</t></w>
<w><t>ag-gran-diz-ing</t></w>
-<w><t>ag-gra-vate</t></w>
+<w><t>ag-gra-vate</t><verb><regular-root/></verb></w>
<w><t>ag-gra-vated</t></w>
<w><t>ag-gra-vat-ing</t></w>
<w><t>ag-gra-vat-ing-ly</t></w>
@@ -2984,11 +2984,11 @@
<w><t>ag-gre-ga-tive</t></w>
<w><t>ag-gre-ga-to-ry</t></w>
<w><t>ag-gress</t></w>
-<w><t>ag-gres-sion</t></w>
+<w><t>ag-gres-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ag-gres-sive</t></w>
<w><t>ag-gres-sive-ly</t></w>
<w><t>ag-gres-sive-ness</t></w>
-<w><t>ag-gres-sor</t></w>
+<w><t>ag-gres-sor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ag-grieve</t></w>
<w><t>ag-grieved</t></w>
<w><t>ag-griev-ed-ly</t></w>
@@ -6003,7 +6003,7 @@
<w><t>An-drei</t></w>
<w><t>An-dre-ot-ti</t></w>
<w><t>An-drés</t></w>
-<w><t>An-drew</t></w>
+<w><t>An-drew</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>An-drewes</t></w>
<w><t>An-drews</t></w>
<w><t>An-drew’s=cross</t></w>
@@ -6394,7 +6394,7 @@
<w><t>anim</t></w>
<w><t>an-i-m</t></w>
<w><t>an-i-ma</t></w>
-<w><t>an-i-mad-ver-sion</t></w>
+<w><t>an-i-mad-ver-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>an-i-mad-ver-sion-al</t></w>
<w><t>an-i-mad-vert</t><verb><regular-root/></verb></w>
<w><t>an-i-mad-vert-er</t></w>
@@ -6593,7 +6593,7 @@
<phrase><t>an-nu-al ring</t></phrase>
<w><t>an-nu-i-tant</t></w>
<w><t>an-nu-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>an-nul</t></w>
+<w><t>an-nul</t><verb><regular-root/></verb></w>
<w><t>an-nu-lar</t></w>
<phrase><t>an-nu-lar e-clipse</t></phrase>
<w><t>an-nu-lar-i-ty</t></w>
@@ -9837,7 +9837,7 @@
<w><t>ar-ti-fact</t></w>
<w><t>ar-ti-fac-ti-tious</t></w>
<w><t>ar-ti-fice</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>ar-tif-i-cer</t></w>
+<w><t>ar-tif-i-cer</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ar-ti-fi-cial</t></w>
<phrase><t>ar-ti-fi-cial feel</t></phrase>
<phrase><t>ar-ti-fi-cial ho-ri-zon</t></phrase>
@@ -10224,7 +10224,7 @@
<w><t>as-pic</t></w>
<w><t>as-pi-dis-tra</t></w>
<w><t>As-pin-wall</t></w>
-<w><t>as-pir-ant</t></w>
+<w><t>as-pir-ant</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>as-pi-ra-ta</t></w>
<w><t>as-pi-rate</t></w>
<w><t>as-pi-rat-ed</t></w>
@@ -10353,7 +10353,7 @@
<w><t>as-sig-nat</t></w>
<w><t>as-sig-na-tion</t></w>
<w><t>as-sig-nats</t></w>
-<w><t>as-sign-ee</t></w>
+<w><t>as-sign-ee</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>as-sign-er</t></w>
<w><t>as-sign-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>as-sign-or</t></w>
@@ -11775,7 +11775,7 @@
<w><t>a-ver-sion</t></w>
<phrase><t>a-ver-sion ther-a-py</t></phrase>
<w><t>a-ver-sive</t></w>
-<w><t>a-vert</t></w>
+<w><t>a-vert</t><verb><regular-root/></verb></w>
<w><t>a-vert-a-ble</t></w>
<w><t>a-vert-ed-ly</t></w>
<w><t>a-vert-er</t></w>
@@ -12428,7 +12428,7 @@
<w><t>bac-la-va</t></w>
<w><t>Ba-co</t></w>
<w><t>Ba-co-lod</t></w>
-<w><t>Ba-con</t></w>
+<w><t>Ba-con</t><noun><convertible-to-possessive/></noun></w>
<w><t>ba-con</t></w>
<w><t>ba-con=and=eggs</t></w>
<w><t>ba-con-er</t></w>
@@ -13283,7 +13283,7 @@
<w><t>bar-ba-rise</t></w>
<w><t>bar-ba-rised</t></w>
<w><t>bar-ba-ris-ing</t></w>
-<w><t>bar-ba-rism</t></w>
+<w><t>bar-ba-rism</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>bar-bar-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>bar-ba-ri-za-tion</t></w>
<w><t>bar-ba-rize</t></w>
@@ -13694,7 +13694,7 @@
<w><t>Bar-thol-di</t></w>
<phrase><t>Bar-tho-lin’s glands</t></phrase>
<w><t>Bar-tho-lo-me-u</t></w>
-<w><t>Bar-thol-o-mew</t></w>
+<w><t>Bar-thol-o-mew</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Bar-thou</t></w>
<w><t>bar-ti-zan</t></w>
<w><t>bar-ti-zaned</t></w>
@@ -14184,7 +14184,7 @@
<w><t>Bayle</t></w>
<w><t>Bay-less</t></w>
<w><t>Bay-ley</t></w>
-<w><t>bay-o-net</t></w>
+<w><t>bay-o-net</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>bay-o-net-ed</t></w>
<w><t>bay-o-net-ing</t></w>
<w><t>bay-o-net-ted</t></w>
@@ -14241,7 +14241,7 @@
<w><t>Beach-wood</t></w>
<w><t>beach-y</t></w>
<phrase><t>Beach-y Head</t></phrase>
-<w><t>bea-con</t></w>
+<w><t>bea-con</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>bea-con-age</t></w>
<w><t>bea-con-less</t></w>
<w><t>Bea-cons-field</t></w>
@@ -14498,7 +14498,7 @@
<phrase><t>bed-ding plant</t></phrase>
<w><t>Bed-does</t></w>
<w><t>Bede</t></w>
-<w><t>be-deck</t></w>
+<w><t>be-deck</t><verb><regular-root/></verb></w>
<w><t>bed-e-gar</t></w>
<w><t>bed-e-guar</t></w>
<w><t>bede-house</t></w>
@@ -14707,7 +14707,7 @@
<w><t>be-gets</t><verb><lemma>beget</lemma></verb></w>
<w><t>be-get-ter</t></w>
<w><t>be-get-ting</t></w>
-<w><t>beg-gar</t></w>
+<w><t>beg-gar</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>beg-gar-dom</t></w>
<w><t>beg-gar-hood</t></w>
<w><t>beg-gar-ies</t></w>
@@ -19441,8 +19441,8 @@
<w><t>brec-ci-a-tion</t></w>
<w><t>Brecht</t></w>
<w><t>brecht</t></w>
-<w><t>Breck-en-ridge</t></w>
-<w><t>Breck-in-ridge</t></w>
+<w><t>Breck-en-ridge</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>Breck-in-ridge</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Breck-nock</t></w>
<w><t>Breck-nock-shire</t></w>
<w><t>Brecks-ville</t></w>
@@ -19798,7 +19798,7 @@
<w><t>bri-sure</t></w>
<w><t>brit</t></w>
<w><t>Brit</t></w>
-<w><t>Brit-ain</t></w>
+<w><t>Brit-ain</t><noun><convertible-to-possessive/></noun></w>
<w><t>Bri-tan-ni-a</t></w>
<phrase><t>Bri-tan-ni-a met-al</t></phrase>
<w><t>Bri-tan-nic</t></w>
@@ -20926,7 +20926,7 @@
<w><t>burk</t></w>
<w><t>bur-ka</t></w>
<w><t>Burk-bur-nett</t></w>
-<w><t>Burke</t></w>
+<w><t>Burke</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>burke</t></w>
<w><t>burk-er</t></w>
<w><t>bur-kha</t></w>
@@ -21973,7 +21973,7 @@
<phrase><t>calf’s=foot jel-ly</t></phrase>
<w><t>calf-skin</t></w>
<w><t>Cal-ga-ry</t></w>
-<w><t>Cal-houn</t></w>
+<w><t>Cal-houn</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Ca-li</t></w>
<w><t>Cal-i-ban</t></w>
<w><t>cal-i-ber</t></w>
@@ -22152,7 +22152,7 @@
<w><t>ca-lum-ni-a-to-ry</t></w>
<w><t>ca-lum-ni-ous</t></w>
<w><t>ca-lum-ni-ous-ly</t></w>
-<w><t>cal-um-ny</t></w>
+<w><t>cal-um-ny</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cal-u-tron</t></w>
<w><t>Cal-va-dos</t></w>
<w><t>cal-var-i-a</t></w>
@@ -22166,8 +22166,8 @@
<w><t>Cal-vin</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Cal-vi-na</t></w>
<w><t>calv-ing</t></w>
-<w><t>Cal-vin-ism</t></w>
-<w><t>Cal-vin-ist</t></w>
+<w><t>Cal-vin-ism</t><noun><singular/><convertible-to-possessive/></noun></w>
+<w><t>Cal-vin-ist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Cal-vin-is-tic</t></w>
<w><t>Cal-vin-is-ti-cal</t></w>
<w><t>Cal-vin-is-ti-cal-ly</t></w>
@@ -22917,7 +22917,7 @@
<w><t>cap-i-tal-is-er</t></w>
<w><t>cap-i-tal-is-ing</t></w>
<w><t>cap-i-tal-ism</t></w>
-<w><t>cap-i-tal-ist</t></w>
+<w><t>cap-i-tal-ist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cap-i-tal-is-tic</t></w>
<w><t>cap-i-tal-is-ti-cal-ly</t></w>
<w><t>cap-i-tal-iz-a-ble</t></w>
@@ -24083,7 +24083,7 @@
<w><t>cas-u-al-ist</t></w>
<w><t>cas-u-al-ly</t></w>
<w><t>cas-u-al-ness</t></w>
-<w><t>cas-u-al-ty</t></w>
+<w><t>cas-u-al-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cas-u-a-ri-na</t></w>
<w><t>cas-u-ist</t></w>
<w><t>cas-u-is-tic</t></w>
@@ -25456,11 +25456,12 @@
<w><t>chair-la-dy</t></w>
<w><t>chair-less</t></w>
<w><t>chair-lift</t></w>
-<w><t>chair-man</t></w>
+<w><t>chair-man</t><noun><singular/><convertible-to-possessive/></noun></w>
<w><t>chair-man-ing</t></w>
<w><t>chair-manned</t></w>
<w><t>chair-man-ning</t></w>
<w><t>chair-man-ship</t></w>
+<w><t>chair-men</t><noun><plural/><convertible-to-possessive/></noun></w>
<w><t>chair=warm-er</t></w>
<w><t>chair-wom-an</t></w>
<w><t>chair-wom-en</t></w>
@@ -25830,7 +25831,7 @@
<phrase><t>char-ac-ter-is-tic curve</t></phrase>
<w><t>char-ac-ter-iz-a-ble</t></w>
<w><t>char-ac-ter-i-za-tion</t></w>
-<w><t>char-ac-ter-ize</t></w>
+<w><t>char-ac-ter-ize</t><verb><regular-root/></verb></w>
<w><t>char-ac-ter-ized</t></w>
<w><t>char-ac-ter-iz-er</t></w>
<w><t>char-ac-ter-iz-ing</t></w>
@@ -26146,7 +26147,7 @@
<w><t>cha-zan</t></w>
<w><t>cha-zan-im</t></w>
<w><t>cheap</t><adjective><extensible/></adjective></w>
-<w><t>cheap-en</t></w>
+<w><t>cheap-en</t><verb><regular-root/></verb></w>
<w><t>cheap-en-er</t></w>
<w><t>cheap-jack</t></w>
<w><t>cheap-ly</t></w>
@@ -27970,7 +27971,7 @@
<w><t>cir-cler</t></w>
<w><t>cir-clet</t></w>
<w><t>Cir-clo-ra-ma</t></w>
-<w><t>cir-cuit</t></w>
+<w><t>cir-cuit</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cir-cuit-al</t></w>
<phrase><t>cir-cuit bind-ing</t></phrase>
<phrase><t>cir-cuit break-er</t></phrase>
@@ -30199,7 +30200,7 @@
<w><t>co-lon</t></w>
<w><t>Co-lón</t></w>
<w><t>co-lo-nate</t></w>
-<w><t>colo-nel</t></w>
+<w><t>colo-nel</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Colo-nel Blimp</t></phrase>
<w><t>colo-nel-cy</t></w>
<w><t>colo-nel-ship</t></w>
@@ -31623,7 +31624,7 @@
<w><t>con-duc-tive</t></w>
<w><t>con-duc-tive-ly</t></w>
<w><t>con-duc-tiv-i-ty</t></w>
-<w><t>con-duc-tor</t></w>
+<w><t>con-duc-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-duc-to-ri-al</t></w>
<w><t>con-duc-tor-less</t></w>
<w><t>con-duc-tor-ship</t></w>
@@ -31833,7 +31834,7 @@
<w><t>con-fut-a-ble</t></w>
<w><t>con-fu-ta-tion</t></w>
<w><t>con-fut-a-tive</t></w>
-<w><t>con-fute</t></w>
+<w><t>con-fute</t><verb><regular-root/></verb></w>
<w><t>con-fut-ed</t></w>
<w><t>con-fut-er</t></w>
<w><t>con-fut-ing</t></w>
@@ -32229,7 +32230,7 @@
<w><t>con-sig-na-tion</t></w>
<w><t>con-sign-ee</t></w>
<w><t>con-sign-er</t></w>
-<w><t>con-sign-ment</t></w>
+<w><t>con-sign-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-sign-or</t></w>
<w><t>con-sist</t><verb><regular-root/></verb></w>
<w><t>con-sist-ence</t></w>
@@ -32400,7 +32401,7 @@
<w><t>con-struc-tiv-ism</t></w>
<w><t>Con-struc-tiv-ist</t></w>
<w><t>con-struc-tor</t></w>
-<w><t>con-strue</t></w>
+<w><t>con-strue</t><verb><regular-root/></verb></w>
<w><t>con-strued</t></w>
<w><t>con-stru-er</t></w>
<w><t>con-stru-ing</t></w>
@@ -33252,7 +33253,7 @@
<w><t>Cop-pard</t></w>
<w><t>Cop-pée</t></w>
<w><t>Cop-pel-ia</t></w>
-<w><t>cop-per</t></w>
+<w><t>cop-per</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cop-per-ah</t></w>
<w><t>cop-per-as</t></w>
<phrase><t>Cop-per Belt</t></phrase>
@@ -33689,7 +33690,7 @@
<w><t>cor-po-rat-ist</t></w>
<w><t>cor-po-ra-tive</t></w>
<w><t>cor-po-rat-iv-ism</t></w>
-<w><t>cor-po-ra-tor</t></w>
+<w><t>cor-po-ra-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cor-po-re-al</t></w>
<w><t>cor-po-re-al-i-ty</t></w>
<w><t>cor-po-re-al-ly</t></w>
@@ -34144,7 +34145,7 @@
<w><t>cot-tier</t></w>
<w><t>cot-ti-er</t></w>
<w><t>Cot-ton</t></w>
-<w><t>cot-ton</t></w>
+<w><t>cot-ton</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective><extensible value="false"/></adjective></w>
<w><t>cot-ton-ade</t></w>
<phrase><t>cot-ton belt</t></phrase>
<phrase><t>cot-ton bush</t></phrase>
@@ -34222,7 +34223,7 @@
<w><t>cou-ma-rin</t></w>
<w><t>cou-ma-rone</t></w>
<w><t>coun-cil</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>coun-cil-lor</t></w>
+<w><t>coun-cil-lor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>coun-cil-lor-ship</t></w>
<w><t>coun-cil-man</t></w>
<phrase><t>coun-cil=man-ag-er plan</t></phrase>
@@ -34399,7 +34400,7 @@
<w><t>coun-ter-trans-fer-ence</t></w>
<w><t>coun-ter-turn</t></w>
<w><t>coun-ter-type</t></w>
-<w><t>coun-ter-vail</t></w>
+<w><t>coun-ter-vail</t><verb><regular-root/></verb></w>
<w><t>coun-ter-vair</t></w>
<w><t>coun-ter-view</t></w>
<w><t>coun-ter-weigh</t></w>
@@ -34454,7 +34455,7 @@
<w><t>Cou-perin</t></w>
<w><t>Cou-pe-rin</t></w>
<w><t>Cou-pe-rus</t></w>
-<w><t>cou-ple</t></w>
+<w><t>cou-ple</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>cou-ple=close</t></w>
<w><t>cou-ple-ment</t></w>
<w><t>cou-pler</t></w>
@@ -34792,12 +34793,12 @@
<w><t>cra-co-vi-enne</t></w>
<w><t>Crac-ow</t></w>
<w><t>Cra-cow</t></w>
-<w><t>cra-dle</t></w>
+<w><t>cra-dle</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>cra-dler</t></w>
<phrase><t>cra-dle snatch-er</t></phrase>
<w><t>cra-dle-song</t></w>
<w><t>cra-dling</t></w>
-<w><t>craft</t></w>
+<w><t>craft</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>craft-i-er</t></w>
<w><t>craft-i-est</t></w>
<w><t>craft-i-ly</t></w>
@@ -35298,7 +35299,7 @@
<w><t>Crève-coeur</t></w>
<w><t>crev-ice</t></w>
<w><t>crev-iced</t></w>
-<w><t>crew</t></w>
+<w><t>crew</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Crewe</t></w>
<w><t>crew-el</t></w>
<w><t>crew-el-ist</t></w>
@@ -36307,7 +36308,7 @@
<w><t>cul-ti-vat-ed</t></w>
<w><t>cul-ti-vat-ing</t></w>
<w><t>cul-ti-va-tion</t></w>
-<w><t>cul-ti-va-tor</t></w>
+<w><t>cul-ti-va-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cul-trate</t></w>
<w><t>cul-trat-ed</t></w>
<w><t>cul-tu-al</t></w>
@@ -38195,7 +38196,7 @@
<w><t>de-beak</t></w>
<w><t>de-beak-er</t></w>
<phrase><t>de Beau-voir</t></phrase>
-<w><t>de-ben-ture</t></w>
+<w><t>de-ben-ture</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-ben-tured</t></w>
<w><t>Deb-es</t></w>
<w><t>de-bil-i-tant</t></w>
@@ -38882,7 +38883,7 @@
<w><t>de-fem-i-nize</t></w>
<w><t>de-fem-i-nized</t></w>
<w><t>de-fem-i-niz-ing</t></w>
-<w><t>de-fence</t></w>
+<w><t>de-fence</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-fence-less</t></w>
<w><t>de-fence-less-ly</t></w>
<w><t>de-fence-less-ness</t></w>
@@ -39566,7 +39567,7 @@
<phrase><t>de-men-tia prae-cox</t></phrase>
<w><t>Dem-e-rar-a</t></w>
<w><t>dem-e-rar-a</t></w>
-<w><t>de-mer-it</t></w>
+<w><t>de-mer-it</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-mer-i-to-ri-ous</t></w>
<w><t>de-mer-i-to-ri-ous-ly</t></w>
<w><t>Dem-e-rol</t></w>
@@ -40239,7 +40240,7 @@
<w><t>de-pre-ci-at-ed</t></w>
<w><t>de-pre-ci-at-ing</t></w>
<w><t>de-pre-ci-at-ing-ly</t></w>
-<w><t>de-pre-ci-a-tion</t></w>
+<w><t>de-pre-ci-a-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-pre-ci-a-tive</t></w>
<w><t>de-pre-ci-a-tive-ly</t></w>
<w><t>de-pre-ci-a-tor</t></w>
@@ -40702,7 +40703,7 @@
<w><t>des-pot-i-cal</t></w>
<w><t>des-pot-i-cal-ly</t></w>
<w><t>des-pot-i-cal-ness</t></w>
-<w><t>des-pot-ism</t></w>
+<w><t>des-pot-ism</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Des Pror De-prez</t></phrase>
<w><t>des-pu-mate</t></w>
<w><t>de-spu-mate</t></w>
@@ -40754,7 +40755,7 @@
<w><t>de-struct-i-bil-i-ty</t></w>
<w><t>de-struct-i-ble</t></w>
<w><t>de-struct-i-ble-ness</t></w>
-<w><t>de-struc-tion</t></w>
+<w><t>de-struc-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-struc-tion-ist</t></w>
<w><t>de-struc-tive</t></w>
<phrase><t>de-struc-tive dis-til-la-tion</t></phrase>
@@ -41006,7 +41007,7 @@
<w><t>de-vel-ope-ment</t></w>
<w><t>de-vel-op-er</t></w>
<w><t>de-vel-op-ing</t></w>
-<w><t>de-vel-op-ment</t></w>
+<w><t>de-vel-op-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-vel-op-men-tal</t></w>
<w><t>de-vel-op-men-ta-ry</t></w>
<w><t>de-vel-op-pé</t></w>
@@ -42679,7 +42680,7 @@
<w><t>dis-cog-ra-phy</t></w>
<w><t>dis-coid</t></w>
<w><t>dis-coi-dal</t></w>
-<w><t>dis-col-or</t></w>
+<w><t>dis-col-or</t><verb><regular-root/></verb></w>
<w><t>dis-col-or-a-tion</t></w>
<w><t>dis-col-or-ment</t></w>
<w><t>dis-com-bob-u-late</t></w>
@@ -42768,7 +42769,7 @@
<w><t>dis-cour-age</t></w>
<w><t>dis-cour-age-a-ble</t></w>
<w><t>dis-cour-aged</t></w>
-<w><t>dis-cour-age-ment</t></w>
+<w><t>dis-cour-age-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dis-cour-ag-er</t></w>
<w><t>dis-cour-ag-ing</t></w>
<w><t>dis-cour-ag-ing-ly</t></w>
@@ -43363,7 +43364,7 @@
<w><t>dis-put-er</t></w>
<w><t>dis-put-ing</t></w>
<w><t>dis-qual-i-fi-a-ble</t></w>
-<w><t>dis-qual-i-fi-ca-tion</t></w>
+<w><t>dis-qual-i-fi-ca-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dis-qual-i-fied</t></w>
<w><t>dis-qual-i-fy</t><verb><regular-root/></verb></w>
<w><t>dis-qual-i-fy-ing</t></w>
@@ -43457,7 +43458,7 @@
<w><t>Dis-sent-er</t></w>
<w><t>dis-sen-tience</t></w>
<w><t>dis-sen-tien-cy</t></w>
-<w><t>dis-sen-tient</t></w>
+<w><t>dis-sen-tient</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective/></w>
<w><t>dis-sen-tient-ly</t></w>
<w><t>dis-sent-ing-ly</t></w>
<w><t>dis-sen-tious</t></w>
@@ -43694,7 +43695,7 @@
<w><t>dis-trib-u-tee</t></w>
<w><t>dis-trib-ut-er</t></w>
<w><t>dis-trib-ut-ing</t></w>
-<w><t>dis-tri-bu-tion</t></w>
+<w><t>dis-tri-bu-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dis-tri-bu-tion-al</t></w>
<phrase><t>dis-tri-bu-tion func-tion</t></phrase>
<w><t>dis-trib-u-tive</t></w>
@@ -45179,6 +45180,7 @@
<w><t>drae-ger-man</t></w>
<w><t>draff</t></w>
<w><t>draff-y</t></w>
+<word-placeholder><t>draft</t><country-specific country="USA"/></word-placeholder>
<w><t>drag</t><verb><regular-root/></verb></w>
<w><t>dra-g</t></w>
<w><t>dra-gée</t></w>
@@ -47161,7 +47163,7 @@
<w><t>ed-u-cat-ee</t></w>
<w><t>ed-u-cat-ing</t></w>
<w><t>Educa-tion</t></w>
-<w><t>ed-u-ca-tion</t></w>
+<w><t>ed-u-ca-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ed-u-ca-tion-al</t></w>
<w><t>ed-u-ca-tion-al-ist</t></w>
<w><t>ed-u-ca-tion-al-ly</t></w>
@@ -47520,10 +47522,10 @@
<w><t>eis-tedd-fods</t></w>
<w><t>ei-ther</t></w>
<w><t>ei-ther=or</t></w>
-<w><t>e-jac-u-late</t></w>
+<w><t>e-jac-u-late</t><verb><regular-root/></verb></w>
<w><t>e-jac-u-lat-ed</t></w>
<w><t>e-jac-u-lat-ing</t></w>
-<w><t>e-jac-u-la-tion</t></w>
+<w><t>e-jac-u-la-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>e-jac-u-la-tive</t></w>
<w><t>e-jac-u-la-tor</t></w>
<w><t>e-jac-u-la-to-ry</t></w>
@@ -48371,7 +48373,7 @@
<w><t>em-bed-ded</t></w>
<w><t>em-bed-ding</t></w>
<w><t>em-bed-ment</t></w>
-<w><t>em-bel-lish</t></w>
+<w><t>em-bel-lish</t><verb><regular-root/></verb></w>
<w><t>em-bel-lish-er</t></w>
<w><t>em-bel-lish-ment</t></w>
<w><t>em-ber</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -48523,7 +48525,7 @@
<w><t>Em-er-ald</t></w>
<w><t>em-er-ald</t></w>
<phrase><t>Em-er-ald Isle</t></phrase>
-<w><t>e-merge</t></w>
+<w><t>e-merge</t><verb><regular-root/></verb></w>
<w><t>e-merged</t></w>
<w><t>e-mer-gence</t></w>
<w><t>e-mer-gen-cy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -49915,7 +49917,7 @@
<w><t>en-vis-ag-ing</t></w>
<w><t>en-vi-sion</t></w>
<w><t>en-voi</t></w>
-<w><t>en-voy</t></w>
+<w><t>en-voy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>en-vy</t><noun><singular/></noun><verb><regular-root/></verb></w>
<w><t>en-vy-ing</t></w>
<w><t>en-vy-ing-ly</t></w>
@@ -51090,7 +51092,7 @@
<w><t>ESPN</t><noun><singular/><convertible-to-possessive/></noun><comment>Entertainment and Sports Programming Network</comment></w>
<w><t>Es-poo</t></w>
<w><t>es-pous-al</t></w>
-<w><t>es-pouse</t></w>
+<w><t>es-pouse</t><verb><regular-root/></verb></w>
<w><t>es-poused</t></w>
<w><t>es-pous-er</t></w>
<w><t>es-pous-ing</t></w>
@@ -52144,7 +52146,7 @@
<w><t>ex-act-ing</t></w>
<w><t>ex-act-ing-ly</t></w>
<w><t>ex-act-ing-ness</t></w>
-<w><t>ex-ac-tion</t></w>
+<w><t>ex-ac-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ex-act-i-tude</t></w>
<w><t>ex-act-ly</t></w>
<w><t>ex-act-ness</t></w>
@@ -52295,7 +52297,7 @@
<w><t>ex-cit-ed</t></w>
<w><t>ex-cit-ed-ly</t></w>
<w><t>ex-cit-ed-ness</t></w>
-<w><t>ex-cite-ment</t></w>
+<w><t>ex-cite-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ex-cit-er</t></w>
<w><t>ex-cit-ing</t></w>
<w><t>ex-cit-ing-ly</t></w>
@@ -52959,7 +52961,7 @@
<w><t>ex-po-si-tion-al</t></w>
<w><t>ex-pos-i-tive</t></w>
<w><t>ex-pos-i-tive-ly</t></w>
-<w><t>ex-pos-i-tor</t></w>
+<w><t>ex-pos-i-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ex-pos-i-to-ri-al</t></w>
<w><t>ex-pos-i-to-ri-al-ly</t></w>
<w><t>ex-pos-i-to-ri-ly</t></w>
@@ -52976,7 +52978,7 @@
<w><t>ex-po-sure</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>ex-po-sure me-ter</t></phrase>
<w><t>ex-pound</t><verb><regular-root/></verb></w>
-<w><t>ex-pound-er</t></w>
+<w><t>ex-pound-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ex=pres-i-dent</t></w>
<w><t>ex-press</t><verb><regular-root/></verb></w>
<w><t>ex-press-a-ble</t></w>
@@ -53116,7 +53118,7 @@
<w><t>ex-ten-siv-i-ty</t></w>
<w><t>ex-ten-som-e-ter</t></w>
<w><t>ex-ten-sor</t></w>
-<w><t>ex-tent</t></w>
+<w><t>ex-tent</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ex-ten-u-ate</t></w>
<w><t>ex-ten-u-at-ed</t></w>
<w><t>ex-ten-u-at-ing</t></w>
@@ -53475,7 +53477,7 @@
<w><t>fab-li-aux</t></w>
<w><t>Fa-bre</t></w>
<w><t>Fa-bri-a-no</t></w>
-<w><t>fab-ric</t></w>
+<w><t>fab-ric</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>fab-ri-cant</t></w>
<w><t>fab-ri-cate</t></w>
<w><t>fab-ri-cat-ed</t></w>
@@ -53941,7 +53943,7 @@
<w><t>Fa-na-ga-lo</t></w>
<w><t>Fan-a-ka-lo</t></w>
<w><t>fa-na-ka-lo</t></w>
-<w><t>fa-nat-ic</t></w>
+<w><t>fa-nat-ic</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>fa-nat-i-cal</t></w>
<w><t>fa-nat-i-cal-ly</t></w>
<w><t>fa-nat-i-cal-ness</t></w>
@@ -54260,7 +54262,7 @@
<w><t>fa-tal-ness</t></w>
<phrase><t>Fa-ta Mor-ga-na</t></phrase>
<w><t>fat-back</t></w>
-<w><t>fate</t></w>
+<w><t>fate</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>fat-ed</t></w>
<w><t>fate-ful</t></w>
<w><t>fate-ful-ly</t></w>
@@ -55842,7 +55844,7 @@
<w><t>fire=rais-ing</t></w>
<w><t>fire=re-sist-ant</t></w>
<w><t>fire=re-tard-ant</t></w>
-<w><t>fire-side</t></w>
+<w><t>fire-side</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>fire sta-tion</t></phrase>
<w><t>fire-stone</t></w>
<w><t>Fire-stone</t></w>
@@ -56615,7 +56617,7 @@
<w><t>fliv-ver</t></w>
<w><t>fl-neur</t></w>
<w><t>Flo</t></w>
-<w><t>float</t></w>
+<w><t>float</t><verb><regular-root/></verb></w>
<w><t>float-a-bil-i-ty</t></w>
<w><t>float-a-ble</t></w>
<w><t>float-age</t></w>
@@ -57128,7 +57130,7 @@
<w><t>foe-tor</t></w>
<w><t>foe-tus</t></w>
<w><t>foe-tus-es</t></w>
-<w><t>fog</t></w>
+<w><t>fog</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>Fo-gar-ty</t></w>
<w><t>fog-bound</t></w>
<w><t>fog-bow</t></w>
@@ -57329,7 +57331,7 @@
<w><t>foot-bridge</t></w>
<w><t>foot=can-dle</t></w>
<w><t>foot-cloth</t></w>
-<w><t>Foote</t></w>
+<w><t>Foote</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>foot-ed</t></w>
<w><t>foot-er</t></w>
<w><t>foot-fall</t></w>
@@ -57675,9 +57677,10 @@
<w><t>fore-taste</t></w>
<w><t>fore-tast-ed</t></w>
<w><t>fore-tast-ing</t></w>
-<w><t>fore-tell</t></w>
+<w><t>fore-tell</t><verb><regular-root value="false"/></verb></w>
<w><t>fore-tell-er</t></w>
-<w><t>fore-tell-ing</t></w>
+<w><t>fore-tell-ing</t><verb><lemma>foretell</lemma></verb></w>
+<w><t>fore-tells</t><verb><lemma>foretell</lemma></verb></w>
<w><t>fore-thought</t></w>
<w><t>fore-thought-ful</t></w>
<w><t>fore-thought-ful-ly</t></w>
@@ -57684,7 +57687,7 @@
<w><t>fore-thought-ful-ness</t></w>
<w><t>fore-time</t></w>
<w><t>fore-to-ken</t></w>
-<w><t>fore-told</t></w>
+<w><t>fore-told</t><verb><lemma>foretell</lemma></verb></w>
<w><t>fore-tooth</t></w>
<w><t>fore-top</t></w>
<w><t>fore=top-gal-lant</t></w>
@@ -57919,8 +57922,10 @@
<w><t>Forss-man</t></w>
<w><t>For-ster</t></w>
<w><t>for-ster-ite</t></w>
-<w><t>for-swear</t></w>
+<w><t>for-swear</t><verb><regular-root value="false"/></verb></w>
<w><t>for-swear-er</t></w>
+<w><t>for-swears</t><verb><lemma>forswear</lemma></verb></w>
+<w><t>for-swore</t><verb><lemma>forswear</lemma></verb></w>
<w><t>for-sworn</t></w>
<w><t>for-sworn-ness</t></w>
<w><t>For-syth</t></w>
@@ -58496,7 +58501,7 @@
<w><t>Fré-dé-ric</t></w>
<w><t>Fred-e-ri-ca</t></w>
<w><t>Fre-de-ri-cia</t></w>
-<w><t>Fred-er-ick</t></w>
+<w><t>Fred-er-ick</t><noun><convertible-to-possessive/></noun></w>
<phrase><t>Fred-er-ick I</t></phrase>
<phrase><t>Fred-er-ick II</t></phrase>
<phrase><t>Fred-er-ick III</t></phrase>
@@ -58915,7 +58920,7 @@
<w><t>fri-til-lar-y</t></w>
<w><t>fritt</t></w>
<w><t>frit-ted</t></w>
-<w><t>frit-ter</t></w>
+<w><t>frit-ter</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>frit-ter-er</t></w>
<w><t>frit-ting</t></w>
<w><t>Fritz</t></w>
@@ -60012,7 +60017,7 @@
<w><t>gal-ler-y-like</t></w>
<w><t>gal-let</t></w>
<w><t>gal-let-ing</t></w>
-<w><t>gal-ley</t></w>
+<w><t>gal-ley</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>gal-ley-like</t></w>
<phrase><t>gal-ley proof</t></phrase>
<phrase><t>gal-ley slave</t></phrase>
@@ -61059,7 +61064,7 @@
<w><t>gen-er-a-bil-i-ty</t></w>
<w><t>gen-er-a-ble</t></w>
<w><t>gen-er-a-ble-ness</t></w>
-<w><t>gen-er-al</t></w>
+<w><t>gen-er-al</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective><extensible value="false"/></adjective></w>
<phrase><t>gen-er-al an-aes-thet-ic</t></phrase>
<phrase><t>Gen-er-al As-sem-bly</t></phrase>
<w><t>gen-er-al-cy</t></w>
@@ -62096,7 +62101,7 @@
<phrase><t>giv-en name</t></phrase>
<phrase><t>give on-to</t></phrase>
<phrase><t>give o-ver</t></phrase>
-<w><t>giv-er</t></w>
+<w><t>giv-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>gives</t><verb><regular-root value="false"/></verb></w>
<w><t>giv-ing</t></w>
<w><t>Gi-za</t></w>
@@ -62201,7 +62206,7 @@
<w><t>glam-or-ous</t></w>
<w><t>glam-or-ous-ly</t></w>
<w><t>glam-or-ous-ness</t></w>
-<w><t>glance</t></w>
+<w><t>glance</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>glanc-ing</t></w>
<w><t>glanc-ing-ly</t></w>
<w><t>gland</t></w>
@@ -62297,7 +62302,7 @@
<w><t>GLC</t></w>
<w><t>gld</t></w>
<w><t>G-le</t></w>
-<w><t>gleam</t></w>
+<w><t>gleam</t><verb><regular-root/></verb></w>
<w><t>gleam-ing-ly</t></w>
<w><t>gleam-less</t></w>
<w><t>glean</t></w>
@@ -62505,7 +62510,7 @@
<w><t>glo-ry=of=the=sun</t></w>
<w><t>glo-ry=pea</t></w>
<w><t>Glos</t></w>
-<w><t>gloss</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>gloss</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>Glos-sa</t></w>
<w><t>glos-sa</t></w>
<w><t>glos-sal</t></w>
@@ -64859,7 +64864,7 @@
<w><t>grown-up</t></w>
<w><t>grown=up-ness</t></w>
<w><t>grows</t><verb><regular-root value="false"/></verb></w>
-<w><t>growth</t></w>
+<w><t>growth</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>groyne</t></w>
<phrase><t>groz-ing i-ron</t></phrase>
<w><t>Groz-ny</t></w>
@@ -66031,7 +66036,7 @@
<w><t>hai-kai</t></w>
<w><t>hai-kal</t></w>
<w><t>hai-ku</t></w>
-<w><t>hail</t></w>
+<w><t>hail</t><noun><singular/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>hail-er</t></w>
<phrase><t>Hai-le Se-las-sie</t></phrase>
<w><t>hail=fel-low</t></w>
@@ -68265,7 +68270,7 @@
<w><t>hel-den-ten-te-no-re</t></w>
<w><t>Hel-en</t></w>
<w><t>Hel-e-na</t></w>
-<w><t>He-le-na</t></w>
+<w><t>He-le-na</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>He-lene</t></w>
<w><t>He-le-nor</t></w>
<w><t>Hel-ens</t></w>
@@ -69880,7 +69885,7 @@
<w><t>hind-ward</t></w>
<w><t>Hines</t></w>
<w><t>Hines-ville</t></w>
-<w><t>hinge</t></w>
+<w><t>hinge</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>hinge-less</t></w>
<w><t>hinge-like</t></w>
<w><t>hing-ing</t></w>
@@ -70297,7 +70302,7 @@
<w><t>hog-nut</t></w>
<phrase><t>hog pea-nut</t></phrase>
<phrase><t>hog’s fen-nel</t></phrase>
-<w><t>hogs-head</t></w>
+<w><t>hogs-head</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>hog-tie</t></w>
<w><t>hog-tied</t></w>
<w><t>hog-ty-ing</t></w>
@@ -71641,7 +71646,7 @@
<w><t>huck-le</t></w>
<w><t>huck-le-ber-ry</t></w>
<w><t>huck-le-bone</t></w>
-<w><t>huck-ster</t></w>
+<w><t>huck-ster</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>huck-ster-er</t></w>
<w><t>huck-ster-ess</t></w>
<w><t>huck-ster-ism</t></w>
@@ -74929,7 +74934,7 @@
<w><t>im-pound-age</t></w>
<w><t>im-pound-er</t></w>
<w><t>im-pound-ment</t></w>
-<w><t>im-pov-er-ish</t></w>
+<w><t>im-pov-er-ish</t><verb><regular-root/></verb></w>
<w><t>im-pov-er-ished</t></w>
<w><t>im-pov-er-ish-er</t></w>
<w><t>im-pov-er-ish-ment</t></w>
@@ -75059,7 +75064,7 @@
<w><t>im-pu-dent-ly</t></w>
<w><t>im-pu-dent-ness</t></w>
<w><t>im-pu-dic-i-ty</t></w>
-<w><t>im-pugn</t></w>
+<w><t>im-pugn</t><verb><regular-root/></verb></w>
<w><t>im-pugn-a-bil-i-ty</t></w>
<w><t>im-pugn-a-ble</t></w>
<w><t>im-pugn-er</t></w>
@@ -75279,7 +75284,7 @@
<w><t>in-ca-pac-i-tat-ed</t></w>
<w><t>in-ca-pac-i-tat-ing</t></w>
<w><t>in-ca-pac-i-ta-tion</t></w>
-<w><t>in-ca-pac-i-ty</t></w>
+<w><t>in-ca-pac-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>In-cap-a-ri-na</t></w>
<w><t>in-cap-su-late</t></w>
<w><t>in=car</t></w>
@@ -75383,7 +75388,7 @@
<w><t>in-ci-sure</t></w>
<w><t>in-cit-ant</t></w>
<w><t>in-ci-ta-tion</t></w>
-<w><t>in-cite</t></w>
+<w><t>in-cite</t><verb><regular-root/></verb></w>
<w><t>in-cit-ed</t></w>
<w><t>in-cite-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-cit-er</t></w>
@@ -75405,7 +75410,7 @@
<w><t>in-cli-na-tion-al</t></w>
<w><t>in-cli-na-to-ri-ly</t></w>
<w><t>in-cli-na-to-ry</t></w>
-<w><t>in-cline</t></w>
+<w><t>in-cline</t><noun><pluralizable/></noun><verb><regular-root/></verb></w>
<w><t>in-clined</t></w>
<phrase><t>in-clined plane</t></phrase>
<phrase><t>in-clined rail-way</t></phrase>
@@ -75536,7 +75541,7 @@
<w><t>in-con-gru-ence</t></w>
<w><t>in-con-gru-ent</t></w>
<w><t>in-con-gru-ent-ly</t></w>
-<w><t>in-con-gru-i-ty</t></w>
+<w><t>in-con-gru-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-con-gru-ous</t></w>
<w><t>in-con-gru-ous-ly</t></w>
<w><t>in-con-gru-ous-ness</t></w>
@@ -75620,7 +75625,7 @@
<w><t>in-cor-po-rat-ed</t></w>
<w><t>in-cor-po-rat-ed-ness</t></w>
<w><t>in-cor-po-rat-ing</t></w>
-<w><t>in-cor-po-ra-tion</t></w>
+<w><t>in-cor-po-ra-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-cor-po-ra-tive</t></w>
<w><t>in-cor-po-ra-tor</t></w>
<w><t>in-cor-po-re-al</t></w>
@@ -76496,7 +76501,7 @@
<w><t>in-fect-ant</t></w>
<w><t>in-fect-ed-ness</t></w>
<w><t>in-fect-er</t></w>
-<w><t>in-fec-tion</t></w>
+<w><t>in-fec-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-fec-tious</t></w>
<phrase><t>in-fec-tious hep-a-ti-tis</t></phrase>
<w><t>in-fec-tious-ly</t></w>
@@ -76513,7 +76518,7 @@
<w><t>in-fe-lic-i-ty</t></w>
<w><t>in-felt</t></w>
<w><t>in-feoff</t></w>
-<w><t>in-fer</t></w>
+<w><t>in-fer</t><verb><regular-root/></verb></w>
<w><t>in-fer-a-ble</t></w>
<w><t>in-fer-a-bly</t></w>
<w><t>in-fer-ence</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -76687,7 +76692,7 @@
<w><t>in-for-tune</t></w>
<w><t>in-fra</t></w>
<w><t>in-fra-cos-tal</t></w>
-<w><t>in-fract</t></w>
+<w><t>in-fract</t><verb><regular-root/></verb></w>
<w><t>in-frac-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-frac-tor</t></w>
<phrase><t>in-fra dig</t></phrase>
@@ -76881,7 +76886,7 @@
<w><t>in-her-i-tri-ces</t></w>
<w><t>in-her-i-trix</t></w>
<w><t>in-he-sion</t></w>
-<w><t>in-hib-it</t></w>
+<w><t>in-hib-it</t><verb><regular-root/></verb></w>
<w><t>in-hib-it-a-ble</t></w>
<w><t>in-hib-it-er</t></w>
<w><t>in-hi-bi-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -76952,7 +76957,7 @@
<w><t>in-ju-di-cious-ly</t></w>
<w><t>in-ju-di-cious-ness</t></w>
<w><t>In-jun</t></w>
-<w><t>in-junc-tion</t></w>
+<w><t>in-junc-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-junc-tive</t></w>
<w><t>in-junc-tive-ly</t></w>
<w><t>in-jur-a-ble</t></w>
@@ -77177,7 +77182,7 @@
<w><t>in-quir-a-ble</t></w>
<w><t>in-quire</t><verb><regular-root/></verb></w>
<w><t>in-quired</t></w>
-<w><t>in-quir-er</t></w>
+<w><t>in-quir-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-quir-ies</t></w>
<w><t>in-quir-ing</t></w>
<w><t>in-quir-ing-ly</t></w>
@@ -77234,7 +77239,7 @@
<w><t>in-scribed</t></w>
<w><t>in-scrib-er</t></w>
<w><t>in-scrib-ing</t></w>
-<w><t>in-scrip-tion</t></w>
+<w><t>in-scrip-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-scrip-tion-al</t></w>
<w><t>in-scrip-tion-less</t></w>
<w><t>in-scrip-tive</t></w>
@@ -77385,7 +77390,7 @@
<w><t>in-solv-a-bil-i-ty</t></w>
<w><t>in-solv-a-ble</t></w>
<w><t>in-solv-a-bly</t></w>
-<w><t>in-sol-ven-cy</t></w>
+<w><t>in-sol-ven-cy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-sol-vent</t></w>
<w><t>in-som-ni-a</t></w>
<w><t>in-som-ni-ac</t></w>
@@ -77719,7 +77724,7 @@
<w><t>in-tem-er-ate</t></w>
<w><t>in-tem-er-ate-ly</t></w>
<w><t>in-tem-er-ate-ness</t></w>
-<w><t>in-tem-per-ance</t></w>
+<w><t>in-tem-per-ance</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-tem-per-ate</t></w>
<w><t>in-tem-per-ate-ly</t></w>
<w><t>in-tem-per-ate-ness</t></w>
@@ -78107,7 +78112,7 @@
<w><t>in-ter-fem-o-ral</t></w>
<w><t>in-ter-fe-nes-tral</t></w>
<w><t>in-ter-fen-es-tra-tion</t></w>
-<w><t>in-ter-fere</t></w>
+<w><t>in-ter-fere</t><verb><regular-root/></verb></w>
<w><t>in-ter-fered</t></w>
<w><t>in-ter-fer-ence</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-ter-fe-ren-tial</t></w>
@@ -80931,7 +80936,7 @@
<w><t>jan-i-tor-ship</t></w>
<w><t>jan-i-tress</t></w>
<w><t>Jan-i-zar-ies</t></w>
-<w><t>jan-i-zar-y</t></w>
+<w><t>jan-i-zar-y</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Jan-i-zar-y</t></w>
<phrase><t>Jan May-en</t></phrase>
<w><t>jan-nock</t></w>
@@ -82006,7 +82011,7 @@
<w><t>ju-di-ca-to-ry</t></w>
<w><t>ju-di-ca-ture</t></w>
<w><t>ju-di-ci-a-ble</t></w>
-<w><t>ju-di-cial</t></w>
+<w><t>ju-di-cial</t><adjective><extensible value="false"/></adjective></w>
<w><t>ju-di-cial-ly</t></w>
<w><t>ju-di-cial-ness</t></w>
<phrase><t>ju-di-cial sep-a-ra-tion</t></phrase>
@@ -82250,7 +82255,7 @@
<w><t>Jus-si</t></w>
<w><t>jus-sive</t></w>
<phrase><t>jus so-li</t></phrase>
-<w><t>just</t></w>
+<w><t>just</t><adjective><extensible value="false"/></adjective></w>
<w><t>Jus-ta</t></w>
<w><t>just-au-corps</t></w>
<w><t>juste=mi-lieu</t></w>
@@ -84756,6 +84761,7 @@
<w><t>la-bi-o-ve-lar-iz-ing</t></w>
<w><t>la-bi-um</t></w>
<w><t>lab-lab</t></w>
+<word-placeholder><t>labor</t><country-specific country="USA"/></word-placeholder>
<w><t>lab-o-ra-to-ri-al</t></w>
<w><t>lab-o-ra-to-ri-al-ly</t></w>
<w><t>lab-o-ra-to-ri-an</t></w>
@@ -84778,7 +84784,7 @@
<phrase><t>La-bor Par-ty</t></phrase>
<w><t>la-bor=sav-ing</t></w>
<phrase><t>la-bor un-ion</t></phrase>
-<w><t>la-bour</t></w>
+<word-placeholder><t>labour</t><country-specific country="GBR"/></word-placeholder>
<phrase><t>la-bour camp</t></phrase>
<phrase><t>La-bour Day</t></phrase>
<w><t>la-boured</t></w>
@@ -85286,7 +85292,7 @@
<w><t>lam-en-ta-ble</t></w>
<w><t>lam-en-ta-ble-ness</t></w>
<w><t>lam-en-ta-bly</t></w>
-<w><t>lam-en-ta-tion</t></w>
+<w><t>lam-en-ta-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>la-men-ta-tion</t></w>
<w><t>Lam-en-ta-tions</t></w>
<w><t>la-ment-ed</t></w>
@@ -86679,7 +86685,7 @@
<w><t>le-gion-naire</t></w>
<phrase><t>Le-gion of hon-or</t></phrase>
<w><t>legis</t></w>
-<w><t>leg-is-late</t></w>
+<w><t>leg-is-late</t><verb><regular-root/></verb></w>
<w><t>leg-is-lat-ed</t></w>
<w><t>leg-is-lat-ing</t></w>
<w><t>leg-is-la-tion</t></w>
@@ -86699,7 +86705,7 @@
<w><t>le-git</t></w>
<w><t>leg-i-tim</t></w>
<w><t>le-git-i-ma-cy</t></w>
-<w><t>le-git-i-mate</t></w>
+<w><t>le-git-i-mate</t><verb><regular-root/></verb><adjective><extensible value="false"/></adjective></w>
<w><t>le-git-i-mat-ed</t></w>
<w><t>le-git-i-mate-ly</t></w>
<w><t>le-git-i-mate-ness</t></w>
@@ -87552,12 +87558,12 @@
<w><t>li-cenced</t></w>
<w><t>li-cen-cee</t></w>
<w><t>li-cenc-ing</t></w>
-<w><t>li-cense</t></w>
+<w><t>li-cense</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>li-censed</t></w>
<w><t>li-cen-see</t></w>
<w><t>li-cense-less</t></w>
<phrase><t>li-cense plate</t></phrase>
-<w><t>li-cens-er</t></w>
+<w><t>li-cens-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>li-cens-ing</t></w>
<w><t>li-cen-sor</t></w>
<w><t>li-cen-ti-ate</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -91427,7 +91433,8 @@
<w><t>male-ness</t></w>
<w><t>Ma-len-kov</t></w>
<w><t>Ma-le-vich</t></w>
-<w><t>ma-lev-o-lent</t></w>
+<w><t>ma-lev-o-lence</t><noun/></w>
+<w><t>ma-lev-o-lent</t><adjective/></w>
<w><t>ma-lev-o-lent-ly</t></w>
<w><t>mal-fea-sance</t></w>
<w><t>mal-for-ma-tion</t></w>
@@ -91691,7 +91698,7 @@
<phrase><t>man-da-rin duck</t></phrase>
<w><t>man-da-tar-ies</t></w>
<w><t>man-da-tar-y</t></w>
-<w><t>man-date</t></w>
+<w><t>man-date</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>man-dat-ed</t></w>
<w><t>man-dat-ing</t></w>
<w><t>man-da-to-ri-ly</t></w>
@@ -91737,7 +91744,7 @@
<w><t>Ma-net</t></w>
<w><t>ma-net</t></w>
<w><t>Man-e-tho</t></w>
-<w><t>ma-neu-ver</t><verb><regular-root/></verb></w>
+<word-placeholder><t>ma-neu-ver</t><country-specific country="USA"/></word-placeholder>
<w><t>ma-neu-ver-a-bil-i-ty</t></w>
<w><t>ma-neu-ver-a-ble</t></w>
<w><t>ma-neu-ver-er</t></w>
@@ -91921,7 +91928,8 @@
<w><t>man-nose</t></w>
<w><t>Man-ny</t></w>
<w><t>Ma-no-ah</t></w>
-<w><t>ma-noeu-vre</t></w>
+<word-placeholder><t>ma-noeu-vre</t><country-specific country="GBR"/></word-placeholder>
+<word-placeholder><t>ma-nœu-vre</t><country-specific country="GBR"/></word-placeholder>
<w><t>Ma-no-le-te</t></w>
<w><t>ma-nom-e-ter</t></w>
<w><t>man-o-met-ric</t></w>
@@ -94046,9 +94054,9 @@
<w><t>me-mo-ri-al-ised</t></w>
<w><t>me-mo-ri-al-is-er</t></w>
<w><t>me-mo-ri-al-is-ing</t></w>
-<w><t>me-mo-ri-al-ist</t></w>
+<w><t>me-mo-ri-al-ist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>me-mo-ri-al-i-za-tion</t></w>
-<w><t>me-mo-ri-al-ize</t></w>
+<w><t>me-mo-ri-al-ize</t><verb><regular-root/></verb></w>
<w><t>me-mo-ri-al-iz-er</t></w>
<w><t>me-mo-ri-al-ly</t></w>
<phrase><t>me-mo-ri-a tech-ni-ca</t></phrase>
@@ -94811,7 +94819,7 @@
<w><t>met-a-zo-an</t></w>
<w><t>met-a-zo-ic</t></w>
<w><t>Metch-ni-koff</t></w>
-<w><t>mete</t></w>
+<w><t>mete</t><verb><regular-root/></verb></w>
<w><t>met-ed</t></w>
<w><t>met-em-pir-ic</t></w>
<w><t>met-em-pir-i-cal</t></w>
@@ -95624,8 +95632,9 @@
<w><t>mil-i-tate</t><verb><regular-root/></verb></w>
<w><t>mil-i-tat-ed</t></w>
<w><t>mil-i-tat-ing</t></w>
-<w><t>mi-li-tia</t></w>
-<w><t>mi-li-tia-man</t></w>
+<w><t>mi-li-tia</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>mi-li-tia-man</t><noun><singular/><convertible-to-possessive/></noun></w>
+<w><t>mi-li-tia-men</t><noun><plural/><convertible-to-possessive/></noun></w>
<w><t>mil-i-um</t></w>
<w><t>milk</t><noun/><verb><regular-root/></verb></w>
<w><t>milk=and=wa-ter</t></w>
@@ -96167,6 +96176,7 @@
<w><t>mis-an-thro-py</t></w>
<w><t>mis-ap-pel-la-tion</t></w>
<w><t>mis-ap-pend-ed</t></w>
+<w><t>mis-ap-pli-ca-tion</t><noun/></w>
<w><t>mis-ap-plied</t></w>
<w><t>mis-ap-pli-er</t></w>
<w><t>mis-ap-ply</t></w>
@@ -96296,7 +96306,7 @@
<w><t>mis-con-ju-gate</t></w>
<w><t>mis-con-ju-gat-ed</t></w>
<w><t>mis-con-ju-gat-ing</t></w>
-<w><t>mis-con-struc-tion</t></w>
+<w><t>mis-con-struc-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mis-con-strue</t></w>
<w><t>mis-con-strued</t></w>
<w><t>mis-con-stru-ing</t></w>
@@ -96422,7 +96432,7 @@
<w><t>mis-giv-en</t></w>
<w><t>mis-giv-ing</t></w>
<w><t>mis-giv-ing-ly</t></w>
-<w><t>mis-gov-ern</t></w>
+<w><t>mis-gov-ern</t><verb><regular-root/></verb></w>
<w><t>mis-grade</t></w>
<w><t>mis-grad-ed</t></w>
<w><t>mis-grad-ing</t></w>
@@ -96878,7 +96888,7 @@
<w><t>mit-i-gate</t><verb><regular-root/></verb></w>
<w><t>mit-i-gat-ed-ly</t></w>
<phrase><t>mit-i-gat-ing cir-cum-stanc-es</t></phrase>
-<w><t>mit-i-ga-tion</t></w>
+<w><t>mit-i-ga-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mit-i-ga-tive</t></w>
<w><t>mit-i-ga-tor</t></w>
<w><t>mit-i-ga-to-ry</t></w>
@@ -97087,7 +97097,7 @@
<w><t>mod-er-a-tor</t></w>
<w><t>mod-er-a-to-ri-al</t></w>
<w><t>mod-er-a-tor-ship</t></w>
-<w><t>mod-ern</t></w>
+<w><t>mod-ern</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective><extensible value="false"/></adjective></w>
<phrase><t>mod-ern dance</t></phrase>
<w><t>mo-derne</t></w>
<phrase><t>Mod-ern Eng-lish</t></phrase>
@@ -97822,7 +97832,7 @@
<w><t>mon-ox-ide</t></w>
<w><t>mon-o-zy-got-ic</t></w>
<w><t>mon-o-zy-gous</t></w>
-<w><t>Mon-roe</t></w>
+<w><t>Mon-roe</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Mon-roe doc-trine</t></phrase>
<w><t>Mon-ro-vi-a</t></w>
<w><t>Mons</t></w>
@@ -99312,7 +99322,7 @@
<w><t>mus-cu-la-ture</t></w>
<w><t>MusD</t></w>
<w><t>Muse</t></w>
-<w><t>muse</t></w>
+<w><t>muse</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>muse-ful</t></w>
<w><t>mu-se-ol-o-gist</t></w>
<w><t>mus-e-ol-o-gy</t></w>
@@ -100105,7 +100115,7 @@
<w><t>nar-rat-ed</t></w>
<w><t>nar-rat-er</t></w>
<w><t>nar-rat-ing</t></w>
-<w><t>nar-ra-tion</t></w>
+<w><t>nar-ra-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>nar-ra-tive</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>nar-ra-tive-ly</t></w>
<w><t>nar-ra-tor</t></w>
@@ -101212,7 +101222,7 @@
<w><t>Neus-tri-an</t></w>
<w><t>neut</t></w>
<w><t>neu-ter</t></w>
-<w><t>neu-tral</t></w>
+<w><t>neu-tral</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective/></w>
<w><t>neu-tral-i-sa-tion</t></w>
<w><t>neu-tral-ise</t></w>
<w><t>neu-tral-ism</t></w>
@@ -107964,7 +107974,7 @@
<w><t>nul-lip-a-rous</t></w>
<w><t>nul-li-pore</t></w>
<w><t>nul-lip-o-rous</t></w>
-<w><t>nul-li-ty</t></w>
+<w><t>nul-li-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>null=man-i-fold</t></w>
<w><t>nul-lo</t></w>
<w><t>Num</t></w>
@@ -112813,7 +112823,7 @@
<w><t>o-ver-heart-i-ly</t></w>
<w><t>o-ver-heart-i-ness</t></w>
<w><t>o-ver-heart-y</t></w>
-<w><t>o-ver-heat</t></w>
+<w><t>o-ver-heat</t><verb><regular-root/></verb></w>
<w><t>o-ver-heav-i-ly</t></w>
<w><t>o-ver-heav-i-ness</t></w>
<w><t>o-ver-heav-y</t></w>
@@ -114217,7 +114227,7 @@
<w><t>pack-thread</t></w>
<w><t>pack-thread-ed</t></w>
<w><t>pack-train</t></w>
-<w><t>pact</t></w>
+<w><t>pact</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pac-tion</t></w>
<w><t>pac-tion-al</t></w>
<w><t>pac-tion-al-ly</t></w>
@@ -115567,7 +115577,7 @@
<w><t>par-don-less</t></w>
<w><t>Par-du-bi-ce</t></w>
<w><t>par-dy</t></w>
-<w><t>pare</t></w>
+<w><t>pare</t><verb><regular-root/></verb></w>
<w><t>Pa-ré</t></w>
<w><t>pa-re-cious</t></w>
<w><t>pa-re-cious-ness</t></w>
@@ -116377,7 +116387,7 @@
<w><t>pa-trix</t></w>
<w><t>Pa-troc-lus</t></w>
<w><t>Pa-tro-clus</t></w>
-<w><t>pa-trol</t></w>
+<w><t>pa-trol</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<phrase><t>pa-trol car</t></phrase>
<w><t>pa-trolled</t></w>
<w><t>pa-trol-ler</t></w>
@@ -119802,7 +119812,7 @@
<w><t>Pie-mon-te</t></w>
<w><t>piend</t></w>
<w><t>pie-plant</t></w>
-<w><t>pier</t></w>
+<w><t>pier</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Pierce</t></w>
<w><t>pierce</t><verb><regular-root/></verb></w>
<w><t>pierce-a-ble</t></w>
@@ -121042,7 +121052,7 @@
<w><t>ple-o-mor-phy</t></w>
<w><t>ple-on</t></w>
<w><t>ple-on-al</t></w>
-<w><t>ple-o-nasm</t></w>
+<w><t>ple-o-nasm</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ple-o-nas-ti-cal-ly</t></w>
<w><t>ple-o-pod</t></w>
<w><t>pler-er-gate</t></w>
@@ -123231,7 +123241,7 @@
<w><t>Pous-sin</t></w>
<w><t>pous-sin</t></w>
<w><t>Pous-sin-isme</t></w>
-<w><t>pout</t></w>
+<w><t>pout</t><verb><regular-root/></verb></w>
<w><t>pout-er</t></w>
<w><t>pout-ful</t></w>
<w><t>pout-ing-ly</t></w>
@@ -124504,7 +124514,7 @@
<w><t>pre-de-ter-mi-nate</t></w>
<w><t>pre-de-ter-mi-na-tion</t></w>
<w><t>pre-de-ter-mi-na-tive</t></w>
-<w><t>pre-de-ter-mine</t></w>
+<w><t>pre-de-ter-mine</t><verb><regular-root/></verb></w>
<w><t>pre-de-ter-min-er</t></w>
<w><t>pre-det-ri-men-tal</t></w>
<w><t>pre-de-vel-op</t></w>
@@ -125045,7 +125055,7 @@
<w><t>pref-er-a-ble</t></w>
<w><t>pref-er-a-ble-ness</t></w>
<w><t>pref-er-a-bly</t></w>
-<w><t>pref-er-ence</t></w>
+<w><t>pref-er-ence</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>pref-er-ence shares</t></phrase>
<w><t>pref-er-en-tial</t></w>
<w><t>pref-er-en-tial-ist</t></w>
@@ -125441,7 +125451,7 @@
<w><t>pre-is-su-ing</t></w>
<w><t>pre=Jew-ish</t></w>
<w><t>pre-jour-nal-is-tic</t></w>
-<w><t>pre-judge</t></w>
+<w><t>pre-judge</t><verb><regular-root/></verb></w>
<w><t>pre-judge-ment</t></w>
<w><t>pre-judg-er</t></w>
<w><t>pre-judg-ment</t></w>
@@ -126182,7 +126192,7 @@
<w><t>pre-re-quired</t></w>
<w><t>pre-re-quire-ment</t></w>
<w><t>pre-re-quir-ing</t></w>
-<w><t>pre-req-ui-site</t></w>
+<w><t>pre-req-ui-site</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pre-re-sem-blance</t></w>
<w><t>pre-re-sem-ble</t></w>
<w><t>pre-res-o-lu-tion</t></w>
@@ -126713,7 +126723,7 @@
<w><t>pre-tes-ti-fy</t></w>
<w><t>pre-tes-ti-fy-ing</t></w>
<w><t>pre-tes-ti-mo-ny</t></w>
-<w><t>pre-text</t></w>
+<w><t>pre-text</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pre-tex-ta</t></w>
<w><t>pre-tex-tae</t></w>
<w><t>pre=Thanks-giv-ing</t></w>
@@ -127362,7 +127372,8 @@
<w><t>pro-bate</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pro-bat-ed</t></w>
<w><t>pro-bat-ing</t></w>
-<w><t>pro-ba-tion</t></w>
+<w><t>pro-ba-tion</t><noun/></w>
+<w><t>pro-ba-tion-ar-y</t><adjective/></w>
<w><t>pro-ba-tion-er</t></w>
<w><t>pro-ba-tion-er-ship</t></w>
<phrase><t>pro-ba-tion of-fic-er</t></phrase>
@@ -127454,7 +127465,7 @@
<w><t>pro-ci-vil-ian</t></w>
<w><t>pro-claim</t><verb><regular-root/></verb></w>
<w><t>pro-claim-er</t></w>
-<w><t>proc-la-ma-tion</t></w>
+<w><t>proc-la-ma-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pro-clas-si-cal</t></w>
<w><t>pro-cler-gy</t></w>
<w><t>pro-cler-i-cal</t></w>
@@ -127675,7 +127686,7 @@
<w><t>prof-es-sor-i-ate</t></w>
<w><t>pro-fes-so-ri-ate</t></w>
<w><t>pro-fes-sor-ship</t></w>
-<w><t>prof-fer</t></w>
+<w><t>prof-fer</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>prof-fer-er</t></w>
<w><t>pro-fi-cien-cy</t></w>
<w><t>pro-fi-cient</t></w>
@@ -128004,7 +128015,7 @@
<w><t>pro=Mos-lem</t></w>
<w><t>pro-mote</t><verb><regular-root/></verb></w>
<w><t>pro-mot-ed</t></w>
-<w><t>pro-mot-er</t></w>
+<w><t>pro-mot-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pro-mot-ing</t></w>
<w><t>pro-mo-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pro-mo-tion-al</t></w>
@@ -128555,9 +128566,7 @@
<w><t>pro-thet-ic</t></w>
<w><t>pro-thet-i-cal-ly</t></w>
<w><t>Pro-tho-ë-nor</t></w>
-<w><t>pro-thon-o-tar-ies</t></w>
-<w><t>pro-thon-o-tar-y</t></w>
-<w><t>pro-tho-no-tar-y</t></w>
+<w><t>pro-thon-o-tar-y</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pro-tho-ra-ces</t></w>
<w><t>pro-tho-rac-ic</t></w>
<w><t>pro-tho-rax</t></w>
@@ -129907,7 +129916,7 @@
<w><t>pu-pil-less</t></w>
<w><t>Pu-pin</t></w>
<w><t>pu-pip-a-rous</t></w>
-<w><t>pup-pet</t></w>
+<w><t>pup-pet</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pup-pet-eer</t></w>
<w><t>pup-pet-ry</t></w>
<w><t>pup-ping</t></w>
@@ -132418,7 +132427,7 @@
<w><t>ram-pag-ing</t></w>
<w><t>ram-pant</t></w>
<w><t>ramp-ant-ly</t></w>
-<w><t>ram-part</t></w>
+<w><t>ram-part</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ram-pick</t></w>
<w><t>ram-pike</t></w>
<w><t>ramp-ing-ly</t></w>
@@ -132427,7 +132436,7 @@
<w><t>ram-rod</t></w>
<w><t>ram-rod-ded</t></w>
<w><t>ram-rod-ding</t></w>
-<w><t>Ram-say</t></w>
+<w><t>Ram-say</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Rams-den</t></w>
<w><t>Ram-ses</t></w>
<phrase><t>Ram-ses II</t></phrase>
@@ -132716,7 +132725,7 @@
<w><t>rat-i-fi-ca-tion-ist</t></w>
<w><t>rat-i-fied</t></w>
<w><t>rat-i-fi-er</t></w>
-<w><t>rat-i-fy</t></w>
+<w><t>rat-i-fy</t><verb><regular-root/></verb></w>
<w><t>rat-i-fy-ing</t></w>
<w><t>rat-i-n</t></w>
<w><t>ra-tine</t></w>
@@ -133735,7 +133744,7 @@
<w><t>re-cog-ni-tive</t></w>
<w><t>rec-og-niz-a-ble</t></w>
<w><t>rec-og-niz-a-bly</t></w>
-<w><t>re-cog-ni-zance</t></w>
+<w><t>re-cog-ni-zance</t><noun><pluralizable/></noun></w>
<w><t>rec-og-nize</t><verb><regular-root/></verb></w>
<w><t>rec-og-nized</t></w>
<w><t>re-cog-ni-zee</t></w>
@@ -135099,7 +135108,7 @@
<w><t>re-gird-ing</t></w>
<w><t>Re-gis</t></w>
<w><t>ré-gis-seur</t></w>
-<w><t>reg-is-ter</t></w>
+<w><t>reg-is-ter</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>reg-is-ter-a-ble</t></w>
<w><t>reg-is-tered</t></w>
<phrase><t>reg-is-tered post</t></phrase>
@@ -136082,7 +136091,7 @@
<w><t>re-mov-a-ble</t></w>
<w><t>re-mov-a-ble-ness</t></w>
<w><t>re-mov-a-bly</t></w>
-<w><t>re-mov-al</t></w>
+<w><t>re-mov-al</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>re-mov-al-ist</t></w>
<w><t>re-move</t><verb><regular-root/></verb></w>
<w><t>re-moved</t></w>
@@ -136285,7 +136294,7 @@
<w><t>re-or-ches-trat-ing</t></w>
<w><t>re-or-ches-tra-tion</t></w>
<w><t>re-or-dain</t><verb><regular-root/></verb></w>
-<w><t>re-or-der</t></w>
+<w><t>re-or-der</t><verb><regular-root/></verb></w>
<w><t>re-or-gan-ise</t></w>
<w><t>re-or-gan-ised</t></w>
<w><t>re-or-gan-is-ing</t></w>
@@ -136604,7 +136613,7 @@
<w><t>rep-re-sent-a-tive</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>rep-re-sent-a-tive-ly</t></w>
<w><t>rep-re-sent-a-tive-ness</t></w>
-<w><t>re-press</t></w>
+<w><t>re-press</t><verb><regular-root/></verb></w>
<w><t>re-press-er</t></w>
<w><t>re-press-i-ble</t></w>
<w><t>re-pres-sion</t></w>
@@ -136719,7 +136728,7 @@
<w><t>re-pub-li-can-iz-er</t></w>
<phrase><t>Re-pub-li-can Par-ty</t></phrase>
<w><t>re-pub-li-ca-tion</t></w>
-<w><t>re-pub-lish</t></w>
+<w><t>re-pub-lish</t><verb><regular-root/></verb></w>
<w><t>re-pub-lish-a-ble</t></w>
<w><t>re-pu-di-ate</t></w>
<w><t>re-pu-di-at-ed</t></w>
@@ -137433,7 +137442,7 @@
<w><t>re-tab-u-lat-ed</t></w>
<w><t>re-tab-u-lat-ing</t></w>
<w><t>re-tack</t></w>
-<w><t>re-tail</t></w>
+<w><t>re-tail</t><noun/><verb><regular-root/></verb><adverb/></w>
<w><t>re-tail-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>re-tain</t><verb><regular-root/></verb></w>
<phrase><t>re-tained ob-ject</t></phrase>
@@ -137579,7 +137588,7 @@
<w><t>re-tore</t></w>
<w><t>re-torn</t></w>
<w><t>re-tor-sion</t></w>
-<w><t>re-tort</t></w>
+<w><t>re-tort</t><verb><regular-root/></verb></w>
<w><t>re-tort-er</t></w>
<w><t>re-tor-tion</t></w>
<w><t>re-to-tal</t></w>
@@ -137889,6 +137898,7 @@
<w><t>rev-er-en-tial</t></w>
<w><t>rev-er-en-ti-al-i-ty</t></w>
<w><t>rev-er-en-tial-ly</t></w>
+<w><t>rev-er-ent-ly</t><adverb/></w>
<w><t>re-ver-er</t></w>
<w><t>rev-er-ie</t></w>
<w><t>re-ver-i-fi-ca-tion</t></w>
@@ -139767,7 +139777,7 @@
<w><t>roust</t></w>
<w><t>roust-a-bout</t></w>
<w><t>rout</t><verb><regular-root/></verb></w>
-<w><t>route</t></w>
+<w><t>route</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>rout-ed</t></w>
<w><t>route-man</t></w>
<w><t>route-march</t></w>
@@ -140202,7 +140212,7 @@
<w><t>Ru-pert</t></w>
<w><t>ru-pi-ah</t></w>
<w><t>rup-tur-a-ble</t></w>
-<w><t>rup-ture</t></w>
+<w><t>rup-ture</t><verb><regular-root/></verb></w>
<w><t>ru-ral</t></w>
<phrase><t>ru-ral dean</t></phrase>
<phrase><t>ru-ral dis-trict</t></phrase>
@@ -140514,6 +140524,7 @@
<phrase><t>Sa-cred Heart</t></phrase>
<w><t>sa-cred-ly</t><adverb/></w>
<phrase><t>sa-cred mush-room</t></phrase>
+<w><t>sa-cred-ness</t><noun/></w>
<w><t>sac-ri-fice</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>sac-ri-fice-a-ble</t></w>
<w><t>sac-ri-fic-er</t></w>
@@ -146310,7 +146321,7 @@
<w><t>shad-y</t></w>
<w><t>Sha-dy-side</t></w>
<w><t>SHAEF</t></w>
-<w><t>shaft</t></...
[truncated message content] |
|
From: <vic...@us...> - 2023-10-05 16:38:36
|
Revision: 13313
http://sourceforge.net/p/foray/code/13313
Author: victormote
Date: 2023-10-05 16:38:34 +0000 (Thu, 05 Oct 2023)
Log Message:
-----------
Move dictionary-related instance variables into the inner class to better separate them from other parser variables.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java 2023-10-05 16:20:21 UTC (rev 13312)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java 2023-10-05 16:38:34 UTC (rev 13313)
@@ -86,6 +86,46 @@
/** The hard hyphen char for this dictionary. */
private char hardHyphenChar;
+
+ /** The set of all segments that have been parsed by this parser. */
+ private Set<StringWordSegment> segmentSet = new HashSet<StringWordSegment>();
+
+ /** The data structure containing the dictionary words. */
+ private Map<String, StringWord> wordMap = new HashMap<String, StringWord>();
+
+ /** The data structure containing ambiguous words. */
+ private Map<String, List<StringWord>> ambiguousWordMap = new HashMap<String, List<StringWord>>();
+
+ /**
+ * Converts the accumulated parsed data for this dictionary element into an actual dictionary instance.
+ * @return A new dictionary element.
+ */
+ private SegmentDictionary createDictionary() {
+ final StringWordSegment[] uniqueWordSegments = new StringWordSegment[this.segmentSet.size()];
+ this.segmentSet.toArray(uniqueWordSegments);
+ Arrays.sort(uniqueWordSegments);
+
+ final SegmentDictionary dictionary = new SegmentDictionary(this.id, this.imports, this.writingSystem,
+ uniqueWordSegments, this.wordMap.size());
+
+ for (Map.Entry<String, StringWord> entry : this.wordMap.entrySet()) {
+ dictionary.addWord(entry.getKey(), entry.getValue());
+ }
+ for (Map.Entry<String, List<StringWord>> entry : this.ambiguousWordMap.entrySet()) {
+ final List<StringWord> list = entry.getValue();
+ final SegmentDictionaryWord[] sdWords = new SegmentDictionaryWord[list.size()];
+ for (int index = 0; index < list.size(); index ++) {
+ final StringWord stringWord = list.get(index);
+ sdWords[index] = new SegmentDictionaryWord(stringWord.getPartsOfSpeech(), dictionary, stringWord);
+ }
+ final AmbiguousWord<SegmentDictionaryWord> ambWord = new AmbiguousWord<SegmentDictionaryWord>(sdWords);
+ dictionary.addAmbiguousWord(entry.getKey(), ambWord);
+ }
+
+ dictionary.optimize();
+ return dictionary;
+ }
+
}
/** Constant used to initialize string builders. */
@@ -119,15 +159,6 @@
stringFactories.add(new StringWordSegmentUtf16Factory());
}
- /** The set of all segments that have been parsed by this parser. */
- private Set<StringWordSegment> segmentSet = new HashSet<StringWordSegment>();
-
- /** The data structure containing the dictionary words. */
- private Map<String, StringWord> wordMap = new HashMap<String, StringWord>();
-
- /** The data structure containing ambiguous words. */
- private Map<String, List<StringWord>> ambiguousWordMap = new HashMap<String, List<StringWord>>();
-
/** Reusable builder. */
private StringBuilder builder = new StringBuilder(MAX_EXPECTED_WORD_LENGTH);
@@ -367,24 +398,24 @@
checkCollation(actualContent, word.getCollatingContent().toString());
/* Is it an existing ambiguous word? */
- if (this.ambiguousWordMap.containsKey(actualContent)) {
- final List<StringWord> list = this.ambiguousWordMap.get(actualContent);
+ if (this.currentDictionaryElement.ambiguousWordMap.containsKey(actualContent)) {
+ final List<StringWord> list = this.currentDictionaryElement.ambiguousWordMap.get(actualContent);
list.add(word);
break;
}
/* Is it a new ambiguous word? */
- if (wordMap.containsKey(actualContent)) {
- final StringWord existingMapEntry = wordMap.remove(actualContent);
+ if (this.currentDictionaryElement.wordMap.containsKey(actualContent)) {
+ final StringWord existingMapEntry = this.currentDictionaryElement.wordMap.remove(actualContent);
final List<StringWord> list = new ArrayList<StringWord>();
list.add(existingMapEntry);
list.add(word);
- this.ambiguousWordMap.put(actualContent, list);
+ this.currentDictionaryElement.ambiguousWordMap.put(actualContent, list);
break;
}
/* Add it to normal words. */
- wordMap.put(actualContent, word);
+ this.currentDictionaryElement.wordMap.put(actualContent, word);
break;
}
case "t": {
@@ -401,7 +432,7 @@
}
final StringWordSegment wordSegment = createSegment(builder.toString());
segmentList.add(wordSegment);
- segmentSet.add(wordSegment);
+ this.currentDictionaryElement.segmentSet.add(wordSegment);
builder.delete(0, builder.length());
} else {
if (theChar == this.currentDictionaryElement.hardHyphenChar) {
@@ -415,7 +446,7 @@
if (builder.length() > 0) {
final StringWordSegment wordSegment = createSegment(builder.toString());
segmentList.add(wordSegment);
- segmentSet.add(wordSegment);
+ this.currentDictionaryElement.segmentSet.add(wordSegment);
}
if (segmentList.size() < 1) {
throw new SAXException("0-syllable word: " + getLocationString(getLocator()));
@@ -440,33 +471,12 @@
case "ordinal": break;
case "word-group": break;
case "axsl-dictionary": {
- final StringWordSegment[] uniqueWordSegments = new StringWordSegment[this.segmentSet.size()];
- this.segmentSet.toArray(uniqueWordSegments);
- Arrays.sort(uniqueWordSegments);
- final SegmentDictionary dictionary = new SegmentDictionary(this.currentDictionaryElement.id,
- this.currentDictionaryElement.imports, this.currentDictionaryElement.writingSystem,
- uniqueWordSegments, this.wordMap.size());
-
- for (Map.Entry<String, StringWord> entry : this.wordMap.entrySet()) {
- dictionary.addWord(entry.getKey(), entry.getValue());
- }
- for (Map.Entry<String, List<StringWord>> entry : this.ambiguousWordMap.entrySet()) {
- final List<StringWord> list = entry.getValue();
- final SegmentDictionaryWord[] sdWords = new SegmentDictionaryWord[list.size()];
- for (int index = 0; index < list.size(); index ++) {
- final StringWord stringWord = list.get(index);
- sdWords[index] = new SegmentDictionaryWord(stringWord.getPartsOfSpeech(), dictionary, stringWord);
- }
- final AmbiguousWord<SegmentDictionaryWord> ambWord = new AmbiguousWord<SegmentDictionaryWord>(sdWords);
- dictionary.addAmbiguousWord(entry.getKey(), ambWord);
- }
-
- dictionary.optimize();
+ final SegmentDictionary dictionary = this.currentDictionaryElement.createDictionary();
this.parsedDictionaries.add(dictionary);
debugMessage("End parsing for dictionary: " + this.currentDictionaryElement.writingSystem.toString());
- debugMessage("Qty of unique word segments parsed: " + this.segmentSet.size());
- debugMessage("Qty of words parsed: " + wordMap.size());
+ debugMessage("Qty of unique word segments parsed: " + this.currentDictionaryElement.segmentSet.size());
+ debugMessage("Qty of words parsed: " + this.currentDictionaryElement.wordMap.size());
break;
}
case "axsl-dictionary-collection": break;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-10-05 16:20:24
|
Revision: 13312
http://sourceforge.net/p/foray/code/13312
Author: victormote
Date: 2023-10-05 16:20:21 +0000 (Thu, 05 Oct 2023)
Log Message:
-----------
Remove no-longer-needed instance variable.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2023-10-05 12:07:01 UTC (rev 13311)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2023-10-05 16:20:21 UTC (rev 13312)
@@ -6,7 +6,7 @@
<axsl-dictionary
id="org.foray.lat.Latn.ZZZ"
- language="lat" script="Latn"
+ language="lat" script="Latn" country="ZZZ"
hard-hyphen-char="=" soft-hyphen-char="-">
<!--
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2023-10-05 12:07:01 UTC (rev 13311)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2023-10-05 16:20:21 UTC (rev 13312)
@@ -44,7 +44,6 @@
import org.foray.xml.dtd.DtdAttribute;
import org.axsl.i18n.WritingSystem;
-import org.axsl.orthography.Dictionary;
import org.axsl.orthography.Lexer;
import org.axsl.orthography.Lexer.TokenType;
import org.axsl.orthography.Orthography;
@@ -166,9 +165,6 @@
/** The lexer. */
private Lexer4a lexer;
- /** The list of ad-hoc dictionaries, usually parsed from the command-line. */
- private List<Dictionary> adhocDictionaries = new ArrayList<Dictionary>();
-
/** The counter for "Not found" words. */
private int notFoundCounter = 0;
@@ -218,7 +214,6 @@
new Orthography4aVariant(baseOrthography, dictionary);
variantServer.registerOrthography(writingSystem, variantOrthography);
}
- this.adhocDictionaries.addAll(dictionaries);
}
this.server = variantServer;
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-10-05 12:07:04
|
Revision: 13311
http://sourceforge.net/p/foray/code/13311
Author: victormote
Date: 2023-10-05 12:07:01 +0000 (Thu, 05 Oct 2023)
Log Message:
-----------
Replace some uses of Logger with Exceptions.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/data/TernaryNodeParents.java
trunk/foray/foray-common/src/main/java/org/foray/common/data/TernaryNodes.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/data/TernaryNodeParents.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/data/TernaryNodeParents.java 2023-10-05 02:27:05 UTC (rev 13310)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/data/TernaryNodeParents.java 2023-10-05 12:07:01 UTC (rev 13311)
@@ -28,9 +28,6 @@
package org.foray.common.data;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
/**
* Sister class for {@link TernaryNodes} that provides the parent for each node in that tree.
* This is probably only useful for debugging, as we would ordinarily not want to spend the memory to keep track of
@@ -41,9 +38,6 @@
/** The array of parent values. */
private int[] parents;
- /** The logger. */
- private transient Logger logger = LoggerFactory.getLogger(this.getClass());
-
/**
* Constructor.
* Finds the parent for each node.
@@ -60,7 +54,7 @@
if (this.parents[lowPointer] == 0) {
this.parents[lowPointer] = index;
} else {
- this.logger.error(
+ throw new IllegalStateException(
String.format(multipleParentFormat, lowPointer, index, this.parents[lowPointer]));
}
}
@@ -69,7 +63,7 @@
if (this.parents[equalPointer] == 0) {
this.parents[equalPointer] = index;
} else {
- this.logger.error(
+ throw new IllegalStateException(
String.format(multipleParentFormat, equalPointer, index, this.parents[equalPointer]));
}
}
@@ -78,7 +72,7 @@
if (this.parents[highPointer] == 0) {
this.parents[highPointer] = index;
} else {
- this.logger.error(
+ throw new IllegalStateException(
String.format(multipleParentFormat, highPointer, index, this.parents[highPointer]));
}
}
@@ -86,10 +80,10 @@
/* Nodes 0 and 1 should not have parents... */
if (parents[0] != 0) {
- this.logger.error("Index 0 should have no parent.");
+ throw new IllegalStateException("Index 0 should have no parent.");
}
if (parents[1] != 0) {
- this.logger.error("Index 1 should have no parent.");
+ throw new IllegalStateException("Index 1 should have no parent.");
}
/* ... but everybody else should, unless they are orphaned. */
@@ -103,22 +97,17 @@
} else if (index == end + 1) {
end = index;
} else if (start == end) {
- this.logger.error("Index " + start + " has no parent.");
- start = index;
- end = index;
+ throw new IllegalStateException("Index " + start + " has no parent.");
} else {
- this.logger.error(
- "Indexes " + start + " through " + end + " have no parent.");
- start = index;
- end = index;
+ throw new IllegalStateException("Indexes " + start + " through " + end + " have no parent.");
}
}
}
if (start > -1) {
if (start == end) {
- this.logger.error("Index " + start + " has no parent.");
+ throw new IllegalStateException("Index " + start + " has no parent.");
} else {
- this.logger.error("Indexes " + start + " through " + end + " have no parent.");
+ throw new IllegalStateException("Indexes " + start + " through " + end + " have no parent.");
}
}
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/data/TernaryNodes.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/data/TernaryNodes.java 2023-10-05 02:27:05 UTC (rev 13310)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/data/TernaryNodes.java 2023-10-05 12:07:01 UTC (rev 13311)
@@ -28,9 +28,7 @@
package org.foray.common.data;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
+import java.io.PrintStream;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Iterator;
@@ -70,9 +68,6 @@
/** Constant needed for serialization. */
private static final long serialVersionUID = 2866689417641820196L;
- /** The logger. */
- private transient Logger logger = LoggerFactory.getLogger(this.getClass());
-
/** The number of nodes in this data structure. This also corresponds to the index of the next available node. */
private int length;
@@ -397,14 +392,15 @@
/**
* Dumps the content of the tree to a logger.
* Useful for tests.
+ * @param output The print stream to which the tree should be written.
*/
- public void dumpTree() {
+ public void dumpTree(final PrintStream output) {
final TernaryNodeParents parents = getParents();
for (int index = 0; index < size(); index ++) {
final TernaryNode node = createNotionalNode(index, null, parents);
- this.logger.info(node.toString());
+ output.println(node.toString());
}
- this.logger.info("-----------------------------------------------------------------------------------------");
+ output.println("-----------------------------------------------------------------------------------------");
}
/**
@@ -515,16 +511,13 @@
public boolean isConsistent() {
for (int index = 0; index < size(); index ++) {
if (getLowPointer(index) < 0) {
- this.logger.error("Low Pointer out of range: {}", getLowPointer(0));
- return false;
+ throw new IllegalStateException("Low Pointer out of range: " + getLowPointer(0));
}
if (getEqualPointer(index) < 0) {
- this.logger.error("Equal Pointer out of range: {}", getEqualPointer(0));
- return false;
+ throw new IllegalStateException("Equal Pointer out of range: " + getEqualPointer(0));
}
if (getHighPointer(index) < 0) {
- this.logger.error("High Pointer out of range: {}", getHighPointer(0));
- return false;
+ throw new IllegalStateException("High Pointer out of range: " + getHighPointer(0));
}
}
/* For now, most of the testing is done with the TernaryNodeParents class. Just create an instance of it and
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-10-05 02:27:08
|
Revision: 13310
http://sourceforge.net/p/foray/code/13310
Author: victormote
Date: 2023-10-05 02:27:05 +0000 (Thu, 05 Oct 2023)
Log Message:
-----------
Reset last word when starting a new axsl-dictionary element.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java 2023-10-04 21:27:20 UTC (rev 13309)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java 2023-10-05 02:27:05 UTC (rev 13310)
@@ -138,7 +138,7 @@
private boolean logDictionaryProblems = false;
/** The last parsed word, used to verify alphabetical order. */
- private String lastWord = StringUtils.EMPTY_STRING;
+ private String lastWord;
/** Collator used to check the order of entries in the dictionary. */
private Collator collator;
@@ -305,6 +305,7 @@
* and uppercase the same. */
this.collator.setStrength(Collator.SECONDARY);
}
+ this.lastWord = StringUtils.EMPTY_STRING;
break;
}
case "import-dictionary": {
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-10-04 21:27:24
|
Revision: 13309
http://sourceforge.net/p/foray/code/13309
Author: victormote
Date: 2023-10-04 21:27:20 +0000 (Wed, 04 Oct 2023)
Log Message:
-----------
Remove hard-coded paths to orthography resources by handling relative URLs.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java
Modified: trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2023-10-04 12:55:55 UTC (rev 13308)
+++ trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2023-10-04 21:27:20 UTC (rev 13309)
@@ -229,11 +229,11 @@
<resource-location type="classpath">/resources/org/foray/dictionaries/en-moby.jbso</resource-location>
</parsed-resource>
<parsed-resource>
- <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/resources/resources/org/foray/dictionaries/eng-Latn-ZZZ.dict.jbso</resource-location>
+ <resource-location type="url">../../resources/resources/org/foray/dictionaries/eng-Latn-ZZZ.dict.jbso</resource-location>
</parsed-resource>
<unparsed-dictionary>
<dictionary-element>
- <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml</resource-location>
+ <resource-location type="url">../dictionaries/eng-Latn-ZZZ.dict.xml</resource-location>
</dictionary-element>
</unparsed-dictionary>
</dictionary-resource>
@@ -241,7 +241,7 @@
<dictionary-resource id="org.foray.eng.Latn.GBR">
<unparsed-dictionary>
<dictionary-element>
- <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml</resource-location>
+ <resource-location type="url">../dictionaries/eng-Latn-GBR.dict.xml</resource-location>
</dictionary-element>
</unparsed-dictionary>
</dictionary-resource>
@@ -249,7 +249,7 @@
<dictionary-resource id="org.foray.eng.Latn.USA">
<unparsed-dictionary>
<dictionary-element>
- <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml</resource-location>
+ <resource-location type="url">../dictionaries/eng-Latn-USA.dict.xml</resource-location>
</dictionary-element>
</unparsed-dictionary>
</dictionary-resource>
@@ -257,7 +257,7 @@
<dictionary-resource id="org.foray.eng.Latn.ZZZ.1920">
<unparsed-dictionary>
<dictionary-element>
- <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-epoch-01.dict.xml</resource-location>
+ <resource-location type="url">../dictionaries/eng-Latn-ZZZ-epoch-01.dict.xml</resource-location>
</dictionary-element>
</unparsed-dictionary>
</dictionary-resource>
@@ -265,7 +265,7 @@
<dictionary-resource id="org.foray.ita.Latn.ZZZ">
<unparsed-dictionary>
<dictionary-element>
- <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/ita-Latn-ZZZ.dict.xml</resource-location>
+ <resource-location type="url">../dictionaries/ita-Latn-ZZZ.dict.xml</resource-location>
</dictionary-element>
</unparsed-dictionary>
</dictionary-resource>
@@ -273,7 +273,7 @@
<dictionary-resource id="org.foray.lat.Latn.ZZZ">
<unparsed-dictionary>
<dictionary-element>
- <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml</resource-location>
+ <resource-location type="url">../dictionaries/lat-Latn-ZZZ.dict.xml</resource-location>
</dictionary-element>
</unparsed-dictionary>
</dictionary-resource>
@@ -281,7 +281,7 @@
<dictionary-resource id="org.foray.fre.Latn.ZZZ">
<unparsed-dictionary>
<dictionary-element>
- <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml</resource-location>
+ <resource-location type="url">../dictionaries/fre-Latn-ZZZ.dict.xml</resource-location>
</dictionary-element>
</unparsed-dictionary>
</dictionary-resource>
@@ -289,7 +289,7 @@
<dictionary-resource id="org.foray.grc.Latn.ZZZ">
<unparsed-dictionary>
<dictionary-element>
- <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/grc-Latn-ZZZ.dict.xml</resource-location>
+ <resource-location type="url">../dictionaries/grc-Latn-ZZZ.dict.xml</resource-location>
</dictionary-element>
</unparsed-dictionary>
</dictionary-resource>
@@ -297,7 +297,7 @@
<dictionary-resource id="org.foray.grc.Grek.ZZZ">
<unparsed-dictionary>
<dictionary-element>
- <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/grc-Grek-ZZZ.dict.xml</resource-location>
+ <resource-location type="url">../dictionaries/grc-Grek-ZZZ.dict.xml</resource-location>
</dictionary-element>
</unparsed-dictionary>
</dictionary-resource>
@@ -305,7 +305,7 @@
<dictionary-resource id="org.foray.heb.Latn.ZZZ">
<unparsed-dictionary>
<dictionary-element>
- <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/heb-Latn-ZZZ.dict.xml</resource-location>
+ <resource-location type="url">../dictionaries/heb-Latn-ZZZ.dict.xml</resource-location>
</dictionary-element>
</unparsed-dictionary>
</dictionary-resource>
@@ -313,7 +313,7 @@
<dictionary-resource id="org.foray.arc.Latn.ZZZ">
<unparsed-dictionary>
<dictionary-element>
- <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/arc-Latn-ZZZ.dict.xml</resource-location>
+ <resource-location type="url">../dictionaries/arc-Latn-ZZZ.dict.xml</resource-location>
</dictionary-element>
</unparsed-dictionary>
</dictionary-resource>
@@ -321,7 +321,7 @@
<dictionary-resource id="org.foray.non.Latn.ZZZ">
<unparsed-dictionary>
<dictionary-element>
- <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/non-Latn-ZZZ.dict.xml</resource-location>
+ <resource-location type="url">../dictionaries/non-Latn-ZZZ.dict.xml</resource-location>
</dictionary-element>
</unparsed-dictionary>
</dictionary-resource>
@@ -329,7 +329,7 @@
<dictionary-resource id="org.foray.eng.Latn.ZZZ.theology">
<unparsed-dictionary>
<dictionary-element>
- <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-theology.dict.xml</resource-location>
+ <resource-location type="url">../dictionaries/eng-Latn-ZZZ-theology.dict.xml</resource-location>
</dictionary-element>
</unparsed-dictionary>
</dictionary-resource>
@@ -337,7 +337,7 @@
<dictionary-resource id="org.foray.pol.Latn.ZZZ">
<unparsed-dictionary>
<dictionary-element>
- <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/pol-Latn-ZZZ.dict.xml</resource-location>
+ <resource-location type="url">../dictionaries/pol-Latn-ZZZ.dict.xml</resource-location>
</dictionary-element>
</unparsed-dictionary>
</dictionary-resource>
@@ -347,10 +347,10 @@
<resource-location type="classpath">/resources/org/foray/orthography/hyphPatterns/eng.jbso</resource-location>
</parsed-resource>
<parsed-resource>
- <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/resources/resources/org/foray/orthography/hyphPatterns/eng.jbso</resource-location>
+ <resource-location type="url">../../resources/resources/org/foray/orthography/hyphPatterns/eng.jbso</resource-location>
</parsed-resource>
<unparsed-hyphenation-patterns>
- <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/hyphPatterns/eng.xml</resource-location>
+ <resource-location type="url">../hyphPatterns/eng.xml</resource-location>
</unparsed-hyphenation-patterns>
</hyphenation-patterns-resource>
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java 2023-10-04 12:55:55 UTC (rev 13308)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java 2023-10-04 21:27:20 UTC (rev 13309)
@@ -98,6 +98,7 @@
throw new OrthographyException(e);
}
final InputSource inputSource = new InputSource(inputStream);
+ inputSource.setSystemId(config.getOrthographyConfigurationLocation().toString());
final OrthographyParser parser = new OrthographyParser(this);
try {
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java 2023-10-04 12:55:55 UTC (rev 13308)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java 2023-10-04 21:27:20 UTC (rev 13309)
@@ -132,6 +132,9 @@
/** The stack of elements currently being processed. */
private Stack<String> elementStack = new Stack<String>();
+ /** The system ID of the input source being parsed. */
+ private URL systemId;
+
/**
* Constructor.
* @param server The hyphenation server which will capture the information from the parsed configuration.
@@ -142,6 +145,11 @@
@Override
public Orthography4aStandard parse(final InputSource inputSource) throws IOException, SAXException {
+ /* Get location of the document to be parsed, so that relative paths can be computed from it. */
+ if (inputSource.getSystemId() != null) {
+ this.systemId = new URL(inputSource.getSystemId());
+ }
+
final XMLReader parser = createSax2Parser(true, true, true, ForayEntityResolver.getInstance(), false);
parser.parse(inputSource);
return this.currentOrthographyConfig;
@@ -576,11 +584,24 @@
*/
private URL createUrl(final String urlString) {
try {
+ /* Is it an absolute URL? */
return new URL(urlString);
} catch (final MalformedURLException e) {
+ /* Ignore. This just tells us that it is not a valid absolute URL. */
+ }
+
+ if (this.systemId == null) {
errorMessage("Invalid URL: {}", urlString);
return null;
}
+
+ /* Try making it a URL relative to the system ID. */
+ try {
+ return new URL(this.systemId, urlString);
+ } catch (final MalformedURLException e) {
+ errorMessage("Invalid URL: {}", urlString);
+ return null;
+ }
}
/**
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-10-04 12:55:58
|
Revision: 13308
http://sourceforge.net/p/foray/code/13308
Author: victormote
Date: 2023-10-04 12:55:55 +0000 (Wed, 04 Oct 2023)
Log Message:
-----------
1. Move the main logic for Orthography4a and subclasses to the abstract superclass, leaving the subclasses to basically feed configuration to it. 2. Add an abstract method that allows the variant class to add the dictionary of the wrapped orthography to the stack that is being processed.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aStandard.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aVariant.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aVariant.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java 2023-10-03 16:55:32 UTC (rev 13307)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java 2023-10-04 12:55:55 UTC (rev 13308)
@@ -28,10 +28,27 @@
package org.foray.orthography;
+import org.foray.common.i18n.WritingSystem4a;
+import org.foray.common.primitive.CharSequenceUtils;
+import org.foray.orthography.wrapper.CapitalizedWord;
+import org.foray.orthography.wrapper.ExactWord;
+import org.foray.orthography.wrapper.UppercaseWord;
+import org.foray.primitive.BooleanUtils;
+import org.foray.primitive.CharacterUtils;
+
import org.axsl.fotree.text.FoOrthography;
import org.axsl.orthography.Dictionary;
+import org.axsl.orthography.Lexer;
+import org.axsl.orthography.Lexer.TokenType;
+import org.axsl.orthography.OrthographyException;
+import org.axsl.orthography.Word;
+import java.util.List;
+import java.util.Stack;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
/**
* Abstract superclass for FOray orthographies.
*/
@@ -38,9 +55,335 @@
public abstract class Orthography4a implements FoOrthography {
/**
+ * Returns the parent orthography server.
+ * @return the parent orthography server.
+ */
+ public abstract OrthographyServer4a getServer();
+
+ /**
* Returns the dictionary.
* @return The dictionary, or null if one is not configured or cannot be obtained.
*/
- public abstract Dictionary getDictionary();
+ public abstract SegmentDictionary getDictionary();
+ /**
+ * Returns the list of match rule Ids.
+ * @return The list of match rule Ids.
+ */
+ public abstract List<String> getMatchRuleListIds();
+
+ /**
+ * Returns the list of derivative rule Ids.
+ * @return The list of derivative rule Ids.
+ */
+ public abstract List<String> getDerivativeRuleListIds();
+
+ /**
+ * Returns the list of word wrapper factories.
+ * @return The list of word wrapper factories.
+ */
+ public abstract List<WordWrapperFactory<?>> getWordWrapperFactories();
+
+ /**
+ * Returns the regex pattern used to break compound words into their components.
+ * @return The regex pattern used to break compound words into their components.
+ */
+ public abstract Pattern getCompoundWordBreaker();
+
+ /**
+ * Returns the hyphenation patterns.
+ * @return The hyphenation patterns.
+ */
+ public abstract PatternTree getHyphenationPatterns();
+
+ /**
+ * Returns the writing system for this orthography.
+ * @return The writing system for this orthography.
+ */
+ public abstract WritingSystem4a getWritingSystem();
+
+ /**
+ * Searches the configured word wrapper factories for a match that would create a word derived from a dictionary
+ * word.
+ * @param chars The word to test.
+ * @return A word wrapper if {@code chars} matches a word wrapper factory, or null if not.
+ */
+ public WordWrapper findDerivatives(final CharSequence chars) {
+ /* TODO: For now, this returns the first item that matches. This may need to be expanded to allow nested wrapped
+ * words. */
+ WordWrapper word = null;
+ final Dictionary dictionary = getDictionary();
+ for (int index = 0; index < getWordWrapperFactories().size(); index ++) {
+ final WordWrapperFactory<?> factory = getWordWrapperFactories().get(index);
+ word = factory.makeInstance(chars, dictionary);
+ if (word != null) {
+ return word;
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Indicates whether a given word is found in the match rules for this orthography, i.e. rules looking for
+ * non-dictionary items such as numbers, currency, etc.
+ * @param wordChars The word to be tested.
+ * @return True if and only if {@code word} matches at least one match rule for this orthography.
+ */
+ public boolean foundInMatchRules(final CharSequence wordChars) {
+ for (int idIndex = 0; idIndex < getMatchRuleListIds().size(); idIndex ++) {
+ final String ruleListId = getMatchRuleListIds().get(idIndex);
+ final List<Pattern> validWordPatterns = getServer().getMatchRules(ruleListId);
+ for (int index = 0; index < validWordPatterns.size(); index ++) {
+ final Pattern pattern = validWordPatterns.get(index);
+ final Matcher matcher = pattern.matcher(wordChars);
+ if (matcher.matches()) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public Word4a recognizeWord(final CharSequence wordChars, final int offset, final int length,
+ final Word.PartOfSpeech pos) {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public boolean isRecognizedWord(final CharSequence wordCharsIn, final int offset, final int length,
+ final Word.PartOfSpeech pos) {
+ /* TODO: For performance and memory, try to eliminate the following conversion. */
+ final CharSequence wordChars = wordCharsIn.subSequence(offset, offset + length);
+ if (wordChars.length() < 1) {
+ return false;
+ }
+
+ final Stack<Dictionary> dictionaryStack = new Stack<Dictionary>();
+
+ /* 1. Check exact matches in standard dictionaries for the orthography. */
+ dictionaryStack.clear();
+ stackStandardDictionaries(dictionaryStack);
+ /* Check the referenced dictionary and each of its ancestor dictionaries. */
+ while (! dictionaryStack.isEmpty()) {
+ final Dictionary baseDictionary = dictionaryStack.pop();
+ if (baseDictionary.getWord(wordChars, 0) == null) {
+ stackImportedDictionaries(baseDictionary, dictionaryStack);
+ } else {
+ return true;
+ }
+ }
+
+ /* 2. Check the match rules. */
+ if (foundInMatchRules(wordChars)) {
+ return true;
+ }
+
+ /* 3. Check for compound word. */
+ if (CharSequenceUtils.contains(wordChars, '-')) {
+ final String[] components = getCompoundWordBreaker().split(wordChars);
+ final boolean[] componentsValid = new boolean[components.length];
+ for (int index = 0; index < components.length; index ++) {
+ final String component = components[index];
+ componentsValid[index] = isRecognizedWord(component, 0, component.length(), pos);
+ }
+ if (BooleanUtils.allTrue(componentsValid)) {
+ return true;
+ }
+ }
+
+ /* 4. Check derivative matches in standard dictionaries for the orthography. */
+ dictionaryStack.clear();
+ stackStandardDictionaries(dictionaryStack);
+ /* Check the referenced dictionary and each of its ancestor dictionaries. */
+ while (! dictionaryStack.empty()) {
+ final Dictionary baseDictionary = dictionaryStack.pop();
+ if (isDerivativeFound(wordChars, baseDictionary)) {
+ return true;
+ } else {
+ stackImportedDictionaries(baseDictionary, dictionaryStack);
+ }
+ }
+
+ /* Not found in any dictionary. */
+ /* If the first character is uppercase, convert to lowercase and try again. Discussion: For English at least, we
+ * do not want the opposite effect, i.e. to convert words starting with lowercase have the first char converted
+ * to uppercase. If the word is in the dictionary as a proper noun, we should treat a failure to capitalize it
+ * as a spelling error. Also, we do not want to generally convert the entire word to lowercase, as capital
+ * letters in the middle of the word should normally be treated as a spelling error. For exceptions to this
+ * last rule, users should enter the oddly-capitalized word into a dictionary in that form.
+ * TODO: This capability should be included in the orthography configuration instead of being hard-coded
+ * here. */
+ final int indexFirstLetter = CharacterUtils.firstLetter(wordChars);
+ if (indexFirstLetter > -1) {
+ if (Character.isUpperCase(wordChars.charAt(indexFirstLetter))) {
+ final StringBuilder builder = new StringBuilder(wordChars);
+ builder.setCharAt(indexFirstLetter, Character.toLowerCase(wordChars.charAt(indexFirstLetter)));
+ return isRecognizedWord(builder, offset, length, pos);
+ }
+ }
+
+ return false;
+ }
+
+ /**
+ * Adds the standard dictionary(s) for the orthography to a stack of dictionaries to be processed.
+ * @param dictionaryStack The stack to which the dictionary(s) should be added.
+ */
+ protected abstract void stackStandardDictionaries(Stack<Dictionary> dictionaryStack);
+
+ /**
+ * Adds all imported dictionaries in a given {@link Dictionary} instance to stack of such dictionary IDs.
+ * @param baseDictionary The base dictionary whose imports are to be added to the stack.
+ * @param dictionaryStack The stack to which the Dictionary IDs should be added.
+ */
+ private void stackImportedDictionaries(final Dictionary baseDictionary, final Stack<Dictionary> dictionaryStack) {
+ final List<String> imported = baseDictionary.getImportedDictionaries();
+ for (int index = 0; index < imported.size(); index ++) {
+ final String dictId = imported.get(index);
+ final Dictionary importedDictionary = getServer().getDictionary(dictId);
+ if (importedDictionary != null) {
+ dictionaryStack.push(importedDictionary);
+ }
+ }
+ }
+
+ /**
+ * Indicates whether a given word can be found in a given dictionary after considering the derivative rules in this
+ * orthography.
+ * @param wordChars The word characters.
+ * @param dictionary The dictionary to be searched.
+ * @return True if and only if a root for {@code wordChars} can be found in {@code dictionary} using the derivative
+ * rules in this orthography.
+ */
+ private boolean isDerivativeFound(final CharSequence wordChars, final Dictionary dictionary) {
+ for (int listIndex = 0; listIndex < getDerivativeRuleListIds().size(); listIndex ++) {
+ final String ruleListKey = getDerivativeRuleListIds().get(listIndex);
+ final List<DerivativePattern> patternList = getServer().getDerivativePatterns(ruleListKey);
+ for (int patternIndex = 0; patternIndex < patternList.size(); patternIndex ++) {
+ final DerivativePattern pattern = patternList.get(patternIndex);
+ if (pattern.findFirstApplicableRule(wordChars, dictionary) != null) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public Word4a hyphenateUnrecognizedWord(final CharSequence word, final int offset, final int length) {
+ /* The character sequence containing the characters in the word that we are looking for. */
+ final CharSequence chars = word.subSequence(offset, offset + length);
+ Word4a hyphenatedWord = null;
+
+ /* Look in the dictionary first, as it should be more accurate. */
+ final SegmentDictionary dictionary = getDictionary();
+ if (dictionary != null) {
+ hyphenatedWord = dictionary.getWord(chars.toString().toLowerCase(), 0);
+ if (hyphenatedWord == null) {
+ hyphenatedWord = findDerivatives(chars);
+ }
+ }
+
+
+ if (hyphenatedWord == null) {
+ /* The word was not found in the dictionary. Try the hyphenation patterns. */
+ final PatternTree patternTree = getHyphenationPatterns();
+ if (patternTree == null) {
+ return null;
+ }
+ hyphenatedWord = patternTree.hyphenate(chars, 0, length);
+ }
+
+ if (hyphenatedWord == null) {
+ return null;
+ }
+
+ final boolean capitalized = CharSequenceUtils.equalToCapitalized(hyphenatedWord.getNormalizedContent(), chars);
+ if (capitalized) {
+ return new CapitalizedWord(hyphenatedWord);
+ }
+ final boolean uppercase = CharSequenceUtils.equalToUppercase(hyphenatedWord.getNormalizedContent(), chars);
+ if (uppercase) {
+ return new UppercaseWord(hyphenatedWord);
+ }
+
+ if (CharSequenceUtils.hasAnyUppercase(chars)) {
+ /* There is unexpected capitalization. */
+ return new ExactWord(hyphenatedWord, chars.toString());
+ }
+
+ return hyphenatedWord;
+ }
+
+ @Override
+ public TokenFlow4a tokenize(final CharSequence characters, final int startIndex, final int length)
+ throws OrthographyException {
+ final TokenFlow4a wordSequence = new TokenFlow4a();
+ final CharSequence sequence = characters.subSequence(startIndex, startIndex + length);
+ final Lexer4a lexer = getServer().getLexer();
+ lexer.clear();
+ lexer.addUntokenized(sequence, getWritingSystem());
+ lexer.lock();
+
+ while (lexer.hasNext()) {
+ final Lexer.Token token = lexer.next();
+ final CharSequence chunk = token.getText();
+ if (chunk.length() < 1) {
+ continue;
+ }
+
+ if (token.getTokenType() == TokenType.WORD) {
+ /* Chunk is a word. */
+ Word4a word = recognizeWord(chunk, 0, chunk.length(), null);
+ if (word == null) {
+ word = hyphenateUnrecognizedWord(chunk, 0, chunk.length());
+ }
+ if (word == null) {
+ word = new StringWord(0, chunk);
+ }
+ wordSequence.addToken(word);
+ } else {
+ /* Chunk is interword content. */
+ parseInterwordContent(chunk, wordSequence);
+ }
+ }
+ lexer.clear();
+ return wordSequence;
+ }
+
+ /**
+ * Converts interword characters to instances of {@link Punctuation4a} or {@link Whitespace4a}, and adds them as
+ * tokens to a given word sequence.
+ * @param interword The interword characters to be tokenized.
+ * @param wordSequence The sequence of tokens to which tokens will be added.
+ * @throws OrthographyException If {@code interword} cannot be converted to punctuation and whitespace tokens.
+ */
+ private void parseInterwordContent(final CharSequence interword, final TokenFlow4a wordSequence)
+ throws OrthographyException {
+ int index = 0;
+ while (index < interword.length()) {
+ final char contentChar = interword.charAt(index);
+ final Punctuation4a punctuation = Punctuation4a.findInstance(interword.subSequence(index, index + 1));
+ if (punctuation != null) {
+ wordSequence.addToken(punctuation);
+ index ++;
+ } else if (Character.isWhitespace(contentChar)) {
+ final Whitespace4a whitespace = Whitespace4a.findInstance(interword.subSequence(index, index + 1));
+ wordSequence.addToken(whitespace);
+ index ++;
+ } else {
+ throw new OrthographyException("Don't know how to handle interword content: " + contentChar);
+ }
+ }
+
+ }
+
+ @Override
+ public boolean canBreakLineMidWord() {
+ // TODO Auto-generated method stub
+ return false;
+ }
+
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aStandard.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aStandard.java 2023-10-03 16:55:32 UTC (rev 13307)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aStandard.java 2023-10-04 12:55:55 UTC (rev 13308)
@@ -29,23 +29,12 @@
package org.foray.orthography;
import org.foray.common.i18n.WritingSystem4a;
-import org.foray.common.primitive.CharSequenceUtils;
-import org.foray.orthography.wrapper.CapitalizedWord;
-import org.foray.orthography.wrapper.ExactWord;
-import org.foray.orthography.wrapper.UppercaseWord;
-import org.foray.primitive.BooleanUtils;
-import org.foray.primitive.CharacterUtils;
import org.axsl.orthography.Dictionary;
-import org.axsl.orthography.Lexer;
-import org.axsl.orthography.Lexer.TokenType;
-import org.axsl.orthography.OrthographyException;
-import org.axsl.orthography.Word;
import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
-import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
@@ -92,10 +81,12 @@
this.writingSystem = writingSystem;
}
- /**
- * Returns the list of match rule Ids.
- * @return The list of match rule Ids.
- */
+ @Override
+ public OrthographyServer4a getServer() {
+ return this.server;
+ }
+
+ @Override
public List<String> getMatchRuleListIds() {
return this.matchRuleListIds;
}
@@ -112,10 +103,7 @@
this.matchRuleListIds.add(matchRuleListId);
}
- /**
- * Returns the list of derivative rule Ids.
- * @return The list of derivative rule Ids.
- */
+ @Override
public List<String> getDerivativeRuleListIds() {
return this.derivativeRuleListIds;
}
@@ -164,10 +152,7 @@
this.hyphenationPatternsResource = hyphenationPatternsResource;
}
- /**
- * Returns the list of word wrapper factories.
- * @return The list of word wrapper factories.
- */
+ @Override
public List<WordWrapperFactory<?>> getWordWrapperFactories() {
return this.wordWrapperFactories;
}
@@ -189,10 +174,7 @@
}
}
- /**
- * Returns the hyphenation patterns.
- * @return The hyphenation patterns.
- */
+ @Override
public PatternTree getHyphenationPatterns() {
if (this.hyphenationPatternsResource == null) {
return null;
@@ -201,324 +183,19 @@
}
}
- /**
- * Searches the configured word wrapper factories for a match that would create a word derived from a dictionary
- * word.
- * @param chars The word to test.
- * @return A word wrapper if {@code chars} matches a word wrapper factory, or null if not.
- */
- public WordWrapper findDerivatives(final CharSequence chars) {
- /* TODO: For now, this returns the first item that matches. This may need to be expanded to allow nested wrapped
- * words. */
- WordWrapper word = null;
- final Dictionary dictionary = getDictionary();
- for (int index = 0; index < this.wordWrapperFactories.size(); index ++) {
- final WordWrapperFactory<?> factory = this.wordWrapperFactories.get(index);
- word = factory.makeInstance(chars, dictionary);
- if (word != null) {
- return word;
- }
- }
- return null;
- }
-
- /**
- * Indicates whether a given word is found in the match rules for this orthography, i.e. rules looking for
- * non-dictionary items such as numbers, currency, etc.
- * @param wordChars The word to be tested.
- * @return True if and only if {@code word} matches at least one match rule for this orthography.
- */
- public boolean foundInMatchRules(final CharSequence wordChars) {
- for (int idIndex = 0; idIndex < getMatchRuleListIds().size(); idIndex ++) {
- final String ruleListId = matchRuleListIds.get(idIndex);
- final List<Pattern> validWordPatterns = server.getMatchRules(ruleListId);
- for (int index = 0; index < validWordPatterns.size(); index ++) {
- final Pattern pattern = validWordPatterns.get(index);
- final Matcher matcher = pattern.matcher(wordChars);
- if (matcher.matches()) {
- return true;
- }
- }
- }
- return false;
- }
-
@Override
- public Word4a recognizeWord(final CharSequence wordChars, final int offset, final int length,
- final Word.PartOfSpeech pos) {
- // TODO Auto-generated method stub
- return null;
+ public WritingSystem4a getWritingSystem() {
+ return this.writingSystem;
}
@Override
- public boolean isRecognizedWord(final CharSequence wordCharsIn, final int offset, final int length,
- final Word.PartOfSpeech pos) {
- /* TODO: For performance and memory, try to eliminate the following conversion. */
- final CharSequence wordChars = wordCharsIn.subSequence(offset, offset + length);
- if (wordChars.length() < 1) {
- return false;
- }
-
- final Stack<Dictionary> dictionaryStack = new Stack<Dictionary>();
-
- /* 1. Check exact matches in adhoc dictionaries. */
-// if (adhocDictionaries != null) {
-// dictionaryStack.addAll(adhocDictionaries);
-//
-// while (! dictionaryStack.isEmpty()) {
-// final Dictionary baseDictionary = dictionaryStack.pop();
-// /* Check the referenced dictionary and each of its imported dictionaries. */
-// if (baseDictionary.getWritingSystem().satisfies(this.writingSystem)) {
-// if (baseDictionary.getWord(wordChars, 0) == null) {
-// addImportedDictionaries(baseDictionary, dictionaryStack);
-// } else {
-// return true;
-// }
-// } else {
-// addImportedDictionaries(baseDictionary, dictionaryStack);
-// }
-// }
-// }
-
- /* 2. Check exact matches in standard dictionaries for the orthography. */
- dictionaryStack.clear();
- dictionaryStack.push(getDictionary());
- /* Check the referenced dictionary and each of its ancestor dictionaries. */
- while (! dictionaryStack.isEmpty()) {
- final Dictionary baseDictionary = dictionaryStack.pop();
- if (baseDictionary.getWord(wordChars, 0) == null) {
- addImportedDictionaries(baseDictionary, dictionaryStack);
- } else {
- return true;
- }
- }
-
- /* 3. Check the match rules. */
- if (foundInMatchRules(wordChars)) {
- return true;
- }
-
- /* 4. Check for compound word. */
- if (CharSequenceUtils.contains(wordChars, '-')) {
- final String[] components = this.compoundWordBreaker.split(wordChars);
- final boolean[] componentsValid = new boolean[components.length];
- for (int index = 0; index < components.length; index ++) {
- final String component = components[index];
- componentsValid[index] = isRecognizedWord(component, 0, component.length(), pos);
- }
- if (BooleanUtils.allTrue(componentsValid)) {
- return true;
- }
- }
-
- /* 5. Check derivative matches in adhoc dictionaries. */
-// dictionaryStack.clear();
-// if (adhocDictionaries != null) {
-// dictionaryStack.addAll(adhocDictionaries);
-// while (! dictionaryStack.isEmpty()) {
-// final Dictionary baseDictionary = dictionaryStack.pop();
-// /* Check the referenced dictionary and each of its ancestor dictionaries. */
-// if (isDerivativeFound(wordChars, baseDictionary)) {
-// return true;
-// } else {
-// addImportedDictionaries(baseDictionary, dictionaryStack);
-// }
-// }
-// }
-
- /* 6. Check derivative matches in standard dictionaries for the orthography. */
- dictionaryStack.clear();
- dictionaryStack.push(getDictionary());
- /* Check the referenced dictionary and each of its ancestor dictionaries. */
- while (! dictionaryStack.empty()) {
- final Dictionary baseDictionary = dictionaryStack.pop();
- if (isDerivativeFound(wordChars, baseDictionary)) {
- return true;
- } else {
- addImportedDictionaries(baseDictionary, dictionaryStack);
- }
- }
-
- /* Not found in any dictionary. */
- /* If the first character is uppercase, convert to lowercase and try again. Discussion: For English at least, we
- * do not want the opposite effect, i.e. to convert words starting with lowercase have the first char converted
- * to uppercase. If the word is in the dictionary as a proper noun, we should treat a failure to capitalize it
- * as a spelling error. Also, we do not want to generally convert the entire word to lowercase, as capital
- * letters in the middle of the word should normally be treated as a spelling error. For exceptions to this
- * last rule, users should enter the oddly-capitalized word into a dictionary in that form.
- * TODO: This capability should be included in the orthography configuration instead of being hard-coded
- * here. */
- final int indexFirstLetter = CharacterUtils.firstLetter(wordChars);
- if (indexFirstLetter > -1) {
- if (Character.isUpperCase(wordChars.charAt(indexFirstLetter))) {
- final StringBuilder builder = new StringBuilder(wordChars);
- builder.setCharAt(indexFirstLetter, Character.toLowerCase(wordChars.charAt(indexFirstLetter)));
- return isRecognizedWord(builder, offset, length, pos);
- }
- }
-
- return false;
+ public Pattern getCompoundWordBreaker() {
+ return this.compoundWordBreaker;
}
- /**
- * Adds all imported dictionaries in a given {@link Dictionary} instance to stack of such dictionary IDs.
- * @param baseDictionary The base dictionary whose imports are to be added to the stack.
- * @param dictionaryStack The stack to which the Dictionary IDs should be added.
- */
- private void addImportedDictionaries(final Dictionary baseDictionary, final Stack<Dictionary> dictionaryStack) {
- final List<String> imported = baseDictionary.getImportedDictionaries();
- for (int index = 0; index < imported.size(); index ++) {
- final String dictId = imported.get(index);
- final Dictionary importedDictionary = this.server.getDictionary(dictId);
- if (importedDictionary != null) {
- dictionaryStack.push(importedDictionary);
- }
- }
- }
-
- /**
- * Indicates whether a given word can be found in a given dictionary after considering the derivative rules in this
- * orthography.
- * @param wordChars The word characters.
- * @param dictionary The dictionary to be searched.
- * @return True if and only if a root for {@code wordChars} can be found in {@code dictionary} using the derivative
- * rules in this orthography.
- */
- private boolean isDerivativeFound(final CharSequence wordChars, final Dictionary dictionary) {
- for (int listIndex = 0; listIndex < this.derivativeRuleListIds.size(); listIndex ++) {
- final String ruleListKey = this.derivativeRuleListIds.get(listIndex);
- final List<DerivativePattern> patternList = this.server.getDerivativePatterns(ruleListKey);
- for (int patternIndex = 0; patternIndex < patternList.size(); patternIndex ++) {
- final DerivativePattern pattern = patternList.get(patternIndex);
- if (pattern.findFirstApplicableRule(wordChars, dictionary) != null) {
- return true;
- }
- }
- }
- return false;
- }
-
@Override
- public Word4a hyphenateUnrecognizedWord(final CharSequence word, final int offset, final int length) {
- /* The character sequence containing the characters in the word that we are looking for. */
- final CharSequence chars = word.subSequence(offset, offset + length);
- Word4a hyphenatedWord = null;
-
- /* Look in the dictionary first, as it should be more accurate. */
- final SegmentDictionary dictionary = getDictionary();
- if (dictionary != null) {
- hyphenatedWord = dictionary.getWord(chars.toString().toLowerCase(), 0);
- if (hyphenatedWord == null) {
- hyphenatedWord = findDerivatives(chars);
- }
- }
-
-
- if (hyphenatedWord == null) {
- /* The word was not found in the dictionary. Try the hyphenation patterns. */
- final PatternTree patternTree = getHyphenationPatterns();
- if (patternTree == null) {
- return null;
- }
- hyphenatedWord = patternTree.hyphenate(chars, 0, length);
- }
-
- if (hyphenatedWord == null) {
- return null;
- }
-
- final boolean capitalized = CharSequenceUtils.equalToCapitalized(hyphenatedWord.getNormalizedContent(), chars);
- if (capitalized) {
- return new CapitalizedWord(hyphenatedWord);
- }
- final boolean uppercase = CharSequenceUtils.equalToUppercase(hyphenatedWord.getNormalizedContent(), chars);
- if (uppercase) {
- return new UppercaseWord(hyphenatedWord);
- }
-
- if (CharSequenceUtils.hasAnyUppercase(chars)) {
- /* There is unexpected capitalization. */
- return new ExactWord(hyphenatedWord, chars.toString());
- }
-
- return hyphenatedWord;
+ protected void stackStandardDictionaries(final Stack<Dictionary> dictionaryStack) {
+ dictionaryStack.push(this.getDictionary());
}
- @Override
- public TokenFlow4a tokenize(final CharSequence characters, final int startIndex, final int length)
- throws OrthographyException {
- final TokenFlow4a wordSequence = new TokenFlow4a();
- final CharSequence sequence = characters.subSequence(startIndex, startIndex + length);
- final Lexer4a lexer = this.server.getLexer();
- lexer.clear();
- lexer.addUntokenized(sequence, this.writingSystem);
- lexer.lock();
-
- while (lexer.hasNext()) {
- final Lexer.Token token = lexer.next();
- final CharSequence chunk = token.getText();
- if (chunk.length() < 1) {
- continue;
- }
-
- if (token.getTokenType() == TokenType.WORD) {
- /* Chunk is a word. */
- Word4a word = recognizeWord(chunk, 0, chunk.length(), null);
- if (word == null) {
- word = hyphenateUnrecognizedWord(chunk, 0, chunk.length());
- }
- if (word == null) {
- word = new StringWord(0, chunk);
- }
- wordSequence.addToken(word);
- } else {
- /* Chunk is interword content. */
- parseInterwordContent(chunk, wordSequence);
- }
- }
- lexer.clear();
- return wordSequence;
- }
-
- /**
- * Converts interword characters to instances of {@link Punctuation4a} or {@link Whitespace4a}, and adds them as
- * tokens to a given word sequence.
- * @param interword The interword characters to be tokenized.
- * @param wordSequence The sequence of tokens to which tokens will be added.
- * @throws OrthographyException If {@code interword} cannot be converted to punctuation and whitespace tokens.
- */
- private void parseInterwordContent(final CharSequence interword, final TokenFlow4a wordSequence)
- throws OrthographyException {
- int index = 0;
- while (index < interword.length()) {
- final char contentChar = interword.charAt(index);
- final Punctuation4a punctuation = Punctuation4a.findInstance(interword.subSequence(index, index + 1));
- if (punctuation != null) {
- wordSequence.addToken(punctuation);
- index ++;
- } else if (Character.isWhitespace(contentChar)) {
- final Whitespace4a whitespace = Whitespace4a.findInstance(interword.subSequence(index, index + 1));
- wordSequence.addToken(whitespace);
- index ++;
- } else {
- throw new OrthographyException("Don't know how to handle interword content: " + contentChar);
- }
- }
-
- }
-
- @Override
- public boolean canBreakLineMidWord() {
- // TODO Auto-generated method stub
- return false;
- }
-
- /**
- * Returns the writing system for this orthography.
- * @return The writing system for this orthography.
- */
- public WritingSystem4a getWritingSystem() {
- return this.writingSystem;
- }
-
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aVariant.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aVariant.java 2023-10-03 16:55:32 UTC (rev 13307)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aVariant.java 2023-10-04 12:55:55 UTC (rev 13308)
@@ -28,12 +28,14 @@
package org.foray.orthography;
-import org.axsl.fotree.text.FoTextTokenFlow;
-import org.axsl.fotree.text.FoWord;
+import org.foray.common.i18n.WritingSystem4a;
+
import org.axsl.orthography.Dictionary;
-import org.axsl.orthography.OrthographyException;
-import org.axsl.orthography.Word.PartOfSpeech;
+import java.util.List;
+import java.util.Stack;
+import java.util.regex.Pattern;
+
/**
* Wrapper around an {@link Orthography4a} instance that overrides some properties of it.
* This is useful for cases where a one-off use is needed, such as a document that has its own dictionary(s).
@@ -44,7 +46,7 @@
private Orthography4aStandard wrapped;
/** The dictionary overriding that provided by {@link #wrapped}. */
- private Dictionary dictionary;
+ private SegmentDictionary dictionary;
/**
* Constructor.
@@ -51,42 +53,57 @@
* @param wrapped The base server that is being wrapped and overriden by this instance.
* @param dictionary The dictionary overriding that provided by {@code #wrapped}.
*/
- public Orthography4aVariant(final Orthography4aStandard wrapped, final Dictionary dictionary) {
+ public Orthography4aVariant(final Orthography4aStandard wrapped, final SegmentDictionary dictionary) {
this.wrapped = wrapped;
this.dictionary = dictionary;
}
@Override
- public FoWord recognizeWord(final CharSequence wordChars, final int offset, final int length,
- final PartOfSpeech pos) {
- return this.wrapped.recognizeWord(wordChars, offset, length, pos);
+ public OrthographyServer4a getServer() {
+ return this.wrapped.getServer();
}
@Override
- public FoWord hyphenateUnrecognizedWord(final CharSequence wordChars, final int offset, final int length) {
- return this.wrapped.hyphenateUnrecognizedWord(wordChars, offset, length);
+ public SegmentDictionary getDictionary() {
+ return this.dictionary;
}
@Override
- public FoTextTokenFlow tokenize(final CharSequence wordSequenceChars, final int offset, final int length)
- throws OrthographyException {
- return this.wrapped.tokenize(wordSequenceChars, offset, length);
+ public List<String> getMatchRuleListIds() {
+ return this.wrapped.getMatchRuleListIds();
}
@Override
- public boolean isRecognizedWord(final CharSequence wordChars, final int offset, final int length,
- final PartOfSpeech pos) {
- return this.wrapped.isRecognizedWord(wordChars, offset, length, pos);
+ public List<String> getDerivativeRuleListIds() {
+ return this.wrapped.getDerivativeRuleListIds();
}
@Override
- public boolean canBreakLineMidWord() {
- return this.wrapped.canBreakLineMidWord();
+ public List<WordWrapperFactory<?>> getWordWrapperFactories() {
+ return this.wrapped.getWordWrapperFactories();
}
@Override
- public Dictionary getDictionary() {
- return this.dictionary;
+ public Pattern getCompoundWordBreaker() {
+ return this.wrapped.getCompoundWordBreaker();
}
+ @Override
+ public WritingSystem4a getWritingSystem() {
+ return this.wrapped.getWritingSystem();
+ }
+
+ @Override
+ public PatternTree getHyphenationPatterns() {
+ return this.wrapped.getHyphenationPatterns();
+ }
+
+ @Override
+ protected void stackStandardDictionaries(final Stack<Dictionary> dictionaryStack) {
+ /* Place the standard dictionary at the bottom of the stack. */
+ dictionaryStack.push(this.wrapped.getDictionary());
+ /* Place the override dictionary on the top of the stack so that it will be processed first. */
+ dictionaryStack.push(this.dictionary);
+ }
+
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java 2023-10-03 16:55:32 UTC (rev 13307)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java 2023-10-04 12:55:55 UTC (rev 13308)
@@ -30,6 +30,9 @@
import org.axsl.fotree.text.FoOrthographyServer;
import org.axsl.i18n.WritingSystem;
+import java.util.List;
+import java.util.regex.Pattern;
+
/**
* Abstract superclass for FOray orthography servers.
*/
@@ -41,4 +44,18 @@
@Override
public abstract Lexer4a getLexer();
+ /**
+ * Returns the list of match rules for a given Id.
+ * @param id The id of the match rules to be returned.
+ * @return The match rules for {@code id}.
+ */
+ public abstract List<Pattern> getMatchRules(String id);
+
+ /**
+ * Returns the list of derivative rules for a given Id.
+ * @param id The id of the derivative rules to be returned.
+ * @return The derivative rules for {@code id}.
+ */
+ public abstract List<DerivativePattern> getDerivativePatterns(String id);
+
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java 2023-10-03 16:55:32 UTC (rev 13307)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java 2023-10-04 12:55:55 UTC (rev 13308)
@@ -161,11 +161,7 @@
this.matchRuleLists.put(id, matchRules);
}
- /**
- * Returns the list of match rules for a given Id.
- * @param id The id of the match rules to be returned.
- * @return The match rules for {@code id}.
- */
+ @Override
public List<Pattern> getMatchRules(final String id) {
return this.matchRuleLists.get(id);
}
@@ -182,11 +178,7 @@
this.derivativeRuleLists.put(id, derivativeRules);
}
- /**
- * Returns the list of derivative rules for a given Id.
- * @param id The id of the derivative rules to be returned.
- * @return The derivative rules for {@code id}.
- */
+ @Override
public List<DerivativePattern> getDerivativePatterns(final String id) {
return this.derivativeRuleLists.get(id);
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aVariant.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aVariant.java 2023-10-03 16:55:32 UTC (rev 13307)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aVariant.java 2023-10-04 12:55:55 UTC (rev 13308)
@@ -32,7 +32,9 @@
import org.axsl.orthography.Dictionary;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
+import java.util.regex.Pattern;
/**
* Wrapper around a fully-configured server that overrides certain properties of it that are unique to a specific
@@ -83,4 +85,14 @@
return this.wrapped.getOrthography(writingSystem);
}
+ @Override
+ public List<Pattern> getMatchRules(final String id) {
+ return this.wrapped.getMatchRules(id);
+ }
+
+ @Override
+ public List<DerivativePattern> getDerivativePatterns(final String id) {
+ return this.wrapped.getDerivativePatterns(id);
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-10-03 16:55:35
|
Revision: 13307
http://sourceforge.net/p/foray/code/13307
Author: victormote
Date: 2023-10-03 16:55:32 +0000 (Tue, 03 Oct 2023)
Log Message:
-----------
Conform to aXSL change: Remove passage of ad-hoc dictionaries when searching for words.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aStandard.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aVariant.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/WordChecker.java
trunk/foray/foray-pioneer/src/main/java/org/foray/pioneer/lb/SolitaryLineBreaker.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aStandard.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aStandard.java 2023-10-03 15:57:57 UTC (rev 13306)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aStandard.java 2023-10-03 16:55:32 UTC (rev 13307)
@@ -245,7 +245,7 @@
@Override
public Word4a recognizeWord(final CharSequence wordChars, final int offset, final int length,
- final Word.PartOfSpeech pos, final List<Dictionary> adhocDictionaries) {
+ final Word.PartOfSpeech pos) {
// TODO Auto-generated method stub
return null;
}
@@ -252,7 +252,7 @@
@Override
public boolean isRecognizedWord(final CharSequence wordCharsIn, final int offset, final int length,
- final Word.PartOfSpeech pos, final List<Dictionary> adhocDictionaries) {
+ final Word.PartOfSpeech pos) {
/* TODO: For performance and memory, try to eliminate the following conversion. */
final CharSequence wordChars = wordCharsIn.subSequence(offset, offset + length);
if (wordChars.length() < 1) {
@@ -262,24 +262,24 @@
final Stack<Dictionary> dictionaryStack = new Stack<Dictionary>();
/* 1. Check exact matches in adhoc dictionaries. */
- if (adhocDictionaries != null) {
- dictionaryStack.addAll(adhocDictionaries);
+// if (adhocDictionaries != null) {
+// dictionaryStack.addAll(adhocDictionaries);
+//
+// while (! dictionaryStack.isEmpty()) {
+// final Dictionary baseDictionary = dictionaryStack.pop();
+// /* Check the referenced dictionary and each of its imported dictionaries. */
+// if (baseDictionary.getWritingSystem().satisfies(this.writingSystem)) {
+// if (baseDictionary.getWord(wordChars, 0) == null) {
+// addImportedDictionaries(baseDictionary, dictionaryStack);
+// } else {
+// return true;
+// }
+// } else {
+// addImportedDictionaries(baseDictionary, dictionaryStack);
+// }
+// }
+// }
- while (! dictionaryStack.isEmpty()) {
- final Dictionary baseDictionary = dictionaryStack.pop();
- /* Check the referenced dictionary and each of its imported dictionaries. */
- if (baseDictionary.getWritingSystem().satisfies(this.writingSystem)) {
- if (baseDictionary.getWord(wordChars, 0) == null) {
- addImportedDictionaries(baseDictionary, dictionaryStack);
- } else {
- return true;
- }
- } else {
- addImportedDictionaries(baseDictionary, dictionaryStack);
- }
- }
- }
-
/* 2. Check exact matches in standard dictionaries for the orthography. */
dictionaryStack.clear();
dictionaryStack.push(getDictionary());
@@ -304,7 +304,7 @@
final boolean[] componentsValid = new boolean[components.length];
for (int index = 0; index < components.length; index ++) {
final String component = components[index];
- componentsValid[index] = isRecognizedWord(component, 0, component.length(), pos, adhocDictionaries);
+ componentsValid[index] = isRecognizedWord(component, 0, component.length(), pos);
}
if (BooleanUtils.allTrue(componentsValid)) {
return true;
@@ -312,19 +312,19 @@
}
/* 5. Check derivative matches in adhoc dictionaries. */
- dictionaryStack.clear();
- if (adhocDictionaries != null) {
- dictionaryStack.addAll(adhocDictionaries);
- while (! dictionaryStack.isEmpty()) {
- final Dictionary baseDictionary = dictionaryStack.pop();
- /* Check the referenced dictionary and each of its ancestor dictionaries. */
- if (isDerivativeFound(wordChars, baseDictionary)) {
- return true;
- } else {
- addImportedDictionaries(baseDictionary, dictionaryStack);
- }
- }
- }
+// dictionaryStack.clear();
+// if (adhocDictionaries != null) {
+// dictionaryStack.addAll(adhocDictionaries);
+// while (! dictionaryStack.isEmpty()) {
+// final Dictionary baseDictionary = dictionaryStack.pop();
+// /* Check the referenced dictionary and each of its ancestor dictionaries. */
+// if (isDerivativeFound(wordChars, baseDictionary)) {
+// return true;
+// } else {
+// addImportedDictionaries(baseDictionary, dictionaryStack);
+// }
+// }
+// }
/* 6. Check derivative matches in standard dictionaries for the orthography. */
dictionaryStack.clear();
@@ -353,7 +353,7 @@
if (Character.isUpperCase(wordChars.charAt(indexFirstLetter))) {
final StringBuilder builder = new StringBuilder(wordChars);
builder.setCharAt(indexFirstLetter, Character.toLowerCase(wordChars.charAt(indexFirstLetter)));
- return isRecognizedWord(builder, offset, length, pos, adhocDictionaries);
+ return isRecognizedWord(builder, offset, length, pos);
}
}
@@ -463,7 +463,7 @@
if (token.getTokenType() == TokenType.WORD) {
/* Chunk is a word. */
- Word4a word = recognizeWord(chunk, 0, chunk.length(), null, null);
+ Word4a word = recognizeWord(chunk, 0, chunk.length(), null);
if (word == null) {
word = hyphenateUnrecognizedWord(chunk, 0, chunk.length());
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aVariant.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aVariant.java 2023-10-03 15:57:57 UTC (rev 13306)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aVariant.java 2023-10-03 16:55:32 UTC (rev 13307)
@@ -34,8 +34,6 @@
import org.axsl.orthography.OrthographyException;
import org.axsl.orthography.Word.PartOfSpeech;
-import java.util.List;
-
/**
* Wrapper around an {@link Orthography4a} instance that overrides some properties of it.
* This is useful for cases where a one-off use is needed, such as a document that has its own dictionary(s).
@@ -60,8 +58,8 @@
@Override
public FoWord recognizeWord(final CharSequence wordChars, final int offset, final int length,
- final PartOfSpeech pos, final List<Dictionary> adhocDictionaries) {
- return this.wrapped.recognizeWord(wordChars, offset, length, pos, adhocDictionaries);
+ final PartOfSpeech pos) {
+ return this.wrapped.recognizeWord(wordChars, offset, length, pos);
}
@Override
@@ -77,8 +75,8 @@
@Override
public boolean isRecognizedWord(final CharSequence wordChars, final int offset, final int length,
- final PartOfSpeech pos, final List<Dictionary> adhocDictionaries) {
- return this.wrapped.isRecognizedWord(wordChars, offset, length, pos, adhocDictionaries);
+ final PartOfSpeech pos) {
+ return this.wrapped.isRecognizedWord(wordChars, offset, length, pos);
}
@Override
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2023-10-03 15:57:57 UTC (rev 13306)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2023-10-03 16:55:32 UTC (rev 13307)
@@ -363,7 +363,7 @@
final CharSequence text = token.getText();
- if (orthography.isRecognizedWord(text, 0, text.length(), null, this.adhocDictionaries)) {
+ if (orthography.isRecognizedWord(text, 0, text.length(), null)) {
return;
} else {
final Lexer.Token savedToken = token.getImmutableCopy();
@@ -371,7 +371,7 @@
final Lexer.Token nextToken = lexer.peekNext();
if (nextToken.getTokenType() == Lexer.TokenType.AMBIGUOUS_TRAILING_PUNCTUATION) {
final String testWord = savedToken.getText().toString() + nextToken.getText().toString();
- if (orthography.isRecognizedWord(testWord, 0, testWord.length(), null, adhocDictionaries)) {
+ if (orthography.isRecognizedWord(testWord, 0, testWord.length(), null)) {
return;
}
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/WordChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/WordChecker.java 2023-10-03 15:57:57 UTC (rev 13306)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/WordChecker.java 2023-10-03 16:55:32 UTC (rev 13307)
@@ -146,7 +146,7 @@
if (word.length() < 1) {
return;
}
- if (this.currentOrthographyConfig.isRecognizedWord(word, 0, word.length(), null, this.currentDictionaries)) {
+ if (this.currentOrthographyConfig.isRecognizedWord(word, 0, word.length(), null)) {
this.output.println("Found: " + word);
return;
} else {
Modified: trunk/foray/foray-pioneer/src/main/java/org/foray/pioneer/lb/SolitaryLineBreaker.java
===================================================================
--- trunk/foray/foray-pioneer/src/main/java/org/foray/pioneer/lb/SolitaryLineBreaker.java 2023-10-03 15:57:57 UTC (rev 13306)
+++ trunk/foray/foray-pioneer/src/main/java/org/foray/pioneer/lb/SolitaryLineBreaker.java 2023-10-03 16:55:32 UTC (rev 13307)
@@ -384,7 +384,7 @@
// Extract the word that should be evaluated by the hyphenation system.
final int wordSize = wordSize(this.currentChars, actualWordStart);
// See if there are discretionary hyphenation points.
- Word hyph = orthographyConfig.recognizeWord(this.currentChars, actualWordStart, wordSize, null, null);
+ Word hyph = orthographyConfig.recognizeWord(this.currentChars, actualWordStart, wordSize, null);
if (hyph == null) {
hyph = orthographyConfig.hyphenateUnrecognizedWord(this.currentChars, actualWordStart, wordSize);
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-10-03 15:58:00
|
Revision: 13306
http://sourceforge.net/p/foray/code/13306
Author: victormote
Date: 2023-10-03 15:57:57 +0000 (Tue, 03 Oct 2023)
Log Message:
-----------
Allow orthography server to be overridden by a document-specific variant.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
Added Paths:
-----------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aVariant.java
Added: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java (rev 0)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java 2023-10-03 15:57:57 UTC (rev 13306)
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2023 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+package org.foray.orthography;
+
+import org.axsl.fotree.text.FoOrthographyServer;
+import org.axsl.i18n.WritingSystem;
+
+/**
+ * Abstract superclass for FOray orthography servers.
+ */
+public abstract class OrthographyServer4a implements FoOrthographyServer {
+
+ @Override
+ public abstract Orthography4a getOrthography(WritingSystem writingSystem);
+
+ @Override
+ public abstract Lexer4a getLexer();
+
+}
Property changes on: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java 2023-10-03 15:36:09 UTC (rev 13305)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java 2023-10-03 15:57:57 UTC (rev 13306)
@@ -35,7 +35,6 @@
import org.foray.orthography.util.OrthographyParser;
-import org.axsl.fotree.text.FoOrthographyServer;
import org.axsl.i18n.WritingSystem;
import org.axsl.orthography.OrthographyException;
@@ -55,7 +54,7 @@
/**
* This class is the main entry point to the hyphenation package.
*/
-public class OrthographyServer4aStandard implements FoOrthographyServer {
+public class OrthographyServer4aStandard extends OrthographyServer4a {
/** The logger. */
private Logger logger = LoggerFactory.getLogger(OrthographyServer4aStandard.class);
Added: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aVariant.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aVariant.java (rev 0)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aVariant.java 2023-10-03 15:57:57 UTC (rev 13306)
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2023 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.orthography;
+
+import org.axsl.i18n.WritingSystem;
+import org.axsl.orthography.Dictionary;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Wrapper around a fully-configured server that overrides certain properties of it that are unique to a specific
+ * document.
+ */
+public class OrthographyServer4aVariant extends OrthographyServer4a {
+
+ /** The wrapped server. */
+ private OrthographyServer4aStandard wrapped;
+
+ /** Map of writing systems and their orthographies. */
+ private Map<WritingSystem, Orthography4aVariant> orthographyMap =
+ new HashMap<WritingSystem, Orthography4aVariant>();
+
+ /**
+ * Constructor.
+ * @param wrapped The wrapped server.
+ */
+ public OrthographyServer4aVariant(final OrthographyServer4aStandard wrapped) {
+ this.wrapped = wrapped;
+ }
+
+ /**
+ * Register a variant orthography with this server.
+ * @param writingSystem The writing system by which the variant orthography can be found.
+ * @param variant The variant orthography to be used when one is needed for {@code writingSystem}.
+ */
+ public void registerOrthography(final WritingSystem writingSystem, final Orthography4aVariant variant) {
+ this.orthographyMap.put(writingSystem, variant);
+ }
+
+ @Override
+ public Dictionary getDictionary(final String dictionaryId) {
+ return this.wrapped.getDictionary(dictionaryId);
+ }
+
+ @Override
+ public Lexer4a getLexer() {
+ return this.wrapped.getLexer();
+ }
+
+ @Override
+ public Orthography4a getOrthography(final WritingSystem writingSystem) {
+ final Orthography4aVariant variant = this.orthographyMap.get(writingSystem);
+ if (variant != null) {
+ return variant;
+ }
+ return this.wrapped.getOrthography(writingSystem);
+ }
+
+}
Property changes on: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aVariant.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2023-10-03 15:36:09 UTC (rev 13305)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2023-10-03 15:57:57 UTC (rev 13306)
@@ -31,8 +31,12 @@
import org.foray.common.i18n.WritingSystem4a;
import org.foray.common.primitive.ObjectUtils;
import org.foray.orthography.Lexer4a;
+import org.foray.orthography.Orthography4a;
import org.foray.orthography.Orthography4aStandard;
+import org.foray.orthography.Orthography4aVariant;
+import org.foray.orthography.OrthographyServer4a;
import org.foray.orthography.OrthographyServer4aStandard;
+import org.foray.orthography.OrthographyServer4aVariant;
import org.foray.orthography.OrthographyServerConfig;
import org.foray.orthography.SegmentDictionary;
import org.foray.xml.SaxParser;
@@ -157,7 +161,7 @@
private Stack<Element> elementStack = new Stack<Element>();
/** The Orthography server. */
- private OrthographyServer4aStandard server;
+ private OrthographyServer4a server;
/** The lexer. */
private Lexer4a lexer;
@@ -195,10 +199,12 @@
final OrthographyServerConfig serverConfig = new OrthographyServerConfig();
serverConfig.setOrthographyConfigurationLocation(orthographyConfigPath);
- this.server = new OrthographyServer4aStandard(serverConfig);
- this.lexer = this.server.getLexer();
-
- if (adhocDictionaryPaths != null) {
+ final OrthographyServer4aStandard baseServer = new OrthographyServer4aStandard(serverConfig);
+ if (adhocDictionaryPaths == null
+ || adhocDictionaryPaths.size() < 1) {
+ this.server = baseServer;
+ } else {
+ final OrthographyServer4aVariant variantServer = new OrthographyServer4aVariant(baseServer);
for (URL adhocDictionaryPath : adhocDictionaryPaths) {
final DictionaryParser dictParser = new DictionaryParser();
this.logger.info("Parsing Ad-hoc Dictionary: " + adhocDictionaryPath.toExternalForm());
@@ -205,10 +211,20 @@
dictParser.setLogDictionaryProblems(true);
final InputSource source = new InputSource(adhocDictionaryPath.toExternalForm());
final List<SegmentDictionary> dictionaries = dictParser.parse(source);
+ for (SegmentDictionary dictionary : dictionaries) {
+ final WritingSystem writingSystem = dictionary.getWritingSystem();
+ final Orthography4aStandard baseOrthography = baseServer.getOrthography(writingSystem);
+ final Orthography4aVariant variantOrthography =
+ new Orthography4aVariant(baseOrthography, dictionary);
+ variantServer.registerOrthography(writingSystem, variantOrthography);
+ }
this.adhocDictionaries.addAll(dictionaries);
}
+ this.server = variantServer;
}
+
+ this.lexer = this.server.getLexer();
if (xmlCatalogPath != null) {
this.entityResolver = SaxUtils.getEntityResolver(xmlCatalogPath);
}
@@ -314,7 +330,7 @@
/* Writing system should never be null, but orthography could be. */
WritingSystem lastWritingSystem = null;
- Orthography4aStandard orthography = null;
+ Orthography4a orthography = null;
if (lexer.hasNext()) {
lastWritingSystem = lexer.peekNext().getWritingSystem();
orthography = this.server.getOrthography(lastWritingSystem);
@@ -338,7 +354,7 @@
* @param token The word token to be checked.
* @param location The location of the word in the original document.
*/
- private void checkWord(final Orthography4aStandard orthography, final Lexer.Token token, final String location) {
+ private void checkWord(final Orthography4a orthography, final Lexer.Token token, final String location) {
if (orthography == null) {
/* Treat as an error. */
this.output.println("(no config) " + token.getText());
@@ -388,7 +404,7 @@
final String message = String.format("Writing system not found for: %s", languageAttr);
this.output.println(message + getLocationString(getLocator()));
} else {
- final Orthography4aStandard config = this.server.getOrthography(writingSystem);
+ final Orthography4a config = this.server.getOrthography(writingSystem);
if (config == null) {
final String message = String.format("Unconfigured orthography: %s", languageAttr);
this.output.println(message + getLocationString(getLocator()));
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-10-03 15:36:12
|
Revision: 13305
http://sourceforge.net/p/foray/code/13305
Author: victormote
Date: 2023-10-03 15:36:09 +0000 (Tue, 03 Oct 2023)
Log Message:
-----------
Rename class in preparation for extraction of an abstract superclass.
Modified Paths:
--------------
trunk/foray/foray-app/src/main/java/org/foray/app/ForaySpecific.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerIcu4jBreakIterator.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerJavaBreakIterator.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aStandard.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/WordChecker.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/Orthography4aTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/OrthographyServer4aTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/PatternTreeTests.java
Added Paths:
-----------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java
Removed Paths:
-------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
Modified: trunk/foray/foray-app/src/main/java/org/foray/app/ForaySpecific.java
===================================================================
--- trunk/foray/foray-app/src/main/java/org/foray/app/ForaySpecific.java 2023-10-03 11:31:27 UTC (rev 13304)
+++ trunk/foray/foray-app/src/main/java/org/foray/app/ForaySpecific.java 2023-10-03 15:36:09 UTC (rev 13305)
@@ -162,7 +162,7 @@
configuration.optionSerializedHyphenationPatternsBaseDirectory());
try {
- return new org.foray.orthography.OrthographyServer4a(config);
+ return new org.foray.orthography.OrthographyServer4aStandard(config);
} catch (final OrthographyException e) {
throw new ForayException(e);
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2023-10-03 11:31:27 UTC (rev 13304)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2023-10-03 15:36:09 UTC (rev 13305)
@@ -310,7 +310,7 @@
* Constructor.
* @param server The parent server.
*/
- public Lexer4a(final OrthographyServer4a server) {
+ public Lexer4a(final OrthographyServer4aStandard server) {
// this.server = server;
this.isLocked = false;
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerIcu4jBreakIterator.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerIcu4jBreakIterator.java 2023-10-03 11:31:27 UTC (rev 13304)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerIcu4jBreakIterator.java 2023-10-03 15:36:09 UTC (rev 13305)
@@ -55,7 +55,7 @@
* Constructor.
* @param server The parent server.
*/
- public LexerIcu4jBreakIterator(final OrthographyServer4a server) {
+ public LexerIcu4jBreakIterator(final OrthographyServer4aStandard server) {
super(server);
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerJavaBreakIterator.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerJavaBreakIterator.java 2023-10-03 11:31:27 UTC (rev 13304)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerJavaBreakIterator.java 2023-10-03 15:36:09 UTC (rev 13305)
@@ -46,7 +46,7 @@
* Constructor.
* @param server The parent server.
*/
- public LexerJavaBreakIterator(final OrthographyServer4a server) {
+ public LexerJavaBreakIterator(final OrthographyServer4aStandard server) {
super(server);
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aStandard.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aStandard.java 2023-10-03 11:31:27 UTC (rev 13304)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aStandard.java 2023-10-03 15:36:09 UTC (rev 13305)
@@ -69,7 +69,7 @@
private List<WordWrapperFactory<?>> wordWrapperFactories = new ArrayList<WordWrapperFactory<?>>();
/** The parent hyphenation server. */
- private OrthographyServer4a server;
+ private OrthographyServer4aStandard server;
/* TODO: Following orthography-specific config needs to be moved to XML or subclass. */
/** Character delimiting a compound word. NB: This variable may be orthography specific, and may therefore need to
@@ -87,7 +87,7 @@
* @param server The parent hyphenation server.
* @param writingSystem The writing system for this orthography.
*/
- public Orthography4aStandard(final OrthographyServer4a server, final WritingSystem4a writingSystem) {
+ public Orthography4aStandard(final OrthographyServer4aStandard server, final WritingSystem4a writingSystem) {
this.server = server;
this.writingSystem = writingSystem;
}
Deleted: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java 2023-10-03 11:31:27 UTC (rev 13304)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java 2023-10-03 15:36:09 UTC (rev 13305)
@@ -1,235 +0,0 @@
-/*
- * Copyright 2004 The FOray Project.
- * http://www.foray.org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This work is in part derived from the following work(s), used with the
- * permission of the licensor:
- * Apache FOP, licensed by the Apache Software Foundation
- *
- */
-
-/*
- * $LastChangedRevision$
- * $LastChangedDate$
- * $LastChangedBy$
- */
-
-/*
- * Known contributors:
- * @author Carlos Villegas <ca...@un...> (original author)
- */
-
-package org.foray.orthography;
-
-import org.foray.orthography.util.OrthographyParser;
-
-import org.axsl.fotree.text.FoOrthographyServer;
-import org.axsl.i18n.WritingSystem;
-import org.axsl.orthography.OrthographyException;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.xml.sax.EntityResolver;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Pattern;
-
-/**
- * This class is the main entry point to the hyphenation package.
- */
-public class OrthographyServer4a implements FoOrthographyServer {
-
- /** The logger. */
- private Logger logger = LoggerFactory.getLogger(OrthographyServer4a.class);
-
-// /** The configuration for this server. */
-// private OrthographyServerConfig config;
-
- /** An EntityResolver to be used by XML parsers (for handling DTD catalogs, etc.). */
- private EntityResolver entityResolver = null;
-
- /** Map of writing systems and their orthographies. */
- private Map<WritingSystem, Orthography4aStandard> orthographyMap =
- new HashMap<WritingSystem, Orthography4aStandard>();
-
- /** Map whose key is a dictionary ID, and whose value is the matching dictionary resource. */
- private Map<String, DictionaryResource> dictionaryMap = new HashMap<String, DictionaryResource>();
-
- /** The map of match rule lists, keyed by id. */
- private Map<String, List<Pattern>> matchRuleLists = new HashMap<String, List<Pattern>>();
-
- /** The map of derivative rule lists, keyed by id. */
- private Map<String, List<DerivativePattern>> derivativeRuleLists = new HashMap<String, List<DerivativePattern>>();
-
- /** Lazily-created string tokenizer. */
- private Lexer4a lexer;
-
- /**
- * Constructor.
- * @param config The configuration for this server.
- * @throws OrthographyException For errors parsing configuration.
- */
- public OrthographyServer4a(final OrthographyServerConfig config) throws OrthographyException {
-// this.config = config;
- if (config.getOrthographyConfigurationLocation() == null) {
- throw new OrthographyException("Orthography configuration location has not been provided");
- } else {
- InputStream inputStream = null;
- try {
- inputStream = config.getOrthographyConfigurationLocation().openStream();
- } catch (final IOException e) {
- throw new OrthographyException(e);
- }
- final InputSource inputSource = new InputSource(inputStream);
-
- final OrthographyParser parser = new OrthographyParser(this);
- try {
- parser.parse(inputSource);
- } catch (IOException | SAXException e) {
- throw new OrthographyException(e);
- }
- }
- }
-
- /**
- * Returns the EntityResolver for this server.
- * @return The EntityResolver.
- */
- public EntityResolver getEntityResolver() {
- return this.entityResolver;
- }
-
- /**
- * Sets the EntityResolver for this server.
- * @param entityResolver The new EntityResolver.
- */
- public void setEntityResolver(final EntityResolver entityResolver) {
- this.entityResolver = entityResolver;
- }
-
- /**
- * Registers a configuration for a given orthography.
- * @param writingSystem The orthography for which the configuration should be registered.
- * @param orthography The configuration for {@code orthography}.
- */
- public void registerOrthography(final WritingSystem writingSystem, final Orthography4aStandard orthography) {
- this.orthographyMap.put(writingSystem, orthography);
- final DictionaryResource resource = orthography.getDictionaryResource();
- /* TODO: If the new resources is the same as the existing one, ignore. Otherwise, log a warning and ignore the
- * new resource. */
-// if (this.dictionaryMap.get(resource.getId()) != null) {
-// throw new IllegalStateException(String.format(
-// "Dictionary already registered for ID: %1s", resource.getId()));
-// }
- /* Not every orthography has a dictionary resource. */
- if (resource != null) {
- this.dictionaryMap.put(resource.getId(), resource);
- }
- }
-
- @Override
- public Orthography4aStandard getOrthography(final WritingSystem writingSystem) {
- return this.orthographyMap.get(writingSystem);
- }
-
- /**
- * Registers a list of match rules.
- * @param id The id of the match rules to be registered.
- * @param matchRules The match rules being registered.
- */
- public void registerMatchRules(final String id, final List<Pattern> matchRules) {
- if (this.matchRuleLists.get(id) != null) {
- throw new IllegalArgumentException("Match Rules already exist for id: " + id);
- }
- this.matchRuleLists.put(id, matchRules);
- }
-
- /**
- * Returns the list of match rules for a given Id.
- * @param id The id of the match rules to be returned.
- * @return The match rules for {@code id}.
- */
- public List<Pattern> getMatchRules(final String id) {
- return this.matchRuleLists.get(id);
- }
-
- /**
- * Registers a list of derivative rules.
- * @param id The id of the derivative rules to be registered.
- * @param derivativeRules The derivative rules being registered.
- */
- public void registerDerivativeRules(final String id, final List<DerivativePattern> derivativeRules) {
- if (this.derivativeRuleLists.get(id) != null) {
- throw new IllegalArgumentException("Derivative Rules already exist for id: " + id);
- }
- this.derivativeRuleLists.put(id, derivativeRules);
- }
-
- /**
- * Returns the list of derivative rules for a given Id.
- * @param id The id of the derivative rules to be returned.
- * @return The derivative rules for {@code id}.
- */
- public List<DerivativePattern> getDerivativePatterns(final String id) {
- return this.derivativeRuleLists.get(id);
- }
-
- /**
- * Registers a dictionary.
- * @param id The id of the dictionary to be registered.
- * @param resource The dictionary resource being registered.
- */
- public void registerDictionary(final String id, final DictionaryResource resource) {
- if (this.dictionaryMap.get(id) != null) {
- throw new IllegalArgumentException("DictionaryResource already exist for id: " + id);
- }
- this.dictionaryMap.put(id, resource);
- }
-
- /**
- * Returns the dictionary resource for a given Id.
- * @param id The id of the dictionary to be returned.
- * @return The dictionary resource for {@code id}.
- */
- public DictionaryResource getDictionaryResource(final String id) {
- return this.dictionaryMap.get(id);
- }
-
- @Override
- public SegmentDictionary getDictionary(final String dictionaryId) {
- final DictionaryResource resource = this.dictionaryMap.get(dictionaryId);
- if (resource == null) {
- this.logger.warn("Dictionary not found: {}", dictionaryId);
- return null;
- } else {
- return resource.getResource();
- }
- }
-
- @Override
- public Lexer4a getLexer() {
- if (this.lexer == null) {
- this.lexer = new LexerJavaBreakIterator(this);
- }
- return this.lexer;
- }
-
-}
Copied: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java (from rev 13303, trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java)
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java (rev 0)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4aStandard.java 2023-10-03 15:36:09 UTC (rev 13305)
@@ -0,0 +1,235 @@
+/*
+ * Copyright 2004 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+/*
+ * Known contributors:
+ * @author Carlos Villegas <ca...@un...> (original author)
+ */
+
+package org.foray.orthography;
+
+import org.foray.orthography.util.OrthographyParser;
+
+import org.axsl.fotree.text.FoOrthographyServer;
+import org.axsl.i18n.WritingSystem;
+import org.axsl.orthography.OrthographyException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.EntityResolver;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+/**
+ * This class is the main entry point to the hyphenation package.
+ */
+public class OrthographyServer4aStandard implements FoOrthographyServer {
+
+ /** The logger. */
+ private Logger logger = LoggerFactory.getLogger(OrthographyServer4aStandard.class);
+
+// /** The configuration for this server. */
+// private OrthographyServerConfig config;
+
+ /** An EntityResolver to be used by XML parsers (for handling DTD catalogs, etc.). */
+ private EntityResolver entityResolver = null;
+
+ /** Map of writing systems and their orthographies. */
+ private Map<WritingSystem, Orthography4aStandard> orthographyMap =
+ new HashMap<WritingSystem, Orthography4aStandard>();
+
+ /** Map whose key is a dictionary ID, and whose value is the matching dictionary resource. */
+ private Map<String, DictionaryResource> dictionaryMap = new HashMap<String, DictionaryResource>();
+
+ /** The map of match rule lists, keyed by id. */
+ private Map<String, List<Pattern>> matchRuleLists = new HashMap<String, List<Pattern>>();
+
+ /** The map of derivative rule lists, keyed by id. */
+ private Map<String, List<DerivativePattern>> derivativeRuleLists = new HashMap<String, List<DerivativePattern>>();
+
+ /** Lazily-created string tokenizer. */
+ private Lexer4a lexer;
+
+ /**
+ * Constructor.
+ * @param config The configuration for this server.
+ * @throws OrthographyException For errors parsing configuration.
+ */
+ public OrthographyServer4aStandard(final OrthographyServerConfig config) throws OrthographyException {
+// this.config = config;
+ if (config.getOrthographyConfigurationLocation() == null) {
+ throw new OrthographyException("Orthography configuration location has not been provided");
+ } else {
+ InputStream inputStream = null;
+ try {
+ inputStream = config.getOrthographyConfigurationLocation().openStream();
+ } catch (final IOException e) {
+ throw new OrthographyException(e);
+ }
+ final InputSource inputSource = new InputSource(inputStream);
+
+ final OrthographyParser parser = new OrthographyParser(this);
+ try {
+ parser.parse(inputSource);
+ } catch (IOException | SAXException e) {
+ throw new OrthographyException(e);
+ }
+ }
+ }
+
+ /**
+ * Returns the EntityResolver for this server.
+ * @return The EntityResolver.
+ */
+ public EntityResolver getEntityResolver() {
+ return this.entityResolver;
+ }
+
+ /**
+ * Sets the EntityResolver for this server.
+ * @param entityResolver The new EntityResolver.
+ */
+ public void setEntityResolver(final EntityResolver entityResolver) {
+ this.entityResolver = entityResolver;
+ }
+
+ /**
+ * Registers a configuration for a given orthography.
+ * @param writingSystem The orthography for which the configuration should be registered.
+ * @param orthography The configuration for {@code orthography}.
+ */
+ public void registerOrthography(final WritingSystem writingSystem, final Orthography4aStandard orthography) {
+ this.orthographyMap.put(writingSystem, orthography);
+ final DictionaryResource resource = orthography.getDictionaryResource();
+ /* TODO: If the new resources is the same as the existing one, ignore. Otherwise, log a warning and ignore the
+ * new resource. */
+// if (this.dictionaryMap.get(resource.getId()) != null) {
+// throw new IllegalStateException(String.format(
+// "Dictionary already registered for ID: %1s", resource.getId()));
+// }
+ /* Not every orthography has a dictionary resource. */
+ if (resource != null) {
+ this.dictionaryMap.put(resource.getId(), resource);
+ }
+ }
+
+ @Override
+ public Orthography4aStandard getOrthography(final WritingSystem writingSystem) {
+ return this.orthographyMap.get(writingSystem);
+ }
+
+ /**
+ * Registers a list of match rules.
+ * @param id The id of the match rules to be registered.
+ * @param matchRules The match rules being registered.
+ */
+ public void registerMatchRules(final String id, final List<Pattern> matchRules) {
+ if (this.matchRuleLists.get(id) != null) {
+ throw new IllegalArgumentException("Match Rules already exist for id: " + id);
+ }
+ this.matchRuleLists.put(id, matchRules);
+ }
+
+ /**
+ * Returns the list of match rules for a given Id.
+ * @param id The id of the match rules to be returned.
+ * @return The match rules for {@code id}.
+ */
+ public List<Pattern> getMatchRules(final String id) {
+ return this.matchRuleLists.get(id);
+ }
+
+ /**
+ * Registers a list of derivative rules.
+ * @param id The id of the derivative rules to be registered.
+ * @param derivativeRules The derivative rules being registered.
+ */
+ public void registerDerivativeRules(final String id, final List<DerivativePattern> derivativeRules) {
+ if (this.derivativeRuleLists.get(id) != null) {
+ throw new IllegalArgumentException("Derivative Rules already exist for id: " + id);
+ }
+ this.derivativeRuleLists.put(id, derivativeRules);
+ }
+
+ /**
+ * Returns the list of derivative rules for a given Id.
+ * @param id The id of the derivative rules to be returned.
+ * @return The derivative rules for {@code id}.
+ */
+ public List<DerivativePattern> getDerivativePatterns(final String id) {
+ return this.derivativeRuleLists.get(id);
+ }
+
+ /**
+ * Registers a dictionary.
+ * @param id The id of the dictionary to be registered.
+ * @param resource The dictionary resource being registered.
+ */
+ public void registerDictionary(final String id, final DictionaryResource resource) {
+ if (this.dictionaryMap.get(id) != null) {
+ throw new IllegalArgumentException("DictionaryResource already exist for id: " + id);
+ }
+ this.dictionaryMap.put(id, resource);
+ }
+
+ /**
+ * Returns the dictionary resource for a given Id.
+ * @param id The id of the dictionary to be returned.
+ * @return The dictionary resource for {@code id}.
+ */
+ public DictionaryResource getDictionaryResource(final String id) {
+ return this.dictionaryMap.get(id);
+ }
+
+ @Override
+ public SegmentDictionary getDictionary(final String dictionaryId) {
+ final DictionaryResource resource = this.dictionaryMap.get(dictionaryId);
+ if (resource == null) {
+ this.logger.warn("Dictionary not found: {}", dictionaryId);
+ return null;
+ } else {
+ return resource.getResource();
+ }
+ }
+
+ @Override
+ public Lexer4a getLexer() {
+ if (this.lexer == null) {
+ this.lexer = new LexerJavaBreakIterator(this);
+ }
+ return this.lexer;
+ }
+
+}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java 2023-10-03 11:31:27 UTC (rev 13304)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java 2023-10-03 15:36:09 UTC (rev 13305)
@@ -41,7 +41,7 @@
import org.foray.orthography.DictionaryResource;
import org.foray.orthography.HyphenationPatternsResource;
import org.foray.orthography.Orthography4aStandard;
-import org.foray.orthography.OrthographyServer4a;
+import org.foray.orthography.OrthographyServer4aStandard;
import org.foray.orthography.PosUtils;
import org.foray.orthography.WordWrapperFactory;
import org.foray.xml.SaxParser;
@@ -67,8 +67,8 @@
import java.util.regex.Pattern;
/**
- * SAX2 Handler which retrieves the orthography configuration information and stores it in a {@link OrthographyServer4a}
- * instance.
+ * SAX2 Handler which retrieves the orthography configuration information and stores it in a
+ * {@link OrthographyServer4aStandard} instance.
* Normally this class doesn't need to be accessed directly.
*/
public class OrthographyParser extends SaxParser<Orthography4aStandard> {
@@ -127,7 +127,7 @@
new HashMap<String, HyphenationPatternsResource>();
/** The hyphenation server receiving the parsed information. */
- private OrthographyServer4a hyphenationServer;
+ private OrthographyServer4aStandard hyphenationServer;
/** The stack of elements currently being processed. */
private Stack<String> elementStack = new Stack<String>();
@@ -136,7 +136,7 @@
* Constructor.
* @param server The hyphenation server which will capture the information from the parsed configuration.
*/
- public OrthographyParser(final OrthographyServer4a server) {
+ public OrthographyParser(final OrthographyServer4aStandard server) {
this.hyphenationServer = server;
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2023-10-03 11:31:27 UTC (rev 13304)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2023-10-03 15:36:09 UTC (rev 13305)
@@ -32,7 +32,7 @@
import org.foray.common.primitive.ObjectUtils;
import org.foray.orthography.Lexer4a;
import org.foray.orthography.Orthography4aStandard;
-import org.foray.orthography.OrthographyServer4a;
+import org.foray.orthography.OrthographyServer4aStandard;
import org.foray.orthography.OrthographyServerConfig;
import org.foray.orthography.SegmentDictionary;
import org.foray.xml.SaxParser;
@@ -157,7 +157,7 @@
private Stack<Element> elementStack = new Stack<Element>();
/** The Orthography server. */
- private OrthographyServer4a server;
+ private OrthographyServer4aStandard server;
/** The lexer. */
private Lexer4a lexer;
@@ -195,7 +195,7 @@
final OrthographyServerConfig serverConfig = new OrthographyServerConfig();
serverConfig.setOrthographyConfigurationLocation(orthographyConfigPath);
- this.server = new OrthographyServer4a(serverConfig);
+ this.server = new OrthographyServer4aStandard(serverConfig);
this.lexer = this.server.getLexer();
if (adhocDictionaryPaths != null) {
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/WordChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/WordChecker.java 2023-10-03 11:31:27 UTC (rev 13304)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/WordChecker.java 2023-10-03 15:36:09 UTC (rev 13305)
@@ -30,7 +30,7 @@
import org.foray.common.i18n.WritingSystem4a;
import org.foray.orthography.Orthography4aStandard;
-import org.foray.orthography.OrthographyServer4a;
+import org.foray.orthography.OrthographyServer4aStandard;
import org.foray.orthography.OrthographyServerConfig;
import org.foray.orthography.SegmentDictionary;
@@ -84,7 +84,7 @@
private Orthography4aStandard currentOrthographyConfig;
/** The Hyphenation server. */
- private OrthographyServer4a server;
+ private OrthographyServer4aStandard server;
/** The list of dictionaries that are currently active, i.e. that match the current orthography. */
private List<Dictionary> currentDictionaries = new ArrayList<Dictionary>();
@@ -110,7 +110,7 @@
final OrthographyServerConfig serverConfig = new OrthographyServerConfig();
serverConfig.setOrthographyConfigurationLocation(orthographyConfigPath);
- this.server = new OrthographyServer4a(serverConfig);
+ this.server = new OrthographyServer4aStandard(serverConfig);
/* Remove hard-coding. */
final WritingSystem4a writingSystem = WritingSystem4a.find("eng", "USA", "Latn");
this.currentOrthographyConfig = this.server.getOrthography(writingSystem);
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java 2023-10-03 11:31:27 UTC (rev 13304)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java 2023-10-03 15:36:09 UTC (rev 13305)
@@ -54,7 +54,7 @@
*/
@BeforeEach
public void setupTest() throws IOException, OrthographyException {
- final OrthographyServer4a server = createServer();
+ final OrthographyServer4aStandard server = createServer();
this.out = new LexerIcu4jBreakIterator(server);
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java 2023-10-03 11:31:27 UTC (rev 13304)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java 2023-10-03 15:36:09 UTC (rev 13305)
@@ -49,7 +49,7 @@
*/
@BeforeEach
public void setupTest() throws IOException, OrthographyException {
- final OrthographyServer4a server = createServer();
+ final OrthographyServer4aStandard server = createServer();
this.out = new LexerJavaBreakIterator(server);
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2023-10-03 11:31:27 UTC (rev 13304)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2023-10-03 15:36:09 UTC (rev 13305)
@@ -61,8 +61,8 @@
* @throws IOException Not expected here.
* @throws OrthographyException Not expected here.
*/
- public OrthographyServer4a createServer() throws IOException, OrthographyException {
- final OrthographyServer4a server = OrthographyServer4aTests.makeHyphenationServer();
+ public OrthographyServer4aStandard createServer() throws IOException, OrthographyException {
+ final OrthographyServer4aStandard server = OrthographyServer4aTests.makeHyphenationServer();
return server;
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/Orthography4aTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/Orthography4aTests.java 2023-10-03 11:31:27 UTC (rev 13304)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/Orthography4aTests.java 2023-10-03 15:36:09 UTC (rev 13305)
@@ -50,7 +50,7 @@
public class Orthography4aTests {
/** The hyphenation server used in these tests. */
- private static OrthographyServer4a server;
+ private static OrthographyServer4aStandard server;
/** The object under test. */
private Orthography4aStandard consumer;
@@ -64,7 +64,7 @@
@BeforeAll
public static void beforeClass() throws IOException, OrthographyException {
final OrthographyServerConfig config = OrthographyServer4aTests.makeHyphenationServerConfig();
- server = new OrthographyServer4a(config);
+ server = new OrthographyServer4aStandard(config);
assertNotNull(server);
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/OrthographyServer4aTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/OrthographyServer4aTests.java 2023-10-03 11:31:27 UTC (rev 13304)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/OrthographyServer4aTests.java 2023-10-03 15:36:09 UTC (rev 13305)
@@ -45,12 +45,12 @@
import java.util.Properties;
/**
- * Tests of {@link OrthographyServer4a}.
+ * Tests of {@link OrthographyServer4aStandard}.
*/
public class OrthographyServer4aTests {
/** The object under test. */
- private OrthographyServer4a out;
+ private OrthographyServer4aStandard out;
/**
* Set up the tests.
@@ -97,14 +97,14 @@
}
/**
- * Creates an instance of {@link OrthographyServer4a}.
+ * Creates an instance of {@link OrthographyServer4aStandard}.
* @return The newly-created instance.
* @throws IOException For any errors creating the instance.
* @throws OrthographyException For errors creating a hyphenation server.
*/
- public static OrthographyServer4a makeHyphenationServer() throws IOException, OrthographyException {
+ public static OrthographyServer4aStandard makeHyphenationServer() throws IOException, OrthographyException {
final OrthographyServerConfig config = makeHyphenationServerConfig();
- return new OrthographyServer4a(config);
+ return new OrthographyServer4aStandard(config);
}
/**
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/PatternTreeTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/PatternTreeTests.java 2023-10-03 11:31:27 UTC (rev 13304)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/PatternTreeTests.java 2023-10-03 15:36:09 UTC (rev 13305)
@@ -55,7 +55,7 @@
*/
@BeforeAll
public static void beforeClass() throws IOException, OrthographyException {
- final OrthographyServer4a server = OrthographyServer4aTests.makeHyphenationServer();
+ final OrthographyServer4aStandard server = OrthographyServer4aTests.makeHyphenationServer();
final WritingSystem4a writingSystem = WritingSystem4a.USA;
PatternTreeTests.patternTree = server.getOrthography(writingSystem).getHyphenationPatterns();
assertNotNull(PatternTreeTests.patternTree);
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-10-03 11:31:30
|
Revision: 13304
http://sourceforge.net/p/foray/code/13304
Author: victormote
Date: 2023-10-03 11:31:27 +0000 (Tue, 03 Oct 2023)
Log Message:
-----------
Move more NaturalLanguage code to the attic (some coming from aXSL).
Modified Paths:
--------------
trunk/foray/foray-app/src/main/java/org/foray/app/ForaySpecific.java
trunk/foray/foray-common/src/main/java/org/foray/common/ForayEntityResolver.java
trunk/foray/foray-core/src/main/java/org/foray/core/SessionConfig.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServerConfig.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/OrthographyServer4aTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/OrthographyTestUtilities.java
trunk/foray/foray-zz-attic/src/main/java/org/foray/orthography/OrthographyServer4aAttic.java
Added Paths:
-----------
trunk/foray/foray-zz-attic/src/main/data/
trunk/foray/foray-zz-attic/src/main/data/natural-languages/
trunk/foray/foray-zz-attic/src/main/data/natural-languages/axsl-natural-language.dtd
Removed Paths:
-------------
trunk/foray/foray-orthography/src/main/data/natural-languages/
Modified: trunk/foray/foray-app/src/main/java/org/foray/app/ForaySpecific.java
===================================================================
--- trunk/foray/foray-app/src/main/java/org/foray/app/ForaySpecific.java 2023-10-03 03:15:48 UTC (rev 13303)
+++ trunk/foray/foray-app/src/main/java/org/foray/app/ForaySpecific.java 2023-10-03 11:31:27 UTC (rev 13304)
@@ -160,7 +160,6 @@
config.setRawHyphenationPatternsBaseDirectory(configuration.optionRawHyphenationPatternsBaseDirectory());
config.setSerializedHyphenationPatternsBaseDirectory(
configuration.optionSerializedHyphenationPatternsBaseDirectory());
- config.setNaturalLanguagesBaseDirectory(configuration.optionNaturalLanguageBaseDirectory());
try {
return new org.foray.orthography.OrthographyServer4a(config);
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/ForayEntityResolver.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/ForayEntityResolver.java 2023-10-03 03:15:48 UTC (rev 13303)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/ForayEntityResolver.java 2023-10-03 11:31:27 UTC (rev 13304)
@@ -132,7 +132,6 @@
public InputStream getInputStream(final String publicId) {
switch (publicId) {
/* aXSL DTDs. */
- case NATURAL_LANGUAGE_PUBLIC_ID: return getAxslDtdAsInputStream("axsl-natural-language.dtd");
case HYPHENATION_PUBLIC_ID: return getAxslDtdAsInputStream("axsl-hyphenation.dtd");
case DICTIONARY_PUBLIC_ID: return getAxslDtdAsInputStream("axsl-dictionary.dtd");
case FONT_CONFIG_PUBLIC_ID: return getAxslDtdAsInputStream("axsl-font-config.dtd");
Modified: trunk/foray/foray-core/src/main/java/org/foray/core/SessionConfig.java
===================================================================
--- trunk/foray/foray-core/src/main/java/org/foray/core/SessionConfig.java 2023-10-03 03:15:48 UTC (rev 13303)
+++ trunk/foray/foray-core/src/main/java/org/foray/core/SessionConfig.java 2023-10-03 11:31:27 UTC (rev 13304)
@@ -271,18 +271,6 @@
}
/**
- * Returns the "natural-language-base-directory" configuration item.
- * @return The "natural-language-base-directory" configuration item.
- */
- public URL optionNaturalLanguageBaseDirectory() {
- final URL value = (URL) getValue("natural-language-base-directory");
- if (value == null) {
- return optionBaseDirectory();
- }
- return value;
- }
-
- /**
* Returns the "language" configuration item.
* @return The "language" configuration item.
*/
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServerConfig.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServerConfig.java 2023-10-03 03:15:48 UTC (rev 13303)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServerConfig.java 2023-10-03 11:31:27 UTC (rev 13304)
@@ -50,9 +50,6 @@
/** The directory containing parsed and serialized hyphenation pattern files. */
private URL serializedHyphenationPatternsBaseDirectory;
- /** The directory containing natural language files. */
- private URL naturalLanguagesBaseDirectory;
-
/**
* Sets the location of the orthography configuration.
* @param orthographyConfigurationLocation The location of the orthography configuration.
@@ -133,20 +130,4 @@
return this.serializedHyphenationPatternsBaseDirectory;
}
- /**
- * Sets the natural languages base directory.
- * @param naturalLanguagesBaseDirectory The natural languages base directory to set.
- */
- public void setNaturalLanguagesBaseDirectory(final URL naturalLanguagesBaseDirectory) {
- this.naturalLanguagesBaseDirectory = naturalLanguagesBaseDirectory;
- }
-
- /**
- * Returns the directory containing natural language files.
- * @return The directory containing natural language files.
- */
- public URL getNaturalLanguagesBaseDirectory() {
- return this.naturalLanguagesBaseDirectory;
- }
-
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/OrthographyServer4aTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/OrthographyServer4aTests.java 2023-10-03 03:15:48 UTC (rev 13303)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/OrthographyServer4aTests.java 2023-10-03 11:31:27 UTC (rev 13304)
@@ -80,7 +80,6 @@
final String rawPatternsPath = foraySandbox + "/foray-orthography/src/main/data/hyph-patterns";
final String parsedPatternsPath =
foraySandbox + "/foray-orthography/src/main/resources/resources/org/foray/orthography/hyphPatterns/";
- final String naturalLanguagePath = foraySandbox + "/foray-orthography/src/main/data/natural-languages/";
final URL orthographyFile = new URL("file", null, orthographyConfigPath);
final URL rawDictionary = new URL("file", null, rawDictionaryPath);
@@ -87,7 +86,6 @@
final URL parsedDictionary = new URL("file", null, parsedDictionaryPath);
final URL rawPatterns = new URL("file", null, rawPatternsPath);
final URL parsedPatterns = new URL("file", null, parsedPatternsPath);
- final URL naturalLanguage = new URL("file", null, naturalLanguagePath);
final OrthographyServerConfig config = new OrthographyServerConfig();
config.setOrthographyConfigurationLocation(orthographyFile);
@@ -95,7 +93,6 @@
config.setSerializedDictionariesBaseDirectory(parsedDictionary);
config.setRawHyphenationPatternsBaseDirectory(rawPatterns);
config.setSerializedHyphenationPatternsBaseDirectory(parsedPatterns);
- config.setNaturalLanguagesBaseDirectory(naturalLanguage);
return config;
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/OrthographyTestUtilities.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/OrthographyTestUtilities.java 2023-10-03 03:15:48 UTC (rev 13303)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/OrthographyTestUtilities.java 2023-10-03 11:31:27 UTC (rev 13304)
@@ -63,7 +63,6 @@
final String rawPatternsDirString = foraySandbox + "/foray-orthography/src/main/data/hyph-patterns/";
final String serializedPatternsDirString =
foraySandbox + "/foray-orthography/src/main/resources/resources/org/foray/orthography/dictionaries";
- final String naturalLanguageDirString = foraySandbox + "/foray-orthography/src/main/data/natural-languages/";
final URL orthographyFile = new URL("file", null, orthographyConfigPath);
final URL rawDictionary = new URL("file", null, wordListDirString);
@@ -70,7 +69,6 @@
final URL parsedDictionary = new URL("file", null, dictionaryDirString);
final URL rawPatterns = new URL("file", null, rawPatternsDirString);
final URL parsedPatterns = new URL("file", null, serializedPatternsDirString);
- final URL naturalLanguageDir = new URL("file", null, naturalLanguageDirString);
final OrthographyServerConfig config = new OrthographyServerConfig();
config.setOrthographyConfigurationLocation(orthographyFile);
@@ -78,7 +76,6 @@
config.setSerializedDictionariesBaseDirectory(parsedDictionary);
config.setRawHyphenationPatternsBaseDirectory(rawPatterns);
config.setSerializedHyphenationPatternsBaseDirectory(parsedPatterns);
- config.setNaturalLanguagesBaseDirectory(naturalLanguageDir);
return config;
}
Added: trunk/foray/foray-zz-attic/src/main/data/natural-languages/axsl-natural-language.dtd
===================================================================
--- trunk/foray/foray-zz-attic/src/main/data/natural-languages/axsl-natural-language.dtd (rev 0)
+++ trunk/foray/foray-zz-attic/src/main/data/natural-languages/axsl-natural-language.dtd 2023-10-03 11:31:27 UTC (rev 13304)
@@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Document Type Definition (DTD) for an XML document that describes various
+features of a natural language, that is, a language spoken and/or written by
+humans.
+
+The initial purpose of this DTD is to provide a way to document the valid
+letters in the language.
+
+Use the following public and system IDs for this DTD:
+<!DOCTYPE axsl-natural-language
+ PUBLIC "-//aXSL//DTD Natural Language V0.1//EN"
+ "http://www.axsl.org/dtds/0.1/en/axsl-natural-language.dtd">
+-->
+
+<!ELEMENT axsl-natural-language (letter-range*, letter*)>
+<!--
+1. iso-639: The ISO-639 code for the language being defined.
+-->
+<!ATTLIST axsl-natural-language
+ iso-639 CDATA #REQUIRED
+>
+
+<!--
+A range of Unicode code points that are valid letters in this language. By
+"letters" is meant characters other than numbers, symbols, and punctuation marks
+that can properly be found in content for this language.
+
+Applications are expected to handle any canonical normalization of letters in
+this range.
+-->
+<!ELEMENT letter-range EMPTY>
+
+<!--
+1. start: The Unicode code point marking the start of the range of valid
+letters. For example, to designate the character "a", use "U+0061".
+2. end: The Unicode code point marking the end of the range of valid letters.
+For example, to designate the character "z", use "U+007A".
+-->
+<!ATTLIST letter-range
+ description CDATA #IMPLIED
+ start CDATA #REQUIRED
+ end CDATA #REQUIRED
+>
+
+<!--
+A single user-oriented grapheme that is a valid letter in this language. By
+"grapheme" is meant not a byte, or a 16-bit character, or even a Unicode code
+point, but rather one or more Unicode code points that together describe a
+displayable/printable character. For example, an "e" with an acute accent can
+be encoded as either a single code point, U+00E9, or as a combination of the
+code points for "e", U+0065, followed by the code point for the "combining"
+acute accent, U+0301.
+-->
+<!ELEMENT letter EMPTY>
+
+<!--
+1. value: The Unicode code point(s) defining the letter. If multiple code points
+comprise the letter, delimit each with a space. For example, to define an "e"
+with an acute accent, set "value" to "U+0065 U+0301".
+-->
+<!ATTLIST letter
+ description CDATA #IMPLIED
+ value CDATA #REQUIRED
+>
+
+<!-- Last Line of DTD -->
Property changes on: trunk/foray/foray-zz-attic/src/main/data/natural-languages/axsl-natural-language.dtd
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Modified: trunk/foray/foray-zz-attic/src/main/java/org/foray/orthography/OrthographyServer4aAttic.java
===================================================================
--- trunk/foray/foray-zz-attic/src/main/java/org/foray/orthography/OrthographyServer4aAttic.java 2023-10-03 03:15:48 UTC (rev 13303)
+++ trunk/foray/foray-zz-attic/src/main/java/org/foray/orthography/OrthographyServer4aAttic.java 2023-10-03 11:31:27 UTC (rev 13304)
@@ -60,8 +60,8 @@
/** The logger. */
private Logger logger = LoggerFactory.getLogger(this.getClass());
- /** The configuration for this server. */
- private OrthographyServerConfig config;
+// /** The configuration for this server. */
+// private OrthographyServerConfig config;
/** Map of all Natural Language instances. */
private Map<Language, NaturalLanguage> languages = new HashMap<Language, NaturalLanguage>();
@@ -72,7 +72,7 @@
* @throws OrthographyException For errors parsing configuration.
*/
public OrthographyServer4aAttic(final OrthographyServerConfig config) throws OrthographyException {
- this.config = config;
+// this.config = config;
}
/**
@@ -143,11 +143,12 @@
*/
private NaturalLanguage parseNaturalLanguage(final String languageCode)
throws OrthographyException {
- final URL naturalLanguageDir = this.config.getNaturalLanguagesBaseDirectory();
+// final URL naturalLanguageDir = this.config.getNaturalLanguagesBaseDirectory();
+ final URL naturalLanguageDir = null;
final String filePrefix = languageCode + "-language";
- if (naturalLanguageDir == null) {
- return null;
- }
+// if (naturalLanguageDir == null) {
+// return null;
+// }
NaturalLanguage nl = null;
/* Use the language code as the base name for the file. */
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-10-03 03:15:51
|
Revision: 13303
http://sourceforge.net/p/foray/code/13303
Author: victormote
Date: 2023-10-03 03:15:48 +0000 (Tue, 03 Oct 2023)
Log Message:
-----------
Move NaturalLanguage code to the attic.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
trunk/foray/foray-zz-attic/build.gradle
Added Paths:
-----------
trunk/foray/foray-zz-attic/src/main/java/org/foray/orthography/NatLangParser.java
trunk/foray/foray-zz-attic/src/main/java/org/foray/orthography/NaturalLanguage.java
trunk/foray/foray-zz-attic/src/main/java/org/foray/orthography/OrthographyServer4aAttic.java
trunk/foray/foray-zz-attic/src/main/java/org/foray/orthography/ValidateChars.java
trunk/foray/foray-zz-attic/src/test/java/org/foray/orthography/NaturalLanguageTests.java
Removed Paths:
-------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NatLangParser.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NaturalLanguage.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/ValidateChars.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/util/NaturalLanguageTests.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java 2023-10-03 01:30:18 UTC (rev 13302)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java 2023-10-03 03:15:48 UTC (rev 13303)
@@ -33,15 +33,9 @@
package org.foray.orthography;
-import org.foray.common.ForayConstants;
-import org.foray.common.i18n.Language4a;
-import org.foray.common.url.UrlFactory;
-import org.foray.orthography.util.NatLangParser;
-import org.foray.orthography.util.NaturalLanguage;
import org.foray.orthography.util.OrthographyParser;
import org.axsl.fotree.text.FoOrthographyServer;
-import org.axsl.i18n.Language;
import org.axsl.i18n.WritingSystem;
import org.axsl.orthography.OrthographyException;
@@ -53,9 +47,6 @@
import java.io.IOException;
import java.io.InputStream;
-import java.io.ObjectInputStream;
-import java.net.MalformedURLException;
-import java.net.URL;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -69,12 +60,9 @@
/** The logger. */
private Logger logger = LoggerFactory.getLogger(OrthographyServer4a.class);
- /** The configuration for this server. */
- private OrthographyServerConfig config;
+// /** The configuration for this server. */
+// private OrthographyServerConfig config;
- /** Map of all Natural Language instances. */
- private Map<Language, NaturalLanguage> languages = new HashMap<Language, NaturalLanguage>();
-
/** An EntityResolver to be used by XML parsers (for handling DTD catalogs, etc.). */
private EntityResolver entityResolver = null;
@@ -100,7 +88,7 @@
* @throws OrthographyException For errors parsing configuration.
*/
public OrthographyServer4a(final OrthographyServerConfig config) throws OrthographyException {
- this.config = config;
+// this.config = config;
if (config.getOrthographyConfigurationLocation() == null) {
throw new OrthographyException("Orthography configuration location has not been provided");
} else {
@@ -122,159 +110,6 @@
}
/**
- * Finds or creates the instance of this class that is suitable for a given
- * language description.
- * @param languageCode The ISO-639 code for the language sought.
- * @return The suitable instance for {@code languageCode}.
- */
- public NaturalLanguage getNaturalLanguage(
- final String languageCode) {
- final Language iso639 = Language4a.findFromAlpha(languageCode);
- return this.getNaturalLanguage(iso639);
- }
-
- /**
- * Finds or creates the instance of this class that is suitable for a given
- * language description.
- * @param iso639 The ISO-639 instance for the language sought.
- * @return The suitable instance for {@code iso639}.
- */
- public NaturalLanguage getNaturalLanguage(final Language iso639) {
- NaturalLanguage nl = this.languages.get(iso639);
- if (nl != null) {
- return nl;
- }
- try {
- nl = this.parseNaturalLanguage(iso639.getAlpha3Code());
- } catch (final OrthographyException e) {
- this.logger.error(e.getMessage());
- return null;
- }
- if (nl != null) {
- this.languages.put(iso639, nl);
- }
- return nl;
- }
-
- /**
- * Makes a new, empty instance of this class for a given language
- * description.
- * @param languageCode The ISO-639 code for the new instance.
- * @return A new instance for {@code languageCode}, or null if an
- * instance already exists for it.
- */
- public NaturalLanguage makeNaturalLanguage(
- final String languageCode) {
- final Language iso639 = Language4a.findFromAlpha(languageCode);
- final NaturalLanguage existingLang = this.getNaturalLanguage(iso639);
- if (existingLang != null) {
- /* If the instance already exists, don't return a new one for the
- * same language code. */
- return null;
- }
- final NaturalLanguage newLang = new NaturalLanguage();
- /* TODO: It is ugly to expose this instance before it has been parsed.
- * Instead we need to add a concept of "locking" to this class that
- * will make it effectively immutable, and register it at that time
- * instead of now. */
- this.languages.put(iso639, newLang);
- return newLang;
- }
-
- /**
- * Parse a NaturalLanguage instance from its description.
- * @param languageCode The language code of the language to be parsed.
- * @return The parsed NaturalLanguage instance, if it can be found, or null otherwise.
- * @throws OrthographyException For errors finding or parsing the files.
- */
- private NaturalLanguage parseNaturalLanguage(final String languageCode)
- throws OrthographyException {
- final URL naturalLanguageDir = this.config.getNaturalLanguagesBaseDirectory();
- final String filePrefix = languageCode + "-language";
- if (naturalLanguageDir == null) {
- return null;
- }
- NaturalLanguage nl = null;
-
- /* Use the language code as the base name for the file. */
-
- /* First look for the serialized object. */
- String fileName = filePrefix + "." + ForayConstants.BINARY_SERIALIZATION_EXTENSION;
- URL nlFile = null;
- try {
- nlFile = UrlFactory.createURL(naturalLanguageDir, fileName);
- } catch (final MalformedURLException e) {
- /* This probably means that the natural languages base directory URL is not valid.
- * Rethrow the exception to alert the user. */
- throw new OrthographyException(e);
- }
-
- InputStream inputStream = null;
- try {
- inputStream = nlFile.openStream();
- } catch (final IOException e) {
- /* This just means that the input doesn't exist. Ignore the
- * exception as the null value in "inputStream" will control the
- * downstream logic. */
- }
-
- if (inputStream != null) {
- ObjectInputStream ois = null;
- try {
- ois = new ObjectInputStream(inputStream);
- nl = (NaturalLanguage) ois.readObject();
- return nl;
- } catch (final IOException e) {
- throw new OrthographyException(e);
- } catch (final ClassNotFoundException e) {
- throw new OrthographyException(e);
- } finally {
- if (ois != null) {
- try {
- ois.close();
- } catch (final IOException e) {
- this.logger.error(
- "Exception closing ObjectInputStream "
- + "for " + nlFile.toString(), e);
- }
- }
- }
- }
-
-
- /* Look for the raw XML file. */
- fileName = filePrefix + ".xml";
- try {
- nlFile = UrlFactory.createURL(naturalLanguageDir, fileName);
- } catch (final MalformedURLException e) {
- /* This probably means that the hyphenation URL is not valid.
- * Rethrow the exception to alert the user. */
- throw new OrthographyException(e);
- }
-
- /* Does the XML file exist? */
- try {
- inputStream = nlFile.openStream();
- inputStream.close();
- } catch (final IOException e) {
- /* The XML pattern file does not exist. Do not try to parse it. */
- return null;
- }
-
- if (this.logger.isDebugEnabled()) {
- this.logger.debug("reading " + naturalLanguageDir + filePrefix
- + ".xml");
- }
- final NatLangParser parser = new NatLangParser();
- try {
- nl = parser.parse(nlFile);
- } catch (IOException | SAXException e) {
- throw new OrthographyException(e);
- }
- return nl;
- }
-
- /**
* Returns the EntityResolver for this server.
* @return The EntityResolver.
*/
Deleted: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NatLangParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NatLangParser.java 2023-10-03 01:30:18 UTC (rev 13302)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NatLangParser.java 2023-10-03 03:15:48 UTC (rev 13303)
@@ -1,115 +0,0 @@
-/*
- * Copyright 2007 The FOray Project.
- * http://www.foray.org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This work is in part derived from the following work(s), used with the
- * permission of the licensor:
- * Apache FOP, licensed by the Apache Software Foundation
- *
- */
-
-/*
- * $LastChangedRevision$
- * $LastChangedDate$
- * $LastChangedBy$
- */
-
-package org.foray.orthography.util;
-
-import org.foray.common.ForayEntityResolver;
-import org.foray.common.primitive.UnicodeCharUtils;
-import org.foray.xml.SaxParser;
-
-import org.xml.sax.Attributes;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-import org.xml.sax.XMLReader;
-
-import java.io.IOException;
-import java.util.StringTokenizer;
-
-/**
- * A SAX document handler to read and parse natural-language descriptions
- * from an XML file.
- */
-public class NatLangParser extends SaxParser<NaturalLanguage> {
- /*
- * TODO: This class should probably be converted to work with the CLDR/LDML (see src/main/schema/ldml).
- */
-
- /** The natural language instance being parsed. */
- private NaturalLanguage nl;
-
- @Override
- public NaturalLanguage parse(final InputSource inputSource) throws IOException, SAXException {
- final XMLReader parser = createSax2Parser(false, true, true, ForayEntityResolver.getInstance(), false);
- parser.parse(inputSource);
- return this.nl;
- }
-
- @Override
- public void startElement(final String uri, final String local,
- final String raw, final Attributes attrs) throws SAXException {
- if (local.equals("axsl-natural-language")) {
- final String languageCode = attrs.getValue("iso-639");
- this.nl = new NaturalLanguage();
- if (this.nl == null) {
- /* An instance of this language already exists. */
- throw new SAXException("NaturalLanguage instance already exists"
- + " for " + languageCode);
- }
- } else if (local.equals("letter-range")) {
- final String startAttr = attrs.getValue("start");
- final String endAttr = attrs.getValue("end");
- if (! UnicodeCharUtils.isValidUnparsedCodePoint(startAttr)) {
- throw new SAXException("Invalid start: " + startAttr);
- }
- if (! UnicodeCharUtils.isValidUnparsedCodePoint(startAttr)) {
- throw new SAXException("Invalid end: " + endAttr);
- }
- final int start = UnicodeCharUtils.parseCodePoint(startAttr);
- final int end = UnicodeCharUtils.parseCodePoint(endAttr);
- this.nl.addRange(start, end);
- } else if (local.equals("letter")) {
- /* Example: value="U+0041 U+0304" */
- final String value = attrs.getValue("value");
- final StringTokenizer tokenizer = new StringTokenizer(value);
- final int[] codePoints = new int[tokenizer.countTokens()];
- int index = 0;
- while (tokenizer.hasMoreTokens()) {
- final String token = tokenizer.nextToken();
- if (! UnicodeCharUtils.isValidUnparsedCodePoint(token)) {
- throw new SAXException("Invalid value: " + token);
- }
- final int parsedValue = UnicodeCharUtils.parseCodePoint(token);
- codePoints[index] = parsedValue;
- index ++;
- }
- this.nl.addCluster(codePoints);
- }
- }
-
- @Override
- public void endElement(final String uri, final String local,
- final String raw) {
- /* Nothing to do here. */
- }
-
- @Override
- public void reset() {
- throw new UnsupportedOperationException();
- }
-
-}
Deleted: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NaturalLanguage.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NaturalLanguage.java 2023-10-03 01:30:18 UTC (rev 13302)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NaturalLanguage.java 2023-10-03 03:15:48 UTC (rev 13303)
@@ -1,309 +0,0 @@
-/*
- * Copyright 2007 The FOray Project.
- * http://www.foray.org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This work is in part derived from the following work(s), used with the
- * permission of the licensor:
- * Apache FOP, licensed by the Apache Software Foundation
- *
- */
-
-/*
- * $LastChangedRevision$
- * $LastChangedDate$
- * $LastChangedBy$
- */
-
-package org.foray.orthography.util;
-
-import org.foray.primitive.StringUtils;
-
-import com.ibm.icu.lang.UCharacter;
-
-import java.text.Normalizer;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * Manages various aspects of a natural language, specifically what grapheme
- * clusters are valid in that language.
- *
- * NOTE: There may be a better way to do this, but I have not found it yet.
- * Java has the "Locale" class, which gives access to certain resources.
- * However, this seems to be JVM-specific, and does not allow for extension by
- * addition of new locales. Also the ICU4J libraries from IBM (parts of which
- * are included in Java 5, parts in Java 6) provide some similar capabilities,
- * but do not seem to be documented well enough for us to use. It seems like
- * writing this class will be easier than trying to figure out any of the
- * other.
- */
-public final class NaturalLanguage {
- /*
- * TODO: This class should probably be converted to work with the CLDR/LDML (see src/main/schema/ldml).
- */
-
- /**
- * Inner class for storing one grapheme cluster.
- * A grapheme cluster is a sequence of Unicode code points that define a logical character, for example an "e" with
- * a combining diacritical acute accent.
- */
- private final class GraphemeCluster {
-
- /** The Unicode code points describing the grapheme cluster.
- * These are maintained in canonical decomposed order. */
- private int[] codePoints;
-
- /**
- * Constructor.
- * @param codePoints The code points that comprise the grapheme cluster.
- */
- private GraphemeCluster(final int[] codePoints) {
- /* TODO: Ensure validity, canonical order. */
- this.codePoints = codePoints;
- }
-
- /**
- * Indicates whether a given sequence of characters matches this Grapheme Cluster.
- * @param testCodePoints The sequence of code points to be tested.
- * This sequence must be already normalized to the canonical decomposed sequence and order.
- * @param start The index to the first character that is being tested.
- * @param end The index to the last character that is being tested.
- * @return True if and only if the sequence of characters matches this Grapheme
- * Cluster.
- */
- public boolean isIncluded(final int[] testCodePoints, final int start, final int end) {
- if ((end - start + 1) != this.codePoints.length) {
- return false;
- }
- for (int i = 0; i < this.codePoints.length; i++) {
- if (this.codePoints[i] != testCodePoints[i + start]) {
- return false;
- }
- }
- return true;
- }
- }
-
- /**
- * Inner class for storing one code point range.
- * A code point range is a range of Unicode code points that are valid in this language.
- * This class is suitable only for graphemes that can be described with a single code point when canonically
- * decomposed.
- * Graphemes that require more than one code point should use {@link GraphemeCluster}.
- */
- private final class CodePointRange {
-
- /** The starting Unicode code point that defines this range. */
- private int start;
-
- /** The ending Unicode code point that defines this range. */
- private int end;
-
- /**
- * Private Constructor.
- * @param start The starting Unicode code point that defines this range.
- * @param end The ending Unicode code point that defines this range.
- */
- private CodePointRange(final int start, final int end) {
- this.start = start;
- this.end = end;
- }
-
- /**
- * Indicates whether a given code point is in this range.
- * @param codePoint The Unicode code point being tested.
- * @return True if and only if {@code codePoint} is included in this range.
- */
- public boolean isIncluded(final int codePoint) {
- if (codePoint < this.start
- || codePoint > this.end) {
- return false;
- }
- return true;
- }
- }
-
- /** Temporary data structure for storing the grapheme clusters as the
- * instance is being created. */
- private transient List<NaturalLanguage.GraphemeCluster> clusters =
- new ArrayList<NaturalLanguage.GraphemeCluster>();
-
- /** Temporary data structure for storing the code point ranges as the
- * instance is being created. */
- private transient List<NaturalLanguage.CodePointRange> codePointRanges =
- new ArrayList<NaturalLanguage.CodePointRange>();
-
- /**
- * Public no-argument constructor.
- */
- public NaturalLanguage() {
- }
-
- /**
- * Add a range of Unicode code points to this language.
- * @param start The first code point in the range to be added.
- * @param end The last code point in the range to be added.
- */
- public void addRange(final int start, final int end) {
- final CodePointRange newRange = new CodePointRange(start, end);
- this.codePointRanges.add(newRange);
- }
-
- /**
- * Add a new Grapheme Cluster to this language.
- * @param codePoints The sequence of Unicode code points that define the Grapheme Cluster.
- */
- public void addCluster(final int[] codePoints) {
- final GraphemeCluster newCluster = new GraphemeCluster(codePoints);
- this.clusters.add(newCluster);
- }
-
- /**
- * Indicates whether a specific Unicode code point is valid as a grapheme in this language.
- * @param codePoint The Unicode code point to be tested.
- * @return True if and only if {@code codePoint} is valid in this language.
- */
- public boolean isIncluded(final int codePoint) {
- /* Start with some exceptions to the general rules below. */
- switch (codePoint) {
- /* The linefeed character. */
- case '\n': return true;
- /* The tab character. */
- case '\t': return true;
- }
-
- /* For now, assume that all digits, spaces, symbols, and punctuation
- * are valid in all languages. */
- final int type = Character.getType(codePoint);
- switch (type) {
- case Character.UNASSIGNED: return false;
- case Character.UPPERCASE_LETTER: break;
- case Character.LOWERCASE_LETTER: break;
- case Character.TITLECASE_LETTER: break;
- case Character.MODIFIER_LETTER: break;
- case Character.OTHER_LETTER: break;
- case Character.NON_SPACING_MARK: break;
- case Character.ENCLOSING_MARK: return true;
- case Character.COMBINING_SPACING_MARK: break;
- case Character.DECIMAL_DIGIT_NUMBER: return true;
- case Character.LETTER_NUMBER: break;
- case Character.OTHER_NUMBER: break;
- case Character.SPACE_SEPARATOR: return true;
- case Character.LINE_SEPARATOR: return true;
- case Character.PARAGRAPH_SEPARATOR: return true;
- case Character.CONTROL: return false;
- case Character.FORMAT: return false;
- case Character.PRIVATE_USE: break;
- case Character.SURROGATE: break;
- case Character.DASH_PUNCTUATION: return true;
- case Character.START_PUNCTUATION: return true;
- case Character.END_PUNCTUATION: return true;
- case Character.CONNECTOR_PUNCTUATION: return true;
- case Character.OTHER_PUNCTUATION: return true;
- case Character.MATH_SYMBOL: return true;
- case Character.CURRENCY_SYMBOL: return true;
- case Character.MODIFIER_SYMBOL: return true;
- case Character.OTHER_SYMBOL: return true;
- case Character.INITIAL_QUOTE_PUNCTUATION: return true;
- case Character.FINAL_QUOTE_PUNCTUATION: return true;
- default: break;
- }
-
- for (int i = 0; i < this.codePointRanges.size(); i++) {
- final CodePointRange range = this.codePointRanges.get(i);
- if (range.isIncluded(codePoint)) {
- return true;
- }
- }
- return false;
- }
-
- /**
- * Indicates whether a given sequence of characters is a valid grapheme cluster in this language.
- * @param codePoints The sequence of code points to be tested.
- * This sequence must be already normalized to the canonical decomposed sequence and order.
- * @param start The index to the first character that is being tested.
- * @param end The index to the last character that is being tested.
- * @return True if and only if the sequence of characters matches a valid Grapheme
- * Cluster in this language.
- */
- public boolean isIncluded(final int[] codePoints, final int start,
- final int end) {
- if (end - start == 0) {
- final int codePoint = codePoints[start];
- return this.isIncluded(codePoint);
- }
- for (GraphemeCluster cluster : this.clusters) {
- if (cluster.isIncluded(codePoints, start, end)) {
- return true;
- }
- }
- return false;
- }
-
- /**
- * Validates the content of a sequence of chars to determine whether they are valid in this language.
- * By "valid" is meant that the grapheme clusters contained in the text are valid grapheme clusters in this
- * language.
- * @param theChars The String or other CharSequence that contains the text to be validated.
- * This text does not need to already be normalized.
- * @return The index to the first code point of the first grapheme cluster in {@code theChars} that is <em>not</em>
- * valid in this language, or -1 if all clusters are valid.
- */
- public int validateText(final CharSequence theChars) {
- /* Normalize the input. */
- final String normalized = Normalizer.normalize(theChars.toString(), Normalizer.Form.NFD);
- /* Convert to code points. */
- /* Note: We are after programming simplicity here for now. There are
- * probably more efficient ways to do this. */
- final int[] codePoints = StringUtils.toCodePoints(normalized);
-
- int clusterStart = 0;
- while (clusterStart < codePoints.length) {
- final int codePoint = codePoints[clusterStart];
- if (UCharacter.getCombiningClass(codePoint) != 0) {
- throw new IllegalStateException("Grapheme Cluster must start with a code point whose combining class "
- + "is zero.");
- }
- /* Start out with the cluster end equal to the cluster start. */
- int clusterEnd = clusterStart;
- /* Increment it until the combining class of the code point is 0, which signals the beginning of the next
- * grapheme cluster. */
- boolean done = false;
- while (! done) {
- if (clusterEnd >= codePoints.length - 1) {
- done = true;
- } else {
- final int nextCodePoint = codePoints[clusterEnd + 1];
- if (UCharacter.getCombiningClass(nextCodePoint) == 0) {
- done = true;
- } else {
- clusterEnd ++;
- }
- }
- }
-
- /* Test the cluster. */
- final boolean included = this.isIncluded(codePoints, clusterStart,
- clusterEnd);
- if (! included) {
- return clusterStart;
- }
- clusterStart = clusterEnd + 1;
- }
- return -1;
- }
-
-}
Deleted: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/ValidateChars.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/ValidateChars.java 2023-10-03 01:30:18 UTC (rev 13302)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/ValidateChars.java 2023-10-03 03:15:48 UTC (rev 13303)
@@ -1,272 +0,0 @@
-/*
- * Copyright 2007 The FOray Project.
- * http://www.foray.org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This work is in part derived from the following work(s), used with the
- * permission of the licensor:
- * Apache FOP, licensed by the Apache Software Foundation
- *
- */
-
-/*
- * $LastChangedRevision$
- * $LastChangedDate$
- * $LastChangedBy$
- */
-
-package org.foray.orthography.util;
-
-import org.foray.common.i18n.Language4a;
-import org.foray.common.url.UrlFactory;
-import org.foray.orthography.OrthographyServer4a;
-import org.foray.orthography.OrthographyServerConfig;
-import org.foray.xml.SaxParser;
-import org.foray.xml.SaxUtils;
-
-import org.axsl.i18n.Language;
-import org.axsl.orthography.OrthographyException;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.xml.sax.Attributes;
-import org.xml.sax.EntityResolver;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-import org.xml.sax.XMLReader;
-
-import java.io.BufferedInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.MalformedURLException;
-import java.net.URL;
-
-/**
- * Command-line application that reads an XML file and checks its content
- * against a predefined set of characters that are legitimate in a given
- * language, reporting on anomalies. The purpose here is to find words that are
- * misspelled or that are not encoded properly, so that they can be fixed in
- * preparation for creating a word list.
- */
-public class ValidateChars extends SaxParser<Object> {
-
- /** Command-line return status constant indicating that the number of
- * arguments is wrong. */
- public static final byte STATUS_WRONG_QTY_ARGUMENTS = 1;
-
- /** Command-line return status constant indicating that a file was not
- * found. */
- public static final byte STATUS_FILE_NOT_FOUND = 2;
-
- /** Command-line return status constant indicating that there was a parsing
- * error. */
- public static final byte STATUS_PARSING_ERROR = 3;
-
- /** The minimum number of command-line arguments. */
- private static final byte MIN_CL_ARGUMENTS = 3;
-
- /** The maximum number of command-line arguments. */
- private static final byte MAX_CL_ARGUMENTS = 4;
-
- /** The entity resolver to be used for resolving DTD catalogs and other
- * entities. */
- private EntityResolver entityResolver;
-
- /** The String languageCode passed in the constructor. */
- private String languageCode;
-
- /** The Iso639 instance against whose language the content of this document
- * will be tested. */
- private Language iso639;
- /* TODO: This needs to be handled on an element-by-element basis, based
- * upon language codes in the document itself. */
-
- /** The server used to find natural language resources. */
- private OrthographyServer4a server;
-
- /**
- * Constructor.
- * @param server The server used to find natural language resources.
- * @param catalog The location of a catalog file that should be used to find
- * DTDs.
- * @param languageCode The valid ISO-639 language against which this
- * document will be tested.
- * @throws IllegalArgumentException If {@code languageCode} cannot be
- * resolved to a language.
- */
- public ValidateChars(final OrthographyServer4a server, final String catalog, final String languageCode) {
- this.server = server;
- this.entityResolver = SaxUtils.getEntityResolver(catalog);
- this.languageCode = languageCode;
- this.iso639 = Language4a.findFromAlpha(languageCode);
- /* Fail fast: without a resolved language there is nothing to validate against. */
- if (this.iso639 == null) {
- throw new IllegalArgumentException("Not a valid ISO-639 code: " + this.languageCode);
- }
- }
-
- @Override
- public Object parse(final InputSource input) throws IOException, SAXException {
- final XMLReader parser = createSax2Parser(true, true, true, this.entityResolver, true);
- parser.parse(input);
- return null;
- }
-
- @Override
- public void startDocument() {
- }
-
- @Override
- public void endDocument() {
- }
-
- @Override
- public void startElement(final String uri, final String local,
- final String qName, final Attributes attributes) {
- final String content = getAndClearText();
- this.checkContent(content);
- }
-
- @Override
- public void endElement(final String uri, final String local,
- final String qName) {
- final String content = getAndClearText();
- this.checkContent(content);
- }
-
-    /**
-     * Validates one run of character content against the configured language and reports the first
-     * problem found, if any, through {@code errorMessage}.
-     * @param content The current content String to be processed. A null value is ignored.
-     */
-    private void checkContent(final String content) {
-        if (content == null) {
-            return;
-        }
-        final NaturalLanguage language = this.server.getNaturalLanguage(this.iso639);
-        if (language == null) {
-            errorMessage("Cannot get NaturalLanguage instance for: " + this.iso639.getEnglishName());
-            return;
-        }
-        /* A negative result means that nothing invalid was found. */
-        final int firstInvalid = language.validateText(content);
-        if (firstInvalid >= 0) {
-            errorMessage("Invalid char at index: " + firstInvalid);
-        }
-    }
-
- /**
- * Command-line interface for validating the characters in an XML document.
- *
- * @param args command-line arguments.
- * Argument 1 is the location of the input file.
- * Argument 2 is the ISO-639 language code for the language to be used to
- * validate this file.
- * Argument 3 is the URL to the directory containing the natural language
- * input files.
- * Argument 4 is an optional location of an OASIS-compliant catalog file.
- * that can be used to locate local DTDs.
- */
- public static void main(final String[] args) {
- final Logger logger = LoggerFactory.getLogger(ValidateChars.class);
- if (args == null
- || args.length < ValidateChars.MIN_CL_ARGUMENTS
- || args.length > ValidateChars.MAX_CL_ARGUMENTS) {
- logger.error("Wrong number of arguments.");
- /* CheckStyle: Allow System.exit() in main method. */
- System.exit(ValidateChars.STATUS_WRONG_QTY_ARGUMENTS);
- }
- final String input = args[0];
- final String iso639 = args[1];
- final String nlDir = args[2];
- String catalog = null;
- if (args.length > ValidateChars.MIN_CL_ARGUMENTS) {
- catalog = args[ValidateChars.MIN_CL_ARGUMENTS + 1];
- }
-
- URL hyphenationDir = null;
- try {
- hyphenationDir = UrlFactory.createURL(nlDir);
- } catch (final MalformedURLException e) {
- logger.error("Invalid URL: " + nlDir, e);
- }
- final OrthographyServerConfig config = new OrthographyServerConfig();
- config.setRawHyphenationPatternsBaseDirectory(hyphenationDir);
- config.setSerializedHyphenationPatternsBaseDirectory(hyphenationDir);
- OrthographyServer4a server = null;
- try {
- server = new OrthographyServer4a(config);
- } catch (final OrthographyException e) {
- logger.error(e.getMessage(), e);
- /* CheckStyle: Allow System.exit() in main method. */
- System.exit(ValidateChars.STATUS_FILE_NOT_FOUND);
- }
-
- /* Build the list of files to process. */
- final File file = new File(input);
- File[] filesToProcess = null;
- if (file.isDirectory()) {
- final File[] files = file.listFiles();
- int fileCount = 0;
- for (int i = 0; i < files.length; i++) {
- if (files[i].isFile()) {
- fileCount ++;
- }
- }
- filesToProcess = new File[fileCount];
- fileCount = 0;
- for (int i = 0; i < files.length; i++) {
- if (files[i].isFile()) {
- filesToProcess[fileCount] = files[i];
- fileCount ++;
- }
- }
- } else {
- filesToProcess = new File[1];
- filesToProcess[0] = file;
- }
-
- /* Process each file in the list. */
- for (int i = 0; i < filesToProcess.length; i++) {
- final File fileToProcess = filesToProcess[i];
- logger.info("Processing: " + fileToProcess.getName());
- FileInputStream fis = null;
- try {
- fis = new FileInputStream(fileToProcess);
- } catch (final FileNotFoundException e) {
- logger.error(e.getMessage(), e);
- /* CheckStyle: Allow System.exit() in main method. */
- System.exit(ValidateChars.STATUS_FILE_NOT_FOUND);
- }
- final InputStream inputStream = new BufferedInputStream(fis);
- final InputSource inputSource = new InputSource(inputStream);
- final ValidateChars validator = new ValidateChars(server, catalog, iso639);
- try {
- validator.parse(inputSource);
- } catch (final IOException | SAXException e) {
- logger.error(e.getMessage(), e);
- /* CheckStyle: Allow System.exit() in main method. */
- System.exit(ValidateChars.STATUS_PARSING_ERROR);
- }
- }
- }
-
- @Override
- public void reset() {
- throw new UnsupportedOperationException();
- }
-
-}
Deleted: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/util/NaturalLanguageTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/util/NaturalLanguageTests.java 2023-10-03 01:30:18 UTC (rev 13302)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/util/NaturalLanguageTests.java 2023-10-03 03:15:48 UTC (rev 13303)
@@ -1,60 +0,0 @@
-/*
- * Copyright 2007 The FOray Project.
- * http://www.foray.org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This work is in part derived from the following work(s), used with the
- * permission of the licensor:
- * Apache FOP, licensed by the Apache Software Foundation
- *
- */
-
-/*
- * $LastChangedRevision$
- * $LastChangedDate$
- * $LastChangedBy$
- */
-
-package org.foray.orthography.util;
-
-import org.foray.common.i18n.Language4a;
-import org.foray.orthography.OrthographyServer4a;
-import org.foray.orthography.OrthographyServer4aTests;
-
-import org.axsl.orthography.OrthographyException;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import org.junit.jupiter.api.Test;
-
-import java.io.IOException;
-
-/**
- * JUnit test class for the class {@link NaturalLanguage}.
- */
-public class NaturalLanguageTests {
-
- /**
- * Tests various Strings against the method {@link NaturalLanguage#validateText(CharSequence)}.
- * @throws IOException For errors creating the Hyphenation server.
- * @throws OrthographyException For errors creating the hyphenation server.
- */
- @Test
- public void testValidateText() throws IOException, OrthographyException {
- final OrthographyServer4a server = OrthographyServer4aTests.makeHyphenationServer();
- final NaturalLanguage nl = server.getNaturalLanguage(Language4a.findFrom3Char("mah"));
- final int invalidChar = nl.validateText("ka" + '\u00B5' + "uri");
- assertEquals(2, invalidChar);
- }
-
-}
Modified: trunk/foray/foray-zz-attic/build.gradle
===================================================================
--- trunk/foray/foray-zz-attic/build.gradle 2023-10-03 01:30:18 UTC (rev 13302)
+++ trunk/foray/foray-zz-attic/build.gradle 2023-10-03 03:15:48 UTC (rev 13303)
@@ -1,6 +1,7 @@
plugins {
id 'foray.library-conventions'
id 'foray.test-conventions'
+ id 'foray.logging-conventions'
}
description = 'foray-zz-attic'
@@ -8,6 +9,7 @@
dependencies {
implementation (group: 'commons-io', name: 'commons-io', version: versions.commonsIo)
api (group: 'jakarta.activation', name: 'jakarta.activation-api', version: '1.2.2')
+ implementation (group: 'com.ibm.icu', name: 'icu4j', version: versions.icu4j)
api (group: 'org.axsl', name: 'axsl-constants', version: versions.axsl)
implementation (group: 'org.axsl', name: 'axsl-primitive', version: versions.axsl)
@@ -14,6 +16,8 @@
api (project(':foray-common'))
implementation (project(':foray-primitive'))
+ api (project(':foray-orthography'))
+ testImplementation (project(':foray-orthography').sourceSets.test.output)
}
javadoc {
Copied: trunk/foray/foray-zz-attic/src/main/java/org/foray/orthography/NatLangParser.java (from rev 13225, trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NatLangParser.java)
===================================================================
--- trunk/foray/foray-zz-attic/src/main/java/org/foray/orthography/NatLangParser.java (rev 0)
+++ trunk/foray/foray-zz-attic/src/main/java/org/foray/orthography/NatLangParser.java 2023-10-03 03:15:48 UTC (rev 13303)
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2007 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.orthography;
+
+import org.foray.common.ForayEntityResolver;
+import org.foray.common.primitive.UnicodeCharUtils;
+import org.foray.xml.SaxParser;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+import java.io.IOException;
+import java.util.StringTokenizer;
+
+/**
+ * A SAX document handler to read and parse natural-language descriptions
+ * from an XML file.
+ * Three elements are recognized: {@code axsl-natural-language} starts a new {@link NaturalLanguage},
+ * {@code letter-range} adds a range of code points to it, and {@code letter} adds one grapheme cluster to it.
+ * All other elements are ignored.
+ */
+public class NatLangParser extends SaxParser<NaturalLanguage> {
+    /*
+     * TODO: This class should probably be converted to work with the CLDR/LDML (see src/main/schema/ldml).
+     */
+
+    /** The natural language instance being parsed. */
+    private NaturalLanguage nl;
+
+    /**
+     * Parses one natural-language description.
+     * @param inputSource The XML document to be parsed.
+     * @return The language built from the document, or null if the document contained no
+     * {@code axsl-natural-language} element.
+     * @throws IOException For errors reading the input.
+     * @throws SAXException For errors parsing the input, including invalid code point values.
+     */
+    @Override
+    public NaturalLanguage parse(final InputSource inputSource) throws IOException, SAXException {
+        final XMLReader parser = createSax2Parser(false, true, true, ForayEntityResolver.getInstance(), false);
+        parser.parse(inputSource);
+        return this.nl;
+    }
+
+    @Override
+    public void startElement(final String uri, final String local, final String raw, final Attributes attrs)
+            throws SAXException {
+        if ("axsl-natural-language".equals(local)) {
+            /* Each root element starts a fresh, empty language definition. */
+            this.nl = new NaturalLanguage();
+        } else if ("letter-range".equals(local)) {
+            addLetterRange(attrs);
+        } else if ("letter".equals(local)) {
+            addLetter(attrs);
+        }
+    }
+
+    /**
+     * Handles a {@code letter-range} element by adding its range to the current language.
+     * @param attrs The attributes of the element. Both {@code start} and {@code end} must be valid unparsed code
+     * points.
+     * @throws SAXException If either attribute is invalid, or if no language has been started.
+     */
+    private void addLetterRange(final Attributes attrs) throws SAXException {
+        final NaturalLanguage language = currentLanguage("letter-range");
+        final String startAttr = attrs.getValue("start");
+        final String endAttr = attrs.getValue("end");
+        if (! UnicodeCharUtils.isValidUnparsedCodePoint(startAttr)) {
+            throw new SAXException("Invalid start: " + startAttr);
+        }
+        /* Validate the end attribute itself, so that a bad value is reported here instead of at parse time. */
+        if (! UnicodeCharUtils.isValidUnparsedCodePoint(endAttr)) {
+            throw new SAXException("Invalid end: " + endAttr);
+        }
+        final int start = UnicodeCharUtils.parseCodePoint(startAttr);
+        final int end = UnicodeCharUtils.parseCodePoint(endAttr);
+        language.addRange(start, end);
+    }
+
+    /**
+     * Handles a {@code letter} element by adding its grapheme cluster to the current language.
+     * @param attrs The attributes of the element. The {@code value} attribute holds whitespace-separated code
+     * points, for example {@code value="U+0041 U+0304"}.
+     * @throws SAXException If the attribute is missing or contains an invalid code point, or if no language has
+     * been started.
+     */
+    private void addLetter(final Attributes attrs) throws SAXException {
+        final NaturalLanguage language = currentLanguage("letter");
+        final String value = attrs.getValue("value");
+        if (value == null) {
+            throw new SAXException("Missing value attribute on letter element.");
+        }
+        final StringTokenizer tokenizer = new StringTokenizer(value);
+        final int[] codePoints = new int[tokenizer.countTokens()];
+        for (int index = 0; tokenizer.hasMoreTokens(); index++) {
+            final String token = tokenizer.nextToken();
+            if (! UnicodeCharUtils.isValidUnparsedCodePoint(token)) {
+                throw new SAXException("Invalid value: " + token);
+            }
+            codePoints[index] = UnicodeCharUtils.parseCodePoint(token);
+        }
+        language.addCluster(codePoints);
+    }
+
+    /**
+     * Returns the language currently being built.
+     * @param element The name of the element that needs the language, used only for the error message.
+     * @return The language currently being built, never null.
+     * @throws SAXException If no {@code axsl-natural-language} element has been seen yet.
+     */
+    private NaturalLanguage currentLanguage(final String element) throws SAXException {
+        if (this.nl == null) {
+            throw new SAXException("Element \"" + element + "\" found before \"axsl-natural-language\".");
+        }
+        return this.nl;
+    }
+
+    @Override
+    public void endElement(final String uri, final String local,
+            final String raw) {
+        /* Nothing to do here. */
+    }
+
+    @Override
+    public void reset() {
+        throw new UnsupportedOperationException();
+    }
+
+}
Copied: trunk/foray/foray-zz-attic/src/main/java/org/foray/orthography/NaturalLanguage.java (from rev 13225, trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NaturalLanguage.java)
===================================================================
--- trunk/foray/foray-zz-attic/src/main/java/org/foray/orthography/NaturalLanguage.java (rev 0)
+++ trunk/foray/foray-zz-attic/src/main/java/org/foray/orthography/NaturalLanguage.java 2023-10-03 03:15:48 UTC (rev 13303)
@@ -0,0 +1,309 @@
+/*
+ * Copyright 2007 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.orthography;
+
+import org.foray.primitive.StringUtils;
+
+import com.ibm.icu.lang.UCharacter;
+
+import java.text.Normalizer;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Manages various aspects of a natural language, specifically what grapheme
+ * clusters are valid in that language.
+ *
+ * NOTE: There may be a better way to do this, but I have not found it yet.
+ * Java has the "Locale" class, which gives access to certain resources.
+ * However, this seems to be JVM-specific, and does not allow for extension by
+ * addition of new locales. Also the ICU4J libraries from IBM (parts of which
+ * are included in Java 5, parts in Java 6) provide some similar capabilities,
+ * but do not seem to be documented well enough for us to use. It seems like
+ * writing this class will be easier than trying to figure out any of the
+ * other.
+ */
+public final class NaturalLanguage {
+ /*
+ * TODO: This class should probably be converted to work with the CLDR/LDML (see src/main/schema/ldml).
+ */
+
+    /**
+     * Nested class for storing one grapheme cluster.
+     * A grapheme cluster is a sequence of Unicode code points that define a logical character, for example an "e" with
+     * a combining diacritical acute accent.
+     * The class is static because it never uses the enclosing {@link NaturalLanguage} instance.
+     */
+    private static final class GraphemeCluster {
+
+        /** The Unicode code points describing the grapheme cluster.
+         * These are maintained in canonical decomposed order. */
+        private final int[] codePoints;
+
+        /**
+         * Constructor.
+         * @param codePoints The code points that comprise the grapheme cluster.
+         * The array is stored as-is, not copied.
+         */
+        private GraphemeCluster(final int[] codePoints) {
+            /* TODO: Ensure validity, canonical order. */
+            this.codePoints = codePoints;
+        }
+
+        /**
+         * Indicates whether a given sequence of characters matches this Grapheme Cluster.
+         * @param testCodePoints The sequence of code points to be tested.
+         * This sequence must be already normalized to the canonical decomposed sequence and order.
+         * @param start The index to the first character that is being tested.
+         * @param end The index to the last character that is being tested.
+         * @return True if and only if the sequence of characters matches this Grapheme
+         * Cluster.
+         */
+        public boolean isIncluded(final int[] testCodePoints, final int start, final int end) {
+            /* The tested span is inclusive at both ends, hence the "+ 1". */
+            if ((end - start + 1) != this.codePoints.length) {
+                return false;
+            }
+            for (int i = 0; i < this.codePoints.length; i++) {
+                if (this.codePoints[i] != testCodePoints[i + start]) {
+                    return false;
+                }
+            }
+            return true;
+        }
+    }
+
+    /**
+     * Nested class for storing one code point range.
+     * A code point range is a range of Unicode code points that are valid in this language.
+     * This class is suitable only for graphemes that can be described with a single code point when canonically
+     * decomposed.
+     * Graphemes that require more than one code point should use {@link GraphemeCluster}.
+     * The class is static because it never uses the enclosing {@link NaturalLanguage} instance.
+     */
+    private static final class CodePointRange {
+
+        /** The starting Unicode code point that defines this range. */
+        private final int start;
+
+        /** The ending Unicode code point that defines this range. */
+        private final int end;
+
+        /**
+         * Private Constructor.
+         * @param start The starting Unicode code point that defines this range.
+         * @param end The ending Unicode code point that defines this range.
+         */
+        private CodePointRange(final int start, final int end) {
+            this.start = start;
+            this.end = end;
+        }
+
+        /**
+         * Indicates whether a given code point is in this range.
+         * Both ends of the range are inclusive.
+         * @param codePoint The Unicode code point being tested.
+         * @return True if and only if {@code codePoint} is included in this range.
+         */
+        public boolean isIncluded(final int codePoint) {
+            return codePoint >= this.start && codePoint <= this.end;
+        }
+    }
+
+ /** Temporary data structure for storing the grapheme clusters as the
+ * instance is being created. */
+ private transient List<NaturalLanguage.GraphemeCluster> clusters =
+ new ArrayList<NaturalLanguage.GraphemeCluster>();
+
+ /** Temporary data structure for storing the code point ranges as the
+ * instance is being created. */
+ private transient List<NaturalLanguage.CodePointRange> codePointRanges =
+ new ArrayList<NaturalLanguage.CodePointRange>();
+
+ /**
+ * Constructor.
+ * Creates a language with no code point ranges and no grapheme clusters.
+ * Use {@link #addRange(int, int)} and {@link #addCluster(int[])} to populate it.
+ */
+ public NaturalLanguage() {
+ }
+
+ /**
+ * Add a range of Unicode code points to this language.
+ * @param start The first code point in the range to be added.
+ * @param end The last code point in the range to be added.
+ */
+ public void addRange(final int start, final int end) {
+ final CodePointRange newRange = new CodePointRange(start, end);
+ this.codePointRanges.add(newRange);
+ }
+
+ /**
+ * Add a new Grapheme Cluster to this language.
+ * @param codePoints The sequence of Unicode code point that define the Grapheme Cluster.
+ */
+ public void addCluster(final int[] codePoints) {
+ final GraphemeCluster newCluster = new GraphemeCluster(codePoints);
+ this.clusters.add(newCluster);
+ }
+
+    /**
+     * Indicates whether a specific Unicode code point is valid as a grapheme in this language.
+     * Linefeed and tab are always accepted. After that the general category of the code point decides: some
+     * categories are always accepted, some are always rejected, and the remainder (letters, most marks, and
+     * others) are accepted only if one of the ranges added through {@link #addRange(int, int)} contains the
+     * code point.
+     * @param codePoint The Unicode code point to be tested.
+     * @return True if and only if {@code codePoint} is valid in this language.
+     */
+    public boolean isIncluded(final int codePoint) {
+        /* Linefeed and tab are control characters, which would otherwise be rejected below. */
+        if (codePoint == '\n' || codePoint == '\t') {
+            return true;
+        }
+
+        /* For now, assume that all digits, spaces, symbols, and punctuation are valid in all languages. */
+        switch (Character.getType(codePoint)) {
+        case Character.UNASSIGNED:
+        case Character.CONTROL:
+        case Character.FORMAT:
+            return false;
+        case Character.ENCLOSING_MARK:
+        case Character.DECIMAL_DIGIT_NUMBER:
+        case Character.SPACE_SEPARATOR:
+        case Character.LINE_SEPARATOR:
+        case Character.PARAGRAPH_SEPARATOR:
+        case Character.DASH_PUNCTUATION:
+        case Character.START_PUNCTUATION:
+        case Character.END_PUNCTUATION:
+        case Character.CONNECTOR_PUNCTUATION:
+        case Character.OTHER_PUNCTUATION:
+        case Character.MATH_SYMBOL:
+        case Character.CURRENCY_SYMBOL:
+        case Character.MODIFIER_SYMBOL:
+        case Character.OTHER_SYMBOL:
+        case Character.INITIAL_QUOTE_PUNCTUATION:
+        case Character.FINAL_QUOTE_PUNCTUATION:
+            return true;
+        default:
+            /* Letters, the remaining marks and numbers, private-use and surrogate code points, and anything
+             * else are language-specific: fall through to the range test. */
+            break;
+        }
+
+        for (CodePointRange candidate : this.codePointRanges) {
+            if (candidate.isIncluded(codePoint)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+ /**
+ * Indicates whether a given sequence of characters is a valid grapheme cluster in this language.
+ * @param codePoints The sequence of code points to be tested.
+ * This sequence must be already normalized to the canonical decomposed sequence and order.
+ * @param start The index to the first character that is being tested.
+ * @param end The index to the last character that is being tested.
+ * @return True if and only if the sequence of characters matches a valid Grapheme
+ * Cluster in this language.
+ */
+ public boolean isIncluded(final int[] codePoints, final int start,
+ final int end) {
+ if (end - start == 0) {
+ final int codePoint = codePoints[start];
+ return this.isIncluded(codePoint);
+ }
+ for (GraphemeCluster cluster : this.clusters) {
+ if (cluster.isIncluded(codePoints, start, end)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+    /**
+     * Validates the content of a sequence of chars to determine whether they are valid in this language.
+     * By "valid" is meant that the grapheme clusters contained in the text are valid grapheme clusters in this
+     * language.
+     * The text is first normalized to NFD. Each cluster then consists of one code point whose combining class is
+     * zero, followed by every immediately following code point whose combining class is not zero.
+     * @param theChars The String or other CharSequence that contains the text to be validated.
+     * This text does not need to already be normalized.
+     * @return The index to the first code point of the first grapheme cluster that is <em>not</em> valid in this
+     * language, or -1 if all clusters are valid.
+     * The index refers to the normalized code point sequence, not to the chars of {@code theChars}.
+     * @throws IllegalStateException If a cluster would start with a code point whose combining class is not zero,
+     * for example when the text begins with a combining mark.
+     */
+    public int validateText(final CharSequence theChars) {
+        /* Normalize, then convert to code points. Note: We are after programming simplicity here for now. There
+         * are probably more efficient ways to do this. */
+        final int[] codePoints = StringUtils.toCodePoints(
+                Normalizer.normalize(theChars.toString(), Normalizer.Form.NFD));
+
+        int first = 0;
+        while (first < codePoints.length) {
+            if (UCharacter.getCombiningClass(codePoints[first]) != 0) {
+                throw new IllegalStateException("Grapheme Cluster must start with a code point whose combining class "
+                        + "is zero.");
+            }
+            /* Extend the cluster over every following code point whose combining class is not zero. A combining
+             * class of zero signals the beginning of the next grapheme cluster. */
+            int last = first;
+            while (last + 1 < codePoints.length
+                    && UCharacter.getCombiningClass(codePoints[last + 1]) != 0) {
+                last++;
+            }
+
+            /* Test the cluster. */
+            if (! this.isIncluded(codePoints, first, last)) {
+                return first;
+            }
+            first = last + 1;
+        }
+        return -1;
+    }
+
+}
Added: trunk/foray/foray-zz-attic/src/main/java/org/foray/orthography/OrthographyServer4aAttic.java
===================================================================
--- trunk/foray/foray-zz-attic/src/main/java/org/foray/orthography/OrthographyServer4aAttic.java (rev 0)
+++ trunk/foray/foray-zz-attic/src/main/java/org/foray/orthography/OrthographyServer4aAttic.java 2023-10-03 03:15:48 UTC (rev 13303)
@@ -0,0 +1,231 @@
+/*
+ * Copyright 2004 The FO...
[truncated message content] |
|
From: <vic...@us...> - 2023-10-03 01:30:20
|
Revision: 13302
http://sourceforge.net/p/foray/code/13302
Author: victormote
Date: 2023-10-03 01:30:18 +0000 (Tue, 03 Oct 2023)
Log Message:
-----------
Create class that can override a standard orthography.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/WordChecker.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/Orthography4aTests.java
Added Paths:
-----------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aStandard.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aVariant.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java 2023-10-02 23:19:34 UTC (rev 13301)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java 2023-10-03 01:30:18 UTC (rev 13302)
@@ -1,5 +1,5 @@
/*
- * Copyright 2019 The FOray Project.
+ * Copyright 2023 The FOray Project.
* http://www.foray.org
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -28,501 +28,19 @@
package org.foray.orthography;
-import org.foray.common.i18n.WritingSystem4a;
-import org.foray.common.primitive.CharSequenceUtils;
-import org.foray.orthography.wrapper.CapitalizedWord;
-import org.foray.orthography.wrapper.ExactWord;
-import org.foray.orthography.wrapper.UppercaseWord;
-import org.foray.primitive.BooleanUtils;
-import org.foray.primitive.CharacterUtils;
-
import org.axsl.fotree.text.FoOrthography;
import org.axsl.orthography.Dictionary;
-import org.axsl.orthography.Lexer;
-import org.axsl.orthography.Lexer.TokenType;
-import org.axsl.orthography.OrthographyException;
-import org.axsl.orthography.Word;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Stack;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
/**
- * The resources (dictionaries, word wrappers, hyphenation patterns, etc.) that should be used by a given orthography.
+ * Abstract superclass for FOray orthographies.
*/
-public class Orthography4a implements FoOrthography {
+public abstract class Orthography4a implements FoOrthography {
- /** The list of ids to lists of regex patterns, which, for this orthography, signal a valid word if matched. */
- private List<String> matchRuleListIds = new ArrayList<String>();
-
- /** The list of ids to lists of derivative rules, which, for this orthography, can compute derivative words. */
- private List<String> derivativeRuleListIds = new ArrayList<String>();
-
- /** The dictionary for this orthography.*/
- private DictionaryResource dictionaryResource;
-
- /** The hyphenation patterns for this orthography. */
- private HyphenationPatternsResource hyphenationPatternsResource;
-
- /** The list of word wrapper factories for this orthography. */
- private List<WordWrapperFactory<?>> wordWrapperFactories = new ArrayList<WordWrapperFactory<?>>();
-
- /** The parent hyphenation server. */
- private OrthographyServer4a server;
-
- /* TODO: Following orthography-specific config needs to be moved to XML or subclass. */
- /** Character delimiting a compound word. NB: This variable may be orthography specific, and may therefore need to
- * be moved to the orthography configuration. However, we have found no evidence yet for that need. */
- private char compoundWordMarker = '-';
-
- /** Regex pattern used to break compound words into their components. */
- private Pattern compoundWordBreaker = Pattern.compile(Character.toString(compoundWordMarker));
-
- /** The writing system for this orthography. */
- private WritingSystem4a writingSystem;
-
/**
- * Constructor.
- * @param server The parent hyphenation server.
- * @param writingSystem The writing system for this orthography.
- */
- public Orthography4a(final OrthographyServer4a server, final WritingSystem4a writingSystem) {
- this.server = server;
- this.writingSystem = writingSystem;
- }
-
- /**
- * Returns the list of match rule Ids.
- * @return The list of match rule Ids.
- */
- public List<String> getMatchRuleListIds() {
- return this.matchRuleListIds;
- }
-
- /**
- * Adds a match rule list Id to this configuration.
- * @param matchRuleListId The new match rule list Id.
- */
- public void registerMatchRuleListId(final String matchRuleListId) {
- if (this.matchRuleListIds.contains(matchRuleListId)) {
- throw new IllegalArgumentException(
- "Match Rule List already configured for this orthography: " + matchRuleListId);
- }
- this.matchRuleListIds.add(matchRuleListId);
- }
-
- /**
- * Returns the list of derivative rule Ids.
- * @return The list of derivative rule Ids.
- */
- public List<String> getDerivativeRuleListIds() {
- return this.derivativeRuleListIds;
- }
-
- /**
- * Adds a derivative rule list Id to this configuration.
- * @param derivativeRuleListId The new derivative rule list Id.
- */
- public void registerDerivativeRuleListId(final String derivativeRuleListId) {
- if (this.derivativeRuleListIds.contains(derivativeRuleListId)) {
- throw new IllegalArgumentException(
- "Derivative Rule List already configured for this orthography: " + derivativeRuleListId);
- }
- this.derivativeRuleListIds.add(derivativeRuleListId);
- }
-
- /**
- * Returns the dictionary resource.
- * @return The dictionary resource.
- */
- public DictionaryResource getDictionaryResource() {
- return this.dictionaryResource;
- }
-
- /**
- * Sets the dictionary resource.
- * @param dictionaryResource The dictionaryResource to set.
- */
- public void setDictionaryResource(final DictionaryResource dictionaryResource) {
- this.dictionaryResource = dictionaryResource;
- }
-
- /**
- * Returns the hyphenation patterns resource.
- * @return The hyphenation patterns resource
- */
- public HyphenationPatternsResource getHyphenationPatternsResource() {
- return this.hyphenationPatternsResource;
- }
-
- /**
- * Sets the hyphenation patterns resource.
- * @param hyphenationPatternsResource The hyphenation patterns resource to set.
- */
- public void setHyphenationPatternsResource(final HyphenationPatternsResource hyphenationPatternsResource) {
- this.hyphenationPatternsResource = hyphenationPatternsResource;
- }
-
- /**
- * Returns the list of word wrapper factories.
- * @return The list of word wrapper factories.
- */
- public List<WordWrapperFactory<?>> getWordWrapperFactories() {
- return this.wordWrapperFactories;
- }
-
- /**
- * Sets the list of word wrapper factories.
- * @param wordWrapperFactories The word wrapper factories to set.
- */
- public void setWordWrapperFactories(final List<WordWrapperFactory<?>> wordWrapperFactories) {
- this.wordWrapperFactories = wordWrapperFactories;
- }
-
- /**
* Returns the dictionary.
* @return The dictionary, or null if one is not configured or cannot be obtained.
*/
- public SegmentDictionary getDictionary() {
- if (this.dictionaryResource == null) {
- return null;
- } else {
- return this.dictionaryResource.getResource();
- }
- }
+ public abstract Dictionary getDictionary();
- /**
- * Returns the hyphenation patterns.
- * @return The hyphenation patterns.
- */
- public PatternTree getHyphenationPatterns() {
- if (this.hyphenationPatternsResource == null) {
- return null;
- } else {
- return this.hyphenationPatternsResource.getResource();
- }
- }
-
- /**
- * Searches the configured word wrapper factories for a match that would create a word derived from a dictionary
- * word.
- * @param chars The word to test.
- * @return A word wrapper if {@code chars} matches a word wrapper factory, or null if not.
- */
- public WordWrapper findDerivatives(final CharSequence chars) {
- /* TODO: For now, this returns the first item that matches. This may need to be expanded to allow nested wrapped
- * words. */
- WordWrapper word = null;
- final Dictionary dictionary = getDictionary();
- for (int index = 0; index < this.wordWrapperFactories.size(); index ++) {
- final WordWrapperFactory<?> factory = this.wordWrapperFactories.get(index);
- word = factory.makeInstance(chars, dictionary);
- if (word != null) {
- return word;
- }
- }
- return null;
- }
-
- /**
- * Indicates whether a given word is found in the match rules for this orthography, i.e. rules looking for
- * non-dictionary items such as numbers, currency, etc.
- * @param wordChars The word to be tested.
- * @return True if and only if {@code word} matches at least one match rule for this orthography.
- */
- public boolean foundInMatchRules(final CharSequence wordChars) {
- for (int idIndex = 0; idIndex < getMatchRuleListIds().size(); idIndex ++) {
- final String ruleListId = matchRuleListIds.get(idIndex);
- final List<Pattern> validWordPatterns = server.getMatchRules(ruleListId);
- for (int index = 0; index < validWordPatterns.size(); index ++) {
- final Pattern pattern = validWordPatterns.get(index);
- final Matcher matcher = pattern.matcher(wordChars);
- if (matcher.matches()) {
- return true;
- }
- }
- }
- return false;
- }
-
- @Override
- public Word4a recognizeWord(final CharSequence wordChars, final int offset, final int length,
- final Word.PartOfSpeech pos, final List<Dictionary> adhocDictionaries) {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public boolean isRecognizedWord(final CharSequence wordCharsIn, final int offset, final int length,
- final Word.PartOfSpeech pos, final List<Dictionary> adhocDictionaries) {
- /* TODO: For performance and memory, try to eliminate the following conversion. */
- final CharSequence wordChars = wordCharsIn.subSequence(offset, offset + length);
- if (wordChars.length() < 1) {
- return false;
- }
-
- final Stack<Dictionary> dictionaryStack = new Stack<Dictionary>();
-
- /* 1. Check exact matches in adhoc dictionaries. */
- if (adhocDictionaries != null) {
- dictionaryStack.addAll(adhocDictionaries);
-
- while (! dictionaryStack.isEmpty()) {
- final Dictionary baseDictionary = dictionaryStack.pop();
- /* Check the referenced dictionary and each of its imported dictionaries. */
- if (baseDictionary.getWritingSystem().satisfies(this.writingSystem)) {
- if (baseDictionary.getWord(wordChars, 0) == null) {
- addImportedDictionaries(baseDictionary, dictionaryStack);
- } else {
- return true;
- }
- } else {
- addImportedDictionaries(baseDictionary, dictionaryStack);
- }
- }
- }
-
- /* 2. Check exact matches in standard dictionaries for the orthography. */
- dictionaryStack.clear();
- dictionaryStack.push(getDictionary());
- /* Check the referenced dictionary and each of its ancestor dictionaries. */
- while (! dictionaryStack.isEmpty()) {
- final Dictionary baseDictionary = dictionaryStack.pop();
- if (baseDictionary.getWord(wordChars, 0) == null) {
- addImportedDictionaries(baseDictionary, dictionaryStack);
- } else {
- return true;
- }
- }
-
- /* 3. Check the match rules. */
- if (foundInMatchRules(wordChars)) {
- return true;
- }
-
- /* 4. Check for compound word. */
- if (CharSequenceUtils.contains(wordChars, '-')) {
- final String[] components = this.compoundWordBreaker.split(wordChars);
- final boolean[] componentsValid = new boolean[components.length];
- for (int index = 0; index < components.length; index ++) {
- final String component = components[index];
- componentsValid[index] = isRecognizedWord(component, 0, component.length(), pos, adhocDictionaries);
- }
- if (BooleanUtils.allTrue(componentsValid)) {
- return true;
- }
- }
-
- /* 5. Check derivative matches in adhoc dictionaries. */
- dictionaryStack.clear();
- if (adhocDictionaries != null) {
- dictionaryStack.addAll(adhocDictionaries);
- while (! dictionaryStack.isEmpty()) {
- final Dictionary baseDictionary = dictionaryStack.pop();
- /* Check the referenced dictionary and each of its ancestor dictionaries. */
- if (isDerivativeFound(wordChars, baseDictionary)) {
- return true;
- } else {
- addImportedDictionaries(baseDictionary, dictionaryStack);
- }
- }
- }
-
- /* 6. Check derivative matches in standard dictionaries for the orthography. */
- dictionaryStack.clear();
- dictionaryStack.push(getDictionary());
- /* Check the referenced dictionary and each of its ancestor dictionaries. */
- while (! dictionaryStack.empty()) {
- final Dictionary baseDictionary = dictionaryStack.pop();
- if (isDerivativeFound(wordChars, baseDictionary)) {
- return true;
- } else {
- addImportedDictionaries(baseDictionary, dictionaryStack);
- }
- }
-
- /* Not found in any dictionary. */
- /* If the first character is uppercase, convert to lowercase and try again. Discussion: For English at least, we
- * do not want the opposite effect, i.e. to convert words starting with lowercase have the first char converted
- * to uppercase. If the word is in the dictionary as a proper noun, we should treat a failure to capitalize it
- * as a spelling error. Also, we do not want to generally convert the entire word to lowercase, as capital
- * letters in the middle of the word should normally be treated as a spelling error. For exceptions to this
- * last rule, users should enter the oddly-capitalized word into a dictionary in that form.
- * TODO: This capability should be included in the orthography configuration instead of being hard-coded
- * here. */
- final int indexFirstLetter = CharacterUtils.firstLetter(wordChars);
- if (indexFirstLetter > -1) {
- if (Character.isUpperCase(wordChars.charAt(indexFirstLetter))) {
- final StringBuilder builder = new StringBuilder(wordChars);
- builder.setCharAt(indexFirstLetter, Character.toLowerCase(wordChars.charAt(indexFirstLetter)));
- return isRecognizedWord(builder, offset, length, pos, adhocDictionaries);
- }
- }
-
- return false;
- }
-
- /**
- * Adds all imported dictionaries in a given {@link Dictionary} instance to stack of such dictionary IDs.
- * @param baseDictionary The base dictionary whose imports are to be added to the stack.
- * @param dictionaryStack The stack to which the Dictionary IDs should be added.
- */
- private void addImportedDictionaries(final Dictionary baseDictionary, final Stack<Dictionary> dictionaryStack) {
- final List<String> imported = baseDictionary.getImportedDictionaries();
- for (int index = 0; index < imported.size(); index ++) {
- final String dictId = imported.get(index);
- final Dictionary importedDictionary = this.server.getDictionary(dictId);
- if (importedDictionary != null) {
- dictionaryStack.push(importedDictionary);
- }
- }
- }
-
- /**
- * Indicates whether a given word can be found in a given dictionary after considering the derivative rules in this
- * orthography.
- * @param wordChars The word characters.
- * @param dictionary The dictionary to be searched.
- * @return True if and only if a root for {@code wordChars} can be found in {@code dictionary} using the derivative
- * rules in this orthography.
- */
- private boolean isDerivativeFound(final CharSequence wordChars, final Dictionary dictionary) {
- for (int listIndex = 0; listIndex < this.derivativeRuleListIds.size(); listIndex ++) {
- final String ruleListKey = this.derivativeRuleListIds.get(listIndex);
- final List<DerivativePattern> patternList = this.server.getDerivativePatterns(ruleListKey);
- for (int patternIndex = 0; patternIndex < patternList.size(); patternIndex ++) {
- final DerivativePattern pattern = patternList.get(patternIndex);
- if (pattern.findFirstApplicableRule(wordChars, dictionary) != null) {
- return true;
- }
- }
- }
- return false;
- }
-
- @Override
- public Word4a hyphenateUnrecognizedWord(final CharSequence word, final int offset, final int length) {
- /* The character sequence containing the characters in the word that we are looking for. */
- final CharSequence chars = word.subSequence(offset, offset + length);
- Word4a hyphenatedWord = null;
-
- /* Look in the dictionary first, as it should be more accurate. */
- final SegmentDictionary dictionary = getDictionary();
- if (dictionary != null) {
- hyphenatedWord = dictionary.getWord(chars.toString().toLowerCase(), 0);
- if (hyphenatedWord == null) {
- hyphenatedWord = findDerivatives(chars);
- }
- }
-
-
- if (hyphenatedWord == null) {
- /* The word was not found in the dictionary. Try the hyphenation patterns. */
- final PatternTree patternTree = getHyphenationPatterns();
- if (patternTree == null) {
- return null;
- }
- hyphenatedWord = patternTree.hyphenate(chars, 0, length);
- }
-
- if (hyphenatedWord == null) {
- return null;
- }
-
- final boolean capitalized = CharSequenceUtils.equalToCapitalized(hyphenatedWord.getNormalizedContent(), chars);
- if (capitalized) {
- return new CapitalizedWord(hyphenatedWord);
- }
- final boolean uppercase = CharSequenceUtils.equalToUppercase(hyphenatedWord.getNormalizedContent(), chars);
- if (uppercase) {
- return new UppercaseWord(hyphenatedWord);
- }
-
- if (CharSequenceUtils.hasAnyUppercase(chars)) {
- /* There is unexpected capitalization. */
- return new ExactWord(hyphenatedWord, chars.toString());
- }
-
- return hyphenatedWord;
- }
-
- @Override
- public TokenFlow4a tokenize(final CharSequence characters, final int startIndex, final int length)
- throws OrthographyException {
- final TokenFlow4a wordSequence = new TokenFlow4a();
- final CharSequence sequence = characters.subSequence(startIndex, startIndex + length);
- final Lexer4a lexer = this.server.getLexer();
- lexer.clear();
- lexer.addUntokenized(sequence, this.writingSystem);
- lexer.lock();
-
- while (lexer.hasNext()) {
- final Lexer.Token token = lexer.next();
- final CharSequence chunk = token.getText();
- if (chunk.length() < 1) {
- continue;
- }
-
- if (token.getTokenType() == TokenType.WORD) {
- /* Chunk is a word. */
- Word4a word = recognizeWord(chunk, 0, chunk.length(), null, null);
- if (word == null) {
- word = hyphenateUnrecognizedWord(chunk, 0, chunk.length());
- }
- if (word == null) {
- word = new StringWord(0, chunk);
- }
- wordSequence.addToken(word);
- } else {
- /* Chunk is interword content. */
- parseInterwordContent(chunk, wordSequence);
- }
- }
- lexer.clear();
- return wordSequence;
- }
-
- /**
- * Converts interword characters to instances of {@link Punctuation4a} or {@link Whitespace4a}, and adds them as
- * tokens to a given word sequence.
- * @param interword The interword characters to be tokenized.
- * @param wordSequence The sequence of tokens to which tokens will be added.
- * @throws OrthographyException If {@code interword} cannot be converted to punctuation and whitespace tokens.
- */
- private void parseInterwordContent(final CharSequence interword, final TokenFlow4a wordSequence)
- throws OrthographyException {
- int index = 0;
- while (index < interword.length()) {
- final char contentChar = interword.charAt(index);
- final Punctuation4a punctuation = Punctuation4a.findInstance(interword.subSequence(index, index + 1));
- if (punctuation != null) {
- wordSequence.addToken(punctuation);
- index ++;
- } else if (Character.isWhitespace(contentChar)) {
- final Whitespace4a whitespace = Whitespace4a.findInstance(interword.subSequence(index, index + 1));
- wordSequence.addToken(whitespace);
- index ++;
- } else {
- throw new OrthographyException("Don't know how to handle interword content: " + contentChar);
- }
- }
-
- }
-
- @Override
- public boolean canBreakLineMidWord() {
- // TODO Auto-generated method stub
- return false;
- }
-
- /**
- * Returns the writing system for this orthography.
- * @return The writing system for this orthography.
- */
- public WritingSystem4a getWritingSystem() {
- return this.writingSystem;
- }
-
}
Copied: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aStandard.java (from rev 13298, trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java)
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aStandard.java (rev 0)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aStandard.java 2023-10-03 01:30:18 UTC (rev 13302)
@@ -0,0 +1,524 @@
+/*
+ * Copyright 2019 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.orthography;
+
+import org.foray.common.i18n.WritingSystem4a;
+import org.foray.common.primitive.CharSequenceUtils;
+import org.foray.orthography.wrapper.CapitalizedWord;
+import org.foray.orthography.wrapper.ExactWord;
+import org.foray.orthography.wrapper.UppercaseWord;
+import org.foray.primitive.BooleanUtils;
+import org.foray.primitive.CharacterUtils;
+
+import org.axsl.orthography.Dictionary;
+import org.axsl.orthography.Lexer;
+import org.axsl.orthography.Lexer.TokenType;
+import org.axsl.orthography.OrthographyException;
+import org.axsl.orthography.Word;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Stack;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * The resources (dictionaries, word wrappers, hyphenation patterns, etc.) that should be used by a given orthography.
+ */
+public class Orthography4aStandard extends Orthography4a {
+
+ /** The list of ids to lists of regex patterns, which, for this orthography, signal a valid word if matched. */
+ private List<String> matchRuleListIds = new ArrayList<String>();
+
+ /** The list of ids to lists of derivative rules, which, for this orthography, can compute derivative words. */
+ private List<String> derivativeRuleListIds = new ArrayList<String>();
+
+ /** The dictionary for this orthography.*/
+ private DictionaryResource dictionaryResource;
+
+ /** The hyphenation patterns for this orthography. */
+ private HyphenationPatternsResource hyphenationPatternsResource;
+
+ /** The list of word wrapper factories for this orthography. */
+ private List<WordWrapperFactory<?>> wordWrapperFactories = new ArrayList<WordWrapperFactory<?>>();
+
+ /** The parent hyphenation server. */
+ private OrthographyServer4a server;
+
+ /* TODO: Following orthography-specific config needs to be moved to XML or subclass. */
+ /** Character delimiting a compound word. NB: This variable may be orthography specific, and may therefore need to
+ * be moved to the orthography configuration. However, we have found no evidence yet for that need. */
+ private char compoundWordMarker = '-';
+
+ /** Regex pattern used to break compound words into their components. */
+ private Pattern compoundWordBreaker = Pattern.compile(Character.toString(compoundWordMarker));
+
+ /** The writing system for this orthography. */
+ private WritingSystem4a writingSystem;
+
+ /**
+ * Constructor.
+ * @param server The parent hyphenation server.
+ * @param writingSystem The writing system for this orthography.
+ */
+ public Orthography4aStandard(final OrthographyServer4a server, final WritingSystem4a writingSystem) {
+ this.server = server;
+ this.writingSystem = writingSystem;
+ }
+
+ /**
+ * Returns the list of match rule Ids.
+ * @return The list of match rule Ids.
+ */
+ public List<String> getMatchRuleListIds() {
+ return this.matchRuleListIds;
+ }
+
+ /**
+ * Adds a match rule list Id to this configuration.
+ * @param matchRuleListId The new match rule list Id.
+ */
+ public void registerMatchRuleListId(final String matchRuleListId) {
+ if (this.matchRuleListIds.contains(matchRuleListId)) {
+ throw new IllegalArgumentException(
+ "Match Rule List already configured for this orthography: " + matchRuleListId);
+ }
+ this.matchRuleListIds.add(matchRuleListId);
+ }
+
+ /**
+ * Returns the list of derivative rule Ids.
+ * @return The list of derivative rule Ids.
+ */
+ public List<String> getDerivativeRuleListIds() {
+ return this.derivativeRuleListIds;
+ }
+
+ /**
+ * Adds a derivative rule list Id to this configuration.
+ * @param derivativeRuleListId The new derivative rule list Id.
+ */
+ public void registerDerivativeRuleListId(final String derivativeRuleListId) {
+ if (this.derivativeRuleListIds.contains(derivativeRuleListId)) {
+ throw new IllegalArgumentException(
+ "Derivative Rule List already configured for this orthography: " + derivativeRuleListId);
+ }
+ this.derivativeRuleListIds.add(derivativeRuleListId);
+ }
+
+ /**
+ * Returns the dictionary resource.
+ * @return The dictionary resource.
+ */
+ public DictionaryResource getDictionaryResource() {
+ return this.dictionaryResource;
+ }
+
+ /**
+ * Sets the dictionary resource.
+ * @param dictionaryResource The dictionaryResource to set.
+ */
+ public void setDictionaryResource(final DictionaryResource dictionaryResource) {
+ this.dictionaryResource = dictionaryResource;
+ }
+
+ /**
+ * Returns the hyphenation patterns resource.
+ * @return The hyphenation patterns resource
+ */
+ public HyphenationPatternsResource getHyphenationPatternsResource() {
+ return this.hyphenationPatternsResource;
+ }
+
+ /**
+ * Sets the hyphenation patterns resource.
+ * @param hyphenationPatternsResource The hyphenation patterns resource to set.
+ */
+ public void setHyphenationPatternsResource(final HyphenationPatternsResource hyphenationPatternsResource) {
+ this.hyphenationPatternsResource = hyphenationPatternsResource;
+ }
+
+ /**
+ * Returns the list of word wrapper factories.
+ * @return The list of word wrapper factories.
+ */
+ public List<WordWrapperFactory<?>> getWordWrapperFactories() {
+ return this.wordWrapperFactories;
+ }
+
+ /**
+ * Sets the list of word wrapper factories.
+ * @param wordWrapperFactories The word wrapper factories to set.
+ */
+ public void setWordWrapperFactories(final List<WordWrapperFactory<?>> wordWrapperFactories) {
+ this.wordWrapperFactories = wordWrapperFactories;
+ }
+
+ @Override
+ public SegmentDictionary getDictionary() {
+ if (this.dictionaryResource == null) {
+ return null;
+ } else {
+ return this.dictionaryResource.getResource();
+ }
+ }
+
+ /**
+ * Returns the hyphenation patterns.
+ * @return The hyphenation patterns.
+ */
+ public PatternTree getHyphenationPatterns() {
+ if (this.hyphenationPatternsResource == null) {
+ return null;
+ } else {
+ return this.hyphenationPatternsResource.getResource();
+ }
+ }
+
+ /**
+ * Searches the configured word wrapper factories for a match that would create a word derived from a dictionary
+ * word.
+ * @param chars The word to test.
+ * @return A word wrapper if {@code chars} matches a word wrapper factory, or null if not.
+ */
+ public WordWrapper findDerivatives(final CharSequence chars) {
+ /* TODO: For now, this returns the first item that matches. This may need to be expanded to allow nested wrapped
+ * words. */
+ WordWrapper word = null;
+ final Dictionary dictionary = getDictionary();
+ for (int index = 0; index < this.wordWrapperFactories.size(); index ++) {
+ final WordWrapperFactory<?> factory = this.wordWrapperFactories.get(index);
+ word = factory.makeInstance(chars, dictionary);
+ if (word != null) {
+ return word;
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Indicates whether a given word is found in the match rules for this orthography, i.e. rules looking for
+ * non-dictionary items such as numbers, currency, etc.
+ * @param wordChars The word to be tested.
+ * @return True if and only if {@code wordChars} matches at least one match rule for this orthography.
+ */
+ public boolean foundInMatchRules(final CharSequence wordChars) {
+ for (int idIndex = 0; idIndex < getMatchRuleListIds().size(); idIndex ++) {
+ final String ruleListId = matchRuleListIds.get(idIndex);
+ final List<Pattern> validWordPatterns = server.getMatchRules(ruleListId);
+ for (int index = 0; index < validWordPatterns.size(); index ++) {
+ final Pattern pattern = validWordPatterns.get(index);
+ final Matcher matcher = pattern.matcher(wordChars);
+ if (matcher.matches()) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public Word4a recognizeWord(final CharSequence wordChars, final int offset, final int length,
+ final Word.PartOfSpeech pos, final List<Dictionary> adhocDictionaries) {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public boolean isRecognizedWord(final CharSequence wordCharsIn, final int offset, final int length,
+ final Word.PartOfSpeech pos, final List<Dictionary> adhocDictionaries) {
+ /* TODO: For performance and memory, try to eliminate the following conversion. */
+ final CharSequence wordChars = wordCharsIn.subSequence(offset, offset + length);
+ if (wordChars.length() < 1) {
+ return false;
+ }
+
+ final Stack<Dictionary> dictionaryStack = new Stack<Dictionary>();
+
+ /* 1. Check exact matches in adhoc dictionaries. */
+ if (adhocDictionaries != null) {
+ dictionaryStack.addAll(adhocDictionaries);
+
+ while (! dictionaryStack.isEmpty()) {
+ final Dictionary baseDictionary = dictionaryStack.pop();
+ /* Check the referenced dictionary and each of its imported dictionaries. */
+ if (baseDictionary.getWritingSystem().satisfies(this.writingSystem)) {
+ if (baseDictionary.getWord(wordChars, 0) == null) {
+ addImportedDictionaries(baseDictionary, dictionaryStack);
+ } else {
+ return true;
+ }
+ } else {
+ addImportedDictionaries(baseDictionary, dictionaryStack);
+ }
+ }
+ }
+
+ /* 2. Check exact matches in standard dictionaries for the orthography. */
+ dictionaryStack.clear();
+ dictionaryStack.push(getDictionary());
+ /* Check the referenced dictionary and each of its ancestor dictionaries. */
+ while (! dictionaryStack.isEmpty()) {
+ final Dictionary baseDictionary = dictionaryStack.pop();
+ if (baseDictionary.getWord(wordChars, 0) == null) {
+ addImportedDictionaries(baseDictionary, dictionaryStack);
+ } else {
+ return true;
+ }
+ }
+
+ /* 3. Check the match rules. */
+ if (foundInMatchRules(wordChars)) {
+ return true;
+ }
+
+ /* 4. Check for compound word. */
+ if (CharSequenceUtils.contains(wordChars, '-')) {
+ final String[] components = this.compoundWordBreaker.split(wordChars);
+ final boolean[] componentsValid = new boolean[components.length];
+ for (int index = 0; index < components.length; index ++) {
+ final String component = components[index];
+ componentsValid[index] = isRecognizedWord(component, 0, component.length(), pos, adhocDictionaries);
+ }
+ if (BooleanUtils.allTrue(componentsValid)) {
+ return true;
+ }
+ }
+
+ /* 5. Check derivative matches in adhoc dictionaries. */
+ dictionaryStack.clear();
+ if (adhocDictionaries != null) {
+ dictionaryStack.addAll(adhocDictionaries);
+ while (! dictionaryStack.isEmpty()) {
+ final Dictionary baseDictionary = dictionaryStack.pop();
+ /* Check the referenced dictionary and each of its ancestor dictionaries. */
+ if (isDerivativeFound(wordChars, baseDictionary)) {
+ return true;
+ } else {
+ addImportedDictionaries(baseDictionary, dictionaryStack);
+ }
+ }
+ }
+
+ /* 6. Check derivative matches in standard dictionaries for the orthography. */
+ dictionaryStack.clear();
+ dictionaryStack.push(getDictionary());
+ /* Check the referenced dictionary and each of its ancestor dictionaries. */
+ while (! dictionaryStack.empty()) {
+ final Dictionary baseDictionary = dictionaryStack.pop();
+ if (isDerivativeFound(wordChars, baseDictionary)) {
+ return true;
+ } else {
+ addImportedDictionaries(baseDictionary, dictionaryStack);
+ }
+ }
+
+ /* Not found in any dictionary. */
+ /* If the first character is uppercase, convert to lowercase and try again. Discussion: For English at least, we
+ * do not want the opposite effect, i.e. to have words starting with lowercase get their first char converted
+ * to uppercase. If the word is in the dictionary as a proper noun, we should treat a failure to capitalize it
+ * as a spelling error. Also, we do not want to generally convert the entire word to lowercase, as capital
+ * letters in the middle of the word should normally be treated as a spelling error. For exceptions to this
+ * last rule, users should enter the oddly-capitalized word into a dictionary in that form.
+ * TODO: This capability should be included in the orthography configuration instead of being hard-coded
+ * here. */
+ final int indexFirstLetter = CharacterUtils.firstLetter(wordChars);
+ if (indexFirstLetter > -1) {
+ if (Character.isUpperCase(wordChars.charAt(indexFirstLetter))) {
+ final StringBuilder builder = new StringBuilder(wordChars);
+ builder.setCharAt(indexFirstLetter, Character.toLowerCase(wordChars.charAt(indexFirstLetter)));
+ return isRecognizedWord(builder, offset, length, pos, adhocDictionaries);
+ }
+ }
+
+ return false;
+ }
+
+ /**
+ * Adds all imported dictionaries in a given {@link Dictionary} instance to a stack of dictionaries.
+ * @param baseDictionary The base dictionary whose imports are to be added to the stack.
+ * @param dictionaryStack The stack to which the imported dictionaries should be added.
+ */
+ private void addImportedDictionaries(final Dictionary baseDictionary, final Stack<Dictionary> dictionaryStack) {
+ final List<String> imported = baseDictionary.getImportedDictionaries();
+ for (int index = 0; index < imported.size(); index ++) {
+ final String dictId = imported.get(index);
+ final Dictionary importedDictionary = this.server.getDictionary(dictId);
+ if (importedDictionary != null) {
+ dictionaryStack.push(importedDictionary);
+ }
+ }
+ }
+
+ /**
+  * Reports whether a word can be traced back to an entry of a dictionary by way of this orthography's derivative
+  * rules.
+  * Every derivative-rule list registered with this orthography is fetched from the server, and each pattern in it
+  * is asked for its first applicable rule; the search stops at the first pattern that yields one.
+  * @param wordChars The word characters.
+  * @param dictionary The dictionary to be searched.
+  * @return True if and only if some pattern returns a non-null applicable rule for {@code wordChars} in
+  * {@code dictionary}.
+  */
+ private boolean isDerivativeFound(final CharSequence wordChars, final Dictionary dictionary) {
+     for (int idIndex = 0; idIndex < this.derivativeRuleListIds.size(); idIndex ++) {
+         final String listId = this.derivativeRuleListIds.get(idIndex);
+         final List<DerivativePattern> candidates = this.server.getDerivativePatterns(listId);
+         for (final DerivativePattern candidate : candidates) {
+             final boolean applies = candidate.findFirstApplicableRule(wordChars, dictionary) != null;
+             if (applies) {
+                 return true;
+             }
+         }
+     }
+     return false;
+ }
+
+ /**
+  * Attempts to hyphenate a word, consulting the dictionary, then the derivative rules, then the hyphenation
+  * patterns, and finally wrapping the result so that it reflects the capitalization of the input.
+  * @param word The characters containing the word.
+  * @param offset Index into {@code word} of the first character of the word.
+  * @param length The number of characters in the word.
+  * @return The hyphenated word, or null if no dictionary entry or derivative was found and the hyphenation
+  * patterns are either unavailable or produce no result.
+  */
+ @Override
+ public Word4a hyphenateUnrecognizedWord(final CharSequence word, final int offset, final int length) {
+     /* Isolate the characters of the word that we are looking for. */
+     final CharSequence target = word.subSequence(offset, offset + length);
+
+     /* The dictionary is consulted first, as it should be more accurate than the patterns.
+      * NOTE(review): toLowerCase() without an argument uses the JVM default locale -- confirm that this, rather
+      * than the locale of the writing system, is what is wanted here. */
+     Word4a found = null;
+     final SegmentDictionary segmentDictionary = getDictionary();
+     if (segmentDictionary != null) {
+         found = segmentDictionary.getWord(target.toString().toLowerCase(), 0);
+         if (found == null) {
+             found = findDerivatives(target);
+         }
+     }
+
+     /* Neither the dictionary nor the derivative rules produced anything. Fall back to the patterns. */
+     if (found == null) {
+         final PatternTree patterns = getHyphenationPatterns();
+         if (patterns != null) {
+             found = patterns.hyphenate(target, 0, length);
+         }
+         if (found == null) {
+             return null;
+         }
+     }
+
+     /* Wrap the result when the input differs from its normalized content only by capitalization. */
+     if (CharSequenceUtils.equalToCapitalized(found.getNormalizedContent(), target)) {
+         return new CapitalizedWord(found);
+     }
+     if (CharSequenceUtils.equalToUppercase(found.getNormalizedContent(), target)) {
+         return new UppercaseWord(found);
+     }
+     if (CharSequenceUtils.hasAnyUppercase(target)) {
+         /* There is unexpected capitalization, so retain the exact characters that were supplied. */
+         return new ExactWord(found, target.toString());
+     }
+
+     return found;
+ }
+
+ /**
+  * Splits a range of characters into a flow of word, punctuation, and whitespace tokens.
+  * The range is fed to the lexer obtained from the server. Each token of type {@code WORD} is first offered to
+  * {@code recognizeWord}, then to {@code hyphenateUnrecognizedWord}, and is finally wrapped in a
+  * {@link StringWord} if neither produced a result. All other tokens are treated as interword content.
+  * Tokens whose text is empty are ignored.
+  * @param characters The characters containing the content to be tokenized.
+  * @param startIndex Index into {@code characters} of the first character to be tokenized.
+  * @param length The number of characters to be tokenized.
+  * @return The tokens found, in the order in which the lexer produced them.
+  * @throws OrthographyException If interword content contains a character that is neither punctuation nor
+  * whitespace.
+  */
+ @Override
+ public TokenFlow4a tokenize(final CharSequence characters, final int startIndex, final int length)
+         throws OrthographyException {
+     final TokenFlow4a wordSequence = new TokenFlow4a();
+     final CharSequence sequence = characters.subSequence(startIndex, startIndex + length);
+     final Lexer4a lexer = this.server.getLexer();
+     lexer.clear();
+     try {
+         lexer.addUntokenized(sequence, this.writingSystem);
+         lexer.lock();
+
+         while (lexer.hasNext()) {
+             final Lexer.Token token = lexer.next();
+             final CharSequence chunk = token.getText();
+             if (chunk.length() < 1) {
+                 continue;
+             }
+
+             if (token.getTokenType() == TokenType.WORD) {
+                 /* Chunk is a word. */
+                 Word4a word = recognizeWord(chunk, 0, chunk.length(), null, null);
+                 if (word == null) {
+                     word = hyphenateUnrecognizedWord(chunk, 0, chunk.length());
+                 }
+                 if (word == null) {
+                     word = new StringWord(0, chunk);
+                 }
+                 wordSequence.addToken(word);
+             } else {
+                 /* Chunk is interword content. */
+                 parseInterwordContent(chunk, wordSequence);
+             }
+         }
+     } finally {
+         /* The lexer comes from the server, so release its content even when tokenization fails part-way
+          * through, e.g. when the interword content cannot be parsed. */
+         lexer.clear();
+     }
+     return wordSequence;
+ }
+
+ /**
+  * Walks interword content one character at a time, converting each character to a {@link Punctuation4a} or
+  * {@link Whitespace4a} token and appending it to a given word sequence.
+  * A character is tried as punctuation first; only if no punctuation instance exists for it is it tested for
+  * whitespace.
+  * NOTE(review): The result of {@code Whitespace4a.findInstance} is added without a null check. Confirm that it
+  * cannot return null for a character that {@link Character#isWhitespace(char)} accepts.
+  * @param interword The interword characters to be tokenized.
+  * @param wordSequence The sequence of tokens to which tokens will be added.
+  * @throws OrthographyException If {@code interword} contains a character that is neither punctuation nor
+  * whitespace.
+  */
+ private void parseInterwordContent(final CharSequence interword, final TokenFlow4a wordSequence)
+         throws OrthographyException {
+     for (int position = 0; position < interword.length(); position ++) {
+         final char current = interword.charAt(position);
+         final CharSequence single = interword.subSequence(position, position + 1);
+
+         final Punctuation4a punctuation = Punctuation4a.findInstance(single);
+         if (punctuation != null) {
+             wordSequence.addToken(punctuation);
+             continue;
+         }
+
+         if (! Character.isWhitespace(current)) {
+             throw new OrthographyException("Don't know how to handle interword content: " + current);
+         }
+         final Whitespace4a whitespace = Whitespace4a.findInstance(single);
+         wordSequence.addToken(whitespace);
+     }
+ }
+
+ @Override
+ public boolean canBreakLineMidWord() {
+ // TODO: Auto-generated stub that unconditionally answers false; revisit once a real rule is available.
+ return false;
+ }
+
+ /**
+ * Returns the writing system held by this orthography.
+ * @return The writing system for this orthography, i.e. the value that {@code tokenize} hands to the lexer.
+ */
+ public WritingSystem4a getWritingSystem() {
+ return this.writingSystem;
+ }
+
+}
Added: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aVariant.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aVariant.java (rev 0)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aVariant.java 2023-10-03 01:30:18 UTC (rev 13302)
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2023 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.orthography;
+
+import org.axsl.fotree.text.FoTextTokenFlow;
+import org.axsl.fotree.text.FoWord;
+import org.axsl.orthography.Dictionary;
+import org.axsl.orthography.OrthographyException;
+import org.axsl.orthography.Word.PartOfSpeech;
+
+import java.util.List;
+
+/**
+ * Wrapper around an {@link Orthography4a} instance that overrides some properties of it.
+ * This is useful for cases where a one-off use is needed, such as a document that has its own dictionary(s).
+ */
+public class Orthography4aVariant extends Orthography4a {
+
+ /** The wrapped orthography, which provides the base functionality for any features not overridden. */
+ private Orthography4aStandard wrapped;
+
+ /** The dictionary overriding that provided by {@link #wrapped}. */
+ private Dictionary dictionary;
+
+ /**
+ * Constructor.
+ * @param wrapped The base server that is being wrapped and overriden by this instance.
+ * @param dictionary The dictionary overriding that provided by {@code #wrapped}.
+ */
+ public Orthography4aVariant(final Orthography4aStandard wrapped, final Dictionary dictionary) {
+ this.wrapped = wrapped;
+ this.dictionary = dictionary;
+ }
+
+ @Override
+ public FoWord recognizeWord(final CharSequence wordChars, final int offset, final int length,
+ final PartOfSpeech pos, final List<Dictionary> adhocDictionaries) {
+ return this.wrapped.recognizeWord(wordChars, offset, length, pos, adhocDictionaries);
+ }
+
+ @Override
+ public FoWord hyphenateUnrecognizedWord(final CharSequence wordChars, final int offset, final int length) {
+ return this.wrapped.hyphenateUnrecognizedWord(wordChars, offset, length);
+ }
+
+ @Override
+ public FoTextTokenFlow tokenize(final CharSequence wordSequenceChars, final int offset, final int length)
+ throws OrthographyException {
+ return this.wrapped.tokenize(wordSequenceChars, offset, length);
+ }
+
+ @Override
+ public boolean isRecognizedWord(final CharSequence wordChars, final int offset, final int length,
+ final PartOfSpeech pos, final List<Dictionary> adhocDictionaries) {
+ return this.wrapped.isRecognizedWord(wordChars, offset, length, pos, adhocDictionaries);
+ }
+
+ @Override
+ public boolean canBreakLineMidWord() {
+ return this.wrapped.canBreakLineMidWord();
+ }
+
+ @Override
+ public Dictionary getDictionary() {
+ return this.dictionary;
+ }
+
+}
Property changes on: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4aVariant.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java 2023-10-02 23:19:34 UTC (rev 13301)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java 2023-10-03 01:30:18 UTC (rev 13302)
@@ -79,7 +79,8 @@
private EntityResolver entityResolver = null;
/** Map of writing systems and their orthographies. */
- private Map<WritingSystem, Orthography4a> orthographyMap = new HashMap<WritingSystem, Orthography4a>();
+ private Map<WritingSystem, Orthography4aStandard> orthographyMap =
+ new HashMap<WritingSystem, Orthography4aStandard>();
/** Map whose key is a dictionary ID, and whose value is the matching dictionary resource. */
private Map<String, DictionaryResource> dictionaryMap = new HashMap<String, DictionaryResource>();
@@ -294,7 +295,7 @@
* @param writingSystem The orthography for which the configuration should be registered.
* @param orthography The configuration for {@code orthography}.
*/
- public void registerOrthography(final WritingSystem writingSystem, final Orthography4a orthography) {
+ public void registerOrthography(final WritingSystem writingSystem, final Orthography4aStandard orthography) {
this.orthographyMap.put(writingSystem, orthography);
final DictionaryResource resource = orthography.getDictionaryResource();
/* TODO: If the new resources is the same as the existing one, ignore. Otherwise, log a warning and ignore the
@@ -310,7 +311,7 @@
}
@Override
- public Orthography4a getOrthography(final WritingSystem writingSystem) {
+ public Orthography4aStandard getOrthography(final WritingSystem writingSystem) {
return this.orthographyMap.get(writingSystem);
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java 2023-10-02 23:19:34 UTC (rev 13301)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java 2023-10-03 01:30:18 UTC (rev 13302)
@@ -40,7 +40,7 @@
import org.foray.orthography.DerivativeRule;
import org.foray.orthography.DictionaryResource;
import org.foray.orthography.HyphenationPatternsResource;
-import org.foray.orthography.Orthography4a;
+import org.foray.orthography.Orthography4aStandard;
import org.foray.orthography.OrthographyServer4a;
import org.foray.orthography.PosUtils;
import org.foray.orthography.WordWrapperFactory;
@@ -71,7 +71,7 @@
* instance.
* Normally this class doesn't need to be accessed directly.
*/
-public class OrthographyParser extends SaxParser<Orthography4a> {
+public class OrthographyParser extends SaxParser<Orthography4aStandard> {
/** Stateful variable. */
private DictionaryResource currentDictionaryResource;
@@ -116,7 +116,7 @@
private ResourceLocation currentResourceLocation;
/** Stateful variable tracking the current orthography configuration. */
- private transient Orthography4a currentOrthographyConfig;
+ private transient Orthography4aStandard currentOrthographyConfig;
/** The map of derivative factory lists, keyed by id. */
private Map<String, List<WordWrapperFactory<?>>> derivativeLists =
@@ -141,7 +141,7 @@
}
@Override
- public Orthography4a parse(final InputSource inputSource) throws IOException, SAXException {
+ public Orthography4aStandard parse(final InputSource inputSource) throws IOException, SAXException {
final XMLReader parser = createSax2Parser(true, true, true, ForayEntityResolver.getInstance(), false);
parser.parse(inputSource);
return this.currentOrthographyConfig;
@@ -299,7 +299,7 @@
}
case "orthography": {
final WritingSystem4a writingSystem = parseWritingSystem(attributes);
- this.currentOrthographyConfig = new Orthography4a(this.hyphenationServer, writingSystem);
+ this.currentOrthographyConfig = new Orthography4aStandard(this.hyphenationServer, writingSystem);
this.hyphenationServer.registerOrthography(writingSystem, this.currentOrthographyConfig);
return;
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2023-10-02 23:19:34 UTC (rev 13301)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2023-10-03 01:30:18 UTC (rev 13302)
@@ -31,7 +31,7 @@
import org.foray.common.i18n.WritingSystem4a;
import org.foray.common.primitive.ObjectUtils;
import org.foray.orthography.Lexer4a;
-import org.foray.orthography.Orthography4a;
+import org.foray.orthography.Orthography4aStandard;
import org.foray.orthography.OrthographyServer4a;
import org.foray.orthography.OrthographyServerConfig;
import org.foray.orthography.SegmentDictionary;
@@ -314,7 +314,7 @@
/* Writing system should never be null, but orthography could be. */
WritingSystem lastWritingSystem = null;
- Orthography4a orthography = null;
+ Orthography4aStandard orthography = null;
if (lexer.hasNext()) {
lastWritingSystem = lexer.peekNext().getWritingSystem();
orthography = this.server.getOrthography(lastWritingSystem);
@@ -338,7 +338,7 @@
* @param token The word token to be checked.
* @param location The location of the word in the original document.
*/
- private void checkWord(final Orthography4a orthography, final Lexer.Token token, final String location) {
+ private void checkWord(final Orthography4aStandard orthography, final Lexer.Token token, final String location) {
if (orthography == null) {
/* Treat as an error. */
this.output.println("(no config) " + token.getText());
@@ -388,7 +388,7 @@
final String message = String.format("Writing system not found for: %s", languageAttr);
this.output.println(message + getLocationString(getLocator()));
} else {
- final Orthography4a config = this.server.getOrthography(writingSystem);
+ final Orthography4aStandard config = this.server.getOrthography(writingSystem);
if (config == null) {
final String message = String.format("Unconfigured orthography: %s", languageAttr);
this.output.println(message + getLocationString(getLocator()));
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/WordChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/WordChecker.java 2023-10-02 23:19:34 UTC (rev 13301)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/WordChecker.java 2023-10-03 01:30:18 UTC (rev 13302)
@@ -29,7 +29,7 @@
package org.foray.orthography.util;
import org.foray.common.i18n.WritingSystem4a;
-import org.foray.orthography.Orthography4a;
+import org.foray.orthography.Orthography4aStandard;
import org.foray.orthography.OrthographyServer4a;
import org.foray.orthography.OrthographyServerConfig;
import org.foray.orthography.SegmentDictionary;
@@ -81,7 +81,7 @@
private PrintStream output;
/** The current orthography configuration. */
- private Orthography4a currentOrthographyConfig;
+ private Orthography4aStandard currentOrthographyConfig;
/** The Hyphenation server. */
private OrthographyServer4a server;
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/Orthography4aTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/Orthography4aTests.java 2023-10-02 23:19:34 UTC (rev 13301)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/Orthography4aTests.java 2023-10-03 01:30:18 UTC (rev 13302)
@@ -45,7 +45,7 @@
import java.io.IOException;
/**
- * Tests of {@link Orthography4a}.
+ * Tests of {@link Orthography4aStandard}.
*/
public class Orthography4aTests {
@@ -53,7 +53,7 @@
private static OrthographyServer4a server;
/** The object under test. */
- private Orthography4a consumer;
+ private Orthography4aStandard consumer;
/**
@@ -141,7 +141,7 @@
if (! testWord.toLowerCase().equals("hyphenation")) {
throw new OrthographyException("Test of \"hyphenation\" has invalid input.");
}
- final Orthography4a orthography = server.getOrthography(WritingSystem4a.USA);
+ final Orthography4aStandard orthography = server.getOrthography(WritingSystem4a.USA);
final Word hyphenation = orthography.hyphenateUnrecognizedWord(testWord, 0, testWord.length());
assertNotNull(hyphenation);
/* Computed value is "hy-phen-a-tion". */
@@ -183,7 +183,7 @@
if (! testWord.toLowerCase().equals("obligatory")) {
throw new OrthographyException("Test of \"obligatory\" has invalid input.");
}
- final Orthography4a orthography = server.getOrthography(WritingSystem4a.USA);
+ final Orthography4aStandard orthography = server.getOrthography(WritingSystem4a.USA);
final Word hyphenation = orthography.hyphenateUnrecognizedWord(testWord, 0, testWord.length());
assertNotNull(hyphenation);
/* Computed value is "ob-lig-a-to-ry". */
@@ -203,7 +203,7 @@
@Test
public void testEnInvalidCharacter() throws OrthographyException {
final String testWord = "table8";
- final Orthography4a orthography = server.getOrthography(WritingSystem4a.USA);
+ final Orthography4aStandard orthography = server.getOrthography(WritingSystem4a.USA);
final Word hyphenation = orthography.hyphenateUnrecognizedWord(testWord, 0, testWord.length());
assertNull(hyphenation);
}
@@ -216,7 +216,7 @@
@Test
public void testTimes() throws OrthographyException {
final String testWord = "times";
- final Orthography4a orthography = server.getOrthography(WritingSystem4a.USA);
+ final Orthography4aStandard orthography = server.getOrthography(WritingSystem4a.USA);
final Word hyphenation = o...
[truncated message content] |