Skip to content

Commit

Permalink
Add support for textStandardBase
Browse files Browse the repository at this point in the history
Add new primitive, parser, and unparser to handle non base-10 text
numbers. The actual logic for converting to/from bases is handled by Java's
built-in functions.

To maintain backwards compatibility, if the textStandardBase property is
not defined it defaults to "10", unless the requireTextStandardBaseProperty
tunable (which defaults to false) is set to true.

Also noticed that some unordered sequence tests were commented out; they
work after minor tweaks to correct errors in the tests themselves.

DAFFODIL-840
  • Loading branch information
stevedlawrence committed Jan 22, 2020
1 parent 0ba8cfd commit 02e6504
Show file tree
Hide file tree
Showing 17 changed files with 1,769 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ trait ElementBase
with BooleanTextMixin
with TextNumberFormatMixin
with EmptyElementParsePolicyMixin
with TextStandardBaseMixin
with OverlapCheckMixin {

override final def eBase = this
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,12 @@ trait ElementBaseGrammarMixin
}

/**
 * Production for text numbers with dfdl:textNumberRep="standard".
 *
 * The converter is picked from the (possibly defaulted) dfdl:textStandardBase:
 * base 10 uses `textConverter`, bases 2, 8 and 16 use
 * `textStandardNonBaseTenConverter`. Any other value is treated as impossible
 * here.
 */
private lazy val textStandardNumber = prod("textStandardNumber", textNumberRep == TextNumberRep.Standard) {
  // Only the converter that matches the base is referenced, so the base-10
  // converter is never forced for a non base-10 element (and vice versa).
  val converter = textStandardBaseDefaulted match {
    case 10 => textConverter
    case 2 | 8 | 16 => textStandardNonBaseTenConverter
    case _ => Assert.impossible()
  }
  ConvertTextCombinator(this, stringValue, converter)
}

private lazy val textZonedNumber = prod("textZonedNumber", textNumberRep == TextNumberRep.Zoned) {
Expand All @@ -625,13 +630,17 @@ trait ElementBaseGrammarMixin
}
}

/**
 * Converter primitive used when dfdl:textStandardBase is not 10.
 *
 * Only integer kinds are accepted; any other primitive type is reported as a
 * schema definition error naming the base and the offending type.
 */
private lazy val textStandardNonBaseTenConverter =
  primType match {
    case _: NodeInfo.Integer.Kind =>
      ConvertNonBaseTenTextNumberPrim(this)
    case _ =>
      SDE(
        "dfdl:textStandardBase=\"%s\" cannot be used with %s",
        textStandardBaseDefaulted,
        primType.globalQName)
  }

/**
 * Converter primitive used when dfdl:textNumberRep="zoned", selected by
 * matching on the element's primitive type.
 *
 * NOTE(review): this span comes from a rendered diff that shows removed and
 * added lines without markers, so the arms below appear to mix the old and
 * the new version of the match. Confirm against the committed file before
 * relying on the exact set of cases.
 */
private lazy val textZonedConverter = {
primType match {
// Floating point types are rejected with a schema definition error.
case PrimType.Double | PrimType.Float =>
SDE("dfdl:textNumberRep=\"zoned\" is not allowed for %s", primType.globalQName)
case _: NodeInfo.Numeric.Kind => ConvertZonedNumberPrim(this)
// Non-numeric types are treated as an internal invariant failure.
case PrimType.HexBinary | PrimType.Boolean | PrimType.Date | PrimType.Time | PrimType.DateTime | PrimType.AnyURI | PrimType.String =>
Assert.invariantFailed("textZonedConverter only to be used for numeric types")
// NOTE(review): presumably already covered by the Numeric.Kind case above,
// if Decimal.Kind is a subtype of Numeric.Kind -- TODO confirm.
case _: NodeInfo.Decimal.Kind => ConvertZonedNumberPrim(this)
// Anything not matched above is a schema definition error.
case _ => SDE("dfdl:textNumberRep=\"zoned\" cannot be used with %s", primType.globalQName)
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.daffodil.grammar.primitives

import java.math.{ BigInteger => JBigInt }

import org.apache.daffodil.dsom.ElementBase
import org.apache.daffodil.grammar.Terminal
import org.apache.daffodil.processors.parsers.Parser
import org.apache.daffodil.processors.unparsers.Unparser
import org.apache.daffodil.processors.parsers.ConvertNonBaseTenTextNumberParser
import org.apache.daffodil.processors.unparsers.ConvertNonBaseTenTextNumberUnparser

import org.apache.daffodil.processors.unparsers.NadaUnparser


/**
 * Grammar terminal that converts between text in a non base-10 radix and a
 * number.
 *
 * Both the parser and the unparser are built lazily from the element's
 * runtime data and its (possibly defaulted) dfdl:textStandardBase value.
 *
 * @param e the element whose text representation is converted
 */
case class ConvertNonBaseTenTextNumberPrim(e: ElementBase)
  extends Terminal(e, true) {

  override lazy val parser: Parser = {
    val erd = e.elementRuntimeData
    val radix = e.textStandardBaseDefaulted
    new ConvertNonBaseTenTextNumberParser(erd, radix)
  }

  override lazy val unparser: Unparser = {
    val erd = e.elementRuntimeData
    val radix = e.textStandardBaseDefaulted
    new ConvertNonBaseTenTextNumberUnparser(erd, radix)
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,34 @@ object TextNumberBase {
case "8" => 8
case "10" => 10
case "16" => 16
case _ => self.schemaDefinitionError("Illegal number base: " + str) // validation will have checked. So this shoudn't happen.
case _ => self.schemaDefinitionError("For property textStandardBase, value must be 2, 8, 10, or 16. Found: %s", str)
}
}
}
/**
 * Mixin that resolves the dfdl:textStandardBase property, falling back to a
 * default when the schema does not define it and the tunables permit that.
 */
trait TextStandardBaseMixin extends PropertyMixin {

  def tunable: DaffodilTunables

  private def optionTextStandardBase = findPropertyOption("textStandardBase")

  /**
   * The numeric base for standard text numbers.
   *
   * Daffodil 2.5.0 and older ignored the textStandardBase property and
   * always behaved as if it were 10. The property is now honored, but
   * requiring it could break older schemas that never defined it. So the
   * property is only looked up (and therefore required) when the
   * requireTextStandardBaseProperty tunable is set or the property is
   * actually defined; otherwise a warning is issued and "10" is used.
   */
  final lazy val textStandardBaseDefaulted = {
    // Same short-circuit order as checking "required || defined": the
    // property lookup is skipped whenever the tunable demands the property.
    val mayDefault =
      !tunable.requireTextStandardBaseProperty && !optionTextStandardBase.isDefined

    val numStr =
      if (mayDefault) {
        SDW(WarnID.TextStandardBaseUndefined, "dfdl:textStandardBase property is undefined. Defaulting to 10.")
        "10"
      } else {
        getProperty("textStandardBase")
      }

    TextNumberBase(numStr, this)
  }
}

sealed trait SeparatorSuppressionPolicy extends SeparatorSuppressionPolicy.Value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,14 @@
</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="requireTextStandardBaseProperty" type="xs:boolean" default="false" minOccurs="0">
<xs:annotation>
<xs:documentation>
If true, require that the dfdl:textStandardBase property is specified. If false
and the property is missing, behave as if the property is set to 10.
</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="suppressSchemaDefinitionWarnings" type="daf:TunableSuppressSchemaDefinitionWarnings" default="emptyElementParsePolicyError" minOccurs="0">
<xs:annotation>
<xs:documentation>
Expand Down Expand Up @@ -464,6 +472,7 @@
<xs:enumeration value="regexPatternZeroLength" />
<xs:enumeration value="textBidiError" />
<xs:enumeration value="textOutputMinLengthOutOfRange" />
<xs:enumeration value="textStandardBaseUndefined" />
<xs:enumeration value="unsupportedAttributeBlockDefault" />
<xs:enumeration value="unsupportedAttributeFinalDefault" />
<xs:enumeration value="unsupportedAttributeFormDefault" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.daffodil.processors.unparsers

import java.lang.{ Long => JLong }
import java.lang.{ Number => JNumber }
import java.math.{ BigInteger => JBigInt }

import org.apache.daffodil.processors.ElementRuntimeData

/**
 * Unparser that replaces an element's numeric infoset value with its text
 * representation in the given radix.
 *
 * Negative values cannot be represented and are reported as unparse errors.
 *
 * @param context runtime data of the element being unparsed
 * @param base    radix used to render the number (e.g. 2, 8 or 16)
 */
case class ConvertNonBaseTenTextNumberUnparser(
  override val context: ElementRuntimeData,
  base: Int)
  extends TextPrimUnparser {

  override lazy val runtimeDependencies = Vector()

  /**
   * Reads the current simple node's number, renders it in `base`, and
   * overwrites the node's data value with the resulting string.
   */
  override def unparse(state: UState): Unit = {

    val node = state.currentInfosetNode.asSimple
    val value = node.dataValue

    // NOTE(review): the code after each UE call assumes UE aborts the
    // unparse rather than returning -- confirm against UE's definition.
    val baseStr = value.getNumber match {
      case bi: JBigInt => {
        if (bi.compareTo(JBigInt.ZERO) < 0) {
          UE(state, "Unable to unparse negative values when dfdl:textStandardBase=\"%d\": %s", base, bi.toString)
        }
        bi.toString(base)
      }
      case n: JNumber => {
        val l = n.longValue
        if (l < 0) {
          UE(state, "Unable to unparse negative values when dfdl:textStandardBase=\"%d\": %s", base, l.toString)
        }
        // For non-negative values Long.toString(l, radix) yields the same
        // digits as toBinaryString/toOctalString/toHexString, and it has no
        // partial match that could throw a MatchError for another base.
        JLong.toString(l, base)
      }
    }

    node.overwriteDataValue(baseStr)
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.daffodil.processors.parsers

import java.math.{ BigInteger => JBigInt }
import java.lang.{ Number => JNumber }

import org.apache.daffodil.dpath.NodeInfo
import org.apache.daffodil.processors.ElementRuntimeData

/**
 * Parser that converts an element's text value, written in the given radix,
 * into a number of the element's primitive type.
 *
 * @param context runtime data of the element being parsed
 * @param base    radix the text is written in
 */
class ConvertNonBaseTenTextNumberParser(
  override val context: ElementRuntimeData,
  base: Int)
  extends TextPrimParser {

  override lazy val runtimeDependencies = Vector()

  private val primNumeric = context.optPrimType.get.asInstanceOf[NodeInfo.PrimType.PrimNumeric]

  /**
   * Validates and converts the current simple element's string value.
   *
   * A parse error is reported, and the value left untouched, when the text
   * is empty, starts with a sign, contains characters that are not valid in
   * the radix, or denotes a number outside the range of the primitive type.
   */
  final def parse(state: PState): Unit = {

    val node = state.simpleElement
    val text = node.dataValueAsString

    if (text == "") {
      PE(state, "Unable to parse %s from empty string", context.optPrimType.get.globalQName)
    } else if (text(0) == '-' || text(0) == '+') {
      // A leading sign must be rejected explicitly: DFDL does not allow one
      // here, but the Java number parsers would happily accept it. Note that
      // textNumberPattern is not used when textStandardBase is not 10, so a
      // +/- in the pattern cannot make this legal either.
      PE(state, "Unable to parse %s from base-%d text with leading sign: %s",
        context.optPrimType.get.globalQName, base, text)
    } else {
      // Always go through BigInteger first: that lets us tell text with
      // invalid characters apart from text that is merely too large for the
      // primitive type.
      val parsed: Option[JBigInt] =
        try {
          Some(new JBigInt(text, base))
        } catch {
          case _: NumberFormatException => None
        }

      parsed match {
        case None =>
          PE(state, "Unable to parse %s from base-%d text due to invalid characters: %s",
            context.optPrimType.get.globalQName, base, text)
        case Some(bi) if !primNumeric.isValidRange(bi) =>
          PE(state, "Parsed %s is out of range for the type: %s",
            context.optPrimType.get.globalQName, bi.toString)
        case Some(bi) =>
          node.overwriteDataValue(primNumeric.fromNumber(bi))
      }
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@

<tdml:parserTestCase name="simple_type_properties_text_number_13_03" root="base_group"
model="./fvt/ext/dpa/dpanum_properties.dfdl.xsd"
description="Section 13.5 Specification of number base - 2,8, and 16">
description="Section 13.5 Specification of number base - 2,8, and 16" roundTrip="twoPass">
<tdml:document>0101*127*10F</tdml:document>
<tdml:infoset>
<tdml:dfdlInfoset>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ X:x3
</parserTestCase>

<parserTestCase name="BF001" root="root" model="BF.dfdl.xsd"
description="Unordered sequences">
description="Unordered sequences" validation="limited">
<document><![CDATA[Y:y0
Y:y1
Y:y2
Expand All @@ -65,17 +65,26 @@ X:beyond
<infoset>
<dfdlInfoset>
<ex:root>
<plain><![CDATA[Y:y0
Y:y1
Y:y2
Y:y3
Y:y4
X:x1
X:x2
X:x3
X:beyond]]></plain>
<seq>
<x>x1</x>
<x>x2</x>
<x>x3</x>
<x>beyond</x>
<y>y0</y>
<y>y1</y>
<y>y2</y>
<y>y3</y>
<y>y4</y>
</seq>
</ex:root>
</dfdlInfoset>
</infoset>
<validationErrors>
<error>Validation Error</error>
<error>{}y</error>
<error>minOccurs='2'</error>
<error>maxOccurs='4'</error>
<error>5</error>
</validationErrors>
</parserTestCase>
</testSuite>
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,9 @@
dfdl:ignoreCase="yes" />
</sequence>
<sequence dfdl:separator="***" dfdl:terminator="%NL;">
<element name="y" type="xsd:double" maxOccurs="unbounded"
<element name="y" type="xsd:integer" maxOccurs="unbounded"
dfdl:lengthKind="delimited" dfdl:textNumberRep="standard"
dfdl:textNumberJustification="right"
dfdl:textNumberPattern="####"
dfdl:textNumberPadCharacter="%SP;" dfdl:textStandardBase="16" />
</sequence>
<sequence dfdl:separator="***" dfdl:terminator="%NL;">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,25 @@
description="Text number properties">
<document><![CDATA[ 9#876#543#210!01*** 12#345!6*** 123456789123456789*** INFINITO*** NNN*** ZERO*** NA*** NIL
aabbccddeeff*** 0f0f0f*** 123456789
10.1 *** 20.3*** -912E-13
10.1*** 20.3*** -912^-13
]]></document>
<infoset>
<dfdlInfoset>
<ex:list>
<x>9876543210.012345678</x>
<x>9.87654321001E9</x>
<x>12345.6</x>
<x>123456789123456789</x>
<x>Infinity</x>
<x>1.23456789123456784E17</x>
<x>INF</x>
<x>NaN</x>
<x>0</x>
<x>0</x>
<x>0</x>
<x>0.0</x>
<x>0.0</x>
<x>0.0</x>
<y>187723572702975</y>
<y>986895</y>
<y>4886718345</y>
<z>10.1</z>
<z>20.3</z>
<z>-0.0000000000912</z>
<z>-9.12E-11</z>
</ex:list>
</dfdlInfoset>
</infoset>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class IBMTestsThatPass {
//@Test def test_simple_type_properties_text_number_13_01() { runner2.runOneTest("simple_type_properties_text_number_13_01") }

// DAFFODIL-840 textStandardBase (base 16)
//@Test def test_simple_type_properties_text_number_13_03() { runner2.runOneTest("simple_type_properties_text_number_13_03") }
@Test def test_simple_type_properties_text_number_13_03(): Unit = {
  runner2.runOneTest("simple_type_properties_text_number_13_03")
}

// DAFFODIL-551 Needs dfdl:utf16Width='variable' implementation
//@Test def test_syntax_entities_6_03() { runner1.runOneTest("syntax_entities_6_03") }
Expand Down
Loading

0 comments on commit 02e6504

Please sign in to comment.