/
JHove2 modules - changes

JHove2 modules - changes

This page describes the changes made to JHove2 to get the modules running.

All of the changes described on this page are included inside the JHoNas version of JHove2.

Modifications to existing config files

/config/spring/jhove2-framework-config.xml

Add the following section to the XML file. It registers the BnF as a developer agent in JHove2.

jhove2-framework-config.xml
	<!-- Agent bean for the BnF (Bibliothèque nationale de France).
	     Other beans can reference it by its id, "BnFAgent". -->
	<bean id="BnFAgent" scope="singleton" class="org.jhove2.core.Agent">
		<!-- First constructor argument: the agent's name as a plain string -->
		<constructor-arg type="java.lang.String" value="Bibliothèque nationale de France"/>
		<!-- Second constructor argument: a value of the nested type Agent$Type -->
		<constructor-arg type="org.jhove2.core.Agent$Type" value="Corporate"/>
		<property name="URI" value="http://www.bnf.fr/"/>
	</bean>

/config/droid/DROID_SignatureFile_V20.xml

The DROID signature file must be modified so that ARC and WARC files are recognized correctly.

Insert the following section into the internal signature part.

DROID_SignatureFile_V20.xml
        <!-- ARC: matches the ASCII text "filedesc:" (hex 66 69 6C 65 64 65 73 63 3A)
             anchored at the beginning of the file (BOFoffset, min and max offset 0). -->
        <InternalSignature ID="123456" Specificity="Specific">
            <ByteSequence Reference="BOFoffset">
                <SubSequence Position="1" SubSeqMinOffset="0" SubSeqMaxOffset="0" MinFragLength="0">
                    <Sequence>66696C65646573633A</Sequence>
                    <!-- Shift table: one entry per distinct byte of the sequence, listed from
                         the last byte (3A, shift 1) back to the first (66, shift 9).
                         Byte 65 ("e") occurs twice in the sequence; only the shift of its
                         last occurrence (4) is listed, so the table has no duplicate entry. -->
                    <DefaultShift>10</DefaultShift>
                    <Shift Byte="3A">1</Shift>
                    <Shift Byte="63">2</Shift>
                    <Shift Byte="73">3</Shift>
                    <Shift Byte="65">4</Shift>
                    <Shift Byte="64">5</Shift>
                    <Shift Byte="6C">7</Shift>
                    <Shift Byte="69">8</Shift>
                    <Shift Byte="66">9</Shift>
                </SubSequence>
            </ByteSequence>
        </InternalSignature>
        <!-- WARC: matches the ASCII text "WARC/" (hex 57 41 52 43 2F)
             anchored at the beginning of the file (BOFoffset, min and max offset 0). -->
        <InternalSignature ID="12345" Specificity="Specific">
            <ByteSequence Reference="BOFoffset">
                <SubSequence Position="1"
                             SubSeqMinOffset="0"
                             SubSeqMaxOffset="0"
                             MinFragLength="0">
                    <Sequence>574152432F</Sequence>
                    <!-- Shift table, listed from the last byte of the sequence ("/")
                         back to the first ("W"); any other byte uses the default shift. -->
                    <DefaultShift>6</DefaultShift>
                    <Shift Byte="2F">1</Shift>
                    <Shift Byte="43">2</Shift>
                    <Shift Byte="52">3</Shift>
                    <Shift Byte="41">4</Shift>
                    <Shift Byte="57">5</Shift>
                </SubSequence>
            </ByteSequence>
        </InternalSignature>

Insert the following section into the file format part.

DROID_SignatureFile_V20.xml
	<!-- ARC -->
        <FileFormat ID="310" Name="Alexa Archive File" PUID="x-fmt/219">
            <InternalSignatureID>123456</InternalSignatureID>
            <Extension>arc</Extension>
            <!-- If left out file is recognized as SGML -->
            <HasPriorityOverFileFormatID>638</HasPriorityOverFileFormatID>
            <HasPriorityOverFileFormatID>639</HasPriorityOverFileFormatID>
            <HasPriorityOverFileFormatID>640</HasPriorityOverFileFormatID>
            <HasPriorityOverFileFormatID>641</HasPriorityOverFileFormatID>
            <HasPriorityOverFileFormatID>642</HasPriorityOverFileFormatID>
            <HasPriorityOverFileFormatID>643</HasPriorityOverFileFormatID>
            <HasPriorityOverFileFormatID>644</HasPriorityOverFileFormatID>
            <HasPriorityOverFileFormatID>645</HasPriorityOverFileFormatID>
        </FileFormat>
	<!-- WARC -->
        <FileFormat ID="12345" Name="WARC file" Version="1.0" PUID="x-fmt/289">
            <InternalSignatureID>12345</InternalSignatureID>
       	    <Extension>warc</Extension>
            <!-- If left out file is recognized as SGML -->
            <HasPriorityOverFileFormatID>638</HasPriorityOverFileFormatID>
            <HasPriorityOverFileFormatID>639</HasPriorityOverFileFormatID>
            <HasPriorityOverFileFormatID>640</HasPriorityOverFileFormatID>
            <HasPriorityOverFileFormatID>641</HasPriorityOverFileFormatID>
            <HasPriorityOverFileFormatID>642</HasPriorityOverFileFormatID>
            <HasPriorityOverFileFormatID>643</HasPriorityOverFileFormatID>
            <HasPriorityOverFileFormatID>644</HasPriorityOverFileFormatID>
            <HasPriorityOverFileFormatID>645</HasPriorityOverFileFormatID>
        </FileFormat>

New configuration files

/config/spring/module/format/gzip/jhove2-gzip-config.xml

Added to allow Spring to recognize and load the GZipModule.

jhove2-gzip-config.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Spring bean definitions for the GZip format module: the module bean, its
     format bean, the format identifiers, the specification document and the
     property-file resources used to initialise the GZip header field classes. -->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-2.5.xsd">

  <!-- GZip module bean (prototype scope). Built from the GZip format bean and
       the FormatModuleAccessor bean; the latter is not defined in this file. -->
  <bean id="GZipModule" class="org.jhove2.module.format.gzip.GzipModule" scope="prototype">
    <constructor-arg ref="GZipFormat"/>
    <constructor-arg ref="FormatModuleAccessor"/>
    <property name="developers">
      <list value-type="org.jhove2.core.Agent">
        <!-- BnFAgent is not defined in this file; it must be declared elsewhere
             in the Spring configuration. -->
        <ref bean="BnFAgent"/>
      </list>
    </property>
    <property name="recurse" value="true"/>
    <!--property name="parallelCharacterization" value="4"/-->
  </bean>

  <!-- GZip format bean: name, primary identifier, type and ambiguity, followed
       by the alias identifiers, alias names and specification documents. -->
  <bean id="GZipFormat" class="org.jhove2.core.format.Format" scope="singleton">
    <constructor-arg type="java.lang.String" value="GZip"/>
    <constructor-arg ref="GZipIdentifier"/>
    <constructor-arg type="org.jhove2.core.format.Format$Type" value="Format"/>
    <constructor-arg type="org.jhove2.core.format.Format$Ambiguity" value="Unambiguous"/>
    <property name="aliasIdentifiers">
      <set value-type="org.jhove2.core.I8R">
        <!-- The primary identifier is deliberately left out of the alias set -->
        <!-- ref bean="GZipIdentifier"/ -->
        <ref bean="GZipXFmt266Identifier"/>
        <ref bean="GZipMimeIdentifier"/>
        <ref bean="GZipRFC1952Identifier"/>
        <ref bean="GZipAppleUTIIdentifier"/>
      </set>
    </property>
    <property name="aliasNames">
      <set>
        <value>GZIP</value>
      </set>
    </property>
    <property name="specifications">
      <list value-type="org.jhove2.core.Document">
        <ref bean="GZip43Specification"/>
      </list>
    </property>
  </bean>

  <!-- Identifiers -->

  <!-- GZip primary identifier bean (single-argument I8R constructor) -->
  <bean id="GZipIdentifier" class="org.jhove2.core.I8R" scope="singleton">
    <constructor-arg type="java.lang.String" value="http://jhove2.org/terms/format/gzip"/>
  </bean>

  <!-- GZip PUID x-fmt/266 alias identifier bean -->
  <bean id="GZipXFmt266Identifier" class="org.jhove2.core.I8R" scope="singleton">
    <constructor-arg type="java.lang.String" value="x-fmt/266"/>
    <constructor-arg type="org.jhove2.core.I8R$Namespace" value="PUID"/>
  </bean>

  <!-- GZip MIME alias identifier bean -->
  <bean id="GZipMimeIdentifier" class="org.jhove2.core.I8R" scope="singleton">
    <constructor-arg type="java.lang.String" value="application/x-gzip"/>
    <constructor-arg type="org.jhove2.core.I8R$Namespace" value="MIME"/>
  </bean>

  <!-- GZip RFC 1952 alias identifier bean -->
  <bean id="GZipRFC1952Identifier" class="org.jhove2.core.I8R" scope="singleton">
    <constructor-arg type="java.lang.String" value="RFC 1952"/>
    <constructor-arg type="org.jhove2.core.I8R$Namespace" value="RFC"/>
  </bean>

  <!-- GZip Apple UTI alias identifier bean -->
  <bean id="GZipAppleUTIIdentifier" class="org.jhove2.core.I8R" scope="singleton">
    <constructor-arg type="java.lang.String" value="org.gnu.gnu-zip-archive"/>
    <constructor-arg type="org.jhove2.core.I8R$Namespace" value="UTI"/>
  </bean>

  <!-- Specifications -->

  <!-- GZIP file format version 4.3 specification bean (RFC 1952) -->
  <bean id="GZip43Specification" class="org.jhove2.core.Document" scope="singleton">
    <constructor-arg type="java.lang.String" value="GZIP file format specification version 4.3"/>
    <constructor-arg type="org.jhove2.core.Document$Type" value="Specification"/>
    <constructor-arg type="org.jhove2.core.Document$Intention" value="Authoritative"/>
    <property name="author" value="P. Deutsch"/>
    <property name="date" value="May 1996"/>
    <property name="edition" value="RFC 1952"/>
    <property name="identifiers">
      <list value-type="org.jhove2.core.I8R">
        <!-- Anonymous inner bean: URI of the specification text -->
        <bean class="org.jhove2.core.I8R">
          <constructor-arg type="java.lang.String" value="http://tools.ietf.org/html/rfc1952"/>
          <constructor-arg type="org.jhove2.core.I8R$Namespace" value="URI"/>
        </bean>
      </list>
    </property>
    <property name="publisher" value="IETF"/>
  </bean>

  <!-- GZip module resources.
       Each bean below uses MethodInvokingFactoryBean to call initValues on a
       header field class, passing the properties that ModulePropertiesFactoryBean
       builds from the given property file base name. -->

  <!-- GZip header compression methods description resources -->
  <bean class="org.springframework.beans.factory.config.MethodInvokingFactoryBean">
    <property name="targetClass"
              value="org.jhove2.module.format.gzip.field.CompressionMethod"/>
    <property name="targetMethod" value="initValues"/>
    <property name="arguments">
      <list>
        <bean class="org.jhove2.config.spring.ModulePropertiesFactoryBean">
          <property name="propertyFileBaseName"
                    value="gzip/compression-methods"/>
        </bean>
      </list>
    </property>
  </bean>

  <!-- GZip header extra flags description resources -->
  <bean class="org.springframework.beans.factory.config.MethodInvokingFactoryBean">
    <property name="targetClass"
              value="org.jhove2.module.format.gzip.field.CompressionType"/>
    <property name="targetMethod" value="initValues"/>
    <property name="arguments">
      <list>
        <bean class="org.jhove2.config.spring.ModulePropertiesFactoryBean">
          <property name="propertyFileBaseName"
                    value="gzip/compression-types"/>
        </bean>
      </list>
    </property>
  </bean>

  <!-- GZip header operating systems description resources -->
  <bean class="org.springframework.beans.factory.config.MethodInvokingFactoryBean">
    <property name="targetClass"
              value="org.jhove2.module.format.gzip.field.OperatingSystem"/>
    <property name="targetMethod" value="initValues"/>
    <property name="arguments">
      <list>
        <bean class="org.jhove2.config.spring.ModulePropertiesFactoryBean">
          <property name="propertyFileBaseName"
                    value="gzip/operating-systems"/>
        </bean>
      </list>
    </property>
  </bean>

</beans>

/config/spring/module/format/arc/jhove2-arc-config.xml

Added to allow Spring to recognize and load the ArcModule.

jhove2-arc-config.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Spring bean definitions for the ARC format module: the module bean, its
     format bean, the primary format identifier and the specification document. -->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-2.5.xsd">

  <!-- ARC module bean (prototype scope). Built from the ARC format bean and
       the FormatModuleAccessor bean; the latter is not defined in this file. -->
  <bean id="ArcModule" scope="prototype" class="org.jhove2.module.format.arc.ArcModule">
    <constructor-arg ref="AlexaArchiveFileFormat"/>
    <constructor-arg ref="FormatModuleAccessor"/>
    <property name="developers">
      <list value-type="org.jhove2.core.Agent">
        <!-- BnFAgent is not defined in this file; it must be declared elsewhere
             in the Spring configuration. -->
        <ref bean="BnFAgent"/>
      </list>
    </property>
    <property name="recurse" value="false"/>
    <!-- Block digest settings -->
    <property name="computeBlockDigest" value="true"/>
    <property name="blockDigestAlgorithm" value="sha1"/>
    <property name="blockDigestEncoding" value="base32"/>
    <!-- Payload digest settings -->
    <property name="computePayloadDigest" value="true"/>
    <property name="payloadDigestAlgorithm" value="sha1"/>
    <property name="payloadDigestEncoding" value="base32"/>
    <!--property name="parallelCharacterization" value="0"/-->
  </bean>

  <!-- ARC format bean: name, primary identifier, type and ambiguity, followed
       by the alias identifiers, alias names and specification documents. -->
  <bean id="AlexaArchiveFileFormat" scope="singleton" class="org.jhove2.core.format.Format">
    <constructor-arg type="java.lang.String" value="Alexa Archive File"/>
    <constructor-arg ref="AlexaArchiveFileIdentifier"/>
    <constructor-arg type="org.jhove2.core.format.Format$Type" value="Format"/>
    <constructor-arg type="org.jhove2.core.format.Format$Ambiguity" value="Unambiguous"/>

    <property name="aliasIdentifiers">
      <set value-type="org.jhove2.core.I8R">
        <!-- The primary identifier is also listed among the aliases -->
        <ref bean="AlexaArchiveFileIdentifier"/>
        <!-- ARC PUID x-fmt/219 alias identifier (anonymous inner bean) -->
        <bean class="org.jhove2.core.I8R">
          <constructor-arg type="java.lang.String" value="x-fmt/219"/>
          <constructor-arg type="org.jhove2.core.I8R$Namespace" value="PUID"/>
        </bean>
        <!-- ARC MIME alias identifier (anonymous inner bean) -->
        <bean class="org.jhove2.core.I8R">
          <constructor-arg type="java.lang.String" value="application/x-ia-arc"/>
          <constructor-arg type="org.jhove2.core.I8R$Namespace" value="MIME"/>
        </bean>
      </set>
    </property>
    <property name="aliasNames">
      <set>
        <value>ARC</value>
      </set>
    </property>
    <property name="specifications">
      <list value-type="org.jhove2.core.Document">
        <ref bean="Arc10Specification"/>
      </list>
    </property>
  </bean>

  <!-- ARC primary identifier bean (single-argument I8R constructor) -->
  <bean id="AlexaArchiveFileIdentifier" scope="singleton" class="org.jhove2.core.I8R">
    <constructor-arg type="java.lang.String" value="http://jhove2.org/terms/format/arc"/>
  </bean>

  <!-- ARC file format version 1.0 specification bean -->
  <bean id="Arc10Specification" scope="singleton" class="org.jhove2.core.Document">
    <constructor-arg type="java.lang.String" value="ARC file format specification version 1.0"/>
    <constructor-arg type="org.jhove2.core.Document$Type" value="Specification"/>
    <constructor-arg type="org.jhove2.core.Document$Intention" value="Authoritative"/>

    <property name="author" value="Mike Burner and Brewster Kahle"/>
    <property name="date" value="September 15, 1996"/>
  </bean>

</beans>

/config/spring/module/format/warc/jhove2-warc-config.xml

Added to allow Spring to recognize and load the WarcModule.

jhove2-warc-config.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Spring bean definitions for the WARC format module: the module bean, its
     format bean, the primary format identifier, the specification document and
     the specification URI. -->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-2.5.xsd">

  <!-- WARC module bean (prototype scope). Built from the WARC format bean and
       the FormatModuleAccessor bean; the latter is not defined in this file. -->
  <bean id="WarcModule" class="org.jhove2.module.format.warc.WarcModule" scope="prototype">
    <constructor-arg ref="WarcArchiveFileFormat"/>
    <constructor-arg ref="FormatModuleAccessor"/>
    <property name="developers">
      <list value-type="org.jhove2.core.Agent">
        <!-- BnFAgent is not defined in this file; it must be declared elsewhere
             in the Spring configuration. -->
        <ref bean="BnFAgent"/>
      </list>
    </property>
    <property name="recurse" value="false"/>
    <!-- Block digest settings -->
    <property name="computeBlockDigest" value="true"/>
    <property name="blockDigestAlgorithm" value="sha1"/>
    <property name="blockDigestEncoding" value="base32"/>
    <!-- Payload digest settings -->
    <property name="computePayloadDigest" value="true"/>
    <property name="payloadDigestAlgorithm" value="sha1"/>
    <property name="payloadDigestEncoding" value="base32"/>
    <!--property name="parallelCharacterization" value="0"/-->
  </bean>

  <!-- WARC format bean: name, primary identifier, type and ambiguity, followed
       by the alias identifiers, alias names and specification documents. -->
  <bean id="WarcArchiveFileFormat" class="org.jhove2.core.format.Format" scope="singleton">
    <constructor-arg type="java.lang.String" value="Warc Archive File"/>
    <constructor-arg ref="WarcArchiveFileIdentifier"/>
    <constructor-arg type="org.jhove2.core.format.Format$Type" value="Format"/>
    <constructor-arg type="org.jhove2.core.format.Format$Ambiguity" value="Unambiguous"/>
    <property name="aliasIdentifiers">
      <set value-type="org.jhove2.core.I8R">
        <!-- The primary identifier is also listed among the aliases -->
        <ref bean="WarcArchiveFileIdentifier"/>
        <!-- WARC PUID x-fmt/289 alias identifier (anonymous inner bean) -->
        <!-- NOTE(review): PRONOM appears to register WARC as fmt/289 rather than
             x-fmt/289. Confirm this value together with the PUID declared for
             WARC in the DROID signature file before changing either one. -->
        <bean class="org.jhove2.core.I8R">
          <constructor-arg type="java.lang.String" value="x-fmt/289"/>
          <constructor-arg type="org.jhove2.core.I8R$Namespace" value="PUID"/>
        </bean>
        <!-- WARC MIME alias identifier (anonymous inner bean) -->
        <bean class="org.jhove2.core.I8R">
          <constructor-arg type="java.lang.String" value="application/warc"/>
          <constructor-arg type="org.jhove2.core.I8R$Namespace" value="MIME"/>
        </bean>
        <!-- WARC ISO 28500 alias identifier (anonymous inner bean, declared
             without id or scope like the other inner beans of this set) -->
        <bean class="org.jhove2.core.I8R">
          <constructor-arg type="java.lang.String" value="ISO 28500:2009(E)"/>
          <constructor-arg type="org.jhove2.core.I8R$Namespace" value="ISO"/>
        </bean>
      </set>
    </property>
    <property name="aliasNames">
      <set>
        <value>WARC</value>
      </set>
    </property>
    <property name="specifications">
      <list value-type="org.jhove2.core.Document">
        <ref bean="Warc10Specification"/>
      </list>
    </property>
  </bean>

  <!-- WARC primary identifier bean (single-argument I8R constructor) -->
  <bean id="WarcArchiveFileIdentifier" class="org.jhove2.core.I8R" scope="singleton">
    <constructor-arg type="java.lang.String" value="http://jhove2.org/terms/format/warc"/>
  </bean>

  <!-- WARC file format version 1.0 specification bean -->
  <bean id="Warc10Specification" class="org.jhove2.core.Document" scope="singleton">
    <constructor-arg type="java.lang.String" value="WARC file format specification version 1.0"/>
    <constructor-arg type="org.jhove2.core.Document$Type" value="Specification"/>
    <constructor-arg type="org.jhove2.core.Document$Intention" value="Authoritative"/>

    <property name="author" value="Technical Committee ISO/TC 46"/>
    <!-- Year taken from the ISO 28500:2009 identifier used for this format -->
    <property name="date" value="2009"/>
    <property name="identifiers">
      <list value-type="org.jhove2.core.I8R">
        <ref bean="ISO28500WARCSpecificationURI"/>
      </list>
    </property>
    <property name="publisher" value="ISO"/>
    <!--property name="title" value="ISO 28500: Information and documentation - WARC file format"/-->
  </bean>

  <!-- ISO 28500 WARC specification URI bean -->
  <bean id="ISO28500WARCSpecificationURI" class="org.jhove2.core.I8R" scope="singleton">
    <constructor-arg type="java.lang.String" value="http://www.iso.org/iso/iso_catalogue/catalogue_tc/catalogue_detail.htm?csnumber=44717"/>
    <constructor-arg type="org.jhove2.core.I8R$Namespace" value="URI"/>
  </bean>

</beans>

New message properties

/config/messages/gzip_messages.properties

Additional file with GZipModule messages.

gzip_messages.properties
# gzip_messages.properties
#
# Message text templates used for localization. Each key is the fully-qualified
# Java path of a Message field in a class; each value is the template text.
#
# ------------------------------------------------------------------------------
#   Templates for class org.jhove2.module.format.gzip.GzipModule
# ------------------------------------------------------------------------------

# Fatal error: {0} is formatted as an integer offset, {1} is inserted as-is
org.jhove2.module.format.gzip.GzipModule.invalidGzipFile = Invalid GZIP file: fatal error encountered (current offset={0, number, integer}): {1}

# Header field checks
org.jhove2.module.format.gzip.GzipModule.invalidExtraFlags = Invalid extra flags value
org.jhove2.module.format.gzip.GzipModule.invalidOperatingSystem = Unknown operating system value
org.jhove2.module.format.gzip.GzipModule.reservedFlagsSet = Reserved flags set

# Size and checksum mismatches: {0} is the value read, {1} the value computed
org.jhove2.module.format.gzip.GzipModule.invalidISize = Invalid ISize value: read={0, number, integer}, computed={1, number, integer}
org.jhove2.module.format.gzip.GzipModule.invalidCrc16 = Invalid header CRC16 value: read={0}, computed={1}
org.jhove2.module.format.gzip.GzipModule.invalidCrc32 = Invalid data CRC32 value: read={0}, computed={1}

/config/messages/arc_messages.properties

Additional file with ArcModule messages.

arc_messages.properties
# arc_messages.properties
#
# Message text templates used for localization. Each key is the fully-qualified
# Java path of a Message field in a class; each value is the template text.
#
# ------------------------------------------------------------------------------
#   Templates for class org.jhove2.module.format.arc.ArcModule
# ------------------------------------------------------------------------------

# Validation messages
org.jhove2.module.format.arc.ArcModule.missing = Missing ARC header fields {0}
org.jhove2.module.format.arc.ArcModule.invalid = Invalid {0}: {1}
org.jhove2.module.format.arc.ArcModule.warning = Warning: {0}

# Fatal error
org.jhove2.module.format.arc.ArcModule.characterizationError = Fatal characterization error: {0}

/config/messages/warc_messages.properties

Additional file with WarcModule messages.

warc_messages.properties
# warc_messages.properties
# Key value pairs from fully-qualified Java path for a Message field in a class
# to message text template
# Used for localization
#
# ##############################################################################
#   Message templates for class org.jhove2.module.format.warc.WarcModule
# ##############################################################################
#
# These templates belong to the WARC module, so the text names WARC header
# fields rather than ARC ones.
org.jhove2.module.format.warc.WarcModule.missing = \
        Missing WARC header fields {0}
org.jhove2.module.format.warc.WarcModule.invalid = \
        Invalid {0}: {1}
org.jhove2.module.format.warc.WarcModule.warning = \
        Warning: {0}

org.jhove2.module.format.warc.WarcModule.empty = \
        Empty WARC header field {0}
# NOTE(review): the five templates below all reuse the generic "Invalid {0}: {1}"
# text of the "invalid" key - confirm whether each key should get its own wording.
org.jhove2.module.format.warc.WarcModule.duplicate = \
        Invalid {0}: {1}
org.jhove2.module.format.warc.WarcModule.unknown = \
        Invalid {0}: {1}
org.jhove2.module.format.warc.WarcModule.wanted = \
        Invalid {0}: {1}
org.jhove2.module.format.warc.WarcModule.unwanted = \
        Invalid {0}: {1}
org.jhove2.module.format.warc.WarcModule.recommended = \
        Invalid {0}: {1}

#
org.jhove2.module.format.warc.WarcModule.characterizationError = \
        Fatal characterization error: {0}
#

Sources

/src/main/java/org/jhove2/module/format/gzip/

This folder contains all the GZipModule implementation sources.

/src/main/java/org/jhove2/module/format/arc/

This folder contains all the ArcModule implementation sources.

/src/main/java/org/jhove2/module/format/warc/

This folder contains all the WarcModule implementation sources.