Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.
Comment: Migrated to Confluence 5.3

...

  1. Acquire a write-lock on the tapingDir
  2. Tape all that is in the tapingDir
  3. Acquire a write-lock on the cacheDir
  4. Iterate through all files in the cacheDir
    1. If the file is older than tapeDelay
      1. move to tapingDir
  5. release write-lock on cacheDir
  6. Tape all that is in the tapingDir
  7. release write-lock on tapingDir
Taping all that is in tapingDir goes like this
  • Acquire a write-lock on the tapingDir
  • iterate through all the files in tapingDir
    • forward the create/remove operation to the TapeArchive

    Step 2 is part of the error recovery functionality. No file should ever be left in the tapingDir after a taping run has completed. If the taping run fails to complete, a file could be left behind, and step 2 takes care of getting that file taped.
    Since the taping of files can be slower than just file system operations, we have split the steps 4 and 6. This way, the write-lock on the cacheDir is held for the shortest possible time.

    Taping all that is in tapingDir goes like this
    1. Acquire a write-lock on the tapingDir
    2. iterate through all the files in tapingDir
      1. forward the create/remove operation to the TapeArchive
    3. release write-lock on tapingDir

    ...

    The cache is the place where changed objects are written. Objects are served from the cache first, so that changed objects always get served in the changed version. The cache does not cache objects retrieved from the tapes, only changed objects submitted by the user.

    ...

    To write a new file to the cacheDir, the write-lock for the cacheDir must of course be acquired. This means that writing operations are blocked until the timer thread described above has finished iterating through the cacheDir. Similarly, slow uploads will block the timer thread until the upload is complete.

    The Index

    For the index, a separate system called Redis (http://redis.io/) is used.

    ...

    The system is configured from the file "akubra-llstore.xml" which is a spring config file. It is reproduced below with comments. 

    ...

    which is a spring config file. It is reproduced below with comments. 

    Code Block
    <?xml version="1.0" encoding="UTF-8"?>
    <!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN//EN" "http://www.springframework.org/dtd/spring-beans.dtd">
    <beans>
        <!--Standard-->
        <bean name="org.fcrepo.server.storage.lowlevel.ILowlevelStorage"
              class="org.fcrepo.server.storage.lowlevel.akubra.AkubraLowlevelStorageModule">
            <constructor-arg index="0">
                <map/>
            </constructor-arg>
            <constructor-arg index="1" ref="org.fcrepo.server.Server"/>
            <constructor-arg index="2" type="java.lang.String"
                             value="org.fcrepo.server.storage.lowlevel.ILowlevelStorage"/>
            <property name="impl"
                      ref="org.fcrepo.server.storage.lowlevel.akubra.AkubraLowlevelStorage"/>
        </bean>

        <bean name="org.fcrepo.server.storage.lowlevel.akubra.AkubraLowlevelStorage"
              class="org.fcrepo.server.storage.lowlevel.akubra.AkubraLowlevelStorage"
              singleton="true">
            <constructor-arg ref="tapeObjectStore">
                <description>The store of serialized Fedora objects</description>
                <!--Here we reference our tape system-->
            </constructor-arg>
            <constructor-arg ref="datastreamStore">
                <description>The store of datastream content</description>
            </constructor-arg>
            <!--This is set to false, as we do not ever delete stuff-->
            <constructor-arg value="false">
                <description>if true, replaceObject calls will be done in a way
                    that ensures the old content is not deleted until the new
                    content is safely written. If the objectStore already does
                    this, this should be given as false
                </description>
            </constructor-arg>
            <constructor-arg value="true">
                <description>same as above, but for datastreamStore</description>
            </constructor-arg>
        </bean>

        <!--This is the tape store Akubra implementation-->
        <bean name="tapeObjectStore"
              class="dk.statsbiblioteket.metadatarepository.xmltapes.XmlTapesBlobStore"
              singleton="true">
            <!--This parameter is the name of the storage-->
            <constructor-arg value="urn:example.org:tapeObjectStore"/>
            <!--And this is the reference to the actual implementation-->
            <property name="archive" ref="cacheTapeObjectStore"/>
        </bean>

        <!--The cache tape object store holds the objects while they are being
            written and until the taper is ready to tape them-->
        <bean name="cacheTapeObjectStore"
              class="dk.statsbiblioteket.metadatarepository.xmltapes.deferred2.Cache"
              singleton="true">
            <!--Where to store files until the taper is ready to tape them-->
            <constructor-arg value="/CHANGEME/cacheObjectStore" index="0"/>
            <!--Where to store files while they are still being written-->
            <constructor-arg value="/CHANGEME/cacheTempObjectStore" index="1"/>
            <!--The two addresses above must be on the same file system, as we rely on atomic moves-->
            <!--The delegate for read operations-->
            <property name="delegate" ref="tapingTapeObjectStore"/>
        </bean>

        <!--The taping tape object store holds the objects from the time the taper
            picks them up until they are actually written to a tape-->
        <bean name="tapingTapeObjectStore"
              class="dk.statsbiblioteket.metadatarepository.xmltapes.deferred2.Taping"
              singleton="true">
            <!--Where to store files until the taper is ready to tape them-->
            <constructor-arg value="/CHANGEME/tapingObjectStore" index="0"/>
            <!--The allowed age of a file before it is taped, in ms-->
            <property name="tapeDelay" value="600000"/>
            <!--The delay between invocations of the taper-->
            <property name="delay" value="100"/>
            <!--The delegate that performs the actual taping-->
            <property name="delegate" ref="tarTapeObjectStore"/>
            <!--The cache, to get the objects ready for writing-->
            <property name="parent" ref="cacheTapeObjectStore"/>
        </bean>

        <!--The guts of the tape system-->
        <bean name="tarTapeObjectStore"
              class="dk.statsbiblioteket.metadatarepository.xmltapes.TapeArchive"
              init-method="rebuild"
              singleton="true">
            <!--Change the init-method above to "init" if you do not want to rebuild
                the redis index on server startup-->
            <!--This constructor argument specifies the tape store location-->
            <constructor-arg value="file:/CHANGEME/tapeObjectStore" type="java.net.URI"/>
            <!--This specifies the maximum length a tape can be before a new tape is started-->
            <constructor-arg value="10485760" type="long"/>
            <!--10 MB-->
            <!--This is the reference to the index-->
            <property name="index" ref="redisIndex"/>
            <property name="fixErrors" value="false"/>
        </bean>

        <!--This is our Redis index-->
        <bean name="redisIndex" class="dk.statsbiblioteket.metadatarepository.xmltapes.redis.RedisIndex"
              singleton="true">
            <!--The redis server-->
            <constructor-arg value="localhost"/>
            <!--The port it is running on-->
            <constructor-arg value="6379"/>
            <!--The database name. Redis databases are always identified by integers-->
            <constructor-arg value="0"/>
        </bean>

        <!--Standard storage for managed datastreams. We do not use managed datastreams-->
        <bean name="datastreamStore" class="org.akubraproject.map.IdMappingBlobStore"
              singleton="true">
            <constructor-arg value="urn:fedora:datastreamStore"/>
            <constructor-arg>
                <ref bean="fsDatastreamStore"/>
            </constructor-arg>
            <constructor-arg>
                <ref bean="fsDatastreamStoreMapper"/>
            </constructor-arg>
        </bean>
        <!--Standard storage for managed datastreams. We do not use managed datastreams-->
        <bean name="fsDatastreamStore" class="org.akubraproject.fs.FSBlobStore"
              singleton="true">
            <constructor-arg value="urn:example.org:fsDatastreamStore"/>
            <constructor-arg value="/CHANGEME/datastreamStore"/>
        </bean>
        <!--Standard storage for managed datastreams. We do not use managed datastreams-->
        <bean name="fsDatastreamStoreMapper"
              class="org.fcrepo.server.storage.lowlevel.akubra.HashPathIdMapper"
              singleton="true">
            <constructor-arg value="##"/>
        </bean>

        <bean name="fedoraStorageHintProvider"
              class="org.fcrepo.server.storage.NullStorageHintsProvider"
              singleton="true">
        </bean>
    </beans>

    ...