Logo Search packages:      
Sourcecode: par2cmdline version File versions  Download package

verificationhashtable.h

//  This file is part of par2cmdline (a PAR 2.0 compatible file verification and
//  repair tool). See http://parchive.sourceforge.net for details of PAR 2.0.
//
//  Copyright (c) 2003 Peter Brian Clements
//
//  par2cmdline is free software; you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation; either version 2 of the License, or
//  (at your option) any later version.
//
//  par2cmdline is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program; if not, write to the Free Software
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

#ifndef __VERIFICATIONHASHTABLE_H__
#define __VERIFICATIONHASHTABLE_H__

class Par2RepairerSourceFile;
class VerificationHashTable;

// The VerificationHashEntry objects form the nodes of a binary trees stored
// in a VerificationHashTable object.

// There is one VerificationHashEntry object for each data block in the original
// source files.

class VerificationHashEntry
{
public:
  VerificationHashEntry(Par2RepairerSourceFile *_sourcefile,
                        DataBlock *_datablock,
                        bool _firstblock,
                        const FILEVERIFICATIONENTRY *_verificationentry)
  {
    sourcefile = _sourcefile;
    datablock = _datablock;
    firstblock = _firstblock;

    crc = _verificationentry->crc;
    hash = _verificationentry->hash;

    left = right = same = next = 0;
  }

  ~VerificationHashEntry(void)
  {
    delete left;
    delete right;
    delete same;
  }

  // Insert the current object is a child of the specified parent
  void Insert(VerificationHashEntry **parent);

  // Search (starting at the specified parent) for an object with a matching crc
  static const VerificationHashEntry* Search(const VerificationHashEntry *entry, u32 crc);

  // Search (starting at the specified parent) for an object with a matching hash
  static const VerificationHashEntry* Search(const VerificationHashEntry *entry, const MD5Hash &hash);

  // Comparison operators for searching
  bool operator <(const VerificationHashEntry &r) const 
  {
    return crc < r.crc || crc == r.crc && hash < r.hash;
  }
  bool operator >(const VerificationHashEntry &r) const 
  {
    return crc > r.crc || crc == r.crc && hash > r.hash;
  }
  bool operator ==(const VerificationHashEntry &r) const 
  {
    return crc == r.crc && hash == r.hash;
  }
  bool operator <=(const VerificationHashEntry &r) const {return !operator>(r);}
  bool operator >=(const VerificationHashEntry &r) const {return !operator<(r);}
  bool operator !=(const VerificationHashEntry &r) const {return !operator==(r);}

  // Data
  Par2RepairerSourceFile* SourceFile(void) const {return sourcefile;}
  const DataBlock* GetDataBlock(void) const {return datablock;}
  bool FirstBlock(void) const {return firstblock;}
  
  // Set/Check the associated datablock
  void SetBlock(DiskFile *diskfile, u64 offset) const;
  bool IsSet(void) const;

  u32 Checksum(void) const {return crc;}
  const MD5Hash& Hash(void) const {return hash;}

  VerificationHashEntry* Same(void) const {return same;}
  VerificationHashEntry* Next(void) const {return next;}
  void Next(VerificationHashEntry *_next) {next = _next;}

protected:
  // Data
  Par2RepairerSourceFile       *sourcefile;
  DataBlock                    *datablock;
  bool                          firstblock;

  u32                           crc;
  MD5Hash                       hash;

protected:
  // Binary tree
  VerificationHashEntry *left;
  VerificationHashEntry *right;

  // Linked list of entries with the same crc and hash
  VerificationHashEntry *same;

  // Linked list of entries in sequence for same file
  VerificationHashEntry *next;
};

inline void VerificationHashEntry::SetBlock(DiskFile *diskfile, u64 offset) const
{
  datablock->SetLocation(diskfile, offset);
}

inline bool VerificationHashEntry::IsSet(void) const
{
  return datablock->IsSet();
}

// Insert a new entry in the tree
inline void VerificationHashEntry::Insert(VerificationHashEntry **parent)
{
  while (*parent)
  {
    if (**parent < *this)
    {
      parent = &(*parent)->right;
    }
    else if (**parent > *this)
    {
      parent = &(*parent)->left;
    }
    else
    {
      break;
    }
  }

  while (*parent)
  {
    parent = &(*parent)->same;
  }

  *parent = this;
}

// Search the tree for an entry with the correct crc
inline const VerificationHashEntry* VerificationHashEntry::Search(const VerificationHashEntry *entry, u32 crc)
{
  while (entry)
  {
    if (entry->crc < crc)
    {
      entry = entry->right;
    }
    else if (entry->crc > crc)
    {
      entry = entry->left;
    }
    else
    {
      break;
    }
  }

  return entry;
}

// Search the tree for an entry with the correct hash
inline const VerificationHashEntry* VerificationHashEntry::Search(const VerificationHashEntry *entry, const MD5Hash &hash)
{
  u32 crc = entry->crc;

  while (entry)
  {
    if (entry->crc < crc || entry->crc == crc && entry->hash < hash)
    {
      entry = entry->right;
    }
    else if (entry->crc > crc || entry->crc == crc && entry->hash > hash)
    {
      entry = entry->left;
    }
    else
    {
      break;
    }
  }

  return entry;
}

// The VerificationHashTable object contains all of the VerificationHashEntry objects
// and is used to find matches for blocks of data in a target file that is being
// scanned.

// It is initialised by loading data from all available verification packets for the
// source files.

class VerificationHashTable
{
public:
  VerificationHashTable(void);
  ~VerificationHashTable(void);

  void SetLimit(u32 limit);

  // Load the data from the verification packet
  void Load(Par2RepairerSourceFile *sourcefile, u64 blocksize);

  // Try to find a match.
  //   nextentry   - The entry which we expect to find next. This is used
  //                 when a sequence of matches are found.
  //   sourcefile  - Which source file we would prefer to find a match for
  //                 if there are more than one possible match (with the
  //                 same crc and hash).
  //   checksummer - Provides the crc and hash values being tested.
  //   duplicate   - Set on return if the match would have been valid except
  //                 for the fact that the block has already been found.
  const VerificationHashEntry* FindMatch(const VerificationHashEntry *nextentry,
                                         const Par2RepairerSourceFile *sourcefile,
                                         FileCheckSummer &checksummer,
                                         bool &duplicate) const;

  // Look up based on the block crc
  const VerificationHashEntry* Lookup(u32 crc) const;

  // Continue lookup based on the block hash
  const VerificationHashEntry* Lookup(const VerificationHashEntry *entry,
                                      const MD5Hash &hash);

protected:
  VerificationHashEntry **hashtable;
  unsigned int hashmask;
};

// Search for an entry with the specified crc
inline const VerificationHashEntry* VerificationHashTable::Lookup(u32 crc) const
{
  if (hashmask)
  {
    return VerificationHashEntry::Search(hashtable[crc & hashmask], crc);
  }

  return 0;
}

// Search for an entry with the specified hash
inline const VerificationHashEntry* VerificationHashTable::Lookup(const VerificationHashEntry *entry,
                                                                  const MD5Hash &hash)
{
  return VerificationHashEntry::Search(entry, hash);
}

inline const VerificationHashEntry* VerificationHashTable::FindMatch(const VerificationHashEntry *suggestedentry,
                                                                     const Par2RepairerSourceFile *sourcefile,
                                                                     FileCheckSummer &checksummer,
                                                                     bool &duplicate) const
{
  duplicate = false;

  // Get the current checksum from the checksummer
  u32 crc = checksummer.Checksum();

  MD5Hash hash;
  bool havehash = false;

  // Do we know what the next entry should be
  if (0 != suggestedentry)
  {
    // Is the suggested match supposed to be the last one in the file
    if (suggestedentry->Next() == 0)
    {
      // How long should the last block be
      u64 length = suggestedentry->GetDataBlock()->GetLength();

      // Get a short checksum from the checksummer
      u32 checksum = checksummer.ShortChecksum(length);

      // Is the checksum correct
      if (checksum == suggestedentry->Checksum())
      {
        // Get a short hash from the checksummer
        hash = checksummer.ShortHash(length);

        // If the hash matches as well, then return it
        if (hash == suggestedentry->Hash())
        {
          return suggestedentry;
        }
      }
    }
    // If the suggested entry has not already been found, compare the checksum
    else if (!suggestedentry->IsSet() && suggestedentry->Checksum() == crc)
    {
      // Get the hash value from the checksummer
      havehash = true;
      hash = checksummer.Hash();

      // If the hash value matches, then return it.
      if (hash == suggestedentry->Hash())
      {
        return suggestedentry;
      }
    }
  }

  // Look for other possible matches for the checksum
  const VerificationHashEntry *nextentry = VerificationHashEntry::Search(hashtable[crc & hashmask], crc);
  if (0 == nextentry)
    return 0;

  // If we don't have the hash yet, get it
  if (!havehash)
  {
    hash = checksummer.Hash();
  }

  // Look for an entry with a matching hash
  nextentry = VerificationHashEntry::Search(nextentry, hash);
  if (0 == nextentry)
    return 0;

  // Is there one match with the same checksum and hash, or many
  if (nextentry->Same() == 0)
  {
    // If the match is for a block that is part of a target file
    // for which we already have a complete match, then don't
    // return it.
    if (nextentry->SourceFile()->GetCompleteFile() != 0)
    {
      duplicate = true;
      return 0;
    }

    // If we are close to the end of the file and the block
    // length is wrong, don't return it because it is an
    // invalid match
    if (checksummer.ShortBlock() && checksummer.BlockLength() != nextentry->GetDataBlock()->GetLength())
    {
      return 0;
    }

    // If the match was at the start of the file and it is the first
    // block for a target file, then return it.
    if (nextentry->FirstBlock() && checksummer.Offset() == 0)
    {
      return nextentry;
    }

    // Was this match actually the one which had originally been suggested
    // but which has presumably already been found
    if (nextentry == suggestedentry)
    {
      // Was the existing match in the same file as the new match
      if (nextentry->IsSet() && 
          nextentry->GetDataBlock()->GetDiskFile() == checksummer.GetDiskFile())
      {
        // Yes. Don't return it
        duplicate = true;
        return 0;
      }
      else
      {
        // No, it was in a different file. Return it.
        // This ensures that we can find a perfect match for a target
        // file even if some of the blocks had already been found
        // in a different file.
        return nextentry;
      }
    }
    else
    {
      // return it only if it has not already been used
      if (nextentry->IsSet())
      {
        duplicate = true;
        return 0;
      }

      return nextentry;
    }
  }

  // Do we prefer to match entries for a particular source file
  if (0 != sourcefile)
  {
    const VerificationHashEntry *currententry = nextentry;
    nextentry = 0;

    // We don't want entries for the wrong source file, ones that
    // have already been matched, or ones that are the wrong length
    while (currententry && (currententry->SourceFile() != sourcefile || 
                            currententry->IsSet() ||
                            checksummer.ShortBlock() && checksummer.BlockLength() != currententry->GetDataBlock()->GetLength()
                           )
          )
    {
      // If we found an unused entry (which was presumably for the wrong 
      // source file) remember it (providing it is the correct length).
      if (0 == nextentry && !(currententry->IsSet() || 
                              checksummer.ShortBlock() && checksummer.BlockLength() != currententry->GetDataBlock()->GetLength()
                             )
         )
      {
        nextentry = currententry;
      }

      currententry = currententry->Same();
    }

    // If we found an unused entry for the source file we want, return it
    if (0 != currententry)
      return currententry;
  }

  // Check for an unused entry which is the correct length
  while (nextentry && (nextentry->IsSet() ||
                       checksummer.ShortBlock() && checksummer.BlockLength() != nextentry->GetDataBlock()->GetLength()
                      )
        )
  {
    nextentry = nextentry->Same();
  }

  // Return what we have found
  if (nextentry == 0)
  {
    duplicate = true;
  }

  return nextentry;
}

#endif // __VERIFICATIONHASHTABLE_H__

Generated by  Doxygen 1.6.0   Back to index