Difference between revisions of "User talk:Timeslip"

From the Oblivion ConstructionSet Wiki
Jump to navigation Jump to search
imported>Timeslip
(→‎Possible Issue: 64 bit ints)
imported>MentalElf
Line 63: Line 63:


:::[[User:Timeslip|Timeslip]] 04:04, 11 October 2006 (EDT): Yes, in C# longs are 64 bit. For languages without 64 bit integer support, you can just split the hash into two 32 bit ints, since the upper and lower DWORDs are calculated separately.
:::[[User:Timeslip|Timeslip]] 04:04, 11 October 2006 (EDT): Yes, in C# longs are 64 bit. For languages without 64 bit integer support, you can just split the hash into two 32 bit ints, since the upper and lower DWORDs are calculated separately.
<pre>
#############################################################################
## Procedure:  GenHash
##
## Computes the 64-bit BSA hash of a name.
##
## Arguments:
##   HFile  Name without its extension (lower-cased internally).
##   HExt   Extension including the leading '.', or "" when hashing anything
##          other than a file name (lower-cased internally).
## Returns:
##   The hash as an integer masked to 64 bits.  The low DWORD is built from
##   the first, last and second-to-last characters plus the name length; the
##   high DWORD is the sum of the GenHash2 values of the middle of the name
##   and of the extension.
## Requires:
##   ::GenHash2 to be defined when HFile is longer than 3 characters or HExt
##   is non-empty.
proc ::GenHash {HFile HExt} {
    set HFile [string tolower $HFile]
    set HExt  [string tolower $HExt]
    set HFLen [string length $HFile]
    set hash64 [expr {wide(0)}]

    # Low DWORD: last char | second-to-last char << 8 | length << 16 |
    # first char << 24.
    if {$HFLen > 0} {
        set hash64 [expr {wide([scan [string index $HFile [expr {$HFLen - 1}]] "%c"])}]
        if {$HFLen > 2} {
            set hash64 [expr {$hash64 + wide([scan [string index $HFile [expr {$HFLen - 2}]] "%c"] << 8)}]
        }
        set hash64 [expr {$hash64 + wide($HFLen << 16)}]
        # Mask so a first character >= 0x80 cannot leak into the high DWORD.
        set hash64 [expr {$hash64 + (wide([scan [string index $HFile 0] "%c"] << 24) & 0xFFFFFFFF)}]
    }

    # High DWORD, part 1: hash of the name without its first character and
    # without its last two characters.
    if {$HFLen > 3} {
        set hash64 [expr {$hash64 + ((wide([GenHash2 [string range $HFile 1 [expr {$HFLen - 3}]]]) & 0xFFFFFFFF) << 32)}]
    }

    if {[string length $HExt] > 0} {
        # High DWORD, part 2: hash of the extension.
        set hash64 [expr {$hash64 + ((wide([GenHash2 $HExt]) & 0xFFFFFFFF) << 32)}]

        # "--" stops option parsing so an extension that starts with '-' is
        # matched as data instead of being taken for a switch option.
        switch -exact -- $HExt {
            .nif    { set i 1 }
            .kf     { set i 2 }
            .dds    { set i 3 }
            .wav    { set i 4 }
            default { set i 0 }
        }

        # Known extensions additionally perturb bytes 0, 1 and 3 of the low
        # DWORD; byte 2 (the length) is left alone.
        if {$i} {
            set a [expr {0xFF & ((($i & 0xFC) << 5) + (0xFF & (($hash64 & 0xFF000000) >> 24)))}]
            set b [expr {0xFF & ((($i & 0xFE) << 6) + (0xFF & $hash64))}]
            set c [expr {0xFF & (($i << 7) + (0xFF & (($hash64 & 0xFF00) >> 8)))}]
            set hash64 [expr {$hash64 - ($hash64 & 0xFF00FFFF)}]
            set hash64 [expr {$hash64 + ($a << 24) + $b + ($c << 8)}]
        }
    }

    # The two high-DWORD additions can carry past bit 63.  The C# reference
    # wraps silently in a ulong; on Tcl releases with arbitrary-precision
    # integers the carry would otherwise survive, so discard it explicitly.
    return [expr {$hash64 & 0xFFFFFFFFFFFFFFFF}]
}
#############################################################################
## Procedure:  GenHash2
##
## Computes a 32-bit multiplicative string hash: for every character the
## running value is multiplied by 0x1003F and the character code is added.
##
## Arguments:
##   s  String to hash; an empty string yields 0.
## Returns:
##   The hash reduced to its low 32 bits.
proc ::GenHash2 {s} {
    set hash32 0
    set len [string length $s]
    for {set i 0} {$i < $len} {incr i} {
        # Reduce every step so the value stays a 32-bit quantity instead of
        # growing without bound where Tcl integers are arbitrary precision.
        set hash32 [expr {($hash32 * 0x1003F + [scan [string index $s $i] "%c"]) & 0xFFFFFFFF}]
    }
    return $hash32
}
</pre>

Revision as of 06:09, 11 October 2006

BSA hashes

Oblivion's BSA hashing algorithm is different from Morrowind's, and doesn't appear to be documented anywhere. In case anyone else is working on any BSA tools and wants the algorithm, I'll post the C# code that obmm uses here.

//file is the file name without the extension
//Extension is the extension of a file, including the preceding '.'
//For hashing anything other than file names, ext must be an empty string
//Returns the 64 bit hash: the lower dword is built from the first, last and
//second-to-last characters plus the length of file, the upper dword is the
//sum of GenHash2 over the middle of file and GenHash2 over ext
public static ulong GenHash(string file, string ext) {
  //Lower-case with the invariant culture so the hash does not depend on the
  //current locale (e.g. under a Turkish culture ToLower() maps 'I' to a
  //dotless i, which would change the resulting bytes)
  file=file.ToLowerInvariant();
  ext=ext.ToLowerInvariant();
  ulong hash=0;
  //The arithmetic below relies on silent wrap-around and truncating casts,
  //so keep it unchecked even when the project is compiled with /checked
  unchecked {
    if(file.Length>0) {
      hash=(ulong)(
        (((byte)file[file.Length-1])*0x1)+
        ((file.Length>2?(byte)file[file.Length-2]:(byte)0)*0x100)+
        (file.Length*0x10000)+
        (((byte)file[0])*0x1000000)
      );
    }
    //Upper dword, part 1: everything except the first and the last two chars
    if(file.Length>3) {
      hash+=(ulong)(GenHash2(file.Substring(1, file.Length-3))*0x100000000);
    }
    if(ext.Length>0) {
      //Upper dword, part 2: the extension
      hash+=(ulong)(GenHash2(ext)*0x100000000);
      byte i=0;
      switch(ext) {
        case ".nif": i=1; break;
        case ".kf": i=2; break;
        case ".dds": i=3; break;
        case ".wav": i=4; break;
      }
      //Known extensions additionally adjust bytes 0, 1 and 3 of the lower
      //dword; byte 2 (the length) is left untouched
      if(i!=0) {
        byte a=(byte)(((i&0xfc)<<5)+(byte)((hash&0xff000000)>>24));
        byte b=(byte)(((i&0xfe)<<6)+(byte)(hash&0xff));
        byte c=(byte)((i<<7)+(byte)((hash&0xff00)>>8));
        hash-=hash&0xFF00FFFF;
        hash+=(uint)((a<<24)+b+(c<<8));
      }
    }
  }
  return hash;
}

//Computes a 32 bit multiplicative hash of s: for every character the running
//value is multiplied by 0x1003f and the low byte of the character is added
//Returns 0 for an empty string
private static uint GenHash2(string s) {
  uint hash=0;
  //The hash depends on uint wrap-around and on truncating char to byte, so
  //force unchecked arithmetic regardless of the compiler's /checked setting
  unchecked {
    foreach(char ch in s) {
      hash=hash*0x1003f+(byte)ch;
    }
  }
  return hash;
}

This may not be 100% accurate, but I've run it over every file and folder in oblivion v1.1's meshes, textures and misc BSAs without any mismatches, so I'm fairly sure it's right.

Possible Issue

MentalElf What about the other file types... ".mp3", ".spt", ".egm"???

Timeslip 15:42, 10 October 2006 (EDT): For any file types other than the ones I've listed in the switch, the lower dword is left untouched. The upper dword is calculated normally with GenHash2.
MentalElf Ok, thanks! I'm porting this to TCL/TK.
MentalElf Hmmm. I take it from the code that a "ulong" is 64 bits. This presents a problem for me. My long values are only 32 bits...
Timeslip 04:04, 11 October 2006 (EDT): Yes, in C# longs are 64 bit. For languages without 64 bit integer support, you can just split the hash into two 32 bit ints, since the upper and lower DWORDs are calculated separately.
#############################################################################
## Procedure:  GenHash
##
## Computes the 64-bit BSA hash of a name.
##
## Arguments:
##   HFile  Name without its extension (lower-cased internally).
##   HExt   Extension including the leading '.', or "" when hashing anything
##          other than a file name (lower-cased internally).
## Returns:
##   The hash as an integer masked to 64 bits.  The low DWORD is built from
##   the first, last and second-to-last characters plus the name length; the
##   high DWORD is the sum of the GenHash2 values of the middle of the name
##   and of the extension.
## Requires:
##   ::GenHash2 to be defined when HFile is longer than 3 characters or HExt
##   is non-empty.

proc ::GenHash {HFile HExt} {
    set HFile [string tolower $HFile]
    set HExt  [string tolower $HExt]
    set HFLen [string length $HFile]
    set hash64 [expr {wide(0)}]

    # Low DWORD: last char | second-to-last char << 8 | length << 16 |
    # first char << 24.
    if {$HFLen > 0} {
        set hash64 [expr {wide([scan [string index $HFile [expr {$HFLen - 1}]] "%c"])}]
        if {$HFLen > 2} {
            set hash64 [expr {$hash64 + wide([scan [string index $HFile [expr {$HFLen - 2}]] "%c"] << 8)}]
        }
        set hash64 [expr {$hash64 + wide($HFLen << 16)}]
        # Mask so a first character >= 0x80 cannot leak into the high DWORD.
        set hash64 [expr {$hash64 + (wide([scan [string index $HFile 0] "%c"] << 24) & 0xFFFFFFFF)}]
    }

    # High DWORD, part 1: hash of the name without its first character and
    # without its last two characters.
    if {$HFLen > 3} {
        set hash64 [expr {$hash64 + ((wide([GenHash2 [string range $HFile 1 [expr {$HFLen - 3}]]]) & 0xFFFFFFFF) << 32)}]
    }

    if {[string length $HExt] > 0} {
        # High DWORD, part 2: hash of the extension.
        set hash64 [expr {$hash64 + ((wide([GenHash2 $HExt]) & 0xFFFFFFFF) << 32)}]

        # "--" stops option parsing so an extension that starts with '-' is
        # matched as data instead of being taken for a switch option.
        switch -exact -- $HExt {
            .nif    { set i 1 }
            .kf     { set i 2 }
            .dds    { set i 3 }
            .wav    { set i 4 }
            default { set i 0 }
        }

        # Known extensions additionally perturb bytes 0, 1 and 3 of the low
        # DWORD; byte 2 (the length) is left alone.
        if {$i} {
            set a [expr {0xFF & ((($i & 0xFC) << 5) + (0xFF & (($hash64 & 0xFF000000) >> 24)))}]
            set b [expr {0xFF & ((($i & 0xFE) << 6) + (0xFF & $hash64))}]
            set c [expr {0xFF & (($i << 7) + (0xFF & (($hash64 & 0xFF00) >> 8)))}]
            set hash64 [expr {$hash64 - ($hash64 & 0xFF00FFFF)}]
            set hash64 [expr {$hash64 + ($a << 24) + $b + ($c << 8)}]
        }
    }

    # The two high-DWORD additions can carry past bit 63.  The C# reference
    # wraps silently in a ulong; on Tcl releases with arbitrary-precision
    # integers the carry would otherwise survive, so discard it explicitly.
    return [expr {$hash64 & 0xFFFFFFFFFFFFFFFF}]
}
#############################################################################
## Procedure:  GenHash2
##
## Computes a 32-bit multiplicative string hash: for every character the
## running value is multiplied by 0x1003F and the character code is added.
##
## Arguments:
##   s  String to hash; an empty string yields 0.
## Returns:
##   The hash reduced to its low 32 bits.

proc ::GenHash2 {s} {
    set hash32 0
    set len [string length $s]
    for {set i 0} {$i < $len} {incr i} {
        # Reduce every step so the value stays a 32-bit quantity instead of
        # growing without bound where Tcl integers are arbitrary precision.
        set hash32 [expr {($hash32 * 0x1003F + [scan [string index $s $i] "%c"]) & 0xFFFFFFFF}]
    }
    return $hash32
}