/* Package mft provides functions to parse records and their attributes in an NTFS Master File Table ("MFT" for short). # Basic usage First parse a record using mft.ParseRecord(), which parses the record header and the attribute headers. Then parse each attribute's data individually using the various mft.Parse...() functions. // Error handling left out for brevity record, err := mft.ParseRecord() attrs, err := record.FindAttributes(mft.AttributeTypeFileName) fileName, err := mft.ParseFileName(attrs[0]) */ package mft import ( "bytes" "encoding/binary" "fmt" "b612.me/wincmd/ntfs/binutil" "b612.me/wincmd/ntfs/fragment" "b612.me/wincmd/ntfs/utf16" ) var ( fileSignature = []byte{0x46, 0x49, 0x4c, 0x45} ) const ( maxInt = int64(^uint(0) >> 1) minRecordHeaderLength = 42 minAttributeDataLength = 22 minAttributeListHeader = 8 minAttributeTypeLength = 4 dataRunTerminatorLength = 1 ) type recordHeader struct { signature []byte fileReference FileReference baseRecordReference FileReference logFileSequence uint64 hardLinkCount int flags RecordFlag actualSize uint32 allocatedSize uint32 nextAttributeID int firstAttributeOffset int } type attributeHeader struct { attrType AttributeType resident bool name string flags AttributeFlags attributeID int payloadOffset int } type attributePayload struct { allocatedSize uint64 actualSize uint64 data []byte } // A Record represents an MFT entry, excluding all technical data (such as "offset to first attribute"). The Attributes // list only contains the attribute headers and raw data; the attribute data has to be parsed separately. When this is a // base record, the BaseRecordReference will be zero. When it is an extension record, the BaseRecordReference points to // the record's base record. type Record struct { Signature []byte FileReference FileReference BaseRecordReference FileReference LogFileSequenceNumber uint64 HardLinkCount int Flags RecordFlag ActualSize uint32 AllocatedSize uint32 NextAttributeId int Attributes []Attribute } // ParseRecord parses bytes into a Record after applying fixup. The data is assumed to be in Little Endian order. Only // the attribute headers are parsed, not the actual attribute data. func ParseRecord(b []byte) (Record, error) { header, data, err := parseRecordHeader(b) if err != nil { return Record{}, err } attributes, err := ParseAttributes(data[header.firstAttributeOffset:]) if err != nil { return Record{}, err } return Record{ Signature: header.signature, FileReference: header.fileReference, BaseRecordReference: header.baseRecordReference, LogFileSequenceNumber: header.logFileSequence, HardLinkCount: header.hardLinkCount, Flags: header.flags, ActualSize: header.actualSize, AllocatedSize: header.allocatedSize, NextAttributeId: header.nextAttributeID, Attributes: attributes, }, nil } func parseRecordHeader(b []byte) (recordHeader, []byte, error) { if len(b) < minRecordHeaderLength { return recordHeader{}, nil, fmt.Errorf("record data length should be at least %d but is %d", minRecordHeaderLength, len(b)) } if !bytes.Equal(b[:4], fileSignature) { return recordHeader{}, nil, fmt.Errorf("unknown record signature: %# x", b[:4]) } data := binutil.Duplicate(b) r := binutil.NewLittleEndianReader(data) baseRecordRef, err := ParseFileReference(r.Read(0x20, 8)) if err != nil { return recordHeader{}, nil, fmt.Errorf("unable to parse base record reference: %v", err) } firstAttributeOffset := int(r.Uint16(0x14)) if firstAttributeOffset < 0 || firstAttributeOffset >= len(data) { return recordHeader{}, nil, fmt.Errorf("invalid first attribute offset %d (data length: %d)", firstAttributeOffset, len(data)) } if _, err := applyFixUp(data, int(r.Uint16(0x04)), int(r.Uint16(0x06))); err != nil { return recordHeader{}, nil, fmt.Errorf("unable to apply fixup: %v", err) } return recordHeader{ signature: binutil.Duplicate(data[:4]), fileReference: FileReference{RecordNumber: uint64(r.Uint32(0x2C)), SequenceNumber: r.Uint16(0x10)}, baseRecordReference: baseRecordRef, logFileSequence: r.Uint64(0x08), hardLinkCount: int(r.Uint16(0x12)), flags: RecordFlag(r.Uint16(0x16)), actualSize: r.Uint32(0x18), allocatedSize: r.Uint32(0x1C), nextAttributeID: int(r.Uint16(0x28)), firstAttributeOffset: firstAttributeOffset, }, data, nil } // A FileReference represents a reference to an MFT record. Since the FileReference in a Record is only 4 bytes, the // RecordNumber will probably not exceed 32 bits. type FileReference struct { RecordNumber uint64 SequenceNumber uint16 } func (f FileReference) ToUint64() uint64 { origin := make([]byte, 8) binary.LittleEndian.PutUint64(origin, f.RecordNumber) binary.LittleEndian.PutUint16(origin[6:], f.SequenceNumber) return binary.LittleEndian.Uint64(origin) } // ParseFileReference parses a Little Endian ordered 8-byte slice into a FileReference. The first 6 bytes indicate the // record number, while the final 2 bytes indicate the sequence number. func ParseFileReference(b []byte) (FileReference, error) { if len(b) != 8 { return FileReference{}, fmt.Errorf("expected 8 bytes but got %d", len(b)) } return FileReference{ RecordNumber: binary.LittleEndian.Uint64(padToUnsigned(b[:6], 8)), SequenceNumber: binary.LittleEndian.Uint16(b[6:]), }, nil } // RecordFlag represents a bit mask flag indicating the status of the MFT record. type RecordFlag uint16 // Bit values for the RecordFlag. For example, an in-use directory has value 0x0003. const ( RecordFlagInUse RecordFlag = 0x0001 RecordFlagIsDirectory RecordFlag = 0x0002 RecordFlagInExtend RecordFlag = 0x0004 RecordFlagIsIndex RecordFlag = 0x0008 ) // Is checks if this RecordFlag's bit mask contains the specified flag. func (f *RecordFlag) Is(c RecordFlag) bool { return *f&c == c } func applyFixUp(b []byte, offset int, length int) ([]byte, error) { if offset < 0 { return nil, fmt.Errorf("update sequence offset %d is negative", offset) } if length < 2 { return nil, fmt.Errorf("update sequence length %d is too small", length) } updateSequenceLength := length * 2 if offset > len(b) || updateSequenceLength > len(b)-offset { return nil, fmt.Errorf("update sequence range [%d:%d] exceeds record length %d", offset, offset+updateSequenceLength, len(b)) } r := binutil.NewLittleEndianReader(b) updateSequence := r.Read(offset, updateSequenceLength) // length is in pairs, not bytes updateSequenceNumber := updateSequence[:2] updateSequenceArray := updateSequence[2:] if len(updateSequenceArray) == 0 || len(updateSequenceArray)%2 != 0 { return nil, fmt.Errorf("invalid update sequence array length %d", len(updateSequenceArray)) } sectorCount := len(updateSequenceArray) / 2 if sectorCount == 0 { return nil, fmt.Errorf("update sequence does not contain any sector entries") } if len(b)%sectorCount != 0 { return nil, fmt.Errorf("record length %d is not divisible by sector count %d", len(b), sectorCount) } sectorSize := len(b) / sectorCount if sectorSize < 2 { return nil, fmt.Errorf("invalid sector size %d", sectorSize) } for i := 1; i <= sectorCount; i++ { sectorOffset := sectorSize*i - 2 if sectorOffset < 0 || sectorOffset+2 > len(b) { return nil, fmt.Errorf("invalid sector offset %d for record length %d", sectorOffset, len(b)) } if !bytes.Equal(updateSequenceNumber, b[sectorOffset:sectorOffset+2]) { return nil, fmt.Errorf("update sequence mismatch at pos %d", sectorOffset) } } for i := 0; i < sectorCount; i++ { offset := sectorSize*(i+1) - 2 num := i * 2 copy(b[offset:offset+2], updateSequenceArray[num:num+2]) } return b, nil } // FindAttributes returns all attributes of the specified type contained in this record. When no matches are found an // empty slice is returned. func (r *Record) FindAttributes(attrType AttributeType) []Attribute { ret := make([]Attribute, 0) for _, a := range r.Attributes { if a.Type == attrType { ret = append(ret, a) } } return ret } // Attribute represents an MFT record attribute header and its corresponding raw attribute Data (excluding header data). // When the attribute is Resident, the Data contains the actual attribute's data. When the attribute is non-resident, // the Data contains DataRuns pointing to the actual data. DataRun data can be parsed using ParseDataRuns(). type Attribute struct { Type AttributeType Resident bool Name string Flags AttributeFlags AttributeId int AllocatedSize uint64 ActualSize uint64 Data []byte } // AttributeType represents the type of an Attribute. Use Name() to get the attribute type's name. type AttributeType uint32 // Known values for AttributeType. Note that other values might occur too. const ( AttributeTypeStandardInformation AttributeType = 0x10 // $STANDARD_INFORMATION; always resident AttributeTypeAttributeList AttributeType = 0x20 // $ATTRIBUTE_LIST; mixed residency AttributeTypeFileName AttributeType = 0x30 // $FILE_NAME; always resident AttributeTypeObjectId AttributeType = 0x40 // $OBJECT_ID; always resident AttributeTypeSecurityDescriptor AttributeType = 0x50 // $SECURITY_DESCRIPTOR; always resident? AttributeTypeVolumeName AttributeType = 0x60 // $VOLUME_NAME; always resident? AttributeTypeVolumeInformation AttributeType = 0x70 // $VOLUME_INFORMATION; never resident? AttributeTypeData AttributeType = 0x80 // $DATA; mixed residency AttributeTypeIndexRoot AttributeType = 0x90 // $INDEX_ROOT; always resident AttributeTypeIndexAllocation AttributeType = 0xa0 // $INDEX_ALLOCATION; never resident? AttributeTypeBitmap AttributeType = 0xb0 // $BITMAP; nearly always resident? AttributeTypeReparsePoint AttributeType = 0xc0 // $REPARSE_POINT; always resident? AttributeTypeEAInformation AttributeType = 0xd0 // $EA_INFORMATION; always resident AttributeTypeEA AttributeType = 0xe0 // $EA; nearly always resident? AttributeTypePropertySet AttributeType = 0xf0 // $PROPERTY_SET AttributeTypeLoggedUtilityStream AttributeType = 0x100 // $LOGGED_UTILITY_STREAM; always resident AttributeTypeTerminator AttributeType = 0xFFFFFFFF // Indicates the last attribute in a list; will not actually be returned by ParseAttributes ) // AttributeFlags represents a bit mask flag indicating various properties of an attribute's data. type AttributeFlags uint16 // Bit values for the AttributeFlags. For example, an encrypted, compressed attribute has value 0x4001. const ( AttributeFlagsCompressed AttributeFlags = 0x0001 AttributeFlagsEncrypted AttributeFlags = 0x4000 AttributeFlagsSparse AttributeFlags = 0x8000 ) // Is checks if this AttributeFlags's bit mask contains the specified flag. func (f *AttributeFlags) Is(c AttributeFlags) bool { return *f&c == c } // ParseAttributes parses bytes into Attributes. The data is assumed to be in Little Endian order. Only the attribute // headers are parsed, not the actual attribute data. func ParseAttributes(b []byte) ([]Attribute, error) { if len(b) == 0 { return []Attribute{}, nil } attributes := make([]Attribute, 0) for len(b) > 0 { recordData, remaining, done, err := nextAttributeRecordData(b) if err != nil { return nil, err } if done { break } attribute, err := ParseAttribute(recordData) if err != nil { return nil, err } attributes = append(attributes, attribute) b = remaining } return attributes, nil } func nextAttributeRecordData(b []byte) (recordData []byte, remaining []byte, done bool, err error) { if len(b) < minAttributeTypeLength { return nil, nil, false, fmt.Errorf("attribute header data should be at least %d bytes but is %d", minAttributeTypeLength, len(b)) } r := binutil.NewLittleEndianReader(b) if AttributeType(r.Uint32(0)) == AttributeTypeTerminator { return nil, nil, true, nil } if len(b) < minAttributeListHeader { return nil, nil, false, fmt.Errorf("cannot read attribute header record length, data should be at least %d bytes but is %d", minAttributeListHeader, len(b)) } uRecordLength := r.Uint32(0x04) if int64(uRecordLength) > maxInt { return nil, nil, false, fmt.Errorf("record length %d overflows maximum int value %d", uRecordLength, maxInt) } recordLength := int(uRecordLength) if recordLength <= 0 { return nil, nil, false, fmt.Errorf("cannot handle attribute with zero or negative record length %d", recordLength) } if recordLength > len(b) { return nil, nil, false, fmt.Errorf("attribute record length %d exceeds data length %d", recordLength, len(b)) } return r.Read(0, recordLength), r.ReadFrom(recordLength), false, nil } // ParseAttribute parses bytes into an Attribute. The data is assumed to be in Little Endian order. Only the attribute // headers are parsed, not the actual attribute data. func ParseAttribute(b []byte) (Attribute, error) { if len(b) < minAttributeDataLength { return Attribute{}, fmt.Errorf("attribute data should be at least %d bytes but is %d", minAttributeDataLength, len(b)) } r := binutil.NewLittleEndianReader(b) header, err := parseAttributeHeader(r, b) if err != nil { return Attribute{}, err } payload, err := parseAttributePayload(r, b, header) if err != nil { return Attribute{}, err } return Attribute{ Type: header.attrType, Resident: header.resident, Name: header.name, Flags: header.flags, AttributeId: header.attributeID, AllocatedSize: payload.allocatedSize, ActualSize: payload.actualSize, Data: binutil.Duplicate(payload.data), }, nil } func parseAttributeHeader(r *binutil.BinReader, b []byte) (attributeHeader, error) { nameLength := int(r.Byte(0x09)) nameOffset := int(r.Uint16(0x0A)) name := "" if nameLength != 0 { nameEnd := nameOffset + nameLength*2 if len(b) < nameEnd { return attributeHeader{}, fmt.Errorf("expected attribute name length to be at least %d but is %d", nameEnd, len(b)) } name = utf16.DecodeString(r.Read(nameOffset, nameLength*2), binary.LittleEndian) } resident := r.Byte(0x08) == 0x00 payloadOffset := int(r.Uint16(0x20)) if resident { payloadOffset = int(r.Uint16(0x14)) } return attributeHeader{ attrType: AttributeType(r.Uint32(0)), resident: resident, name: name, flags: AttributeFlags(r.Uint16(0x0C)), attributeID: int(r.Uint16(0x0E)), payloadOffset: payloadOffset, }, nil } func parseAttributePayload(r *binutil.BinReader, b []byte, header attributeHeader) (attributePayload, error) { if header.resident { uDataLength := r.Uint32(0x10) if int64(uDataLength) > maxInt { return attributePayload{}, fmt.Errorf("attribute data length %d overflows maximum int value %d", uDataLength, maxInt) } dataLength := int(uDataLength) expectedDataLength := header.payloadOffset + dataLength if len(b) < expectedDataLength { return attributePayload{}, fmt.Errorf("expected attribute data length to be at least %d but is %d", expectedDataLength, len(b)) } return attributePayload{data: r.Read(header.payloadOffset, dataLength)}, nil } if len(b) < header.payloadOffset { return attributePayload{}, fmt.Errorf("expected attribute data length to be at least %d but is %d", header.payloadOffset, len(b)) } return attributePayload{ allocatedSize: r.Uint64(0x28), actualSize: r.Uint64(0x30), data: r.ReadFrom(header.payloadOffset), }, nil } // A DataRun represents a fragment of data somewhere on a volume. The OffsetCluster, which can be negative, is relative // to a previous DataRun's offset. The OffsetCluster of the first DataRun in a list is relative to the beginning of the // volume. type DataRun struct { OffsetCluster int64 LengthInClusters uint64 } // ParseDataRuns parses bytes into a list of DataRuns. Each DataRun's OffsetCluster is relative to the DataRun before // it. The first element's OffsetCluster is relative to the beginning of the volume. func ParseDataRuns(b []byte) ([]DataRun, error) { if len(b) == 0 { return []DataRun{}, nil } runs := make([]DataRun, 0) for len(b) > 0 { run, consumed, done, err := parseDataRun(b) if err != nil { return nil, err } if done { break } runs = append(runs, run) b = b[consumed:] } return runs, nil } func parseDataRun(b []byte) (DataRun, int, bool, error) { r := binutil.NewLittleEndianReader(b) header := r.Byte(0) if header == 0 { return DataRun{}, dataRunTerminatorLength, true, nil } lengthLength := int(header &^ 0xF0) offsetLength := int(header >> 4) dataRunDataLength := offsetLength + lengthLength headerAndDataLength := dataRunDataLength + dataRunTerminatorLength if len(b) < headerAndDataLength { return DataRun{}, 0, false, fmt.Errorf("expected at least %d bytes of datarun data but is %d", headerAndDataLength, len(b)) } dataRunData := r.Reader(1, dataRunDataLength) lengthBytes := dataRunData.Read(0, lengthLength) offsetBytes := dataRunData.Read(lengthLength, offsetLength) return DataRun{ OffsetCluster: int64(binary.LittleEndian.Uint64(padToSigned(offsetBytes, 8))), LengthInClusters: binary.LittleEndian.Uint64(padToUnsigned(lengthBytes, 8)), }, headerAndDataLength, false, nil } // DataRunsToFragments transform a list of DataRuns with relative offsets and lengths specified in cluster into a list // of fragment.Fragment elements with absolute offsets and lengths specified in bytes (for example for use in a // fragment.Reader). Note that data will probably not align to a cluster exactly so there could be some padding at the // end. It is up to the user of the Fragments to limit reads to actual data size (eg. by using an io.LimitedReader or // modifying the last element in the list to limit its length). func DataRunsToFragments(runs []DataRun, bytesPerCluster int) []fragment.Fragment { frags := make([]fragment.Fragment, len(runs)) previousOffsetCluster := int64(0) for i, run := range runs { exactClusterOffset := previousOffsetCluster + run.OffsetCluster frags[i] = fragment.Fragment{ Offset: exactClusterOffset * int64(bytesPerCluster), Length: int64(run.LengthInClusters) * int64(bytesPerCluster), } previousOffsetCluster = exactClusterOffset } return frags } func padToUnsigned(data []byte, length int) []byte { if len(data) > length { return data } if len(data) == length { return data } result := make([]byte, length) if len(data) == 0 { return result } copy(result, data) return result } func padToSigned(data []byte, length int) []byte { if len(data) > length { return data } if len(data) == length { return data } result := make([]byte, length) if len(data) == 0 { return result } copy(result, data) if data[len(data)-1]&0x80 != 0 { for i := len(data); i < length; i++ { result[i] = 0xFF } } return result } // Name returns a string representation of the attribute type. For example "$STANDARD_INFORMATION" or "$FILE_NAME". For // anyte attribute type which is unknown, Name will return "unknown". func (at AttributeType) Name() string { switch at { case AttributeTypeStandardInformation: return "$STANDARD_INFORMATION" case AttributeTypeAttributeList: return "$ATTRIBUTE_LIST" case AttributeTypeFileName: return "$FILE_NAME" case AttributeTypeObjectId: return "$OBJECT_ID" case AttributeTypeSecurityDescriptor: return "$SECURITY_DESCRIPTOR" case AttributeTypeVolumeName: return "$VOLUME_NAME" case AttributeTypeVolumeInformation: return "$VOLUME_INFORMATION" case AttributeTypeData: return "$DATA" case AttributeTypeIndexRoot: return "$INDEX_ROOT" case AttributeTypeIndexAllocation: return "$INDEX_ALLOCATION" case AttributeTypeBitmap: return "$BITMAP" case AttributeTypeReparsePoint: return "$REPARSE_POINT" case AttributeTypeEAInformation: return "$EA_INFORMATION" case AttributeTypeEA: return "$EA" case AttributeTypePropertySet: return "$PROPERTY_SET" case AttributeTypeLoggedUtilityStream: return "$LOGGED_UTILITY_STREAM" } return "unknown" }