/*
Package mft provides functions to parse records and their attributes in an NTFS Master File Table ("MFT" for short).

# Basic usage

First parse a record using mft.ParseRecord(), which parses the record header and the attribute headers. Then parse
each attribute's data individually using the various mft.Parse...() functions.

	// Error handling left out for brevity; recordData holds the raw bytes of a single MFT record
	record, err := mft.ParseRecord(recordData)
	attrs := record.FindAttributes(mft.AttributeTypeFileName)
	fileName, err := mft.ParseFileName(attrs[0])
*/
package mft

import (
	"bytes"
	"encoding/binary"
	"fmt"

	"b612.me/wincmd/ntfs/binutil"
	"b612.me/wincmd/ntfs/fragment"
	"b612.me/wincmd/ntfs/utf16"
)

var (
	// fileSignature holds the four bytes every record must start with: 0x46 0x49 0x4c 0x45, which is "FILE" in
	// ASCII. parseRecordHeader rejects data whose first four bytes differ.
	fileSignature = []byte{0x46, 0x49, 0x4c, 0x45}
)

// Size limits and minimum lengths used while validating raw MFT data.
//
// minRecordHeaderLength must cover every fixed header field that parseRecordHeader reads. The field furthest into the
// record is the 4-byte record number at offset 0x2C, so at least 0x30 (48) bytes are required. It used to be 42, which
// allowed inputs of 42 to 47 bytes to pass the length check and then be read out of range.
const (
	maxInt                  = int64(^uint(0) >> 1) // largest value an int can hold on this platform
	minRecordHeaderLength   = 48                   // fixed record header fields, up to and including 0x2C..0x2F
	minAttributeDataLength  = 22                   // smallest attribute ParseAttribute accepts
	minAttributeListHeader  = 8                    // attribute type (4 bytes) followed by record length (4 bytes)
	minAttributeTypeLength  = 4                    // attribute type only; enough to recognise the terminator
	dataRunTerminatorLength = 1                    // a data run list ends with one zero byte; run headers are also one byte
)

// recordHeader holds the fixed-position fields that parseRecordHeader decodes from the start of a record. The offsets
// mentioned below are byte offsets from the beginning of the record; all values are read as Little Endian.
type recordHeader struct {
	signature            []byte        // duplicate of the first four bytes of the record
	fileReference        FileReference // record number from the 32-bit value at 0x2C, sequence number from 0x10
	baseRecordReference  FileReference // parsed from the 8 bytes at 0x20
	logFileSequence      uint64        // 64-bit value at 0x08
	hardLinkCount        int           // 16-bit value at 0x12
	flags                RecordFlag    // 16-bit bit mask at 0x16
	actualSize           uint32        // 32-bit value at 0x18
	allocatedSize        uint32        // 32-bit value at 0x1C
	nextAttributeID      int           // 16-bit value at 0x28
	firstAttributeOffset int           // 16-bit value at 0x14; checked to be smaller than the record length
}

// attributeHeader holds the fields that parseAttributeHeader decodes from the start of a single attribute. The offsets
// mentioned below are byte offsets from the beginning of the attribute; all values are read as Little Endian.
type attributeHeader struct {
	attrType      AttributeType  // 32-bit value at 0x00
	resident      bool           // true when the byte at 0x08 is zero
	name          string         // decoded from UTF-16; empty when the name length byte at 0x09 is zero
	flags         AttributeFlags // 16-bit bit mask at 0x0C
	attributeID   int            // 16-bit value at 0x0E
	payloadOffset int            // 16-bit value at 0x14 for resident attributes, at 0x20 otherwise
}

// attributePayload is the result of parseAttributePayload: the raw attribute data plus the two size fields that are
// only read for non-resident attributes.
type attributePayload struct {
	allocatedSize uint64 // 64-bit value at 0x28 for non-resident attributes; left zero for resident ones
	actualSize    uint64 // 64-bit value at 0x30 for non-resident attributes; left zero for resident ones
	data          []byte // resident: the bytes at the payload offset; non-resident: everything from the payload offset on
}

// A Record represents an MFT entry, excluding all technical data (such as "offset to first attribute"). The Attributes
// list only contains the attribute headers and raw data; the attribute data has to be parsed separately. When this is a
// base record, the BaseRecordReference will be zero. When it is an extension record, the BaseRecordReference points to
// the record's base record.
//
// A Record is normally obtained from ParseRecord, which fills every field from the record header and the attribute
// list that follows it.
type Record struct {
	Signature             []byte        // the four signature bytes the record started with
	FileReference         FileReference // this record's own number (read as 32 bits) and sequence number
	BaseRecordReference   FileReference // zero for a base record, see above
	LogFileSequenceNumber uint64        // 64-bit value read from header offset 0x08
	HardLinkCount         int           // 16-bit value read from header offset 0x12
	Flags                 RecordFlag    // bit mask; test individual bits with RecordFlag.Is
	ActualSize            uint32        // 32-bit value read from header offset 0x18
	AllocatedSize         uint32        // 32-bit value read from header offset 0x1C
	NextAttributeId       int           // 16-bit value read from header offset 0x28
	Attributes            []Attribute   // attributes in the order they appear in the record
}

// ParseRecord turns the raw bytes of one MFT record into a Record. The header is validated and the update sequence
// fixup is applied first (on a duplicate of b made by parseRecordHeader); the attribute list is then parsed starting
// at the first attribute offset taken from the header. All values are read as Little Endian.
//
// Only the attribute headers are interpreted. The content of each attribute is returned raw in Attribute.Data and
// has to be parsed separately. Any error from header or attribute parsing is returned unchanged together with a zero
// Record.
func ParseRecord(b []byte) (Record, error) {
	hdr, fixedUp, err := parseRecordHeader(b)
	if err != nil {
		return Record{}, err
	}

	attrs, err := ParseAttributes(fixedUp[hdr.firstAttributeOffset:])
	if err != nil {
		return Record{}, err
	}

	var rec Record
	rec.Signature = hdr.signature
	rec.FileReference = hdr.fileReference
	rec.BaseRecordReference = hdr.baseRecordReference
	rec.LogFileSequenceNumber = hdr.logFileSequence
	rec.HardLinkCount = hdr.hardLinkCount
	rec.Flags = hdr.flags
	rec.ActualSize = hdr.actualSize
	rec.AllocatedSize = hdr.allocatedSize
	rec.NextAttributeId = hdr.nextAttributeID
	rec.Attributes = attrs
	return rec, nil
}

// parseRecordHeader validates and decodes the fixed header at the start of an MFT record.
//
// It checks the minimum length and the signature of b, then works on a duplicate of b: the base record reference and
// the first attribute offset are read, the update sequence fixup (offset at 0x04, length at 0x06) is applied to the
// duplicate, and finally the remaining header fields are read from the fixed-up data.
//
// It returns the decoded header together with the fixed-up duplicate, so that the caller can continue parsing the
// attributes from it. On failure a zero header, a nil slice and a descriptive error are returned.
//
// NOTE(review): the record number is read as 4 bytes at 0x2C, which needs 48 bytes of data; confirm that
// minRecordHeaderLength guarantees that much.
func parseRecordHeader(b []byte) (recordHeader, []byte, error) {
	if got := len(b); got < minRecordHeaderLength {
		return recordHeader{}, nil, fmt.Errorf("record data length should be at least %d but is %d", minRecordHeaderLength, got)
	}
	if magic := b[:4]; !bytes.Equal(magic, fileSignature) {
		return recordHeader{}, nil, fmt.Errorf("unknown record signature: %# x", magic)
	}

	fixedUp := binutil.Duplicate(b)
	reader := binutil.NewLittleEndianReader(fixedUp)

	baseRef, err := ParseFileReference(reader.Read(0x20, 8))
	if err != nil {
		return recordHeader{}, nil, fmt.Errorf("unable to parse base record reference: %v", err)
	}

	attrStart := int(reader.Uint16(0x14))
	if attrStart < 0 || attrStart >= len(fixedUp) {
		return recordHeader{}, nil, fmt.Errorf("invalid first attribute offset %d (data length: %d)", attrStart, len(fixedUp))
	}

	updateSeqOffset := int(reader.Uint16(0x04))
	updateSeqLength := int(reader.Uint16(0x06))
	if _, err := applyFixUp(fixedUp, updateSeqOffset, updateSeqLength); err != nil {
		return recordHeader{}, nil, fmt.Errorf("unable to apply fixup: %v", err)
	}

	// Everything below is read after the fixup has been applied to fixedUp.
	var hdr recordHeader
	hdr.signature = binutil.Duplicate(fixedUp[:4])
	hdr.fileReference = FileReference{
		RecordNumber:   uint64(reader.Uint32(0x2C)),
		SequenceNumber: reader.Uint16(0x10),
	}
	hdr.baseRecordReference = baseRef
	hdr.logFileSequence = reader.Uint64(0x08)
	hdr.hardLinkCount = int(reader.Uint16(0x12))
	hdr.flags = RecordFlag(reader.Uint16(0x16))
	hdr.actualSize = reader.Uint32(0x18)
	hdr.allocatedSize = reader.Uint32(0x1C)
	hdr.nextAttributeID = int(reader.Uint16(0x28))
	hdr.firstAttributeOffset = attrStart
	return hdr, fixedUp, nil
}

// A FileReference identifies an MFT record by its record number and sequence number. ParseFileReference fills the
// RecordNumber from 6 bytes (48 bits), while the reference ParseRecord stores in Record.FileReference is read from a
// 4-byte field, so in practice the RecordNumber is not expected to need more than 48 bits.
type FileReference struct {
	RecordNumber   uint64
	SequenceNumber uint16
}

// ToUint64 packs the reference into a single 64-bit value: the lower 48 bits hold the RecordNumber and the upper 16
// bits hold the SequenceNumber. This is the same layout ParseFileReference reads (interpreted as Little Endian).
// Bits 48-63 of RecordNumber, if set, are overwritten by the sequence number.
func (f FileReference) ToUint64() uint64 {
	var packed [8]byte
	binary.LittleEndian.PutUint64(packed[:], f.RecordNumber)
	// The sequence number occupies the last two bytes, replacing the top 16 bits of the record number.
	binary.LittleEndian.PutUint16(packed[6:], f.SequenceNumber)
	return binary.LittleEndian.Uint64(packed[:])
}

// ParseFileReference decodes an 8-byte, Little Endian file reference. Bytes 0-5 form the record number (zero-extended
// to 64 bits) and bytes 6-7 form the sequence number. An error is returned when b is not exactly 8 bytes long.
func ParseFileReference(b []byte) (FileReference, error) {
	if got := len(b); got != 8 {
		return FileReference{}, fmt.Errorf("expected 8 bytes but got %d", got)
	}

	recordBytes, sequenceBytes := b[:6], b[6:]
	ref := FileReference{
		RecordNumber:   binary.LittleEndian.Uint64(padToUnsigned(recordBytes, 8)),
		SequenceNumber: binary.LittleEndian.Uint16(sequenceBytes),
	}
	return ref, nil
}

// RecordFlag represents a bit mask flag indicating the status of the MFT record.
type RecordFlag uint16

// Bit values for the RecordFlag. For example, an in-use directory has value 0x0003.
const (
	RecordFlagInUse       RecordFlag = 0x0001
	RecordFlagIsDirectory RecordFlag = 0x0002
	RecordFlagInExtend    RecordFlag = 0x0004
	RecordFlagIsIndex     RecordFlag = 0x0008
)

// Is reports whether every bit set in c is also set in f. When c combines several flags, all of them must be present;
// when c is zero the result is always true.
//
// Is has a value receiver so that it can be called on any RecordFlag, including constants and other values that are
// not addressable. Calls through a *RecordFlag keep working because Go dereferences the pointer automatically.
func (f RecordFlag) Is(c RecordFlag) bool {
	return f&c == c
}

// applyFixUp applies the update sequence ("fixup") of a record to b in place.
//
// offset is the position of the update sequence inside b and length is its size counted in 2-byte entries: the first
// entry is the update sequence number, each following entry is the replacement for the last two bytes of one sector.
// The number of sectors is therefore length-1, and the sector size is derived by dividing len(b) by that number.
//
// The function first verifies that the last two bytes of every sector equal the update sequence number and only then
// overwrites them with the corresponding replacement entries, so b is left untouched when validation fails. On success
// the same slice b is returned; on failure nil and an error describing the problem are returned.
func applyFixUp(b []byte, offset int, length int) ([]byte, error) {
	switch {
	case offset < 0:
		return nil, fmt.Errorf("update sequence offset %d is negative", offset)
	case length < 2:
		return nil, fmt.Errorf("update sequence length %d is too small", length)
	}

	byteLength := length * 2 // length is in pairs, not bytes
	if offset > len(b) || byteLength > len(b)-offset {
		return nil, fmt.Errorf("update sequence range [%d:%d] exceeds record length %d", offset, offset+byteLength, len(b))
	}

	sequence := binutil.NewLittleEndianReader(b).Read(offset, byteLength)
	sequenceNumber, replacements := sequence[:2], sequence[2:]
	if n := len(replacements); n == 0 || n%2 != 0 {
		return nil, fmt.Errorf("invalid update sequence array length %d", n)
	}

	sectors := len(replacements) / 2
	if sectors == 0 {
		return nil, fmt.Errorf("update sequence does not contain any sector entries")
	}
	if len(b)%sectors != 0 {
		return nil, fmt.Errorf("record length %d is not divisible by sector count %d", len(b), sectors)
	}
	sectorSize := len(b) / sectors
	if sectorSize < 2 {
		return nil, fmt.Errorf("invalid sector size %d", sectorSize)
	}

	// Pass 1: every sector must end with the update sequence number.
	for sector := 1; sector <= sectors; sector++ {
		tail := sector*sectorSize - 2
		if tail < 0 || tail+2 > len(b) {
			return nil, fmt.Errorf("invalid sector offset %d for record length %d", tail, len(b))
		}
		if !bytes.Equal(sequenceNumber, b[tail:tail+2]) {
			return nil, fmt.Errorf("update sequence mismatch at pos %d", tail)
		}
	}

	// Pass 2: put the original bytes back at the end of every sector.
	for sector := 0; sector < sectors; sector++ {
		tail := (sector+1)*sectorSize - 2
		entry := replacements[sector*2 : sector*2+2]
		copy(b[tail:tail+2], entry)
	}

	return b, nil
}

// FindAttributes collects every attribute of this record whose Type equals attrType, in the order in which they
// appear in r.Attributes. The result is never nil: when nothing matches, an empty slice is returned.
func (r *Record) FindAttributes(attrType AttributeType) []Attribute {
	matches := []Attribute{}
	for i := range r.Attributes {
		if r.Attributes[i].Type != attrType {
			continue
		}
		matches = append(matches, r.Attributes[i])
	}
	return matches
}

// Attribute represents an MFT record attribute header and its corresponding raw attribute Data (excluding header data).
// When the attribute is Resident, the Data contains the actual attribute's data. When the attribute is non-resident,
// the Data contains DataRuns pointing to the actual data. DataRun data can be parsed using ParseDataRuns().
//
// AllocatedSize and ActualSize are only read for non-resident attributes; ParseAttribute leaves them zero for resident
// ones.
type Attribute struct {
	Type          AttributeType  // attribute type; see the AttributeType constants
	Resident      bool           // true when the attribute's data is stored inside the record itself
	Name          string         // attribute name; empty when the attribute is unnamed
	Flags         AttributeFlags // bit mask; test individual bits with AttributeFlags.Is
	AttributeId   int            // 16-bit attribute id read from the attribute header
	AllocatedSize uint64         // non-resident only: 64-bit value read from attribute offset 0x28
	ActualSize    uint64         // non-resident only: 64-bit value read from attribute offset 0x30
	Data          []byte         // raw attribute data (resident) or data run bytes (non-resident)
}

// AttributeType represents the type of an Attribute. Use Name() to get the attribute type's name. The value is the
// 32-bit number found in the first four bytes of an attribute.
type AttributeType uint32

// Known values for AttributeType. Note that other values might occur too; Name() reports "unknown" for any value that
// is not listed here, as well as for AttributeTypeTerminator.
const (
	AttributeTypeStandardInformation AttributeType = 0x10       // $STANDARD_INFORMATION; always resident
	AttributeTypeAttributeList       AttributeType = 0x20       // $ATTRIBUTE_LIST; mixed residency
	AttributeTypeFileName            AttributeType = 0x30       // $FILE_NAME; always resident
	AttributeTypeObjectId            AttributeType = 0x40       // $OBJECT_ID; always resident
	AttributeTypeSecurityDescriptor  AttributeType = 0x50       // $SECURITY_DESCRIPTOR; always resident?
	AttributeTypeVolumeName          AttributeType = 0x60       // $VOLUME_NAME; always resident?
	AttributeTypeVolumeInformation   AttributeType = 0x70       // $VOLUME_INFORMATION; never resident?
	AttributeTypeData                AttributeType = 0x80       // $DATA; mixed residency
	AttributeTypeIndexRoot           AttributeType = 0x90       // $INDEX_ROOT; always resident
	AttributeTypeIndexAllocation     AttributeType = 0xa0       // $INDEX_ALLOCATION; never resident?
	AttributeTypeBitmap              AttributeType = 0xb0       // $BITMAP; nearly always resident?
	AttributeTypeReparsePoint        AttributeType = 0xc0       // $REPARSE_POINT; always resident?
	AttributeTypeEAInformation       AttributeType = 0xd0       // $EA_INFORMATION; always resident
	AttributeTypeEA                  AttributeType = 0xe0       // $EA; nearly always resident?
	AttributeTypePropertySet         AttributeType = 0xf0       // $PROPERTY_SET
	AttributeTypeLoggedUtilityStream AttributeType = 0x100      // $LOGGED_UTILITY_STREAM; always resident
	AttributeTypeTerminator          AttributeType = 0xFFFFFFFF // Indicates the last attribute in a list; will not actually be returned by ParseAttributes
)

// AttributeFlags represents a bit mask flag indicating various properties of an attribute's data.
type AttributeFlags uint16

// Bit values for the AttributeFlags. For example, an encrypted, compressed attribute has value 0x4001.
const (
	AttributeFlagsCompressed AttributeFlags = 0x0001
	AttributeFlagsEncrypted  AttributeFlags = 0x4000
	AttributeFlagsSparse     AttributeFlags = 0x8000
)

// Is reports whether every bit set in c is also set in f. When c combines several flags, all of them must be present;
// when c is zero the result is always true.
//
// Is has a value receiver so that it can be called on any AttributeFlags, including constants and other values that
// are not addressable. Calls through a *AttributeFlags keep working because Go dereferences the pointer automatically.
func (f AttributeFlags) Is(c AttributeFlags) bool {
	return f&c == c
}

// ParseAttributes decodes a sequence of attributes stored back to back in b (Little Endian). Parsing stops at the
// terminator attribute type or when all of b has been consumed, whichever comes first. Only the attribute headers are
// interpreted; each attribute's content is returned raw in Attribute.Data.
//
// The returned slice is never nil on success (it is empty when b holds no attributes). When any attribute is
// malformed, nil and the error are returned.
func ParseAttributes(b []byte) ([]Attribute, error) {
	parsed := []Attribute{}
	for rest := b; len(rest) > 0; {
		raw, tail, terminated, err := nextAttributeRecordData(rest)
		if err != nil {
			return nil, err
		}
		if terminated {
			return parsed, nil
		}

		attr, err := ParseAttribute(raw)
		if err != nil {
			return nil, err
		}
		parsed = append(parsed, attr)
		rest = tail
	}
	return parsed, nil
}

// nextAttributeRecordData splits the first attribute off the front of b.
//
// The attribute type is read from the first four bytes; when it is the terminator, done is true and both slices are
// nil. Otherwise the attribute's record length is read from offset 0x04, validated to be positive and to fit inside b,
// and b is divided into recordData (the attribute itself) and remaining (everything after it). An error is returned
// when b is too short to hold the type or the length, or when the length is invalid.
func nextAttributeRecordData(b []byte) (recordData []byte, remaining []byte, done bool, err error) {
	available := len(b)
	if available < minAttributeTypeLength {
		return nil, nil, false, fmt.Errorf("attribute header data should be at least %d bytes but is %d", minAttributeTypeLength, available)
	}

	reader := binutil.NewLittleEndianReader(b)
	if attrType := AttributeType(reader.Uint32(0)); attrType == AttributeTypeTerminator {
		return nil, nil, true, nil
	}

	if available < minAttributeListHeader {
		return nil, nil, false, fmt.Errorf("cannot read attribute header record length, data should be at least %d bytes but is %d", minAttributeListHeader, available)
	}

	rawLength := reader.Uint32(0x04)
	if int64(rawLength) > maxInt {
		// Only possible where int is narrower than 64 bits.
		return nil, nil, false, fmt.Errorf("record length %d overflows maximum int value %d", rawLength, maxInt)
	}

	switch size := int(rawLength); {
	case size <= 0:
		return nil, nil, false, fmt.Errorf("cannot handle attribute with zero or negative record length %d", size)
	case size > available:
		return nil, nil, false, fmt.Errorf("attribute record length %d exceeds data length %d", size, available)
	default:
		return reader.Read(0, size), reader.ReadFrom(size), false, nil
	}
}

// ParseAttribute decodes a single attribute from b, which must hold exactly one attribute in Little Endian order and
// be at least minAttributeDataLength bytes long. The header fields are interpreted; the attribute's content is not
// and is returned as a duplicate in Attribute.Data. AllocatedSize and ActualSize are only set for non-resident
// attributes.
func ParseAttribute(b []byte) (Attribute, error) {
	if size := len(b); size < minAttributeDataLength {
		return Attribute{}, fmt.Errorf("attribute data should be at least %d bytes but is %d", minAttributeDataLength, size)
	}

	reader := binutil.NewLittleEndianReader(b)

	hdr, err := parseAttributeHeader(reader, b)
	if err != nil {
		return Attribute{}, err
	}

	body, err := parseAttributePayload(reader, b, hdr)
	if err != nil {
		return Attribute{}, err
	}

	var attr Attribute
	attr.Type = hdr.attrType
	attr.Resident = hdr.resident
	attr.Name = hdr.name
	attr.Flags = hdr.flags
	attr.AttributeId = hdr.attributeID
	attr.AllocatedSize = body.allocatedSize
	attr.ActualSize = body.actualSize
	attr.Data = binutil.Duplicate(body.data)
	return attr, nil
}

// parseAttributeHeader decodes the header fields of a single attribute. r must be a Little Endian reader over b, and b
// must be at least minAttributeDataLength (22) bytes long; ParseAttribute checks this before calling.
//
// The attribute name is decoded from UTF-16 when the name length byte at 0x09 is non-zero; its position is taken from
// the 16-bit name offset at 0x0A. The payload offset is read from 0x14 for resident attributes and from 0x20 for
// non-resident ones.
//
// The field at 0x20 lies beyond the minimum attribute length, so it is only read for non-resident attributes and only
// after checking that b is long enough. Reading it unconditionally would go out of range for short resident
// attributes, such as a 24-byte attribute without data.
func parseAttributeHeader(r *binutil.BinReader, b []byte) (attributeHeader, error) {
	const (
		residentPayloadOffsetPos    = 0x14
		nonResidentPayloadOffsetPos = 0x20
	)

	nameLength := int(r.Byte(0x09)) // counted in UTF-16 code units, hence the *2 below
	nameOffset := int(r.Uint16(0x0A))
	name := ""
	if nameLength != 0 {
		nameEnd := nameOffset + nameLength*2
		if len(b) < nameEnd {
			return attributeHeader{}, fmt.Errorf("expected attribute name length to be at least %d but is %d", nameEnd, len(b))
		}
		name = utf16.DecodeString(r.Read(nameOffset, nameLength*2), binary.LittleEndian)
	}

	resident := r.Byte(0x08) == 0x00
	payloadOffsetPos := residentPayloadOffsetPos
	if !resident {
		payloadOffsetPos = nonResidentPayloadOffsetPos
		if required := payloadOffsetPos + 2; len(b) < required {
			return attributeHeader{}, fmt.Errorf("non-resident attribute header should be at least %d bytes but is %d", required, len(b))
		}
	}
	payloadOffset := int(r.Uint16(payloadOffsetPos))

	return attributeHeader{
		attrType:      AttributeType(r.Uint32(0)),
		resident:      resident,
		name:          name,
		flags:         AttributeFlags(r.Uint16(0x0C)),
		attributeID:   int(r.Uint16(0x0E)),
		payloadOffset: payloadOffset,
	}, nil
}

// parseAttributePayload extracts the raw payload of a single attribute. r must be a Little Endian reader over b, and
// header must be the result of parseAttributeHeader for the same data.
//
// Resident attributes: the 32-bit data length is read from 0x10 and the payload is the range
// [payloadOffset, payloadOffset+length). The size fields of the result stay zero.
//
// Non-resident attributes: the allocated and actual sizes are read from 0x28 and 0x30 and the payload is everything
// from payloadOffset to the end of b (the data runs).
//
// An error is returned when the payload would extend beyond b, or when a non-resident attribute is too short to
// contain the size fields.
func parseAttributePayload(r *binutil.BinReader, b []byte, header attributeHeader) (attributePayload, error) {
	if header.resident {
		uDataLength := r.Uint32(0x10)
		if int64(uDataLength) > maxInt {
			return attributePayload{}, fmt.Errorf("attribute data length %d overflows maximum int value %d", uDataLength, maxInt)
		}
		dataLength := int(uDataLength)
		// Compare against the space left after the payload offset instead of adding offset and length: the sum
		// can overflow int where int is 32 bits wide, which would let an oversized length slip through.
		if dataLength > len(b)-header.payloadOffset {
			expectedDataLength := int64(header.payloadOffset) + int64(dataLength)
			return attributePayload{}, fmt.Errorf("expected attribute data length to be at least %d but is %d", expectedDataLength, len(b))
		}
		return attributePayload{data: r.Read(header.payloadOffset, dataLength)}, nil
	}

	if len(b) < header.payloadOffset {
		return attributePayload{}, fmt.Errorf("expected attribute data length to be at least %d but is %d", header.payloadOffset, len(b))
	}
	// The size fields end at 0x30+8; make sure they are inside b before reading them.
	const nonResidentSizesEnd = 0x30 + 8
	if len(b) < nonResidentSizesEnd {
		return attributePayload{}, fmt.Errorf("non-resident attribute data should be at least %d bytes but is %d", nonResidentSizesEnd, len(b))
	}
	return attributePayload{
		allocatedSize: r.Uint64(0x28),
		actualSize:    r.Uint64(0x30),
		data:          r.ReadFrom(header.payloadOffset),
	}, nil
}

// A DataRun represents a fragment of data somewhere on a volume. The OffsetCluster, which can be negative, is relative
// to a previous DataRun's offset. The OffsetCluster of the first DataRun in a list is relative to the beginning of the
// volume.
//
// Use DataRunsToFragments to convert a list of DataRuns into absolute byte offsets and lengths.
type DataRun struct {
	OffsetCluster    int64  // relative offset in clusters; sign-extended from the run's offset bytes
	LengthInClusters uint64 // length in clusters; zero-extended from the run's length bytes
}

// ParseDataRuns decodes the data run list stored in b. Parsing stops at the terminating zero byte or when all of b
// has been consumed, whichever comes first. The OffsetCluster of the first run is relative to the beginning of the
// volume; the OffsetCluster of every following run is relative to the run before it.
//
// The returned slice is never nil on success (it is empty when b holds no runs). When a run is truncated, nil and an
// error are returned.
func ParseDataRuns(b []byte) ([]DataRun, error) {
	parsed := []DataRun{}
	for rest := b; len(rest) > 0; {
		run, used, terminated, err := parseDataRun(rest)
		if err != nil {
			return nil, err
		}
		if terminated {
			return parsed, nil
		}
		parsed = append(parsed, run)
		rest = rest[used:]
	}
	return parsed, nil
}

// parseDataRun decodes the data run at the start of b, which must not be empty.
//
// The first byte is the run header: its low nibble is the number of bytes of the length field, its high nibble the
// number of bytes of the offset field. The length field follows the header directly and the offset field follows the
// length field. A header of zero terminates the list; in that case done (the third result) is true.
//
// The results are the decoded run, the number of bytes consumed (header plus both fields), the terminator flag and an
// error when b is too short for the sizes announced in the header. The length is zero-extended and the offset
// sign-extended to 64 bits. Field sizes above 8 bytes are not rejected; only the first 8 bytes of such a field end up
// in the result.
func parseDataRun(b []byte) (DataRun, int, bool, error) {
	reader := binutil.NewLittleEndianReader(b)

	head := reader.Byte(0)
	if head == 0 {
		return DataRun{}, dataRunTerminatorLength, true, nil
	}

	lengthWidth := int(head & 0x0F)
	offsetWidth := int(head >> 4)
	fieldsWidth := offsetWidth + lengthWidth
	// dataRunTerminatorLength doubles as the size of the one-byte run header here.
	consumed := fieldsWidth + dataRunTerminatorLength
	if len(b) < consumed {
		return DataRun{}, 0, false, fmt.Errorf("expected at least %d bytes of datarun data but is %d", consumed, len(b))
	}

	fields := reader.Reader(1, fieldsWidth)
	rawLength := fields.Read(0, lengthWidth)
	rawOffset := fields.Read(lengthWidth, offsetWidth)

	var run DataRun
	run.OffsetCluster = int64(binary.LittleEndian.Uint64(padToSigned(rawOffset, 8)))
	run.LengthInClusters = binary.LittleEndian.Uint64(padToUnsigned(rawLength, 8))
	return run, consumed, false, nil
}

// DataRunsToFragments converts a list of DataRuns, whose offsets are relative to each other and whose sizes are
// expressed in clusters, into fragment.Fragment elements with absolute offsets and lengths expressed in bytes (for
// example for use in a fragment.Reader). The offset of each run is added to the absolute cluster offset of the run
// before it, starting from zero, and the result is multiplied by bytesPerCluster.
//
// Note that data will probably not align to a cluster exactly, so there could be some padding at the end. It is up to
// the user of the Fragments to limit reads to the actual data size (eg. by using an io.LimitedReader or by shortening
// the last element in the list).
func DataRunsToFragments(runs []DataRun, bytesPerCluster int) []fragment.Fragment {
	clusterSize := int64(bytesPerCluster)
	out := make([]fragment.Fragment, 0, len(runs))

	absoluteCluster := int64(0)
	for _, run := range runs {
		absoluteCluster += run.OffsetCluster
		out = append(out, fragment.Fragment{
			Offset: absoluteCluster * clusterSize,
			Length: int64(run.LengthInClusters) * clusterSize,
		})
	}
	return out
}

// padToUnsigned zero-extends a Little Endian unsigned value to length bytes by appending zero bytes. When data is
// already length bytes or longer, it is returned as is (the same slice, not a copy, and never truncated); otherwise a
// new slice is returned.
func padToUnsigned(data []byte, length int) []byte {
	if len(data) >= length {
		return data
	}
	padded := make([]byte, length)
	copy(padded, data)
	return padded
}

// padToSigned sign-extends a Little Endian two's complement value to length bytes: the appended bytes are 0xFF when
// the highest bit of the last (most significant) byte of data is set and 0x00 otherwise. Empty data yields length
// zero bytes. When data is already length bytes or longer, it is returned as is (the same slice, not a copy, and
// never truncated); otherwise a new slice is returned.
func padToSigned(data []byte, length int) []byte {
	n := len(data)
	if n >= length {
		return data
	}

	padded := make([]byte, length)
	if n == 0 {
		return padded
	}
	copy(padded, data)

	if data[n-1] < 0x80 {
		// Non-negative value: the zero bytes from make are already the right padding.
		return padded
	}
	for i := range padded[n:] {
		padded[n+i] = 0xFF
	}
	return padded
}

// attributeTypeNames maps every known AttributeType to the name reported by AttributeType.Name.
var attributeTypeNames = map[AttributeType]string{
	AttributeTypeStandardInformation: "$STANDARD_INFORMATION",
	AttributeTypeAttributeList:       "$ATTRIBUTE_LIST",
	AttributeTypeFileName:            "$FILE_NAME",
	AttributeTypeObjectId:            "$OBJECT_ID",
	AttributeTypeSecurityDescriptor:  "$SECURITY_DESCRIPTOR",
	AttributeTypeVolumeName:          "$VOLUME_NAME",
	AttributeTypeVolumeInformation:   "$VOLUME_INFORMATION",
	AttributeTypeData:                "$DATA",
	AttributeTypeIndexRoot:           "$INDEX_ROOT",
	AttributeTypeIndexAllocation:     "$INDEX_ALLOCATION",
	AttributeTypeBitmap:              "$BITMAP",
	AttributeTypeReparsePoint:        "$REPARSE_POINT",
	AttributeTypeEAInformation:       "$EA_INFORMATION",
	AttributeTypeEA:                  "$EA",
	AttributeTypePropertySet:         "$PROPERTY_SET",
	AttributeTypeLoggedUtilityStream: "$LOGGED_UTILITY_STREAM",
}

// Name returns a string representation of the attribute type, for example "$STANDARD_INFORMATION" or "$FILE_NAME".
// For any attribute type which is unknown (including AttributeTypeTerminator), Name returns "unknown".
func (at AttributeType) Name() string {
	if name, known := attributeTypeNames[at]; known {
		return name
	}
	return "unknown"
}