diff --git a/HISTORY.md b/HISTORY.md
index 8568ea4..2ee3550 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,3 +1,7 @@
+## v0.2.27 add multiple fields to remove duplicates in dedup mode
+
+- dedup of multiple fields to remove duplicates use as: ```fofa dedup -d host,port,ip,protocol -i input.csv -o output.csv```
+
 ## v0.2.26 update jsRender mode and headline in dump mode
 
 - update jsRender deadline
diff --git a/cmd/fofa/cmd/duplicate.go b/cmd/fofa/cmd/duplicate.go
index 1df50ab..0d2d302 100644
--- a/cmd/fofa/cmd/duplicate.go
+++ b/cmd/fofa/cmd/duplicate.go
@@ -79,46 +79,59 @@ func writeCSV(filePath string, records [][]string) error {
 	return nil
 }
 
-func deduplicate(records [][]string, fieldName string) ([][]string, error) {
-	headers := records[0]
-	var index int
-	found := false
+// indexOf returns the index of field in headers, or -1 if it is absent.
+func indexOf(headers []string, field string) int {
 	for i, header := range headers {
-		if header == fieldName {
-			index = i
-			found = true
-			break
+		if header == field {
+			return i
 		}
 	}
-	if !found {
-		return nil, fmt.Errorf("field %s not exist", fieldName)
+	return -1
+}
+
+// deduplicates drops data rows whose values in the given fields repeat an
+// earlier row, keeping the first occurrence. records[0] must be the header
+// row; it is returned unchanged as the first row of the result.
+//
+// An error is returned when records has no data rows, when a field is not a
+// header column, or when a data row is too short to contain a key column.
+func deduplicates(records [][]string, fields []string) ([][]string, error) {
+	if len(records) < 2 {
+		return nil, errors.New("deduplicate failed: CSV file is empty")
+	}
+	headers := records[0]
+
+	// Resolve each requested field to its column index in the header row.
+	fieldIndexes := make([]int, 0, len(fields))
+	for _, field := range fields {
+		index := indexOf(headers, field)
+		if index == -1 {
+			return nil, fmt.Errorf("field '%s' not found in headers", field)
+		}
+		fieldIndexes = append(fieldIndexes, index)
 	}
 
+	// Keep only the first row seen for each combination of key values.
 	seen := make(map[string]bool)
-	var uniqueRecords [][]string
-	uniqueRecords = append(uniqueRecords, headers)
+	uniqueRows := [][]string{headers}
 
-	for _, record := range records[1:] {
-		key := record[index]
+	for rowNum, row := range records[1:] {
+		keyParts := make([]string, len(fieldIndexes))
+		for i, idx := range fieldIndexes {
+			if idx >= len(row) {
+				return nil, fmt.Errorf("row %d has %d columns, key column %d is missing", rowNum+2, len(row), idx+1)
+			}
+			keyParts[i] = row[idx]
+		}
+		// NUL cannot be confused with a "|" inside a field value.
+		key := strings.Join(keyParts, "\x00")
 		if !seen[key] {
-			uniqueRecords = append(uniqueRecords, record)
 			seen[key] = true
+			uniqueRows = append(uniqueRows, row)
 		}
 	}
 
-	return uniqueRecords, nil
-}
-
-func deduplicates(records [][]string, fields []string) ([][]string, error) {
-	var uniqueRecords [][]string
-	for _, field := range fields {
-		uniq, err := deduplicate(records, field)
-		if err != nil {
-			return nil, errors.New("deduplicate failed: " + err.Error())
-		}
-		uniqueRecords = append(uniqueRecords, uniq...)
-	}
-	return uniqueRecords, nil
+	return uniqueRows, nil
 }
 
 func deduplicateAction(ctx *cli.Context) error {