photos/src/lib/file-scanner.ts
Michael Mainguy 5c3ad988f5 Add duplicate detection, conflict handling, and fix pagination issues
- Add photo_conflicts table for files with same path but different content
- Implement SHA256-based duplicate detection in file scanner
- Add conflict detection methods to PhotoService
- Skip identical files with info logging, store conflicts with warnings
- Fix infinite scroll pagination race conditions with functional state updates
- Add scroll throttling to prevent rapid API calls
- Enhance PhotoThumbnail with comprehensive EXIF date/time display
- Add composite React keys to prevent duplicate rendering issues

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-27 10:55:28 -05:00

459 lines
16 KiB
TypeScript

import { readdir, stat } from 'fs/promises'
import { createReadStream } from 'fs'
import { join, extname, basename } from 'path'
import { photoService } from './photo-service'
import { randomUUID, createHash } from 'crypto'
import sharp from 'sharp'
import exifReader from 'exif-reader'
/**
 * File extensions the scanner treats as photos.
 * Entries are lower-case and include the leading dot, matching the
 * `extname(name).toLowerCase()` lookups performed against this set.
 */
const SUPPORTED_EXTENSIONS = new Set<string>([
  '.jpg',
  '.jpeg',
  '.png',
  '.gif',
  '.bmp',
  '.webp',
  '.tiff',
  '.tif',
  '.ico',
  '.svg'
])
/**
 * Running counters for one directory scan.
 * A single instance is created by scanDirectory and mutated in place by the
 * recursive walk and the per-file processing.
 */
interface ScanResult {
// Regular files encountered during the walk, counted before the extension
// check, so unsupported file types are included.
totalFiles: number
// Files for which a photo record was created.
photosAdded: number
// Files skipped because the conflict check reported them as duplicates.
photosSkipped: number
// Failures of any kind: unreadable directories, per-entry and per-file
// errors, a failed top-level scan, and also path conflicts (same path,
// different content), which are counted here rather than separately.
errors: number
}
/**
 * Scans `directoryPath` recursively for photos and then refreshes the stored
 * statistics of the matching directory record, if one exists.
 *
 * Failures are never propagated: any error is logged, counted in
 * `errors`, and the (possibly partial) counters are still returned.
 *
 * @param directoryPath Root directory to scan; also used as the base path
 *   recorded on every photo found below it.
 * @returns The counters accumulated during the scan.
 */
export async function scanDirectory(directoryPath: string): Promise<ScanResult> {
  const startedAt = Date.now()
  const banner = `[FILE SCANNER] ========================================`

  console.log(banner)
  console.log(`[FILE SCANNER] Starting scan of directory: ${directoryPath}`)
  console.log(`[FILE SCANNER] Start time: ${new Date().toISOString()}`)

  const counters: ScanResult = { totalFiles: 0, photosAdded: 0, photosSkipped: 0, errors: 0 }

  try {
    console.log(`[FILE SCANNER] Beginning recursive directory scan...`)
    await scanDirectoryRecursive(directoryPath, directoryPath, counters)

    const walkElapsedMs = Date.now() - startedAt
    console.log(`[FILE SCANNER] Recursive scan completed in ${walkElapsedMs}ms`)
    console.log(`[FILE SCANNER] Files processed: ${counters.totalFiles}`)
    console.log(`[FILE SCANNER] Photos added: ${counters.photosAdded}`)
    console.log(`[FILE SCANNER] Photos skipped: ${counters.photosSkipped}`)
    console.log(`[FILE SCANNER] Errors encountered: ${counters.errors}`)

    // Refresh the directory record's photo count and total size
    console.log(`[FILE SCANNER] Updating directory statistics...`)
    const existingRecord = photoService.getDirectoryByPath(directoryPath)
    if (!existingRecord) {
      console.warn(`[FILE SCANNER] Directory record not found for ${directoryPath}`)
    } else {
      const photosInDirectory = photoService.getPhotos({ directory: directoryPath })
      let totalBytes = 0
      for (const photo of photosInDirectory) {
        totalBytes = totalBytes + photo.filesize
      }
      console.log(`[FILE SCANNER] Directory stats: ${photosInDirectory.length} photos, ${totalBytes} bytes`)
      photoService.createOrUpdateDirectory({
        path: directoryPath,
        name: existingRecord.name,
        last_scanned: new Date().toISOString(),
        photo_count: photosInDirectory.length,
        total_size: totalBytes
      })
      console.log(`[FILE SCANNER] Directory record updated successfully`)
    }

    const overallElapsedMs = Date.now() - startedAt
    console.log(banner)
    console.log(`[FILE SCANNER] Scan completed for ${directoryPath}`)
    console.log(`[FILE SCANNER] Total duration: ${overallElapsedMs}ms`)
    console.log(`[FILE SCANNER] Final result:`, counters)
    console.log(banner)
  } catch (error) {
    const failedAfterMs = Date.now() - startedAt
    console.error(banner)
    console.error(`[FILE SCANNER] Error scanning directory ${directoryPath} after ${failedAfterMs}ms:`, error)
    console.error(`[FILE SCANNER] Partial result:`, counters)
    console.error(banner)
    // The failure is counted only after the partial result has been logged
    counters.errors++
  }

  return counters
}
/**
 * Walks `currentPath` depth-first, counting every regular file and handing
 * files with a supported image extension to `processPhotoFile`.
 *
 * Hidden directories (names starting with '.') and a fixed list of
 * non-photo directory names are not descended into. Errors are logged and
 * counted in `result.errors`; they never propagate to the caller.
 *
 * @param currentPath Directory being read in this call.
 * @param basePath Root of the scan, passed through unchanged.
 * @param result Counters mutated in place.
 */
async function scanDirectoryRecursive(
  currentPath: string,
  basePath: string,
  result: ScanResult
): Promise<void> {
  try {
    const dirents = await readdir(currentPath, { withFileTypes: true })

    for (const dirent of dirents) {
      const entryPath = join(currentPath, dirent.name)

      try {
        if (dirent.isDirectory()) {
          // Skip hidden directories and common non-photo directories
          const isHidden = dirent.name.startsWith('.')
          const isExcluded = ['node_modules', 'dist', 'build', 'temp', 'cache'].includes(
            dirent.name.toLowerCase()
          )
          if (isHidden || isExcluded) {
            continue
          }
          await scanDirectoryRecursive(entryPath, basePath, result)
          continue
        }

        if (!dirent.isFile()) {
          continue
        }

        // Every regular file is counted, supported or not
        result.totalFiles++
        const extension = extname(dirent.name).toLowerCase()
        if (SUPPORTED_EXTENSIONS.has(extension)) {
          await processPhotoFile(entryPath, basePath, result)
        }
      } catch (fileError) {
        console.error(`Error processing ${entryPath}:`, fileError)
        result.errors++
      }
    }
  } catch (error) {
    console.error(`Error reading directory ${currentPath}:`, error)
    result.errors++
  }
}
/**
 * Builds the record stored for a scanned file: filesystem facts first, then
 * whatever `extractImageMetadata` returns layered on top (which may replace
 * the baseline `metadata` JSON with a richer one).
 *
 * Metadata extraction is best-effort; a failure is logged and the baseline
 * record is returned unchanged.
 */
async function buildPhotoRecord(
  filePath: string,
  basePath: string,
  stats: { size: number; birthtime: Date; mtime: Date }
  // The record is handed to photoService, whose input type is not visible
  // from this file, so it is deliberately left loosely typed.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
): Promise<any> {
  const filename = basename(filePath)
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const record: any = {
    filename,
    filepath: filePath,
    directory: basePath,
    filesize: stats.size,
    created_at: stats.birthtime.toISOString(),
    modified_at: stats.mtime.toISOString(),
    favorite: false,
    metadata: JSON.stringify({
      extension: extname(filename).toLowerCase(),
      scanned_at: new Date().toISOString()
    })
  }

  // Try to extract image metadata (width, height, format)
  try {
    const metadata = await extractImageMetadata(filePath)
    Object.assign(record, metadata)
  } catch (metadataError) {
    console.warn(`[FILE SCANNER] Could not extract metadata for ${filePath}:`, metadataError)
  }

  return record
}

/**
 * Processes one image file: hashes it, checks it against already-known
 * photos, and either skips it, records a conflict, or stores a new photo.
 *
 * Outcomes and the counter each one touches:
 * - duplicate (as reported by the conflict check): `photosSkipped`
 * - conflict (same path, different content): stored via
 *   `createPhotoConflict` and counted in `errors`
 * - new file: stored via `createPhoto` and counted in `photosAdded`;
 *   a failure while storing its hash is logged but does not undo the add
 * - any other failure: logged with context and counted in `errors`
 *
 * Never throws; all errors are absorbed into `result`.
 *
 * @param filePath Full path of the file to process.
 * @param basePath Scan root, stored as the photo's `directory`.
 * @param result Counters mutated in place.
 */
async function processPhotoFile(
  filePath: string,
  basePath: string,
  result: ScanResult
): Promise<void> {
  const filename = basename(filePath)
  // Both are declared outside the try so the catch block can report how far
  // processing got before the failure.
  let stats: { size: number; birthtime: Date; mtime: Date } | null = null
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  let photoData: any = null

  try {
    stats = await stat(filePath)

    // Compute SHA256 hash first for conflict detection
    console.log(`[FILE SCANNER] Computing SHA256 hash for: ${filename}`)
    const sha256Hash = await computeFileHash(filePath)
    console.log(`[FILE SCANNER] Computed hash for ${filename}: ${sha256Hash}`)

    // Check for conflicts with existing photos
    const conflictCheck = photoService.checkForPhotoConflict(filePath, sha256Hash)

    if (conflictCheck.isDuplicate) {
      console.info(`[FILE SCANNER] Skipping duplicate file (same path, same content): ${filename}`)
      result.photosSkipped++
      return
    }

    if (conflictCheck.hasConflict && conflictCheck.existingPhoto) {
      console.warn(`[FILE SCANNER] CONFLICT DETECTED: File ${filename} has same path but different content than existing photo`)

      photoData = await buildPhotoRecord(filePath, basePath, stats)

      // Store in conflicts table. Only the existing photo's ID is available
      // here (not its hash), so the reason text names it as an ID.
      const existingPhotoId = conflictCheck.existingPhoto.id
      const conflictReason = `File has same path as existing photo but different SHA256 hash. Existing photo ID: ${existingPhotoId}, New hash: ${sha256Hash}`
      const conflictId = photoService.createPhotoConflict(photoData, existingPhotoId, conflictReason)
      console.warn(`[FILE SCANNER] Stored conflict record with ID: ${conflictId}`)

      // Conflicts are surfaced to the caller through the error counter
      result.errors++
      return
    }

    // New file: build and store the photo record
    photoData = await buildPhotoRecord(filePath, basePath, stats)

    console.log(`[FILE SCANNER] Creating photo record for: ${filename}`)
    console.log(`[FILE SCANNER] Photo data:`, photoData)

    // Debug: Log each value and its type before database insertion
    console.log(`[FILE SCANNER] Debug - checking photoData types:`)
    for (const [key, value] of Object.entries(photoData)) {
      console.log(`  ${key}:`, typeof value, value)
    }

    // Create photo record
    const photo = photoService.createPhoto(photoData)
    result.photosAdded++
    console.log(`[FILE SCANNER] Successfully added photo: ${filename}`)

    // Store SHA256 hash
    try {
      // Create or update hash record
      const hashRecord = photoService.createOrUpdateImageHash(sha256Hash)

      // Associate photo with hash
      const associated = photoService.associatePhotoWithHash(photo.id, hashRecord.id)
      console.log(`[FILE SCANNER] Associated photo with hash: ${associated}`)
    } catch (hashError) {
      console.error(`[FILE SCANNER] Error storing hash for ${filePath}:`, hashError)
      // Continue processing even if hash storage fails
    }

    // Log progress every 100 files
    const processedCount = result.photosAdded + result.photosSkipped
    if (processedCount % 100 === 0) {
      console.log(`[FILE SCANNER] Progress: ${processedCount} photos processed (${result.photosAdded} added, ${result.photosSkipped} skipped, ${result.errors} errors)`)
    }
  } catch (error) {
    console.error(`[FILE SCANNER] Error processing photo ${filePath}:`, error)
    console.error(`[FILE SCANNER] Photo data that failed:`, {
      filename,
      filepath: filePath,
      directory: basePath,
      filesize: stats ? stats.size : 'unknown',
      photoData: photoData ? Object.keys(photoData) : 'not created'
    })
    result.errors++
  }
}
/** Tags read from the EXIF image group, as [EXIF tag, stored key] pairs. */
const EXIF_IMAGE_FIELDS: ReadonlyArray<readonly [string, string]> = [
  ['Make', 'camera_make'],
  ['Model', 'camera_model'],
  ['Software', 'software'],
  ['DateTime', 'date_time'],
  ['Orientation', 'orientation'],
  ['XResolution', 'x_resolution'],
  ['YResolution', 'y_resolution']
]

/** Tags read from the EXIF photo group, as [EXIF tag, stored key] pairs. */
const EXIF_PHOTO_FIELDS: ReadonlyArray<readonly [string, string]> = [
  ['DateTimeOriginal', 'date_time_original'],
  ['DateTimeDigitized', 'date_time_digitized'],
  ['ExposureTime', 'exposure_time'],
  ['FNumber', 'f_number'],
  ['ExposureProgram', 'exposure_program'],
  ['ISOSpeedRatings', 'iso_speed'],
  ['FocalLength', 'focal_length'],
  ['Flash', 'flash'],
  ['WhiteBalance', 'white_balance'],
  ['ColorSpace', 'color_space'],
  ['LensModel', 'lens_model']
]

/**
 * Reports whether an EXIF value carries information worth storing.
 * Missing values, empty strings and NaN are rejected; zero and `false` are
 * accepted because 0 is a meaningful value for several tags (for example
 * Flash, WhiteBalance and ExposureProgram).
 */
function isUsableExifValue(value: unknown): boolean {
  if (value === undefined || value === null || value === '') return false
  return !(typeof value === 'number' && Number.isNaN(value))
}

/** Copies every usable tag listed in `fields` from `source` into `target`. */
function copyExifFields(
  source: Record<string, unknown>,
  fields: ReadonlyArray<readonly [string, string]>,
  target: Record<string, unknown>
): void {
  for (const [tag, key] of fields) {
    const value = source[tag]
    if (isUsableExifValue(value)) target[key] = value
  }
}

/**
 * Reads basic image information (width, height, format) and selected EXIF
 * fields for `filePath`.
 *
 * SVG files are not opened at all; only `{ format: 'SVG' }` is returned.
 * For other files the returned `metadata` is a JSON string holding the
 * extension, the scan timestamp, and one of: the extracted `exif` object,
 * `exif: null` when the image has no EXIF block, or `exif_error` when the
 * block could not be parsed. When an EXIF block parses but yields none of
 * the fields of interest, `metadata` is left unset.
 *
 * Never throws: if the image cannot be read, the format is guessed from the
 * file extension and the error message is recorded in `metadata`.
 *
 * @param filePath Path of the image file to inspect.
 * @returns The fields that could be determined; all are optional.
 */
async function extractImageMetadata(filePath: string): Promise<{
  width?: number
  height?: number
  format?: string
  metadata?: string
}> {
  const ext = extname(filePath).toLowerCase()

  try {
    // Skip SVG files as Sharp doesn't handle them well
    if (ext === '.svg') {
      return { format: 'SVG' }
    }

    // Use Sharp to get basic image information and EXIF data
    const image = sharp(filePath)
    const metadata = await image.metadata()

    const result: {
      width?: number
      height?: number
      format?: string
      metadata?: string
    } = {
      width: typeof metadata.width === 'number' ? metadata.width : undefined,
      height: typeof metadata.height === 'number' ? metadata.height : undefined,
      format: metadata.format?.toUpperCase() || 'Unknown'
    }

    if (!metadata.exif) {
      // No EXIF data available
      result.metadata = JSON.stringify({
        extension: ext,
        scanned_at: new Date().toISOString(),
        exif: null
      })
      return result
    }

    try {
      // The parsed structure is dynamic and its group names differ between
      // parser versions, so it is accessed untyped.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const exif: any = exifReader(metadata.exif)
      const exifInfo: Record<string, unknown> = {}

      // Image information
      const imageGroup = exif.image || exif.Image
      if (imageGroup) copyExifFields(imageGroup, EXIF_IMAGE_FIELDS, exifInfo)

      // Photo-specific EXIF data. `Photo` is accepted as an additional group
      // name alongside the `exif`/`Exif` spellings.
      const photoGroup = exif.exif || exif.Exif || exif.Photo
      if (photoGroup) copyExifFields(photoGroup, EXIF_PHOTO_FIELDS, exifInfo)

      // GPS information. `GPSInfo` is accepted alongside `gps`/`GPS`.
      const gpsGroup = exif.gps || exif.GPS || exif.GPSInfo
      if (gpsGroup) {
        const gpsInfo: Record<string, unknown> = {}

        if (gpsGroup.GPSLatitude && gpsGroup.GPSLatitudeRef) {
          gpsInfo.latitude = convertDMSToDD(gpsGroup.GPSLatitude, gpsGroup.GPSLatitudeRef)
        }
        if (gpsGroup.GPSLongitude && gpsGroup.GPSLongitudeRef) {
          gpsInfo.longitude = convertDMSToDD(gpsGroup.GPSLongitude, gpsGroup.GPSLongitudeRef)
        }
        // An altitude of exactly 0 is valid and must not be dropped.
        // A reference value of 1 marks the altitude as negative.
        if (isUsableExifValue(gpsGroup.GPSAltitude) && gpsGroup.GPSAltitudeRef !== undefined) {
          gpsInfo.altitude = gpsGroup.GPSAltitudeRef === 1 ? -gpsGroup.GPSAltitude : gpsGroup.GPSAltitude
        }

        if (Object.keys(gpsInfo).length > 0) {
          exifInfo.gps = gpsInfo
        }
      }

      // Store EXIF data as JSON string if we found any relevant data
      if (Object.keys(exifInfo).length > 0) {
        result.metadata = JSON.stringify({
          extension: ext,
          scanned_at: new Date().toISOString(),
          exif: exifInfo
        })
      }
    } catch (exifError) {
      console.warn(`Error parsing EXIF data for ${filePath}:`, exifError)
      // Fall back to basic metadata
      result.metadata = JSON.stringify({
        extension: ext,
        scanned_at: new Date().toISOString(),
        exif_error: 'Failed to parse EXIF data'
      })
    }

    return result
  } catch (error) {
    console.warn(`Error extracting metadata for ${filePath}:`, error)

    // Fall back to basic format detection
    const formatMap: Record<string, string> = {
      '.jpg': 'JPEG',
      '.jpeg': 'JPEG',
      '.png': 'PNG',
      '.gif': 'GIF',
      '.bmp': 'BMP',
      '.webp': 'WebP',
      '.tiff': 'TIFF',
      '.tif': 'TIFF',
      '.ico': 'ICO',
      '.svg': 'SVG'
    }

    return {
      format: formatMap[ext] || 'Unknown',
      metadata: JSON.stringify({
        extension: ext,
        scanned_at: new Date().toISOString(),
        extraction_error: error instanceof Error ? error.message : 'Unknown error'
      })
    }
  }
}
/**
 * Converts a coordinate given as [degrees, minutes, seconds] into decimal
 * degrees.
 *
 * @param dms Degrees, minutes and seconds, in that order. Anything that is
 *   not an array of at least three elements yields 0; falsy components
 *   (including NaN) count as 0.
 * @param ref Hemisphere reference. Exactly 'S' or 'W' negates the result;
 *   every other value leaves it positive.
 * @returns The coordinate in decimal degrees.
 */
function convertDMSToDD(dms: number[], ref: string): number {
  const hasAllComponents = Array.isArray(dms) && dms.length >= 3
  if (!hasAllComponents) {
    return 0
  }

  const [deg, min, sec] = dms
  const magnitude = (deg || 0) + (min || 0) / 60 + (sec || 0) / 3600

  const isSouthOrWest = ref === 'S' || ref === 'W'
  return isSouthOrWest ? -magnitude : magnitude
}
/**
 * Computes the SHA-256 digest of a file's contents by streaming it, so the
 * whole file is never held in memory at once.
 *
 * @param filePath Path of the file to hash.
 * @returns The digest as a lower-case hexadecimal string.
 * @throws Rejects with the underlying stream error if the file cannot be read.
 */
async function computeFileHash(filePath: string): Promise<string> {
  const hasher = createHash('sha256')
  for await (const chunk of createReadStream(filePath)) {
    hasher.update(chunk)
  }
  return hasher.digest('hex')
}
/**
 * Reports whether `directoryPath` or any of its non-hidden subdirectories
 * contains at least one file with a supported image extension.
 *
 * Entries are examined in listing order and the search stops at the first
 * match. If the directory cannot be read, the error is logged and `false`
 * is returned.
 *
 * @param directoryPath Directory to inspect.
 * @returns `true` as soon as a supported image file is found, else `false`.
 */
export async function hasPhotos(directoryPath: string): Promise<boolean> {
  try {
    const listing = await readdir(directoryPath, { withFileTypes: true })

    for (const item of listing) {
      if (item.isFile()) {
        const extension = extname(item.name).toLowerCase()
        if (SUPPORTED_EXTENSIONS.has(extension)) {
          return true
        }
        continue
      }

      // Only descend into directories whose name does not start with '.'
      const isVisibleDirectory = item.isDirectory() && !item.name.startsWith('.')
      if (!isVisibleDirectory) {
        continue
      }

      const foundBelow = await hasPhotos(join(directoryPath, item.name))
      if (foundBelow) {
        return true
      }
    }

    return false
  } catch (error) {
    console.error(`Error checking for photos in ${directoryPath}:`, error)
    return false
  }
}