Files
imgPub/src/components/OcrStep.jsx

361 lines
12 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { useEffect, useMemo, useRef, useState } from 'react';
import {
Alert,
Box,
Button,
LinearProgress,
Stack,
TextField,
Typography,
} from '@mui/material';
import { useNavigate } from 'react-router-dom';
import Tesseract from 'tesseract.js';
import { useAppStore } from '../store/useAppStore';
import { correctTurkishCharacters } from '../utils/ocrUtils';
const OcrStep = () => {
const navigate = useNavigate();
const isDev = import.meta.env.DEV;
const croppedImages = useAppStore((state) => state.croppedImages);
const ocrText = useAppStore((state) => state.ocrText);
const setOcrText = useAppStore((state) => state.setOcrText);
const setError = useAppStore((state) => state.setError);
const clearTranslation = useAppStore((state) => state.clearTranslation);
const bookMetadata = useAppStore((state) => state.bookMetadata);
const epubImports = useAppStore((state) => state.epubImports);
const pdfImports = useAppStore((state) => state.pdfImports);
const [status, setStatus] = useState('idle');
const [currentIndex, setCurrentIndex] = useState(0);
const [previewText, setPreviewText] = useState('');
const [isEditing, setIsEditing] = useState(false);
const total = croppedImages.length;
const abortRef = useRef(false);
const assetBase = useMemo(() => {
const rawBase = import.meta.env.BASE_URL ?? '/';
if (rawBase === '.' || rawBase === './' || rawBase === '/') {
return '';
}
return rawBase.endsWith('/') ? rawBase.slice(0, -1) : rawBase;
}, []);
const workerRef = useRef(null);
const [workerReady, setWorkerReady] = useState(false);
const previewRef = useRef(null);
// removed auto navigation to translation
const handlePreviewChange = (event) => {
if (!isEditing) return;
const value = event.target.value;
setPreviewText(value);
setOcrText(value);
};
const orderedImages = useMemo(
() => [...croppedImages].sort((a, b) => (a.order ?? 0) - (b.order ?? 0)),
[croppedImages],
);
useEffect(() => {
if (!orderedImages.length) return undefined;
let cancelled = false;
const origin =
typeof window !== 'undefined' ? window.location.origin : '';
const prefix = `${origin}${assetBase}`;
const paths = {
workerPath: `${prefix}/tesseract/worker.min.js`,
corePath: `${prefix}/tesseract/tesseract-core-simd-lstm.wasm.js`,
langPath: `${prefix}/tesseract`,
};
const initWorker = async () => {
setWorkerReady(false);
try {
const workerOptions = {
workerPath: paths.workerPath,
corePath: paths.corePath,
langPath: paths.langPath,
};
if (isDev) {
workerOptions.logger = (m) => console.log('Tesseract:', m);
}
const worker = await Tesseract.createWorker(
'tur', // Dil doğrudan belirt
1, // OEM level (LSTM)
workerOptions,
);
// Türkçe karakter tanımını iyileştir
await worker.setParameters({
tessedit_char_whitelist: 'abcçdefgğhıijklmnoöprsştuüvyzâîûABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZÂÎÛ0123456789 .,;:!?\'"-_',
tessedit_pageseg_mode: '6', // Tek bir metin bloğu varsay
preserve_interword_spaces: '1',
});
if (cancelled) {
await worker.terminate();
return;
}
// Dil ve worker zaten createWorker sırasında yüklendi
if (isDev) {
console.log('Tesseract worker başarıyla oluşturuldu');
}
workerRef.current = worker;
setWorkerReady(true);
} catch (error) {
console.error('Tesseract başlatma hatası:', error);
let errorMessage;
if (error.message.includes('traineddata')) {
errorMessage = 'Tesseract dil dosyaları bulunamadı. Lütfen tarayıcı cache\'ini temizleyip sayfayı yenileyin.';
} else if (error.message.includes('TESSDATA_PREFIX')) {
errorMessage = 'Tesseract yapılandırma hatası: Lütfen sayfayı yenileyin.';
} else {
errorMessage = `Tesseract başlatılamadı: ${error.message}`;
}
setError(errorMessage);
setWorkerReady(false);
}
};
initWorker();
return () => {
cancelled = true;
if (workerRef.current) {
workerRef.current.terminate();
workerRef.current = null;
setWorkerReady(false);
}
};
}, [assetBase, isDev, orderedImages.length, setError]);
useEffect(() => {
if (!orderedImages.length && !(epubImports.length || pdfImports.length)) return;
setStatus('idle');
setCurrentIndex(0);
setPreviewText(ocrText || '');
if (!ocrText) {
setOcrText('');
}
clearTranslation();
}, [clearTranslation, epubImports.length, orderedImages, pdfImports.length, ocrText, setOcrText]);
useEffect(() => {
if (previewRef.current) {
previewRef.current.scrollTop = previewRef.current.scrollHeight;
}
}, [previewText]);
useEffect(() => {
if (!total || status === 'done' || !workerReady) return;
abortRef.current = false;
const run = async () => {
setStatus('running');
setCurrentIndex(0);
const worker = workerRef.current;
if (!worker) return;
try {
let combinedText = '';
setOcrText('');
setPreviewText('');
for (let index = 0; index < orderedImages.length; index += 1) {
if (abortRef.current) break;
const image = orderedImages[index];
setCurrentIndex(index + 1);
// eslint-disable-next-line no-await-in-loop
const { data } = await worker.recognize(image.blob);
const correctedText = correctTurkishCharacters(data.text || '');
if (correctedText) {
combinedText = combinedText
? `${combinedText}\n\n${correctedText}`
: correctedText;
setPreviewText(combinedText);
}
}
if (!abortRef.current) {
setOcrText(combinedText);
setStatus('done');
}
} catch (error) {
if (!abortRef.current) {
setError(error.message);
setStatus('idle');
}
}
};
run();
return () => {
abortRef.current = true;
};
}, [orderedImages, setError, setOcrText, status, total, workerReady]);
if (!orderedImages.length && !epubImports.length && !pdfImports.length) {
return (
<Stack spacing={2}>
<Alert severity="info">Önce görselleri cropla.</Alert>
<Button variant="contained" onClick={() => navigate('/bulk-crop')}>
Toplu Crop adımına dön
</Button>
</Stack>
);
}
if (!orderedImages.length && (epubImports.length || pdfImports.length)) {
const documentCount = epubImports.length + pdfImports.length;
return (
<Stack spacing={4}>
{bookMetadata && (
<Typography variant="body2" color="success.main">
Seçilen kitap: <strong>{bookMetadata.title}</strong>
{bookMetadata.authors?.length ? `${bookMetadata.authors.join(', ')}` : ''}
</Typography>
)}
<Box textAlign="center">
<Typography variant="h5">Belge metni ayrıştırıldı</Typography>
<Typography color="text.secondary">
{documentCount > 1
? `${documentCount} belge dosyasından metin çıkarıldı.`
: 'Yüklediğin belgedeki metin çıkarıldı.'}
</Typography>
</Box>
<Box sx={{ p: 2, borderRadius: 2, bgcolor: 'background.default' }}>
<Stack direction="row" alignItems="center" justifyContent="space-between">
<Typography variant="subtitle1">Ön izleme</Typography>
<Button
size="small"
variant={isEditing ? 'contained' : 'outlined'}
onClick={() => setIsEditing((prev) => !prev)}
>
{isEditing ? 'Görünüme geç' : 'Metni düzenle'}
</Button>
</Stack>
{isEditing ? (
<TextField
fullWidth
multiline
minRows={6}
value={previewText}
onChange={handlePreviewChange}
sx={{ mt: 1 }}
/>
) : (
<Box
ref={previewRef}
sx={{
mt: 1,
maxHeight: '10em',
overflowY: 'auto',
whiteSpace: 'pre-wrap',
lineHeight: 1.5,
fontSize: '0.95rem',
color: 'text.secondary',
pr: 1,
}}
>
{previewText || ocrText?.trim() || 'Metin ayrıştırılıyor...'}
</Box>
)}
</Box>
<Stack direction={{ xs: 'column', sm: 'row' }} spacing={2} justifyContent="space-between">
<Button variant="contained" onClick={() => navigate('/')}>
Yükleme adımına dön
</Button>
<Button
variant="contained"
onClick={() => navigate('/translate')}
disabled={!ocrText?.trim()}
>
Çeviri adımına geç
</Button>
</Stack>
</Stack>
);
}
const progressValue =
workerReady && total ? (currentIndex / total) * 100 : 0;
const progressVariant = workerReady ? 'determinate' : 'indeterminate';
const progressText = !workerReady
? 'OCR işçisi hazırlanıyor...'
: status === 'done'
? 'OCR işlemi tamamlandı.'
: `Şu an ${currentIndex}/${total} resim işleniyor`;
return (
<Stack spacing={4}>
{bookMetadata && (
<Typography variant="body2" color="success.main">
Seçilen kitap: <strong>{bookMetadata.title}</strong>
{bookMetadata.authors?.length ? `${bookMetadata.authors.join(', ')}` : ''}
</Typography>
)}
<Box textAlign="center">
<Typography variant="h5">OCR işlemi</Typography>
<Typography color="text.secondary">
Tüm görseller sırayla işleniyor. Bu adım biraz sürebilir.
</Typography>
</Box>
<Box>
<LinearProgress
variant={progressVariant}
value={progressVariant === 'determinate' ? progressValue : undefined}
sx={{ height: 10, borderRadius: 5 }}
/>
<Typography mt={2} align="center">
{progressText}
</Typography>
</Box>
<Box sx={{ p: 2, borderRadius: 2, bgcolor: 'background.default' }}>
<Stack direction="row" alignItems="center" justifyContent="space-between">
<Typography variant="subtitle1">Ön izleme</Typography>
<Button
size="small"
variant={isEditing ? 'contained' : 'outlined'}
onClick={() => setIsEditing((prev) => !prev)}
>
{isEditing ? 'Görünüme geç' : 'Metni düzenle'}
</Button>
</Stack>
{isEditing ? (
<TextField
fullWidth
multiline
minRows={6}
value={previewText}
onChange={handlePreviewChange}
sx={{ mt: 1 }}
/>
) : (
<Box
ref={previewRef}
sx={{
mt: 1,
maxHeight: '8.5em',
overflowY: 'auto',
whiteSpace: 'pre-wrap',
lineHeight: 1.5,
fontSize: '0.95rem',
color: 'text.secondary',
pr: 1,
}}
>
{previewText || 'Metin bekleniyor'}
</Box>
)}
</Box>
<Stack direction={{ xs: 'column', sm: 'row' }} spacing={2} justifyContent="space-between">
<Button variant="contained" onClick={() => navigate('/bulk-crop')}>
Geri dön
</Button>
<Button
variant="contained"
onClick={() => navigate('/translate')}
disabled={status !== 'done'}
>
Çeviri adımına geç
</Button>
</Stack>
</Stack>
);
};
export default OcrStep;