From b6155c3fcba0ac92922b4f92e3b58e4fcd7a8d4f Mon Sep 17 00:00:00 2001 From: sbilketay Date: Mon, 17 Nov 2025 23:21:29 +0300 Subject: [PATCH] =?UTF-8?q?Resim=20ile=20beraber=20epub=20da=20y=C3=BCklem?= =?UTF-8?q?e=20=C3=B6zelli=C4=9Fi=20eklendi?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- package.json | 1 + src/components/OcrStep.jsx | 55 +++++++++++- src/components/UploadStep.jsx | 160 +++++++++++++++++++++++++++++++--- src/store/useAppStore.js | 3 + src/utils/epubImport.js | 131 ++++++++++++++++++++++++++++ 5 files changed, 335 insertions(+), 15 deletions(-) create mode 100644 src/utils/epubImport.js diff --git a/package.json b/package.json index 1d3139f..eada62c 100644 --- a/package.json +++ b/package.json @@ -16,6 +16,7 @@ "@mui/icons-material": "^6.1.1", "@mui/material": "^6.1.1", "@supabase/supabase-js": "^2.81.1", + "jszip": "^3.10.1", "react": "^18.3.1", "react-dom": "^18.3.1", "react-dropzone": "^14.2.3", diff --git a/src/components/OcrStep.jsx b/src/components/OcrStep.jsx index b780187..12da1c9 100644 --- a/src/components/OcrStep.jsx +++ b/src/components/OcrStep.jsx @@ -21,6 +21,7 @@ const OcrStep = () => { const setError = useAppStore((state) => state.setError); const clearTranslation = useAppStore((state) => state.clearTranslation); const bookMetadata = useAppStore((state) => state.bookMetadata); + const epubImports = useAppStore((state) => state.epubImports); const [status, setStatus] = useState('idle'); const [currentIndex, setCurrentIndex] = useState(0); const [previewText, setPreviewText] = useState(''); @@ -118,6 +119,7 @@ const OcrStep = () => { }, [assetBase, isDev, orderedImages.length, setError]); useEffect(() => { + if (!orderedImages.length) return; setStatus('idle'); setCurrentIndex(0); setPreviewText(''); @@ -174,7 +176,7 @@ const OcrStep = () => { }, [orderedImages, setError, setOcrText, status, total, workerReady]); - if (!orderedImages.length) { + if (!orderedImages.length && !epubImports.length) { return ( Önce görselleri cropla. @@ -185,6 +187,57 @@ const OcrStep = () => { ); } + if (!orderedImages.length && epubImports.length) { + return ( + + {bookMetadata && ( + + Seçilen kitap: {bookMetadata.title} + {bookMetadata.authors?.length ? ` • ${bookMetadata.authors.join(', ')}` : ''} + + )} + + EPUB metni ayrıştırıldı + + {epubImports.length > 1 + ? `${epubImports.length} EPUB dosyasından metin çıkarıldı.` + : 'EPUB dosyasındaki metin çıkarıldı.'} + + + + Ön izleme + + {ocrText?.trim() || 'Metin ayrıştırılıyor...'} + + + + + + + + ); + } + const progressValue = workerReady && total ? (currentIndex / total) * 100 : 0; const progressVariant = workerReady ? 'determinate' : 'indeterminate'; diff --git a/src/components/UploadStep.jsx b/src/components/UploadStep.jsx index 57955bb..bbfccda 100644 --- a/src/components/UploadStep.jsx +++ b/src/components/UploadStep.jsx @@ -18,6 +18,7 @@ import { } from '@mui/material'; import { useNavigate } from 'react-router-dom'; import { useAppStore } from '../store/useAppStore'; +import { extractTextFromEpub } from '../utils/epubImport'; const dropzoneStyle = { border: '2px dashed rgba(108, 155, 207, 0.7)', @@ -39,16 +40,23 @@ const UploadStep = () => { const setBookTitle = useAppStore((state) => state.setBookTitle); const bookMetadata = useAppStore((state) => state.bookMetadata); const setBookMetadata = useAppStore((state) => state.setBookMetadata); + const epubImports = useAppStore((state) => state.epubImports); + const setEpubImports = useAppStore((state) => state.setEpubImports); + const setOcrText = useAppStore((state) => state.setOcrText); + const clearTranslation = useAppStore((state) => state.clearTranslation); + const setError = useAppStore((state) => state.setError); const [searchResults, setSearchResults] = useState([]); const [searching, setSearching] = useState(false); const [searchError, setSearchError] = useState(null); const [selectedBookId, setSelectedBookId] = useState(bookMetadata?.id || null); const skipSearchRef = useRef(false); const [showResults, setShowResults] = useState(false); + const [epubProcessing, setEpubProcessing] = useState(false); const onDrop = useCallback( - (acceptedFiles) => { + async (acceptedFiles) => { if (!acceptedFiles.length) return; + setEpubProcessing(true); const preservedMetadata = bookMetadata; const preservedTitle = bookTitle; resetFromStep('upload'); @@ -60,21 +68,92 @@ const UploadStep = () => { skipSearchRef.current = true; setBookTitle(preservedTitle); } - const mapped = acceptedFiles.map((file, index) => ({ - id: crypto.randomUUID(), - file, - previewUrl: URL.createObjectURL(file), - order: uploadedImages.length + index, - filename: file.name, - })); - setUploadedImages([...uploadedImages, ...mapped]); + + const imageFiles = []; + const epubFiles = []; + acceptedFiles.forEach((file) => { + const isEpub = + file.type === 'application/epub+zip' || file.name?.toLowerCase().endsWith('.epub'); + if (isEpub) { + epubFiles.push(file); + } else { + imageFiles.push(file); + } + }); + + if (imageFiles.length) { + const mapped = imageFiles.map((file, index) => ({ + id: crypto.randomUUID(), + file, + previewUrl: URL.createObjectURL(file), + order: uploadedImages.length + index, + filename: file.name, + })); + setUploadedImages([...uploadedImages, ...mapped]); + } + + const importedEntries = []; + if (epubFiles.length) { + for (const file of epubFiles) { + try { + // eslint-disable-next-line no-await-in-loop + const parsed = await extractTextFromEpub(file); + importedEntries.push({ + id: crypto.randomUUID(), + filename: file.name, + size: file.size, + text: parsed.text, + metadata: parsed.metadata, + }); + } catch (error) { + setError(error.message || `${file.name} okunamadı.`); + } + } + setEpubImports(importedEntries); + clearTranslation(); + const combinedText = importedEntries.map((entry) => entry.text).filter(Boolean).join('\n\n'); + if (combinedText) { + setOcrText(combinedText); + } + if (!preservedMetadata && importedEntries[0]?.metadata) { + const meta = importedEntries[0].metadata; + setBookMetadata({ + id: `epub-${crypto.randomUUID()}`, + title: meta.title || bookTitle || 'İsimsiz EPUB', + subtitle: '', + authors: meta.authors || [], + publisher: meta.publisher || '', + publishedDate: meta.publishedDate || '', + description: meta.description || '', + pageCount: null, + categories: meta.categories || [], + averageRating: null, + ratingsCount: null, + language: meta.language || '', + infoLink: '', + identifiers: meta.identifiers || [], + thumbnail: null, + }); + if (!preservedTitle?.trim() && meta.title) { + skipSearchRef.current = true; + setBookTitle(meta.title); + } + } + } else { + setEpubImports([]); + } + setEpubProcessing(false); }, [ bookMetadata, bookTitle, + clearTranslation, resetFromStep, setBookMetadata, setBookTitle, + setEpubImports, + setError, + setOcrText, setUploadedImages, uploadedImages, ], @@ -175,6 +254,11 @@ const UploadStep = () => { accept: { 'image/png': ['.png'], 'image/jpeg': ['.jpg', '.jpeg'], + 'image/webp': ['.webp'], + 'application/epub+zip': ['.epub'], + 'application/zip': ['.epub'], + 'application/x-zip-compressed': ['.epub'], + 'application/octet-stream': ['.epub'], }, multiple: true, }); @@ -218,6 +302,11 @@ const UploadStep = () => { return { authorsLine, details }; }, [bookMetadata]); + const hasImages = uploadedImages.length > 0; + const hasEpubImports = epubImports.length > 0; + const canProceed = hasImages || hasEpubImports; + const nextPath = hasImages ? '/crop' : '/ocr'; + return ( {bookMetadata && ( @@ -377,10 +466,10 @@ const UploadStep = () => { - Görselleri sürükleyip bırak veya tıkla + Görselleri veya EPUB dosyasını sürükleyip bırak ya da tıkla - .png, .jpg, .jpeg formatlarında çoklu dosya yükleyebilirsin. + .png, .jpg, .jpeg formatlarında çoklu görsel ya da .epub dosyaları yükleyebilirsin. diff --git a/src/store/useAppStore.js b/src/store/useAppStore.js index 38a10c2..4a4a34f 100644 --- a/src/store/useAppStore.js +++ b/src/store/useAppStore.js @@ -44,6 +44,7 @@ export const useAppStore = create((set) => ({ coverCropConfig: createEmptyCropConfig(), croppedCoverImage: null, ocrText: '', + epubImports: [], bookTitle: '', bookMetadata: null, translatedText: '', @@ -74,6 +75,7 @@ export const useAppStore = create((set) => ({ return { croppedCoverImage: image }; }), setOcrText: (text) => set({ ocrText: text }), + setEpubImports: (imports) => set({ epubImports: imports }), setBookTitle: (title) => set({ bookTitle: title }), setBookMetadata: (metadata) => set({ bookMetadata: metadata }), setTranslatedText: (text) => set({ translatedText: text }), @@ -151,6 +153,7 @@ export const useAppStore = create((set) => ({ draft.coverCropConfig = createEmptyCropConfig(); draft.croppedCoverImage = null; draft.ocrText = ''; + draft.epubImports = []; draft.bookTitle = ''; draft.bookMetadata = null; draft.translatedText = ''; diff --git a/src/utils/epubImport.js b/src/utils/epubImport.js new file mode 100644 index 0000000..4ca2e63 --- /dev/null +++ b/src/utils/epubImport.js @@ -0,0 +1,131 @@ +import JSZip from 'jszip'; + +const parseXml = (content) => { + const parser = new DOMParser(); + return parser.parseFromString(content, 'application/xml'); +}; + +const cleanText = (value = '') => + value + .replace(/\r/g, '') + .replace(/\t/g, ' ') + .replace(/\s+\n/g, '\n') + .replace(/\n{3,}/g, '\n\n') + .replace(/[ \u00A0]{2,}/g, ' ') + .trim(); + +const resolvePath = (basePath, targetPath = '') => { + if (!basePath) return targetPath; + const baseParts = basePath.split('/').filter(Boolean); + const targetParts = targetPath.split('/'); + for (const part of targetParts) { + if (!part || part === '.') continue; + if (part === '..') { + baseParts.pop(); + } else { + baseParts.push(part); + } + } + return baseParts.join('/'); +}; + +const TEXT_MEDIA_TYPES = new Set([ + 'application/xhtml+xml', + 'application/x-dtbook+xml', + 'text/html', + 'text/x-oeb1-document', + 'application/xml', + 'text/xml', +]); + +const looksLikeTextFile = (href = '') => + /\.(x?html?|xml)$/i.test(href) || /\.(xhtml|html|htm)$/i.test(href); + +const parseMetadata = (metadataNode) => { + if (!metadataNode) return {}; + const getFirst = (selector) => metadataNode.querySelector(selector)?.textContent?.trim(); + const authors = Array.from(metadataNode.querySelectorAll('creator, dc\\:creator')).map((node) => + node.textContent?.trim(), + ); + const identifiers = Array.from(metadataNode.querySelectorAll('identifier, dc\\:identifier')).map((node) => ({ + identifier: node.textContent?.trim(), + type: node.getAttribute('opf:scheme') || node.getAttribute('id'), + })); + const categories = Array.from(metadataNode.querySelectorAll('subject, dc\\:subject')) + .map((node) => node.textContent?.trim()) + .filter(Boolean); + return { + title: getFirst('title, dc\\:title'), + language: getFirst('language, dc\\:language'), + publisher: getFirst('publisher, dc\\:publisher'), + description: getFirst('description, dc\\:description'), + publishedDate: getFirst('date, dc\\:date'), + authors: authors.filter(Boolean), + identifiers: identifiers.filter((item) => item.identifier), + categories, + }; +}; + +export const extractTextFromEpub = async (file) => { + const zip = await JSZip.loadAsync(file); + const containerEntry = zip.file('META-INF/container.xml'); + if (!containerEntry) { + throw new Error('EPUB container bilgileri bulunamadı.'); + } + const containerXml = await containerEntry.async('string'); + const containerDoc = parseXml(containerXml); + const rootPath = containerDoc.querySelector('rootfile')?.getAttribute('full-path'); + if (!rootPath) { + throw new Error('EPUB manifest dosyası bulunamadı.'); + } + + const opfEntry = zip.file(rootPath); + if (!opfEntry) { + throw new Error('EPUB manifesti okunamadı.'); + } + const opfXml = await opfEntry.async('string'); + const opfDoc = parseXml(opfXml); + const metadata = parseMetadata(opfDoc.querySelector('metadata')); + const manifest = {}; + opfDoc.querySelectorAll('manifest > item').forEach((item) => { + manifest[item.getAttribute('id')] = { + href: item.getAttribute('href'), + mediaType: item.getAttribute('media-type'), + }; + }); + const baseDir = rootPath.includes('/') ? rootPath.split('/').slice(0, -1).join('/') : ''; + + const spineRefs = Array.from(opfDoc.querySelectorAll('spine > itemref')) + .map((item) => manifest[item.getAttribute('idref')]) + .filter(Boolean); + + const parser = new DOMParser(); + const collectedTexts = []; + for (const ref of spineRefs) { + if (!ref.href) continue; + const shouldRead = + (ref.mediaType && TEXT_MEDIA_TYPES.has(ref.mediaType)) || looksLikeTextFile(ref.href); + if (!shouldRead) continue; + const targetPath = resolvePath(baseDir, ref.href); + const entry = zip.file(targetPath); + if (!entry) continue; + // eslint-disable-next-line no-await-in-loop + const markup = await entry.async('string'); + const doc = parser.parseFromString(markup, 'text/html'); + const text = doc.body?.textContent || doc.documentElement?.textContent || ''; + const cleaned = cleanText(text); + if (cleaned) { + collectedTexts.push(cleaned); + } + } + + return { + text: collectedTexts.join('\n\n').trim(), + metadata: { + ...metadata, + filename: file.name, + fileSize: file.size, + }, + }; +}; +