-- Fix word count calculation issues
-- This migration addresses multiple problems with word counting
-- 1. Fix calculate_book_word_count function to use correct column names and better file filtering
CREATE OR REPLACE FUNCTION calculate_book_word_count(book_uuid UUID)
RETURNS INTEGER
SECURITY DEFINER
SET search_path = public, pg_catalog, pg_temp
AS $$
DECLARE
total_words INTEGER := 0;
item_record RECORD;
BEGIN
-- Sum word counts from all text files in the book
-- Use 'type' column (not 'item_type') and include all text file types
FOR item_record IN
SELECT content
FROM file_system_items
WHERE book_id = book_uuid
AND type = 'file'
AND (
-- Include common text file extensions
file_extension IN ('txt', 'md', 'markdown', 'doc', 'docx', 'rtf', 'odt', 'html', 'htm', 'xml', 'json', 'csv', 'tex', 'rst', 'asciidoc', 'adoc')
OR
-- Include files with text mime types
mime_type LIKE 'text/%'
OR
-- Include files without extension but with content (fallback)
(file_extension IS NULL AND content IS NOT NULL AND content != '')
)
AND content IS NOT NULL
AND content != ''
LOOP
-- Improved word count: split by whitespace and count non-empty elements
-- Handle multiple whitespace characters and newlines properly
total_words := total_words + (
SELECT array_length(
string_to_array(
regexp_replace(item_record.content, '\s+', ' ', 'g'),
' '
),
1
)
);
END LOOP;
RETURN COALESCE(total_words, 0);
END;
$$ LANGUAGE plpgsql;
-- 2. Fix update_book_word_count function to use correct column names
CREATE OR REPLACE FUNCTION update_book_word_count()
RETURNS TRIGGER
SECURITY DEFINER
SET search_path = public, pg_catalog, pg_temp
AS $$
DECLARE
book_uuid UUID;
new_word_count INTEGER;
BEGIN
-- Get the book_id for the file that was modified
IF TG_OP = 'DELETE' THEN
book_uuid := OLD.book_id;
ELSE
book_uuid := NEW.book_id;
END IF;
-- Only process if this is a text file
-- Use 'type' column (not 'item_type') and include all text file types
IF (TG_OP = 'DELETE' AND OLD.type = 'file' AND (
OLD.file_extension IN ('txt', 'md', 'markdown', 'doc', 'docx', 'rtf', 'odt', 'html', 'htm', 'xml', 'json', 'csv', 'tex', 'rst', 'asciidoc', 'adoc')
OR OLD.mime_type LIKE 'text/%'
OR (OLD.file_extension IS NULL AND OLD.content IS NOT NULL AND OLD.content != '')
)) OR
(TG_OP IN ('INSERT', 'UPDATE') AND NEW.type = 'file' AND (
NEW.file_extension IN ('txt', 'md', 'markdown', 'doc', 'docx', 'rtf', 'odt', 'html', 'htm', 'xml', 'json', 'csv', 'tex', 'rst', 'asciidoc', 'adoc')
OR NEW.mime_type LIKE 'text/%'
OR (NEW.file_extension IS NULL AND NEW.content IS NOT NULL AND NEW.content != '')
)) THEN
-- Calculate new word count for the book
new_word_count := calculate_book_word_count(book_uuid);
-- Update the book's word count
UPDATE books
SET word_count = new_word_count,
updated_at = NOW()
WHERE id = book_uuid;
END IF;
-- Return appropriate record based on operation
IF TG_OP = 'DELETE' THEN
RETURN OLD;
ELSE
RETURN NEW;
END IF;
END;
$$ LANGUAGE plpgsql;
-- 3. Create a function to manually recalculate word counts for all books
CREATE OR REPLACE FUNCTION recalculate_all_book_word_counts()
RETURNS TABLE(book_id UUID, book_title TEXT, old_word_count INTEGER, new_word_count INTEGER)
SECURITY DEFINER
SET search_path = public, pg_catalog, pg_temp
AS $$
DECLARE
book_record RECORD;
new_count INTEGER;
BEGIN
-- Loop through all books and recalculate their word counts
FOR book_record IN
SELECT id, title, word_count
FROM books
ORDER BY updated_at DESC
LOOP
-- Calculate new word count
new_count := calculate_book_word_count(book_record.id);
-- Update the book's word count
UPDATE books
SET word_count = new_count,
updated_at = NOW()
WHERE id = book_record.id;
-- Return the change information
book_id := book_record.id;
book_title := book_record.title;
old_word_count := book_record.word_count;
new_word_count := new_count;
RETURN NEXT;
END LOOP;
RETURN;
END;
$$ LANGUAGE plpgsql;
-- 4. Create a function to recalculate word count for a specific book
CREATE OR REPLACE FUNCTION recalculate_book_word_count(book_uuid UUID)
RETURNS INTEGER
SECURITY DEFINER
SET search_path = public, pg_catalog, pg_temp
AS $$
DECLARE
new_word_count INTEGER;
BEGIN
-- Calculate new word count
new_word_count := calculate_book_word_count(book_uuid);
-- Update the book's word count
UPDATE books
SET word_count = new_word_count,
updated_at = NOW()
WHERE id = book_uuid;
RETURN new_word_count;
END;
$$ LANGUAGE plpgsql;
-- 5. Add comments for documentation
COMMENT ON FUNCTION calculate_book_word_count(UUID) IS 'Calculates word count for a book from all text files - includes various text file types and handles edge cases';
COMMENT ON FUNCTION update_book_word_count() IS 'Updates book word count when files change - triggers automatically on file operations';
COMMENT ON FUNCTION recalculate_all_book_word_counts() IS 'Manually recalculates word counts for all books - useful for fixing existing data';
COMMENT ON FUNCTION recalculate_book_word_count(UUID) IS 'Manually recalculates word count for a specific book - useful for fixing individual books';
-- 6. Ensure the trigger exists and is working correctly
-- Drop and recreate the trigger to ensure it uses the latest function
DROP TRIGGER IF EXISTS update_word_count_trigger ON file_system_items;
CREATE TRIGGER update_word_count_trigger
AFTER INSERT OR UPDATE OR DELETE ON file_system_items
FOR EACH ROW
EXECUTE FUNCTION update_book_word_count();
-- 7. Create a table to track books that need word count recalculation
CREATE TABLE IF NOT EXISTS book_word_count_queue (
id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
book_id UUID REFERENCES books(id) ON DELETE CASCADE NOT NULL,
queued_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
processed_at TIMESTAMP WITH TIME ZONE,
attempts INTEGER DEFAULT 0,
last_error TEXT,
-- Prevent duplicate entries for the same book
UNIQUE(book_id)
);
-- Create index for performance
CREATE INDEX IF NOT EXISTS idx_book_word_count_queue_book_id ON book_word_count_queue(book_id);
CREATE INDEX IF NOT EXISTS idx_book_word_count_queue_processed_at ON book_word_count_queue(processed_at);
CREATE INDEX IF NOT EXISTS idx_book_word_count_queue_queued_at ON book_word_count_queue(queued_at);
-- Enable RLS
ALTER TABLE book_word_count_queue ENABLE ROW LEVEL SECURITY;
-- RLS policies - only allow system access for now
CREATE POLICY "System access only" ON book_word_count_queue
FOR ALL USING (false);
-- 8. Function to queue a book for word count recalculation
CREATE OR REPLACE FUNCTION queue_book_for_word_count_recalc(book_uuid UUID)
RETURNS VOID
SECURITY DEFINER
SET search_path = public, pg_catalog, pg_temp
AS $$
BEGIN
-- Insert or update the queue entry
INSERT INTO book_word_count_queue (book_id, queued_at, processed_at, attempts, last_error)
VALUES (book_uuid, NOW(), NULL, 0, NULL)
ON CONFLICT (book_id) DO UPDATE SET
queued_at = NOW(),
processed_at = NULL,
attempts = 0,
last_error = NULL;
END;
$$ LANGUAGE plpgsql;
-- 9. Function to process queued word count recalculations
CREATE OR REPLACE FUNCTION process_word_count_queue(batch_size INTEGER DEFAULT 10)
RETURNS TABLE(book_id UUID, old_count INTEGER, new_count INTEGER, success BOOLEAN, error_message TEXT)
SECURITY DEFINER
SET search_path = public, pg_catalog, pg_temp
AS $$
DECLARE
queue_record RECORD;
old_word_count INTEGER;
new_word_count INTEGER;
error_text TEXT;
BEGIN
-- Process books that need recalculation
FOR queue_record IN
SELECT q.book_id, b.title, b.word_count
FROM book_word_count_queue q
JOIN books b ON q.book_id = b.id
WHERE q.processed_at IS NULL
AND q.attempts < 3 -- Limit retry attempts
ORDER BY q.queued_at ASC
LIMIT batch_size
LOOP
BEGIN
old_word_count := queue_record.word_count;
-- Calculate new word count
new_word_count := calculate_book_word_count(queue_record.book_id);
-- Update the book's word count
UPDATE books
SET word_count = new_word_count,
updated_at = NOW()
WHERE id = queue_record.book_id;
-- Mark as processed
UPDATE book_word_count_queue
SET processed_at = NOW(),
last_error = NULL
WHERE book_id = queue_record.book_id;
-- Return success result
book_id := queue_record.book_id;
old_count := old_word_count;
new_count := new_word_count;
success := TRUE;
error_message := NULL;
RETURN NEXT;
EXCEPTION WHEN OTHERS THEN
-- Handle errors
error_text := SQLERRM;
-- Update attempt count and error
UPDATE book_word_count_queue
SET attempts = attempts + 1,
last_error = error_text
WHERE book_id = queue_record.book_id;
-- Return error result
book_id := queue_record.book_id;
old_count := queue_record.word_count;
new_count := NULL;
success := FALSE;
error_message := error_text;
RETURN NEXT;
END;
END LOOP;
RETURN;
END;
$$ LANGUAGE plpgsql;
-- 10. Enhanced trigger function that also queues books for background recalculation
CREATE OR REPLACE FUNCTION update_book_word_count_with_queue()
RETURNS TRIGGER
SECURITY DEFINER
SET search_path = public, pg_catalog, pg_temp
AS $$
DECLARE
book_uuid UUID;
new_word_count INTEGER;
should_recalc BOOLEAN := FALSE;
BEGIN
-- Get the book_id for the file that was modified
IF TG_OP = 'DELETE' THEN
book_uuid := OLD.book_id;
ELSE
book_uuid := NEW.book_id;
END IF;
-- Check if this is a text file that should trigger recalculation
IF (TG_OP = 'DELETE' AND OLD.type = 'file' AND (
OLD.file_extension IN ('txt', 'md', 'markdown', 'doc', 'docx', 'rtf', 'odt', 'html', 'htm', 'xml', 'json', 'csv', 'tex', 'rst', 'asciidoc', 'adoc')
OR OLD.mime_type LIKE 'text/%'
OR (OLD.file_extension IS NULL AND OLD.content IS NOT NULL AND OLD.content != '')
)) OR
(TG_OP IN ('INSERT', 'UPDATE') AND NEW.type = 'file' AND (
NEW.file_extension IN ('txt', 'md', 'markdown', 'doc', 'docx', 'rtf', 'odt', 'html', 'htm', 'xml', 'json', 'csv', 'tex', 'rst', 'asciidoc', 'adoc')
OR NEW.mime_type LIKE 'text/%'
OR (NEW.file_extension IS NULL AND NEW.content IS NOT NULL AND NEW.content != '')
)) THEN
should_recalc := TRUE;
END IF;
-- If we need to recalculate, do immediate calculation and also queue for background processing
IF should_recalc THEN
-- Try immediate calculation (for real-time updates)
BEGIN
new_word_count := calculate_book_word_count(book_uuid);
UPDATE books
SET word_count = new_word_count,
updated_at = NOW()
WHERE id = book_uuid;
EXCEPTION WHEN OTHERS THEN
-- If immediate calculation fails, just queue it for background processing
NULL;
END;
-- Always queue for background processing as well (for reliability)
PERFORM queue_book_for_word_count_recalc(book_uuid);
END IF;
-- Return appropriate record based on operation
IF TG_OP = 'DELETE' THEN
RETURN OLD;
ELSE
RETURN NEW;
END IF;
END;
$$ LANGUAGE plpgsql;
-- 11. Replace the existing trigger with the enhanced version
DROP TRIGGER IF EXISTS update_word_count_trigger ON file_system_items;
CREATE TRIGGER update_word_count_trigger
AFTER INSERT OR UPDATE OR DELETE ON file_system_items
FOR EACH ROW
EXECUTE FUNCTION update_book_word_count_with_queue();
-- 12. Queue all existing books for word count recalculation
INSERT INTO book_word_count_queue (book_id, queued_at, processed_at, attempts, last_error)
SELECT id, NOW(), NULL, 0, NULL
FROM books
ON CONFLICT (book_id) DO UPDATE SET
queued_at = NOW(),
processed_at = NULL,
attempts = 0,
last_error = NULL;
-- 13. Add comments for new functions
COMMENT ON FUNCTION queue_book_for_word_count_recalc(UUID) IS 'Queues a book for background word count recalculation';
COMMENT ON FUNCTION process_word_count_queue(INTEGER) IS 'Processes queued word count recalculations in batches';
COMMENT ON FUNCTION update_book_word_count_with_queue() IS 'Enhanced trigger that does immediate recalc and queues for background processing';
COMMENT ON TABLE book_word_count_queue IS 'Queue for books that need word count recalculation for reliability and batch processing';