-- Create temp table to speed up looking for a PublisherId when we know the MessageId
BEGIN TRY
CREATE TABLE #PublisherMessageReverseIndex(MessageStringId UNIQUEIDENTIFIER,
MessageId INT)
CREATE CLUSTERED INDEX #PublisherMessageReverseIndex_CI ON #PublisherMessageReverseIndex(MessageStringId)
INSERT INTO #PublisherMessageReverseIndex (MessageStringId, MessageId)
SELECT MessageStringId, MessageId
FROM dbo.PublisherMessages
-- Create temp table of message lengths, message id, and Message Hash with the
-- Message String Id so that we can efficiently determine whether a given message
-- is duplicated. The duplicate messages that are generated by the converted MP have
-- different PublisherId's, but everything else is identical. INDEX this TABLE so that
-- we can look up quickly by the MessageStringId and also by values we expect to see
-- duplicated.
CREATE TABLE #LTHashStrings (MessageStringId UNIQUEIDENTIFIER,
LTValueLen INT,
LTValueHash VARBINARY(32),
MessageId INT NULL)
CREATE CLUSTERED INDEX #LTHashStrings_CI ON #LTHashStrings(MessageStringId)
CREATE NONCLUSTERED INDEX #LTHashStrings_NCI1 ON #LTHashStrings(LTValueLen, MessageId, LTValueHash)
-- Create temp table for the Orphaned PublisherStrings that we find. These
-- are rows in PublisherMessages whose corresponding Events have already been groomed away.
-- They still have corresponding rows in LocalizedText. We will not add rows for PublisherMessages,
-- which are not for a duplicated message.
CREATE TABLE #OrphanedPublisherStrings (PublisherId UNIQUEIDENTIFIER,
MessageStringId UNIQUEIDENTIFIER)
CREATE CLUSTERED INDEX #OrphanedPublisherStrings_CI ON #OrphanedPublisherStrings(MessageStringId)
-- Create temp table to use in looking up whether a PublisherMessages row still
-- has a corresponding Event. Event_01 etc. do not have an index on PublisherId, so we
-- do not want to do a query that keeps seeking into EventAllView.
-- If a PublisherId occurs multiple times in the Event tables, we will only need it
-- one time in our temp table, hence the unique clustered index with IGNORE_DUP_KEY.
-- This keeps the temp table relatively small and will save time when
-- we want to see which PublisherMessages are orphaned.
CREATE TABLE #EventAllPublishers (PublisherId UNIQUEIDENTIFIER)
CREATE UNIQUE CLUSTERED INDEX #EventAllPublishers_CI ON #EventAllPublishers (PublisherId)
WITH (IGNORE_DUP_KEY = ON)
-- Populate temp table by scanning EventAllView one time
INSERT INTO #EventAllPublishers(PublisherId)
SELECT PublisherId
FROM EventAllView
-- Populate the first Temp table to determine which messages are duplicated
INSERT INTO #LTHashStrings (MessageStringId, LTValueLen, LTValueHash, MessageId)
SELECT LTStringId, len(LTValue), HashBytes('SHA1', LTValue), MessageId
FROM dbo.LocalizedText LT
JOIN #PublisherMessageReverseIndex PM ON PM.MessageStringId = LTStringId
-- Create the second table to determine which messages are duplicated.
CREATE TABLE #LTCountByMessage( LTValueLen INT,
MessageId INT,
LTValueHash VARBINARY(32),
MsgCount INT)
CREATE CLUSTERED INDEX #LTCountByMessage_CI ON #LTCountByMessage(LTValueLen, MessageId, LTValueHash)
-- Populate second message for duplicate message detection by scanning INDEX of
-- the first one and doing a grouped count.
INSERT INTO #LTCountByMessage (LTValueLen, MessageId, LTValueHash, MsgCount)
SELECT LTValueLen, MessageId, LTValueHash, COUNT(1)
FROM #LTHashStrings
GROUP BY LTValueLen, MessageId, LTValueHash
-- Now that we are set up to detect both Orphans and duplicated messages by
-- joining to our relatively small (and correctly indexed) temp tables,
-- determine the OrphanedPublisherStrings that have duplicate messages
INSERT INTO #OrphanedPublisherStrings (PublisherId, MessageStringId)
SELECT PM.PublisherId, PM.MessageStringId
FROM dbo.PublisherMessages PM
JOIN #LTHashStrings LTS ON (LTS.MessageStringId = PM.MessageStringId AND LTS.MessageId = PM.MessageId)
JOIN #LTCountByMessage LTC ON (LTC.LTValueLen = LTS.LTValueLen AND
LTC.MessageId = LTS.MessageId AND LTC.LTValueHash = LTS.LTValueHash)
WHERE PM.PublisherId NOT IN (SELECT PublisherId FROM #EventAllPublishers) AND
LTC.MsgCount > 1
-- Deleting all of the OrphanedPublisherStrings and corresponding LocalizedText rows
-- at once may be too large for the transaction log to handle. Create a
-- numbered / ordered table so that we can delete them in relatively small batches
-- and not overtax the transaction log.
CREATE TABLE #NumberOrphanPublisherStrings(OrphanNum INT IDENTITY,
PublisherId UNIQUEIDENTIFIER,
MessageStringId UNIQUEIDENTIFIER)
CREATE CLUSTERED INDEX #NumberOrphanPublisherStrings_CI on #NumberOrphanPublisherStrings(OrphanNum)
-- Populate Numbered TABLE
INSERT INTO #NumberOrphanPublisherStrings (PublisherId, MessageStringId)
SELECT PublisherId, MessageStringId FROM #OrphanedPublisherStrings
END TRY
BEGIN CATCH
GOTO Error
END CATCH
-- Set up variables so that we can delete our orphaned rows
-- If transaction log fills up, try to reduce the @OrphanIncrement value,
-- which controls the number of rows that we delete at a time
DECLARE @OrphanNum INT
DECLARE @OrphanIncrement INT
DECLARE @OrphanLimit INT
SET @OrphanNum = 0
SET @OrphanIncrement = 10000
SELECT @OrphanLimit = MAX(OrphanNum) FROM #NumberOrphanPublisherStrings
BEGIN TRY
WHILE @OrphanNum < @OrphanLimit
BEGIN
DELETE dbo.LocalizedText FROM
#NumberOrphanPublisherStrings OPS JOIN dbo.LocalizedText LT
ON LT.LTStringId = OPS.MessageStringId
WHERE OPS.OrphanNum >= @OrphanNum AND OPS.OrphanNum < @OrphanNum + @OrphanIncrement
DELETE dbo.PublisherMessages FROM
#NumberOrphanPublisherStrings OPS JOIN dbo.PublisherMessages PM
ON PM.PublisherId = OPS.PublisherId
WHERE OPS.OrphanNum >= @OrphanNum AND OPS.OrphanNum < @OrphanNum + @OrphanIncrement
SET @OrphanNum = @OrphanNum + @OrphanIncrement
END
END TRY
BEGIN CATCH
GOTO Error
END CATCH
Error:
IF @@ERROR <> 0
SELECT
ERROR_NUMBER() AS ErrorNumber,
ERROR_MESSAGE() AS ErrorMessage;
-- Try to drop all of the Temp tables
BEGIN TRY
IF EXISTS (SELECT 1 FROM tempdb.INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME LIKE '#PublisherMessage%')
DROP TABLE #PublisherMessageReverseIndex
IF EXISTS (SELECT 1 FROM tempdb.INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME LIKE '#OrphanedPublisherStrings%')
DROP TABLE #OrphanedPublisherStrings
IF EXISTS (SELECT 1 FROM tempdb.INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME LIKE '#LTHashStrings%')
DROP TABLE #LTHashStrings
IF EXISTS (SELECT 1 FROM tempdb.INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME LIKE '#EventAllPublishers%')
DROP TABLE #EventAllPublishers
IF EXISTS (SELECT 1 FROM tempdb.INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME LIKE '#LTCountByMessage%')
DROP TABLE #LTCountByMessage
IF EXISTS (SELECT 1 FROM tempdb.INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME LIKE '#NumberOrphanPublisherStrings%')
DROP TABLE #NumberOrphanPublisherStrings
END TRY
BEGIN CATCH
SELECT
ERROR_NUMBER() AS ErrorNumber,
ERROR_MESSAGE() AS ErrorMessage;
END CATCH