SQL’de Benzerlik Algoritmaları…

// 2 Nisan 2010 // Delphi

EDIT DISTANCE

Aşağıdaki 2 fonksiyon size stringler arasındaki benzerlik oranlarını vermekte en benzer olanların sonuçları en küçük olanlar çıkacaktır. Yani fark arttıkça dönen sayı artmakta bunu unutmayın.

CREATE FUNCTION edit_distance(@s1 nvarchar(3999), @s2 nvarchar(3999))
RETURNS int
AS
BEGIN
  DECLARE 
     @s1_len int, 
     @s2_len int, 
     @i int, 
     @j int, 
     @s1_char nchar, 
     @c int, 
     @c_temp int,
     @cv0 varbinary(8000), 
     @cv1 varbinary(8000)

  SELECT 
    @s1_len = LEN(@s1), 
    @s2_len = LEN(@s2), 
    @cv1 = 0x0000,
    @j = 1, 
    @i = 1, 
    @c = 0

   WHILE @j <= @s2_len
     SELECT 
       @cv1 = @cv1 + CAST(@j AS binary(2)), 
       @j = @j + 1

  WHILE @i <= @s1_len
  BEGIN
    SELECT 
      @s1_char = SUBSTRING(@s1, @i, 1), 
      @c = @i,
      @cv0 = CAST(@i AS binary(2)),
      @j = 1

    WHILE @j <= @s2_len
    BEGIN
      SET @c = @c + 1
      SET @c_temp = CAST(SUBSTRING(@cv1, @j+@j-1, 2) AS int) +
        CASE 
          WHEN @s1_char = SUBSTRING(@s2, @j, 1) THEN 0 ELSE 1 
        END

      IF @c > @c_temp SET @c = @c_temp
      SET @c_temp = CAST(SUBSTRING(@cv1, @j+@j+1, 2) AS int)+1

      IF @c > @c_temp SET @c = @c_temp
      SELECT @cv0 = @cv0 + CAST(@c AS binary(2)), @j = @j + 1
    END

  SELECT @cv1 = @cv0, @i = @i + 1
  END
RETURN @c
END

LEVINSTEIN ALGORITMASI

CREATE FUNCTION dbo.fn_LevenshteinDistance
(
  @Str1 VARCHAR(3999)
, @Str2 VARCHAR(3999)
)
RETURNS INT
AS
BEGIN
/*
#
SELECT dbo.fn_LevenshteinDistance('test', 'test')
#
SELECT dbo.fn_LevenshteinDistance('tesot', 'test')
#
SELECT dbo.fn_LevenshteinDistance('testing1', 'tes_ing')
#
*/

  DECLARE @m INT, @n INT
  DECLARE @editMatrix TABLE(pk1 INT, pk2 INT, v INT, PRIMARY KEY (pk1, pk2))
  DECLARE @i INT, @j INT, @cost INT
  DECLARE @str1_i CHAR, @str2_j CHAR

  SET @n = DATALENGTH(ISNULL(@Str1, ''))
  SET @m = DATALENGTH(ISNULL(@Str2, ''))

  IF @n = 0 OR @m = 0 RETURN 0

  SET @i = 0
  WHILE @i <= @n 
  BEGIN
    SET @j = 0
    WHILE @j <= @m 
    BEGIN
      IF @j = 0
        INSERT INTO @editMatrix VALUES(@i, 0, @i)
      ELSE IF @i = 0
        INSERT INTO @editMatrix VALUES(0, @j, @j)
      ELSE
        INSERT INTO @editMatrix VALUES(@i, @j, 0)

      SET @j = @j + 1
    END

    SET @i = @i + 1
  END

  SET @i = 1
  WHILE @i <= @n 
  BEGIN
    SET @str1_i = SUBSTRING(@Str1, @i, 1)

    SET @j = 1
    WHILE @j <= @m 
    BEGIN
      SET @str2_j = SUBSTRING(@Str2, @j, 1)

      IF @str1_i = @str2_j
        SET @cost = 0
      ELSE
        SET @cost = 1

      UPDATE em
      SET v = (
        SELECT MIN(ret) AS MIN_ret
        FROM (
          SELECT v + 1 AS ret 
          FROM @editMatrix 
          WHERE pk1 = @i - 1 AND pk2 = @j     

          UNION ALL

          SELECT v + 1 AS ret 
          FROM @editMatrix 
          WHERE pk1 = @i AND pk2 = @j - 1 

         UNION ALL

         SELECT v + @cost AS ret 
         FROM @editMatrix 
         WHERE pk1 = @i - 1 AND pk2 = @j - 1
       ) AS x
    )
      FROM @editMatrix AS em
      WHERE pk1 = @i AND pk2 = @j

      SET @j = @j + 1
    END

    SET @i = @i + 1
  END

 RETURN (SELECT ISNULL(v, -1) FROM @editMatrix WHERE pk1 = @n AND pk2 = @m)
END

“SQL’de Benzerlik Algoritmaları…” için 1 Yorum

  1. T.Terlemez diyor ki:

    Teşekkürler, çok işime yaradı verdiğiniz kodlar.

Yorum Yazın