04 October 2006

Finding Duplicate Records In Oracle

Sometimes to validate data integrity, you may want to find out duplicate records. There are some ways to find them. I put them together.

The example I show has three key columns. If you have only one key column you can follow keep dense_rank as Laurent demonstrated

Connected to Oracle Database 10g Enterprise Edition Release 10.2.0.1.0
Connected as SYS



SQL> set timing on
SQL>
SQL> drop table dup;

drop table dup

ORA-00942: table or view does not exist


SQL> create table dup(i number, x varchar2(8), d date );

Table created

Executed in 0,094 seconds


SQL> BEGIN
  2    FOR i IN 1 .. 500000 LOOP
  3      INSERT INTO dup
  4      VALUES
  5        (trunc(dbms_random.VALUE(1, 100)),
  6         dbms_random.STRING('U', 5),
  7         trunc(SYSDATE + trunc(dbms_random.VALUE(1, 100))));
  8    END LOOP;
  9  END;
 10  /

PL/SQL procedure successfully completed

Executed in 105,859 seconds


SQL> --
SQL> SELECT i, x, d, COUNT(*)
  2    FROM dup
  3   GROUP BY i, x, d
  4  HAVING COUNT(*) > 1;

         I X        D             COUNT(*)
---------- -------- ----------- ----------
        85 LBKMA    15.10.2006           2

Executed in 0,938 seconds


SQL> --
SQL> SELECT dd.i, dd.x, dd.d
  2    FROM (SELECT i,
  3                 x,
  4                 d,
  5                 ROWID,
  6                 row_number() over(PARTITION BY i, x, d ORDER BY ROWID) rn
  7            FROM dup) dd
  8   WHERE dd.rn != 1;

         I X        D
---------- -------- -----------
        85 LBKMA    15.10.2006

Executed in 1,531 seconds


SQL> --
SQL> SELECT *
  2    FROM dup a
  3   WHERE a.ROWID NOT IN (SELECT MAX(ROWID)
  4                           FROM dup b
  5                          WHERE a.i = b.i
  6                            AND a.x = b.x
  7                            AND a.d = b.d);

         I X        D
---------- -------- -----------
        85 LBKMA    15.10.2006

Executed in 3,625 seconds


SQL> --
SQL> SELECT i, x, d, ROWID
  2    FROM dup
  3  MINUS
  4  SELECT i, x, d, MAX(ROWID) FROM dup GROUP BY i, x, d;

         I X        D           ROWID
---------- -------- ----------- ------------------
        85 LBKMA    15.10.2006  AAAOqHAABAAAPqgABO

Executed in 10,063 seconds

SQL>

No comments: