create or replace package owb_match_merge AS --  PRAGMA SERIALLY_REUSABLE; --  PRAGMA RESTRICT_REFERENCES(default,WNDS,WNPS,RNDS,RNPS);
-- NOTE: both pragmas on the line above are commented out and are not in effect.
--
-- Package specification. Declares:
--   * package-level configuration variables (translation strings, noise words,
--     'Mrs' prenames) that callers may modify,
--   * the tokens collection type and the record types describing the inputs
--     and options of the person, address and firm match rules,
--   * the string comparison, tokenizing and matching subprograms.
-- The implementations live in the package body, which is not part of this spec.


-- User configuration:
--
-- Standardization
-- Standardization will
--    force upper case
--    remove noise characters
--    optionally remove noise words
-- Remove noise characters:
--   standardize uses translate to remove noise characters.
--   Modify std_translateFrom and std_translateTo to change the
--   translation. Note that translate requires at least one
--   character in each string to operate. The default values translate
--   '1' to '1' to accommodate this requirement.
--
-- Remove noise words
--   If noise_words is not null, any words in noise_words will be
--   removed from the standardized string or the standardized tokens.
--   The user may add words to be removed by adding the space delimited words
--   to noise_words.

-- Firm rule noise words
-- The firm rule noise words may be modified in Firm_opt.noise_words.

-- Token standardization
--   Firm, partial_name, abbreviation, and acronym match rules break the match
--   attribute into standardized tokens. The noise characters are removed
--   through the translation strings tok_std_translateFrom and tok_std_translateTo.
--   Typically these translation strings will be identical to std_translateFrom and
--   std_translateTo with the exception that the space character should not be
--   translated out.

-- Mrs match
--  The mrs match allows customized name matching based on the 'Mrs' prename.
--  The prename denoting a married female in English is 'MRS'. Additional
--  prenames may be added to this list by adding space delimited names to
--  mrs_prenames.

  -- Translation strings used by standardize. The leading '1' maps to the '1'
  -- in std_translateTo; every other character listed here (including the
  -- space that follows the '1') has no counterpart in std_translateTo.
  std_translateFrom       varchar2(255) := '1 `~!@#$%^&*()_+|}{:"[]:"<>?/.,''';
  std_translateTo         varchar2(255) := '1';

  -- Translation strings used for token standardization. Same character list as
  -- std_translateFrom except that the space character is NOT included.
  tok_std_translateFrom   varchar2(255) := '1`~!@#$%^&*()_+|}{:"[]:"<>?/.,''';
  tok_std_translateTo     varchar2(255) := '1';

  -- noise words removed by standardize and tokenize (space delimited).
  -- Initialized to the empty string, i.e. no noise words by default.
  noise_words             varchar2(255) := '';

  -- prenames indicating married female (space delimited)
  mrs_prenames            varchar2(255) := 'MRS';




  -- Collection of token strings (each up to 2000 characters), indexed by integer.
  type tokens is table of varchar2(2000) index by binary_integer;

  -- Tokens variable declared without any elements in this spec; used below as
  -- the default value of the Firm_rcd token fields.
  nullToken tokens;

  -- Flag values; Address_rcd.Is_found uses the same 'F' literal as its default.
  -- NOTE(review): these are variables, not constants, so callers can reassign them.
  ISFOUND varchar2(3) := 'T';
  ISNOTFOUND varchar2(3) := 'F';
  
  -- match rule record types

  -- Name components of one person, as passed to match_person.
  -- All name fields default to null.
  type Person_rcd is record (
    Prename             varchar2(1000) default null,
    First_name_std      varchar2(1000) default null,
    Middle_name_std     varchar2(1000) default null,
    Middle_name_2_std   varchar2(1000) default null,
    Middle_name_3_std   varchar2(1000) default null,
    Last_name           varchar2(1000) default null,
    Maturity_postname   varchar2(1000) default null,
    -- presumably set once the record's fields have been standardized;
    -- TODO confirm against the package body
    is_standardized     boolean := false
  );

  -- Options controlling match_person. Every boolean switch defaults to false.
  -- The *_similarity_score fields default to 85.
  -- NOTE(review): the meaning of the *_algorithm values (default 0) and the
  -- *_sdx_algorithm values (default 2) is not defined in this spec; presumably
  -- they select among the similarity / phonetic routines declared below.
  -- Confirm against the package body.
  type Person_opt is record (
    Detect_switched_name_order  boolean default false,
    -- First name
    Match_on_FN_initials        boolean default false,
    Match_on_FN_substrings      boolean default false,
    Match_on_FN_similarity      boolean default false,
    Match_on_FN_algorithm       pls_integer default 0,
    FN_similarity_score         pls_integer default 85,
    Match_on_FN_soundex         boolean default false,
    Match_on_FN_sdx_algorithm   pls_integer default 2,
    Detect_compound_FN          boolean default false,
    -- enables the 'Mrs' prename matching described in the header (see mrs_prenames)
    Mrs_match                   boolean default false,
    -- Middle name
    Match_on_MN_initials        boolean default false,
    Match_on_MN_substrings      boolean default false,
    Match_on_MN_similarity      boolean default false,
    Match_on_MN_algorithm       pls_integer default 0,
    MN_similarity_score         pls_integer default 85,
    Match_on_MN_soundex         boolean default false,
    Match_on_MN_sdx_algorithm   pls_integer default 2,
    -- Last name
    Match_on_hyphenated_LN      boolean default false,
    Detect_missing_LN_hyphen    boolean default false,
    Match_on_LN_similarity      boolean default false,
    Match_on_LN_algorithm       pls_integer default 0,
    LN_similarity_score         pls_integer default 85,
    Match_on_LN_soundex         boolean default false,
    Match_on_LN_sdx_algorithm   pls_integer default 2
    
  );



  -- Address components of one record, as passed to match_address.
  -- All address fields default to null.
  type Address_rcd is record (
    Primary_address             varchar2(1000) default null,
    Unit_number                 varchar2(1000) default null,
    PO_box                      varchar2(1000) default null,
    Dual_primary_address        varchar2(1000) default null,
    Dual_unit_number            varchar2(1000) default null,
    Dual_po_box                 varchar2(1000) default null,
    City                        varchar2(1000) default null,
    State                       varchar2(1000) default null,
    Postal_code                 varchar2(1000) default null,
    -- defaults to 'F', the same value ISNOTFOUND is initialized to
    Is_found                    varchar2(3) default 'F'
    );

  -- Options controlling match_address. Boolean switches default to false,
  -- similarity scores default to 85, algorithm selectors default to 0
  -- (selector values are not defined in this spec -- confirm in the body).
  type Address_opt is record (
    Allow_differing_sec_address         boolean default false,
    Match_on_blank_sec_address          boolean default false,
    Match_on_street_or_PO               boolean default false,
    Match_on_addr_similarity            boolean default false,
    Match_on_addr_algorithm             pls_integer default 0,
    Address_line_similarity_score       pls_integer default 85,
    Match_on_last_line_similarity       boolean default false,
    Match_on_last_line_algorithm        pls_integer default 0,
    Last_line_similarity_score          pls_integer default 85
  );

  -- Firm names of one record, as passed to match_firm.
  -- Besides the two firm names it carries a stripped string and a tokens
  -- collection per firm, plus an Init flag.
  -- NOTE(review): Init, Firm*_stripped and Firm*_t look like derived values that
  -- match_firm fills in (it takes these records as IN OUT) -- confirm in the body.
  type Firm_rcd is record (
    Init            boolean default false,
    Firm1           varchar2(1000) default null,
    Firm1_stripped  varchar2(1000) default null,
    Firm1_t         tokens default nullToken,
    Firm2           varchar2(1000) default null,
    Firm2_stripped  varchar2(1000) default null,
    Firm2_t         tokens default nullToken
    );

  -- Options controlling match_firm. Boolean switches default to false.
  -- Noise_words is the space delimited firm noise word list referred to in the
  -- header ("Firm rule noise words"); it may be modified by the caller.
  type Firm_opt is record (
    Strip_noise_words           boolean default false,
    Noise_words                 varchar2(2000) default 'THE AND CORP CORPORATION CO COMPANY INC INCORPORATED LTD TO OF BY',
    Cross_match_firm1_firm2     boolean default false,
    Match_on_partial_firm_name  boolean default false,
    Match_on_abbreviations      boolean default false,
    Match_on_acronyms           boolean default false,
    Match_on_similarity         boolean default false,
    Match_on_similarity_algorithm pls_integer default 0,
    Similarity_score            pls_integer default 85
  );



  -- general functions

  -- Returns a varchar2; presumably the version identifier of this package.
  function get_version return varchar2;

  -- character functions

  -- Returns the standardized form of s (see "Standardization" in the header).
  function standardize( s in varchar2) return varchar2;
  -- Boolean test on s. The exact definition of "blank" is in the package body.
  function is_blank( s in varchar2) return boolean;
  -- Integer similarity measure of s1 and s2.
  -- NOTE(review): presumably on the same scale as the *_similarity_score
  -- options (default 85) -- confirm in the body.
  function similarity( s1 in varchar2, s2 in varchar2 ) return binary_integer;
  -- Numeric edit distance between s1 and s2.
  function edit_distance( s1 in varchar2, s2 in varchar2 ) return number;
  -- Variant of edit_distance taking an extra numeric argument c.
  -- NOTE(review): the role of c is not documented in this spec -- confirm in the body.
  function edit_distance_test( s1 in varchar2, s2 in varchar2, c in number ) return number;
  
  -- Jaro-Winkler comparison of s1 and s2: jaro_winkler returns a number,
  -- jw_similarity returns an integer (presumably the scaled score -- confirm).
  function jaro_winkler(s1 in varchar2, s2 in varchar2) return number;
  function jw_similarity( s1 in varchar2, s2 in varchar2 ) return binary_integer;
  
  -- Double metaphone encoding of s. Results are handed back through the
  -- IN OUT parameters primary_code and secondary_code.
  procedure double_metaphone(s in varchar2, primary_code in out varchar2, secondary_code in out varchar2);
  -- Boolean comparison of lhs and rhs; presumably both are double metaphone
  -- codes -- confirm in the body.
  function dmcodes_match(lhs in varchar2, rhs in varchar2) return boolean;
  

  -- return a list of standardized tokens with noise words removed.
  -- delimiter defaults to a single space.
  function tokenize(s in varchar2, delimiter in char default ' ') return tokens;

  -- conditional match functions
  -- Each takes a left (tsl) and a right (tsr) token list and returns a boolean.
  function  partial_name_match( tsl in tokens, tsr in tokens ) return boolean;
  function abbreviation_match( tsl in tokens, tsr in tokens ) return boolean;
  function acronym_match( tsl in Tokens, tsr in Tokens) return boolean;
  
  -- name/address match functions
  -- Each compares a left (lhs) and a right (rhs) record under the given
  -- options record (opt) and returns a boolean.
  function match_person(lhs in Person_rcd, rhs in Person_rcd, opt in Person_opt)
           return boolean;

  function match_address(lhs in Address_rcd, rhs in Address_rcd, opt in Address_opt)
           return boolean;

  -- lhs and rhs are IN OUT NOCOPY: the function may modify the caller's records.
  function match_firm(lhs in out nocopy Firm_rcd, rhs in out nocopy Firm_rcd, opt in Firm_opt)
           return boolean;

end owb_match_merge;
/
