Documentation

Jaccard.D

A theorem on metrics based on min and max #

In this file we give a formal proof that in terms of d(X,Y)= m min(|X\Y|, |Y\X|) + M max(|X\Y|, |Y\X|) the function D(X,Y) = d(X,Y)/(|X ∩ Y|+d(X,Y)) is a metric if 0 < m ≤ M and 1 ≤ M. In particular, taking m=M=1, the Jaccard distance is a metric on Finset ℕ.

Main results #

Notation #

References #

For the original proof see https://arxiv.org/abs/2111.02498

noncomputable def D {α : Type u_1} [DecidableEq α] :
Finset αFinset α
Equations
Instances For
    theorem cap_sdiff {α : Type u_1} [DecidableEq α] (X Y Z : Finset α) :
    X Z X Y Z \ Y
    theorem sdiff_cap {α : Type u_1} [DecidableEq α] (X Y Z : Finset α) :
    (X Z) \ Y Z \ Y
    theorem twelve_end {α : Type u_1} [DecidableEq α] (X Y Z : Finset α) :
    |X Z| |X Y| + |Z \ Y| |Y \ Z|
    theorem twelve_middle {m M : } {α : Type u_1} [DecidableEq α] (hm : 0 m) (hM : 0 < M) (X Y Z : Finset α) :
    let y_z := |Y \ Z|; let z_y := |Z \ Y|; let xy := |X Y|; |X Z| xy + z_y y_z + m / M * (z_y y_z)
    theorem jn_self {m M : } {α : Type u_1} [DecidableEq α] (X : Finset α) :
    D m M X X = 0
    theorem delta_nonneg {m M : } {α : Type u_1} [DecidableEq α] {x y : Finset α} (hm : 0 m) (hM : m M) :
    0 δ m M x y
    theorem jn_comm {m M : } {α : Type u_1} [DecidableEq α] (X Y : Finset α) :
    D m M X Y = D m M Y X
    theorem card_inter_nonneg {α : Type u_1} [DecidableEq α] (X Y : Finset α) :
    0 |X Y|
    theorem D_denom_nonneg {m M : } {α : Type u_1} [DecidableEq α] (X Y : Finset α) (hm : 0 m) (hM : m M) :
    0 |X Y| + δ m M X Y
    theorem eq_of_jn_eq_zero {m M : } {α : Type u_1} [DecidableEq α] (hm : 0 < m) (hM : m M) (X Y : Finset α) (h : D m M X Y = 0) :
    X = Y
    theorem D_nonneg {m M : } {α : Type u_1} [DecidableEq α] (X Y : Finset α) (hm : 0 m) (hM : m M) :
    0 D m M X Y
    theorem D_empty_1 {α : Type u_1} [DecidableEq α] (m M : ) {X Y : Finset α} (hm : 0 < m) (hM : m M) (hx : X = ) (hy : Y ) :
    D m M X Y = 1
    theorem D_empty_2 {α : Type u_1} [DecidableEq α] (m M : ) {X Y : Finset α} (hm : 0 < m) (hM : m M) (hx : X ) (hy : Y = ) :
    D m M X Y = 1
    theorem D_bounded {α : Type u_1} [DecidableEq α] (m M : ) (X Y : Finset α) (hm : 0 m) (hM : m M) :
    D m M X Y 1
    theorem intersect_cases {α : Type u_1} [DecidableEq α] (m M : ) (Y Z : Finset α) (hm : 0 < m) (hM : m M) (hy : Y ) :
    let ayz := |Z Y|; let dyz := δ m M Z Y; 0 < ayz + dyz
    theorem four_immediate_from {α : Type u_1} [DecidableEq α] (m M : ) (X Y Z : Finset α) (hm : 0 < m) (hM : m M) (h1M : 1 M) (hz : Z ) :
    let axy := |X Y|; let dxz := δ m M X Z; let dyz := δ m M Z Y; let axz := |X Z|; let denom := axy + dxz + dyz; dxz / denom dxz / (axz + dxz)
    theorem four_immediate_from_and {α : Type u_1} [DecidableEq α] (m M : ) (X Y Z : Finset α) (hm : 0 < m) (hM : m M) (h1M : 1 M) (hz : Z ) :
    δ m M Z Y / (|X Y| + δ m M X Z + δ m M Z Y) δ m M Z Y / (|Z Y| + δ m M Z Y)
    theorem mul_le_mul_rt {a b c : } (h : 0 c) (hab : a b) :
    a * c b * c
    theorem abc_lemma {a b c : } (h : 0 a) (hb : a b) (hc : 0 c) :
    a / (a + c) b / (b + c)
    theorem three {m M : } (X Y Z : Finset ) (hm : 0 < m) (hM : m M) :
    let axy := |X Y|; let dxy := δ m M X Y; let dxz := δ m M X Z; let dyz := δ m M Z Y; let denom := axy + dxz + dyz; dxy / (axy + dxy) (dxz + dyz) / denom
    theorem jn_triangle_nonempty (m M : ) (X Y Z : Finset ) (hm : 0 < m) (hM : m M) (h1M : 1 M) (hz : Z ) :
    D m M X Y D m M X Z + D m M Z Y
    theorem jn_triangle (m M : ) (X Y Z : Finset ) (hm : 0 < m) (hM : m M) (h1M : 1 M) :
    D m M X Y D m M X Z + D m M Z Y
    noncomputable instance jaccard_nid.metric_space {m M : } (hm : 0 < m) (hM : m M) (h1M : 1 M) :
    Equations
    def J :
    Equations
    Instances For
      theorem J_characterization (X Y : Finset ) :
      (J X Y) = D 1 1 X Y
      noncomputable instance jaccard.metric_space (hm : 0 < 1) (hM h1M : 1 1) :
      Equations
      instance instSDiffFinsetOfFintypeOfDecidablePredMem_jaccard {α : Type} [Fintype α] [(X : Finset α) → DecidablePred fun (a : α) => a X] :
      Equations
      • instSDiffFinsetOfFintypeOfDecidablePredMem_jaccard = { sdiff := fun (X Y : Finset α) => Finset.filter (fun (a : α) => a X aY) Finset.univ }
      instance instUnionFinsetOfFintypeOfDecidablePredMem_jaccard {α : Type} [Fintype α] [(X : Finset α) → DecidablePred fun (a : α) => a X] :
      Equations
      • instUnionFinsetOfFintypeOfDecidablePredMem_jaccard = { union := fun (X Y : Finset α) => Finset.filter (fun (a : α) => a X a Y) Finset.univ }
      def J' {α : Type} [Fintype α] [(X : Finset α) → DecidablePred fun (a : α) => a X] :
      Finset αFinset α

      Computable Jaccard distance on sets of words etc.

      Equations
      Instances For