diff --git a/data/fagin_k1_ground_truth.csv b/data/fagin_k1_ground_truth.csv index e69de29..3b3af87 100644 --- a/data/fagin_k1_ground_truth.csv +++ b/data/fagin_k1_ground_truth.csv @@ -0,0 +1,1089 @@ +0,344,0.5048781642974013 +1,789,0.4359286452893223 +2,4,0.25 +3,2,0.23570226039551587 +4,5,0.31980107453341566 +5,4,0.31980107453341566 +6,2,0.0944911182523068 +7,401,0.6405126152203485 +8,173,0.6933752452815365 +9,773,0.6428571428571429 +10,232,0.5962847939999439 +11,519,0.5039526306789697 +12,496,0.7368421052631579 +13,61,0.45643546458763845 +14,69,0.45291081365783825 +15,423,0.6689936080056726 +16,65,0.5892556509887896 +17,151,0.4558423058385518 +18,20,0.7745966692414834 +19,18,0.7453559924999299 +20,18,0.7745966692414834 +21,224,0.6092717958449424 +22,201,0.7519206177414045 +23,377,0.6446583712203042 +24,433,0.6543303050815759 +25,699,0.6324555320336759 +26,770,0.7024393586862705 +27,287,0.611775290321498 +28,344,0.7592566023652966 +29,363,0.901127113779166 +30,344,0.5662888944297417 +31,760,0.6405126152203485 +32,78,0.8807048459279793 +33,258,0.26646935501059654 +34,1035,0.7877263614433762 +35,495,0.4629100498862757 +36,1015,0.7807200583588266 +37,311,0.667124384994991 +38,80,0.5720775535473553 +39,496,0.7024393586862705 +40,361,0.6951816970931011 +41,815,0.6335525936249404 +42,760,0.6172133998483676 +43,430,0.3899064336825273 +44,45,0.4472135954999579 +45,44,0.4472135954999579 +46,228,0.6674238124719146 +47,649,0.5303300858899107 +48,561,0.6428243465332251 +49,282,0.4490502093697089 +50,1079,0.6396021490668313 +51,450,0.7715167498104595 +52,560,0.7008766440504625 +53,606,0.6324555320336759 +54,69,0.5962847939999439 +55,732,0.9230769230769231 +56,1012,0.5309907904212606 +57,227,0.6227991553292184 +58,431,0.6054055145966812 +59,590,0.6445033866354896 +60,760,0.6943650748294136 +61,550,0.5 +62,344,0.7050239879106326 +63,1037,0.7142857142857143 +64,399,0.7412493166611012 +65,16,0.5892556509887896 +66,67,0.6155870112510925 +67,66,0.6155870112510925 +68,32,0.7001400420140048 +69,233,0.6666666666666666 +70,275,0.6900655593423543 +71,246,0.5773502691896257 +72,540,0.5869391856534222 +73,248,0.4819315973414993 +74,594,0.6210590034081188 +75,32,0.7453559924999299 +76,654,0.7715167498104595 +77,303,0.7627700713964739 +78,32,0.8807048459279793 +79,1071,0.5303300858899107 +80,38,0.5720775535473553 +81,399,0.7412493166611012 +82,401,0.6708203932499369 +83,227,0.7252406676228422 +84,755,0.6153846153846154 +85,323,0.5715476066494082 +86,344,0.6435959029348384 +87,1086,0.6963106238227914 +88,777,0.4811252243246881 +89,560,0.7181848464596079 +90,353,0.8401680504168059 +91,209,0.6923076923076923 +92,323,0.43852900965351466 +93,182,0.7590721152765897 +94,570,0.6482037235521644 +95,610,0.7205766921228921 +96,151,0.4558423058385518 +97,269,0.7730012058189372 +98,97,0.6900655593423543 +99,738,0.6847367880174606 +100,691,0.7356123579206245 +101,100,0.6551384198826605 +102,104,0.7742781018131459 +103,104,0.7910398521054725 +104,107,0.8308675641104959 +105,104,0.7364853795464743 +106,104,0.8067793113007157 +107,104,0.8308675641104959 +108,104,0.7846715615207801 +109,104,0.7742781018131459 +110,104,0.764286998047602 +111,104,0.7846715615207801 +112,151,0.5241424183609592 +113,586,0.7893297629345911 +114,488,0.5443310539518174 +115,32,0.6804138174397717 +116,392,0.4767312946227962 +117,631,0.5916079783099616 +118,536,0.5015503913178175 +119,549,0.7276068751089989 +120,77,0.6390096504226938 +121,32,0.7453559924999299 +122,227,0.6428243465332251 +123,392,0.41403933560541256 +124,767,0.4923659639173309 +125,16,0.4902903378454601 +126,21,0.46709936649691375 +127,443,0.7276068751089989 +128,141,0.4160251471689218 +129,971,0.2069593385961789 +130,521,0.6288281455225324 +131,1052,0.78125 +132,217,0.6956521739130435 +133,265,0.7348469228349535 +134,151,0.5976143046671968 +135,861,0.5555838995037159 +136,259,0.7412493166611012 +137,543,0.7592566023652966 +138,861,0.5657789498610036 +139,308,0.43033148291193524 +140,624,0.5345224838248488 +141,142,0.6324555320336759 +142,141,0.6324555320336759 +143,540,0.5948118774794626 +144,674,0.4364357804719847 +145,926,0.8563488385776753 +146,1086,0.7833494518006403 +147,759,0.7161148740394329 +148,486,0.41247895569215276 +149,51,0.7205766921228921 +150,759,0.7412493166611012 +151,318,0.6681531047810609 +152,186,0.5555555555555556 +153,291,0.40032038451271784 +154,344,0.6482037235521644 +155,832,0.5423261445466404 +156,162,0.5400617248673216 +157,491,0.5769230769230769 +158,157,0.5725025740766715 +159,142,0.5642880936468347 +160,746,0.4707919090691997 +161,162,0.46656947481584343 +162,156,0.5400617248673216 +163,164,0.612056372482123 +164,163,0.612056372482123 +165,836,0.5473987486932373 +166,287,0.35909242322980395 +167,543,0.7409585736349483 +168,16,0.3952847075210474 +169,8,0.6445033866354896 +170,89,0.5514109665703558 +171,241,0.7008766440504625 +172,16,0.36084391824351614 +173,175,0.8528028654224417 +174,173,0.8 +175,173,0.8528028654224417 +176,777,0.5659164584181103 +177,763,0.5680375574437545 +178,151,0.545544725589981 +179,364,0.7526178090063816 +180,611,0.7333333333333333 +181,392,0.6674238124719146 +182,93,0.7590721152765897 +183,51,0.6708203932499369 +184,32,0.7216878364870323 +185,510,0.4016632088371218 +186,152,0.5555555555555556 +187,442,0.667124384994991 +188,947,0.3224510108185121 +189,377,0.6030226891555273 +190,320,0.6978631577988531 +191,46,0.6390096504226938 +192,384,0.9128709291752769 +193,760,0.5443310539518174 +194,91,0.6153846153846154 +195,208,0.6689936080056726 +196,607,0.667124384994991 +197,282,0.5773502691896257 +198,516,0.7541997111515459 +199,76,0.7200822998230956 +200,677,0.612056372482123 +201,668,0.7925939239012171 +202,759,0.7412493166611012 +203,46,0.6139406135149205 +204,815,0.816496580927726 +205,69,0.5962847939999439 +206,424,0.6092717958449424 +207,377,0.6963106238227914 +208,195,0.6689936080056726 +209,91,0.6923076923076923 +210,51,0.75 +211,431,0.6445033866354896 +212,251,0.7412493166611012 +213,308,0.6030226891555273 +214,1051,0.746003846592251 +215,462,0.8219529433756427 +216,1055,0.9130434782608695 +217,1056,0.9130434782608695 +218,1050,0.25819888974716115 +219,1058,0.92 +220,926,0.8424235391742319 +221,623,0.7484551991837488 +222,789,0.19287918745261487 +223,777,0.40422604172722165 +224,21,0.6092717958449424 +225,16,0.5590169943749475 +226,1077,0.6123724356957946 +227,32,0.7833494518006403 +228,451,0.7526178090063816 +229,51,0.75 +230,186,0.45883146774112354 +231,344,0.7592566023652966 +232,583,0.6666666666666666 +233,69,0.6666666666666666 +234,345,0.46308093477152584 +235,423,0.6030226891555273 +236,368,0.7938841860374447 +237,1073,0.6900655593423543 +238,76,0.6599663291074443 +239,240,0.5539117094069973 +240,239,0.5539117094069973 +241,327,0.7509392614826383 +242,772,0.5477225575051661 +243,573,0.7727272727272727 +244,563,0.6542886560876245 +245,249,0.4919349550499537 +246,269,0.7777777777777778 +247,1043,0.7637626158259733 +248,73,0.4819315973414993 +249,344,0.7592566023652966 +250,431,0.6240377207533828 +251,212,0.7412493166611012 +252,241,0.6847623832623259 +253,944,0.5444655064804709 +254,392,0.3956282840374722 +255,227,0.6227991553292184 +256,597,0.6180700462007377 +257,259,0.5150262026246047 +258,560,0.5143028508594515 +259,364,0.7526178090063816 +260,760,0.5773502691896258 +261,227,0.7526178090063816 +262,16,0.5590169943749475 +263,631,0.5368754921931592 +264,468,0.42107596053325946 +265,133,0.7348469228349535 +266,223,0.38805700005813276 +267,271,0.6940220937885672 +268,607,0.45573271518765 +269,246,0.7777777777777778 +270,692,0.8075728530872482 +271,344,0.7592566023652966 +272,760,0.5039526306789696 +273,89,0.5669467095138409 +274,777,0.6030226891555273 +275,70,0.6900655593423543 +276,69,0.6172133998483676 +277,1066,0.830336491706037 +278,777,0.5270462766947299 +279,323,0.5720775535473553 +280,284,0.28721347895177635 +281,241,0.6847623832623259 +282,565,0.7777777777777778 +283,633,0.769800358919501 +284,698,0.2959320151246863 +285,570,0.5976143046671968 +286,329,0.3350831266333564 +287,622,0.8421052631578947 +288,296,0.7142857142857143 +289,290,0.7223151185146152 +290,289,0.7223151185146152 +291,153,0.40032038451271784 +292,134,0.5262348115842176 +293,323,0.5720775535473553 +294,880,0.526136791501647 +295,560,0.596420051979227 +296,288,0.7142857142857143 +297,50,0.45643546458763845 +298,377,0.6030226891555273 +299,226,0.5103103630798288 +300,1079,0.6689936080056726 +301,660,0.6978631577988531 +302,631,0.45184805705753195 +303,77,0.7627700713964739 +304,760,0.6301260378126043 +305,453,0.2524467210539465 +306,713,0.6227991553292184 +307,151,0.5241424183609592 +308,213,0.6030226891555273 +309,69,0.5773502691896258 +310,680,0.5570860145311557 +311,37,0.667124384994991 +312,366,0.6227991553292184 +313,381,0.5714285714285714 +314,401,0.6172133998483676 +315,768,0.5685352436149612 +316,607,0.6054055145966812 +317,50,0.625 +318,151,0.6681531047810609 +319,77,0.6390096504226938 +320,190,0.6978631577988531 +321,478,0.7777777777777778 +322,368,0.7409585736349483 +323,574,0.6 +324,719,0.5217732845620352 +325,543,0.8003267306650412 +326,723,0.7637626158259733 +327,241,0.7509392614826383 +328,1047,0.7905694150420948 +329,374,0.612056372482123 +330,560,0.6748819059987713 +331,332,0.7954951288348661 +332,331,0.7954951288348661 +333,246,0.7777777777777778 +334,416,0.8104432008587534 +335,337,0.8583950752789522 +336,335,0.7309738846478072 +337,335,0.8583950752789522 +338,335,0.8164965809277259 +339,335,0.7637626158259734 +340,341,0.816496580927726 +341,340,0.816496580927726 +342,1047,0.8075728530872482 +343,290,0.7175473098524098 +344,368,0.7647058823529411 +345,358,0.9213717213621487 +346,201,0.7783117824941562 +347,733,0.64 +348,1014,0.6324555320336759 +349,443,0.6882472016116852 +350,504,0.6164889707256676 +351,202,0.6573757351339165 +352,698,0.3563483225498992 +353,90,0.8401680504168059 +354,448,0.5726562866782 +355,141,0.375 +356,314,0.5976143046671968 +357,287,0.6282808624375432 +358,345,0.9213717213621487 +359,16,0.5330017908890261 +360,758,0.48038446141526137 +361,663,0.7280252083092641 +362,662,0.6405126152203485 +363,29,0.901127113779166 +364,623,0.7833494518006403 +365,1084,0.7833494518006403 +366,754,0.7272727272727273 +367,344,0.6880329612324521 +368,236,0.7938841860374447 +369,777,0.6324555320336759 +370,631,0.5533985905294664 +371,151,0.545544725589981 +372,345,0.7998971731801373 +373,284,0.27755717517061185 +374,329,0.612056372482123 +375,32,0.8198915917499229 +376,967,0.5962847939999439 +377,385,0.778498944161523 +378,663,0.6182840223353118 +379,201,0.7280440075456133 +380,926,0.8424235391742319 +381,313,0.5714285714285714 +382,366,0.6963106238227914 +383,151,0.545544725589981 +384,192,0.9128709291752769 +385,377,0.778498944161523 +386,553,0.5773502691896258 +387,151,0.5241424183609592 +388,423,0.6227991553292184 +389,426,0.5635445125120265 +390,610,0.6666666666666666 +391,449,0.7745966692414834 +392,181,0.6674238124719146 +393,364,0.6581451817144176 +394,84,0.6054055145966812 +395,677,0.6288281455225324 +396,411,0.7844645405527362 +397,631,0.6139406135149205 +398,431,0.4803844614152614 +399,64,0.7412493166611012 +400,151,0.36667939881128453 +401,603,0.75 +402,768,0.6446583712203042 +403,414,0.6888467201936644 +404,1033,0.6288281455225324 +405,696,0.6708203932499369 +406,631,0.5715476066494082 +407,270,0.7779866052154991 +408,874,0.5506887917539348 +409,408,0.5361109642475097 +410,1034,0.6054055145966812 +411,396,0.7844645405527362 +412,27,0.46193488543715594 +413,541,0.6929348671835832 +414,444,0.7333333333333333 +415,752,0.6324555320336759 +416,334,0.8104432008587534 +417,740,0.6351073488299558 +418,441,0.8340576562282991 +419,420,0.6513389472789296 +420,419,0.6513389472789296 +421,419,0.6110100926607788 +422,377,0.47304991679126607 +423,541,0.7252406676228422 +424,206,0.6092717958449424 +425,227,0.7272727272727273 +426,389,0.5635445125120265 +427,323,0.5916079783099616 +428,377,0.6030226891555273 +429,877,0.565685424949238 +430,350,0.554321862524587 +431,377,0.6689936080056726 +432,396,0.7302967433402214 +433,24,0.6543303050815759 +434,1069,0.5017452060042544 +435,1076,0.6669729688499156 +436,448,0.6445033866354896 +437,38,0.4743416490252569 +438,484,0.4457424941602093 +439,760,0.5601120336112039 +440,571,0.3535533905932738 +441,418,0.8340576562282991 +442,572,0.6923076923076923 +443,127,0.7276068751089989 +444,414,0.7333333333333333 +445,723,0.7035264706814485 +446,883,0.35223497683817345 +447,775,0.6643638388299198 +448,665,0.6726727939963124 +449,391,0.7745966692414834 +450,51,0.7715167498104595 +451,228,0.7526178090063816 +452,442,0.6405126152203485 +453,906,0.4868538260775398 +454,775,0.746003846592251 +455,610,0.7205766921228921 +456,914,0.15512630699850574 +457,680,0.6735753140545634 +458,688,0.8 +459,843,0.6761234037828132 +460,770,0.7509392614826383 +461,498,0.7369555266607762 +462,215,0.8219529433756427 +463,186,0.5345224838248488 +464,631,0.5533985905294664 +465,892,0.6454972243679028 +466,926,0.7928249671720918 +467,226,0.5103103630798288 +468,1020,0.5547001962252291 +469,543,0.7409585736349483 +470,469,0.5819876952473779 +471,472,0.7096774193548387 +472,471,0.7096774193548387 +473,173,0.7456011350793257 +474,475,0.4216370213557839 +475,344,0.6435959029348384 +476,190,0.6825236327899351 +477,345,0.8472686738391096 +478,321,0.7777777777777778 +479,334,0.8 +480,481,0.8052627168725033 +481,480,0.8052627168725033 +482,481,0.7634222834143518 +483,344,0.6722139969935249 +484,438,0.4457424941602093 +485,287,0.7694837640638656 +486,1069,0.5222329678670935 +487,344,0.6880329612324521 +488,114,0.5443310539518174 +489,91,0.5229763603684907 +490,157,0.4823819106188661 +491,157,0.5769230769230769 +492,708,0.6338656910463875 +493,711,0.7235680511449364 +494,689,0.6557892373388906 +495,35,0.4629100498862757 +496,12,0.7368421052631579 +497,212,0.5547001962252291 +498,461,0.7369555266607762 +499,501,0.5039526306789697 +500,1012,0.6324555320336759 +501,637,0.6686478498357316 +502,663,0.6185895741317419 +503,378,0.5986843400892496 +504,350,0.6164889707256676 +505,597,0.4216370213557839 +506,425,0.7252406676228422 +507,156,0.4762896722078402 +508,412,0.4403855060505443 +509,50,0.45643546458763845 +510,1012,0.7302967433402214 +511,1012,0.5309907904212606 +512,421,0.23333333333333334 +513,753,0.6956521739130435 +514,1012,0.7559289460184545 +515,1084,0.6784005252999682 +516,198,0.7541997111515459 +517,37,0.667124384994991 +518,345,0.6933949711814644 +519,520,0.6285393610547089 +520,344,0.6534640392130713 +521,130,0.6288281455225324 +522,443,0.6255432421712244 +523,577,0.7058823529411765 +524,1053,0.8275159265510557 +525,538,0.7492686492653552 +526,542,0.5595028849441883 +527,524,0.8100925873009825 +528,216,0.8086937042208112 +529,217,0.8086937042208112 +530,509,0.4131182235954578 +531,533,0.4117647058823529 +532,551,0.7248824356090754 +533,531,0.4117647058823529 +534,693,0.6255432421712243 +535,694,0.8421052631578947 +536,397,0.5929994533288809 +537,760,0.7223151185146152 +538,525,0.7492686492653552 +539,631,0.6674238124719146 +540,541,0.6744532734334624 +541,423,0.7252406676228422 +542,692,0.6928203230275508 +543,325,0.8003267306650412 +544,742,0.7378647873726218 +545,1047,0.8660254037844386 +546,547,0.5813776741499453 +547,540,0.6155870112510925 +548,227,0.6225430174794672 +549,119,0.7276068751089989 +550,61,0.5 +551,532,0.7248824356090754 +552,771,0.5960395606792697 +553,386,0.5773502691896258 +554,1012,0.8236877675803729 +555,770,0.7368421052631579 +556,756,0.8387096774193549 +557,227,0.6030226891555273 +558,190,0.6825236327899351 +559,207,0.5555555555555556 +560,89,0.7181848464596079 +561,608,0.8181818181818182 +562,738,0.7280252083092641 +563,244,0.6542886560876245 +564,401,0.6405126152203485 +565,282,0.7777777777777778 +566,555,0.24525573579398632 +567,344,0.6790997501017323 +568,570,0.6131393394849658 +569,431,0.6054055145966812 +570,94,0.6482037235521644 +571,1063,0.6666666666666666 +572,442,0.6923076923076923 +573,243,0.7727272727272727 +574,323,0.6 +575,345,0.8687219087128831 +576,761,0.6805446536716203 +577,523,0.7058823529411765 +578,1086,0.5244044240850758 +579,674,0.6324555320336759 +580,344,0.6880329612324521 +581,580,0.6179143806533246 +582,1042,0.5720775535473553 +583,232,0.6666666666666666 +584,93,0.7319250547113999 +585,1013,0.6239177481057773 +586,113,0.7893297629345911 +587,69,0.5303300858899107 +588,32,0.7216878364870323 +589,197,0.5400617248673216 +590,591,0.7867957924694432 +591,590,0.7867957924694432 +592,590,0.693888666488711 +593,70,0.6900655593423543 +594,74,0.6210590034081188 +595,832,0.6577935144802719 +596,631,0.6674238124719146 +597,1068,0.7037037037037037 +598,648,0.7857142857142857 +599,777,0.39223227027636803 +600,50,0.625 +601,760,0.8006407690254357 +602,226,0.5892556509887896 +603,401,0.75 +604,151,0.5241424183609592 +605,287,0.32118202741878643 +606,53,0.6324555320336759 +607,1075,0.6923076923076923 +608,561,0.8181818181818182 +609,745,0.6736330697086078 +610,95,0.7205766921228921 +611,180,0.7333333333333333 +612,165,0.48038446141526137 +613,1083,0.8461538461538461 +614,151,0.5698028822981898 +615,401,0.5809475019311126 +616,1012,0.7427813527082074 +617,618,0.8320502943378436 +618,617,0.8320502943378436 +619,226,0.5330017908890261 +620,760,0.5773502691896258 +621,1012,0.7365895075034008 +622,287,0.8421052631578947 +623,364,0.7833494518006403 +624,401,0.6172133998483676 +625,627,0.6666666666666666 +626,627,0.7071067811865475 +627,626,0.7071067811865475 +628,759,0.7412493166611012 +629,91,0.47172817652486326 +630,287,0.6882472016116853 +631,208,0.6674238124719146 +632,760,0.5298129428260175 +633,283,0.769800358919501 +634,401,0.5809475019311126 +635,549,0.23570226039551587 +636,643,0.5367450401216932 +637,501,0.6686478498357316 +638,594,0.5832118435198043 +639,66,0.6139406135149205 +640,638,0.5673086289311754 +641,490,0.43033148291193524 +642,712,0.6069769786668839 +643,594,0.5378528742004771 +644,157,0.5707817929853929 +645,344,0.7431605356175384 +646,210,0.7216878364870323 +647,335,0.5518254055364693 +648,598,0.7857142857142857 +649,650,0.5833333333333334 +650,649,0.5833333333333334 +651,652,0.5657789498610036 +652,651,0.5657789498610036 +653,543,0.6659120918162916 +654,76,0.7715167498104595 +655,656,0.6324555320336759 +656,655,0.6324555320336759 +657,589,0.5039526306789696 +658,1009,0.548454318095348 +659,287,0.23606684260939012 +660,301,0.6978631577988531 +661,1084,0.7252406676228422 +662,362,0.6405126152203485 +663,361,0.7280252083092641 +664,665,0.7058823529411765 +665,664,0.7058823529411765 +666,1009,0.548454318095348 +667,84,0.5929994533288809 +668,201,0.7925939239012171 +669,76,0.7378647873726218 +670,397,0.53813823519705 +671,926,0.7928249671720918 +672,631,0.6139406135149205 +673,1044,0.7142857142857143 +674,579,0.6324555320336759 +675,610,0.5960395606792697 +676,46,0.6674238124719146 +677,395,0.6288281455225324 +678,727,0.6172133998483676 +679,926,0.7817359599705717 +680,457,0.6735753140545634 +681,233,0.6154574548966636 +682,344,0.6790997501017323 +683,760,0.5773502691896257 +684,692,0.6324555320336759 +685,1052,0.8616404368553291 +686,527,0.724568837309472 +687,527,0.7047138579747255 +688,458,0.8 +689,551,0.6963106238227914 +690,906,0.619522474129893 +691,100,0.7356123579206245 +692,270,0.8075728530872482 +693,534,0.6255432421712243 +694,535,0.8421052631578947 +695,55,0.5726562866782 +696,697,0.6956083436402525 +697,277,0.7044699536763469 +698,352,0.3563483225498992 +699,25,0.6324555320336759 +700,377,0.51425947722658 +701,344,0.6722139969935249 +702,594,0.5518254055364693 +703,602,0.5163977794943223 +704,1083,0.7211102550927979 +705,226,0.2988071523335984 +706,594,0.5050762722761054 +707,561,0.6225430174794672 +708,492,0.6338656910463875 +709,493,0.5750335777679776 +710,711,0.7606087305741639 +711,710,0.7606087305741639 +712,642,0.6069769786668839 +713,306,0.6227991553292184 +714,598,0.7590721152765897 +715,719,0.7894374242084413 +716,719,0.8003267306650412 +717,719,0.7963908027525801 +718,719,0.8406680016960503 +719,718,0.8406680016960503 +720,721,0.7849313448431118 +721,719,0.8116794499134278 +722,759,0.7412493166611012 +723,1012,0.769800358919501 +724,392,0.5262348115842176 +725,1086,0.5685352436149612 +726,335,0.5669467095138409 +727,678,0.6172133998483676 +728,1021,0.48989794855663565 +729,377,0.6227991553292184 +730,1042,0.2672612419124244 +731,89,0.6708203932499369 +732,55,0.9230769230769231 +733,347,0.64 +734,377,0.6227991553292184 +735,498,0.724568837309472 +736,287,0.7509392614826383 +737,631,0.6324555320336759 +738,562,0.7280252083092641 +739,482,0.5945945945945946 +740,417,0.6351073488299558 +741,1047,0.8451542547285166 +742,544,0.7378647873726218 +743,166,0.27937211830783126 +744,746,0.3117398431942748 +745,609,0.6736330697086078 +746,745,0.5238095238095238 +747,746,0.5135525910130955 +748,570,0.5572782125753528 +749,991,0.9332565252573828 +750,992,0.9284766908852593 +751,370,0.027441064997422587 +752,415,0.6324555320336759 +753,513,0.6956521739130435 +754,366,0.7272727272727273 +755,84,0.6153846153846154 +756,556,0.8387096774193549 +757,560,0.6180642325727469 +758,360,0.48038446141526137 +759,227,0.7526178090063816 +760,601,0.8006407690254357 +761,551,0.6963106238227914 +762,392,0.3227486121839514 +763,177,0.5680375574437545 +764,712,0.6043672230190352 +765,764,0.511766315719159 +766,143,0.5527707983925666 +767,535,0.635850784457874 +768,51,0.6963106238227914 +769,775,0.7302967433402214 +770,460,0.7509392614826383 +771,401,0.6666666666666666 +772,242,0.5477225575051661 +773,9,0.6428571428571429 +774,485,0.6885303726590963 +775,454,0.746003846592251 +776,287,0.3278050340535929 +777,46,0.6324555320336759 +778,1006,0.2847473987257497 +779,782,0.7058823529411765 +780,779,0.53813823519705 +781,780,0.3344968040028363 +782,779,0.7058823529411765 +783,784,0.5060480768510598 +784,787,0.6852482888641929 +785,787,0.6634888026970371 +786,787,0.6741052976458244 +787,784,0.6852482888641929 +788,789,0.6002450479987809 +789,788,0.6002450479987809 +790,631,0.3872983346207417 +791,792,0.669438681395203 +792,804,0.6900655593423543 +793,794,0.7405316311773545 +794,793,0.7405316311773545 +795,793,0.7405316311773545 +796,793,0.6183469424008423 +797,793,0.7058823529411765 +798,792,0.6515837655350015 +799,792,0.6888467201936644 +800,802,0.75 +801,800,0.7385489458759964 +802,800,0.75 +803,804,0.7559289460184544 +804,803,0.7559289460184544 +805,804,0.6267831705280087 +806,794,0.682736429567124 +807,804,0.7349684152591671 +808,807,0.7071067811865476 +809,804,0.6929348671835832 +810,812,0.7050239879106326 +811,812,0.7431605356175384 +812,811,0.7431605356175384 +813,916,0.7624425757515653 +814,815,0.7050239879106326 +815,825,0.8451542547285166 +816,824,0.8068715304598785 +817,815,0.8304547985373997 +818,815,0.816496580927726 +819,815,0.7559289460184545 +820,815,0.8087360843031884 +821,823,0.8060599359358184 +822,823,0.8458258116519014 +823,822,0.8458258116519014 +824,815,0.816496580927726 +825,815,0.8451542547285166 +826,814,0.6676978608895887 +827,814,0.6676978608895887 +828,829,0.7060180864974626 +829,828,0.7060180864974626 +830,829,0.7060180864974626 +831,829,0.7060180864974626 +832,595,0.6577935144802719 +833,942,0.6676978608895887 +834,804,0.5976143046671968 +835,804,0.6482037235521644 +836,837,0.6771612231098582 +837,836,0.6771612231098582 +838,835,0.6351073488299558 +839,804,0.5698028822981898 +840,841,0.75 +841,840,0.75 +842,844,0.7302967433402215 +843,844,0.7745966692414834 +844,843,0.7745966692414834 +845,844,0.6460582824697986 +846,848,0.6197797868009122 +847,848,0.6515837655350015 +848,850,0.710046946804693 +849,848,0.6350852961085883 +850,848,0.710046946804693 +851,804,0.6482037235521644 +852,952,0.708010432393843 +853,855,0.6669729688499156 +854,853,0.5820855000871991 +855,804,0.6681531047810609 +856,861,0.6929348671835832 +857,858,0.5443310539518174 +858,861,0.600099198148979 +859,861,0.6929348671835832 +860,861,0.6837634587578276 +861,804,0.7142857142857143 +862,867,0.7590721152765897 +863,864,0.5510140510374545 +864,865,0.7111590022187595 +865,864,0.7111590022187595 +866,864,0.6023442460127328 +867,862,0.7590721152765897 +868,804,0.6681531047810609 +869,868,0.6481812160876687 +870,959,0.5865884600854132 +871,872,0.6928203230275508 +872,871,0.6928203230275508 +873,871,0.6888467201936644 +874,873,0.6676978608895887 +875,871,0.5274096316339225 +876,877,0.7745966692414834 +877,876,0.7745966692414834 +878,877,0.6928203230275508 +879,877,0.7302967433402215 +880,294,0.526136791501647 +881,987,0.7602631123499285 +882,877,0.5313689313240572 +883,884,0.7503664793768668 +884,883,0.7503664793768668 +885,877,0.6888467201936644 +886,892,0.8 +887,891,0.75 +888,887,0.6625891564490793 +889,888,0.5640333466372089 +890,891,0.8067793113007157 +891,890,0.8067793113007157 +892,893,0.8885233166386385 +893,892,0.8885233166386385 +894,886,0.7108186533109108 +895,886,0.7302967433402215 +896,895,0.6255432421712243 +897,898,0.7299963950884314 +898,897,0.7299963950884314 +899,113,0.7405316311773545 +900,899,0.6871842709362768 +901,899,0.6351073488299558 +902,873,0.540728715025007 +903,902,0.42526587428312096 +904,905,0.7580980435789034 +905,904,0.7580980435789034 +906,690,0.619522474129893 +907,906,0.5838403593598094 +908,909,0.6444240777830837 +909,910,0.6947125179709105 +910,912,0.7567567567567568 +911,912,0.8160720960636174 +912,911,0.8160720960636174 +913,910,0.7467330458877309 +914,915,0.6978631577988531 +915,914,0.6978631577988531 +916,813,0.7624425757515653 +917,918,0.7624437362098716 +918,917,0.7624437362098716 +919,920,0.65 +920,919,0.65 +921,831,0.6933752452815364 +922,1012,0.6246950475544243 +923,924,0.6875 +924,923,0.6875 +925,923,0.6875 +926,145,0.8563488385776753 +927,804,0.5976143046671968 +928,929,0.6155870112510925 +929,928,0.6155870112510925 +930,932,0.6324555320336759 +931,932,0.7378647873726218 +932,931,0.7378647873726218 +933,804,0.629940788348712 +934,933,0.5163977794943223 +935,936,0.5022831168966868 +936,935,0.5022831168966868 +937,804,0.5832118435198043 +938,939,0.7276068751089989 +939,938,0.7276068751089989 +940,939,0.457495710997814 +941,939,0.6481812160876687 +942,833,0.6676978608895887 +943,949,0.6888467201936644 +944,653,0.613395615082148 +945,943,0.5144957554275265 +946,943,0.620505227994023 +947,946,0.5956833971812706 +948,949,0.669438681395203 +949,963,0.710046946804693 +950,949,0.6515837655350015 +951,952,0.7112867591590193 +952,951,0.7112867591590193 +953,951,0.6978631577988531 +954,815,0.7058865787567897 +955,956,0.7171087882915773 +956,955,0.7171087882915773 +957,949,0.6350852961085883 +958,949,0.6350852961085883 +959,870,0.5865884600854132 +960,961,0.7349684152591671 +961,960,0.7349684152591671 +962,963,0.6708203932499369 +963,964,0.75 +964,963,0.75 +965,949,0.710046946804693 +966,201,0.7399400733959437 +967,980,0.710046946804693 +968,970,0.8548504142651103 +969,968,0.747545001596402 +970,968,0.8548504142651103 +971,973,0.4437601569801833 +972,973,0.7559289460184545 +973,974,0.76 +974,973,0.76 +975,973,0.7452413135250994 +976,978,0.7276068751089989 +977,976,0.7184212081070996 +978,976,0.7276068751089989 +979,870,0.5715476066494082 +980,967,0.710046946804693 +981,804,0.7142857142857143 +982,804,0.6482037235521644 +983,984,0.6666666666666666 +984,983,0.6666666666666666 +985,986,0.5820855000871991 +986,988,0.6859943405700354 +987,881,0.7602631123499285 +988,986,0.6859943405700354 +989,990,0.6978631577988531 +990,989,0.6978631577988531 +991,749,0.9332565252573828 +992,750,0.9284766908852593 +993,992,0.7348469228349535 +994,804,0.6482037235521644 +995,1001,0.790695492614867 +996,1001,0.8013093298449995 +997,1001,0.8238858408710995 +998,997,0.8058665208462562 +999,1001,0.8238858408710995 +1000,1001,0.7804925426732847 +1001,997,0.8238858408710995 +1002,1003,0.606128125356204 +1003,1002,0.606128125356204 +1004,804,0.7142857142857143 +1005,1009,0.5969620057957091 +1006,1007,0.4931969619160719 +1007,1008,0.7905694150420949 +1008,1007,0.7905694150420949 +1009,1005,0.5969620057957091 +1010,1009,0.5743796858006126 +1011,226,0.5330017908890261 +1012,554,0.8236877675803729 +1013,344,0.7647058823529411 +1014,348,0.6324555320336759 +1015,36,0.7807200583588266 +1016,509,0.2971334519846062 +1017,366,0.5850179393017045 +1018,413,0.6880624620561867 +1019,1020,0.6602252917735247 +1020,1019,0.6602252917735247 +1021,323,0.5 +1022,1087,0.4612656040144425 +1023,8,0.4895354638983791 +1024,69,0.5773502691896258 +1025,1050,0.50709255283711 +1026,1042,0.3956282840374722 +1027,1050,0.22360679774997896 +1028,366,0.6446583712203042 +1029,680,0.5270462766947299 +1030,323,0.6 +1031,760,0.5443310539518174 +1032,69,0.5539117094069973 +1033,404,0.6288281455225324 +1034,69,0.6405126152203485 +1035,34,0.7877263614433762 +1036,1037,0.6813851438692469 +1037,63,0.7142857142857143 +1038,555,0.7175473098524098 +1039,1020,0.6045603888763706 +1040,265,0.6335525936249404 +1041,1038,0.6255432421712243 +1042,582,0.5720775535473553 +1043,247,0.7637626158259733 +1044,449,0.7590721152765897 +1045,677,0.4708709557974187 +1046,1069,0.5222329678670935 +1047,545,0.8660254037844386 +1048,926,0.7928249671720918 +1049,1050,0.7454993164109749 +1050,1049,0.7454993164109749 +1051,214,0.746003846592251 +1052,685,0.8616404368553291 +1053,524,0.8275159265510557 +1054,1053,0.8106434833777776 +1055,216,0.9130434782608695 +1056,217,0.9130434782608695 +1057,541,0.5765566601970551 +1058,219,0.92 +1059,1062,0.6513389472789296 +1060,1062,0.7108057085060271 +1061,1062,0.6617241025372945 +1062,1060,0.7108057085060271 +1063,571,0.6666666666666666 +1064,926,0.7071067811865476 +1065,1066,0.5333964609104418 +1066,277,0.830336491706037 +1067,760,0.4923659639173309 +1068,597,0.7037037037037037 +1069,486,0.5222329678670935 +1070,323,0.5477225575051661 +1071,397,0.5547001962252291 +1072,344,0.7592566023652966 +1073,610,0.6943650748294136 +1074,760,0.45184805705753195 +1075,607,0.6923076923076923 +1076,435,0.6669729688499156 +1077,226,0.6123724356957946 +1078,631,0.5715476066494082 +1079,300,0.6689936080056726 +1080,210,0.7453559924999299 +1081,578,0.5172935265326569 +1082,368,0.7789808377045201 +1083,613,0.8461538461538461 +1084,365,0.7833494518006403 +1085,69,0.5962847939999439 +1086,146,0.7833494518006403 +1087,597,0.5333628833421287 +1088,0,0.0 diff --git a/experiments/fagin.py b/experiments/fagin.py index 15afebc..5a2878c 100644 --- a/experiments/fagin.py +++ b/experiments/fagin.py @@ -1,6 +1,7 @@ import csv +import pytest from collections import Counter -from fog.clustering.fagin import fagin_k1 +from fog.clustering.fagin import fagin_k1, threshold_algorithm_k1 from fog.tokenizers import ngrams from fog.metrics import sparse_cosine_similarity from experiments.utils import Timer @@ -13,37 +14,44 @@ with open('./data/fagin_k1_ground_truth.csv') as f: VECTORS = [Counter(ngrams(5, chars)) for chars in UNIVERSITIES] -with Timer('quadratic'): - with open('./data/fagin_k1_ground_truth.csv', 'w') as f: - # writer = csv.writer(f) +# with Timer('quadratic'): +# with open('./data/fagin_k1_ground_truth.csv', 'w') as f: +# writer = csv.writer(f) - for i in range(len(VECTORS)): - v1 = VECTORS[i] - best = None +# for i in range(len(VECTORS)): +# v1 = VECTORS[i] +# best = None - for j in range(len(VECTORS)): - if i == j: - continue +# for j in range(len(VECTORS)): +# if i == j: +# continue - v2 = VECTORS[j] +# v2 = VECTORS[j] - c = sparse_cosine_similarity(v1, v2) +# c = sparse_cosine_similarity(v1, v2) - # NOTE: this is stable and lower index wins - if best is None or c > best[0]: - best = (c, j) +# # NOTE: this is stable and lower index wins +# if best is None or c > best[0]: +# best = (c, j) - # print(UNIVERSITIES[i], UNIVERSITIES[best[1]]) - # writer.writerow([i, best[1], str(best[0])]) +# # print(UNIVERSITIES[i], UNIVERSITIES[best[1]]) +# writer.writerow([i, best[1], str(best[0])]) -with Timer('Fagin'): - for i, candidates in fagin_k1(VECTORS): - v = VECTORS[i] - j = max(candidates, key=lambda c: sparse_cosine_similarity(v, VECTORS[c])) +# with Timer('FA'): +# for i, candidates in fagin_k1(VECTORS): +# v = VECTORS[i] +# j = max(candidates, key=lambda c: sparse_cosine_similarity(v, VECTORS[c])) - # print("'%s'" % UNIVERSITIES[i]) - # print("'%s'" % UNIVERSITIES[GROUND_TRUTH[i][0]]) - # print("'%s'" % UNIVERSITIES[j]) - # print(i, j, len(candidates), GROUND_TRUTH[i], sparse_cosine_similarity(v, VECTORS[j])) +# # print("'%s'" % UNIVERSITIES[i]) +# # print("'%s'" % UNIVERSITIES[GROUND_TRUTH[i][0]]) +# # print("'%s'" % UNIVERSITIES[j]) +# # print(i, j, len(candidates), GROUND_TRUTH[i], sparse_cosine_similarity(v, VECTORS[j])) - assert j == GROUND_TRUTH[i][0] +# assert j == GROUND_TRUTH[i][0] + +with Timer('TA'): + + # TODO: current heap comparison used is not stable + for i, j in threshold_algorithm_k1(VECTORS): + if i != j: + assert sparse_cosine_similarity(VECTORS[i], VECTORS[j]) == pytest.approx(GROUND_TRUTH[i][1]) diff --git a/fog/clustering/fagin.py b/fog/clustering/fagin.py index a3d8632..53c12d5 100644 --- a/fog/clustering/fagin.py +++ b/fog/clustering/fagin.py @@ -22,6 +22,8 @@ # from collections import defaultdict, Counter +from fog.metrics.cosine import sparse_cosine_similarity + def fagin_k1(vectors): inverted_lists = defaultdict(list) @@ -57,3 +59,67 @@ def fagin_k1(vectors): break offset += 1 + + +def threshold_algorithm_k1(vectors): + inverted_lists = defaultdict(list) + + for i, vector in enumerate(vectors): + for d, w in vector.items(): + inverted_lists[d].append((w, i)) + + for l in inverted_lists.values(): + l.sort() + + for i, vector in enumerate(vectors): + visited = set() + offset = 0 + + t = 0.0 + best = [None, None] + t_vector = {} + + while True: + stop = True + + for d in vector: + l = inverted_lists[d] + + if offset >= len(l): + continue + + stop = False + + w, j = l[offset] + t_vector[d] = w + + if j in visited: + continue + + cs = sparse_cosine_similarity(vector, vectors[j]) + visited.add(j) + + if best[0] is None: + best[0] = (cs, j) + else: + if cs > best[0][0]: + best[1] = best[0] + best[0] = (cs, j) + else: + if best[1] is None: + best[1] = (cs, j) + elif cs > best[1][0]: + best[1] = (cs, j) + + # Final break + return self if best cos is 0.0 + if stop: + yield i, best[1][1] if best[1] is not None else best[0][1] + break + + t = sparse_cosine_similarity(vector, t_vector) + + if best[1] is not None and best[1][0] >= t: + yield i, best[1][1] if best[1] is not None else best[0][1] + break + + offset += 1