diff --git a/indexing/basic/index.pl b/indexing/basic/index.pl index 1967e7bd4..b40996d56 100755 --- a/indexing/basic/index.pl +++ b/indexing/basic/index.pl @@ -60,48 +60,53 @@ my $json = JSON::Any->new; # TODO: remove hard-coded realm $ua->credentials($hostport, "camlistored", "user", $netrc_mach->password); -my $after = ""; -my $n_blobs = 0; -while (1) { - print "Enumerating starting at: $after ... ($n_blobs blobs so far)\n"; - my $res = $ua->get("$scheme://$hostport/camli/enumerate-blobs?after=$after&limit=5000"); - unless ($res->is_success) { - die "Failure from /camli/enumerate-blobs?after=$after: " . $res->status_line; - } - my $jres = $json->jsonToObj($res->content); +print "Iterating over blobs.\n"; +my $n_blobs = learn_blob_digests_and_sizes(); +print "Number of blobs: $n_blobs.\n"; - my $bloblist = $jres->{'blobs'}; - if (ref($bloblist) eq "ARRAY") { - my $first = $bloblist->[0]{'blobRef'}; - my $last = $bloblist->[-1]{'blobRef'}; - my $sth = $db->prepare("SELECT blobref, size, mimetype FROM blobs WHERE " . - "blobref >= ? AND blobref <= ?"); - $sth->execute($first, $last); - my %inventory; # blobref -> [$size, $bool_have_mime] - while (my ($lblob, $lsize, $lmimetype) = $sth->fetchrow_array) { - $inventory{$lblob} = [$lsize, defined($lmimetype)]; +sub learn_blob_digests_and_sizes { + my $after = ""; + my $n_blobs = 0; + while (1) { + my $after_display = $after || "(start)"; + print "Enumerating starting at: $after_display ... ($n_blobs blobs so far)\n"; + my $res = $ua->get("$scheme://$hostport/camli/enumerate-blobs?after=$after&limit=1000"); + unless ($res->is_success) { + die "Failure from /camli/enumerate-blobs?after=$after: " . $res->status_line; } + my $jres = $json->jsonToObj($res->content); - foreach my $blob (@$bloblist) { - $n_blobs++; - my $lblob = $inventory{$blob->{'blobRef'}}; - if (!$lblob) { - print "Inserting $blob->{'blobRef'} ...\n"; - $db->do("INSERT INTO blobs (blobref, size) VALUES (?, ?)", undef, - $blob->{'blobRef'}, $blob->{'size'}); - next; + my $bloblist = $jres->{'blobs'}; + if (ref($bloblist) eq "ARRAY") { + my $first = $bloblist->[0]{'blobRef'}; + my $last = $bloblist->[-1]{'blobRef'}; + my $sth = $db->prepare("SELECT blobref, size, mimetype FROM blobs WHERE " . + "blobref >= ? AND blobref <= ?"); + $sth->execute($first, $last); + my %inventory; # blobref -> [$size, $bool_have_mime] + while (my ($lblob, $lsize, $lmimetype) = $sth->fetchrow_array) { + $inventory{$lblob} = [$lsize, defined($lmimetype)]; } - if ($lblob && !$lblob->[0] && $blob->{'size'}) { - print "Updating size of $blob->{'blobRef'} ...\n"; - $db->do("UPDATE blobs SET size=? WHERE blobref=?", undef, - $blob->{'size'}, $blob->{'blobRef'}); - next; + foreach my $blob (@$bloblist) { + $n_blobs++; + my $lblob = $inventory{$blob->{'blobRef'}}; + if (!$lblob) { + print "Inserting $blob->{'blobRef'} ...\n"; + $db->do("INSERT INTO blobs (blobref, size) VALUES (?, ?)", undef, + $blob->{'blobRef'}, $blob->{'size'}); + next; + } + if ($lblob && !$lblob->[0] && $blob->{'size'}) { + print "Updating size of $blob->{'blobRef'} ...\n"; + $db->do("UPDATE blobs SET size=? WHERE blobref=?", undef, + $blob->{'size'}, $blob->{'blobRef'}); + next; + } } } + last unless $jres->{'after'} && $jres->{'after'} gt $after; + $after = $jres->{'after'}; } - - last unless $jres->{'after'} && $jres->{'after'} gt $after; - $after = $jres->{'after'}; + return $n_blobs; } -print "$n_blobs blobs indexed.\n";