mirror of https://github.com/perkeep/perkeep.git
108 lines
3.3 KiB
Perl
108 lines
3.3 KiB
Perl
|
#!/usr/bin/perl
|
||
|
#
|
||
|
# This is a basic indexing system for prototyping and demonstration
|
||
|
# purposes only.
|
||
|
#
|
||
|
# It's not production quality by any stretch of the imagination: it
|
||
|
# uses sqlite, is single-threaded, single-process, etc.
|
||
|
#
|
||
|
|
||
|
use strict;
|
||
|
use DBI;
|
||
|
use DBD::SQLite;
|
||
|
use Getopt::Long;
|
||
|
use LWP::UserAgent;
|
||
|
use LWP::ConnCache;
|
||
|
use JSON::Any;
|
||
|
use Net::Netrc;
|
||
|
|
||
|
my $blobserver;
|
||
|
my $dbname;
|
||
|
GetOptions(
|
||
|
"server=s" => \$blobserver,
|
||
|
"dbfile=s" => \$dbname,
|
||
|
) or usage();
|
||
|
|
||
|
usage() unless $blobserver && $blobserver =~ m!^(?:(https?)://)?([^/]+)!;
|
||
|
my $scheme = $1 || "http";
|
||
|
my $hostport = $2;
|
||
|
|
||
|
sub usage {
|
||
|
die
|
||
|
"Usage: index.pl --server=[http|https://]hostname[:port]\n" .
|
||
|
" --dbfile=<filename> (defaults to hostname_port.camlidx.db)\n";
|
||
|
}
|
||
|
|
||
|
my $netrc_mach = Net::Netrc->lookup($hostport)
|
||
|
or die "No ~/.netrc file or section for host '$hostport'.\n";
|
||
|
|
||
|
unless ($dbname) {
|
||
|
$dbname = "$hostport.camlidx.db";
|
||
|
$dbname =~ s/:/_/;
|
||
|
}
|
||
|
my $needs_init = ! -f $dbname || -s $dbname == 0;
|
||
|
my $db = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", { RaiseError => 1 })
|
||
|
or die "Failed to open sqlite file $dbname";
|
||
|
if ($needs_init) {
|
||
|
$db->do("CREATE TABLE blobs (" .
|
||
|
" blobref VARCHAR(80) NOT NULL PRIMARY KEY, " .
|
||
|
" size INT NULL, " .
|
||
|
" mimetype VARCHAR(30) NULL)");
|
||
|
}
|
||
|
|
||
|
my $ua = LWP::UserAgent->new(
|
||
|
agent => "Camlistore/BasicIndexer",
|
||
|
conn_cache => LWP::ConnCache->new,
|
||
|
);
|
||
|
|
||
|
my $json = JSON::Any->new;
|
||
|
|
||
|
# TODO: remove hard-coded realm
|
||
|
$ua->credentials($hostport, "camlistored", "user", $netrc_mach->password);
|
||
|
|
||
|
my $after = "";
|
||
|
my $n_blobs = 0;
|
||
|
while (1) {
|
||
|
print "Enumerating starting at: $after ... ($n_blobs blobs so far)\n";
|
||
|
my $res = $ua->get("$scheme://$hostport/camli/enumerate-blobs?after=$after&limit=5000");
|
||
|
unless ($res->is_success) {
|
||
|
die "Failure from /camli/enumerate-blobs?after=$after: " . $res->status_line;
|
||
|
}
|
||
|
my $jres = $json->jsonToObj($res->content);
|
||
|
|
||
|
my $bloblist = $jres->{'blobs'};
|
||
|
if (ref($bloblist) eq "ARRAY") {
|
||
|
my $first = $bloblist->[0]{'blobRef'};
|
||
|
my $last = $bloblist->[-1]{'blobRef'};
|
||
|
my $sth = $db->prepare("SELECT blobref, size, mimetype FROM blobs WHERE " .
|
||
|
"blobref >= ? AND blobref <= ?");
|
||
|
$sth->execute($first, $last);
|
||
|
my %inventory; # blobref -> [$size, $bool_have_mime]
|
||
|
while (my ($lblob, $lsize, $lmimetype) = $sth->fetchrow_array) {
|
||
|
$inventory{$lblob} = [$lsize, defined($lmimetype)];
|
||
|
}
|
||
|
|
||
|
foreach my $blob (@$bloblist) {
|
||
|
$n_blobs++;
|
||
|
my $lblob = $inventory{$blob->{'blobRef'}};
|
||
|
if (!$lblob) {
|
||
|
print "Inserting $blob->{'blobRef'} ...\n";
|
||
|
$db->do("INSERT INTO blobs (blobref, size) VALUES (?, ?)", undef,
|
||
|
$blob->{'blobRef'}, $blob->{'size'});
|
||
|
next;
|
||
|
}
|
||
|
if ($lblob && !$lblob->[0] && $blob->{'size'}) {
|
||
|
print "Updating size of $blob->{'blobRef'} ...\n";
|
||
|
$db->do("UPDATE blobs SET size=? WHERE blobref=?", undef,
|
||
|
$blob->{'size'}, $blob->{'blobRef'});
|
||
|
next;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
last unless $jres->{'after'} && $jres->{'after'} gt $after;
|
||
|
$after = $jres->{'after'};
|
||
|
}
|
||
|
print "$n_blobs blobs indexed.\n";
|