Update *.pl files
This commit is contained in:
parent
85d3c5ed42
commit
6c9c024b0b
40
build.pl
40
build.pl
|
@ -7,9 +7,18 @@ $/ = "</dublin_core>\n"; # record separator
|
||||||
|
|
||||||
$what = 1000; # dummy id for when there’s no file
|
$what = 1000; # dummy id for when there’s no file
|
||||||
|
|
||||||
|
$file = $ARGV[0];
|
||||||
|
|
||||||
|
mkdir "import", 0755;
|
||||||
|
|
||||||
while (<>) {
|
while (<>) {
|
||||||
$cul = "";
|
$cul = "";
|
||||||
$tac = "";
|
$tac = "";
|
||||||
|
$url = "";
|
||||||
|
|
||||||
|
if (!(defined $_ and length $_ > 0)) {
|
||||||
|
exit;
|
||||||
|
}
|
||||||
|
|
||||||
# discard the top and bottom tags
|
# discard the top and bottom tags
|
||||||
s/<collection>\n//;
|
s/<collection>\n//;
|
||||||
|
@ -21,7 +30,8 @@ while (<>) {
|
||||||
#if (s!<dcvalue element="relation" qualifier="uri">\s*(.*?)\s*<\/dcvalue>\n!!s) {
|
#if (s!<dcvalue element="relation" qualifier="uri">\s*(.*?)\s*<\/dcvalue>\n!!s) {
|
||||||
if (/<dcvalue element="relation" qualifier="uri">\s*(.*?)\s*<\/dcvalue>\n/) {
|
if (/<dcvalue element="relation" qualifier="uri">\s*(.*?)\s*<\/dcvalue>\n/) {
|
||||||
$cul = $1;
|
$cul = $1;
|
||||||
if ($cul =~ /docs/) {
|
$url = $1;
|
||||||
|
if ($cul =~ /docs/ || $cul =~ /file/) {
|
||||||
#$cul =~ s@.*(folders/|d/)([^?&/]+).*@\2@;
|
#$cul =~ s@.*(folders/|d/)([^?&/]+).*@\2@;
|
||||||
$cul =~ s@.*(d/)([^?&/]+).*@$2@;
|
$cul =~ s@.*(d/)([^?&/]+).*@$2@;
|
||||||
$tac = "https://drive.google.com/uc?export=download&id=$cul";
|
$tac = "https://drive.google.com/uc?export=download&id=$cul";
|
||||||
|
@ -52,27 +62,39 @@ while (<>) {
|
||||||
print "$tac\n";
|
print "$tac\n";
|
||||||
print "$cul\n";
|
print "$cul\n";
|
||||||
|
|
||||||
|
my $duplicated = `grep -o \"$url\" $file | wc -l`;
|
||||||
|
if ($duplicated > 1) {
|
||||||
|
print "DUPLICATED";
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
|
||||||
system "curl -L \"$tac\" -o import/$id/$cul";
|
system "curl -L \"$tac\" -o import/$id/$cul";
|
||||||
#system "curl -s -L \"$tac\" -o import/$id/$cul";
|
#system "curl -s -L \"$tac\" -o import/$id/$cul";
|
||||||
|
|
||||||
my $output = `file import/$id/$cul`;
|
my $output = `file import/$id/$cul`;
|
||||||
print $output;
|
print $output;
|
||||||
|
|
||||||
|
$ext = '';
|
||||||
|
|
||||||
if ($output =~ /PDF/) {
|
if ($output =~ /PDF/) {
|
||||||
$ext = "pdf";
|
$ext = "pdf";
|
||||||
|
system "mv import/$id/$cul import/$id/$cul.pdf";
|
||||||
}
|
}
|
||||||
if ($output =~ /Word/) {
|
if ($output =~ /Word/) {
|
||||||
system "soffice --headless --convert-to pdf import/$id/$cul";
|
system "soffice --headless --convert-to pdf import/$id/$cul";
|
||||||
$ext = "pdf";
|
$ext = "docx";
|
||||||
|
system "mv import/$id/$cul import/$id/$cul.docx";
|
||||||
|
system "mv $cul.pdf import/$id/$cul.pdf";
|
||||||
}
|
}
|
||||||
|
|
||||||
system "mv import/$id/$cul import/$id/$cul.$ext";
|
if ($ext eq 'pdf' || $ext eq 'docx') {
|
||||||
|
# ... create the contents file ...
|
||||||
# ... create the contents file ...
|
open OUT, ">import/$id/contents"
|
||||||
open OUT, ">import/$id/contents"
|
or die "Cannot open contents for $id, $!\n";
|
||||||
or die "Cannot open contents for $id, $!\n";
|
print OUT "$cul.pdf";
|
||||||
print OUT "$cul.$ext";
|
print OUT "\n$cul.docx" if $ext eq 'docx';
|
||||||
close OUT;
|
close OUT;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
18
marc2dc.pl
18
marc2dc.pl
|
@ -11,27 +11,25 @@ print qq|<collection>\n|;
|
||||||
|
|
||||||
while (my $blob = <>) { # suck in one MARC record at a time
|
while (my $blob = <>) { # suck in one MARC record at a time
|
||||||
|
|
||||||
print qq|<dublin_core>\n|;
|
|
||||||
|
|
||||||
# convert the MARC to DC
|
# convert the MARC to DC
|
||||||
my $marc = MARC::Record->new_from_usmarc( $blob );
|
my $marc = MARC::Record->new_from_usmarc( $blob );
|
||||||
my $crosswalk = MARC::Crosswalk::DublinCore->new( qualified => 0 );
|
my $crosswalk = MARC::Crosswalk::DublinCore->new( qualified => 0 );
|
||||||
my $dc = $crosswalk->as_dublincore( $marc );
|
my $dc = $crosswalk->as_dublincore( $marc );
|
||||||
|
|
||||||
|
my $has_content = 0;
|
||||||
|
|
||||||
# output the DC as XML
|
# output the DC as XML
|
||||||
for( $dc->elements ) {
|
for( $dc->elements ) {
|
||||||
|
if (!$has_content) {
|
||||||
|
$has_content = 1;
|
||||||
|
print qq|<dublin_core>\n|;
|
||||||
|
}
|
||||||
|
|
||||||
my $element = lc($_->name);
|
my $element = lc($_->name);
|
||||||
my $qualifier = lc($_->qualifier);
|
my $qualifier = lc($_->qualifier);
|
||||||
my $scheme = lc($_->scheme);
|
my $scheme = lc($_->scheme);
|
||||||
my $content = $_->content;
|
my $content = $_->content;
|
||||||
|
|
||||||
##print $_->content;
|
|
||||||
|
|
||||||
#print qq|$element\n|;
|
|
||||||
#print qq|$qualifier\n|;
|
|
||||||
#print qq|$scheme\n|;
|
|
||||||
#print qq|$content\n|;
|
|
||||||
|
|
||||||
# escape reserved characters
|
# escape reserved characters
|
||||||
$content =~ s/&/&amp;/gs;
|
$content =~ s/&/&amp;/gs;
|
||||||
$content =~ s/</&lt;/gs;
|
$content =~ s/</&lt;/gs;
|
||||||
|
@ -87,7 +85,7 @@ while (my $blob = <>) { # suck in one MARC record at a time
|
||||||
# printf qq|>%s</dcvalue>\n|, $content;
|
# printf qq|>%s</dcvalue>\n|, $content;
|
||||||
# }
|
# }
|
||||||
|
|
||||||
print qq|</dublin_core>\n|;
|
print qq|</dublin_core>\n| if $has_content;
|
||||||
}
|
}
|
||||||
|
|
||||||
print qq|</collection>\n|;
|
print qq|</collection>\n|;
|
||||||
|
|
Loading…
Reference in New Issue