Remove unused code

This commit is contained in:
Santiago Lo Coco 2023-10-07 15:17:56 -03:00
parent 338136c9e8
commit 08de4100f1
2 changed files with 4 additions and 27 deletions

View File

@ -5,9 +5,9 @@ binmode(STDOUT, ":utf8");
my @chars = ("A".."Z", "a".."z", "1".."9", "_"); my @chars = ("A".."Z", "a".."z", "1".."9", "_");
$/ = "</dublin_core>\n"; # record separator $/ = "</dublin_core>\n";
$what = 1000; # dummy id for when there's no file $what = 1000;
$file = $ARGV[0]; $file = $ARGV[0];
@ -22,19 +22,13 @@ while (<>) {
exit; exit;
} }
# discard the top and bottom tags
s/<collection>\n//; s/<collection>\n//;
s/<\/collection>\n//; s/<\/collection>\n//;
# extract the file path from the identifier
# use the file name as an id
# note that identifier element is discarded!
#if (s!<dcvalue element="relation" qualifier="uri">\s*(.*?)\s*<\/dcvalue>\n!!s) {
if (/<dcvalue element="relation" qualifier="uri">\s*(.*?)\s*<\/dcvalue>\n/) { if (/<dcvalue element="relation" qualifier="uri">\s*(.*?)\s*<\/dcvalue>\n/) {
$cul = $1; $cul = $1;
$url = $1; $url = $1;
if ($cul =~ /docs/ || $cul =~ /file/) { if ($cul =~ /docs/ || $cul =~ /file/) {
#$cul =~ s@.*(folders/|d/)([^?&/]+).*@\2@;
$cul =~ s@.*(d/)([^?&/]+).*@$2@; $cul =~ s@.*(d/)([^?&/]+).*@$2@;
$tac = "https://drive.google.com/uc?export=download&id=$cul"; $tac = "https://drive.google.com/uc?export=download&id=$cul";
} elsif ($cul =~ /pdf/ && $cul =~ /usi/) { } elsif ($cul =~ /pdf/ && $cul =~ /usi/) {
@ -49,13 +43,10 @@ while (<>) {
$id = $what++; $id = $what++;
} }
# let the operator know where we're up to
print "$path/$id\n"; print "$path/$id\n";
# create the item directory
mkdir "import/$id", 0755; mkdir "import/$id", 0755;
# create the dublin_core.xml file
open DC, ">import/$id/dublin_core.xml" open DC, ">import/$id/dublin_core.xml"
or die "Cannot open dublin core for $id, $!\n"; or die "Cannot open dublin core for $id, $!\n";
print DC $_; print DC $_;
@ -72,7 +63,6 @@ while (<>) {
} }
system "curl -L \"$tac\" -o import/$id/$cul"; system "curl -L \"$tac\" -o import/$id/$cul";
#system "curl -s -L \"$tac\" -o import/$id/$cul";
my $output = `file import/$id/$cul`; my $output = `file import/$id/$cul`;
print "$output\n"; print "$output\n";
@ -91,7 +81,6 @@ while (<>) {
} }
if ($ext eq 'pdf' || $ext eq 'docx') { if ($ext eq 'pdf' || $ext eq 'docx') {
# ... create the contents file ...
open OUT, ">import/$id/contents" open OUT, ">import/$id/contents"
or die "Cannot open contents for $id, $!\n"; or die "Cannot open contents for $id, $!\n";
print OUT "$cul.pdf"; print OUT "$cul.pdf";
@ -101,7 +90,6 @@ while (<>) {
} }
} }
print "eliminamos $id\n";
system "rm -rf import/$id"; system "rm -rf import/$id";
__END__ __END__

View File

@ -5,20 +5,18 @@ use MARC::File::USMARC;
use utf8; use utf8;
binmode(STDOUT, ":utf8"); binmode(STDOUT, ":utf8");
$/ = chr(29); # MARC record separator $/ = chr(29);
print qq|<collection>\n|; print qq|<collection>\n|;
while (my $blob = <>) { # suck in one MARC record at a time while (my $blob = <>) {
# convert the MARC to DC
my $marc = MARC::Record->new_from_usmarc( $blob ); my $marc = MARC::Record->new_from_usmarc( $blob );
my $crosswalk = MARC::Crosswalk::DublinCore->new( qualified => 0 ); my $crosswalk = MARC::Crosswalk::DublinCore->new( qualified => 0 );
my $dc = $crosswalk->as_dublincore( $marc ); my $dc = $crosswalk->as_dublincore( $marc );
my $has_content = 0; my $has_content = 0;
# output the DC as XML
for( $dc->elements ) { for( $dc->elements ) {
if (!$has_content) { if (!$has_content) {
$has_content = 1; $has_content = 1;
@ -30,12 +28,10 @@ while (my $blob = <>) { # suck in one MARC record at a time
my $scheme = lc($_->scheme); my $scheme = lc($_->scheme);
my $content = $_->content; my $content = $_->content;
# escape reserved characters
$content =~ s/&/&amp;/gs; $content =~ s/&/&amp;/gs;
$content =~ s/</&lt;/gs; $content =~ s/</&lt;/gs;
$content =~ s/>/&gt;/gs; $content =~ s/>/&gt;/gs;
# munge attributes for DSpace compatibility
if ($element eq 'creator') { if ($element eq 'creator') {
$element = 'contributor'; $element = 'contributor';
$qualifier = 'author'; $qualifier = 'author';
@ -84,13 +80,6 @@ while (my $blob = <>) { # suck in one MARC record at a time
} }
} }
# if ($ARGV[0]) {
# printf qq| <dcvalue element="dspace"|, $element;
# printf qq| qualifier="entity"|, $qualifier if $qualifier;
# printf qq| qualifier="%s"|, $scheme if $scheme and !$qualifier;
# printf qq|>%s</dcvalue>\n|, $content;
# }
print qq|</dublin_core>\n| if $has_content; print qq|</dublin_core>\n| if $has_content;
} }