From 08de4100f13f20dbaaa3b4861c486e662cc3cedd Mon Sep 17 00:00:00 2001 From: Santiago Lo Coco Date: Sat, 7 Oct 2023 15:17:56 -0300 Subject: [PATCH] Remove unused code --- build.pl | 16 ++-------------- marc2dc.pl | 15 ++------------- 2 files changed, 4 insertions(+), 27 deletions(-) diff --git a/build.pl b/build.pl index cc3257d..e3525b1 100755 --- a/build.pl +++ b/build.pl @@ -5,9 +5,9 @@ binmode(STDOUT, ":utf8"); my @chars = ("A".."Z", "a".."z", "1".."9", "_"); -$/ = "\n"; # record separator +$/ = "\n"; -$what = 1000; # dummy id for when there’s no file +$what = 1000; $file = $ARGV[0]; @@ -22,19 +22,13 @@ while (<>) { exit; } - # discard the top and bottom tags s/\n//; s/<\/collection>\n//; - # extract the file path from the identifier - # use the file name as an id - # note that identifier element is discarded! - #if (s!\s*(.*?)\s*<\/dcvalue>\n!!s) { if (/\s*(.*?)\s*<\/dcvalue>\n/) { $cul = $1; $url = $1; if ($cul =~ /docs/ || $cul =~ /file/) { - #$cul =~ s@.*(folders/|d/)([^?&/]+).*@\2@; $cul =~ s@.*(d/)([^?&/]+).*@$2@; $tac = "https://drive.google.com/uc?export=download&id=$cul"; } elsif ($cul =~ /pdf/ && $cul =~ /usi/) { @@ -49,13 +43,10 @@ while (<>) { $id = $what++; } - # let the operator know where we’re up to print "$path/$id\n"; - # create the item directory mkdir "import/$id", 0755; - # create the dublin_core.xml file open DC, ">import/$id/dublin_core.xml" or die "Cannot open dublin core for $id, $!\n"; print DC $_; @@ -72,7 +63,6 @@ while (<>) { } system "curl -L \"$tac\" -o import/$id/$cul"; - #system "curl -s -L \"$tac\" -o import/$id/$cul"; my $output = `file import/$id/$cul`; print "$output\n"; @@ -91,7 +81,6 @@ while (<>) { } if ($ext eq 'pdf' || $ext eq 'docx') { - # ... create the contents file ... open OUT, ">import/$id/contents" or die "Cannot open contents for $id, $!\n"; print OUT "$cul.pdf"; @@ -101,7 +90,6 @@ while (<>) { } } -print "eliminamos $id\n"; system "rm -rf import/$id"; __END__ diff --git a/marc2dc.pl b/marc2dc.pl index 0f09a54..fd2d3b3 100755 --- a/marc2dc.pl +++ b/marc2dc.pl @@ -5,20 +5,18 @@ use MARC::File::USMARC; use utf8; binmode(STDOUT, ":utf8"); -$/ = chr(29); # MARC record separator +$/ = chr(29); print qq|\n|; -while (my $blob = <>) { # suck in one MARC record at a time +while (my $blob = <>) { - # convert the MARC to DC my $marc = MARC::Record->new_from_usmarc( $blob ); my $crosswalk = MARC::Crosswalk::DublinCore->new( qualified => 0 ); my $dc = $crosswalk->as_dublincore( $marc ); my $has_content = 0; - # output the DC as XML for( $dc->elements ) { if (!$has_content) { $has_content = 1; @@ -30,12 +28,10 @@ while (my $blob = <>) { # suck in one MARC record at a time my $scheme = lc($_->scheme); my $content = $_->content; - # escape reserved characters $content =~ s/&/&/gs; $content =~ s//>/gs; - # munge attributes for DSpace compatibility if ($element eq 'creator') { $element = 'contributor'; $qualifier = 'author'; @@ -84,13 +80,6 @@ while (my $blob = <>) { # suck in one MARC record at a time } } - # if ($ARGV[0]) { - # printf qq| %s\n|, $content; - # } - print qq|\n| if $has_content; }