| Filename | /home/gmc/projects/marcpm/marc-xml/lib/MARC/File/XML.pm |
| Statements | Executed 613516 statements in 10.9s |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 5000 | 1 | 1 | 6.40s | 17.4s | MARC::File::XML::decode |
| 5000 | 1 | 1 | 219ms | 419ms | MARC::File::XML::_next |
| 5000 | 1 | 1 | 116ms | 116ms | MARC::File::XML::CORE:subst (opcode) |
| 5000 | 1 | 1 | 70.0ms | 90.4ms | MARC::File::XML::decideMARC8Binary |
| 5000 | 1 | 1 | 63.2ms | 63.2ms | MARC::File::XML::CORE:readline (opcode) |
| 20000 | 4 | 1 | 44.2ms | 44.2ms | MARC::File::XML::CORE:match (opcode) |
| 1 | 1 | 1 | 14.8ms | 58.2ms | MARC::File::XML::BEGIN@9 |
| 5000 | 1 | 1 | 7.75ms | 7.75ms | MARC::File::XML::CORE:eof (opcode) |
| 1 | 1 | 1 | 2.90ms | 9.67ms | MARC::File::XML::BEGIN@7 |
| 1 | 1 | 1 | 2.75ms | 2.84ms | MARC::File::XML::BEGIN@3 |
| 1 | 1 | 1 | 2.52ms | 56.7ms | MARC::File::XML::BEGIN@11 |
| 1 | 1 | 1 | 1.45ms | 2.98ms | MARC::File::XML::BEGIN@6 |
| 1 | 1 | 1 | 584µs | 1.28ms | MARC::File::XML::BEGIN@5 |
| 1 | 1 | 1 | 454µs | 507µs | MARC::File::XML::BEGIN@4 |
| 1 | 1 | 1 | 32µs | 32µs | MARC::File::XML::close |
| 1 | 1 | 1 | 22µs | 378µs | MARC::File::XML::BEGIN@12 |
| 1 | 1 | 1 | 16µs | 48µs | MARC::File::XML::DESTROY |
| 1 | 1 | 1 | 15µs | 75µs | MARC::File::XML::BEGIN@13 |
| 1 | 1 | 1 | 11µs | 11µs | MARC::File::XML::BEGIN@8 |
| 1 | 1 | 1 | 10µs | 10µs | MARC::File::XML::import |
| 1 | 1 | 1 | 9µs | 9µs | MARC::File::XML::BEGIN@14 |
| 1 | 1 | 1 | 7µs | 7µs | MARC::File::XML::CORE:qr (opcode) |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::_unimarc_encoding |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::default_record_format |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::encode |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::escape |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::footer |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::header |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::out |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::record |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::write |
| 0 | 0 | 0 | 0s | 0s | MARC::Record::as_xml |
| 0 | 0 | 0 | 0s | 0s | MARC::Record::as_xml_record |
| 0 | 0 | 0 | 0s | 0s | MARC::Record::new_from_xml |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | package MARC::File::XML; | ||||
| 2 | |||||
| 3 | 3 | 2.57ms | 2 | 2.89ms | # spent 2.84ms (2.75+91µs) within MARC::File::XML::BEGIN@3 which was called:
# once (2.75ms+91µs) by main::BEGIN@3 at line 3 # spent 2.84ms making 1 call to MARC::File::XML::BEGIN@3
# spent 47µs making 1 call to warnings::import |
| 4 | 3 | 442µs | 2 | 513µs | # spent 507µs (454+53) within MARC::File::XML::BEGIN@4 which was called:
# once (454µs+53µs) by main::BEGIN@3 at line 4 # spent 507µs making 1 call to MARC::File::XML::BEGIN@4
# spent 6µs making 1 call to strict::import |
| 5 | 3 | 174µs | 2 | 1.36ms | # spent 1.28ms (584µs+700µs) within MARC::File::XML::BEGIN@5 which was called:
# once (584µs+700µs) by main::BEGIN@3 at line 5 # spent 1.28ms making 1 call to MARC::File::XML::BEGIN@5
# spent 80µs making 1 call to vars::import |
| 6 | 3 | 191µs | 2 | 4.41ms | # spent 2.98ms (1.45+1.53) within MARC::File::XML::BEGIN@6 which was called:
# once (1.45ms+1.53ms) by main::BEGIN@3 at line 6 # spent 2.98ms making 1 call to MARC::File::XML::BEGIN@6
# spent 1.43ms making 1 call to base::import |
| 7 | 3 | 163µs | 2 | 9.71ms | # spent 9.67ms (2.90+6.77) within MARC::File::XML::BEGIN@7 which was called:
# once (2.90ms+6.77ms) by main::BEGIN@3 at line 7 # spent 9.67ms making 1 call to MARC::File::XML::BEGIN@7
# spent 41µs making 1 call to Exporter::import |
| 8 | 3 | 41µs | 1 | 11µs | # spent 11µs within MARC::File::XML::BEGIN@8 which was called:
# once (11µs+0s) by main::BEGIN@3 at line 8 # spent 11µs making 1 call to MARC::File::XML::BEGIN@8 |
| 9 | 3 | 186µs | 2 | 58.5ms | # spent 58.2ms (14.8+43.4) within MARC::File::XML::BEGIN@9 which was called:
# once (14.8ms+43.4ms) by main::BEGIN@3 at line 9 # spent 58.2ms making 1 call to MARC::File::XML::BEGIN@9
# spent 363µs making 1 call to XML::LibXML::import |
| 10 | |||||
| 11 | 3 | 192µs | 2 | 56.8ms | # spent 56.7ms (2.52+54.2) within MARC::File::XML::BEGIN@11 which was called:
# once (2.52ms+54.2ms) by main::BEGIN@3 at line 11 # spent 56.7ms making 1 call to MARC::File::XML::BEGIN@11
# spent 89µs making 1 call to Exporter::import |
| 12 | 3 | 49µs | 2 | 734µs | # spent 378µs (22+356) within MARC::File::XML::BEGIN@12 which was called:
# once (22µs+356µs) by main::BEGIN@3 at line 12 # spent 378µs making 1 call to MARC::File::XML::BEGIN@12
# spent 356µs making 1 call to Exporter::import |
| 13 | 3 | 41µs | 2 | 135µs | # spent 75µs (15+60) within MARC::File::XML::BEGIN@13 which was called:
# once (15µs+60µs) by main::BEGIN@3 at line 13 # spent 75µs making 1 call to MARC::File::XML::BEGIN@13
# spent 60µs making 1 call to Exporter::import |
| 14 | 3 | 2.48ms | 1 | 9µs | # spent 9µs within MARC::File::XML::BEGIN@14 which was called:
# once (9µs+0s) by main::BEGIN@3 at line 14 # spent 9µs making 1 call to MARC::File::XML::BEGIN@14 |
| 15 | |||||
| 16 | 1 | 2µs | $VERSION = '0.92'; | ||
| 17 | |||||
| 18 | 1 | 10µs | 1 | 27µs | my $parser = XML::LibXML->new(); # spent 27µs making 1 call to XML::LibXML::new |
| 19 | |||||
| 20 | # spent 10µs within MARC::File::XML::import which was called:
# once (10µs+0s) by main::BEGIN@3 at line 3 of conv.pl | ||||
| 21 | 4 | 13µs | my $class = shift; | ||
| 22 | %_load_args = @_; | ||||
| 23 | $_load_args{ DefaultEncoding } ||= 'UTF-8'; | ||||
| 24 | $_load_args{ RecordFormat } ||= 'USMARC'; | ||||
| 25 | } | ||||
| 26 | |||||
| 27 | =head1 NAME | ||||
| 28 | |||||
| - - | |||||
| 99 | sub default_record_format { | ||||
| 100 | my $self = shift; | ||||
| 101 | my $format = shift; | ||||
| 102 | |||||
| 103 | $_load_args{RecordFormat} = $format if ($format); | ||||
| 104 | |||||
| 105 | return $_load_args{RecordFormat}; | ||||
| 106 | } | ||||
| 107 | |||||
| 108 | |||||
| 109 | =head2 as_xml() | ||||
| 110 | |||||
| - - | |||||
| 119 | sub MARC::Record::as_xml { | ||||
| 120 | my $record = shift; | ||||
| 121 | my $format = shift || $_load_args{RecordFormat}; | ||||
| 122 | return( MARC::File::XML::encode( $record, $format ) ); | ||||
| 123 | } | ||||
| 124 | |||||
| 125 | =head2 as_xml_record([$format]) | ||||
| 126 | |||||
| - - | |||||
| 135 | sub MARC::Record::as_xml_record { | ||||
| 136 | my $record = shift; | ||||
| 137 | my $format = shift || $_load_args{RecordFormat}; | ||||
| 138 | return( MARC::File::XML::encode( $record, $format, 1 ) ); | ||||
| 139 | } | ||||
| 140 | |||||
| 141 | =head2 new_from_xml([$encoding, $format]) | ||||
| 142 | |||||
| - - | |||||
| 155 | sub MARC::Record::new_from_xml { | ||||
| 156 | my $xml = shift; | ||||
| 157 | ## to allow calling as MARC::Record::new_from_xml() | ||||
| 158 | ## or MARC::Record->new_from_xml() | ||||
| 159 | $xml = shift if ( ref($xml) || ($xml eq "MARC::Record") ); | ||||
| 160 | |||||
| 161 | my $enc = shift || $_load_args{BinaryEncoding}; | ||||
| 162 | my $format = shift || $_load_args{RecordFormat}; | ||||
| 163 | return( MARC::File::XML::decode( $xml, $enc, $format ) ); | ||||
| 164 | } | ||||
| 165 | |||||
| 166 | =pod | ||||
| 167 | |||||
| - - | |||||
| 182 | sub out { | ||||
| 183 | my ( $class, $filename, $enc ) = @_; | ||||
| 184 | my $fh = IO::File->new( ">$filename" ) or croak( $! ); | ||||
| 185 | $enc ||= $_load_args{DefaultEncoding}; | ||||
| 186 | |||||
| 187 | if ($enc =~ /^utf-?8$/oi) { | ||||
| 188 | $fh->binmode(':utf8'); | ||||
| 189 | } else { | ||||
| 190 | $fh->binmode(':raw'); | ||||
| 191 | } | ||||
| 192 | |||||
| 193 | my %self = ( | ||||
| 194 | filename => $filename, | ||||
| 195 | fh => $fh, | ||||
| 196 | header => 0, | ||||
| 197 | encoding => $enc | ||||
| 198 | ); | ||||
| 199 | return( bless \%self, ref( $class ) || $class ); | ||||
| 200 | } | ||||
| 201 | |||||
| 202 | =head2 write() | ||||
| 203 | |||||
| - - | |||||
| 212 | sub write { | ||||
| 213 | my ( $self, $record, $enc ) = @_; | ||||
| 214 | if ( ! $self->{ fh } ) { | ||||
| 215 | croak( "MARC::File::XML object not open for writing" ); | ||||
| 216 | } | ||||
| 217 | if ( ! $record ) { | ||||
| 218 | croak( "must pass write() a MARC::Record object" ); | ||||
| 219 | } | ||||
| 220 | ## print the XML header if we haven't already | ||||
| 221 | if ( ! $self->{ header } ) { | ||||
| 222 | $enc ||= $self->{ encoding } || $_load_args{DefaultEncoding}; | ||||
| 223 | $self->{ fh }->print( header( $enc ) ); | ||||
| 224 | $self->{ header } = 1; | ||||
| 225 | } | ||||
| 226 | ## print out the record | ||||
| 227 | $self->{ fh }->print( record( $record ) ) || croak( $! ); | ||||
| 228 | return( 1 ); | ||||
| 229 | } | ||||
| 230 | |||||
| 231 | =head2 close() | ||||
| 232 | |||||
| - - | |||||
| 239 | # spent 32µs within MARC::File::XML::close which was called:
# once (32µs+0s) by MARC::File::XML::DESTROY at line 253 | ||||
| 240 | 7 | 36µs | my $self = shift; | ||
| 241 | if ( $self->{ fh } ) { | ||||
| 242 | $self->{ fh }->print( footer() ) if $self->{ header }; | ||||
| 243 | $self->{ fh } = undef; | ||||
| 244 | $self->{ filename } = undef; | ||||
| 245 | $self->{ header } = undef; | ||||
| 246 | } | ||||
| 247 | return( 1 ); | ||||
| 248 | } | ||||
| 249 | |||||
| 250 | ## makes sure that the XML file is closed off | ||||
| 251 | |||||
| 252 | # spent 48µs (16+32) within MARC::File::XML::DESTROY which was called:
# once (16µs+32µs) by main::NULL at line 0 of conv.pl | ||||
| 253 | 1 | 12µs | 1 | 32µs | shift->close(); # spent 32µs making 1 call to MARC::File::XML::close |
| 254 | } | ||||
| 255 | |||||
| 256 | =pod | ||||
| 257 | |||||
| - - | |||||
| 276 | sub header { | ||||
| 277 | my $enc = shift; | ||||
| 278 | $enc = shift if ( $enc && (ref($enc) || ($enc eq "MARC::File::XML")) ); | ||||
| 279 | $enc ||= 'UTF-8'; | ||||
| 280 | return( <<MARC_XML_HEADER ); | ||||
| 281 | <?xml version="1.0" encoding="$enc"?> | ||||
| 282 | <collection | ||||
| 283 | xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||||
| 284 | xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd" | ||||
| 285 | xmlns="http://www.loc.gov/MARC21/slim"> | ||||
| 286 | MARC_XML_HEADER | ||||
| 287 | } | ||||
| 288 | |||||
| 289 | =head2 footer() | ||||
| 290 | |||||
| - - | |||||
| 295 | sub footer { | ||||
| 296 | return( "</collection>" ); | ||||
| 297 | } | ||||
| 298 | |||||
| 299 | =head2 record() | ||||
| 300 | |||||
| - - | |||||
| 305 | sub record { | ||||
| 306 | my $record = shift; | ||||
| 307 | my $format = shift; | ||||
| 308 | my $include_full_record_header = shift; | ||||
| 309 | my $enc = shift; | ||||
| 310 | |||||
| 311 | $format ||= $_load_args{RecordFormat}; | ||||
| 312 | |||||
| 313 | my $_transcode = 0; | ||||
| 314 | my $ldr = $record->leader; | ||||
| 315 | my $original_encoding = substr($ldr,9,1); | ||||
| 316 | |||||
| 317 | # Does the record think it is already Unicode? | ||||
| 318 | if ($original_encoding ne 'a' && lc($format) !~ /^unimarc/o) { | ||||
| 319 | # If not, we'll make it so | ||||
| 320 | $_transcode++; | ||||
| 321 | substr($ldr,9,1,'a'); | ||||
| 322 | $record->leader( $ldr ); | ||||
| 323 | } | ||||
| 324 | |||||
| 325 | my @xml = (); | ||||
| 326 | |||||
| 327 | if ($include_full_record_header) { | ||||
| 328 | push @xml, <<HEADER | ||||
| 329 | <?xml version="1.0" encoding="$enc"?> | ||||
| 330 | <record | ||||
| 331 | xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||||
| 332 | xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd" | ||||
| 333 | xmlns="http://www.loc.gov/MARC21/slim"> | ||||
| 334 | HEADER | ||||
| 335 | |||||
| 336 | } else { | ||||
| 337 | push( @xml, "<record>" ); | ||||
| 338 | } | ||||
| 339 | |||||
| 340 | push( @xml, " <leader>" . escape( $record->leader ) . "</leader>" ); | ||||
| 341 | |||||
| 342 | foreach my $field ( $record->fields() ) { | ||||
| 343 | my ($tag) = escape( $field->tag() ); | ||||
| 344 | if ( $field->is_control_field() ) { | ||||
| 345 | my $data = $field->data; | ||||
| 346 | push( @xml, qq( <controlfield tag="$tag">) . | ||||
| 347 | escape( ($_transcode ? marc8_to_utf8($data) : $data) ). qq(</controlfield>) ); | ||||
| 348 | } else { | ||||
| 349 | my ($i1) = escape( $field->indicator( 1 ) ); | ||||
| 350 | my ($i2) = escape( $field->indicator( 2 ) ); | ||||
| 351 | push( @xml, qq( <datafield tag="$tag" ind1="$i1" ind2="$i2">) ); | ||||
| 352 | foreach my $subfield ( $field->subfields() ) { | ||||
| 353 | my ( $code, $data ) = ( escape( $$subfield[0] ), $$subfield[1] ); | ||||
| 354 | push( @xml, qq( <subfield code="$code">). | ||||
| 355 | escape( ($_transcode ? marc8_to_utf8($data) : $data) ).qq(</subfield>) ); | ||||
| 356 | } | ||||
| 357 | push( @xml, " </datafield>" ); | ||||
| 358 | } | ||||
| 359 | } | ||||
| 360 | push( @xml, "</record>\n" ); | ||||
| 361 | |||||
| 362 | if ($_transcode) { | ||||
| 363 | substr($ldr,9,1,$original_encoding); | ||||
| 364 | $record->leader( $ldr ); | ||||
| 365 | } | ||||
| 366 | |||||
| 367 | return( join( "\n", @xml ) ); | ||||
| 368 | } | ||||
| 369 | |||||
| 370 | 1 | 4µs | my %ESCAPES = ( | ||
| 371 | '&' => '&', | ||||
| 372 | '<' => '<', | ||||
| 373 | '>' => '>', | ||||
| 374 | ); | ||||
| 375 | 3 | 6µs | my $ESCAPE_REGEX = | ||
| 376 | eval 'qr/' . | ||||
| 377 | 1 | 58µs | join( '|', map { $_ = "\Q$_\E" } keys %ESCAPES ) . # spent 19µs executing statements in string eval | ||
| 378 | '/;' | ||||
| 379 | ; | ||||
| 380 | |||||
| 381 | sub escape { | ||||
| 382 | my $string = shift; | ||||
| 383 | return '' if ! defined $string or $string eq ''; | ||||
| 384 | $string =~ s/($ESCAPE_REGEX)/$ESCAPES{$1}/oge; | ||||
| 385 | return( $string ); | ||||
| 386 | } | ||||
| 387 | |||||
| 388 | # spent 419ms (219+200) within MARC::File::XML::_next which was called 5000 times, avg 84µs/call:
# 5000 times (219ms+200ms) by MARC::File::next at line 104 of MARC/File.pm, avg 84µs/call | ||||
| 389 | 40000 | 425ms | my $self = shift; | ||
| 390 | my $fh = $self->{ fh }; | ||||
| 391 | |||||
| 392 | ## return undef at the end of the file | ||||
| 393 | 5000 | 7.75ms | return if eof($fh); # spent 7.75ms making 5000 calls to MARC::File::XML::CORE:eof, avg 2µs/call | ||
| 394 | |||||
| 395 | ## get a chunk of xml for a record | ||||
| 396 | local $/ = '</record>'; | ||||
| 397 | 5000 | 63.2ms | my $xml = <$fh>; # spent 63.2ms making 5000 calls to MARC::File::XML::CORE:readline, avg 13µs/call | ||
| 398 | |||||
| 399 | ## trim stuff before the start record element | ||||
| 400 | 5000 | 116ms | $xml =~ s/.*<record.*?>/<record>/s; # spent 116ms making 5000 calls to MARC::File::XML::CORE:subst, avg 23µs/call | ||
| 401 | |||||
| 402 | ## return undef if there isn't a good chunk of xml | ||||
| 403 | 5000 | 12.5ms | return if ( $xml !~ m|<record>.*</record>|s ); # spent 12.5ms making 5000 calls to MARC::File::XML::CORE:match, avg 3µs/call | ||
| 404 | |||||
| 405 | ## return the chunk of xml | ||||
| 406 | return( $xml ); | ||||
| 407 | } | ||||
| 408 | |||||
| 409 | =head2 decode() | ||||
| 410 | |||||
| - - | |||||
| 418 | # spent 17.4s (6.40+11.0) within MARC::File::XML::decode which was called 5000 times, avg 3.48ms/call:
# 5000 times (6.40s+11.0s) by MARC::File::next at line 105 of MARC/File.pm, avg 3.48ms/call | ||||
| 419 | 553463 | 10.3s | my $self = shift; | ||
| 420 | my $text; | ||||
| 421 | my $location = ''; | ||||
| 422 | |||||
| 423 | 5000 | 11.3ms | if ( ref($self) =~ /^MARC::File/ ) { # spent 11.3ms making 5000 calls to MARC::File::XML::CORE:match, avg 2µs/call | ||
| 424 | $location = 'in record '.$self->{recnum}; | ||||
| 425 | $text = shift; | ||||
| 426 | } else { | ||||
| 427 | $location = 'in record 1'; | ||||
| 428 | $text = $self=~/MARC::File/ ? shift : $self; | ||||
| 429 | } | ||||
| 430 | |||||
| 431 | my $enc = shift || $_load_args{BinaryEncoding}; | ||||
| 432 | my $format = shift || $_load_args{RecordFormat}; | ||||
| 433 | |||||
| 434 | 5000 | 1.60s | my $xml = $parser->parse_string($text); # spent 1.60s making 5000 calls to XML::LibXML::parse_string, avg 320µs/call | ||
| 435 | |||||
| 436 | 5000 | 26.7ms | my $root = $xml->documentElement; # spent 26.7ms making 5000 calls to XML::LibXML::Document::documentElement, avg 5µs/call | ||
| 437 | croak('MARCXML document has no root element') unless defined $root; | ||||
| 438 | 5000 | 17.1ms | if ($root->localname eq 'collection') { # spent 17.1ms making 5000 calls to XML::LibXML::Node::localname, avg 3µs/call | ||
| 439 | my @records = $root->getChildrenByLocalName('record'); | ||||
| 440 | croak('MARCXML document has no record element') unless @records; | ||||
| 441 | $root = $records[0]; | ||||
| 442 | } | ||||
| 443 | |||||
| 444 | 5000 | 52.7ms | my $rec = MARC::Record->new(); # spent 52.7ms making 5000 calls to MARC::Record::new, avg 11µs/call | ||
| 445 | my @fields = (); | ||||
| 446 | my $transcode_to_marc8; | ||||
| 447 | 5000 | 180ms | foreach my $elt ($root->getChildrenByLocalName('*')) { # spent 180ms making 5000 calls to XML::LibXML::Element::getChildrenByLocalName, avg 36µs/call | ||
| 448 | 84541 | 259ms | my $elt_name = $elt->localname; # spent 259ms making 84541 calls to XML::LibXML::Node::localname, avg 3µs/call | ||
| 449 | 278623 | 1.16s | if ($elt_name eq 'leader') { # spent 532ms making 20000 calls to MARC::Field::new, avg 27µs/call
# spent 388ms making 198623 calls to XML::LibXML::Node::DESTROY, avg 2µs/call
# spent 98.5ms making 20000 calls to XML::LibXML::Element::getAttributeNode, avg 5µs/call
# spent 76.3ms making 20000 calls to XML::LibXML::Node::textContent, avg 4µs/call
# spent 69.9ms making 20000 calls to XML::LibXML::Attr::value, avg 3µs/call | ||
| 450 | # this bit is rather questionable | ||||
| 451 | 5000 | 23.5ms | my $leader = $elt->textContent; # spent 23.5ms making 5000 calls to XML::LibXML::Node::textContent, avg 5µs/call | ||
| 452 | 5000 | 90.4ms | $transcode_to_marc8 = substr($leader, 9, 1) eq 'a' && decideMARC8Binary($format, $enc) ? 1 : 0; # spent 90.4ms making 5000 calls to MARC::File::XML::decideMARC8Binary, avg 18µs/call | ||
| 453 | substr($leader, 9, 1) = ' ' if $transcode_to_marc8; | ||||
| 454 | 5000 | 60.9ms | $rec->leader($leader); # spent 60.9ms making 5000 calls to MARC::Record::leader, avg 12µs/call | ||
| 455 | } elsif ($elt_name eq 'controlfield') { | ||||
| 456 | push @fields, MARC::Field->new($elt->getAttributeNode('tag')->value, $elt->textContent); | ||||
| 457 | } elsif ($elt_name eq 'datafield') { | ||||
| 458 | my @sfs = (); | ||||
| 459 | 59541 | 989ms | foreach my $sfelt ($elt->getChildrenByLocalName('subfield')) { # spent 989ms making 59541 calls to XML::LibXML::Element::getChildrenByLocalName, avg 17µs/call | ||
| 460 | 383032 | 1.33s | push @sfs, $sfelt->getAttributeNode('code')->value, # spent 456ms making 95758 calls to XML::LibXML::Element::getAttributeNode, avg 5µs/call
# spent 365ms making 95758 calls to XML::LibXML::Node::textContent, avg 4µs/call
# spent 324ms making 95758 calls to XML::LibXML::Attr::value, avg 3µs/call
# spent 182ms making 95758 calls to XML::LibXML::Node::DESTROY, avg 2µs/call | ||
| 461 | $transcode_to_marc8 ? utf8_to_marc8($sfelt->textContent()) : $sfelt->textContent(); | ||||
| 462 | } | ||||
| 463 | 512545 | 4.66s | push @fields, MARC::Field->new( # spent 3.15s making 59541 calls to MARC::Field::new, avg 53µs/call
# spent 763ms making 178623 calls to XML::LibXML::Element::getAttributeNode, avg 4µs/call
# spent 558ms making 178623 calls to XML::LibXML::Attr::value, avg 3µs/call
# spent 186ms making 95758 calls to XML::LibXML::Node::DESTROY, avg 2µs/call | ||
| 464 | $elt->getAttributeNode('tag')->value, | ||||
| 465 | $elt->getAttributeNode('ind1')->value, | ||||
| 466 | $elt->getAttributeNode('ind2')->value, | ||||
| 467 | @sfs | ||||
| 468 | ); | ||||
| 469 | } | ||||
| 470 | } | ||||
| 471 | 89541 | 437ms | $rec->append_fields(@fields); # spent 283ms making 5000 calls to MARC::Record::append_fields, avg 57µs/call
# spent 154ms making 84541 calls to XML::LibXML::Node::DESTROY, avg 2µs/call | ||
| 472 | return $rec; | ||||
| 473 | |||||
| 474 | } | ||||
| 475 | |||||
| 476 | # spent 90.4ms (70.0+20.4) within MARC::File::XML::decideMARC8Binary which was called 5000 times, avg 18µs/call:
# 5000 times (70.0ms+20.4ms) by MARC::File::XML::decode at line 452, avg 18µs/call | ||||
| 477 | 20000 | 92.5ms | my $format = shift; | ||
| 478 | my $enc = shift; | ||||
| 479 | |||||
| 480 | 5000 | 5.96ms | return 0 if (defined($format) && lc($format) =~ /^unimarc/o); # spent 5.96ms making 5000 calls to MARC::File::XML::CORE:match, avg 1µs/call | ||
| 481 | 5000 | 14.4ms | return 0 if (defined($enc) && lc($enc) =~ /^utf-?8/o); # spent 14.4ms making 5000 calls to MARC::File::XML::CORE:match, avg 3µs/call | ||
| 482 | return 1; | ||||
| 483 | } | ||||
| 484 | |||||
| 485 | |||||
| 486 | =head2 encode() | ||||
| 487 | |||||
| - - | |||||
| 495 | sub encode { | ||||
| 496 | my $record = shift; | ||||
| 497 | my $format = shift || $_load_args{RecordFormat}; | ||||
| 498 | my $without_collection_header = shift; | ||||
| 499 | my $enc = shift || $_load_args{DefaultEncoding}; | ||||
| 500 | |||||
| 501 | if (lc($format) =~ /^unimarc/o) { | ||||
| 502 | $enc = _unimarc_encoding( $format => $record ); | ||||
| 503 | } | ||||
| 504 | |||||
| 505 | my @xml = (); | ||||
| 506 | push( @xml, header( $enc ) ) unless ($without_collection_header); | ||||
| 507 | # verbose, but naming the header output flags this way to avoid | ||||
| 508 | # the potential confusion identified in CPAN bug #34082 | ||||
| 509 | # http://rt.cpan.org/Public/Bug/Display.html?id=34082 | ||||
| 510 | my $include_full_record_header = ($without_collection_header) ? 1 : 0; | ||||
| 511 | push( @xml, record( $record, $format, $include_full_record_header, $enc ) ); | ||||
| 512 | push( @xml, footer() ) unless ($without_collection_header); | ||||
| 513 | |||||
| 514 | return( join( "\n", @xml ) ); | ||||
| 515 | } | ||||
| 516 | |||||
| 517 | sub _unimarc_encoding { | ||||
| 518 | my $f = shift; | ||||
| 519 | my $r = shift; | ||||
| 520 | |||||
| 521 | my $pos = 26; | ||||
| 522 | $pos = 13 if (lc($f) eq 'unimarcauth'); | ||||
| 523 | |||||
| 524 | my $enc = substr( $r->subfield(100 => 'a'), $pos, 2 ); | ||||
| 525 | |||||
| 526 | if ($enc eq '01' || $enc eq '03') { | ||||
| 527 | return 'ISO-8859-1'; | ||||
| 528 | } elsif ($enc eq '50') { | ||||
| 529 | return 'UTF-8'; | ||||
| 530 | } else { | ||||
| 531 | die "Unsupported UNIMARC character encoding [$enc] for XML output for $f; 100\$a -> " . $r->subfield(100 => 'a'); | ||||
| 532 | } | ||||
| 533 | } | ||||
| 534 | |||||
| 535 | =head1 TODO | ||||
| 536 | |||||
| - - | |||||
| 569 | 1 | 11µs | 1; | ||
# spent 7.75ms within MARC::File::XML::CORE:eof which was called 5000 times, avg 2µs/call:
# 5000 times (7.75ms+0s) by MARC::File::XML::_next at line 393, avg 2µs/call | |||||
# spent 44.2ms within MARC::File::XML::CORE:match which was called 20000 times, avg 2µs/call:
# 5000 times (14.4ms+0s) by MARC::File::XML::decideMARC8Binary at line 481, avg 3µs/call
# 5000 times (12.5ms+0s) by MARC::File::XML::_next at line 403, avg 3µs/call
# 5000 times (11.3ms+0s) by MARC::File::XML::decode at line 423, avg 2µs/call
# 5000 times (5.96ms+0s) by MARC::File::XML::decideMARC8Binary at line 480, avg 1µs/call | |||||
# spent 7µs within MARC::File::XML::CORE:qr which was called:
# once (7µs+0s) by main::BEGIN@3 at line 1 of (eval 8)[lib/MARC/File/XML.pm:377] | |||||
# spent 63.2ms within MARC::File::XML::CORE:readline which was called 5000 times, avg 13µs/call:
# 5000 times (63.2ms+0s) by MARC::File::XML::_next at line 397, avg 13µs/call | |||||
# spent 116ms within MARC::File::XML::CORE:subst which was called 5000 times, avg 23µs/call:
# 5000 times (116ms+0s) by MARC::File::XML::_next at line 400, avg 23µs/call |