#!/usr/bin/env perl
#
# A consistency checker for BibTeX files.
# Copyright (c) 2005-2012, Hiroyuki Ohsaki.
# All rights reserved.
#
# $Id: bibcheck,v 1.14 2023/03/30 08:39:01 ohsaki Exp $
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

use English;
use Jcode;
use Text::BibTeX::Bib;
use Text::BibTeX;
use strict;
use warnings;

# Replace Text::BibTeX::Entry::warn so that every diagnostic is printed to
# STDOUT as "<file>:<line>: **** <message>".  The glob assignment runs after
# Text::BibTeX has been loaded; warnings are disabled in this block so that
# neither the redefinition nor the body of the replacement emits any.
{
    no warnings;
    *Text::BibTeX::Entry::warn = sub {
        my ($self, $warning, $field) = @_;

        my $location = '';
        if ($self->{'file'}) {
            $location = $self->{'file'}{'filename'} . ":";
        }

        my $lines       = $self->{'lines'};
        my $entry_range = "$lines->{'START'}";
        if (defined $field) {
            $location .=
                (exists $lines->{$field})
                ? "line $lines->{$field}: "
                : "$entry_range (unknown field \"$field\"): ";
        }
        else {
            $location .= "$entry_range: ";
        }
        print "$location**** $warning\n";
    };
}

# Entry type => checker called as $checker->($entry, \%fields_found).
my %CHECK_TYPE_TBL = (
    'article'       => \&check_type_article,
    'inproceedings' => \&check_type_inproceedings,
    'misc'          => \&nop,
    'unpublished'   => \&nop,
    'mastersthesis' => \&check_type_mastersthesis,
    'book'          => \&check_type_book,
);

# Field name => checker called as $checker->($entry).  A field that is not
# listed here is reported as unknown/unsupported by check_file().
my %CHECK_FIELD_TBL = (
    'abstract'     => \&nop,
    'eabstract'    => \&nop,
    'author'       => \&check_field_author,
    'booktitle'    => \&check_field_booktitle,
    'journal'      => \&nop,
    'month'        => \&check_field_month,
    'number'       => \&nop,
    'pages'        => \&check_field_pages,
    'title'        => \&check_field_title,
    'volume'       => \&nop,
    'year'         => \&nop,
    'note'         => \&check_field_note,
    'editor'       => \&nop,
    'publisher'    => \&nop,
    'school'       => \&nop,
    'howpublished' => \&check_field_howpublished,
);

# Checker that accepts anything.
sub nop {
}

# Warn once for every name in @fields that is not marked in %$hashp.
#   $ent    - entry object used for reporting
#   $hashp  - hash ref: field name => true when the entry has that field
#   @fields - mandatory field names, reported in the given order
sub warn_missing_fields {
    my ($ent, $hashp, @fields) = @_;
    for my $field (@fields) {
        $ent->warn("missing mandatory field `$field'")
            unless $hashp->{$field};
    }
    return;
}

# Mandatory fields of an `article' entry.
sub check_type_article {
    my ($ent, $hashp) = @_;

    # allow missing `pages' for unpublished documents
    # The \x.. run spells the bytes C6C3 CACC B8A6 B5E6 CAF3 B9F0, which the
    # original source embedded as raw high-bit characters (presumably the
    # EUC-JP encoding of a Japanese report-series name -- confirm).
    my $journal        = $ent->get('journal');
    my $is_unpublished = (
        $journal
            and $journal =~
            /(submitted to|to appear in|\xC6\xC3\xCA\xCC\xB8\xA6\xB5\xE6\xCA\xF3\xB9\xF0|internet draft|request for comments)/i
    );

    my @required = qw(author title journal year);
    push @required, 'pages' unless $is_unpublished;
    push @required, 'month';
    warn_missing_fields($ent, $hashp, @required);
    return;
}

# Mandatory fields of an `inproceedings' entry.
sub check_type_inproceedings {
    my ($ent, $hashp) = @_;
    warn_missing_fields($ent, $hashp,
        qw(author title booktitle pages year month));
    return;
}

# Mandatory fields of a `mastersthesis' entry.
sub check_type_mastersthesis {
    my ($ent, $hashp) = @_;
    warn_missing_fields($ent, $hashp, qw(author title school year month));
    return;
}

# Mandatory fields of a `book' entry.
sub check_type_book {
    my ($ent, $hashp) = @_;
    warn_missing_fields($ent, $hashp, qw(author title publisher year month));
    return;
}

# Return a copy of $text in which every whitespace character located inside
# a {...} group (at any nesting depth) is replaced by `_', so that a braced
# group survives a later split on whitespace as a single word.
sub protect_braced_spaces {
    my $text = shift;

    my $depth  = 0;
    my $result = '';
    for my $ch (split //, $text) {
        if ($ch eq '{') {
            $depth++;
        }
        elsif ($ch eq '}') {
            $depth-- if $depth > 0;
        }
        elsif ($depth > 0 and $ch =~ /\s/) {
            $ch = '_';
        }
        $result .= $ch;
    }
    return $result;
}

# Warn about words of the title that should be protected by braces.
sub check_field_title {
    my $ent   = shift;
    my $title = $ent->get('title');

    # keep every braced group together as one word
    my @words = split(' ', protect_braced_spaces($title));

    # guess if title is spelled in lower case
    my $lower_words = grep { /^[a-z]/ } @words;
    my $is_lower    = ($lower_words > @words / 2);

    # an all-capital word must always be quoted; in a lower-case title any
    # capitalized word except the leading one must be quoted as well
    for my $word (@words) {
        $ent->warn("capital word `$word' must be quoted (e.g., `{TCP}')")
            if ($word =~ /^[A-Z][A-Z.-]+$/
            or ($word ne $words[0] and $is_lower and $word =~ /^[A-Z]/));
    }
    return;
}

# Check the author list.  The branch is chosen by the character code that
# Jcode's getcode() reports for the field value.
sub check_field_author {
    my $ent    = shift;
    my $author = $ent->get('author');
    my $code   = getcode($author) || 'ascii';

    if ($code eq 'ascii') {
        $ent->warn("use `and others' instead of `et al.'")
            if ($author =~ /et al\./);

        for my $name (split(/\s+and\s+/, $author)) {
            # check misuse of comma
            $ent->warn(
                "invalid author name `$name' (e.g., `John Smith and Mike Johns')"
            ) if ($name =~ /,/);

            for my $part (split(/\s+/, $name)) {
                # `others' is the recommended replacement for `et al.'
                next if $part eq 'others';
                $ent->warn("non-capitalized author name `$name'")
                    unless ($part =~ /^[A-Z]/);
            }
        }
    }
    elsif ($code eq 'euc') {
        # author names in Japanese must be fully quoted
        if ($author =~ /^\{(.*)\}$/) {
            my $names = $1;
            for my $name (split(/,\s+/, $names)) {
                # check lacking space or misuse of touten; \xA1\xA2 are the
                # two bytes the original source embedded as raw characters
                $ent->warn("invalid author name `$name'")
                    if ($name =~ /(,|\xA1\xA2)/);
            }
        }
        else {
            $ent->warn("author names must be enclosed by brackets");
        }
    }
    return;
}

# For an `inproceedings' entry the booktitle must contain `Proceedings of'.
sub check_field_booktitle {
    my $ent   = shift;
    my $type  = $ent->type;
    my $title = $ent->get('booktitle');
    $ent->warn("missing `Proceedings of' in booktitle")
        if ($type eq 'inproceedings' and $title !~ /Proceedings of/);
    return;
}

# Pages must be a single token or two tokens joined by `--'.
sub check_field_pages {
    my $ent   = shift;
    my $pages = $ent->get('pages') || '';
    $ent->warn("malformatted page number `$pages' (e.g., `12--18')")
        unless ($pages =~ /^[A-Za-z\d.-]+(--[A-Za-z\d.-]+)?$/);
    return;
}

# The value returned for `month' must be a full English month name.
# NOTE(review): presumably that is what the month macros (`sep', ...)
# expand to when the entry is parsed -- confirm against Text::BibTeX.
sub check_field_month {
    my $ent   = shift;
    my $month = $ent->get('month') || '';
    $ent->warn("use month macros instead of `$month' (e.g., `sep')")
        unless ($month =~
        /^(January|February|March|April|May|June|July|August|September|October|November|December)$/
        );
    return;
}

# A URL in `note' must follow `Also available as' and be wrapped in \url{}.
sub check_field_note {
    my $ent  = shift;
    my $note = $ent->get('note');
    if ($note =~ m|(https?://[^\s\}]+)|) {
        my $url = $1;
        $ent->warn("missing `Also available as' before `$url'")
            unless ($note =~ /Also available as/);
        $ent->warn("use \\url{} macro for including `$url'")
            unless ($note =~ /\\url/);
    }
    return;
}

# A URL in `howpublished' must not be introduced by `(Also) available
# (as|at)' and must be wrapped in \url{}.
sub check_field_howpublished {
    my $ent          = shift;
    my $howpublished = $ent->get('howpublished');
    if ($howpublished =~ m|(https?://[^\s\}]+)|) {
        my $url = $1;
        if ($howpublished =~ /((?:Also\s+)?available(?:\s(?:as|at))?)/) {
            my $phrase = $1;
            $ent->warn("remove `$phrase' before `$url'");
        }
        $ent->warn("use \\url{} macro for including `$url'")
            unless ($howpublished =~ /\\url/);
    }
    return;
}

# The citation key must look like `Smith05:Anonymous' or start with `RFC'.
sub check_key {
    my $ent = shift;
    my $key = $ent->key;
    $key = '' unless defined $key;    # report a missing key as malformatted
    $ent->warn("malformatted key `$key' (e.g., `Smith05:Anonymous')")
        unless ($key =~ /^[0-9A-Za-z_-]+\d\d:[0-9A-Za-z_-]+$/
        or $key =~ /^RFC/);
    return;
}

# Run every check on each successfully parsed entry of the BibTeX file $file.
sub check_file {
    my $file = shift;

    my $bib = Text::BibTeX::File->new($file);
    unless ($bib) {
        # skip an unreadable file and carry on with the remaining ones
        warn "cannot open BibTeX file `$file': $!\n";
        return;
    }

    while (my $ent = Text::BibTeX::Entry->new($bib)) {
        next unless $ent->parse_ok;

        my $code = getcode($ent->print_s) || 'ascii';
        $ent->warn("invalid character code `$code' (must be ascii/euc)")
            unless ($code eq 'ascii' or $code eq 'euc');

        check_key($ent);

        # per-field checks; remember which known fields are present
        my %found;
        for my $f ($ent->fieldlist) {
            if (exists $CHECK_FIELD_TBL{$f}) {
                $CHECK_FIELD_TBL{$f}->($ent);
                $found{$f} = 1;
            }
            else {
                $ent->warn("unknown/unsupported field `$f'");
            }
        }

        # per-type check of mandatory fields
        my $type = $ent->type;
        if (exists $CHECK_TYPE_TBL{$type}) {
            $CHECK_TYPE_TBL{$type}->($ent, \%found);
        }
        else {
            $ent->warn("unknown/unsupported type `$type'");
        }
    }

    $bib->close;
    return;
}

for my $file (@ARGV) {
    check_file($file);
}