The University of Michigan Historical Math Collection has a bunch of math books.
The ummath module supports the following download formats:
| -f value | Description |
|---|---|
| pdf | Pages are returned as one PDF file per page. |
| image | Pages are returned as one GIF image per page. |
Note: When downloading images, each image is generated on the server in real time. The script must therefore send two separate HTTP requests for every page: the first makes the server generate the image, and the second downloads the generated image. When downloading PDF files, there is no need to send these extra HTTP requests.
Each book is identified by a unique ID number in the form 'XXX####.####.###' (without quotes), where X is a letter and # is a digit. For example, the unique ID for James Fenimore Cooper's novel 'The Last of the Mohicans' is 'ABB2610.0001.001'.
You can find the ID number by navigating to the book you want to retrieve and then copying the link address (URL) from that book. In the URL, there is a portion that looks like:
idno=
The portion following that, and continuing to the next ampersand (&) or semicolon (;), is the ID.
# ummath : University of Michigan Historical Math Collection
#
# This module builds the list of page URLs needed to download a
# volume from the University of Michigan Historical Math Collection
# site, either as GIF page images or as PDF files. See the
# documentation for additional details.

# Collection code sent as the "c" parameter of every request URL.
$module_c = "umhistmath";

# Format names this module accepts, each mapped to the file extension
# used for the downloaded pages. 'gif' is an alias for 'image'.
$module_default_format = "pdf";
%module_formats = (
    'image' => 'gif',
    'gif'   => 'gif',
    'pdf'   => 'pdf',
);

# module_check_format is defined outside this module; presumably it
# returns the requested format when it is one of the listed names and
# the default otherwise -- TODO confirm against the main script.
$module_format = module_check_format($module_default_format, keys(%module_formats));
$config{'ext'} = $module_formats{$module_format};

if ( $module_format eq "gif" ) {
    # $module_format is later used as the "view" URL parameter, so the
    # 'gif' alias is folded into 'image' here.
    $module_format = "image";
}
elsif ( $module_format eq "pdf" ) {
    # NOTE(review): 'renumber' is consumed outside this module;
    # presumably it makes the main script number the saved files
    # sequentially -- confirm.
    print_v("Renumbering pages!");
    $config{'renumber'} = 1;
}
# The cache directory is keyed by the lower-cased ID and by each of its
# first three characters: cache/<1st>/<2nd>/<3rd>/<lower-cased id>/
$lcidno = lc($idno);
($lc1, $lc2, $lc3) = map(substr($lcidno, $_, 1), 0 .. 2);

# Root of the site; every other URL below is built on top of it.
$module_url{'sitebase'} = "http://www.hti.umich.edu/";

# Page listing for the whole volume.
$module_url{'plistbase'} = join('',
    $module_url{'sitebase'},
    "cgi/t/text/text-idx",
    "?c=$module_c;idno=$idno",
);

# Page viewer URL; it ends in "seq=" so that a page sequence number can
# be appended to it.
$module_url{'touchbase'} = join('',
    $module_url{'sitebase'},
    "cgi/t/text/pageviewer-idx",
    "?c=$module_c;idno=$idno;size=l;view=$module_format;seq=",
);

# Directory holding the files that are actually downloaded.
$module_url{'imagebase'} = join('',
    $module_url{'sitebase'},
    "cache/$lc1/$lc2/$lc3/$lcidno/",
);
# Fetch the volume's page listing and work out how many pages to download.
# $ua is created outside this module (presumably an LWP::UserAgent).
print_v("Getting page listing... (".$module_url{'plistbase'}.")");
$res = $ua->get($module_url{'plistbase'});
if ( $res->is_error ) {
    # End the message with a newline and leave with a non-zero status so
    # that the calling shell or script can tell the download failed
    # (a bare exit() reports success).
    print STDERR "Could not get page listing\n".$res->status_line."\n";
    exit(1);
}

# Every page entry in the listing starts with the same <div>/<span> marker.
# Splitting on it gives one leading chunk plus one chunk per marker, so the
# last index of @content is the number of page markers found.
@content = split(/<div class="indentlevel1"><span class="scopingpage">/,$res->content);
$module_available_high = $#content;
print_v("This volume has $module_available_high pages...");

# module_set_limit is defined outside this module; presumably it caps the
# last page at the user's requested end page -- TODO confirm.
$module_actual_high = module_set_limit($module_available_high);
# Queue one "touch" URL and one download URL for every requested page.
for $module_i ( $config{'start'} .. $module_actual_high ) {
    # Page viewer request for this page (see the note in the documentation
    # about the server generating images on demand).
    $module_touch_url = "$module_url{'touchbase'}$module_i";
    print_v("Touch URL: $module_touch_url");
    push @touchurls, $module_touch_url;

    if ( $config{'ext'} ne "pdf" ) {
        # GIF pages are named after the zero-padded, 8-digit page number.
        $module_j = sprintf("%08d", $module_i);
        $module_image_url = "$module_url{'imagebase'}$module_j.tifl.gif";
    }
    else {
        # NOTE(review): the PDF file name is fixed and only the "#page="
        # fragment varies with the page number; confirm that this really
        # yields a different file for each page.
        $module_image_url = "$module_url{'imagebase'}00000001.tif.20.pdf#page=$module_i";
    }
    print_v("Image URL: $module_image_url");
    push @urls, $module_image_url;
}