/android/import-contacts : revision 22

To get this branch, use:

bzr branch
http://bzr.ed.am/android/import-contacts

« back to all changes in this revision

Viewing changes to src/org/waxworlds/edam/importcontacts/VCFImporter.java

Committer: edam
Date: 2010-12-11 23:57:07 UTC
Revision ID: edam@waxworlds.org-20101211235707-czyw48tt3hcopuwf

- read vCard files in as raw bytes now, and convert to a string only tentatively, to check for version numbers, property names and parameters
- ASCII is now (correctly) the default charset
- added conversion from 8-bit ASCII to UTF-8 (not used on 7-bit US-ASCII) which works on raw bytes, not chars
- unencode quoted-printable now works on raw bytes, not chars

files modified:
gen/org/waxworlds/edam/importcontacts/R.java

res/values/strings.xml

src/org/waxworlds/edam/importcontacts/VCFImporter.java

Show diffs side-by-side

added added

removed removed

src/org/waxworlds/edam/importcontacts/VCFImporter.java

import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.FileReader;

import java.io.FilenameFilter;

import java.io.IOException;

import java.io.UnsupportedEncodingException;

import java.nio.ByteBuffer;

import java.util.Arrays;

import java.util.HashSet;

import java.util.List;

148

150

149

151

private void importVCardFile( File file ) throws AbortImportException

150

152

{

153

// check file is good

154

if( !file.exists() )

155

showError( getText( R.string.error_filenotfound ) +

156

file.getName() );

157

if( file.length() == 0 )

158

showError( getText( R.string.error_fileisempty ) +

159

file.getName() );

160

151

161

try

152

162

{

153

// open file

154

BufferedReader reader = new BufferedReader(

155

new FileReader( file ) );

156

157

// read

158

StringBuffer content = new StringBuffer();

159

String line;

160

while( ( line = reader.readLine() ) != null )

161

content.append( line ).append( "\n" );

162

163

importVCardFileContent( content.toString(), file.getName() );

163

// open/read file

164

FileInputStream istream = new FileInputStream( file );

165

byte[] content = new byte[ (int)file.length() ];

166

istream.read( content );

167

168

// import

169

importVCardFileContent( content, file.getName() );

164

170

}

165

171

catch( FileNotFoundException e ) {

166

172

showError( getText( R.string.error_filenotfound ) +

171

177

}

172

178

}

173

179

174

private void importVCardFileContent( String content, String fileName )

180

private void importVCardFileContent( byte[] content, String fileName )

175

181

throws AbortImportException

176

182

{

177

// get lines and parse them

178

String[] lines = content.split( "\n" );

183

ByteBuffer buffers[] = getLinesFromContent( content );

184

185

// go through lines

179

186

VCard vCard = null;

180

for( int i = 0; i < lines.length; i++ )

187

for( int i = 0; i < buffers.length; i++ )

181

188

{

182

String line = lines[ i ];

189

// get a US-ASCII version of the line for processing

190

String line;

191

try {

192

line = new String( buffers[ i ].array(), buffers[ i ].position(),

193

buffers[ i ].limit() - buffers[ i ].position(), "US-ASCII" );

194

}

195

catch( UnsupportedEncodingException e ) {

196

// we know US-ASCII is supported, so appease the compiler...

197

line = "";

198

}

183

199

184

200

if( vCard == null ) {

185

201

// look for vcard beginning

214

230

{

215

231

// try giving the line to the vcard

216

232

try {

217

vCard.parseLine( line );

233

vCard.parseLine( buffers[ i ] );

218

234

}

219

235

catch( VCard.ParseException e ) {

220

236

skipContact();

239

255

}

240

256

}

241

257

258

private ByteBuffer[] getLinesFromContent( byte[] content )

259

{

260

// count lines in data

261

int num_lines = 1;

262

for( int a = 0; a < content.length; a++ )

263

if( content[ a ] == '\n' )

264

num_lines++;

265

266

// get lines, removing \r's and \n's as we go

267

ByteBuffer lines[] = new ByteBuffer[ num_lines ];

268

int last = 0;

269

for( int a = 0, b = 0; a < content.length; a++ )

270

if( content[ a ] == '\n' ) {

271

int to = ( a > 0 && content[ a - 1 ] == '\r' &&

272

a - 1 >= last )? a - 1 : a;

273

lines[ b++ ] = ByteBuffer.wrap( content, last, to - last );

274

last = a + 1;

275

}

276

lines[ lines.length - 1 ] = ByteBuffer.wrap( content, last,

277

content.length - last );

278

279

return lines;

280

}

281

242

282

private class VCard extends ContactData

243

283

{

244

284

private final static int NAMELEVEL_NONE = 0;

247

287

private final static int NAMELEVEL_N = 3;

248

288

249

289

private String _version = null;

250

private Vector< String > _lines = null;

290

private Vector< ByteBuffer > _buffers = null;

251

291

private int _name_level = NAMELEVEL_NONE;

252

292

private boolean _parser_in_multiline = false;

253

293

private String _parser_current_name_and_params = null;

256

296

protected class UnencodeResult

257

297

{

258

298

private boolean _another_line_required;

259

private byte[] _bytes;

260

private int _num_bytes;

299

private ByteBuffer _buffer;

261

300

262

public UnencodeResult( boolean another_line_required, byte[] bytes,

263

int num_bytes )

301

public UnencodeResult( boolean another_line_required,

302

ByteBuffer buffer )

264

303

{

265

304

_another_line_required = another_line_required;

266

_bytes = bytes;

267

_num_bytes = num_bytes;

305

_buffer = buffer;

268

306

}

269

307

270

308

public boolean isAnotherLineRequired()

272

310

return _another_line_required;

273

311

}

274

312

275

public byte[] getBytes()

276

{

277

return _bytes;

278

}

279

280

public int getNumBytes()

281

{

282

return _num_bytes;

313

public ByteBuffer getBuffer()

314

{

315

return _buffer;

283

316

}

284

317

}

285

318

301

334

@SuppressWarnings("serial")

302

335

protected class SkipContactException extends Exception { }

303

336

304

public void parseLine( String line )

337

public void parseLine( ByteBuffer buffer )

305

338

throws ParseException, SkipContactException,

306

339

AbortImportException

307

340

{

341

// get a US-ASCII version of the line for processing

342

String line;

343

try {

344

line = new String( buffer.array(), buffer.position(),

345

buffer.limit() - buffer.position(), "US-ASCII" );

346

}

347

catch( UnsupportedEncodingException e ) {

348

// we know US-ASCII is supported, so appease the compiler...

349

line = "";

350

}

351

308

352

// ignore empty lines

309

353

if( line.trim() == "" ) return;

310

354

311

355

// split line into name and value parts (this may turn out to be

312

356

// unwanted if the line is a subsequent line in a multi-line

313

357

// value, but we have to do this now to check for and handle VCF

314

// versions first)

315

String[] props = line.split( ":", 2 );

316

for( int i = 0; i < props.length; i++ )

317

props[ i ] = props[ i ].trim();

358

// versions first). Also, the value part is only created tentatively

359

// because it may have an encoding/charset. Since we're treating it

360

// as UTF-8 (which is compatible with 7-bit US-ASCII) this is ok

361

// though so long as we later use the raw bytes. ALso we check for

362

// malformed property:name pairs.

363

String name_and_params, string_value;

364

{

365

String[] bits = line.split( ":", 2 );

366

if( bits.length == 2 ) {

367

name_and_params = bits[ 0 ].trim();

368

string_value = bits[ 1 ].trim();

369

if( name_and_params.length() == 0 )

370

throw new ParseException( R.string.error_vcf_malformed );

371

}

372

else

373

{

374

if( !_parser_in_multiline )

375

throw new ParseException( R.string.error_vcf_malformed );

376

name_and_params = null;

377

string_value = null;

378

}

379

}

318

380

319

381

// if we haven't yet got a version, we won't be paring anything!

320

382

if( _version == null )

321

383

{

322

384

// is this a version?

323

if( props.length == 2 && props[ 0 ].equals( "VERSION" ) )

385

if( name_and_params.equals( "VERSION" ) )

324

386

{

325

387

// yes, check/store it

326

if( !props[ 1 ].equals( "2.1" ) &&

327

!props[ 1 ].equals( "3.0" ) )

388

if( !string_value.equals( "2.1" ) &&

389

!string_value.equals( "3.0" ) )

328

390

throw new ParseException( R.string.error_vcf_version );

329

_version = props[ 1 ];

391

_version = string_value;

330

392

331

// parse any other lines we've accumulated so far

332

if( _lines != null )

333

for( int i = 0; i < _lines.size(); i++ )

334

parseLine( _lines.get( i ) );

335

_lines = null;

393

// parse any other buffers we've accumulated so far

394

if( _buffers != null )

395

for( int i = 0; i < _buffers.size(); i++ )

396

parseLine( _buffers.get( i ) );

397

_buffers = null;

336

398

}

337

399

else

338

400

{

339

// no, so stash this line till we have a version

340

if( _lines == null )

341

_lines = new Vector< String >();

342

_lines.add( line );

401

// no, so stash this buffer till we have a version

402

if( _buffers == null )

403

_buffers = new Vector< ByteBuffer >();

404

_buffers.add( buffer );

343

405

}

344

406

}

345

407

else

346

408

{

409

// value bytes, for processing

410

ByteBuffer value;

411

347

412

if( _parser_in_multiline )

348

413

{

349

414

// if we're currently in a multi-line value, use the stored

350

415

// property name and parameters

351

props = new String[ 2 ];

352

props[ 0 ] = _parser_current_name_and_params;

353

props[ 1 ] = line.trim();

416

name_and_params = _parser_current_name_and_params;

417

418

// find start of string (skip spaces/tabs)

419

int pos = buffer.position();

420

byte[] buffer_array = buffer.array();

421

while( pos < buffer.limit() && (

422

buffer_array[ pos ] == ' ' ||

423

buffer_array[ pos ] == '\t' ) )

424

{

425

pos++;

426

}

427

428

// get value from buffer

429

value = ByteBuffer.wrap( buffer.array(), pos,

430

buffer.limit() - pos );

354

431

}

355

432

else

356

433

{

357

// for normal lines, check the property name/value bits

358

if( props.length < 2 || props[ 0 ].length() == 0 )

359

throw new ParseException(

360

R.string.error_vcf_malformed );

361

362

// ignore empty properties

363

if( props[ 1 ].length() < 1 )

364

return;

434

// ignore empty values

435

if( string_value.length() < 1 ) return;

436

437

// calculate how many chars to skip from beginning of line

438

// so we skip the property "name:" part

439

int pos = buffer.position() + name_and_params.length() + 1;

440

441

// get value from buffer

442

value = ByteBuffer.wrap( buffer.array(), pos,

443

buffer.limit() - pos );

365

444

366

445

// reset the saved multi-line state

367

_parser_current_name_and_params = props[ 0 ];

446

_parser_current_name_and_params = name_and_params;

368

447

_parser_buffered_value_so_far = "";

369

448

}

370

449

371

450

// get parameter parts

372

String[] params = props[ 0 ].split( ";" );

373

for( int i = 0; i < params.length; i++ )

374

params[ i ] = params[ i ].trim();

451

String[] name_and_param_bits = name_and_params.split( ";" );

452

for( int i = 0; i < name_and_param_bits.length; i++ )

453

name_and_param_bits[ i ] = name_and_param_bits[ i ].trim();

375

454

376

// parse charset and encoding parameters

377

String charset, encoding;

378

if( ( charset = checkParam( params, "CHARSET" ) ) != null &&

379

!charset.equals( "UTF-8" ) && !charset.equals( "UTF-16" ) )

380

{

381

throw new ParseException( R.string.error_vcf_charset );

382

}

383

if( ( encoding = checkParam( params, "ENCODING" ) ) != null &&

384

!encoding.equals( "QUOTED-PRINTABLE" ) &&

385

!encoding.equals( "8BIT" ) )

455

// parse encoding parameter

456

String encoding = checkParam( name_and_param_bits, "ENCODING" );

457

if( encoding != null ) encoding = encoding.toUpperCase();

458

if( encoding != null && !encoding.equals( "8BIT" ) &&

459

!encoding.equals( "QUOTED-PRINTABLE" ) )

386

460

//&& !encoding.equals( "BASE64" ) )

387

461

{

388

462

throw new ParseException( R.string.error_vcf_encoding );

389

463

}

390

464

465

// parse charset parameter

466

String charset = checkParam( name_and_param_bits, "CHARSET" );

467

if( charset != null ) charset = charset.toUpperCase();

468

if( charset != null && !charset.equals( "US-ASCII" ) &&

469

!charset.equals( "ASCII" ) && !charset.equals( "UTF-8" ) )

470

{

471

throw new ParseException( R.string.error_vcf_charset );

472

}

473

391

474

// do unencoding (or default to a fake unencoding result with

392

475

// the raw string)

393

UnencodeResult result;

476

UnencodeResult unencoding_result = null;

394

477

if( encoding != null && encoding.equals( "QUOTED-PRINTABLE" ) )

395

result = unencodeQuotedPrintable( props[ 1 ], charset );

478

unencoding_result = unencodeQuotedPrintable( value );

396

479

// else if( encoding != null && encoding.equals( "BASE64" ) )

397

480

// result = unencodeBase64( props[ 1 ], charset );

398

else

399

result = new UnencodeResult( false, props[ 1 ].getBytes(),

400

props[ 1 ].getBytes().length );

481

if( unencoding_result != null ) {

482

value = unencoding_result.getBuffer();

483

_parser_in_multiline =

484

unencoding_result.isAnotherLineRequired();

485

}

486

487

// convert 8-bit ASCII charset to US-ASCII

488

if( charset == null || charset == "ASCII" ) {

489

value = transcodeAsciiToUtf8( value );

490

charset = "UTF-8";

491

}

401

492

402

493

// process charset

403

494

try {

404

props[ 1 ] = new String( result.getBytes(), 0,

405

result.getNumBytes(),

406

charset == null? "UTF-8" : charset );

495

string_value =

496

new String( value.array(), value.position(),

497

value.limit() - value.position(), charset );

407

498

} catch( UnsupportedEncodingException e ) {

408

499

throw new ParseException( R.string.error_vcf_charset );

409

500

}

410

501

411

502

// handle multi-line requests

412

_parser_in_multiline = result.isAnotherLineRequired();

413

503

if( _parser_in_multiline ) {

414

_parser_buffered_value_so_far += props[ 1 ];

504

_parser_buffered_value_so_far += string_value;

415

505

return;

416

506

}

417

507

418

508

// add on buffered multi-line content

419

String value = _parser_buffered_value_so_far + props[ 1 ];

509

String complete_value =

510

_parser_buffered_value_so_far + string_value;

420

511

421

512

// parse some properties

422

if( params[ 0 ].equals( "N" ) )

423

parseN( params, value );

424

else if( params[ 0 ].equals( "FN" ) )

425

parseFN( params, value );

426

else if( params[ 0 ].equals( "ORG" ) )

427

parseORG( params, value );

428

else if( params[ 0 ].equals( "TEL" ) )

429

parseTEL( params, value );

430

else if( params[ 0 ].equals( "EMAIL" ) )

431

parseEMAIL( params, value );

513

if( name_and_param_bits[ 0 ].equals( "N" ) )

514

parseN( name_and_param_bits, complete_value );

515

else if( name_and_param_bits[ 0 ].equals( "FN" ) )

516

parseFN( name_and_param_bits, complete_value );

517

else if( name_and_param_bits[ 0 ].equals( "ORG" ) )

518

parseORG( name_and_param_bits, complete_value );

519

else if( name_and_param_bits[ 0 ].equals( "TEL" ) )

520

parseTEL( name_and_param_bits, complete_value );

521

else if( name_and_param_bits[ 0 ].equals( "EMAIL" ) )

522

parseEMAIL( name_and_param_bits, complete_value );

432

523

}

433

524

}

434

525

542

633

AbortImportException

543

634

{

544

635

// missing version (and data is present)

545

if( _version == null && _lines != null )

636

if( _version == null && _buffers != null )

546

637

throw new ParseException( R.string.error_vcf_malformed );

547

638

548

639

// missing name properties?

591

682

return types;

592

683

}

593

684

594

private UnencodeResult unencodeQuotedPrintable( String str, String charset )

685

private UnencodeResult unencodeQuotedPrintable( ByteBuffer in )

595

686

{

596

687

boolean another = false;

597

688

598

// default encoding scheme

599

if( charset == null ) charset = "UTF-8";

600

601

689

// unencode quoted-pritable encoding, as per RFC1521 section 5.1

602

byte[] bytes = new byte[ str.length() ];

690

byte[] out = new byte[ in.limit() - in.position() ];

603

691

int j = 0;

604

for( int i = 0; i < str.length(); i++ )

692

for( int i = in.position(); i < in.limit(); i++ )

605

693

{

606

694

// get next char and process...

607

char ch = str.charAt( i );

608

if( ch == '=' && i < str.length() - 2 )

695

byte ch = in.array()[ i ];

696

if( ch == '=' && i < in.limit() - 2 )

609

697

{

610

698

// we found a =XX format byte, add it

611

bytes[ j ] = (byte)(

612

Character.digit( str.charAt( i + 1 ), 16 ) * 16 +

613

Character.digit( str.charAt( i + 2 ), 16 ) );

699

out[ j ] = (byte)(

700

Character.digit( in.array()[ i + 1 ], 16 ) * 16 +

701

Character.digit( in.array()[ i + 2 ], 16 ) );

614

702

i += 2;

615

703

}

616

else if( ch == '=' && i == str.length() - 1 )

704

else if( ch == '=' && i == in.limit() - 1 )

617

705

{

618

706

// we found a '=' at the end of a line signifying a multi-

619

707

// line string, so we don't add it.

622

710

}

623

711

else

624

712

// just a normal char...

625

bytes[ j ] = (byte)ch;

713

out[ j ] = (byte)ch;

626

714

j++;

627

715

}

628

716

629

return new UnencodeResult( another, bytes, j );

717

return new UnencodeResult( another, ByteBuffer.wrap( out, 0, j ) );

718

}

719

720

private ByteBuffer transcodeAsciiToUtf8( ByteBuffer in )

721

{

722

// transcode

723

byte[] out = new byte[ ( in.limit() - in.position() ) * 2 ];

724

int j = 0;

725

for( int a = in.position(); a < in.limit(); a++ )

726

{

727

// if char is < 127, keep it as-is

728

if( in.array()[ a ] >= 0 )

729

out[ j++ ] = in.array()[ a ];

730

731

// else, convert it to UTF-8

732

else {

733

int b = 0xff & (int)in.array()[ a ];

734

out[ j++ ] = (byte)( 0xc0 | ( b >> 6 ) );

735

out[ j++ ] = (byte)( 0x80 | ( b & 0x3f ) );

736

}

737

}

738

739

return ByteBuffer.wrap( out, 0, j );

630

740

}

631

741

}

632

742

}